├── .Rbuildignore ├── .gitignore ├── CODE_OF_CONDUCT.md ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── Makefile ├── NAMESPACE ├── R ├── ascii.R ├── chr-package.R ├── count.R ├── detect.R ├── extract.R ├── ngram.R ├── plus_paste.R ├── remove.R ├── replace.R ├── title.R ├── utils-pipe.R └── utils.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── docs ├── CNAME ├── CODE_OF_CONDUCT.html ├── LICENSE-text.html ├── LICENSE.html ├── README.html ├── authors.html ├── docsearch.css ├── docsearch.js ├── extra.css ├── favicon.ico ├── index.html ├── jquery.sticky-kit.min.js ├── link.svg ├── logo.png ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── chr-package.html │ ├── chr_count.html │ ├── chr_detect.html │ ├── chr_extract.html │ ├── chr_ngram_char.html │ ├── chr_remove_hashtags.html │ ├── chr_remove_linebreaks.html │ ├── chr_remove_links.html │ ├── chr_remove_mentions.html │ ├── chr_remove_tabs.html │ ├── chr_remove_ws.html │ ├── chr_replace.html │ ├── chr_replace_nonascii.html │ ├── figures │ │ └── logo.png │ ├── index.html │ ├── pipe.html │ └── title_case.html └── sitemap.xml ├── inst └── CITATION ├── man ├── chr-package.Rd ├── chr_count.Rd ├── chr_detect.Rd ├── chr_extract.Rd ├── chr_ngram_char.Rd ├── chr_remove_hashtags.Rd ├── chr_remove_linebreaks.Rd ├── chr_remove_links.Rd ├── chr_remove_mentions.Rd ├── chr_remove_tabs.Rd ├── chr_remove_ws.Rd ├── chr_replace.Rd ├── chr_replace_nonascii.Rd ├── figures │ └── logo.png ├── pipe.Rd └── title_case.Rd ├── pkgdown └── extra.css └── tests ├── testthat.R └── testthat ├── test-chr_extract-r.R └── test-detect.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^make$ 4 | ^make\.R$ 5 | ^\.readme\.rds$ 6 | ^R/plus_paste\.R$ 7 | ^LICENSE\.md$ 8 | ^CODE_OF_CONDUCT\.md$ 9 | ^_pkgdown\.yml$ 10 | ^docs$ 11 | ^pkgdown$ 12 | ^README\.Rmd$ 13 | ^chr-logo\.R$ 14 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .Rproj.user 3 | .Rhistory 4 | .RData 5 | .Ruserdata 6 | make.R 7 | make 8 | .readme.rds 9 | chr.Rproj 10 | chr-logo.R 11 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all people who 4 | contribute through reporting issues, posting feature requests, updating documentation, 5 | submitting pull requests or patches, and other activities. 6 | 7 | We are committed to making participation in this project a harassment-free experience for 8 | everyone, regardless of level of experience, gender, gender identity and expression, 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 10 | 11 | Examples of unacceptable behavior by participants include the use of sexual language or 12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment, 13 | insults, or other unprofessional conduct. 14 | 15 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 18 | from the project team. 19 | 20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 21 | opening an issue or contacting one or more of the project maintainers. 
22 | 23 | This Code of Conduct is adapted from the Contributor Covenant 24 | (http://contributor-covenant.org), version 1.0.0, available at 25 | http://contributor-covenant.org/version/1/0/0/ 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: chr 2 | Type: Package 3 | Title: Simple String Manipulation 4 | Version: 0.1.03 5 | Authors@R: person(given = "Michael Wayne", 6 | family = "Kearney", 7 | email = "kearneymw@missouri.edu", 8 | role = c("aut", "cre")) 9 | Maintainer: Michael W. Kearney 10 | Description: Clean, wrangle, and parse character [string] vectors 11 | using exclusively base R functions. 12 | License: MIT + file LICENSE 13 | Encoding: UTF-8 14 | LazyData: true 15 | RoxygenNote: 6.1.0.9000 16 | Imports: 17 | magrittr 18 | Suggests: 19 | testthat 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2018 2 | COPYRIGHT HOLDER: Michael Wayne Kearney 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2018 Michael Wayne Kearney 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the 
Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean 2 | all: 3 | @echo " 4 | --------------------- 5 | ## Documenting... 6 | --------------------- 7 | " 8 | Rscript -e "devtools::document(roclets=c('rd', 'collate', 'namespace'))" 9 | @echo " 10 | --------------------- 11 | ## Installing... 12 | --------------------- 13 | " 14 | R CMD INSTALL --no-multiarch --with-keep.source . 15 | clean: 16 | @echo " 17 | --------------------- 18 | ## Cleaning docs... 
19 | --------------------- 20 | " 21 | rm man/*.Rd 22 | 23 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(chr_count,default) 4 | S3method(chr_count,list) 5 | S3method(chr_detect,default) 6 | S3method(chr_detect,list) 7 | S3method(chr_extract,default) 8 | S3method(chr_extract,list) 9 | S3method(chr_extract_first,default) 10 | S3method(chr_extract_first,list) 11 | export("%>%") 12 | export(chr_count) 13 | export(chr_detect) 14 | export(chr_extract) 15 | export(chr_extract_first) 16 | export(chr_extract_hashtags) 17 | export(chr_extract_links) 18 | export(chr_extract_mentions) 19 | export(chr_extract_phone) 20 | export(chr_extract_words) 21 | export(chr_ngram_char) 22 | export(chr_remove_hashtags) 23 | export(chr_remove_linebreaks) 24 | export(chr_remove_links) 25 | export(chr_remove_mentions) 26 | export(chr_remove_tabs) 27 | export(chr_remove_ws) 28 | export(chr_replace) 29 | export(chr_replace_nonascii) 30 | export(title_case) 31 | importFrom(magrittr,"%>%") 32 | -------------------------------------------------------------------------------- /R/ascii.R: -------------------------------------------------------------------------------- 1 | #' Replace non-ascii with similar ascii characters 2 | #' 3 | #' Replace weird UTF values with equivalent(ish) ascii values. 4 | #' 5 | #' @param x Character vector with non-ascii characters 6 | #' @return ASCII-friendly character vector. 
#' @export
chr_replace_nonascii <- function(x) {
  ## NOTE: code points above U+FFFF are written with the eight-digit \U
  ## escape. The \u escape reads at most four hex digits, so a five-digit code
  ## after \u is parsed as a four-digit character followed by a literal digit.
  ## The substitutions run in order; a character listed in two groups is
  ## consumed by the first one, so later duplicates have been dropped.

  ## spaces
  x <- gsub("\u00A0|\u200B|\u2060|\u3000|\uFEFF", "\u0020", x)

  ## exclamation mark (U+203C double exclamation mark; the previous U+202C is
  ## an invisible bidi control and must not be turned into "!")
  x <- gsub("\u00A1|\u01C3|\u203C|\u203D|\u2762", "\u0021", x)

  ## quotation mark
  x <- gsub(
    "\u201C|\u201D|\u05F4|\u02BA|\u030B|\u030E|\u2033|\u3003",
    "\u0022", x)

  ## number sign
  x <- gsub("\u2114|\u2317|\u266F", "\u0023", x)

  ## dollar sign
  x <- gsub("\u00A4|\u20B1|\U0001F4B2", "\u0024", x)

  ## percent signs
  x <- gsub("\u066A|\u2030|\u2031|\u2052", "\u0025", x)

  ## ampersands (including the HTML entity)
  x <- gsub("\u204A|\u214B|\U0001F674|\u0026amp;", "\u0026", x)

  ## apostrophe
  x <- gsub(
    "\u2018|\u2019|\u05F3|\u02B9|\u02Bc|\u02C8|\u0301|\u2032|\uA78C",
    "\u0027", x)

  ## asterisk
  x <- gsub("\u066D|\u204E|\u2217|\u26B9|\u2731", "\u002a", x)

  ## plus sign
  x <- gsub("\u2795", "\u002B", x)

  ## comma (Arabic, ideographic and fullwidth commas)
  x <- gsub("\u060C|\u3001|\uFF0C", "\u002C", x)

  ## hyphen
  x <- gsub(
    "\u2010|\u2011|\u2012|\u2013|\u2043|\u2212|\U00010191",
    "\u002D", x)

  ## period
  x <- gsub("\u06D4|\u2E3C|\u3002", "\u002E", x)

  ## ellipsis
  x <- gsub("\u2026", "\u002E\u002E\u002E", x)

  ## forward slash
  x <- gsub("\u0338|\u2044|\u2214", "\u002F", x)

  ## colon
  x <- gsub("\u0589|\u05C3|\u2236|\uA789", "\u003A", x)

  ## semicolon
  x <- gsub("\u037E|\u061B|\u204F", "\u003B", x)

  ## less than
  x <- gsub("\u2039|\u2329|\u27E8|\u3008", "\u003C", x)

  ## equal to
  x <- gsub("\u2261|\uA78A|\U00010190", "\u003D", x)

  ## greater than
  x <- gsub("\u203A|\u232A|\u27E9|\u3009", "\u003E", x)

  ## question mark (U+037E and U+203D are already handled above)
  x <- gsub("\u00BF|\u061F|\u2048|\u2049", "\u003F", x)

  ## vertical line
  x <- gsub("\u01C0|\u05C0|\u2223|\u2758", "\u007C", x)

  ## tilde
  x <- gsub("\u02DC|\u0303|\u2053|\u223C|\uFF5E", "\u007E", x)

  ## convert rest to ascii; anything still non-ASCII becomes a <xx> byte code
  iconv(x, to = "ascii", sub = "byte")
}


## Standardize word-internal apostrophes.
##
## Replaces an apostrophe look-alike that sits between two letters with a
## plain ASCII apostrophe and keeps the surrounding letters (captured as
## groups 1 and 3) in place.
std_apos <- function(x) {
  pat <- paste0(
    "([[:alpha:]])(\u2018|\u2019|\u05F3|\u02B9|\u02Bc|\u02C8|\u0301|",
    "\u2032|\uA78C)([[:alpha:]])")
  ## Matches cannot overlap: in a word with two apostrophes separated by a
  ## single letter, that letter is consumed by the first match. A second pass
  ## picks up the apostrophes skipped by the first.
  x <- gsub(pat, "\\1\u0027\\3", x)
  gsub(pat, "\\1\u0027\\3", x)
}

## --------------------------------------------------------------------------------
## /R/chr-package.R:
## --------------------------------------------------------------------------------
#' @keywords internal
"_PACKAGE"

## --------------------------------------------------------------------------------
## /R/count.R:
## --------------------------------------------------------------------------------

#' Count matches from strings
#'
#' Count all pattern matches in character vector.
#'
#' @param x Character vector
#' @param pat Pattern (regex) to count in text.
#' @param ignore.case Logical indicating whether to ignore capitalization.
#'   Defaults to false.
#' @param invert Accepted so the signature mirrors \code{\link{chr_extract}};
#'   the counting methods do not use it.
#' @param ... Other named arguments passed to \code{\link{gregexpr}}.
#' @return Integer vector with the number of matches in each element of
#'   \code{x}. For list input, a list of such vectors.
#' @export
chr_count <- function(x, pat,
                      ignore.case = FALSE,
                      invert = FALSE,
                      ...) {
  UseMethod("chr_count")
}

#' @export
chr_count.default <- function(x, pat,
                              ignore.case = FALSE,
                              invert = FALSE,
                              ...) {
  stopifnot(is.atomic(x))
  m <- gregexpr(pat, x, ignore.case = ignore.case, ...)
  ## gregexpr() reports "no match" as -1 and a missing input as NA; neither
  ## counts as a match, so both contribute zero.
  vapply(m, function(pos) sum(pos > 0, na.rm = TRUE), integer(1))
}

#' @export
chr_count.list <- function(x, pat,
                           ignore.case = FALSE,
                           invert = FALSE,
                           ...) {
  x <- lapply(
    x, chr_count, pat = pat, ignore.case = ignore.case,
    invert = invert, ...)
  lapply(x, unlist, recursive = FALSE)
}

## --------------------------------------------------------------------------------
## /R/detect.R:
## --------------------------------------------------------------------------------
#' Detect matches in strings
#'
#' Detect matching pattern in character vector.
#'
#' @param x Character vector
#' @param pat Pattern (regex) to detect from text.
#' @param ignore.case Logical indicating whether to ignore capitalization.
#'   Defaults to false.
#' @param ... Other named arguments passed to \code{\link{grepl}} or
#'   \code{\link{grep}}. See details for more information.
#' @return Logical vector indicating whether each element matched the supplied
#'   pattern.
#' @details This is a wrapper around the base R functions \code{\link{grepl}}
#'   and \code{\link{grep}}. By default, logical values are returned (a la
#'   grepl). To return values, include \code{value = TRUE}. To return
#'   positions, include \code{which = TRUE}, \code{pos = TRUE}, or
#'   \code{position = TRUE}. If the pattern contains a \code{(?...)} group
#'   (e.g. look-ahead or look-behind) and neither \code{perl} nor \code{fixed}
#'   is supplied, \code{perl = TRUE} is set automatically.
#' @examples
#'
#' ## return logical vector
#' chr_detect(letters, "a|b|c|x|y|z")
#'
#' ## return inverted logical values
#' chr_detect(letters, "a|b|c|x|y|z", invert = TRUE)
#'
#' ## return matching positions
#' chr_detect(letters, "a|b|c|x|y|z", which = TRUE)
#'
#' ## return inverted matching positions
#' chr_detect(letters, "a|b|c|x|y|z", which = TRUE, invert = TRUE)
#'
#' ## return matching values
#' chr_detect(letters, "a|b|c|x|y|z", value = TRUE)
#'
#' ## return inverted matching values
#' chr_detect(letters, "a|b|c|x|y|z", value = TRUE, invert = TRUE)
#' @export
chr_detect <- function(x, pat, ignore.case = FALSE, ...) {
  UseMethod("chr_detect")
}


#' @export
chr_detect.default <- function(x, pat, ignore.case = FALSE, ...) {
  args <- list(pat, x, ignore.case = ignore.case, ...)
  ## "(?...)" constructs such as look-behind/ahead need the PCRE engine. The
  ## check inspects the pattern (not the text) and is reduced with any() so the
  ## condition is always a single TRUE/FALSE.
  needs_perl <- any(grepl("\\(\\?[^\\)]+\\)", pat))
  if (needs_perl && !any(c("perl", "fixed") %in% names(args))) {
    args$perl <- TRUE
  }
  if (any(c("which", "pos", "position", "value") %in% names(args))) {
    ## positions or values requested: the aliases are not grep() arguments,
    ## so drop them and let grep() handle value/invert itself
    args <- args[!names(args) %in% c("which", "pos", "position")]
    do.call("grep", args)
  } else {
    ## grepl() has no invert argument, so negate the result instead
    if ("invert" %in% names(args) && isTRUE(args$invert)) {
      tf <- `!`
    } else {
      tf <- identity
    }
    args <- args[!names(args) %in% c("invert")]
    tf(do.call("grepl", args))
  }
}


#' @export
chr_detect.list <- function(x, pat, ignore.case = FALSE, ...) {
  x <- lapply(x, chr_detect, pat = pat, ignore.case = ignore.case, ...)
  lapply(x, unlist, recursive = FALSE)
}

## --------------------------------------------------------------------------------
## /R/extract.R:
## --------------------------------------------------------------------------------

#' Extract matches from strings
#'
#' Detect and return all matching patterns from character vector.
#'
#' @param x Character vector
#' @param pat Pattern (regex) to extract from text.
#' @param ignore.case Logical indicating whether to ignore capitalization.
#'   Defaults to false.
#' @param collapse Text inserted between extracted matches. If \code{NULL}
#'   (the default) a list holding one vector of matches per input string is
#'   returned; otherwise the matches of each string are pasted together with
#'   this separator and a character vector is returned. \code{TRUE} is treated
#'   as a single space and \code{FALSE} as \code{NULL}.
#' @param invert Logical indicating whether to extract matching portion
#'   (default) or, if this value is true, non-matching portions of text.
#' @param na Logical indicating whether to return NA values for input elements
#'   without matches. Defaults to true.
#' @param ... Other named arguments passed to \code{\link{gregexpr}}.
#' @return Vector of matches extracted from input text.
#' @export
chr_extract <- function(x, pat,
                        ignore.case = FALSE,
                        collapse = NULL,
                        invert = FALSE,
                        na = TRUE,
                        ...) {
  UseMethod("chr_extract")
}

#' @export
chr_extract.default <- function(x, pat,
                                ignore.case = FALSE,
                                collapse = NULL,
                                invert = FALSE,
                                na = TRUE,
                                ...) {
  stopifnot(is.atomic(x))
  m <- gregexpr(pat, x, ignore.case = ignore.case, ...)
  out <- regmatches(x, m, invert = invert)
  if (na) {
    ## elements without any match become a single NA
    out[lengths(out) == 0] <- NA_character_
  }
  ## logical shortcuts: TRUE means "join with a space", FALSE means "no join"
  if (isTRUE(collapse)) collapse <- " "
  if (isFALSE(collapse)) collapse <- NULL
  if (is.null(collapse)) {
    return(out)
  }
  ## Join per element. Single values (including NA) pass through untouched so
  ## NA is not turned into the string "NA"; an empty match set (na = FALSE)
  ## becomes "" rather than the deparsed text "character(0)".
  vapply(out, function(hits) {
    if (length(hits) == 1L) hits else paste(hits, collapse = collapse)
  }, character(1), USE.NAMES = FALSE)
}

#' @export
chr_extract.list <- function(x, pat,
                             ignore.case = FALSE,
                             collapse = NULL,
                             invert = FALSE,
                             na = TRUE,
                             ...) {
  x <- lapply(
    x, chr_extract, pat = pat, ignore.case = ignore.case,
    collapse = collapse, invert = invert, na = na, ...)
  lapply(x, unlist, recursive = FALSE)
}

#' Extract first match from strings
#'
#' Detect and return first matching pattern from character vector. With
#' \code{na = TRUE} the result has one entry per input element (NA where
#' nothing matched); with \code{na = FALSE} only the matches are returned.
#' With \code{invert = TRUE} each string is returned with its first match
#' removed.
#'
#' @return Character vector of matches extracted from input text.
#' @rdname chr_extract
#' @export
chr_extract_first <- function(x, pat,
                              ignore.case = FALSE,
                              invert = FALSE,
                              na = TRUE,
                              ...) {
  UseMethod("chr_extract_first")
}


#' @export
chr_extract_first.default <- function(x, pat,
                                      ignore.case = FALSE,
                                      invert = FALSE,
                                      na = TRUE,
                                      ...) {
  stopifnot(is.atomic(x))
  m <- regexpr(pat, x, ignore.case = ignore.case, ...)
  if (isTRUE(invert)) {
    ## regmatches(invert = TRUE) yields, per element, the text before and
    ## after the match (or the untouched string when nothing matched); glue
    ## the pieces back together instead of deparsing the list.
    pieces <- regmatches(x, m, invert = TRUE)
    return(vapply(pieces, function(p) {
      p <- as.character(p)
      if (length(p) == 1L) p else paste(p, collapse = "")
    }, character(1), USE.NAMES = FALSE))
  }
  ## regmatches() silently drops non-matching elements, so its result is
  ## shorter than x; place the hits back at their original positions.
  hits <- as.character(regmatches(x, m))
  if (!na) {
    return(hits)
  }
  matched <- as.vector(!is.na(m) & m > -1L)
  out <- rep(NA_character_, length(x))
  out[matched] <- hits
  out
}

#' @export
chr_extract_first.list <- function(x, pat,
                                   ignore.case = FALSE,
                                   invert = FALSE,
                                   na = TRUE,
                                   ...) {
  x <- lapply(
    x, chr_extract_first, pat = pat, ignore.case = ignore.case,
    invert = invert, na = na, ...)
  lapply(x, unlist, recursive = FALSE)
}

#' Extracts all hyper-links from character vector.
#'
#' @rdname chr_extract
#' @export
chr_extract_links <- function(x, collapse = NULL) {
  ## require the scheme separator so ordinary words starting with "http" are
  ## not reported as links
  chr_extract(x, "https?://\\S+", collapse = collapse)
}



#' Extracts all words from character vector.
#'
#' @rdname chr_extract
#' @export
chr_extract_words <- function(x, collapse = NULL) {
  ## standardize apostrophes
  x <- std_apos(x)
  ## Compound form (letters joined by apostrophes, dots or hyphens) is tried
  ## before the plain form. The hyphen sits last in the bracket so it is a
  ## literal character and not a range operator.
  chr_extract(
    x,
    "\\b[[:alnum:]]+['.-]+[[:alnum:]]+\\b|\\b[[:alnum:]]+\\b",
    collapse = collapse)
}



#' Extracts all [at] mentions from character vector.
#'
#' @rdname chr_extract
#' @export
chr_extract_mentions <- function(x, collapse = NULL) {
  ## look-behind keeps the "@" out of the returned handle; needs perl = TRUE
  chr_extract(x, "(?<=@)\\w+", collapse = collapse, perl = TRUE)
}

#' Extracts all hashtags from character vector.
#'
#' @rdname chr_extract
#' @export
chr_extract_hashtags <- function(x, collapse = NULL) {
  ## look-behind keeps the "#" out of the returned tag; needs perl = TRUE
  chr_extract(x, "(?<=#)\\w+", collapse = collapse, perl = TRUE)
}

#' Extracts all phone numbers from character vector.
#'
#' @rdname chr_extract
#' @export
chr_extract_phone <- function(x, collapse = NULL) {
  ## three digit groups (area code may not start with 0 or 1) separated by a
  ## hyphen, space or dot
  phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
  chr_extract(x, phone, collapse = collapse)
}

## --------------------------------------------------------------------------------
## /R/ngram.R:
## --------------------------------------------------------------------------------


#' Character n-grams
#'
#' Returns n-grams at the character level
#'
#' @param x Character vector
#' @param n Number of characters to return per ngram
#' @param lower Logical indicating whether to lower case all text, defaults to
#'   false.
#' @param space Logical indicating whether to strip space, defaults to false.
#' @param punct Logical indicating whether to strip punctuation, defaults to
#'   false.
#'
#' @return List of length equal to input length consisting of ngram vectors.
#'   For \code{n > 1}, elements that are missing or shorter than \code{n}
#'   characters yield an empty character vector.
#' @export
#' @author ChrisMuir
#' @details Thanks to ChrisMuir
#'   (\url{https://github.com/mkearney/chr/issues/1})
chr_ngram_char <- function(x, n = 3, lower = FALSE, space = FALSE,
                           punct = FALSE) {
  # Input validation
  stopifnot(is.character(x))
  valid_n <- is.numeric(n) && length(n) == 1L && !is.na(n) &&
    n == as.integer(n) && n >= 1
  if (!valid_n) {
    stop("arg 'n' must be a whole number greater than zero")
  }
  n <- as.integer(n)
  stopifnot(is.logical(lower))
  stopifnot(is.logical(punct))
  stopifnot(is.logical(space))

  # If arg "lower" is TRUE, make all chars in x lowercase.
  if (isTRUE(lower)) x <- tolower(x)

  # If arg "punct" is TRUE, remove all punctuation from x.
  if (isTRUE(punct)) x <- gsub("[[:punct:]]", "", x)

  # If arg "space" is TRUE, remove all white space from x.
  if (isTRUE(space)) x <- gsub("\\s+", "", x)

  # Split each element of x into individual chars.
  chars <- strsplit(x, "", fixed = TRUE)

  # If n is 1L, return the split as is: strsplit already tokenizes into
  # single chars.
  if (identical(n, 1L)) return(chars)

  # Generate ngram tokens. `span` is the distance from the first to the last
  # character of one ngram.
  span <- n - 1L
  lapply(chars, function(ch) {
    n_grams <- length(ch) - span
    # A missing input is split into a single NA; anyNA() keeps the condition
    # a single TRUE/FALSE however many characters the element has.
    if (anyNA(ch) || n_grams < 1L) return(character())
    vapply(seq_len(n_grams), function(start) {
      paste(ch[start:(start + span)], collapse = "")
    }, character(1))
  })
}

## --------------------------------------------------------------------------------
## /R/plus_paste.R:
## --------------------------------------------------------------------------------


#' Add (paste) together character vectors
#'
#' Paste or paste0 character vectors using the plus operator.
#'
#' @param e1 lhs
#' @param e2 rhs
#' @return Computed value
#' @examples
#' ## normal plus operator
#' 20 + 1
#' Sys.Date() + 5
#' matrix(1:4, 2, 2) + matrix(1:4, 2, 2)
#' array(1:8, c(2, 2, 2)) + array(1:8, c(2, 2, 2))
#'
#' ## normal errors
#' Sys.Date() + Sys.Date()
#'
#' ## paste together characters
#' "this" + "that"
#'
#' ## or characters and numbers
#' "this" + 2000
#' 2000 + "that"
#'
#' ## ignores missing
#' c(NA, "this") + c("that", NA) + c("other", "other")
#'
#' ## add space to behave like paste
#' "this" + " that"
#' "this " + "that"
#'
#' ## string together any number of character vectors
#' "this " + "that " + "and the other"
#' "ABC_" + letters + "_XYZ"
#' letters + letters
#' "T" + "his " + "that " + "and " + "the other."
#'
#'
#' @method + character
#' @export
"+.character" <- function(e1, e2) {
  ## pair the operands element by element and join each pair
  unlist(Map("plus_character", e1, e2, USE.NAMES = FALSE))
}

## Join one pair of values; a missing value counts as the empty string.
`plus_character` <- function(e1, e2) {
  if (is.na(e1)) e1 <- ""
  if (is.na(e2)) e2 <- ""
  paste0(e1, e2)
}

#' @export
`+` <- function(e1, e2) {
  ## character on either side: paste; anything else: regular addition.
  ## Both paths are wrapped in try(), so a failure is reported without
  ## stopping the caller.
  if (is.character(e1) || is.character(e2)) {
    return(try(`+.character`(e1, e2)))
  }
  try(.Primitive("+")(e1, e2))
}

#' @export
"%+%" <- `+.character`

## --------------------------------------------------------------------------------
## /R/remove.R:
## --------------------------------------------------------------------------------
#' Remove URL links from text
#'
#' Removes all hyper-links from character vector.
#'
#' @param x Character vector.
#' @return Vector without URLs.
#' @export
chr_remove_links <- function(x) {
  gsub("https?:[[:graph:]]+", "", x)
}

#' Remove line breaks from text
#'
#' Replaces each run of line breaks in a character vector with a single space.
#'
#' @param x Character vector.
#' @return Vector without line breaks.
#' @export
chr_remove_linebreaks <- function(x) {
  gsub("\\n+", " ", x)
}

#' Remove tabs from text
#'
#' Replaces each run of tabs in a character vector with a single space.
#'
#' @param x Character vector.
#' @return Vector without tabs.
#' @export
chr_remove_tabs <- function(x) {
  gsub("\\t+", " ", x)
}

#' Remove [at] mentions from text
#'
#' Replaces each [at] mention in a character vector with a single space.
#'
#' @param x Character vector.
#' @return Vector without screen names.
#' @export
chr_remove_mentions <- function(x) {
  ## \w also covers the underscore, so handles such as @some_user are removed
  ## whole; this is the same character set chr_extract_mentions() reports.
  gsub("@\\w+", " ", x)
}

#' Remove hashtags from text
#'
#' Removes all hashtags from character vector.
#'
#' @param x Character vector.
#' @return Vector without hashtags.
#' @export
chr_remove_hashtags <- function(x) {
  ## a hashtag is "#" followed by a letter and any number of letters/digits
  gsub("#[[:alpha:]]{1}[[:alnum:]]{0,}", "", x)
}


## Remove stop words from text (internal, not exported).
##
## Each stop word is matched as a whole word in its given, title-case, upper-
## and lower-case forms and replaced with a space; stray punctuation left
## between spaces and surplus white space are then cleaned up.
chr_remove_stopwords <- function(x, stopwords) {
  stopwords <- chr_replace_nonascii(stopwords)
  stopwords <- c(stopwords,
    title_case(stopwords),
    toupper(stopwords),
    tolower(stopwords))
  stopwords <- unique(stopwords)
  stopwords <- paste0("\\b", stopwords, "\\b")
  stopwords <- paste(stopwords, collapse = "|")
  x <- gsub(stopwords, " ", x)
  x <- gsub("\\s+[[:punct:]]+\\s+", " ", x)
  x <- gsub("\\s{2,}", " ", x)
  x <- gsub("^\\s+|\\s+$", "", x)
  x
}

#' Remove extra spaces from text
#'
#' Removes double+ spaces and trims white space from string ends.
#'
#' @param x Character vector.
#' @return Vector without extra spaces.
#' @export
chr_remove_ws <- function(x) {
  x <- gsub("\\s{2,}", " ", x)
  gsub("^\\s+|\\s+$", "", x)
}

## --------------------------------------------------------------------------------
## /R/replace.R:
## --------------------------------------------------------------------------------
#' Replace text pattern with string
#'
#' Replaces all matching patterns with user-provided string.
#'
#' @param x Character vector or list of character vectors.
#' @param m Matching text or regular expression used to locate text to be
#'   replaced. Either a single pattern applied to every element of \code{x},
#'   or one pattern per element of \code{x}.
#' @param r Replacement text, the length of which must be either one (a single
#'   string used as the replacement text) or equal to the length of the
#'   supplied character vector.
#' @param ignore.case Logical indicating whether to ignore capitalization,
#'   defaults to FALSE.
#' @param ... Other args passed on to \code{\link{gsub}}.
#' @return Object of the same kind as \code{x} (character vector or list) with
#'   every match replaced.
#' @export
chr_replace <- function(x, m, r, ignore.case = FALSE, ...) {
  chr_replace_(x, m, r, ignore.case = ignore.case, ...)
}

## Dispatch on the container type of x.
chr_replace_ <- function(x, m, r, ignore.case, ...) {
  stopifnot(is.vector(x))
  if (is.list(x)) {
    chr_replace_list(x, m, r, ignore.case = ignore.case, ...)
  } else {
    chr_replace_default(x, m, r, ignore.case = ignore.case, ...)
  }
}

## Element-wise replacement: pattern i and replacement i are applied to
## element i of x (length-one m or r is recycled). Returns a list.
chr_replace_pairwise <- function(x, m, r, ignore.case, ...) {
  if (length(m) > 1L && length(r) > 1L && length(m) != length(r)) {
    stop("length of matching patterns must be 1 or equal to length of replacement patterns")
  }
  ## USE.NAMES (upper case) is the mapply() switch; a lower-case use.names
  ## would be forwarded to gsub() as an unknown argument and fail.
  mapply(
    gsub, m, r, x,
    MoreArgs = list(ignore.case = ignore.case, ...),
    SIMPLIFY = FALSE, USE.NAMES = FALSE)
}

## List input: returns a list with the replacement applied to each element.
chr_replace_list <- function(x, m, r, ignore.case, ...) {
  stopifnot(
    is.character(m), is.character(r),
    length(m) >= 1L, length(r) >= 1L)
  if (length(x) > 0L && (length(m) > 1L || length(r) > 1L)) {
    chr_replace_pairwise(x, m, r, ignore.case = ignore.case, ...)
  } else {
    lapply(x, function(i) gsub(m[1L], r[1L], i, ignore.case = ignore.case, ...))
  }
}

## Atomic input: returns a vector with the replacement applied.
chr_replace_default <- function(x, m, r, ignore.case, ...) {
  stopifnot(
    is.character(m), is.character(r),
    length(m) >= 1L, length(r) >= 1L)
  if (length(x) > 0L && (length(m) > 1L || length(r) > 1L)) {
    unlist(chr_replace_pairwise(x, m, r, ignore.case = ignore.case, ...))
  } else {
    gsub(m[1L], r[1L], x, ignore.case = ignore.case, ...)
  }
}





## --------------------------------------------------------------------------------
## /R/title.R:
## --------------------------------------------------------------------------------

#' Convert string to title case
#'
#' Capitalize character vector using title case
#'
#' @param x Character vector.
#' @return Character vector in title case.
8 | #' @export 9 | title_case <- function(x) { 10 | m <- regexpr("\\b[a-z]{1}", x) 11 | regmatches(x, m) <- toupper(regmatches(x, m)) 12 | x 13 | } 14 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | NULL 12 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | isFALSE <- function(x) identical(x, FALSE) 2 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | ```{r setup, include=FALSE} 6 | knitr::opts_chunk$set(echo = TRUE, collapse = TRUE, comment = "#>") 7 | library(chr) 8 | ``` 9 | 10 | # chr 11 | 12 | [![lifecycle](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) 13 | 14 | R package for simple string manipulation 15 | 16 | ## Description 17 | 18 | Clean, wrangle, and parse character [string] vectors using exclusively base 19 | R functions. 20 | 21 | ## Install 22 | 23 | ```{r install, eval=FALSE} 24 | ## install devtools if not already installed 25 | if (!requireNamespace("devtools", quietly = TRUE)) { 26 | install.packages("devtools") 27 | } 28 | 29 | ## install chr from github 30 | devtools::install_github("mkearney/chr") 31 | 32 | ## load chr 33 | library(chr) 34 | ``` 35 | 36 | ## Usage 37 | 38 | ### Detect 39 | 40 | **Detect** text patterns (an easy-to-use wrapper for `base::grep()` and `base::grepl()`). 
41 | 42 | ```{r grep} 43 | ## return logical vector 44 | chr_detect(letters, "a|b|c|x|y|z") 45 | 46 | ## return inverted logical values 47 | chr_detect(letters, "a|b|c|x|y|z", invert = TRUE) 48 | 49 | ## return matching positions 50 | chr_detect(letters, "a|b|c|x|y|z", which = TRUE) 51 | 52 | ## return inverted matching positions 53 | chr_detect(letters, "a|b|c|x|y|z", which = TRUE, invert = TRUE) 54 | 55 | ## return matching values 56 | chr_detect(letters, "a|b|c|x|y|z", value = TRUE) 57 | 58 | ## return inverted matching values 59 | chr_detect(letters, "a|b|c|x|y|z", value = TRUE, invert = TRUE) 60 | ``` 61 | 62 | ### Extract 63 | 64 | **Extract** text patterns. 65 | 66 | ```{r extract} 67 | ## some text strings 68 | x <- c("this one is @there 69 | has #MultipleLines https://github.com and 70 | http://twitter.com @twitter", 71 | "this @one #istotally their and 72 | some non-ascii symbols: \u00BF \u037E", 73 | "this one is they're https://github.com", 74 | "this one #HasHashtags #afew #ofthem", 75 | "and more @kearneymw at https://mikew.com") 76 | 77 | ## extract all URLS 78 | chr_extract_links(x) 79 | 80 | ## extract all hashtags 81 | chr_extract_hashtags(x) 82 | 83 | ## extract mentions 84 | chr_extract_mentions(x) 85 | ``` 86 | 87 | ### Count 88 | 89 | **Count** number of matches. 90 | 91 | ```{r count} 92 | ## extract all there/their/they're 93 | chr_count(x, "there|their|they\\S?re", ignore.case = TRUE) 94 | ``` 95 | 96 | ### Remove 97 | 98 | **Remove** text patterns. 
99 | 100 | ```{r remove} 101 | ## remove URLS 102 | chr_remove_links(x) 103 | 104 | ## string together functions with magrittr pipe 105 | library(magrittr) 106 | 107 | ## remove mentions and extra [white] spaces 108 | chr_remove_mentions(x) %>% 109 | chr_remove_ws() 110 | 111 | ## remove hashtags 112 | chr_remove_hashtags(x) 113 | 114 | ## remove hashtags, line breaks, and extra spaces 115 | x %>% 116 | chr_remove_hashtags() %>% 117 | chr_remove_linebreaks() %>% 118 | chr_remove_ws() 119 | 120 | ## remove links and extract words 121 | x %>% 122 | chr_remove_links() %>% 123 | chr_remove_mentions() %>% 124 | chr_extract_words() 125 | ``` 126 | 127 | ### Replace 128 | 129 | **Replace** text with string. 130 | 131 | ```{r replace} 132 | ## replace their with they're 133 | chr_replace(x, "their", "they're", ignore.case = TRUE) 134 | ``` 135 | 136 | ASCII functions currently *in progress*. For example, replace non-ASCII symbols 137 | with similar ASCII characters (*work in progress*). 138 | 139 | ```{r ascii} 140 | ## ascii version 141 | chr_replace_nonascii(x) 142 | ``` 143 | 144 | 145 | ### n-grams 146 | 147 | Create **ngram**s at the character-level. 148 | 149 | ```{r ngrams} 150 | ## character vector 151 | x <- c("Acme Pizza, Inc.", "Tom's Sports Equipment, LLC") 152 | 153 | ## 2 char level ngram 154 | chr_ngram_char(x, n = 2L) 155 | 156 | ## 3 char level ngram in lower case and stripped of punctuation and white space 157 | chr_ngram_char(x, n = 3L, lower = TRUE, punct = TRUE, space = TRUE) 158 | ``` 159 | 160 | ### Contributions 161 | 162 | Please note that this project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms.
163 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # chr 3 | 4 | [![lifecycle](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) 5 | 6 | R package for simple string manipulation 7 | 8 | ## Description 9 | 10 | Clean, wrangle, and parse character \[string\] vectors using 11 | exclusively base R functions. 12 | 13 | ## Install 14 | 15 | ``` r 16 | ## install devtools if not already installed 17 | if (!requireNamespace("devtools", quietly = TRUE)) { 18 | install.packages("devtools") 19 | } 20 | 21 | ## install chr from github 22 | devtools::install_github("mkearney/chr") 23 | 24 | ## load chr 25 | library(chr) 26 | ``` 27 | 28 | ## Usage 29 | 30 | ### Detect 31 | 32 | **Detect** text patterns (an easy-to-use wrapper for `base::grep()` and 33 | `base::grepl()`). 34 | 35 | ``` r 36 | ## return logical vector 37 | chr_detect(letters, "a|b|c|x|y|z") 38 | #> [1] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 39 | #> [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 40 | #> [23] FALSE TRUE TRUE TRUE 41 | 42 | ## return inverted logical values 43 | chr_detect(letters, "a|b|c|x|y|z", invert = TRUE) 44 | #> [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE 45 | #> [12] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE 46 | #> [23] TRUE FALSE FALSE FALSE 47 | 48 | ## return matching positions 49 | chr_detect(letters, "a|b|c|x|y|z", which = TRUE) 50 | #> [1] 1 2 3 24 25 26 51 | 52 | ## return inverted matching positions 53 | chr_detect(letters, "a|b|c|x|y|z", which = TRUE, invert = TRUE) 54 | #> [1] 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 55 | 56 | ## return matching values 57 | chr_detect(letters, "a|b|c|x|y|z", value = TRUE) 58 | #> [1] "a" "b" "c" "x" "y" "z" 59 | 60 | ## return inverted matching values 
61 | chr_detect(letters, "a|b|c|x|y|z", value = TRUE, invert = TRUE) 62 | #> [1] "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" 63 | #> [18] "u" "v" "w" 64 | ``` 65 | 66 | ### Extract 67 | 68 | **Extract** text patterns. 69 | 70 | ``` r 71 | ## some text strings 72 | x <- c("this one is @there 73 | has #MultipleLines https://github.com and 74 | http://twitter.com @twitter", 75 | "this @one #istotally their and 76 | some non-ascii symbols: \u00BF \u037E", 77 | "this one is they're https://github.com", 78 | "this one #HasHashtags #afew #ofthem", 79 | "and more @kearneymw at https://mikew.com") 80 | 81 | ## extract all URLS 82 | chr_extract_links(x) 83 | #> [[1]] 84 | #> [1] "https://github.com" "http://twitter.com" 85 | #> 86 | #> [[2]] 87 | #> [1] NA 88 | #> 89 | #> [[3]] 90 | #> [1] "https://github.com" 91 | #> 92 | #> [[4]] 93 | #> [1] NA 94 | #> 95 | #> [[5]] 96 | #> [1] "https://mikew.com" 97 | 98 | ## extract all hashtags 99 | chr_extract_hashtags(x) 100 | #> [[1]] 101 | #> [1] "MultipleLines" 102 | #> 103 | #> [[2]] 104 | #> [1] "istotally" 105 | #> 106 | #> [[3]] 107 | #> [1] NA 108 | #> 109 | #> [[4]] 110 | #> [1] "HasHashtags" "afew" "ofthem" 111 | #> 112 | #> [[5]] 113 | #> [1] NA 114 | 115 | ## extract mentions 116 | chr_extract_mentions(x) 117 | #> [[1]] 118 | #> [1] "there" "twitter" 119 | #> 120 | #> [[2]] 121 | #> [1] "one" 122 | #> 123 | #> [[3]] 124 | #> [1] NA 125 | #> 126 | #> [[4]] 127 | #> [1] NA 128 | #> 129 | #> [[5]] 130 | #> [1] "kearneymw" 131 | ``` 132 | 133 | ### Count 134 | 135 | **Count** number of matches. 136 | 137 | ``` r 138 | ## count all there/their/they're 139 | chr_count(x, "there|their|they\\S?re", ignore.case = TRUE) 140 | #> [1] 1 1 1 0 0 141 | ``` 142 | 143 | ### Remove 144 | 145 | **Remove** text patterns. 
146 | 147 | ``` r 148 | ## remove URLS 149 | chr_remove_links(x) 150 | #> [1] "this one is @there\n has #MultipleLines and \n @twitter" 151 | #> [2] "this @one #istotally their and \n some non-ascii symbols: ¿ ;" 152 | #> [3] "this one is they're " 153 | #> [4] "this one #HasHashtags #afew #ofthem" 154 | #> [5] "and more @kearneymw at " 155 | 156 | ## string together functions with magrittr pipe 157 | library(magrittr) 158 | 159 | ## remove mentions and extra [white] spaces 160 | chr_remove_mentions(x) %>% 161 | chr_remove_ws() 162 | #> [1] "this one is has #MultipleLines https://github.com and http://twitter.com" 163 | #> [2] "this #istotally their and some non-ascii symbols: ¿ ;" 164 | #> [3] "this one is they're https://github.com" 165 | #> [4] "this one #HasHashtags #afew #ofthem" 166 | #> [5] "and more at https://mikew.com" 167 | 168 | ## remove hashtags 169 | chr_remove_hashtags(x) 170 | #> [1] "this one is @there\n has https://github.com and \n http://twitter.com @twitter" 171 | #> [2] "this @one their and \n some non-ascii symbols: ¿ ;" 172 | #> [3] "this one is they're https://github.com" 173 | #> [4] "this one " 174 | #> [5] "and more @kearneymw at https://mikew.com" 175 | 176 | ## remove hashtags, line breaks, and extra spaces 177 | x %>% 178 | chr_remove_hashtags() %>% 179 | chr_remove_linebreaks() %>% 180 | chr_remove_ws() 181 | #> [1] "this one is @there has https://github.com and http://twitter.com @twitter" 182 | #> [2] "this @one their and some non-ascii symbols: ¿ ;" 183 | #> [3] "this one is they're https://github.com" 184 | #> [4] "this one" 185 | #> [5] "and more @kearneymw at https://mikew.com" 186 | 187 | ## remove links and extract words 188 | x %>% 189 | chr_remove_links() %>% 190 | chr_remove_mentions() %>% 191 | chr_extract_words() 192 | #> [[1]] 193 | #> [1] "this" "one" "is" "has" 194 | #> [5] "MultipleLines" "and" 195 | #> 196 | #> [[2]] 197 | #> [1] "this" "istotally" "their" "and" "some" "non-ascii" 198 | #> [7] "symbols" 199 | #> 
200 | #> [[3]] 201 | #> [1] "this" "one" "is" "they're" 202 | #> 203 | #> [[4]] 204 | #> [1] "this" "one" "HasHashtags" "afew" "ofthem" 205 | #> 206 | #> [[5]] 207 | #> [1] "and" "more" "at" 208 | ``` 209 | 210 | ### Replace 211 | 212 | **Replace** text with string. 213 | 214 | ``` r 215 | ## replace their with they're 216 | chr_replace(x, "their", "they're", ignore.case = TRUE) 217 | #> [1] "this one is @there\n has #MultipleLines https://github.com and \n http://twitter.com @twitter" 218 | #> [2] "this @one #istotally they're and \n some non-ascii symbols: ¿ ;" 219 | #> [3] "this one is they're https://github.com" 220 | #> [4] "this one #HasHashtags #afew #ofthem" 221 | #> [5] "and more @kearneymw at https://mikew.com" 222 | ``` 223 | 224 | ASCII functions currently *in progress*. For example, replace non-ASCII 225 | symbols with similar ASCII characters (*work in progress*). 226 | 227 | ``` r 228 | ## ascii version 229 | chr_replace_nonascii(x) 230 | #> [1] "this one is @there\n has #MultipleLines https://github.com and \n http://twitter.com @twitter" 231 | #> [2] "this @one #istotally their and \n some non-ascii symbols: ? ;" 232 | #> [3] "this one is they're https://github.com" 233 | #> [4] "this one #HasHashtags #afew #ofthem" 234 | #> [5] "and more @kearneymw at https://mikew.com" 235 | ``` 236 | 237 | ### n-grams 238 | 239 | Create **ngram**s at the character-level. 240 | 241 | ``` r 242 | ## character vector 243 | x <- c("Acme Pizza, Inc.", "Tom's Sports Equipment, LLC") 244 | 245 | ## 2 char level ngram 246 | chr_ngram_char(x, n = 2L) 247 | #> [[1]] 248 | #> [1] "Ac" "cm" "me" "e " " P" "Pi" "iz" "zz" "za" "a," ", " " I" "In" "nc" 249 | #> [15] "c." 
250 | #> 251 | #> [[2]] 252 | #> [1] "To" "om" "m'" "'s" "s " " S" "Sp" "po" "or" "rt" "ts" "s " " E" "Eq" 253 | #> [15] "qu" "ui" "ip" "pm" "me" "en" "nt" "t," ", " " L" "LL" "LC" 254 | 255 | ## 3 char level ngram in lower case and stripped of punctuation and white space 256 | chr_ngram_char(x, n = 3L, lower = TRUE, punct = TRUE, space = TRUE) 257 | #> [[1]] 258 | #> [1] "acm" "cme" "mep" "epi" "piz" "izz" "zza" "zai" "ain" "inc" 259 | #> 260 | #> [[2]] 261 | #> [1] "tom" "oms" "mss" "ssp" "spo" "por" "ort" "rts" "tse" "seq" "equ" 262 | #> [12] "qui" "uip" "ipm" "pme" "men" "ent" "ntl" "tll" "llc" 263 | ``` 264 | 265 | ### Contributions 266 | 267 | Please note that this project is released with a [Contributor Code of 268 | Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree 269 | to abide by its terms. 270 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://chr.mikewk.com/ 2 | 3 | template: 4 | ganalytics: "UA-64959413-1" 5 | 6 | authors: 7 | Michael Wayne Kearney: 8 | href: https://mikewk.com 9 | navbar: 10 | right: 11 | - icon: fa-github fa-lg 12 | href: https://github.com/mkearney/chr 13 | 14 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | chr.mikewk.com 2 | 3 | -------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Contributor Code of Conduct • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 49 | 50 | 51 | 52 | 53 | 54 |
55 |
56 | 95 | 96 | 97 |
98 | 99 |
100 |
101 | 104 | 105 |
106 | 107 |

As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.

108 |

We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.

109 |

Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.

110 |

Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.

111 |

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.

112 |

This Code of Conduct is adapted from the Contributor Covenant (http://contributor-covenant.org), version 1.0.0, available at http://contributor-covenant.org/version/1/0/0/

113 |
114 | 115 |
116 | 117 |
118 | 119 | 120 | 130 |
131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /docs/LICENSE-text.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | License • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 49 | 50 | 51 | 52 | 53 | 54 |
55 |
56 | 95 | 96 | 97 |
98 | 99 |
100 |
101 | 104 | 105 |
YEAR: 2018
106 | COPYRIGHT HOLDER: Michael Wayne Kearney
107 | 
108 | 109 |
110 | 111 |
112 | 113 | 114 | 124 |
125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /docs/LICENSE.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | MIT License • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 49 | 50 | 51 | 52 | 53 | 54 |
55 |
56 | 95 | 96 | 97 |
98 | 99 |
100 |
101 | 104 | 105 |
106 | 107 |

Copyright (c) 2018 Michael Wayne Kearney

108 |

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

109 |

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

110 |

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

111 |
112 | 113 |
114 | 115 |
116 | 117 | 118 | 128 |
129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Citation and Authors • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 49 | 50 | 51 | 52 | 53 | 54 |
55 |
56 | 95 | 96 | 97 |
98 | 99 |
100 |
101 | 105 | 106 |

Kearney MW (2018). 107 | chr: Simple String Manipulation. 108 | R package version 0.1.03. 109 |

110 |
@Manual{chr-package,
111 |   title = {chr: Simple String Manipulation},
112 |   author = {Michael Wayne Kearney},
113 |   year = {2018},
114 |   note = {R package version 0.1.03},
115 | }
116 | 119 | 120 | 126 | 127 |
128 | 129 |
130 | 131 | 132 | 142 |
143 | 144 | 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /docs/docsearch.css: -------------------------------------------------------------------------------- 1 | /* Docsearch -------------------------------------------------------------- */ 2 | /* 3 | Source: https://github.com/algolia/docsearch/ 4 | License: MIT 5 | */ 6 | 7 | .algolia-autocomplete { 8 | display: block; 9 | -webkit-box-flex: 1; 10 | -ms-flex: 1; 11 | flex: 1 12 | } 13 | 14 | .algolia-autocomplete .ds-dropdown-menu { 15 | width: 100%; 16 | min-width: none; 17 | max-width: none; 18 | padding: .75rem 0; 19 | background-color: #fff; 20 | background-clip: padding-box; 21 | border: 1px solid rgba(0, 0, 0, .1); 22 | box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); 23 | } 24 | 25 | @media (min-width:768px) { 26 | .algolia-autocomplete .ds-dropdown-menu { 27 | width: 175% 28 | } 29 | } 30 | 31 | .algolia-autocomplete .ds-dropdown-menu::before { 32 | display: none 33 | } 34 | 35 | .algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { 36 | padding: 0; 37 | background-color: rgb(255,255,255); 38 | border: 0; 39 | max-height: 80vh; 40 | } 41 | 42 | .algolia-autocomplete .ds-dropdown-menu .ds-suggestions { 43 | margin-top: 0 44 | } 45 | 46 | .algolia-autocomplete .algolia-docsearch-suggestion { 47 | padding: 0; 48 | overflow: visible 49 | } 50 | 51 | .algolia-autocomplete .algolia-docsearch-suggestion--category-header { 52 | padding: .125rem 1rem; 53 | margin-top: 0; 54 | font-size: 1.3em; 55 | font-weight: 500; 56 | color: #00008B; 57 | border-bottom: 0 58 | } 59 | 60 | .algolia-autocomplete .algolia-docsearch-suggestion--wrapper { 61 | float: none; 62 | padding-top: 0 63 | } 64 | 65 | .algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { 66 | float: none; 67 | width: auto; 68 | padding: 0; 69 | text-align: left 70 | } 71 | 72 | .algolia-autocomplete .algolia-docsearch-suggestion--content { 73 | float: none; 74 | 
width: auto; 75 | padding: 0 76 | } 77 | 78 | .algolia-autocomplete .algolia-docsearch-suggestion--content::before { 79 | display: none 80 | } 81 | 82 | .algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { 83 | padding-top: .75rem; 84 | margin-top: .75rem; 85 | border-top: 1px solid rgba(0, 0, 0, .1) 86 | } 87 | 88 | .algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { 89 | display: block; 90 | padding: .1rem 1rem; 91 | margin-bottom: 0.1; 92 | font-size: 1.0em; 93 | font-weight: 400 94 | /* display: none */ 95 | } 96 | 97 | .algolia-autocomplete .algolia-docsearch-suggestion--title { 98 | display: block; 99 | padding: .25rem 1rem; 100 | margin-bottom: 0; 101 | font-size: 0.9em; 102 | font-weight: 400 103 | } 104 | 105 | .algolia-autocomplete .algolia-docsearch-suggestion--text { 106 | padding: 0 1rem .5rem; 107 | margin-top: -.25rem; 108 | font-size: 0.8em; 109 | font-weight: 400; 110 | line-height: 1.25 111 | } 112 | 113 | .algolia-autocomplete .algolia-docsearch-footer { 114 | width: 110px; 115 | height: 20px; 116 | z-index: 3; 117 | margin-top: 10.66667px; 118 | float: right; 119 | font-size: 0; 120 | line-height: 0; 121 | } 122 | 123 | .algolia-autocomplete .algolia-docsearch-footer--logo { 124 | background-image: url("data:image/svg+xml;utf8,"); 125 | background-repeat: no-repeat; 126 | background-position: 50%; 127 | background-size: 100%; 128 | overflow: hidden; 129 | text-indent: -9000px; 130 | width: 100%; 131 | height: 100%; 132 | display: block; 133 | transform: translate(-8px); 134 | } 135 | 136 | .algolia-autocomplete .algolia-docsearch-suggestion--highlight { 137 | color: #FF8C00; 138 | background: rgba(232, 189, 54, 0.1) 139 | } 140 | 141 | 142 | .algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { 143 | box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) 144 | } 145 | 146 | .algolia-autocomplete 
.ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { 147 | background-color: rgba(192, 192, 192, .15) 148 | } 149 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/extra.css: -------------------------------------------------------------------------------- 1 | @import url("https://fonts.googleapis.com/css?family=Lato:700"); 2 | @import url("https://cdn.rawgit.com/tonsky/FiraCode/1.205/distr/fira_code.css"); 3 | 4 | body { 5 | font-family: "Avenir Next", "Helvetica Neue", Helvetica, Arial, sans-serif; 6 | font-weight: 400; 7 | } 8 | 9 | h1, h2, h3, h4, .h1, .h2, .h3, .h4 { 10 | font-family: Lato, "Avenir Next", "Helvetica Neue", Helvetica, Arial, sans-serif; 11 | font-weight: 700; 12 | } 13 | 14 | pre, code { 15 | font-family: "Fira Code", Consolas, Inconsolata, monospace; 16 | } 17 | 18 | .navbar-default { 19 | background-color: #f3f3f3; 20 | border-color: #eee; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkearney/chr/02bb519e6656d9b97a43ba31c2ad38599d89e2be/docs/favicon.ico -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | Sticky-kit v1.1.2 | WTFPL | Leaf 
Corcoran 2015 | http://leafo.net 3 | */ 4 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 5 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
"))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 6 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 8 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 9 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n 2 | 3 | 5 | 8 | 12 | 13 | 
-------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkearney/chr/02bb519e6656d9b97a43ba31c2ad38599d89e2be/docs/logo.png -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body > .container { 21 | display: flex; 22 | height: 100%; 23 | flex-direction: column; 24 | 25 | padding-top: 60px; 26 | } 27 | 28 | body > .container .row { 29 | flex: 1 0 auto; 30 | } 31 | 32 | footer { 33 | margin-top: 45px; 34 | padding: 35px 0 36px; 35 | border-top: 1px solid #e5e5e5; 36 | color: #666; 37 | display: flex; 38 | flex-shrink: 0; 39 | } 40 | footer p { 41 | margin-bottom: 0; 42 | } 43 | footer div { 44 | flex: 1; 45 | } 46 | footer .pkgdown { 47 | text-align: right; 48 | } 49 | footer p { 50 | margin-bottom: 0; 51 | } 52 | 53 | img.icon { 54 | float: right; 55 | } 56 | 57 | img { 58 | max-width: 100%; 59 | } 60 | 61 | /* Typographic tweaking ---------------------------------*/ 62 | 63 | .contents h1.page-header { 64 | margin-top: calc(-60px + 1em); 65 | } 66 | 67 | /* Section anchors ---------------------------------*/ 68 | 69 | a.anchor { 70 | margin-left: -30px; 71 | display:inline-block; 72 | width: 30px; 73 | height: 30px; 74 | visibility: hidden; 75 | 76 | 
background-image: url(./link.svg); 77 | background-repeat: no-repeat; 78 | background-size: 20px 20px; 79 | background-position: center center; 80 | } 81 | 82 | .hasAnchor:hover a.anchor { 83 | visibility: visible; 84 | } 85 | 86 | @media (max-width: 767px) { 87 | .hasAnchor:hover a.anchor { 88 | visibility: hidden; 89 | } 90 | } 91 | 92 | 93 | /* Fixes for fixed navbar --------------------------*/ 94 | 95 | .contents h1, .contents h2, .contents h3, .contents h4 { 96 | padding-top: 60px; 97 | margin-top: -40px; 98 | } 99 | 100 | /* Static header placement on mobile devices */ 101 | @media (max-width: 767px) { 102 | .navbar-fixed-top { 103 | position: absolute; 104 | } 105 | .navbar { 106 | padding: 0; 107 | } 108 | } 109 | 110 | 111 | /* Sidebar --------------------------*/ 112 | 113 | #sidebar { 114 | margin-top: 30px; 115 | } 116 | #sidebar h2 { 117 | font-size: 1.5em; 118 | margin-top: 1em; 119 | } 120 | 121 | #sidebar h2:first-child { 122 | margin-top: 0; 123 | } 124 | 125 | #sidebar .list-unstyled li { 126 | margin-bottom: 0.5em; 127 | } 128 | 129 | .orcid { 130 | height: 16px; 131 | vertical-align: middle; 132 | } 133 | 134 | /* Reference index & topics ----------------------------------------------- */ 135 | 136 | .ref-index th {font-weight: normal;} 137 | 138 | .ref-index td {vertical-align: top;} 139 | .ref-index .alias {width: 40%;} 140 | .ref-index .title {width: 60%;} 141 | 142 | .ref-index .alias {width: 40%;} 143 | .ref-index .title {width: 60%;} 144 | 145 | .ref-arguments th {text-align: right; padding-right: 10px;} 146 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 147 | .ref-arguments .name {width: 20%;} 148 | .ref-arguments .desc {width: 80%;} 149 | 150 | /* Nice scrolling for wide elements --------------------------------------- */ 151 | 152 | table { 153 | display: block; 154 | overflow: auto; 155 | } 156 | 157 | /* Syntax highlighting ---------------------------------------------------- */ 158 | 159 | pre { 160 | word-wrap: 
normal; 161 | word-break: normal; 162 | border: 1px solid #eee; 163 | } 164 | 165 | pre, code { 166 | background-color: #f8f8f8; 167 | color: #333; 168 | } 169 | 170 | pre code { 171 | overflow: auto; 172 | word-wrap: normal; 173 | white-space: pre; 174 | } 175 | 176 | pre .img { 177 | margin: 5px 0; 178 | } 179 | 180 | pre .img img { 181 | background-color: #fff; 182 | display: block; 183 | height: auto; 184 | } 185 | 186 | code a, pre a { 187 | color: #375f84; 188 | } 189 | 190 | a.sourceLine:hover { 191 | text-decoration: none; 192 | } 193 | 194 | .fl {color: #1514b5;} 195 | .fu {color: #000000;} /* function */ 196 | .ch,.st {color: #036a07;} /* string */ 197 | .kw {color: #264D66;} /* keyword */ 198 | .co {color: #888888;} /* comment */ 199 | 200 | .message { color: black; font-weight: bolder;} 201 | .error { color: orange; font-weight: bolder;} 202 | .warning { color: #6A0366; font-weight: bolder;} 203 | 204 | /* Clipboard --------------------------*/ 205 | 206 | .hasCopyButton { 207 | position: relative; 208 | } 209 | 210 | .btn-copy-ex { 211 | position: absolute; 212 | right: 0; 213 | top: 0; 214 | visibility: hidden; 215 | } 216 | 217 | .hasCopyButton:hover button.btn-copy-ex { 218 | visibility: visible; 219 | } 220 | 221 | /* mark.js ----------------------------*/ 222 | 223 | mark { 224 | background-color: rgba(255, 255, 51, 0.5); 225 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 226 | padding: 1px; 227 | } 228 | 229 | /* vertical spacing after htmlwidgets */ 230 | .html-widget { 231 | margin-bottom: 10px; 232 | } 233 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $("#sidebar") 6 | .stick_in_parent({offset_top: 40}) 7 | .on('sticky_kit:bottom', function(e) { 8 | $(this).parent().css('position', 'static'); 9 | }) 
10 | .on('sticky_kit:unbottom', function(e) { 11 | $(this).parent().css('position', 'relative'); 12 | }); 13 | 14 | $('body').scrollspy({ 15 | target: '#sidebar', 16 | offset: 60 17 | }); 18 | 19 | $('[data-toggle="tooltip"]').tooltip(); 20 | 21 | var cur_path = paths(location.pathname); 22 | var links = $("#navbar ul li a"); 23 | var max_length = -1; 24 | var pos = -1; 25 | for (var i = 0; i < links.length; i++) { 26 | if (links[i].getAttribute("href") === "#") 27 | continue; 28 | var path = paths(links[i].pathname); 29 | 30 | var length = prefix_length(cur_path, path); 31 | if (length > max_length) { 32 | max_length = length; 33 | pos = i; 34 | } 35 | } 36 | 37 | // Add class to parent
  • , and enclosing
  • if in dropdown 38 | if (pos >= 0) { 39 | var menu_anchor = $(links[pos]); 40 | menu_anchor.parent().addClass("active"); 41 | menu_anchor.closest("li.dropdown").addClass("active"); 42 | } 43 | }); 44 | 45 | function paths(pathname) { 46 | var pieces = pathname.split("/"); 47 | pieces.shift(); // always starts with / 48 | 49 | var end = pieces[pieces.length - 1]; 50 | if (end === "index.html" || end === "") 51 | pieces.pop(); 52 | return(pieces); 53 | } 54 | 55 | function prefix_length(needle, haystack) { 56 | if (needle.length > haystack.length) 57 | return(0); 58 | 59 | // Special case for length-0 haystack, since for loop won't run 60 | if (haystack.length === 0) { 61 | return(needle.length === 0 ? 1 : 0); 62 | } 63 | 64 | for (var i = 0; i < haystack.length; i++) { 65 | if (needle[i] != haystack[i]) 66 | return(i); 67 | } 68 | 69 | return(haystack.length); 70 | } 71 | 72 | /* Clipboard --------------------------*/ 73 | 74 | function changeTooltipMessage(element, msg) { 75 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 76 | element.setAttribute('data-original-title', msg); 77 | $(element).tooltip('show'); 78 | element.setAttribute('data-original-title', tooltipOriginalTitle); 79 | } 80 | 81 | if(Clipboard.isSupported()) { 82 | $(document).ready(function() { 83 | var copyButton = ""; 84 | 85 | $(".examples, div.sourceCode").addClass("hasCopyButton"); 86 | 87 | // Insert copy buttons: 88 | $(copyButton).prependTo(".hasCopyButton"); 89 | 90 | // Initialize tooltips: 91 | $('.btn-copy-ex').tooltip({container: 'body'}); 92 | 93 | // Initialize clipboard: 94 | var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { 95 | text: function(trigger) { 96 | return trigger.parentNode.textContent; 97 | } 98 | }); 99 | 100 | clipboardBtnCopies.on('success', function(e) { 101 | changeTooltipMessage(e.trigger, 'Copied!'); 102 | e.clearSelection(); 103 | }); 104 | 105 | clipboardBtnCopies.on('error', function() { 106 | 
changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 107 | }); 108 | }); 109 | } 110 | })(window.jQuery || window.$) 111 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.2.3.2 2 | pkgdown: 1.1.0.9000 3 | pkgdown_sha: ad5a58780c6551af53561df51b58bc874ca8ccd8 4 | articles: [] 5 | urls: 6 | reference: https://chr.mikewk.com//reference 7 | article: https://chr.mikewk.com//articles 8 | 9 | -------------------------------------------------------------------------------- /docs/reference/chr-package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | chr: Simple String Manipulation — chr-package • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 |
    59 |
    60 | 99 | 100 | 101 |
    102 | 103 |
    104 |
    105 | 110 | 111 |
    112 | 113 |

    114 |

    Clean, wrangle, and parse character [string] vectors 115 | using exclusively base R functions.

    116 | 117 |
    118 | 119 | 120 | 121 |
    122 | 133 |
    134 | 135 | 145 |
    146 | 147 | 148 | 149 | 150 | 151 | 152 | -------------------------------------------------------------------------------- /docs/reference/chr_count.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Count matches from strings — chr_count • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Count all pattern matches in character vector.

    112 | 113 |
    114 | 115 |
    chr_count(x, pat, ignore.case = FALSE, invert = FALSE, ...)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 132 | 133 | 134 | 135 | 137 | 138 | 139 | 140 | 141 | 142 |
    x

    Character vector

    pat

    Pattern (regex) to count in text.

    ignore.case

    Logical indicating whether to ignore capitalization. 131 | Defaults to false.

    invert

    Logical indicating whether to extract matching portion 136 | (default) or, if this value is true, non-matching portions of text.

    ...

    Other named arguments passed to gregexpr.

    143 | 144 |

    Value

    145 | 146 |

    Vector of match counts for the input text.

    147 | 148 | 149 |
    150 | 159 |
    160 | 161 | 171 |
    172 | 173 | 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /docs/reference/chr_detect.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Detect matches in strings — chr_detect • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Detect matching pattern in character vector.

    112 | 113 |
    114 | 115 |
    chr_detect(x, pat, ignore.case = FALSE, ...)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 132 | 133 | 134 | 135 | 137 | 138 |
    x

    Character vector

    pat

    Pattern (regex) to detect from text.

    ignore.case

    Logical indicating whether to ignore capitalization. 131 | Defaults to false.

    ...

    Other named arguments passed to grepl or grep. 136 | See details for more information.

    139 | 140 |

    Value

    141 | 142 |

    Logical vector indicating whether each element matched the supplied pattern.

    143 | 144 |

    Details

    145 | 146 |

    This is a wrapper around the base R functions grepl and 147 | grep. By default, logical values are returned (a la grepl). To return 148 | values, include value = TRUE. To return positions, include which = TRUE, 149 | pos = TRUE, or position = TRUE.

    150 | 151 | 152 |

    Examples

    153 |
    154 | ## return logical vector 155 | chr_detect(letters, "a|b|c|x|y|z")
    #> [1] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 156 | #> [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE 157 | #> [25] TRUE TRUE
    158 | ## return inverted logical values 159 | chr_detect(letters, "a|b|c|x|y|z", invert = TRUE)
    #> [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE 160 | #> [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE 161 | #> [25] FALSE FALSE
    162 | ## return matching positions 163 | chr_detect(letters, "a|b|c|x|y|z", which = TRUE)
    #> [1] 1 2 3 24 25 26
    164 | ## return inverted matching positions 165 | chr_detect(letters, "a|b|c|x|y|z", which = TRUE, invert = TRUE)
    #> [1] 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
    166 | ## return matching values 167 | chr_detect(letters, "a|b|c|x|y|z", value = TRUE)
    #> [1] "a" "b" "c" "x" "y" "z"
    168 | ## return inverted matching values 169 | chr_detect(letters, "a|b|c|x|y|z", value = TRUE, invert = TRUE)
    #> [1] "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" 170 | #> [20] "w"
    171 |
    172 | 185 |
    186 | 187 | 197 |
    198 | 199 | 200 | 201 | 202 | 203 | 204 | -------------------------------------------------------------------------------- /docs/reference/chr_extract.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Extract matches from strings — chr_extract • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 52 | 53 | 54 | 55 | 56 | 57 |
    58 |
    59 | 98 | 99 | 100 |
    101 | 102 |
    103 |
    104 | 109 | 110 |
    111 | 112 |

    Detect and return all matching patterns from character vector.

    113 |

    Detect and return first matching pattern from character vector.

    114 | 115 |
    116 | 117 |
    chr_extract(x, pat, ignore.case = FALSE, collapse = NULL,
    118 |   invert = FALSE, na = TRUE, ...)
    119 | 
    120 | chr_extract_first(x, pat, ignore.case = FALSE, invert = FALSE,
    121 |   na = TRUE, ...)
    122 | 
    123 | chr_extract_links(x, collapse = NULL)
    124 | 
    125 | chr_extract_words(x, collapse = NULL)
    126 | 
    127 | chr_extract_mentions(x, collapse = NULL)
    128 | 
    129 | chr_extract_hashtags(x, collapse = NULL)
    130 | 
    131 | chr_extract_phone(x, collapse = NULL)
    132 | 133 |

    Arguments

    134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 148 | 149 | 150 | 151 | 153 | 154 | 155 | 156 | 158 | 159 | 160 | 161 | 163 | 164 | 165 | 166 | 167 | 168 |
    x

    Character vector

    pat

    Pattern (regex) to extract from text.

    ignore.case

    Logical indicating whether to ignore capitalization. 147 | Defaults to false.

    collapse

    Text inserted between extracted matches. If NULL (the 152 | default), a vector of matches is returned for each input string.

    invert

    Logical indicating whether to extract matching portion 157 | (default) or, if this value is true, non-matching portions of text.

    na

    Logical indicating whether to return NA values for input elements 162 | without matches. Defaults to true.

    ...

    Other named arguments passed to gregexpr.

    169 | 170 |

    Value

    171 | 172 |

    Vector of matches extracted from input text.

    173 |

    Character vector of matches extracted from input text.

    174 | 175 | 176 |
    177 | 186 |
    187 | 188 | 198 |
    199 | 200 | 201 | 202 | 203 | 204 | 205 | -------------------------------------------------------------------------------- /docs/reference/chr_ngram_char.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Character n-grams — chr_ngram_char • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Returns n-grams at the character level

    112 | 113 |
    114 | 115 |
    chr_ngram_char(x, n = 3, lower = FALSE, space = FALSE,
    116 |   punct = FALSE)
    117 | 118 |

    Arguments

    119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 142 | 143 |
    x

    Character vector

    n

    Number of characters to return per ngram

    lower

    Logical indicating whether to lower case all text, defaults to 132 | false.

    space

    Logical indicating whether to strip space, defaults to false.

    punct

    Logical indicating whether to strip punctuation, defaults to 141 | false.

    144 | 145 |

    Value

    146 | 147 |

    List of length equal to input length consisting of ngram vectors.

    148 | 149 |

    Details

    150 | 151 |

    Thanks to ChrisMuir (https://github.com/mkearney/chr/issues/1)

    152 | 153 | 154 |
    155 | 170 |
    171 | 172 | 182 |
    183 | 184 | 185 | 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /docs/reference/chr_remove_hashtags.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Remove hashtags from text — chr_remove_hashtags • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Removes all hashtags from character vector.

    112 | 113 |
    114 | 115 |
    chr_remove_hashtags(x)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 |
    x

    Character vector.

    125 | 126 |

    Value

    127 | 128 |

    Vector without hashtags.

    129 | 130 | 131 |
    132 | 141 |
    142 | 143 | 153 |
    154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /docs/reference/chr_remove_linebreaks.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Remove line breaks from text — chr_remove_linebreaks • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Removes all line breaks from character vector.

    112 | 113 |
    114 | 115 |
    chr_remove_linebreaks(x)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 |
    x

    Character vector.

    125 | 126 |

    Value

    127 | 128 |

    Vector without line breaks.

    129 | 130 | 131 |
    132 | 141 |
    142 | 143 | 153 |
    154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /docs/reference/chr_remove_links.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Remove URL links from text — chr_remove_links • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Removes all hyper-links from character vector.

    112 | 113 |
    114 | 115 |
    chr_remove_links(x)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 |
    x

    Character vector.

    125 | 126 |

    Value

    127 | 128 |

    Vector without URLs.

    129 | 130 | 131 |
    132 | 141 |
    142 | 143 | 153 |
    154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /docs/reference/chr_remove_mentions.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Remove [at] mentions from text — chr_remove_mentions • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Removes all [at] mentions from character vector.

    112 | 113 |
    114 | 115 |
    chr_remove_mentions(x)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 |
    x

    Character vector.

    125 | 126 |

    Value

    127 | 128 |

    Vector without screen names.

    129 | 130 | 131 |
    132 | 141 |
    142 | 143 | 153 |
    154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /docs/reference/chr_remove_tabs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Remove tabs from text — chr_remove_tabs • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Removes all tabs from character vector.

    112 | 113 |
    114 | 115 |
    chr_remove_tabs(x)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 |
    x

    Character vector.

    125 | 126 |

    Value

    127 | 128 |

    Vector without tabs.

    129 | 130 | 131 |
    132 | 141 |
    142 | 143 | 153 |
    154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /docs/reference/chr_remove_ws.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Remove extra spaces from text — chr_remove_ws • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Removes double+ spaces and trims white space from string ends.

    112 | 113 |
    114 | 115 |
    chr_remove_ws(x)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 |
    x

    Character vector.

    125 | 126 |

    Value

    127 | 128 |

    Vector without extra spaces.

    129 | 130 | 131 |
    132 | 141 |
    142 | 143 | 153 |
    154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /docs/reference/chr_replace.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Replace text pattern with string — chr_replace • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Replaces all matching patterns with user-provided string.

    112 | 113 |
    114 | 115 |
    chr_replace(x, m, r, ignore.case = FALSE, ...)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 128 | 129 | 130 | 131 | 134 | 135 | 136 | 137 | 139 | 140 | 141 | 142 | 143 | 144 |
    x

    Character vector.

    m

    Matching text or regular expression used to locate text to be 127 | replaced.

    r

    Replacement text, the length of which must be either one (a single 132 | string used as the replacement text) or equal to the length of the supplied 133 | character vector

    ignore.case

    Logical indicating whether to ignore capitalization, 138 | defaults to FALSE.

    ...

    Other args passed on to gsub.

    145 | 146 |

    Value

    147 | 148 |

    Character vector with all matches of the pattern replaced.

    149 | 150 | 151 |
    152 | 161 |
    162 | 163 | 173 |
    174 | 175 | 176 | 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /docs/reference/chr_replace_nonascii.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Replace non-ascii with similar ascii characters — chr_replace_nonascii • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Replace weird UTF values with equivalent(ish) ascii values.

    112 | 113 |
    114 | 115 |
    chr_replace_nonascii(x)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 |
    x

    Character vector with non-ascii characters

    125 | 126 |

    Value

    127 | 128 |

    ASCII-friendly character vector.

    129 | 130 | 131 |
    132 | 141 |
    142 | 143 | 153 |
    154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /docs/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkearney/chr/02bb519e6656d9b97a43ba31c2ad38599d89e2be/docs/reference/figures/logo.png -------------------------------------------------------------------------------- /docs/reference/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Function reference • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 49 | 50 | 51 | 52 | 53 | 54 |
    55 |
    56 | 95 | 96 | 97 |
    98 | 99 |
    100 |
    101 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 119 | 120 | 121 | 122 | 125 | 126 | 127 | 128 | 131 | 132 | 133 | 134 | 137 | 138 | 139 | 140 | 143 | 144 | 145 | 146 | 149 | 150 | 151 | 152 | 155 | 156 | 157 | 158 | 161 | 162 | 163 | 164 | 167 | 168 | 169 | 170 | 173 | 174 | 175 | 176 | 179 | 180 | 181 | 182 | 185 | 186 | 187 | 188 | 191 | 192 | 193 | 194 | 197 | 198 | 199 | 200 |
    116 |

    All functions

    117 |

    118 |
    123 |

    chr_count()

    124 |

    Count matches from strings

    129 |

    chr_detect()

    130 |

    Detect matches in strings

    135 |

    chr_extract() chr_extract_first() chr_extract_links() chr_extract_words() chr_extract_mentions() chr_extract_hashtags() chr_extract_phone()

    136 |

    Extract matches from strings

    141 |

    chr_ngram_char()

    142 |

    Character n-grams

    147 |

    chr_remove_hashtags()

    148 |

    Remove hashtags from text

    153 |

    chr_remove_linebreaks()

    154 |

    Remove line breaks from text

    159 |

    chr_remove_links()

    160 |

    Remove URL links from text

    165 |

    chr_remove_mentions()

    166 |

    Remove [at] mentions from text

    171 |

    chr_remove_tabs()

    172 |

    Remove tabs from text

    177 |

    chr_remove_ws()

    178 |

    Remove extra spaces from text

    183 |

    chr_replace()

    184 |

    Replace text pattern with string

    189 |

    chr_replace_nonascii()

    190 |

    Replace non-ascii with similar ascii characters

    195 |

    title_case()

    196 |

    Convert string to title case

    201 |
    202 | 203 | 209 |
    210 | 211 | 221 |
    222 | 223 | 224 | 225 | 226 | 227 | 228 | -------------------------------------------------------------------------------- /docs/reference/pipe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Pipe operator — %>% • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    See magrittr::%>% for details.

    112 | 113 |
    114 | 115 |
    lhs %>% rhs
    116 | 117 | 118 |
    119 | 125 |
    126 | 127 | 137 |
    138 | 139 | 140 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /docs/reference/title_case.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Convert string to title case — title_case • chr 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 |
    57 |
    58 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 108 | 109 |
    110 | 111 |

    Capitalize character vector using title case

    112 | 113 |
    114 | 115 |
    title_case(x)
    116 | 117 |

    Arguments

    118 | 119 | 120 | 121 | 122 | 123 | 124 |
    x

    Character vector.

    125 | 126 |

    Value

    127 | 128 |

    Character vector in title case.

    129 | 130 | 131 |
    132 | 141 |
    142 | 143 | 153 |
    154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | https://chr.mikewk.com//index.html 5 | 6 | 7 | https://chr.mikewk.com//reference/chr-package.html 8 | 9 | 10 | https://chr.mikewk.com//reference/chr_count.html 11 | 12 | 13 | https://chr.mikewk.com//reference/chr_detect.html 14 | 15 | 16 | https://chr.mikewk.com//reference/chr_extract.html 17 | 18 | 19 | https://chr.mikewk.com//reference/chr_ngram_char.html 20 | 21 | 22 | https://chr.mikewk.com//reference/chr_remove_hashtags.html 23 | 24 | 25 | https://chr.mikewk.com//reference/chr_remove_linebreaks.html 26 | 27 | 28 | https://chr.mikewk.com//reference/chr_remove_links.html 29 | 30 | 31 | https://chr.mikewk.com//reference/chr_remove_mentions.html 32 | 33 | 34 | https://chr.mikewk.com//reference/chr_remove_tabs.html 35 | 36 | 37 | https://chr.mikewk.com//reference/chr_remove_ws.html 38 | 39 | 40 | https://chr.mikewk.com//reference/chr_replace.html 41 | 42 | 43 | https://chr.mikewk.com//reference/chr_replace_nonascii.html 44 | 45 | 46 | https://chr.mikewk.com//reference/pipe.html 47 | 48 | 49 | https://chr.mikewk.com//reference/title_case.html 50 | 51 | 52 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | 2 | citHeader("To cite chr use:") 3 | 4 | citEntry( 5 | entry = "Manual", 6 | title = "chr: Simple String Manipulation", 7 | author = as.person("Michael Wayne Kearney"), 8 | year = 2018, 9 | note = "R package version 0.1.03", 10 | url = "", 11 | key = "chr-package", 12 | textVersion = paste( 13 | "Kearney, M. W. (2017). 
chr: Simple String Manipulation.", 14 | "R package version 0.1.02 Retrieved from", 15 | "" 16 | ) 17 | ) 18 | 19 | -------------------------------------------------------------------------------- /man/chr-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/chr-package.R 3 | \docType{package} 4 | \name{chr-package} 5 | \alias{chr} 6 | \alias{chr-package} 7 | \title{chr: Simple String Manipulation} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: align='right'}} 10 | 11 | Clean, wrangle, and parse character [string] vectors 12 | using exclusively base R functions. 13 | } 14 | \author{ 15 | \strong{Maintainer}: Michael Wayne Kearney \email{kearneymw@missouri.edu} 16 | 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/chr_count.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count.R 3 | \name{chr_count} 4 | \alias{chr_count} 5 | \title{Count matches from strings} 6 | \usage{ 7 | chr_count(x, pat, ignore.case = FALSE, invert = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector} 11 | 12 | \item{pat}{Pattern (regex) to extract from text.} 13 | 14 | \item{ignore.case}{Logical indicating whether to ignore capitalization. 15 | Defaults to false.} 16 | 17 | \item{invert}{Logical indicating whether to extract matching portion 18 | (default) or, if this value is true, non-matching portions of text.} 19 | 20 | \item{...}{Other named arguments passed to \code{\link{gregexpr}}.} 21 | } 22 | \value{ 23 | Vector of matches extracted from input text. 24 | } 25 | \description{ 26 | Count all pattern matches in character vector.
27 | } 28 | -------------------------------------------------------------------------------- /man/chr_detect.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detect.R 3 | \name{chr_detect} 4 | \alias{chr_detect} 5 | \title{Detect matches in strings} 6 | \usage{ 7 | chr_detect(x, pat, ignore.case = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector} 11 | 12 | \item{pat}{Pattern (regex) to detect from text.} 13 | 14 | \item{ignore.case}{Logical indicating whether to ignore capitalization. 15 | Defaults to false.} 16 | 17 | \item{...}{Other named arguments passed to \code{\link{grepl}} or \code{\link{grep}} 18 | See details for more information.} 19 | } 20 | \value{ 21 | Logical vector indicating whether each element matched the supplied pattern. 22 | } 23 | \description{ 24 | Detect matching pattern in character vector. 25 | } 26 | \details{ 27 | This is a wrapper around the base R functions \code{\link{grepl}} and 28 | \code{\link{grep}}. By default, logical values are returned (a la grepl). To return 29 | values, include \code{value = TRUE}. To return positions, include \code{which = TRUE}, 30 | \code{pos = TRUE}, or \code{position = TRUE}. 
31 | } 32 | \examples{ 33 | 34 | ## return logical vector 35 | chr_detect(letters, "a|b|c|x|y|z") 36 | 37 | ## return inverted logical values 38 | chr_detect(letters, "a|b|c|x|y|z", invert = TRUE) 39 | 40 | ## return matching positions 41 | chr_detect(letters, "a|b|c|x|y|z", which = TRUE) 42 | 43 | ## return inverted matching positions 44 | chr_detect(letters, "a|b|c|x|y|z", which = TRUE, invert = TRUE) 45 | 46 | ## return matching values 47 | chr_detect(letters, "a|b|c|x|y|z", value = TRUE) 48 | 49 | ## return inverted matching values 50 | chr_detect(letters, "a|b|c|x|y|z", value = TRUE, invert = TRUE) 51 | } 52 | -------------------------------------------------------------------------------- /man/chr_extract.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract.R 3 | \name{chr_extract} 4 | \alias{chr_extract} 5 | \alias{chr_extract_first} 6 | \alias{chr_extract_links} 7 | \alias{chr_extract_words} 8 | \alias{chr_extract_mentions} 9 | \alias{chr_extract_hashtags} 10 | \alias{chr_extract_phone} 11 | \title{Extract matches from strings} 12 | \usage{ 13 | chr_extract(x, pat, ignore.case = FALSE, collapse = NULL, 14 | invert = FALSE, na = TRUE, ...) 15 | 16 | chr_extract_first(x, pat, ignore.case = FALSE, invert = FALSE, 17 | na = TRUE, ...) 18 | 19 | chr_extract_links(x, collapse = NULL) 20 | 21 | chr_extract_words(x, collapse = NULL) 22 | 23 | chr_extract_mentions(x, collapse = NULL) 24 | 25 | chr_extract_hashtags(x, collapse = NULL) 26 | 27 | chr_extract_phone(x, collapse = NULL) 28 | } 29 | \arguments{ 30 | \item{x}{Character vector} 31 | 32 | \item{pat}{Pattern (regex) to extract from text.} 33 | 34 | \item{ignore.case}{Logical indicating whether to ignore capitalization. 35 | Defaults to false.} 36 | 37 | \item{collapse}{Text inserted between extracted matches. 
If NULL (the 38 | default) a vector of matches is returned for each inputted string.} 39 | 40 | \item{invert}{Logical indicating whether to extract matching portion 41 | (default) or, if this value is true, non-matching portions of text.} 42 | 43 | \item{na}{Logical indicating whether to return NA values for input elements 44 | without matches. Defaults to true.} 45 | 46 | \item{...}{Other named arguments passed to \code{\link{gregexpr}}.} 47 | } 48 | \value{ 49 | Vector of matches extracted from input text. 50 | 51 | Character vector of matches extracted from input text. 52 | } 53 | \description{ 54 | Detect and return all matching patterns from character vector. 55 | 56 | Detect and return first matching pattern from character vector. 57 | } 58 | -------------------------------------------------------------------------------- /man/chr_ngram_char.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ngram.R 3 | \name{chr_ngram_char} 4 | \alias{chr_ngram_char} 5 | \title{Character n-grams} 6 | \usage{ 7 | chr_ngram_char(x, n = 3, lower = FALSE, space = FALSE, 8 | punct = FALSE) 9 | } 10 | \arguments{ 11 | \item{x}{Character vector} 12 | 13 | \item{n}{Number of characters to return per ngram} 14 | 15 | \item{lower}{Logical indicating whether to lower case all text, defaults to 16 | false.} 17 | 18 | \item{space}{Logical indicating whether to strip space, defaults to false.} 19 | 20 | \item{punct}{Logical indicating whether to strip punctuation, defaults to 21 | false.} 22 | } 23 | \value{ 24 | List of length equal to input length consisting of ngram vectors.
25 | } 26 | \description{ 27 | Returns n-grams at the character level 28 | } 29 | \details{ 30 | Thanks to ChrisMuir \(https://github.com/mkearney/chr/issues/1) 31 | } 32 | \author{ 33 | ChrisMuir 34 | } 35 | -------------------------------------------------------------------------------- /man/chr_remove_hashtags.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove.R 3 | \name{chr_remove_hashtags} 4 | \alias{chr_remove_hashtags} 5 | \title{Remove hashtags from text} 6 | \usage{ 7 | chr_remove_hashtags(x) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector.} 11 | } 12 | \value{ 13 | Vector without hashtags. 14 | } 15 | \description{ 16 | Removes all hashtags from character vector. 17 | } 18 | -------------------------------------------------------------------------------- /man/chr_remove_linebreaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove.R 3 | \name{chr_remove_linebreaks} 4 | \alias{chr_remove_linebreaks} 5 | \title{Remove line breaks from text} 6 | \usage{ 7 | chr_remove_linebreaks(x) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector.} 11 | } 12 | \value{ 13 | Vector without line breaks. 14 | } 15 | \description{ 16 | Removes all line breaks from character vector. 17 | } 18 | -------------------------------------------------------------------------------- /man/chr_remove_links.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove.R 3 | \name{chr_remove_links} 4 | \alias{chr_remove_links} 5 | \title{Remove URL links from text} 6 | \usage{ 7 | chr_remove_links(x) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector.} 11 | } 12 | \value{ 13 | Vector without URLs. 
14 | } 15 | \description{ 16 | Removes all hyper-links from character vector. 17 | } 18 | -------------------------------------------------------------------------------- /man/chr_remove_mentions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove.R 3 | \name{chr_remove_mentions} 4 | \alias{chr_remove_mentions} 5 | \title{Remove [at] mentions from text} 6 | \usage{ 7 | chr_remove_mentions(x) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector.} 11 | } 12 | \value{ 13 | Vector without screen names. 14 | } 15 | \description{ 16 | Removes all [at] mentions from character vector. 17 | } 18 | -------------------------------------------------------------------------------- /man/chr_remove_tabs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove.R 3 | \name{chr_remove_tabs} 4 | \alias{chr_remove_tabs} 5 | \title{Remove tabs from text} 6 | \usage{ 7 | chr_remove_tabs(x) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector.} 11 | } 12 | \value{ 13 | Vector without tabs. 14 | } 15 | \description{ 16 | Removes all tabs from character vector. 17 | } 18 | -------------------------------------------------------------------------------- /man/chr_remove_ws.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove.R 3 | \name{chr_remove_ws} 4 | \alias{chr_remove_ws} 5 | \title{Remove extra spaces from text} 6 | \usage{ 7 | chr_remove_ws(x) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector.} 11 | } 12 | \value{ 13 | Vector without extra spaces. 14 | } 15 | \description{ 16 | Removes double+ spaces and trims white space from string ends. 
17 | } 18 | -------------------------------------------------------------------------------- /man/chr_replace.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/replace.R 3 | \name{chr_replace} 4 | \alias{chr_replace} 5 | \title{Replace text pattern with string} 6 | \usage{ 7 | chr_replace(x, m, r, ignore.case = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector.} 11 | 12 | \item{m}{Matching text or regular expression used to locate text to be 13 | replaced.} 14 | 15 | \item{r}{Replacement text, the length of which must be either one (a single 16 | string used as the replacement text) or equal to the length of the supplied 17 | character vector} 18 | 19 | \item{ignore.case}{Logical indicating whether to ignore capitalization, 20 | defaults to FALSE.} 21 | 22 | \item{...}{Other args passed on to \code{\link{gsub}}.} 23 | } 24 | \value{ 25 | Character vector with all matches of the pattern replaced. 26 | } 27 | \description{ 28 | Replaces all matching patterns with user-provided string.
17 | } 18 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkearney/chr/02bb519e6656d9b97a43ba31c2ad38599d89e2be/man/figures/logo.png -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | See \code{magrittr::\link[magrittr]{\%>\%}} for details. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/title_case.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/title.R 3 | \name{title_case} 4 | \alias{title_case} 5 | \title{Convert string to title case} 6 | \usage{ 7 | title_case(x) 8 | } 9 | \arguments{ 10 | \item{x}{Character vector.} 11 | } 12 | \value{ 13 | Character vector in title case. 
14 | } 15 | \description{ 16 | Capitalize character vector using title case 17 | } 18 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | @import url("https://fonts.googleapis.com/css?family=Lato:700"); 2 | @import url("https://cdn.rawgit.com/tonsky/FiraCode/1.205/distr/fira_code.css"); 3 | 4 | body { 5 | font-family: "Avenir Next", "Helvetica Neue", Helvetica, Arial, sans-serif; 6 | font-weight: 400; 7 | } 8 | 9 | h1, h2, h3, h4, .h1, .h2, .h3, .h4 { 10 | font-family: Lato, "Avenir Next", "Helvetica Neue", Helvetica, Arial, sans-serif; 11 | font-weight: 700; 12 | } 13 | 14 | pre, code { 15 | font-family: "Fira Code", Consolas, Inconsolata, monospace; 16 | } 17 | 18 | .navbar-default { 19 | background-color: #f3f3f3; 20 | border-color: #eee; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(chr) 3 | 4 | test_check("chr") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-chr_extract-r.R: -------------------------------------------------------------------------------- 1 | context("test-chr_extract-r.R") 2 | 3 | test_that("multiplication works", { 4 | ## some text strings 5 | x <- c("this one is @there 6 | has #MultipleLines https://github.com and 7 | http://twitter.com @twitter", 8 | "this @one #istotally their and 9 | some non-ascii symbols: \u00BF \u037E", 10 | "this one is they're https://github.com", 11 | "this one #HasHashtags #afew #ofthem", 12 | "and more @kearneymw at https://mikew.com") 13 | 14 | ## extract all 15 | extall <- chr_extract(x, "is") 16 | expect_true(is.list(extall)) 17 | expect_true(length(extall) == 5) 18 | expect_true(length(extall[[1]]) == 2) 19 | 
expect_true(is.na(extall[[5]])) 20 | 21 | ## extract first 22 | extfirst <- chr_extract_first(x, "is") 23 | expect_true(is.character(extfirst)) 24 | expect_true(length(extfirst) == 4) 25 | expect_true(all(extfirst == "is")) 26 | 27 | ## extract all URLS 28 | links <- chr_extract_links(x) 29 | expect_true(is.list(links)) 30 | expect_true(length(links) == 5) 31 | expect_true(length(links[[1]]) == 2) 32 | expect_true(is.na(links[[2]])) 33 | 34 | ## extract all hashtags 35 | hashtags <- chr_extract_hashtags(x) 36 | expect_true(is.list(hashtags)) 37 | expect_true(length(hashtags) == 5) 38 | expect_true(length(hashtags[[4]]) == 3) 39 | expect_true(is.na(hashtags[[5]])) 40 | 41 | ## extract mentions 42 | mentions <- chr_extract_mentions(x) 43 | expect_true(is.list(mentions)) 44 | expect_true(length(mentions) == 5) 45 | expect_true(length(mentions[[1]]) == 2) 46 | expect_true(is.na(mentions[[4]])) 47 | }) 48 | -------------------------------------------------------------------------------- /tests/testthat/test-detect.R: -------------------------------------------------------------------------------- 1 | context("test-detect.R") 2 | 3 | test_that("chr_detect", { 4 | expect_true(chr_detect("asdf", "asdf")) 5 | expect_true(chr_detect("asdf", "asdf", which = TRUE) == 1) 6 | expect_true(chr_detect("asdf", "asdf", value = TRUE) == "asdf") 7 | expect_true(chr_detect("ASDF", "asdf", invert = TRUE)) 8 | }) 9 | --------------------------------------------------------------------------------