├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── build_features.R ├── data.R ├── predict_troll.R ├── run_api.R └── run_shiny.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── data ├── mdl_data.rda └── shitwordlist.rda ├── docs ├── articles │ ├── index.html │ ├── trollR.html │ ├── trollR_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-1-1.png │ │ │ └── unnamed-chunk-1-2.png │ └── vignette2.html ├── authors.html ├── index.html ├── jquery.sticky-kit.min.js ├── link.svg ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── build_features.html │ ├── figures │ ├── README-plot1-1.png │ ├── README-plot2-1.png │ └── README-unnamed-chunk-10-1.png │ ├── index.html │ ├── predict_troll.html │ ├── run_api.html │ ├── run_shiny.html │ ├── shitwordlist.html │ └── test_function.html ├── helpers ├── create_manual.R ├── scrape_swearwords.R └── train_model.R ├── inst ├── plumber_api.R ├── shiny-examples │ ├── testapp │ │ ├── app.R │ │ └── modules.R │ └── trollR │ │ ├── app.R │ │ └── rsconnect │ │ └── shinyapps.io │ │ └── schliebs │ │ └── trollR.dcf └── xgboost_model.buffer ├── man ├── build_features.Rd ├── figures │ ├── README-plot1-1.png │ ├── README-plot2-1.png │ └── README-unnamed-chunk-10-1.png ├── predict_troll.Rd ├── run_api.Rd ├── run_shiny.Rd └── shitwordlist.Rd ├── trollR.Rproj └── trollR.pdf /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | 4 | ^/offline/ 5 | offline/ 6 | 7 | ^/helpers/ 8 | helpers/ 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | 35 | # Data 36 | 37 | data-offline/ 38 | data_offline/ 39 | */offline/ 40 | offline/ 41 | 42 | .Rproj.user 43 | 44 | *.sqlite 45 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: trollR 2 | Title: Detecting if a post is trolling, one post at a time 3 | Version: 0.0.1 4 | Date: 2018-04-18 5 | Authors@R: c( 6 | person( "Marcel", "Schliebs", ,role = c("aut","cre"), email = "m.schliebs@zeppelin-university.net"), 7 | person("David", "Zimmermann", role = "aut"), 8 | person("Ben", "Thies", role = "aut"), 9 | person("Divakar", "Kumar", role = "aut"), 10 | person("Preet", "Parikh", role = "aut"), 11 | person("Mengxi", "Wang", role = "aut") 12 | ) 13 | Description: What the package does (one paragraph). 14 | Depends: 15 | R (>= 3.4.3), 16 | shiny, 17 | tidyverse 18 | Imports: 19 | doParallel, 20 | Matrix, 21 | plumber, 22 | shinydashboard, 23 | text2vec, 24 | tokenizers, 25 | tm, 26 | xgboost 27 | License: MIT 28 | Encoding: UTF-8 29 | LazyData: true 30 | Suggests: testthat 31 | RoxygenNote: 6.0.1 32 | 33 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(build_features) 4 | export(predict_troll) 5 | export(run_api) 6 | export(run_shiny) 7 | -------------------------------------------------------------------------------- /R/build_features.R: -------------------------------------------------------------------------------- 1 | #' Builds the feature-matrix from a text-vector 2 | #' 3 | #' @param x a vector of text 4 | #' @param term_count_min a number passed to 5 | #' \code{\link[text2vec]{prune_vocabulary}}, defaults to 1. In case the function 6 | #' is used for training, it can and should be set to some higher value, i.e., 3. 7 | #' @param mdl is a list of existing models-data (containing the vectorizer, the 8 | #' tfidf, and the lsa object), defaults to NULL, in which case it is rebuild 9 | #' @param parallel T/F if the task should be executed in parallel, defaults to TRUE 10 | #' @param quiet T/F if the function remains silent, defaults to FALSE 11 | #' 12 | #' @return a list of two: a dgCMatrix that contains the features (columns) for 13 | #' each text (row) and as a second element a list of the model that can be passed 14 | #' as mdl 15 | #' @export 16 | #' 17 | #' @examples 18 | #' text <- c( 19 | #' "This is a first text that describes something", 20 | #' "A second Text That USES A LOT of CAPITALS", 21 | #' "Lastly MANY!!!! (like, really a lot!) punctuations!!!" 22 | #' ) 23 | #' 24 | #' build_features(text) 25 | #' 26 | #' # a second example 27 | #' train <- c("Banking is finance", "flowers are not houses", "finance is power", "houses are build") 28 | #' test <- c("finance is greed", "flowers belong in the garbage", "houses are build") 29 | #' 30 | #' a1 <- build_features(test) 31 | #' a12 <- build_features(test, mdl = a1$mdl) 32 | #' 33 | #' a2 <- build_features(train, mdl = a1$mdl) 34 | #' a2$model_matrix %>% as.matrix() 35 | build_features <- function(x, term_count_min = 1, 36 | mdl = NULL, parallel = TRUE, quiet = FALSE) { 37 | 38 | t0 <- Sys.time() 39 | if (!quiet) cat("Calculating Features...\n") 40 | 41 | d <- data_frame(text = x) 42 | 43 | d <- d %>% mutate( 44 | length = str_length(text), 45 | ncap = str_count(text, "[A-Z]"), 46 | ncap_len = ncap / length, 47 | nsen = str_count(text, fixed(".")), 48 | nexcl = str_count(text, fixed("!")), 49 | nquest = str_count(text, fixed("?")), 50 | npunct = str_count(text, "[[:punct:]]"), 51 | nword = str_count(text, "\\w+"), 52 | nsymb = str_count(text, "&|@|#|\\$|%|\\*|\\^"), 53 | nsmile = str_count(text, "((?::|;|=)(?:-)?(?:\\)|D|P))")#, 54 | #nslur = str_count(tolower(text), paste(shitwordlist, collapse = "|")) 55 | ) 56 | 57 | it_raw <- x %>% 58 | str_to_lower() %>% 59 | str_replace_all("[^[:alpha:]]", " ") %>% 60 | str_replace_all("\\s+", " ") 61 | 62 | if (parallel) { 63 | n_cores <- parallel::detectCores() 64 | doParallel::registerDoParallel(n_cores) 65 | 66 | it <- it_raw %>% 67 | text2vec::itoken_parallel(tokenizer = tokenizers::tokenize_word_stems, 68 | progressbar = !quiet, n_chunks = n_cores) 69 | } else { # sequential execution 70 | 71 | it <- it_raw %>% text2vec::itoken(tokenizer = tokenizers::tokenize_word_stems, 72 | progressbar = !quiet) 73 | } 74 | 75 | if (!is.null(mdl)) { 76 | 77 | vectorizer <- mdl$vectorizer 78 | tfidf <- mdl$tfidf 79 | 80 | } else { 81 | 82 | vectorizer <- text2vec::create_vocabulary( 83 | it, ngram = c(1, 1), 84 | stopwords = tm::stopwords("en") 85 | ) %>% 86 | text2vec::prune_vocabulary( 87 | term_count_min = term_count_min, 88 | doc_proportion_max = 0.5, 89 | doc_proportion_min = 0.001 90 | # vocab_term_max = 4000 91 | ) %>% 92 | text2vec::vocab_vectorizer() 93 | } 94 | 95 | if (!quiet) cat("Create DTM...\n") 96 | dtm <- text2vec::create_dtm(it, vectorizer) 97 | 98 | mdl_new <- list( 99 | vectorizer = vectorizer 100 | ) 101 | 102 | res <- d %>% 103 | select(-text) %>% 104 | Matrix::sparse.model.matrix(~ . - 1, .) %>% 105 | cbind(dtm) 106 | 107 | if (!quiet) cat(sprintf("Finished in %s seconds\n", 108 | difftime(Sys.time(), t0, units = "secs") %>% round(2))) 109 | return(list(model_matrix = res, mdl = mdl_new)) 110 | } 111 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Shitword list 2 | #' 3 | #' A list of Enlish curse words scraped from https://www.noswearing.com/dictionary 4 | #' 5 | #' @format A character vector containing 349 words. 6 | #' \describe{ 7 | #' \item{test}{this is only for illustration purpose how to document data} 8 | #' \item{london}{hello whatup} 9 | #' } 10 | "shitwordlist" 11 | -------------------------------------------------------------------------------- /R/predict_troll.R: -------------------------------------------------------------------------------- 1 | #' Detect if given texts are trolls 2 | #' 3 | #' @param x a vector of text 4 | #' @param model_ a model that is passed to predict, defaults to the \code{model} 5 | #' supplied with this package 6 | #' @param mdl_data_ a model as returned by \code{\link{build_features}} (the mdl) 7 | #' containing the vectorizer, tfidf, and the lsa objects. Defaults to the 8 | #' \code{mdl_data} from this package. 9 | #' 10 | #' @return a vector with the same lengths as x that holds the predicted probabilities 11 | #' that the given text is trolling 12 | #' @export 13 | #' 14 | #' @examples 15 | #' text <- c("You suck, die!", "What a nice world we have today", "I like you", "I hate you") 16 | #' (pred <- predict_troll(text)) 17 | predict_troll <- function(x, model_ = NULL, mdl_data_ = NULL) { 18 | if (is.null(mdl_data_)) mdl_data_ <- mdl_data 19 | if (is.null(model_)) model_ <- xgboost::xgb.load(system.file("xgboost_model.buffer", 20 | package = "trollR")) 21 | 22 | model_matrix <- build_features(x, mdl = mdl_data_$mdl, term_count_min = 1, 23 | parallel = F, quiet = T) 24 | pred <- predict(model_, model_matrix$model_matrix) 25 | return(pred) 26 | } 27 | -------------------------------------------------------------------------------- /R/run_api.R: -------------------------------------------------------------------------------- 1 | #' Run the Plumber API 2 | #' 3 | #' @param ... parameters passed to \code{\link[plumber]{plumber}} 4 | #' 5 | #' @return invisible NULL 6 | #' @export 7 | #' 8 | #' @examples 9 | #' \dontrun{ 10 | #' run_api() 11 | #' # try to got to: http://127.0.0.1:8000/trollR 12 | #' # or use http://127.0.0.1:8000/trollR?text=This may be a troll comment 13 | #' } 14 | run_api <- function(port = 8000) { 15 | library(plumber) 16 | r <- plumb(system.file("plumber_api.R", package = "trollR")) 17 | 18 | message("trollR Server API up and running!") 19 | message(sprintf("Running on localhost:%s/trollR (or http://127.0.0.1:%s/trollR)", port, port)) 20 | message(sprintf("To use the API use: localhost:%s/trollR?text=hello world", port)) 21 | message("End the Server API by pressing (ESC)...") 22 | suppressMessages(r$run(port = port)) 23 | 24 | return(invisible(NULL)) 25 | } 26 | -------------------------------------------------------------------------------- /R/run_shiny.R: -------------------------------------------------------------------------------- 1 | #' Shiny App Launcher 2 | #' 3 | #' @param example name of the app, defaults to trollR 4 | #' 5 | #' @return Nothing 6 | #' @export 7 | #' 8 | #' @examples 9 | #' \dontrun{ 10 | #' run_shiny() 11 | #' } 12 | run_shiny <- function(example = "trollR") { 13 | # locate all the shiny app examples that exist 14 | validExamples <- list.files(system.file("shiny-examples", package = "trollR")) 15 | 16 | validExamplesMsg <- 17 | paste0( 18 | "Valid examples are: '", 19 | paste(validExamples, collapse = "', '"), 20 | "'") 21 | 22 | # if an invalid example is given, throw an error 23 | if (!example %in% validExamples) { 24 | stop( 25 | 'Please run `run_shiny()` with a valid example app as an argument.\n', 26 | validExamplesMsg, 27 | call. = FALSE) 28 | } 29 | 30 | # find and launch the app 31 | appDir <- system.file("shiny-examples", example, package = "trollR") 32 | shiny::runApp(appDir, display.mode = "normal") 33 | } 34 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r setup, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | # eval = FALSE, 11 | comment = "#>", 12 | fig.path = "man/figures/README-", 13 | out.width = "100%" 14 | ) 15 | library(trollR) 16 | library(xgboost) 17 | theme_set(theme_light()) 18 | ``` 19 | 20 | # trollR - Online Troll Detection using R 21 | 22 | LSE Hackathon Challenge: Detecting Online Trolling Behaviour 23 | 24 | Click [here](https://schliebs.shinyapps.io/trollR/) to try out our shiny-app. 25 | 26 | 27 | Data source: https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/ 28 | 29 | Data description 30 | 31 | A large number of Wikipedia comments which have been labeled by human raters for toxic behavior. The types of toxicity are: 32 | 33 | - toxic 34 | - severe_toxic 35 | - obscene 36 | - threat 37 | - insult 38 | - identity_hate 39 | 40 | # Usage 41 | 42 | To install the package use 43 | ```{r, eval=F} 44 | # install.packages("devtools") 45 | devtools::install_github("schliebs/trollR") 46 | library(trollR) 47 | library(xgboost) 48 | ``` 49 | ```{r} 50 | predict_troll("Hello World - this is an example of trollR - Identifying trolling comments using R") 51 | 52 | # take some text 53 | text <- c( 54 | "I would like to point out that your comment was substandard!", 55 | "YOU SHOULD DIE!!!!", 56 | "YOU SHOULD DIE", 57 | "you should die!!!!", 58 | "you should die", 59 | "Go rot in hell", 60 | "I can also write something non-toxic -- really", 61 | "COCKSUCKER BEFORE YOU PISS AROUND ON MY WORK", 62 | "bloody hell, i forgot my purse at the pub yesterday" 63 | ) 64 | 65 | # and find how likely it is to be trolling? 66 | data_frame(text = text, troll = predict_troll(text)) %>% arrange(-troll) 67 | ``` 68 | 69 | 70 | ## Thats all? 71 | 72 | Of course not 73 | ```{r, eval=F} 74 | run_api() 75 | ``` 76 | ![](https://puu.sh/A6q4N/d0661c33be.png) 77 | 78 | Or from a terminal 79 | ```{bash, eval=F} 80 | curl "http://localhost:8000/trollR?text=You suck you cocksucker" 81 | ``` 82 | `{"text":["You suck you cocksucker"],"troll_certainty":[0.9746]}` 83 | 84 | But wait, there is more 85 | ```{r, eval=F} 86 | run_shiny() 87 | ``` 88 | ![](https://puu.sh/A6r1b/169e66db24.png) 89 | 90 | # Understanding the model 91 | 92 | ```{r plot1,eval = TRUE} 93 | # load the model 94 | #model <- xgb.load(system.file("xgboost_model.buffer", package = "trollR")) 95 | model <- xgb.load("inst/xgboost_model.buffer") 96 | 97 | df <- xgb.importance(mdl_data$model_matrix %>% colnames(), model) %>% as_data_frame() 98 | 99 | vars <- c("length", "ncap", "ncap_len", "nsen", "nexcl", "nquest", "npunct", 100 | "nword", "nsymb", "nsmile", "nslur") 101 | df %>% 102 | arrange(-Gain) %>% 103 | top_n(20, Gain) %>% 104 | mutate(Feature = reorder(Feature, Gain), 105 | Vartype = Feature %in% vars) %>% 106 | ggplot(aes(x = Feature, y = Gain, fill = Vartype)) + 107 | geom_col() + 108 | coord_flip() + 109 | labs(y = "Feature Importance in the XGBoost Model", x = "", title = "") + 110 | theme(axis.text.y = element_text(size = 15, face = "bold")) + 111 | scale_fill_brewer(palette = "Set1", guide = F) 112 | ``` 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | trollR - Online Troll Detection using R 4 | ======================================= 5 | 6 | LSE Hackathon Challenge: Detecting Online Trolling Behaviour 7 | 8 | Data source: 9 | 10 | Data description 11 | 12 | A large number of Wikipedia comments which have been labeled by human raters for toxic behavior. The types of toxicity are: 13 | 14 | - toxic 15 | - severe\_toxic 16 | - obscene 17 | - threat 18 | - insult 19 | - identity\_hate 20 | 21 | Usage 22 | ===== 23 | 24 | To install the package use 25 | 26 | ``` r 27 | # install.packages("devtools") 28 | devtools::install_github("schliebs/trollR") 29 | library(trollR) 30 | library(xgboost) 31 | ``` 32 | 33 | ``` r 34 | predict_troll("Hello World - this is an example of trollR - Identifying trolling comments using R") 35 | #> [1] 0.0722369 36 | 37 | # take some text 38 | text <- c( 39 | "I would like to point out that your comment was substandard!", 40 | "YOU SHOULD DIE!!!!", 41 | "YOU SHOULD DIE", 42 | "you should die!!!!", 43 | "you should die", 44 | "Go rot in hell", 45 | "I can also write something non-toxic -- really", 46 | "COCKSUCKER BEFORE YOU PISS AROUND ON MY WORK", 47 | "bloody hell, i forgot my purse at the pub yesterday" 48 | ) 49 | 50 | # and find how likely it is to be trolling? 51 | data_frame(text = text, troll = predict_troll(text)) %>% arrange(-troll) 52 | #> # A tibble: 9 x 2 53 | #> text troll 54 | #> 55 | #> 1 COCKSUCKER BEFORE YOU PISS AROUND ON MY WORK 0.972 56 | #> 2 bloody hell, i forgot my purse at the pub yesterday 0.958 57 | #> 3 Go rot in hell 0.796 58 | #> 4 you should die!!!! 0.729 59 | #> 5 YOU SHOULD DIE!!!! 0.714 60 | #> 6 YOU SHOULD DIE 0.667 61 | #> 7 you should die 0.543 62 | #> 8 I would like to point out that your comment was substandard! 0.0739 63 | #> 9 I can also write something non-toxic -- really 0.0281 64 | ``` 65 | 66 | Thats all? 67 | ---------- 68 | 69 | Of course not 70 | 71 | ``` r 72 | run_api() 73 | ``` 74 | 75 | ![](https://puu.sh/A6q4N/d0661c33be.png) 76 | 77 | Or from a terminal 78 | 79 | ``` bash 80 | curl "http://localhost:8000/trollR?text=You suck you cocksucker" 81 | ``` 82 | 83 | `{"text":["You suck you cocksucker"],"troll_certainty":[0.9746]}` 84 | 85 | But wait, there is more 86 | 87 | ``` r 88 | run_shiny() 89 | ``` 90 | 91 | ![](https://puu.sh/A6r1b/169e66db24.png) 92 | 93 | Understanding the model 94 | ======================= 95 | 96 | ``` r 97 | # load the model 98 | model <- xgb.load(system.file("xgboost_model.buffer", package = "trollR")) 99 | df <- xgb.importance(mdl_data$model_matrix %>% colnames(), model) %>% as_data_frame() 100 | 101 | vars <- c("length", "ncap", "ncap_len", "nsen", "nexcl", "nquest", "npunct", 102 | "nword", "nsymb", "nsmile", "nslur") 103 | df %>% 104 | arrange(-Gain) %>% 105 | top_n(20, Gain) %>% 106 | mutate(Feature = reorder(Feature, Gain), 107 | Vartype = Feature %in% vars) %>% 108 | ggplot(aes(x = Feature, y = Gain, fill = Vartype)) + 109 | geom_col() + 110 | coord_flip() + 111 | labs(y = "Feature Importance in the XGBoost Model", x = "", title = "") + 112 | theme(axis.text.y = element_text(size = 15, face = "bold")) + 113 | scale_fill_brewer(palette = "Set1", guide = F) 114 | ``` 115 | 116 | 117 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | navbar: 2 | title: trollR 3 | type: default 4 | left: 5 | - icon: fa-home fa-lg 6 | href: index.html 7 | - text: Get Started 8 | href: articles/trollR.html 9 | - text: Reference 10 | href: reference/index.html 11 | - text: News 12 | href: news/index.html 13 | right: 14 | - icon: fa-github fa-lg 15 | href: https://github.com/schliebs/trollR 16 | 17 | reference: 18 | - title: All functions 19 | desc: ~ 20 | contents: 21 | - '`test_function`' 22 | - title: Data 23 | desc: ~ 24 | contents: 25 | - '`shitwordlist`' 26 | 27 | 28 | -------------------------------------------------------------------------------- /data/mdl_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/data/mdl_data.rda -------------------------------------------------------------------------------- /data/shitwordlist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/data/shitwordlist.rda -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • trollR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 |
46 |
47 | 104 | 105 | 106 |
107 | 108 |
109 |
110 | 113 | 114 |
115 |

All vignettes

116 |

117 | 118 | 122 |
123 |
124 |
125 | 126 |
127 | 130 | 131 |
132 |

Site built with pkgdown.

133 |
134 | 135 |
136 |
137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /docs/articles/trollR.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Vignette Title • trollR 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 21 |
22 |
79 | 80 | 81 | 82 |
83 |
84 | 94 | 95 | 96 | 97 |

lorrem ipsssilalal tikitaka Vignettes are long form documentation commonly included in packages. Because they are part of the distribution of the package, they need to be as compact as possible. The html_vignette output type provides a custom style sheet (and tweaks some options) to ensure that the resulting html is as small as possible. The html_vignette format:

98 |
    99 |
  • Never uses retina figures
  • 100 |
  • Has a smaller default figure size
  • 101 |
  • Uses a custom CSS stylesheet instead of the default Twitter Bootstrap style
  • 102 |
103 |
104 |

105 | Vignette Info

106 |

Note the various macros within the vignette section of the metadata block above. These are required in order to instruct R how to build the vignette. Note that you should change the title field and the \VignetteIndexEntry to match the title of your vignette.

107 |
108 |
109 |

110 | Styles

111 |

The html_vignette template includes a basic CSS theme. To override this theme you can specify your own CSS in the document metadata as follows:

112 |
output: 
113 |   rmarkdown::html_vignette:
114 |     css: mystyles.css
115 |
116 |
117 |

118 | Figures

119 |

The figure sizes have been customised so that you can easily put two images side-by-side.

120 |
plot(1:10)
121 | plot(10:1)
122 |

123 |

You can enable figure captions by fig_caption: yes in YAML:

124 |
output:
125 |   rmarkdown::html_vignette:
126 |     fig_caption: yes
127 |

Then you can use the chunk option fig.cap = "Your figure caption." in knitr.

128 |
129 |
130 |

131 | More Examples

132 |

You can write math expressions, e.g. \(Y = X\beta + \epsilon\), footnotes1, and tables, e.g. using knitr::kable().

133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 |
mpgcyldisphpdratwtqsecvsamgearcarb
Mazda RX421.06160.01103.902.62016.460144
Mazda RX4 Wag21.06160.01103.902.87517.020144
Datsun 71022.84108.0933.852.32018.611141
Hornet 4 Drive21.46258.01103.083.21519.441031
Hornet Sportabout18.78360.01753.153.44017.020032
Valiant18.16225.01052.763.46020.221031
Duster 36014.38360.02453.213.57015.840034
Merc 240D24.44146.7623.693.19020.001042
Merc 23022.84140.8953.923.15022.901042
Merc 28019.26167.61233.923.44018.301044
291 |

Also a quote using >:

292 |
293 |

“He who gives up [code] safety for [code] speed deserves neither.” (via)

294 |
295 |
296 |
297 |
298 |
    299 |
  1. A footnote here.

  2. 300 |
301 |
302 |
303 | 304 | 316 | 317 |
318 | 319 | 320 |
323 | 324 |
325 |

Site built with pkgdown.

326 |
327 | 328 |
329 |
330 | 331 | 332 | 333 | 334 | 335 | -------------------------------------------------------------------------------- /docs/articles/trollR_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/docs/articles/trollR_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /docs/articles/trollR_files/figure-html/unnamed-chunk-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/docs/articles/trollR_files/figure-html/unnamed-chunk-1-2.png -------------------------------------------------------------------------------- /docs/articles/vignette2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Data Import • trollR 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 21 |
22 |
79 | 80 | 81 | 82 |
83 |
84 | 94 | 95 | 96 | 97 |
98 |

99 | Data collection

100 |

You can use your own smartphone to track your movements. So far, the import via SportsTracker is implemented. I am working on providing you with other import options in the future. If you find other apps that work with similar file formats, feel free to create a pull request or shoot me a tweet or email.

101 |
102 |

103 | SportsTracker

104 |

Sportstracker is an application …

105 |
    106 |
  • screenshot of how to track
  • 107 |
  • screenshot of download
  • 108 |
  • screenshot ??
  • 109 |
110 |

So far, you need to do this manually and download/export every single log file as described. I am thinking about maybe writing a scraper to automize this process, but would definitely have to check with SportsTracker first to see if they allow me to.

111 |
112 |
113 |
114 |

115 | Data import

116 |

Once you have imported your log files to a folder, you can import the files into R.

117 |
118 |
119 | 120 | 134 | 135 |
136 | 137 | 138 |
141 | 142 |
143 |

Site built with pkgdown.

144 |
145 | 146 |
147 |
148 | 149 | 150 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • trollR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 |
46 |
47 | 92 | 93 | 94 |
95 | 96 |
97 |
98 | 101 | 102 |
    103 |
  • 104 |

    Marcel Schliebs. Author, maintainer. 105 |

    106 |
  • 107 |
  • 108 |

    David Zimmermann. Author. 109 |

    110 |
  • 111 |
  • 112 |

    Ben Thies. Author. 113 |

    114 |
  • 115 |
  • 116 |

    Divakar Kumar. Author. 117 |

    118 |
  • 119 |
  • 120 |

    Preet Parikh. Author. 121 |

    122 |
  • 123 |
  • 124 |

    Mengxi Wang. Author. 125 |

    126 |
  • 127 |
128 | 129 |
130 | 131 |
132 | 133 | 134 |
135 | 138 | 139 |
140 |

Site built with pkgdown.

141 |
142 | 143 |
144 |
145 | 146 | 147 | 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Detecting if a post is trolling, one post at a time • trollR 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 21 |
22 |
67 | 68 | 69 | 70 |
71 |
72 | 73 | 74 | 75 | 76 | 77 |
78 | 80 |

LSE Hackathon Challenge: Detecting Online Trolling Behaviour

81 |

Click here to try out our shiny-app.

82 |

Data source: https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/

83 |

Data description

84 |

A large number of Wikipedia comments which have been labeled by human raters for toxic behavior. The types of toxicity are:

85 |
    86 |
  • toxic
  • 87 |
  • severe_toxic
  • 88 |
  • obscene
  • 89 |
  • threat
  • 90 |
  • insult
  • 91 |
  • identity_hate
  • 92 |
93 |
94 |
95 |

96 | Usage

97 |

To install the package use

98 |
# install.packages("devtools")
 99 | devtools::install_github("schliebs/trollR",
100 |                          auth_token = "6957b42653250daa253173f2b5e0f8e384a8f961")
101 | library(trollR)
102 | library(xgboost)
103 |
predict_troll("Hello World - this is an example of trollR - Identifying trolling comments using R")
104 | #> [1] 0.0722369
105 | 
106 | # take some text
107 | text <- c(
108 |   "I would like to point out that your comment was substandard!",
109 |   "YOU SHOULD DIE!!!!",
110 |   "YOU SHOULD DIE",
111 |   "you should die!!!!",
112 |   "you should die",
113 |   "Go rot in hell",
114 |   "I can also write something non-toxic -- really",
115 |   "COCKSUCKER BEFORE YOU PISS AROUND ON MY WORK",
116 |   "bloody hell, i forgot my purse at the pub yesterday"
117 | )
118 | 
119 | # and find how likely it is to be trolling?
120 | data_frame(text = text, troll = predict_troll(text)) %>% arrange(-troll)
121 | #> # A tibble: 9 x 2
122 | #>   text                                                          troll
123 | #>   <chr>                                                         <dbl>
124 | #> 1 COCKSUCKER BEFORE YOU PISS AROUND ON MY WORK                 0.972 
125 | #> 2 bloody hell, i forgot my purse at the pub yesterday          0.958 
126 | #> 3 Go rot in hell                                               0.796 
127 | #> 4 you should die!!!!                                           0.729 
128 | #> 5 YOU SHOULD DIE!!!!                                           0.714 
129 | #> 6 YOU SHOULD DIE                                               0.667 
130 | #> 7 you should die                                               0.543 
131 | #> 8 I would like to point out that your comment was substandard! 0.0739
132 | #> 9 I can also write something non-toxic -- really               0.0281
133 |
134 |

135 | Thats all?

136 |

Of course not

137 | 138 |
139 | 140 |
141 |

Or from a terminal

142 |
curl "http://localhost:8000/trollR?text=You suck you cocksucker"
143 |

{"text":["You suck you cocksucker"],"troll_certainty":[0.9746]}

144 |

But wait, there is more

145 | 146 |
147 | 148 |
149 |
150 |
151 |
152 |

153 | Understanding the model

154 |
# load the model
155 | #model <- xgb.load(system.file("xgboost_model.buffer", package = "trollR"))
156 | model <- xgb.load("inst/xgboost_model.buffer")
157 | 
158 | df <- xgb.importance(mdl_data$model_matrix %>% colnames(), model) %>% as_data_frame()
159 | 
160 | vars <- c("length", "ncap", "ncap_len", "nsen", "nexcl", "nquest", "npunct", 
161 |           "nword", "nsymb", "nsmile", "nslur")
162 | df %>% 
163 |   arrange(-Gain) %>% 
164 |   top_n(20, Gain) %>% 
165 |   mutate(Feature = reorder(Feature, Gain),
166 |          Vartype = Feature %in% vars) %>% 
167 |   ggplot(aes(x = Feature, y = Gain, fill = Vartype)) + 
168 |   geom_col() +
169 |   coord_flip() +
170 |   labs(y = "Feature Importance in the XGBoost Model", x = "", title = "") +
171 |   theme(axis.text.y = element_text(size = 15, face = "bold")) +
172 |   scale_fill_brewer(palette = "Set1", guide = F)
173 |

174 |
175 |
176 | 177 | 204 | 205 |
206 | 207 | 208 |
211 | 212 |
213 |

Site built with pkgdown.

214 |
215 | 216 |
217 |
218 | 219 | 220 | 221 | 222 | 223 | -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | http://leafo.net 3 | */ 4 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 5 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
"))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 6 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 8 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 9 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body > .container { 21 | display: flex; 22 | height: 100%; 23 | flex-direction: column; 24 | 25 | padding-top: 60px; 26 | } 27 | 28 | body > .container .row { 29 | flex: 1 0 auto; 30 | } 31 | 32 | footer { 33 | margin-top: 45px; 34 | padding: 35px 0 36px; 35 | border-top: 1px solid #e5e5e5; 36 | color: #666; 37 | display: flex; 38 | flex-shrink: 0; 39 | } 40 | footer p { 41 | margin-bottom: 0; 42 | } 43 | footer div { 44 | flex: 1; 45 | } 46 | footer .pkgdown { 47 | text-align: right; 48 | } 49 | footer p { 50 | margin-bottom: 0; 51 | } 52 | 53 | img.icon { 54 | float: right; 55 | } 56 | 57 | img { 58 | max-width: 100%; 59 | } 60 | 61 | /* Typographic tweaking ---------------------------------*/ 62 | 63 | .contents h1.page-header { 64 | margin-top: calc(-60px + 1em); 65 | } 66 | 67 | /* Section anchors ---------------------------------*/ 68 | 69 | a.anchor { 70 | margin-left: -30px; 71 | display:inline-block; 72 | width: 30px; 73 | height: 30px; 74 | visibility: hidden; 75 | 76 | background-image: url(./link.svg); 77 | background-repeat: no-repeat; 78 | background-size: 20px 20px; 79 | background-position: center center; 80 | } 81 | 82 | .hasAnchor:hover a.anchor { 83 | visibility: visible; 84 | } 85 | 86 | @media (max-width: 767px) { 87 | .hasAnchor:hover a.anchor { 88 | visibility: hidden; 89 | } 90 | } 91 | 92 | 93 | /* Fixes for fixed navbar --------------------------*/ 94 | 95 | .contents h1, .contents h2, .contents h3, .contents h4 { 96 | padding-top: 60px; 97 | margin-top: -40px; 98 | } 99 | 100 | /* Static header placement on mobile devices */ 101 | @media (max-width: 767px) { 102 | .navbar-fixed-top { 103 | position: absolute; 104 | } 105 | .navbar { 106 | padding: 0; 107 | } 108 | } 109 | 110 | 111 | /* Sidebar --------------------------*/ 112 | 113 | #sidebar { 114 | margin-top: 30px; 115 | } 116 | #sidebar h2 { 117 | font-size: 1.5em; 118 | margin-top: 1em; 119 | } 120 | 121 | #sidebar h2:first-child { 122 | margin-top: 0; 123 | } 124 | 125 | #sidebar .list-unstyled li { 126 | margin-bottom: 0.5em; 127 | } 128 | 129 | .orcid { 130 | height: 16px; 131 | vertical-align: middle; 132 | } 133 | 134 | /* Reference index & topics ----------------------------------------------- */ 135 | 136 | .ref-index th {font-weight: normal;} 137 | 138 | .ref-index td {vertical-align: top;} 139 | .ref-index .alias {width: 40%;} 140 | .ref-index .title {width: 60%;} 141 | 142 | .ref-index .alias {width: 40%;} 143 | .ref-index .title {width: 60%;} 144 | 145 | .ref-arguments th {text-align: right; padding-right: 10px;} 146 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 147 | .ref-arguments .name {width: 20%;} 148 | .ref-arguments .desc {width: 80%;} 149 | 150 | /* Nice scrolling for wide elements --------------------------------------- */ 151 | 152 | table { 153 | display: block; 154 | overflow: auto; 155 | } 156 | 157 | /* Syntax highlighting ---------------------------------------------------- */ 158 | 159 | pre { 160 | word-wrap: normal; 161 | word-break: normal; 162 | border: 1px solid #eee; 163 | } 164 | 165 | pre, code { 166 | background-color: #f8f8f8; 167 | color: #333; 168 | } 169 | 170 | pre code { 171 | overflow: auto; 172 | word-wrap: normal; 173 | white-space: pre; 174 | } 175 | 176 | pre .img { 177 | margin: 5px 0; 178 | } 179 | 180 | pre .img img { 181 | background-color: #fff; 182 | display: block; 183 | height: auto; 184 | } 185 | 186 | code a, pre a { 187 | color: #375f84; 188 | } 189 | 190 | a.sourceLine:hover { 191 | text-decoration: none; 192 | } 193 | 194 | .fl {color: #1514b5;} 195 | .fu {color: #000000;} /* function */ 196 | .ch,.st {color: #036a07;} /* string */ 197 | .kw {color: #264D66;} /* keyword */ 198 | .co {color: #888888;} /* comment */ 199 | 200 | .message { color: black; font-weight: bolder;} 201 | .error { color: orange; font-weight: bolder;} 202 | .warning { color: #6A0366; font-weight: bolder;} 203 | 204 | /* Clipboard --------------------------*/ 205 | 206 | .hasCopyButton { 207 | position: relative; 208 | } 209 | 210 | .btn-copy-ex { 211 | position: absolute; 212 | right: 0; 213 | top: 0; 214 | visibility: hidden; 215 | } 216 | 217 | .hasCopyButton:hover button.btn-copy-ex { 218 | visibility: visible; 219 | } 220 | 221 | /* Docsearch -------------------------------------------------------------- */ 222 | 223 | div.ds-dataset-1 { 224 | overflow: auto; 225 | max-height: 80vh; 226 | } 227 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | $("#sidebar") 4 | .stick_in_parent({offset_top: 40}) 5 | .on('sticky_kit:bottom', function(e) { 6 | $(this).parent().css('position', 'static'); 7 | }) 8 | .on('sticky_kit:unbottom', function(e) { 9 | $(this).parent().css('position', 'relative'); 10 | }); 11 | 12 | $('body').scrollspy({ 13 | target: '#sidebar', 14 | offset: 60 15 | }); 16 | 17 | $('[data-toggle="tooltip"]').tooltip(); 18 | 19 | var cur_path = paths(location.pathname); 20 | $("#navbar ul li a").each(function(index, value) { 21 | if (value.text == "Home") 22 | return; 23 | if (value.getAttribute("href") === "#") 24 | return; 25 | 26 | var path = paths(value.pathname); 27 | if (is_prefix(cur_path, path)) { 28 | // Add class to parent
  • , and enclosing
  • if in dropdown 29 | var menu_anchor = $(value); 30 | menu_anchor.parent().addClass("active"); 31 | menu_anchor.closest("li.dropdown").addClass("active"); 32 | } 33 | }); 34 | }); 35 | 36 | function paths(pathname) { 37 | var pieces = pathname.split("/"); 38 | pieces.shift(); // always starts with / 39 | 40 | var end = pieces[pieces.length - 1]; 41 | if (end === "index.html" || end === "") 42 | pieces.pop(); 43 | return(pieces); 44 | } 45 | 46 | function is_prefix(needle, haystack) { 47 | if (needle.length > haystack.lengh) 48 | return(false); 49 | 50 | // Special case for length-0 haystack, since for loop won't run 51 | if (haystack.length === 0) { 52 | return(needle.length === 0); 53 | } 54 | 55 | for (var i = 0; i < haystack.length; i++) { 56 | if (needle[i] != haystack[i]) 57 | return(false); 58 | } 59 | 60 | return(true); 61 | } 62 | 63 | /* Clipboard --------------------------*/ 64 | 65 | function changeTooltipMessage(element, msg) { 66 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 67 | element.setAttribute('data-original-title', msg); 68 | $(element).tooltip('show'); 69 | element.setAttribute('data-original-title', tooltipOriginalTitle); 70 | } 71 | 72 | if(Clipboard.isSupported()) { 73 | $(document).ready(function() { 74 | var copyButton = ""; 75 | 76 | $(".examples").addClass("hasCopyButton"); 77 | 78 | // Insert copy buttons: 79 | $(copyButton).prependTo(".hasCopyButton"); 80 | 81 | // Initialize tooltips: 82 | $('.btn-copy-ex').tooltip({container: 'body'}); 83 | 84 | // Initialize clipboard: 85 | var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { 86 | text: function(trigger) { 87 | return trigger.parentNode.textContent; 88 | } 89 | }); 90 | 91 | clipboardBtnCopies.on('success', function(e) { 92 | changeTooltipMessage(e.trigger, 'Copied!'); 93 | e.clearSelection(); 94 | }); 95 | 96 | clipboardBtnCopies.on('error', function() { 97 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 98 | }); 99 | }); 100 | } 101 | 102 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 1.19.2.1 2 | pkgdown: 0.1.0.9000 3 | pkgdown_sha: a7aa75eeb866e0e0c891785d8d7bc0c0d2294c36 4 | articles: [] 5 | 6 | -------------------------------------------------------------------------------- /docs/reference/build_features.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Builds the feature-matrix from a text-vector — build_features • trollR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
    49 |
    50 | 95 | 96 | 97 |
    98 | 99 |
    100 |
    101 | 106 | 107 | 108 |

    Builds the feature-matrix from a text-vector

    109 | 110 | 111 |
    build_features(x, term_count_min = 1, mdl = NULL, parallel = TRUE,
    112 |   quiet = FALSE)
    113 | 114 |

    Arguments

    115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 126 | 127 | 128 | 129 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 |
    x

    a vector of text

    term_count_min

    a number passed to 124 | prune_vocabulary, defaults to 1. In case the function 125 | is used for training, it can and should be set to some higher value, i.e., 3.

    mdl

    is a list of existing models-data (containing the vectorizer, the 130 | tfidf, and the lsa object), defaults to NULL, in which case it is rebuild

    parallel

    T/F if the task should be executed in parallel, defaults to TRUE

    quiet

    T/F if the function remains silent, defaults to FALSE

    141 | 142 |

    Value

    143 | 144 |

    a list of two: a dgCMatrix that contains the features (columns) for 145 | each text (row) and as a second element a list of the model that can be passed 146 | as mdl

    147 | 148 | 149 |

    Examples

    150 |
    text <- c( 151 | "This is a first text that describes something", 152 | "A second Text That USES A LOT of CAPITALS", 153 | "Lastly MANY!!!! (like, really a lot!) punctuations!!!" 154 | ) 155 | 156 | build_features(text)
    #> Calculating Features... 157 | #> Create DTM... 158 | #> Finished in 5.12 seconds
    #> $model_matrix 159 | #> 3 x 21 sparse Matrix of class "dgCMatrix"
    #> [[ suppressing 21 column names ‘length’, ‘ncap’, ‘ncap_len’ ... ]]
    #> 160 | #> 1 45 1 0.02222222 . . . . 8 . . 1 1 1 . . . . . . . . 161 | #> 2 41 19 0.46341463 . . . . 9 . . . . . 1 1 1 . . . . . 162 | #> 3 53 5 0.09433962 . 8 . 11 7 . . . . . . . . 1 1 1 1 1 163 | #> 164 | #> $mdl 165 | #> $mdl$vectorizer 166 | #> function (iterator, grow_dtm, skip_grams_window_context, window_size, 167 | #> weights) 168 | #> { 169 | #> vocab_corpus_ptr = cpp_vocabulary_corpus_create(vocabulary$term, 170 | #> attr(vocabulary, "ngram")[[1]], attr(vocabulary, "ngram")[[2]], 171 | #> attr(vocabulary, "stopwords"), attr(vocabulary, "sep_ngram")) 172 | #> setattr(vocab_corpus_ptr, "ids", character(0)) 173 | #> setattr(vocab_corpus_ptr, "class", "VocabCorpus") 174 | #> corpus_insert(vocab_corpus_ptr, iterator, grow_dtm, skip_grams_window_context, 175 | #> window_size, weights) 176 | #> } 177 | #> <environment: 0x000000000652caf0> 178 | #> 179 | #>
    180 | # a second example 181 | train <- c("Banking is finance", "flowers are not houses", "finance is power", "houses are build") 182 | test <- c("finance is greed", "flowers belong in the garbage", "houses are build") 183 | 184 | a1 <- build_features(test)
    #> Calculating Features... 185 | #> Create DTM... 186 | #> Finished in 3.38 seconds
    a12 <- build_features(test, mdl = a1$mdl)
    #> Calculating Features... 187 | #> Create DTM... 188 | #> Finished in 2.99 seconds
    189 | a2 <- build_features(train, mdl = a1$mdl)
    #> Calculating Features... 190 | #> Create DTM... 191 | #> Finished in 3.07 seconds
    a2$model_matrix %>% as.matrix()
    #> length ncap ncap_len nsen nexcl nquest npunct nword nsymb nsmile greed 192 | #> 1 18 1 0.05555556 0 0 0 0 3 0 0 0 193 | #> 2 22 0 0.00000000 0 0 0 0 4 0 0 0 194 | #> 3 16 0 0.00000000 0 0 0 0 3 0 0 0 195 | #> 4 16 0 0.00000000 0 0 0 0 3 0 0 0 196 | #> financ belong garbag flower build hous 197 | #> 1 1 0 0 0 0 0 198 | #> 2 0 0 0 1 0 1 199 | #> 3 1 0 0 0 0 0 200 | #> 4 0 0 0 0 1 1
    201 |
    202 | 213 |
    214 | 215 |
    216 | 219 | 220 |
    221 |

    Site built with pkgdown.

    222 |
    223 | 224 |
    225 |
    226 | 227 | 228 | 229 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /docs/reference/figures/README-plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/docs/reference/figures/README-plot1-1.png -------------------------------------------------------------------------------- /docs/reference/figures/README-plot2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/docs/reference/figures/README-plot2-1.png -------------------------------------------------------------------------------- /docs/reference/figures/README-unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/docs/reference/figures/README-unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/reference/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Function reference • trollR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 |
    46 |
    47 | 92 | 93 | 94 |
    95 | 96 |
    97 |
    98 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 115 | 116 | 117 | 118 | 121 | 122 | 123 | 124 | 127 | 128 | 129 | 130 | 133 | 134 | 135 | 136 | 139 | 140 | 141 | 142 | 145 | 146 | 147 | 148 | 149 | 153 | 154 | 155 | 156 | 159 | 160 | 161 | 162 |
    112 |

    All functions

    113 |

    114 |
    119 |

    build_features()

    120 |

    Builds the feature-matrix from a text-vector

    125 |

    predict_troll()

    126 |

    Detect if given texts are trolls

    131 |

    run_api()

    132 |

    Run the Plumber API

    137 |

    run_shiny()

    138 |

    Shiny App Launcher

    143 |

    shitwordlist

    144 |

    Shitword list

    150 |

    Data

    151 |

    152 |
    157 |

    shitwordlist

    158 |

    Shitword list

    163 |
    164 | 165 | 172 |
    173 | 174 |
    175 | 178 | 179 |
    180 |

    Site built with pkgdown.

    181 |
    182 | 183 |
    184 |
    185 | 186 | 187 | 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /docs/reference/predict_troll.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Detect if given texts are trolls — predict_troll • trollR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
    49 |
    50 | 95 | 96 | 97 |
    98 | 99 |
    100 |
    101 | 106 | 107 | 108 |

    Detect if given texts are trolls

    109 | 110 | 111 |
    predict_troll(x, model_ = NULL, mdl_data_ = NULL)
    112 | 113 |

    Arguments

    114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 124 | 125 | 126 | 127 | 130 | 131 |
    x

    a vector of text

    model_

    a model that is passed to predict, defaults to the model 123 | supplied with this package

    mdl_data_

    a model as returned by build_features (the mdl) 128 | containing the vectorizer, tfidf, and the lsa objects. Defaults to the 129 | mdl_data from this package.

    132 | 133 |

    Value

    134 | 135 |

    a vector with the same lengths as x that holds the predicted probabilities 136 | that the given text is trolling

    137 | 138 | 139 |

    Examples

    140 |
    text <- c("You suck, die!", "What a nice world we have today", "I like you", "I hate you") 141 | (pred <- predict_troll(text))
    #> [1] 0.99461335 0.06459010 0.01856389 0.19459596
    142 |
    143 | 154 |
    155 | 156 |
    157 | 160 | 161 |
    162 |

    Site built with pkgdown.

    163 |
    164 | 165 |
    166 |
    167 | 168 | 169 | 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /docs/reference/run_api.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Run the Plumber API — run_api • trollR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
    49 |
    50 | 95 | 96 | 97 |
    98 | 99 |
    100 |
    101 | 106 | 107 | 108 |

    Run the Plumber API

    109 | 110 | 111 |
    run_api(port = 8000)
    112 | 113 |

    Arguments

    114 | 115 | 116 | 117 | 118 | 119 | 120 |
    ...

    parameters passed to plumber

    121 | 122 |

    Value

    123 | 124 |

    invisible NULL

    125 | 126 | 127 |

    Examples

    128 |
    # NOT RUN {
    129 |   run_api()
    130 |   # try to got to: http://127.0.0.1:8000/trollR
    131 |   # or use http://127.0.0.1:8000/trollR?text=This may be a troll comment
    132 | # }
    133 |
    134 | 145 |
    146 | 147 |
    148 | 151 | 152 |
    153 |

    Site built with pkgdown.

    154 |
    155 | 156 |
    157 |
    158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /docs/reference/run_shiny.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Shiny App Launcher — run_shiny • trollR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
    49 |
    50 | 95 | 96 | 97 |
    98 | 99 |
    100 |
    101 | 106 | 107 | 108 |

    Shiny App Launcher

    109 | 110 | 111 |
    run_shiny(example = "trollR")
    112 | 113 |

    Arguments

    114 | 115 | 116 | 117 | 118 | 119 | 120 |
    example

    name of the app, defaults to trollR

    121 | 122 |

    Value

    123 | 124 |

    Nothing

    125 | 126 | 127 |

    Examples

    128 |
    # NOT RUN {
    129 |  run_shiny()
    130 | # }
    131 |
    132 | 143 |
    144 | 145 |
    146 | 149 | 150 |
    151 |

    Site built with pkgdown.

    152 |
    153 | 154 |
    155 |
    156 | 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /docs/reference/shitwordlist.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Shitword list — shitwordlist • trollR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
    49 |
    50 | 95 | 96 | 97 |
    98 | 99 |
    100 |
    101 | 106 | 107 | 108 |

    A list of Enlish curse words scraped from https://www.noswearing.com/dictionary

    109 | 110 | 111 |
    shitwordlist
    112 | 113 |

    Format

    114 | 115 |

    A character vector containing 349 words.

    116 |
    test

    this is only for illustration purpose how to document data

    117 |
    london

    hello whatup

    118 |
    119 | 120 | 121 |
    122 | 130 |
    131 | 132 |
    133 | 136 | 137 |
    138 |

    Site built with pkgdown.

    139 |
    140 | 141 |
    142 |
    143 | 144 | 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /docs/reference/test_function.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | The function name pipapo. — test_function • trollR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
    49 |
    50 | 107 | 108 | 109 |
    110 | 111 |
    112 |
    113 | 118 | 119 | 120 |

    What it does.

    121 | 122 | 123 |
    test_function(x = c("hello"), y = 2, z = c(2, 7, 9))
    124 | 125 |

    Arguments

    126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 |
    x

    A string vector.

    y

    An integer.

    z

    A numeric vector.

    141 | 142 |

    Value

    143 | 144 |

    What the function returns (eg. a data frame with n rows of v variables).

    145 | 146 |

    Warning

    147 | 148 | 149 |

    Do not operate heavy machinery within 8 hours of using this function.

    150 | 151 | 152 |

    Examples

    153 |
    test_function(x = c("hello"), 154 | y = c(2), 155 | z = c(2,7,9))
    #> [1] "hello" 156 | #> [1] "this gives you the result of y * sum(z)"
    #> [1] 36
    157 |
    158 | 171 |
    172 | 173 | 183 |
    184 | 185 | 186 | 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /helpers/create_manual.R: -------------------------------------------------------------------------------- 1 | #!!!! https://tex.stackexchange.com/questions/125274/error-font-ts1-zi4r-at-540-not-found !!! 2 | ' 3 | devtools::document() 4 | devtools::build() 5 | 6 | pack <- "trollR" 7 | path <- "C:/Users/Schliebs/OneDrive/github/trollR" #getwd()#find.package(pack)# 8 | 9 | file.remove("C:/Users/Schliebs/OneDrive/github/trollR/trollR.pdf") 10 | system(paste(shQuote(file.path(R.home("bin"), "R")),"CMD", "Rd2pdf", shQuote(path))) 11 | 12 | pkgdown::build_site() 13 | 14 | 15 | ' 16 | 17 | 18 | -------------------------------------------------------------------------------- /helpers/scrape_swearwords.R: -------------------------------------------------------------------------------- 1 | library(rvest) 2 | library(tidyverse) 3 | 4 | url <- 'https://www.noswearing.com/dictionary' 5 | 6 | site <- 7 | read_html(url) 8 | 9 | 10 | subsites <- 11 | site %>% 12 | html_nodes(xpath = '/html/body/center/div[2]/*') %>% 13 | html_attr('href') %>% 14 | . [4:length(.)] 15 | 16 | human_wait = function(t = 2, tt = 4){ 17 | Sys.sleep(sample(seq(t, tt, by=0.001), 1)) 18 | } 19 | 20 | getCurseWords <- function(x){ 21 | 22 | page <- 23 | read_html(x) 24 | 25 | selector <- "/html/body/center/center[2]/div/table" 26 | 27 | table <- page %>% 28 | html_nodes(xpath = selector) %>% 29 | html_nodes("a") %>% 30 | html_attr("name") 31 | 32 | return(table) 33 | 34 | human_wait(t = 2,tt = 4) 35 | 36 | } 37 | 38 | result <- map(.x = subsites,.f = getCurseWords) 39 | names(result) <- c(LETTERS) 40 | 41 | shitwordlist <- 42 | unlist(result) %>% 43 | as.character() %>% 44 | .[!is.na(.)] 45 | 46 | devtools::use_data(shitwordlist,overwrite = TRUE) 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /helpers/train_model.R: -------------------------------------------------------------------------------- 1 | library(trollR) 2 | library(xgboost) 3 | 4 | train <- read_csv("../train.csv") 5 | y <- train %>% select(-id, -comment_text) %>% rowSums() %>% {. > 0} * 1 6 | 7 | # 1. Build the Features 8 | mdl_data <- build_features(train$comment_text, term_count_min = 3) 9 | usethis::use_data(mdl_data, overwrite = T) 10 | 11 | # Load or Train the Model (xgbost) 12 | 13 | # directly load the model 14 | # model <- xgb.load("inst/xgboost_model.buffer") 15 | 16 | # train the model 17 | p <- list(objective = "binary:logistic", 18 | booster = "gbtree", 19 | eval_metric = "auc", 20 | nthread = 8, 21 | eta = 0.2, 22 | max_depth = 5, 23 | min_child_weight = 5, 24 | subsample = 0.7, 25 | colsample_bytree = 0.7) 26 | 27 | model <- xgboost(mdl_data$model_matrix, y, 28 | params = p, 29 | print_every_n = 20, nrounds = 1000, 30 | early_stopping_rounds = 100) 31 | 32 | xgb.save(model, "inst/xgboost_model.buffer") 33 | 34 | # Evaluate the Performance of the Model 35 | y_pred <- 1 * (predict(model, mdl_data$model_matrix) > 0.35) 36 | caret::confusionMatrix(factor(y_pred), factor(y)) 37 | 38 | 39 | 40 | # REBUILD PACKAGE NOW! 41 | -------------------------------------------------------------------------------- /inst/plumber_api.R: -------------------------------------------------------------------------------- 1 | 2 | #* @get /trollR 3 | classify_text <- function(text = "Hello World - Sample Text"){ 4 | library(trollR) 5 | 6 | troll <- predict_troll(text) 7 | 8 | list( 9 | text = text, 10 | troll_certainty = troll 11 | ) 12 | } 13 | -------------------------------------------------------------------------------- /inst/shiny-examples/testapp/app.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | source("modules.R") 4 | 5 | ui <- fixedPage( 6 | h2("Module example"), 7 | actionButton("insertBtn", "Insert module") 8 | ) 9 | 10 | server <- function(input, output, session) { 11 | observeEvent(input$insertBtn, { 12 | btn <- input$insertBtn 13 | insertUI( 14 | selector = "h2", 15 | where = "beforeEnd", 16 | ui = tagList( 17 | h4(paste("Module no.", btn)), 18 | linkedScatterUI(paste0("scatters", btn)), 19 | textOutput(paste0("summary", btn)) 20 | ) 21 | ) 22 | 23 | df <- callModule(linkedScatter, 24 | paste0("scatters", btn), 25 | reactive(mpg), 26 | left = reactive(c("cty", "hwy")), 27 | right = reactive(c("drv", "hwy")) 28 | ) 29 | 30 | output$summary <- renderText({ 31 | sprintf("%d observation(s) selected", 32 | nrow(dplyr::filter(df(), selected_))) 33 | }) 34 | }) 35 | } 36 | 37 | shinyApp(ui, server) 38 | -------------------------------------------------------------------------------- /inst/shiny-examples/testapp/modules.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | library(ggplot2) 3 | 4 | linkedScatterUI <- function(id) { 5 | ns <- NS(id) 6 | 7 | fluidRow( 8 | column(6, plotOutput(ns("plot1"), brush = ns("brush"))), 9 | column(6, plotOutput(ns("plot2"), brush = ns("brush"))) 10 | ) 11 | } 12 | 13 | linkedScatter <- function(input, output, session, data, left, right) { 14 | # Yields the data frame with an additional column "selected_" 15 | # that indicates whether that observation is brushed 16 | dataWithSelection <- reactive({ 17 | brushedPoints(data(), input$brush, allRows = TRUE) 18 | }) 19 | 20 | output$plot1 <- renderPlot({ 21 | scatterPlot(dataWithSelection(), left()) 22 | }) 23 | 24 | output$plot2 <- renderPlot({ 25 | scatterPlot(dataWithSelection(), right()) 26 | }) 27 | 28 | return(dataWithSelection) 29 | } 30 | 31 | scatterPlot <- function(data, cols) { 32 | ggplot(data, aes_string(x = cols[1], y = cols[2])) + 33 | geom_point(aes(color = selected_)) + 34 | scale_color_manual(values = c("black", "#66D65C"), guide = FALSE) 35 | } 36 | -------------------------------------------------------------------------------- /inst/shiny-examples/trollR/app.R: -------------------------------------------------------------------------------- 1 | library(shinydashboard) 2 | library(tidyverse) 3 | #devtools::install_github("schliebs/trollR") 4 | # 5 | library(trollR) 6 | library(doParallel) 7 | library(Matrix) 8 | library(plumber) 9 | library(shinydashboard) 10 | library(text2vec) 11 | library(tokenizers) 12 | library(tm) 13 | library(xgboost) 14 | 15 | shuffleButton <- function(id, label = "button1"){ 16 | ns <- NS(id) 17 | tagList( 18 | actionButton(("reshuffle"), label = paste0("Classify Comment")), 19 | checkboxInput(("safespace"), label = h6("Safespace on"),value = FALSE) 20 | ) 21 | } 22 | 23 | nice <- function(input) { 24 | token <- unlist(strsplit(input, " ")) 25 | words <- token %in% shitwordlist 26 | token[words] <- gsub("[A-z]", replacement = "*", x = token[words]) 27 | paste(token, collapse = " ") 28 | } 29 | 30 | server <- function(input,output,session) { 31 | string <- eventReactive(input$reshuffle, { 32 | if(input$safespace == FALSE) input$testinput else 33 | if(input$safespace == TRUE) input$testinput %>% nice() 34 | }) 35 | 36 | prob <- eventReactive(input$reshuffle, { 37 | (predict_troll(input$testinput)*100) %>% round(1) 38 | }) 39 | 40 | output$value <- renderText({string()}) 41 | 42 | output$gaugeee <- flexdashboard::renderGauge({ 43 | flexdashboard::gauge(value = prob(), min = 0, max = 100, symbol = '%', 44 | label = "Toxicity", 45 | flexdashboard::gaugeSectors( 46 | success = c(0, 25), warning = c(25,50), danger = c(75, 100), 47 | colors = c("blue","green","red") 48 | )) 49 | }) 50 | } 51 | 52 | sbw = "30%" # CSS unit 53 | 54 | ui <- 55 | dashboardPage( 56 | dashboardHeader(titleWidth = sbw, 57 | title = "trollR - Online Trolling Detection"), 58 | dashboardSidebar(width = sbw, 59 | textAreaInput(inputId = "testinput", 60 | label = "Comment", height = "20%", width = "100%", 61 | placeholder = "Please type your comment here."), 62 | shuffleButton(id = "A") 63 | ), 64 | dashboardBody( 65 | fluidRow( 66 | box(title = "Submitted Comment", 67 | width = 8, 68 | solidHeader = TRUE, 69 | status = "primary", 70 | verbatimTextOutput("value")), 71 | box(title = "Troll Probability", 72 | height = 175, 73 | width = 4, 74 | solidHeader = TRUE, status = "primary", 75 | flexdashboard::gaugeOutput("gaugeee")) 76 | ) 77 | )#, 78 | # tags$head(tags$style("#value{color: black; 79 | # font-size: 15px; 80 | # font-style: italic; 81 | # }")) 82 | 83 | ) 84 | 85 | shinyApp(ui = ui, server = server) 86 | 87 | 88 | 89 | # Ideas: Safe-Mode 90 | 91 | -------------------------------------------------------------------------------- /inst/shiny-examples/trollR/rsconnect/shinyapps.io/schliebs/trollR.dcf: -------------------------------------------------------------------------------- 1 | name: trollR 2 | title: trollR 3 | username: 4 | account: schliebs 5 | server: shinyapps.io 6 | hostUrl: https://api.shinyapps.io/v1 7 | appId: 326140 8 | bundleId: 1327839 9 | url: https://schliebs.shinyapps.io/trollR/ 10 | when: 1524156855.20144 11 | asMultiple: FALSE 12 | asStatic: FALSE 13 | -------------------------------------------------------------------------------- /inst/xgboost_model.buffer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/inst/xgboost_model.buffer -------------------------------------------------------------------------------- /man/build_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/build_features.R 3 | \name{build_features} 4 | \alias{build_features} 5 | \title{Builds the feature-matrix from a text-vector} 6 | \usage{ 7 | build_features(x, term_count_min = 1, mdl = NULL, parallel = TRUE, 8 | quiet = FALSE) 9 | } 10 | \arguments{ 11 | \item{x}{a vector of text} 12 | 13 | \item{term_count_min}{a number passed to 14 | \code{\link[text2vec]{prune_vocabulary}}, defaults to 1. In case the function 15 | is used for training, it can and should be set to some higher value, i.e., 3.} 16 | 17 | \item{mdl}{is a list of existing models-data (containing the vectorizer, the 18 | tfidf, and the lsa object), defaults to NULL, in which case it is rebuild} 19 | 20 | \item{parallel}{T/F if the task should be executed in parallel, defaults to TRUE} 21 | 22 | \item{quiet}{T/F if the function remains silent, defaults to FALSE} 23 | } 24 | \value{ 25 | a list of two: a dgCMatrix that contains the features (columns) for 26 | each text (row) and as a second element a list of the model that can be passed 27 | as mdl 28 | } 29 | \description{ 30 | Builds the feature-matrix from a text-vector 31 | } 32 | \examples{ 33 | text <- c( 34 | "This is a first text that describes something", 35 | "A second Text That USES A LOT of CAPITALS", 36 | "Lastly MANY!!!! (like, really a lot!) punctuations!!!" 37 | ) 38 | 39 | build_features(text) 40 | 41 | # a second example 42 | train <- c("Banking is finance", "flowers are not houses", "finance is power", "houses are build") 43 | test <- c("finance is greed", "flowers belong in the garbage", "houses are build") 44 | 45 | a1 <- build_features(test) 46 | a12 <- build_features(test, mdl = a1$mdl) 47 | 48 | a2 <- build_features(train, mdl = a1$mdl) 49 | a2$model_matrix \%>\% as.matrix() 50 | } 51 | -------------------------------------------------------------------------------- /man/figures/README-plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/man/figures/README-plot1-1.png -------------------------------------------------------------------------------- /man/figures/README-plot2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/man/figures/README-plot2-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/man/figures/README-unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /man/predict_troll.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict_troll.R 3 | \name{predict_troll} 4 | \alias{predict_troll} 5 | \title{Detect if given texts are trolls} 6 | \usage{ 7 | predict_troll(x, model_ = NULL, mdl_data_ = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{a vector of text} 11 | 12 | \item{model_}{a model that is passed to predict, defaults to the \code{model} 13 | supplied with this package} 14 | 15 | \item{mdl_data_}{a model as returned by \code{\link{build_features}} (the mdl) 16 | containing the vectorizer, tfidf, and the lsa objects. Defaults to the 17 | \code{mdl_data} from this package.} 18 | } 19 | \value{ 20 | a vector with the same lengths as x that holds the predicted probabilities 21 | that the given text is trolling 22 | } 23 | \description{ 24 | Detect if given texts are trolls 25 | } 26 | \examples{ 27 | text <- c("You suck, die!", "What a nice world we have today", "I like you", "I hate you") 28 | (pred <- predict_troll(text)) 29 | } 30 | -------------------------------------------------------------------------------- /man/run_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_api.R 3 | \name{run_api} 4 | \alias{run_api} 5 | \title{Run the Plumber API} 6 | \usage{ 7 | run_api(port = 8000) 8 | } 9 | \arguments{ 10 | \item{...}{parameters passed to \code{\link[plumber]{plumber}}} 11 | } 12 | \value{ 13 | invisible NULL 14 | } 15 | \description{ 16 | Run the Plumber API 17 | } 18 | \examples{ 19 | \dontrun{ 20 | run_api() 21 | # try to got to: http://127.0.0.1:8000/trollR 22 | # or use http://127.0.0.1:8000/trollR?text=This may be a troll comment 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /man/run_shiny.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_shiny.R 3 | \name{run_shiny} 4 | \alias{run_shiny} 5 | \title{Shiny App Launcher} 6 | \usage{ 7 | run_shiny(example = "trollR") 8 | } 9 | \arguments{ 10 | \item{example}{name of the app, defaults to trollR} 11 | } 12 | \value{ 13 | Nothing 14 | } 15 | \description{ 16 | Shiny App Launcher 17 | } 18 | \examples{ 19 | \dontrun{ 20 | run_shiny() 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /man/shitwordlist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{shitwordlist} 5 | \alias{shitwordlist} 6 | \title{Shitword list} 7 | \format{A character vector containing 349 words. 8 | \describe{ 9 | \item{test}{this is only for illustration purpose how to document data} 10 | \item{london}{hello whatup} 11 | }} 12 | \usage{ 13 | shitwordlist 14 | } 15 | \description{ 16 | A list of Enlish curse words scraped from https://www.noswearing.com/dictionary 17 | } 18 | \keyword{datasets} 19 | -------------------------------------------------------------------------------- /trollR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /trollR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schliebs/trollR/365b65ea7f5b5af0fec2d45ead9867e40e1cec03/trollR.pdf --------------------------------------------------------------------------------