├── tests
    ├── test-all.r
    └── testthat
    │   ├── data
    │       ├── wn-call-exec.rds
    │       ├── wn-call-info.rds
    │       ├── wn-ciao-exec.rds
    │       ├── wn-ciao-info.rds
    │       ├── wn-nine-exec.rds
    │       ├── wn-nine-info.rds
    │       ├── wn-abcdefghi-exec.rds
    │       └── wn-abcdefghi-info.rds
    │   ├── test-exec.r
    │   ├── test-mining.r
    │   ├── test-info.r
    │   └── test-cmd.r
├── R
    ├── search.r
    ├── messages.r
    ├── flatten.r
    ├── exec.r
    ├── cmd.r
    ├── hypernyms.r
    ├── assertions.r
    ├── info.r
    ├── wnwr.r
    └── mining.r
├── .gitignore
├── NAMESPACE
├── man
    ├── extract_gloss.Rd
    ├── delete_whit.Rd
    ├── delete_with.Rd
    ├── delete_sensenum.Rd
    ├── extract_offset.Rd
    ├── extract_lexinfo.Rd
    ├── build_list.Rd
    ├── identify_senses.Rd
    ├── identify_synsets.Rd
    └── wnwr.Rd
├── NEWS
├── DESCRIPTION
└── README.md


/tests/test-all.r:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | test_check('wnwr')


--------------------------------------------------------------------------------
/R/search.r:
--------------------------------------------------------------------------------
1 | # search <- function(word) {
2 | #   
3 | # }
4 | 
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .R*
2 | *.so
3 | *.o
4 | *.dll
5 | *.Rproj
6 | *.sublime*
7 | .Rproj.user
8 | 


--------------------------------------------------------------------------------
/R/messages.r:
--------------------------------------------------------------------------------
1 | missing_message <- function(var) paste0("'", deparse(substitute(var)), "'", " arg is missing.")
2 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | S3method(print,command)
2 | S3method(print,result)
3 | export(has)
4 | export(hypernyms)
5 | export(info)
6 | 


--------------------------------------------------------------------------------
/tests/testthat/data/wn-call-exec.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leodido/wnwr/develop/tests/testthat/data/wn-call-exec.rds


--------------------------------------------------------------------------------
/tests/testthat/data/wn-call-info.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leodido/wnwr/develop/tests/testthat/data/wn-call-info.rds


--------------------------------------------------------------------------------
/tests/testthat/data/wn-ciao-exec.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leodido/wnwr/develop/tests/testthat/data/wn-ciao-exec.rds


--------------------------------------------------------------------------------
/tests/testthat/data/wn-ciao-info.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leodido/wnwr/develop/tests/testthat/data/wn-ciao-info.rds


--------------------------------------------------------------------------------
/tests/testthat/data/wn-nine-exec.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leodido/wnwr/develop/tests/testthat/data/wn-nine-exec.rds


--------------------------------------------------------------------------------
/tests/testthat/data/wn-nine-info.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leodido/wnwr/develop/tests/testthat/data/wn-nine-info.rds


--------------------------------------------------------------------------------
/tests/testthat/data/wn-abcdefghi-exec.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leodido/wnwr/develop/tests/testthat/data/wn-abcdefghi-exec.rds


--------------------------------------------------------------------------------
/tests/testthat/data/wn-abcdefghi-info.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leodido/wnwr/develop/tests/testthat/data/wn-abcdefghi-info.rds


--------------------------------------------------------------------------------
/man/extract_gloss.Rd:
--------------------------------------------------------------------------------
 1 | \name{extract_gloss}
 2 | \alias{extract_gloss}
 3 | \title{Extracts gloss from a wn string}
 4 | \usage{
 5 | extract_gloss(str)
 6 | }
 7 | \description{
 8 | It is vectorized.
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/man/delete_whit.Rd:
--------------------------------------------------------------------------------
 1 | \name{delete_whit}
 2 | \alias{delete_whit}
 3 | \title{Removes elements that matches regexp}
 4 | \usage{
 5 | delete_whit(str, regexp)
 6 | }
 7 | \description{
 8 | It is vectorized.
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/man/delete_with.Rd:
--------------------------------------------------------------------------------
 1 | \name{delete_with}
 2 | \alias{delete_with}
 3 | \title{Removes elements that matches regexp}
 4 | \usage{
 5 | delete_with(str, regexp)
 6 | }
 7 | \description{
 8 | It is vectorized.
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/man/delete_sensenum.Rd:
--------------------------------------------------------------------------------
 1 | \name{delete_sensenum}
 2 | \alias{delete_sensenum}
 3 | \title{Removes synset number from a wn string}
 4 | \usage{
 5 | delete_sensenum(str)
 6 | }
 7 | \description{
 8 | It is vectorized.
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/man/extract_offset.Rd:
--------------------------------------------------------------------------------
 1 | \name{extract_offset}
 2 | \alias{extract_offset}
 3 | \title{Extracts synset offset from a wn string}
 4 | \usage{
 5 | extract_offset(str)
 6 | }
 7 | \description{
 8 | It is vectorized.
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/man/extract_lexinfo.Rd:
--------------------------------------------------------------------------------
 1 | \name{extract_lexinfo}
 2 | \alias{extract_lexinfo}
 3 | \title{Extracts lexicographer information from a wn string}
 4 | \usage{
 5 | extract_lexinfo(str)
 6 | }
 7 | \description{
 8 | It is vectorized.
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/man/build_list.Rd:
--------------------------------------------------------------------------------
 1 | \name{build_list}
 2 | \alias{build_list}
 3 | \title{Builds a list detecting the presence of a signal}
 4 | \usage{
 5 | build_list(vect, signal = "=>\\\\s")
 6 | }
 7 | \description{
 8 | Builds a list detecting the presence of a signal
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/man/identify_senses.Rd:
--------------------------------------------------------------------------------
 1 | \name{identify_senses}
 2 | \alias{identify_senses}
 3 | \title{Indexes a character vector identifying the senses which its elements belong}
 4 | \usage{
 5 | identify_senses(synset_data)
 6 | }
 7 | \description{
 8 | It return an index.
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | Version 0.2.0
2 | ------------------------------------------------------------------------------
3 |   * WordNet command executor
4 |   * Word information extraction capability
5 |   
6 | Version 0.1.0
7 | ------------------------------------------------------------------------------
8 |   * Wrapper to construct WordNet commands


--------------------------------------------------------------------------------
/R/flatten.r:
--------------------------------------------------------------------------------
 1 | flatten <- function(x) {
 2 |   y <- list()
 3 |   rapply(x, function(x) y <<- c(y,x))
 4 |   return(y)
 5 | }
 6 | 
 7 | depth <- function(this, thisdepth = 0) {
 8 |   if(!is.list(this)) {
 9 |     return(thisdepth)
10 |   } else{
11 |     return(max(unlist(lapply(this, depth, thisdepth = thisdepth + 1))))   
12 |   }
13 | }
14 | 


--------------------------------------------------------------------------------
/man/identify_synsets.Rd:
--------------------------------------------------------------------------------
 1 | \name{identify_synsets}
 2 | \alias{identify_synsets}
 3 | \title{Indexes a character vector identifying the synset which its elements belong}
 4 | \usage{
 5 | identify_synsets(data,
 6 |   synsets = unlist(unname(getOption("wnwr.supported.synset.types"))))
 7 | }
 8 | \description{
 9 | It returns an index.
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: wnwr
 2 | Title: A wordnet wrapper.
 3 | Version: 0.2
 4 | Author: Leonardo Di Donato <leodidonato@gmail.com>
 5 | Maintainer: Leonardo Di Donato <leodidonato@gmail.com>
 6 | Description: A wrapper for the WordNet command (i.e., wn).
 7 | License: GPL-3
 8 | URL: https://github.com/leodido/wnwr
 9 | BugReports: https://github.com/leodido/wnwr/issues
10 | Depends:
11 |     R (>= 3.0.0),
12 |     assertthat,
13 |     wordnet,
14 |     stringr
15 | Imports:
16 |     tools
17 | Suggests:
18 |     testthat
19 | 


--------------------------------------------------------------------------------
/man/wnwr.Rd:
--------------------------------------------------------------------------------
 1 | \docType{package}
 2 | \name{wnwr}
 3 | \alias{wnwr}
 4 | \alias{wnwr-package}
 5 | \title{wnwr provides ...}
 6 | \description{
 7 | wnwr provides ...
 8 | }
 9 | \section{Package options}{
10 |   It uses the following \code{options} to configure
11 |   behaviour: \itemize{ \item
12 |   \code{wnwr.supported.search.types}: vector of supported
13 |   search types \item \code{wnwr.supported.search.opts}:
14 |   vector of supported search options \item
15 |   \code{wnwr.supported.synset.types}: vector of supported
16 |   synset types \item \code{wnwr.wn.command}: wordnet shell
17 |   command }
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/R/exec.r:
--------------------------------------------------------------------------------
 1 | exec <- function(cmd) {
 2 |   # check cmd argument
 3 |   if (missing(cmd)) stop(missing_message(cmd))
 4 |   assert_that(is_command(cmd))
 5 |   # command execution
 6 |   output <- suppressWarnings(tryCatch(system(cmd, intern = TRUE), error = I))
 7 |   if (length(output) > 0) {
 8 |     attrs <- attributes(output)
 9 |     found_senses <- attrs$status    
10 |     return(structure(list(result = paste(output, collpase = ''), num_senses = found_senses), class = c('wn', 'result')))
11 |   } else {
12 |     return(NULL)
13 |   }
14 | }
15 | 
16 | #' @export
17 | print.result <- function(x, ...) {
18 |   cat(
19 |     '* Result:\n',
20 |     paste(delete_with(x$result, '\\s*'), collapse = '\n'),
21 |     '\n* Number of senses:\n',
22 |     x$num_senses,
23 |     '\n',
24 |     sep = ''
25 |   )
26 | }
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | wnwr [![Analytics](https://ga-beacon.appspot.com/UA-49657176-1/wnwr)](https://github.com/igrigorik/ga-beacon)
 2 | =======
 3 | 
 4 | A <b>w</b>ord<b>n</b>et <b>wr</b>apper.
 5 | 
 6 | The current R package contains a set of functions to query the [wordnet](http://wordnet.princeton.edu) lexical database. 
 7 | 
 8 | ## Why ?
 9 | 
10 | ...
11 | 
12 | ## Installation
13 | 
14 | ...
15 | 
16 | ## Usage
17 | 
18 | ...
19 | 
20 | ### Word informations
21 | 
22 | ...
23 | 
24 | * `info(word)`
25 | 
26 | * `has(word, searchtype)`
27 | 
28 | ### Search word informations
29 | 
30 | ...
31 | 
32 | * `search(word, searchtype, option, sense)`
33 | 
34 | * `hypernyms(word, option, sense)`
35 | 
36 | ## Requirements
37 | 
38 | - R language (version >= 3.0)
39 | - The wordnet software ([download](http://wordnet.princeton.edu/wordnet/download/current-version]))
40 | 
41 | ## License
42 | 
43 | This package is released under the terms of the [GPL-3](http://opensource.org/licenses/GPL-3.0).
44 | 


--------------------------------------------------------------------------------
/tests/testthat/test-exec.r:
--------------------------------------------------------------------------------
 1 | context('Command executor')
 2 | 
 3 | test_that("errors related to the 'cmd' argument", {
 4 |   expect_that(exec("wn 'ciao'"), throws_error('not a wn command'))
 5 |   expect_that(exec(NULL), throws_error('not a wn command'))
 6 |   expect_that(exec(NA), throws_error('not a wn command'))
 7 |   expect_that(exec(NA_character_), throws_error('not a wn command'))
 8 |   expect_that(exec(1L), throws_error('not a wn command'))
 9 |   expect_that(exec(list()), throws_error('not a wn command'))
10 |   expect_that(exec(), throws_error('arg is missing'))
11 | })
12 | 
13 | test_that("command execution", {
14 |   files <- file.path('data', list.files('data', pattern = '-exec.rds$'))
15 |   matches <- regexpr('-([^-]+)-', files)
16 |   matches <- matches + 1
17 |   attr(matches, 'match.length') <- attr(matches, 'match.length') - 2
18 |   words <- regmatches(files, matches)
19 |   invisible(sapply(seq_along(files), function(i) {
20 |     expect_equal(
21 |       exec(structure(paste0("wn '", words[[i]], "'"), class = c('wn', 'command'))),
22 |       readRDS(files[[i]])
23 |     )
24 |   }))
25 | })


--------------------------------------------------------------------------------
/R/cmd.r:
--------------------------------------------------------------------------------
 1 | wn_cmd <- function(word, search, opt = NULL, sense_num = NULL, info = FALSE) {
 2 |   # check word arg
 3 |   if (missing(word)) stop(missing_message(word))
 4 |   assert_that(not_empty_string(word))
 5 |   # check if we are in word information mode or not
 6 |   if (!info) {
 7 |     # check search arg
 8 |     if (missing(search)) stop(missing_message(search))
 9 |     assert_that(not_empty_character_vector(search))
10 |     search <- match.arg(unique(search), unlist(flatten(getOption('wnwr.supported.search.types'))), several.ok = TRUE)
11 |     # check opt arg
12 |     current_opt <- as.list(environment())$opt
13 |     opt <- match.arg(opt, getOption('wnwr.supported.search.opts'))
14 |   }
15 |   # construct wn command
16 |   if (initDict()) {
17 |     cmd <- paste(getOption('wnwr.wn.command'), shQuote(word))
18 |     if (info) return(structure(cmd, class = c('wn', 'command')))
19 |     if (identical(current_opt, opt)) cmd <- paste(cmd, opt, sep = ' -')
20 |     if (!is.null(sense_num)) {
21 |       assert_that(is.count(sense_num))
22 |       cmd <- paste(cmd, sense_num, sep = ' -n')
23 |     }
24 |     return(structure(paste(cmd, substring(paste(' -', search, collapse = '', sep = ''), 2)), class = c('wn', 'command')))
25 |   } else {
26 |     stop('wordnet not found.')
27 |   }
28 | }
29 | 
30 | #' @export
31 | print.command <- function(x, ...) cat(x, '\n', sep = '')
32 | 


--------------------------------------------------------------------------------
/R/hypernyms.r:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | hypernyms <- function(word, opt = NULL, sense_num = NULL) {
 3 |   this <- as.character(match.call()[[1]])
 4 |   types <- getOption('wnwr.supported.search.types')[this][[1]]
 5 |   return(extract_tree(exec(wn_cmd(word, types, opt, sense_num))))
 6 | }
 7 | 
 8 | # TODO: extract options
 9 | # TODO: filter by sense number
10 | 
11 | extract_tree <- function(data) {
12 |   if (missing(data)) stop(missing_message(data))
13 |   if (is.null(data)) return(data)
14 |   assert_that(is_result(data))
15 |   # remove empty string elements
16 |   data <- delete_with(data$result, '\\s*')
17 |   data <- delete_with(data, '\\d+\\ssense(s)?\\sof\\s.*')
18 |   # retrieve supported synset types
19 |   synsets <- unlist(unname(getOption('wnwr.supported.synset.types')))
20 |   # identify and index synset
21 |   index <- identify_synsets(data, synsets) # FIXME: see (1) @ mining.r
22 |   # subset by index as factor
23 |   data <- setNames(
24 |     lapply(split(data, unlist(index)), function(i) {
25 |       i <- i[-1]
26 |       if (length(i) > 0) i
27 |       else NULL
28 |     }),
29 |     unique(names(index))
30 |   )
31 |   # identify senses of each synset and process results
32 |   lapply(data, function(sset) {
33 |     unname(lapply(split(sset, identify_senses(sset)), function(sense) {
34 |       sense <- sense[-1]
35 |       if (length(sense) > 0) {
36 |         build_list(str_trim(sense, side = 'right'))
37 |       } else {
38 |         NULL
39 |       }
40 |     }))
41 |   })
42 | }
43 | 


--------------------------------------------------------------------------------
/R/assertions.r:
--------------------------------------------------------------------------------
 1 | not_empty_string <- function(x) {
 2 |   assert_that(is.string(x))
 3 |   noNA(x) && all(nchar(x) > 0)
 4 | }
 5 | on_failure(not_empty_string) <- function(call, env) {
 6 |   paste0(deparse(call$x), ' is an empty string.')
 7 | }
 8 | 
 9 | not_null <- function(x) !is.null(x)
10 | on_failure(not_null) <- function(call, env) {
11 |   paste0(deparse(call$x), ' is null.')
12 | }
13 | 
14 | not_empty_character_vector <- function(x) {
15 |   assert_that(is.character(x))
16 |   noNA(x) && all(nchar(x) > 0)
17 | }
18 | on_failure(not_empty_character_vector) <- function(call, env) {
19 |   paste0(deparse(call$x), ' contains empty or missing values')
20 | }
21 | 
22 | # not_string <- function(x) !is.string(x)
23 | # on_failure(not_string) <- function(call, env) {
24 | #   paste0(deparse(call$x), ' is a string.')
25 | # }
26 | 
27 | has_class <- function(x, which) has_attr(x, 'class') && which %in% attr(x, 'class', exact = TRUE)
28 | on_failure(has_class) <- function(call, env) {
29 |   paste0(deparse(call$x), " does not have class '", eval(call$which, env), "'.")
30 | }
31 | 
32 | is_command <- function(x) is.string(x) && has_class(x, 'wn') && has_class(x, 'command')
33 | on_failure(is_command) <- function(call, env) {
34 |   paste0(deparse(call$x), " is not a wn command.")
35 | }
36 | 
37 | is_result <- function(x) is.list(x) && x %has_name% 'result' && x %has_name% 'num_senses' && has_class(x, 'wn') && has_class(x, 'result')
38 | on_failure(is_result) <- function(call, env) {
39 |   paste0(deparse(call$x), " is not a wn result.")
40 | }


--------------------------------------------------------------------------------
/R/info.r:
--------------------------------------------------------------------------------
 1 | info_cmd <- function(word) {
 2 |   wn_cmd(word, info = TRUE)
 3 | }
 4 | 
 5 | extract_info <- function(info) {
 6 |   if (missing(info)) stop(missing_message(info))
 7 |   if (is.null(info)) return(info)
 8 |   assert_that(is_result(info))
 9 |   # remove empty string elements
10 |   info <- delete_with(info$result, '\\s*')
11 |   # retrieve supported search types
12 |   types <- unlist(flatten(getOption('wnwr.supported.search.types')))
13 |   # retrieve supported synset types
14 |   synsets <- unlist(unname(getOption('wnwr.supported.synset.types')))
15 |   # identify and index synset
16 |   index <- identify_synsets(info, synsets)
17 |   # subset by index as factor
18 |   info <- setNames(
19 |     lapply(split(info, unlist(index)), function(i) {
20 |       i <- i[-1]
21 |       if (length(i) > 0) i
22 |       else NULL
23 |     }),
24 |     unique(names(index))
25 |   )
26 |   # no information detected for any synsets ?
27 |   if (length(Filter(is.null, info)) == length(synsets)) return(info)
28 |   # matching search types only in children with information (not null children)
29 |   results <- lapply(Filter(Negate(is.null), info), function(i) {
30 |     i <- sub(paste0('.*(', paste(types, collapse = '|'), ').*'), '\\1', i)
31 |     i[i %in% types]
32 |   })
33 |   return(c(results, Filter(is.null, info))[synsets])
34 | }
35 | 
36 | #' @export
37 | info <- function(word) {
38 |   out <- exec(info_cmd(word))
39 |   return(extract_info(out))
40 | }
41 | 
42 | #' @export
43 | has <- function(word, search, synset = NULL, details = FALSE) {
44 |   assert_that(is.logical(details))
45 |   info <- info(word)
46 |   if (!is.null(synset)) {
47 |     assert_that(not_empty_character_vector(synset))
48 |     synset <- match.arg(synset, unlist(unname(getOption('wnwr.supported.synset.types'))), several.ok = TRUE)
49 |     info <- info[synset]
50 |   }
51 |   result <- vapply(info, function(x) search %in% x, logical(1))
52 |   if (!details) return(any(result))
53 |   else return(result)
54 | }
55 | # FIXME: if passed more than one search type it fails!
56 | 
57 | 


--------------------------------------------------------------------------------
/R/wnwr.r:
--------------------------------------------------------------------------------
 1 | #' wnwr provides ...
 2 | #'
 3 | #' @section Package options:
 4 | #'
 5 | #' It uses the following \code{options} to configure behaviour:
 6 | #' \itemize{
 7 | #'   \item \code{wnwr.supported.search.types}: vector of supported search types
 8 | #'   \item \code{wnwr.supported.search.opts}: vector of supported search options
 9 | #'   \item \code{wnwr.supported.synset.types}: vector of supported synset types
10 | #'   \item \code{wnwr.wn.command}: wordnet shell command
11 | #' }
12 | #' @docType package
13 | #' @name wnwr
14 | NULL
15 | 
16 | search_type <- list(
17 |   antonyms        = c('antsn', 'antsv', 'antsa', 'antsr'),
18 |   hypernyms       = c('hypen', 'hypev'),                     
19 |   hyponyms        = c('treen', 'treev'), # tree
20 |   verb_entailment = c('entav'), 
21 |   synonyms        = c('synsn', 'synsv', 'synsa', 'synsr'), # ordered by estimated frequency
22 |   holonyms        = c('hholn'), # hierarchical holonyms # c('holon', 'smemn', 'ssubn', 'sprtn') => all holonyms, member, substance, part of holonyms
23 |   meronyms        = c('hmern'), # hierarchical meronyms # c('meron', membn', 'subsn', 'partn') => all meronyms, has member meronyms, has substance meronyms, has part meronyms
24 | 	cause_to        = c('causv'), 
25 |   pertainyms      = c('perta', 'pertar'),	
26 |   attributes      = c('attrn', 'attrna'),  
27 |   derived_forms   = c('derin', 'deriv'),
28 |   domain          = c('domnn', 'domnv' , 'domna', 'domnr'),
29 |   domain_terms    = c('domtn', 'domtv', 'domta', 'domtr'),
30 |   polysemy_count  = c('famln', 'famlv', 'famla', 'famlr'), # familiarity
31 |   verb_frames     = c('framv'),	 
32 |   sisters         = c('coorn', 'coorv'), # coordinate terms
33 |   synonym_groups  = c('simsv'), # grouped by similarity of meaning
34 |   compound_words  = c('grepn', 'grepv', 'grepa' ,'grepr'),
35 |   sense_overview  = c('over')
36 | )
37 | search_opt <- c('h', 'g', 'l', 'a', 'o', 's')
38 | synset_type <- list(n = 'noun', v = 'verb', a = 'adj', r = 'adv')
39 | wn_command <- 'wn'
40 | 
41 | .onLoad <- function(libname, pkgname) {
42 |   ops <- options()
43 |   wnwr_opts <- list(
44 |     wnwr.supported.search.types = search_type,
45 |     wnwr.supported.search.opts = search_opt,
46 |     wnwr.supported.synset.types = synset_type,
47 |     wnwr.wn.command = wn_command
48 |   )
49 |   to_set <- !(names(wnwr_opts) %in% names(ops))
50 |   if (any(to_set)) options(wnwr_opts[to_set])
51 |   invisible()
52 | }
53 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mining.r:
--------------------------------------------------------------------------------
 1 | context('Text mining')
 2 |  
 3 | test_that("lexicographer information", {
 4 |   lex_1 <- c('<noun.animal> beef, beef cattle', '<noun.animal> cattle, cows, kine, oxen, Bos taurus', '<noun.food> meat1')
 5 |   expect_equal(
 6 |     extract_lexinfo(lex_1),
 7 |     structure(
 8 |       list(c('noun.animal', 'noun.animal', 'noun.food'), c('beef, beef cattle', 'cattle, cows, kine, oxen, Bos taurus', 'meat1')),
 9 |       .Names = c('lexinfo', 'sense')
10 |     )
11 |   )
12 |   lex_2 <- c('=> <noun.animal> beef, beef cattle', ' => <noun.animal> cattle, cows, kine, oxen, Bos taurus')
13 |   expect_equal(
14 |     extract_lexinfo(lex_2),
15 |     structure(list(c('noun.animal', 'noun.animal'), c('beef, beef cattle', 'cattle, cows, kine, oxen, Bos taurus')), .Names = c('lexinfo', 'sense'))
16 |   )
17 | })
18 | 
19 | test_that("synset offset", {
20 |   so_1 <- c('{02402425} cattle, cows, kine, oxen, Bos taurus', '{02404186} beef, beef cattle', '{00001740} entity {00001740}')
21 |   expect_equal(
22 |     extract_offset(so_1),
23 |     structure(
24 |       list(c('02402425', '02404186', '00001740'), c('cattle, cows, kine, oxen, Bos taurus', 'beef, beef cattle', 'entity {00001740}')),
25 |       .Names = c('offset', 'sense')
26 |     )
27 |   )
28 |   so_2 <- c('=> {02402425} cattle, cows, kine, oxen, Bos taurus', '=> {02404186} beef, beef cattle')
29 |   expect_equal(
30 |     extract_offset(so_2),
31 |     structure(list(c('02402425', '02404186'), c('cattle, cows, kine, oxen, Bos taurus', 'beef, beef cattle')), .Names = c('offset', 'sense'))
32 |   )
33 | })
34 | 
35 | test_that("sense number deletion", {
36 |   snd_1 <- 'ciao#1'
37 |   expect_equal(
38 |     delete_sensenum(snd_1),
39 |     'ciao'
40 |   )
41 |   snd_2 <- c('cattle#1')
42 |   expect_equal(
43 |     delete_sensenum(snd_2),
44 |     'cattle'
45 |   )
46 |   snd_3 <- c('cattle#1, cows#1, kine#1, oxen#1, Bos taurus#1', 'bovine#1', 'bovid#1')
47 |   expect_equal(
48 |     delete_sensenum(snd_3),
49 |     c('cattle, cows, kine, oxen, Bos taurus', 'bovine', 'bovid')
50 |   )
51 | })
52 | 
53 | test_that("gloss and examples", {
54 |   g_1 <- c(
55 |     'gripe, bitch, grouse, crab, beef, squawk, bellyache, holler -- (complain; "What was he hollering about?")',
56 |     'complain, kick, plain, sound off, quetch, kvetch -- (express complaints, discontent, displeasure, or unhappiness; "My mother complains all day"; "She has a lot to kick about")'
57 |   )
58 |   expect_equal(
59 |     extract_gloss(g_1),
60 |     structure(
61 |       list(
62 |         c('complain', 'express complaints, discontent, displeasure, or unhappiness'),
63 |         list('What was he hollering about?', c('My mother complains all day', 'She has a lot to kick about')),
64 |         c('gripe, bitch, grouse, crab, beef, squawk, bellyache, holler', 'complain, kick, plain, sound off, quetch, kvetch')
65 |       ),
66 |       .Names = c('gloss', 'example', 'sense')
67 |     )
68 |   )
69 | })
70 | 


--------------------------------------------------------------------------------
/R/mining.r:
--------------------------------------------------------------------------------
 1 | #' Extracts lexicographer information from a wn string
 2 | #' 
 3 | #' It is vectorized.
 4 | extract_lexinfo <- function(str) {
 5 |   locat <- str_locate(str, '<\\w+\\.\\w+>\\s')
 6 |   lexinfo <- str_trim(substr(str, locat[, 'start'], locat[, 'end']), side = 'right')
 7 |   return(list(
 8 |     lexinfo = substring(lexinfo, 2, str_length(lexinfo) - 1),
 9 |     sense = substr(str, locat[, 'end'] + 1, str_length(str))
10 |   ))
11 | }
12 | 
13 | #' Extracts gloss from a wn string
14 | #'  
15 | #' It is vectorized.
16 | extract_gloss <- function(str) {
17 |   data <- strsplit(str, ' -- ')
18 |   glosses <- str_replace_all(sapply(data, `[[`, 2), '\\(|\\)|\\\\|\\"', '')
19 |   glosses <- str_split_fixed(glosses, '; ', 2)
20 |   return(list(
21 |     gloss = glosses[, 1],
22 |     example = str_split(glosses[, 2], '; '),
23 |     sense = sapply(data, `[[`, 1)
24 |   ))
25 | }
26 | 
27 | #' Extracts synset offset from a wn string
28 | #' 
29 | #' It is vectorized.
30 | extract_offset <- function(str) {
31 |   locat <- str_locate(str, '\\{(\\d+)\\}\\s')
32 |   offset <- str_trim(substr(str, locat[, 'start'], locat[, 'end']), side = 'right')
33 |   return(list(
34 |     offset = substring(offset, 2, str_length(offset) - 1),
35 |     sense = substr(str, locat[, 'end'] + 1, str_length(str))
36 |   ))
37 | }
38 | 
39 | #' Removes synset number from a wn string
40 | #' 
41 | #' It is vectorized.
42 | delete_sensenum <- function(str) {
43 |   return(gsub('\\#\\d+', '', str))
44 | }
45 | 
46 | #' Removes elements that matches regexp
47 | #'
48 | #' It is vectorized.
49 | delete_with <- function(str, regexp) {
50 |   str[!grepl(paste0('^', regexp , '$'), str)]
51 | }
52 | 
53 | #' Indexes a character vector identifying the synset which its elements belong
54 | #' 
55 | #' It returns an index.
56 | identify_synsets <- function(data, synsets = unlist(unname(getOption('wnwr.supported.synset.types')))) {
57 |   # identify and index synset
58 |   synset_regex <- paste(synsets, collapse = '|')
59 |   flags <- grepl(synset_regex, data)
60 |   nms <- sub(paste0('.*?(', synset_regex, ').*'), '\\1', data) # non greedy match
61 |   assert_that(length(flags) == length(nms))
62 |   nms <- nms[flags]
63 |   index <- cumsum(flags)
64 |   index <- flatten(tapply(index, index, function(i) setNames(i, rep(nms[i[1]], length(i))), simplify = FALSE))
65 |   # assert_that(length(unique(index)) == length(synsets), !any(unlist(index)) == 0)
66 |   # FIXME: (1) this is a strong assumption, it assumes that each synset exists in data 
67 |   # FIXME: 1st solution: remove this check
68 |   # FIXME: 2nd solution: when calling this pass the correct set of available synsets (obtainable via has function)
69 |   return(index)
70 | }
71 | 
72 | #' Indexes a character vector identifying the senses which its elements belong
73 | #' 
74 | #' It return an index.
75 | identify_senses <- function(synset_data) {
76 |   cumsum(grepl('Sense\\s\\d+', synset_data))
77 | }
78 | 
79 | #' Builds a list detecting the presence of a signal
80 | build_list <- function(vect, signal = '=>\\s') {
81 |   num_spaces <- str_locate(vect, signal)[, 'start']
82 |   level_groups <- split(vect, num_spaces)
83 |   depths <- names(level_groups)
84 |   out <- lapply(seq_along(level_groups), function(i) {
85 |     sub(paste0('^\\s{0,', depths[[i]], '}', signal), '', level_groups[[i]])
86 |   })
87 |   if (is.na(num_spaces[[1]])) out <- c(vect[[1]], out)
88 |   out
89 | }
90 | 
91 | 


--------------------------------------------------------------------------------
/tests/testthat/test-info.r:
--------------------------------------------------------------------------------
 1 | context('Word informations')
 2 | 
 3 | synset_types <- unlist(unname(synset_type))
 4 | search_types <- unlist(flatten(search_type))
 5 | 
 6 | test_that("errors related to the 'info' argument", {
 7 |   expect_that(extract_info(), throws_error('arg is missing'))
 8 |   expect_that(extract_info(NA), throws_error('not a wn result'))
 9 |   expect_that(extract_info(list()), throws_error('not a wn result'))
10 |   expect_that(extract_info(c('')), throws_error('not a wn result'))
11 |   expect_that(extract_info('single information'), throws_error('not a wn result'))
12 |   expect_that(extract_info(NA_character_), throws_error('not a wn result'))
13 | })
14 | 
15 | test_that("information extraction", {
16 |   expect_null(extract_info(NULL))
17 |   expect_null(extract_info(c()))
18 |   # no information detected
19 |   expect_equal(
20 |     extract_info(structure(list(synset_types, 1), .Names = c('result', 'num_senses'), class = c('wn', 'result'))),
21 |     structure(vector('list', length(synset_types)), .Names = synset_types)
22 |   )
23 |   # some common cases
24 |   obj_1 <- structure(
25 |     list(
26 |       c(synset_types[1], search_types[1], synset_types[2], search_types[2], synset_types[3], search_types[3], synset_types[4], search_types[4]),
27 |       1
28 |     ),
29 |     .Names = c('result', 'num_senses'),
30 |     class = c('wn', 'result')
31 |   )
32 |   expect_equal(
33 |     extract_info(obj_1),
34 |     structure(list(search_types[1], search_types[2], search_types[3], search_types[4]), .Names = synset_types)
35 |   )
36 |   obj_2 <- structure(
37 |     list(
38 |       c(synset_types[1], search_types[10], synset_types[4], search_types[1:9], synset_types[3], search_types[11], synset_types[2]),
39 |       1
40 |     ),
41 |     .Names = c('result', 'num_senses'),
42 |     class = c('wn', 'result')
43 |   )
44 |   expect_equal(
45 |     extract_info(obj_2),
46 |     structure(list(search_types[10], NULL, search_types[11], search_types[1:9]), .Names = synset_types)
47 |   )
48 |   # some real cases
49 |   files <- file.path('data', list.files('data', pattern = '-info.rds$'))
50 |   matches <- regexpr('-([^-]+)-', files)
51 |   matches <- matches + 1
52 |   attr(matches, 'match.length') <- attr(matches, 'match.length') - 2
53 |   words <- regmatches(files, matches)
54 |   invisible(sapply(seq_along(files), function(i) {
55 |     expect_equal(
56 |       info(words[[i]]),
57 |       readRDS(files[[i]])
58 |     )
59 |   }))
60 | })
61 | 
62 | test_that("errors related to the 'word' argument", {
63 |   expect_that(info(), throws_error('arg is missing'))
64 |   expect_that(info(''), throws_error('an empty string'))
65 |   expect_that(info(NULL), throws_error('not a string'))
66 |   expect_that(info(NA), throws_error('not a string'))
67 | })
68 | 
69 | test_that("word information command syntax", {
70 |   expect_equal(info_cmd('word'), structure(paste0(wn_command, " 'word'"), class = c('wn', 'command')))
71 | })
72 | 
73 | test_that("errors related to the 'synset' argument", {
74 |   expect_that(has('ciao', 'hypen', NA), throws_error('not a character vector'))
75 |   expect_that(has('ciao', 'hypen', NA, details = TRUE), throws_error('not a character vector'))
76 |   expect_that(has('ciao', 'hypen', list()), throws_error('not a character vector'))
77 |   expect_that(has('ciao', 'hypen', c('')), throws_error('empty or missing values'))
78 |   expect_that(has('ciao', 'hypen', NA_character_), throws_error('empty or missing values'))
79 | })
80 | 
81 | test_that("has information", {
82 |   expect_true(has('ciao', 'hypen', synset = NULL))
83 |   expect_equal(
84 |     has('ciao', 'hypen', synset = NULL, details = TRUE),
85 |     structure(c(TRUE, FALSE, FALSE, FALSE), .Names = c("noun", "verb", "adj", "adv"))
86 |   )
87 |   expect_true(has('ciao', 'hypen', c()))
88 |   expect_equal(
89 |     has('ciao', 'hypen', c(), details = TRUE),
90 |     structure(c(TRUE, FALSE, FALSE, FALSE), .Names = c("noun", "verb", "adj", "adv"))
91 |   )
92 | })
93 | 


--------------------------------------------------------------------------------
/tests/testthat/test-cmd.r:
--------------------------------------------------------------------------------
  1 | context('Shell command syntax')
  2 | 
  3 | search_types <- unlist(flatten(search_type))
  4 | 
  5 | test_that("Errors related to the 'word' argument", {
  6 |   expect_that(wn_cmd(), throws_error('is missing'))
  7 |   expect_that(wn_cmd(c()), throws_error('not a string'))
  8 |   expect_that(wn_cmd(c('x', 'y')), throws_error('not a string'))
  9 |   expect_that(wn_cmd(10), throws_error('not a string'))
 10 |   expect_that(wn_cmd(NA_integer_), throws_error('not a string'))
 11 |   expect_that(wn_cmd(NULL), throws_error('not a string'))
 12 |   expect_that(wn_cmd(NA), throws_error('not a string'))
 13 |   expect_that(wn_cmd(NA_character_), throws_error('empty string'))
 14 |   expect_that(wn_cmd(c('')), throws_error('empty string'))
 15 |   expect_that(wn_cmd(''), throws_error('empty string'))
 16 | })
 17 | 
 18 | test_that("Errors related to the 'search' argument", {
 19 |   expect_that(wn_cmd('word'), throws_error('is missing'))
 20 |   expect_that(wn_cmd('word', c()), throws_error('not a character vector'))
 21 |   expect_that(wn_cmd('word', NA_integer_), throws_error('not a character vector'))
 22 |   expect_that(wn_cmd('word', NULL), throws_error('not a character vector'))
 23 |   expect_that(wn_cmd('word', NA), throws_error('not a character vector'))
 24 |   expect_that(wn_cmd('word', NA_character_), throws_error('contains empty or missing values'))
 25 |   expect_that(wn_cmd('word', ''), throws_error('contains empty or missing values'))
 26 |   expect_that(wn_cmd('word', c('')), throws_error('contains empty or missing values'))
 27 |   expect_that(wn_cmd('word', c('x')), throws_error('should be one of'))
 28 | })
 29 | 
 30 | test_search_arg <- function(x) {
 31 |   expect_equal(
 32 |     wn_cmd('word', x),
 33 |     structure(paste0(wn_command, " 'word' ", substring(paste(' -', unique(x), collapse = '', sep = ''), 2)), class = c('wn', 'command'))
 34 |   )
 35 | }
 36 | 
 37 | test_that('Supported search types', {
 38 | #   # all possible combinations
 39 | #   invisible(lapply(seq_len(length(search_types)), function(m) {
 40 | #     invisible(combn(
 41 | #       search_types,
 42 | #       m,
 43 | #       test_search_arg,
 44 | #       simplify = FALSE
 45 | #     ))
 46 | #   }))
 47 |   # one command, one search type
 48 |   invisible(combn(
 49 |     search_types,
 50 |     1,
 51 |     test_search_arg,
 52 |     simplify = FALSE
 53 |   ))
 54 |   # one command, all search types
 55 |   invisible(combn(
 56 |     search_types,
 57 |     length(search_types),
 58 |     test_search_arg,
 59 |     simplify = FALSE
 60 |   ))
 61 |   # duplicates removed
 62 |   test_search_arg(rep(search_types[3], 3))
 63 | })
 64 | 
 65 | test_that("errors related to the 'opt' argument", {
 66 |   expect_that(wn_cmd('word', search_types[1], NA), throws_error('must be NULL or a character vector'))
 67 |   expect_that(wn_cmd('word', search_types[1], 1L), throws_error('must be NULL or a character vector'))
 68 |   expect_that(wn_cmd('word', search_types[1], 'x'), throws_error('should be one of'))
 69 |   expect_that(wn_cmd('word', search_types[1], c('x')), throws_error('should be one of'))
 70 |   expect_that(wn_cmd('word', search_types[1], NA_character_), throws_error('should be one of'))
 71 |   expect_that(wn_cmd('word', search_types[1], c('x', 'y')), throws_error('must be of length 1'))
 72 | })
 73 | 
 74 | test_that("supported search options", {
 75 |   expect_equal(wn_cmd('word', search_types[1]), structure(paste0(wn_command, " 'word' -", search_types[1]), class = c('wn', 'command')))
 76 |   expect_equal(wn_cmd('word', search_types[1], NULL), structure(paste0(wn_command, " 'word' -", search_types[1]), class = c('wn', 'command')))
 77 |   # all supported options
 78 |   invisible(lapply(
 79 |     search_opt,
 80 |     function(o) expect_equal(wn_cmd('word', search_types[1], o), structure(paste0(wn_command, " 'word' -", o, " -", search_types[1]), class = c('wn', 'command')))
 81 |   ))
 82 | })
 83 | 
 84 | test_that("errors related to the 'sense_num' argument", {
 85 |   expect_that(wn_cmd('word', search_types[1], search_opt[1], letters[1:5L]), throws_error('not a count'))
 86 |   expect_that(wn_cmd('word', search_types[1], search_opt[1], 'abc'), throws_error('not a count'))
 87 |   expect_that(wn_cmd('word', search_types[1], search_opt[1], NA), throws_error('not a count'))
 88 |   expect_that(wn_cmd('word', search_types[1], search_opt[1], 1:10L), throws_error('not a count'))
 89 |   expect_that(wn_cmd('word', search_types[1], search_opt[1], NA_integer_), throws_error('missing value'))
 90 | })
 91 | 
 92 | test_that("sense number filter", {
 93 |   expect_equal(wn_cmd('word', search_types[1]), structure(paste0(wn_command, " 'word' -", search_types[1]), class = c('wn', 'command')))
 94 |   expect_equal(wn_cmd('word', search_types[1], search_opt[1]), structure(paste0(wn_command, " 'word' -", search_opt[1], " -", search_types[1]), class = c('wn', 'command')))
 95 |   expect_equal(wn_cmd('word', search_types[1], search_opt[1], NULL), structure(paste0(wn_command, " 'word' -", search_opt[1], " -", search_types[1]), class = c('wn', 'command')))
 96 |   expect_equal(wn_cmd('word', search_types[1], search_opt[1], c()), structure(paste0(wn_command, " 'word' -", search_opt[1], " -", search_types[1]), class = c('wn', 'command')))
 97 |   expect_equal(wn_cmd('word', search_types[1], sense_num = 10L), structure(paste0(wn_command, " 'word' -n", 10L, " -", search_types[1]), class = c('wn', 'command')))
 98 |   expect_equal(wn_cmd('word', search_types[1], sense_num = 1), structure(paste0(wn_command, " 'word' -n", 1, " -", search_types[1]), class = c('wn', 'command')))
 99 |   expect_equal(wn_cmd('word', search_types[1], search_opt[1], c(1)), structure(paste0(wn_command, " 'word' -", search_opt[1], " -n", 1, " -", search_types[1]), class = c('wn', 'command')))
100 | })
101 | 
102 | test_that("parameters ignored in word information mode", {
103 |   expect_equal(wn_cmd('word', info = TRUE), structure(paste0(wn_command, " 'word'"), class = c('wn', 'command')))
104 |   expect_equal(wn_cmd('word', search_types[1], info = TRUE), structure(paste0(wn_command, " 'word'"), class = c('wn', 'command')))
105 |   expect_equal(wn_cmd('word', search_types, info = TRUE), structure(paste0(wn_command, " 'word'"), class = c('wn', 'command')))
106 |   expect_equal(wn_cmd('word', search_types[1], search_opt[1], info = TRUE), structure(paste0(wn_command, " 'word'"), class = c('wn', 'command')))
107 |   expect_equal(wn_cmd('word', search_types[1], letters[1:20], info = TRUE), structure(paste0(wn_command, " 'word'"), class = c('wn', 'command')))
108 |   expect_equal(wn_cmd('word', search_types[1], search_opt[1], 2L, info = TRUE), structure(paste0(wn_command, " 'word'"), class = c('wn', 'command')))
109 |   expect_equal(wn_cmd('word', search_types[1], search_opt[1], 1:2L, info = TRUE), structure(paste0(wn_command, " 'word'"), class = c('wn', 'command')))
110 | })
111 | 


--------------------------------------------------------------------------------