├── test ├── autotest.bat ├── autotest.R ├── 1test.R ├── elmar_2.R └── smoketest.R ├── .gitignore ├── pkg ├── data │ └── edits.RData ├── tests │ ├── test_all.R │ └── testthat │ │ ├── edit_test_1.txt │ │ ├── testDuplicated.R │ │ ├── testdatamodel.R │ │ ├── testgetVars.R │ │ ├── testc.R │ │ ├── testEditRow.R │ │ ├── testIsObviouslyInfeasible.R │ │ ├── testContains.R │ │ ├── testechelon.R │ │ ├── testIsObviouslyRedundant.R │ │ ├── testIsFeasible.R │ │ ├── testFourierMotzkin.R │ │ ├── testeditmatrixAttr.R │ │ ├── testBlocks.R │ │ ├── testCheck.R │ │ ├── testeditarray.R │ │ ├── testSubstValue.R │ │ ├── testParseEdits.R │ │ ├── testErrorLocalizer.R │ │ ├── testCheckDatamodel.R │ │ ├── testEditset.R │ │ ├── testLocalizeErrors.R │ │ └── testeditmatrix.R ├── R │ ├── editrules-data.R │ ├── perturbWeights.R │ ├── list2env.R │ ├── zzz.R │ ├── is.R │ ├── str.R │ ├── as.matrix.R │ ├── removeRedundant.R │ ├── getUpperBounds.R │ ├── duplicated.R │ ├── editAttr.R │ ├── getH.R │ ├── isSubset.R │ ├── isFeasible.R │ ├── as.igraph.R │ ├── editarrayAttr.R │ ├── parseEdits.R │ ├── c.R │ ├── echelon.R │ ├── editfile.R │ ├── blocks.R │ ├── expandEdits.R │ ├── checkRows.R │ ├── parseNum.R │ ├── isObviouslyInfeasible.R │ ├── getVars.R │ ├── parseCat.R │ ├── editmatrixAttr.R │ ├── subsetting.R │ ├── adjacency.R │ ├── checkDatamodel.R │ ├── print.R │ ├── generateEdits.R │ ├── softEdits.R │ ├── mip.R │ ├── parseMix.R │ ├── disjunct.R │ ├── isObviouslyRedundant.R │ ├── pkg.R │ ├── writeELAsMip.R │ └── reduce.R ├── inst │ └── script │ │ ├── edits │ │ ├── mixedits.R │ │ └── myedits.txt │ │ └── bench │ │ ├── randomEdits.R │ │ ├── benchMIP.R │ │ ├── benchmip_categorical.R │ │ ├── benchmip_mixed2.R │ │ ├── benchmip_balance.R │ │ ├── benchmip_mixed.R │ │ └── benchAB.R ├── vignettes │ └── editrules-vignette.Rnw └── DESCRIPTION ├── papers ├── DeJongeVanderLoo2011.pdf ├── DeJongeVanderLoo2011-2.pdf ├── sweave.r ├── .gitignore ├── buildmip.bash ├── buildVignette.bash ├── tex │ ├── errorLocation.tex │ └── smalltree.sty ├── mySweave.sty ├── index.html ├── fig │ └── tree.tex └── pic.R ├── document.bash ├── examples ├── impliedValues.R ├── getVars.R ├── dnf.R ├── listViolatedEdits.R ├── expandEdits.R ├── datamodel.R ├── generateEdits.R ├── separate.R ├── blocks.R ├── backtracker.R ├── editset.R ├── checkRows.R ├── editmatrix.R ├── editmatrixAttr.R ├── violatedEdits.R ├── substValue.R ├── editarray.R ├── graph.R ├── errorLocalizer.R ├── eliminate.R └── localizeErrors.R ├── roxygen.R ├── editrules.Rproj ├── check.bash ├── .travis.yml ├── develop ├── generateEM.R ├── categorical │ └── tst.R └── errorLocalizer_alt.R └── README.md /test/autotest.bat: -------------------------------------------------------------------------------- 1 | Rscript autotest.R -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /output 2 | .Rproj.user 3 | .Rhistory 4 | /pkg/man 5 | .DS_Store 6 | 7 | 8 | -------------------------------------------------------------------------------- /pkg/data/edits.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-cleaning/editrules/HEAD/pkg/data/edits.RData -------------------------------------------------------------------------------- /test/autotest.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 3 | 4 | auto_test("../pkg/R", "../pkg/inst/tests") 5 | 6 | -------------------------------------------------------------------------------- /pkg/tests/test_all.R: -------------------------------------------------------------------------------- 1 | #library(editrules) 2 | 3 | #if (require(testthat)){ 4 | # test_package("editrules") 5 | #} 6 | -------------------------------------------------------------------------------- /papers/DeJongeVanderLoo2011.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-cleaning/editrules/HEAD/papers/DeJongeVanderLoo2011.pdf -------------------------------------------------------------------------------- /pkg/tests/testthat/edit_test_1.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | y <= b*x 5 | 6 | b <- 10 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /papers/DeJongeVanderLoo2011-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-cleaning/editrules/HEAD/papers/DeJongeVanderLoo2011-2.pdf -------------------------------------------------------------------------------- /papers/sweave.r: -------------------------------------------------------------------------------- 1 | options(error=traceback) 2 | Sweave("editrules-linear.Rnw") 3 | Sweave("editrules-categorical.Rnw") 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /document.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | R -e 'devtools::document("./pkg")' 4 | R CMD Rd2pdf --force --no-preview -o manual.pdf ./pkg 5 | 6 | 7 | -------------------------------------------------------------------------------- /examples/impliedValues.R: -------------------------------------------------------------------------------- 1 | e <- editmatrix(expression( 2 | x <= 0, 3 | x >= 0, 4 | x + y == 3 5 | )) 6 | 7 | impliedValues(e) 8 | 9 | -------------------------------------------------------------------------------- /papers/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.log 3 | *.aux 4 | *.bbl 5 | *.blg 6 | *.loa 7 | *.log 8 | *.tex 9 | *.toc 10 | *.aux 11 | *.bbl 12 | *.blg 13 | *.idx 14 | *.ilg 15 | *.ind -------------------------------------------------------------------------------- /papers/buildmip.bash: -------------------------------------------------------------------------------- 1 | R -e "knitr::knit('editrules-as-mip.Rnw')" 2 | xelatex editrules-as-mip.tex 3 | bibtex editrules-as-mip 4 | xelatex editrules-as-mip.tex 5 | xelatex editrules-as-mip.tex 6 | 7 | -------------------------------------------------------------------------------- /pkg/R/editrules-data.R: -------------------------------------------------------------------------------- 1 | #' Some example editrules 2 | #' 3 | #' 4 | #' @title Example editrules, used in vignette 5 | #' @usage data(edits) 6 | #' @name edits 7 | #' @docType data 8 | #' @keywords data 9 | {} 10 | 11 | 12 | -------------------------------------------------------------------------------- /pkg/R/perturbWeights.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | # add perturbations to weights, without changing the sort order of nonunique 6 | # elements 7 | # 8 | # 9 | perturbWeights <- function(x){ 10 | d <- diff(sort(x)) 11 | p <- min(x, d[d > 0]) / 100 12 | x + runif(length(x), max=p) 13 | } 14 | 15 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testDuplicated.R: -------------------------------------------------------------------------------- 1 | 2 | context("duplicated") 3 | test_that("duplicated.editmatrix works",{ 4 | 5 | expect_equal( 6 | duplicated.editmatrix(editmatrix(expression( 7 | x < 0, x <= 0, y < 0))), 8 | c(FALSE,FALSE,FALSE) 9 | ) 10 | }) 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /examples/getVars.R: -------------------------------------------------------------------------------- 1 | 2 | E <- editmatrix(c( "x+3*y == 2*z" 3 | , "x > 2") 4 | ) 5 | getVars(E) 6 | 7 | E <- editarray(expression( 8 | gender %in% c('male','female'), 9 | pregnant %in% c(TRUE, FALSE), 10 | if( gender == 'male' ) pregnant == FALSE 11 | ) 12 | ) 13 | 14 | getVars(E) 15 | 16 | -------------------------------------------------------------------------------- /roxygen.R: -------------------------------------------------------------------------------- 1 | library(roxygen2) 2 | options(error=traceback) 3 | unlink( 'pkg/man', TRUE) 4 | 5 | setwd('pkg') 6 | roxygenize( '.' 7 | , roxygen.dir='.' 8 | , copy.package=FALSE 9 | , unlink.target=TRUE 10 | ) 11 | 12 | if (length(list.files('inst/doc')) == 0){ 13 | unlink( 'inst/doc', TRUE) 14 | } 15 | -------------------------------------------------------------------------------- /examples/dnf.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | E <- editset(expression( 4 | x + y == z, 5 | if ( x > 0 ) y > 0, 6 | x >= 0, 7 | y >= 0, 8 | z >= 0, 9 | A %in% letters[1:4], 10 | B %in% letters[1:4], 11 | if (A %in% c('a','b')) y > 0, 12 | if (A == 'c' ) B %in% letters[1:3] 13 | )) 14 | 15 | disjunct(E) 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /papers/buildVignette.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # usage: buildVignette.bash linear 4 | # buildVignette.bash categorical 5 | 6 | arg=\'editrules-$1\' 7 | 8 | 9 | R -e "Sweave($arg)" 10 | pdflatex editrules-$1.tex 11 | bibtex editrules-$1 12 | makeindex editrules-$1.idx 13 | pdflatex editrules-$1.tex 14 | pdflatex editrules-$1.tex 15 | 16 | 17 | -------------------------------------------------------------------------------- /pkg/R/list2env.R: -------------------------------------------------------------------------------- 1 | # legacy function to keep running under R<=2.10 2 | if (!exists("list2env")){ 3 | list2env <- function(x, envir=NULL, parent=parent.frame()){ 4 | if (is.null(envir)) 5 | envir <- new.env(parent=parent) 6 | for (i in names(x)){ 7 | envir[[i]] <- x[[i]] 8 | } 9 | envir 10 | } 11 | } 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /examples/listViolatedEdits.R: -------------------------------------------------------------------------------- 1 | # Using character vector to define contraints 2 | E <- editmatrix(editrules=c("x+3*y==2*z", "x==z")) 3 | print(E) 4 | 5 | dat <- data.frame( x = c(0,2,1) 6 | , y = c(0,0,1) 7 | , z = c(0,1,1) 8 | ) 9 | # valid rows? 10 | valid <- checkRows(E, dat) 11 | invalid <- dat[!valid,] 12 | listViolatedEdits(E,invalid) 13 | -------------------------------------------------------------------------------- /examples/expandEdits.R: -------------------------------------------------------------------------------- 1 | 2 | expandEdits("x_i > 0", i = 1:10) 3 | 4 | expandEdits("#var > 0", prefix="#", var = c("turnover", "profit", "employees")) 5 | 6 | expandEdits("x_i < y_j", i=1:3,j=2:3) 7 | 8 | #using a variable set in the parent environment 9 | i <- 1:5 10 | expandEdits("sum_i(x_i) == y") 11 | 12 | expandEdits("sum_month(x__year._month) == x__year", month=month.abb, year=2009:2011) 13 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testdatamodel.R: -------------------------------------------------------------------------------- 1 | require(testthat) 2 | context("datamodel") 3 | 4 | test_that("datamodel works",{ 5 | expect_identical( 6 | datamodel(editarray(c( 7 | "x %in% c('a','b')", 8 | "y %in% c('x','y')" 9 | ))), 10 | data.frame(variable = c('x','x','y','y'),value=c('a','b','x','y')) 11 | ) 12 | }) 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /examples/datamodel.R: -------------------------------------------------------------------------------- 1 | 2 | E <- editarray(expression( 3 | age %in% c('under aged','adult'), 4 | positionInHouseholda %in% c('marriage partner', 'child', 'other'), 5 | maritalStatus %in% c('unmarried','married','widowed','divorced'), 6 | if (maritalStatus %in% c('married','widowed','divorced') ) positionInHousehold != 'child', 7 | if ( age == 'under aged') maritalStatus == 'unmarried' 8 | ) 9 | ) 10 | datamodel(E) 11 | -------------------------------------------------------------------------------- /editrules.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackagePath: pkg 18 | PackageInstallArgs: --no-multiarch --with-keep.source 19 | PackageRoxygenize: rd,namespace 20 | -------------------------------------------------------------------------------- /check.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "" 4 | echo "######## Removing "editrules" installation..." 5 | echo "" 6 | R CMD REMOVE editrules 7 | 8 | echo "" 9 | echo "######## Install "editrules" from output directory..." 10 | echo "" 11 | cd output 12 | for x in *.tar.gz 13 | do 14 | R CMD INSTALL $x 15 | done 16 | 17 | #cd ../../deducorrect 18 | #echo "" 19 | #echo "######## Building and checking deducorrect..." 20 | #echo "" 21 | #bash build.bash 22 | -------------------------------------------------------------------------------- /pkg/inst/script/edits/mixedits.R: -------------------------------------------------------------------------------- 1 | # 2 | # a bunch of mixed edits 3 | 4 | # define datamodels 5 | A %in% letters[1:6] 6 | B %in% letters[7:10] 7 | C %in% letters[11:20] 8 | 9 | # some purely categorical stuff 10 | if ( A %in% c('a','d') ) B %in% c('g','h') 11 | if ( A == 'b' ) C %in% letters[12:18] 12 | 13 | # some mixed numeric... 14 | if ( C == 'k' ) x > 0 15 | if ( y > 0 ) x + y >= z 16 | 17 | # some pure numeric 18 | u + v == w 19 | 2*v + 3*s == t 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /examples/generateEdits.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | E <- editarray(expression( 4 | a %in% letters[1:4], 5 | b %in% letters[5:8], 6 | if ( a %in% c('a','b') ) b %in% c('e','f'), 7 | if ( a %in% c('c','d') ) b %in% c('h') 8 | )) 9 | 10 | generateEdits(E) 11 | 12 | ## Not run 13 | # load 60 edits (36 variables) from demonstration file 14 | E <- editfile(system.file('script/bench/edits.R',package='editrules'),type='cat') 15 | F <- generateEdits(E) 16 | 17 | summary(F$edits) 18 | F$nodes 19 | F$dudation 20 | 21 | ## End(Not run) 22 | -------------------------------------------------------------------------------- /examples/separate.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | E <- editset(expression( 4 | x + y == z, 5 | 2*u + 0.5*v == 3*w, 6 | w >= 0, 7 | if ( x > 0 ) y > 0, 8 | x >= 0, 9 | y >= 0, 10 | z >= 0, 11 | A %in% letters[1:4], 12 | B %in% letters[1:4], 13 | C %in% c(TRUE,FALSE), 14 | D %in% letters[5:8], 15 | if ( A %in% c('a','b') ) y > 0, 16 | if ( A == 'c' ) B %in% letters[1:3], 17 | if ( !C == TRUE) D %in% c('e','f') 18 | )) 19 | 20 | (L <- separate(E)) 21 | 22 | sapply(L,class) 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testgetVars.R: -------------------------------------------------------------------------------- 1 | 2 | require(testthat) 3 | 4 | context("getVars") 5 | 6 | test_that("getVars.editmatrix works",{ 7 | expect_identical( 8 | getVars(editmatrix(c( "x+3*y == 2*z", "x > 2"))), 9 | c("x","y","z") 10 | ) 11 | }) 12 | 13 | test_that("getVars.editarray conforms to type argument",{ 14 | expect_true(is.null( 15 | getVars( 16 | editarray(expression(if ( A == 'a') B %in% letters[1:3])), 17 | type='num' 18 | ) 19 | )) 20 | }) 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /pkg/vignettes/editrules-vignette.Rnw: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{editrules-vignette} 2 | \documentclass[11pt, fleqn, a4paper]{article} 3 | 4 | \title{The editrules Vignette\\ 5 | {\small Package version \Sexpr{packageVersion("editrules")}} 6 | } 7 | \date{\today} 8 | 9 | \author{Edwin de Jonge and Mark van der Loo} 10 | 11 | \begin{document} 12 | \maketitle 13 | At the moment, this is only a stub of a vignette. Please refer to the 14 | working papers included in the package's {\tt inst/doc} directory for 15 | theory and examples. 16 | 17 | 18 | \end{document} 19 | 20 | -------------------------------------------------------------------------------- /pkg/inst/script/edits/myedits.txt: -------------------------------------------------------------------------------- 1 | 2 | # define category domains 3 | BOOL <- c(TRUE,FALSE) 4 | OPTIONS <- letters[1:4] 5 | 6 | # numerical edits 7 | x + y == z 8 | 2*u + 0.5*v == 3*w 9 | w >= 0 10 | if ( x > 0 ) y > 0 11 | if ( x > y ) z < 10 12 | x >= 0 13 | y >= 0 14 | z >= 0 15 | 16 | # categorical edits 17 | A %in% OPTIONS 18 | B %in% OPTIONS 19 | C %in% BOOL 20 | D %in% letters[5:8] 21 | if ( A %in% c('a','b') ) y > 0 22 | if ( A == 'c' ) B %in% letters[1:3] 23 | if ( !C == TRUE) D %in% c('e','f') 24 | -------------------------------------------------------------------------------- /pkg/R/zzz.R: -------------------------------------------------------------------------------- 1 | 2 | .onAttach <- function(libname,pkgname){ 3 | # msg <- "" 4 | # nfc <- "Use suppressPackageStartupMessages(library(editrules)) to suppress this message on loading editrules." 5 | # packageStartupMessage(msg) 6 | # packageStartupMessage(nfc) 7 | 8 | # default lpSolveAPI options 9 | options(er.lpcontrol = 10 | list( 11 | presolve = "rows" # move univariate constraints into bounds 12 | , epsint = 1e-15 13 | # , epssel = 1e-15 14 | # , epsb = 1e-15 15 | # , epsd = 1e-15 16 | , epspivot = 1e-15 17 | ) 18 | ) 19 | } 20 | 21 | 22 | -------------------------------------------------------------------------------- /test/1test.R: -------------------------------------------------------------------------------- 1 | # load sources 2 | src <- sapply(list.files("../pkg/R", full.names=TRUE), source) 3 | 4 | # load demos 5 | examples <- sapply(list.files("../examples", full.names=TRUE), source) 6 | 7 | x <- editmatrix(c( "x ==2" 8 | , "x - 2 > y + 1" 9 | , "x>1" 10 | , "y>-2" 11 | ,"-2>x" 12 | ,"-10 < -2" 13 | ) 14 | ) 15 | 16 | 17 | x 18 | # dat <- data.frame(x=1:2, y=3:2) 19 | 20 | # editrules(x) 21 | # checkRows(x,dat) 22 | # checkRows(as.character(x),dat) 23 | # checkRows(editrules(x),dat) 24 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | 2 | # travis config 3 | 4 | dist: trusty 5 | language: r 6 | sudo: required 7 | 8 | addons: 9 | apt: 10 | packages: 11 | - libxml2-dev 12 | 13 | r: 14 | - release 15 | 16 | before_install: 17 | - R -e "install.packages(c('igraph','devtools','roxygen2','testthat','knitr','lpSolveAPI'))" 18 | - R -e "devtools::document('./pkg')" 19 | - cd ./pkg 20 | 21 | r_packages: 22 | - covr 23 | - rmarkdown 24 | 25 | 26 | after_success: 27 | - Rscript -e 'library(covr);coveralls()' 28 | 29 | notifications: 30 | email: 31 | on_success: change 32 | on_failure: change 33 | 34 | 35 | -------------------------------------------------------------------------------- /pkg/R/is.R: -------------------------------------------------------------------------------- 1 | #' Check object class 2 | #' @name is.editrules 3 | #' @aliases is.editset is.editmatrix is.editarray 4 | #' @param x object to be checked 5 | #' @return \code{logical} 6 | #' 7 | {} 8 | 9 | #' @export 10 | #' @rdname is.editrules 11 | is.editset <- function(x) inherits(x,"editset") 12 | 13 | #' @export 14 | #' @rdname is.editrules 15 | is.editmatrix <- function(x) return(inherits(x, "editmatrix")) 16 | 17 | is.cateditmatrix <- function(x){ 18 | return(inherits(x, "cateditmatrix")) 19 | } 20 | 21 | #' @export 22 | #' @rdname is.editrules 23 | is.editarray <- function(x) inherits(x,"editarray") 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testc.R: -------------------------------------------------------------------------------- 1 | context("Concatenating") 2 | 3 | test_that("concatenate editmatrix",{ 4 | 5 | E1 <- editmatrix("x1 + x2 > 0") 6 | E2 <- editmatrix("x2 + x3 < 10") 7 | 8 | E <- editmatrix(c("x1 + x2 > 0", "x2 + x3 < 10")) 9 | #print(c.editmatrix(E1,E2)) 10 | #print(E) 11 | expect_equivalent(E, c(E1,E2)) 12 | e <- editmatrix(expression()) 13 | expect_equivalent(nedits(c(e,e)),0) 14 | expect_equivalent(nedits(c(e,NULL)),0) 15 | }) 16 | 17 | test_that("concatenate editmatrix",{ 18 | E <- editmatrix(c("x1 + x2 > 0", "x2 + x3 < 10")) 19 | expect_equivalent(E, c(E)) 20 | }) 21 | 22 | 23 | -------------------------------------------------------------------------------- /examples/blocks.R: -------------------------------------------------------------------------------- 1 | # three seperate blocks 2 | E <- editmatrix( expression( 3 | x1 + x2 == x3, 4 | x3 + x4 == x5, 5 | x5 + x6 == x7, 6 | y1 + y2 == y3, 7 | z1 + z2 == z3 8 | )) 9 | blocks(E) 10 | 11 | # four seperate blocks 12 | E <- editmatrix(expression( 13 | x1 + x2 == x3, 14 | x3 + x4 == x5, 15 | x8 + x6 == x7, 16 | y1 + y2 == y3, 17 | z1 + z2 == z3 18 | )) 19 | blocks(E) 20 | 21 | # two categorical blocks 22 | E <- editarray(expression( 23 | x %in% c('a','b','c'), 24 | y %in% c('d','e'), 25 | z %in% c('f','g'), 26 | u %in% c('w','t'), 27 | if ( x == 'a') y != 'd', 28 | if ( z == 'f') u != 'w' 29 | )) 30 | blocks(E) 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/backtracker.R: -------------------------------------------------------------------------------- 1 | 2 | bt <- backtracker( isSolution= { 3 | if (y == 0) return(TRUE) 4 | if (x == 0) return(FALSE) 5 | } 6 | , choiceLeft = { x <- x - 1; y <- y} 7 | , choiceRight = { y <- y - 1; x <- x} 8 | # starting values for x and y 9 | , x=2 10 | , y=1 11 | ) 12 | 13 | bt$searchNext(VERBOSE=TRUE) 14 | bt$searchNext(VERBOSE=TRUE) 15 | 16 | # next search will return NULL because there is no more solution 17 | bt$searchNext() 18 | 19 | bt$reset() 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/editset.R: -------------------------------------------------------------------------------- 1 | 2 | # edits can be read from a vector of expressions 3 | E <- editset(expression( 4 | if ( x > 0 ) y > 0, 5 | x + y == z, 6 | A %in% letters[1:2], 7 | B %in% letters[2:3], 8 | if ( A == 'a') B == 'b', 9 | if ( A == 'b') x >= 0, 10 | u + v == w, 11 | if ( u >= 0 ) w >= 0 12 | )) 13 | E 14 | summary(E) 15 | as.data.frame(E) 16 | getVars(E) 17 | getVars(E,type='cat') 18 | getVars(E,type='num') 19 | 20 | 21 | ## see also editfile 22 | E <- editfile(system.file('script/edits/mixedits.R',package='editrules')) 23 | E 24 | summary(E) 25 | as.data.frame(E) 26 | getVars(E) 27 | getVars(E,type='cat') 28 | getVars(E,type='num') 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testEditRow.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 3 | context("Linear editrow derivarions") 4 | 5 | test_that("Various rows work",{ 6 | edt <- parse(text=c("x==y", "x+w ==y")) 7 | e <- parseNum(edt[[1]]) 8 | #print(e) 9 | }) 10 | 11 | test_that("Parsing a constant works",{ 12 | edt <- parse(text=c("x < 2")) 13 | e <- parseNum(edt[[1]]) 14 | #print(e) 15 | }) 16 | 17 | test_that("Parsing a inequality works",{ 18 | edt <- parse(text=c("x > 2")) 19 | e <- parseNum(edt[[1]]) 20 | #print(e) 21 | }) 22 | 23 | test_that("Parsing a negative coefficient works",{ 24 | edt <- parse(text=c("x == -2")) 25 | #e <- makeEditRow(edt[[1]]) 26 | #print(e) 27 | }) 28 | -------------------------------------------------------------------------------- /examples/checkRows.R: -------------------------------------------------------------------------------- 1 | # Using character vector to define contraints 2 | E <- editmatrix(c( "x+3*y==2*z", 3 | "x==z")) 4 | print(E) 5 | 6 | dat <- data.frame( x = c(0,2,1) 7 | , y = c(0,0,1) 8 | , z = c(0,1,1) 9 | ) 10 | # valid rows? editmatrix method 11 | checkRows(E, dat) 12 | 13 | 14 | # example using the character method. 15 | data(women) 16 | e <- c("height<=71", "weight/height>=2.1") 17 | valid <- checkRows(e,women) 18 | women[valid,] 19 | 20 | # same example, using data.frame method 21 | ef <- data.frame( 22 | name=c("rule 1", "rule 2"), 23 | edit=e, 24 | description=c("descr1","descr2")) 25 | checkRows(ef,women) 26 | -------------------------------------------------------------------------------- /pkg/R/str.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' @method str editmatrix 4 | #' @rdname editmatrix 5 | #' @export 6 | str.editmatrix <- function(object,...){ 7 | vars <- paste(getVars(object),collapse=", ") 8 | if (nchar(vars) > 20 ) vars <- paste(strtrim(vars,16),"...") 9 | cat(paste("editmatrix with", nrow(object), "edits containing variables",vars,"\n")) 10 | } 11 | 12 | 13 | #' @method str editarray 14 | #' @export 15 | str.editarray <- function(object,...){ 16 | vars <- paste(getVars(object),collapse=", ") 17 | if (nchar(vars) > 20 ) vars <- paste(strtrim(vars,16),"...") 18 | cat(paste("editarray with", nrow(object), "edits containing variables",vars,"\n")) 19 | } 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /papers/tex/errorLocation.tex: -------------------------------------------------------------------------------- 1 | \begin{table} 2 | \caption{Slots in the {\sf errorLocation} object} 3 | \label{tblErrorLocation} 4 | \begin{tabular}{lp{0.8\textwidth}} 5 | \hline 6 | Slot & description. \\ 7 | \hline 8 | {\sf \$adapt} & boolean array, stating which variables must be adapted for each record.\\ 9 | {\sf \$status}& A {\sf data.frame}, giving solution weights, number of equivalent solutions, timings and whether the maximum search time was exceeded.\\ 10 | {\sf \$user} & Name of user running R during the error localization\\ 11 | {\sf \$timestamp} & {\sf date()} at the end of the run.\\ 12 | {\sf \$call} & The call to {\sf localizeErrors}\\ 13 | \hline 14 | \end{tabular} 15 | \end{table} 16 | -------------------------------------------------------------------------------- /pkg/R/as.matrix.R: -------------------------------------------------------------------------------- 1 | #' @include editmatrix.R 2 | #' @include editarray.R 3 | #' @include editset.R 4 | {} 5 | 6 | #' convert to matrix 7 | #' 8 | #' @export 9 | #' @method as.matrix editmatrix 10 | #' @rdname editmatrix 11 | #' 12 | #' @return \code{as.matrix}: Augmented \code{matrix} of \code{editmatrix}. (See also \code{\link{getAb}}). 13 | as.matrix.editmatrix <- function(x, ...){ 14 | array(x, dim=dim(x), dimnames=dimnames(x)) 15 | } 16 | 17 | 18 | #' convert to matrix 19 | #' @export 20 | #' @method as.matrix editarray 21 | #' @rdname editarray 22 | #' @return \code{as.matrix}: The boolean matrix part of the \code{editarray}. 23 | as.matrix.editarray <- function(x,...){ 24 | array(x,dim=dim(x),dimnames=dimnames(x)) 25 | } 26 | 27 | -------------------------------------------------------------------------------- /pkg/R/removeRedundant.R: -------------------------------------------------------------------------------- 1 | # 2 | removeRedundant <- function(E){ 3 | 4 | m <- as.mip(E) 5 | A <- -getA(m$E) 6 | b <- getb(m$E) 7 | keep <- rep(TRUE, nrow(A)) 8 | names(keep) <- rownames(A) 9 | 10 | sapply(seq_len(nrow(A)),function(r){ 11 | m1 <- m 12 | #keep[r] <<- FALSE 13 | m1$E <- m1$E[keep,] 14 | 15 | m1$objfn <- A[r,] 16 | 17 | lps <- as.lp.mip(m1) 18 | statuscode <- solve(lps) 19 | 20 | o <- -1*get.objective(lps) 21 | 22 | keep[r] <<- (b[r] <= o) 23 | c(o=o, t=b[r]) 24 | }) 25 | structure(E[names(keep)[keep],], removed=E[names(keep)[!keep],]) 26 | } 27 | 28 | # E <- editmatrix(c(A="x>2", B="y > x", C="y>1" , D="x>1")) 29 | # Er <- removeRedundant(E) 30 | # Er 31 | -------------------------------------------------------------------------------- /pkg/R/getUpperBounds.R: -------------------------------------------------------------------------------- 1 | #' Get upperbounds of edits, given the boundaries of all variables 2 | #' @param E \code{editmatrix} 3 | #' @param xlim \code{matrix} with columns lower and upper, and rows are variables (in same order as E) 4 | #' @return matrix with upperbounds per edit and a possible value 5 | #' @keywords internal 6 | getUpperBounds <- function(E, xlim){ 7 | #print(xlim) 8 | A_p <- A_m <- A <- getA(E) 9 | A_p[A < 0] <- 0 10 | A_m[A > 0] <- 0 11 | ub <- A_m %*% xlim[,1] + A_p %*% xlim[,2] 12 | lb <- A_m %*% xlim[,2] + A_p %*% xlim[,1] 13 | 14 | b <- getb(E) 15 | b[is.na(b)] <- lb[is.na(b)] 16 | 17 | ub <- cbind(ub=ub, b=b, dummy=b-ub) 18 | #colnames(ub) <- c("lim", "dummy") 19 | rownames(ub) <- rownames(E) 20 | ub 21 | } 22 | 23 | -------------------------------------------------------------------------------- /examples/editmatrix.R: -------------------------------------------------------------------------------- 1 | # Using a character vector to define contraints 2 | E <- editmatrix(c("x+3*y==2*z", "x==z")) 3 | print(E) 4 | 5 | # Using a expression vector to define contraints 6 | E <- editmatrix(expression(x+3*y==2*z, x==z)) 7 | print(E) 8 | 9 | # an editmatrix also has a summary method: 10 | summary(E) 11 | 12 | # select rows from an editmatrix: 13 | E <- editmatrix(c("x+3*y==2*z", "x >= z")) 14 | E[getOps(E) == "=="] 15 | 16 | 17 | #Using data.frame to define constraints 18 | E.df <- data.frame( 19 | name =c("A","B","C"), 20 | edit = c("x == y", 21 | "z + w == y + x", 22 | "z == y + 2*w"), 23 | description = c( 24 | "these variables should be equal","","") 25 | 26 | ) 27 | print(E.df) 28 | 29 | E <- editmatrix(E.df) 30 | print(E) 31 | -------------------------------------------------------------------------------- /examples/editmatrixAttr.R: -------------------------------------------------------------------------------- 1 | 2 | E <- editmatrix(c( "x+3*y == 2*z" 3 | , "x > 2") 4 | ) 5 | print(E) 6 | 7 | # get editrules, useful for storing and maintaining the rules external from your script 8 | as.data.frame(E) 9 | 10 | # get coeficient matrix of inequalities 11 | getA(E) 12 | 13 | # get augmented matrix of linear edit set 14 | getAb(E) 15 | 16 | # get constants of inequalities (i.e. c(0, 2)) 17 | getb(E) 18 | 19 | # get operators of inequalities (i.e. c("==",">")) 20 | getOps(E) 21 | 22 | # get variables of inequalities (i.e. c("x","y","z")) 23 | getVars(E) 24 | 25 | # isNormalized 26 | isNormalized(E) 27 | 28 | #normalized E 29 | E <- normalize(E) 30 | E 31 | 32 | # is het now normalized? 33 | isNormalized(E) 34 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testIsObviouslyInfeasible.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 3 | context("Obvious infeasibility") 4 | 5 | test_that("Obvious infeasibility is detected",{ 6 | expect_true(isObviouslyInfeasible(editmatrix("0*x == 1"))) 7 | expect_true(isObviouslyInfeasible(editmatrix("0*x < -1"))) 8 | expect_true(isObviouslyInfeasible(editmatrix("1e-12*x <= -1"))) 9 | expect_true( isObviouslyInfeasible(editmatrix("0*x < 0"))) 10 | expect_true( isObviouslyInfeasible(editmatrix("0*x < 1e-12"))) 11 | expect_true( isObviouslyInfeasible(editmatrix("1e-12*x < 1e-12"))) 12 | expect_false(isObviouslyInfeasible(editmatrix("0*x <= 0"))) 13 | expect_false(isObviouslyInfeasible(editmatrix("x <= 0"))) 14 | expect_false(isObviouslyInfeasible(editmatrix("0*x <=1e-12"))) 15 | }) 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /pkg/R/duplicated.R: -------------------------------------------------------------------------------- 1 | #' Check for duplicate edit rules 2 | #' 3 | #' @method duplicated editarray 4 | #' @param x a \code{\link{editarray}} 5 | #' @param ... other parameters to be passed to or from other methods. 6 | #' @export 7 | #' @keywords internal 8 | duplicated.editarray <- function(x, ...) duplicated(getArr(x)) 9 | 10 | 11 | 12 | #' Check for duplicate edit rules 13 | #' 14 | #' @param x an \code{\link{editmatrix}} 15 | #' @param ... options to be passed to other methods 16 | #' @return logical vector 17 | #' @method duplicated editmatrix 18 | #' @export 19 | #' @keywords internal 20 | duplicated.editmatrix <- function(x,...){ 21 | ops2num <- c('<'=1,'<='=2,'=='=3,'>='=4,'>'=5) 22 | duplicated( 23 | cbind( 24 | getAb(x), 25 | ops2num[getOps(x)] 26 | ) 27 | ) 28 | } 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testContains.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | context('contains') 4 | test_that("contains works for editset",{ 5 | E <- editset(expression( 6 | if ( x > 0 ) y > 0, 7 | x >= 0, 8 | x+y == z 9 | )) 10 | expect_equivalent( 11 | contains(E,'z'), 12 | matrix(c(FALSE,TRUE,FALSE),nrow=3,ncol=1) 13 | ) 14 | # test generated from bug report of Jeroen Pannekoek & MvdL 15 | E <- editset(expression( 16 | 0 < v100 + v37 + v38 + v39 + v40 + v41 + v42 17 | ,0 <= v40 18 | ,if( 0 < v40 ) v50 >= 1 19 | ,if( 0 < v40 ) v51 >= 1 20 | ,if( 0 < v50 ) v40 > 0 21 | ,if( v40 <= 0 ) 0 >= v51 22 | ,if( v40 <= 0 ) 0 >= v132 23 | ,if( 0 < v40 ) v132 >= 1 24 | ,if( v115 < 1 ) 0 >= v40 25 | )) 26 | expect_equivalent( 27 | contains(E,'v40'), 28 | matrix(rep(TRUE,9),nrow=9,ncol=1) 29 | ) 30 | }) 31 | 32 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testechelon.R: -------------------------------------------------------------------------------- 1 | 2 | context("Echelon") 3 | 4 | 5 | test_that("Matrix reduces to echelon form",{ 6 | expect_equal( 7 | round(echelon( 8 | matrix(c( 9 | 1,3,1,4, 10 | 2,7,3,-9, 11 | 1,5,3,1, 12 | 1,2,0,8), byrow=TRUE, nrow=4 13 | ) 14 | )), 15 | matrix(c( 16 | 1,0,-2,0, 17 | 0,1,1,0, 18 | 0,0,0,1, 19 | 0,0,0,0),byrow=TRUE,nrow=4 20 | ) 21 | ) 22 | expect_equal( 23 | round(echelon( 24 | matrix(c( 25 | 2,1,-1,8, 26 | -3,-1,2,-11, 27 | -2,1,2,-3), byrow=TRUE, nrow=3 28 | ) 29 | )), 30 | matrix(c( 31 | 1,0,0,2, 32 | 0,1,0,3, 33 | 0,0,1,-1), 34 | byrow=TRUE,nrow=3 35 | ) 36 | ) 37 | }) 38 | 39 | 40 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testIsObviouslyRedundant.R: -------------------------------------------------------------------------------- 1 | 2 | require(testthat) 3 | context("Obvious redundancy") 4 | 5 | test_that("Obviously redundant rows are detected",{ 6 | expect_true(isObviouslyRedundant(editmatrix("0*x == 1e-12"))) 7 | expect_true(isObviouslyRedundant(editmatrix("0*x <= 0"))) 8 | expect_true(isObviouslyRedundant(editmatrix("0*x < 1"))) 9 | expect_false(isObviouslyRedundant(editmatrix("0*x < 0"))) 10 | expect_true(isObviouslyRedundant(editmatrix("0*x <= 1"))) 11 | expect_true(isObviouslyRedundant(editmatrix("1e-12*x < 1"))) 12 | 13 | expect_equal(isObviouslyRedundant(editmatrix(c("0*x <= 0" 14 | , "y < 0" 15 | ) 16 | ) 17 | ) 18 | , c(TRUE,FALSE) 19 | ) 20 | }) 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/violatedEdits.R: -------------------------------------------------------------------------------- 1 | # Using character vector to define contraints 2 | E <- editmatrix(c( "x+3*y==2*z" 3 | , "x==z" 4 | ) 5 | ) 6 | 7 | dat <- data.frame( x = c(0,2,1) 8 | , y = c(0,0,1) 9 | , z = c(0,1,1) 10 | ) 11 | print(dat) 12 | 13 | ve <- violatedEdits(E,dat) 14 | 15 | print(ve) 16 | summary(ve, E) 17 | plot(ve) 18 | 19 | # An example with categorical data: 20 | 21 | E <- editarray(expression( 22 | gender %in% c('male','female'), 23 | pregnant %in% c(TRUE, FALSE), 24 | if( gender == 'male' ) !pregnant 25 | ) 26 | ) 27 | print(E) 28 | 29 | dat <- data.frame( 30 | gender=c('male','male','female','cylon'), 31 | pregnant=c(TRUE,FALSE,TRUE,TRUE) 32 | ) 33 | 34 | print(dat) 35 | # Standard, the datamodel is checked as well, 36 | violatedEdits(E,dat) 37 | 38 | # but we may turn this of 39 | violatedEdits(E,dat,datamodel=FALSE) 40 | 41 | 42 | -------------------------------------------------------------------------------- /examples/substValue.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | E <- editmatrix(expression( 4 | x + y == z, 5 | 2*y < 10, 6 | 3*x + 1.5*u < 7, 7 | z >= 0 8 | ) 9 | ) 10 | 11 | # single value 12 | substValue(E,'z',10) 13 | # multiple values 14 | substValue(E,c('x','y'),c(1,3)) 15 | # remove substituted variable from edits 16 | substValue(E,'z',10,reduce=TRUE) 17 | # do not remove redundant row: 18 | substValue(E,'z',10,removeredundant=FALSE) 19 | 20 | 21 | # example with an editset 22 | E <- editset(expression( 23 | x + y == z, 24 | x >= 0, 25 | y >= 0, 26 | A %in% c('a1','a2'), 27 | B %in% c('b1','b2'), 28 | if ( x > 0 ) y > 0, 29 | if ( y > 0 ) x > 0, 30 | if ( A == 'a' ) B == 'b', 31 | if ( A == 'b' ) y > 3 32 | ) 33 | ) 34 | 35 | # substitute pure numerical variable 36 | substValue(E,'z',10) 37 | # substitute pure categorical variable 38 | substValue(E,'A','a1') 39 | # substitute variable appearing in logical constraints 40 | substValue(E,'x',3) 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testIsFeasible.R: -------------------------------------------------------------------------------- 1 | require(testthat) 2 | 3 | context("Feasibility checks: numerical") 4 | 5 | test_that("isFeasible",{ 6 | expect_false(isFeasible(editmatrix(c("x +y < 0","x+y>0")), warn=FALSE)) 7 | }) 8 | 9 | test_that("isFeasible, warning",{ 10 | expect_warning(isFeasible(editmatrix(c("x +y < 0","x+y>0")), warn=TRUE)) 11 | }) 12 | 13 | test_that("isFeasible with 0==1",{ 14 | expect_false(isFeasible(editmatrix("0==1"), warn=FALSE)) 15 | }) 16 | 17 | 18 | context("Feasibility checks: categorical") 19 | test_that('isFeasible with categorical data',{ 20 | # counterintuitive example: edits are contradictory, but space of possible records is not empty. 21 | E <- editarray(c( 22 | "a %in% letters[1:3]", 23 | "b %in% letters[4:6]", 24 | "if ( a == 'a' ) b == 'd'", 25 | "if ( a == 'a' ) b != 'd'" 26 | )) 27 | expect_true(isFeasible(E)) 28 | # obvious contradiction 29 | expect_false(isFeasible( 30 | editarray(c("b %in% c('x','y')","if (TRUE) b == 'x'","if(TRUE) b != 'x'")) 31 | )) 32 | }) 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /papers/mySweave.sty: -------------------------------------------------------------------------------- 1 | \NeedsTeXFormat{LaTeX2e} 2 | \ProvidesPackage{Sweave}{} 3 | 4 | \RequirePackage{ifthen} 5 | \newboolean{Sweave@gin} 6 | \setboolean{Sweave@gin}{true} 7 | \newboolean{Sweave@ae} 8 | \setboolean{Sweave@ae}{true} 9 | 10 | \DeclareOption{nogin}{\setboolean{Sweave@gin}{false}} 11 | \DeclareOption{noae}{\setboolean{Sweave@ae}{false}} 12 | \ProcessOptions 13 | 14 | \RequirePackage{graphicx,fancyvrb} 15 | \IfFileExists{upquote.sty}{\RequirePackage{upquote}}{} 16 | 17 | \ifthenelse{\boolean{Sweave@gin}}{\setkeys{Gin}{width=0.8\textwidth}}{}% 18 | \ifthenelse{\boolean{Sweave@ae}}{% 19 | \RequirePackage[T1]{fontenc} 20 | \RequirePackage{ae} 21 | }{}% 22 | 23 | \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontshape=sl,fontsize=\small} 24 | \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontsize=\small} 25 | \DefineVerbatimEnvironment{Scode}{Verbatim}{fontshape=sl,fontsize=\small} 26 | 27 | \newenvironment{Schunk}{}{} 28 | 29 | \newcommand{\Sconcordance}[1]{% 30 | \ifx\pdfoutput\undefined% 31 | \csname newcount\endcsname\pdfoutput\fi% 32 | \ifcase\pdfoutput\special{#1}% 33 | \else\immediate\pdfobj{#1}\fi} 34 | -------------------------------------------------------------------------------- /develop/generateEM.R: -------------------------------------------------------------------------------- 1 | # generate large editmatrix useful for performance testing 2 | 3 | require(editrules) 4 | 5 | genEditMatrix <- function(nedits, nvars, nvr=min(4, nvars)){ 6 | nedits <- nedits 7 | nvars <- nvars 8 | gen <- function(){ 9 | m <- matrix(0, nrow=nedits, ncol=nvars) 10 | for (i in 1:nedits){ 11 | m[i,sample(nvars, nvr)] <- sample(c(-2,-1,1,2), size=nvr, replace=TRUE) 12 | } 13 | ops <- sample(c("==","<="), size=nedits, replace=TRUE) 14 | as.editmatrix(A=m, ops=ops) 15 | } 16 | 17 | E <- gen() 18 | while(!isFeasible(E)){ 19 | E <- gen() 20 | nedits <- nedits-1 21 | } 22 | #decrease number of constraint until the matrix becomes feasible 23 | E 24 | } 25 | 26 | nvars <- 50 27 | E <- genEditMatrix(10,nvars) 28 | vars <- getVars(E) 29 | 30 | x <- numeric(nvars) # since b==0, this is a valid solution 31 | names(x) <- vars 32 | 33 | x[sample(nvars,1)] <- 1 34 | 35 | el <- errorLocalizer(E, x) 36 | system.time({ 37 | sol1 <- el$searchBest() 38 | }) 39 | which(sol1$adapt) 40 | 41 | system.time({ 42 | sol2 <- localizeError_lp(E,x) 43 | }) 44 | which(sol2$adapt) 45 | 46 | #results should be analyzed! -------------------------------------------------------------------------------- /pkg/R/editAttr.R: -------------------------------------------------------------------------------- 1 | #' Number of edits 2 | #' Count the number of edits in a collection of edits. 3 | #' @param E \code{\link{editset}}, \code{\link{editarray}} or \code{\link{editmatrix}} 4 | #' @export 5 | nedits <- function(E){ 6 | if ( is.vector(E) ) return(length(E)) 7 | if (any(class(E) %in% c('editmatrix','editarray'))){ 8 | n <- nrow(E) 9 | } else if ( inherits(E,'editset') ) { 10 | n <- nrow(E$num) + nrow(E$mixcat) 11 | } else { 12 | stop('Argument must be character, editset, editarray, editmatrix') 13 | } 14 | n 15 | } 16 | 17 | #' Names of edits 18 | #' 19 | #' Retrieve edit names from editset, -array or -matrix 20 | #' @param E \code{\link{editset}}, \code{\link{editarray}} or \code{\link{editmatrix}} 21 | #' @export 22 | editnames <- function(E){ 23 | 24 | if (any(class(E) %in% c('editmatrix','editarray'))){ 25 | n <- rownames(E) 26 | } else if ( inherits(E,'editset') ) { 27 | n <- c(rownames(E$num),rownames(E$mixcat)) 28 | } else { 29 | stop('Argument must be editset, editarray or editmatrix') 30 | } 31 | n 32 | 33 | } 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /test/elmar_2.R: -------------------------------------------------------------------------------- 1 | library(editrules) 2 | 3 | # 4 | E_set <- editset(c( 5 | "local >= 1", 6 | "local <= 10000", 7 | "nace %in% c('461', '462')", 8 | "turnover < -1", # contradictionary range check 9 | "turnover > 1e6", # contradictionary range check 10 | "pers >= 1", 11 | "pers <= 99999", 12 | "pers >= local", 13 | "if (nace == '461') pers <= 3 * local", 14 | "if (nace == '461') turnover <= 10000 * pers", 15 | "turnover <= 1e6 * pers")) 16 | is.editset(E_set) 17 | E_set 18 | summary(E_set) 19 | # 20 | isFeasible(E_set, warn = TRUE) 21 | # 22 | rawdata <- data.frame( 23 | id = c("1","2"), 24 | nace = c("461", "462"), 25 | local = c(1,2), 26 | pers = c(100001,100001), 27 | turnover = c(1e6 + 1, 1e6 + 1) 28 | ) 29 | rawdata <- rawdata[,-1] 30 | rawdata 31 | # 32 | recwghts <- data.frame( 33 | id = c("1","2"), 34 | nace = c(1000,1000), 35 | local = c(90,90), 36 | pers = c(10,10), 37 | turnover = c(95,95) 38 | ) 39 | recwghts <- as.matrix(recwghts[,-1]) 40 | recwghts 41 | is.matrix(recwghts) 42 | # 43 | # 44 | localizeErrors(E_set,rawdata,weight=rep(1, ncol(rawdata))) 45 | 46 | # 47 | localizeErrors(E_set,rawdata,weight=recwghts, method = "mip") 48 | errorLocalizer.mip(E_set, rawdata[1,], weight=recwghts[1,]) 49 | # 50 | -------------------------------------------------------------------------------- /pkg/R/getH.R: -------------------------------------------------------------------------------- 1 | #' Returns the derivation history of an edit matrix or array 2 | #' 3 | #' Function \code{\link{eliminate}} tracks the history of edits in a logical array H. 4 | #' H has nrow(E) rows and the number of columns is the number of 5 | #' edits in the \code{\link{editmatrix}} as it was first defined. If 6 | #' H[i,j1], H[i,j2],...,H[i,jn] are \code{TRUE}, then E[i,] is some 7 | #' (positive, linear) combination of original edits E[j1,], E[j2,],...,E[jn,] 8 | #' 9 | #' Attributes H and h are used to detect redundant derived edits. 10 | #' 11 | #' @param E \code{\link{editmatrix}} 12 | #' @rdname geth 13 | #' @seealso \code{\link{editmatrix}}, \code{\link{eliminate}} 14 | #' 15 | #' 16 | #' @export 17 | getH <- function(E){ 18 | if ( !class(E) %in% c('editmatrix','editarray') ) 19 | stop("E has to be an editmatrix or editarray") 20 | attr(E,"H") 21 | } 22 | 23 | #' Returns the number of elimination steps performed on an edit matrix or array 24 | #' 25 | #' h records the number of variables eliminated from E by \code{\link{eliminate}} 26 | #' 27 | #' @rdname geth 28 | #' @seealso \code{\link{editmatrix}}, \code{\link{eliminate}} 29 | #' @export 30 | geth <- function(E){ 31 | if ( !class(E) %in% c('editmatrix','editarray') ) 32 | stop("E has to be an editmatrix or editarray") 33 | attr(E,"h") 34 | } 35 | 36 | -------------------------------------------------------------------------------- /examples/editarray.R: -------------------------------------------------------------------------------- 1 | 2 | # Here is the prototypical categorical edit: men cannot be pregnant. 3 | E <- editarray(expression( 4 | gender %in% c('male','female'), 5 | pregnant %in% c('yes','no'), 6 | if( gender == 'male' ) pregnant == 'no' 7 | ) 8 | ) 9 | E 10 | 11 | # an editarray has a summary method: 12 | summary(E) 13 | 14 | # A yes/no variable may also be modeled as a logical: 15 | editarray(expression( 16 | gender %in% c('male','female'), 17 | pregnant %in% c(TRUE, FALSE), 18 | if( gender == 'male' ) pregnant == FALSE 19 | ) 20 | ) 21 | 22 | # or, shorter (and using a character vector as input): 23 | editarray(expression( 24 | gender %in% c('male','female'), 25 | pregnant %in% c(TRUE, FALSE), 26 | if( gender == 'male' ) !pregnant 27 | ) 28 | ) 29 | 30 | # the \%in\% statement may be used at will 31 | editarray(expression( 32 | gender %in% c('male','female'), 33 | pregnant %in% c(TRUE, FALSE), 34 | positionInHousehold %in% c('marriage partner', 'child', 'other'), 35 | maritalStatus %in% c('unmarried','married','widowed','divorced'), 36 | if( gender == 'male' ) !pregnant, 37 | if( maritalStatus %in% c( 38 | 'unmarried', 39 | 'widowed', 40 | 'divorced') 41 | ) !positionInHousehold %in% c('marriage partner','child') 42 | ) 43 | ) 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testFourierMotzkin.R: -------------------------------------------------------------------------------- 1 | require(testthat) 2 | 3 | context("FM elimination") 4 | 5 | # test: the first example of eliminate 6 | test_that("eliminate works fine",{ 7 | 8 | P <- editmatrix(c( 9 | "4*x1 - 5*x2 - 3*x3 + z <= 0", 10 | "-x1 + x2 -x3 <= 2", 11 | "x1 + x2 + 2*x3 <= 3", 12 | "-x1 <= 0", 13 | "-x2 <= 0", 14 | "-x3 <= 0")) 15 | P1 <- eliminate(P, "x1", fancynames=TRUE) 16 | Ab <- matrix(c( 17 | 0, -0.25, -1.75, 0.25, 2, 18 | 0, -1.25, -0.75, 0.25, 0, 19 | 0, 2.00, 1.00, 0.00, 5, 20 | 0, 1.00, 2.00, 0.00, 3, 21 | 0, -1.00, 0.00, 0.00, 0, 22 | 0, 0.00, -1.00, 0.00, 0), byrow=TRUE,nrow=6) 23 | H <- matrix(c( 24 | TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, 25 | TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, 26 | FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, 27 | FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, 28 | FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 29 | FALSE, FALSE, FALSE, FALSE, FALSE, TRUE), byrow=TRUE,nrow=6) 30 | op <- c("<=", "<=", "<=", "<=", "<=", "<=") 31 | expect_true(all( Ab == getAb(P1) )) 32 | expect_true(all( H == getH(P1) )) 33 | expect_true(all( op == getOps(P1))) 34 | expect_true( geth(P1) == 1 ) 35 | }) 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testeditmatrixAttr.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 3 | context("Editmatrix attributes") 4 | 5 | test_that("editrules can derive the correct info from a matrix",{ 6 | mat <- matrix( c( 1,-2, 0 7 | , 2, 0, 1 8 | ) 9 | , nrow=2 10 | , byrow=TRUE 11 | # , dimnames=list(c("a", "b"), c("x","y", "z")) 12 | ) 13 | ei <- as.data.frame(as.editmatrix(mat)) 14 | expect_equal(ei$edit, c("x1 == 2*x2", "2*x1 + x3 == 0")) 15 | 16 | mat <- matrix( c( 1,-2 17 | , 2, 0 18 | ) 19 | , nrow=2 20 | , byrow=TRUE 21 | , dimnames=list(c("A", "B"), c("x","y")) 22 | ) 23 | ei <- as.data.frame(as.editmatrix(mat)) 24 | #expect_equal(ei$name, c("A","B")) 25 | expect_equal(ei$edit, c("x == 2*y", "2*x == 0")) 26 | }) 27 | 28 | test_that("getb works",{ 29 | cond <- c( "x + y > 2" 30 | , "y < 10" 31 | ) 32 | E <- editmatrix(cond, FALSE) 33 | b <- getb(E) 34 | 35 | expect_equal(b, c(num1=2,num2=10)) 36 | }) 37 | 38 | test_that("getOps works",{ 39 | cond <- c( "x + y > 2" 40 | , "y < 10" 41 | , "x + y == 2" 42 | , "y <= 10" 43 | , "y >= 10" 44 | ) 45 | E <- editmatrix(cond, FALSE) 46 | ops <- getOps(E) 47 | expect_equivalent(ops, c(">","<","==","<=",">=")) 48 | }) 49 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testBlocks.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | context("Detecting blocks") 4 | 5 | 6 | test_that("editmatrix separates in blocks",{ 7 | E <- editmatrix( c( 8 | "x1 + x2 == x3" 9 | , "x3 + x4 == x5" 10 | , "x5 + x6 == x7" 11 | , "y1 + y2 == y3" 12 | , "z1 + z2 == z3") 13 | ) 14 | expect_equal(length(blocks(E)),3) 15 | expect_true(all(getAb(blocks(E)[[2]])==c(1,1,-1,0))) 16 | }) 17 | 18 | test_that("editarray separates in blocks",{ 19 | E <- editarray(c( 20 | "x %in% c('a','b','c')", 21 | "y %in% c('d','e')", 22 | "z %in% c('f','g')", 23 | "u %in% c('w','t')", 24 | "if ( x == 'a') y != 'd'", 25 | "if ( z == 'f') u != 'w'")) 26 | expect_equal(length(blocks(E)),2) 27 | expect_true(all(getArr(blocks(E)[[1]]) == c(TRUE,FALSE,FALSE,TRUE,FALSE))) 28 | }) 29 | 30 | test_that("editset separates in blocks",{ 31 | E <- editset(expression( 32 | if ( x > 0 ) y > 0, 33 | x + y >= z, 34 | A %in% letters[1:2], 35 | B %in% letters[2:3], 36 | if ( A == 'a') B == 'b', 37 | if ( A == 'b') x >= 0, 38 | u + v >= w, 39 | if ( u <= 0 ) w >= 0 40 | )) 41 | b <- blocks(E) 42 | expect_equal(length(b),2) 43 | expect_equal(sort(getVars(b[[1]])),c("A","B","x","y","z")) 44 | expect_equal(sort(getVars(b[[2]])),c("u","v","w")) 45 | }) 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testCheck.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 3 | context("Edit checking") 4 | 5 | test_that("Checking a data set works",{ 6 | edt <- editmatrix("2*x==y") 7 | dat <- data.frame(x=c(2,1), y=c(4,5)) 8 | cr <- checkRows(edt, dat) 9 | expect_equal(cr, c(TRUE,FALSE)) 10 | }) 11 | 12 | test_that("Checking a vector works",{ 13 | edt <- editmatrix(editrules = "2*x==y") 14 | dat <- c(x=2, y=1) 15 | expect_equal(as.logical(violatedEdits(edt, dat)), c(TRUE)) 16 | }) 17 | 18 | test_that("Showing data errors works",{ 19 | edt <- editmatrix(editrules = "2*x==y\n5*x==y") 20 | dat <- data.frame(x=c(2,1), y=c(4,5)) 21 | errors <- violatedEdits(edt, dat) 22 | 23 | # rule names equal? 24 | expect_equal(colnames(errors), rownames(edt)) 25 | 26 | dimnames(errors) <- NULL 27 | expect_equal(errors[,,drop=FALSE], matrix( c( FALSE, TRUE 28 | , TRUE , FALSE 29 | ) 30 | , nrow=2 31 | , byrow=TRUE 32 | 33 | ) 34 | ) 35 | }) 36 | 37 | 38 | test_that("Error lists works",{ 39 | edt <- editmatrix(editrules = "2*x==y\n5*x==y") 40 | dat <- data.frame(x=c(2,1), y=c(4,5)) 41 | errors <- listViolatedEdits(edt, dat) 42 | #expect_equal(errors, list("1"=c(2), "2"=c(1))) 43 | #print(str(errors)) 44 | #print(showErrors(dat, edt)) 45 | }) 46 | -------------------------------------------------------------------------------- /papers/index.html: -------------------------------------------------------------------------------- 1 | 2 | R: editrules vignettes 3 | 4 | 5 |

Vignettes of package editrules

6 |
7 |
editrules-linear.pdf: 8 |
Vignette about editing data under linear constraints. 9 |
editrules-categorical.pdf: 10 |
Vignette about categorical data editing with editrules. 11 |
12 | 13 |

Papers

14 |
15 |
Manipulation of Linear edits and error localization with the editrules package: 16 |
17 | E. De Jonge and Mark van der Loo (2011). Discussion paper 201120, Statistics Netherlands The Hague/Heerlen.
18 | Technical paper about the package. Equal to the vignette at version 1.0-0. 19 |
Manipulation of Categorical data edits and error localization with the editrules package: 20 |
21 | E. De Jonge and Mark van der Loo (2011). Discussion paper 201129, Statistics Netherlands The Hague/Heerlen.
22 | Technical paper about the package. Equal to the vignette at version 2.0-0. 23 |
24 | 25 |

Other references

26 |
27 |
yaRb 28 |
A blog with some simple examples on the usage of editrules, amongst others. 29 |
git 30 |
The git repository of editrules at Google code. 31 |
32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /develop/categorical/tst.R: -------------------------------------------------------------------------------- 1 | require(editrules) 2 | 3 | map <- "/media/Data/Projecten/editrules/develop/categorical" 4 | fil <- c("categorical.R", "editarray.R", "reduceCat.R") 5 | dmp <- sapply(file.path(map,fil),source) 6 | 7 | source("../../pkg/R/check.R") 8 | 9 | civilStatusLevels <- c("married","unmarried","widowed","divorced") 10 | hhLevels <- c("marriage partner", "child","other") 11 | 12 | x <- c( 13 | "civilStatus %in% civilStatusLevels", 14 | "positionInHousehold %in% hhLevels", 15 | "age %in% c('<16','16-60','>16')", 16 | "if ( age == '<16') civilStatus != 'married'", 17 | "if (civilStatus != 'married') positionInHousehold != 'marriage partner'" 18 | ) 19 | (E <- editarray(x)) 20 | 21 | datamodel(E) 22 | 23 | F <- eliminateFM.editarray(E,"civilStatus") 24 | contains(F,"civilStatus") 25 | t(substValue.editarray(E,"positionInHousehold","child")) 26 | 27 | r <- data.frame(age="<16",civilStatus="married",positionInHousehold="marriage partner") 28 | 29 | violatedEdits(E,r) 30 | 31 | u <- sapply(r,as.character) 32 | bt <- errorLocalizer.editarray(E,u) 33 | bt$searchNext(VERBOSE=FALSE) 34 | bt$searchNext(VERBOSE=FALSE) 35 | bt$searchNext(VERBOSE=FALSE) 36 | 37 | 38 | a <- editarray( 39 | c("gender %in% c('m','f')", 40 | "pregnant %in% c('y','n')", 41 | "if(gender == 'm') pregnant == 'n'" 42 | )) 43 | b <- neweditarray( 44 | E = array(c(FALSE,TRUE,FALSE,TRUE),dim=c(1,4)), 45 | ind = list( 46 | gender = as.integer(c('f'=1,'m'=2)), 47 | pregnant= as.integer(c('n'=3,'y'=4))), 48 | sep=":", 49 | names="e1", 50 | levels=c("gender:f","gender:m","pregnant:n","pregnant:y") 51 | ) 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /pkg/R/isSubset.R: -------------------------------------------------------------------------------- 1 | 2 | #' Check which edits are dominated by other ones. 3 | #' 4 | #' An edit defines a subregion of the space of all possible value combinations 5 | #' of a record. Records in this region are interpreted as invalid. An edit rule 6 | #' which defines a region equal to or contained in the region defined by another 7 | #' edit is redundant. (In data editing literature, this is often referred to as 8 | #' a \emph{domination} relation.) 9 | #' 10 | #' @param E \code{\link{editarray}} 11 | #' @return \code{logical} vector indicating if an edit is a subset of at least one other edit. 12 | #' @export 13 | isSubset <- function(E){ 14 | if ( !is.editarray(E) ) stop('argument is not an editarray') 15 | isSubset.boolmat(getArr(E)) 16 | } 17 | 18 | isSubset.boolmat <- function(A){ 19 | if (nrow(A)==0) return(logical(0)) 20 | if (nrow(A)==1) return(FALSE) 21 | 22 | d <- duplicated(A) 23 | wd <- which(d) 24 | m <- nrow(A) 25 | if ( m == 0 ) return(logical(0)) 26 | M <- (1:m)[!d] 27 | if ( length(M) == 1 ) return(d) 28 | m1 <- length(M)-1 29 | s <- logical(m) 30 | s[M] <- vapply(M, 31 | function(i){ 32 | I <- c(i,wd) 33 | any(rowSums(A[-I,,drop=FALSE] - (A[-I,,drop=FALSE] | A[rep(i,m1),,drop=FALSE]) ) == 0) 34 | }, 35 | FUN.VALUE=FALSE 36 | ) 37 | s | d 38 | } 39 | 40 | # check if edits in A are subset of edits in B: (returns boolean vector) 41 | isSubsetWrt.boolmat <- function(A,B){ 42 | m <- nrow(A) 43 | n <- nrow(B) 44 | if ( m == 0 ) return(logical(0)) 45 | if ( n == 0 ) return(rep(FALSE,n)) 46 | 47 | vapply(1:m,function(i){ 48 | any(rowSums(abs({A[rep(i,n),,drop=FALSE] | B} - B)) == 0) 49 | },FUN.VALUE=FALSE) 50 | } 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /pkg/R/isFeasible.R: -------------------------------------------------------------------------------- 1 | #' Check consistency of set of edits 2 | #' 3 | #' When variables are \code{\link[=eliminate]{eliminated}} one by one 4 | #' from a set of edits, eventually either no edits are left or an 5 | #' \code{\link[=isObviouslyInfeasible]{obvious contradiction}} is encountered. 6 | #' In the case no records can obey all edits in the set which is therefore 7 | #' \code{inFeasible}. 8 | #' 9 | #' 10 | #' @note This function can potentially take a long time to complete, especially 11 | #' when many connected (conditional) edits are present. Consider using \code{\link{blocks}} 12 | #' to check feasibility of indendent blocks. 13 | #' 14 | #' 15 | #' @param E an \code{\link{editmatrix}}, \code{\link{editarray}} or \code{\link{editset}} 16 | #' @param warn logical: should a warning be emitted when system is infeasible? 17 | #' @return TRUE or FALSE 18 | #' 19 | #' @seealso \code{\link{isObviouslyInfeasible}}, \code{\link{isObviouslyRedundant}} 20 | #' @export 21 | isFeasible <- function(E, warn=FALSE){ 22 | ## TODO: make it return the subset of edits causing the contradiction. 23 | vars <- getVars(E) 24 | vars2 <- vars 25 | feasible <- any(!isObviouslyInfeasible(E)) 26 | while( isTRUE(feasible) && length(vars) > 0 ){ 27 | E <- eliminate(E,vars[1]) 28 | vars <- vars[-1] 29 | ## TODO: cleanup editlists that have infeasible parts, currently they are included 30 | ## for all eliminations. 31 | feasible <- any(!isObviouslyInfeasible(E)) 32 | if ( !feasible && warn ) 33 | warning( 34 | paste("system becomes obviously infeasible after eliminating", 35 | paste(vars2[!(vars2 %in% vars)],collapse=", ")) 36 | ) 37 | } 38 | return(feasible) 39 | } 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testeditarray.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | library(testthat) 4 | 5 | context("Editarray") 6 | 7 | 8 | test_that("2x2 categorical datamodel",{ 9 | dm <- c( 10 | "g %in% c('m','f')", 11 | "p %in% c('y','n')") 12 | expect_equivalent(getArr(editarray(c(dm,"if( p == 'y' ) g != 'm'"))),array(c(F,T,F,T),dim=c(1,4))) 13 | expect_equivalent(getArr(editarray(c(dm,"if( p %in% c('y') ) g != 'm'"))),array(c(F,T,F,T),dim=c(1,4))) 14 | expect_equivalent(getArr(editarray(c(dm,"if( p %in% c('y') ) g == 'f'"))),array(c(F,T,F,T),dim=c(1,4))) 15 | }) 16 | 17 | 18 | test_that("2x{TRUE,FALSE} datamodel",{ 19 | dm <- c( 20 | "g %in% c('m','f')", 21 | "p %in% c(FALSE,TRUE)") 22 | expect_equivalent(getArr(editarray(c(dm,"if( p ) g != 'm'"))),array(c(F,T,F,T),dim=c(1,4))) 23 | expect_equivalent(getArr(editarray(c(dm,"if( g == 'm' ) !p"))),array(c(F,T,F,T),dim=c(1,4))) 24 | expect_equivalent(getArr(editarray(c(dm,"!p || g=='f'"))),array(c(F,T,F,T),dim=c(1,4))) 25 | }) 26 | 27 | context("Editarray parsing") 28 | test_that("parse editarray to character and back",{ 29 | edts <- c( 30 | "g %in% c('m','f')", 31 | "p %in% c(FALSE,TRUE)", 32 | "if (p) !g=='m'") 33 | expect_equivalent(editarray(edts), editarray(as.character(editarray(edts)))) 34 | # cornercase found in version 2.5-1 35 | edts <- expression( 36 | A %in% letters[1:3], 37 | if ( A %in% c('a','b') ) FALSE 38 | ) 39 | expect_equivalent(editarray(edts), editarray(as.character(editarray(edts)))) 40 | }) 41 | 42 | test_that("parse editarray to expression and back",{ 43 | edts <- expression( 44 | g %in% c('m','f'), 45 | p %in% c(FALSE,TRUE), 46 | if (p) !g=='m' 47 | ) 48 | expect_equivalent(editarray(edts), editarray(as.character(editarray(edts)))) 49 | }) 50 | -------------------------------------------------------------------------------- /papers/fig/tree.tex: -------------------------------------------------------------------------------- 1 | \setlength{\unitlength}{1mm} 2 | \begin{picture}(118,60) 3 | \put(46,40){\small 4 | $\left\langle 5 | \begin{array}{l} 6 | y > x-1\\ 7 | y > -x + 3\\ 8 | y < x + 1\\ 9 | y < -x + 5 10 | \end{array},(2,-1),0 11 | \right\rangle$ 12 | } 13 | \put(20,20){\small 14 | $\left\langle 15 | \begin{array}{l} 16 | y > 1\\ 17 | y < 3\\ 18 | \end{array},(2,-1),0 19 | \right\rangle$ 20 | } 21 | \put(80,20){\small 22 | $\left\langle 23 | \begin{array}{l} 24 | y > 1\\ 25 | y < 3\\ 26 | \end{array},(x,-1),1 27 | \right\rangle$ 28 | } 29 | \put(0,00){\small 30 | $\left\langle 31 | \begin{array}{l} 32 | -1 > 1\\ 33 | -1 < 3\\ 34 | \end{array},(2,-1),0 35 | \right\rangle$ 36 | } 37 | \put(40,00){\small 38 | $\left\langle 39 | \varnothing,(2,y),1 40 | \right\rangle$ 41 | } 42 | \put(60,00){\small 43 | $\left\langle 44 | \begin{array}{l} 45 | -1 > 1\\ 46 | -1 < 3\\ 47 | \end{array},(x,-1),1 48 | \right\rangle$ 49 | } 50 | \put(100,00){\small 51 | $\left\langle 52 | \varnothing,(x,y),2 53 | \right\rangle$ 54 | } 55 | \thicklines 56 | \put(64,32){\vector(-4,-1){30}} 57 | \put(64,32){\vector( 4,-1){30}} 58 | \put(34,17){\vector(-1,-1){12}} 59 | \put(34,17){\vector( 1,-1){12}} 60 | \put(94,17){\vector(-1,-1){12}} 61 | \put(94,17){\vector( 1,-1){12}} 62 | \put(27,28){\small subst. $x=2$} 63 | \put(87,28){\small elim. $x$} 64 | \put(6,10){\small subst. $y=-1$} 65 | \put(42,10){\small elim. $y$} 66 | \put(65,10){\small subst. $y=-1$} 67 | \put(102,10){\small elim. $y$} 68 | % frame 69 | %\thinlines 70 | %\put(0,0){\line(0,1){60}} 71 | %\put(0,0){\line(1,0){117}} 72 | %\put(0,60){\line(1,0){117}} 73 | %\put(117,0){\line(0,1){60}} 74 | \end{picture} 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /pkg/R/as.igraph.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' @method as.igraph editmatrix 4 | #' @param x An object of class \code{\link{editmatrix}}, \code{\link{editarray}} or \code{\link{editset}} 5 | #' @param weighted see \code{\link[igraph]{graph.adjacency}} 6 | #' 7 | #' @export 8 | #' @rdname adjacency 9 | as.igraph.editmatrix <- function(x, nodetype=c("all", "rules","vars"), rules=editnames(x), vars=getVars(x), weighted=TRUE, ...){ 10 | nodetype <- match.arg(nodetype) 11 | a <- adjacency(E=x, nodetype=nodetype, rules=rules, vars=vars, ...) 12 | g <- igraph::graph.adjacency( 13 | a, 14 | weighted=weighted, 15 | mode = 'undirected' 16 | ) 17 | # $type is handy for bipartite graph function in igraph... 18 | igraph::V(g)$type <- igraph::V(g)$vars <- attr(a, "vars") 19 | g 20 | } 21 | 22 | #' @method as.igraph editarray 23 | #' @export 24 | #' @rdname adjacency 25 | as.igraph.editarray <- function(x, nodetype=c("all", "rules","vars"), rules=editnames(x), vars=getVars(x),weighted=TRUE, ...){ 26 | nodetype <- match.arg(nodetype) 27 | a <- adjacency(E=x, nodetype=nodetype, rules=rules, vars=vars, ...) 28 | g <- igraph::graph.adjacency( 29 | a, 30 | weighted=weighted, 31 | mode = 'undirected' 32 | ) 33 | igraph::V(g)$type <- igraph::V(g)$vars <- attr(a, "vars") 34 | g 35 | } 36 | 37 | 38 | #' @method as.igraph editset 39 | #' @export 40 | #' @rdname adjacency 41 | as.igraph.editset <- function(x, nodetype=c("all", "rules","vars"), rules=editnames(x), vars=getVars(x),weighted=TRUE, ...){ 42 | nodetype <- match.arg(nodetype) 43 | a <- adjacency(E=x, nodetype=nodetype, rules=rules, vars=vars, ...) 44 | g <- igraph::graph.adjacency( 45 | a, 46 | weighted=weighted, 47 | mode = 'undirected' 48 | ) 49 | igraph::V(g)$type <- igraph::V(g)$vars <- attr(a, "vars") 50 | g 51 | } 52 | -------------------------------------------------------------------------------- /pkg/R/editarrayAttr.R: -------------------------------------------------------------------------------- 1 | 2 | #' get index list from editmatrix 3 | #' 4 | #' The 'ind' attribute is a named list of named integer vectors. The list names are the 5 | #' variable names. The vectors in the list index the columns in the editarray associated with the 6 | #' variables. The names of the vectors are the names of the columns of the editarray. 7 | #' 8 | #' @param E \code{\link{editarray}} 9 | #' @return named list, indexing category levels in the editarray (columns) 10 | #' @keywords internal 11 | getInd <- function(E) attr(E,"ind") 12 | 13 | 14 | #' get seprator used to seperate variables from levels in editarray 15 | #' @param E \code{\link{editarray}} 16 | #' @return character 17 | #' @keywords internal 18 | getSep <- function(E) attr(E,"sep") 19 | 20 | #' Get named logical array from editarray 21 | #' @param E \code{\link{editarray}} 22 | #' @return logical array 23 | #' @keywords internal 24 | getArr <- function(E) unclass(E)[,,drop=FALSE] 25 | 26 | #' retrieve level names from editarray 27 | #' @param E \code{\link{editarray}} 28 | #' @return character vector 29 | #' @keywords internal 30 | getlevels <- function(E) colnames(E) 31 | 32 | #' retrieve edit names from editarray 33 | #' @param E \code{\link{editarray}} 34 | #' @return character vector 35 | #' @keywords internal 36 | getnames <- function(E) rownames(E) 37 | 38 | #' Summarize data model of an editarray in a data.frame 39 | #' 40 | #' @param E \code{\link{editarray}} 41 | #' @return \code{data.frame} describing the categorical variables and their levels. 42 | #' @example ../examples/datamodel.R 43 | #' @seealso \code{\link{checkDatamodel}} 44 | #' @export 45 | datamodel <- function(E){ 46 | if (ncol(E) == 0 ) return(data.frame(variable=character(0),value=character(0))) 47 | st <- stack(getInd(E)) 48 | vals <- do.call(c,lapply(getInd(E),names)) 49 | data.frame(variable=as.character(st[,2]),value=vals,row.names=1:nrow(st)) 50 | } 51 | 52 | -------------------------------------------------------------------------------- /test/smoketest.R: -------------------------------------------------------------------------------- 1 | library(editrules) 2 | 3 | # n : number of variables 4 | # m : number of blocks (< (n-3)/2) 5 | accountBalance <- function(n, m, all.positive=TRUE){ 6 | nblock <- (n-m-1) %/% m 7 | varnames <- paste("x",1:n,sep="") 8 | A <- matrix(0, nrow=(m+1), ncol=n) 9 | for ( i in 1:m ){ 10 | iblock = ((i-1)*nblock+1):(i*nblock) 11 | A[i,iblock] <- 1 12 | A[i,(i*nblock)+1] <- -1 13 | } 14 | A[m+1,seq(nblock+1,n-m,by=nblock)] <- 1 15 | A[m+1,n] <- -1 16 | 17 | pos = character(0) 18 | if ( all.positive ) pos <- paste(varnames, 0, sep=">=") 19 | c( 20 | reduce(as.editmatrix(A)), 21 | editmatrix(pos) 22 | ) 23 | } 24 | 25 | # Generate N records for variables E. 26 | gen_data <- function(E,N,distr=rlnorm,...){ 27 | vars <- getVars(E) 28 | nval <- N*length(vars) 29 | as.data.frame( 30 | array( 31 | distr(nval,...), 32 | dim=c(N,length(vars)), 33 | dimnames=list(NULL, vars) 34 | ) 35 | ) 36 | } 37 | 38 | 39 | 40 | smoke_test <-function(N, nvar, nblocks, all.positive=TRUE, ...){ 41 | e <- accountBalance(nvar, nblocks,all.positive) 42 | dat <- gen_data(e,N,...) 43 | el1 <- localizeErrors(e,dat,verbose=TRUE) 44 | el2 <- localizeErrors(e,dat,verbose=TRUE,method="mip") 45 | list(el1,el2,dat,e) 46 | } 47 | 48 | ## this shows that even a well-scaled problem may give different results between 49 | ## B&B and MIP 50 | S <- smoke_test(1000,nvar=7,2,distr=rnorm) 51 | diff <- S[[1]]$status$weight != S[[2]]$status$weight 52 | 53 | if (any(diff)) { 54 | w <- which(diff) 55 | 56 | BB <- S[[1]]$adapt[w,,drop=FALSE] 57 | MIP <- S[[2]]$adapt[w,,drop=FALSE] 58 | dat <- S[[3]][w,] 59 | E <- S[[4]] 60 | 61 | x <- dat[w[1],,drop=TRUE] 62 | elMIP <- errorLocalizer.mip(E,x) 63 | 64 | cbind(x=x, bb=BB[1,], mip=MIP[1,], mipfeas=elMIP$x_feasible) 65 | write.lp(elMIP$lp, "test.lp") 66 | } 67 | 68 | -------------------------------------------------------------------------------- /pkg/R/parseEdits.R: -------------------------------------------------------------------------------- 1 | #' Parse a character vector of edits 2 | #' 3 | #' This function wraps the native \code{parse} function in a \code{tryCatch}. 4 | #' The function is \code{editrules} internal. It tries to give a meaningfull error message when 5 | #' parsing fails for some reason. 6 | #' 7 | #' @param E \code{character} 8 | #' @param type optional filter for type of edit, may be \code{"all"}, \code{"num"} 9 | #' \code{"cat"} or \code{"mix"} 10 | #' @return The edits in \code{E} parsed to R expressions. 11 | #' 12 | #' @keywords internal 13 | parseEdits <- function(E, type=c("all", "num", "cat", "mix")){ 14 | if (is.expression(E)){ 15 | edits <- E 16 | } else { 17 | edits <- 18 | tryCatch(parse(text=E), 19 | error=function(e){ 20 | stop(paste("Not all edits can be parsed, parser returned", e$message,sep="\n")) 21 | }) 22 | } 23 | 24 | #TODO fix this: should work, but currently generates errors in the tests. 25 | # nms <- names(E) 26 | # if (!is.null(nms)){ 27 | # names(edits) <- make.unique(nms, sep="") 28 | # } 29 | 30 | type <- match.arg(type) 31 | if (type=="all"){ 32 | return(edits) 33 | } 34 | return(edits[editTypes(edits) == type]) 35 | } 36 | 37 | parseTree <- function(expr,prefix=NULL){ 38 | if (length(expr) == 1){ 39 | indent <- paste("[", prefix,"]", sep="", collapse="") 40 | cat(indent, expr,"\n") 41 | } 42 | else { 43 | for (i in 1:length(expr)){ 44 | parseTree(expr[[i]], c(prefix,i)) 45 | } 46 | } 47 | } 48 | 49 | # basic test for type of edit 50 | # edts \code{expression} 51 | # 52 | editTypes <- function(edts){ 53 | ops <- sapply(edts, function(e){deparse(e[[1]])}) 54 | 55 | type <- ifelse(ops %in% NUMCMP, "num", "cat") 56 | # todo add check for "==" 57 | 58 | iff <- ops == "if" 59 | mix <- sapply(edts[iff], hasNum) 60 | type[iff] <- ifelse(mix, "mix", "cat") 61 | as.factor(type) 62 | } 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /pkg/inst/script/bench/randomEdits.R: -------------------------------------------------------------------------------- 1 | # functions to generate some random catgegorical edits. 2 | 3 | 4 | # make a set expression 5 | setexpr <- function(var,cats){ 6 | paste(var,'%in% c(',paste(cats,collapse=","),')') 7 | } 8 | 9 | # declare a subset FALSE 10 | makeedit <- function(setexpr){ 11 | paste('if (',setexpr,') FALSE') 12 | } 13 | 14 | 15 | # generate m random edits 16 | genedits <- function(m,group){ 17 | require(editrules) 18 | # use Boskovitz group A or B edits 19 | if ( group=='A' ){ 20 | dk <- c( 21 | x.2.1 = 2, 22 | x.2.2 = 2, 23 | x.3.1 = 3, 24 | x.3.2 = 3, 25 | x.4.1 = 4, 26 | x.4.2 = 4, 27 | x.5.1 = 5, 28 | x.5.2 = 5, 29 | x.6.1 = 6, 30 | x.6.2 = 6, 31 | x.7.1 = 7, 32 | x.7.1 = 7, 33 | x.8.1 = 8 34 | ) 35 | } else if (group=='B') { 36 | dk <- c( 37 | x.2.1 = 2, 38 | x.2.2 = 2, 39 | x.3.1 = 3, 40 | x.4.2 = 4, 41 | x.5.1 = 5, 42 | x.6.1 = 6, 43 | x.7.1 = 7, 44 | x.8.1 = 8, 45 | x.9.1 = 9, 46 | x.10.1 = 10 47 | ) 48 | } 49 | vars <- names(dk) 50 | dm <- c() 51 | for ( v in vars ) dm <- c(dm,setexpr(v,1:dk[v])) 52 | 53 | # generate random edits 54 | n <- length(dk) 55 | edits <- character(m) 56 | for ( i in 1:m ){ 57 | invar <- sample(2:n,1) 58 | ivars <- sample(vars,invar,replace=FALSE) 59 | if ( i > 1 && !ivars1[1] %in% ivars ) ivars <- c(ivars,ivars[1]) 60 | ivars1 <- ivars 61 | s <- character() 62 | for ( iv in ivars ){ 63 | incat <- sample(1:(dk[iv]-1),1) 64 | icats <- sample(1:dk[iv],incat) 65 | s <- c(s,setexpr(iv,icats)) 66 | } 67 | edits[i] <- makeedit(paste(s,collapse=" & ")) 68 | } 69 | editarray(c(dm,edits)) 70 | } 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /pkg/R/c.R: -------------------------------------------------------------------------------- 1 | 2 | #' Combine editmatrices 3 | #' 4 | #' @method c editmatrix 5 | #' @rdname editmatrix 6 | #' @export 7 | #' 8 | c.editmatrix <- function(...){ 9 | ems <- list(...) 10 | ems <- ems[!sapply(ems,is.null)] 11 | ems <- ems[!sapply(ems,function(e) nedits(e)==0)] 12 | 13 | if (length(ems)==0) return(editmatrix(expression())) 14 | 15 | ems <- lapply(ems, as.editmatrix) 16 | vars <- sort(unique(unlist(lapply(ems, getVars.editmatrix)))) 17 | 18 | A <- lapply(ems, function(E){ 19 | a <- matrix(0, nrow=nrow(E), ncol=length(vars), dimnames=list(NULL, vars)) 20 | a[, getVars(E, type="colnames")] <- getA(E) 21 | a 22 | }) 23 | A <- do.call(rbind, A) 24 | 25 | ops <- unlist(lapply(ems, getOps)) 26 | b <- unlist(lapply(ems, getb)) 27 | nms <- lapply(ems, function(E){ 28 | rn <- rownames(E) 29 | if (is.null(rn)){ 30 | rn <- rep("num", nrow(E)) 31 | } 32 | rn 33 | }) 34 | nms <- make.unique(unlist(nms), sep="") 35 | rownames(A) <- nms 36 | 37 | as.editmatrix(A=A, ops=ops, b=b) 38 | } 39 | 40 | #' Combine editarrays 41 | #' @method c editarray 42 | #' @export 43 | #' @rdname editarray 44 | c.editarray <- function(...){ 45 | ems <- list(...) 46 | ems <- ems[!sapply(ems,is.null)] 47 | 48 | lvls <- sort(unique(unlist(lapply(ems, getlevels)))) 49 | seps <- unlist(sapply(ems, getSep)) 50 | #TODO handle seperators that are not equal to ":" 51 | stopifnot(all(seps==":")) 52 | 53 | B <- lapply(ems, function(E){ 54 | a <- matrix(TRUE, nrow=nrow(E), ncol=length(lvls), dimnames=list(rownames(E), lvls)) 55 | a[, getlevels(E)] <- getArr(E) 56 | a 57 | }) 58 | 59 | B <- do.call(rbind, B) 60 | cats <- sub("^.+:", "", lvls) 61 | vars <- sub(":.*$", "", lvls) 62 | ind <- seq_along(lvls) 63 | names(ind) <- cats 64 | ind <- split(ind, vars) 65 | neweditarray(B, ind, names=rownames(B), sep=":") 66 | } 67 | 68 | 69 | #' @method c editset 70 | #' @rdname editset 71 | #' @export 72 | #' 73 | c.editset <- function(...){ 74 | editset( unlist(lapply(list(...), as.character)) ) 75 | } 76 | 77 | -------------------------------------------------------------------------------- /pkg/R/echelon.R: -------------------------------------------------------------------------------- 1 | #' Bring an (edit) matrix to reduced row echelon form. 2 | #' 3 | #' If \code{E} is a matrix, a matrix in reduced row echelon form is returned. 4 | #' If \code{E} is an \code{\link{editmatrix}} the equality part of \code{E} is transformed 5 | #' to reduced row echelon form. For an \code{\link{editset}}, the numerical part is 6 | #' transformed to reduced row echelon form. 7 | #' 8 | #' @aliases echelon.editmatrix echelon.matrix 9 | #' 10 | #' @param E a matrix or editmatrix 11 | #' @param ... options to pass on to further methods. 12 | #' @export 13 | #' @seealso \code{\link{eliminate}}, \code{\link{substValue}} 14 | echelon <- function(E,...){ 15 | UseMethod("echelon") 16 | } 17 | 18 | 19 | 20 | #' @method echelon editmatrix 21 | #' @rdname echelon 22 | #' @export 23 | echelon.editmatrix <- function(E,...){ 24 | o <- getOps(E) 25 | # nothing to eliminate? 26 | eq <- o == "==" 27 | if ( sum(eq) <= 1 ) return(E) 28 | Ab <- getAb(E) 29 | Ab <- rbind( 30 | echelon.matrix(Ab[eq,,drop=FALSE]), 31 | Ab[!eq,,drop=FALSE] 32 | ) 33 | neweditmatrix(Ab,c(o[eq],o[!eq])) 34 | 35 | } 36 | 37 | #' @rdname echelon 38 | #' @method echelon matrix 39 | #' @param tol tolerance that will be used to determine if a coefficient equals zero. 40 | #' @export 41 | echelon.matrix <- function(E, tol=sqrt(.Machine$double.eps), ...){ 42 | k <- min(ncol(E),nrow(E)) 43 | I <- 1:nrow(E) 44 | for ( i in 1:k ){ 45 | I1 <- which(I >= i) 46 | ip <- I1[which.max(abs(E[I1,i]))] 47 | p <- E[ip,] 48 | if ( abs(p[i]) < tol ) next 49 | if ( ip > i ) E[c(ip,i),] <- E[c(i,ip),] 50 | E[-i,] <- E[-i,] - outer(E[-i,i],p/p[i]) 51 | } 52 | d <- diag(E) 53 | id <- abs(d) > tol 54 | E[id,] <- E[id,]/d[id] 55 | I0 <- rowSums(abs(E) < tol) == ncol(E) 56 | rbind(E[!I0,,drop=FALSE],E[I0,,drop=FALSE]) 57 | } 58 | 59 | #' @method echelon editset 60 | #' @rdname echelon 61 | #' @export 62 | #' 63 | echelon.editset <- function(E,...){ 64 | E$num <- echelon(E$num) 65 | E 66 | } 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /pkg/R/editfile.R: -------------------------------------------------------------------------------- 1 | 2 | #' Read edits edits from free-form textfile 3 | #' 4 | #' This utility function allows for free editrule definition in a file. One can extract 5 | #' only the numerical (\code{type='num'}), only the categorical (\code{type='cat'}) or all 6 | #' edits (default) in which case an \code{\link{editset}} is returned. 7 | #' The function first parses all assignments in the file, so it is possible to compute or read 8 | #' a list of categories defining a datamodel for example. 9 | #' 10 | #' @param file name of text file to read in 11 | #' @param type type of edits to extract. Currently, only 'num' (numerical), 'cat' (categorical) and 'all' are implemented. 12 | #' 13 | #' @param ... extra parameters that are currently ignored 14 | #' 15 | #' @return \code{\link{editset}} with all edits if \code{type=all}, \code{\link{editarray}} if \code{type='cat'}, 16 | #' \code{\link{editmatrix}} if \code{type='num'}, \code{\link{editset}} with conditional edits if \code{type='mix'}. 17 | #' If the return value is a \code{list}, the elements are named \code{numedits} and \code{catedits}. 18 | #' 19 | #' @export 20 | editfile <- function(file,type=c("all","num","cat","mix"), ...){ 21 | # TODO: include expandEdits? 22 | type <- match.arg(type) 23 | if (!type %in% c('num','cat','all')) stop(paste("type",type,"invalid or not implemented yet")) 24 | p <- parse(file=file) 25 | ass <- sapply(p,class) %in% c('<-','=') 26 | e <- new.env() 27 | lapply(p[ass],eval,envir=e) 28 | edits <- p[!ass] 29 | # substitute constant assignments in rules. The if-statement prevents conversion to list() 30 | # if the file being read has no actual rules in it. 31 | if ( length(edits)>0 ){ 32 | edits <- sapply(edits,function(x) as.expression(do.call(substitute,list(x,e))) ) 33 | } 34 | et <- editTypes(edits) 35 | numedits <- edits[et == 'num'] 36 | catedits <- edits[et == 'cat'] 37 | mixedits <- edits[et == 'mix'] 38 | switch(type, 39 | num = editmatrix(numedits), 40 | cat = editarray(catedits,env=e), 41 | mix = editset(mixedits,env=e), 42 | all = editset(edits,env=e) 43 | ) 44 | } 45 | 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/data-cleaning/editrules.svg)](https://travis-ci.org/data-cleaning/editrules) 2 | [![CRAN](http://www.r-pkg.org/badges/version/editrules)](http://cran.r-project.org/package=editrules/) 3 | [![Downloads](http://cranlogs.r-pkg.org/badges/editrules)](http://www.r-pkg.org/pkg/editrules) 4 | [![status](https://tinyverse.netlify.app/badge/editrules)](https://CRAN.R-project.org/package=editrules) 5 | 6 | **`editrules` has been succeeded by R packages: [validate](http://cran.r-project.org/package=validate) and [errorlocate](http://cran.r-project.org/package=errorlocate)** 7 | 8 | editrules 9 | ========= 10 | 11 | R package for parsing edit rules 12 | The editrules package aims to provide an environment to conveniently define, read and check recordwise data constraints including 13 | * Linear (in)equality constraints for numerical data 14 | * Constraints on value combinations of categorical data 15 | * Conditional constraints on numerical and/or mixed data 16 | 17 | In literature these constraints, or restrictions are refered to as _edits_. 18 | editrules can perform common rule set manipulations like variable elimination and value substitution, 19 | and offers error localization functionality based on the (generalized) paradigm of Fellegi and Holt. 20 | Under this paradigm, one determines the smallest (weighted) number of variables to adapt such that no 21 | (additional or derived) rules are violated. The paradigm is based on the assumption that errors are distributed 22 | randomly over the variables and there is no detectable cause of error. 23 | It also decouples the detection of corrupt variables from their correction. 24 | For some types of error, such as sign flips, typing errors or rounding errors, this assumption does not hold. 25 | These errors can be detected and are closely related to their resolution. 26 | The reader is referred to the deducorrect package for treating such errors. 27 | 28 | To install the latest version in R: 29 | ```R 30 | install.packages("editrules") 31 | ``` 32 | 33 | To get started, see the [editrules vignette](https://cran.r-project.org/web/packages/editrules/vignettes/editrules-vignette.pdf). 34 | 35 | -------------------------------------------------------------------------------- /pkg/inst/script/bench/benchMIP.R: -------------------------------------------------------------------------------- 1 | #------several benchmarks for mip testing 2 | library(editrules) 3 | library(lpSolveAPI) 4 | 5 | genRandomEM <- function(nvar=10, nedits=10){ 6 | i <- 1 7 | while(TRUE){ 8 | A <- matrix(as.integer(rnorm(nvar*nedits)), ncol=nvar) 9 | ops <- rep("<=", nedits) 10 | b <- as.integer(rnorm(nedits)) 11 | E <- as.editmatrix(A, ops=ops, b=b) 12 | cat("\r",i,": Checking feasibility") 13 | try(if (isFeasible(E)) { 14 | x <- as.integer(rnorm(nvar)) 15 | names(x) <- getVars(E) 16 | return(list(E=E, x=x)) 17 | }) 18 | cat("\r") 19 | i <- i + 1 20 | } 21 | } 22 | 23 | genOrderedVars <- function(nvar=10){ 24 | I <- diag(nvar) 25 | A <- cbind(I[,-1], 0) - I 26 | as.editmatrix(A, ops=rep("<=", nvar)) 27 | } 28 | 29 | testOrdered <- function(nvars=10, method="localizer"){ 30 | E <- genOrderedVars(nvars) 31 | res <- NULL 32 | data <- as.data.frame(matrix(0, ncol=nvars, nrow=nvars, dimnames=list(errors=NULL, vars=getVars(E)))) 33 | for (errors in seq_len(nvars)){ 34 | data[errors,seq_len(errors)] <- rev(seq_len(errors)) # creates errors... 35 | } 36 | 37 | 38 | le <- localizeErrors(E, data, method=method) 39 | 40 | cbind(le$status, nvars=nvars, method=method) 41 | } 42 | 43 | f <- file("benchordered.txt", open="wt") 44 | writeLines(paste(c("weight", "degeneracy", "user", "system", "elapsed", "maxDurationExceeded", 45 | "nvars", "method"), collapse="\t"), f) 46 | 47 | m <- seq_len(100) 48 | for (i in m){ 49 | cat("nvars = ",i,"....") 50 | res <- testOrdered(nvars=i, method="mip") 51 | cat("\nWriting results\n") 52 | write.table(res, f, col.names=FALSE, row.names=FALSE) 53 | flush(f) 54 | } 55 | for (i in m){ 56 | cat("nvars = ",i,"....") 57 | res <- testOrdered(nvars=i, method="localizer") 58 | cat("\nWriting results\n") 59 | write.table(res, f, col.names=FALSE, row.names=FALSE) 60 | flush(f) 61 | } 62 | 63 | close(f) 64 | 65 | tab <- read.table("benchordered.txt", header=T) 66 | str(tab) 67 | 68 | #E <- genRandom2(nvar=100, nedits=20, r=4) 69 | #E 70 | 71 | # Ex <- genRandomEM(nvar=100, nedits=10) 72 | # errorLocalizer.mip(Ex$E,Ex$x, verbose=TRUE) 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testSubstValue.R: -------------------------------------------------------------------------------- 1 | 2 | context("Value substitution") 3 | 4 | test_that("value substitution for numerical data",{ 5 | E <- editmatrix(c("x+y==z","x-u==1")) 6 | expect_true( 7 | sum(abs( 8 | getAb(substValue(E,c('x','y'),c(1,2)))- 9 | matrix(c( 10 | 0, 0,-1, 0,-3, 11 | 0, 0, 0,-1, 0),nrow=2,byrow=TRUE) 12 | )) == 0 13 | ) 14 | expect_true( 15 | sum(abs( 16 | getAb(substValue(E,c('x','y'),c(1,2),reduce=TRUE))- 17 | matrix(c( 18 | -1, 0,-3, 19 | 0,-1, 0),nrow=2,byrow=TRUE) 20 | )) == 0 21 | ) 22 | }) 23 | 24 | 25 | test_that("value substitution for categorical data",{ 26 | 27 | E <- editarray(c( 28 | "v %in% letters[1:3]", 29 | "w %in% letters[4:6]", 30 | "x %in% letters[7:9]", 31 | "if ( v=='a' ) w !='d'", 32 | "if ( w %in% c('d','e')) x != 'g'" 33 | )) 34 | expect_true( 35 | all( 36 | getArr(substValue(E,c('v','w'),c('a','d'))) 37 | == 38 | matrix(c( 39 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 40 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE 41 | ),nrow=c(2),byrow=TRUE) 42 | ) 43 | ) 44 | expect_true( 45 | all( 46 | getArr(substValue(E,c('v','w'),c('a','d'),reduce=TRUE)) 47 | == 48 | matrix(c( 49 | TRUE, TRUE, TRUE, TRUE, TRUE, 50 | TRUE, TRUE, TRUE, FALSE, FALSE 51 | ),nrow=c(2),byrow=TRUE) 52 | ) 53 | ) 54 | 55 | 56 | }) 57 | 58 | test_that("value substitution works for boolean data",{ 59 | E <- editarray(c( 60 | "g %in% c('m','f')", 61 | "p %in% c(TRUE, FALSE)")) 62 | expect_true(nrow(getArr(substValue(E,'p',TRUE)))==0) 63 | }) 64 | 65 | test_that("Bug reported by Sander Scholtus is fixed",{ 66 | # this used to cause a crash after upgrading to R>=3.x.x 67 | E1 <- editset(expression( 68 | A %in% letters[1:2], 69 | B %in% letters[2:3], 70 | if ( (A == 'a') ) x > y 71 | )) 72 | E1 <- substValue(E1, var = "x", val = 1) 73 | }) 74 | 75 | 76 | -------------------------------------------------------------------------------- /pkg/R/blocks.R: -------------------------------------------------------------------------------- 1 | #' Decompose a matrix or edits into independent blocks 2 | #' 3 | #' \code{blocks} returns a \code{list} of independent blocks \eqn{M_i} such that 4 | #' \eqn{M=M_1\oplus M_2\oplus\cdots\oplus M_n}. 5 | #' 6 | #' @param M \code{matrix}, \code{\link{editmatrix}}, editarray or editset to be decomposed into independent blocks 7 | #' @return list of independent subobjects of \code{M}. 8 | #' @example ../examples/blocks.R 9 | #' @export 10 | blocks <- function(M){ 11 | blocks <- blockIndex(contains(M)) 12 | lapply( blocks, 13 | function(b){ 14 | reduce(M[b,,drop=FALSE]) 15 | } 16 | ) 17 | } 18 | 19 | 20 | #' \code{blockIndex} returns a list of row indices in a \code{logical} matrix \code{D} designating independent blocks. 21 | #' 22 | #' 23 | #' @param D matrix of type \code{logical} 24 | #' @return \code{list} of row indices in \code{D} indicating independent blocks. 25 | #' Empty rows (i.e. every column \code{FALSE}) are ignored. 26 | #' 27 | #' @rdname blocks 28 | #' @export 29 | blockIndex <- function(D){ 30 | 31 | block <- function(B){ 32 | x1 <- FALSE 33 | x <- B[1,] 34 | while (sum(x1 != x)){ 35 | x1 <- x 36 | b <- sapply( 1:nrow(B) 37 | , function(i){ 38 | any(B[i,] & x) 39 | } 40 | ) 41 | x <- colSums(B[b,,drop=FALSE]) > 0 #this is another way of "or"ring all found rows 42 | } 43 | b 44 | } 45 | orignames <- row.names(D) 46 | row.names(D) <- 1:nrow(D) 47 | 48 | #remove empty rows 49 | b <- rowSums(D) == 0 50 | D <- D[!b,,drop=FALSE] 51 | 52 | # create a list which will contain the blocks 53 | blocks <- list() 54 | L <- 1 55 | 56 | # detect and remove blocks until no blocks are left 57 | while (nrow(D) > 0){ 58 | 59 | # find block 60 | b <- block(D) 61 | 62 | # store the original row numbers of the detected block 63 | blocks[[L]] <- as.integer(row.names(D)[b]) 64 | L <- L + 1 65 | 66 | # remove the detected block 67 | D <- D[!b,,drop=FALSE] 68 | } 69 | lapply(blocks,function(b) {names(b)<-orignames[b]; b}) 70 | } 71 | -------------------------------------------------------------------------------- /pkg/inst/script/bench/benchmip_categorical.R: -------------------------------------------------------------------------------- 1 | library(editrules) 2 | FILE ="benchmip_categorical.txt" 3 | 4 | #' Create account balance 5 | generate_E <- function(nvar=10){ 6 | vars <- paste0("A", seq_len(nvar)) 7 | n <- nvar-1 8 | 9 | # if all TRUE no errors. FALSE values introduce errors 10 | edits <- c( paste0(vars, " %in% c(TRUE,FALSE)") 11 | , paste0("if (",head(vars, -1), ")", tail(vars, -1)) 12 | ) 13 | edits <- c(edits, "A1 == TRUE") 14 | editarray(edits) 15 | } 16 | 17 | generate_data <- function(E, nerrors=0){ 18 | vars <- getVars(E) 19 | n <- length(vars) 20 | 21 | x <- sapply(vars, function(v) TRUE) 22 | x1 <- x 23 | x1[seq_len(nerrors)] <- FALSE 24 | 25 | x2 <- x 26 | x2[1+n - seq_len(nerrors)] <- FALSE 27 | 28 | x3 <- x 29 | x3[round((n-nerrors)/2) + seq_len(nerrors)] <- FALSE 30 | as.data.frame(rbind(x1,x2,x3)) 31 | } 32 | 33 | 34 | bench <- function(nvars = 10, nerrors=9, method="bb"){ 35 | 36 | init <- !file.exists(FILE) 37 | txt <- file(FILE, "at") 38 | on.exit(close(txt)) 39 | 40 | errorloc <- c("begin", "end", "middle") 41 | 42 | if (nerrors >= nvars) stop("nvars cannot be less than nerrors") 43 | for (ne in seq_len(nerrors)){ 44 | for (nvar in seq(from=ne+1, to=nvars)){ 45 | try({ 46 | E <- generate_E(nvar) 47 | data <- generate_data(E, ne) 48 | cat("\r nvar=", nvar, " ne=", ne, " method=", method) 49 | le <- localizeErrors(E, data, method=method) 50 | rpt <- cbind(method=method, nvar=nvar, nerrors=ne, errorloc=errorloc, le$status) 51 | write.table(rpt, file=txt, col.names=init, row.names=FALSE) 52 | init <- FALSE 53 | flush(txt) 54 | }) 55 | gc() 56 | #print(rpt) 57 | } 58 | } 59 | } 60 | 61 | if (file.exists(FILE)) file.remove(FILE) 62 | 63 | bench(50,10, method="mip") 64 | bench(50,10, method="bb") 65 | 66 | dat <- read.table(FILE, header=TRUE) 67 | library(ggplot2) 68 | qplot(data=dat, y=elapsed, x=nvar, color=method, facets=nerrors~method, shape=errorloc) + geom_jitter() 69 | ggsave("benchmip_categorical.png") 70 | # 71 | 72 | 73 | ### quick testing 74 | # (E <- generate_E()) 75 | # (dat <- generate_data(E, nerrors=3)) 76 | # localizeErrors(E,dat, method="mip") 77 | # editrules:::errorLocalizer.mip(E,dat[1,]) 78 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testParseEdits.R: -------------------------------------------------------------------------------- 1 | require(testthat) 2 | 3 | 4 | context("Parsing") 5 | 6 | test_that("parseEdits all works",{ 7 | x <- c( "2*x < 1" 8 | , "if (A=='a') B == 'b'" 9 | , "if (A=='a') B == FALSE" 10 | , "if (A=='a') B > 1" 11 | , "if (c==1) B || C == FALSE" 12 | ) 13 | e <- parseEdits(x) 14 | expect_equal(length(e), 5) 15 | }) 16 | 17 | test_that("parseEdits num works",{ 18 | x <- c( "2*x < 1" 19 | , "if (A=='a') B == 'b'" 20 | , "if (A=='a') B == FALSE" 21 | , "if (A=='a') B > 1" 22 | , "if (c==1) B || C == FALSE" 23 | ) 24 | e <- parseEdits(x, "num") 25 | expect_equal(length(e), 1) 26 | u <- new.env(); u$x = 1 27 | expect_equal(eval(e,u),eval(expression(2*x < 1), u)) 28 | }) 29 | 30 | test_that("parseEdits cat works",{ 31 | x <- c( "2*x < 1" 32 | , "if (A=='a') B == 'b'" 33 | , "if (A=='a') B == FALSE" 34 | , "if (A=='a') B > 1" 35 | , "if (c==1) B || C == FALSE" 36 | ) 37 | e <- parseEdits(x, "cat") 38 | expect_equal(length(e), 2) 39 | # test fails: equivalence of expressions cannot be tested like this. 40 | # expect_equivalent(e, expression( if (A == "a") B == "b" 41 | # , if (A == "a") B == FALSE 42 | # ) 43 | # ) 44 | }) 45 | 46 | test_that("parseEdits mix works",{ 47 | x <- c( "2*x < 1" 48 | , "if (A=='a') B == 'b'" 49 | , "if (A=='a') B == FALSE" 50 | , "if (A=='a') B > 1" 51 | , "if (c==1) B || C == FALSE" 52 | ) 53 | e <- parseEdits(x, "mix") 54 | expect_equal(length(e), 2) 55 | # test fails: equivalence of expressions cannot be tested like this. 56 | # expect_equivalent(e, expression( if (A == "a") B > 1 57 | # , if (c == 1) B || C == FALSE 58 | # ) 59 | # , label=deparse(e) 60 | # ) 61 | }) 62 | 63 | context("editfile") 64 | test_that("assignments are parsed by editfile",{ 65 | e <- editfile("edit_test_1.txt",type='num') 66 | expect_equivalent(getA(e),array(c(-10,1),dim=c(1,2))) 67 | }) 68 | 69 | test_that("empty files are parsed correctly",{ 70 | expect_equal( 71 | nedits(editfile(textConnection("#test"),type='num')) 72 | , 0 73 | ) 74 | }) 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /pkg/inst/script/bench/benchmip_mixed2.R: -------------------------------------------------------------------------------- 1 | library(editrules) 2 | FILE ="benchmip_mixed2.txt" 3 | 4 | generate_E <- function(nvar=10){ 5 | s <- seq_len(nvar) 6 | num_vars <- paste0("x", s) 7 | edits <- "x1>=0" 8 | if (nvar > 1){ 9 | edits <- c(edits, 10 | paste0("if (", head(num_vars, -1)," >= 0)", tail(num_vars, -1), ">= 0")) 11 | } 12 | editset(edits) 13 | } 14 | 15 | error <- function(x){ 16 | lapply(x, function(v){ 17 | if (is.logical(v)) FALSE else -1 18 | }) 19 | } 20 | 21 | generate_data <- function(E, nerrors=0){ 22 | num_vars <- getVars(E,"num") 23 | 24 | x <- numeric(length(num_vars)) 25 | names(x) <- num_vars 26 | n <- length(x) 27 | 28 | x1 <- x 29 | idx <- seq_len(nerrors) 30 | x1[idx] <- -1 31 | 32 | x2 <- x 33 | idx <- 1+n - seq_len(nerrors) 34 | x2[idx] <- -1 35 | 36 | x3 <- x 37 | idx <- (round(n-nerrors)/2) + seq_len(nerrors) 38 | x3[idx] <- -1 39 | 40 | as.data.frame(rbind(x1,x2,x3)) 41 | } 42 | 43 | 44 | bench <- function(nvars = 10, nerrors=10, method="bb", maxduration=200){ 45 | 46 | init <- !file.exists(FILE) 47 | txt <- file(FILE, "at") 48 | on.exit(close(txt)) 49 | 50 | errorloc <- c("begin", "end", "middle") 51 | 52 | if (nerrors >= nvars) stop("nvars cannot be less than nerrors") 53 | for (nvar in seq_len(nvars)){ 54 | E <- generate_E(nvar) 55 | for (ne in seq(1, min(nerrors, nvar))){ 56 | try({ 57 | data <- generate_data(E, ne) 58 | # only select middle 59 | data <- data[3,,drop=FALSE] 60 | errorloc_m <- errorloc[3] 61 | 62 | cat("\r nvar=", nvar, " ne=", ne, " method=", method) 63 | le <- localizeErrors(E, data, method=method, maxduration=maxduration) 64 | rpt <- cbind(method=method, nvar=nvar, nerrors=ne, errorloc=errorloc_m, le$status) 65 | 66 | write.table(rpt, file=txt, col.names=init, row.names=FALSE) 67 | 68 | init <- FALSE 69 | flush(txt) 70 | }) 71 | gc() 72 | } 73 | } 74 | } 75 | 76 | if (file.exists(FILE)) file.remove(FILE) 77 | 78 | bench(50,10, method="mip") 79 | bench(50,10, method="bb") 80 | 81 | # dat <- read.table(FILE, header=TRUE) 82 | # library(ggplot2) 83 | # qplot(data=dat, y=elapsed, x=nvar, color=method, facets=nerrors~method, shape=errorloc) + geom_jitter() 84 | # ggsave("benchmip_categorical.png") 85 | 86 | 87 | ### quick testing 88 | E <- generate_E(4) 89 | E 90 | generate_data(E, nerrors=2) 91 | -------------------------------------------------------------------------------- /pkg/R/expandEdits.R: -------------------------------------------------------------------------------- 1 | #' Expand an edit expression 2 | #' 3 | #' Often many numeric variables have the same constraints. \code{expandEdits} is 4 | #' a utility function to define edits for multiple variables. See the examples for the syntax. 5 | #' @param s edit expression, can be a \code{character} or \code{expression} vector 6 | #' @param prefix prefix for variables to be expanded 7 | #' @param useSum if \code{TRUE} sum expressions will be expanded 8 | #' @param asExpression if \code{TRUE} an \code{\link{expression}} will be returned in stead of a \code{character} 9 | #' @param env enviroment that will be used to find variables to be expanded 10 | #' @param ... variables used in the expansion 11 | #' @return \code{character} or \code{expression} vector with expanded expressions 12 | #' @keywords internal 13 | expandEdits <- function(s, prefix="_", useSum=TRUE, asExpression=is.language(s), env=parent.frame(), ...){ 14 | #TODO replace special regex character in prefix with escaped character. 15 | 16 | force(asExpression) 17 | 18 | if (is.expression(s)){ 19 | s <- as.character(s) 20 | } 21 | 22 | if (is.language(s)){ 23 | s <- deparse(s) 24 | } 25 | 26 | if (length(s) > 1){ 27 | return(lapply(s, expandEdits, prefix=prefix, useSum=useSum, ...)) 28 | } 29 | 30 | l <- as.list(env) 31 | vars <- list(...) 32 | l[names(vars)] <- vars 33 | 34 | varnms <- paste(prefix,names(l), sep="") 35 | used <- sapply(varnms, grepl, s) 36 | varnms <- varnms[used] 37 | l <- l[used] 38 | 39 | if (useSum) { 40 | sumnms <- paste("\\bsum", names(l), sep=prefix) 41 | sumregex1 <- paste(sumnms, "\\((.+?)\\)", sep="") 42 | sumregex2 <- paste(sumnms, "\\((.+?)\\).+", sep="") 43 | 44 | vars <- names(l) 45 | for (i in seq_along(vars)){ 46 | if (length(grep(sumregex2[i], s))){ 47 | sumvars <- sub(sumregex2[i], "\\1", s) 48 | sumvars <- do.call(expandEdits, list(s=sumvars, env=l[vars[i]], prefix=prefix)) 49 | sumvars <- paste(sumvars, collapse=" + ") 50 | s <- sub(sumregex1[i], sumvars, s) 51 | l[[vars[i]]] <- NULL 52 | } 53 | } 54 | } 55 | 56 | varnms <- paste(prefix,names(l), sep="") 57 | for (i in seq_along(l)){ 58 | if (length(grep(varnms[i], s))) { 59 | s <- sapply(l[[i]], function(j) gsub(varnms[i],j,s)) 60 | } 61 | } 62 | 63 | if (is.array(s)) { 64 | dimnames(s) <- l 65 | } else if (is.vector(s) && length(l)){ 66 | names(s) <- l[[1]] 67 | } 68 | if (asExpression){ 69 | parse(text=s) 70 | } else { 71 | s 72 | } 73 | } 74 | 75 | -------------------------------------------------------------------------------- /pkg/inst/script/bench/benchmip_balance.R: -------------------------------------------------------------------------------- 1 | library(editrules) 2 | FILE ="benchmip_balance.txt" 3 | 4 | #' Create account balance 5 | generate_balance <- function(nvar=15){ 6 | i <- seq_len(floor((nvar)/2)) 7 | edits <- c( "x1 >= 0" 8 | , paste0("x",i,"==", "x", 2*i, "+", "x", 2*i + 1) 9 | ) 10 | 11 | vars <- paste0("x",seq_len(nvar)) 12 | 13 | if (nvar>1){ 14 | #edits <- c(edits, paste0(head(vars,-1), ">=", tail(vars,-1))) 15 | edits <- c(edits, paste0("x1 >= ", tail(vars,-1))) 16 | } 17 | 18 | E <- editmatrix(edits) 19 | if (nvar %% 2 == 0){ 20 | E[,-(nvar+1)] 21 | } else { 22 | E 23 | } 24 | } 25 | 26 | generate_data <- function(E, nerrors=0){ 27 | vars <- getVars(E) 28 | n <- length(vars) 29 | 30 | x <- sapply(vars, function(v) 0) 31 | x1 <- x 32 | x1[seq_len(nerrors)] <- -1 33 | 34 | x2 <- x 35 | x2[1+n - seq_len(nerrors)] <- -1 36 | 37 | x3 <- x 38 | x3[round((n-nerrors)/2) + seq_len(nerrors)] <- -1 39 | as.data.frame(rbind(x1,x2,x3)) 40 | } 41 | 42 | 43 | bench <- function(nvars = 10, nerrors=10, method="bb"){ 44 | 45 | init <- !file.exists(FILE) 46 | txt <- file(FILE, "at") 47 | on.exit(close(txt)) 48 | 49 | errorloc <- c("begin", "end", "middle") 50 | 51 | if (nerrors > nvars) stop("nvars cannot be less than nerrors") 52 | for (nvar in seq_len(nvars)){ 53 | for (ne in seq(1, min(nerrors, nvar))){ 54 | try({ 55 | E <- generate_balance(nvar) 56 | data <- generate_data(E, ne) 57 | cat("\r nvar=", nvar, " ne=", ne, " method=", method) 58 | le <- localizeErrors(E, data, method=method) 59 | rpt <- cbind(method=method, nvar=nvar, nerrors=ne, errorloc=errorloc, le$status) 60 | 61 | write.table(rpt, file=txt, col.names=init, row.names=FALSE) 62 | init <- FALSE 63 | flush(txt) 64 | }) 65 | gc() 66 | } 67 | } 68 | } 69 | 70 | ## quick testing 71 | start <- function(){ 72 | if (file.exists(FILE)) file.remove(FILE) 73 | 74 | bench(100,10, method="mip") 75 | bench(50,10, method="bb") 76 | 77 | dat <- read.table(FILE, header=TRUE) 78 | library(ggplot2) 79 | qplot(data=dat, y=elapsed, x=nvar, color=method, facets=nerrors~method, shape=errorloc, geom=c("point", "line")) + geom_jitter() 80 | ggsave("benchmip_balance.png") 81 | sdat <- subset(dat, errorloc=="middle") 82 | qplot(data=sdat, x=nvar, y=elapsed, color=method, group=nerrors, geom="line", 83 | facets=~method, ylim=c(0,150)) 84 | } 85 | #View(dat) 86 | # n <- 4 87 | # (E <- generate_balance(n)) 88 | # (dat <- generate_data(E,1)) 89 | # localizeErrors(E, dat, method="mip") -------------------------------------------------------------------------------- /examples/graph.R: -------------------------------------------------------------------------------- 1 | 2 | ## Examples with linear (in)equality edits 3 | 4 | # load predefined edits from package 5 | data(edits) 6 | edits 7 | 8 | # convert to editmatrix 9 | E <- editmatrix(edits) 10 | 11 | ## Not run: 12 | # (Note to reader: the Not run directive only prevents the examle commands from 13 | # running when package is built) 14 | 15 | # Total edit graph 16 | plot(E) 17 | 18 | # Graph with dependent edits 19 | plot(E, nodetype="rules") 20 | 21 | # Graph with dependent variables 22 | plot(E, nodetype="vars") 23 | 24 | # Total edit graph, but with curved lines (option from igraph package) 25 | plot(E, edge.curved=TRUE) 26 | 27 | 28 | # graph, plotting just the connections caused by variable 't' 29 | plot(E,vars='t') 30 | 31 | ## End(Not run) 32 | 33 | # here's an example with a broken record. 34 | r <- c(ct = 100, ch = 30, cp = 70, p=30,t=130 ) 35 | violatedEdits(E,r) 36 | errorLocalizer(E,r)$searchBest()$adapt 37 | 38 | # we color the violated edits and the variables that have to be adapted 39 | 40 | ## Not run 41 | set.seed(1) # (for reprodicibility) 42 | plot(E, 43 | adapt=errorLocalizer(E,r)$searchBest()$adapt, 44 | violated=violatedEdits(E,r)) 45 | ## End(Not run) 46 | 47 | 48 | 49 | # extract total graph (as igraph object) 50 | as.igraph(E) 51 | 52 | # extract graph with edges related to variable 't' and 'ch' 53 | as.igraph(E,vars=c('t','ch')) 54 | 55 | # extract total adjacency matrix 56 | adjacency(E) 57 | 58 | # extract adjacency matrix related to variables t and 'ch' 59 | adjacency(E,vars=c('t','ch')) 60 | 61 | ## Examples with categorical edits 62 | 63 | # generate an editarray: 64 | E <- editarray(expression( 65 | age %in% c('<15','16-65','>65'), 66 | employment %in% c('unemployed','employed','retired'), 67 | salary %in% c('none','low','medium','high'), 68 | if (age == '<15') employment=='unemployed', 69 | if (salary != 'none') employment != 'unemployed', 70 | if (employment == 'unemployed') salary == 'none')) 71 | 72 | 73 | ## Not run: 74 | # plot total edit graph 75 | plot(E) 76 | 77 | # plot with a different layout 78 | plot(E,layout=layout.circle) 79 | 80 | # plot edit graph, just the connections caused by 'salary' 81 | plot(E,vars='salary') 82 | 83 | ## End(Not run) 84 | 85 | # extract edit graph 86 | as.igraph(E) 87 | 88 | # extract edit graph, just the connections caused by 'salary' 89 | as.igraph(E,vars='salary') 90 | 91 | # extract adjacency matrix 92 | adjacency(E) 93 | 94 | # extract adjacency matrix, only caused by 'employment' 95 | adjacency(E,vars='employment') 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /papers/tex/smalltree.sty: -------------------------------------------------------------------------------- 1 | 2 | \setlength{\unitlength}{0.012mm} 3 | \definecolor{grey}{rgb}{0.8,0.8,0.8} 4 | 5 | \newcommand{\greycircle}{\textcolor{grey}{\circle*{100}}} 6 | \newcommand{\blackcircle}{\circle*{100}} 7 | \newcommand{\treeframe}{ 8 | \thinlines 9 | \put(700,900){\textcolor{grey}{\line( 2,-1){400}}} 10 | \put(700,900){\textcolor{grey}{\line(-2,-1){400}}} 11 | 12 | \multiput(300,700)(800,000){2}{\textcolor{grey}{\line( 1,-2){200}}} 13 | \multiput(300,700)(800,000){2}{\textcolor{grey}{\line(-1,-2){200}}} 14 | 15 | \multiput(100,300)(400,000){4}{\textcolor{grey}{\line(-1,-3){100}}} 16 | \multiput(100,300)(400,000){4}{\textcolor{grey}{\line( 1, -3){100}}} 17 | 18 | \put(700,900){\blackcircle} 19 | \multiput(300,700)(800,000){2}{\greycircle} 20 | \multiput(100,300)(400,000){4}{\greycircle} 21 | \multiput(000,000)(200,000){8}{\greycircle} 22 | \thicklines 23 | } 24 | \newcommand{\Troot}{\put(700,900){\blackcircle}} 25 | \newcommand{\Tl}{\put(300,700){\blackcircle}} 26 | \newcommand{\Tr}{\put(1100,700){\blackcircle}} 27 | \newcommand{\Tll}{\put(100,300){\blackcircle}} 28 | \newcommand{\XTll}{\put(-20,215){{$-$}}} 29 | \newcommand{\Tlr}{\put(500,300){\blackcircle}} 30 | \newcommand{\Trl}{\put(900,300){\blackcircle}} 31 | \newcommand{\Trr}{\put(1300,300){\blackcircle}} 32 | \newcommand{\XTrr}{\put(1171,215){$-$}} 33 | 34 | 35 | \newcommand{\Tlll}{\put(000,000){\blackcircle}} 36 | \newcommand{\Tllr}{\put(200,000){\blackcircle}} 37 | \newcommand{\Tlrl}{\put(400,000){\blackcircle}} 38 | \newcommand{\Tlrr}{\put(600,000){\blackcircle}} 39 | \newcommand{\XTlrr}{\put(471,-85){$-$}} 40 | \newcommand{\Trll}{\put(800,000){\blackcircle}} 41 | \newcommand{\XTrll}{\put(671,-85){$-$}} 42 | \newcommand{\Trlr}{\put(1000,000){\blackcircle}} 43 | \newcommand{\XTrlr}{\put(871,-85){$-$}} 44 | \newcommand{\Trrl}{\put(1200,000){\blackcircle}} 45 | \newcommand{\Trrr}{\put(1400,000){\blackcircle}} 46 | 47 | \newcommand{\El}{\put(700,900){\line(-2,-1){400}}} 48 | \newcommand{\Er}{\put(700,900){\line( 2,-1){400}}} 49 | \newcommand{\Ell}{\put(300,700){\line(-1,-2){200}}} 50 | \newcommand{\Elr}{\put(300,700){\line( 1,-2){200}}} 51 | \newcommand{\Erl}{\put(1100,700){\line(-1,-2){200}}} 52 | \newcommand{\Err}{\put(1100,700){\line( 1,-2){200}}} 53 | 54 | \newcommand{\Elll}{\put(100,300){\line(-1,-3){100}}} 55 | \newcommand{\Ellr}{\put(100,300){\line( 1,-3){100}}} 56 | \newcommand{\Elrl}{\put(500,300){\line(-1,-3){100}}} 57 | \newcommand{\Elrr}{\put(500,300){\line( 1,-3){100}}} 58 | \newcommand{\Erll}{\put(900,300){\line(-1,-3){100}}} 59 | \newcommand{\Erlr}{\put(900,300){\line( 1,-3){100}}} 60 | \newcommand{\Errl}{\put(1300,300){\line(-1,-3){100}}} 61 | \newcommand{\Errr}{\put(1300,300){\line( 1,-3){100}}} 62 | 63 | -------------------------------------------------------------------------------- /develop/errorLocalizer_alt.R: -------------------------------------------------------------------------------- 1 | # alternative implementation of errorlocalizer 2 | # differences: 3 | # *) returns all solutions, in stead of one by one 4 | # *) slightly faster then normal implementation (but not very) 5 | # *) uses a recursive function in stead of nested stack, inner workings are more understandable, but original 6 | # is better debugable using VERBOSE=TRUE and way more flexible. 7 | require(editrules) 8 | 9 | eloc <- function(E){ 10 | 11 | if ( !isNormalized(E) ) 12 | E <- normalize(E) 13 | 14 | btx <- function(x, weight=rep(1,length(x))){ 15 | adapt <- is.na(x) 16 | names(adapt) <- names(x) 17 | 18 | #order decreasing by weight 19 | o <- order(weight, decreasing=TRUE) 20 | totreat <- names(x)[o[!adapt]] 21 | 22 | # Eliminate missing variables. 23 | for (v in names(x)[is.na(x)]) E <- eliminateFM(E,v) 24 | 25 | wsol <- sum(weight) 26 | 27 | bt <- function(E, totreat, adapt, sol){ 28 | w <- sum(weight[adapt]) 29 | 30 | #reject 31 | if ( w > wsol || isObviouslyInfeasible(E) ){ 32 | return(sol) 33 | } 34 | 35 | #accept 36 | if (length(totreat) == 0){ 37 | wsol <<- w 38 | sol[[length(sol)+1]] <- list(w=w, adapt=adapt) 39 | return(sol) 40 | } 41 | 42 | var <- totreat[1] 43 | 44 | sol <- bt( E = substValue(E, var , x[var]) 45 | , totreat = totreat[-1] 46 | , adapt = adapt 47 | , sol = sol 48 | ) 49 | 50 | sol <- bt( E = eliminateFM(E, var , x[var]) 51 | , totreat = totreat[-1] 52 | , adapt = {a <- adapt; a[var] <- TRUE;a} 53 | , sol = sol 54 | ) 55 | sol 56 | } 57 | 58 | bt(E=E, totreat=totreat, adapt=adapt, sol=list()) 59 | } 60 | btx 61 | } 62 | 63 | E <- editmatrix(c( 64 | "p + c1 + c2 == t", 65 | "c1 - 0.3*t >= 0", 66 | "p > 0", 67 | "c1 > 0", 68 | "c2 > 0", 69 | "t > 0")) 70 | 71 | 72 | cp <- errorLocalizer(E,x=c(p=755, c1=50, c2=NA,t=200)) 73 | el <- eloc(E) 74 | 75 | sol <- cp$searchAll() 76 | sol 77 | 78 | sol <- el(x=c(p=755, c1=50, c2=NA,t=200)) 79 | sol 80 | 81 | cat("backtracker:\n") 82 | system.time(replicate(100, { 83 | cp <- errorLocalizer(E,x=c(p=755, c1=50, c2=NA,t=200)) 84 | sol<-cp$searchAll() 85 | })) 86 | 87 | cat("hardcoded:\n") 88 | system.time(replicate(100, { 89 | sol<-el(x=c(p=755, c1=50, c2=NA,t=200))} 90 | )) 91 | -------------------------------------------------------------------------------- /pkg/R/checkRows.R: -------------------------------------------------------------------------------- 1 | # Check rows of data.frame against edits. 2 | # 3 | # This is an S3 generic function for checking rows of a \code{data.frame} against 4 | # a number of edit restrictions. The edits can be entered either in \code{character} 5 | # \code{data.frame} or \code{editmatrix} format. 6 | # 7 | # If edits are represented as a \code{character} vector, the entries of \code{E} are parsed 8 | # and evaluated in the environment of \code{dat} 9 | # 10 | # If the edits are represented in a \code{data.frame}, the \code{data.frame} must have the format 11 | # described in \code{\link{editmatrix}}. The edits are coerced to a character vector, and passed 12 | # to \code{checkRows.character}. 13 | # 14 | # If the edits are represented by an \code{\link{editmatrix}} (representing linear (in)equalities) 15 | # verbose edits are extracted and passed on to \code{checkRows.character} 16 | # 17 | # 18 | # @aliases checkRows.character checkRows.data.frame checkRows.editmatrix 19 | # 20 | # @param E Edits, in \code{character}, \code{data.frame} or \code{\link{editmatrix}} representation 21 | # @param dat The data to check. 22 | # @return a logical vector with \code{length} equal to \code{nrow(dat)}. If a row is violates 23 | # no edit restrictions, \code{TRUE} otherwise \code{FALSE} 24 | # 25 | # @seealso violatedEdits 26 | # @example ../examples/checkRows.R 27 | # 28 | checkRows <- function(E, dat){ 29 | UseMethod("checkRows") 30 | } 31 | 32 | 33 | 34 | # @rdname checkRows 35 | # @method checkRows editmatrix 36 | # 37 | #' @export 38 | checkRows.editmatrix <- function(E, dat){ 39 | stopifnot(is.data.frame(dat)) 40 | vars <- getVars(E) %in% names(dat) 41 | if (!all(vars)){ 42 | stop("Edits contain variable(s):", paste(colnames(E)[!vars], collapse=","), 43 | ", that are not available in the data.frame") 44 | } 45 | 46 | check <- !logical(nrow(dat)) 47 | ed <- as.expression(E) 48 | for ( i in 1:length(ed)){ 49 | check <- check & tryCatch(eval(ed[[i]], envir=dat), error=function(e){ 50 | stop(paste("Edit",ed[[i]],"can not be checked. Evaluation returned",e$message,sep="\n" )) 51 | }) 52 | } 53 | return(check) 54 | } 55 | 56 | # @rdname checkRows 57 | # @method checkRows character 58 | # 59 | #' @export 60 | checkRows.character <- function(E, dat){ 61 | ed <- parseEdits(E) 62 | check <- !logical(nrow(dat)) 63 | for ( i in 1:length(E)){ 64 | check <- check & tryCatch(eval(ed[[i]], envir=dat), error=function(e){ 65 | stop(paste("Edit",ed[[i]],"can not be checked. Evaluation returned",e$message,sep="\n" )) 66 | }) 67 | } 68 | return(check) 69 | } 70 | 71 | # @rdname checkRows 72 | # @method checkRows data.frame 73 | # 74 | #' @export 75 | checkRows.data.frame <- function(E, dat){ 76 | checkRows(as.character(E$edit),dat) 77 | } 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /pkg/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: editrules 2 | Maintainer: Edwin de Jonge 3 | License: GPL-3 4 | Title: Parsing, Applying, and Manipulating Data Cleaning Rules 5 | LazyData: no 6 | Type: Package 7 | LazyLoad: yes 8 | Authors@R: c(person(given = "Edwin", 9 | family = "de Jonge", 10 | role = c("aut", "cre"), 11 | email = "edwindjonge@gmail.com", 12 | comment=c(ORCID="0000-0002-6580-4718") 13 | ), 14 | person(given = "Mark", 15 | family = "van der Loo", 16 | role = "aut")) 17 | Description: Please note: active development has moved to packages 'validate' 18 | and 'errorlocate'. Facilitates reading and manipulating (multivariate) data 19 | restrictions (edit rules) on numerical and categorical data. Rules can be 20 | defined with common R syntax and parsed to an internal (matrix-like format). 21 | Rules can be manipulated with variable elimination and value substitution 22 | methods, allowing for feasibility checks and more. Data can be tested against 23 | the rules and erroneous fields can be found based on Fellegi and Holt's 24 | generalized principle. Rules dependencies can be visualized with using the 25 | 'igraph' package. 26 | Version: 2.9.6 27 | Depends: 28 | R (>= 2.12.0), 29 | igraph 30 | Imports: 31 | lpSolveAPI 32 | Suggests: 33 | testthat 34 | URL: https://github.com/data-cleaning/editrules 35 | BugReports: https://github.com/data-cleaning/editrules/issues 36 | Collate: 37 | 'adjacency.R' 38 | 'as.igraph.R' 39 | 'editset.R' 40 | 'editarray.R' 41 | 'editmatrix.R' 42 | 'as.matrix.R' 43 | 'backtracker.R' 44 | 'blocks.R' 45 | 'c.R' 46 | 'cateditmatrix.R' 47 | 'checkDatamodel.R' 48 | 'checkRows.R' 49 | 'contains.R' 50 | 'disjunct.R' 51 | 'duplicated.R' 52 | 'echelon.R' 53 | 'editAttr.R' 54 | 'editarrayAttr.R' 55 | 'editfile.R' 56 | 'editmatrixAttr.R' 57 | 'editrules-data.R' 58 | 'eliminate.R' 59 | 'errorLocalizer.R' 60 | 'errorLocalizer_mip.R' 61 | 'errorLocation.R' 62 | 'expandEdits.R' 63 | 'generateEdits.R' 64 | 'getH.R' 65 | 'getUpperBounds.R' 66 | 'getVars.R' 67 | 'is.R' 68 | 'isFeasible.R' 69 | 'isObviouslyInfeasible.R' 70 | 'isObviouslyRedundant.R' 71 | 'isSubset.R' 72 | 'list2env.R' 73 | 'localizeErrors.R' 74 | 'mip.R' 75 | 'parseCat.R' 76 | 'parseEdits.R' 77 | 'parseMix.R' 78 | 'parseNum.R' 79 | 'perturbWeights.R' 80 | 'pkg.R' 81 | 'plot.R' 82 | 'plot_errorLocation.R' 83 | 'print.R' 84 | 'reduce.R' 85 | 'removeRedundant.R' 86 | 'softEdits.R' 87 | 'str.R' 88 | 'subsetting.R' 89 | 'substValue.R' 90 | 'summary.R' 91 | 'violatedEdits.R' 92 | 'writeELAsMip.R' 93 | 'zzz.R' 94 | RoxygenNote: 7.3.3 95 | Encoding: UTF-8 96 | -------------------------------------------------------------------------------- /examples/errorLocalizer.R: -------------------------------------------------------------------------------- 1 | #### examples with numerical edits 2 | # example with a single editrule 3 | # p = profit, c = cost, t = turnover 4 | E <- editmatrix(c("p + c == t")) 5 | cp <- errorLocalizer(E, x=c(p=755, c=125, t=200)) 6 | # x obviously violates E. With all weights equal, changing any variable will do. 7 | # first solution: 8 | cp$searchNext() 9 | # second solution: 10 | cp$searchNext() 11 | # third solution: 12 | cp$searchNext() 13 | # there are no more solution since changing more variables would increase the 14 | # weight, so the result of the next statement is NULL: 15 | cp$searchNext() 16 | 17 | # Increasing the reliability weight of turnover, yields 2 solutions: 18 | cp <- errorLocalizer(E, x=c(p=755, c=125, t=200), weight=c(1,1,2)) 19 | # first solution: 20 | cp$searchNext() 21 | # second solution: 22 | cp$searchNext() 23 | # no more solutions available: 24 | cp$searchNext() 25 | 26 | 27 | # A case with two restrictions. The second restriction demands that 28 | # c/t >= 0.6 (cost should be more than 60% of turnover) 29 | E <- editmatrix(c( 30 | "p + c == t", 31 | "c - 0.6*t >= 0")) 32 | cp <- errorLocalizer(E,x=c(p=755,c=125,t=200)) 33 | # Now, there's only one solution, but we need two runs to find it (the 1st one 34 | # has higher weight) 35 | cp$searchNext() 36 | cp$searchNext() 37 | 38 | # With the searchBest() function, the lowest weifght solution is found at once: 39 | errorLocalizer(E,x=c(p=755,c=125,t=200))$searchBest() 40 | 41 | 42 | # An example with missing data. 43 | E <- editmatrix(c( 44 | "p + c1 + c2 == t", 45 | "c1 - 0.3*t >= 0", 46 | "p > 0", 47 | "c1 > 0", 48 | "c2 > 0", 49 | "t > 0")) 50 | cp <- errorLocalizer(E,x=c(p=755, c1=50, c2=NA,t=200)) 51 | # (Note that e2 is violated.) 52 | # There are two solutions. Both demand that c2 is adapted: 53 | cp$searchNext() 54 | cp$searchNext() 55 | 56 | ##### Examples with categorical edits 57 | # 58 | # 3 variables, recording age class, position in household, and marital status: 59 | # We define the datamodel and the rules 60 | E <- editarray(expression( 61 | age %in% c('under aged','adult'), 62 | maritalStatus %in% c('unmarried','married','widowed','divorced'), 63 | positionInHousehold %in% c('marriage partner', 'child', 'other'), 64 | if( age == 'under aged' ) 65 | maritalStatus == 'unmarried', 66 | if( maritalStatus %in% c('married','widowed','divorced')) 67 | !positionInHousehold %in% c('marriage partner','child') 68 | ) 69 | ) 70 | E 71 | 72 | # Let's define a record with an obvious error: 73 | r <- c( 74 | age = 'under aged', 75 | maritalStatus='married', 76 | positionInHousehold='child') 77 | # The age class and position in household are consistent, while the marital 78 | # status conflicts. Therefore, changing only the marital status (in stead of 79 | # both age class and postition in household) seems reasonable. 80 | el <- errorLocalizer(E,r) 81 | el$searchNext() 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /pkg/R/parseNum.R: -------------------------------------------------------------------------------- 1 | NUMCMP <- c("==","<","<=",">",">=") 2 | NUMOPS <- c("+","-","*","<","<=",">=",">") 3 | 4 | #' Parse a numerical edit expression 5 | #' 6 | #' Parse a numerical edit expression into a named \code{numeric}. 7 | #' The \code{names} are the names of the variables 8 | #' @param e a valid R expression 9 | #' @keywords internal 10 | parseNum <- function(e){ 11 | if (!isNum(e)){ 12 | stop(paste("Invalid edit rule:", e)) 13 | } 14 | wgt <- retrieveCoef(e) 15 | # simplify the coefficients by summing them 16 | tapply(wgt, names(wgt), sum) 17 | } 18 | 19 | 20 | hasNum <- function(e){ 21 | if (length(e) == 1){ 22 | return(is.numeric(e)) 23 | } 24 | op <- deparse(e[[1]]) 25 | if (length(e) == 2){ 26 | return (op %in% NUMOPS || hasNum(e[[2]])) 27 | } 28 | if (length(e) == 3){ 29 | if (op == "%in%") return(FALSE) 30 | return(op %in% NUMOPS || hasNum(e[[2]]) || hasNum(e[[3]])) 31 | } 32 | } 33 | 34 | # basic test for numerical edit 35 | isNum <- function(e){ 36 | if (length(e) != 3) 37 | return(FALSE) 38 | cmp <- deparse(e[[1]]) 39 | if (cmp == "==") { 40 | return(!is.character(e[[3]]) && !is.logical(e[[3]])) 41 | } 42 | return(cmp %in% NUMCMP) 43 | } 44 | 45 | retrieveCoef <- function(e, co=1){ 46 | #stopifnot(is.language(e)) 47 | if (length(e) == 1){ 48 | if (is.numeric(e)){ 49 | l <- co*e #the resulting matrix is augmented so b has a - 50 | names(l) <- getOption("editrules.CONSTANT", "CONSTANT") 51 | } 52 | else { 53 | l <- co 54 | names(l) <- as.character(e) 55 | } 56 | return(l) 57 | } 58 | if (length(e) == 2){ 59 | op <- deparse(e[[1]]) 60 | rhs <- e[[2]] 61 | if (op == "("){ 62 | return(retrieveCoef(rhs, co)) 63 | } else if (op == "-"){ 64 | return(retrieveCoef(rhs, -1*co)) 65 | } 66 | else { 67 | stop("Operator ", op, " not implemented", "Invalid expression:", e) 68 | } 69 | } 70 | if (length(e) == 3){ 71 | op <- deparse(e[[1]]) 72 | lhs <- e[[2]] 73 | rhs <- e[[3]] 74 | lsign <- rsign <- co 75 | if ( op %in% c(NUMCMP, "-")){ 76 | rsign <- -1 * co 77 | } 78 | else if (op == "+"){ 79 | } 80 | else if (op == "*"){ 81 | if (length(lhs) == 2 || is.numeric(lhs)){ 82 | co <- retrieveCoef(lhs, co) 83 | return(retrieveCoef(rhs, co)) 84 | } else if (length(rhs) == 2 || is.numeric(rhs)){ 85 | co <- retrieveCoef(rhs, co) 86 | return(retrieveCoef(lhs, co)) 87 | } else{ 88 | stop("Expression '", deparse(e), "' contains nonconstant coefficient") 89 | } 90 | } 91 | else { 92 | stop("Operator ", op, " not implemented", "Invalid expression:", e) 93 | } 94 | return(c( retrieveCoef(lhs, lsign) 95 | , retrieveCoef(rhs, rsign) 96 | ) 97 | ) 98 | } 99 | stop("Invalid expression:", e) 100 | } 101 | -------------------------------------------------------------------------------- /pkg/R/isObviouslyInfeasible.R: -------------------------------------------------------------------------------- 1 | 2 | #' Check for obvious contradictions in a set of edits 3 | #' 4 | #' Obvious contradictions are edits of the form \eqn{1 < 0}, or categorical 5 | #' edits defining that a record fails for any value combination If 6 | #' this function evaluates to \code{TRUE}, the set of edits is guaranteed 7 | #' infeasible. If it evaluates to \code{FALSE} this does not garuantee feasibility. 8 | #' See \code{\link{isFeasible}} for a complete test. 9 | #' 10 | #' @param E An \code{\link{editset}}, \code{\link{editmatrix}}, \code{\link{editarray}}, \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editenv}} 11 | #' @param ... Arguments to be passed to or from other methods. 12 | #' @return A \code{logical} for objects of class \code{\link{editset}}, \code{\link{editarray}} or \code{\link{editmatrix}}. 13 | #' A \code{logical} vector in the case of an \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editset}}. 14 | #' 15 | #' @export 16 | #' @seealso \code{\link{isObviouslyRedundant}}, \code{\link{isFeasible}} 17 | isObviouslyInfeasible <- function(E,...){ 18 | UseMethod("isObviouslyInfeasible") 19 | } 20 | 21 | 22 | #' 23 | #' @method isObviouslyInfeasible editmatrix 24 | #' @param tol Tolerance for checking against zero. 25 | #' @seealso \code{\link{eliminate}} \code{\link{editmatrix}} 26 | #' @rdname isObviouslyInfeasible 27 | #' @export 28 | #' 29 | isObviouslyInfeasible.editmatrix <- function(E, tol=sqrt(.Machine$double.eps), ...){ 30 | if ( !isNormalized(E) ) E <- normalize(E) 31 | A <- getAb(E) 32 | operators <- getOps(E) 33 | ib <- ncol(A) 34 | zeroCoef <- rowSums(abs(A[,-ib,drop=FALSE])) <= tol 35 | b <- round(A[,ib],ceiling(-log10(tol))) 36 | if ( any(zeroCoef & operators == "<" & b <= 0) || 37 | any(zeroCoef & operators == "<=" & b < 0) || 38 | any(zeroCoef & operators == c("==") & abs(b) > tol)) return(TRUE) 39 | return(FALSE) 40 | } 41 | 42 | #' 43 | #' @method isObviouslyInfeasible editarray 44 | #' @rdname isObviouslyInfeasible 45 | #' @export 46 | #' 47 | isObviouslyInfeasible.editarray <- function(E,...){ 48 | any(rowSums(E)==ncol(E)) 49 | } 50 | 51 | 52 | #' @export 53 | isObviouslyInfeasible.NULL <- function(E,...){ 54 | FALSE 55 | } 56 | 57 | 58 | #' 59 | #' @method isObviouslyInfeasible editset 60 | #' @rdname isObviouslyInfeasible 61 | #' @export 62 | #' 63 | isObviouslyInfeasible.editset <- function(E,...){ 64 | isObviouslyInfeasible(E$num) || isObviouslyInfeasible(E$mixcat) 65 | } 66 | 67 | #' 68 | #' @method isObviouslyInfeasible editlist 69 | #' @rdname isObviouslyInfeasible 70 | #' @export 71 | #' 72 | isObviouslyInfeasible.editlist <- function(E,...){ 73 | vapply(E,isObviouslyInfeasible, FUN.VALUE=FALSE) 74 | } 75 | 76 | #' 77 | #' 78 | #' 79 | #' @method isObviouslyInfeasible editenv 80 | #' @rdname isObviouslyInfeasible 81 | #' @export 82 | #' 83 | #' 84 | isObviouslyInfeasible.editenv <- function(E,...){ 85 | # note: environments are coerced to lists by lapply 86 | vapply(E,isObviouslyInfeasible, FUN.VALUE=FALSE) 87 | } 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /examples/eliminate.R: -------------------------------------------------------------------------------- 1 | 2 | # The following is an example by Williams (1986). Eliminating all variables 3 | # except z maximizes -4x1 + 5x2 +3x3: 4 | P <- editmatrix(c( 5 | "4*x1 - 5*x2 - 3*x3 + z <= 0", 6 | "-x1 + x2 -x3 <= 2", 7 | "x1 + x2 + 2*x3 <= 3", 8 | "-x1 <= 0", 9 | "-x2 <= 0", 10 | "-x3 <= 0")) 11 | # eliminate 1st variable 12 | (P1 <- eliminate(P, "x1", fancynames=TRUE)) 13 | # eliminate 2nd variable. Note that redundant rows have been eliminated 14 | (P2 <- eliminate(P1, "x2", fancynames=TRUE)) 15 | # finally, the answer: 16 | (P3 <- eliminate(P2, "x3", fancynames=TRUE)) 17 | 18 | # check which original edits were used in deriving the new ones 19 | getH(P3) 20 | 21 | # check how many variables were eliminated 22 | geth(P3) 23 | 24 | 25 | # An example with an equality and two inequalities 26 | # The only thing to do is solving for x in e1 and substitute in e3. 27 | (E <- editmatrix(c( 28 | "2*x + y == 1", 29 | "y > 0", 30 | "x > 0"),normalize=TRUE)) 31 | eliminate(E,"x", fancynames=TRUE) 32 | 33 | 34 | # This example has two equalities, and it's solution 35 | # is the origin (x,y)=(0,0) 36 | (E <- editmatrix(c( 37 | "y <= 1 - x", 38 | "y >= -1 + x", 39 | "x == y", 40 | "y ==-2*x" ),normalize=TRUE)) 41 | eliminate(E,"x", fancynames=TRUE) 42 | 43 | # this example has no solution, the equalities demand (x,y) = (0,2) 44 | # while the inequalities demand y <= 1 45 | (E <- editmatrix(c( 46 | "y <= 1 - x", 47 | "y >= -1 + x", 48 | "y == 2 - x", 49 | "y == -2 + x" ),normalize=TRUE)) 50 | # this happens to result in an obviously unfeasable system: 51 | isObviouslyInfeasible(eliminate(E,"x")) 52 | 53 | 54 | # for categorical data, elimination amounts to logical derivartions. For 55 | # example 56 | E <- editarray(expression( 57 | age %in% c('under aged','adult'), 58 | positionInHousehold %in% c('marriage partner', 'child', 'other'), 59 | maritalStatus %in% c('unmarried','married','widowed','divorced'), 60 | if (maritalStatus %in% c('married','widowed','divorced') ) 61 | positionInHousehold != 'child', 62 | if (maritalStatus == 'unmarried') 63 | positionInHousehold != 'marriage partner' , 64 | if ( age == 'under aged') maritalStatus == 'unmarried' 65 | ) 66 | ) 67 | E 68 | 69 | # by eliminating 'maritalStatus' we can deduce that under aged persones cannot 70 | # be partner in marriage. 71 | eliminate(E,"maritalStatus") 72 | 73 | E <- editarray(expression( 74 | age %in% c('under aged','adult'), 75 | positionInHousehold %in% c('marriage partner', 'child', 'other'), 76 | maritalStatus %in% c('unmarried','married','widowed','divorced'), 77 | if (maritalStatus %in% c('married','widowed','divorced') ) 78 | positionInHousehold != 'child', 79 | if (maritalStatus == 'unmarried') 80 | positionInHousehold != 'marriage partner' , 81 | if ( age == 'under aged') 82 | maritalStatus == 'unmarried' 83 | ) 84 | ) 85 | E 86 | 87 | # by eliminating 'maritalStatus' we can deduce that under aged persones cannot 88 | # be partner in marriage. 89 | eliminate(E,"maritalStatus") 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /pkg/R/getVars.R: -------------------------------------------------------------------------------- 1 | 2 | #' get names of variables in a set of edits 3 | #' 4 | #' @param E \code{\link{editset}}, \code{\link{editmatrix}}, or \code{\link{editarray}} 5 | #' @param ... Arguments to be passed to or from other methods 6 | #' @seealso \code{\link{getA}}, \code{\link{getb}}, \code{\link{getAb}}, \code{\link{getOps}} 7 | #' @example ../examples/getVars.R 8 | #' @return \code{character} vector with the names of the variables. 9 | #' @export 10 | getVars <- function(E,...){ 11 | UseMethod("getVars") 12 | } 13 | 14 | 15 | #' Returns the variable names of an (in)equality \code{editmatrix} E 16 | #' 17 | #' @export 18 | #' @method getVars editmatrix 19 | #' @keywords internal 20 | getVars.editmatrix <- function(E,...){ 21 | colnames(E)[-ncol(E)] 22 | } 23 | 24 | #' Returns the variable names of an (in)equality \code{editmatrix} E 25 | #' 26 | #' @param type should unique variable names, colnames, all variable names or category names be extracted? 27 | #' @export 28 | #' @method getVars cateditmatrix 29 | #' @keywords internal 30 | getVars.cateditmatrix <- function(E, type=c("uniquevar", "colnames","var", "cat"), ...){ 31 | nms <- colnames(E)[-ncol(E)] 32 | var <- sub(":.+", "", nms) 33 | cat <- sub(".+:", "", nms) 34 | cat[var==cat] <- "TRUE" 35 | switch( match.arg(type) 36 | , colnames = nms 37 | , var = var 38 | , cat = cat 39 | , unique(var) 40 | ) 41 | } 42 | 43 | #' get variable names in editarray 44 | #' 45 | #' @export 46 | #' @method getVars editarray 47 | #' @keywords internal 48 | getVars.editarray <- function(E,type='cat',...){ 49 | if (!type=='cat') return(NULL) 50 | names(attr(E,"ind")) 51 | } 52 | 53 | #' getr variable names 54 | #' 55 | #' @method getVars editset 56 | #' @param type (editset- or list only) select which variables to return. \code{all} means all (except dummies), \code{num} means 57 | #' all numericals, \code{cat} means all categoricals, \code{mix} means those numericals appearing in a logical 58 | #' constraint and \code{dummy} means dummy variables connecting the logical with numerical constraints. 59 | #' @export 60 | #' @rdname getVars 61 | getVars.editset <- function(E, type=c('all','num','cat','mix','dummy'), ...){ 62 | type <- match.arg(type) 63 | numvars <- c() 64 | catvars <- c() 65 | 66 | if (type %in% c('all','num')){ 67 | numvars <- unique(c(getVars(E$num), getVars(E$mixnum))) 68 | } 69 | if ( type == 'mix' ) numvars <- getVars(E$mixnum) 70 | if ( type %in% c('all','cat')){ 71 | catvars <- getVars(E$mixcat) 72 | catvars <- catvars[!catvars %in% rownames(E$mixnum)] 73 | } 74 | if ( type == 'dummy'){ 75 | catvars <- rownames(E$mixnum) 76 | } 77 | c(numvars, catvars) 78 | } 79 | 80 | #' get variable names 81 | #' @method getVars NULL 82 | #' @export 83 | #' @rdname getVars 84 | getVars.NULL <- function(E,...){ 85 | NULL 86 | } 87 | 88 | #' get variable names 89 | #' @export 90 | #' @method getVars editlist 91 | #' @keywords internal 92 | getVars.editlist <- function(E,...){ 93 | # under normal circumstances, each part of an editlist has the same variables 94 | if ( length(E) == 0 ) return(NULL) 95 | getVars.editset(E[[1]], ...) 96 | } 97 | 98 | 99 | -------------------------------------------------------------------------------- /pkg/R/parseCat.R: -------------------------------------------------------------------------------- 1 | CATCMP <- c("==", "!=", "%in%") 2 | 3 | #' Parse a categorical edit expression 4 | #' 5 | #' @param x a valid R expression 6 | #' @param val logical (scalar) 7 | #' @param edit logical (vector) 8 | #' @param sep edit separator 9 | #' @param useLogical (logical), should logicals be treated as a factor or as a logical? 10 | #' @keywords internal 11 | parseCat <- function(x, val=NA, edit=logical(0), sep=":", useLogical=FALSE, env=parent.frame()){ 12 | if ( length(x) == 1 ) { 13 | # corner case: the always FALSE edit (array must be TRUE at every category) 14 | if ( is.na(val) && !x[[1]] ) return(NULL) 15 | if (is.logical(x)){ 16 | if (val == x) { return(edit) 17 | }else { 18 | # if this happens the statement is always true, so delete it... 19 | return (logical()) 20 | } 21 | } 22 | var <- if (useLogical) as.character(x) 23 | else paste(x,"TRUE",sep=sep) 24 | edit[var] <- val 25 | return(edit) 26 | } 27 | op <- as.character(x[[1]]) 28 | if ( op == "if" ){ 29 | edit <- parseCat(x[[2]],TRUE, edit, sep, useLogical, env=env) 30 | edit <- parseCat(x[[3]],FALSE, edit, sep, useLogical, env=env) 31 | } else if ( op %in% c("(","{") ){ 32 | edit <- parseCat(x[[2]], val, edit, sep, useLogical, env=env) 33 | } else if ( op %in% c("%in%","==") ){ 34 | cat <- eval(x[[3]], envir=env) 35 | if ( is.na(val) && op == "==" ) val <- FALSE 36 | if (is.logical(cat) && useLogical){ 37 | if (length(cat) > 1){ 38 | val <- NA 39 | } else { 40 | if (!cat) val <- !val 41 | } 42 | var <- as.character(x[[2]]) 43 | } else { 44 | var <- paste(x[[2]],cat,sep=sep) 45 | } 46 | edit[var] <- val 47 | } else if (op == "!=") { 48 | cat <- eval(x[[3]], envir=env) 49 | if (is.logical(cat) && useLogical){ 50 | var <- as.character(x[[2]]) 51 | if (!cat) val <- !val 52 | } else{ 53 | var <- paste(x[[2]],cat,sep=sep) 54 | } 55 | edit[var] <- !val 56 | } else if (op == "!") { 57 | if (is.na(val)){ 58 | val <- FALSE 59 | } 60 | edit <- parseCat(x[[2]],!val, edit, sep, useLogical, env=env) 61 | } else if (op %in% c("&", "&&")){ 62 | if (!isTRUE(val)){ 63 | stop( "Operator '",op,"' is not allowed in 'then' clause.\n Edit '" 64 | , deparse(x) 65 | ,"' can be split into multiple edits") 66 | } 67 | edit <- parseCat(x[[2]],TRUE, edit, sep, useLogical, env=env) 68 | edit <- parseCat(x[[3]],TRUE, edit, sep, useLogical, env=env) 69 | } else if (op %in% c("||","|")){ 70 | if (isTRUE(val)){ 71 | stop( "Operator '",op,"' is not allowed in 'if' clause.\n Edit '" 72 | , deparse(x) 73 | ,"' can be split into multiple edits") 74 | } 75 | edit <- parseCat(x[[2]],FALSE, edit, sep, useLogical, env=env) 76 | edit <- parseCat(x[[3]],FALSE, edit, sep, useLogical, env=env) 77 | } else { 78 | stop("Operator '",op,"' not implemented") 79 | } 80 | edit 81 | } 82 | 83 | isCat <- function(e){ 84 | if (length(e)==1) { 85 | return(is.symbol(e)) 86 | } 87 | 88 | cmp <- deparse(e[[1]]) 89 | return( cmp %in% c(CATCMP,"!")) 90 | } 91 | 92 | -------------------------------------------------------------------------------- /pkg/inst/script/bench/benchmip_mixed.R: -------------------------------------------------------------------------------- 1 | library(editrules) 2 | FILE ="benchmip_mixed.txt" 3 | 4 | generate_E <- function(nvar=10){ 5 | if (nvar < 1) stop("nvar needs to be bigger than 3") 6 | s <- seq_len(nvar) 7 | n_num <- ceiling(nvar/2) 8 | var_num <- head(s, n_num) 9 | var_cat <- tail(s, -n_num) 10 | n_cat <- length(var_cat) 11 | 12 | var_num <- paste0("x", var_num) 13 | if (n_cat) var_cat <- paste0("v", var_cat-n_num) else character() 14 | 15 | if (length(var_num) > 1){ 16 | nsum <- paste(tail(var_num, -1), collapse="+") 17 | edits <- paste0("x1 == ", nsum) 18 | edits <- c(edits, paste0(head(var_num, -1)," >= ", tail(var_num, -1))) 19 | } else { 20 | edits <- "x1 == 0" 21 | } 22 | 23 | if (n_cat){ 24 | edits <- c( paste0(tail(var_num, 1), ">= 0") 25 | , edits 26 | , paste0(var_cat, " %in% c(TRUE,FALSE)") 27 | , paste0("if (!", var_cat, ") ", head(var_num,n_cat),"< 0") 28 | ) 29 | } 30 | editset(edits) 31 | } 32 | 33 | error <- function(x){ 34 | lapply(x, function(v){ 35 | if (is.logical(v)) FALSE else -1 36 | }) 37 | } 38 | 39 | generate_data <- function(E, nerrors=0){ 40 | num_vars <- getVars(E,"num") 41 | 42 | x <- sapply(getVars(E), function(v){ 43 | if (v %in% num_vars) 0 else TRUE 44 | }, simplify=FALSE) 45 | 46 | n <- length(x) 47 | 48 | x1 <- x 49 | idx <- seq_len(nerrors) 50 | x1[idx] <- error(x1[idx]) 51 | 52 | x2 <- x 53 | idx <- 1+n - seq_len(nerrors) 54 | x2[idx] <- error(x2[idx]) 55 | 56 | x3 <- x 57 | idx <- (round(n-nerrors)/2) + seq_len(nerrors) 58 | x3[idx] <- error(x3[idx]) 59 | 60 | rbind(as.data.frame(x1),x2,x3) 61 | } 62 | 63 | 64 | bench <- function(nvars = 10, nerrors=10, method="bb", maxduration=200){ 65 | 66 | init <- !file.exists(FILE) 67 | txt <- file(FILE, "at") 68 | on.exit(close(txt)) 69 | 70 | errorloc <- c("begin", "end", "middle") 71 | 72 | if (nerrors >= nvars) stop("nvars cannot be less than nerrors") 73 | for (nvar in seq_len(nvars)){ 74 | E <- generate_E(nvar) 75 | max_dur <- logical(nrow(generate_data(E, 1))) 76 | for (ne in seq(1, min(nerrors, nvar))){ 77 | try({ 78 | if (all(max_dur)) break 79 | data <- generate_data(E, ne)[!max_dur,,drop=FALSE] 80 | # only select middle 81 | data <- data[3,,drop=FALSE] 82 | errorloc_m <- errorloc[3] 83 | 84 | cat("\r nvar=", nvar, " ne=", ne, " method=", method) 85 | le <- localizeErrors(E, data, method=method, maxduration=maxduration) 86 | max_dur[!max_dur] <- le$status$maxDurationExceeded 87 | rpt <- cbind(method=method, nvar=nvar, nerrors=ne, errorloc=errorloc_m, le$status) 88 | 89 | write.table(rpt, file=txt, col.names=init, row.names=FALSE) 90 | 91 | init <- FALSE 92 | flush(txt) 93 | }) 94 | gc() 95 | } 96 | } 97 | } 98 | 99 | if (file.exists(FILE)) file.remove(FILE) 100 | 101 | bench(50,10, method="mip") 102 | bench(50,10, method="bb") 103 | 104 | dat <- read.table(FILE, header=TRUE) 105 | library(ggplot2) 106 | qplot(data=dat, y=elapsed, x=nvar, color=method, facets=nerrors~method, shape=errorloc) + geom_jitter() 107 | ggsave("benchmip_categorical.png") 108 | 109 | 110 | ### quick testing 111 | # E <- generate_E(2) 112 | # generate_data(E, nerrors=1) 113 | -------------------------------------------------------------------------------- /pkg/R/editmatrixAttr.R: -------------------------------------------------------------------------------- 1 | 2 | #' Returns the constant part \code{b} of a linear (in)equality 3 | #' 4 | #' @example ../examples/editmatrixAttr.R 5 | #' @export getb 6 | #' @seealso \code{\link{editmatrix}} 7 | #' 8 | #' @param E \code{\link{editmatrix}} 9 | #' @return \code{numeric} vector \code{b} 10 | getb <- function(E){ 11 | if (!is.editmatrix(E)){ 12 | stop("E has to be an editmatrix.") 13 | } 14 | E <- unclass(E) 15 | E[,ncol(E)] 16 | } 17 | 18 | 19 | 20 | #' Returns the coefficient matrix \code{A} of linear (in)equalities 21 | #' 22 | #' @example ../examples/editmatrixAttr.R 23 | #' @export getA 24 | #' @seealso \code{\link{editmatrix}} 25 | #' @aliases getA 26 | #' 27 | #' @param E \code{\link{editmatrix}} 28 | #' @return \code{numeric} matrix \code{A} 29 | getA <- function(E){ 30 | if ( is.editmatrix(E) ){ 31 | unclass(E)[,-ncol(E),drop=FALSE] 32 | } else { 33 | stop("E has to be an editmatrix") 34 | } 35 | } 36 | 37 | 38 | #' Returns augmented matrix representation of edit set. 39 | #' 40 | #' For a system of linear (in)equations of the form \eqn{Ax \odot b}, \eqn{\odot\in\{<,\leq,=\}}, 41 | #' the matrix \eqn{A|b} is called the augmented matrix. 42 | #' 43 | #' @example ../examples/editmatrixAttr.R 44 | #' @seealso \code{\link{editmatrix}} \code{\link{as.matrix.editmatrix}} 45 | #' 46 | #' @param E \code{\link{editmatrix}} 47 | #' @return \code{numeric} matrix \code{A|b} 48 | #' @export 49 | getAb <- function(E){ 50 | if (!is.editmatrix(E)) stop("E has to be an editmatrix.") 51 | unclass(E)[,,drop=FALSE] 52 | } 53 | 54 | #' Returns the operator part of a linear (in)equality \code{editmatrix} E 55 | #' 56 | #' @export 57 | #' @seealso \code{\link{editmatrix}} 58 | #' 59 | #' @example ../examples/editmatrixAttr.R 60 | #' 61 | #' @param E \code{\link{editmatrix}} 62 | #' 63 | #' @return \code{character} vector with the (in)equality operators. 64 | getOps <- function(E){ 65 | if (!is.editmatrix(E)){ 66 | stop("E has to be an editmatrix.") 67 | } 68 | attr(E, "ops") 69 | } 70 | 71 | 72 | #' Check if an editmatrix is normalized 73 | #' 74 | #' @export 75 | #' @seealso \code{\link{editmatrix}} 76 | #' 77 | #' @param E \code{\link{editmatrix}} 78 | #' 79 | #' @return TRUE when all comparison operators of \code{E} are in \{\code{<,<=,==}\} 80 | isNormalized <- function(E){ 81 | if (!is.editmatrix(E)){ 82 | stop("Argument not of class editmatrix") 83 | } 84 | 85 | attr(E, "normalized") == TRUE || 86 | all(getOps(E) %in% c("==","<","<=")) 87 | } 88 | 89 | #' Normalizes an editmatrix 90 | #' 91 | #' An set of linear edits of the form \eqn{{\bf a}\cdot{\bf x}\odot b} with 92 | #' is called normalized when all \eqn{\odot\in\{==,\leq,<\}} 93 | #' 94 | #' @export 95 | #' @seealso \code{\link{editmatrix}} 96 | #' 97 | #' @example ../examples/editmatrixAttr.R 98 | #' 99 | #' @param E \code{\link{editmatrix}} 100 | #' 101 | #' @return If E was normalized, the original editmatrix is returned, otherwise 102 | #' a new normalized editmatrix will be returned 103 | normalize <- function(E){ 104 | if (!is.editmatrix(E)) stop("Argument not of class editmatrix") 105 | if (isNormalized(E)){ 106 | return(E) 107 | } 108 | 109 | A <- unclass(E) 110 | ops <- getOps(E) 111 | 112 | geq <- ops == ">=" 113 | gt <- ops == ">" 114 | A[geq | gt,] <- -A[geq | gt,] 115 | ops[geq] <- "<=" 116 | ops[gt] <- "<" 117 | 118 | neweditmatrix(A, ops, normalized=TRUE) 119 | } 120 | -------------------------------------------------------------------------------- /pkg/R/subsetting.R: -------------------------------------------------------------------------------- 1 | 2 | #' Row index operator for \code{editmatrix} 3 | #' 4 | #' Use this operator to select edits from an editmatrix or editarray object. 5 | #' 6 | #' @method [ editmatrix 7 | #' @param x an object of class \code{\link{editmatrix}} or \code{\link{editarray}} 8 | #' @param i the row index in the edit matrix (numeric, logical or rowname) 9 | #' @param j the column index in the edit matrix 10 | #' @param ... arguments to be passed to other methods. Currently ignored. 11 | #' @rdname subsetting 12 | #' @export 13 | `[.editmatrix` <- function(x, i, j, ...){ 14 | if (!missing(i) && is.character(i) ) i <- match(i,rownames(x),nomatch=0) 15 | E <- neweditmatrix( 16 | A = as.matrix(x)[i, j, drop=FALSE], 17 | ops = getOps(x)[i] 18 | ) 19 | attr(E,"H") <- attr(x,"H")[i, , drop=FALSE] 20 | attr(E,"h") <- attr(x,"h") 21 | E 22 | } 23 | 24 | 25 | #' Row index operator for \code{cateditmatrix} 26 | #' 27 | #' @method [ editarray 28 | #' @rdname subsetting 29 | #' @keywords internal 30 | #' 31 | `[.cateditmatrix` <- function(x, i, j, ...){ 32 | if (!missing(i) && is.character(i) ) i <- match(i,rownames(x),nomatch=0) 33 | E <- as.editmatrix( 34 | getA(x)[i,,drop=FALSE], 35 | getb(x)[i], 36 | getOps(x)[i], 37 | binvars=attr(x,'binvars') 38 | ) 39 | class(E) <- c("cateditmatrix","editmatrix") 40 | E 41 | } 42 | 43 | #' Row index operator for \code{editarray} 44 | #' 45 | #' @method [ editarray 46 | #' @rdname subsetting 47 | #' @export 48 | #' 49 | `[.editarray` <- function(x, i, j, ...){ 50 | A <- getArr(x)[i,j,drop=FALSE] 51 | sep <- getSep(x) 52 | ind <- indFromArray(A,sep) 53 | H <- getH(x) 54 | if (!is.null(H)) H <- H[i,j,drop=FALSE] 55 | neweditarray(E=A, ind=ind, sep=sep, names=getnames(x)[i],levels=getlevels(x)[j],H=H) 56 | } 57 | 58 | #' Row index operator for \code{editset} 59 | #' Note: the datamodel is not changed 60 | #' @method [ editset 61 | #' @rdname subsetting 62 | #' @export 63 | `[.editset` <- function(x,i,j, ...){ 64 | if (is.numeric(i) && i[1] < 0){ 65 | N <- nrow(x$num) + nrow(x$mixcat) 66 | # make a negative selection positive 67 | i <- seq_len(N)[i] 68 | print(list(i=i, N=N)) 69 | } 70 | if ( is.logical(i) ) i <- which(i) 71 | nnum <- nrow(x$num) 72 | mixcat <- x$mixcat[i[i>nnum]-nnum] 73 | # remove edits from mixnum not occuring in mixcat 74 | v <- getVars(reduce(mixcat)) 75 | mixnum <- x$mixnum[rownames(x$mixnum) %in% v,] 76 | # remove dummy variables from mixcat not referring to numerical edits anymore 77 | v <- getVars(x,type='dummy') 78 | delvars <- v[ !v %in% rownames(mixnum)] 79 | if ( length(delvars) > 0 ){ 80 | ind <- getInd(mixcat) 81 | delcols <- do.call('c',ind[delvars]) 82 | Amixcat <- getArr(mixcat)[,-delcols,drop=FALSE] 83 | sep <- getSep(mixcat) 84 | ind <- indFromArray(Amixcat,sep=sep) 85 | mixcat <- neweditarray(Amixcat,ind,sep) 86 | } 87 | neweditset( 88 | num = x$num[i[i<=nnum]], 89 | mixnum = mixnum, 90 | mixcat = mixcat, 91 | condition = attr(x,"condition") 92 | ) 93 | } 94 | 95 | 96 | #' Index operator for \code{editlist} 97 | #' @method [ editlist 98 | #' @rdname subsetting 99 | #' @export 100 | `[.editlist` <- function(x,i,j, ...){ 101 | x <- unclass(x)[i] 102 | class(x) <- 'editlist' 103 | x 104 | } 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /pkg/R/adjacency.R: -------------------------------------------------------------------------------- 1 | 2 | #' Derive adjecency matrix from collection of edits 3 | #' 4 | #' A set of edits can be represented as a graph where every vertex is 5 | #' an edit. Two vertices are connected if they have at least one variable 6 | #' in \code{vars} in common. 7 | #' 8 | #' \code{adjacency} returns the adjacency matrix. The elements of the matrix 9 | #' count the number of variables shared by the edits indicated in the row- and 10 | #' column names. The adjacency matrix can be converted to an igraph object with 11 | #' \code{graph.adjacency}from the \code{igraph} package. 12 | #' 13 | #' \code{as.igraph} converts a set of edits to an \code{igraph} object directly. 14 | #' 15 | #' 16 | #' @param E \code{\link{editmatrix}}, \code{\link{editarray}} or \code{\link{editset}} 17 | #' @param nodetype adjacency between rules, vars or both? 18 | #' @param rules selection of edits 19 | #' @param vars selection of variables 20 | #' @param ... arguments to be passed to or from other methods 21 | #' 22 | #' @return the adjacency matrix of edits in \code{E} with resect to 23 | #' the variables in \code{vars} 24 | #' 25 | #' @example ../examples/graph.R 26 | #' 27 | #' @seealso \code{\link{plot.editmatrix}}, \code{\link{plot.editarray}}, \code{\link{plot.editset}} 28 | #' @export 29 | adjacency <- function(E, nodetype=c("all", "rules","vars"), rules=rownames(E), vars=getVars(E),...){ 30 | stopifnot( all(vars %in% getVars(E)) ) 31 | UseMethod('adjacency') 32 | } 33 | 34 | #' @rdname adjacency 35 | #' @method adjacency editmatrix 36 | #' @export 37 | adjacency.editmatrix <- function(E, nodetype=c("all", "rules","vars"), rules=rownames(E), vars=getVars(E),...){ 38 | A <- abs(sign(getA(E))) 39 | nodetype <- match.arg(nodetype) 40 | adjec(A,nodetype=nodetype, rules=rules, vars=vars) 41 | } 42 | 43 | 44 | #' @rdname adjacency 45 | #' @method adjacency editarray 46 | #' @export 47 | adjacency.editarray <- function(E, nodetype=c("all", "rules","vars"), rules=rownames(E), vars=getVars(E),...){ 48 | A <- contains(E) 49 | nodetype <- match.arg(nodetype) 50 | adjec(A,nodetype=nodetype, rules=rules, vars=vars) 51 | } 52 | 53 | 54 | #' @rdname adjacency 55 | #' @method adjacency editset 56 | #' @export 57 | adjacency.editset <- function(E, nodetype=c("all", "rules","vars"), rules=c(rownames(E$num),rownames(E$mixcat)), vars=getVars(E),...){ 58 | A <- contains(E) 59 | nodetype <- match.arg(nodetype) 60 | adjec(A,nodetype=nodetype, rules=rules, vars=vars) 61 | } 62 | 63 | 64 | 65 | 66 | 67 | # derive adjacency from 1/0 or boolean matrix. 68 | # Internal loops only, but nrow(A)^2 memory complexity. 69 | # future optimization options: sparse matrices, lower/upper triangle only. 70 | adjec <- function(A, nodetype="all", rules=rownames(A), vars=colnames(A)){ 71 | A <- A[rules, vars, drop=FALSE] 72 | m <- NULL 73 | vars <- NULL 74 | if (nodetype=="all"){ 75 | N <- nrow(A) + ncol(A) 76 | nms <- c(rownames(A), colnames(A)) 77 | vars <- rep(c(FALSE, TRUE), times=c(nrow(A), ncol(A))) 78 | m <- matrix(0, nrow=N, ncol=N, dimnames=list(nms, nms)) 79 | m[!vars, vars] <- A 80 | m[vars, !vars] <- t(A) 81 | } else{ 82 | vars <- rep(FALSE, nrow(A)) 83 | if (nodetype=="vars"){ 84 | vars <- rep(TRUE, ncol(A)) 85 | A <- t(A) 86 | } 87 | I <- rep(1:nrow(A), times=nrow(A)) 88 | J <- rep(1:nrow(A), each=nrow(A)) 89 | m <- matrix( 90 | rowSums(A[I,,drop=FALSE] & A[J,,drop=FALSE]), 91 | nrow=nrow(A), 92 | dimnames=list(rownames(A), rownames=rownames(A)) 93 | ) 94 | diag(m) <- 0 95 | } 96 | attr(m,"vars") <- vars 97 | m 98 | } 99 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testErrorLocalizer.R: -------------------------------------------------------------------------------- 1 | require(testthat) 2 | context("Error Localization: numerical data") 3 | 4 | test_that("errorLocalizer for numerical data",{ 5 | bt <- errorLocalizer( 6 | E = editmatrix("x + y == z"), 7 | x = c(x=1,y=1,z=2)) 8 | expect_true( all(bt$searchNext()$adapt==c(FALSE,FALSE,FALSE)) ) 9 | expect_true( is.null(bt$searchNext())) 10 | expect_true(1 == errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3))$searchNext()$w) 11 | expect_true(1 == errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3,u=5))$searchNext()$w) 12 | expect_true(is.null(errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3))$searchNext(maxduration=-1)) ) 13 | expect_true(is.null(errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3),maxadapt=0)$searchNext()) ) 14 | expect_true(is.null(errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3),maxweight=0)$searchNext()) ) 15 | expect_that(errorLocalizer(editmatrix("x+y==z"),c(x=1,y=NA,z=3),weight=c(1,NA,1))$searchNext(),throws_error()) 16 | }) 17 | 18 | test_that("weight calculation when checkDatamodel is activated",{ 19 | expect_equal( 20 | localizeErrors( 21 | editmatrix(expression(x+y+z==w,x>0)), 22 | data.frame(x=-1,y=1,z=0,w=0) 23 | )$status$weight, 2 24 | ) 25 | expect_equal( 26 | localizeErrors( 27 | editmatrix(expression(x+y+z==w,x>0)), 28 | data.frame(x=-1,y=1,z=0,w=0), 29 | method='mip' 30 | )$status$weight, 2 31 | ) 32 | 33 | }) 34 | 35 | 36 | 37 | #d <- "../../../pkg/R" 38 | #for ( b in file.path(d,dir(d)) ) dmp <- source(b,echo=FALSE) 39 | 40 | context("Error localization: categorical data") 41 | test_that("errorLocalizer for categorical data",{ 42 | E <- editarray(c( 43 | "positionInHouseHold %in% c('marriage partner','child','other')", 44 | "age %in% c('under aged','adult')", 45 | "maritalStatus %in% c('unmarried','married','widowed')", 46 | "if (age == 'under aged') maritalStatus == 'unmarried'", 47 | "if (maritalStatus != 'unmarried' ) !positionInHouseHold %in% c('child','marriage partner')" 48 | )) 49 | r <- c(age='under aged',maritalStatus='married',positionInHouseHold='child') 50 | e <- errorLocalizer(E,r) 51 | expect_equivalent(e$searchBest()$adapt,c(FALSE,TRUE,FALSE)) 52 | expect_true(e$degeneracy==1) 53 | e$reset() 54 | expect_true(e$searchBest()$w==1) 55 | expect_true(is.null(e$searchNext())) 56 | expect_true(is.null(errorLocalizer(E,r,maxweight=0)$searchBest()) ) 57 | expect_true(is.null(errorLocalizer(E,r,maxadapt=0)$searchBest()) ) 58 | expect_true(is.null(errorLocalizer(E,r)$searchBest(maxduration=-1)) ) 59 | expect_that(errorLocalizer(E,r,weight=c(1,NA,1)),throws_error()) 60 | }) 61 | 62 | test_that("errorlocalizer.editarray runs when blocks reduces the datamodel",{ 63 | # Thanks to Bikram Maharjan for reporting a bug in version 2.0-1 64 | ageGrps=rep(1,5) 65 | genderIndex=rep(1,5) 66 | icdIndex=rep(103,5) 67 | diagnosisIndex=c('TOT', 'A00', 'A02', 'C53','C54') 68 | 69 | 70 | e.diag=editarray(c( 71 | "ageGrps %in% c(1,2,3,4)", 72 | "genderIndex %in% c(0,1,2)", 73 | "icdIndex %in% c(103)", 74 | "diagnosisIndex %in% c('TOT', 'A00', 'A02', 'C53','C54')", 75 | "if(genderIndex=='1' & icdIndex=='103') diagnosisIndex %in% c('C53','C54')" 76 | )) 77 | dat.diag=data.frame(ageGrps,genderIndex,icdIndex,diagnosisIndex) 78 | err=localizeErrors(e.diag,dat.diag) 79 | }) 80 | 81 | test_that("if-else edits are parsed to MIP correctly",{ 82 | e <- editset("if (x > 0 ) y> 0") 83 | d <- data.frame(x = 1,y=0) 84 | localizeErrors(e,d,method='mip') 85 | }) 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /pkg/R/checkDatamodel.R: -------------------------------------------------------------------------------- 1 | #' Check data against a datamodel 2 | #' 3 | #' Categorical variables in \code{dat} which also occur in \code{E} are checked against the datamodel for 4 | #' those variables. Numerical variables are checked against edits in \code{E} that contain only a single 5 | #' variable (e.g. \eqn{x > 0}). Values violating such edits as well as empty values are set to adapt. 6 | #' 7 | #' @param E an object of class \code{\link{editset}}, \code{\link{editarray}}, or \code{\link{editmatrix}} 8 | #' @param dat a \code{data.frame} 9 | #' @param weight vector of weigths for every variable of \code{dat} or an array of weight of the same dimensions as \code{dat}. 10 | #' @param ... arguments to be passed to or from other methods 11 | #' 12 | #' @return An object of class \code{\link{errorLocation}}. 13 | #' @seealso \code{\link{errorLocation}}, \code{\link{localizeErrors}}. 14 | #' @export 15 | checkDatamodel <- function(E, dat,weight=rep(1,ncol(dat)), ...){ 16 | UseMethod('checkDatamodel') 17 | } 18 | 19 | # 20 | # 21 | #' @method checkDatamodel editmatrix 22 | #' @export 23 | #' @keywords internal 24 | checkDatamodel.editmatrix <- function(E, dat, weight=rep(1,ncol(dat)), call=sys.call(), ...){ 25 | if (nrow(E)==0) return(emptyerrorlocation(dat ,method="checkDatamodel",call=call,... )) 26 | I <- rowSums( abs(getA(E)) > 1e-8 ) == 1 27 | localize_singleton(E[I,], dat, weight, method="checkDatamodel", call=call, ... ) 28 | } 29 | 30 | # Check categorical data against datamodel 31 | # 32 | # 33 | 34 | #' @method checkDatamodel editarray 35 | #' @keywords internal 36 | #' @export 37 | checkDatamodel.editarray <- function(E, dat, weight=rep(1,ncol(dat)), ...){ 38 | if (any(! (getVars(E) %in% names(dat)) ) ){ 39 | vr <- paste(getVars(E)[!getVars(E) %in% names(dat)],collapse=', ') 40 | stop(paste('Variables',vr,'defined in E not present in dat')) 41 | } 42 | 43 | m <- nrow(dat) 44 | if ( is.vector(weight) || (is.array(weight) && nrow(weight)==1) ){ 45 | weight <- t(array(rep(weight,m),dim=c(ncol(dat),m))) 46 | dimnames(weight) <- dimnames(dat) 47 | } 48 | I <- names(dat)[names(dat) %in% getVars(E)] 49 | adapt <- array(FALSE,dim=dim(dat),dimnames=dimnames(dat)) 50 | ind <- getInd(E) 51 | 52 | w = rep(0,m) 53 | for ( ii in I ){ 54 | J <- !(dat[,ii] %in% names(ind[[ii]])) 55 | adapt[,ii] <- J 56 | w[J] <- w[J] + weight[J,ii] 57 | } 58 | status <- emptyStatus(n=m) 59 | status$weight <- w 60 | newerrorlocation( 61 | adapt = adapt, 62 | status = status, 63 | method = 'checkDatamodel', 64 | ) 65 | } 66 | 67 | 68 | # 69 | # For an \code{\link{editset}}, the categorical variables are tested against 70 | # the categorical datamodel. 71 | # 72 | # 73 | 74 | #' @method checkDatamodel editset 75 | #' @export 76 | #' @keywords internal 77 | checkDatamodel.editset <- function(E, dat, weight=rep(1,length(getVars(E))), ...){ 78 | if ( is.null(names(weight)) ) names(weight) <- getVars(E) 79 | et <- editType(E) 80 | err <- NULL 81 | if ( ncol(E$mixcat) > 0 ){ 82 | dvar <- getVars(E,type="dummy") 83 | iv <- -(1:ncol(E$mixcat)) 84 | if ( length(dvar) > 0 ) iv <- unlist(getInd(E$mixcat)[dvar]) 85 | A <- getArr(E$mixcat)[,-iv,drop=FALSE] 86 | sep <- getSep(E$mixcat) 87 | F <- neweditarray( 88 | A, 89 | indFromArray(A,sep), 90 | sep 91 | ) 92 | err <- checkDatamodel.editarray(F,dat,weight) 93 | } 94 | G <- reduce(E$num) 95 | err %+% checkDatamodel.editmatrix(G,dat,weight) 96 | } 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testCheckDatamodel.R: -------------------------------------------------------------------------------- 1 | 2 | context('Check datamodel') 3 | 4 | test_that("checkDatamodel.editmatrix works",{ 5 | expect_true( 6 | checkDatamodel( 7 | editmatrix("x > 0"), 8 | data.frame(x=-1) 9 | )$adapt[1,1] 10 | ) 11 | # test with NA 12 | expect_true( 13 | checkDatamodel( 14 | editmatrix("x > 0"), 15 | data.frame(x=NA) 16 | )$adapt[1,1] 17 | ) 18 | # test with valid entry 19 | expect_false( 20 | checkDatamodel( 21 | editmatrix("x < 0"), 22 | data.frame(x=-1) 23 | )$adapt[1,1] 24 | ) 25 | # test with no single-variable edits 26 | expect_false( 27 | checkDatamodel( 28 | editmatrix("x + y == 1"), 29 | data.frame(x=-1,y=2) 30 | )$adapt[1,1] 31 | ) 32 | 33 | }) 34 | 35 | 36 | test_that('checkDatamodel.editarray works',{ 37 | # dat has column also in E 38 | expect_equivalent( 39 | checkDatamodel( 40 | E = editarray('x %in% 1:2'), 41 | dat = data.frame(x=1:3) 42 | )$adapt[,1], 43 | c(FALSE,FALSE,TRUE) 44 | ) 45 | # dat has column not specified by E 46 | expect_equivalent( 47 | checkDatamodel( 48 | E = editarray('x %in% 1:2'), 49 | dat = data.frame(y=1:3,x=1:3) 50 | )$adapt[,1], 51 | c(FALSE,FALSE,FALSE) 52 | ) 53 | # dat misses a variable, specified in E 54 | expect_error( 55 | checkDatamodel( 56 | E = editarray('x %in% 1:2'), 57 | dat = data.frame(y=1:3) 58 | ) 59 | ) 60 | # dat computes correct weights 61 | expect_equivalent( 62 | checkDatamodel( 63 | E = editarray(c('x %in% 1:2','y %in% c("a","b")')), 64 | dat = data.frame(x=1:4,y=c('a','c','b','c')) 65 | )$status$weight, 66 | c(0,1,1,2) 67 | ) 68 | }) 69 | 70 | 71 | test_that("checkDatamodel.editset works with pure numerical edits",{ 72 | v <- checkDatamodel( 73 | editset(expression( 74 | x > 0, 75 | x + y == 3 76 | )), 77 | data.frame(x=c(-1,2),y=c(1,1)) 78 | ) 79 | expect_equivalent(v$adapt,array(c(TRUE,FALSE,FALSE,FALSE),dim=c(2,2))) 80 | }) 81 | 82 | 83 | test_that("checkDatamodel.editset works with pure categorical edits",{ 84 | v <- checkDatamodel( 85 | editset(expression( 86 | A %in% letters[1:3], 87 | B %in% 1:3 88 | )), 89 | data.frame(A=c('q','c'),B=c(1,10)) 90 | ) 91 | expect_equivalent(v$adapt,array(c(TRUE,FALSE,FALSE,TRUE),dim=c(2,2))) 92 | }) 93 | 94 | 95 | 96 | test_that("checkDatamodel.editset works with conditional numerical edits",{ 97 | v <- checkDatamodel( 98 | editset(expression( 99 | x + y == 3, 100 | x > 0, 101 | if ( x > 2 ) y < 1, 102 | v %in% letters[1:3])), 103 | data.frame( 104 | x = c(-1,3), 105 | y = c(0, 1), 106 | v = c("a","out-of-range") 107 | )) 108 | expect_equivalent( 109 | v$adapt, 110 | array(c(TRUE,FALSE,FALSE,FALSE,FALSE,TRUE),dim=c(2,3)) 111 | ) 112 | }) 113 | 114 | 115 | test_that("checkDatamodel.editset works with conditional categorical/numerical edits",{ 116 | v <- checkDatamodel( 117 | editset(expression( 118 | x + y == 3, 119 | x > 0, 120 | v %in% letters[1:3], 121 | if ( v == 'a' ) y > 0 122 | )), 123 | data.frame( 124 | x = c(-1,3), 125 | y = c(0, 1), 126 | v = c("a","out-of-range") 127 | )) 128 | 129 | }) 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /examples/localizeErrors.R: -------------------------------------------------------------------------------- 1 | 2 | # an editmatrix and some data: 3 | E <- editmatrix(c( 4 | "x + y == z", 5 | "x > 0", 6 | "y > 0", 7 | "z > 0")) 8 | 9 | dat <- data.frame( 10 | x = c(1,-1,1), 11 | y = c(-1,1,1), 12 | z = c(2,0,2)) 13 | 14 | # localize all errors in the data 15 | err <- localizeErrors(E,dat) 16 | 17 | summary(err) 18 | 19 | # what has to be adapted: 20 | err$adapt 21 | # weight, number of equivalent solutions, timings, 22 | err$status 23 | 24 | 25 | ## Not run 26 | 27 | # Demonstration of verbose processing 28 | # construct 2-block editmatrix 29 | F <- editmatrix(c( 30 | "x + y == z", 31 | "x > 0", 32 | "y > 0", 33 | "z > 0", 34 | "w > 10")) 35 | # Using 'dat' as defined above, generate some extra records 36 | dd <- dat 37 | for ( i in 1:5 ) dd <- rbind(dd,dd) 38 | dd$w <- sample(12,nrow(dd),replace=TRUE) 39 | 40 | # localize errors verbosely 41 | (err <- localizeErrors(F,dd,verbose=TRUE)) 42 | 43 | # printing is cut off, use summary for an overview 44 | summary(err) 45 | 46 | # or plot (not very informative in this artificial example) 47 | plot(err) 48 | 49 | ## End(Not run) 50 | 51 | for ( d in dir("../pkg/R",full.names=TRUE)) dmp <- source(d) 52 | # Example with different weights for each record 53 | E <- editmatrix('x + y == z') 54 | dat <- data.frame( 55 | x = c(1,1), 56 | y = c(1,1), 57 | z = c(1,1)) 58 | 59 | # At equal weights, both records have three solutions (degeneracy): adapt x, y 60 | # or z: 61 | localizeErrors(E,dat)$status 62 | 63 | # Set different weights per record (lower weight means lower reliability): 64 | w <- matrix(c( 65 | 1,2,2, 66 | 2,2,1),nrow=2,byrow=TRUE) 67 | 68 | localizeErrors(E,dat,weight=w) 69 | 70 | 71 | # an example with categorical variables 72 | E <- editarray(expression( 73 | age %in% c('under aged','adult'), 74 | maritalStatus %in% c('unmarried','married','widowed','divorced'), 75 | positionInHousehold %in% c('marriage partner', 'child', 'other'), 76 | if( age == 'under aged' ) maritalStatus == 'unmarried', 77 | if( maritalStatus %in% c('married','widowed','divorced')) 78 | !positionInHousehold %in% c('marriage partner','child') 79 | ) 80 | ) 81 | E 82 | 83 | # 84 | dat <- data.frame( 85 | age = c('under aged','adult','adult' ), 86 | maritalStatus=c('married','unmarried','widowed' ), 87 | positionInHousehold=c('child','other','marriage partner') 88 | ) 89 | dat 90 | localizeErrors(E,dat) 91 | # the last record of dat has 2 degenerate solutions. Running the last command 92 | # a few times demonstrates that one of those solutions is chosen at random. 93 | 94 | # Increasing the weight of 'positionInHousehold' for example, makes the best 95 | # solution unique again 96 | localizeErrors(E,dat,weight=c(1,1,2)) 97 | 98 | 99 | # an example with mixed data: 100 | 101 | E <- editset(expression( 102 | x + y == z, 103 | 2*u + 0.5*v == 3*w, 104 | w >= 0, 105 | if ( x > 0 ) y > 0, 106 | x >= 0, 107 | y >= 0, 108 | z >= 0, 109 | A %in% letters[1:4], 110 | B %in% letters[1:4], 111 | C %in% c(TRUE,FALSE), 112 | D %in% letters[5:8], 113 | if ( A %in% c('a','b') ) y > 0, 114 | if ( A == 'c' ) B %in% letters[1:3], 115 | if ( !C == TRUE) D %in% c('e','f') 116 | )) 117 | 118 | set.seed(1) 119 | dat <- data.frame( 120 | x = sample(-1:8), 121 | y = sample(-1:8), 122 | z = sample(10), 123 | u = sample(-1:8), 124 | v = sample(-1:8), 125 | w = sample(10), 126 | A = sample(letters[1:4],10,replace=TRUE), 127 | B = sample(letters[1:4],10,replace=TRUE), 128 | C = sample(c(TRUE,FALSE),10,replace=TRUE), 129 | D = sample(letters[5:9],10,replace=TRUE), 130 | stringsAsFactors=FALSE 131 | ) 132 | 133 | (el <-localizeErrors(E,dat,verbose=TRUE)) 134 | 135 | 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testEditset.R: -------------------------------------------------------------------------------- 1 | 2 | context("Editset") 3 | test_that("editset parses categorical edits",{ 4 | 5 | v <- expression( 6 | A %in% c('a','b'), 7 | B %in% c('c','d'), 8 | if ( A == 'a') B == 'c' 9 | ) 10 | E <- editset(v) 11 | expect_equal(E$num,editmatrix(expression())) 12 | expect_equal(E$mixnum,editmatrix(expression())) 13 | expect_equal(E$mixcat,editarray(v)) 14 | }) 15 | 16 | test_that("editset parses numerical edits",{ 17 | v <- expression(x + y == z, 2*x -u == v) 18 | E <- editset(v) 19 | expect_equal(E$num,editmatrix(v)) 20 | expect_equal(E$mixnum,editmatrix(expression())) 21 | expect_equal(E$mixcat,editarray(expression())) 22 | }) 23 | 24 | test_that("editset parses conditional numeric edits",{ 25 | # test 1: inequalities 26 | v <- expression( if ( x > 0 ) y > 0 ) 27 | E <- editset(v) 28 | expect_equal(E$num, editmatrix(expression())) 29 | expect_equivalent(E$mixnum, editmatrix(expression(x>0,y<=0))) 30 | expect_equivalent(getArr(E$mixcat),array(c(F,T,F,T),dim=c(1,4))) 31 | 32 | # test 2: with equality in if-statement 33 | 34 | v <- expression( if ( x >= 0 ) y >= 0) 35 | E <- editset(v) 36 | expect_equal(E$num, editmatrix(expression())) 37 | expect_equivalent(E$mixnum,editmatrix(expression(x>=0,y<0))) 38 | expect_equivalent(getArr(E$mixcat), array(c(F,T,F,T),dim=c(1,4))) 39 | 40 | }) 41 | 42 | test_that("editset parses conditional categorical/numerical edits",{ 43 | # test 1: numerical statement in 'then' clause 44 | v <- expression( 45 | A %in% letters[1:2], 46 | B %in% letters[3:4], 47 | if ( A == 'a' ) x > 0 48 | ) 49 | E <- editset(v) 50 | expect_equal(E$num, editmatrix(expression())) 51 | expect_equivalent(E$mixnum, editmatrix(expression(x<=0))) 52 | expect_equal(dim(E$mixcat),c(1,6)) 53 | expect_equivalent(getArr(E$mixcat),array(c(T,F,T,T,F,T),dim=c(1,6))) 54 | 55 | # test 2: numerical statement in 'then' clause 56 | v <- expression( 57 | A %in% letters[1:2], 58 | B %in% letters[3:4], 59 | if ( x > 0 ) A == 'a' 60 | ) 61 | E <- editset(v) 62 | expect_equal(E$num, editmatrix(expression())) 63 | expect_equivalent(E$mixnum, editmatrix(expression(x>0))) 64 | expect_equivalent(getArr(E$mixcat), array(c(F,T,T,T,F,T),dim=c(1,6))) 65 | 66 | # throws exception in editrules 2.8.0 (thanks to Alois Haslinger) 67 | editset(expression( 68 | x %in% letters[1:2] 69 | , y %in% letters[3:5] 70 | , z > 0 71 | , if(x =='a' && y == 'c') z < 7 72 | )) 73 | 74 | }) 75 | 76 | 77 | 78 | ## various editset functionalities 79 | test_that("contains finds the right variables in an editset",{ 80 | E <- editset(expression( 81 | x + y == z, 82 | if ( s + t >= 6 ) x < 10, 83 | A %in% letters[1:2], 84 | if ( A == 'a' ) x > 3 85 | )) 86 | expect_equivalent( 87 | contains(E), 88 | matrix(c( 89 | T,T,T,F,F,F, 90 | T,F,F,T,T,F, 91 | T,F,F,F,F,T), 92 | byrow=TRUE, 93 | nrow=3) 94 | ) 95 | 96 | }) 97 | 98 | test_that("as.editset for cateditmatrix works",{ 99 | E <- cateditmatrix(expression( 100 | gender %in% c("male", "female") 101 | , if (pregnant) gender == "female" 102 | )) 103 | 104 | as.editset(E) 105 | }) 106 | 107 | 108 | test_that("simple mixed edit without coefficients is recognized",{ 109 | E <- editset(expression(if ( A %in% c('a','b') ) x > 0 )) 110 | expect_equal(nedits(E),1) 111 | expect_equal(length(getVars(E,type="dummy")),1) 112 | }) 113 | 114 | 115 | test_that("Mixed parsing edit containing brackets works",{ 116 | E <- editset("if ((x > 0) && (y < 0)) z < y") 117 | }) 118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /papers/pic.R: -------------------------------------------------------------------------------- 1 | # Generate all pictures for the vignettes. 2 | # mvdl 08.07.2011 3 | 4 | diamondFile <- "diamond.pdf" 5 | twodiamondFile <- "twodiamond.pdf" 6 | 7 | ## plot parameters 8 | linewidth = 2 9 | polygoncolor = "#E3E3E3" 10 | textcex = 2 11 | 12 | plotPoly <- function(x,y){ 13 | polygon(x,y,lwd=linewidth,col=polygoncolor) 14 | } 15 | 16 | ############################################################################### 17 | # DIAMOND AREA 18 | ############################################################################### 19 | 20 | # Allowed area is the diamond 21 | # {(x,y) in R : y <= x+1, y>=-x+3, y>=x-1, y<=-x+5 } 22 | x <- array(c( 23 | 1,4, 24 | 0,3, 25 | 0,3, 26 | 1,4), dim=c(2,4) 27 | ) 28 | y <- array(0,dim=c(2,4)) 29 | y[,1] <- x[,1] - 1 # e1 30 | y[,2] <- -x[,2] + 3 # e2 31 | y[,3] <- x[,3] + 1 # e3 32 | y[,4] <- -x[,4] +5 # e4 33 | 34 | 35 | pdf(diamondFile) 36 | plot(x[,1],y[,1], 37 | xlim=c(-1,4.5), 38 | ylim=c(-1,4.5), 39 | 'l', 40 | lwd=linewidth, 41 | xlab="x", 42 | ylab="y", 43 | cex.lab=2, 44 | cex.axis=2, 45 | ) 46 | 47 | for ( i in 2:4 ){ 48 | lines(x[,i],y[,i], 49 | lwd=linewidth 50 | ) 51 | } 52 | text(2.7,1.3,expression(e[1]),cex=textcex) 53 | text(1.3,1.3,expression(e[2]),cex=textcex) 54 | text(1.3,2.7,expression(e[3]),cex=textcex) 55 | text(2.7,2.7,expression(e[4]),cex=textcex) 56 | 57 | # fill up diamond 58 | xPoints <- c(2,3,2,1) 59 | yPoints <- c(1,2,3,2) 60 | plotPoly(xPoints,yPoints) 61 | 62 | # record 1 63 | points(2,-1,pch=19,cex=textcex) 64 | # record 2 65 | points(0,0,pch=19,cex=textcex) 66 | # record 3 67 | points(-1,2,pch=19,cex=textcex) 68 | 69 | arrows( 70 | x0=c( 2.0,0.2,0.2,-0.8), 71 | y0=c(-0.8,0.2,0.2, 2.0), 72 | x1=c( 2.0,0.85,1.6, 0.6), 73 | y1=c( 0.6,1.65,0.85, 2.0), 74 | lwd=linewidth,lty=c(1,2,2,1)) 75 | 76 | # solution sets 77 | lines( 78 | x = c(2,2), 79 | y = c(1,3)) 80 | lines( 81 | x = c(1,3), 82 | y = c(2,2)) 83 | 84 | # labels 85 | text(1.3,-1,"(2,-1)",cex=textcex) 86 | text(0,-0.4,"(0,0)",cex=textcex) 87 | text(-1,2.7,"(-1,2)",srt=90,cex=textcex) 88 | dev.off() 89 | 90 | ############################################################################### 91 | # nonconvex example with two diamonds 92 | ############################################################################### 93 | 94 | 95 | xleft <- c(-2,-3,-2,-1) 96 | yleft <- c( 1, 2, 3, 2) 97 | 98 | xright <- c(2,1,2,3) 99 | yright <- c(2,3,4,3) 100 | 101 | pdf(twodiamondFile) 102 | plot(0,0,xlim=c(-3,3),ylim=c(0,4), 103 | col="white", 104 | xlab="x",ylab="y", 105 | cex.lab=textcex, 106 | cex.axis=textcex) 107 | #grid() 108 | plotPoly(xleft,yleft) 109 | plotPoly(xright,yright) 110 | 111 | # record 1 112 | points(1.5,1.5,pch=19,cex=textcex) 113 | # record 2 114 | points(0,2.5,pch=19,cex=textcex) 115 | # record 2 116 | points(2,0,pch=19,cex=textcex) 117 | 118 | arrows( 119 | x0 = c( 1.3, 1.5,-0.2, 0.2, 2.0), 120 | y0 = c( 1.5, 1.7, 2.5, 2.5, 0.2), 121 | x1 = c(-1.3, 1.5,-1.3, 1.3, 2.0), 122 | y1 = c( 1.5, 2.3, 2.5, 2.5, 1.8), 123 | lwd=linewidth 124 | ) 125 | 126 | # solution sets 127 | lines( 128 | x=c(-1.5,-2.5), 129 | y=c(2.5,2.5)) 130 | lines( 131 | x=c(1.5,1.5), 132 | y=c(2.5,3.5)) 133 | lines( 134 | x=c(1.5,2.5), 135 | y=c(2.5,2.5)) 136 | lines( 137 | x=c(-1.5,-2.5), 138 | y=c(1.5,1.5)) 139 | lines( 140 | x=c(2,2), 141 | y=c(2,4)) 142 | 143 | # labels 144 | text(0.9,1.2,"(3/2,3/2)", cex=textcex) 145 | text(0.0,2.2,"(0,5/2)", cex=textcex) 146 | text(1.4,0.0,"(2,0)", cex=textcex) 147 | dev.off() 148 | -------------------------------------------------------------------------------- /pkg/R/print.R: -------------------------------------------------------------------------------- 1 | 2 | #' print editarray 3 | #' 4 | #' @method print editarray 5 | #' @param x an \code{\link{editarray}} 6 | #' @param textOnly If \code{FALSE}, also print internal structure 7 | #' @param ... arguments to be passed to or from other methods. 8 | #' @keywords internal 9 | #' @export 10 | print.editarray <- function(x, textOnly=FALSE, ...){ 11 | d <- datamodel(x) 12 | A <- getArr(x) 13 | if ( ncol(A) > 0 ){ 14 | cn <- paste(abbreviate(d$variable),":",abbreviate(d$value),sep="") 15 | colnames(A) <- cn 16 | } 17 | if( !textOnly ){ 18 | print(A) 19 | cat("Edit array:\n") 20 | } 21 | cat("\nEdit rules:\n") 22 | desc <- attr(x,'description') 23 | if ( is.null(desc) ){ 24 | desc <- rep("",nrow(x)) 25 | } else { 26 | desc <- paste('[',desc,']') 27 | } 28 | u <- as.character(x) 29 | nm <- names(u) 30 | pr <- paste(format(nm,width=max(nchar(nm))),':',paste(u,desc),collapse='\n') 31 | cat(pr,'\n') 32 | } 33 | 34 | 35 | 36 | #' print editmatrix 37 | #' 38 | #' @export 39 | #' @method print editmatrix 40 | #' 41 | #' @param x editmatrix object to be printed 42 | #' @param textOnly If \code{FALSE}, also print internal structure 43 | #' @param ... further arguments passed to or from other methods. 44 | #' @keywords internal 45 | print.editmatrix <- function(x, textOnly=FALSE, ...){ 46 | if (!textOnly){ 47 | cat("Edit matrix:\n") 48 | print(toDataFrame(x), ...) 49 | cat("\nEdit rules:\n") 50 | } 51 | desc <- attr(x,'description') 52 | if ( is.null(desc) ){ 53 | desc <- rep("",nrow(x)) 54 | } else { 55 | desc <- paste('[',desc,']') 56 | } 57 | u <- as.character(x) 58 | nm <- names(u) 59 | pr <- paste(format(nm,width=max(nchar(nm))),':', paste(u,desc), collapse='\n') 60 | cat(pr,'\n') 61 | } 62 | 63 | #' print cateditmatrix 64 | #' 65 | #' @export 66 | #' @method print cateditmatrix 67 | #' 68 | #' @param x cateditmatrix object to be printed 69 | #' @param ... further arguments passed to or from other methods. 70 | #' @keywords internal 71 | print.cateditmatrix <- function(x, textOnly=TRUE, ...){ 72 | if (!textOnly) { 73 | cat("Edit matrix:\n") 74 | print(as.data.frame(x), ...) 75 | cat("\nEdit rules:\n") 76 | } 77 | desc <- attr(x,'description') 78 | if ( is.null(desc) ){ 79 | desc <- rep("",nrow(x)) 80 | } else { 81 | desc <- paste('[',desc,']') 82 | } 83 | u <- as.character(x, asIfStatement=TRUE) 84 | nm <- names(u) 85 | pr <- paste(format(nm,width=max(nchar(nm))),':',paste(u,desc),collapse='\n') 86 | cat(pr,'\n') 87 | } 88 | 89 | #' print editset 90 | #' 91 | #' @export 92 | #' @method print editset 93 | #' 94 | #' @param x editset object to be printed 95 | #' @param ... further arguments passed to or from other methods. 96 | #' @keywords internal 97 | print.editset <- function(x, ...){ 98 | u <- as.character(x,datamodel=FALSE) 99 | v <- as.character(x,datamodel=TRUE) 100 | cnd <- attr(x,'condition') 101 | if ( nedits(cnd) > 0 ){ 102 | cat('conditions:\n') 103 | cat(paste(cnd,collapse=', '),'\n') 104 | } 105 | v <- v[! v%in% u] 106 | if ( length(v) > 0 ) cat("\nData model:\n") 107 | if ( length(v)>0 ){ 108 | nm <- names(v) 109 | cat(paste(format(nm,width=max(nchar(nm))),':',v,collapse='\n'),'\n') 110 | } 111 | cat("\nEdit set:\n") 112 | nm <- names(u) 113 | cat(paste(format(nm,width=max(nchar(nm))),':',u,collapse='\n'),'\n') 114 | } 115 | 116 | 117 | #' print editset 118 | #' 119 | #' @export 120 | #' @method print editlist 121 | #' 122 | #' @param x editset object to be printed 123 | #' @param ... further arguments passed to or from other methods. 124 | #' @keywords internal 125 | print.editlist <- function(x, ...){ 126 | cat("editsets:\n") 127 | j <- 0 128 | lapply(x,function(i) {j <<- j+1;cat("\nSet",j," ");print(i)}, ...) 129 | } 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /pkg/R/generateEdits.R: -------------------------------------------------------------------------------- 1 | #' Field code forest algorithm 2 | #' 3 | #' Workhorse function for \code{\link{generateEdits}} 4 | #' 5 | #' @param E an editarray 6 | #' @param totreat variable names still to be eliminated from E 7 | #' @param env an environment where all editmatrices will be stored 8 | #' 9 | #' @seealso \code{\link{generateEdits}}, \code{\link{editarray}} 10 | #' 11 | #' @example ../examples/generateEdits.R 12 | #' @keywords internal 13 | fcf.env <- function(E,totreat,env){ 14 | # add current edits to collection 15 | if (nrow(E)>0){ 16 | env$i <- env$i + 1 17 | U <- c(env$E,E) 18 | env$E <- U[!isSubset(U),] 19 | } else { 20 | # return if there are no more edits 21 | return() 22 | } 23 | 24 | # divide and conquer 25 | B <- blocks(E) 26 | for ( b in B){ 27 | # variables to be treated in the block 28 | totreatb <- totreat[names(totreat) %in% getVars(b)] 29 | vrs <- names(totreatb)[totreatb] 30 | # variables which cannot be resolved need not be treated (prune the tree) 31 | totreatb[!resolves(b,vrs)] <- FALSE 32 | vrs <- names(totreatb)[totreatb] 33 | # order variables so the most connected variables are eliminated first, 34 | # eliminate variables and recurse. 35 | vrs <- vrs[order(colSums(contains(b,vrs)),decreasing=TRUE)] 36 | for ( k in vrs ){ 37 | totreatb[k] <- FALSE 38 | fcf.env(reduce(eliminate(b,k)),totreatb[-which(names(totreatb)==k)],env) 39 | } 40 | } 41 | } 42 | 43 | 44 | 45 | #' Derive all essentially new implicit edits 46 | #' 47 | #' Implements the Field Code Forest (FCF) algorithm of Garfinkel et al (1986) to 48 | #' derive all essentially new implicit edits from an editarray. The FCF is really 49 | #' a single, highly unbalanced tree. This algorithm traverses the tree, pruning many 50 | #' unnecessary branches, uses \code{\link{blocks}} to divide and conquer, and 51 | #' optimizes traversing order. See Van der Loo (2012) for a description 52 | #' of the algorithms. 53 | #' 54 | #' @param E An \code{\link{editarray}} 55 | #' @return A 3-element named \code{list}, where element \code{E} is an \code{\link{editarray}} containing all 56 | #' generated edits. \code{nodes} contains information on the number of nodes in the tree and vs the number of nodes 57 | #' traversed and \code{duration} contains user, system and elapsed time inseconds. 58 | #' The \code{\link[=editarray]{summary}} method for \code{\link{editarray}} prints this information. 59 | #' 60 | #' 61 | #' @references 62 | #' R.S. Garfinkel, A.S. Kunnathur and G.E. Liepins (1986). 63 | #' Optimal imputation of erroneous data: categorical data, general edits. 64 | #' Operations Research 34, 744-751. 65 | #' 66 | #' M.P.J. Van der Loo (2012). Variable elimination and edit generation with a flavour of 67 | #' semigroup algebra (submitted) 68 | #' 69 | #' @export 70 | generateEdits <- function(E){ 71 | if ( !is.editarray(E) ) stop("Argument must be of class 'editarray' ") 72 | t0 <- proc.time() 73 | # initialize variables to treat 74 | vars <- getVars(E) 75 | totreat <- rep(TRUE,length(vars)) 76 | names(totreat) <- vars 77 | # set up environment to collect generated edits 78 | e <- new.env() 79 | # node counter 80 | e$i <- 0 81 | e$E <- E[logical(0),] 82 | # call the workhorse 83 | fcf.env(E,totreat,e) 84 | duration <- getDuration(proc.time()-t0) 85 | # return edits 86 | return( 87 | list( 88 | edits=e$E, 89 | nodes=c(nodesInTree = 2^length(vars), nodesTraversed = e$i), 90 | duration=duration 91 | ) 92 | ) 93 | } 94 | 95 | 96 | 97 | # Check which variables of 'vars' can be resolved 98 | resolves <- function(E,vars){ 99 | if ( length(vars)==0) return(logical(0)) 100 | ind <- getInd(E)[vars] 101 | Ic <- contains(E,vars) 102 | sapply(vars, function(v) all(colSums(E[Ic[,v],ind[[v]],drop=FALSE])>0)) 103 | } 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /pkg/R/softEdits.R: -------------------------------------------------------------------------------- 1 | #' Derive editmatrix with soft constraints based on boundaries of variables. This is a utility function that is used for 2 | #' constructing a mip/lp problem. 3 | #' @param E normalized \code{editmatrix} 4 | #' @param prefix \code{character} used for naming dummy variables in matrix. 5 | #' @keywords internal 6 | softEdits <- function(E, prefix="delta.", ...){ 7 | UseMethod("softEdits") 8 | } 9 | 10 | #' Derive editmatrix with soft constraints based on boundaries of variables. This is a utility function that is used for 11 | #' constructing a mip/lp problem. 12 | #' @param E normalized \code{editmatrix} 13 | #' @param prefix \code{character} used for naming dummy variables in matrix. 14 | #' @keywords internal 15 | softEdits.editmatrix <- function(E, prefix="delta.", postfix="", M=1e7, ...){ 16 | 17 | if (!nrow(E)){ 18 | return(E) 19 | } 20 | 21 | n <- nrow(E) 22 | vars <- getVars(E) 23 | ops <- getOps(E) 24 | 25 | adapt <- paste(prefix,rownames(E), sep="") 26 | 27 | A <- getA(E) 28 | b <- getb(E) 29 | isna <- is.na(b) 30 | eq <- (ops == "==") & !isna 31 | 32 | Ab <- cbind( A 33 | , diag(-M, n) 34 | , b 35 | )[!isna,,drop=FALSE] 36 | 37 | # copy the equality constraints 38 | Ab_eq <- if(any(eq)){ 39 | cbind( -A 40 | , diag(-M,n) 41 | , -b 42 | )[eq,,drop=FALSE] 43 | } 44 | 45 | # clear A, trick that keeps the rownames 46 | A[,] <- 0 47 | 48 | # NA's must be changed. 49 | Ab_na <- if(any(isna)){ 50 | cbind( A #matrix(0, nrow=n, ncol=ncol(A)) 51 | , diag(1, n) 52 | , 1 53 | )[isna,,drop=FALSE] 54 | } 55 | # TODO cleanup this code 56 | #print(list(Ab=Ab, Ab_eq=Ab_eq, Ab_na=Ab_na)) 57 | Ab <- rbind(Ab, Ab_eq, Ab_na) 58 | rownames(Ab) <- make.unique(paste0(rownames(Ab), postfix), "_" ) 59 | ops <- c(ops[!isna], ops[eq]) 60 | ops <- gsub("==", "<=", ops) 61 | ops <- c(ops, rep("==", sum(isna))) 62 | 63 | colnames(Ab) <- c(getVars(E), adapt, "CONSTANT") 64 | 65 | seE <- neweditmatrix(Ab, ops=ops) 66 | seE 67 | } 68 | 69 | #' Derive editmatrix with soft constraints. This is a utility function that is used for 70 | #' constructing a mip/lp problem. 71 | #' @param E normalized \code{editmatrix} 72 | #' @param prefix \code{character} used for naming dummy variables in matrix. 73 | #' @keywords internal 74 | softEdits.cateditmatrix <- function(E, prefix="delta.", postfix="", ...){ 75 | if (!nrow(E)){ 76 | return(E) 77 | } 78 | eq <- getOps(E) == "==" 79 | b <- getb(E) 80 | 81 | dummies <- paste(prefix, rownames(E), sep="") 82 | 83 | seA <- diag(ifelse(eq, -1, 1), ncol=length(eq), nrow=length(eq)) 84 | colnames(seA) <- dummies 85 | seA <- cbind(getA(E), seA) 86 | rownames(seA) <- paste0(rownames(seA), postfix) 87 | 88 | binvars <- sapply(colnames(seA), is.character) 89 | seE <- as.editmatrix(seA, b, getOps(E), binvars=binvars) 90 | seE 91 | } 92 | 93 | #' Derive editmatrix with soft constraints based on boundaries of variables. This is a utility function that is used for 94 | #' constructing a mip/lp problem. 95 | #' @param E normalized \code{editmatrix} 96 | #' @param prefix \code{character} used for naming dummy variables in matrix. 97 | #' @keywords internal 98 | softEdits.editarray<- function(E, prefix="delta.", postfix="", ...){ 99 | if (!nrow(E)){ 100 | return(E) 101 | } 102 | softEdits.cateditmatrix(cateditmatrix(E), prefix=prefix, postfix=postfix, ...) 103 | } 104 | 105 | #quick tests 106 | 107 | # E <- editmatrix(expression( x - y < 2 108 | # , x + y < 5 109 | # , x + y == 3 110 | # , z == 1 111 | # ) 112 | # ) 113 | # 114 | # # set the z == NA 115 | # E[4,ncol(E)] <- NA 116 | # (se <- softEdits(E)) 117 | #softEdits(cateditmatrix(c("if (married) adult", "married %in% c(TRUE,FALSE)","married==TRUE"))) 118 | -------------------------------------------------------------------------------- /pkg/R/mip.R: -------------------------------------------------------------------------------- 1 | #' Write an editset into a mip representation 2 | #' 3 | #' Writes an editset or an object coercable to an editset as a mip problem. 4 | #' @param E an \code{link{editset}} or an object that is coerciable to an 5 | #' \code{editset} 6 | #' @param x named \code{list}/\code{vector} with variable values 7 | #' @param weight reliability weights for values of \code{x} 8 | #' @param M Constant that is used for allowing the values to differ from \code{x} 9 | #' @param epsilon Constant that is used for converting '<' into '<=' 10 | #' @param prefix prefix for dummy variables that are created 11 | #' @param ... not used 12 | #' @return a mip object containing al information for transforming it 13 | #' into an lp/mip problem 14 | #' @export 15 | as.mip <- function( E, x=NULL, weight=NULL, M=1e7, epsilon=1e-3, prefix="delta." 16 | , ...){ 17 | 18 | #TODO add ruleWeights = c(Inf, ..., 1, 1, NA) etc. check with `is.finite` 19 | #TODO add lambda used in objfn of softEdits 20 | #TODO add objfn specification (needs reordering in function so it has the same order) 21 | 22 | E <- as.editset(E) 23 | objfn <- NULL 24 | 25 | E_mip = c( E$num 26 | , cateditmatrix(E$mixcat) 27 | , softEdits(editmatrix(invert(as.character(E$mixnum))), prefix="") 28 | ) 29 | 30 | if (!missing(x)){ 31 | if (is.null(weight)){ 32 | weight <- rep(1.0, length(x)) 33 | } else { 34 | stopifnot(length(weight) == length(x)) 35 | } 36 | 37 | names(weight) <- paste0(prefix, names(x)) 38 | # create expression vector with var == value 39 | expr <- as.expression( lapply(names(x) 40 | , function(v){ 41 | substitute( var == value 42 | , list(var=as.symbol(v), value=x[[v]]) 43 | ) 44 | })) 45 | 46 | isna <- sapply(x, is.na) 47 | 48 | num_vars <- getVars(E, type="num") 49 | nums <- !isna & names(x) %in% num_vars 50 | 51 | if (any(nums)) { 52 | A0_num <- editmatrix(expr[nums]) 53 | rownames(A0_num) <- names(x)[nums] 54 | A0_num <- softEdits(A0_num, prefix=prefix, postfix="0",M=M) 55 | } else A0_num <- NULL 56 | 57 | cat_vars <- getVars(E, type="cat") 58 | cats <- !isna & names(x) %in% cat_vars 59 | 60 | if (any(cats)){ 61 | A0_cat <- cateditmatrix(expr[cats]) 62 | rownames(A0_cat) <- names(x)[cats] 63 | A0_cat <- softEdits(A0_cat, prefix=prefix, postfix="0") 64 | } else A0_cat <- NULL 65 | 66 | E_mip <- c(E_mip, A0_num, A0_cat) 67 | if (any(isna)){ 68 | E_mip <- c( E_mip 69 | , editmatrix(paste0(prefix, names(x)[isna], "==", 1)) 70 | ) 71 | } 72 | 73 | } 74 | 75 | # replace strict inequalities... 76 | A <- getA(E_mip) 77 | b <- getb(E_mip) 78 | ops <- getOps(E_mip) 79 | lt <- ops == "<" 80 | b[lt] <- b[lt] - epsilon 81 | ops[lt] <- "<=" 82 | E_mip = as.editmatrix(A=A, b=b, ops=ops) 83 | 84 | vars <- colnames(A) 85 | objfn <- sapply(vars, function(v) 0) 86 | objfn[names(weight)] <- weight 87 | 88 | numvars = sapply(vars, `%in%`, getVars(E, type="num")) 89 | binvars = !numvars 90 | 91 | structure( 92 | list( E = E_mip 93 | , objfn = objfn 94 | , binvars = which(binvars) 95 | , numvars = which(numvars) 96 | , M = M 97 | , epsilon = epsilon 98 | ), 99 | class="mip" 100 | ) 101 | } 102 | 103 | #' @method print mip 104 | print.mip <- function(x, ...){ 105 | print.editmatrix(x$E, textOnly=T) 106 | if (!is.null(x$objfn)) { 107 | idx <- which(x$objfn != 0) 108 | if (length(idx)) { 109 | of <- paste0(x$objfn, "*", colnames(x$E))[idx] 110 | of <- paste(of, collapse=" + ") 111 | cat("objective function = min: ", of, "\n") 112 | } 113 | } 114 | } 115 | 116 | # # quick test 117 | # E <- editset(c(r1="x > 1","y >= x", r2="if (x>1) y> 2", r3="A %in% c('a', 'b')")) 118 | # as.mip(E) 119 | -------------------------------------------------------------------------------- /pkg/R/parseMix.R: -------------------------------------------------------------------------------- 1 | MIXOPS <- c("if", "||", "|", "&&", "&") 2 | 3 | #' Parse a mixed edit 4 | #' 5 | #' parseMix replaces all numerical edits with a generated dummy boolean variable and returns the resulting categorical 6 | #' edit plus the list of found of numerical edits. These expressions should be handled further by `parseCat` and 7 | #' `parseNum`. 8 | #' @param e expression to be parsed 9 | #' @param numid starting number for dummy name generation 10 | #' 11 | #' @return list with categorical expression (\code{cat}), which is normalized, a numerical expression (\code{nums}) 12 | #' and a negated version of this expression (\code{negNums}) 13 | #' @keywords internal 14 | parseMix <- function(e, editname="", numid=0, negate=TRUE){ 15 | 16 | # should the expressions be returned or should parseCat and parseNum be called on cat and nums? 17 | 18 | if (length(e) < 3) return(NULL) 19 | op <- as.character(e[[1]]) 20 | if (!op %in% MIXOPS) stop("invalid mix") 21 | 22 | cat <- e 23 | nums <- expression() 24 | numid <- numid 25 | 26 | pm <- function(i, neg=negate){ 27 | # rewrite equality as inequalities 28 | #e[[i]] <- rewriteEq(e[[i]]) 29 | edit <- e[[i]] 30 | if (length(edit) == 1) return() 31 | cmp <- deparse(edit[[1]]) 32 | 33 | # remove brackets 34 | if (cmp == "("){ 35 | edit <- edit[[2]] 36 | } 37 | 38 | if (isNum(edit)){ 39 | 40 | if (cmp == "=="){ 41 | stop("edit '", deparse(e),"' contains '==' which is not allowed, in a mixed edit") 42 | } 43 | 44 | numid <<- numid + 1 45 | numvar <- paste(editname, ".l",numid,sep="") 46 | #replace numeric edit with generated dummy boolean edit name and handle normal form 47 | dum <- as.name(numvar) 48 | if (neg){ 49 | dum <- quote(!a) 50 | dum[[2]] <- as.name(numvar) 51 | edit <- negateEdit(edit) 52 | } 53 | cat[[i]] <<- dum 54 | nums[numvar] <<- as.expression(edit) 55 | } else if (!isCat(edit)){ 56 | l <- parseMix(edit, numid=numid, editname=editname, negate=neg) 57 | cat[[i]] <<- l$cat 58 | nums[names(l$nums)] <<- l$nums 59 | numid <<- l$numid 60 | } 61 | } 62 | 63 | # don't negate the premisse (term 2) 64 | pm(2, neg=ifelse(op == "if", !negate, negate)) 65 | # negate the consequent (term 3) 66 | pm(3, neg=negate) 67 | 68 | negNums <- nums 69 | negNums[] <- sapply(nums, negateEdit) 70 | 71 | if (length(negNums)){ 72 | names(negNums) <- paste("!",names(negNums), sep="") 73 | } 74 | 75 | list( cat = cat # parseCat(cat) 76 | , nums = nums # lapply(nums, parseNum) 77 | , negNums = negNums # lapply(orNums, parseNum) 78 | , numid=numid 79 | ) 80 | } 81 | 82 | # e has to be an edit 83 | negateEdit <- function(e){ 84 | op <- as.character(e[[1]]) 85 | if (op == "!") 86 | return(e[[2]]) 87 | op <- switch( op 88 | , "<" = ">=" 89 | , "<=" = ">" 90 | , ">" = "<=" 91 | , ">=" = "<" 92 | , "==" = "!=" 93 | , "!=" = "==" 94 | ) 95 | if (is.null(op)){ 96 | ne <- quote(!a) 97 | ne[[2]] <- e 98 | e <- ne 99 | } else { 100 | e[[1]] <- as.symbol(op) 101 | } 102 | e 103 | } 104 | 105 | # e has to be an numerical inequality! 106 | rewriteInEq <- function(e){ 107 | op <- as.character(e[[1]]) 108 | if (op != "!=") return(e) 109 | eAnd <- quote(a || b) 110 | eAnd[[2]] <- e 111 | eAnd[[2]][[1]] <- as.symbol(">") 112 | eAnd[[3]] <- e 113 | eAnd[[3]][[1]] <- as.symbol("<") 114 | eAnd 115 | } 116 | 117 | # quick test 118 | # rewriteInEq(quote(x != y + 1)) 119 | # 120 | # a <- negateEdit(quote(x>2)) 121 | # a 122 | # negateEdit(a) 123 | # 124 | # a <- negateEdit(quote(A %in% "a")) 125 | # a 126 | # negateEdit(a) 127 | # 128 | # pm <- parseMix( quote( if(x>1 && 129 | # x < 10 && 130 | # A %in% c('a1')) y > 2 ) 131 | # , editname="e1") 132 | # pm 133 | # 134 | 135 | #pm <- parseMix(quote(if ((x>0)) A)) 136 | # pm 137 | -------------------------------------------------------------------------------- /pkg/R/disjunct.R: -------------------------------------------------------------------------------- 1 | 2 | #' Decouple a set of conditional edits 3 | #' 4 | #' An editset is transformed to a list of \code{\link{editset}s} 5 | #' which do not contain any conditional numeric/categorical edits anymore. Each \code{\link{editset}} gains an 6 | #' attribute \code{\link{condition}}, which holds the series of assumptions made to 7 | #' decouple the original edits. This attribute will be printed when not \code{NULL}. Warning: this may be slow 8 | #' for large, highly entangled sets of edits. 9 | #' 10 | #' @param E Object of class \code{\link{editset}} 11 | #' @param type Return type: \code{list} (default) for \code{editlist}, \code{env} for \code{editenv}. 12 | #' @return An object of class \code{editlist} (\code{editenv}), which is nothing more than a \code{list} (\code{environment}) of 13 | #' \code{editsets} with a class attribute. Each element has an attribute 'condition' showing which conditions 14 | #' were assumed to derive the editset. 15 | #' @seealso \code{\link{separate}}, \code{\link{condition}}, \code{\link{blocks}} 16 | #' @example ../examples/dnf.R 17 | #' @export 18 | disjunct <- function(E, type=c('list','env')){ 19 | if (!inherits(E,'editset')) stop('only for objects of class editset') 20 | type=match.arg(type) 21 | e <- new.env() 22 | e$i <- 0 23 | dnf(E, e) 24 | rm('i',envir=e) 25 | if ( type == 'list'){ 26 | e <- as.list(e) 27 | class(e) <- c("editlist") 28 | } else { 29 | class(e) <- c("editenv") 30 | } 31 | e 32 | } 33 | 34 | dnf <- function(E, env){ 35 | cnd <- attr(E,'condition') 36 | vars <- getVars(E,type="dummy") 37 | if (length(vars) == 0 ){ 38 | env$i <- env$i + 1 39 | condition(E) <- condition(E)[!duplicated(condition(E)),] 40 | assign(paste("E",env$i,sep=""),E,envir=env) 41 | } else { 42 | E1 <- substValue(E,vars[1],TRUE) 43 | if ( isFeasible(condition(E1)) && 44 | isFeasible(c(condition(E1), E1$num)) && 45 | isFeasible(E1$mixcat) 46 | ) dnf(E1,env) 47 | E2 <- substValue(E,vars[1],FALSE) 48 | if ( isFeasible(condition(E2)) && 49 | isFeasible(c(condition(E2), E2$num)) && 50 | isFeasible(E2$mixcat) 51 | ) dnf(E2,env) 52 | } 53 | } 54 | 55 | 56 | 57 | #' Separate an editset into its disconnected blocks and simplify 58 | #' 59 | #' The input edits are separated into disjunct blocks, and simplified to 60 | #' \code{editmatrix} or \code{\link{editarray}} where possible. Remaining 61 | #' \code{\link[=editset]{editsets}} are separated into \code{\link{disjunct}} 62 | #' \code{\link[=disjunct]{editlists}}. 63 | #' 64 | #' @param E An \code{\link{editset}} 65 | #' @return A \code{list} where each element is either an \code{\link{editmatrix}}, an \code{\link{editarray}} 66 | #' or an object of class \code{\link[=disjunct]{editlist}} which cannot be simplified further. 67 | #' 68 | #' @example ../examples/separate.R 69 | #' @seealso \code{\link{blocks}}, \code{\link{disjunct}}, \code{\link{condition}} 70 | #' 71 | #' @references 72 | #' M. van der Loo and De Jonge, E. (2012). Manipulation of conditional restrictions and error localization 73 | #' with the editrules package. Discussion paper 2012xx, Statistics Netherlands, The Hague 74 | #' (included with the package). 75 | #' 76 | #' @export 77 | separate <- function(E){ 78 | B <- blocks(E) 79 | B <- lapply(B, function(b){ 80 | et <- editType(b) 81 | if ( all(et == 'num') ){ 82 | b <- b$num 83 | } else if ( all(et == 'cat') ){ 84 | b <- b$mixcat 85 | } else { 86 | b <- disjunct(b) 87 | } 88 | b 89 | }) 90 | B 91 | } 92 | 93 | 94 | # For an editlist, determine the variable type and where in the list it occurs 95 | # E editlist 96 | # var character 97 | # returns 98 | # list() 99 | # $type : 'num' or 'mixcat' 100 | # $occurs : logical of length(E) 101 | varTypeAndOccurrence <- function(E,var){ 102 | ivar <- sapply(E,function(e) c( 103 | var %in% getVars(e$num), 104 | var %in% getVars(e$mixcat) 105 | ) 106 | ) 107 | if ( sum(ivar) == 0 ){ 108 | return(NA) 109 | } 110 | if ( !(is.array(ivar)) ) ivar <- array(ivar,dim=c(2,1)) 111 | if ( any(ivar[1,]) ) { 112 | type <- 'num' 113 | iset <- ivar[1,] 114 | } else { 115 | type <- 'mixcat' 116 | iset <- ivar[2,] 117 | } 118 | list(type=type,occurs=iset) 119 | } 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testLocalizeErrors.R: -------------------------------------------------------------------------------- 1 | 2 | context("Error localization: numerical data.frames") 3 | 4 | test_that('localizeErrors works without specified weight',{ 5 | 6 | expect_equivalent(localizeErrors( 7 | E = editmatrix(c('x+y==z','x<1')), 8 | dat = data.frame( 9 | x = c(1,1,1), 10 | y = c(1,1,1), 11 | z = c(1,1,1) 12 | ) 13 | )$adapt, 14 | matrix(c( 15 | TRUE , FALSE, FALSE, 16 | TRUE , FALSE, FALSE, 17 | TRUE , FALSE, FALSE), 18 | nrow=3, 19 | byrow=TRUE 20 | ) 21 | ) 22 | 23 | 24 | }) 25 | 26 | 27 | 28 | test_that('localizeErrors works with single specified weight',{ 29 | 30 | expect_equivalent(localizeErrors( 31 | E = editmatrix('x+y==z'), 32 | dat = data.frame( 33 | x = c(1,1,1), 34 | y = c(1,1,1), 35 | z = c(1,1,1) 36 | ), 37 | weight = c(1,2,2), 38 | )$adapt, 39 | matrix(c( 40 | TRUE , FALSE, FALSE, 41 | TRUE , FALSE, FALSE, 42 | TRUE , FALSE, FALSE), 43 | nrow=3, 44 | byrow=TRUE 45 | ) 46 | ) 47 | 48 | 49 | }) 50 | 51 | 52 | test_that('localizeErrors works with different weights per record',{ 53 | expect_equivalent(localizeErrors( 54 | E = editmatrix('x+y==z'), 55 | dat = data.frame( 56 | x = c(1,1,1), 57 | y = c(1,1,1), 58 | z = c(1,1,1) 59 | ), 60 | weight = matrix(c( 61 | 1,2,2, 62 | 2,1,2, 63 | 2,2,1), 64 | nrow=3, 65 | byrow=TRUE 66 | ) 67 | )$adapt, 68 | matrix(c( 69 | TRUE , FALSE, FALSE, 70 | FALSE, TRUE , FALSE , 71 | FALSE, FALSE, TRUE), 72 | nrow=3, 73 | byrow=TRUE 74 | ) 75 | ) 76 | expect_that(localizeErrors( 77 | E = editmatrix('x +y==z'), 78 | dat = data.frame( 79 | x = c(1,1,1), 80 | y = c(1,1,1), 81 | z = c(1,1,1) 82 | ), 83 | weight = matrix(c( 84 | 1,2,2, 85 | 2,2,1), 86 | nrow=3, 87 | byrow=TRUE 88 | ) 89 | ), 90 | throws_error() 91 | ) 92 | }) 93 | 94 | 95 | test_that('localizeErrors handles data out-of-datamodel correctly',{ 96 | # thanks to Elmar Wein for sending us this testcase. 97 | E <- editarray(c( 98 | "age %in% c('under aged','adult')", 99 | "maritalStatus %in% c('unmarried','married','widowed','divorced')", 100 | "positionInHousehold %in% c('marriage partner', 'child', 'other')", 101 | "if( age == 'under aged' ) maritalStatus == 'unmarried'", 102 | "if( maritalStatus %in% c('married','widowed','divorced')) !positionInHousehold %in% c('marriage partner','child')" 103 | )) 104 | record <- data.frame(age='under aged', maritalStatus='unmarried', positionInHousehold='out_of_range') 105 | expect_equivalent( 106 | localizeErrors(E,record)$adapt, 107 | array(c(FALSE,FALSE,TRUE),dim=c(1,3)) 108 | ) 109 | }) 110 | 111 | test_that("localizeErrors works with TRUE/FALSE",{ 112 | E <- editarray(expression( 113 | A %in% c(TRUE,FALSE), 114 | B %in% letters[1:4], 115 | if ( !A ) B %in% letters[1:2] 116 | )) 117 | 118 | # should run without errors... 119 | localizeErrors(E,data.frame(A=c(TRUE,FALSE),B=c('c',"d"))) 120 | }) 121 | 122 | test_that("localizeErrors works with mixed edit",{ 123 | E <- editset(expression( 124 | married %in% c(TRUE,FALSE), 125 | if (married==TRUE) age >=17 126 | )) 127 | 128 | # note bb is switched off for mixed edits 129 | le <- localizeErrors(E, data.frame(married=TRUE, age=9)) 130 | expect_equal(sum(le$adapt), 1, info="bb returns correct result") 131 | 132 | le <- localizeErrors(E, data.frame(married=TRUE, age=9), method="mip") 133 | expect_equal(sum(le$adapt), 1, info="mip returns correct result") 134 | 135 | }) 136 | 137 | test_that("localizeErrors works for simple numerical if-else", { 138 | le <- localizeErrors( 139 | editset("if ( x > 0 ) y > 0") 140 | , data.frame(x=1,y=0) 141 | ) 142 | expect_equal(sum(le$adapt),1) 143 | le <- localizeErrors( 144 | editset("if ( x > 0 ) y > 0") 145 | , data.frame(x=1,y=0) 146 | , method="mip" 147 | ) 148 | expect_equal(sum(le$adapt),1) 149 | 150 | }) 151 | 152 | 153 | -------------------------------------------------------------------------------- /pkg/R/isObviouslyRedundant.R: -------------------------------------------------------------------------------- 1 | #' Find obvious redundancies in set of edits 2 | #' 3 | #' Detect simple redundancies such as duplicates or edits of the form \code{0 < 1} or \code{0 == 0}. 4 | #' For categorical edits, simple redundancies are edits that define an empty subregion 5 | #' of the space of all possible records (no record can ever be contained in such a region). 6 | #' 7 | #' @param E An \code{\link{editset}}, \code{\link{editmatrix}}, \code{\link{editarray}}, 8 | #' \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editenv}} 9 | #' @param duplicates \code{logical}: check for duplicate edits? For an \code{\link{editset}}, 10 | #' \code{\link[=duplicated]{editlist}} or \code{\link[=duplicated]{editenv}} this should be a logical 11 | #' 2-vector indicating which of the numerical or categorical edits should be checked for duplicates. 12 | #' @param ... parameters to be passed to or from other methods. 13 | #' 14 | #' @return logical vector indicating which edits are (obviously) redundant 15 | #' @seealso \code{\link{isObviouslyInfeasible}}, \code{\link{isSubset}} 16 | #' @export 17 | isObviouslyRedundant <- function(E, duplicates=TRUE, ...){ 18 | UseMethod("isObviouslyRedundant") 19 | } 20 | 21 | 22 | # @method isObviouslyRedundant matrix 23 | # 24 | # @param operators character vecor of comparison operators in \code{<, <=, ==} of length \code{nrow(E)} 25 | # @param tol tolerance to check for zeros. 26 | # 27 | # @rdname isObviouslyRedundant 28 | # @keywords internal 29 | # @seealso \code{\link{isObviouslyRedundant}}, \code{\link{isObviouslyRedundant.editmatrix}} 30 | #' @export 31 | isObviouslyRedundant.matrix <- function( 32 | E, 33 | duplicates = TRUE, 34 | operators, 35 | tol=sqrt(.Machine$double.eps), 36 | ... ){ 37 | ib <- ncol(E) 38 | zeroCoef <- rowSums(abs(E[,-ib,drop=FALSE])) < tol 39 | as.vector( 40 | zeroCoef & ( (operators %in% c("==","<=") & abs(E[,ib]) < tol) 41 | | (operators %in% c("<", "<=") & E[,ib] > tol) 42 | ) 43 | ) 44 | } 45 | 46 | 47 | #' @method isObviouslyRedundant editmatrix 48 | #' @rdname isObviouslyRedundant 49 | #' 50 | #' @export 51 | isObviouslyRedundant.editmatrix <- function(E, duplicates=TRUE, ...){ 52 | if ( !isNormalized(E) ) E <- normalize(E) 53 | I <- isObviouslyRedundant.matrix(getAb(E), operators=getOps(E), ...) 54 | if ( duplicates ) I <- I | duplicated.editmatrix(E) 55 | I 56 | } 57 | 58 | 59 | #' @method isObviouslyRedundant editarray 60 | #' @rdname isObviouslyRedundant 61 | #' @export 62 | isObviouslyRedundant.editarray <- function(E, duplicates=TRUE, ...){ 63 | if ( ncol(E) == 0 ) return(logical(0)) 64 | if ( ncol(E) == 1 ) return(as.vector(E)) 65 | ind <- getInd(E) 66 | red <- isRedundant.boolmat(getArr(E),getInd(E)) 67 | if ( duplicates ) red <- red | duplicated.editarray(E) 68 | red 69 | } 70 | 71 | 72 | # Check redundancy in editarray after disection 73 | # 74 | # @keywords internal 75 | isRedundant.boolmat <- function(A, ind){ 76 | if ( nrow(A) == 1 ) return(any(vapply(ind,function(i) sum(A[,i])==0,FUN.VALUE=TRUE))) 77 | apply( 78 | vapply(ind, function(i) rowSums(A[,i,drop=FALSE])==0, FUN.VALUE=logical(nrow(A))), 79 | 1,any 80 | ) 81 | } 82 | 83 | # 84 | # For an \code{\link{editset}} it checks for obvious redundancies in the numerical 85 | # and categorical/mixed parts separately. Arguments \code{duplicates} must be a 86 | # logical 2-vector, the first element corresponding to the numeric part, the second 87 | # element to the conditional part. 88 | # 89 | 90 | #' @method isObviouslyRedundant editset 91 | #' @rdname isObviouslyRedundant 92 | #' @export 93 | isObviouslyRedundant.editset <- function(E, duplicates=rep(TRUE,2), ...){ 94 | c( 95 | isObviouslyRedundant(E$num, duplicates=duplicates[1], ...), 96 | isObviouslyRedundant(E$mixcat, duplicates=duplicates[2], ...) 97 | ) 98 | } 99 | 100 | 101 | 102 | # 103 | # 104 | # For an \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editenv}}, 105 | # a list of logical vectors is returned. 106 | # 107 | #' @method isObviouslyRedundant editlist 108 | #' @rdname isObviouslyRedundant 109 | #' @export 110 | isObviouslyRedundant.editlist <- function(E, duplicates=rep(TRUE,2), ...){ 111 | lapply(E, isObviouslyRedundant.editset, duplicates=duplicates, ...) 112 | } 113 | 114 | 115 | # 116 | # For an \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editenv}}, 117 | # a list of logical vectors is returned. 118 | # 119 | 120 | #' @method isObviouslyRedundant editenv 121 | #' @rdname isObviouslyRedundant 122 | #' @export 123 | isObviouslyRedundant.editenv <- function(E, duplicates=rep(TRUE,2), ...){ 124 | lapply(E, isObviouslyRedundant.editset, duplicates=duplicates, ...) 125 | } 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /pkg/R/pkg.R: -------------------------------------------------------------------------------- 1 | #' An overview of the function of package \code{editrules} 2 | #' 3 | #' @section NOTE: 4 | #' This package is no longer under active development. The package is superseded 5 | #' by R packages \href{https://CRAN.R-project.org/package=validate}{validate} 6 | #' for data validation and 7 | #' \href{https://CRAN.R-project.org/package=errorlocate}{errorlocate} 8 | #' for error localization. We urge new users to use those packages instead. 9 | #' 10 | #' The \code{editrules} package aims to provide an environment to conveniently 11 | #' define, read and check recordwise data constraints including 12 | #' \itemize{ 13 | #' \item{Linear (in)equality constraints for numerical data}, 14 | #' \item{Constraints on value combinations of categorical data} 15 | #' \item{Conditional constraints on numerical and/or mixed data} 16 | #' } 17 | #' In literature these constraints, or restrictions are refered to as ``edits''. 18 | #' \code{editrules} can perform common rule 19 | #' set manipulations like variable elimination and value substitution, and 20 | #' offers error localization functionality based on the 21 | #' (generalized) paradigm of Fellegi and Holt. Under this paradigm, one determines 22 | #' the smallest (weighted) number of variables to adapt such that no (additional or derived) 23 | #' rules are violated. The paradigm is based on the assumption that errors 24 | #' are distributed randomly over the variables and there is no detectable cause of 25 | #' error. It also decouples the detection of corrupt variables from their 26 | #' correction. For some types of error, such as sign flips, typing errors or 27 | #' rounding errors, this assumption does not hold. These errors can be detected 28 | #' and are closely related to their resolution. The reader is referred to the 29 | #' \pkg{deducorrect} package for treating such errors. 30 | #' 31 | #' @section I. Define edits: 32 | #' 33 | #' \code{editrules} provides several methods for creating edits from a \code{character} 34 | #' , \code{expression}, \code{data.frame} or a text file. 35 | #' \tabular{ll}{ 36 | #' \code{\link{editfile}} \tab Read conditional numerical, numerical and categorical constraints from textfile \cr 37 | #' \code{\link{editset}} \tab Create conditional numerical, numerical and categorical constraints \cr 38 | #' \code{\link{editmatrix}} \tab Create a linear constraint matrix for numerical data \cr 39 | #' \code{\link{editarray}} \tab Create value combination constraints for categorical data \cr 40 | #' } 41 | #' 42 | #' @section II. Check and find errors in data: 43 | #' 44 | #' \code{editrules} provides several method for checking \code{data.frame}s with edits 45 | #' \tabular{ll}{ 46 | #' \code{\link{violatedEdits}} \tab Find out which record violates which edit. \cr 47 | #' \code{\link{localizeErrors}} \tab Localize erroneous fields using Fellegi and Holt's principle. \cr 48 | #' \code{\link{errorLocalizer}} \tab Low-level error localization function using B&B algorithm \cr 49 | #' } 50 | #' Note that you can call \code{plot}, \code{summary} and \code{print} on results of these functions. 51 | #' 52 | #' @section IV. Manipulate and check edits: 53 | #' 54 | #' \code{editrules} provides several methods for manipulating edits 55 | #' \tabular{ll}{ 56 | #' \code{\link{substValue}} \tab Substitute a value in a set of rules \cr 57 | #' \code{\link{eliminate}} \tab Derive implied rules by variable elimination \cr 58 | #' \code{\link{reduce}} \tab Remove unconstraint variables \cr 59 | #' \code{\link{isFeasible}} \tab Check for contradictions \cr 60 | #' \code{\link{duplicated}} \tab Find duplicated rules \cr 61 | #' \code{\link{blocks}} \tab Decompose rules into independent blocks \cr 62 | #' \code{\link{disjunct}} \tab Decouple conditional edits into disjunct edit sets\cr 63 | #' \code{\link{separate}} \tab Decompose rules in blocks and decouple conditinal edits \cr 64 | #' \code{\link{generateEdits}} \tab Generate all nonredundant implicit edits (\code{\link{editarray}} only) \cr 65 | #' } 66 | #' 67 | #' @section V. Plot and coerce edits: 68 | #' 69 | #' \code{editrules} provides several methods for plotting and coercion. 70 | #' \tabular{ll}{ 71 | #' \code{\link{editrules.plotting}} \tab Plot edit-variable connectivity graph \cr 72 | #' \code{\link[igraph]{as.igraph}} \tab Coerce to edit-variable connectivity \code{igraph} object \cr 73 | #' \code{as.character} \tab Coerce edits to \code{character} representation \cr 74 | #' \code{as.data.frame} \tab Store \code{character} representation in \code{data.frame} \cr 75 | #' } 76 | #' 77 | #' @import lpSolveAPI 78 | #' @importFrom igraph as.igraph 79 | #' @importFrom graphics axis barplot grid lines mtext par plot 80 | #' @importFrom stats density dist quantile runif 81 | #' @importFrom utils flush.console ls.str stack str 82 | #' 83 | #' @name editrules_package 84 | #' @docType package 85 | "_PACKAGE" 86 | 87 | 88 | # not on imports: we need to DEPEND on igraph since are exporting methods of an igraph 89 | # generic (as.igraph). 90 | 91 | 92 | -------------------------------------------------------------------------------- /pkg/tests/testthat/testeditmatrix.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 3 | context("Editmatrix") 4 | 5 | test_that("editmatrix works correcly with character",{ 6 | cond <- c( "x == y" 7 | , "z + w == y + x" 8 | , "x + z == y + 2*w" 9 | ) 10 | 11 | mat <- editmatrix(cond) 12 | mat <- getA(mat) 13 | expect_equivalent(mat[1,], c(1,-1,0,0)) 14 | expect_equivalent(mat[2,], c(-1,-1,1,1)) 15 | expect_equivalent(mat[3,], c(1,-1,-2,1)) 16 | }) 17 | 18 | test_that("editmatrix works correcly with expression",{ 19 | cond <- expression( x == y 20 | , z + w == y + x 21 | , x + z == y + 2*w 22 | ) 23 | 24 | mat <- editmatrix(cond) 25 | mat <- getA(mat) 26 | expect_equivalent(mat[1,], c(1,-1,0,0)) 27 | expect_equivalent(mat[2,], c(-1,-1,1,1)) 28 | expect_equivalent(mat[3,], c(1,-1,-2,1)) 29 | }) 30 | 31 | test_that("editmatrix can simplify",{ 32 | cond <- c( "2*x == x + y" 33 | , "z + 2 == y + 3" 34 | , "w == 3" 35 | ) 36 | 37 | E <- editmatrix(cond) 38 | mat <- getA(E) 39 | C <- getb(E) 40 | expect_equal(as.integer(mat[1,]), c(1,-1,0,0)) 41 | expect_equal(as.integer(mat[2,]), c(0,-1,1,0)) 42 | expect_equal(C[2], c(num2=1)) 43 | 44 | expect_equal(mat[3,], c(x=0,y=0,z=0,w=1)) 45 | expect_equal(C[3], c(num3=3)) 46 | }) 47 | 48 | 49 | test_that("editmatrix works correcly with data.frame",{ 50 | 51 | edtinf.csv <- 52 | "name,edit 53 | A,x == y 54 | B,z + w == y + x 55 | C,z == y + 2*w 56 | " 57 | edtinf <- read.csv((con <- textConnection(edtinf.csv))) 58 | close(con) 59 | 60 | mat <- editmatrix(edtinf) 61 | A <- getA(mat) 62 | expect_equivalent(A[1,], c(1,-1,0,0)) 63 | expect_equivalent(A[2,], c(-1,-1,1,1)) 64 | expect_equivalent(A[3,], c(0,-1,-2,1)) 65 | }) 66 | 67 | test_that("editmatrix works with constants",{ 68 | cond <- c( "x + y > 2" 69 | , "y < 10" 70 | ) 71 | E <- editmatrix(cond) 72 | mat <- getA(E) 73 | expect_equal(as.integer(mat[1,]), c(-1,-1)) 74 | expect_equal(as.integer(mat[2,]), c(0,1)) 75 | expect_equal(as.integer(getb(E)), c(-2,10)) 76 | }) 77 | 78 | test_that("conditional statement parsing is not working..",{ 79 | expect_error(editmatrix("if(x < 2) y > 4")) 80 | }) 81 | 82 | test_that("editmatrix works with negative constants",{ 83 | cond <- c( "x + y > -2" 84 | , "y < -10" 85 | ) 86 | E <- editmatrix(cond) 87 | mat <- getA(E) 88 | expect_equal(as.integer(mat[1,]), c(-1,-1)) 89 | expect_equal(as.integer(mat[2,]), c(0,1)) 90 | expect_equal(as.integer(getb(E)), c(2,-10)) 91 | }) 92 | 93 | test_that("editmatrix works with negative coefficients",{ 94 | cond <- c( "-2*x + y > 2" 95 | ) 96 | E <- editmatrix(cond) 97 | mat <- getAb(E) 98 | expect_equivalent(mat[1,], c(2,-1,-2)) 99 | }) 100 | 101 | test_that("editmatrix works with coefficient after variable",{ 102 | cond <- c( "x*-2 + y > 2" 103 | ) 104 | E <- editmatrix(cond) 105 | mat <- getAb(E) 106 | expect_equivalent(mat[1,], c(2,-1,-2)) 107 | }) 108 | 109 | test_that("editmatrix fails with nonconstant coefficient",{ 110 | cond <- c( "a*x == 2" 111 | ) 112 | expect_error(editmatrix(cond)) 113 | }) 114 | 115 | 116 | test_that("is.editmatrix works",{ 117 | mat <- editmatrix("x==y") 118 | expect_true(is.editmatrix(mat)) 119 | expect_false(is.editmatrix(unclass(mat))) 120 | }) 121 | 122 | test_that("as.editmatrix works",{ 123 | A <- matrix( c( 1,-2, 0 124 | , 2, 0, 1 125 | ) 126 | , nrow=2 127 | , byrow=TRUE 128 | # , dimnames=list(c("a", "b"), c("x","y", "z")) 129 | ) 130 | E <- as.editmatrix(A, b=c(0,1), ops=c("==","<")) 131 | ei <- as.data.frame(E) 132 | expect_equivalent(ei$edit, c("x1 == 2*x2", "2*x1 + x3 < 1")) 133 | }) 134 | 135 | test_that("editmatrix normalize works",{ 136 | cond <- c( "x > y" 137 | , "z + w >= y + x" 138 | , "x + z < y + 2*w" 139 | , "x + z == y + 2*w" 140 | , "x + z <= y + 2*w" 141 | ) 142 | 143 | E <- editmatrix(editrules=cond, normalize=TRUE) 144 | mat <- getA(E) 145 | 146 | expect_equivalent(mat[1,], c(-1,1,0,0)) 147 | expect_equivalent(mat[2,], c(1,1,-1,-1)) 148 | expect_equivalent(mat[3,], c(1,-1,-2,1)) 149 | expect_equivalent(mat[4,], c(1,-1,-2,1)) 150 | expect_equivalent(mat[5,], c(1,-1,-2,1)) 151 | ops <- getOps(E) 152 | expect_equivalent(ops, c("<", "<=", "<","==", "<=")) 153 | }) 154 | 155 | 156 | test_that("coercions work",{ 157 | E <- editmatrix("x+y==z") 158 | expect_that(E, is_identical_to(editmatrix(as.data.frame(E)))) 159 | expect_that(E, is_identical_to(as.editmatrix(A=getA(E), b=getb(E), ops=getOps(E)))) 160 | # edge case, testing as.character feature 161 | E <- editmatrix("x + 0.1*y==z") 162 | expect_that(E, is_identical_to(editmatrix(as.character(E)))) 163 | }) 164 | 165 | -------------------------------------------------------------------------------- /pkg/R/writeELAsMip.R: -------------------------------------------------------------------------------- 1 | #' Rewrite an editset and reported values into the components needed for a mip solver 2 | #' 3 | #' @param E \code{\link{editset}} or any object that is coercable to an editset. 4 | #' @param x named \code{list} or \code{vector} with data 5 | #' @param weight vector with weights of the variable in the same order as x 6 | #' @param M maximum allowed difference between reported value and corrected value 7 | #' @param epsilon offset needed for writing a strict inequality into a an inequality 8 | #' @return list with an editmatrix, objfn, binvars, numvars, M and epsilon 9 | #' @keywords internal 10 | writeELAsMip <- function( E 11 | , x 12 | , weight = rep(1, length(x)) 13 | , M = 1e7 14 | , epsilon = 1e-3 15 | , ... 16 | ){ 17 | E <- as.editset(E) 18 | el.E <- NULL 19 | 20 | # soft <- is.finite(editweight) 21 | # if (any(soft)){ 22 | # soft.E <- E[soft,] 23 | # soft.weights <- editweight[soft] 24 | # 25 | # #TODO process softedits into el.E 26 | # soft.num <- softEdits(soft.E$num, xlim, prefix=".soft.") 27 | # 28 | # #TODO column with diag(1, nrow(soft.E$mixcat)) 29 | # soft.cat <- NULL 30 | # #soft.cat <- softEdits(cateditmatrix(soft.E$mixcat),xlim,prefix=".softcat.") 31 | # el.E <- c(soft.num, soft.cat, el.E) 32 | # E <- E[!soft,] 33 | # } 34 | 35 | # num part 36 | num.vars <- getVars(E, type="num") 37 | 38 | if (!is.null(num.vars)){ 39 | num.idx <- match(num.vars, names(x)) 40 | num.x <- diag(1, nrow=length(num.vars)) 41 | dimnames(num.x) <- list(num.vars,num.vars) 42 | num.x0 <- unlist(x[num.idx]) 43 | # create an editmatrix x_i == x^0_i 44 | num.E <- as.editmatrix(num.x, num.x0) 45 | num.se <- softEdits(num.E, "adapt.") 46 | el.E <- c(num.se, E$num, el.E) 47 | } 48 | 49 | # cat part 50 | cat.vars <- getVars(E, type="cat") 51 | if ( length(cat.vars) > 0 ){ 52 | cat.idx <- match(cat.vars, names(x)) 53 | cat.x_0 <- unlist(x[cat.idx]) 54 | 55 | 56 | cat.A <- diag(1, nrow=length(cat.x_0)) 57 | cat.A <- cbind(cat.A, cat.A) 58 | #browser() 59 | colnames(cat.A) <- c(asCat(cat.x_0), paste("adapt.", cat.vars, sep="")) 60 | 61 | # check for non existing levels (including NA's) 62 | invalidCats <- !(asCat(cat.x_0, useLogicals=FALSE) %in% getlevels(E$mixcat)) 63 | if (any(invalidCats)){ # remove invalid categories otherwise they will turn up in the resulting editmatrix... 64 | cat.A <- cat.A[,-which(invalidCats), drop=FALSE] 65 | } 66 | cat.b <- rep(1, nrow(cat.A)) 67 | 68 | cat.se <- as.editmatrix(cat.A, b=cat.b) 69 | el.E <- c(cat.se, cateditmatrix(E$mixcat), el.E) 70 | } 71 | 72 | # mix part 73 | mix.E <- editmatrix(invert(as.character(E$mixnum))) 74 | mix.vars <- getVars(mix.E) 75 | if (!is.null(mix.vars)){ 76 | mix.idx <- match(mix.vars, names(x)) 77 | mix.se <- softEdits(mix.E, prefix="") 78 | el.E <- c(mix.se, el.E) 79 | } 80 | 81 | # el.E <- c(mix.se, cat.se, num.se, E$num, cateditmatrix(E$mixcat)) 82 | lt <- getOps(el.E) == "<" 83 | 84 | el.vars <- getVars(el.E) 85 | el.binvars <- sapply(el.vars, is.character) 86 | el.binvars[el.vars %in% num.vars] <- FALSE 87 | g <- grepl("delta.", el.vars, fixed=TRUE) 88 | #print(g) 89 | el.binvars[g] <- FALSE 90 | 91 | objfn <- sapply(el.vars, function(v) 0) 92 | adapt.idx <- grep("^adapt\\.", el.vars) 93 | adapt.nms <- names(adapt.idx) <- sub("^adapt\\.", "", el.vars[adapt.idx]) 94 | 95 | objfn[adapt.idx] <- weight[match(adapt.nms, names(x))] 96 | 97 | # if (any(soft)){ 98 | # soft.idx <- grep("^\\.soft", el.vars) 99 | # objfn[soft.idx] <- (1-lambda) * soft.weights 100 | # } 101 | structure( 102 | list( E = el.E 103 | , objfn = objfn 104 | , binvars = which(el.binvars) 105 | , numvars = match(num.vars, el.vars) 106 | , M = M 107 | , epsilon = epsilon 108 | ), 109 | class="mip" 110 | ) 111 | } 112 | 113 | buildELMatrix <- writeELAsMip 114 | 115 | # E <- editset(expression( 116 | # x < y, 117 | # y < z, 118 | # x < z, 119 | # a %in% c(TRUE, FALSE), 120 | # if (a) x > 1 121 | # )) 122 | # 123 | # editsetToMip(E) 124 | #testing... 125 | 126 | # E <- editset(expression( 127 | # if (x>0) y > 0 128 | # , maritalstatus %in% c("married", "single") 129 | # , if (maritalstatus == "married") age >= 17 130 | # )) 131 | # # 132 | # x <- list(x = 1, y = -1, age=16, maritalstatus="married") 133 | # # #x <- list(x = 1, y = -1, age=16, maritalstatus=NA) 134 | # # # e <- expression( pregnant %in% c(TRUE, FALSE) 135 | # # # , gender %in% c("male", "female") 136 | # # # , if (pregnant) gender == "female" 137 | # # # ) 138 | # # # 139 | # # # cateditmatrix(e) 140 | # # checkXlim(list(age=c(0,200)), x) 141 | # # 142 | # buildELMatrix(E, x)# -> B 143 | # #errorLocalizer.mip(E, x=x,, xlim=list(age=c(0,200))) 144 | -------------------------------------------------------------------------------- /pkg/inst/script/bench/benchAB.R: -------------------------------------------------------------------------------- 1 | # Some benchmarks on elimination Algorithm A and B 2 | # mvdl, 05.01.2011 3 | # 4 | 5 | cat('-------------------------------------------------------------------\n') 6 | cat('This benchmark requires packages editrules and Hmisc\n') 7 | require(editrules) 8 | if (!require(Hmisc)) stop('install Hmisc') 9 | 10 | fdir <- system.file('script/bench',package='editrules') 11 | edits <- file.path(fdir,'edits.R') 12 | files <- file.path(fdir,c('eliminator.R','randomEdits.R')) 13 | 14 | for ( f in files ) source(f) 15 | 16 | ## The double conversion makes sure that the empty character ('') 17 | ## is interpreted as empty value. 18 | E <- editarray(as.character(editfile(edits,type='cat'))) 19 | 20 | vars <- getVars(E) 21 | 22 | cat('timing algorithms A and B...\n') 23 | 24 | t1 <- system.time(e1 <- elimInfo(E, algorithmA.follow)) 25 | t2 <- system.time(e2 <- elimInfo(E, algorithmB.follow)) 26 | 27 | for ( i in 1:99 ){ 28 | t1 <- t1 + system.time(e1 <- elimInfo(E, algorithmA.follow)) 29 | t2 <- t2 + system.time(e2 <- elimInfo(E, algorithmB.follow)) 30 | } 31 | 32 | 33 | ## eliminate one variable from E, do this for all variables and follow progress of elimination 34 | tA <- tB <- numeric(length(vars)) 35 | names(tA) <- names(tB) <- vars 36 | for ( v in vars ){ 37 | tA[v] <- system.time({for ( i in 1:50 ) u <- algorithmA(E,v)})['user.self']/50*1000 38 | tB[v] <- system.time({for ( i in 1:50 ) u <- algorithmB(E,v)})['user.self']/50*1000 39 | } 40 | times <- cbind(tA,tB,diff=tB-tA,ratio=tB/tA) 41 | o <- order(times[,3],decreasing=TRUE) 42 | times <- cbind(times,e1$Created > e2$Created) 43 | 44 | totA <- sum(times[,'tA']) 45 | totB <- sum(times[,'tB']) 46 | cat('Ratio tB/tA over all variables :', totB/totA,'\n') 47 | 48 | totA <- sum(times[rev(o)[1:13],'tA']) 49 | totB <- sum(times[rev(o)[1:13],'tB']) 50 | cat('Ratio tB/tA over variables where iterations are necessary :', totB/totA,'\n\n') 51 | 52 | 53 | cat('Creating plots\n') 54 | 55 | ## Create dotchart of timing difference and scatterplot of timings. 56 | 57 | par(mfrow=c(1,2),oma=c(0,0,0,0),mar=c(4,2,2,1.1)) 58 | 59 | lb <- rep("",36) 60 | lb[seq(1,36,5)] <- seq(1,36,5) 61 | pch <- 1+numeric(nrow(times)) 62 | I <- e1$Created > e2$Created 63 | pch[I] <- 16 64 | dotchart(times[o,'diff'], 65 | pch=pch[o], 66 | main=expression(paste('Timing ',t[B]-t[A])), 67 | xlab=expression(t[B]-t[A]), 68 | labels=lb 69 | ) 70 | abline(v=0,lty=1,col='grey') 71 | 72 | pc1 <- 2 + numeric(36) 73 | pc1[I] <- 17 74 | plot(e1$contained,times[,'tA'], 75 | xlab='Nr of edits containing variable', 76 | main='Timing A and B (ms)', 77 | xlim=c(0,20), 78 | ylim=c(0,40), 79 | pch=pc1, 80 | oma=c(1,1,1,1)) 81 | pc2 <- 22 + numeric(36) 82 | pc2[I] <- 15 83 | points(e2$contained,times[,'tB'],pch=pc2) 84 | legend('topright', 85 | legend=c('A','B'), 86 | pch=c(2,22), 87 | bty='n') 88 | 89 | 90 | cat('-------------------------------------------------------------------\n') 91 | 92 | cat('-------------------------------------------------------------------\n') 93 | cat('Determine complexity as function of number of edits.\n') 94 | M <- c(seq(10,100,10),150,200,250,500) 95 | timing <- array(0,dim=c(length(M),2)) 96 | colnames(timing) <- c('A','B') 97 | i <- 0 98 | 99 | timeAlg <- function(E,alg,n=10){ 100 | vars <- getVars(E) 101 | t = 0 102 | t = t + system.time({ for(v in vars ) E <- alg(E,v)})['user.self'] 103 | t/n 104 | } 105 | 106 | nAverage = 100 107 | TA <- array(0,dim=c(length(M),8)) 108 | colnames(TA) <-c(names(summary(1:10)), 109 | names(quantile(1:10,probs=c(0.05,0.95)) )) 110 | TB <- TAB <- TA 111 | 112 | i <- 0 113 | ms=1/1000 114 | for ( m in M ){ 115 | cat('\rworking on m=',m) 116 | tA <- numeric(nAverage) 117 | tB <- numeric(nAverage) 118 | tBA <- numeric(nAverage) 119 | i <- i + 1 120 | for ( j in 1:nAverage ){ 121 | E <- genedits(m,group='A') 122 | vars <- getVars(E) 123 | tA[j] <- timeAlg(E,algorithmA,n=3) 124 | tB[j] <- timeAlg(E,algorithmB,n=3) 125 | } 126 | TA[i,] <- c(summary(tA), quantile(tA,probs=c(0.05,0.95)))/ms 127 | TB[i,] <- c(summary(tB), quantile(tB,probs=c(0.05,0.95)))/ms 128 | TAB[i,] <- c(summary(tA-tB),quantile(tA-tB,probs=c(0.05,0.95)))/ms 129 | } 130 | cat('\n') 131 | 132 | 133 | dev.new() 134 | par(mfrow=c(1,2),oma=c(0,0,0,0),mar=c(4,2,2,1.1)) 135 | errbar(M,sqrt(TA[,'Mean']),sqrt(TA[,'5%']),sqrt(TA[,'95%']), 136 | xlab='nr of edits', 137 | ylab=expression(paste(sqrt(time),' (',sqrt(ms),')')), 138 | ) 139 | errbar(M,sqrt(TB[,'Mean']),sqrt(TB[,'5%']),sqrt(TB[,'95%']),add=TRUE,pch=2) 140 | legend('topleft', 141 | legend=c('A','B'), 142 | pch=c(16,2), 143 | bty='n') 144 | title(main=expression(paste('Timing (',sqrt(ms),')'))) 145 | 146 | errbar(M,TAB[,'Mean'],TAB[,'5%'],TAB[,'95%'], 147 | xlab='nr of edits', 148 | ylab=expression(paste(t[A]-t[B],' (ms)')) 149 | ) 150 | abline(h=0) 151 | title(main=expression(paste(t[A]-t[B],' ','(ms)'))) 152 | 153 | cat('finished\n') 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /pkg/R/reduce.R: -------------------------------------------------------------------------------- 1 | 2 | #' Remove redundant variables and edits. 3 | #' 4 | #' Remove variables which are not contained in any edit and remove edits which are 5 | #' \code{\link[=isObviouslyRedundant]{obviously redundant}}. 6 | #' 7 | #' @param E \code{\link{editmatrix}} or \code{\link{editarray}} 8 | #' @param ... arguments to pass to other methods 9 | #' 10 | #' @export 11 | #' @seealso \code{\link{contains}}, \code{\link{eliminate}}, \code{\link{substValue}} 12 | #' 13 | reduce <- function(E,...){ 14 | UseMethod('reduce') 15 | } 16 | 17 | 18 | #' 19 | #' @method reduce editmatrix 20 | #' @param tol elements of \code{E} with absolute value < \code{tol} are considered 0. 21 | #' 22 | #' @rdname reduce 23 | #' @export 24 | reduce.editmatrix <- function(E, tol=sqrt(.Machine$double.eps),...){ 25 | if ( nrow(E) == 0 ) return(neweditmatrix(matrix(numeric(0)),character(0))) 26 | m <- as.matrix(E) 27 | if ( tol > 0 ) m[abs(m) < tol] <- 0 28 | B <- m != 0 29 | v <- 1:(ncol(m)-1) 30 | vars <- which(colSums(B[,v,drop=FALSE]) != 0) 31 | edits <- (rowSums(B) != 0) 32 | E[edits,c(vars,ncol(m)) , drop=FALSE] 33 | } 34 | 35 | 36 | #' 37 | #' @method reduce editarray 38 | #' 39 | #' @export 40 | #' @rdname reduce 41 | reduce.editarray <- function(E,...){ 42 | E <- E[!isObviouslyRedundant.editarray(E),,drop=FALSE] 43 | m <- as.matrix(E) 44 | ind <- getInd(E) 45 | H <- getH(E) 46 | b <- sapply(ind,function(ind) all(m[,ind,drop=FALSE])) 47 | if ( any(b) ){ 48 | J <- logical(0) 49 | for ( j in ind[b] ) J <- c(J,j) 50 | sep=getSep(E) 51 | m <- m[,-J,drop=FALSE] 52 | ind <- indFromArray(m,sep=sep) 53 | i <- apply(!m,1,all) 54 | m <- m[!i,,drop=FALSE] 55 | if (!is.null(H)) H <- H[!i,,drop=FALSE] 56 | E <- neweditarray(E=m, ind=ind, sep=sep, names=rownames(m), H=H) 57 | } 58 | E 59 | } 60 | 61 | 62 | #' 63 | #' @method reduce editset 64 | #' @export 65 | #' @rdname reduce 66 | #' 67 | reduce.editset <- function(E,...){ 68 | num <- reduce(E$num) 69 | mixcat <- reduce(E$mixcat) 70 | v <- getVars(mixcat) 71 | mixnum <- reduce(E$mixnum[rownames(E$mixnum) %in% v,]) 72 | 73 | imix <- grepl("^.l",v) 74 | if ( nrow(mixcat) > 0 ){ 75 | m <- logical(nedits(mixcat)) 76 | if ( any(imix) ) m <- apply(contains(mixcat, v[imix]), 1, any) 77 | pref <- ifelse(m,"mix","cat") 78 | rownames(mixcat) <- paste(pref, 1:nrow(mixcat), sep="") 79 | } 80 | simplify(neweditset( 81 | num = num, 82 | mixnum = mixnum, 83 | mixcat = mixcat 84 | )) 85 | 86 | } 87 | 88 | #' Retrieve values stricktly implied by rules 89 | #' 90 | #' @export 91 | impliedValues <- function(E,...){ 92 | UseMethod('impliedValues') 93 | } 94 | 95 | #' 96 | #' 97 | #' Detects cases where two inequalities imply an equality, e.g. \eqn{x\leq 0} and \eqn{x\geq0} 98 | #' implies \eqn{x=0}. Also detects straight equalities, e.g. \eqn{x==0} implies \eqn{x=0}. Such 99 | #' cases arise frequently when manipulating edits by value subsitution or variable elimination. 100 | #' The function recursively detects equalities and combined inequalities that imply fixed values, 101 | #' substitutes those fixed values and looks for new implied values until no new values are found. 102 | #' 103 | #' @method impliedValues editmatrix 104 | #' 105 | #' @param E editmatrix 106 | #' @param tol Maximum deviation for two values to be considered equal. 107 | #' @param ... Currently unused 108 | #' @return Numeric vector, whose names are variable names and values are unique values implied by the rules. 109 | #' @rdname impliedValues 110 | #' 111 | #' @seealso \code{\link{reduce}}, \code{\link{substValue}}, \code{\link{eliminate}} 112 | #' 113 | #' @export 114 | impliedValues.editmatrix <- function(E,tol=sqrt(.Machine$double.eps),...){ 115 | if (!isNormalized(E)) E <- normalize(E) 116 | 117 | iv <- numeric(0) 118 | iv[getVars(E)] <- NA 119 | 120 | # edits containing one variable 121 | i <- rowSums(contains(E,tol=tol)) == 1 122 | if ( sum(i) == 0 ) return(numeric(0)) 123 | e <- reduce(E[i,]) 124 | 125 | # direct equalities 126 | ops <- getOps(e) 127 | eqs <- e[ops == '==', ] 128 | eqA <- getA(eqs) 129 | eqb <- getb(eqs) 130 | j <- apply(abs(eqA),1,which.max) 131 | i <- 1:nrow(eqs) 132 | iv[colnames(eqA)[j]] <- eqb/eqA[cbind(i,j)] 133 | 134 | x <- iv[!is.na(iv)] 135 | # combined inequalities 136 | ieq <- e[ops %in% c('<=','>=')] 137 | if (length(x) > 0) 138 | ieq <- substValue(ieq, names(x), x,reduce=TRUE) 139 | Ab <- getAb(e) 140 | for ( v in getVars(ieq) ){ 141 | ab <- Ab[abs(Ab[,v])>tol,,drop=FALSE] 142 | ab <- ab/abs(ab[,v]) 143 | b <- ab[,'CONSTANT'] 144 | for ( i in 1:length(b) ){ 145 | j <- ( abs(b[i] + b[-i]) < tol & abs(ab[i,v] + ab[-i,v]) < tol ) 146 | if (any(j)){ 147 | iv[v] = abs(b[i]) 148 | break 149 | } 150 | } 151 | } 152 | iv <- iv[!is.na(iv)] 153 | if ( length(iv) > 0 ){ 154 | E <- substValue(E,names(iv),iv,reduce=TRUE) 155 | c(iv,impliedValues.editmatrix(E,tol)) 156 | } else { 157 | iv 158 | } 159 | } 160 | --------------------------------------------------------------------------------