├── test
├── autotest.bat
├── autotest.R
├── 1test.R
├── elmar_2.R
└── smoketest.R
├── .gitignore
├── pkg
├── data
│ └── edits.RData
├── tests
│ ├── test_all.R
│ └── testthat
│ │ ├── edit_test_1.txt
│ │ ├── testDuplicated.R
│ │ ├── testdatamodel.R
│ │ ├── testgetVars.R
│ │ ├── testc.R
│ │ ├── testEditRow.R
│ │ ├── testIsObviouslyInfeasible.R
│ │ ├── testContains.R
│ │ ├── testechelon.R
│ │ ├── testIsObviouslyRedundant.R
│ │ ├── testIsFeasible.R
│ │ ├── testFourierMotzkin.R
│ │ ├── testeditmatrixAttr.R
│ │ ├── testBlocks.R
│ │ ├── testCheck.R
│ │ ├── testeditarray.R
│ │ ├── testSubstValue.R
│ │ ├── testParseEdits.R
│ │ ├── testErrorLocalizer.R
│ │ ├── testCheckDatamodel.R
│ │ ├── testEditset.R
│ │ ├── testLocalizeErrors.R
│ │ └── testeditmatrix.R
├── R
│ ├── editrules-data.R
│ ├── perturbWeights.R
│ ├── list2env.R
│ ├── zzz.R
│ ├── is.R
│ ├── str.R
│ ├── as.matrix.R
│ ├── removeRedundant.R
│ ├── getUpperBounds.R
│ ├── duplicated.R
│ ├── editAttr.R
│ ├── getH.R
│ ├── isSubset.R
│ ├── isFeasible.R
│ ├── as.igraph.R
│ ├── editarrayAttr.R
│ ├── parseEdits.R
│ ├── c.R
│ ├── echelon.R
│ ├── editfile.R
│ ├── blocks.R
│ ├── expandEdits.R
│ ├── checkRows.R
│ ├── parseNum.R
│ ├── isObviouslyInfeasible.R
│ ├── getVars.R
│ ├── parseCat.R
│ ├── editmatrixAttr.R
│ ├── subsetting.R
│ ├── adjacency.R
│ ├── checkDatamodel.R
│ ├── print.R
│ ├── generateEdits.R
│ ├── softEdits.R
│ ├── mip.R
│ ├── parseMix.R
│ ├── disjunct.R
│ ├── isObviouslyRedundant.R
│ ├── pkg.R
│ ├── writeELAsMip.R
│ └── reduce.R
├── inst
│ └── script
│ │ ├── edits
│ │ ├── mixedits.R
│ │ └── myedits.txt
│ │ └── bench
│ │ ├── randomEdits.R
│ │ ├── benchMIP.R
│ │ ├── benchmip_categorical.R
│ │ ├── benchmip_mixed2.R
│ │ ├── benchmip_balance.R
│ │ ├── benchmip_mixed.R
│ │ └── benchAB.R
├── vignettes
│ └── editrules-vignette.Rnw
└── DESCRIPTION
├── papers
├── DeJongeVanderLoo2011.pdf
├── DeJongeVanderLoo2011-2.pdf
├── sweave.r
├── .gitignore
├── buildmip.bash
├── buildVignette.bash
├── tex
│ ├── errorLocation.tex
│ └── smalltree.sty
├── mySweave.sty
├── index.html
├── fig
│ └── tree.tex
└── pic.R
├── document.bash
├── examples
├── impliedValues.R
├── getVars.R
├── dnf.R
├── listViolatedEdits.R
├── expandEdits.R
├── datamodel.R
├── generateEdits.R
├── separate.R
├── blocks.R
├── backtracker.R
├── editset.R
├── checkRows.R
├── editmatrix.R
├── editmatrixAttr.R
├── violatedEdits.R
├── substValue.R
├── editarray.R
├── graph.R
├── errorLocalizer.R
├── eliminate.R
└── localizeErrors.R
├── roxygen.R
├── editrules.Rproj
├── check.bash
├── .travis.yml
├── develop
├── generateEM.R
├── categorical
│ └── tst.R
└── errorLocalizer_alt.R
└── README.md
/test/autotest.bat:
--------------------------------------------------------------------------------
1 | Rscript autotest.R
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /output
2 | .Rproj.user
3 | .Rhistory
4 | /pkg/man
5 | .DS_Store
6 |
7 |
8 |
--------------------------------------------------------------------------------
/pkg/data/edits.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-cleaning/editrules/HEAD/pkg/data/edits.RData
--------------------------------------------------------------------------------
/test/autotest.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 |
3 |
4 | auto_test("../pkg/R", "../pkg/inst/tests")
5 |
6 |
--------------------------------------------------------------------------------
/pkg/tests/test_all.R:
--------------------------------------------------------------------------------
1 | #library(editrules)
2 |
3 | #if (require(testthat)){
4 | # test_package("editrules")
5 | #}
6 |
--------------------------------------------------------------------------------
/papers/DeJongeVanderLoo2011.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-cleaning/editrules/HEAD/papers/DeJongeVanderLoo2011.pdf
--------------------------------------------------------------------------------
/pkg/tests/testthat/edit_test_1.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | y <= b*x
5 |
6 | b <- 10
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/papers/DeJongeVanderLoo2011-2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data-cleaning/editrules/HEAD/papers/DeJongeVanderLoo2011-2.pdf
--------------------------------------------------------------------------------
/papers/sweave.r:
--------------------------------------------------------------------------------
1 | options(error=traceback)
2 | Sweave("editrules-linear.Rnw")
3 | Sweave("editrules-categorical.Rnw")
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/document.bash:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | R -e 'devtools::document("./pkg")'
4 | R CMD Rd2pdf --force --no-preview -o manual.pdf ./pkg
5 |
6 |
7 |
--------------------------------------------------------------------------------
/examples/impliedValues.R:
--------------------------------------------------------------------------------
1 | e <- editmatrix(expression(
2 | x <= 0,
3 | x >= 0,
4 | x + y == 3
5 | ))
6 |
7 | impliedValues(e)
8 |
9 |
--------------------------------------------------------------------------------
/papers/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.log
3 | *.aux
4 | *.bbl
5 | *.blg
6 | *.loa
7 | *.log
8 | *.tex
9 | *.toc
10 | *.aux
11 | *.bbl
12 | *.blg
13 | *.idx
14 | *.ilg
15 | *.ind
--------------------------------------------------------------------------------
/papers/buildmip.bash:
--------------------------------------------------------------------------------
1 | R -e "knitr::knit('editrules-as-mip.Rnw')"
2 | xelatex editrules-as-mip.tex
3 | bibtex editrules-as-mip
4 | xelatex editrules-as-mip.tex
5 | xelatex editrules-as-mip.tex
6 |
7 |
--------------------------------------------------------------------------------
/pkg/R/editrules-data.R:
--------------------------------------------------------------------------------
1 | #' Some example editrules
2 | #'
3 | #'
4 | #' @title Example editrules, used in vignette
5 | #' @usage data(edits)
6 | #' @name edits
7 | #' @docType data
8 | #' @keywords data
9 | {}
10 |
11 |
12 |
--------------------------------------------------------------------------------
/pkg/R/perturbWeights.R:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # add perturbations to weights, without changing the sort order of nonunique
6 | # elements
7 | #
8 | #
9 | perturbWeights <- function(x){
10 | d <- diff(sort(x))
11 | p <- min(x, d[d > 0]) / 100
12 | x + runif(length(x), max=p)
13 | }
14 |
15 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testDuplicated.R:
--------------------------------------------------------------------------------
1 |
2 | context("duplicated")
3 | test_that("duplicated.editmatrix works",{
4 |
5 | expect_equal(
6 | duplicated.editmatrix(editmatrix(expression(
7 | x < 0, x <= 0, y < 0))),
8 | c(FALSE,FALSE,FALSE)
9 | )
10 | })
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/examples/getVars.R:
--------------------------------------------------------------------------------
1 |
2 | E <- editmatrix(c( "x+3*y == 2*z"
3 | , "x > 2")
4 | )
5 | getVars(E)
6 |
7 | E <- editarray(expression(
8 | gender %in% c('male','female'),
9 | pregnant %in% c(TRUE, FALSE),
10 | if( gender == 'male' ) pregnant == FALSE
11 | )
12 | )
13 |
14 | getVars(E)
15 |
16 |
--------------------------------------------------------------------------------
/roxygen.R:
--------------------------------------------------------------------------------
1 | library(roxygen2)
2 | options(error=traceback)
3 | unlink( 'pkg/man', TRUE)
4 |
5 | setwd('pkg')
6 | roxygenize( '.'
7 | , roxygen.dir='.'
8 | , copy.package=FALSE
9 | , unlink.target=TRUE
10 | )
11 |
12 | if (length(list.files('inst/doc')) == 0){
13 | unlink( 'inst/doc', TRUE)
14 | }
15 |
--------------------------------------------------------------------------------
/examples/dnf.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | E <- editset(expression(
4 | x + y == z,
5 | if ( x > 0 ) y > 0,
6 | x >= 0,
7 | y >= 0,
8 | z >= 0,
9 | A %in% letters[1:4],
10 | B %in% letters[1:4],
11 | if (A %in% c('a','b')) y > 0,
12 | if (A == 'c' ) B %in% letters[1:3]
13 | ))
14 |
15 | disjunct(E)
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/papers/buildVignette.bash:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # usage: buildVignette.bash linear
4 | # buildVignette.bash categorical
5 |
6 | arg=\'editrules-$1\'
7 |
8 |
9 | R -e "Sweave($arg)"
10 | pdflatex editrules-$1.tex
11 | bibtex editrules-$1
12 | makeindex editrules-$1.idx
13 | pdflatex editrules-$1.tex
14 | pdflatex editrules-$1.tex
15 |
16 |
17 |
--------------------------------------------------------------------------------
/pkg/R/list2env.R:
--------------------------------------------------------------------------------
1 | # legacy function to keep running under R<=2.10
2 | if (!exists("list2env")){
3 | list2env <- function(x, envir=NULL, parent=parent.frame()){
4 | if (is.null(envir))
5 | envir <- new.env(parent=parent)
6 | for (i in names(x)){
7 | envir[[i]] <- x[[i]]
8 | }
9 | envir
10 | }
11 | }
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/examples/listViolatedEdits.R:
--------------------------------------------------------------------------------
1 | # Using character vector to define contraints
2 | E <- editmatrix(editrules=c("x+3*y==2*z", "x==z"))
3 | print(E)
4 |
5 | dat <- data.frame( x = c(0,2,1)
6 | , y = c(0,0,1)
7 | , z = c(0,1,1)
8 | )
9 | # valid rows?
10 | valid <- checkRows(E, dat)
11 | invalid <- dat[!valid,]
12 | listViolatedEdits(E,invalid)
13 |
--------------------------------------------------------------------------------
/examples/expandEdits.R:
--------------------------------------------------------------------------------
1 |
2 | expandEdits("x_i > 0", i = 1:10)
3 |
4 | expandEdits("#var > 0", prefix="#", var = c("turnover", "profit", "employees"))
5 |
6 | expandEdits("x_i < y_j", i=1:3,j=2:3)
7 |
8 | #using a variable set in the parent environment
9 | i <- 1:5
10 | expandEdits("sum_i(x_i) == y")
11 |
12 | expandEdits("sum_month(x__year._month) == x__year", month=month.abb, year=2009:2011)
13 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testdatamodel.R:
--------------------------------------------------------------------------------
1 | require(testthat)
2 | context("datamodel")
3 |
4 | test_that("datamodel works",{
5 | expect_identical(
6 | datamodel(editarray(c(
7 | "x %in% c('a','b')",
8 | "y %in% c('x','y')"
9 | ))),
10 | data.frame(variable = c('x','x','y','y'),value=c('a','b','x','y'))
11 | )
12 | })
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/examples/datamodel.R:
--------------------------------------------------------------------------------
1 |
2 | E <- editarray(expression(
3 | age %in% c('under aged','adult'),
4 | positionInHouseholda %in% c('marriage partner', 'child', 'other'),
5 | maritalStatus %in% c('unmarried','married','widowed','divorced'),
6 | if (maritalStatus %in% c('married','widowed','divorced') ) positionInHousehold != 'child',
7 | if ( age == 'under aged') maritalStatus == 'unmarried'
8 | )
9 | )
10 | datamodel(E)
11 |
--------------------------------------------------------------------------------
/editrules.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: knitr
13 | LaTeX: XeLaTeX
14 |
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackagePath: pkg
18 | PackageInstallArgs: --no-multiarch --with-keep.source
19 | PackageRoxygenize: rd,namespace
20 |
--------------------------------------------------------------------------------
/check.bash:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo ""
4 | echo "######## Removing "editrules" installation..."
5 | echo ""
6 | R CMD REMOVE editrules
7 |
8 | echo ""
9 | echo "######## Install "editrules" from output directory..."
10 | echo ""
11 | cd output
12 | for x in *.tar.gz
13 | do
14 | R CMD INSTALL $x
15 | done
16 |
17 | #cd ../../deducorrect
18 | #echo ""
19 | #echo "######## Building and checking deducorrect..."
20 | #echo ""
21 | #bash build.bash
22 |
--------------------------------------------------------------------------------
/pkg/inst/script/edits/mixedits.R:
--------------------------------------------------------------------------------
1 | #
2 | # a bunch of mixed edits
3 |
4 | # define datamodels
5 | A %in% letters[1:6]
6 | B %in% letters[7:10]
7 | C %in% letters[11:20]
8 |
9 | # some purely categorical stuff
10 | if ( A %in% c('a','d') ) B %in% c('g','h')
11 | if ( A == 'b' ) C %in% letters[12:18]
12 |
13 | # some mixed numeric...
14 | if ( C == 'k' ) x > 0
15 | if ( y > 0 ) x + y >= z
16 |
17 | # some pure numeric
18 | u + v == w
19 | 2*v + 3*s == t
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/examples/generateEdits.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | E <- editarray(expression(
4 | a %in% letters[1:4],
5 | b %in% letters[5:8],
6 | if ( a %in% c('a','b') ) b %in% c('e','f'),
7 | if ( a %in% c('c','d') ) b %in% c('h')
8 | ))
9 |
10 | generateEdits(E)
11 |
12 | ## Not run
13 | # load 60 edits (36 variables) from demonstration file
14 | E <- editfile(system.file('script/bench/edits.R',package='editrules'),type='cat')
15 | F <- generateEdits(E)
16 |
17 | summary(F$edits)
18 | F$nodes
19 | F$dudation
20 |
21 | ## End(Not run)
22 |
--------------------------------------------------------------------------------
/examples/separate.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | E <- editset(expression(
4 | x + y == z,
5 | 2*u + 0.5*v == 3*w,
6 | w >= 0,
7 | if ( x > 0 ) y > 0,
8 | x >= 0,
9 | y >= 0,
10 | z >= 0,
11 | A %in% letters[1:4],
12 | B %in% letters[1:4],
13 | C %in% c(TRUE,FALSE),
14 | D %in% letters[5:8],
15 | if ( A %in% c('a','b') ) y > 0,
16 | if ( A == 'c' ) B %in% letters[1:3],
17 | if ( !C == TRUE) D %in% c('e','f')
18 | ))
19 |
20 | (L <- separate(E))
21 |
22 | sapply(L,class)
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testgetVars.R:
--------------------------------------------------------------------------------
1 |
2 | require(testthat)
3 |
4 | context("getVars")
5 |
6 | test_that("getVars.editmatrix works",{
7 | expect_identical(
8 | getVars(editmatrix(c( "x+3*y == 2*z", "x > 2"))),
9 | c("x","y","z")
10 | )
11 | })
12 |
13 | test_that("getVars.editarray conforms to type argument",{
14 | expect_true(is.null(
15 | getVars(
16 | editarray(expression(if ( A == 'a') B %in% letters[1:3])),
17 | type='num'
18 | )
19 | ))
20 | })
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/pkg/vignettes/editrules-vignette.Rnw:
--------------------------------------------------------------------------------
1 | %\VignetteIndexEntry{editrules-vignette}
2 | \documentclass[11pt, fleqn, a4paper]{article}
3 |
4 | \title{The editrules Vignette\\
5 | {\small Package version \Sexpr{packageVersion("editrules")}}
6 | }
7 | \date{\today}
8 |
9 | \author{Edwin de Jonge and Mark van der Loo}
10 |
11 | \begin{document}
12 | \maketitle
13 | At the moment, this is only a stub of a vignette. Please refer to the
14 | working papers included in the package's {\tt inst/doc} directory for
15 | theory and examples.
16 |
17 |
18 | \end{document}
19 |
20 |
--------------------------------------------------------------------------------
/pkg/inst/script/edits/myedits.txt:
--------------------------------------------------------------------------------
1 |
2 | # define category domains
3 | BOOL <- c(TRUE,FALSE)
4 | OPTIONS <- letters[1:4]
5 |
6 | # numerical edits
7 | x + y == z
8 | 2*u + 0.5*v == 3*w
9 | w >= 0
10 | if ( x > 0 ) y > 0
11 | if ( x > y ) z < 10
12 | x >= 0
13 | y >= 0
14 | z >= 0
15 |
16 | # categorical edits
17 | A %in% OPTIONS
18 | B %in% OPTIONS
19 | C %in% BOOL
20 | D %in% letters[5:8]
21 | if ( A %in% c('a','b') ) y > 0
22 | if ( A == 'c' ) B %in% letters[1:3]
23 | if ( !C == TRUE) D %in% c('e','f')
24 |
--------------------------------------------------------------------------------
/pkg/R/zzz.R:
--------------------------------------------------------------------------------
1 |
2 | .onAttach <- function(libname,pkgname){
3 | # msg <- ""
4 | # nfc <- "Use suppressPackageStartupMessages(library(editrules)) to suppress this message on loading editrules."
5 | # packageStartupMessage(msg)
6 | # packageStartupMessage(nfc)
7 |
8 | # default lpSolveAPI options
9 | options(er.lpcontrol =
10 | list(
11 | presolve = "rows" # move univariate constraints into bounds
12 | , epsint = 1e-15
13 | # , epssel = 1e-15
14 | # , epsb = 1e-15
15 | # , epsd = 1e-15
16 | , epspivot = 1e-15
17 | )
18 | )
19 | }
20 |
21 |
22 |
--------------------------------------------------------------------------------
/test/1test.R:
--------------------------------------------------------------------------------
1 | # load sources
2 | src <- sapply(list.files("../pkg/R", full.names=TRUE), source)
3 |
4 | # load demos
5 | examples <- sapply(list.files("../examples", full.names=TRUE), source)
6 |
7 | x <- editmatrix(c( "x ==2"
8 | , "x - 2 > y + 1"
9 | , "x>1"
10 | , "y>-2"
11 | ,"-2>x"
12 | ,"-10 < -2"
13 | )
14 | )
15 |
16 |
17 | x
18 | # dat <- data.frame(x=1:2, y=3:2)
19 |
20 | # editrules(x)
21 | # checkRows(x,dat)
22 | # checkRows(as.character(x),dat)
23 | # checkRows(editrules(x),dat)
24 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 |
2 | # travis config
3 |
4 | dist: trusty
5 | language: r
6 | sudo: required
7 |
8 | addons:
9 | apt:
10 | packages:
11 | - libxml2-dev
12 |
13 | r:
14 | - release
15 |
16 | before_install:
17 | - R -e "install.packages(c('igraph','devtools','roxygen2','testthat','knitr','lpSolveAPI'))"
18 | - R -e "devtools::document('./pkg')"
19 | - cd ./pkg
20 |
21 | r_packages:
22 | - covr
23 | - rmarkdown
24 |
25 |
26 | after_success:
27 | - Rscript -e 'library(covr);coveralls()'
28 |
29 | notifications:
30 | email:
31 | on_success: change
32 | on_failure: change
33 |
34 |
35 |
--------------------------------------------------------------------------------
/pkg/R/is.R:
--------------------------------------------------------------------------------
1 | #' Check object class
2 | #' @name is.editrules
3 | #' @aliases is.editset is.editmatrix is.editarray
4 | #' @param x object to be checked
5 | #' @return \code{logical}
6 | #'
7 | {}
8 |
9 | #' @export
10 | #' @rdname is.editrules
11 | is.editset <- function(x) inherits(x,"editset")
12 |
13 | #' @export
14 | #' @rdname is.editrules
15 | is.editmatrix <- function(x) return(inherits(x, "editmatrix"))
16 |
17 | is.cateditmatrix <- function(x){
18 | return(inherits(x, "cateditmatrix"))
19 | }
20 |
21 | #' @export
22 | #' @rdname is.editrules
23 | is.editarray <- function(x) inherits(x,"editarray")
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testc.R:
--------------------------------------------------------------------------------
1 | context("Concatenating")
2 |
3 | test_that("concatenate editmatrix",{
4 |
5 | E1 <- editmatrix("x1 + x2 > 0")
6 | E2 <- editmatrix("x2 + x3 < 10")
7 |
8 | E <- editmatrix(c("x1 + x2 > 0", "x2 + x3 < 10"))
9 | #print(c.editmatrix(E1,E2))
10 | #print(E)
11 | expect_equivalent(E, c(E1,E2))
12 | e <- editmatrix(expression())
13 | expect_equivalent(nedits(c(e,e)),0)
14 | expect_equivalent(nedits(c(e,NULL)),0)
15 | })
16 |
17 | test_that("concatenate editmatrix",{
18 | E <- editmatrix(c("x1 + x2 > 0", "x2 + x3 < 10"))
19 | expect_equivalent(E, c(E))
20 | })
21 |
22 |
23 |
--------------------------------------------------------------------------------
/examples/blocks.R:
--------------------------------------------------------------------------------
1 | # three seperate blocks
2 | E <- editmatrix( expression(
3 | x1 + x2 == x3,
4 | x3 + x4 == x5,
5 | x5 + x6 == x7,
6 | y1 + y2 == y3,
7 | z1 + z2 == z3
8 | ))
9 | blocks(E)
10 |
11 | # four seperate blocks
12 | E <- editmatrix(expression(
13 | x1 + x2 == x3,
14 | x3 + x4 == x5,
15 | x8 + x6 == x7,
16 | y1 + y2 == y3,
17 | z1 + z2 == z3
18 | ))
19 | blocks(E)
20 |
21 | # two categorical blocks
22 | E <- editarray(expression(
23 | x %in% c('a','b','c'),
24 | y %in% c('d','e'),
25 | z %in% c('f','g'),
26 | u %in% c('w','t'),
27 | if ( x == 'a') y != 'd',
28 | if ( z == 'f') u != 'w'
29 | ))
30 | blocks(E)
31 |
32 |
33 |
--------------------------------------------------------------------------------
/examples/backtracker.R:
--------------------------------------------------------------------------------
1 |
2 | bt <- backtracker( isSolution= {
3 | if (y == 0) return(TRUE)
4 | if (x == 0) return(FALSE)
5 | }
6 | , choiceLeft = { x <- x - 1; y <- y}
7 | , choiceRight = { y <- y - 1; x <- x}
8 | # starting values for x and y
9 | , x=2
10 | , y=1
11 | )
12 |
13 | bt$searchNext(VERBOSE=TRUE)
14 | bt$searchNext(VERBOSE=TRUE)
15 |
16 | # next search will return NULL because there is no more solution
17 | bt$searchNext()
18 |
19 | bt$reset()
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/examples/editset.R:
--------------------------------------------------------------------------------
1 |
2 | # edits can be read from a vector of expressions
3 | E <- editset(expression(
4 | if ( x > 0 ) y > 0,
5 | x + y == z,
6 | A %in% letters[1:2],
7 | B %in% letters[2:3],
8 | if ( A == 'a') B == 'b',
9 | if ( A == 'b') x >= 0,
10 | u + v == w,
11 | if ( u >= 0 ) w >= 0
12 | ))
13 | E
14 | summary(E)
15 | as.data.frame(E)
16 | getVars(E)
17 | getVars(E,type='cat')
18 | getVars(E,type='num')
19 |
20 |
21 | ## see also editfile
22 | E <- editfile(system.file('script/edits/mixedits.R',package='editrules'))
23 | E
24 | summary(E)
25 | as.data.frame(E)
26 | getVars(E)
27 | getVars(E,type='cat')
28 | getVars(E,type='num')
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testEditRow.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 |
3 | context("Linear editrow derivarions")
4 |
5 | test_that("Various rows work",{
6 | edt <- parse(text=c("x==y", "x+w ==y"))
7 | e <- parseNum(edt[[1]])
8 | #print(e)
9 | })
10 |
11 | test_that("Parsing a constant works",{
12 | edt <- parse(text=c("x < 2"))
13 | e <- parseNum(edt[[1]])
14 | #print(e)
15 | })
16 |
17 | test_that("Parsing a inequality works",{
18 | edt <- parse(text=c("x > 2"))
19 | e <- parseNum(edt[[1]])
20 | #print(e)
21 | })
22 |
23 | test_that("Parsing a negative coefficient works",{
24 | edt <- parse(text=c("x == -2"))
25 | #e <- makeEditRow(edt[[1]])
26 | #print(e)
27 | })
28 |
--------------------------------------------------------------------------------
/examples/checkRows.R:
--------------------------------------------------------------------------------
1 | # Using character vector to define contraints
2 | E <- editmatrix(c( "x+3*y==2*z",
3 | "x==z"))
4 | print(E)
5 |
6 | dat <- data.frame( x = c(0,2,1)
7 | , y = c(0,0,1)
8 | , z = c(0,1,1)
9 | )
10 | # valid rows? editmatrix method
11 | checkRows(E, dat)
12 |
13 |
14 | # example using the character method.
15 | data(women)
16 | e <- c("height<=71", "weight/height>=2.1")
17 | valid <- checkRows(e,women)
18 | women[valid,]
19 |
20 | # same example, using data.frame method
21 | ef <- data.frame(
22 | name=c("rule 1", "rule 2"),
23 | edit=e,
24 | description=c("descr1","descr2"))
25 | checkRows(ef,women)
26 |
--------------------------------------------------------------------------------
/pkg/R/str.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | #' @method str editmatrix
4 | #' @rdname editmatrix
5 | #' @export
6 | str.editmatrix <- function(object,...){
7 | vars <- paste(getVars(object),collapse=", ")
8 | if (nchar(vars) > 20 ) vars <- paste(strtrim(vars,16),"...")
9 | cat(paste("editmatrix with", nrow(object), "edits containing variables",vars,"\n"))
10 | }
11 |
12 |
13 | #' @method str editarray
14 | #' @export
15 | str.editarray <- function(object,...){
16 | vars <- paste(getVars(object),collapse=", ")
17 | if (nchar(vars) > 20 ) vars <- paste(strtrim(vars,16),"...")
18 | cat(paste("editarray with", nrow(object), "edits containing variables",vars,"\n"))
19 | }
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/papers/tex/errorLocation.tex:
--------------------------------------------------------------------------------
1 | \begin{table}
2 | \caption{Slots in the {\sf errorLocation} object}
3 | \label{tblErrorLocation}
4 | \begin{tabular}{lp{0.8\textwidth}}
5 | \hline
6 | Slot & description. \\
7 | \hline
8 | {\sf \$adapt} & boolean array, stating which variables must be adapted for each record.\\
9 | {\sf \$status}& A {\sf data.frame}, giving solution weights, number of equivalent solutions, timings and whether the maximum search time was exceeded.\\
10 | {\sf \$user} & Name of user running R during the error localization\\
11 | {\sf \$timestamp} & {\sf date()} at the end of the run.\\
12 | {\sf \$call} & The call to {\sf localizeErrors}\\
13 | \hline
14 | \end{tabular}
15 | \end{table}
16 |
--------------------------------------------------------------------------------
/pkg/R/as.matrix.R:
--------------------------------------------------------------------------------
1 | #' @include editmatrix.R
2 | #' @include editarray.R
3 | #' @include editset.R
4 | {}
5 |
6 | #' convert to matrix
7 | #'
8 | #' @export
9 | #' @method as.matrix editmatrix
10 | #' @rdname editmatrix
11 | #'
12 | #' @return \code{as.matrix}: Augmented \code{matrix} of \code{editmatrix}. (See also \code{\link{getAb}}).
13 | as.matrix.editmatrix <- function(x, ...){
14 | array(x, dim=dim(x), dimnames=dimnames(x))
15 | }
16 |
17 |
18 | #' convert to matrix
19 | #' @export
20 | #' @method as.matrix editarray
21 | #' @rdname editarray
22 | #' @return \code{as.matrix}: The boolean matrix part of the \code{editarray}.
23 | as.matrix.editarray <- function(x,...){
24 | array(x,dim=dim(x),dimnames=dimnames(x))
25 | }
26 |
27 |
--------------------------------------------------------------------------------
/pkg/R/removeRedundant.R:
--------------------------------------------------------------------------------
1 | #
2 | removeRedundant <- function(E){
3 |
4 | m <- as.mip(E)
5 | A <- -getA(m$E)
6 | b <- getb(m$E)
7 | keep <- rep(TRUE, nrow(A))
8 | names(keep) <- rownames(A)
9 |
10 | sapply(seq_len(nrow(A)),function(r){
11 | m1 <- m
12 | #keep[r] <<- FALSE
13 | m1$E <- m1$E[keep,]
14 |
15 | m1$objfn <- A[r,]
16 |
17 | lps <- as.lp.mip(m1)
18 | statuscode <- solve(lps)
19 |
20 | o <- -1*get.objective(lps)
21 |
22 | keep[r] <<- (b[r] <= o)
23 | c(o=o, t=b[r])
24 | })
25 | structure(E[names(keep)[keep],], removed=E[names(keep)[!keep],])
26 | }
27 |
28 | # E <- editmatrix(c(A="x>2", B="y > x", C="y>1" , D="x>1"))
29 | # Er <- removeRedundant(E)
30 | # Er
31 |
--------------------------------------------------------------------------------
/pkg/R/getUpperBounds.R:
--------------------------------------------------------------------------------
1 | #' Get upperbounds of edits, given the boundaries of all variables
2 | #' @param E \code{editmatrix}
3 | #' @param xlim \code{matrix} with columns lower and upper, and rows are variables (in same order as E)
4 | #' @return matrix with upperbounds per edit and a possible value
5 | #' @keywords internal
6 | getUpperBounds <- function(E, xlim){
7 | #print(xlim)
8 | A_p <- A_m <- A <- getA(E)
9 | A_p[A < 0] <- 0
10 | A_m[A > 0] <- 0
11 | ub <- A_m %*% xlim[,1] + A_p %*% xlim[,2]
12 | lb <- A_m %*% xlim[,2] + A_p %*% xlim[,1]
13 |
14 | b <- getb(E)
15 | b[is.na(b)] <- lb[is.na(b)]
16 |
17 | ub <- cbind(ub=ub, b=b, dummy=b-ub)
18 | #colnames(ub) <- c("lim", "dummy")
19 | rownames(ub) <- rownames(E)
20 | ub
21 | }
22 |
23 |
--------------------------------------------------------------------------------
/examples/editmatrix.R:
--------------------------------------------------------------------------------
1 | # Using a character vector to define contraints
2 | E <- editmatrix(c("x+3*y==2*z", "x==z"))
3 | print(E)
4 |
5 | # Using a expression vector to define contraints
6 | E <- editmatrix(expression(x+3*y==2*z, x==z))
7 | print(E)
8 |
9 | # an editmatrix also has a summary method:
10 | summary(E)
11 |
12 | # select rows from an editmatrix:
13 | E <- editmatrix(c("x+3*y==2*z", "x >= z"))
14 | E[getOps(E) == "=="]
15 |
16 |
17 | #Using data.frame to define constraints
18 | E.df <- data.frame(
19 | name =c("A","B","C"),
20 | edit = c("x == y",
21 | "z + w == y + x",
22 | "z == y + 2*w"),
23 | description = c(
24 | "these variables should be equal","","")
25 |
26 | )
27 | print(E.df)
28 |
29 | E <- editmatrix(E.df)
30 | print(E)
31 |
--------------------------------------------------------------------------------
/examples/editmatrixAttr.R:
--------------------------------------------------------------------------------
1 |
2 | E <- editmatrix(c( "x+3*y == 2*z"
3 | , "x > 2")
4 | )
5 | print(E)
6 |
7 | # get editrules, useful for storing and maintaining the rules external from your script
8 | as.data.frame(E)
9 |
10 | # get coeficient matrix of inequalities
11 | getA(E)
12 |
13 | # get augmented matrix of linear edit set
14 | getAb(E)
15 |
16 | # get constants of inequalities (i.e. c(0, 2))
17 | getb(E)
18 |
19 | # get operators of inequalities (i.e. c("==",">"))
20 | getOps(E)
21 |
22 | # get variables of inequalities (i.e. c("x","y","z"))
23 | getVars(E)
24 |
25 | # isNormalized
26 | isNormalized(E)
27 |
28 | #normalized E
29 | E <- normalize(E)
30 | E
31 |
32 | # is het now normalized?
33 | isNormalized(E)
34 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testIsObviouslyInfeasible.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 |
3 | context("Obvious infeasibility")
4 |
5 | test_that("Obvious infeasibility is detected",{
6 | expect_true(isObviouslyInfeasible(editmatrix("0*x == 1")))
7 | expect_true(isObviouslyInfeasible(editmatrix("0*x < -1")))
8 | expect_true(isObviouslyInfeasible(editmatrix("1e-12*x <= -1")))
9 | expect_true( isObviouslyInfeasible(editmatrix("0*x < 0")))
10 | expect_true( isObviouslyInfeasible(editmatrix("0*x < 1e-12")))
11 | expect_true( isObviouslyInfeasible(editmatrix("1e-12*x < 1e-12")))
12 | expect_false(isObviouslyInfeasible(editmatrix("0*x <= 0")))
13 | expect_false(isObviouslyInfeasible(editmatrix("x <= 0")))
14 | expect_false(isObviouslyInfeasible(editmatrix("0*x <=1e-12")))
15 | })
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/pkg/R/duplicated.R:
--------------------------------------------------------------------------------
1 | #' Check for duplicate edit rules
2 | #'
3 | #' @method duplicated editarray
4 | #' @param x a \code{\link{editarray}}
5 | #' @param ... other parameters to be passed to or from other methods.
6 | #' @export
7 | #' @keywords internal
8 | duplicated.editarray <- function(x, ...) duplicated(getArr(x))
9 |
10 |
11 |
12 | #' Check for duplicate edit rules
13 | #'
14 | #' @param x an \code{\link{editmatrix}}
15 | #' @param ... options to be passed to other methods
16 | #' @return logical vector
17 | #' @method duplicated editmatrix
18 | #' @export
19 | #' @keywords internal
20 | duplicated.editmatrix <- function(x,...){
21 | ops2num <- c('<'=1,'<='=2,'=='=3,'>='=4,'>'=5)
22 | duplicated(
23 | cbind(
24 | getAb(x),
25 | ops2num[getOps(x)]
26 | )
27 | )
28 | }
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testContains.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | context('contains')
4 | test_that("contains works for editset",{
5 | E <- editset(expression(
6 | if ( x > 0 ) y > 0,
7 | x >= 0,
8 | x+y == z
9 | ))
10 | expect_equivalent(
11 | contains(E,'z'),
12 | matrix(c(FALSE,TRUE,FALSE),nrow=3,ncol=1)
13 | )
14 | # test generated from bug report of Jeroen Pannekoek & MvdL
15 | E <- editset(expression(
16 | 0 < v100 + v37 + v38 + v39 + v40 + v41 + v42
17 | ,0 <= v40
18 | ,if( 0 < v40 ) v50 >= 1
19 | ,if( 0 < v40 ) v51 >= 1
20 | ,if( 0 < v50 ) v40 > 0
21 | ,if( v40 <= 0 ) 0 >= v51
22 | ,if( v40 <= 0 ) 0 >= v132
23 | ,if( 0 < v40 ) v132 >= 1
24 | ,if( v115 < 1 ) 0 >= v40
25 | ))
26 | expect_equivalent(
27 | contains(E,'v40'),
28 | matrix(rep(TRUE,9),nrow=9,ncol=1)
29 | )
30 | })
31 |
32 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testechelon.R:
--------------------------------------------------------------------------------
1 |
2 | context("Echelon")
3 |
4 |
5 | test_that("Matrix reduces to echelon form",{
6 | expect_equal(
7 | round(echelon(
8 | matrix(c(
9 | 1,3,1,4,
10 | 2,7,3,-9,
11 | 1,5,3,1,
12 | 1,2,0,8), byrow=TRUE, nrow=4
13 | )
14 | )),
15 | matrix(c(
16 | 1,0,-2,0,
17 | 0,1,1,0,
18 | 0,0,0,1,
19 | 0,0,0,0),byrow=TRUE,nrow=4
20 | )
21 | )
22 | expect_equal(
23 | round(echelon(
24 | matrix(c(
25 | 2,1,-1,8,
26 | -3,-1,2,-11,
27 | -2,1,2,-3), byrow=TRUE, nrow=3
28 | )
29 | )),
30 | matrix(c(
31 | 1,0,0,2,
32 | 0,1,0,3,
33 | 0,0,1,-1),
34 | byrow=TRUE,nrow=3
35 | )
36 | )
37 | })
38 |
39 |
40 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testIsObviouslyRedundant.R:
--------------------------------------------------------------------------------
1 |
2 | require(testthat)
3 | context("Obvious redundancy")
4 |
5 | test_that("Obviously redundant rows are detected",{
6 | expect_true(isObviouslyRedundant(editmatrix("0*x == 1e-12")))
7 | expect_true(isObviouslyRedundant(editmatrix("0*x <= 0")))
8 | expect_true(isObviouslyRedundant(editmatrix("0*x < 1")))
9 | expect_false(isObviouslyRedundant(editmatrix("0*x < 0")))
10 | expect_true(isObviouslyRedundant(editmatrix("0*x <= 1")))
11 | expect_true(isObviouslyRedundant(editmatrix("1e-12*x < 1")))
12 |
13 | expect_equal(isObviouslyRedundant(editmatrix(c("0*x <= 0"
14 | , "y < 0"
15 | )
16 | )
17 | )
18 | , c(TRUE,FALSE)
19 | )
20 | })
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/examples/violatedEdits.R:
--------------------------------------------------------------------------------
1 | # Using character vector to define contraints
2 | E <- editmatrix(c( "x+3*y==2*z"
3 | , "x==z"
4 | )
5 | )
6 |
7 | dat <- data.frame( x = c(0,2,1)
8 | , y = c(0,0,1)
9 | , z = c(0,1,1)
10 | )
11 | print(dat)
12 |
13 | ve <- violatedEdits(E,dat)
14 |
15 | print(ve)
16 | summary(ve, E)
17 | plot(ve)
18 |
19 | # An example with categorical data:
20 |
21 | E <- editarray(expression(
22 | gender %in% c('male','female'),
23 | pregnant %in% c(TRUE, FALSE),
24 | if( gender == 'male' ) !pregnant
25 | )
26 | )
27 | print(E)
28 |
29 | dat <- data.frame(
30 | gender=c('male','male','female','cylon'),
31 | pregnant=c(TRUE,FALSE,TRUE,TRUE)
32 | )
33 |
34 | print(dat)
35 | # Standard, the datamodel is checked as well,
36 | violatedEdits(E,dat)
37 |
38 | # but we may turn this of
39 | violatedEdits(E,dat,datamodel=FALSE)
40 |
41 |
42 |
--------------------------------------------------------------------------------
/examples/substValue.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | E <- editmatrix(expression(
4 | x + y == z,
5 | 2*y < 10,
6 | 3*x + 1.5*u < 7,
7 | z >= 0
8 | )
9 | )
10 |
11 | # single value
12 | substValue(E,'z',10)
13 | # multiple values
14 | substValue(E,c('x','y'),c(1,3))
15 | # remove substituted variable from edits
16 | substValue(E,'z',10,reduce=TRUE)
17 | # do not remove redundant row:
18 | substValue(E,'z',10,removeredundant=FALSE)
19 |
20 |
21 | # example with an editset
22 | E <- editset(expression(
23 | x + y == z,
24 | x >= 0,
25 | y >= 0,
26 | A %in% c('a1','a2'),
27 | B %in% c('b1','b2'),
28 | if ( x > 0 ) y > 0,
29 | if ( y > 0 ) x > 0,
30 | if ( A == 'a' ) B == 'b',
31 | if ( A == 'b' ) y > 3
32 | )
33 | )
34 |
35 | # substitute pure numerical variable
36 | substValue(E,'z',10)
37 | # substitute pure categorical variable
38 | substValue(E,'A','a1')
39 | # substitute variable appearing in logical constraints
40 | substValue(E,'x',3)
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testIsFeasible.R:
--------------------------------------------------------------------------------
1 | require(testthat)
2 |
3 | context("Feasibility checks: numerical")
4 |
5 | test_that("isFeasible",{
6 | expect_false(isFeasible(editmatrix(c("x +y < 0","x+y>0")), warn=FALSE))
7 | })
8 |
9 | test_that("isFeasible, warning",{
10 | expect_warning(isFeasible(editmatrix(c("x +y < 0","x+y>0")), warn=TRUE))
11 | })
12 |
13 | test_that("isFeasible with 0==1",{
14 | expect_false(isFeasible(editmatrix("0==1"), warn=FALSE))
15 | })
16 |
17 |
18 | context("Feasibility checks: categorical")
19 | test_that('isFeasible with categorical data',{
20 | # counterintuitive example: edits are contradictory, but space of possible records is not empty.
21 | E <- editarray(c(
22 | "a %in% letters[1:3]",
23 | "b %in% letters[4:6]",
24 | "if ( a == 'a' ) b == 'd'",
25 | "if ( a == 'a' ) b != 'd'"
26 | ))
27 | expect_true(isFeasible(E))
28 | # obvious contradiction
29 | expect_false(isFeasible(
30 | editarray(c("b %in% c('x','y')","if (TRUE) b == 'x'","if(TRUE) b != 'x'"))
31 | ))
32 | })
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/papers/mySweave.sty:
--------------------------------------------------------------------------------
1 | \NeedsTeXFormat{LaTeX2e}
2 | \ProvidesPackage{Sweave}{}
3 |
4 | \RequirePackage{ifthen}
5 | \newboolean{Sweave@gin}
6 | \setboolean{Sweave@gin}{true}
7 | \newboolean{Sweave@ae}
8 | \setboolean{Sweave@ae}{true}
9 |
10 | \DeclareOption{nogin}{\setboolean{Sweave@gin}{false}}
11 | \DeclareOption{noae}{\setboolean{Sweave@ae}{false}}
12 | \ProcessOptions
13 |
14 | \RequirePackage{graphicx,fancyvrb}
15 | \IfFileExists{upquote.sty}{\RequirePackage{upquote}}{}
16 |
17 | \ifthenelse{\boolean{Sweave@gin}}{\setkeys{Gin}{width=0.8\textwidth}}{}%
18 | \ifthenelse{\boolean{Sweave@ae}}{%
19 | \RequirePackage[T1]{fontenc}
20 | \RequirePackage{ae}
21 | }{}%
22 |
23 | \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontshape=sl,fontsize=\small}
24 | \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontsize=\small}
25 | \DefineVerbatimEnvironment{Scode}{Verbatim}{fontshape=sl,fontsize=\small}
26 |
27 | \newenvironment{Schunk}{}{}
28 |
29 | \newcommand{\Sconcordance}[1]{%
30 | \ifx\pdfoutput\undefined%
31 | \csname newcount\endcsname\pdfoutput\fi%
32 | \ifcase\pdfoutput\special{#1}%
33 | \else\immediate\pdfobj{#1}\fi}
34 |
--------------------------------------------------------------------------------
/develop/generateEM.R:
--------------------------------------------------------------------------------
1 | # generate large editmatrix useful for performance testing
2 |
3 | require(editrules)
4 |
5 | genEditMatrix <- function(nedits, nvars, nvr=min(4, nvars)){
6 | nedits <- nedits
7 | nvars <- nvars
8 | gen <- function(){
9 | m <- matrix(0, nrow=nedits, ncol=nvars)
10 | for (i in 1:nedits){
11 | m[i,sample(nvars, nvr)] <- sample(c(-2,-1,1,2), size=nvr, replace=TRUE)
12 | }
13 | ops <- sample(c("==","<="), size=nedits, replace=TRUE)
14 | as.editmatrix(A=m, ops=ops)
15 | }
16 |
17 | E <- gen()
18 | while(!isFeasible(E)){
19 | E <- gen()
20 | nedits <- nedits-1
21 | }
22 | #decrease number of constraint until the matrix becomes feasible
23 | E
24 | }
25 |
26 | nvars <- 50
27 | E <- genEditMatrix(10,nvars)
28 | vars <- getVars(E)
29 |
30 | x <- numeric(nvars) # since b==0, this is a valid solution
31 | names(x) <- vars
32 |
33 | x[sample(nvars,1)] <- 1
34 |
35 | el <- errorLocalizer(E, x)
36 | system.time({
37 | sol1 <- el$searchBest()
38 | })
39 | which(sol1$adapt)
40 |
41 | system.time({
42 | sol2 <- localizeError_lp(E,x)
43 | })
44 | which(sol2$adapt)
45 |
46 | #results should be analyzed!
--------------------------------------------------------------------------------
/pkg/R/editAttr.R:
--------------------------------------------------------------------------------
1 | #' Number of edits
2 | #' Count the number of edits in a collection of edits.
3 | #' @param E \code{\link{editset}}, \code{\link{editarray}} or \code{\link{editmatrix}}
4 | #' @export
5 | nedits <- function(E){
6 | if ( is.vector(E) ) return(length(E))
7 | if (any(class(E) %in% c('editmatrix','editarray'))){
8 | n <- nrow(E)
9 | } else if ( inherits(E,'editset') ) {
10 | n <- nrow(E$num) + nrow(E$mixcat)
11 | } else {
12 | stop('Argument must be character, editset, editarray, editmatrix')
13 | }
14 | n
15 | }
16 |
17 | #' Names of edits
18 | #'
19 | #' Retrieve edit names from editset, -array or -matrix
20 | #' @param E \code{\link{editset}}, \code{\link{editarray}} or \code{\link{editmatrix}}
21 | #' @export
22 | editnames <- function(E){
23 |
24 | if (any(class(E) %in% c('editmatrix','editarray'))){
25 | n <- rownames(E)
26 | } else if ( inherits(E,'editset') ) {
27 | n <- c(rownames(E$num),rownames(E$mixcat))
28 | } else {
29 | stop('Argument must be editset, editarray or editmatrix')
30 | }
31 | n
32 |
33 | }
34 |
35 |
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/test/elmar_2.R:
--------------------------------------------------------------------------------
1 | library(editrules)
2 |
3 | #
4 | E_set <- editset(c(
5 | "local >= 1",
6 | "local <= 10000",
7 | "nace %in% c('461', '462')",
8 | "turnover < -1", # contradictionary range check
9 | "turnover > 1e6", # contradictionary range check
10 | "pers >= 1",
11 | "pers <= 99999",
12 | "pers >= local",
13 | "if (nace == '461') pers <= 3 * local",
14 | "if (nace == '461') turnover <= 10000 * pers",
15 | "turnover <= 1e6 * pers"))
16 | is.editset(E_set)
17 | E_set
18 | summary(E_set)
19 | #
20 | isFeasible(E_set, warn = TRUE)
21 | #
22 | rawdata <- data.frame(
23 | id = c("1","2"),
24 | nace = c("461", "462"),
25 | local = c(1,2),
26 | pers = c(100001,100001),
27 | turnover = c(1e6 + 1, 1e6 + 1)
28 | )
29 | rawdata <- rawdata[,-1]
30 | rawdata
31 | #
32 | recwghts <- data.frame(
33 | id = c("1","2"),
34 | nace = c(1000,1000),
35 | local = c(90,90),
36 | pers = c(10,10),
37 | turnover = c(95,95)
38 | )
39 | recwghts <- as.matrix(recwghts[,-1])
40 | recwghts
41 | is.matrix(recwghts)
42 | #
43 | #
44 | localizeErrors(E_set,rawdata,weight=rep(1, ncol(rawdata)))
45 |
46 | #
47 | localizeErrors(E_set,rawdata,weight=recwghts, method = "mip")
48 | errorLocalizer.mip(E_set, rawdata[1,], weight=recwghts[1,])
49 | #
50 |
--------------------------------------------------------------------------------
/pkg/R/getH.R:
--------------------------------------------------------------------------------
1 | #' Returns the derivation history of an edit matrix or array
2 | #'
3 | #' Function \code{\link{eliminate}} tracks the history of edits in a logical array H.
4 | #' H has nrow(E) rows and the number of columns is the number of
5 | #' edits in the \code{\link{editmatrix}} as it was first defined. If
6 | #' H[i,j1], H[i,j2],...,H[i,jn] are \code{TRUE}, then E[i,] is some
7 | #' (positive, linear) combination of original edits E[j1,], E[j2,],...,E[jn,]
8 | #'
9 | #' Attributes H and h are used to detect redundant derived edits.
10 | #'
11 | #' @param E \code{\link{editmatrix}}
12 | #' @rdname geth
13 | #' @seealso \code{\link{editmatrix}}, \code{\link{eliminate}}
14 | #'
15 | #'
16 | #' @export
17 | getH <- function(E){
18 | if ( !class(E) %in% c('editmatrix','editarray') )
19 | stop("E has to be an editmatrix or editarray")
20 | attr(E,"H")
21 | }
22 |
23 | #' Returns the number of elimination steps performed on an edit matrix or array
24 | #'
25 | #' h records the number of variables eliminated from E by \code{\link{eliminate}}
26 | #'
27 | #' @rdname geth
28 | #' @seealso \code{\link{editmatrix}}, \code{\link{eliminate}}
29 | #' @export
30 | geth <- function(E){
31 | if ( !class(E) %in% c('editmatrix','editarray') )
32 | stop("E has to be an editmatrix or editarray")
33 | attr(E,"h")
34 | }
35 |
36 |
--------------------------------------------------------------------------------
/examples/editarray.R:
--------------------------------------------------------------------------------
1 |
2 | # Here is the prototypical categorical edit: men cannot be pregnant.
3 | E <- editarray(expression(
4 | gender %in% c('male','female'),
5 | pregnant %in% c('yes','no'),
6 | if( gender == 'male' ) pregnant == 'no'
7 | )
8 | )
9 | E
10 |
11 | # an editarray has a summary method:
12 | summary(E)
13 |
14 | # A yes/no variable may also be modeled as a logical:
15 | editarray(expression(
16 | gender %in% c('male','female'),
17 | pregnant %in% c(TRUE, FALSE),
18 | if( gender == 'male' ) pregnant == FALSE
19 | )
20 | )
21 |
22 | # or, shorter (and using a character vector as input):
23 | editarray(expression(
24 | gender %in% c('male','female'),
25 | pregnant %in% c(TRUE, FALSE),
26 | if( gender == 'male' ) !pregnant
27 | )
28 | )
29 |
30 | # the \%in\% statement may be used at will
31 | editarray(expression(
32 | gender %in% c('male','female'),
33 | pregnant %in% c(TRUE, FALSE),
34 | positionInHousehold %in% c('marriage partner', 'child', 'other'),
35 | maritalStatus %in% c('unmarried','married','widowed','divorced'),
36 | if( gender == 'male' ) !pregnant,
37 | if( maritalStatus %in% c(
38 | 'unmarried',
39 | 'widowed',
40 | 'divorced')
41 | ) !positionInHousehold %in% c('marriage partner','child')
42 | )
43 | )
44 |
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testFourierMotzkin.R:
--------------------------------------------------------------------------------
1 | require(testthat)
2 |
3 | context("FM elimination")
4 |
5 | # test: the first example of eliminate
6 | test_that("eliminate works fine",{
7 |
8 | P <- editmatrix(c(
9 | "4*x1 - 5*x2 - 3*x3 + z <= 0",
10 | "-x1 + x2 -x3 <= 2",
11 | "x1 + x2 + 2*x3 <= 3",
12 | "-x1 <= 0",
13 | "-x2 <= 0",
14 | "-x3 <= 0"))
15 | P1 <- eliminate(P, "x1", fancynames=TRUE)
16 | Ab <- matrix(c(
17 | 0, -0.25, -1.75, 0.25, 2,
18 | 0, -1.25, -0.75, 0.25, 0,
19 | 0, 2.00, 1.00, 0.00, 5,
20 | 0, 1.00, 2.00, 0.00, 3,
21 | 0, -1.00, 0.00, 0.00, 0,
22 | 0, 0.00, -1.00, 0.00, 0), byrow=TRUE,nrow=6)
23 | H <- matrix(c(
24 | TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
25 | TRUE, FALSE, FALSE, TRUE, FALSE, FALSE,
26 | FALSE, TRUE, TRUE, FALSE, FALSE, FALSE,
27 | FALSE, FALSE, TRUE, TRUE, FALSE, FALSE,
28 | FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,
29 | FALSE, FALSE, FALSE, FALSE, FALSE, TRUE), byrow=TRUE,nrow=6)
30 | op <- c("<=", "<=", "<=", "<=", "<=", "<=")
31 | expect_true(all( Ab == getAb(P1) ))
32 | expect_true(all( H == getH(P1) ))
33 | expect_true(all( op == getOps(P1)))
34 | expect_true( geth(P1) == 1 )
35 | })
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testeditmatrixAttr.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 |
3 | context("Editmatrix attributes")
4 |
5 | test_that("editrules can derive the correct info from a matrix",{
6 | mat <- matrix( c( 1,-2, 0
7 | , 2, 0, 1
8 | )
9 | , nrow=2
10 | , byrow=TRUE
11 | # , dimnames=list(c("a", "b"), c("x","y", "z"))
12 | )
13 | ei <- as.data.frame(as.editmatrix(mat))
14 | expect_equal(ei$edit, c("x1 == 2*x2", "2*x1 + x3 == 0"))
15 |
16 | mat <- matrix( c( 1,-2
17 | , 2, 0
18 | )
19 | , nrow=2
20 | , byrow=TRUE
21 | , dimnames=list(c("A", "B"), c("x","y"))
22 | )
23 | ei <- as.data.frame(as.editmatrix(mat))
24 | #expect_equal(ei$name, c("A","B"))
25 | expect_equal(ei$edit, c("x == 2*y", "2*x == 0"))
26 | })
27 |
28 | test_that("getb works",{
29 | cond <- c( "x + y > 2"
30 | , "y < 10"
31 | )
32 | E <- editmatrix(cond, FALSE)
33 | b <- getb(E)
34 |
35 | expect_equal(b, c(num1=2,num2=10))
36 | })
37 |
38 | test_that("getOps works",{
39 | cond <- c( "x + y > 2"
40 | , "y < 10"
41 | , "x + y == 2"
42 | , "y <= 10"
43 | , "y >= 10"
44 | )
45 | E <- editmatrix(cond, FALSE)
46 | ops <- getOps(E)
47 | expect_equivalent(ops, c(">","<","==","<=",">="))
48 | })
49 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testBlocks.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | context("Detecting blocks")
4 |
5 |
6 | test_that("editmatrix separates in blocks",{
7 | E <- editmatrix( c(
8 | "x1 + x2 == x3"
9 | , "x3 + x4 == x5"
10 | , "x5 + x6 == x7"
11 | , "y1 + y2 == y3"
12 | , "z1 + z2 == z3")
13 | )
14 | expect_equal(length(blocks(E)),3)
15 | expect_true(all(getAb(blocks(E)[[2]])==c(1,1,-1,0)))
16 | })
17 |
18 | test_that("editarray separates in blocks",{
19 | E <- editarray(c(
20 | "x %in% c('a','b','c')",
21 | "y %in% c('d','e')",
22 | "z %in% c('f','g')",
23 | "u %in% c('w','t')",
24 | "if ( x == 'a') y != 'd'",
25 | "if ( z == 'f') u != 'w'"))
26 | expect_equal(length(blocks(E)),2)
27 | expect_true(all(getArr(blocks(E)[[1]]) == c(TRUE,FALSE,FALSE,TRUE,FALSE)))
28 | })
29 |
30 | test_that("editset separates in blocks",{
31 | E <- editset(expression(
32 | if ( x > 0 ) y > 0,
33 | x + y >= z,
34 | A %in% letters[1:2],
35 | B %in% letters[2:3],
36 | if ( A == 'a') B == 'b',
37 | if ( A == 'b') x >= 0,
38 | u + v >= w,
39 | if ( u <= 0 ) w >= 0
40 | ))
41 | b <- blocks(E)
42 | expect_equal(length(b),2)
43 | expect_equal(sort(getVars(b[[1]])),c("A","B","x","y","z"))
44 | expect_equal(sort(getVars(b[[2]])),c("u","v","w"))
45 | })
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testCheck.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 |
3 | context("Edit checking")
4 |
5 | test_that("Checking a data set works",{
6 | edt <- editmatrix("2*x==y")
7 | dat <- data.frame(x=c(2,1), y=c(4,5))
8 | cr <- checkRows(edt, dat)
9 | expect_equal(cr, c(TRUE,FALSE))
10 | })
11 |
12 | test_that("Checking a vector works",{
13 | edt <- editmatrix(editrules = "2*x==y")
14 | dat <- c(x=2, y=1)
15 | expect_equal(as.logical(violatedEdits(edt, dat)), c(TRUE))
16 | })
17 |
18 | test_that("Showing data errors works",{
19 | edt <- editmatrix(editrules = "2*x==y\n5*x==y")
20 | dat <- data.frame(x=c(2,1), y=c(4,5))
21 | errors <- violatedEdits(edt, dat)
22 |
23 | # rule names equal?
24 | expect_equal(colnames(errors), rownames(edt))
25 |
26 | dimnames(errors) <- NULL
27 | expect_equal(errors[,,drop=FALSE], matrix( c( FALSE, TRUE
28 | , TRUE , FALSE
29 | )
30 | , nrow=2
31 | , byrow=TRUE
32 |
33 | )
34 | )
35 | })
36 |
37 |
38 | test_that("Error lists works",{
39 | edt <- editmatrix(editrules = "2*x==y\n5*x==y")
40 | dat <- data.frame(x=c(2,1), y=c(4,5))
41 | errors <- listViolatedEdits(edt, dat)
42 | #expect_equal(errors, list("1"=c(2), "2"=c(1)))
43 | #print(str(errors))
44 | #print(showErrors(dat, edt))
45 | })
46 |
--------------------------------------------------------------------------------
/papers/index.html:
--------------------------------------------------------------------------------
1 |
2 |
R: editrules vignettes
3 |
4 |
5 | Vignettes of package editrules
6 |
7 | editrules-linear.pdf :
8 | Vignette about editing data under linear constraints.
9 | editrules-categorical.pdf :
10 | Vignette about categorical data editing with editrules.
11 |
12 |
13 | Papers
14 |
15 | Manipulation of Linear edits and error localization with the editrules package :
16 |
17 | E. De Jonge and Mark van der Loo (2011). Discussion paper 201120, Statistics Netherlands The Hague/Heerlen.
18 | Technical paper about the package. Equal to the vignette at version 1.0-0.
19 | Manipulation of Categorical data edits and error localization with the editrules package :
20 |
21 | E. De Jonge and Mark van der Loo (2011). Discussion paper 201129, Statistics Netherlands The Hague/Heerlen.
22 | Technical paper about the package. Equal to the vignette at version 2.0-0.
23 |
24 |
25 | Other references
26 |
27 | yaRb
28 | A blog with some simple examples on the usage of editrules, amongst others.
29 | git
30 | The git repository of editrules at Google code.
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/develop/categorical/tst.R:
--------------------------------------------------------------------------------
1 | require(editrules)
2 |
3 | map <- "/media/Data/Projecten/editrules/develop/categorical"
4 | fil <- c("categorical.R", "editarray.R", "reduceCat.R")
5 | dmp <- sapply(file.path(map,fil),source)
6 |
7 | source("../../pkg/R/check.R")
8 |
9 | civilStatusLevels <- c("married","unmarried","widowed","divorced")
10 | hhLevels <- c("marriage partner", "child","other")
11 |
12 | x <- c(
13 | "civilStatus %in% civilStatusLevels",
14 | "positionInHousehold %in% hhLevels",
15 | "age %in% c('<16','16-60','>16')",
16 | "if ( age == '<16') civilStatus != 'married'",
17 | "if (civilStatus != 'married') positionInHousehold != 'marriage partner'"
18 | )
19 | (E <- editarray(x))
20 |
21 | datamodel(E)
22 |
23 | F <- eliminateFM.editarray(E,"civilStatus")
24 | contains(F,"civilStatus")
25 | t(substValue.editarray(E,"positionInHousehold","child"))
26 |
27 | r <- data.frame(age="<16",civilStatus="married",positionInHousehold="marriage partner")
28 |
29 | violatedEdits(E,r)
30 |
31 | u <- sapply(r,as.character)
32 | bt <- errorLocalizer.editarray(E,u)
33 | bt$searchNext(VERBOSE=FALSE)
34 | bt$searchNext(VERBOSE=FALSE)
35 | bt$searchNext(VERBOSE=FALSE)
36 |
37 |
38 | a <- editarray(
39 | c("gender %in% c('m','f')",
40 | "pregnant %in% c('y','n')",
41 | "if(gender == 'm') pregnant == 'n'"
42 | ))
43 | b <- neweditarray(
44 | E = array(c(FALSE,TRUE,FALSE,TRUE),dim=c(1,4)),
45 | ind = list(
46 | gender = as.integer(c('f'=1,'m'=2)),
47 | pregnant= as.integer(c('n'=3,'y'=4))),
48 | sep=":",
49 | names="e1",
50 | levels=c("gender:f","gender:m","pregnant:n","pregnant:y")
51 | )
52 |
53 |
54 |
55 |
56 |
--------------------------------------------------------------------------------
/pkg/R/isSubset.R:
--------------------------------------------------------------------------------
1 |
2 | #' Check which edits are dominated by other ones.
3 | #'
4 | #' An edit defines a subregion of the space of all possible value combinations
5 | #' of a record. Records in this region are interpreted as invalid. An edit rule
6 | #' which defines a region equal to or contained in the region defined by another
7 | #' edit is redundant. (In data editing literature, this is often referred to as
8 | #' a \emph{domination} relation.)
9 | #'
10 | #' @param E \code{\link{editarray}}
11 | #' @return \code{logical} vector indicating if an edit is a subset of at least one other edit.
12 | #' @export
13 | isSubset <- function(E){
14 | if ( !is.editarray(E) ) stop('argument is not an editarray')
15 | isSubset.boolmat(getArr(E))
16 | }
17 |
18 | isSubset.boolmat <- function(A){
19 | if (nrow(A)==0) return(logical(0))
20 | if (nrow(A)==1) return(FALSE)
21 |
22 | d <- duplicated(A)
23 | wd <- which(d)
24 | m <- nrow(A)
25 | if ( m == 0 ) return(logical(0))
26 | M <- (1:m)[!d]
27 | if ( length(M) == 1 ) return(d)
28 | m1 <- length(M)-1
29 | s <- logical(m)
30 | s[M] <- vapply(M,
31 | function(i){
32 | I <- c(i,wd)
33 | any(rowSums(A[-I,,drop=FALSE] - (A[-I,,drop=FALSE] | A[rep(i,m1),,drop=FALSE]) ) == 0)
34 | },
35 | FUN.VALUE=FALSE
36 | )
37 | s | d
38 | }
39 |
40 | # check if edits in A are subset of edits in B: (returns boolean vector)
41 | isSubsetWrt.boolmat <- function(A,B){
42 | m <- nrow(A)
43 | n <- nrow(B)
44 | if ( m == 0 ) return(logical(0))
45 | if ( n == 0 ) return(rep(FALSE,n))
46 |
47 | vapply(1:m,function(i){
48 | any(rowSums(abs({A[rep(i,n),,drop=FALSE] | B} - B)) == 0)
49 | },FUN.VALUE=FALSE)
50 | }
51 |
52 |
53 |
54 |
55 |
56 |
--------------------------------------------------------------------------------
/pkg/R/isFeasible.R:
--------------------------------------------------------------------------------
1 | #' Check consistency of set of edits
2 | #'
3 | #' When variables are \code{\link[=eliminate]{eliminated}} one by one
4 | #' from a set of edits, eventually either no edits are left or an
5 | #' \code{\link[=isObviouslyInfeasible]{obvious contradiction}} is encountered.
6 | #' In the case no records can obey all edits in the set which is therefore
7 | #' \code{inFeasible}.
8 | #'
9 | #'
10 | #' @note This function can potentially take a long time to complete, especially
11 | #' when many connected (conditional) edits are present. Consider using \code{\link{blocks}}
12 | #' to check feasibility of indendent blocks.
13 | #'
14 | #'
15 | #' @param E an \code{\link{editmatrix}}, \code{\link{editarray}} or \code{\link{editset}}
16 | #' @param warn logical: should a warning be emitted when system is infeasible?
17 | #' @return TRUE or FALSE
18 | #'
19 | #' @seealso \code{\link{isObviouslyInfeasible}}, \code{\link{isObviouslyRedundant}}
20 | #' @export
21 | isFeasible <- function(E, warn=FALSE){
22 | ## TODO: make it return the subset of edits causing the contradiction.
23 | vars <- getVars(E)
24 | vars2 <- vars
25 | feasible <- any(!isObviouslyInfeasible(E))
26 | while( isTRUE(feasible) && length(vars) > 0 ){
27 | E <- eliminate(E,vars[1])
28 | vars <- vars[-1]
29 | ## TODO: cleanup editlists that have infeasible parts, currently they are included
30 | ## for all eliminations.
31 | feasible <- any(!isObviouslyInfeasible(E))
32 | if ( !feasible && warn )
33 | warning(
34 | paste("system becomes obviously infeasible after eliminating",
35 | paste(vars2[!(vars2 %in% vars)],collapse=", "))
36 | )
37 | }
38 | return(feasible)
39 | }
40 |
41 |
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testeditarray.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | library(testthat)
4 |
5 | context("Editarray")
6 |
7 |
8 | test_that("2x2 categorical datamodel",{
9 | dm <- c(
10 | "g %in% c('m','f')",
11 | "p %in% c('y','n')")
12 | expect_equivalent(getArr(editarray(c(dm,"if( p == 'y' ) g != 'm'"))),array(c(F,T,F,T),dim=c(1,4)))
13 | expect_equivalent(getArr(editarray(c(dm,"if( p %in% c('y') ) g != 'm'"))),array(c(F,T,F,T),dim=c(1,4)))
14 | expect_equivalent(getArr(editarray(c(dm,"if( p %in% c('y') ) g == 'f'"))),array(c(F,T,F,T),dim=c(1,4)))
15 | })
16 |
17 |
18 | test_that("2x{TRUE,FALSE} datamodel",{
19 | dm <- c(
20 | "g %in% c('m','f')",
21 | "p %in% c(FALSE,TRUE)")
22 | expect_equivalent(getArr(editarray(c(dm,"if( p ) g != 'm'"))),array(c(F,T,F,T),dim=c(1,4)))
23 | expect_equivalent(getArr(editarray(c(dm,"if( g == 'm' ) !p"))),array(c(F,T,F,T),dim=c(1,4)))
24 | expect_equivalent(getArr(editarray(c(dm,"!p || g=='f'"))),array(c(F,T,F,T),dim=c(1,4)))
25 | })
26 |
27 | context("Editarray parsing")
28 | test_that("parse editarray to character and back",{
29 | edts <- c(
30 | "g %in% c('m','f')",
31 | "p %in% c(FALSE,TRUE)",
32 | "if (p) !g=='m'")
33 | expect_equivalent(editarray(edts), editarray(as.character(editarray(edts))))
34 | # cornercase found in version 2.5-1
35 | edts <- expression(
36 | A %in% letters[1:3],
37 | if ( A %in% c('a','b') ) FALSE
38 | )
39 | expect_equivalent(editarray(edts), editarray(as.character(editarray(edts))))
40 | })
41 |
42 | test_that("parse editarray to expression and back",{
43 | edts <- expression(
44 | g %in% c('m','f'),
45 | p %in% c(FALSE,TRUE),
46 | if (p) !g=='m'
47 | )
48 | expect_equivalent(editarray(edts), editarray(as.character(editarray(edts))))
49 | })
50 |
--------------------------------------------------------------------------------
/papers/fig/tree.tex:
--------------------------------------------------------------------------------
1 | \setlength{\unitlength}{1mm}
2 | \begin{picture}(118,60)
3 | \put(46,40){\small
4 | $\left\langle
5 | \begin{array}{l}
6 | y > x-1\\
7 | y > -x + 3\\
8 | y < x + 1\\
9 | y < -x + 5
10 | \end{array},(2,-1),0
11 | \right\rangle$
12 | }
13 | \put(20,20){\small
14 | $\left\langle
15 | \begin{array}{l}
16 | y > 1\\
17 | y < 3\\
18 | \end{array},(2,-1),0
19 | \right\rangle$
20 | }
21 | \put(80,20){\small
22 | $\left\langle
23 | \begin{array}{l}
24 | y > 1\\
25 | y < 3\\
26 | \end{array},(x,-1),1
27 | \right\rangle$
28 | }
29 | \put(0,00){\small
30 | $\left\langle
31 | \begin{array}{l}
32 | -1 > 1\\
33 | -1 < 3\\
34 | \end{array},(2,-1),0
35 | \right\rangle$
36 | }
37 | \put(40,00){\small
38 | $\left\langle
39 | \varnothing,(2,y),1
40 | \right\rangle$
41 | }
42 | \put(60,00){\small
43 | $\left\langle
44 | \begin{array}{l}
45 | -1 > 1\\
46 | -1 < 3\\
47 | \end{array},(x,-1),1
48 | \right\rangle$
49 | }
50 | \put(100,00){\small
51 | $\left\langle
52 | \varnothing,(x,y),2
53 | \right\rangle$
54 | }
55 | \thicklines
56 | \put(64,32){\vector(-4,-1){30}}
57 | \put(64,32){\vector( 4,-1){30}}
58 | \put(34,17){\vector(-1,-1){12}}
59 | \put(34,17){\vector( 1,-1){12}}
60 | \put(94,17){\vector(-1,-1){12}}
61 | \put(94,17){\vector( 1,-1){12}}
62 | \put(27,28){\small subst. $x=2$}
63 | \put(87,28){\small elim. $x$}
64 | \put(6,10){\small subst. $y=-1$}
65 | \put(42,10){\small elim. $y$}
66 | \put(65,10){\small subst. $y=-1$}
67 | \put(102,10){\small elim. $y$}
68 | % frame
69 | %\thinlines
70 | %\put(0,0){\line(0,1){60}}
71 | %\put(0,0){\line(1,0){117}}
72 | %\put(0,60){\line(1,0){117}}
73 | %\put(117,0){\line(0,1){60}}
74 | \end{picture}
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/pkg/R/as.igraph.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | #' @method as.igraph editmatrix
4 | #' @param x An object of class \code{\link{editmatrix}}, \code{\link{editarray}} or \code{\link{editset}}
5 | #' @param weighted see \code{\link[igraph]{graph.adjacency}}
6 | #'
7 | #' @export
8 | #' @rdname adjacency
9 | as.igraph.editmatrix <- function(x, nodetype=c("all", "rules","vars"), rules=editnames(x), vars=getVars(x), weighted=TRUE, ...){
10 | nodetype <- match.arg(nodetype)
11 | a <- adjacency(E=x, nodetype=nodetype, rules=rules, vars=vars, ...)
12 | g <- igraph::graph.adjacency(
13 | a,
14 | weighted=weighted,
15 | mode = 'undirected'
16 | )
17 | # $type is handy for bipartite graph function in igraph...
18 | igraph::V(g)$type <- igraph::V(g)$vars <- attr(a, "vars")
19 | g
20 | }
21 |
22 | #' @method as.igraph editarray
23 | #' @export
24 | #' @rdname adjacency
25 | as.igraph.editarray <- function(x, nodetype=c("all", "rules","vars"), rules=editnames(x), vars=getVars(x),weighted=TRUE, ...){
26 | nodetype <- match.arg(nodetype)
27 | a <- adjacency(E=x, nodetype=nodetype, rules=rules, vars=vars, ...)
28 | g <- igraph::graph.adjacency(
29 | a,
30 | weighted=weighted,
31 | mode = 'undirected'
32 | )
33 | igraph::V(g)$type <- igraph::V(g)$vars <- attr(a, "vars")
34 | g
35 | }
36 |
37 |
38 | #' @method as.igraph editset
39 | #' @export
40 | #' @rdname adjacency
41 | as.igraph.editset <- function(x, nodetype=c("all", "rules","vars"), rules=editnames(x), vars=getVars(x),weighted=TRUE, ...){
42 | nodetype <- match.arg(nodetype)
43 | a <- adjacency(E=x, nodetype=nodetype, rules=rules, vars=vars, ...)
44 | g <- igraph::graph.adjacency(
45 | a,
46 | weighted=weighted,
47 | mode = 'undirected'
48 | )
49 | igraph::V(g)$type <- igraph::V(g)$vars <- attr(a, "vars")
50 | g
51 | }
52 |
--------------------------------------------------------------------------------
/pkg/R/editarrayAttr.R:
--------------------------------------------------------------------------------
1 |
2 | #' get index list from editmatrix
3 | #'
4 | #' The 'ind' attribute is a named list of named integer vectors. The list names are the
5 | #' variable names. The vectors in the list index the columns in the editarray associated with the
6 | #' variables. The names of the vectors are the names of the columns of the editarray.
7 | #'
8 | #' @param E \code{\link{editarray}}
9 | #' @return named list, indexing category levels in the editarray (columns)
10 | #' @keywords internal
11 | getInd <- function(E) attr(E,"ind")
12 |
13 |
14 | #' get seprator used to seperate variables from levels in editarray
15 | #' @param E \code{\link{editarray}}
16 | #' @return character
17 | #' @keywords internal
18 | getSep <- function(E) attr(E,"sep")
19 |
20 | #' Get named logical array from editarray
21 | #' @param E \code{\link{editarray}}
22 | #' @return logical array
23 | #' @keywords internal
24 | getArr <- function(E) unclass(E)[,,drop=FALSE]
25 |
26 | #' retrieve level names from editarray
27 | #' @param E \code{\link{editarray}}
28 | #' @return character vector
29 | #' @keywords internal
30 | getlevels <- function(E) colnames(E)
31 |
32 | #' retrieve edit names from editarray
33 | #' @param E \code{\link{editarray}}
34 | #' @return character vector
35 | #' @keywords internal
36 | getnames <- function(E) rownames(E)
37 |
38 | #' Summarize data model of an editarray in a data.frame
39 | #'
40 | #' @param E \code{\link{editarray}}
41 | #' @return \code{data.frame} describing the categorical variables and their levels.
42 | #' @example ../examples/datamodel.R
43 | #' @seealso \code{\link{checkDatamodel}}
44 | #' @export
45 | datamodel <- function(E){
46 | if (ncol(E) == 0 ) return(data.frame(variable=character(0),value=character(0)))
47 | st <- stack(getInd(E))
48 | vals <- do.call(c,lapply(getInd(E),names))
49 | data.frame(variable=as.character(st[,2]),value=vals,row.names=1:nrow(st))
50 | }
51 |
52 |
--------------------------------------------------------------------------------
/test/smoketest.R:
--------------------------------------------------------------------------------
1 | library(editrules)
2 |
3 | # n : number of variables
4 | # m : number of blocks (< (n-3)/2)
5 | accountBalance <- function(n, m, all.positive=TRUE){
6 | nblock <- (n-m-1) %/% m
7 | varnames <- paste("x",1:n,sep="")
8 | A <- matrix(0, nrow=(m+1), ncol=n)
9 | for ( i in 1:m ){
10 | iblock = ((i-1)*nblock+1):(i*nblock)
11 | A[i,iblock] <- 1
12 | A[i,(i*nblock)+1] <- -1
13 | }
14 | A[m+1,seq(nblock+1,n-m,by=nblock)] <- 1
15 | A[m+1,n] <- -1
16 |
17 | pos = character(0)
18 | if ( all.positive ) pos <- paste(varnames, 0, sep=">=")
19 | c(
20 | reduce(as.editmatrix(A)),
21 | editmatrix(pos)
22 | )
23 | }
24 |
25 | # Generate N records for variables E.
26 | gen_data <- function(E,N,distr=rlnorm,...){
27 | vars <- getVars(E)
28 | nval <- N*length(vars)
29 | as.data.frame(
30 | array(
31 | distr(nval,...),
32 | dim=c(N,length(vars)),
33 | dimnames=list(NULL, vars)
34 | )
35 | )
36 | }
37 |
38 |
39 |
40 | smoke_test <-function(N, nvar, nblocks, all.positive=TRUE, ...){
41 | e <- accountBalance(nvar, nblocks,all.positive)
42 | dat <- gen_data(e,N,...)
43 | el1 <- localizeErrors(e,dat,verbose=TRUE)
44 | el2 <- localizeErrors(e,dat,verbose=TRUE,method="mip")
45 | list(el1,el2,dat,e)
46 | }
47 |
48 | ## this shows that even a well-scaled problem may give different results between
49 | ## B&B and MIP
50 | S <- smoke_test(1000,nvar=7,2,distr=rnorm)
51 | diff <- S[[1]]$status$weight != S[[2]]$status$weight
52 |
53 | if (any(diff)) {
54 | w <- which(diff)
55 |
56 | BB <- S[[1]]$adapt[w,,drop=FALSE]
57 | MIP <- S[[2]]$adapt[w,,drop=FALSE]
58 | dat <- S[[3]][w,]
59 | E <- S[[4]]
60 |
61 | x <- dat[w[1],,drop=TRUE]
62 | elMIP <- errorLocalizer.mip(E,x)
63 |
64 | cbind(x=x, bb=BB[1,], mip=MIP[1,], mipfeas=elMIP$x_feasible)
65 | write.lp(elMIP$lp, "test.lp")
66 | }
67 |
68 |
--------------------------------------------------------------------------------
/pkg/R/parseEdits.R:
--------------------------------------------------------------------------------
1 | #' Parse a character vector of edits
2 | #'
3 | #' This function wraps the native \code{parse} function in a \code{tryCatch}.
4 | #' The function is \code{editrules} internal. It tries to give a meaningfull error message when
5 | #' parsing fails for some reason.
6 | #'
7 | #' @param E \code{character}
8 | #' @param type optional filter for type of edit, may be \code{"all"}, \code{"num"}
9 | #' \code{"cat"} or \code{"mix"}
10 | #' @return The edits in \code{E} parsed to R expressions.
11 | #'
12 | #' @keywords internal
13 | parseEdits <- function(E, type=c("all", "num", "cat", "mix")){
14 | if (is.expression(E)){
15 | edits <- E
16 | } else {
17 | edits <-
18 | tryCatch(parse(text=E),
19 | error=function(e){
20 | stop(paste("Not all edits can be parsed, parser returned", e$message,sep="\n"))
21 | })
22 | }
23 |
24 | #TODO fix this: should work, but currently generates errors in the tests.
25 | # nms <- names(E)
26 | # if (!is.null(nms)){
27 | # names(edits) <- make.unique(nms, sep="")
28 | # }
29 |
30 | type <- match.arg(type)
31 | if (type=="all"){
32 | return(edits)
33 | }
34 | return(edits[editTypes(edits) == type])
35 | }
36 |
37 | parseTree <- function(expr,prefix=NULL){
38 | if (length(expr) == 1){
39 | indent <- paste("[", prefix,"]", sep="", collapse="")
40 | cat(indent, expr,"\n")
41 | }
42 | else {
43 | for (i in 1:length(expr)){
44 | parseTree(expr[[i]], c(prefix,i))
45 | }
46 | }
47 | }
48 |
49 | # basic test for type of edit
50 | # edts \code{expression}
51 | #
52 | editTypes <- function(edts){
53 | ops <- sapply(edts, function(e){deparse(e[[1]])})
54 |
55 | type <- ifelse(ops %in% NUMCMP, "num", "cat")
56 | # todo add check for "=="
57 |
58 | iff <- ops == "if"
59 | mix <- sapply(edts[iff], hasNum)
60 | type[iff] <- ifelse(mix, "mix", "cat")
61 | as.factor(type)
62 | }
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/pkg/inst/script/bench/randomEdits.R:
--------------------------------------------------------------------------------
1 | # functions to generate some random catgegorical edits.
2 |
3 |
4 | # make a set expression
5 | setexpr <- function(var,cats){
6 | paste(var,'%in% c(',paste(cats,collapse=","),')')
7 | }
8 |
9 | # declare a subset FALSE
10 | makeedit <- function(setexpr){
11 | paste('if (',setexpr,') FALSE')
12 | }
13 |
14 |
15 | # generate m random edits
16 | genedits <- function(m,group){
17 | require(editrules)
18 | # use Boskovitz group A or B edits
19 | if ( group=='A' ){
20 | dk <- c(
21 | x.2.1 = 2,
22 | x.2.2 = 2,
23 | x.3.1 = 3,
24 | x.3.2 = 3,
25 | x.4.1 = 4,
26 | x.4.2 = 4,
27 | x.5.1 = 5,
28 | x.5.2 = 5,
29 | x.6.1 = 6,
30 | x.6.2 = 6,
31 | x.7.1 = 7,
32 | x.7.1 = 7,
33 | x.8.1 = 8
34 | )
35 | } else if (group=='B') {
36 | dk <- c(
37 | x.2.1 = 2,
38 | x.2.2 = 2,
39 | x.3.1 = 3,
40 | x.4.2 = 4,
41 | x.5.1 = 5,
42 | x.6.1 = 6,
43 | x.7.1 = 7,
44 | x.8.1 = 8,
45 | x.9.1 = 9,
46 | x.10.1 = 10
47 | )
48 | }
49 | vars <- names(dk)
50 | dm <- c()
51 | for ( v in vars ) dm <- c(dm,setexpr(v,1:dk[v]))
52 |
53 | # generate random edits
54 | n <- length(dk)
55 | edits <- character(m)
56 | for ( i in 1:m ){
57 | invar <- sample(2:n,1)
58 | ivars <- sample(vars,invar,replace=FALSE)
59 | if ( i > 1 && !ivars1[1] %in% ivars ) ivars <- c(ivars,ivars[1])
60 | ivars1 <- ivars
61 | s <- character()
62 | for ( iv in ivars ){
63 | incat <- sample(1:(dk[iv]-1),1)
64 | icats <- sample(1:dk[iv],incat)
65 | s <- c(s,setexpr(iv,icats))
66 | }
67 | edits[i] <- makeedit(paste(s,collapse=" & "))
68 | }
69 | editarray(c(dm,edits))
70 | }
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/pkg/R/c.R:
--------------------------------------------------------------------------------
1 |
2 | #' Combine editmatrices
3 | #'
4 | #' @method c editmatrix
5 | #' @rdname editmatrix
6 | #' @export
7 | #'
8 | c.editmatrix <- function(...){
9 | ems <- list(...)
10 | ems <- ems[!sapply(ems,is.null)]
11 | ems <- ems[!sapply(ems,function(e) nedits(e)==0)]
12 |
13 | if (length(ems)==0) return(editmatrix(expression()))
14 |
15 | ems <- lapply(ems, as.editmatrix)
16 | vars <- sort(unique(unlist(lapply(ems, getVars.editmatrix))))
17 |
18 | A <- lapply(ems, function(E){
19 | a <- matrix(0, nrow=nrow(E), ncol=length(vars), dimnames=list(NULL, vars))
20 | a[, getVars(E, type="colnames")] <- getA(E)
21 | a
22 | })
23 | A <- do.call(rbind, A)
24 |
25 | ops <- unlist(lapply(ems, getOps))
26 | b <- unlist(lapply(ems, getb))
27 | nms <- lapply(ems, function(E){
28 | rn <- rownames(E)
29 | if (is.null(rn)){
30 | rn <- rep("num", nrow(E))
31 | }
32 | rn
33 | })
34 | nms <- make.unique(unlist(nms), sep="")
35 | rownames(A) <- nms
36 |
37 | as.editmatrix(A=A, ops=ops, b=b)
38 | }
39 |
40 | #' Combine editarrays
41 | #' @method c editarray
42 | #' @export
43 | #' @rdname editarray
44 | c.editarray <- function(...){
45 | ems <- list(...)
46 | ems <- ems[!sapply(ems,is.null)]
47 |
48 | lvls <- sort(unique(unlist(lapply(ems, getlevels))))
49 | seps <- unlist(sapply(ems, getSep))
50 | #TODO handle seperators that are not equal to ":"
51 | stopifnot(all(seps==":"))
52 |
53 | B <- lapply(ems, function(E){
54 | a <- matrix(TRUE, nrow=nrow(E), ncol=length(lvls), dimnames=list(rownames(E), lvls))
55 | a[, getlevels(E)] <- getArr(E)
56 | a
57 | })
58 |
59 | B <- do.call(rbind, B)
60 | cats <- sub("^.+:", "", lvls)
61 | vars <- sub(":.*$", "", lvls)
62 | ind <- seq_along(lvls)
63 | names(ind) <- cats
64 | ind <- split(ind, vars)
65 | neweditarray(B, ind, names=rownames(B), sep=":")
66 | }
67 |
68 |
69 | #' @method c editset
70 | #' @rdname editset
71 | #' @export
72 | #'
73 | c.editset <- function(...){
74 | editset( unlist(lapply(list(...), as.character)) )
75 | }
76 |
77 |
--------------------------------------------------------------------------------
/pkg/R/echelon.R:
--------------------------------------------------------------------------------
1 | #' Bring an (edit) matrix to reduced row echelon form.
2 | #'
3 | #' If \code{E} is a matrix, a matrix in reduced row echelon form is returned.
4 | #' If \code{E} is an \code{\link{editmatrix}} the equality part of \code{E} is transformed
5 | #' to reduced row echelon form. For an \code{\link{editset}}, the numerical part is
6 | #' transformed to reduced row echelon form.
7 | #'
8 | #' @aliases echelon.editmatrix echelon.matrix
9 | #'
10 | #' @param E a matrix or editmatrix
11 | #' @param ... options to pass on to further methods.
12 | #' @export
13 | #' @seealso \code{\link{eliminate}}, \code{\link{substValue}}
14 | echelon <- function(E,...){
15 | UseMethod("echelon")
16 | }
17 |
18 |
19 |
20 | #' @method echelon editmatrix
21 | #' @rdname echelon
22 | #' @export
23 | echelon.editmatrix <- function(E,...){
24 | o <- getOps(E)
25 | # nothing to eliminate?
26 | eq <- o == "=="
27 | if ( sum(eq) <= 1 ) return(E)
28 | Ab <- getAb(E)
29 | Ab <- rbind(
30 | echelon.matrix(Ab[eq,,drop=FALSE]),
31 | Ab[!eq,,drop=FALSE]
32 | )
33 | neweditmatrix(Ab,c(o[eq],o[!eq]))
34 |
35 | }
36 |
37 | #' @rdname echelon
38 | #' @method echelon matrix
39 | #' @param tol tolerance that will be used to determine if a coefficient equals zero.
40 | #' @export
41 | echelon.matrix <- function(E, tol=sqrt(.Machine$double.eps), ...){
42 | k <- min(ncol(E),nrow(E))
43 | I <- 1:nrow(E)
44 | for ( i in 1:k ){
45 | I1 <- which(I >= i)
46 | ip <- I1[which.max(abs(E[I1,i]))]
47 | p <- E[ip,]
48 | if ( abs(p[i]) < tol ) next
49 | if ( ip > i ) E[c(ip,i),] <- E[c(i,ip),]
50 | E[-i,] <- E[-i,] - outer(E[-i,i],p/p[i])
51 | }
52 | d <- diag(E)
53 | id <- abs(d) > tol
54 | E[id,] <- E[id,]/d[id]
55 | I0 <- rowSums(abs(E) < tol) == ncol(E)
56 | rbind(E[!I0,,drop=FALSE],E[I0,,drop=FALSE])
57 | }
58 |
59 | #' @method echelon editset
60 | #' @rdname echelon
61 | #' @export
62 | #'
63 | echelon.editset <- function(E,...){
64 | E$num <- echelon(E$num)
65 | E
66 | }
67 |
68 |
69 |
70 |
--------------------------------------------------------------------------------
/pkg/R/editfile.R:
--------------------------------------------------------------------------------
1 |
2 | #' Read edits edits from free-form textfile
3 | #'
4 | #' This utility function allows for free editrule definition in a file. One can extract
5 | #' only the numerical (\code{type='num'}), only the categorical (\code{type='cat'}) or all
6 | #' edits (default) in which case an \code{\link{editset}} is returned.
7 | #' The function first parses all assignments in the file, so it is possible to compute or read
8 | #' a list of categories defining a datamodel for example.
9 | #'
10 | #' @param file name of text file to read in
11 | #' @param type type of edits to extract. Currently, only 'num' (numerical), 'cat' (categorical) and 'all' are implemented.
12 | #'
13 | #' @param ... extra parameters that are currently ignored
14 | #'
15 | #' @return \code{\link{editset}} with all edits if \code{type=all}, \code{\link{editarray}} if \code{type='cat'},
16 | #' \code{\link{editmatrix}} if \code{type='num'}, \code{\link{editset}} with conditional edits if \code{type='mix'}.
17 | #' If the return value is a \code{list}, the elements are named \code{numedits} and \code{catedits}.
18 | #'
19 | #' @export
20 | editfile <- function(file,type=c("all","num","cat","mix"), ...){
21 | # TODO: include expandEdits?
22 | type <- match.arg(type)
23 | if (!type %in% c('num','cat','all')) stop(paste("type",type,"invalid or not implemented yet"))
24 | p <- parse(file=file)
25 | ass <- sapply(p,class) %in% c('<-','=')
26 | e <- new.env()
27 | lapply(p[ass],eval,envir=e)
28 | edits <- p[!ass]
29 | # substitute constant assignments in rules. The if-statement prevents conversion to list()
30 | # if the file being read has no actual rules in it.
31 | if ( length(edits)>0 ){
32 | edits <- sapply(edits,function(x) as.expression(do.call(substitute,list(x,e))) )
33 | }
34 | et <- editTypes(edits)
35 | numedits <- edits[et == 'num']
36 | catedits <- edits[et == 'cat']
37 | mixedits <- edits[et == 'mix']
38 | switch(type,
39 | num = editmatrix(numedits),
40 | cat = editarray(catedits,env=e),
41 | mix = editset(mixedits,env=e),
42 | all = editset(edits,env=e)
43 | )
44 | }
45 |
46 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://travis-ci.org/data-cleaning/editrules)
2 | [](http://cran.r-project.org/package=editrules/)
3 | [](http://www.r-pkg.org/pkg/editrules)
4 | [](https://CRAN.R-project.org/package=editrules)
5 |
6 | **`editrules` has been succeeded by R packages: [validate](http://cran.r-project.org/package=validate) and [errorlocate](http://cran.r-project.org/package=errorlocate)**
7 |
8 | editrules
9 | =========
10 |
11 | R package for parsing edit rules
12 | The editrules package aims to provide an environment to conveniently define, read and check recordwise data constraints including
13 | * Linear (in)equality constraints for numerical data
14 | * Constraints on value combinations of categorical data
15 | * Conditional constraints on numerical and/or mixed data
16 |
17 | In literature these constraints, or restrictions are refered to as _edits_.
18 | editrules can perform common rule set manipulations like variable elimination and value substitution,
19 | and offers error localization functionality based on the (generalized) paradigm of Fellegi and Holt.
20 | Under this paradigm, one determines the smallest (weighted) number of variables to adapt such that no
21 | (additional or derived) rules are violated. The paradigm is based on the assumption that errors are distributed
22 | randomly over the variables and there is no detectable cause of error.
23 | It also decouples the detection of corrupt variables from their correction.
24 | For some types of error, such as sign flips, typing errors or rounding errors, this assumption does not hold.
25 | These errors can be detected and are closely related to their resolution.
26 | The reader is referred to the deducorrect package for treating such errors.
27 |
28 | To install the latest version in R:
29 | ```R
30 | install.packages("editrules")
31 | ```
32 |
33 | To get started, see the [editrules vignette](https://cran.r-project.org/web/packages/editrules/vignettes/editrules-vignette.pdf).
34 |
35 |
--------------------------------------------------------------------------------
/pkg/inst/script/bench/benchMIP.R:
--------------------------------------------------------------------------------
1 | #------several benchmarks for mip testing
2 | library(editrules)
3 | library(lpSolveAPI)
4 |
5 | genRandomEM <- function(nvar=10, nedits=10){
6 | i <- 1
7 | while(TRUE){
8 | A <- matrix(as.integer(rnorm(nvar*nedits)), ncol=nvar)
9 | ops <- rep("<=", nedits)
10 | b <- as.integer(rnorm(nedits))
11 | E <- as.editmatrix(A, ops=ops, b=b)
12 | cat("\r",i,": Checking feasibility")
13 | try(if (isFeasible(E)) {
14 | x <- as.integer(rnorm(nvar))
15 | names(x) <- getVars(E)
16 | return(list(E=E, x=x))
17 | })
18 | cat("\r")
19 | i <- i + 1
20 | }
21 | }
22 |
23 | genOrderedVars <- function(nvar=10){
24 | I <- diag(nvar)
25 | A <- cbind(I[,-1], 0) - I
26 | as.editmatrix(A, ops=rep("<=", nvar))
27 | }
28 |
29 | testOrdered <- function(nvars=10, method="localizer"){
30 | E <- genOrderedVars(nvars)
31 | res <- NULL
32 | data <- as.data.frame(matrix(0, ncol=nvars, nrow=nvars, dimnames=list(errors=NULL, vars=getVars(E))))
33 | for (errors in seq_len(nvars)){
34 | data[errors,seq_len(errors)] <- rev(seq_len(errors)) # creates errors...
35 | }
36 |
37 |
38 | le <- localizeErrors(E, data, method=method)
39 |
40 | cbind(le$status, nvars=nvars, method=method)
41 | }
42 |
43 | f <- file("benchordered.txt", open="wt")
44 | writeLines(paste(c("weight", "degeneracy", "user", "system", "elapsed", "maxDurationExceeded",
45 | "nvars", "method"), collapse="\t"), f)
46 |
47 | m <- seq_len(100)
48 | for (i in m){
49 | cat("nvars = ",i,"....")
50 | res <- testOrdered(nvars=i, method="mip")
51 | cat("\nWriting results\n")
52 | write.table(res, f, col.names=FALSE, row.names=FALSE)
53 | flush(f)
54 | }
55 | for (i in m){
56 | cat("nvars = ",i,"....")
57 | res <- testOrdered(nvars=i, method="localizer")
58 | cat("\nWriting results\n")
59 | write.table(res, f, col.names=FALSE, row.names=FALSE)
60 | flush(f)
61 | }
62 |
63 | close(f)
64 |
65 | tab <- read.table("benchordered.txt", header=T)
66 | str(tab)
67 |
68 | #E <- genRandom2(nvar=100, nedits=20, r=4)
69 | #E
70 |
71 | # Ex <- genRandomEM(nvar=100, nedits=10)
72 | # errorLocalizer.mip(Ex$E,Ex$x, verbose=TRUE)
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testSubstValue.R:
--------------------------------------------------------------------------------
1 |
2 | context("Value substitution")
3 |
4 | test_that("value substitution for numerical data",{
5 | E <- editmatrix(c("x+y==z","x-u==1"))
6 | expect_true(
7 | sum(abs(
8 | getAb(substValue(E,c('x','y'),c(1,2)))-
9 | matrix(c(
10 | 0, 0,-1, 0,-3,
11 | 0, 0, 0,-1, 0),nrow=2,byrow=TRUE)
12 | )) == 0
13 | )
14 | expect_true(
15 | sum(abs(
16 | getAb(substValue(E,c('x','y'),c(1,2),reduce=TRUE))-
17 | matrix(c(
18 | -1, 0,-3,
19 | 0,-1, 0),nrow=2,byrow=TRUE)
20 | )) == 0
21 | )
22 | })
23 |
24 |
25 | test_that("value substitution for categorical data",{
26 |
27 | E <- editarray(c(
28 | "v %in% letters[1:3]",
29 | "w %in% letters[4:6]",
30 | "x %in% letters[7:9]",
31 | "if ( v=='a' ) w !='d'",
32 | "if ( w %in% c('d','e')) x != 'g'"
33 | ))
34 | expect_true(
35 | all(
36 | getArr(substValue(E,c('v','w'),c('a','d')))
37 | ==
38 | matrix(c(
39 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
40 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE
41 | ),nrow=c(2),byrow=TRUE)
42 | )
43 | )
44 | expect_true(
45 | all(
46 | getArr(substValue(E,c('v','w'),c('a','d'),reduce=TRUE))
47 | ==
48 | matrix(c(
49 | TRUE, TRUE, TRUE, TRUE, TRUE,
50 | TRUE, TRUE, TRUE, FALSE, FALSE
51 | ),nrow=c(2),byrow=TRUE)
52 | )
53 | )
54 |
55 |
56 | })
57 |
58 | test_that("value substitution works for boolean data",{
59 | E <- editarray(c(
60 | "g %in% c('m','f')",
61 | "p %in% c(TRUE, FALSE)"))
62 | expect_true(nrow(getArr(substValue(E,'p',TRUE)))==0)
63 | })
64 |
65 | test_that("Bug reported by Sander Scholtus is fixed",{
66 | # this used to cause a crash after upgrading to R>=3.x.x
67 | E1 <- editset(expression(
68 | A %in% letters[1:2],
69 | B %in% letters[2:3],
70 | if ( (A == 'a') ) x > y
71 | ))
72 | E1 <- substValue(E1, var = "x", val = 1)
73 | })
74 |
75 |
76 |
--------------------------------------------------------------------------------
/pkg/R/blocks.R:
--------------------------------------------------------------------------------
1 | #' Decompose a matrix or edits into independent blocks
2 | #'
3 | #' \code{blocks} returns a \code{list} of independent blocks \eqn{M_i} such that
4 | #' \eqn{M=M_1\oplus M_2\oplus\cdots\oplus M_n}.
5 | #'
6 | #' @param M \code{matrix}, \code{\link{editmatrix}}, editarray or editset to be decomposed into independent blocks
7 | #' @return list of independent subobjects of \code{M}.
8 | #' @example ../examples/blocks.R
9 | #' @export
10 | blocks <- function(M){
11 | blocks <- blockIndex(contains(M))
12 | lapply( blocks,
13 | function(b){
14 | reduce(M[b,,drop=FALSE])
15 | }
16 | )
17 | }
18 |
19 |
20 | #' \code{blockIndex} returns a list of row indices in a \code{logical} matrix \code{D} designating independent blocks.
21 | #'
22 | #'
23 | #' @param D matrix of type \code{logical}
24 | #' @return \code{list} of row indices in \code{D} indicating independent blocks.
25 | #' Empty rows (i.e. every column \code{FALSE}) are ignored.
26 | #'
27 | #' @rdname blocks
28 | #' @export
29 | blockIndex <- function(D){
30 |
31 | block <- function(B){
32 | x1 <- FALSE
33 | x <- B[1,]
34 | while (sum(x1 != x)){
35 | x1 <- x
36 | b <- sapply( 1:nrow(B)
37 | , function(i){
38 | any(B[i,] & x)
39 | }
40 | )
41 | x <- colSums(B[b,,drop=FALSE]) > 0 #this is another way of "or"ring all found rows
42 | }
43 | b
44 | }
45 | orignames <- row.names(D)
46 | row.names(D) <- 1:nrow(D)
47 |
48 | #remove empty rows
49 | b <- rowSums(D) == 0
50 | D <- D[!b,,drop=FALSE]
51 |
52 | # create a list which will contain the blocks
53 | blocks <- list()
54 | L <- 1
55 |
56 | # detect and remove blocks until no blocks are left
57 | while (nrow(D) > 0){
58 |
59 | # find block
60 | b <- block(D)
61 |
62 | # store the original row numbers of the detected block
63 | blocks[[L]] <- as.integer(row.names(D)[b])
64 | L <- L + 1
65 |
66 | # remove the detected block
67 | D <- D[!b,,drop=FALSE]
68 | }
69 | lapply(blocks,function(b) {names(b)<-orignames[b]; b})
70 | }
71 |
--------------------------------------------------------------------------------
/pkg/inst/script/bench/benchmip_categorical.R:
--------------------------------------------------------------------------------
1 | library(editrules)
2 | FILE ="benchmip_categorical.txt"
3 |
4 | #' Create account balance
5 | generate_E <- function(nvar=10){
6 | vars <- paste0("A", seq_len(nvar))
7 | n <- nvar-1
8 |
9 | # if all TRUE no errors. FALSE values introduce errors
10 | edits <- c( paste0(vars, " %in% c(TRUE,FALSE)")
11 | , paste0("if (",head(vars, -1), ")", tail(vars, -1))
12 | )
13 | edits <- c(edits, "A1 == TRUE")
14 | editarray(edits)
15 | }
16 |
17 | generate_data <- function(E, nerrors=0){
18 | vars <- getVars(E)
19 | n <- length(vars)
20 |
21 | x <- sapply(vars, function(v) TRUE)
22 | x1 <- x
23 | x1[seq_len(nerrors)] <- FALSE
24 |
25 | x2 <- x
26 | x2[1+n - seq_len(nerrors)] <- FALSE
27 |
28 | x3 <- x
29 | x3[round((n-nerrors)/2) + seq_len(nerrors)] <- FALSE
30 | as.data.frame(rbind(x1,x2,x3))
31 | }
32 |
33 |
34 | bench <- function(nvars = 10, nerrors=9, method="bb"){
35 |
36 | init <- !file.exists(FILE)
37 | txt <- file(FILE, "at")
38 | on.exit(close(txt))
39 |
40 | errorloc <- c("begin", "end", "middle")
41 |
42 | if (nerrors >= nvars) stop("nvars cannot be less than nerrors")
43 | for (ne in seq_len(nerrors)){
44 | for (nvar in seq(from=ne+1, to=nvars)){
45 | try({
46 | E <- generate_E(nvar)
47 | data <- generate_data(E, ne)
48 | cat("\r nvar=", nvar, " ne=", ne, " method=", method)
49 | le <- localizeErrors(E, data, method=method)
50 | rpt <- cbind(method=method, nvar=nvar, nerrors=ne, errorloc=errorloc, le$status)
51 | write.table(rpt, file=txt, col.names=init, row.names=FALSE)
52 | init <- FALSE
53 | flush(txt)
54 | })
55 | gc()
56 | #print(rpt)
57 | }
58 | }
59 | }
60 |
61 | if (file.exists(FILE)) file.remove(FILE)
62 |
63 | bench(50,10, method="mip")
64 | bench(50,10, method="bb")
65 |
66 | dat <- read.table(FILE, header=TRUE)
67 | library(ggplot2)
68 | qplot(data=dat, y=elapsed, x=nvar, color=method, facets=nerrors~method, shape=errorloc) + geom_jitter()
69 | ggsave("benchmip_categorical.png")
70 | #
71 |
72 |
73 | ### quick testing
74 | # (E <- generate_E())
75 | # (dat <- generate_data(E, nerrors=3))
76 | # localizeErrors(E,dat, method="mip")
77 | # editrules:::errorLocalizer.mip(E,dat[1,])
78 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testParseEdits.R:
--------------------------------------------------------------------------------
1 | require(testthat)
2 |
3 |
4 | context("Parsing")
5 |
6 | test_that("parseEdits all works",{
7 | x <- c( "2*x < 1"
8 | , "if (A=='a') B == 'b'"
9 | , "if (A=='a') B == FALSE"
10 | , "if (A=='a') B > 1"
11 | , "if (c==1) B || C == FALSE"
12 | )
13 | e <- parseEdits(x)
14 | expect_equal(length(e), 5)
15 | })
16 |
17 | test_that("parseEdits num works",{
18 | x <- c( "2*x < 1"
19 | , "if (A=='a') B == 'b'"
20 | , "if (A=='a') B == FALSE"
21 | , "if (A=='a') B > 1"
22 | , "if (c==1) B || C == FALSE"
23 | )
24 | e <- parseEdits(x, "num")
25 | expect_equal(length(e), 1)
26 | u <- new.env(); u$x = 1
27 | expect_equal(eval(e,u),eval(expression(2*x < 1), u))
28 | })
29 |
30 | test_that("parseEdits cat works",{
31 | x <- c( "2*x < 1"
32 | , "if (A=='a') B == 'b'"
33 | , "if (A=='a') B == FALSE"
34 | , "if (A=='a') B > 1"
35 | , "if (c==1) B || C == FALSE"
36 | )
37 | e <- parseEdits(x, "cat")
38 | expect_equal(length(e), 2)
39 | # test fails: equivalence of expressions cannot be tested like this.
40 | # expect_equivalent(e, expression( if (A == "a") B == "b"
41 | # , if (A == "a") B == FALSE
42 | # )
43 | # )
44 | })
45 |
46 | test_that("parseEdits mix works",{
47 | x <- c( "2*x < 1"
48 | , "if (A=='a') B == 'b'"
49 | , "if (A=='a') B == FALSE"
50 | , "if (A=='a') B > 1"
51 | , "if (c==1) B || C == FALSE"
52 | )
53 | e <- parseEdits(x, "mix")
54 | expect_equal(length(e), 2)
55 | # test fails: equivalence of expressions cannot be tested like this.
56 | # expect_equivalent(e, expression( if (A == "a") B > 1
57 | # , if (c == 1) B || C == FALSE
58 | # )
59 | # , label=deparse(e)
60 | # )
61 | })
62 |
63 | context("editfile")
64 | test_that("assignments are parsed by editfile",{
65 | e <- editfile("edit_test_1.txt",type='num')
66 | expect_equivalent(getA(e),array(c(-10,1),dim=c(1,2)))
67 | })
68 |
69 | test_that("empty files are parsed correctly",{
70 | expect_equal(
71 | nedits(editfile(textConnection("#test"),type='num'))
72 | , 0
73 | )
74 | })
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/pkg/inst/script/bench/benchmip_mixed2.R:
--------------------------------------------------------------------------------
1 | library(editrules)
2 | FILE ="benchmip_mixed2.txt"
3 |
4 | generate_E <- function(nvar=10){
5 | s <- seq_len(nvar)
6 | num_vars <- paste0("x", s)
7 | edits <- "x1>=0"
8 | if (nvar > 1){
9 | edits <- c(edits,
10 | paste0("if (", head(num_vars, -1)," >= 0)", tail(num_vars, -1), ">= 0"))
11 | }
12 | editset(edits)
13 | }
14 |
15 | error <- function(x){
16 | lapply(x, function(v){
17 | if (is.logical(v)) FALSE else -1
18 | })
19 | }
20 |
21 | generate_data <- function(E, nerrors=0){
22 | num_vars <- getVars(E,"num")
23 |
24 | x <- numeric(length(num_vars))
25 | names(x) <- num_vars
26 | n <- length(x)
27 |
28 | x1 <- x
29 | idx <- seq_len(nerrors)
30 | x1[idx] <- -1
31 |
32 | x2 <- x
33 | idx <- 1+n - seq_len(nerrors)
34 | x2[idx] <- -1
35 |
36 | x3 <- x
37 | idx <- (round(n-nerrors)/2) + seq_len(nerrors)
38 | x3[idx] <- -1
39 |
40 | as.data.frame(rbind(x1,x2,x3))
41 | }
42 |
43 |
44 | bench <- function(nvars = 10, nerrors=10, method="bb", maxduration=200){
45 |
46 | init <- !file.exists(FILE)
47 | txt <- file(FILE, "at")
48 | on.exit(close(txt))
49 |
50 | errorloc <- c("begin", "end", "middle")
51 |
52 | if (nerrors >= nvars) stop("nvars cannot be less than nerrors")
53 | for (nvar in seq_len(nvars)){
54 | E <- generate_E(nvar)
55 | for (ne in seq(1, min(nerrors, nvar))){
56 | try({
57 | data <- generate_data(E, ne)
58 | # only select middle
59 | data <- data[3,,drop=FALSE]
60 | errorloc_m <- errorloc[3]
61 |
62 | cat("\r nvar=", nvar, " ne=", ne, " method=", method)
63 | le <- localizeErrors(E, data, method=method, maxduration=maxduration)
64 | rpt <- cbind(method=method, nvar=nvar, nerrors=ne, errorloc=errorloc_m, le$status)
65 |
66 | write.table(rpt, file=txt, col.names=init, row.names=FALSE)
67 |
68 | init <- FALSE
69 | flush(txt)
70 | })
71 | gc()
72 | }
73 | }
74 | }
75 |
76 | if (file.exists(FILE)) file.remove(FILE)
77 |
78 | bench(50,10, method="mip")
79 | bench(50,10, method="bb")
80 |
81 | # dat <- read.table(FILE, header=TRUE)
82 | # library(ggplot2)
83 | # qplot(data=dat, y=elapsed, x=nvar, color=method, facets=nerrors~method, shape=errorloc) + geom_jitter()
84 | # ggsave("benchmip_categorical.png")
85 |
86 |
87 | ### quick testing
88 | E <- generate_E(4)
89 | E
90 | generate_data(E, nerrors=2)
91 |
--------------------------------------------------------------------------------
/pkg/R/expandEdits.R:
--------------------------------------------------------------------------------
1 | #' Expand an edit expression
2 | #'
3 | #' Often many numeric variables have the same constraints. \code{expandEdits} is
4 | #' a utility function to define edits for multiple variables. See the examples for the syntax.
5 | #' @param s edit expression, can be a \code{character} or \code{expression} vector
6 | #' @param prefix prefix for variables to be expanded
7 | #' @param useSum if \code{TRUE} sum expressions will be expanded
8 | #' @param asExpression if \code{TRUE} an \code{\link{expression}} will be returned in stead of a \code{character}
9 | #' @param env enviroment that will be used to find variables to be expanded
10 | #' @param ... variables used in the expansion
11 | #' @return \code{character} or \code{expression} vector with expanded expressions
12 | #' @keywords internal
13 | expandEdits <- function(s, prefix="_", useSum=TRUE, asExpression=is.language(s), env=parent.frame(), ...){
14 | #TODO replace special regex character in prefix with escaped character.
15 |
16 | force(asExpression)
17 |
18 | if (is.expression(s)){
19 | s <- as.character(s)
20 | }
21 |
22 | if (is.language(s)){
23 | s <- deparse(s)
24 | }
25 |
26 | if (length(s) > 1){
27 | return(lapply(s, expandEdits, prefix=prefix, useSum=useSum, ...))
28 | }
29 |
30 | l <- as.list(env)
31 | vars <- list(...)
32 | l[names(vars)] <- vars
33 |
34 | varnms <- paste(prefix,names(l), sep="")
35 | used <- sapply(varnms, grepl, s)
36 | varnms <- varnms[used]
37 | l <- l[used]
38 |
39 | if (useSum) {
40 | sumnms <- paste("\\bsum", names(l), sep=prefix)
41 | sumregex1 <- paste(sumnms, "\\((.+?)\\)", sep="")
42 | sumregex2 <- paste(sumnms, "\\((.+?)\\).+", sep="")
43 |
44 | vars <- names(l)
45 | for (i in seq_along(vars)){
46 | if (length(grep(sumregex2[i], s))){
47 | sumvars <- sub(sumregex2[i], "\\1", s)
48 | sumvars <- do.call(expandEdits, list(s=sumvars, env=l[vars[i]], prefix=prefix))
49 | sumvars <- paste(sumvars, collapse=" + ")
50 | s <- sub(sumregex1[i], sumvars, s)
51 | l[[vars[i]]] <- NULL
52 | }
53 | }
54 | }
55 |
56 | varnms <- paste(prefix,names(l), sep="")
57 | for (i in seq_along(l)){
58 | if (length(grep(varnms[i], s))) {
59 | s <- sapply(l[[i]], function(j) gsub(varnms[i],j,s))
60 | }
61 | }
62 |
63 | if (is.array(s)) {
64 | dimnames(s) <- l
65 | } else if (is.vector(s) && length(l)){
66 | names(s) <- l[[1]]
67 | }
68 | if (asExpression){
69 | parse(text=s)
70 | } else {
71 | s
72 | }
73 | }
74 |
75 |
--------------------------------------------------------------------------------
/pkg/inst/script/bench/benchmip_balance.R:
--------------------------------------------------------------------------------
1 | library(editrules)
2 | FILE ="benchmip_balance.txt"
3 |
4 | #' Create account balance
5 | generate_balance <- function(nvar=15){
6 | i <- seq_len(floor((nvar)/2))
7 | edits <- c( "x1 >= 0"
8 | , paste0("x",i,"==", "x", 2*i, "+", "x", 2*i + 1)
9 | )
10 |
11 | vars <- paste0("x",seq_len(nvar))
12 |
13 | if (nvar>1){
14 | #edits <- c(edits, paste0(head(vars,-1), ">=", tail(vars,-1)))
15 | edits <- c(edits, paste0("x1 >= ", tail(vars,-1)))
16 | }
17 |
18 | E <- editmatrix(edits)
19 | if (nvar %% 2 == 0){
20 | E[,-(nvar+1)]
21 | } else {
22 | E
23 | }
24 | }
25 |
26 | generate_data <- function(E, nerrors=0){
27 | vars <- getVars(E)
28 | n <- length(vars)
29 |
30 | x <- sapply(vars, function(v) 0)
31 | x1 <- x
32 | x1[seq_len(nerrors)] <- -1
33 |
34 | x2 <- x
35 | x2[1+n - seq_len(nerrors)] <- -1
36 |
37 | x3 <- x
38 | x3[round((n-nerrors)/2) + seq_len(nerrors)] <- -1
39 | as.data.frame(rbind(x1,x2,x3))
40 | }
41 |
42 |
43 | bench <- function(nvars = 10, nerrors=10, method="bb"){
44 |
45 | init <- !file.exists(FILE)
46 | txt <- file(FILE, "at")
47 | on.exit(close(txt))
48 |
49 | errorloc <- c("begin", "end", "middle")
50 |
51 | if (nerrors > nvars) stop("nvars cannot be less than nerrors")
52 | for (nvar in seq_len(nvars)){
53 | for (ne in seq(1, min(nerrors, nvar))){
54 | try({
55 | E <- generate_balance(nvar)
56 | data <- generate_data(E, ne)
57 | cat("\r nvar=", nvar, " ne=", ne, " method=", method)
58 | le <- localizeErrors(E, data, method=method)
59 | rpt <- cbind(method=method, nvar=nvar, nerrors=ne, errorloc=errorloc, le$status)
60 |
61 | write.table(rpt, file=txt, col.names=init, row.names=FALSE)
62 | init <- FALSE
63 | flush(txt)
64 | })
65 | gc()
66 | }
67 | }
68 | }
69 |
70 | ## quick testing
71 | start <- function(){
72 | if (file.exists(FILE)) file.remove(FILE)
73 |
74 | bench(100,10, method="mip")
75 | bench(50,10, method="bb")
76 |
77 | dat <- read.table(FILE, header=TRUE)
78 | library(ggplot2)
79 | qplot(data=dat, y=elapsed, x=nvar, color=method, facets=nerrors~method, shape=errorloc, geom=c("point", "line")) + geom_jitter()
80 | ggsave("benchmip_balance.png")
81 | sdat <- subset(dat, errorloc=="middle")
82 | qplot(data=sdat, x=nvar, y=elapsed, color=method, group=nerrors, geom="line",
83 | facets=~method, ylim=c(0,150))
84 | }
85 | #View(dat)
86 | # n <- 4
87 | # (E <- generate_balance(n))
88 | # (dat <- generate_data(E,1))
89 | # localizeErrors(E, dat, method="mip")
--------------------------------------------------------------------------------
/examples/graph.R:
--------------------------------------------------------------------------------
1 |
2 | ## Examples with linear (in)equality edits
3 |
4 | # load predefined edits from package
5 | data(edits)
6 | edits
7 |
8 | # convert to editmatrix
9 | E <- editmatrix(edits)
10 |
11 | ## Not run:
12 | # (Note to reader: the Not run directive only prevents the examle commands from
13 | # running when package is built)
14 |
15 | # Total edit graph
16 | plot(E)
17 |
18 | # Graph with dependent edits
19 | plot(E, nodetype="rules")
20 |
21 | # Graph with dependent variables
22 | plot(E, nodetype="vars")
23 |
24 | # Total edit graph, but with curved lines (option from igraph package)
25 | plot(E, edge.curved=TRUE)
26 |
27 |
28 | # graph, plotting just the connections caused by variable 't'
29 | plot(E,vars='t')
30 |
31 | ## End(Not run)
32 |
33 | # here's an example with a broken record.
34 | r <- c(ct = 100, ch = 30, cp = 70, p=30,t=130 )
35 | violatedEdits(E,r)
36 | errorLocalizer(E,r)$searchBest()$adapt
37 |
38 | # we color the violated edits and the variables that have to be adapted
39 |
40 | ## Not run
41 | set.seed(1) # (for reprodicibility)
42 | plot(E,
43 | adapt=errorLocalizer(E,r)$searchBest()$adapt,
44 | violated=violatedEdits(E,r))
45 | ## End(Not run)
46 |
47 |
48 |
49 | # extract total graph (as igraph object)
50 | as.igraph(E)
51 |
52 | # extract graph with edges related to variable 't' and 'ch'
53 | as.igraph(E,vars=c('t','ch'))
54 |
55 | # extract total adjacency matrix
56 | adjacency(E)
57 |
58 | # extract adjacency matrix related to variables t and 'ch'
59 | adjacency(E,vars=c('t','ch'))
60 |
61 | ## Examples with categorical edits
62 |
63 | # generate an editarray:
64 | E <- editarray(expression(
65 | age %in% c('<15','16-65','>65'),
66 | employment %in% c('unemployed','employed','retired'),
67 | salary %in% c('none','low','medium','high'),
68 | if (age == '<15') employment=='unemployed',
69 | if (salary != 'none') employment != 'unemployed',
70 | if (employment == 'unemployed') salary == 'none'))
71 |
72 |
73 | ## Not run:
74 | # plot total edit graph
75 | plot(E)
76 |
77 | # plot with a different layout
78 | plot(E,layout=layout.circle)
79 |
80 | # plot edit graph, just the connections caused by 'salary'
81 | plot(E,vars='salary')
82 |
83 | ## End(Not run)
84 |
85 | # extract edit graph
86 | as.igraph(E)
87 |
88 | # extract edit graph, just the connections caused by 'salary'
89 | as.igraph(E,vars='salary')
90 |
91 | # extract adjacency matrix
92 | adjacency(E)
93 |
94 | # extract adjacency matrix, only caused by 'employment'
95 | adjacency(E,vars='employment')
96 |
97 |
98 |
99 |
100 |
101 |
--------------------------------------------------------------------------------
/papers/tex/smalltree.sty:
--------------------------------------------------------------------------------
1 |
2 | \setlength{\unitlength}{0.012mm}
3 | \definecolor{grey}{rgb}{0.8,0.8,0.8}
4 |
5 | \newcommand{\greycircle}{\textcolor{grey}{\circle*{100}}}
6 | \newcommand{\blackcircle}{\circle*{100}}
7 | \newcommand{\treeframe}{
8 | \thinlines
9 | \put(700,900){\textcolor{grey}{\line( 2,-1){400}}}
10 | \put(700,900){\textcolor{grey}{\line(-2,-1){400}}}
11 |
12 | \multiput(300,700)(800,000){2}{\textcolor{grey}{\line( 1,-2){200}}}
13 | \multiput(300,700)(800,000){2}{\textcolor{grey}{\line(-1,-2){200}}}
14 |
15 | \multiput(100,300)(400,000){4}{\textcolor{grey}{\line(-1,-3){100}}}
16 | \multiput(100,300)(400,000){4}{\textcolor{grey}{\line( 1, -3){100}}}
17 |
18 | \put(700,900){\blackcircle}
19 | \multiput(300,700)(800,000){2}{\greycircle}
20 | \multiput(100,300)(400,000){4}{\greycircle}
21 | \multiput(000,000)(200,000){8}{\greycircle}
22 | \thicklines
23 | }
24 | \newcommand{\Troot}{\put(700,900){\blackcircle}}
25 | \newcommand{\Tl}{\put(300,700){\blackcircle}}
26 | \newcommand{\Tr}{\put(1100,700){\blackcircle}}
27 | \newcommand{\Tll}{\put(100,300){\blackcircle}}
28 | \newcommand{\XTll}{\put(-20,215){{$-$}}}
29 | \newcommand{\Tlr}{\put(500,300){\blackcircle}}
30 | \newcommand{\Trl}{\put(900,300){\blackcircle}}
31 | \newcommand{\Trr}{\put(1300,300){\blackcircle}}
32 | \newcommand{\XTrr}{\put(1171,215){$-$}}
33 |
34 |
35 | \newcommand{\Tlll}{\put(000,000){\blackcircle}}
36 | \newcommand{\Tllr}{\put(200,000){\blackcircle}}
37 | \newcommand{\Tlrl}{\put(400,000){\blackcircle}}
38 | \newcommand{\Tlrr}{\put(600,000){\blackcircle}}
39 | \newcommand{\XTlrr}{\put(471,-85){$-$}}
40 | \newcommand{\Trll}{\put(800,000){\blackcircle}}
41 | \newcommand{\XTrll}{\put(671,-85){$-$}}
42 | \newcommand{\Trlr}{\put(1000,000){\blackcircle}}
43 | \newcommand{\XTrlr}{\put(871,-85){$-$}}
44 | \newcommand{\Trrl}{\put(1200,000){\blackcircle}}
45 | \newcommand{\Trrr}{\put(1400,000){\blackcircle}}
46 |
47 | \newcommand{\El}{\put(700,900){\line(-2,-1){400}}}
48 | \newcommand{\Er}{\put(700,900){\line( 2,-1){400}}}
49 | \newcommand{\Ell}{\put(300,700){\line(-1,-2){200}}}
50 | \newcommand{\Elr}{\put(300,700){\line( 1,-2){200}}}
51 | \newcommand{\Erl}{\put(1100,700){\line(-1,-2){200}}}
52 | \newcommand{\Err}{\put(1100,700){\line( 1,-2){200}}}
53 |
54 | \newcommand{\Elll}{\put(100,300){\line(-1,-3){100}}}
55 | \newcommand{\Ellr}{\put(100,300){\line( 1,-3){100}}}
56 | \newcommand{\Elrl}{\put(500,300){\line(-1,-3){100}}}
57 | \newcommand{\Elrr}{\put(500,300){\line( 1,-3){100}}}
58 | \newcommand{\Erll}{\put(900,300){\line(-1,-3){100}}}
59 | \newcommand{\Erlr}{\put(900,300){\line( 1,-3){100}}}
60 | \newcommand{\Errl}{\put(1300,300){\line(-1,-3){100}}}
61 | \newcommand{\Errr}{\put(1300,300){\line( 1,-3){100}}}
62 |
63 |
--------------------------------------------------------------------------------
/develop/errorLocalizer_alt.R:
--------------------------------------------------------------------------------
1 | # alternative implementation of errorlocalizer
2 | # differences:
3 | # *) returns all solutions, in stead of one by one
4 | # *) slightly faster then normal implementation (but not very)
5 | # *) uses a recursive function in stead of nested stack, inner workings are more understandable, but original
6 | # is better debugable using VERBOSE=TRUE and way more flexible.
7 | require(editrules)
8 |
9 | eloc <- function(E){
10 |
11 | if ( !isNormalized(E) )
12 | E <- normalize(E)
13 |
14 | btx <- function(x, weight=rep(1,length(x))){
15 | adapt <- is.na(x)
16 | names(adapt) <- names(x)
17 |
18 | #order decreasing by weight
19 | o <- order(weight, decreasing=TRUE)
20 | totreat <- names(x)[o[!adapt]]
21 |
22 | # Eliminate missing variables.
23 | for (v in names(x)[is.na(x)]) E <- eliminateFM(E,v)
24 |
25 | wsol <- sum(weight)
26 |
27 | bt <- function(E, totreat, adapt, sol){
28 | w <- sum(weight[adapt])
29 |
30 | #reject
31 | if ( w > wsol || isObviouslyInfeasible(E) ){
32 | return(sol)
33 | }
34 |
35 | #accept
36 | if (length(totreat) == 0){
37 | wsol <<- w
38 | sol[[length(sol)+1]] <- list(w=w, adapt=adapt)
39 | return(sol)
40 | }
41 |
42 | var <- totreat[1]
43 |
44 | sol <- bt( E = substValue(E, var , x[var])
45 | , totreat = totreat[-1]
46 | , adapt = adapt
47 | , sol = sol
48 | )
49 |
50 | sol <- bt( E = eliminateFM(E, var , x[var])
51 | , totreat = totreat[-1]
52 | , adapt = {a <- adapt; a[var] <- TRUE;a}
53 | , sol = sol
54 | )
55 | sol
56 | }
57 |
58 | bt(E=E, totreat=totreat, adapt=adapt, sol=list())
59 | }
60 | btx
61 | }
62 |
63 | E <- editmatrix(c(
64 | "p + c1 + c2 == t",
65 | "c1 - 0.3*t >= 0",
66 | "p > 0",
67 | "c1 > 0",
68 | "c2 > 0",
69 | "t > 0"))
70 |
71 |
72 | cp <- errorLocalizer(E,x=c(p=755, c1=50, c2=NA,t=200))
73 | el <- eloc(E)
74 |
75 | sol <- cp$searchAll()
76 | sol
77 |
78 | sol <- el(x=c(p=755, c1=50, c2=NA,t=200))
79 | sol
80 |
81 | cat("backtracker:\n")
82 | system.time(replicate(100, {
83 | cp <- errorLocalizer(E,x=c(p=755, c1=50, c2=NA,t=200))
84 | sol<-cp$searchAll()
85 | }))
86 |
87 | cat("hardcoded:\n")
88 | system.time(replicate(100, {
89 | sol<-el(x=c(p=755, c1=50, c2=NA,t=200))}
90 | ))
91 |
--------------------------------------------------------------------------------
/pkg/R/checkRows.R:
--------------------------------------------------------------------------------
1 | # Check rows of data.frame against edits.
2 | #
3 | # This is an S3 generic function for checking rows of a \code{data.frame} against
4 | # a number of edit restrictions. The edits can be entered either in \code{character}
5 | # \code{data.frame} or \code{editmatrix} format.
6 | #
7 | # If edits are represented as a \code{character} vector, the entries of \code{E} are parsed
8 | # and evaluated in the environment of \code{dat}
9 | #
10 | # If the edits are represented in a \code{data.frame}, the \code{data.frame} must have the format
11 | # described in \code{\link{editmatrix}}. The edits are coerced to a character vector, and passed
12 | # to \code{checkRows.character}.
13 | #
14 | # If the edits are represented by an \code{\link{editmatrix}} (representing linear (in)equalities)
15 | # verbose edits are extracted and passed on to \code{checkRows.character}
16 | #
17 | #
18 | # @aliases checkRows.character checkRows.data.frame checkRows.editmatrix
19 | #
20 | # @param E Edits, in \code{character}, \code{data.frame} or \code{\link{editmatrix}} representation
21 | # @param dat The data to check.
22 | # @return a logical vector with \code{length} equal to \code{nrow(dat)}. If a row is violates
23 | # no edit restrictions, \code{TRUE} otherwise \code{FALSE}
24 | #
25 | # @seealso violatedEdits
26 | # @example ../examples/checkRows.R
27 | #
28 | checkRows <- function(E, dat){
29 | UseMethod("checkRows")
30 | }
31 |
32 |
33 |
34 | # @rdname checkRows
35 | # @method checkRows editmatrix
36 | #
37 | #' @export
38 | checkRows.editmatrix <- function(E, dat){
39 | stopifnot(is.data.frame(dat))
40 | vars <- getVars(E) %in% names(dat)
41 | if (!all(vars)){
42 | stop("Edits contain variable(s):", paste(colnames(E)[!vars], collapse=","),
43 | ", that are not available in the data.frame")
44 | }
45 |
46 | check <- !logical(nrow(dat))
47 | ed <- as.expression(E)
48 | for ( i in 1:length(ed)){
49 | check <- check & tryCatch(eval(ed[[i]], envir=dat), error=function(e){
50 | stop(paste("Edit",ed[[i]],"can not be checked. Evaluation returned",e$message,sep="\n" ))
51 | })
52 | }
53 | return(check)
54 | }
55 |
56 | # @rdname checkRows
57 | # @method checkRows character
58 | #
59 | #' @export
60 | checkRows.character <- function(E, dat){
61 | ed <- parseEdits(E)
62 | check <- !logical(nrow(dat))
63 | for ( i in 1:length(E)){
64 | check <- check & tryCatch(eval(ed[[i]], envir=dat), error=function(e){
65 | stop(paste("Edit",ed[[i]],"can not be checked. Evaluation returned",e$message,sep="\n" ))
66 | })
67 | }
68 | return(check)
69 | }
70 |
71 | # @rdname checkRows
72 | # @method checkRows data.frame
73 | #
74 | #' @export
75 | checkRows.data.frame <- function(E, dat){
76 | checkRows(as.character(E$edit),dat)
77 | }
78 |
79 |
80 |
81 |
--------------------------------------------------------------------------------
/pkg/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: editrules
2 | Maintainer: Edwin de Jonge
3 | License: GPL-3
4 | Title: Parsing, Applying, and Manipulating Data Cleaning Rules
5 | LazyData: no
6 | Type: Package
7 | LazyLoad: yes
8 | Authors@R: c(person(given = "Edwin",
9 | family = "de Jonge",
10 | role = c("aut", "cre"),
11 | email = "edwindjonge@gmail.com",
12 | comment=c(ORCID="0000-0002-6580-4718")
13 | ),
14 | person(given = "Mark",
15 | family = "van der Loo",
16 | role = "aut"))
17 | Description: Please note: active development has moved to packages 'validate'
18 | and 'errorlocate'. Facilitates reading and manipulating (multivariate) data
19 | restrictions (edit rules) on numerical and categorical data. Rules can be
20 | defined with common R syntax and parsed to an internal (matrix-like format).
21 | Rules can be manipulated with variable elimination and value substitution
22 | methods, allowing for feasibility checks and more. Data can be tested against
23 | the rules and erroneous fields can be found based on Fellegi and Holt's
24 | generalized principle. Rules dependencies can be visualized with using the
25 | 'igraph' package.
26 | Version: 2.9.6
27 | Depends:
28 | R (>= 2.12.0),
29 | igraph
30 | Imports:
31 | lpSolveAPI
32 | Suggests:
33 | testthat
34 | URL: https://github.com/data-cleaning/editrules
35 | BugReports: https://github.com/data-cleaning/editrules/issues
36 | Collate:
37 | 'adjacency.R'
38 | 'as.igraph.R'
39 | 'editset.R'
40 | 'editarray.R'
41 | 'editmatrix.R'
42 | 'as.matrix.R'
43 | 'backtracker.R'
44 | 'blocks.R'
45 | 'c.R'
46 | 'cateditmatrix.R'
47 | 'checkDatamodel.R'
48 | 'checkRows.R'
49 | 'contains.R'
50 | 'disjunct.R'
51 | 'duplicated.R'
52 | 'echelon.R'
53 | 'editAttr.R'
54 | 'editarrayAttr.R'
55 | 'editfile.R'
56 | 'editmatrixAttr.R'
57 | 'editrules-data.R'
58 | 'eliminate.R'
59 | 'errorLocalizer.R'
60 | 'errorLocalizer_mip.R'
61 | 'errorLocation.R'
62 | 'expandEdits.R'
63 | 'generateEdits.R'
64 | 'getH.R'
65 | 'getUpperBounds.R'
66 | 'getVars.R'
67 | 'is.R'
68 | 'isFeasible.R'
69 | 'isObviouslyInfeasible.R'
70 | 'isObviouslyRedundant.R'
71 | 'isSubset.R'
72 | 'list2env.R'
73 | 'localizeErrors.R'
74 | 'mip.R'
75 | 'parseCat.R'
76 | 'parseEdits.R'
77 | 'parseMix.R'
78 | 'parseNum.R'
79 | 'perturbWeights.R'
80 | 'pkg.R'
81 | 'plot.R'
82 | 'plot_errorLocation.R'
83 | 'print.R'
84 | 'reduce.R'
85 | 'removeRedundant.R'
86 | 'softEdits.R'
87 | 'str.R'
88 | 'subsetting.R'
89 | 'substValue.R'
90 | 'summary.R'
91 | 'violatedEdits.R'
92 | 'writeELAsMip.R'
93 | 'zzz.R'
94 | RoxygenNote: 7.3.3
95 | Encoding: UTF-8
96 |
--------------------------------------------------------------------------------
/examples/errorLocalizer.R:
--------------------------------------------------------------------------------
1 | #### examples with numerical edits
2 | # example with a single editrule
3 | # p = profit, c = cost, t = turnover
4 | E <- editmatrix(c("p + c == t"))
5 | cp <- errorLocalizer(E, x=c(p=755, c=125, t=200))
6 | # x obviously violates E. With all weights equal, changing any variable will do.
7 | # first solution:
8 | cp$searchNext()
9 | # second solution:
10 | cp$searchNext()
11 | # third solution:
12 | cp$searchNext()
13 | # there are no more solution since changing more variables would increase the
14 | # weight, so the result of the next statement is NULL:
15 | cp$searchNext()
16 |
17 | # Increasing the reliability weight of turnover, yields 2 solutions:
18 | cp <- errorLocalizer(E, x=c(p=755, c=125, t=200), weight=c(1,1,2))
19 | # first solution:
20 | cp$searchNext()
21 | # second solution:
22 | cp$searchNext()
23 | # no more solutions available:
24 | cp$searchNext()
25 |
26 |
27 | # A case with two restrictions. The second restriction demands that
28 | # c/t >= 0.6 (cost should be more than 60% of turnover)
29 | E <- editmatrix(c(
30 | "p + c == t",
31 | "c - 0.6*t >= 0"))
32 | cp <- errorLocalizer(E,x=c(p=755,c=125,t=200))
33 | # Now, there's only one solution, but we need two runs to find it (the 1st one
34 | # has higher weight)
35 | cp$searchNext()
36 | cp$searchNext()
37 |
38 | # With the searchBest() function, the lowest weifght solution is found at once:
39 | errorLocalizer(E,x=c(p=755,c=125,t=200))$searchBest()
40 |
41 |
42 | # An example with missing data.
43 | E <- editmatrix(c(
44 | "p + c1 + c2 == t",
45 | "c1 - 0.3*t >= 0",
46 | "p > 0",
47 | "c1 > 0",
48 | "c2 > 0",
49 | "t > 0"))
50 | cp <- errorLocalizer(E,x=c(p=755, c1=50, c2=NA,t=200))
51 | # (Note that e2 is violated.)
52 | # There are two solutions. Both demand that c2 is adapted:
53 | cp$searchNext()
54 | cp$searchNext()
55 |
56 | ##### Examples with categorical edits
57 | #
58 | # 3 variables, recording age class, position in household, and marital status:
59 | # We define the datamodel and the rules
60 | E <- editarray(expression(
61 | age %in% c('under aged','adult'),
62 | maritalStatus %in% c('unmarried','married','widowed','divorced'),
63 | positionInHousehold %in% c('marriage partner', 'child', 'other'),
64 | if( age == 'under aged' )
65 | maritalStatus == 'unmarried',
66 | if( maritalStatus %in% c('married','widowed','divorced'))
67 | !positionInHousehold %in% c('marriage partner','child')
68 | )
69 | )
70 | E
71 |
72 | # Let's define a record with an obvious error:
73 | r <- c(
74 | age = 'under aged',
75 | maritalStatus='married',
76 | positionInHousehold='child')
77 | # The age class and position in household are consistent, while the marital
78 | # status conflicts. Therefore, changing only the marital status (in stead of
79 | # both age class and postition in household) seems reasonable.
80 | el <- errorLocalizer(E,r)
81 | el$searchNext()
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
--------------------------------------------------------------------------------
/pkg/R/parseNum.R:
--------------------------------------------------------------------------------
1 | NUMCMP <- c("==","<","<=",">",">=")
2 | NUMOPS <- c("+","-","*","<","<=",">=",">")
3 |
4 | #' Parse a numerical edit expression
5 | #'
6 | #' Parse a numerical edit expression into a named \code{numeric}.
7 | #' The \code{names} are the names of the variables
8 | #' @param e a valid R expression
9 | #' @keywords internal
10 | parseNum <- function(e){
11 | if (!isNum(e)){
12 | stop(paste("Invalid edit rule:", e))
13 | }
14 | wgt <- retrieveCoef(e)
15 | # simplify the coefficients by summing them
16 | tapply(wgt, names(wgt), sum)
17 | }
18 |
19 |
20 | hasNum <- function(e){
21 | if (length(e) == 1){
22 | return(is.numeric(e))
23 | }
24 | op <- deparse(e[[1]])
25 | if (length(e) == 2){
26 | return (op %in% NUMOPS || hasNum(e[[2]]))
27 | }
28 | if (length(e) == 3){
29 | if (op == "%in%") return(FALSE)
30 | return(op %in% NUMOPS || hasNum(e[[2]]) || hasNum(e[[3]]))
31 | }
32 | }
33 |
34 | # basic test for numerical edit
35 | isNum <- function(e){
36 | if (length(e) != 3)
37 | return(FALSE)
38 | cmp <- deparse(e[[1]])
39 | if (cmp == "==") {
40 | return(!is.character(e[[3]]) && !is.logical(e[[3]]))
41 | }
42 | return(cmp %in% NUMCMP)
43 | }
44 |
45 | retrieveCoef <- function(e, co=1){
46 | #stopifnot(is.language(e))
47 | if (length(e) == 1){
48 | if (is.numeric(e)){
49 | l <- co*e #the resulting matrix is augmented so b has a -
50 | names(l) <- getOption("editrules.CONSTANT", "CONSTANT")
51 | }
52 | else {
53 | l <- co
54 | names(l) <- as.character(e)
55 | }
56 | return(l)
57 | }
58 | if (length(e) == 2){
59 | op <- deparse(e[[1]])
60 | rhs <- e[[2]]
61 | if (op == "("){
62 | return(retrieveCoef(rhs, co))
63 | } else if (op == "-"){
64 | return(retrieveCoef(rhs, -1*co))
65 | }
66 | else {
67 | stop("Operator ", op, " not implemented", "Invalid expression:", e)
68 | }
69 | }
70 | if (length(e) == 3){
71 | op <- deparse(e[[1]])
72 | lhs <- e[[2]]
73 | rhs <- e[[3]]
74 | lsign <- rsign <- co
75 | if ( op %in% c(NUMCMP, "-")){
76 | rsign <- -1 * co
77 | }
78 | else if (op == "+"){
79 | }
80 | else if (op == "*"){
81 | if (length(lhs) == 2 || is.numeric(lhs)){
82 | co <- retrieveCoef(lhs, co)
83 | return(retrieveCoef(rhs, co))
84 | } else if (length(rhs) == 2 || is.numeric(rhs)){
85 | co <- retrieveCoef(rhs, co)
86 | return(retrieveCoef(lhs, co))
87 | } else{
88 | stop("Expression '", deparse(e), "' contains nonconstant coefficient")
89 | }
90 | }
91 | else {
92 | stop("Operator ", op, " not implemented", "Invalid expression:", e)
93 | }
94 | return(c( retrieveCoef(lhs, lsign)
95 | , retrieveCoef(rhs, rsign)
96 | )
97 | )
98 | }
99 | stop("Invalid expression:", e)
100 | }
101 |
--------------------------------------------------------------------------------
/pkg/R/isObviouslyInfeasible.R:
--------------------------------------------------------------------------------
1 |
2 | #' Check for obvious contradictions in a set of edits
3 | #'
4 | #' Obvious contradictions are edits of the form \eqn{1 < 0}, or categorical
5 | #' edits defining that a record fails for any value combination If
6 | #' this function evaluates to \code{TRUE}, the set of edits is guaranteed
7 | #' infeasible. If it evaluates to \code{FALSE} this does not garuantee feasibility.
8 | #' See \code{\link{isFeasible}} for a complete test.
9 | #'
10 | #' @param E An \code{\link{editset}}, \code{\link{editmatrix}}, \code{\link{editarray}}, \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editenv}}
11 | #' @param ... Arguments to be passed to or from other methods.
12 | #' @return A \code{logical} for objects of class \code{\link{editset}}, \code{\link{editarray}} or \code{\link{editmatrix}}.
13 | #' A \code{logical} vector in the case of an \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editset}}.
14 | #'
15 | #' @export
16 | #' @seealso \code{\link{isObviouslyRedundant}}, \code{\link{isFeasible}}
17 | isObviouslyInfeasible <- function(E,...){
18 | UseMethod("isObviouslyInfeasible")
19 | }
20 |
21 |
22 | #'
23 | #' @method isObviouslyInfeasible editmatrix
24 | #' @param tol Tolerance for checking against zero.
25 | #' @seealso \code{\link{eliminate}} \code{\link{editmatrix}}
26 | #' @rdname isObviouslyInfeasible
27 | #' @export
28 | #'
29 | isObviouslyInfeasible.editmatrix <- function(E, tol=sqrt(.Machine$double.eps), ...){
30 | if ( !isNormalized(E) ) E <- normalize(E)
31 | A <- getAb(E)
32 | operators <- getOps(E)
33 | ib <- ncol(A)
34 | zeroCoef <- rowSums(abs(A[,-ib,drop=FALSE])) <= tol
35 | b <- round(A[,ib],ceiling(-log10(tol)))
36 | if ( any(zeroCoef & operators == "<" & b <= 0) ||
37 | any(zeroCoef & operators == "<=" & b < 0) ||
38 | any(zeroCoef & operators == c("==") & abs(b) > tol)) return(TRUE)
39 | return(FALSE)
40 | }
41 |
42 | #'
43 | #' @method isObviouslyInfeasible editarray
44 | #' @rdname isObviouslyInfeasible
45 | #' @export
46 | #'
47 | isObviouslyInfeasible.editarray <- function(E,...){
48 | any(rowSums(E)==ncol(E))
49 | }
50 |
51 |
52 | #' @export
53 | isObviouslyInfeasible.NULL <- function(E,...){
54 | FALSE
55 | }
56 |
57 |
58 | #'
59 | #' @method isObviouslyInfeasible editset
60 | #' @rdname isObviouslyInfeasible
61 | #' @export
62 | #'
63 | isObviouslyInfeasible.editset <- function(E,...){
64 | isObviouslyInfeasible(E$num) || isObviouslyInfeasible(E$mixcat)
65 | }
66 |
67 | #'
68 | #' @method isObviouslyInfeasible editlist
69 | #' @rdname isObviouslyInfeasible
70 | #' @export
71 | #'
72 | isObviouslyInfeasible.editlist <- function(E,...){
73 | vapply(E,isObviouslyInfeasible, FUN.VALUE=FALSE)
74 | }
75 |
76 | #'
77 | #'
78 | #'
79 | #' @method isObviouslyInfeasible editenv
80 | #' @rdname isObviouslyInfeasible
81 | #' @export
82 | #'
83 | #'
84 | isObviouslyInfeasible.editenv <- function(E,...){
85 | # note: environments are coerced to lists by lapply
86 | vapply(E,isObviouslyInfeasible, FUN.VALUE=FALSE)
87 | }
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
--------------------------------------------------------------------------------
/examples/eliminate.R:
--------------------------------------------------------------------------------
1 |
2 | # The following is an example by Williams (1986). Eliminating all variables
3 | # except z maximizes -4x1 + 5x2 +3x3:
4 | P <- editmatrix(c(
5 | "4*x1 - 5*x2 - 3*x3 + z <= 0",
6 | "-x1 + x2 -x3 <= 2",
7 | "x1 + x2 + 2*x3 <= 3",
8 | "-x1 <= 0",
9 | "-x2 <= 0",
10 | "-x3 <= 0"))
11 | # eliminate 1st variable
12 | (P1 <- eliminate(P, "x1", fancynames=TRUE))
13 | # eliminate 2nd variable. Note that redundant rows have been eliminated
14 | (P2 <- eliminate(P1, "x2", fancynames=TRUE))
15 | # finally, the answer:
16 | (P3 <- eliminate(P2, "x3", fancynames=TRUE))
17 |
18 | # check which original edits were used in deriving the new ones
19 | getH(P3)
20 |
21 | # check how many variables were eliminated
22 | geth(P3)
23 |
24 |
25 | # An example with an equality and two inequalities
26 | # The only thing to do is solving for x in e1 and substitute in e3.
27 | (E <- editmatrix(c(
28 | "2*x + y == 1",
29 | "y > 0",
30 | "x > 0"),normalize=TRUE))
31 | eliminate(E,"x", fancynames=TRUE)
32 |
33 |
34 | # This example has two equalities, and it's solution
35 | # is the origin (x,y)=(0,0)
36 | (E <- editmatrix(c(
37 | "y <= 1 - x",
38 | "y >= -1 + x",
39 | "x == y",
40 | "y ==-2*x" ),normalize=TRUE))
41 | eliminate(E,"x", fancynames=TRUE)
42 |
43 | # this example has no solution, the equalities demand (x,y) = (0,2)
44 | # while the inequalities demand y <= 1
45 | (E <- editmatrix(c(
46 | "y <= 1 - x",
47 | "y >= -1 + x",
48 | "y == 2 - x",
49 | "y == -2 + x" ),normalize=TRUE))
50 | # this happens to result in an obviously unfeasable system:
51 | isObviouslyInfeasible(eliminate(E,"x"))
52 |
53 |
54 | # for categorical data, elimination amounts to logical derivartions. For
55 | # example
56 | E <- editarray(expression(
57 | age %in% c('under aged','adult'),
58 | positionInHousehold %in% c('marriage partner', 'child', 'other'),
59 | maritalStatus %in% c('unmarried','married','widowed','divorced'),
60 | if (maritalStatus %in% c('married','widowed','divorced') )
61 | positionInHousehold != 'child',
62 | if (maritalStatus == 'unmarried')
63 | positionInHousehold != 'marriage partner' ,
64 | if ( age == 'under aged') maritalStatus == 'unmarried'
65 | )
66 | )
67 | E
68 |
69 | # by eliminating 'maritalStatus' we can deduce that under aged persones cannot
70 | # be partner in marriage.
71 | eliminate(E,"maritalStatus")
72 |
73 | E <- editarray(expression(
74 | age %in% c('under aged','adult'),
75 | positionInHousehold %in% c('marriage partner', 'child', 'other'),
76 | maritalStatus %in% c('unmarried','married','widowed','divorced'),
77 | if (maritalStatus %in% c('married','widowed','divorced') )
78 | positionInHousehold != 'child',
79 | if (maritalStatus == 'unmarried')
80 | positionInHousehold != 'marriage partner' ,
81 | if ( age == 'under aged')
82 | maritalStatus == 'unmarried'
83 | )
84 | )
85 | E
86 |
87 | # by eliminating 'maritalStatus' we can deduce that under aged persones cannot
88 | # be partner in marriage.
89 | eliminate(E,"maritalStatus")
90 |
91 |
92 |
93 |
94 |
95 |
--------------------------------------------------------------------------------
/pkg/R/getVars.R:
--------------------------------------------------------------------------------
1 |
2 | #' get names of variables in a set of edits
3 | #'
4 | #' @param E \code{\link{editset}}, \code{\link{editmatrix}}, or \code{\link{editarray}}
5 | #' @param ... Arguments to be passed to or from other methods
6 | #' @seealso \code{\link{getA}}, \code{\link{getb}}, \code{\link{getAb}}, \code{\link{getOps}}
7 | #' @example ../examples/getVars.R
8 | #' @return \code{character} vector with the names of the variables.
9 | #' @export
10 | getVars <- function(E,...){
11 | UseMethod("getVars")
12 | }
13 |
14 |
15 | #' Returns the variable names of an (in)equality \code{editmatrix} E
16 | #'
17 | #' @export
18 | #' @method getVars editmatrix
19 | #' @keywords internal
20 | getVars.editmatrix <- function(E,...){
21 | colnames(E)[-ncol(E)]
22 | }
23 |
24 | #' Returns the variable names of an (in)equality \code{editmatrix} E
25 | #'
26 | #' @param type should unique variable names, colnames, all variable names or category names be extracted?
27 | #' @export
28 | #' @method getVars cateditmatrix
29 | #' @keywords internal
30 | getVars.cateditmatrix <- function(E, type=c("uniquevar", "colnames","var", "cat"), ...){
31 | nms <- colnames(E)[-ncol(E)]
32 | var <- sub(":.+", "", nms)
33 | cat <- sub(".+:", "", nms)
34 | cat[var==cat] <- "TRUE"
35 | switch( match.arg(type)
36 | , colnames = nms
37 | , var = var
38 | , cat = cat
39 | , unique(var)
40 | )
41 | }
42 |
43 | #' get variable names in editarray
44 | #'
45 | #' @export
46 | #' @method getVars editarray
47 | #' @keywords internal
48 | getVars.editarray <- function(E,type='cat',...){
49 | if (!type=='cat') return(NULL)
50 | names(attr(E,"ind"))
51 | }
52 |
53 | #' getr variable names
54 | #'
55 | #' @method getVars editset
56 | #' @param type (editset- or list only) select which variables to return. \code{all} means all (except dummies), \code{num} means
57 | #' all numericals, \code{cat} means all categoricals, \code{mix} means those numericals appearing in a logical
58 | #' constraint and \code{dummy} means dummy variables connecting the logical with numerical constraints.
59 | #' @export
60 | #' @rdname getVars
61 | getVars.editset <- function(E, type=c('all','num','cat','mix','dummy'), ...){
62 | type <- match.arg(type)
63 | numvars <- c()
64 | catvars <- c()
65 |
66 | if (type %in% c('all','num')){
67 | numvars <- unique(c(getVars(E$num), getVars(E$mixnum)))
68 | }
69 | if ( type == 'mix' ) numvars <- getVars(E$mixnum)
70 | if ( type %in% c('all','cat')){
71 | catvars <- getVars(E$mixcat)
72 | catvars <- catvars[!catvars %in% rownames(E$mixnum)]
73 | }
74 | if ( type == 'dummy'){
75 | catvars <- rownames(E$mixnum)
76 | }
77 | c(numvars, catvars)
78 | }
79 |
80 | #' get variable names
81 | #' @method getVars NULL
82 | #' @export
83 | #' @rdname getVars
84 | getVars.NULL <- function(E,...){
85 | NULL
86 | }
87 |
88 | #' get variable names
89 | #' @export
90 | #' @method getVars editlist
91 | #' @keywords internal
92 | getVars.editlist <- function(E,...){
93 | # under normal circumstances, each part of an editlist has the same variables
94 | if ( length(E) == 0 ) return(NULL)
95 | getVars.editset(E[[1]], ...)
96 | }
97 |
98 |
99 |
--------------------------------------------------------------------------------
/pkg/R/parseCat.R:
--------------------------------------------------------------------------------
1 | CATCMP <- c("==", "!=", "%in%")
2 |
3 | #' Parse a categorical edit expression
4 | #'
5 | #' @param x a valid R expression
6 | #' @param val logical (scalar)
7 | #' @param edit logical (vector)
8 | #' @param sep edit separator
9 | #' @param useLogical (logical), should logicals be treated as a factor or as a logical?
10 | #' @keywords internal
11 | parseCat <- function(x, val=NA, edit=logical(0), sep=":", useLogical=FALSE, env=parent.frame()){
12 | if ( length(x) == 1 ) {
13 | # corner case: the always FALSE edit (array must be TRUE at every category)
14 | if ( is.na(val) && !x[[1]] ) return(NULL)
15 | if (is.logical(x)){
16 | if (val == x) { return(edit)
17 | }else {
18 | # if this happens the statement is always true, so delete it...
19 | return (logical())
20 | }
21 | }
22 | var <- if (useLogical) as.character(x)
23 | else paste(x,"TRUE",sep=sep)
24 | edit[var] <- val
25 | return(edit)
26 | }
27 | op <- as.character(x[[1]])
28 | if ( op == "if" ){
29 | edit <- parseCat(x[[2]],TRUE, edit, sep, useLogical, env=env)
30 | edit <- parseCat(x[[3]],FALSE, edit, sep, useLogical, env=env)
31 | } else if ( op %in% c("(","{") ){
32 | edit <- parseCat(x[[2]], val, edit, sep, useLogical, env=env)
33 | } else if ( op %in% c("%in%","==") ){
34 | cat <- eval(x[[3]], envir=env)
35 | if ( is.na(val) && op == "==" ) val <- FALSE
36 | if (is.logical(cat) && useLogical){
37 | if (length(cat) > 1){
38 | val <- NA
39 | } else {
40 | if (!cat) val <- !val
41 | }
42 | var <- as.character(x[[2]])
43 | } else {
44 | var <- paste(x[[2]],cat,sep=sep)
45 | }
46 | edit[var] <- val
47 | } else if (op == "!=") {
48 | cat <- eval(x[[3]], envir=env)
49 | if (is.logical(cat) && useLogical){
50 | var <- as.character(x[[2]])
51 | if (!cat) val <- !val
52 | } else{
53 | var <- paste(x[[2]],cat,sep=sep)
54 | }
55 | edit[var] <- !val
56 | } else if (op == "!") {
57 | if (is.na(val)){
58 | val <- FALSE
59 | }
60 | edit <- parseCat(x[[2]],!val, edit, sep, useLogical, env=env)
61 | } else if (op %in% c("&", "&&")){
62 | if (!isTRUE(val)){
63 | stop( "Operator '",op,"' is not allowed in 'then' clause.\n Edit '"
64 | , deparse(x)
65 | ,"' can be split into multiple edits")
66 | }
67 | edit <- parseCat(x[[2]],TRUE, edit, sep, useLogical, env=env)
68 | edit <- parseCat(x[[3]],TRUE, edit, sep, useLogical, env=env)
69 | } else if (op %in% c("||","|")){
70 | if (isTRUE(val)){
71 | stop( "Operator '",op,"' is not allowed in 'if' clause.\n Edit '"
72 | , deparse(x)
73 | ,"' can be split into multiple edits")
74 | }
75 | edit <- parseCat(x[[2]],FALSE, edit, sep, useLogical, env=env)
76 | edit <- parseCat(x[[3]],FALSE, edit, sep, useLogical, env=env)
77 | } else {
78 | stop("Operator '",op,"' not implemented")
79 | }
80 | edit
81 | }
82 |
83 | isCat <- function(e){
84 | if (length(e)==1) {
85 | return(is.symbol(e))
86 | }
87 |
88 | cmp <- deparse(e[[1]])
89 | return( cmp %in% c(CATCMP,"!"))
90 | }
91 |
92 |
--------------------------------------------------------------------------------
/pkg/inst/script/bench/benchmip_mixed.R:
--------------------------------------------------------------------------------
1 | library(editrules)
2 | FILE ="benchmip_mixed.txt"
3 |
4 | generate_E <- function(nvar=10){
5 | if (nvar < 1) stop("nvar needs to be bigger than 3")
6 | s <- seq_len(nvar)
7 | n_num <- ceiling(nvar/2)
8 | var_num <- head(s, n_num)
9 | var_cat <- tail(s, -n_num)
10 | n_cat <- length(var_cat)
11 |
12 | var_num <- paste0("x", var_num)
13 | if (n_cat) var_cat <- paste0("v", var_cat-n_num) else character()
14 |
15 | if (length(var_num) > 1){
16 | nsum <- paste(tail(var_num, -1), collapse="+")
17 | edits <- paste0("x1 == ", nsum)
18 | edits <- c(edits, paste0(head(var_num, -1)," >= ", tail(var_num, -1)))
19 | } else {
20 | edits <- "x1 == 0"
21 | }
22 |
23 | if (n_cat){
24 | edits <- c( paste0(tail(var_num, 1), ">= 0")
25 | , edits
26 | , paste0(var_cat, " %in% c(TRUE,FALSE)")
27 | , paste0("if (!", var_cat, ") ", head(var_num,n_cat),"< 0")
28 | )
29 | }
30 | editset(edits)
31 | }
32 |
33 | error <- function(x){
34 | lapply(x, function(v){
35 | if (is.logical(v)) FALSE else -1
36 | })
37 | }
38 |
39 | generate_data <- function(E, nerrors=0){
40 | num_vars <- getVars(E,"num")
41 |
42 | x <- sapply(getVars(E), function(v){
43 | if (v %in% num_vars) 0 else TRUE
44 | }, simplify=FALSE)
45 |
46 | n <- length(x)
47 |
48 | x1 <- x
49 | idx <- seq_len(nerrors)
50 | x1[idx] <- error(x1[idx])
51 |
52 | x2 <- x
53 | idx <- 1+n - seq_len(nerrors)
54 | x2[idx] <- error(x2[idx])
55 |
56 | x3 <- x
57 | idx <- (round(n-nerrors)/2) + seq_len(nerrors)
58 | x3[idx] <- error(x3[idx])
59 |
60 | rbind(as.data.frame(x1),x2,x3)
61 | }
62 |
63 |
64 | bench <- function(nvars = 10, nerrors=10, method="bb", maxduration=200){
65 |
66 | init <- !file.exists(FILE)
67 | txt <- file(FILE, "at")
68 | on.exit(close(txt))
69 |
70 | errorloc <- c("begin", "end", "middle")
71 |
72 | if (nerrors >= nvars) stop("nvars cannot be less than nerrors")
73 | for (nvar in seq_len(nvars)){
74 | E <- generate_E(nvar)
75 | max_dur <- logical(nrow(generate_data(E, 1)))
76 | for (ne in seq(1, min(nerrors, nvar))){
77 | try({
78 | if (all(max_dur)) break
79 | data <- generate_data(E, ne)[!max_dur,,drop=FALSE]
80 | # only select middle
81 | data <- data[3,,drop=FALSE]
82 | errorloc_m <- errorloc[3]
83 |
84 | cat("\r nvar=", nvar, " ne=", ne, " method=", method)
85 | le <- localizeErrors(E, data, method=method, maxduration=maxduration)
86 | max_dur[!max_dur] <- le$status$maxDurationExceeded
87 | rpt <- cbind(method=method, nvar=nvar, nerrors=ne, errorloc=errorloc_m, le$status)
88 |
89 | write.table(rpt, file=txt, col.names=init, row.names=FALSE)
90 |
91 | init <- FALSE
92 | flush(txt)
93 | })
94 | gc()
95 | }
96 | }
97 | }
98 |
99 | if (file.exists(FILE)) file.remove(FILE)
100 |
101 | bench(50,10, method="mip")
102 | bench(50,10, method="bb")
103 |
104 | dat <- read.table(FILE, header=TRUE)
105 | library(ggplot2)
106 | qplot(data=dat, y=elapsed, x=nvar, color=method, facets=nerrors~method, shape=errorloc) + geom_jitter()
107 | ggsave("benchmip_categorical.png")
108 |
109 |
110 | ### quick testing
111 | # E <- generate_E(2)
112 | # generate_data(E, nerrors=1)
113 |
--------------------------------------------------------------------------------
/pkg/R/editmatrixAttr.R:
--------------------------------------------------------------------------------
1 |
2 | #' Returns the constant part \code{b} of a linear (in)equality
3 | #'
4 | #' @example ../examples/editmatrixAttr.R
5 | #' @export getb
6 | #' @seealso \code{\link{editmatrix}}
7 | #'
8 | #' @param E \code{\link{editmatrix}}
9 | #' @return \code{numeric} vector \code{b}
10 | getb <- function(E){
11 | if (!is.editmatrix(E)){
12 | stop("E has to be an editmatrix.")
13 | }
14 | E <- unclass(E)
15 | E[,ncol(E)]
16 | }
17 |
18 |
19 |
20 | #' Returns the coefficient matrix \code{A} of linear (in)equalities
21 | #'
22 | #' @example ../examples/editmatrixAttr.R
23 | #' @export getA
24 | #' @seealso \code{\link{editmatrix}}
25 | #' @aliases getA
26 | #'
27 | #' @param E \code{\link{editmatrix}}
28 | #' @return \code{numeric} matrix \code{A}
29 | getA <- function(E){
30 | if ( is.editmatrix(E) ){
31 | unclass(E)[,-ncol(E),drop=FALSE]
32 | } else {
33 | stop("E has to be an editmatrix")
34 | }
35 | }
36 |
37 |
38 | #' Returns augmented matrix representation of edit set.
39 | #'
40 | #' For a system of linear (in)equations of the form \eqn{Ax \odot b}, \eqn{\odot\in\{<,\leq,=\}},
41 | #' the matrix \eqn{A|b} is called the augmented matrix.
42 | #'
43 | #' @example ../examples/editmatrixAttr.R
44 | #' @seealso \code{\link{editmatrix}} \code{\link{as.matrix.editmatrix}}
45 | #'
46 | #' @param E \code{\link{editmatrix}}
47 | #' @return \code{numeric} matrix \code{A|b}
48 | #' @export
49 | getAb <- function(E){
50 | if (!is.editmatrix(E)) stop("E has to be an editmatrix.")
51 | unclass(E)[,,drop=FALSE]
52 | }
53 |
54 | #' Returns the operator part of a linear (in)equality \code{editmatrix} E
55 | #'
56 | #' @export
57 | #' @seealso \code{\link{editmatrix}}
58 | #'
59 | #' @example ../examples/editmatrixAttr.R
60 | #'
61 | #' @param E \code{\link{editmatrix}}
62 | #'
63 | #' @return \code{character} vector with the (in)equality operators.
64 | getOps <- function(E){
65 | if (!is.editmatrix(E)){
66 | stop("E has to be an editmatrix.")
67 | }
68 | attr(E, "ops")
69 | }
70 |
71 |
72 | #' Check if an editmatrix is normalized
73 | #'
74 | #' @export
75 | #' @seealso \code{\link{editmatrix}}
76 | #'
77 | #' @param E \code{\link{editmatrix}}
78 | #'
79 | #' @return TRUE when all comparison operators of \code{E} are in \{\code{<,<=,==}\}
80 | isNormalized <- function(E){
81 | if (!is.editmatrix(E)){
82 | stop("Argument not of class editmatrix")
83 | }
84 |
85 | attr(E, "normalized") == TRUE ||
86 | all(getOps(E) %in% c("==","<","<="))
87 | }
88 |
89 | #' Normalizes an editmatrix
90 | #'
91 | #' An set of linear edits of the form \eqn{{\bf a}\cdot{\bf x}\odot b} with
92 | #' is called normalized when all \eqn{\odot\in\{==,\leq,<\}}
93 | #'
94 | #' @export
95 | #' @seealso \code{\link{editmatrix}}
96 | #'
97 | #' @example ../examples/editmatrixAttr.R
98 | #'
99 | #' @param E \code{\link{editmatrix}}
100 | #'
101 | #' @return If E was normalized, the original editmatrix is returned, otherwise
102 | #' a new normalized editmatrix will be returned
103 | normalize <- function(E){
104 | if (!is.editmatrix(E)) stop("Argument not of class editmatrix")
105 | if (isNormalized(E)){
106 | return(E)
107 | }
108 |
109 | A <- unclass(E)
110 | ops <- getOps(E)
111 |
112 | geq <- ops == ">="
113 | gt <- ops == ">"
114 | A[geq | gt,] <- -A[geq | gt,]
115 | ops[geq] <- "<="
116 | ops[gt] <- "<"
117 |
118 | neweditmatrix(A, ops, normalized=TRUE)
119 | }
120 |
--------------------------------------------------------------------------------
/pkg/R/subsetting.R:
--------------------------------------------------------------------------------
1 |
2 | #' Row index operator for \code{editmatrix}
3 | #'
4 | #' Use this operator to select edits from an editmatrix or editarray object.
5 | #'
6 | #' @method [ editmatrix
7 | #' @param x an object of class \code{\link{editmatrix}} or \code{\link{editarray}}
8 | #' @param i the row index in the edit matrix (numeric, logical or rowname)
9 | #' @param j the column index in the edit matrix
10 | #' @param ... arguments to be passed to other methods. Currently ignored.
11 | #' @rdname subsetting
12 | #' @export
13 | `[.editmatrix` <- function(x, i, j, ...){
14 | if (!missing(i) && is.character(i) ) i <- match(i,rownames(x),nomatch=0)
15 | E <- neweditmatrix(
16 | A = as.matrix(x)[i, j, drop=FALSE],
17 | ops = getOps(x)[i]
18 | )
19 | attr(E,"H") <- attr(x,"H")[i, , drop=FALSE]
20 | attr(E,"h") <- attr(x,"h")
21 | E
22 | }
23 |
24 |
25 | #' Row index operator for \code{cateditmatrix}
26 | #'
27 | #' @method [ editarray
28 | #' @rdname subsetting
29 | #' @keywords internal
30 | #'
31 | `[.cateditmatrix` <- function(x, i, j, ...){
32 | if (!missing(i) && is.character(i) ) i <- match(i,rownames(x),nomatch=0)
33 | E <- as.editmatrix(
34 | getA(x)[i,,drop=FALSE],
35 | getb(x)[i],
36 | getOps(x)[i],
37 | binvars=attr(x,'binvars')
38 | )
39 | class(E) <- c("cateditmatrix","editmatrix")
40 | E
41 | }
42 |
43 | #' Row index operator for \code{editarray}
44 | #'
45 | #' @method [ editarray
46 | #' @rdname subsetting
47 | #' @export
48 | #'
49 | `[.editarray` <- function(x, i, j, ...){
50 | A <- getArr(x)[i,j,drop=FALSE]
51 | sep <- getSep(x)
52 | ind <- indFromArray(A,sep)
53 | H <- getH(x)
54 | if (!is.null(H)) H <- H[i,j,drop=FALSE]
55 | neweditarray(E=A, ind=ind, sep=sep, names=getnames(x)[i],levels=getlevels(x)[j],H=H)
56 | }
57 |
58 | #' Row index operator for \code{editset}
59 | #' Note: the datamodel is not changed
60 | #' @method [ editset
61 | #' @rdname subsetting
62 | #' @export
63 | `[.editset` <- function(x,i,j, ...){
64 | if (is.numeric(i) && i[1] < 0){
65 | N <- nrow(x$num) + nrow(x$mixcat)
66 | # make a negative selection positive
67 | i <- seq_len(N)[i]
68 | print(list(i=i, N=N))
69 | }
70 | if ( is.logical(i) ) i <- which(i)
71 | nnum <- nrow(x$num)
72 | mixcat <- x$mixcat[i[i>nnum]-nnum]
73 | # remove edits from mixnum not occuring in mixcat
74 | v <- getVars(reduce(mixcat))
75 | mixnum <- x$mixnum[rownames(x$mixnum) %in% v,]
76 | # remove dummy variables from mixcat not referring to numerical edits anymore
77 | v <- getVars(x,type='dummy')
78 | delvars <- v[ !v %in% rownames(mixnum)]
79 | if ( length(delvars) > 0 ){
80 | ind <- getInd(mixcat)
81 | delcols <- do.call('c',ind[delvars])
82 | Amixcat <- getArr(mixcat)[,-delcols,drop=FALSE]
83 | sep <- getSep(mixcat)
84 | ind <- indFromArray(Amixcat,sep=sep)
85 | mixcat <- neweditarray(Amixcat,ind,sep)
86 | }
87 | neweditset(
88 | num = x$num[i[i<=nnum]],
89 | mixnum = mixnum,
90 | mixcat = mixcat,
91 | condition = attr(x,"condition")
92 | )
93 | }
94 |
95 |
96 | #' Index operator for \code{editlist}
97 | #' @method [ editlist
98 | #' @rdname subsetting
99 | #' @export
100 | `[.editlist` <- function(x,i,j, ...){
101 | x <- unclass(x)[i]
102 | class(x) <- 'editlist'
103 | x
104 | }
105 |
106 |
107 |
108 |
--------------------------------------------------------------------------------
/pkg/R/adjacency.R:
--------------------------------------------------------------------------------
1 |
2 | #' Derive adjecency matrix from collection of edits
3 | #'
4 | #' A set of edits can be represented as a graph where every vertex is
5 | #' an edit. Two vertices are connected if they have at least one variable
6 | #' in \code{vars} in common.
7 | #'
8 | #' \code{adjacency} returns the adjacency matrix. The elements of the matrix
9 | #' count the number of variables shared by the edits indicated in the row- and
10 | #' column names. The adjacency matrix can be converted to an igraph object with
11 | #' \code{graph.adjacency}from the \code{igraph} package.
12 | #'
13 | #' \code{as.igraph} converts a set of edits to an \code{igraph} object directly.
14 | #'
15 | #'
16 | #' @param E \code{\link{editmatrix}}, \code{\link{editarray}} or \code{\link{editset}}
17 | #' @param nodetype adjacency between rules, vars or both?
18 | #' @param rules selection of edits
19 | #' @param vars selection of variables
20 | #' @param ... arguments to be passed to or from other methods
21 | #'
22 | #' @return the adjacency matrix of edits in \code{E} with resect to
23 | #' the variables in \code{vars}
24 | #'
25 | #' @example ../examples/graph.R
26 | #'
27 | #' @seealso \code{\link{plot.editmatrix}}, \code{\link{plot.editarray}}, \code{\link{plot.editset}}
28 | #' @export
29 | adjacency <- function(E, nodetype=c("all", "rules","vars"), rules=rownames(E), vars=getVars(E),...){
30 | stopifnot( all(vars %in% getVars(E)) )
31 | UseMethod('adjacency')
32 | }
33 |
34 | #' @rdname adjacency
35 | #' @method adjacency editmatrix
36 | #' @export
37 | adjacency.editmatrix <- function(E, nodetype=c("all", "rules","vars"), rules=rownames(E), vars=getVars(E),...){
38 | A <- abs(sign(getA(E)))
39 | nodetype <- match.arg(nodetype)
40 | adjec(A,nodetype=nodetype, rules=rules, vars=vars)
41 | }
42 |
43 |
44 | #' @rdname adjacency
45 | #' @method adjacency editarray
46 | #' @export
47 | adjacency.editarray <- function(E, nodetype=c("all", "rules","vars"), rules=rownames(E), vars=getVars(E),...){
48 | A <- contains(E)
49 | nodetype <- match.arg(nodetype)
50 | adjec(A,nodetype=nodetype, rules=rules, vars=vars)
51 | }
52 |
53 |
54 | #' @rdname adjacency
55 | #' @method adjacency editset
56 | #' @export
57 | adjacency.editset <- function(E, nodetype=c("all", "rules","vars"), rules=c(rownames(E$num),rownames(E$mixcat)), vars=getVars(E),...){
58 | A <- contains(E)
59 | nodetype <- match.arg(nodetype)
60 | adjec(A,nodetype=nodetype, rules=rules, vars=vars)
61 | }
62 |
63 |
64 |
65 |
66 |
67 | # derive adjacency from 1/0 or boolean matrix.
68 | # Internal loops only, but nrow(A)^2 memory complexity.
69 | # future optimization options: sparse matrices, lower/upper triangle only.
70 | adjec <- function(A, nodetype="all", rules=rownames(A), vars=colnames(A)){
71 | A <- A[rules, vars, drop=FALSE]
72 | m <- NULL
73 | vars <- NULL
74 | if (nodetype=="all"){
75 | N <- nrow(A) + ncol(A)
76 | nms <- c(rownames(A), colnames(A))
77 | vars <- rep(c(FALSE, TRUE), times=c(nrow(A), ncol(A)))
78 | m <- matrix(0, nrow=N, ncol=N, dimnames=list(nms, nms))
79 | m[!vars, vars] <- A
80 | m[vars, !vars] <- t(A)
81 | } else{
82 | vars <- rep(FALSE, nrow(A))
83 | if (nodetype=="vars"){
84 | vars <- rep(TRUE, ncol(A))
85 | A <- t(A)
86 | }
87 | I <- rep(1:nrow(A), times=nrow(A))
88 | J <- rep(1:nrow(A), each=nrow(A))
89 | m <- matrix(
90 | rowSums(A[I,,drop=FALSE] & A[J,,drop=FALSE]),
91 | nrow=nrow(A),
92 | dimnames=list(rownames(A), rownames=rownames(A))
93 | )
94 | diag(m) <- 0
95 | }
96 | attr(m,"vars") <- vars
97 | m
98 | }
99 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testErrorLocalizer.R:
--------------------------------------------------------------------------------
1 | require(testthat)
2 | context("Error Localization: numerical data")
3 |
4 | test_that("errorLocalizer for numerical data",{
5 | bt <- errorLocalizer(
6 | E = editmatrix("x + y == z"),
7 | x = c(x=1,y=1,z=2))
8 | expect_true( all(bt$searchNext()$adapt==c(FALSE,FALSE,FALSE)) )
9 | expect_true( is.null(bt$searchNext()))
10 | expect_true(1 == errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3))$searchNext()$w)
11 | expect_true(1 == errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3,u=5))$searchNext()$w)
12 | expect_true(is.null(errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3))$searchNext(maxduration=-1)) )
13 | expect_true(is.null(errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3),maxadapt=0)$searchNext()) )
14 | expect_true(is.null(errorLocalizer(editmatrix("x+y==z"),c(x=1,y=1,z=3),maxweight=0)$searchNext()) )
15 | expect_that(errorLocalizer(editmatrix("x+y==z"),c(x=1,y=NA,z=3),weight=c(1,NA,1))$searchNext(),throws_error())
16 | })
17 |
18 | test_that("weight calculation when checkDatamodel is activated",{
19 | expect_equal(
20 | localizeErrors(
21 | editmatrix(expression(x+y+z==w,x>0)),
22 | data.frame(x=-1,y=1,z=0,w=0)
23 | )$status$weight, 2
24 | )
25 | expect_equal(
26 | localizeErrors(
27 | editmatrix(expression(x+y+z==w,x>0)),
28 | data.frame(x=-1,y=1,z=0,w=0),
29 | method='mip'
30 | )$status$weight, 2
31 | )
32 |
33 | })
34 |
35 |
36 |
37 | #d <- "../../../pkg/R"
38 | #for ( b in file.path(d,dir(d)) ) dmp <- source(b,echo=FALSE)
39 |
40 | context("Error localization: categorical data")
41 | test_that("errorLocalizer for categorical data",{
42 | E <- editarray(c(
43 | "positionInHouseHold %in% c('marriage partner','child','other')",
44 | "age %in% c('under aged','adult')",
45 | "maritalStatus %in% c('unmarried','married','widowed')",
46 | "if (age == 'under aged') maritalStatus == 'unmarried'",
47 | "if (maritalStatus != 'unmarried' ) !positionInHouseHold %in% c('child','marriage partner')"
48 | ))
49 | r <- c(age='under aged',maritalStatus='married',positionInHouseHold='child')
50 | e <- errorLocalizer(E,r)
51 | expect_equivalent(e$searchBest()$adapt,c(FALSE,TRUE,FALSE))
52 | expect_true(e$degeneracy==1)
53 | e$reset()
54 | expect_true(e$searchBest()$w==1)
55 | expect_true(is.null(e$searchNext()))
56 | expect_true(is.null(errorLocalizer(E,r,maxweight=0)$searchBest()) )
57 | expect_true(is.null(errorLocalizer(E,r,maxadapt=0)$searchBest()) )
58 | expect_true(is.null(errorLocalizer(E,r)$searchBest(maxduration=-1)) )
59 | expect_that(errorLocalizer(E,r,weight=c(1,NA,1)),throws_error())
60 | })
61 |
62 | test_that("errorlocalizer.editarray runs when blocks reduces the datamodel",{
63 | # Thanks to Bikram Maharjan for reporting a bug in version 2.0-1
64 | ageGrps=rep(1,5)
65 | genderIndex=rep(1,5)
66 | icdIndex=rep(103,5)
67 | diagnosisIndex=c('TOT', 'A00', 'A02', 'C53','C54')
68 |
69 |
70 | e.diag=editarray(c(
71 | "ageGrps %in% c(1,2,3,4)",
72 | "genderIndex %in% c(0,1,2)",
73 | "icdIndex %in% c(103)",
74 | "diagnosisIndex %in% c('TOT', 'A00', 'A02', 'C53','C54')",
75 | "if(genderIndex=='1' & icdIndex=='103') diagnosisIndex %in% c('C53','C54')"
76 | ))
77 | dat.diag=data.frame(ageGrps,genderIndex,icdIndex,diagnosisIndex)
78 | err=localizeErrors(e.diag,dat.diag)
79 | })
80 |
81 | test_that("if-else edits are parsed to MIP correctly",{
82 | e <- editset("if (x > 0 ) y> 0")
83 | d <- data.frame(x = 1,y=0)
84 | localizeErrors(e,d,method='mip')
85 | })
86 |
87 |
88 |
89 |
90 |
--------------------------------------------------------------------------------
/pkg/R/checkDatamodel.R:
--------------------------------------------------------------------------------
1 | #' Check data against a datamodel
2 | #'
3 | #' Categorical variables in \code{dat} which also occur in \code{E} are checked against the datamodel for
4 | #' those variables. Numerical variables are checked against edits in \code{E} that contain only a single
5 | #' variable (e.g. \eqn{x > 0}). Values violating such edits as well as empty values are set to adapt.
6 | #'
7 | #' @param E an object of class \code{\link{editset}}, \code{\link{editarray}}, or \code{\link{editmatrix}}
8 | #' @param dat a \code{data.frame}
9 | #' @param weight vector of weigths for every variable of \code{dat} or an array of weight of the same dimensions as \code{dat}.
10 | #' @param ... arguments to be passed to or from other methods
11 | #'
12 | #' @return An object of class \code{\link{errorLocation}}.
13 | #' @seealso \code{\link{errorLocation}}, \code{\link{localizeErrors}}.
14 | #' @export
15 | checkDatamodel <- function(E, dat,weight=rep(1,ncol(dat)), ...){
16 | UseMethod('checkDatamodel')
17 | }
18 |
19 | #
20 | #
21 | #' @method checkDatamodel editmatrix
22 | #' @export
23 | #' @keywords internal
24 | checkDatamodel.editmatrix <- function(E, dat, weight=rep(1,ncol(dat)), call=sys.call(), ...){
25 | if (nrow(E)==0) return(emptyerrorlocation(dat ,method="checkDatamodel",call=call,... ))
26 | I <- rowSums( abs(getA(E)) > 1e-8 ) == 1
27 | localize_singleton(E[I,], dat, weight, method="checkDatamodel", call=call, ... )
28 | }
29 |
30 | # Check categorical data against datamodel
31 | #
32 | #
33 |
34 | #' @method checkDatamodel editarray
35 | #' @keywords internal
36 | #' @export
37 | checkDatamodel.editarray <- function(E, dat, weight=rep(1,ncol(dat)), ...){
38 | if (any(! (getVars(E) %in% names(dat)) ) ){
39 | vr <- paste(getVars(E)[!getVars(E) %in% names(dat)],collapse=', ')
40 | stop(paste('Variables',vr,'defined in E not present in dat'))
41 | }
42 |
43 | m <- nrow(dat)
44 | if ( is.vector(weight) || (is.array(weight) && nrow(weight)==1) ){
45 | weight <- t(array(rep(weight,m),dim=c(ncol(dat),m)))
46 | dimnames(weight) <- dimnames(dat)
47 | }
48 | I <- names(dat)[names(dat) %in% getVars(E)]
49 | adapt <- array(FALSE,dim=dim(dat),dimnames=dimnames(dat))
50 | ind <- getInd(E)
51 |
52 | w = rep(0,m)
53 | for ( ii in I ){
54 | J <- !(dat[,ii] %in% names(ind[[ii]]))
55 | adapt[,ii] <- J
56 | w[J] <- w[J] + weight[J,ii]
57 | }
58 | status <- emptyStatus(n=m)
59 | status$weight <- w
60 | newerrorlocation(
61 | adapt = adapt,
62 | status = status,
63 | method = 'checkDatamodel',
64 | )
65 | }
66 |
67 |
68 | #
69 | # For an \code{\link{editset}}, the categorical variables are tested against
70 | # the categorical datamodel.
71 | #
72 | #
73 |
74 | #' @method checkDatamodel editset
75 | #' @export
76 | #' @keywords internal
77 | checkDatamodel.editset <- function(E, dat, weight=rep(1,length(getVars(E))), ...){
78 | if ( is.null(names(weight)) ) names(weight) <- getVars(E)
79 | et <- editType(E)
80 | err <- NULL
81 | if ( ncol(E$mixcat) > 0 ){
82 | dvar <- getVars(E,type="dummy")
83 | iv <- -(1:ncol(E$mixcat))
84 | if ( length(dvar) > 0 ) iv <- unlist(getInd(E$mixcat)[dvar])
85 | A <- getArr(E$mixcat)[,-iv,drop=FALSE]
86 | sep <- getSep(E$mixcat)
87 | F <- neweditarray(
88 | A,
89 | indFromArray(A,sep),
90 | sep
91 | )
92 | err <- checkDatamodel.editarray(F,dat,weight)
93 | }
94 | G <- reduce(E$num)
95 | err %+% checkDatamodel.editmatrix(G,dat,weight)
96 | }
97 |
98 |
99 |
100 |
101 |
102 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testCheckDatamodel.R:
--------------------------------------------------------------------------------
1 |
2 | context('Check datamodel')
3 |
4 | test_that("checkDatamodel.editmatrix works",{
5 | expect_true(
6 | checkDatamodel(
7 | editmatrix("x > 0"),
8 | data.frame(x=-1)
9 | )$adapt[1,1]
10 | )
11 | # test with NA
12 | expect_true(
13 | checkDatamodel(
14 | editmatrix("x > 0"),
15 | data.frame(x=NA)
16 | )$adapt[1,1]
17 | )
18 | # test with valid entry
19 | expect_false(
20 | checkDatamodel(
21 | editmatrix("x < 0"),
22 | data.frame(x=-1)
23 | )$adapt[1,1]
24 | )
25 | # test with no single-variable edits
26 | expect_false(
27 | checkDatamodel(
28 | editmatrix("x + y == 1"),
29 | data.frame(x=-1,y=2)
30 | )$adapt[1,1]
31 | )
32 |
33 | })
34 |
35 |
36 | test_that('checkDatamodel.editarray works',{
37 | # dat has column also in E
38 | expect_equivalent(
39 | checkDatamodel(
40 | E = editarray('x %in% 1:2'),
41 | dat = data.frame(x=1:3)
42 | )$adapt[,1],
43 | c(FALSE,FALSE,TRUE)
44 | )
45 | # dat has column not specified by E
46 | expect_equivalent(
47 | checkDatamodel(
48 | E = editarray('x %in% 1:2'),
49 | dat = data.frame(y=1:3,x=1:3)
50 | )$adapt[,1],
51 | c(FALSE,FALSE,FALSE)
52 | )
53 | # dat misses a variable, specified in E
54 | expect_error(
55 | checkDatamodel(
56 | E = editarray('x %in% 1:2'),
57 | dat = data.frame(y=1:3)
58 | )
59 | )
60 | # dat computes correct weights
61 | expect_equivalent(
62 | checkDatamodel(
63 | E = editarray(c('x %in% 1:2','y %in% c("a","b")')),
64 | dat = data.frame(x=1:4,y=c('a','c','b','c'))
65 | )$status$weight,
66 | c(0,1,1,2)
67 | )
68 | })
69 |
70 |
71 | test_that("checkDatamodel.editset works with pure numerical edits",{
72 | v <- checkDatamodel(
73 | editset(expression(
74 | x > 0,
75 | x + y == 3
76 | )),
77 | data.frame(x=c(-1,2),y=c(1,1))
78 | )
79 | expect_equivalent(v$adapt,array(c(TRUE,FALSE,FALSE,FALSE),dim=c(2,2)))
80 | })
81 |
82 |
83 | test_that("checkDatamodel.editset works with pure categorical edits",{
84 | v <- checkDatamodel(
85 | editset(expression(
86 | A %in% letters[1:3],
87 | B %in% 1:3
88 | )),
89 | data.frame(A=c('q','c'),B=c(1,10))
90 | )
91 | expect_equivalent(v$adapt,array(c(TRUE,FALSE,FALSE,TRUE),dim=c(2,2)))
92 | })
93 |
94 |
95 |
96 | test_that("checkDatamodel.editset works with conditional numerical edits",{
97 | v <- checkDatamodel(
98 | editset(expression(
99 | x + y == 3,
100 | x > 0,
101 | if ( x > 2 ) y < 1,
102 | v %in% letters[1:3])),
103 | data.frame(
104 | x = c(-1,3),
105 | y = c(0, 1),
106 | v = c("a","out-of-range")
107 | ))
108 | expect_equivalent(
109 | v$adapt,
110 | array(c(TRUE,FALSE,FALSE,FALSE,FALSE,TRUE),dim=c(2,3))
111 | )
112 | })
113 |
114 |
115 | test_that("checkDatamodel.editset works with conditional categorical/numerical edits",{
116 | v <- checkDatamodel(
117 | editset(expression(
118 | x + y == 3,
119 | x > 0,
120 | v %in% letters[1:3],
121 | if ( v == 'a' ) y > 0
122 | )),
123 | data.frame(
124 | x = c(-1,3),
125 | y = c(0, 1),
126 | v = c("a","out-of-range")
127 | ))
128 |
129 | })
130 |
131 |
132 |
133 |
134 |
--------------------------------------------------------------------------------
/examples/localizeErrors.R:
--------------------------------------------------------------------------------
1 |
2 | # an editmatrix and some data:
3 | E <- editmatrix(c(
4 | "x + y == z",
5 | "x > 0",
6 | "y > 0",
7 | "z > 0"))
8 |
9 | dat <- data.frame(
10 | x = c(1,-1,1),
11 | y = c(-1,1,1),
12 | z = c(2,0,2))
13 |
14 | # localize all errors in the data
15 | err <- localizeErrors(E,dat)
16 |
17 | summary(err)
18 |
19 | # what has to be adapted:
20 | err$adapt
21 | # weight, number of equivalent solutions, timings,
22 | err$status
23 |
24 |
25 | ## Not run
26 |
27 | # Demonstration of verbose processing
28 | # construct 2-block editmatrix
29 | F <- editmatrix(c(
30 | "x + y == z",
31 | "x > 0",
32 | "y > 0",
33 | "z > 0",
34 | "w > 10"))
35 | # Using 'dat' as defined above, generate some extra records
36 | dd <- dat
37 | for ( i in 1:5 ) dd <- rbind(dd,dd)
38 | dd$w <- sample(12,nrow(dd),replace=TRUE)
39 |
40 | # localize errors verbosely
41 | (err <- localizeErrors(F,dd,verbose=TRUE))
42 |
43 | # printing is cut off, use summary for an overview
44 | summary(err)
45 |
46 | # or plot (not very informative in this artificial example)
47 | plot(err)
48 |
49 | ## End(Not run)
50 |
51 | for ( d in dir("../pkg/R",full.names=TRUE)) dmp <- source(d)
52 | # Example with different weights for each record
53 | E <- editmatrix('x + y == z')
54 | dat <- data.frame(
55 | x = c(1,1),
56 | y = c(1,1),
57 | z = c(1,1))
58 |
59 | # At equal weights, both records have three solutions (degeneracy): adapt x, y
60 | # or z:
61 | localizeErrors(E,dat)$status
62 |
63 | # Set different weights per record (lower weight means lower reliability):
64 | w <- matrix(c(
65 | 1,2,2,
66 | 2,2,1),nrow=2,byrow=TRUE)
67 |
68 | localizeErrors(E,dat,weight=w)
69 |
70 |
71 | # an example with categorical variables
72 | E <- editarray(expression(
73 | age %in% c('under aged','adult'),
74 | maritalStatus %in% c('unmarried','married','widowed','divorced'),
75 | positionInHousehold %in% c('marriage partner', 'child', 'other'),
76 | if( age == 'under aged' ) maritalStatus == 'unmarried',
77 | if( maritalStatus %in% c('married','widowed','divorced'))
78 | !positionInHousehold %in% c('marriage partner','child')
79 | )
80 | )
81 | E
82 |
83 | #
84 | dat <- data.frame(
85 | age = c('under aged','adult','adult' ),
86 | maritalStatus=c('married','unmarried','widowed' ),
87 | positionInHousehold=c('child','other','marriage partner')
88 | )
89 | dat
90 | localizeErrors(E,dat)
91 | # the last record of dat has 2 degenerate solutions. Running the last command
92 | # a few times demonstrates that one of those solutions is chosen at random.
93 |
94 | # Increasing the weight of 'positionInHousehold' for example, makes the best
95 | # solution unique again
96 | localizeErrors(E,dat,weight=c(1,1,2))
97 |
98 |
99 | # an example with mixed data:
100 |
101 | E <- editset(expression(
102 | x + y == z,
103 | 2*u + 0.5*v == 3*w,
104 | w >= 0,
105 | if ( x > 0 ) y > 0,
106 | x >= 0,
107 | y >= 0,
108 | z >= 0,
109 | A %in% letters[1:4],
110 | B %in% letters[1:4],
111 | C %in% c(TRUE,FALSE),
112 | D %in% letters[5:8],
113 | if ( A %in% c('a','b') ) y > 0,
114 | if ( A == 'c' ) B %in% letters[1:3],
115 | if ( !C == TRUE) D %in% c('e','f')
116 | ))
117 |
118 | set.seed(1)
119 | dat <- data.frame(
120 | x = sample(-1:8),
121 | y = sample(-1:8),
122 | z = sample(10),
123 | u = sample(-1:8),
124 | v = sample(-1:8),
125 | w = sample(10),
126 | A = sample(letters[1:4],10,replace=TRUE),
127 | B = sample(letters[1:4],10,replace=TRUE),
128 | C = sample(c(TRUE,FALSE),10,replace=TRUE),
129 | D = sample(letters[5:9],10,replace=TRUE),
130 | stringsAsFactors=FALSE
131 | )
132 |
133 | (el <-localizeErrors(E,dat,verbose=TRUE))
134 |
135 |
136 |
137 |
138 |
139 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testEditset.R:
--------------------------------------------------------------------------------
1 |
2 | context("Editset")
3 | test_that("editset parses categorical edits",{
4 |
5 | v <- expression(
6 | A %in% c('a','b'),
7 | B %in% c('c','d'),
8 | if ( A == 'a') B == 'c'
9 | )
10 | E <- editset(v)
11 | expect_equal(E$num,editmatrix(expression()))
12 | expect_equal(E$mixnum,editmatrix(expression()))
13 | expect_equal(E$mixcat,editarray(v))
14 | })
15 |
16 | test_that("editset parses numerical edits",{
17 | v <- expression(x + y == z, 2*x -u == v)
18 | E <- editset(v)
19 | expect_equal(E$num,editmatrix(v))
20 | expect_equal(E$mixnum,editmatrix(expression()))
21 | expect_equal(E$mixcat,editarray(expression()))
22 | })
23 |
24 | test_that("editset parses conditional numeric edits",{
25 | # test 1: inequalities
26 | v <- expression( if ( x > 0 ) y > 0 )
27 | E <- editset(v)
28 | expect_equal(E$num, editmatrix(expression()))
29 | expect_equivalent(E$mixnum, editmatrix(expression(x>0,y<=0)))
30 | expect_equivalent(getArr(E$mixcat),array(c(F,T,F,T),dim=c(1,4)))
31 |
32 | # test 2: with equality in if-statement
33 |
34 | v <- expression( if ( x >= 0 ) y >= 0)
35 | E <- editset(v)
36 | expect_equal(E$num, editmatrix(expression()))
37 | expect_equivalent(E$mixnum,editmatrix(expression(x>=0,y<0)))
38 | expect_equivalent(getArr(E$mixcat), array(c(F,T,F,T),dim=c(1,4)))
39 |
40 | })
41 |
42 | test_that("editset parses conditional categorical/numerical edits",{
43 | # test 1: numerical statement in 'then' clause
44 | v <- expression(
45 | A %in% letters[1:2],
46 | B %in% letters[3:4],
47 | if ( A == 'a' ) x > 0
48 | )
49 | E <- editset(v)
50 | expect_equal(E$num, editmatrix(expression()))
51 | expect_equivalent(E$mixnum, editmatrix(expression(x<=0)))
52 | expect_equal(dim(E$mixcat),c(1,6))
53 | expect_equivalent(getArr(E$mixcat),array(c(T,F,T,T,F,T),dim=c(1,6)))
54 |
55 | # test 2: numerical statement in 'then' clause
56 | v <- expression(
57 | A %in% letters[1:2],
58 | B %in% letters[3:4],
59 | if ( x > 0 ) A == 'a'
60 | )
61 | E <- editset(v)
62 | expect_equal(E$num, editmatrix(expression()))
63 | expect_equivalent(E$mixnum, editmatrix(expression(x>0)))
64 | expect_equivalent(getArr(E$mixcat), array(c(F,T,T,T,F,T),dim=c(1,6)))
65 |
66 | # throws exception in editrules 2.8.0 (thanks to Alois Haslinger)
67 | editset(expression(
68 | x %in% letters[1:2]
69 | , y %in% letters[3:5]
70 | , z > 0
71 | , if(x =='a' && y == 'c') z < 7
72 | ))
73 |
74 | })
75 |
76 |
77 |
78 | ## various editset functionalities
79 | test_that("contains finds the right variables in an editset",{
80 | E <- editset(expression(
81 | x + y == z,
82 | if ( s + t >= 6 ) x < 10,
83 | A %in% letters[1:2],
84 | if ( A == 'a' ) x > 3
85 | ))
86 | expect_equivalent(
87 | contains(E),
88 | matrix(c(
89 | T,T,T,F,F,F,
90 | T,F,F,T,T,F,
91 | T,F,F,F,F,T),
92 | byrow=TRUE,
93 | nrow=3)
94 | )
95 |
96 | })
97 |
98 | test_that("as.editset for cateditmatrix works",{
99 | E <- cateditmatrix(expression(
100 | gender %in% c("male", "female")
101 | , if (pregnant) gender == "female"
102 | ))
103 |
104 | as.editset(E)
105 | })
106 |
107 |
108 | test_that("simple mixed edit without coefficients is recognized",{
109 | E <- editset(expression(if ( A %in% c('a','b') ) x > 0 ))
110 | expect_equal(nedits(E),1)
111 | expect_equal(length(getVars(E,type="dummy")),1)
112 | })
113 |
114 |
115 | test_that("Mixed parsing edit containing brackets works",{
116 | E <- editset("if ((x > 0) && (y < 0)) z < y")
117 | })
118 |
119 |
120 |
121 |
122 |
--------------------------------------------------------------------------------
/papers/pic.R:
--------------------------------------------------------------------------------
1 | # Generate all pictures for the vignettes.
2 | # mvdl 08.07.2011
3 |
4 | diamondFile <- "diamond.pdf"
5 | twodiamondFile <- "twodiamond.pdf"
6 |
7 | ## plot parameters
8 | linewidth = 2
9 | polygoncolor = "#E3E3E3"
10 | textcex = 2
11 |
12 | plotPoly <- function(x,y){
13 | polygon(x,y,lwd=linewidth,col=polygoncolor)
14 | }
15 |
16 | ###############################################################################
17 | # DIAMOND AREA
18 | ###############################################################################
19 |
20 | # Allowed area is the diamond
21 | # {(x,y) in R : y <= x+1, y>=-x+3, y>=x-1, y<=-x+5 }
22 | x <- array(c(
23 | 1,4,
24 | 0,3,
25 | 0,3,
26 | 1,4), dim=c(2,4)
27 | )
28 | y <- array(0,dim=c(2,4))
29 | y[,1] <- x[,1] - 1 # e1
30 | y[,2] <- -x[,2] + 3 # e2
31 | y[,3] <- x[,3] + 1 # e3
32 | y[,4] <- -x[,4] +5 # e4
33 |
34 |
35 | pdf(diamondFile)
36 | plot(x[,1],y[,1],
37 | xlim=c(-1,4.5),
38 | ylim=c(-1,4.5),
39 | 'l',
40 | lwd=linewidth,
41 | xlab="x",
42 | ylab="y",
43 | cex.lab=2,
44 | cex.axis=2,
45 | )
46 |
47 | for ( i in 2:4 ){
48 | lines(x[,i],y[,i],
49 | lwd=linewidth
50 | )
51 | }
52 | text(2.7,1.3,expression(e[1]),cex=textcex)
53 | text(1.3,1.3,expression(e[2]),cex=textcex)
54 | text(1.3,2.7,expression(e[3]),cex=textcex)
55 | text(2.7,2.7,expression(e[4]),cex=textcex)
56 |
57 | # fill up diamond
58 | xPoints <- c(2,3,2,1)
59 | yPoints <- c(1,2,3,2)
60 | plotPoly(xPoints,yPoints)
61 |
62 | # record 1
63 | points(2,-1,pch=19,cex=textcex)
64 | # record 2
65 | points(0,0,pch=19,cex=textcex)
66 | # record 3
67 | points(-1,2,pch=19,cex=textcex)
68 |
69 | arrows(
70 | x0=c( 2.0,0.2,0.2,-0.8),
71 | y0=c(-0.8,0.2,0.2, 2.0),
72 | x1=c( 2.0,0.85,1.6, 0.6),
73 | y1=c( 0.6,1.65,0.85, 2.0),
74 | lwd=linewidth,lty=c(1,2,2,1))
75 |
76 | # solution sets
77 | lines(
78 | x = c(2,2),
79 | y = c(1,3))
80 | lines(
81 | x = c(1,3),
82 | y = c(2,2))
83 |
84 | # labels
85 | text(1.3,-1,"(2,-1)",cex=textcex)
86 | text(0,-0.4,"(0,0)",cex=textcex)
87 | text(-1,2.7,"(-1,2)",srt=90,cex=textcex)
88 | dev.off()
89 |
90 | ###############################################################################
91 | # nonconvex example with two diamonds
92 | ###############################################################################
93 |
94 |
95 | xleft <- c(-2,-3,-2,-1)
96 | yleft <- c( 1, 2, 3, 2)
97 |
98 | xright <- c(2,1,2,3)
99 | yright <- c(2,3,4,3)
100 |
101 | pdf(twodiamondFile)
102 | plot(0,0,xlim=c(-3,3),ylim=c(0,4),
103 | col="white",
104 | xlab="x",ylab="y",
105 | cex.lab=textcex,
106 | cex.axis=textcex)
107 | #grid()
108 | plotPoly(xleft,yleft)
109 | plotPoly(xright,yright)
110 |
111 | # record 1
112 | points(1.5,1.5,pch=19,cex=textcex)
113 | # record 2
114 | points(0,2.5,pch=19,cex=textcex)
115 | # record 2
116 | points(2,0,pch=19,cex=textcex)
117 |
118 | arrows(
119 | x0 = c( 1.3, 1.5,-0.2, 0.2, 2.0),
120 | y0 = c( 1.5, 1.7, 2.5, 2.5, 0.2),
121 | x1 = c(-1.3, 1.5,-1.3, 1.3, 2.0),
122 | y1 = c( 1.5, 2.3, 2.5, 2.5, 1.8),
123 | lwd=linewidth
124 | )
125 |
126 | # solution sets
127 | lines(
128 | x=c(-1.5,-2.5),
129 | y=c(2.5,2.5))
130 | lines(
131 | x=c(1.5,1.5),
132 | y=c(2.5,3.5))
133 | lines(
134 | x=c(1.5,2.5),
135 | y=c(2.5,2.5))
136 | lines(
137 | x=c(-1.5,-2.5),
138 | y=c(1.5,1.5))
139 | lines(
140 | x=c(2,2),
141 | y=c(2,4))
142 |
143 | # labels
144 | text(0.9,1.2,"(3/2,3/2)", cex=textcex)
145 | text(0.0,2.2,"(0,5/2)", cex=textcex)
146 | text(1.4,0.0,"(2,0)", cex=textcex)
147 | dev.off()
148 |
--------------------------------------------------------------------------------
/pkg/R/print.R:
--------------------------------------------------------------------------------
1 |
2 | #' print editarray
3 | #'
4 | #' @method print editarray
5 | #' @param x an \code{\link{editarray}}
6 | #' @param textOnly If \code{FALSE}, also print internal structure
7 | #' @param ... arguments to be passed to or from other methods.
8 | #' @keywords internal
9 | #' @export
10 | print.editarray <- function(x, textOnly=FALSE, ...){
11 | d <- datamodel(x)
12 | A <- getArr(x)
13 | if ( ncol(A) > 0 ){
14 | cn <- paste(abbreviate(d$variable),":",abbreviate(d$value),sep="")
15 | colnames(A) <- cn
16 | }
17 | if( !textOnly ){
18 | print(A)
19 | cat("Edit array:\n")
20 | }
21 | cat("\nEdit rules:\n")
22 | desc <- attr(x,'description')
23 | if ( is.null(desc) ){
24 | desc <- rep("",nrow(x))
25 | } else {
26 | desc <- paste('[',desc,']')
27 | }
28 | u <- as.character(x)
29 | nm <- names(u)
30 | pr <- paste(format(nm,width=max(nchar(nm))),':',paste(u,desc),collapse='\n')
31 | cat(pr,'\n')
32 | }
33 |
34 |
35 |
36 | #' print editmatrix
37 | #'
38 | #' @export
39 | #' @method print editmatrix
40 | #'
41 | #' @param x editmatrix object to be printed
42 | #' @param textOnly If \code{FALSE}, also print internal structure
43 | #' @param ... further arguments passed to or from other methods.
44 | #' @keywords internal
45 | print.editmatrix <- function(x, textOnly=FALSE, ...){
46 | if (!textOnly){
47 | cat("Edit matrix:\n")
48 | print(toDataFrame(x), ...)
49 | cat("\nEdit rules:\n")
50 | }
51 | desc <- attr(x,'description')
52 | if ( is.null(desc) ){
53 | desc <- rep("",nrow(x))
54 | } else {
55 | desc <- paste('[',desc,']')
56 | }
57 | u <- as.character(x)
58 | nm <- names(u)
59 | pr <- paste(format(nm,width=max(nchar(nm))),':', paste(u,desc), collapse='\n')
60 | cat(pr,'\n')
61 | }
62 |
63 | #' print cateditmatrix
64 | #'
65 | #' @export
66 | #' @method print cateditmatrix
67 | #'
68 | #' @param x cateditmatrix object to be printed
69 | #' @param ... further arguments passed to or from other methods.
70 | #' @keywords internal
71 | print.cateditmatrix <- function(x, textOnly=TRUE, ...){
72 | if (!textOnly) {
73 | cat("Edit matrix:\n")
74 | print(as.data.frame(x), ...)
75 | cat("\nEdit rules:\n")
76 | }
77 | desc <- attr(x,'description')
78 | if ( is.null(desc) ){
79 | desc <- rep("",nrow(x))
80 | } else {
81 | desc <- paste('[',desc,']')
82 | }
83 | u <- as.character(x, asIfStatement=TRUE)
84 | nm <- names(u)
85 | pr <- paste(format(nm,width=max(nchar(nm))),':',paste(u,desc),collapse='\n')
86 | cat(pr,'\n')
87 | }
88 |
89 | #' print editset
90 | #'
91 | #' @export
92 | #' @method print editset
93 | #'
94 | #' @param x editset object to be printed
95 | #' @param ... further arguments passed to or from other methods.
96 | #' @keywords internal
97 | print.editset <- function(x, ...){
98 | u <- as.character(x,datamodel=FALSE)
99 | v <- as.character(x,datamodel=TRUE)
100 | cnd <- attr(x,'condition')
101 | if ( nedits(cnd) > 0 ){
102 | cat('conditions:\n')
103 | cat(paste(cnd,collapse=', '),'\n')
104 | }
105 | v <- v[! v%in% u]
106 | if ( length(v) > 0 ) cat("\nData model:\n")
107 | if ( length(v)>0 ){
108 | nm <- names(v)
109 | cat(paste(format(nm,width=max(nchar(nm))),':',v,collapse='\n'),'\n')
110 | }
111 | cat("\nEdit set:\n")
112 | nm <- names(u)
113 | cat(paste(format(nm,width=max(nchar(nm))),':',u,collapse='\n'),'\n')
114 | }
115 |
116 |
117 | #' print editset
118 | #'
119 | #' @export
120 | #' @method print editlist
121 | #'
122 | #' @param x editset object to be printed
123 | #' @param ... further arguments passed to or from other methods.
124 | #' @keywords internal
125 | print.editlist <- function(x, ...){
126 | cat("editsets:\n")
127 | j <- 0
128 | lapply(x,function(i) {j <<- j+1;cat("\nSet",j," ");print(i)}, ...)
129 | }
130 |
131 |
132 |
133 |
134 |
--------------------------------------------------------------------------------
/pkg/R/generateEdits.R:
--------------------------------------------------------------------------------
1 | #' Field code forest algorithm
2 | #'
3 | #' Workhorse function for \code{\link{generateEdits}}
4 | #'
5 | #' @param E an editarray
6 | #' @param totreat variable names still to be eliminated from E
7 | #' @param env an environment where all editmatrices will be stored
8 | #'
9 | #' @seealso \code{\link{generateEdits}}, \code{\link{editarray}}
10 | #'
11 | #' @example ../examples/generateEdits.R
12 | #' @keywords internal
13 | fcf.env <- function(E,totreat,env){
14 | # add current edits to collection
15 | if (nrow(E)>0){
16 | env$i <- env$i + 1
17 | U <- c(env$E,E)
18 | env$E <- U[!isSubset(U),]
19 | } else {
20 | # return if there are no more edits
21 | return()
22 | }
23 |
24 | # divide and conquer
25 | B <- blocks(E)
26 | for ( b in B){
27 | # variables to be treated in the block
28 | totreatb <- totreat[names(totreat) %in% getVars(b)]
29 | vrs <- names(totreatb)[totreatb]
30 | # variables which cannot be resolved need not be treated (prune the tree)
31 | totreatb[!resolves(b,vrs)] <- FALSE
32 | vrs <- names(totreatb)[totreatb]
33 | # order variables so the most connected variables are eliminated first,
34 | # eliminate variables and recurse.
35 | vrs <- vrs[order(colSums(contains(b,vrs)),decreasing=TRUE)]
36 | for ( k in vrs ){
37 | totreatb[k] <- FALSE
38 | fcf.env(reduce(eliminate(b,k)),totreatb[-which(names(totreatb)==k)],env)
39 | }
40 | }
41 | }
42 |
43 |
44 |
45 | #' Derive all essentially new implicit edits
46 | #'
47 | #' Implements the Field Code Forest (FCF) algorithm of Garfinkel et al (1986) to
48 | #' derive all essentially new implicit edits from an editarray. The FCF is really
49 | #' a single, highly unbalanced tree. This algorithm traverses the tree, pruning many
50 | #' unnecessary branches, uses \code{\link{blocks}} to divide and conquer, and
51 | #' optimizes traversing order. See Van der Loo (2012) for a description
52 | #' of the algorithms.
53 | #'
54 | #' @param E An \code{\link{editarray}}
55 | #' @return A 3-element named \code{list}, where element \code{E} is an \code{\link{editarray}} containing all
56 | #' generated edits. \code{nodes} contains information on the number of nodes in the tree and vs the number of nodes
57 | #' traversed and \code{duration} contains user, system and elapsed time inseconds.
58 | #' The \code{\link[=editarray]{summary}} method for \code{\link{editarray}} prints this information.
59 | #'
60 | #'
61 | #' @references
62 | #' R.S. Garfinkel, A.S. Kunnathur and G.E. Liepins (1986).
63 | #' Optimal imputation of erroneous data: categorical data, general edits.
64 | #' Operations Research 34, 744-751.
65 | #'
66 | #' M.P.J. Van der Loo (2012). Variable elimination and edit generation with a flavour of
67 | #' semigroup algebra (submitted)
68 | #'
69 | #' @export
70 | generateEdits <- function(E){
71 | if ( !is.editarray(E) ) stop("Argument must be of class 'editarray' ")
72 | t0 <- proc.time()
73 | # initialize variables to treat
74 | vars <- getVars(E)
75 | totreat <- rep(TRUE,length(vars))
76 | names(totreat) <- vars
77 | # set up environment to collect generated edits
78 | e <- new.env()
79 | # node counter
80 | e$i <- 0
81 | e$E <- E[logical(0),]
82 | # call the workhorse
83 | fcf.env(E,totreat,e)
84 | duration <- getDuration(proc.time()-t0)
85 | # return edits
86 | return(
87 | list(
88 | edits=e$E,
89 | nodes=c(nodesInTree = 2^length(vars), nodesTraversed = e$i),
90 | duration=duration
91 | )
92 | )
93 | }
94 |
95 |
96 |
97 | # Check which variables of 'vars' can be resolved
98 | resolves <- function(E,vars){
99 | if ( length(vars)==0) return(logical(0))
100 | ind <- getInd(E)[vars]
101 | Ic <- contains(E,vars)
102 | sapply(vars, function(v) all(colSums(E[Ic[,v],ind[[v]],drop=FALSE])>0))
103 | }
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/pkg/R/softEdits.R:
--------------------------------------------------------------------------------
1 | #' Derive editmatrix with soft constraints based on boundaries of variables. This is a utility function that is used for
2 | #' constructing a mip/lp problem.
3 | #' @param E normalized \code{editmatrix}
4 | #' @param prefix \code{character} used for naming dummy variables in matrix.
5 | #' @keywords internal
6 | softEdits <- function(E, prefix="delta.", ...){
7 | UseMethod("softEdits")
8 | }
9 |
10 | #' Derive editmatrix with soft constraints based on boundaries of variables. This is a utility function that is used for
11 | #' constructing a mip/lp problem.
12 | #' @param E normalized \code{editmatrix}
13 | #' @param prefix \code{character} used for naming dummy variables in matrix.
14 | #' @keywords internal
15 | softEdits.editmatrix <- function(E, prefix="delta.", postfix="", M=1e7, ...){
16 |
17 | if (!nrow(E)){
18 | return(E)
19 | }
20 |
21 | n <- nrow(E)
22 | vars <- getVars(E)
23 | ops <- getOps(E)
24 |
25 | adapt <- paste(prefix,rownames(E), sep="")
26 |
27 | A <- getA(E)
28 | b <- getb(E)
29 | isna <- is.na(b)
30 | eq <- (ops == "==") & !isna
31 |
32 | Ab <- cbind( A
33 | , diag(-M, n)
34 | , b
35 | )[!isna,,drop=FALSE]
36 |
37 | # copy the equality constraints
38 | Ab_eq <- if(any(eq)){
39 | cbind( -A
40 | , diag(-M,n)
41 | , -b
42 | )[eq,,drop=FALSE]
43 | }
44 |
45 | # clear A, trick that keeps the rownames
46 | A[,] <- 0
47 |
48 | # NA's must be changed.
49 | Ab_na <- if(any(isna)){
50 | cbind( A #matrix(0, nrow=n, ncol=ncol(A))
51 | , diag(1, n)
52 | , 1
53 | )[isna,,drop=FALSE]
54 | }
55 | # TODO cleanup this code
56 | #print(list(Ab=Ab, Ab_eq=Ab_eq, Ab_na=Ab_na))
57 | Ab <- rbind(Ab, Ab_eq, Ab_na)
58 | rownames(Ab) <- make.unique(paste0(rownames(Ab), postfix), "_" )
59 | ops <- c(ops[!isna], ops[eq])
60 | ops <- gsub("==", "<=", ops)
61 | ops <- c(ops, rep("==", sum(isna)))
62 |
63 | colnames(Ab) <- c(getVars(E), adapt, "CONSTANT")
64 |
65 | seE <- neweditmatrix(Ab, ops=ops)
66 | seE
67 | }
68 |
69 | #' Derive editmatrix with soft constraints. This is a utility function that is used for
70 | #' constructing a mip/lp problem.
71 | #' @param E normalized \code{editmatrix}
72 | #' @param prefix \code{character} used for naming dummy variables in matrix.
73 | #' @keywords internal
74 | softEdits.cateditmatrix <- function(E, prefix="delta.", postfix="", ...){
75 | if (!nrow(E)){
76 | return(E)
77 | }
78 | eq <- getOps(E) == "=="
79 | b <- getb(E)
80 |
81 | dummies <- paste(prefix, rownames(E), sep="")
82 |
83 | seA <- diag(ifelse(eq, -1, 1), ncol=length(eq), nrow=length(eq))
84 | colnames(seA) <- dummies
85 | seA <- cbind(getA(E), seA)
86 | rownames(seA) <- paste0(rownames(seA), postfix)
87 |
88 | binvars <- sapply(colnames(seA), is.character)
89 | seE <- as.editmatrix(seA, b, getOps(E), binvars=binvars)
90 | seE
91 | }
92 |
93 | #' Derive editmatrix with soft constraints based on boundaries of variables. This is a utility function that is used for
94 | #' constructing a mip/lp problem.
95 | #' @param E normalized \code{editmatrix}
96 | #' @param prefix \code{character} used for naming dummy variables in matrix.
97 | #' @keywords internal
98 | softEdits.editarray<- function(E, prefix="delta.", postfix="", ...){
99 | if (!nrow(E)){
100 | return(E)
101 | }
102 | softEdits.cateditmatrix(cateditmatrix(E), prefix=prefix, postfix=postfix, ...)
103 | }
104 |
105 | #quick tests
106 |
107 | # E <- editmatrix(expression( x - y < 2
108 | # , x + y < 5
109 | # , x + y == 3
110 | # , z == 1
111 | # )
112 | # )
113 | #
114 | # # set the z == NA
115 | # E[4,ncol(E)] <- NA
116 | # (se <- softEdits(E))
117 | #softEdits(cateditmatrix(c("if (married) adult", "married %in% c(TRUE,FALSE)","married==TRUE")))
118 |
--------------------------------------------------------------------------------
/pkg/R/mip.R:
--------------------------------------------------------------------------------
1 | #' Write an editset into a mip representation
2 | #'
3 | #' Writes an editset or an object coercable to an editset as a mip problem.
4 | #' @param E an \code{link{editset}} or an object that is coerciable to an
5 | #' \code{editset}
6 | #' @param x named \code{list}/\code{vector} with variable values
7 | #' @param weight reliability weights for values of \code{x}
8 | #' @param M Constant that is used for allowing the values to differ from \code{x}
9 | #' @param epsilon Constant that is used for converting '<' into '<='
10 | #' @param prefix prefix for dummy variables that are created
11 | #' @param ... not used
12 | #' @return a mip object containing al information for transforming it
13 | #' into an lp/mip problem
14 | #' @export
15 | as.mip <- function( E, x=NULL, weight=NULL, M=1e7, epsilon=1e-3, prefix="delta."
16 | , ...){
17 |
18 | #TODO add ruleWeights = c(Inf, ..., 1, 1, NA) etc. check with `is.finite`
19 | #TODO add lambda used in objfn of softEdits
20 | #TODO add objfn specification (needs reordering in function so it has the same order)
21 |
22 | E <- as.editset(E)
23 | objfn <- NULL
24 |
25 | E_mip = c( E$num
26 | , cateditmatrix(E$mixcat)
27 | , softEdits(editmatrix(invert(as.character(E$mixnum))), prefix="")
28 | )
29 |
30 | if (!missing(x)){
31 | if (is.null(weight)){
32 | weight <- rep(1.0, length(x))
33 | } else {
34 | stopifnot(length(weight) == length(x))
35 | }
36 |
37 | names(weight) <- paste0(prefix, names(x))
38 | # create expression vector with var == value
39 | expr <- as.expression( lapply(names(x)
40 | , function(v){
41 | substitute( var == value
42 | , list(var=as.symbol(v), value=x[[v]])
43 | )
44 | }))
45 |
46 | isna <- sapply(x, is.na)
47 |
48 | num_vars <- getVars(E, type="num")
49 | nums <- !isna & names(x) %in% num_vars
50 |
51 | if (any(nums)) {
52 | A0_num <- editmatrix(expr[nums])
53 | rownames(A0_num) <- names(x)[nums]
54 | A0_num <- softEdits(A0_num, prefix=prefix, postfix="0",M=M)
55 | } else A0_num <- NULL
56 |
57 | cat_vars <- getVars(E, type="cat")
58 | cats <- !isna & names(x) %in% cat_vars
59 |
60 | if (any(cats)){
61 | A0_cat <- cateditmatrix(expr[cats])
62 | rownames(A0_cat) <- names(x)[cats]
63 | A0_cat <- softEdits(A0_cat, prefix=prefix, postfix="0")
64 | } else A0_cat <- NULL
65 |
66 | E_mip <- c(E_mip, A0_num, A0_cat)
67 | if (any(isna)){
68 | E_mip <- c( E_mip
69 | , editmatrix(paste0(prefix, names(x)[isna], "==", 1))
70 | )
71 | }
72 |
73 | }
74 |
75 | # replace strict inequalities...
76 | A <- getA(E_mip)
77 | b <- getb(E_mip)
78 | ops <- getOps(E_mip)
79 | lt <- ops == "<"
80 | b[lt] <- b[lt] - epsilon
81 | ops[lt] <- "<="
82 | E_mip = as.editmatrix(A=A, b=b, ops=ops)
83 |
84 | vars <- colnames(A)
85 | objfn <- sapply(vars, function(v) 0)
86 | objfn[names(weight)] <- weight
87 |
88 | numvars = sapply(vars, `%in%`, getVars(E, type="num"))
89 | binvars = !numvars
90 |
91 | structure(
92 | list( E = E_mip
93 | , objfn = objfn
94 | , binvars = which(binvars)
95 | , numvars = which(numvars)
96 | , M = M
97 | , epsilon = epsilon
98 | ),
99 | class="mip"
100 | )
101 | }
102 |
103 | #' @method print mip
104 | print.mip <- function(x, ...){
105 | print.editmatrix(x$E, textOnly=T)
106 | if (!is.null(x$objfn)) {
107 | idx <- which(x$objfn != 0)
108 | if (length(idx)) {
109 | of <- paste0(x$objfn, "*", colnames(x$E))[idx]
110 | of <- paste(of, collapse=" + ")
111 | cat("objective function = min: ", of, "\n")
112 | }
113 | }
114 | }
115 |
116 | # # quick test
117 | # E <- editset(c(r1="x > 1","y >= x", r2="if (x>1) y> 2", r3="A %in% c('a', 'b')"))
118 | # as.mip(E)
119 |
--------------------------------------------------------------------------------
/pkg/R/parseMix.R:
--------------------------------------------------------------------------------
1 | MIXOPS <- c("if", "||", "|", "&&", "&")
2 |
3 | #' Parse a mixed edit
4 | #'
5 | #' parseMix replaces all numerical edits with a generated dummy boolean variable and returns the resulting categorical
6 | #' edit plus the list of found of numerical edits. These expressions should be handled further by `parseCat` and
7 | #' `parseNum`.
8 | #' @param e expression to be parsed
9 | #' @param numid starting number for dummy name generation
10 | #'
11 | #' @return list with categorical expression (\code{cat}), which is normalized, a numerical expression (\code{nums})
12 | #' and a negated version of this expression (\code{negNums})
13 | #' @keywords internal
14 | parseMix <- function(e, editname="", numid=0, negate=TRUE){
15 |
16 | # should the expressions be returned or should parseCat and parseNum be called on cat and nums?
17 |
18 | if (length(e) < 3) return(NULL)
19 | op <- as.character(e[[1]])
20 | if (!op %in% MIXOPS) stop("invalid mix")
21 |
22 | cat <- e
23 | nums <- expression()
24 | numid <- numid
25 |
26 | pm <- function(i, neg=negate){
27 | # rewrite equality as inequalities
28 | #e[[i]] <- rewriteEq(e[[i]])
29 | edit <- e[[i]]
30 | if (length(edit) == 1) return()
31 | cmp <- deparse(edit[[1]])
32 |
33 | # remove brackets
34 | if (cmp == "("){
35 | edit <- edit[[2]]
36 | }
37 |
38 | if (isNum(edit)){
39 |
40 | if (cmp == "=="){
41 | stop("edit '", deparse(e),"' contains '==' which is not allowed, in a mixed edit")
42 | }
43 |
44 | numid <<- numid + 1
45 | numvar <- paste(editname, ".l",numid,sep="")
46 | #replace numeric edit with generated dummy boolean edit name and handle normal form
47 | dum <- as.name(numvar)
48 | if (neg){
49 | dum <- quote(!a)
50 | dum[[2]] <- as.name(numvar)
51 | edit <- negateEdit(edit)
52 | }
53 | cat[[i]] <<- dum
54 | nums[numvar] <<- as.expression(edit)
55 | } else if (!isCat(edit)){
56 | l <- parseMix(edit, numid=numid, editname=editname, negate=neg)
57 | cat[[i]] <<- l$cat
58 | nums[names(l$nums)] <<- l$nums
59 | numid <<- l$numid
60 | }
61 | }
62 |
63 | # don't negate the premisse (term 2)
64 | pm(2, neg=ifelse(op == "if", !negate, negate))
65 | # negate the consequent (term 3)
66 | pm(3, neg=negate)
67 |
68 | negNums <- nums
69 | negNums[] <- sapply(nums, negateEdit)
70 |
71 | if (length(negNums)){
72 | names(negNums) <- paste("!",names(negNums), sep="")
73 | }
74 |
75 | list( cat = cat # parseCat(cat)
76 | , nums = nums # lapply(nums, parseNum)
77 | , negNums = negNums # lapply(orNums, parseNum)
78 | , numid=numid
79 | )
80 | }
81 |
82 | # e has to be an edit
83 | negateEdit <- function(e){
84 | op <- as.character(e[[1]])
85 | if (op == "!")
86 | return(e[[2]])
87 | op <- switch( op
88 | , "<" = ">="
89 | , "<=" = ">"
90 | , ">" = "<="
91 | , ">=" = "<"
92 | , "==" = "!="
93 | , "!=" = "=="
94 | )
95 | if (is.null(op)){
96 | ne <- quote(!a)
97 | ne[[2]] <- e
98 | e <- ne
99 | } else {
100 | e[[1]] <- as.symbol(op)
101 | }
102 | e
103 | }
104 |
105 | # e has to be an numerical inequality!
106 | rewriteInEq <- function(e){
107 | op <- as.character(e[[1]])
108 | if (op != "!=") return(e)
109 | eAnd <- quote(a || b)
110 | eAnd[[2]] <- e
111 | eAnd[[2]][[1]] <- as.symbol(">")
112 | eAnd[[3]] <- e
113 | eAnd[[3]][[1]] <- as.symbol("<")
114 | eAnd
115 | }
116 |
117 | # quick test
118 | # rewriteInEq(quote(x != y + 1))
119 | #
120 | # a <- negateEdit(quote(x>2))
121 | # a
122 | # negateEdit(a)
123 | #
124 | # a <- negateEdit(quote(A %in% "a"))
125 | # a
126 | # negateEdit(a)
127 | #
128 | # pm <- parseMix( quote( if(x>1 &&
129 | # x < 10 &&
130 | # A %in% c('a1')) y > 2 )
131 | # , editname="e1")
132 | # pm
133 | #
134 |
135 | #pm <- parseMix(quote(if ((x>0)) A))
136 | # pm
137 |
--------------------------------------------------------------------------------
/pkg/R/disjunct.R:
--------------------------------------------------------------------------------
1 |
2 | #' Decouple a set of conditional edits
3 | #'
4 | #' An editset is transformed to a list of \code{\link{editset}s}
5 | #' which do not contain any conditional numeric/categorical edits anymore. Each \code{\link{editset}} gains an
6 | #' attribute \code{\link{condition}}, which holds the series of assumptions made to
7 | #' decouple the original edits. This attribute will be printed when not \code{NULL}. Warning: this may be slow
8 | #' for large, highly entangled sets of edits.
9 | #'
10 | #' @param E Object of class \code{\link{editset}}
11 | #' @param type Return type: \code{list} (default) for \code{editlist}, \code{env} for \code{editenv}.
12 | #' @return An object of class \code{editlist} (\code{editenv}), which is nothing more than a \code{list} (\code{environment}) of
13 | #' \code{editsets} with a class attribute. Each element has an attribute 'condition' showing which conditions
14 | #' were assumed to derive the editset.
15 | #' @seealso \code{\link{separate}}, \code{\link{condition}}, \code{\link{blocks}}
16 | #' @example ../examples/dnf.R
17 | #' @export
18 | disjunct <- function(E, type=c('list','env')){
19 | if (!inherits(E,'editset')) stop('only for objects of class editset')
20 | type=match.arg(type)
21 | e <- new.env()
22 | e$i <- 0
23 | dnf(E, e)
24 | rm('i',envir=e)
25 | if ( type == 'list'){
26 | e <- as.list(e)
27 | class(e) <- c("editlist")
28 | } else {
29 | class(e) <- c("editenv")
30 | }
31 | e
32 | }
33 |
34 | dnf <- function(E, env){
35 | cnd <- attr(E,'condition')
36 | vars <- getVars(E,type="dummy")
37 | if (length(vars) == 0 ){
38 | env$i <- env$i + 1
39 | condition(E) <- condition(E)[!duplicated(condition(E)),]
40 | assign(paste("E",env$i,sep=""),E,envir=env)
41 | } else {
42 | E1 <- substValue(E,vars[1],TRUE)
43 | if ( isFeasible(condition(E1)) &&
44 | isFeasible(c(condition(E1), E1$num)) &&
45 | isFeasible(E1$mixcat)
46 | ) dnf(E1,env)
47 | E2 <- substValue(E,vars[1],FALSE)
48 | if ( isFeasible(condition(E2)) &&
49 | isFeasible(c(condition(E2), E2$num)) &&
50 | isFeasible(E2$mixcat)
51 | ) dnf(E2,env)
52 | }
53 | }
54 |
55 |
56 |
57 | #' Separate an editset into its disconnected blocks and simplify
58 | #'
59 | #' The input edits are separated into disjunct blocks, and simplified to
60 | #' \code{editmatrix} or \code{\link{editarray}} where possible. Remaining
61 | #' \code{\link[=editset]{editsets}} are separated into \code{\link{disjunct}}
62 | #' \code{\link[=disjunct]{editlists}}.
63 | #'
64 | #' @param E An \code{\link{editset}}
65 | #' @return A \code{list} where each element is either an \code{\link{editmatrix}}, an \code{\link{editarray}}
66 | #' or an object of class \code{\link[=disjunct]{editlist}} which cannot be simplified further.
67 | #'
68 | #' @example ../examples/separate.R
69 | #' @seealso \code{\link{blocks}}, \code{\link{disjunct}}, \code{\link{condition}}
70 | #'
71 | #' @references
72 | #' M. van der Loo and De Jonge, E. (2012). Manipulation of conditional restrictions and error localization
73 | #' with the editrules package. Discussion paper 2012xx, Statistics Netherlands, The Hague
74 | #' (included with the package).
75 | #'
76 | #' @export
77 | separate <- function(E){
78 | B <- blocks(E)
79 | B <- lapply(B, function(b){
80 | et <- editType(b)
81 | if ( all(et == 'num') ){
82 | b <- b$num
83 | } else if ( all(et == 'cat') ){
84 | b <- b$mixcat
85 | } else {
86 | b <- disjunct(b)
87 | }
88 | b
89 | })
90 | B
91 | }
92 |
93 |
94 | # For an editlist, determine the variable type and where in the list it occurs
95 | # E editlist
96 | # var character
97 | # returns
98 | # list()
99 | # $type : 'num' or 'mixcat'
100 | # $occurs : logical of length(E)
101 | varTypeAndOccurrence <- function(E,var){
102 | ivar <- sapply(E,function(e) c(
103 | var %in% getVars(e$num),
104 | var %in% getVars(e$mixcat)
105 | )
106 | )
107 | if ( sum(ivar) == 0 ){
108 | return(NA)
109 | }
110 | if ( !(is.array(ivar)) ) ivar <- array(ivar,dim=c(2,1))
111 | if ( any(ivar[1,]) ) {
112 | type <- 'num'
113 | iset <- ivar[1,]
114 | } else {
115 | type <- 'mixcat'
116 | iset <- ivar[2,]
117 | }
118 | list(type=type,occurs=iset)
119 | }
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testLocalizeErrors.R:
--------------------------------------------------------------------------------
1 |
2 | context("Error localization: numerical data.frames")
3 |
4 | test_that('localizeErrors works without specified weight',{
5 |
6 | expect_equivalent(localizeErrors(
7 | E = editmatrix(c('x+y==z','x<1')),
8 | dat = data.frame(
9 | x = c(1,1,1),
10 | y = c(1,1,1),
11 | z = c(1,1,1)
12 | )
13 | )$adapt,
14 | matrix(c(
15 | TRUE , FALSE, FALSE,
16 | TRUE , FALSE, FALSE,
17 | TRUE , FALSE, FALSE),
18 | nrow=3,
19 | byrow=TRUE
20 | )
21 | )
22 |
23 |
24 | })
25 |
26 |
27 |
28 | test_that('localizeErrors works with single specified weight',{
29 |
30 | expect_equivalent(localizeErrors(
31 | E = editmatrix('x+y==z'),
32 | dat = data.frame(
33 | x = c(1,1,1),
34 | y = c(1,1,1),
35 | z = c(1,1,1)
36 | ),
37 | weight = c(1,2,2),
38 | )$adapt,
39 | matrix(c(
40 | TRUE , FALSE, FALSE,
41 | TRUE , FALSE, FALSE,
42 | TRUE , FALSE, FALSE),
43 | nrow=3,
44 | byrow=TRUE
45 | )
46 | )
47 |
48 |
49 | })
50 |
51 |
52 | test_that('localizeErrors works with different weights per record',{
53 | expect_equivalent(localizeErrors(
54 | E = editmatrix('x+y==z'),
55 | dat = data.frame(
56 | x = c(1,1,1),
57 | y = c(1,1,1),
58 | z = c(1,1,1)
59 | ),
60 | weight = matrix(c(
61 | 1,2,2,
62 | 2,1,2,
63 | 2,2,1),
64 | nrow=3,
65 | byrow=TRUE
66 | )
67 | )$adapt,
68 | matrix(c(
69 | TRUE , FALSE, FALSE,
70 | FALSE, TRUE , FALSE ,
71 | FALSE, FALSE, TRUE),
72 | nrow=3,
73 | byrow=TRUE
74 | )
75 | )
76 | expect_that(localizeErrors(
77 | E = editmatrix('x +y==z'),
78 | dat = data.frame(
79 | x = c(1,1,1),
80 | y = c(1,1,1),
81 | z = c(1,1,1)
82 | ),
83 | weight = matrix(c(
84 | 1,2,2,
85 | 2,2,1),
86 | nrow=3,
87 | byrow=TRUE
88 | )
89 | ),
90 | throws_error()
91 | )
92 | })
93 |
94 |
95 | test_that('localizeErrors handles data out-of-datamodel correctly',{
96 | # thanks to Elmar Wein for sending us this testcase.
97 | E <- editarray(c(
98 | "age %in% c('under aged','adult')",
99 | "maritalStatus %in% c('unmarried','married','widowed','divorced')",
100 | "positionInHousehold %in% c('marriage partner', 'child', 'other')",
101 | "if( age == 'under aged' ) maritalStatus == 'unmarried'",
102 | "if( maritalStatus %in% c('married','widowed','divorced')) !positionInHousehold %in% c('marriage partner','child')"
103 | ))
104 | record <- data.frame(age='under aged', maritalStatus='unmarried', positionInHousehold='out_of_range')
105 | expect_equivalent(
106 | localizeErrors(E,record)$adapt,
107 | array(c(FALSE,FALSE,TRUE),dim=c(1,3))
108 | )
109 | })
110 |
111 | test_that("localizeErrors works with TRUE/FALSE",{
112 | E <- editarray(expression(
113 | A %in% c(TRUE,FALSE),
114 | B %in% letters[1:4],
115 | if ( !A ) B %in% letters[1:2]
116 | ))
117 |
118 | # should run without errors...
119 | localizeErrors(E,data.frame(A=c(TRUE,FALSE),B=c('c',"d")))
120 | })
121 |
122 | test_that("localizeErrors works with mixed edit",{
123 | E <- editset(expression(
124 | married %in% c(TRUE,FALSE),
125 | if (married==TRUE) age >=17
126 | ))
127 |
128 | # note bb is switched off for mixed edits
129 | le <- localizeErrors(E, data.frame(married=TRUE, age=9))
130 | expect_equal(sum(le$adapt), 1, info="bb returns correct result")
131 |
132 | le <- localizeErrors(E, data.frame(married=TRUE, age=9), method="mip")
133 | expect_equal(sum(le$adapt), 1, info="mip returns correct result")
134 |
135 | })
136 |
137 | test_that("localizeErrors works for simple numerical if-else", {
138 | le <- localizeErrors(
139 | editset("if ( x > 0 ) y > 0")
140 | , data.frame(x=1,y=0)
141 | )
142 | expect_equal(sum(le$adapt),1)
143 | le <- localizeErrors(
144 | editset("if ( x > 0 ) y > 0")
145 | , data.frame(x=1,y=0)
146 | , method="mip"
147 | )
148 | expect_equal(sum(le$adapt),1)
149 |
150 | })
151 |
152 |
153 |
--------------------------------------------------------------------------------
/pkg/R/isObviouslyRedundant.R:
--------------------------------------------------------------------------------
1 | #' Find obvious redundancies in set of edits
2 | #'
3 | #' Detect simple redundancies such as duplicates or edits of the form \code{0 < 1} or \code{0 == 0}.
4 | #' For categorical edits, simple redundancies are edits that define an empty subregion
5 | #' of the space of all possible records (no record can ever be contained in such a region).
6 | #'
7 | #' @param E An \code{\link{editset}}, \code{\link{editmatrix}}, \code{\link{editarray}},
8 | #' \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editenv}}
9 | #' @param duplicates \code{logical}: check for duplicate edits? For an \code{\link{editset}},
10 | #' \code{\link[=duplicated]{editlist}} or \code{\link[=duplicated]{editenv}} this should be a logical
11 | #' 2-vector indicating which of the numerical or categorical edits should be checked for duplicates.
12 | #' @param ... parameters to be passed to or from other methods.
13 | #'
14 | #' @return logical vector indicating which edits are (obviously) redundant
15 | #' @seealso \code{\link{isObviouslyInfeasible}}, \code{\link{isSubset}}
16 | #' @export
17 | isObviouslyRedundant <- function(E, duplicates=TRUE, ...){
18 | UseMethod("isObviouslyRedundant")
19 | }
20 |
21 |
22 | # @method isObviouslyRedundant matrix
23 | #
24 | # @param operators character vecor of comparison operators in \code{<, <=, ==} of length \code{nrow(E)}
25 | # @param tol tolerance to check for zeros.
26 | #
27 | # @rdname isObviouslyRedundant
28 | # @keywords internal
29 | # @seealso \code{\link{isObviouslyRedundant}}, \code{\link{isObviouslyRedundant.editmatrix}}
30 | #' @export
31 | isObviouslyRedundant.matrix <- function(
32 | E,
33 | duplicates = TRUE,
34 | operators,
35 | tol=sqrt(.Machine$double.eps),
36 | ... ){
37 | ib <- ncol(E)
38 | zeroCoef <- rowSums(abs(E[,-ib,drop=FALSE])) < tol
39 | as.vector(
40 | zeroCoef & ( (operators %in% c("==","<=") & abs(E[,ib]) < tol)
41 | | (operators %in% c("<", "<=") & E[,ib] > tol)
42 | )
43 | )
44 | }
45 |
46 |
47 | #' @method isObviouslyRedundant editmatrix
48 | #' @rdname isObviouslyRedundant
49 | #'
50 | #' @export
51 | isObviouslyRedundant.editmatrix <- function(E, duplicates=TRUE, ...){
52 | if ( !isNormalized(E) ) E <- normalize(E)
53 | I <- isObviouslyRedundant.matrix(getAb(E), operators=getOps(E), ...)
54 | if ( duplicates ) I <- I | duplicated.editmatrix(E)
55 | I
56 | }
57 |
58 |
59 | #' @method isObviouslyRedundant editarray
60 | #' @rdname isObviouslyRedundant
61 | #' @export
62 | isObviouslyRedundant.editarray <- function(E, duplicates=TRUE, ...){
63 | if ( ncol(E) == 0 ) return(logical(0))
64 | if ( ncol(E) == 1 ) return(as.vector(E))
65 | ind <- getInd(E)
66 | red <- isRedundant.boolmat(getArr(E),getInd(E))
67 | if ( duplicates ) red <- red | duplicated.editarray(E)
68 | red
69 | }
70 |
71 |
72 | # Check redundancy in editarray after disection
73 | #
74 | # @keywords internal
75 | isRedundant.boolmat <- function(A, ind){
76 | if ( nrow(A) == 1 ) return(any(vapply(ind,function(i) sum(A[,i])==0,FUN.VALUE=TRUE)))
77 | apply(
78 | vapply(ind, function(i) rowSums(A[,i,drop=FALSE])==0, FUN.VALUE=logical(nrow(A))),
79 | 1,any
80 | )
81 | }
82 |
83 | #
84 | # For an \code{\link{editset}} it checks for obvious redundancies in the numerical
85 | # and categorical/mixed parts separately. Arguments \code{duplicates} must be a
86 | # logical 2-vector, the first element corresponding to the numeric part, the second
87 | # element to the conditional part.
88 | #
89 |
90 | #' @method isObviouslyRedundant editset
91 | #' @rdname isObviouslyRedundant
92 | #' @export
93 | isObviouslyRedundant.editset <- function(E, duplicates=rep(TRUE,2), ...){
94 | c(
95 | isObviouslyRedundant(E$num, duplicates=duplicates[1], ...),
96 | isObviouslyRedundant(E$mixcat, duplicates=duplicates[2], ...)
97 | )
98 | }
99 |
100 |
101 |
102 | #
103 | #
104 | # For an \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editenv}},
105 | # a list of logical vectors is returned.
106 | #
107 | #' @method isObviouslyRedundant editlist
108 | #' @rdname isObviouslyRedundant
109 | #' @export
110 | isObviouslyRedundant.editlist <- function(E, duplicates=rep(TRUE,2), ...){
111 | lapply(E, isObviouslyRedundant.editset, duplicates=duplicates, ...)
112 | }
113 |
114 |
115 | #
116 | # For an \code{\link[=disjunct]{editlist}} or \code{\link[=disjunct]{editenv}},
117 | # a list of logical vectors is returned.
118 | #
119 |
120 | #' @method isObviouslyRedundant editenv
121 | #' @rdname isObviouslyRedundant
122 | #' @export
123 | isObviouslyRedundant.editenv <- function(E, duplicates=rep(TRUE,2), ...){
124 | lapply(E, isObviouslyRedundant.editset, duplicates=duplicates, ...)
125 | }
126 |
127 |
128 |
129 |
--------------------------------------------------------------------------------
/pkg/R/pkg.R:
--------------------------------------------------------------------------------
1 | #' An overview of the function of package \code{editrules}
2 | #'
3 | #' @section NOTE:
4 | #' This package is no longer under active development. The package is superseded
5 | #' by R packages \href{https://CRAN.R-project.org/package=validate}{validate}
6 | #' for data validation and
7 | #' \href{https://CRAN.R-project.org/package=errorlocate}{errorlocate}
8 | #' for error localization. We urge new users to use those packages instead.
9 | #'
10 | #' The \code{editrules} package aims to provide an environment to conveniently
11 | #' define, read and check recordwise data constraints including
12 | #' \itemize{
13 | #' \item{Linear (in)equality constraints for numerical data},
14 | #' \item{Constraints on value combinations of categorical data}
15 | #' \item{Conditional constraints on numerical and/or mixed data}
16 | #' }
17 | #' In literature these constraints, or restrictions are refered to as ``edits''.
18 | #' \code{editrules} can perform common rule
19 | #' set manipulations like variable elimination and value substitution, and
20 | #' offers error localization functionality based on the
21 | #' (generalized) paradigm of Fellegi and Holt. Under this paradigm, one determines
22 | #' the smallest (weighted) number of variables to adapt such that no (additional or derived)
23 | #' rules are violated. The paradigm is based on the assumption that errors
24 | #' are distributed randomly over the variables and there is no detectable cause of
25 | #' error. It also decouples the detection of corrupt variables from their
26 | #' correction. For some types of error, such as sign flips, typing errors or
27 | #' rounding errors, this assumption does not hold. These errors can be detected
28 | #' and are closely related to their resolution. The reader is referred to the
29 | #' \pkg{deducorrect} package for treating such errors.
30 | #'
31 | #' @section I. Define edits:
32 | #'
33 | #' \code{editrules} provides several methods for creating edits from a \code{character}
34 | #' , \code{expression}, \code{data.frame} or a text file.
35 | #' \tabular{ll}{
36 | #' \code{\link{editfile}} \tab Read conditional numerical, numerical and categorical constraints from textfile \cr
37 | #' \code{\link{editset}} \tab Create conditional numerical, numerical and categorical constraints \cr
38 | #' \code{\link{editmatrix}} \tab Create a linear constraint matrix for numerical data \cr
39 | #' \code{\link{editarray}} \tab Create value combination constraints for categorical data \cr
40 | #' }
41 | #'
42 | #' @section II. Check and find errors in data:
43 | #'
44 | #' \code{editrules} provides several method for checking \code{data.frame}s with edits
45 | #' \tabular{ll}{
46 | #' \code{\link{violatedEdits}} \tab Find out which record violates which edit. \cr
47 | #' \code{\link{localizeErrors}} \tab Localize erroneous fields using Fellegi and Holt's principle. \cr
48 | #' \code{\link{errorLocalizer}} \tab Low-level error localization function using B&B algorithm \cr
49 | #' }
50 | #' Note that you can call \code{plot}, \code{summary} and \code{print} on results of these functions.
51 | #'
52 | #' @section IV. Manipulate and check edits:
53 | #'
54 | #' \code{editrules} provides several methods for manipulating edits
55 | #' \tabular{ll}{
56 | #' \code{\link{substValue}} \tab Substitute a value in a set of rules \cr
57 | #' \code{\link{eliminate}} \tab Derive implied rules by variable elimination \cr
58 | #' \code{\link{reduce}} \tab Remove unconstraint variables \cr
59 | #' \code{\link{isFeasible}} \tab Check for contradictions \cr
60 | #' \code{\link{duplicated}} \tab Find duplicated rules \cr
61 | #' \code{\link{blocks}} \tab Decompose rules into independent blocks \cr
62 | #' \code{\link{disjunct}} \tab Decouple conditional edits into disjunct edit sets\cr
63 | #' \code{\link{separate}} \tab Decompose rules in blocks and decouple conditinal edits \cr
64 | #' \code{\link{generateEdits}} \tab Generate all nonredundant implicit edits (\code{\link{editarray}} only) \cr
65 | #' }
66 | #'
67 | #' @section V. Plot and coerce edits:
68 | #'
69 | #' \code{editrules} provides several methods for plotting and coercion.
70 | #' \tabular{ll}{
71 | #' \code{\link{editrules.plotting}} \tab Plot edit-variable connectivity graph \cr
72 | #' \code{\link[igraph]{as.igraph}} \tab Coerce to edit-variable connectivity \code{igraph} object \cr
73 | #' \code{as.character} \tab Coerce edits to \code{character} representation \cr
74 | #' \code{as.data.frame} \tab Store \code{character} representation in \code{data.frame} \cr
75 | #' }
76 | #'
77 | #' @import lpSolveAPI
78 | #' @importFrom igraph as.igraph
79 | #' @importFrom graphics axis barplot grid lines mtext par plot
80 | #' @importFrom stats density dist quantile runif
81 | #' @importFrom utils flush.console ls.str stack str
82 | #'
83 | #' @name editrules_package
84 | #' @docType package
85 | "_PACKAGE"
86 |
87 |
88 | # not on imports: we need to DEPEND on igraph since are exporting methods of an igraph
89 | # generic (as.igraph).
90 |
91 |
92 |
--------------------------------------------------------------------------------
/pkg/tests/testthat/testeditmatrix.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 |
3 | context("Editmatrix")
4 |
5 | test_that("editmatrix works correcly with character",{
6 | cond <- c( "x == y"
7 | , "z + w == y + x"
8 | , "x + z == y + 2*w"
9 | )
10 |
11 | mat <- editmatrix(cond)
12 | mat <- getA(mat)
13 | expect_equivalent(mat[1,], c(1,-1,0,0))
14 | expect_equivalent(mat[2,], c(-1,-1,1,1))
15 | expect_equivalent(mat[3,], c(1,-1,-2,1))
16 | })
17 |
18 | test_that("editmatrix works correcly with expression",{
19 | cond <- expression( x == y
20 | , z + w == y + x
21 | , x + z == y + 2*w
22 | )
23 |
24 | mat <- editmatrix(cond)
25 | mat <- getA(mat)
26 | expect_equivalent(mat[1,], c(1,-1,0,0))
27 | expect_equivalent(mat[2,], c(-1,-1,1,1))
28 | expect_equivalent(mat[3,], c(1,-1,-2,1))
29 | })
30 |
31 | test_that("editmatrix can simplify",{
32 | cond <- c( "2*x == x + y"
33 | , "z + 2 == y + 3"
34 | , "w == 3"
35 | )
36 |
37 | E <- editmatrix(cond)
38 | mat <- getA(E)
39 | C <- getb(E)
40 | expect_equal(as.integer(mat[1,]), c(1,-1,0,0))
41 | expect_equal(as.integer(mat[2,]), c(0,-1,1,0))
42 | expect_equal(C[2], c(num2=1))
43 |
44 | expect_equal(mat[3,], c(x=0,y=0,z=0,w=1))
45 | expect_equal(C[3], c(num3=3))
46 | })
47 |
48 |
49 | test_that("editmatrix works correcly with data.frame",{
50 |
51 | edtinf.csv <-
52 | "name,edit
53 | A,x == y
54 | B,z + w == y + x
55 | C,z == y + 2*w
56 | "
57 | edtinf <- read.csv((con <- textConnection(edtinf.csv)))
58 | close(con)
59 |
60 | mat <- editmatrix(edtinf)
61 | A <- getA(mat)
62 | expect_equivalent(A[1,], c(1,-1,0,0))
63 | expect_equivalent(A[2,], c(-1,-1,1,1))
64 | expect_equivalent(A[3,], c(0,-1,-2,1))
65 | })
66 |
67 | test_that("editmatrix works with constants",{
68 | cond <- c( "x + y > 2"
69 | , "y < 10"
70 | )
71 | E <- editmatrix(cond)
72 | mat <- getA(E)
73 | expect_equal(as.integer(mat[1,]), c(-1,-1))
74 | expect_equal(as.integer(mat[2,]), c(0,1))
75 | expect_equal(as.integer(getb(E)), c(-2,10))
76 | })
77 |
78 | test_that("conditional statement parsing is not working..",{
79 | expect_error(editmatrix("if(x < 2) y > 4"))
80 | })
81 |
82 | test_that("editmatrix works with negative constants",{
83 | cond <- c( "x + y > -2"
84 | , "y < -10"
85 | )
86 | E <- editmatrix(cond)
87 | mat <- getA(E)
88 | expect_equal(as.integer(mat[1,]), c(-1,-1))
89 | expect_equal(as.integer(mat[2,]), c(0,1))
90 | expect_equal(as.integer(getb(E)), c(2,-10))
91 | })
92 |
93 | test_that("editmatrix works with negative coefficients",{
94 | cond <- c( "-2*x + y > 2"
95 | )
96 | E <- editmatrix(cond)
97 | mat <- getAb(E)
98 | expect_equivalent(mat[1,], c(2,-1,-2))
99 | })
100 |
101 | test_that("editmatrix works with coefficient after variable",{
102 | cond <- c( "x*-2 + y > 2"
103 | )
104 | E <- editmatrix(cond)
105 | mat <- getAb(E)
106 | expect_equivalent(mat[1,], c(2,-1,-2))
107 | })
108 |
109 | test_that("editmatrix fails with nonconstant coefficient",{
110 | cond <- c( "a*x == 2"
111 | )
112 | expect_error(editmatrix(cond))
113 | })
114 |
115 |
116 | test_that("is.editmatrix works",{
117 | mat <- editmatrix("x==y")
118 | expect_true(is.editmatrix(mat))
119 | expect_false(is.editmatrix(unclass(mat)))
120 | })
121 |
122 | test_that("as.editmatrix works",{
123 | A <- matrix( c( 1,-2, 0
124 | , 2, 0, 1
125 | )
126 | , nrow=2
127 | , byrow=TRUE
128 | # , dimnames=list(c("a", "b"), c("x","y", "z"))
129 | )
130 | E <- as.editmatrix(A, b=c(0,1), ops=c("==","<"))
131 | ei <- as.data.frame(E)
132 | expect_equivalent(ei$edit, c("x1 == 2*x2", "2*x1 + x3 < 1"))
133 | })
134 |
135 | test_that("editmatrix normalize works",{
136 | cond <- c( "x > y"
137 | , "z + w >= y + x"
138 | , "x + z < y + 2*w"
139 | , "x + z == y + 2*w"
140 | , "x + z <= y + 2*w"
141 | )
142 |
143 | E <- editmatrix(editrules=cond, normalize=TRUE)
144 | mat <- getA(E)
145 |
146 | expect_equivalent(mat[1,], c(-1,1,0,0))
147 | expect_equivalent(mat[2,], c(1,1,-1,-1))
148 | expect_equivalent(mat[3,], c(1,-1,-2,1))
149 | expect_equivalent(mat[4,], c(1,-1,-2,1))
150 | expect_equivalent(mat[5,], c(1,-1,-2,1))
151 | ops <- getOps(E)
152 | expect_equivalent(ops, c("<", "<=", "<","==", "<="))
153 | })
154 |
155 |
156 | test_that("coercions work",{
157 | E <- editmatrix("x+y==z")
158 | expect_that(E, is_identical_to(editmatrix(as.data.frame(E))))
159 | expect_that(E, is_identical_to(as.editmatrix(A=getA(E), b=getb(E), ops=getOps(E))))
160 | # edge case, testing as.character feature
161 | E <- editmatrix("x + 0.1*y==z")
162 | expect_that(E, is_identical_to(editmatrix(as.character(E))))
163 | })
164 |
165 |
--------------------------------------------------------------------------------
/pkg/R/writeELAsMip.R:
--------------------------------------------------------------------------------
1 | #' Rewrite an editset and reported values into the components needed for a mip solver
2 | #'
3 | #' @param E \code{\link{editset}} or any object that is coercable to an editset.
4 | #' @param x named \code{list} or \code{vector} with data
5 | #' @param weight vector with weights of the variable in the same order as x
6 | #' @param M maximum allowed difference between reported value and corrected value
7 | #' @param epsilon offset needed for writing a strict inequality into a an inequality
8 | #' @return list with an editmatrix, objfn, binvars, numvars, M and epsilon
9 | #' @keywords internal
10 | writeELAsMip <- function( E
11 | , x
12 | , weight = rep(1, length(x))
13 | , M = 1e7
14 | , epsilon = 1e-3
15 | , ...
16 | ){
17 | E <- as.editset(E)
18 | el.E <- NULL
19 |
20 | # soft <- is.finite(editweight)
21 | # if (any(soft)){
22 | # soft.E <- E[soft,]
23 | # soft.weights <- editweight[soft]
24 | #
25 | # #TODO process softedits into el.E
26 | # soft.num <- softEdits(soft.E$num, xlim, prefix=".soft.")
27 | #
28 | # #TODO column with diag(1, nrow(soft.E$mixcat))
29 | # soft.cat <- NULL
30 | # #soft.cat <- softEdits(cateditmatrix(soft.E$mixcat),xlim,prefix=".softcat.")
31 | # el.E <- c(soft.num, soft.cat, el.E)
32 | # E <- E[!soft,]
33 | # }
34 |
35 | # num part
36 | num.vars <- getVars(E, type="num")
37 |
38 | if (!is.null(num.vars)){
39 | num.idx <- match(num.vars, names(x))
40 | num.x <- diag(1, nrow=length(num.vars))
41 | dimnames(num.x) <- list(num.vars,num.vars)
42 | num.x0 <- unlist(x[num.idx])
43 | # create an editmatrix x_i == x^0_i
44 | num.E <- as.editmatrix(num.x, num.x0)
45 | num.se <- softEdits(num.E, "adapt.")
46 | el.E <- c(num.se, E$num, el.E)
47 | }
48 |
49 | # cat part
50 | cat.vars <- getVars(E, type="cat")
51 | if ( length(cat.vars) > 0 ){
52 | cat.idx <- match(cat.vars, names(x))
53 | cat.x_0 <- unlist(x[cat.idx])
54 |
55 |
56 | cat.A <- diag(1, nrow=length(cat.x_0))
57 | cat.A <- cbind(cat.A, cat.A)
58 | #browser()
59 | colnames(cat.A) <- c(asCat(cat.x_0), paste("adapt.", cat.vars, sep=""))
60 |
61 | # check for non existing levels (including NA's)
62 | invalidCats <- !(asCat(cat.x_0, useLogicals=FALSE) %in% getlevels(E$mixcat))
63 | if (any(invalidCats)){ # remove invalid categories otherwise they will turn up in the resulting editmatrix...
64 | cat.A <- cat.A[,-which(invalidCats), drop=FALSE]
65 | }
66 | cat.b <- rep(1, nrow(cat.A))
67 |
68 | cat.se <- as.editmatrix(cat.A, b=cat.b)
69 | el.E <- c(cat.se, cateditmatrix(E$mixcat), el.E)
70 | }
71 |
72 | # mix part
73 | mix.E <- editmatrix(invert(as.character(E$mixnum)))
74 | mix.vars <- getVars(mix.E)
75 | if (!is.null(mix.vars)){
76 | mix.idx <- match(mix.vars, names(x))
77 | mix.se <- softEdits(mix.E, prefix="")
78 | el.E <- c(mix.se, el.E)
79 | }
80 |
81 | # el.E <- c(mix.se, cat.se, num.se, E$num, cateditmatrix(E$mixcat))
82 | lt <- getOps(el.E) == "<"
83 |
84 | el.vars <- getVars(el.E)
85 | el.binvars <- sapply(el.vars, is.character)
86 | el.binvars[el.vars %in% num.vars] <- FALSE
87 | g <- grepl("delta.", el.vars, fixed=TRUE)
88 | #print(g)
89 | el.binvars[g] <- FALSE
90 |
91 | objfn <- sapply(el.vars, function(v) 0)
92 | adapt.idx <- grep("^adapt\\.", el.vars)
93 | adapt.nms <- names(adapt.idx) <- sub("^adapt\\.", "", el.vars[adapt.idx])
94 |
95 | objfn[adapt.idx] <- weight[match(adapt.nms, names(x))]
96 |
97 | # if (any(soft)){
98 | # soft.idx <- grep("^\\.soft", el.vars)
99 | # objfn[soft.idx] <- (1-lambda) * soft.weights
100 | # }
101 | structure(
102 | list( E = el.E
103 | , objfn = objfn
104 | , binvars = which(el.binvars)
105 | , numvars = match(num.vars, el.vars)
106 | , M = M
107 | , epsilon = epsilon
108 | ),
109 | class="mip"
110 | )
111 | }
112 |
113 | buildELMatrix <- writeELAsMip
114 |
115 | # E <- editset(expression(
116 | # x < y,
117 | # y < z,
118 | # x < z,
119 | # a %in% c(TRUE, FALSE),
120 | # if (a) x > 1
121 | # ))
122 | #
123 | # editsetToMip(E)
124 | #testing...
125 |
126 | # E <- editset(expression(
127 | # if (x>0) y > 0
128 | # , maritalstatus %in% c("married", "single")
129 | # , if (maritalstatus == "married") age >= 17
130 | # ))
131 | # #
132 | # x <- list(x = 1, y = -1, age=16, maritalstatus="married")
133 | # # #x <- list(x = 1, y = -1, age=16, maritalstatus=NA)
134 | # # # e <- expression( pregnant %in% c(TRUE, FALSE)
135 | # # # , gender %in% c("male", "female")
136 | # # # , if (pregnant) gender == "female"
137 | # # # )
138 | # # #
139 | # # # cateditmatrix(e)
140 | # # checkXlim(list(age=c(0,200)), x)
141 | # #
142 | # buildELMatrix(E, x)# -> B
143 | # #errorLocalizer.mip(E, x=x,, xlim=list(age=c(0,200)))
144 |
--------------------------------------------------------------------------------
/pkg/inst/script/bench/benchAB.R:
--------------------------------------------------------------------------------
1 | # Some benchmarks on elimination Algorithm A and B
2 | # mvdl, 05.01.2011
3 | #
4 |
5 | cat('-------------------------------------------------------------------\n')
6 | cat('This benchmark requires packages editrules and Hmisc\n')
7 | require(editrules)
8 | if (!require(Hmisc)) stop('install Hmisc')
9 |
10 | fdir <- system.file('script/bench',package='editrules')
11 | edits <- file.path(fdir,'edits.R')
12 | files <- file.path(fdir,c('eliminator.R','randomEdits.R'))
13 |
14 | for ( f in files ) source(f)
15 |
16 | ## The double conversion makes sure that the empty character ('')
17 | ## is interpreted as empty value.
18 | E <- editarray(as.character(editfile(edits,type='cat')))
19 |
20 | vars <- getVars(E)
21 |
22 | cat('timing algorithms A and B...\n')
23 |
24 | t1 <- system.time(e1 <- elimInfo(E, algorithmA.follow))
25 | t2 <- system.time(e2 <- elimInfo(E, algorithmB.follow))
26 |
27 | for ( i in 1:99 ){
28 | t1 <- t1 + system.time(e1 <- elimInfo(E, algorithmA.follow))
29 | t2 <- t2 + system.time(e2 <- elimInfo(E, algorithmB.follow))
30 | }
31 |
32 |
33 | ## eliminate one variable from E, do this for all variables and follow progress of elimination
34 | tA <- tB <- numeric(length(vars))
35 | names(tA) <- names(tB) <- vars
36 | for ( v in vars ){
37 | tA[v] <- system.time({for ( i in 1:50 ) u <- algorithmA(E,v)})['user.self']/50*1000
38 | tB[v] <- system.time({for ( i in 1:50 ) u <- algorithmB(E,v)})['user.self']/50*1000
39 | }
40 | times <- cbind(tA,tB,diff=tB-tA,ratio=tB/tA)
41 | o <- order(times[,3],decreasing=TRUE)
42 | times <- cbind(times,e1$Created > e2$Created)
43 |
44 | totA <- sum(times[,'tA'])
45 | totB <- sum(times[,'tB'])
46 | cat('Ratio tB/tA over all variables :', totB/totA,'\n')
47 |
48 | totA <- sum(times[rev(o)[1:13],'tA'])
49 | totB <- sum(times[rev(o)[1:13],'tB'])
50 | cat('Ratio tB/tA over variables where iterations are necessary :', totB/totA,'\n\n')
51 |
52 |
53 | cat('Creating plots\n')
54 |
55 | ## Create dotchart of timing difference and scatterplot of timings.
56 |
57 | par(mfrow=c(1,2),oma=c(0,0,0,0),mar=c(4,2,2,1.1))
58 |
59 | lb <- rep("",36)
60 | lb[seq(1,36,5)] <- seq(1,36,5)
61 | pch <- 1+numeric(nrow(times))
62 | I <- e1$Created > e2$Created
63 | pch[I] <- 16
64 | dotchart(times[o,'diff'],
65 | pch=pch[o],
66 | main=expression(paste('Timing ',t[B]-t[A])),
67 | xlab=expression(t[B]-t[A]),
68 | labels=lb
69 | )
70 | abline(v=0,lty=1,col='grey')
71 |
72 | pc1 <- 2 + numeric(36)
73 | pc1[I] <- 17
74 | plot(e1$contained,times[,'tA'],
75 | xlab='Nr of edits containing variable',
76 | main='Timing A and B (ms)',
77 | xlim=c(0,20),
78 | ylim=c(0,40),
79 | pch=pc1,
80 | oma=c(1,1,1,1))
81 | pc2 <- 22 + numeric(36)
82 | pc2[I] <- 15
83 | points(e2$contained,times[,'tB'],pch=pc2)
84 | legend('topright',
85 | legend=c('A','B'),
86 | pch=c(2,22),
87 | bty='n')
88 |
89 |
90 | cat('-------------------------------------------------------------------\n')
91 |
92 | cat('-------------------------------------------------------------------\n')
93 | cat('Determine complexity as function of number of edits.\n')
94 | M <- c(seq(10,100,10),150,200,250,500)
95 | timing <- array(0,dim=c(length(M),2))
96 | colnames(timing) <- c('A','B')
97 | i <- 0
98 |
99 | timeAlg <- function(E,alg,n=10){
100 | vars <- getVars(E)
101 | t = 0
102 | t = t + system.time({ for(v in vars ) E <- alg(E,v)})['user.self']
103 | t/n
104 | }
105 |
106 | nAverage = 100
107 | TA <- array(0,dim=c(length(M),8))
108 | colnames(TA) <-c(names(summary(1:10)),
109 | names(quantile(1:10,probs=c(0.05,0.95)) ))
110 | TB <- TAB <- TA
111 |
112 | i <- 0
113 | ms=1/1000
114 | for ( m in M ){
115 | cat('\rworking on m=',m)
116 | tA <- numeric(nAverage)
117 | tB <- numeric(nAverage)
118 | tBA <- numeric(nAverage)
119 | i <- i + 1
120 | for ( j in 1:nAverage ){
121 | E <- genedits(m,group='A')
122 | vars <- getVars(E)
123 | tA[j] <- timeAlg(E,algorithmA,n=3)
124 | tB[j] <- timeAlg(E,algorithmB,n=3)
125 | }
126 | TA[i,] <- c(summary(tA), quantile(tA,probs=c(0.05,0.95)))/ms
127 | TB[i,] <- c(summary(tB), quantile(tB,probs=c(0.05,0.95)))/ms
128 | TAB[i,] <- c(summary(tA-tB),quantile(tA-tB,probs=c(0.05,0.95)))/ms
129 | }
130 | cat('\n')
131 |
132 |
133 | dev.new()
134 | par(mfrow=c(1,2),oma=c(0,0,0,0),mar=c(4,2,2,1.1))
135 | errbar(M,sqrt(TA[,'Mean']),sqrt(TA[,'5%']),sqrt(TA[,'95%']),
136 | xlab='nr of edits',
137 | ylab=expression(paste(sqrt(time),' (',sqrt(ms),')')),
138 | )
139 | errbar(M,sqrt(TB[,'Mean']),sqrt(TB[,'5%']),sqrt(TB[,'95%']),add=TRUE,pch=2)
140 | legend('topleft',
141 | legend=c('A','B'),
142 | pch=c(16,2),
143 | bty='n')
144 | title(main=expression(paste('Timing (',sqrt(ms),')')))
145 |
146 | errbar(M,TAB[,'Mean'],TAB[,'5%'],TAB[,'95%'],
147 | xlab='nr of edits',
148 | ylab=expression(paste(t[A]-t[B],' (ms)'))
149 | )
150 | abline(h=0)
151 | title(main=expression(paste(t[A]-t[B],' ','(ms)')))
152 |
153 | cat('finished\n')
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
--------------------------------------------------------------------------------
/pkg/R/reduce.R:
--------------------------------------------------------------------------------
1 |
2 | #' Remove redundant variables and edits.
3 | #'
4 | #' Remove variables which are not contained in any edit and remove edits which are
5 | #' \code{\link[=isObviouslyRedundant]{obviously redundant}}.
6 | #'
7 | #' @param E \code{\link{editmatrix}} or \code{\link{editarray}}
8 | #' @param ... arguments to pass to other methods
9 | #'
10 | #' @export
11 | #' @seealso \code{\link{contains}}, \code{\link{eliminate}}, \code{\link{substValue}}
12 | #'
13 | reduce <- function(E,...){
14 | UseMethod('reduce')
15 | }
16 |
17 |
18 | #'
19 | #' @method reduce editmatrix
20 | #' @param tol elements of \code{E} with absolute value < \code{tol} are considered 0.
21 | #'
22 | #' @rdname reduce
23 | #' @export
24 | reduce.editmatrix <- function(E, tol=sqrt(.Machine$double.eps),...){
25 | if ( nrow(E) == 0 ) return(neweditmatrix(matrix(numeric(0)),character(0)))
26 | m <- as.matrix(E)
27 | if ( tol > 0 ) m[abs(m) < tol] <- 0
28 | B <- m != 0
29 | v <- 1:(ncol(m)-1)
30 | vars <- which(colSums(B[,v,drop=FALSE]) != 0)
31 | edits <- (rowSums(B) != 0)
32 | E[edits,c(vars,ncol(m)) , drop=FALSE]
33 | }
34 |
35 |
36 | #'
37 | #' @method reduce editarray
38 | #'
39 | #' @export
40 | #' @rdname reduce
41 | reduce.editarray <- function(E,...){
42 | E <- E[!isObviouslyRedundant.editarray(E),,drop=FALSE]
43 | m <- as.matrix(E)
44 | ind <- getInd(E)
45 | H <- getH(E)
46 | b <- sapply(ind,function(ind) all(m[,ind,drop=FALSE]))
47 | if ( any(b) ){
48 | J <- logical(0)
49 | for ( j in ind[b] ) J <- c(J,j)
50 | sep=getSep(E)
51 | m <- m[,-J,drop=FALSE]
52 | ind <- indFromArray(m,sep=sep)
53 | i <- apply(!m,1,all)
54 | m <- m[!i,,drop=FALSE]
55 | if (!is.null(H)) H <- H[!i,,drop=FALSE]
56 | E <- neweditarray(E=m, ind=ind, sep=sep, names=rownames(m), H=H)
57 | }
58 | E
59 | }
60 |
61 |
62 | #'
63 | #' @method reduce editset
64 | #' @export
65 | #' @rdname reduce
66 | #'
67 | reduce.editset <- function(E,...){
68 | num <- reduce(E$num)
69 | mixcat <- reduce(E$mixcat)
70 | v <- getVars(mixcat)
71 | mixnum <- reduce(E$mixnum[rownames(E$mixnum) %in% v,])
72 |
73 | imix <- grepl("^.l",v)
74 | if ( nrow(mixcat) > 0 ){
75 | m <- logical(nedits(mixcat))
76 | if ( any(imix) ) m <- apply(contains(mixcat, v[imix]), 1, any)
77 | pref <- ifelse(m,"mix","cat")
78 | rownames(mixcat) <- paste(pref, 1:nrow(mixcat), sep="")
79 | }
80 | simplify(neweditset(
81 | num = num,
82 | mixnum = mixnum,
83 | mixcat = mixcat
84 | ))
85 |
86 | }
87 |
88 | #' Retrieve values stricktly implied by rules
89 | #'
90 | #' @export
91 | impliedValues <- function(E,...){
92 | UseMethod('impliedValues')
93 | }
94 |
95 | #'
96 | #'
97 | #' Detects cases where two inequalities imply an equality, e.g. \eqn{x\leq 0} and \eqn{x\geq0}
98 | #' implies \eqn{x=0}. Also detects straight equalities, e.g. \eqn{x==0} implies \eqn{x=0}. Such
99 | #' cases arise frequently when manipulating edits by value subsitution or variable elimination.
100 | #' The function recursively detects equalities and combined inequalities that imply fixed values,
101 | #' substitutes those fixed values and looks for new implied values until no new values are found.
102 | #'
103 | #' @method impliedValues editmatrix
104 | #'
105 | #' @param E editmatrix
106 | #' @param tol Maximum deviation for two values to be considered equal.
107 | #' @param ... Currently unused
108 | #' @return Numeric vector, whose names are variable names and values are unique values implied by the rules.
109 | #' @rdname impliedValues
110 | #'
111 | #' @seealso \code{\link{reduce}}, \code{\link{substValue}}, \code{\link{eliminate}}
112 | #'
113 | #' @export
114 | impliedValues.editmatrix <- function(E,tol=sqrt(.Machine$double.eps),...){
115 | if (!isNormalized(E)) E <- normalize(E)
116 |
117 | iv <- numeric(0)
118 | iv[getVars(E)] <- NA
119 |
120 | # edits containing one variable
121 | i <- rowSums(contains(E,tol=tol)) == 1
122 | if ( sum(i) == 0 ) return(numeric(0))
123 | e <- reduce(E[i,])
124 |
125 | # direct equalities
126 | ops <- getOps(e)
127 | eqs <- e[ops == '==', ]
128 | eqA <- getA(eqs)
129 | eqb <- getb(eqs)
130 | j <- apply(abs(eqA),1,which.max)
131 | i <- 1:nrow(eqs)
132 | iv[colnames(eqA)[j]] <- eqb/eqA[cbind(i,j)]
133 |
134 | x <- iv[!is.na(iv)]
135 | # combined inequalities
136 | ieq <- e[ops %in% c('<=','>=')]
137 | if (length(x) > 0)
138 | ieq <- substValue(ieq, names(x), x,reduce=TRUE)
139 | Ab <- getAb(e)
140 | for ( v in getVars(ieq) ){
141 | ab <- Ab[abs(Ab[,v])>tol,,drop=FALSE]
142 | ab <- ab/abs(ab[,v])
143 | b <- ab[,'CONSTANT']
144 | for ( i in 1:length(b) ){
145 | j <- ( abs(b[i] + b[-i]) < tol & abs(ab[i,v] + ab[-i,v]) < tol )
146 | if (any(j)){
147 | iv[v] = abs(b[i])
148 | break
149 | }
150 | }
151 | }
152 | iv <- iv[!is.na(iv)]
153 | if ( length(iv) > 0 ){
154 | E <- substValue(E,names(iv),iv,reduce=TRUE)
155 | c(iv,impliedValues.editmatrix(E,tol))
156 | } else {
157 | iv
158 | }
159 | }
160 |
--------------------------------------------------------------------------------