├── data └── bs1.rda ├── .Rbuildignore ├── tests ├── testthat.R └── testthat │ ├── test_presencecount.R │ ├── test_rmnegcorr.R │ ├── test_newcorrtable.R │ ├── test_linkage.R │ ├── test_reltable.R │ ├── test_predictvo.R │ └── test_corrmat.R ├── R ├── utils-pipe.R ├── data.R ├── linkage.R ├── predictvo.R ├── plot_functions.R └── data_munging_functions.R ├── man ├── pipe.Rd ├── caplot.Rd ├── bs1.Rd ├── linkage.Rd ├── presencecount.Rd ├── reltable.Rd ├── newcorrtable.Rd ├── predictvo.Rd ├── rmnegcorr.Rd ├── igraphmask.Rd ├── corrplotmask.Rd └── corrmat.Rd ├── NAMESPACE ├── data-raw ├── modify_data_useful_commands.R └── bs1.csv ├── varnastats.Rproj ├── README.md ├── .gitignore ├── DESCRIPTION ├── vignettes └── varnastats-vignette-1.Rmd └── LICENSE /data/bs1.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/varnastats/master/data/bs1.rda -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^data-raw$ 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^packrat/ 5 | ^\.Rprofile$ 6 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(varnastats) 3 | 4 | test_check("varnastats") 5 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | NULL 12 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | See \code{magrittr::\link[magrittr]{\%>\%}} for details. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%>%") 4 | export(caplot) 5 | export(corrmat) 6 | export(corrplotmask) 7 | export(igraphmask) 8 | export(linkage) 9 | export(newcorrtable) 10 | export(predictvo) 11 | export(presencecount) 12 | export(reltable) 13 | export(rmnegcorr) 14 | importFrom(magrittr,"%>%") 15 | -------------------------------------------------------------------------------- /data-raw/modify_data_useful_commands.R: -------------------------------------------------------------------------------- 1 | bs1 <- read.csv("~/Rstats/varnastats/data-raw/bs1.csv", 2 | sep=";", 3 | header=TRUE, 4 | row.names=1, 5 | stringsAsFactors = FALSE, 6 | check.names = FALSE) 7 | 8 | devtools::use_data(bs1, overwrite = TRUE) 9 | 10 | devtools::load_all() 11 | 12 | bs1 13 | -------------------------------------------------------------------------------- /varnastats.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # varnastats 2 | 3 | Tools for bi- and multivariate analysis of matrizes in the context of archaeological research. Developed and used for the analysis of Varna Necropolis (Bulgaria). 4 | 5 | *** 6 | 7 | To install you have to use Hadley Wickhams [devtools package](https://github.com/hadley/devtools). 8 | 9 | devtools::install_github("nevrome/varnastats") 10 | 11 | As the developement of the package is an ongoing prozess it's advisable to install the latest version before using it. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | .RData 5 | 6 | # Example code in package build process 7 | *-Ex.R 8 | # RStudio files 9 | .Rproj.user/ 10 | # produced vignettes 11 | vignettes/*.html 12 | vignettes/*.pdf 13 | .Rproj.user 14 | packrat/lib*/ 15 | # check https://gist.github.com/octocat/9257657 # 16 | # Packages # 17 | ############ 18 | # it's better to unpack these files and commit the raw source 19 | # git has its own built in compression methods 20 | *.7z 21 | *.dmg 22 | *.gz 23 | *.iso 24 | *.jar 25 | *.rar 26 | *.tar 27 | *.zip 28 | # Logs and databases # 29 | ###################### 30 | *.log 31 | *.sql 32 | *.sqlite 33 | # OS generated files # 34 | .DS_Store 35 | .DS_Store? 36 | ._* 37 | .Spotlight-V100 38 | .Trashes 39 | ehthumbs.db 40 | Thumbs.db 41 | # Compiled source # 42 | ################### 43 | *.com 44 | *.class 45 | *.dll 46 | *.exe 47 | *.o 48 | *.so 49 | -------------------------------------------------------------------------------- /man/caplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_functions.R 3 | \name{caplot} 4 | \alias{caplot} 5 | \title{Plot a selection of plots to get a first impression of the results of a CA} 6 | \usage{ 7 | caplot(cadoc) 8 | } 9 | \arguments{ 10 | \item{cadoc}{object of class \code{ca}. Produced e.g. \code{by ca::ca()}} 11 | } 12 | \description{ 13 | \code{caplot()} delivers three pages of plots based on \code{ca::plot.ca()}. 14 | Useful to get a first impression of the results to decide, whether a more elaborated 15 | plot should be created and whether other axis besides dim1 and dim2 contain interesting 16 | information. 17 | } 18 | \examples{ 19 | testmatrixrand <- data.frame( 20 | matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 21 | ) 22 | rownames(testmatrixrand) <- paste("row", seq(1:nrow(testmatrixrand))) 23 | 24 | library(ca) 25 | 26 | caplot(ca(testmatrixrand)) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat/test_presencecount.R: -------------------------------------------------------------------------------- 1 | context("Tests of function presencecount") 2 | 3 | testmatrix <- data.frame(c1 = c(0, 3, 8, 2), 4 | c2 = c(0, 6, 7, 8), 5 | c3 = c(0, 0, 0, 0)) 6 | rownames(testmatrix) <- c("r1", "r2", "r3", "r4") 7 | 8 | test_that( 9 | "the output of presencecount is a data.frame", { 10 | expect_equal( 11 | is.data.frame(presencecount(testmatrix)), 12 | TRUE 13 | ) 14 | } 15 | ) 16 | 17 | test_that( 18 | "the output of presencecount has the correct length", { 19 | expect_equal( 20 | ncol(presencecount(testmatrix, dim = 1)), 21 | ncol(testmatrix) 22 | ) 23 | expect_equal( 24 | ncol(presencecount(testmatrix, dim = 2)), 25 | nrow(testmatrix) 26 | ) 27 | } 28 | ) 29 | 30 | test_that( 31 | "presencecount counts correctly ", { 32 | expect_equal( 33 | presencecount(testmatrix, dim = 1)[, 2], 34 | 3 35 | ) 36 | expect_equal( 37 | presencecount(testmatrix, dim = 2)[, 2], 38 | 2 39 | ) 40 | } 41 | ) -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' A fictional burial site 2 | #' 3 | #' A dataset containing graves (objects) and stereotypical grave attributes (variables) 4 | #' 5 | #' @format A data frame with 50 rows and 18 variables: 6 | #' \itemize{ 7 | #' \item sex_male: anthropologically male (0,1) 8 | #' \item sex_female: anthropologically female (0,1) 9 | #' \item pos_crouched: buried in a crouched position (0,1) 10 | #' \item pos_extended: buried in an extended position (0,1) 11 | #' \item orient_N-S: N-S-oriented (0,1) 12 | #' \item orient_W-E: W-E-oriented (0,1) 13 | #' \item axe_1: axe type 1 (1--10) 14 | #' \item axe_2: axe type 2 (1--10) 15 | #' \item adze_1: adze type 2 (1--10) 16 | #' \item adze_2: adze type 2 (1--10) 17 | #' \item pottery_1: pottery type 1 (1--10) 18 | #' \item pottery_2: pottery type 2 (1--10) 19 | #' \item pottery_3: pottery type 3 (1--10) 20 | #' \item pottery_4: pottery type 4 (1--10) 21 | #' \item goldring: goldring (1--10) 22 | #' \item goldbead: goldbead type 2 (1--10) 23 | #' \item fibula_1: fibula type 1 (1--10) 24 | #' \item fibula_2: fibula type 2 (1--10) 25 | #' } 26 | #' @name bs1 27 | NULL 28 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: varnastats 2 | Type: Package 3 | Title: Analyse some aspects of prehistoric burial sites 4 | Version: 0.0.0.9000 5 | Date: 2015-06-05 6 | Authors@R: c( 7 | person("Clemens", "Schmid", email = "clemens@nevrome.de", role = c("aut", "cre")), 8 | person("David", "Kirschenheuter", email = "david@kirschenheuter.de", role = c("aut")), 9 | person("Jonas", "Abele", email = "jonas.abele@uni-tuebingen.de", role = c("aut")) 10 | ) 11 | Description: Tools for bi- and multivariate analysis of matrizes in the context of 12 | archaeological research. Developed and used for the analysis of Varna Necropolis 13 | (Bulgaria). 14 | License: file LICENSE 15 | LazyData: TRUE 16 | URL: https://github.com/nevrome/varnastats 17 | Depends: R (>= 3.0.0) 18 | Imports: 19 | dplyr (>= 0.4.1), 20 | magrittr (>= 1.5), 21 | igraph (>= 0.7.1), 22 | corrplot (>= 0.73), 23 | MASS (>= 7.3.0), 24 | rapport (>= 0.4.1), 25 | rapportools, 26 | reshape2 (>= 1.4.1), 27 | quantAAR 28 | Suggests: 29 | devtools, 30 | ca, 31 | ggplot2, 32 | testthat, 33 | knitr 34 | VignetteBuilder: knitr 35 | RoxygenNote: 6.1.1 36 | Encoding: UTF-8 37 | -------------------------------------------------------------------------------- /man/bs1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{bs1} 4 | \alias{bs1} 5 | \title{A fictional burial site} 6 | \format{A data frame with 50 rows and 18 variables: 7 | \itemize{ 8 | \item sex_male: anthropologically male (0,1) 9 | \item sex_female: anthropologically female (0,1) 10 | \item pos_crouched: buried in a crouched position (0,1) 11 | \item pos_extended: buried in an extended position (0,1) 12 | \item orient_N-S: N-S-oriented (0,1) 13 | \item orient_W-E: W-E-oriented (0,1) 14 | \item axe_1: axe type 1 (1--10) 15 | \item axe_2: axe type 2 (1--10) 16 | \item adze_1: adze type 2 (1--10) 17 | \item adze_2: adze type 2 (1--10) 18 | \item pottery_1: pottery type 1 (1--10) 19 | \item pottery_2: pottery type 2 (1--10) 20 | \item pottery_3: pottery type 3 (1--10) 21 | \item pottery_4: pottery type 4 (1--10) 22 | \item goldring: goldring (1--10) 23 | \item goldbead: goldbead type 2 (1--10) 24 | \item fibula_1: fibula type 1 (1--10) 25 | \item fibula_2: fibula type 2 (1--10) 26 | }} 27 | \description{ 28 | A dataset containing graves (objects) and stereotypical grave attributes (variables) 29 | } 30 | -------------------------------------------------------------------------------- /man/linkage.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/linkage.R 3 | \name{linkage} 4 | \alias{linkage} 5 | \title{Calculate linkage values of variables and objects of a numeric data.frame} 6 | \usage{ 7 | linkage(matrix) 8 | } 9 | \arguments{ 10 | \item{matrix}{data.frame with numeric values} 11 | } 12 | \value{ 13 | table with linkage values 14 | } 15 | \description{ 16 | \code{linkage()} calculates a linkage value for every variable and every object 17 | of an input data.frame and returns a table with this information. 18 | This linkage value allows predictions about whether a variable/object can be used 19 | for multivariate analysis. If a variable/object is not well linked to the other entities, 20 | it will often appear as an outlier. 21 | } 22 | \details{ 23 | Structure of the resulting table: 24 | 25 | column 1: linkage value (see code to understand how it's calculated) 26 | 27 | column 2: logarithm of linkage value (useful for interpretation) 28 | 29 | column 3: type (variable or object) 30 | } 31 | \examples{ 32 | testmatrixrand <- data.frame( 33 | matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 34 | ) 35 | 36 | linkage(testmatrixrand) 37 | 38 | link <- subset(linkage(testmatrixrand), type == "obj") 39 | barplot(link$linkage, names.arg = rownames(link)) 40 | 41 | } 42 | -------------------------------------------------------------------------------- /tests/testthat/test_rmnegcorr.R: -------------------------------------------------------------------------------- 1 | context("Tests of function rmnegcorr") 2 | 3 | testmatrix <- data.frame( 4 | c1 = c(5, 0, 0, 0, 1, 1), 5 | c2 = c(5, 0, 0, 0, 1, 1), 6 | c3 = c(5, 1, 7, 0, 0, 2), 7 | c4 = c(5, 6, 7, 0, 0, 0), 8 | c5 = c(5, 3, 2, 0, 0, 3), 9 | c6 = c(0, 6, 1, 0, 0, 0), 10 | c7 = c(0, 1, 1, 1, 0, 0) 11 | ) 12 | testmatrix <- quantAAR::booleanize(testmatrix) 13 | testcorrmatrix <- corrmat(testmatrix, "chi2", chi2limit = 0.2, dim = 1) 14 | 15 | test_that( 16 | "the output of rmnegcorr is a data.frame", { 17 | expect_equal( 18 | is.data.frame(rmnegcorr(testmatrix, testmatrix, dim = 1, niv = 0.1)), 19 | TRUE 20 | ) 21 | } 22 | ) 23 | 24 | test_that( 25 | "the output of rmnegcorr is a data.frame 26 | with the correct width and heigth", { 27 | expect_equal( 28 | ncol(rmnegcorr(testmatrix, testmatrix, dim = 1, niv = 0.1)), 29 | ncol(testmatrix) 30 | ) 31 | expect_equal( 32 | nrow(newcorrtable(rmnegcorr(testmatrix, testmatrix, dim = 1, niv = 0.1))), 33 | ncol(testmatrix) 34 | ) 35 | } 36 | ) 37 | 38 | test_that( 39 | "the removal of negative relations in rmnegcorr works", { 40 | expect_equal( 41 | testcorrmatrix[1, 7], 42 | 1 43 | ) 44 | expect_equal( 45 | rmnegcorr(testmatrix, testmatrix, dim = 1, niv = 0.1)[1, 7], 46 | 0 47 | ) 48 | } 49 | ) 50 | -------------------------------------------------------------------------------- /man/presencecount.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_munging_functions.R 3 | \name{presencecount} 4 | \alias{presencecount} 5 | \title{Count appearence of variables and objects in a data.frame with numeric values} 6 | \usage{ 7 | presencecount(matrix, dim = 1) 8 | } 9 | \arguments{ 10 | \item{matrix}{data.frame with numeric values} 11 | 12 | \item{dim}{switch to define if the appearences in columns (variables) or rows (objects) 13 | should be counted. 14 | 15 | 1: column (variables) appearences are counted 16 | 17 | 2: row (objects) appearences are counted} 18 | } 19 | \value{ 20 | sorted data.frame with amount of appearences for objacts or variables 21 | } 22 | \description{ 23 | A row or a column of a data.frame with numeric values is considered empty, if all 24 | its values are 0. If objects or variables are present, their rows and columns contain 25 | values != 0. To count the appearence of objects or variables, \code{presencecount} counts 26 | the amount of values != 0 within the rows or columns and writes them into a data.frame. 27 | The result is sorted by the amount of appearences. 28 | } 29 | \examples{ 30 | testmatrix <- data.frame(c1 = c(0,3,8,2), c2 = c(0,6,7,8), c3 = c(0,0,0,0)) 31 | rownames(testmatrix) <- c("r1","r2","r3","r4") 32 | 33 | countvar <- presencecount(testmatrix, 1) 34 | countobj <- presencecount(testmatrix, 2) 35 | 36 | } 37 | -------------------------------------------------------------------------------- /man/reltable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_munging_functions.R 3 | \name{reltable} 4 | \alias{reltable} 5 | \title{Convert a correlation matrix to a table of relations} 6 | \usage{ 7 | reltable(corrtable, corrtable2 = data.frame()) 8 | } 9 | \arguments{ 10 | \item{corrtable}{correlation matrix (as produced by varnastats::corrmat())} 11 | 12 | \item{corrtable2}{optional second corrmatrix correlation matrix (as produced by varnastats::corrmat())} 13 | } 14 | \value{ 15 | table of relations and their correlation value 16 | } 17 | \description{ 18 | \code{reltable} creates a sorted table of the relations of a correlation matrix. 19 | Relations with a correlation value of zero and autocorrelations get removed. 20 | \code{reltable} can consume a lot of time for big datasets! 21 | } 22 | \details{ 23 | Structure of the resulting table: 24 | 25 | column 1 + 2: indezes of the variables/objects in the correlation matrix 26 | 27 | column 3: correlation value 28 | 29 | column 4 + 5: names of the variables/objects 30 | 31 | column 6: optional second correlation value 32 | } 33 | \examples{ 34 | testmatrixrand <- data.frame( 35 | matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 36 | ) 37 | 38 | testcorr <- corrmat(testmatrixrand, "lambda", dim = 1) 39 | testcorr2 <- corrmat(testmatrixrand, "chi2", chi2limit = 0.1, dim = 1) 40 | 41 | reltable(testcorr) 42 | reltable(testcorr, testcorr2) 43 | 44 | } 45 | -------------------------------------------------------------------------------- /man/newcorrtable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_munging_functions.R 3 | \name{newcorrtable} 4 | \alias{newcorrtable} 5 | \title{Create an empty correlation matrix of a given data.frame} 6 | \usage{ 7 | newcorrtable(matrix, dim = 1) 8 | } 9 | \arguments{ 10 | \item{matrix}{data.frame} 11 | 12 | \item{dim}{switch to define, whether the new correlation matrix should be created 13 | for columns or rows. 14 | 15 | 1 (default): table is created for column (variables) relations. 16 | 17 | 2: table is created for row (objects) relations.} 18 | } 19 | \value{ 20 | empty correlation matrix data.frame 21 | } 22 | \description{ 23 | \code{newcorrtable} returns an empty correlation matrix of a data.frames columns 24 | or rows. Empty means: filled with the numeric value "0". 25 | This correlation matrix data.frame can be used to store the results of functions which 26 | calculate correlation values for all bivariate relations. 27 | } 28 | \examples{ 29 | testmatrix <- data.frame(c1 = c(5,2,3,8), c2 = c(5,6,7,0), c3 = c(5,6,7,9)) 30 | 31 | # correlation table is created for the columns of the input data.frame testmatrix 32 | newcorrtable(testmatrix) 33 | newcorrtable(testmatrix, 1) 34 | corrtabcolumns <- newcorrtable(matrix = testmatrix, dim = 1) 35 | 36 | # correlation table is created for the rows of the input data.frame testmatrix 37 | newcorrtable(testmatrix, 2) 38 | corrtabrows <- newcorrtable(matrix = testmatrix, dim = 2) 39 | 40 | } 41 | -------------------------------------------------------------------------------- /tests/testthat/test_newcorrtable.R: -------------------------------------------------------------------------------- 1 | context("Tests of function newcorrtable") 2 | 3 | testmatrixrand <- data.frame( 4 | matrix(base::sample(0:1, 400, replace = T), nrow = 20, ncol = 20), 5 | check.names = FALSE 6 | ) 7 | 8 | test_that( 9 | "the output of newcorrtable is a data.frame", { 10 | expect_equal( 11 | is.data.frame(newcorrtable(testmatrixrand, dim = 1)), 12 | TRUE 13 | ) 14 | expect_equal( 15 | is.data.frame(newcorrtable(testmatrixrand, dim = 2)), 16 | TRUE 17 | ) 18 | } 19 | ) 20 | 21 | test_that( 22 | "the output of newcorrtable is a data.frame without any values 23 | besides 0 and the correct amount of cells", { 24 | expect_equal( 25 | length(which(newcorrtable(testmatrixrand, dim = 1) == 0)), 26 | length(testmatrixrand) ^ 2 27 | ) 28 | expect_equal( 29 | length(which(newcorrtable(testmatrixrand, dim = 2) == 0)), 30 | nrow(testmatrixrand) ^ 2 31 | ) 32 | } 33 | ) 34 | 35 | testmatrixrand <- data.frame(c1 = c(5, 2, 3, 8), 36 | c2 = c(5, 6, 7, 0), 37 | c3 = c(5, 6, 7, 9)) 38 | 39 | test_that( 40 | "the output of newcorrtable is a data.frame 41 | with the correct width and heigth", { 42 | expect_equal( 43 | length(newcorrtable(testmatrixrand, dim = 1)), 44 | length(testmatrixrand) 45 | ) 46 | expect_equal( 47 | length(newcorrtable(testmatrixrand, dim = 2)), 48 | nrow(testmatrixrand) 49 | ) 50 | } 51 | ) -------------------------------------------------------------------------------- /tests/testthat/test_linkage.R: -------------------------------------------------------------------------------- 1 | context("Tests of function linkage") 2 | 3 | testmatrixrand <- data.frame( 4 | matrix(base::sample(0:1, 400, replace = T), nrow = 20, ncol = 20) 5 | ) 6 | 7 | test_that( 8 | "the output of linkage is a data.frame", { 9 | expect_equal( 10 | is.data.frame(linkage(testmatrixrand)), 11 | TRUE 12 | ) 13 | } 14 | ) 15 | 16 | test_that( 17 | "the output of linkage has the two main output-columns", { 18 | expect_equal( 19 | c("linkage", "type") %in% colnames(linkage(testmatrixrand)), 20 | c(TRUE, TRUE) 21 | ) 22 | } 23 | ) 24 | 25 | test_that( 26 | "the output of linkage has contains information for variables and objects", { 27 | expect_equal( 28 | c("obj", "var") %in% linkage(testmatrixrand)$type, 29 | c(TRUE, TRUE) 30 | ) 31 | } 32 | ) 33 | 34 | test_that( 35 | "the output of linkage has the correct length and therefore covers 36 | all variables and objects", { 37 | expect_equal( 38 | nrow(linkage(testmatrixrand)), 39 | ncol(testmatrixrand) + nrow(testmatrixrand) 40 | ) 41 | } 42 | ) 43 | 44 | testmatrix_short <- data.frame( 45 | matrix(c(1,0,0,0,0,0,0,0,1,0,0,1),nrow=4) 46 | ) 47 | 48 | 49 | test_that( 50 | "with zero linkage value is not calculated as logarithm", { 51 | expect_equal( 52 | linkage(testmatrix_short)[2,1], 53 | linkage(testmatrix_short)[2,2] 54 | ) 55 | expect_equal( 56 | linkage(testmatrix_short)[6,1], 57 | linkage(testmatrix_short)[6,2] 58 | ) 59 | } 60 | ) 61 | -------------------------------------------------------------------------------- /tests/testthat/test_reltable.R: -------------------------------------------------------------------------------- 1 | context("Tests of function reltable") 2 | 3 | testmatrixrand <- data.frame( 4 | matrix(base::sample(0:1, 400, replace = T), nrow = 20, ncol = 20), 5 | check.names = FALSE 6 | ) 7 | testcorr1 <- corrmat(testmatrixrand, "lambda", dim = 1) 8 | testcorr2 <- corrmat(testmatrixrand, "chi2", chi2limit = 0.1, dim = 1) 9 | 10 | test_that( 11 | "the output of reltable is a data.frame", { 12 | expect_equal( 13 | is.data.frame(reltable(testcorr1)), 14 | TRUE 15 | ) 16 | } 17 | ) 18 | 19 | test_that( 20 | "the output of reltable has the main output-columns", { 21 | expect_equal( 22 | c("indexvar1", "indexvar2", "corrvalue", "namevar1", "namevar2") %in% 23 | colnames(reltable(testcorr1)), 24 | c(TRUE, TRUE, TRUE, TRUE, TRUE) 25 | ) 26 | expect_equal( 27 | c("indexvar1", 28 | "indexvar2", 29 | "corrvalue", 30 | "namevar1", 31 | "namevar2", 32 | "corrvalue2") %in% 33 | colnames(reltable(testcorr1, testcorr2)), 34 | c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE) 35 | ) 36 | } 37 | ) 38 | 39 | test_that( 40 | "the output of reltable doesn't contain autocorrelations", { 41 | expect_equal( 42 | nrow(subset(reltable(testcorr1, testcorr2), namevar1 == namevar2)) == 0, 43 | TRUE 44 | ) 45 | } 46 | ) 47 | 48 | test_that( 49 | "the output of reltable has a plausible amount of rows for relations", { 50 | expect_equal( 51 | nrow(reltable(testcorr1, testcorr2)) <= 52 | nrow(testcorr1) * ncol(testcorr1) - nrow(testcorr1), 53 | TRUE 54 | ) 55 | } 56 | ) -------------------------------------------------------------------------------- /tests/testthat/test_predictvo.R: -------------------------------------------------------------------------------- 1 | context("Tests of function predictvo") 2 | 3 | testmatrix <- data.frame( 4 | c1 = c(5, 0, 0, 0, 1, 1), 5 | c2 = c(5, 0, 0, 0, 1, 1), 6 | c3 = c(5, 1, 7, 0, 0, 2), 7 | c4 = c(5, 6, 7, 0, 0, 0), 8 | c5 = c(5, 3, 2, 0, 0, 3), 9 | c6 = c(0, 6, 1, 0, 0, 0), 10 | c7 = c(0, 1, 1, 1, 0, 0) 11 | ) 12 | testmatrix <- quantAAR::booleanize(testmatrix) 13 | testcorr <- corrmat(testmatrix, "chi2", chi2limit = 0.2, dim = 1) 14 | rel <- reltable(testcorr) 15 | testvars <- c("c1", "c3", "c7") 16 | 17 | test_that( 18 | "the output of predictvo is a data.frame", { 19 | expect_equal( 20 | is.data.frame(predictvo(testmatrix, rel, testvars)), 21 | TRUE 22 | ) 23 | } 24 | ) 25 | 26 | test_that( 27 | "the output of predictvo has the correct amount of cols 28 | (prediction+actual for every variable of interest)", { 29 | expect_equal( 30 | ncol(predictvo(testmatrix, rel, testvars)), 31 | 2 * length(c("c1", "c3", "c7")) 32 | ) 33 | } 34 | ) 35 | 36 | test_that( 37 | "the output of predictvo has the correct amount of rows 38 | (one row for every object of the testmatrix)", { 39 | expect_equal( 40 | nrow(predictvo(testmatrix, rel, testvars)), 41 | nrow(testmatrix) 42 | ) 43 | } 44 | ) 45 | 46 | test_that( 47 | "the output of predictvo contains the correct actual presence values", { 48 | expect_equal( 49 | predictvo(testmatrix, rel, testvars)[1, 2], 50 | 1 51 | ) 52 | expect_equal( 53 | predictvo(testmatrix, rel, testvars)[6, 4], 54 | 1 55 | ) 56 | expect_equal( 57 | predictvo(testmatrix, rel, testvars)[5, 6], 58 | 0 59 | ) 60 | } 61 | ) -------------------------------------------------------------------------------- /man/predictvo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predictvo.R 3 | \name{predictvo} 4 | \alias{predictvo} 5 | \title{Predict the relation of objects to a list of variables of interest} 6 | \usage{ 7 | predictvo(matrix, reltable, mvars, level = 2) 8 | } 9 | \arguments{ 10 | \item{matrix}{data.frame with numeric values} 11 | 12 | \item{reltable}{table of correlation values (e.g. produced by \code{reltable()})} 13 | 14 | \item{mvars}{vector of variables of interest (full name)} 15 | 16 | \item{level}{switch to define if the prediction should be based on level 1 17 | or on level 1 + level 2 variables. A level 1 variable is directly linked to a variable 18 | of interest, a level 2 variable is linked to the level 1 variables of said variable of 19 | interest 20 | 21 | 1: prediction is based only on level 1 variables. 22 | 23 | 2 (default): prediction is based on level 1 + level 2 variables.} 24 | } 25 | \value{ 26 | table with predicted, normalized relation values of every object and given 27 | variables of interest. If no variable of interest has correlations to other variables, 28 | \code{predictvo()} returns FALSE. 29 | } 30 | \description{ 31 | By comparison of significant correlations within variables and the variables the objects 32 | incorporate \code{predictvo()} makes an prediction about the relation to given variables 33 | for every object. 34 | } 35 | \examples{ 36 | testmatrixrand <- data.frame( 37 | matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 38 | ) 39 | 40 | testcorr <- corrmat(testmatrixrand, "chi2", chi2limit = 0.1, dim = 1) 41 | 42 | rel <- reltable(testcorr) 43 | 44 | predictvo(testmatrixrand, rel, c("X2", "X3")) 45 | 46 | } 47 | -------------------------------------------------------------------------------- /man/rmnegcorr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_munging_functions.R 3 | \name{rmnegcorr} 4 | \alias{rmnegcorr} 5 | \title{Remove negative correlations from a correlation matrix} 6 | \usage{ 7 | rmnegcorr(corrmatrix, matrix, dim, niv = 0.1) 8 | } 9 | \arguments{ 10 | \item{corrmatrix}{correlation matrix (as produced by varnastats::corrmat())} 11 | 12 | \item{matrix}{underlying data.frame} 13 | 14 | \item{dim}{switch to define if the correlation matrix should be created 15 | for columns or rows. 16 | 17 | 1: table is created for column (variables) relations. 18 | 19 | 2: table is created for row (objects) relations.} 20 | 21 | \item{niv}{decision niveau. The smaller niv the weaker the overlap of two 22 | variables/objects can be, to still be recognised as the cause for a positiv relation. 23 | 24 | default: 0.1 -> 10\%} 25 | } 26 | \value{ 27 | correlation matrix without negative relations 28 | } 29 | \description{ 30 | \code{rmnegcorr} removes "negative correlations" from a correlation matrix. 31 | A negative correlation is defined as a relation between to variables/objects, 32 | that have a high correlation value due to NOT frequently appearing together. 33 | This function can be applied for correlation matrizes created for variable or 34 | object relations. 35 | } 36 | \examples{ 37 | testmatrixrand <- data.frame( 38 | matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 39 | ) 40 | 41 | testmatrixrand[,1] <- c(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0) 42 | testmatrixrand[,2] <- c(0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1) 43 | 44 | testcorr <- corrmat(testmatrixrand, "chi2", chi2limit = 0.1, dim = 1) 45 | 46 | rmnegcorr(testcorr, testmatrixrand, dim = 1) 47 | 48 | } 49 | -------------------------------------------------------------------------------- /man/igraphmask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_functions.R 3 | \name{igraphmask} 4 | \alias{igraphmask} 5 | \title{Mask function for igraph::plot.igraph() to visualize a relation table as a graph} 6 | \usage{ 7 | igraphmask(reltable, mypath, w = 3000, h = 3000, 8 | colorvector = c("red")) 9 | } 10 | \arguments{ 11 | \item{reltable}{list of bivariate relations (as produced by varnastats::reltable())} 12 | 13 | \item{mypath}{file.path where the plot file should be stored 14 | 15 | mypath <- file.path("~/path/to/my/directory/",paste("myfilename", ".png", sep = ""))} 16 | 17 | \item{w}{width of the resulting graphic file in px 18 | 19 | default = 3000} 20 | 21 | \item{h}{width of the resulting graphic file in px 22 | 23 | default = 3000} 24 | 25 | \item{colorvector}{string vector with colour values to mark certain vertices 26 | 27 | default = c("red")} 28 | } 29 | \value{ 30 | graphic file of a graph plot 31 | } 32 | \description{ 33 | \code{igraphmask} is an input mask for the \code{plot.igraph()} function of the 34 | package igraph. \code{plot.igraph()} creates graph plots based on bivariate relations. 35 | See \code{?igraph} for further info. 36 | \code{igraphmask} allows to get a nice, basic graph plot as a png-file 37 | that is directly saved to the file system. It doesn't allow to define specific 38 | plot settings. 39 | } 40 | \details{ 41 | For graph layout the fruchterman.reingold.grid algorithm is used. 42 | } 43 | \examples{ 44 | testmatrixrand <- data.frame( 45 | matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 46 | ) 47 | 48 | testcorr <- corrmat(testmatrixrand, "lambda", chi2limit = 0.1, dim = 1) 49 | testrel <- reltable(testcorr) 50 | 51 | testpath <- file.path(".",paste("testfile", ".png", sep = "")) 52 | 53 | igraphmask(testrel, testpath, w = 1000, h = 1000) 54 | 55 | } 56 | -------------------------------------------------------------------------------- /tests/testthat/test_corrmat.R: -------------------------------------------------------------------------------- 1 | context("Tests of function corrmat") 2 | 3 | testmatrixrand1 <- data.frame( 4 | matrix(base::sample(0:1, 400, replace = T), nrow = 20, ncol = 20), 5 | check.names = FALSE 6 | ) 7 | 8 | test_that( 9 | "the output of corrmat is a data.frame", { 10 | expect_equal( 11 | is.data.frame(corrmat(testmatrixrand1, dim = 1)), 12 | TRUE 13 | ) 14 | expect_equal( 15 | is.data.frame(corrmat(testmatrixrand1, dim = 2)), 16 | TRUE 17 | ) 18 | } 19 | ) 20 | 21 | testmatrix2 <- data.frame( 22 | c1 = c(5, 0, 0, 0, 1, 1), 23 | c2 = c(5, 0, 0, 0, 1, 1), 24 | c3 = c(5, 1, 7, 0, 0, 2), 25 | c4 = c(5, 6, 7, 0, 0, 0), 26 | c5 = c(5, 3, 2, 0, 0, 3), 27 | c6 = c(0, 6, 1, 0, 0, 0), 28 | c7 = c(0, 1, 1, 1, 0, 0) 29 | ) 30 | testmatrix2 <- quantAAR::booleanize(testmatrix2) 31 | 32 | test_that( 33 | "the output of corrmat is a data.frame with the correct width and heigth", { 34 | expect_equal( 35 | length(newcorrtable(testmatrix2, dim = 1)), 36 | length(testmatrix2) 37 | ) 38 | expect_equal( 39 | length(newcorrtable(testmatrix2, dim = 2)), 40 | nrow(testmatrix2) 41 | ) 42 | } 43 | ) 44 | 45 | test_that( 46 | "the different methods of corrmat are calculated correctly", { 47 | expect_equal( 48 | corrmat(testmatrix2, "chi2", chi2limit = 0.2)[5, 3], 49 | 1 50 | ) 51 | expect_equal( 52 | round(corrmat(testmatrix2, "phi", chi2limit = 0.2)[5, 3], 3), 53 | 0.625 54 | ) 55 | expect_equal( 56 | round(corrmat(testmatrix2, "cc", chi2limit = 0.2)[5, 3], 3), 57 | 0.53 58 | ) 59 | expect_equal( 60 | round(corrmat(testmatrix2, "lambda", chi2limit = 0.2)[5, 3], 3), 61 | 1 62 | ) 63 | } 64 | ) 65 | 66 | test_that( 67 | "the removal of negative relations in corrmat works", { 68 | expect_equal( 69 | corrmat(testmatrix2, "chi2", chi2limit = 0.2)[1, 7], 70 | 1 71 | ) 72 | expect_equal( 73 | corrmat(testmatrix2, "chi2", chi2limit = 0.2, rmnegniv = 0.1)[1, 7], 74 | 0 75 | ) 76 | } 77 | ) 78 | -------------------------------------------------------------------------------- /man/corrplotmask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_functions.R 3 | \name{corrplotmask} 4 | \alias{corrplotmask} 5 | \title{Mask function for corrplot::corrplot()} 6 | \usage{ 7 | corrplotmask(corrmatrix, xmatrix = "notact", mypath, 8 | voi = 1:ncol(corrmatrix), w = 3000, h = 3000) 9 | } 10 | \arguments{ 11 | \item{corrmatrix}{correlation matrix (as produced by varnastats::corrmat())} 12 | 13 | \item{xmatrix}{correlation matrix only with the values 0 and 1, where 1 serves as 14 | a marker (as produced by varnastats::corrmat(method="chi2")). Relations with the 15 | value 1 will be marked with an X in the corrplot. 16 | 17 | default = 0 (nothing is marked)} 18 | 19 | \item{mypath}{file.path where the plot file should be stored 20 | 21 | mypath <- file.path("~/path/to/my/directory/",paste("myfilename", ".png", sep = ""))} 22 | 23 | \item{voi}{vector of indezes of variables/objects that should be shown on the x-axis. 24 | 25 | default = 1:length(corrmatrix[1,]) (every variable/object is shown)} 26 | 27 | \item{w}{width of the resulting graphic file in px 28 | 29 | default = 3000} 30 | 31 | \item{h}{height of the resulting graphic file in px 32 | 33 | default = 3000} 34 | } 35 | \value{ 36 | graphic file of a correlation matrix plot 37 | } 38 | \description{ 39 | \code{corrplotmask} is an input mask for the \code{corrplot()} function of the 40 | package corrplot. \code{corrplot()} visualizes correlation matrizes. See 41 | \code{?corrplot()} for further info. 42 | \code{corrplotmask} allows to get a nice, basic corrplot graphic as a png-file 43 | that is directly saved to the file system. It doesn't allow to define specific 44 | plot settings. 45 | } 46 | \examples{ 47 | testmatrixrand <- data.frame( 48 | matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 49 | ) 50 | 51 | testcorr <- corrmat(testmatrixrand, "lambda", chi2limit = 0.1, dim = 1) 52 | xtestcorr <- corrmat(testmatrixrand, "chi2", chi2limit = 0.1, dim = 1) 53 | 54 | testpath <- file.path(".",paste("testfile", ".png", sep = "")) 55 | 56 | corrplotmask( 57 | corrmatrix = testcorr, 58 | xmatrix = xtestcorr, 59 | mypath = testpath, 60 | voi = 1:10, 61 | w = 500, 62 | h = 1000 63 | ) 64 | 65 | } 66 | -------------------------------------------------------------------------------- /data-raw/bs1.csv: -------------------------------------------------------------------------------- 1 | ;sex_male;sex_female;pos_crouched;pos_extended;orient_N_S;orient_W_E;axe_1;axe_2;adze_1;adze_2;pottery_1;pottery_2;pottery_3;pottery_4;goldring;goldbead;fibula_1;fibula_2 2 | 1;1;0;0;1;1;0;1;3;9;5;0;0;0;0;0;10;0;0 3 | 2;1;0;0;1;1;0;0;0;0;0;8;1;0;0;0;30;0;0 4 | 3;1;0;0;0;0;0;3;0;3;0;0;0;0;0;0;0;0;0 5 | 4;1;0;0;1;1;0;5;7;4;0;8;0;0;0;0;2;0;0 6 | 5;1;0;0;0;1;0;0;10;0;5;0;2;0;0;0;9;0;3 7 | 6;1;0;0;1;0;0;0;5;4;0;5;0;0;0;0;0;0;0 8 | 7;1;0;0;0;1;0;7;0;0;0;0;0;0;0;0;0;0;0 9 | 8;1;0;0;0;1;0;0;0;3;0;5;0;0;0;0;0;0;0 10 | 9;1;0;0;1;1;0;0;1;0;0;0;3;0;0;0;5;0;0 11 | 10;1;0;0;0;0;0;0;0;9;4;7;0;0;0;0;3;0;0 12 | 11;1;0;0;1;1;0;9;0;7;0;0;1;0;0;0;8;0;0 13 | 12;1;0;0;0;1;0;0;3;3;0;7;0;0;0;0;0;0;0 14 | 13;1;0;0;1;0;0;0;6;0;0;0;2;0;0;0;2;0;0 15 | 14;1;0;1;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0 16 | 15;1;0;0;1;1;0;3;0;8;0;0;0;0;0;0;0;0;0 17 | 16;1;0;0;0;1;0;0;4;7;2;5;7;0;0;0;3;0;0 18 | 17;1;0;0;1;1;0;0;4;2;0;0;9;0;0;0;4;0;0 19 | 18;1;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0 20 | 19;1;0;0;1;1;0;7;0;6;0;0;0;0;0;0;7;0;0 21 | 20;1;0;0;1;1;0;0;0;10;4;9;0;0;0;0;3;0;0 22 | 21;0;0;0;0;1;0;7;4;4;0;0;4;0;0;0;0;0;0 23 | 22;0;0;1;0;0;1;0;0;0;0;4;2;0;0;1;5;3;0 24 | 23;0;0;1;0;1;0;7;0;9;0;0;0;0;0;0;3;0;0 25 | 24;0;0;0;0;0;1;0;7;0;0;0;0;0;0;10;0;0;0 26 | 25;0;0;0;1;1;0;6;1;0;5;0;10;0;0;0;5;0;0 27 | 26;0;0;1;0;0;1;0;0;0;5;6;0;0;0;0;5;0;0 28 | 27;0;0;0;1;1;0;4;0;5;0;0;7;0;0;0;8;4;8 29 | 28;0;0;1;0;0;1;0;0;0;0;7;0;0;0;8;0;0;0 30 | 29;0;0;0;1;1;0;0;1;5;0;0;7;0;0;0;2;0;0 31 | 30;0;0;1;0;1;0;0;0;0;7;0;0;0;0;0;3;0;0 32 | 31;0;1;0;0;0;1;0;0;0;8;0;4;0;0;0;0;0;0 33 | 32;0;1;1;0;0;1;0;0;0;0;5;7;4;0;0;5;0;2 34 | 33;0;1;1;0;0;1;0;0;0;0;0;0;6;0;10;9;10;1 35 | 34;0;1;1;0;0;0;0;0;0;0;2;0;5;0;0;0;0;2 36 | 35;0;1;1;0;0;1;0;0;0;0;0;8;0;0;0;3;8;0 37 | 36;0;1;0;0;0;1;0;0;2;0;0;0;4;0;0;9;0;3 38 | 37;0;1;1;0;0;1;0;0;0;0;0;0;6;0;0;0;7;3 39 | 38;0;1;0;0;0;1;0;0;0;0;8;0;0;1;2;0;2;0 40 | 39;0;1;1;0;0;0;0;0;0;0;0;0;5;0;2;8;3;1 41 | 40;0;1;1;0;0;1;0;0;0;1;0;7;9;0;0;0;0;2 42 | 41;0;1;0;0;0;1;0;2;0;0;0;0;0;3;0;70;7;0 43 | 42;0;1;1;0;0;0;0;0;0;0;8;0;2;0;0;0;0;1 44 | 43;0;1;1;0;0;1;0;0;0;0;0;6;8;0;7;0;6;1 45 | 44;0;1;0;0;0;0;0;0;0;0;7;0;0;3;0;60;0;2 46 | 45;0;1;1;0;0;1;1;0;0;0;0;0;5;0;4;3;6;0 47 | 46;0;1;0;0;0;1;0;0;0;3;4;5;0;0;10;0;0;2 48 | 47;0;1;1;0;0;1;1;0;0;0;0;1;2;0;0;3;1;5 49 | 48;0;1;0;0;0;1;0;0;0;0;6;0;4;1;0;0;0;0 50 | 49;0;1;1;0;0;0;0;0;0;0;0;9;0;2;5;0;2;0 51 | 50;0;1;0;0;0;1;0;0;0;3;0;5;1;0;0;50;3;2 52 | -------------------------------------------------------------------------------- /man/corrmat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_munging_functions.R 3 | \name{corrmat} 4 | \alias{corrmat} 5 | \title{Create a correlation matrix of an input data.frame} 6 | \usage{ 7 | corrmat(matrix, method = "chi2", dim = 1, chi2limit = 0.05, 8 | rmnegniv = 0) 9 | } 10 | \arguments{ 11 | \item{matrix}{data.frame with numeric values} 12 | 13 | \item{method}{switch to define which contingency value should be used: 14 | 15 | "chi2" (default): test decision of the chi-square test for a defined decision niveau. 16 | A significant relation of two variables/objects is marked with a numeric "1", a 17 | negativ test result with a numeric "0". 18 | 19 | "phi": Pearson's phi coefficient ("mean square contingency coefficient"). 20 | 21 | "cc": Pearson's contingency coefficient. 22 | 23 | "lambda": Goodman and Kruskal's lambda value. Mean of both values calculated 24 | depending on what's the dependent and what's the independent variable} 25 | 26 | \item{dim}{switch to define if the correlation matrix should be created 27 | for columns or rows. 28 | 29 | 1 (default): table is created for column (variables) relations. 30 | 31 | 2: table is created for row (objects) relations.} 32 | 33 | \item{chi2limit}{significance level for the test decision. Just relevant for method 34 | "chi2". The higher chi2limit the less results will get removed. 35 | 36 | default: 0.05 -> 5\%} 37 | 38 | \item{rmnegniv}{option allows to remove "negativ relations". If >0 \code{rmnegcorr} gets 39 | called and applied. A usual value for rmnegniv is 0.1.} 40 | } 41 | \value{ 42 | correlation matrix 43 | } 44 | \description{ 45 | \code{corrmat} returns a correlation matrix of a data.frame. Several different 46 | correlation methods can be choosen and the matrix can be created for column or row 47 | relations. 48 | } 49 | \details{ 50 | The rmnegniv option allows to remove "negativ relations", by activating 51 | the rmnegcorr function for values >0. The smaller rmnegniv the weaker the overlap 52 | of two variables/objects can be, to still be recognised as the cause of a positiv 53 | relation. 54 | 55 | See \code{?rmnegcorr} for further info. This function can also be applied later. 56 | } 57 | \examples{ 58 | testmatrixrand <- data.frame( 59 | matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 60 | ) 61 | 62 | corrmat(testmatrixrand, "chi2", chi2limit = 0.03) 63 | 64 | corrmat(matrix = testmatrixrand, method = "lambda", dim = 2) 65 | 66 | phicorrtable <- corrmat( 67 | matrix = testmatrixrand, 68 | method = "phi", 69 | dim = 1 70 | ) 71 | 72 | # Without negative relations: 73 | phicorrtablewnr <- corrmat( 74 | matrix = testmatrixrand, 75 | method = "phi", 76 | dim = 1, 77 | rmnegniv = 0.1 78 | ) 79 | 80 | } 81 | -------------------------------------------------------------------------------- /R/linkage.R: -------------------------------------------------------------------------------- 1 | #' Calculate linkage values of variables and objects of a numeric data.frame 2 | #' 3 | #' \code{linkage()} calculates a linkage value for every variable and every object 4 | #' of an input data.frame and returns a table with this information. 5 | #' This linkage value allows predictions about whether a variable/object can be used 6 | #' for multivariate analysis. If a variable/object is not well linked to the other entities, 7 | #' it will often appear as an outlier. 8 | #' 9 | #' @details 10 | #' Structure of the resulting table: 11 | #' 12 | #' column 1: linkage value (see code to understand how it's calculated) 13 | #' 14 | #' column 2: logarithm of linkage value (useful for interpretation) 15 | #' 16 | #' column 3: type (variable or object) 17 | #' 18 | #' @param matrix data.frame with numeric values 19 | #' @return table with linkage values 20 | #' 21 | #' @examples 22 | #' testmatrixrand <- data.frame( 23 | #' matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 24 | #' ) 25 | #' 26 | #' linkage(testmatrixrand) 27 | #' 28 | #' link <- subset(linkage(testmatrixrand), type == "obj") 29 | #' barplot(link$linkage, names.arg = rownames(link)) 30 | #' 31 | #' @export 32 | #' 33 | 34 | linkage <- function(matrix) { 35 | 36 | ## calculate linkage value for objects 37 | 38 | # create table to store linkage value for objects 39 | weigthtab.obj <- matrix[1:3] 40 | colnames(weigthtab.obj) <- c("linkage", "loglinkage", "type") 41 | 42 | ## loop to check every object 43 | for (i in 1:nrow(matrix)) { 44 | 45 | # calculate sum of the variables present in the current object 46 | sumvar <- 0 47 | for (n in 1:ncol(matrix)) { 48 | if (matrix[i, n] != 0) { 49 | sumvar <- sumvar + length(which(matrix[, n] != "0")) 50 | } 51 | 52 | } 53 | 54 | # calculate linkage value: sumvar * amount of different variables 55 | # incorporated by the object 56 | weigthtab.obj[i, 1] <- sumvar * length(which(matrix[i, ] != "0")) 57 | 58 | # calculate logarithm of linkage value 59 | if (weigthtab.obj[i, 1] != 0) { 60 | weigthtab.obj[i, 2] <- log(weigthtab.obj[i, 1]) 61 | } else { 62 | weigthtab.obj[i, 2] <- 0 63 | } 64 | 65 | } 66 | 67 | 68 | ## calculate linkage value for variables 69 | 70 | # transpose input matrix 71 | t.matrix <- t(matrix) 72 | 73 | # create table to store linkage value for variables 74 | weigthtab.var <- t.matrix[, 1:3] 75 | colnames(weigthtab.var) <- c("linkage", "loglinkage", "type") 76 | 77 | ## loop to check every variable 78 | for (i in 1:nrow(t.matrix)) { 79 | 80 | # calculate sum of the objects that incorporate the current variable 81 | sumobj <- 0 82 | for (n in 1:ncol(t.matrix)) { 83 | if (t.matrix[i, n] != 0) { 84 | sumobj <- sumobj + length(which(t.matrix[, n] != "0")) 85 | } 86 | 87 | } 88 | 89 | # calculate linkage value: sumvar * amount of different objects 90 | # that incorporate the variable 91 | weigthtab.var[i, 1] <- sumobj * length(which(t.matrix[i, ] != "0")) 92 | 93 | # calculate logarithm of linkage value 94 | if (weigthtab.var[i, 1] != 0) { 95 | weigthtab.var[i, 2] <- log(weigthtab.var[i, 1]) 96 | } else { 97 | weigthtab.var[i, 2] <- 0 98 | } 99 | 100 | } 101 | 102 | weigthtab.var <- data.frame(weigthtab.var) 103 | 104 | 105 | # fill column type to mark objects and variables 106 | weigthtab.obj$type <- rep("obj", 1, nrow(weigthtab.obj)) 107 | weigthtab.var$type <- rep("var", 1, nrow(weigthtab.var)) 108 | 109 | # combine object- and variable tables 110 | weigthtab <- rbind(weigthtab.obj, weigthtab.var) 111 | 112 | return(weigthtab) 113 | 114 | } 115 | -------------------------------------------------------------------------------- /R/predictvo.R: -------------------------------------------------------------------------------- 1 | #' Predict the relation of objects to a list of variables of interest 2 | #' 3 | #' By comparison of significant correlations within variables and the variables the objects 4 | #' incorporate \code{predictvo()} makes an prediction about the relation to given variables 5 | #' for every object. 6 | #' 7 | #' @param matrix data.frame with numeric values 8 | #' @param reltable table of correlation values (e.g. produced by \code{reltable()}) 9 | #' @param mvars vector of variables of interest (full name) 10 | #' @param level switch to define if the prediction should be based on level 1 11 | #' or on level 1 + level 2 variables. A level 1 variable is directly linked to a variable 12 | #' of interest, a level 2 variable is linked to the level 1 variables of said variable of 13 | #' interest 14 | #' 15 | #' 1: prediction is based only on level 1 variables. 16 | #' 17 | #' 2 (default): prediction is based on level 1 + level 2 variables. 18 | #' 19 | #' @return table with predicted, normalized relation values of every object and given 20 | #' variables of interest. If no variable of interest has correlations to other variables, 21 | #' \code{predictvo()} returns FALSE. 22 | #' 23 | #' @examples 24 | #' testmatrixrand <- data.frame( 25 | #' matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 26 | #' ) 27 | #' 28 | #' testcorr <- corrmat(testmatrixrand, "chi2", chi2limit = 0.1, dim = 1) 29 | #' 30 | #' rel <- reltable(testcorr) 31 | #' 32 | #' predictvo(testmatrixrand, rel, c("X2", "X3")) 33 | #' 34 | #' @export 35 | #' 36 | 37 | predictvo <- function (matrix, reltable, mvars, level = 2) { 38 | 39 | namevar1 <- NULL; 40 | namevar2 <- NULL; 41 | 42 | # check if reltable has the variables namevar1 and namevar2 43 | if (c("namevar1", "namevar2") %in% colnames(reltable) %>% all %>% `!`) { 44 | stop("reltable doesn't have the variables namevar1 and namevar2.") 45 | } 46 | 47 | # loop: check relations of every variable of interest 48 | for (pointer in 1:length(mvars)){ 49 | 50 | mvar <- mvars[pointer] 51 | 52 | # find variables, that are linked to the current variable of interest 53 | redtovar <- dplyr::filter( 54 | reltable, 55 | namevar1 == mvar | namevar2 == mvar 56 | ) 57 | 58 | # if no variables are linked to the current variable of interest, the loop 59 | # continues with the next variable 60 | if (nrow(redtovar) == 0) { 61 | next() 62 | } 63 | 64 | # extract partner variables of the variable of interest (1. Level) 65 | withoutmvar <- c( 66 | redtovar[redtovar$namevar1 != mvar, ]$namevar1, 67 | redtovar[redtovar$namevar2 != mvar, ]$namevar2 68 | ) 69 | 70 | # extract partner variables of partner variables of interest (2. Level) 71 | mvarnet <- dplyr::filter( 72 | reltable, 73 | namevar1 == withoutmvar[1] | 74 | namevar2 == withoutmvar[1] 75 | ) 76 | for (i in 2:length(withoutmvar)) { 77 | mvarnet <- rbind( 78 | mvarnet, 79 | dplyr::filter ( 80 | reltable, 81 | namevar1 == withoutmvar[i] | 82 | namevar2 == withoutmvar[i] 83 | ) 84 | ) 85 | } 86 | 87 | # create vector of partner variables 88 | # (2. Level or 1. Level + Variable of Interest) 89 | if (level == 1) { 90 | mvarvec <- c(withoutmvar, mvar) 91 | } else if (level == 2) { 92 | mvarvec <- c(mvarnet$namevar1, mvarnet$namevar2, withoutmvar, mvar) 93 | } 94 | 95 | # remove multiple values to get a simple list of partner variables 96 | mvarvec <- unique(mvarvec) 97 | 98 | mvarrel <- c() 99 | # loop: check relation of the variable of interest with every object 100 | for (i in 1:length(matrix[, 1])){ 101 | # determine variables present in current object 102 | cur <- colnames(matrix)[as.logical(matrix[i, ])] 103 | # compare variables present in current object with the list of 2. Level 104 | # partner variables. Count overlap 105 | mvarrel[i] <- length(mvarvec[mvarvec %in% cur]) 106 | } 107 | 108 | # normalize overlap vector 109 | for (i in 1:length(mvarrel)){ 110 | mvarrel[i] <- mvarrel[i] / max(mvarrel) 111 | } 112 | 113 | # write overlap vector into a data.frame to collect the information for 114 | # every variable of information in one table 115 | if (!(exists("relvaluetable"))) { 116 | relvaluetable <- data.frame(mvarrel, matrix[, mvar]) 117 | } else { 118 | relvaluetable <- data.frame(relvaluetable, mvarrel, matrix[, mvar]) 119 | } 120 | 121 | } 122 | # If no variable of interest has correlations to other variables the function 123 | # returns FALSE. 124 | if (!(exists("relvaluetable"))) { 125 | return(FALSE) 126 | } 127 | 128 | # adjust colnames of resulting data.frame 129 | even <- seq(2, length(relvaluetable), 2) 130 | odd <- seq(1, length(relvaluetable), 2) 131 | suppressWarnings( 132 | colnames(relvaluetable)[odd] <- paste(mvars, "PREDICTION") 133 | ) 134 | suppressWarnings( 135 | colnames(relvaluetable)[even] <- paste(mvars, "ACTUAL") 136 | ) 137 | 138 | relvaluetable 139 | 140 | } 141 | -------------------------------------------------------------------------------- /vignettes/varnastats-vignette-1.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Variable contingency and affiliation prediction" 3 | author: "Clemens Schmid" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Variable contingency and affiliation prediction} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r, echo=FALSE, message=FALSE} 13 | devtools::load_all() 14 | library(reshape2) 15 | library(ggplot2) 16 | library(corrplot) 17 | library(dplyr) 18 | library(igraph) 19 | library(ca) 20 | ``` 21 | 22 | This Vignette shows a workflow for archaeologists workig on burial sites. Starting point is a data.frame with burials (objects) and their classified attributes (grave goods, orientation, size, sex etc.). An example dataset with 50 fictional burials and 18 stereotypical attributes is provided in `data/bs1`. Here's an extract of bs1: 23 | 24 | ```{r, echo=FALSE} 25 | bs1[1:10,9:16] 26 | ``` 27 | 28 | For bivariate analysis it's useful to use categorized data. In the context of burialsites the simple information wether a grave good is present or absent in a certain grave is often already sufficient. To reduce our dataset to this information the function `booleanize()` can be applied. Empty graves and attributes that never appeare can be removed with `itremove(x,1)`. 29 | 30 | ```{r, echo=FALSE, echo=TRUE} 31 | bs <- quantAAR::booleanize(bs1) 32 | bs <- quantAAR::itremove(bs,1) 33 | bs[1:10,9:16] 34 | ``` 35 | 36 | Let's look at the appearences of the individual grave goods with `presencecount()`. While most goods are frequent, *pottery_4* is quite rare - too rare to be meaningfully analyzed with chi square statistics. 37 | 38 | ```{r, echo=TRUE, message=FALSE, fig.width=7.5, fig.height=5} 39 | presencematerial <- varnastats::presencecount(bs[,7:18], dim = 1) 40 | 41 | presence.m <- reshape2::melt(presencematerial) 42 | ggplot(presence.m, 43 | aes(x = variable, 44 | y = value)) + 45 | geom_bar(stat = "identity") 46 | ``` 47 | 48 | With `itremove()` we can delete every grave good that appeares less than ten times. As a consequence *pottery_4* is removed. 49 | 50 | ```{r, echo=TRUE, message=FALSE, fig.width=7.5, fig.height=5} 51 | bsred <- bs[,7:18] 52 | bsred <- quantAAR::itremove(bsred, cmin = 10, rmin = 0) 53 | 54 | presencematerial <- varnastats::presencecount(bsred, dim = 1) 55 | presence.m <- reshape2::melt(presencematerial) 56 | ggplot(presence.m, 57 | aes(x = variable, 58 | y = value)) + 59 | geom_bar(stat = "identity") 60 | ``` 61 | 62 | Now we can calculate the bivariate relations of all attributes with the function `corrmat()`. `corrmat()` offers different correlation values. In this case we use the testdecision of the chi square test on a significance level of 2% and the phi coefficient. `rmnegcorr()` is used to remove significant but negative relationships. 63 | 64 | ```{r, echo=TRUE, message=FALSE, warning=FALSE, fig.width=7.5, fig.height=7.5} 65 | bsprep <- data.frame(bs[,1:6], bsred) 66 | corrtablechi2test <- varnastats::corrmat(bsprep, method = "chi2", dim = 1, chi2limit = 0.02) 67 | corrtablephi <- varnastats::corrmat(bsprep, method = "phi", dim = 1) 68 | mastercorr <- varnastats::rmnegcorr(corrtablephi, bsprep, niv = 0.1, dim = 1) 69 | 70 | col2 <- grDevices::colorRampPalette(c("white","white", "chartreuse4")) 71 | corrplot::corrplot( 72 | t(mastercorr), 73 | method = c("color") 74 | ) 75 | ``` 76 | 77 | `reltable()` creates a list of the significant relationships within the correlation table. We choose the phi coefficient to be the first correlation value and the chi square test decision to be the second. 78 | 79 | ```{r, echo=TRUE, message=FALSE, fig.width=7.5, fig.height=5, warning=FALSE} 80 | signicorr <- varnastats::reltable(mastercorr, corrtablechi2test) 81 | signicorr <- dplyr::filter(signicorr, corrvalue2 == TRUE) 82 | 83 | signicorr 84 | ``` 85 | 86 | The network of bivariate relationships can be plotted as a graph with the igraph-package. The male and the female clusters are clearly distinguishable. 87 | 88 | ```{r, echo=TRUE, message=FALSE, fig.width=7.5, fig.height=5, warning=FALSE} 89 | signicorrmod <- data.frame( 90 | from = signicorr$namevar1, 91 | to = signicorr$namevar2, 92 | weight = signicorr$corrvalue) 93 | 94 | graphbasis <- igraph::graph.data.frame(signicorrmod, directed = TRUE) 95 | igraph::plot.igraph(graphbasis) 96 | ``` 97 | 98 | Within *signicorr* it's easy to search for the directly to an individual attribute linked attributes (level 1 relations). 99 | 100 | ```{r, echo=TRUE, message=FALSE, fig.width=7.5, fig.height=7.5, warning=FALSE} 101 | mvar <- c("sex_male", "sex_female") 102 | 103 | mvar1male <- dplyr::filter( 104 | signicorr, 105 | namevar1 == mvar[1] | 106 | namevar2 == mvar[1] 107 | ) 108 | 109 | mvar1male 110 | ``` 111 | 112 | ```{r, echo=TRUE, message=FALSE, fig.width=7.5, fig.height=7.5, warning=FALSE} 113 | mvar1female <- dplyr::filter( 114 | signicorr, 115 | namevar1 == mvar[2] | 116 | namevar2 == mvar[2] 117 | ) 118 | 119 | 120 | mvar1female 121 | ``` 122 | 123 | `predictvo()` uses the initial data.frame and *signicorr* to make a prediction about whether an object could contain a variable or not based on cross-references. Useful to determine for example the sex of buried individuals based on their grave goods when no anthropological determination is availbable. 124 | 125 | ```{r, echo=TRUE, message=FALSE, fig.width=7.5, fig.height=5, warning=FALSE} 126 | predictgen <- varnastats::predictvo(bsprep, signicorr, mvar, level = 1) 127 | 128 | sexprediction <- data.frame( 129 | m = predictgen[,1], 130 | w = predictgen[,3], 131 | statsex = NA, 132 | mtat = bsprep$sex_male, 133 | wtat = bsprep$sex_female, 134 | names = make.names(rownames(bsprep)) 135 | ) 136 | 137 | for (i in 1:length(sexprediction[,1])){ 138 | if (sexprediction$m[i] >= 1.5*sexprediction$w[i]){ 139 | sexprediction$statsex[i] <- "m" 140 | } else if (sexprediction$w[i] >= 1.5*sexprediction$m[i]) { 141 | sexprediction$statsex[i] <- "w" 142 | } else { 143 | sexprediction$statsex[i] <- "uncertain" 144 | } 145 | } 146 | 147 | sexprediction 148 | ``` -------------------------------------------------------------------------------- /R/plot_functions.R: -------------------------------------------------------------------------------- 1 | # Begin CA Plot Functions --------------------------- 2 | 3 | #' Plot a selection of plots to get a first impression of the results of a CA 4 | #' 5 | #' \code{caplot()} delivers three pages of plots based on \code{ca::plot.ca()}. 6 | #' Useful to get a first impression of the results to decide, whether a more elaborated 7 | #' plot should be created and whether other axis besides dim1 and dim2 contain interesting 8 | #' information. 9 | #' 10 | #' @param cadoc object of class \code{ca}. Produced e.g. \code{by ca::ca()} 11 | #' 12 | #' @examples 13 | #' testmatrixrand <- data.frame( 14 | #' matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 15 | #' ) 16 | #' rownames(testmatrixrand) <- paste("row", seq(1:nrow(testmatrixrand))) 17 | #' 18 | #' library(ca) 19 | #' 20 | #' caplot(ca(testmatrixrand)) 21 | #' 22 | #' @export 23 | #' 24 | 25 | caplot <- function (cadoc) { 26 | 27 | # save default graphics values 28 | .graphicdefault <- graphics::par(no.readonly = T) 29 | 30 | 31 | ## PAGE 1 32 | 33 | m <- graphics::layout ( 34 | matrix(c(1, 2, 3, 4, 4, 4), 3, 2), 35 | widths = c(1.5, 3), 36 | heights = c(1, 1, 1) 37 | ) 38 | 39 | # inertia barplot 40 | summary(cadoc)[[1]][, 2] -> inert 41 | inert <- inert[1:5] 42 | 43 | graphics::barplot( 44 | inert, 45 | names.arg = paste("", 1:length(inert)), 46 | col = 8, 47 | ylim = c(0, round(1.5 * max(inert), 1)), 48 | space = 0, 49 | las = 1 50 | ) 51 | graphics::text( 52 | (1:length(inert)) - .5, inert, round(inert, 3), 53 | pos = 3 54 | ) 55 | graphics::title(main = "Inertia value of dimensions 1-5", font = 2) 56 | 57 | # ca plots 58 | graphics::plot( 59 | cadoc, 60 | dim = c(1, 2), 61 | labels = c(0, 0), 62 | map = "rowprincipal", 63 | mass = c(TRUE, FALSE) 64 | ) 65 | graphics::title(main = "Mass", font = 2) 66 | 67 | graphics::plot( 68 | cadoc, 69 | dim = c(1, 2), 70 | labels = c(0, 0), 71 | map = "rowprincipal", 72 | contrib = "relative" 73 | ) 74 | graphics::title(main = "Quality", font = 2) 75 | 76 | graphics::plot( 77 | cadoc, 78 | dim = c(1, 2), 79 | labels = c(2, 0), 80 | map = "rowprincipal" 81 | ) 82 | graphics::title(main = "CA - X: dim1, Y: dim2", font = 2) 83 | 84 | 85 | ## Page 2 86 | 87 | m <- graphics::layout ( 88 | matrix(c(1, 1, 2, 3), 2, 2), 89 | widths = c(1, 1), 90 | heights = c(1, 1) 91 | ) 92 | 93 | # ca plots 94 | graphics::plot( 95 | cadoc, 96 | dim = c(1, 2), 97 | labels = c(2, 0), 98 | map = "rowprincipal", 99 | xlim = c(-2, 2), 100 | ylim = c(-2, 2) 101 | ) 102 | graphics::title(main = "X: dim1, Y: dim2 Zoom", font = 2) 103 | 104 | graphics::plot( 105 | cadoc, 106 | dim = c(1, 3), 107 | labels = c(2, 0), 108 | map = "rowprincipal" 109 | ) 110 | graphics::title(main = "X: dim1, Y: dim3", font = 2) 111 | 112 | graphics::plot( 113 | cadoc, 114 | dim = c(2, 3), 115 | labels = c(2, 0), 116 | map = "rowprincipal") 117 | graphics::title(main = "X: dim2, Y: dim3", font = 2) 118 | 119 | 120 | ## PAGE 3 121 | 122 | m <- graphics::layout ( 123 | matrix(c(1), 2, 2), 124 | widths = c(1, 1), 125 | heights = c(1, 1) 126 | ) 127 | 128 | # ca plot 129 | graphics::plot( 130 | cadoc, 131 | dim = c(1, 2), 132 | labels = c(0, 2), 133 | map = "rowprincipal" 134 | ) 135 | graphics::title(main = "X: dim1, Y: dim2", font = 2) 136 | 137 | # set graphic values back to default 138 | graphics::par(.graphicdefault) 139 | } 140 | 141 | # End CA Plot Functions --------------------------- 142 | 143 | # Begin corrplot Plot Functions --------------------------- 144 | 145 | #' Mask function for corrplot::corrplot() 146 | #' 147 | #' \code{corrplotmask} is an input mask for the \code{corrplot()} function of the 148 | #' package corrplot. \code{corrplot()} visualizes correlation matrizes. See 149 | #' \code{?corrplot()} for further info. 150 | #' \code{corrplotmask} allows to get a nice, basic corrplot graphic as a png-file 151 | #' that is directly saved to the file system. It doesn't allow to define specific 152 | #' plot settings. 153 | #' 154 | #' @param corrmatrix correlation matrix (as produced by varnastats::corrmat()) 155 | #' @param xmatrix correlation matrix only with the values 0 and 1, where 1 serves as 156 | #' a marker (as produced by varnastats::corrmat(method="chi2")). Relations with the 157 | #' value 1 will be marked with an X in the corrplot. 158 | #' 159 | #' default = 0 (nothing is marked) 160 | #' 161 | #' @param mypath file.path where the plot file should be stored 162 | #' 163 | #' mypath <- file.path("~/path/to/my/directory/",paste("myfilename", ".png", sep = "")) 164 | #' 165 | #' @param voi vector of indezes of variables/objects that should be shown on the x-axis. 166 | #' 167 | #' default = 1:length(corrmatrix[1,]) (every variable/object is shown) 168 | #' 169 | #' @param w width of the resulting graphic file in px 170 | #' 171 | #' default = 3000 172 | #' 173 | #' @param h height of the resulting graphic file in px 174 | #' 175 | #' default = 3000 176 | #' 177 | #' @return graphic file of a correlation matrix plot 178 | #' 179 | #'@examples 180 | #' testmatrixrand <- data.frame( 181 | #' matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 182 | #' ) 183 | #' 184 | #' testcorr <- corrmat(testmatrixrand, "lambda", chi2limit = 0.1, dim = 1) 185 | #' xtestcorr <- corrmat(testmatrixrand, "chi2", chi2limit = 0.1, dim = 1) 186 | #' 187 | #' testpath <- file.path(".",paste("testfile", ".png", sep = "")) 188 | #' 189 | #' corrplotmask( 190 | #' corrmatrix = testcorr, 191 | #' xmatrix = xtestcorr, 192 | #' mypath = testpath, 193 | #' voi = 1:10, 194 | #' w = 500, 195 | #' h = 1000 196 | #' ) 197 | #' 198 | #' @export 199 | #' 200 | 201 | corrplotmask <- function(corrmatrix, 202 | xmatrix = "notact", 203 | mypath, 204 | voi = 1:ncol(corrmatrix), 205 | w = 3000, 206 | h = 3000) { 207 | 208 | # define color palette (with a nice green) 209 | col2 <- grDevices::colorRampPalette(c("white", "white", "chartreuse4")) 210 | 211 | # plotting in file 212 | grDevices::png(file = mypath, width = w, height = h) 213 | 214 | # decision: with or without crossmarking of certain values. 215 | if (is.character(xmatrix)) { 216 | corrplot::corrplot( 217 | corrmatrix[, voi], 218 | method = "color", 219 | cl.lim = c(0, 1), 220 | tl.col = "black", 221 | tl.cex = 2, 222 | col = col2(50) 223 | ) 224 | } else { 225 | corrplot::corrplot( 226 | as.matrix(corrmatrix[, voi]), 227 | p.mat = as.matrix(xmatrix[, voi]), 228 | method = "color", 229 | cl.lim = c(0, 1), 230 | tl.col = "black", 231 | tl.cex = 2, 232 | col = col2(50) 233 | ) 234 | } 235 | 236 | grDevices::dev.off() 237 | 238 | } 239 | 240 | 241 | # End corrplot Plot Functions --------------------------- 242 | 243 | # Begin igraph Plot Functions --------------------------- 244 | 245 | #' Mask function for igraph::plot.igraph() to visualize a relation table as a graph 246 | #' 247 | #' \code{igraphmask} is an input mask for the \code{plot.igraph()} function of the 248 | #' package igraph. \code{plot.igraph()} creates graph plots based on bivariate relations. 249 | #' See \code{?igraph} for further info. 250 | #' \code{igraphmask} allows to get a nice, basic graph plot as a png-file 251 | #' that is directly saved to the file system. It doesn't allow to define specific 252 | #' plot settings. 253 | #' 254 | #' @details For graph layout the fruchterman.reingold.grid algorithm is used. 255 | #' 256 | #' @param reltable list of bivariate relations (as produced by varnastats::reltable()) 257 | #' @param mypath file.path where the plot file should be stored 258 | #' 259 | #' mypath <- file.path("~/path/to/my/directory/",paste("myfilename", ".png", sep = "")) 260 | #' 261 | #' @param w width of the resulting graphic file in px 262 | #' 263 | #' default = 3000 264 | #' 265 | #' @param h width of the resulting graphic file in px 266 | #' 267 | #' default = 3000 268 | #' 269 | #' @param colorvector string vector with colour values to mark certain vertices 270 | #' 271 | #' default = c("red") 272 | #' 273 | #' @return graphic file of a graph plot 274 | #' 275 | #'@examples 276 | #' testmatrixrand <- data.frame( 277 | #' matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 278 | #' ) 279 | #' 280 | #' testcorr <- corrmat(testmatrixrand, "lambda", chi2limit = 0.1, dim = 1) 281 | #' testrel <- reltable(testcorr) 282 | #' 283 | #' testpath <- file.path(".",paste("testfile", ".png", sep = "")) 284 | #' 285 | #' igraphmask(testrel, testpath, w = 1000, h = 1000) 286 | #' 287 | #' @export 288 | #' 289 | 290 | igraphmask <- function (reltable, 291 | mypath, 292 | w = 3000, 293 | h = 3000, 294 | colorvector = c("red")) { 295 | 296 | # reduce reltable to the necessary columns 297 | reltablesimple <- data.frame(from = reltable$namevar1, 298 | to = reltable$namevar2, 299 | weight = reltable$corrvalue) 300 | 301 | # create graph-list (class of igraph) 302 | graphbasis <- igraph::graph.data.frame(reltablesimple, directed = TRUE) 303 | 304 | # modify color vector to frame white vertices 305 | colorvector2 <- colorvector 306 | colorvector2[grep("white", colorvector2)] <- "black" 307 | 308 | # plotting in file 309 | grDevices::png(file = mypath, width = w, height = h) 310 | 311 | igraph::plot.igraph( 312 | graphbasis, 313 | layout = igraph::layout.fruchterman.reingold.grid(graphbasis), 314 | # design vertices 315 | vertex.shape = "circle", 316 | vertex.size = 7, 317 | vertex.color = colorvector, 318 | vertex.frame.color = colorvector2, 319 | vertex.label.family = "sans", 320 | vertex.label.cex = 1.5, 321 | vertex.label.color = "black", 322 | # design edges 323 | edge.color = "darkgrey", 324 | edge.width = 3, 325 | edge.label = NA, 326 | edge.lty = 3, 327 | edge.arrow.mode = 0 328 | ) 329 | 330 | grDevices::dev.off() 331 | 332 | } 333 | 334 | # End igraph Plot Functions --------------------------- -------------------------------------------------------------------------------- /R/data_munging_functions.R: -------------------------------------------------------------------------------- 1 | # Begin General Data Munging Functions --------------------------- 2 | 3 | #' Count appearence of variables and objects in a data.frame with numeric values 4 | #' 5 | #' A row or a column of a data.frame with numeric values is considered empty, if all 6 | #' its values are 0. If objects or variables are present, their rows and columns contain 7 | #' values != 0. To count the appearence of objects or variables, \code{presencecount} counts 8 | #' the amount of values != 0 within the rows or columns and writes them into a data.frame. 9 | #' The result is sorted by the amount of appearences. 10 | #' 11 | #' @param matrix data.frame with numeric values 12 | #' @param dim switch to define if the appearences in columns (variables) or rows (objects) 13 | #' should be counted. 14 | #' 15 | #' 1: column (variables) appearences are counted 16 | #' 17 | #' 2: row (objects) appearences are counted 18 | #' 19 | #' @return sorted data.frame with amount of appearences for objacts or variables 20 | #' 21 | #' @examples 22 | #' testmatrix <- data.frame(c1 = c(0,3,8,2), c2 = c(0,6,7,8), c3 = c(0,0,0,0)) 23 | #' rownames(testmatrix) <- c("r1","r2","r3","r4") 24 | #' 25 | #' countvar <- presencecount(testmatrix, 1) 26 | #' countobj <- presencecount(testmatrix, 2) 27 | #' 28 | #' @export 29 | #' 30 | 31 | presencecount <- function(matrix, dim=1){ 32 | 33 | # count variables 34 | if (dim == 1) { 35 | # prepare data.frame to store the results 36 | widthdataset <- length(matrix) 37 | presencecount <- data.frame(matrix[1, ], row.names = "count") 38 | # loop to determine the amount of appearences 39 | for (i in 1:widthdataset) { 40 | presencecount[1, i] <- length(which(matrix[, i] != "0")) 41 | } 42 | # sort results by amount of appearences 43 | presencecount <- presencecount[, order(presencecount[1, ])] 44 | } 45 | 46 | # count objects 47 | if (dim == 2) { 48 | # transpose matrix (then the code of dim=1 applies again) 49 | matrix <- data.frame(t(matrix)) 50 | # prepare data.frame to store the results 51 | widthdataset <- length(matrix) 52 | presencecount <- data.frame(matrix[1, ], row.names = "count") 53 | # loop to determine the amount of appearences 54 | for (i in 1:widthdataset) { 55 | presencecount[1, i] <- length(which(matrix[, i] != "0")) 56 | } 57 | # sort results by amount of appearences 58 | presencecount <- presencecount[, order(presencecount[1, ])] 59 | } 60 | 61 | return(presencecount) 62 | } 63 | 64 | # End General Data Munging Functions --------------------------- 65 | 66 | # Begin Correlation Data Munging Functions --------------------------- 67 | 68 | #' Remove negative correlations from a correlation matrix 69 | #' 70 | #' \code{rmnegcorr} removes "negative correlations" from a correlation matrix. 71 | #' A negative correlation is defined as a relation between to variables/objects, 72 | #' that have a high correlation value due to NOT frequently appearing together. 73 | #' This function can be applied for correlation matrizes created for variable or 74 | #' object relations. 75 | #' 76 | #' @param corrmatrix correlation matrix (as produced by varnastats::corrmat()) 77 | #' @param matrix underlying data.frame 78 | #' @param dim switch to define if the correlation matrix should be created 79 | #' for columns or rows. 80 | #' 81 | #' 1: table is created for column (variables) relations. 82 | #' 83 | #' 2: table is created for row (objects) relations. 84 | #' 85 | #' @param niv decision niveau. The smaller niv the weaker the overlap of two 86 | #' variables/objects can be, to still be recognised as the cause for a positiv relation. 87 | #' 88 | #' default: 0.1 -> 10\% 89 | #' 90 | #' @return correlation matrix without negative relations 91 | #' 92 | #' @examples 93 | #' testmatrixrand <- data.frame( 94 | #' matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 95 | #' ) 96 | #' 97 | #' testmatrixrand[,1] <- c(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0) 98 | #' testmatrixrand[,2] <- c(0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1) 99 | #' 100 | #' testcorr <- corrmat(testmatrixrand, "chi2", chi2limit = 0.1, dim = 1) 101 | #' 102 | #' rmnegcorr(testcorr, testmatrixrand, dim = 1) 103 | #' 104 | #' @export 105 | #' 106 | 107 | rmnegcorr <- function (corrmatrix, matrix, dim, niv = 0.1) { 108 | 109 | # decision between application for or columns (variables) or rows (objects) 110 | if (dim == 2) { 111 | matrix <- t(matrix) 112 | } 113 | 114 | # loop to check every cell of the correlation matrix 115 | for (l in 1:nrow(corrmatrix)) { 116 | for (c in 1:ncol(corrmatrix)) { 117 | # calculation of a contingency table for the current corrmatrix cell 118 | corrtab <- table(matrix[, l], matrix[, c]) 119 | # set ratio value to 100% 120 | prop1 <- 1 121 | prop2 <- 1 122 | # calculate ratio values for current cell, if there's a least one positiv 123 | # overlap of occurences of the underlying variables/objects 124 | if (corrtab[1, 2] != 0) { 125 | prop1 <- corrtab[2, 2] / sum(corrtab[, 2]) 126 | } 127 | if (corrtab[2, 1] != 0) { 128 | prop2 <- corrtab[2, 2] / sum(corrtab[2, ]) 129 | } 130 | # set correlation value to 0, if one or both ratio values is smaller than 131 | # a defined niveau. 132 | if (prop1 < niv) { 133 | corrmatrix[l, c] <- 0 134 | } else if (prop2 < niv) { 135 | corrmatrix[l, c] <- 0 136 | } else if (prop1 < niv && prop2 < niv) { 137 | corrmatrix[l, c] <- 0 138 | } 139 | } 140 | } 141 | 142 | return(corrmatrix) 143 | } 144 | 145 | #' Convert a correlation matrix to a table of relations 146 | #' 147 | #' \code{reltable} creates a sorted table of the relations of a correlation matrix. 148 | #' Relations with a correlation value of zero and autocorrelations get removed. 149 | #' \code{reltable} can consume a lot of time for big datasets! 150 | #' 151 | #' @details 152 | #' Structure of the resulting table: 153 | #' 154 | #' column 1 + 2: indezes of the variables/objects in the correlation matrix 155 | #' 156 | #' column 3: correlation value 157 | #' 158 | #' column 4 + 5: names of the variables/objects 159 | #' 160 | #' column 6: optional second correlation value 161 | #' 162 | #' @param corrtable correlation matrix (as produced by varnastats::corrmat()) 163 | #' @param corrtable2 optional second corrmatrix correlation matrix (as produced by varnastats::corrmat()) 164 | #' @return table of relations and their correlation value 165 | #' 166 | #' @examples 167 | #' testmatrixrand <- data.frame( 168 | #' matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 169 | #' ) 170 | #' 171 | #' testcorr <- corrmat(testmatrixrand, "lambda", dim = 1) 172 | #' testcorr2 <- corrmat(testmatrixrand, "chi2", chi2limit = 0.1, dim = 1) 173 | #' 174 | #' reltable(testcorr) 175 | #' reltable(testcorr, testcorr2) 176 | #' 177 | #' @export 178 | #' 179 | 180 | reltable <- function(corrtable, corrtable2 = data.frame()) { 181 | 182 | # avoiding note (no visible binding for global variable) 183 | indexvar1 <- NULL; 184 | indexvar2 <- NULL; 185 | 186 | # copy matrix to apply an increasingly fast search algorithm 187 | # for the matrix maximum 188 | destroycorr <- corrtable 189 | 190 | # Setup an empty data.frame as basis for the relation table 191 | if (nrow(corrtable2) == 0) { 192 | a <- matrix( 193 | NA, 194 | nrow = length(corrtable[corrtable != 0]), 195 | ncol = 5 196 | ) 197 | a <- data.frame(a) 198 | colnames(a) <- c( 199 | "indexvar1", 200 | "indexvar2", 201 | "corrvalue", 202 | "namevar1", 203 | "namevar2" 204 | ) 205 | } else { 206 | a <- matrix( 207 | NA, 208 | nrow = length(corrtable[corrtable != 0]), 209 | ncol = 6 210 | ) 211 | a <- data.frame(a) 212 | colnames(a) <- c( 213 | "indexvar1", 214 | "indexvar2", 215 | "corrvalue", 216 | "namevar1", 217 | "namevar2", 218 | "corrvalue2" 219 | ) 220 | } 221 | 222 | # loop to fill relationship table (in order of decreasing correlation values) 223 | for (i in 1:nrow(a)) { 224 | if (max(destroycorr) != 0) { 225 | # search for current max value (highest correlation / best relation) 226 | a[i, 1:2] <- which(destroycorr == max(destroycorr), arr.ind = TRUE)[1, ] 227 | a[i, 3] <- destroycorr[a[i, 1], a[i, 2]] 228 | a[i, 4] <- colnames(destroycorr)[a[i, 1]] 229 | a[i, 5] <- colnames(destroycorr)[a[i, 2]] 230 | if (nrow(corrtable2) != 0) { 231 | a[i, 6] <- corrtable2[a[i, 1], a[i, 2]] 232 | } 233 | # set current relation to 0, 234 | # to find the next best relation in the next loop run 235 | destroycorr[a[i, 1], a[i, 2]] <- 0 236 | } 237 | } 238 | 239 | # remove autocorrelation 240 | b <- dplyr::filter(a, indexvar1 != indexvar2) 241 | 242 | # remove every relation, that is already present 243 | # inversely (var1 & var2 = var2 & var1) 244 | p1 <- 1 245 | while (p1 <= nrow(b)) { 246 | 247 | ind1 <- b[p1, 1] 248 | ind2 <- b[p1, 2] 249 | 250 | ind1in1 <- which(b$indexvar1 == ind1) 251 | ind1in2 <- which(b$indexvar2 == ind1) 252 | ind2in2 <- which(b$indexvar2 == ind2) 253 | ind2in1 <- which(b$indexvar1 == ind2) 254 | 255 | check1 <- ind1in1[ind1in1 %in% ind2in2] 256 | check2 <- ind1in2[ind1in2 %in% ind2in1] 257 | check <- c(check1, check2) 258 | 259 | 260 | if (length(check) > 1) { 261 | b <- b[-check[-1], ] 262 | } 263 | 264 | p1 <- p1 + 1 265 | } 266 | 267 | row.names(b) <- 1:nrow(b) 268 | 269 | return(b) 270 | } 271 | 272 | #' Create an empty correlation matrix of a given data.frame 273 | #' 274 | #' \code{newcorrtable} returns an empty correlation matrix of a data.frames columns 275 | #' or rows. Empty means: filled with the numeric value "0". 276 | #' This correlation matrix data.frame can be used to store the results of functions which 277 | #' calculate correlation values for all bivariate relations. 278 | #' 279 | #' @param matrix data.frame 280 | #' @param dim switch to define, whether the new correlation matrix should be created 281 | #' for columns or rows. 282 | #' 283 | #' 1 (default): table is created for column (variables) relations. 284 | #' 285 | #' 2: table is created for row (objects) relations. 286 | #' 287 | #' @return empty correlation matrix data.frame 288 | #' 289 | #' @examples 290 | #' testmatrix <- data.frame(c1 = c(5,2,3,8), c2 = c(5,6,7,0), c3 = c(5,6,7,9)) 291 | #' 292 | #' # correlation table is created for the columns of the input data.frame testmatrix 293 | #' newcorrtable(testmatrix) 294 | #' newcorrtable(testmatrix, 1) 295 | #' corrtabcolumns <- newcorrtable(matrix = testmatrix, dim = 1) 296 | #' 297 | #' # correlation table is created for the rows of the input data.frame testmatrix 298 | #' newcorrtable(testmatrix, 2) 299 | #' corrtabrows <- newcorrtable(matrix = testmatrix, dim = 2) 300 | #' 301 | #' @export 302 | #' 303 | 304 | newcorrtable <- function (matrix, dim = 1) { 305 | 306 | # table is created for column (variables) relations 307 | if (dim == 1) { 308 | matrixwidth <- ncol(matrix) 309 | newtable <- matrix(nrow = matrixwidth, ncol = matrixwidth, 0) 310 | colnames(newtable) <- colnames(matrix) 311 | rownames(newtable) <- colnames(matrix) 312 | } 313 | 314 | # table is created for row (objects) relations 315 | if (dim == 2) { 316 | matrixheight <- nrow(matrix) 317 | newtable <- matrix(nrow = matrixheight, ncol = matrixheight, 0) 318 | colnames(newtable) <- rownames(matrix) 319 | rownames(newtable) <- rownames(matrix) 320 | } 321 | 322 | newtable <- data.frame(newtable, check.names = FALSE) 323 | 324 | return(newtable) 325 | 326 | } 327 | 328 | #' Create a correlation matrix of an input data.frame 329 | #' 330 | #' \code{corrmat} returns a correlation matrix of a data.frame. Several different 331 | #' correlation methods can be choosen and the matrix can be created for column or row 332 | #' relations. 333 | #' 334 | #' The rmnegniv option allows to remove "negativ relations", by activating 335 | #' the rmnegcorr function for values >0. The smaller rmnegniv the weaker the overlap 336 | #' of two variables/objects can be, to still be recognised as the cause of a positiv 337 | #' relation. 338 | #' 339 | #' See \code{?rmnegcorr} for further info. This function can also be applied later. 340 | #' 341 | #' @param matrix data.frame with numeric values 342 | #' @param method switch to define which contingency value should be used: 343 | #' 344 | #' "chi2" (default): test decision of the chi-square test for a defined decision niveau. 345 | #' A significant relation of two variables/objects is marked with a numeric "1", a 346 | #' negativ test result with a numeric "0". 347 | #' 348 | #' "phi": Pearson's phi coefficient ("mean square contingency coefficient"). 349 | #' 350 | #' "cc": Pearson's contingency coefficient. 351 | #' 352 | #' "lambda": Goodman and Kruskal's lambda value. Mean of both values calculated 353 | #' depending on what's the dependent and what's the independent variable 354 | #' 355 | #' @param dim switch to define if the correlation matrix should be created 356 | #' for columns or rows. 357 | #' 358 | #' 1 (default): table is created for column (variables) relations. 359 | #' 360 | #' 2: table is created for row (objects) relations. 361 | #' 362 | #' @param chi2limit significance level for the test decision. Just relevant for method 363 | #' "chi2". The higher chi2limit the less results will get removed. 364 | #' 365 | #' default: 0.05 -> 5\% 366 | #' 367 | #' @param rmnegniv option allows to remove "negativ relations". If >0 \code{rmnegcorr} gets 368 | #' called and applied. A usual value for rmnegniv is 0.1. 369 | #' 370 | #' @return correlation matrix 371 | #' 372 | #'@examples 373 | #' testmatrixrand <- data.frame( 374 | #' matrix(base::sample(0:1,400,replace=TRUE), nrow=20, ncol=20) 375 | #' ) 376 | #' 377 | #' corrmat(testmatrixrand, "chi2", chi2limit = 0.03) 378 | #' 379 | #' corrmat(matrix = testmatrixrand, method = "lambda", dim = 2) 380 | #' 381 | #' phicorrtable <- corrmat( 382 | #' matrix = testmatrixrand, 383 | #' method = "phi", 384 | #' dim = 1 385 | #' ) 386 | #' 387 | #' # Without negative relations: 388 | #' phicorrtablewnr <- corrmat( 389 | #' matrix = testmatrixrand, 390 | #' method = "phi", 391 | #' dim = 1, 392 | #' rmnegniv = 0.1 393 | #' ) 394 | #' 395 | #' @export 396 | #' 397 | 398 | corrmat <- function (matrix, method = "chi2", 399 | dim = 1, chi2limit = 0.05, rmnegniv = 0) { 400 | 401 | # create empty correlation table that fits to the input data.frame 402 | corrtab <- newcorrtable(matrix, dim) 403 | 404 | # invert column/row selection due to unusal api (1 --> cols / 2 --> rows) 405 | my_dim <- c(2, 1)[dim] 406 | 407 | # loop to apply calculation of correlation values for every bivariate 408 | # variable relation 409 | newcortab <- apply(matrix, my_dim, function(z){ 410 | apply(matrix, my_dim, function(s){ 411 | 412 | # create data.frame and table of current relation 413 | tbl <- table(z, s) 414 | 415 | # perform chisq.test and store result values 416 | options(warn = -1) 417 | x <- stats::chisq.test(tbl) 418 | chi2 <- unlist(x[1]) 419 | pval <- unlist(x[3]) 420 | options(warn = 0) 421 | 422 | if (method == "chi2") { 423 | # comparing p-Value with defined decision niveau chi2limit 424 | # to make a test decision 425 | if (pval < chi2limit) { 426 | result <- 1 427 | } else { 428 | result <- 0 429 | } 430 | } else if (method == "phi") { 431 | # calculation of phi = sqrt(chi2/n) 432 | result <- sqrt((chi2) / sum(tbl)) 433 | } else if (method == "cc") { 434 | # calculation of CC = sqrt(chi2/(chi2+n)) 435 | result <- sqrt((chi2) / ((chi2) + sum(tbl))) 436 | } else if (method == "lambda") { 437 | # calculation of mean lambda value 438 | result <- mean(unlist(rapportools::lambda.test(tbl, direction = 0))) 439 | } else { 440 | stop("Wrong method name!", 441 | call. = FALSE) 442 | } 443 | result 444 | }) 445 | }) 446 | 447 | # transpose result correlation matrix to correct format 448 | newcortab <- as.data.frame(t(newcortab)) 449 | 450 | # set colnames and rownames of result correlation matrix 451 | rownames(newcortab) <- rownames(corrtab) 452 | colnames(newcortab) <- colnames(corrtab) 453 | corrtab <- newcortab 454 | 455 | # apply removal of negativ relations with rmnegcorr 456 | if (rmnegniv > 0) { 457 | corrtab <- rmnegcorr( 458 | corrmatrix = corrtab, 459 | matrix = matrix, 460 | dim = dim, 461 | niv = rmnegniv 462 | ) 463 | } 464 | 465 | return(corrtab) 466 | } 467 | 468 | # End Correlation Data Munging Functions --------------------------- 469 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | --------------------------------------------------------------------------------