├── .github ├── .gitignore ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ ├── pkgdown.yaml │ ├── test-coverage.yaml │ └── R-CMD-check.yaml ├── vignettes ├── .gitignore ├── how-to.Rmd └── data_preparation.Rmd ├── data ├── Beazley.rda ├── DAT_df.rda └── Inscr_Bithynia.rda ├── tests ├── testthat.R └── testthat │ ├── test-get-probability.R │ ├── test-switch-dating.R │ ├── test-get-weights.R │ ├── test-get-histogramscale.R │ ├── test-check-structure.R │ ├── test-generate-stepsize.R │ ├── test-scaleweight.R │ ├── test-create-sub-objects.R │ ├── test-datsteps.R │ └── test-get-step-sequence.R ├── _pkgdown.yml ├── man ├── figures │ └── demo_readme.png ├── check.structure.Rd ├── switch.dating.Rd ├── DAT_df.Rd ├── generate.stepsize.Rd ├── scaleweight.Rd ├── get.probability.Rd ├── Beazley.Rd ├── datplot-package.Rd ├── get.weights.Rd ├── create.sub.objects.Rd ├── get.histogramscale.Rd ├── get.step.sequence.Rd ├── Inscr_Bithynia.Rd └── datsteps.Rd ├── CRAN-SUBMISSION ├── R ├── datplot-package.R ├── get_histogramscale.R ├── scaleweight.R ├── data.R ├── datsteps.R └── datplot_utility.R ├── NAMESPACE ├── inst ├── extdata │ ├── periods_edit_AD.csv │ ├── periods_edit.csv │ └── num_dating_edit.csv └── literatur.bib ├── .gitignore ├── .Rbuildignore ├── datplot.Rproj ├── cran-comments.md ├── DESCRIPTION ├── NEWS.md ├── README.md ├── data-raw └── Inscr_Bithynia.R └── LICENSE.md /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /data/Beazley.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsteinmann/datplot/HEAD/data/Beazley.rda 
-------------------------------------------------------------------------------- /data/DAT_df.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsteinmann/datplot/HEAD/data/DAT_df.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(datplot) 3 | 4 | test_check("datplot") 5 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://lsteinmann.github.io/datplot/ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /data/Inscr_Bithynia.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsteinmann/datplot/HEAD/data/Inscr_Bithynia.rda -------------------------------------------------------------------------------- /man/figures/demo_readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsteinmann/datplot/HEAD/man/figures/demo_readme.png -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 1.1.1 2 | Date: 2023-11-18 13:59:39 UTC 3 | SHA: 134d497a5a27e74cdc313a16a210541e99a40fba 4 | -------------------------------------------------------------------------------- /R/datplot-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | ## usethis namespace: start 5 | ## usethis namespace: end 6 | NULL 7 | -------------------------------------------------------------------------------- 
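The package summary in `R/datplot-package.R` above and the exports in the NAMESPACE that follows describe a small aoristic-analysis toolkit. As a minimal orientation sketch, the calls below chain the exported functions in the order the documentation suggests; the data selection is copied from the `\examples` sections elsewhere in this repository, while the final `ggplot2` density call (and the `DAT_step`/`weight` column names it relies on) is an illustrative assumption, not part of the package sources.

```r
# Sketch of the typical datplot workflow, assembled from the examples
# in the man/ pages of this repository (not an addition to the package).
library(datplot)
library(ggplot2)

data("Inscr_Bithynia")
# ID, grouping variable, minimum and maximum dating -- in this order:
DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")]

# Expand each object into dating steps of 25 years each:
DAT_df_steps <- datsteps(DAT_df, stepsize = 25)

# Scale the weights within each group (column 2, here: Location):
DAT_df_scaled <- scaleweight(DAT_df_steps, var = 2, val = 5)

# Assumed column names DAT_step / weight in the datsteps() output:
ggplot(DAT_df_scaled, aes(x = DAT_step, weight = weight)) +
  geom_density()
```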
/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(datsteps) 4 | export(get.histogramscale) 5 | export(get.probability) 6 | export(get.step.sequence) 7 | export(get.weights) 8 | export(scaleweight) 9 | -------------------------------------------------------------------------------- /inst/extdata/periods_edit_AD.csv: -------------------------------------------------------------------------------- 1 | ,Dating,DAT_min,DAT_max 2 | 1,Roman Imp. period,1,395 3 | 6,Early/Middle Roman Imp. p.,1,192 4 | 7,Early Roman Imp. period,1,69 5 | 10,Roman Imperial period,1,395 6 | 26,Roman Imp. period ,1,395 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | inst/prep.R 6 | inst/literatur.bib.bak 7 | inst/literatur.bib.sav 8 | *.log 9 | *.aux 10 | *.out 11 | *.fls 12 | *.fdb* 13 | *.ttf 14 | inst/doc 15 | todo.R 16 | docs 17 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.github$ 4 | ^data-raw$ 5 | ^\.travis\.yml$ 6 | ^codecov\.yml$ 7 | ^cran-comments\.md$ 8 | ^todo\.R$ 9 | ^CRAN-RELEASE$ 10 | ^LICENSE\.md$ 11 | ^_pkgdown\.yml$ 12 | ^docs$ 13 | ^pkgdown$ 14 | ^CRAN-SUBMISSION$ 15 | -------------------------------------------------------------------------------- /datplot.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | 
AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageCheckArgs: --as-cran 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /tests/testthat/test-get-probability.R: -------------------------------------------------------------------------------- 1 | test_that("returns probabilities as expected", { 2 | DAT_min <- c(-100, 10, 101, 201) 3 | DAT_max <- c(-100, 11, 110, 300) 4 | results <- c(1, 0.5, 0.1, 0.01) 5 | 6 | expect_equal(get.probability(DAT_min = DAT_min, 7 | DAT_max = DAT_max), 8 | results) 9 | }) 10 | 11 | test_that("fails for non-numeric values", { 12 | DAT_min <- c("1") 13 | DAT_max <- c("1") 14 | 15 | expect_error(get.probability(DAT_min = DAT_min, 16 | DAT_max = DAT_max), 17 | "numeric") 18 | }) 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here.
21 | -------------------------------------------------------------------------------- /man/check.structure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datplot_utility.R 3 | \name{check.structure} 4 | \alias{check.structure} 5 | \title{Check if the structure is compatible with [datsteps()] (internal)} 6 | \usage{ 7 | check.structure(DAT_df, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{DAT_df}{An object to check} 11 | 12 | \item{verbose}{TRUE / FALSE: Should the function issue additional 13 | messages pointing to possible inconsistencies and notify of methods?} 14 | } 15 | \value{ 16 | TRUE if object can be processed by [datsteps()], error / FALSE if not 17 | } 18 | \description{ 19 | Checks if the object passed to [datsteps()] can be used for 20 | processing. 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/switch.dating.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datplot_utility.R 3 | \name{switch.dating} 4 | \alias{switch.dating} 5 | \title{Switch values where dating is in wrong order (internal)} 6 | \usage{ 7 | switch.dating(DAT_df) 8 | } 9 | \arguments{ 10 | \item{DAT_df}{a data.frame with 4 variables in this order: ID, group, 11 | minimum date (int/num), maximum date (int/num)} 12 | } 13 | \value{ 14 | The same data.frame with the dating values which were in wrong order 15 | switched. 16 | } 17 | \description{ 18 | Requires a data.frame with 2 numeric variables in the 19 | 3rd and 4th column: minimum date (int/numeric) and 20 | maximum date (int/numeric) as used in [datsteps()]. 
21 | } 22 | \seealso{ 23 | [datsteps()] 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Resubmission 2 | This is a resubmission. In this version I have: 3 | 4 | * Improved the messaging/warning behaviour as requested (no more printing & 'verbose'-argument for info-messages). 5 | * (Package was archived on CRAN before due to UTF-8/Latin-1 strings in data and failure to correct on time. All strings have been converted to ASCII.) 6 | 7 | ## Test environments 8 | * local R installation, R 4.2.2 on Windows 9 | * GitHub actions (release) on macos, win and ubuntu 10 | * GitHub actions (devel and oldrel-1) on ubuntu 11 | * win-builder (devel) 12 | 13 | ## R CMD check results 14 | 15 | 0 errors | 0 warnings | 1 note 16 | 17 | * This is a new release. 18 | 19 | ## Notes 20 | win-builder says "Aoristic / aoristic" in the DESCRIPTION may be misspelled, 21 | but they are not. 22 | -------------------------------------------------------------------------------- /man/DAT_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{DAT_df} 5 | \alias{DAT_df} 6 | \title{datplot Testing data} 7 | \format{ 8 | A data frame with 5000 rows and 4 variables 9 | } 10 | \usage{ 11 | data(DAT_df) 12 | } 13 | \description{ 14 | A test dataset containing a data.frame showing how data should ideally be 15 | arranged to work with datplot. Data are not real and illustrate some common problems 16 | such as lower and upper dating in the wrong columns. 17 | } 18 | \details{ 19 | \itemize{ 20 | \item ID. Identifier of the Objects (has to be unique) 21 | \item var. Grouping variable, such as a Type or a Findspot 22 | \item DAT_min.
Integer: lower range of the dating, BCE in negative numbers 23 | \item DAT_max. Integer: upper range of the dating, BCE in negative numbers 24 | } 25 | } 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/generate.stepsize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datplot_utility.R 3 | \name{generate.stepsize} 4 | \alias{generate.stepsize} 5 | \title{Determine stepsize (internal)} 6 | \usage{ 7 | generate.stepsize(DAT_mat, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{DAT_mat}{a matrix as prepared by [datsteps()], i.e. a matrix with 11 | columns named `datmin` and `datmax` containing the numeric/integer values of the 12 | dating ranges.} 13 | 14 | \item{verbose}{TRUE / FALSE: Should the function issue additional 15 | messages pointing to possible inconsistencies and notify of methods?} 16 | } 17 | \value{ 18 | A single numeric value that can be used as minimal stepsize. 19 | } 20 | \description{ 21 | Determines stepsize by selecting the absolute minimum value 22 | between the upper and lower end of all dating ranges.
23 | } 24 | \seealso{ 25 | [datsteps()] 26 | } 27 | \keyword{internal} 28 | -------------------------------------------------------------------------------- /tests/testthat/test-switch-dating.R: -------------------------------------------------------------------------------- 1 | correct_df <- as.data.frame(matrix(c("id1", "A", 10, 20, 2 | "id2", "B", -10, 10, 3 | "id3", "C", 8, 8), 4 | byrow = TRUE, ncol = 4)) 5 | wrong_df <- as.data.frame(matrix(c("id1", "A", 20, 10, 6 | "id2", "B", -10, 10, 7 | "id3", "C", 8, 8), 8 | byrow = TRUE, ncol = 4)) 9 | 10 | 11 | test_that("switch.dating returns values in correct order", { 12 | expect_equal(suppressWarnings(switch.dating(wrong_df)), correct_df) 13 | }) 14 | 15 | test_that("switch.dating issues a warning", { 16 | expect_warning(switch.dating(wrong_df), "wrong order at ID id1") 17 | }) 18 | 19 | test_that("switch.dating issues no warning", { 20 | expect_failure(expect_warning(switch.dating(correct_df))) 21 | }) 22 | -------------------------------------------------------------------------------- /man/scaleweight.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scaleweight.R 3 | \name{scaleweight} 4 | \alias{scaleweight} 5 | \title{Scales the content of a column} 6 | \usage{ 7 | scaleweight(DAT_df, var = "all", val = 5) 8 | } 9 | \arguments{ 10 | \item{DAT_df}{a data.frame} 11 | 12 | \item{var}{index or name of the column that should be used 13 | as the group variable, OR "all"} 14 | 15 | \item{val}{index or name of the column that should be 16 | scaled (has to be numeric)} 17 | } 18 | \value{ 19 | the same data.frame, with the scaled values in the specified column 20 | } 21 | \description{ 22 | Requires a data.frame with one variable and one value column. 
23 | } 24 | \examples{ 25 | data("Inscr_Bithynia") 26 | DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] 27 | DAT_df_steps <- datsteps(DAT_df, stepsize = 25) 28 | DAT_df_scaled <- scaleweight(DAT_df_steps, var = 2, val = 5) 29 | } 30 | -------------------------------------------------------------------------------- /tests/testthat/test-get-weights.R: -------------------------------------------------------------------------------- 1 | test_that("returns weights as expected", { 2 | DAT_min <- c(-100, 10, 100, 200) 3 | DAT_max <- c(-100, 11, 110, 300) 4 | results <- c(1, 1, 0.1, 0.01) 5 | 6 | expect_equal(get.weights(DAT_min = DAT_min, 7 | DAT_max = DAT_max, 8 | verbose = FALSE), 9 | results) 10 | }) 11 | 12 | test_that("returns message for same values", { 13 | DAT_min <- c(1) 14 | DAT_max <- c(1) 15 | results <- c(1) 16 | 17 | expect_message(get.weights(DAT_min = DAT_min, 18 | DAT_max = DAT_max, 19 | verbose = TRUE), 20 | "same value") 21 | }) 22 | 23 | test_that("fails for non-numeric values", { 24 | DAT_min <- c("1") 25 | DAT_max <- c("1") 26 | 27 | expect_error(get.weights(DAT_min = DAT_min, 28 | DAT_max = DAT_max), 29 | "numeric") 30 | }) 31 | -------------------------------------------------------------------------------- /man/get.probability.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datplot_utility.R 3 | \name{get.probability} 4 | \alias{get.probability} 5 | \title{Calculate the probability for each year and each dated object} 6 | \usage{ 7 | get.probability(DAT_min, DAT_max) 8 | } 9 | \arguments{ 10 | \item{DAT_min}{a numeric vector containing the minimum date of each object} 11 | 12 | \item{DAT_max}{a numeric vector containing the maximum date of each object} 13 | } 14 | \value{ 15 | a vector of probabilities for each object being dated to any 16 | single year within the timespan (lesser value means object is
dated to 17 | larger timespans, i.e. with less confidence). 18 | } 19 | \description{ 20 | Calculates the probability of each object being dated into 21 | each year / timeslot from two vectors of minimum and maximum 22 | dating. Returns a vector of probabilities. 23 | } 24 | \seealso{ 25 | [datsteps()], [get.weights()] 26 | } 27 | -------------------------------------------------------------------------------- /man/Beazley.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{Beazley} 5 | \alias{Beazley} 6 | \title{Beazley (sample of 1000)} 7 | \format{ 8 | A data frame with 1000 rows and 4 variables 9 | } 10 | \source{ 11 | https://www.carc.ox.ac.uk/carc/pottery 12 | } 13 | \usage{ 14 | data(Beazley) 15 | } 16 | \description{ 17 | A test dataset containing a data.frame showing how data should ideally be 18 | arranged to work with datplot. Data are gathered from the Beazley Archive Pottery 19 | Database (BAPD) -- https://www.carc.ox.ac.uk/carc/pottery and 20 | transformed to work with datplot. 21 | } 22 | \details{ 23 | \itemize{ 24 | \item Identifier (Vase.Number in BAPD) 25 | \item Technique: Sample contains only red- or black-figured objects 26 | \item DAT_min. Integer: lower range of the dating, BCE in negative numbers 27 | \item DAT_max. Integer: upper range of the dating, BCE in negative numbers 28 | } 29 | } 30 | \keyword{datasets} 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is.
12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /man/datplot-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datplot-package.R 3 | \docType{package} 4 | \name{datplot-package} 5 | \alias{datplot} 6 | \alias{datplot-package} 7 | \title{datplot: Preparation of Object Dating Ranges for Density Plots (Aoristic Analysis)} 8 | \description{ 9 | Converting date ranges into dating 'steps' eases the visualization of changes in e.g. pottery consumption, style and other variables over time. This package provides tools to process and prepare data for visualization and employs the concept of aoristic analysis. 
10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://github.com/lsteinmann/datplot} 15 | \item \url{https://lsteinmann.github.io/datplot/} 16 | \item Report bugs at \url{https://github.com/lsteinmann/datplot/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Lisa Steinmann \email{lisa.steinmann@rub.de} (\href{https://orcid.org/0000-0002-2215-1243}{ORCID}) [copyright holder] 22 | 23 | Other contributors: 24 | \itemize{ 25 | \item Barbora Weissova \email{barbora.weissova@rub.de} (\href{https://orcid.org/0000-0002-3297-6855}{ORCID}) [contributor] 26 | } 27 | 28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /man/get.weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datplot_utility.R 3 | \name{get.weights} 4 | \alias{get.weights} 5 | \title{Calculate the weights for each dated object} 6 | \usage{ 7 | get.weights(DAT_min, DAT_max, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{DAT_min}{a numeric vector containing the minimum date of each object} 11 | 12 | \item{DAT_max}{a numeric vector containing the maximum date of each object} 13 | 14 | \item{verbose}{TRUE / FALSE: Should the function issue additional 15 | messages pointing to possible inconsistencies and notify of methods?} 16 | } 17 | \value{ 18 | a vector of 'weight'-values for the datsteps-data.frame, that is a 19 | quantification of how well the object is dated (lesser value means object 20 | is dated to larger timespans, i.e. with less confidence) 21 | } 22 | \description{ 23 | Calculates the weights from two vectors of minimum and maximum 24 | dating for each object. Returns a dataframe with the weight in the first 25 | column and FALSE in the second if two rows have the same value in both 26 | min and max dating. 
See [publication](https://doi.org/10.1017/aap.2021.8) 27 | for information about how this is calculated. 28 | } 29 | \seealso{ 30 | [datsteps()], [get.probability()] 31 | } 32 | -------------------------------------------------------------------------------- /tests/testthat/test-get-histogramscale.R: -------------------------------------------------------------------------------- 1 | test_that("works for single number", { 2 | expect_equal(get.histogramscale(2, binwidth = 2), 4) 3 | }) 4 | 5 | test_that("fails for single number with binwidth = stepsize", { 6 | expect_error(get.histogramscale(2, binwidth = "stepsize"), 7 | "datsteps") 8 | }) 9 | 10 | 11 | test_that("fails for vector with binwidth = stepsize", { 12 | expect_error(get.histogramscale(c(1, 2, 3, 4), binwidth = "stepsize"), 13 | "datsteps") 14 | }) 15 | 16 | 17 | data("DAT_df") 18 | test <- datsteps(DAT_df[4:10,], stepsize = 3, 19 | calc = "weight", verbose = FALSE) 20 | 21 | test_that("fails for wrong value of binwidth", { 22 | expect_error(get.histogramscale(test, binwidth = "fail"), "numeric") 23 | }) 24 | 25 | test_that("returns a number when attribute is used", { 26 | expect_true(is.numeric(get.histogramscale(test, binwidth = "stepsize"))) 27 | }) 28 | 29 | test_that("returns a number when numeric binwidth is used", { 30 | expect_true(is.numeric(get.histogramscale(test, binwidth = 1))) 31 | }) 32 | 33 | test_that("returns a number when a vector is supplied", { 34 | expect_equal(get.histogramscale(test$DAT_step, binwidth = 2), 35 | get.histogramscale(test, binwidth = 2)) 36 | }) 37 | -------------------------------------------------------------------------------- /inst/extdata/periods_edit.csv: -------------------------------------------------------------------------------- 1 | ,Dating,DAT_min,DAT_max 2 | 1,Roman Imp. period,-31,395 3 | 2,Late Roman Imp./ Early Byzantine period,193,565 4 | 3,Late Roman Imp. period,193,395 5 | 4,Late Hellenistic/ Early Roman Imp. 
period,-150,69 6 | 5,Late Hellenistic period,-150,-74 7 | 6,Early/Middle Roman Imp. p.,-31,192 8 | 7,Early Roman Imp. period,-31,69 9 | 8,Byzantine period,395,799 10 | 9,NA,NA,NA 11 | 10,Roman Imperial period,-31,395 12 | 11,Hellenistic period,-336,-74 13 | 12,Early Roman period,-74,-31 14 | 13,"Chr.-""Byz.",100,799 15 | 14,AD,1,799 16 | 15,Christian,300,799 17 | 16,Antonine or Severan,96,235 18 | 17,Roman period,-74,395 19 | 18,Roman,-74,395 20 | 19,early Byzantine period,395,565 21 | 20,Middle Roman Imp. period,69,192 22 | 21,Late Roman Imp. p.,193,395 23 | 22,Late Hellenistic/ Early Roman imp. period,-150,69 24 | 23,Late Hellenistic/ Early Roman imp. period,-150,69 25 | 24,Late Hellenistic,-150,-74 26 | 25,Early Byzantine period,395,565 27 | 26,Roman Imp. period ,-31,395 28 | 27,Late Hellenistic/ Early Roman Imp. period,-150,69 29 | 28,Late Hellenistic/ Early Roman Imp. period,-150,69 30 | 29,Late Hellenistic/ Early Roman Imp. period,-150,69 31 | 30,Early Roman,-74,-31 32 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: datplot 3 | Title: Preparation of Object Dating Ranges for Density Plots (Aoristic 4 | Analysis) 5 | Version: 1.1.1 6 | Authors@R: c( 7 | person("Lisa", "Steinmann", , "lisa.steinmann@rub.de", role = c("aut", "cre", "cph"), 8 | comment = c(ORCID = "0000-0002-2215-1243")), 9 | person("Barbora", "Weissova", , "barbora.weissova@rub.de", role = "ctb", 10 | comment = c(ORCID = "0000-0002-3297-6855")) 11 | ) 12 | Maintainer: Lisa Steinmann 13 | Description: Converting date ranges into dating 'steps' eases the 14 | visualization of changes in e.g. pottery consumption, style and other 15 | variables over time. This package provides tools to process and 16 | prepare data for visualization and employs the concept of aoristic 17 | analysis. 
18 | License: GPL (>= 3) 19 | URL: https://github.com/lsteinmann/datplot, 20 | https://lsteinmann.github.io/datplot/ 21 | BugReports: https://github.com/lsteinmann/datplot/issues 22 | Depends: 23 | R (>= 3.3) 24 | Suggests: 25 | covr, 26 | devtools, 27 | dplyr, 28 | forcats, 29 | ggplot2, 30 | ggridges, 31 | knitr, 32 | reshape2, 33 | rmarkdown, 34 | stringr, 35 | testthat 36 | VignetteBuilder: 37 | knitr 38 | Encoding: UTF-8 39 | LazyData: true 40 | RoxygenNote: 7.2.3 41 | -------------------------------------------------------------------------------- /man/create.sub.objects.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datplot_utility.R 3 | \name{create.sub.objects} 4 | \alias{create.sub.objects} 5 | \title{Create sub-objects for each object in a dataframe (internal)} 6 | \usage{ 7 | create.sub.objects(DAT_list, stepsize, calc = "weight", cumulative = FALSE) 8 | } 9 | \arguments{ 10 | \item{DAT_list}{a list as prepared by [datsteps()]} 11 | 12 | \item{stepsize}{numeric, default is 1. 
Number of years that should be used 13 | as an interval for creating dating steps.} 14 | 15 | \item{calc}{method of calculation to use; 16 | can be one of "weight" (default) or "probability": 17 | * "weight": use the 18 | [published original calculation](https://doi.org/10.1017/aap.2021.8) 19 | for weights, 20 | * "probability": calculate year-wise probability instead (only reasonable 21 | when `stepsize = 1`)} 22 | 23 | \item{cumulative}{FALSE (default), TRUE: add a column containing the 24 | cumulative probability for each object (only reasonable when `stepsize = 1`, 25 | and will automatically use probability calculation)} 26 | } 27 | \value{ 28 | an expanded list of the same structure to be further processed by 29 | [datsteps()], with each object duplicated according to the number of steps required 30 | } 31 | \description{ 32 | Requires a list with named vectors as [datsteps()] will 33 | hand to the function. 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/get.histogramscale.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_histogramscale.R 3 | \name{get.histogramscale} 4 | \alias{get.histogramscale} 5 | \title{Scaling Factor for Combined Histogram Plots} 6 | \usage{ 7 | get.histogramscale(DAT_df_steps, binwidth = "stepsize") 8 | } 9 | \arguments{ 10 | \item{DAT_df_steps}{a data.frame as returned by [datsteps()]. (Will also 11 | work with a single number and a vector.)} 12 | 13 | \item{binwidth}{the bandwidth to use for the density function and histogram. 14 | Should equal the stepsize used to create the data.frame.
If a data.frame as 15 | returned by [datsteps()] is given, stepsize can be automatically assigned 16 | using the corresponding attribute (`binwidth = "stepsize"`)} 17 | } 18 | \value{ 19 | the value with which to scale the density curve to a histogram 20 | plot so that both will be visible 21 | } 22 | \description{ 23 | Requires a data.frame as produced by [datsteps()] or a number as 24 | DAT_df_steps. Calculates the value by which the y-axis of a density graph 25 | should be multiplied in order to be visible in the corresponding histogram. 26 | } 27 | \examples{ 28 | data("Inscr_Bithynia") 29 | DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] 30 | DAT_df_steps <- datsteps(DAT_df, stepsize = 25) 31 | get.histogramscale(DAT_df_steps) 32 | 33 | get.histogramscale(DAT_df_steps$DAT_step, binwidth = 20) 34 | get.histogramscale(500, binwidth = 20) 35 | } 36 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures?
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | permissions: 23 | contents: write 24 | steps: 25 | - uses: actions/checkout@v3 26 | 27 | - uses: r-lib/actions/setup-pandoc@v2 28 | 29 | - uses: r-lib/actions/setup-r@v2 30 | with: 31 | use-public-rspm: true 32 | 33 | - uses: r-lib/actions/setup-r-dependencies@v2 34 | with: 35 | extra-packages: any::pkgdown, local::. 36 | needs: website 37 | 38 | - name: Build site 39 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 40 | shell: Rscript {0} 41 | 42 | - name: Deploy to GitHub pages 🚀 43 | if: github.event_name != 'pull_request' 44 | uses: JamesIves/github-pages-deploy-action@v4.4.1 45 | with: 46 | clean: false 47 | branch: gh-pages 48 | folder: docs 49 | -------------------------------------------------------------------------------- /tests/testthat/test-check-structure.R: -------------------------------------------------------------------------------- 1 | test <- matrix(data = c("ID1", "Type A", 100, 200, 2 | "ID2", "Type A", -100, 100), 3 | byrow = TRUE, ncol = 4) 4 | 5 | test_that("check.structure fails if not data.frame", { 6 | expect_error(check.structure(test), "data.frame") 7 | }) 8 | 9 | test <- as.data.frame(test) 10 | test$V3 <- as.numeric(test$V3) 11 | test$V4 <- as.numeric(test$V4) 12 | 13 | test_that("returns true if data.frame", { 14 | expect_true(check.structure(test)) 15 | }) 16 | 17 | test_that("check.structure issues message when verbose and columns 18 | do not conform to expected classes", { 19 | 
expect_message(check.structure(test, verbose = TRUE), regexp = "recommended") 20 | expect_failure(expect_message(check.structure(test, verbose = FALSE))) 21 | }) 22 | 23 | 24 | test_that("check.structure issues no message when verbose and columns 25 | conform to expected classes", { 26 | test$V2 <- as.factor(test$V2) 27 | expect_true(check.structure(test, verbose = TRUE)) 28 | expect_failure(expect_message(check.structure(test, verbose = TRUE))) 29 | }) 30 | 31 | test_that("check.structure fails if column 3 or 4 are not numeric", { 32 | test$V4 <- as.character(test$V4) 33 | expect_error(check.structure(test), regexp = "numeric") 34 | test$V4 <- as.numeric(test$V4) 35 | test$V3 <- as.character(test$V3) 36 | expect_error(check.structure(test), regexp = "numeric") 37 | test$V4 <- as.character(test$V4) 38 | expect_error(check.structure(test), regexp = "numeric") 39 | }) 40 | -------------------------------------------------------------------------------- /man/get.step.sequence.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datplot_utility.R 3 | \name{get.step.sequence} 4 | \alias{get.step.sequence} 5 | \title{Calculate the sequence of dating steps} 6 | \usage{ 7 | get.step.sequence(datmin = 0, datmax = 100, stepsize = 25) 8 | } 9 | \arguments{ 10 | \item{datmin}{numeric value of the minimum dating of one object} 11 | 12 | \item{datmax}{numeric value of the maximum dating of one object} 13 | 14 | \item{stepsize}{the stepsize to be used} 15 | } 16 | \value{ 17 | sequence of steps to be created by [create.sub.objects()] 18 | } 19 | \description{ 20 | Produces an appropriate sequence of years between the minimum 21 | and maximum dating. 
22 | 23 | If they cannot be properly divided by the stepsize set 24 | beforehand, either three values are generated for objects that are dated to 25 | a range of more than 60% of the stepsize (min, mean, max), or two values for 26 | objects dated to a timespan of less than or equal to 60% of the stepsize. 27 | If they can be divided without residual, the normal sequence is returned. 28 | If there is a residual, the stepsize is modified depending on how large the 29 | residual is. 30 | } 31 | \examples{ 32 | min_year <- -494 33 | max_year <- -334 34 | sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25) 35 | sequence 36 | 37 | min_year <- 1 38 | max_year <- 100 39 | sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25) 40 | sequence 41 | } 42 | \seealso{ 43 | [datsteps()], [create.sub.objects()] 44 | } 45 | -------------------------------------------------------------------------------- /tests/testthat/test-generate-stepsize.R: -------------------------------------------------------------------------------- 1 | test_that("generate.stepsize returns one when smallest difference is 1", { 2 | testmat <- matrix(c(1, 0, 50, 3 | 2, 10, 12, 4 | 3, -100, -99), 5 | byrow = TRUE, ncol = 3) 6 | colnames(testmat) <- c("index", "datmin", "datmax") 7 | 8 | expect_equal(generate.stepsize(testmat, verbose = FALSE), 1) 9 | }) 10 | 11 | test_that("generate.stepsize returns one when smallest difference is 0", { 12 | testmat <- matrix(c(1, 0, 50, 13 | 2, 10, 10, 14 | 3, -100, -50), 15 | byrow = TRUE, ncol = 3) 16 | colnames(testmat) <- c("index", "datmin", "datmax") 17 | 18 | expect_equal(generate.stepsize(testmat, verbose = FALSE), 1) 19 | }) 20 | 21 | test_that("generate.stepsize notifies of result when verbose", { 22 | testmat <- matrix(c(1, -100, 50, 23 | 2, 1, 11, 24 | 3, -100, -99), 25 | byrow = TRUE, ncol = 3) 26 | colnames(testmat) <- c("index", "datmin", "datmax") 27 | 28 | expect_message(generate.stepsize(testmat, verbose =
TRUE), "auto") 29 | expect_message(generate.stepsize(testmat, verbose = TRUE), "1") 30 | }) 31 | 32 | test_that("generate.stepsize throws error for non-numeric values", { 33 | testmat <- matrix(c(1, "-100", "50", 2, "1", "11"), 34 | byrow = TRUE, ncol = 3) 35 | colnames(testmat) <- c("index", "datmin", "datmax") 36 | 37 | expect_error(generate.stepsize(testmat, verbose = FALSE), 38 | "numeric") 39 | }) 40 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload coverage reports to Codecov 46 | uses: codecov/codecov-action@v3 47 | env: 48 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 49 | with: 50 | name: coverage-test-failures 51 | path: ${{ runner.temp }}/package 52 |
-------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | args: 'c("--no-manual", "--as-cran")' 50 | upload-snapshots: true 51 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # datplot 1.1.1 2 | * Improved error handling in some functions. 3 | * Reduced and improved messaging and warning behaviour. 4 | * Fixed slight problem in `generate.stepsize()` where it would not handle same values in min & max dating properly. 
5 | * Completely updated and redesigned tests. 6 | * Removed unnecessary internal function `check.number()`. 7 | 8 | # datplot 1.1.0 9 | * Using either the original calculation (weights) or calculation of 10 | year-wise probability is now an option in `datsteps()` with the 11 | argument `calc = "weight"` or `calc = "probability"` 12 | * There is now an option to calculate the cumulative probability in 13 | `datsteps()` with the argument `cumulative = TRUE`. This only works with 14 | probability calculation instead of the original (weights) calculation. 15 | * Significantly improved the efficiency of `datsteps()`. 16 | * Change and improve error-handling of `scaleweight()`. 17 | * Remove UTF-8 characters from data and other files to comply with CRAN. 18 | * Update documentation and add a [pkgdown-site](https://lsteinmann.github.io/datplot/). 19 | 20 | 21 | # datplot 1.0.1 22 | 23 | * Change calculation in `get.weights()` to `1 / (abs(DAT_min - DAT_max) + 1)` 24 | to get real probability values for each year. This only has a real effect when 25 | using a stepsize of 1, as it makes the weight-values 26 | usable as "dating probability". 27 | * Clean up `calculate.outputrows()` and `scaleweight()` somewhat. 
28 | 29 | # datplot 1.0.0 30 | 31 | * Added a `NEWS.md` file to track changes to the package 32 | * some style corrections 33 | * First release for submission to CRAN, accepted -> datplot is now on CRAN 34 | 35 | --- 36 | 37 | # datplot 0.2.4 38 | 39 | * peer-review version for Advances in Archaeological Practice 40 | -------------------------------------------------------------------------------- /tests/testthat/test-scaleweight.R: -------------------------------------------------------------------------------- 1 | data("DAT_df") 2 | DAT_df_steps <- datsteps(DAT_df[4:5, ], stepsize = 25, verbose = FALSE) 3 | 4 | test_that("works with defaults for df generated by datsteps", { 5 | expect_true(inherits(scaleweight(DAT_df_steps), "data.frame")) 6 | }) 7 | 8 | test_that("works with column name", { 9 | expect_equal(scaleweight(DAT_df_steps, var = "variable"), 10 | scaleweight(DAT_df_steps, var = 2)) 11 | expect_equal(scaleweight(DAT_df_steps, val = "weight"), 12 | scaleweight(DAT_df_steps, val = 5)) 13 | }) 14 | 15 | test_that("fails on non-existent column name", { 16 | expect_error(scaleweight(DAT_df_steps, val = "börek"), "column") 17 | expect_error(scaleweight(DAT_df_steps, var = "börek"), "column") 18 | }) 19 | 20 | test_that("fails on non-existent column index", { 21 | expect_error(scaleweight(DAT_df_steps, val = 12), "column") 22 | expect_error(scaleweight(DAT_df_steps, var = 12), "column") 23 | }) 24 | 25 | test_that("fails if value-column is not numeric", { 26 | expect_error(scaleweight(DAT_df_steps, val = 2), "numeric") 27 | }) 28 | 29 | test_that("attribute description is altered correctly", { 30 | test <- scaleweight(DAT_df_steps, var = "all", val = "weight") 31 | expect_match(as.character(attributes(test$weight)), 32 | "(scaled to sum of all objects)") 33 | test <- scaleweight(DAT_df_steps, var = "variable", val = "weight") 34 | expect_match(as.character(attributes(test$weight)), 35 | "grouped by column") 36 | }) 37 | 38 | 39 | test_that("attribute 
description is altered correctly", { 40 | test <- scaleweight(DAT_df_steps, var = "all", val = "weight") 41 | expect_equal(sum(test$weight), 1) 42 | test <- scaleweight(DAT_df_steps, var = 2, val = "weight") 43 | expect_equal(sum(test$weight), length(unique(test$variable))) 44 | }) 45 | -------------------------------------------------------------------------------- /tests/testthat/test-create-sub-objects.R: -------------------------------------------------------------------------------- 1 | test <- c(1, 50, 70, 0, NA) 2 | names(test) <- c("index", "datmin", "datmax", "calc", "step") 3 | test <- list(test) 4 | 5 | test_that("returns a matrix", { 6 | expect_true(inherits(create.sub.objects(test, stepsize = 10), "matrix")) 7 | }) 8 | 9 | test_that("creates appropriate amount of sub-objects", { 10 | expect_equal(nrow(create.sub.objects(test, stepsize = 10)), 3) 11 | }) 12 | 13 | test_that("warning (or not) when timespan is larger than stepsize", { 14 | expect_warning(create.sub.objects(test, stepsize = 50), "larger") 15 | expect_failure(expect_warning(create.sub.objects(test, stepsize = 2))) 16 | }) 17 | 18 | 19 | test_that("attaches correct attribute", { 20 | check <- create.sub.objects(test, stepsize = 10, calc = "weight") 21 | expect_match(attributes(check)$calc, "weight") 22 | check <- create.sub.objects(test, stepsize = 10, calc = "probability") 23 | expect_match(attributes(check)$calc, "probability") 24 | }) 25 | 26 | 27 | test_that("adds cumulative probability", { 28 | test <- c(1, 50, 51, 0.5, NA) 29 | names(test) <- c("index", "datmin", "datmax", "probability", "step") 30 | test <- list(test) 31 | check <- create.sub.objects(test, stepsize = 1, 32 | calc = "probability", 33 | cumulative = TRUE) 34 | expect_true("cumul_prob" %in% colnames(check)) 35 | }) 36 | 37 | test_that("cumulative probability adds correctly", { 38 | test <- c(1, 50, 61, 0.1, NA) 39 | names(test) <- c("index", "datmin", "datmax", "probability", "step") 40 | test <- list(test) 41 | check <- 
create.sub.objects(test, stepsize = 1, 42 | calc = "probability", 43 | cumulative = TRUE) 44 | expect_equal(check[1,"cumul_prob"] * nrow(check), 45 | check[nrow(check),"cumul_prob"]) 46 | }) 47 | -------------------------------------------------------------------------------- /man/Inscr_Bithynia.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{Inscr_Bithynia} 5 | \alias{Inscr_Bithynia} 6 | \title{Inscr_Bithynia} 7 | \format{ 8 | A data frame with 2878 rows and 9 variables: 9 | \describe{ 10 | \item{\code{ID}}{character COLUMN_DESCRIPTION} 11 | \item{\code{ikey}}{character ID at \url{https://inscriptions.packhum.org/} 12 | / \url{https://edh-www.adw.uni-heidelberg.de/home}, if available} 13 | \item{\code{Location}}{factor Findspot of the Inscription (City)} 14 | \item{\code{Source}}{character Corpus/Citation of the Inscription} 15 | \item{\code{Dating}}{character Original Chronological Assessment, 16 | may contain inconsistencies} 17 | \item{\code{Language}}{factor Language of the Inscription, 18 | can either be Latin, Greek, or both} 19 | \item{\code{uncertain_dating}}{logical TRUE if Dating is not certain, 20 | FALSE if dating is certain} 21 | \item{\code{DAT_min}}{integer lower border of the dating timespan, 22 | negative values for BCE, positive values for CE} 23 | \item{\code{DAT_max}}{integer upper border of the dating timespan, 24 | negative values for BCE, positive values for CE} 25 | \item{\code{URL}}{Link to the inscription (if available) at 26 | \url{https://inscriptions.packhum.org/} or 27 | \url{https://edh-www.adw.uni-heidelberg.de/home}} 28 | } 29 | } 30 | \source{ 31 | Weissova, Barbora. 2019. “Regional Economy, Settlement Patterns and 32 | the Road System in Bithynia (4th Century BC - 6th Century AD). Spatial and 33 | Quantitative Analysis.” Dissertation, Berlin: Freie Universität Berlin. 
34 | \url{https://refubium.fu-berlin.de/handle/fub188/23730}, 35 | partially after \url{https://inscriptions.packhum.org/} 36 | } 37 | \usage{ 38 | Inscr_Bithynia 39 | } 40 | \description{ 41 | The data set was gathered by Barbora Weissova and published 42 | as part of her dissertation “Regional Economy, Settlement Patterns and the 43 | Road System in Bithynia (4th Century BC - 6th Century AD). Spatial and 44 | Quantitative Analysis.”. 45 | } 46 | \keyword{datasets} 47 | -------------------------------------------------------------------------------- /R/get_histogramscale.R: -------------------------------------------------------------------------------- 1 | #' @title Scaling Factor for Combined Histogram Plots 2 | #' 3 | #' @description Requires a data.frame as produced by [datsteps()] or a number as 4 | #' DAT_df_steps. Calculates the value with which the y-axis of a density graph 5 | #' should be multiplied in order to be visible in the corresponding histogram. 6 | #' 7 | #' @param DAT_df_steps a data.frame as returned by [datsteps()]. (Will also 8 | #' work with a single number and a vector.) 9 | #' @param binwidth the bandwidth to use for the density function and histogram. 10 | #' Should equal the stepsize used to create the data.frame.
If a data.frame as 11 | #' returned by [datsteps()] is given, stepsize can be automatically assigned 12 | #' using the corresponding attribute (`binwidth = "stepsize"`) 13 | #' 14 | #' @return the value with which to scale the density curve to a histogram 15 | #' plot so that both will be visible 16 | #' 17 | #' @export get.histogramscale 18 | #' 19 | #' @examples 20 | #' data("Inscr_Bithynia") 21 | #' DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] 22 | #' DAT_df_steps <- datsteps(DAT_df, stepsize = 25) 23 | #' get.histogramscale(DAT_df_steps) 24 | #' 25 | #' get.histogramscale(DAT_df_steps$DAT_step, binwidth = 20) 26 | #' get.histogramscale(500, binwidth = 20) 27 | get.histogramscale <- function(DAT_df_steps, binwidth = "stepsize") { 28 | msg_sts <- paste("'binwidth = 'stepsize'' can only be used when", 29 | "a data.frame as returned by `datsteps()` is supplied.", 30 | "Otherwise, binwidth needs to be numeric.") 31 | if (is.numeric(DAT_df_steps) & length(DAT_df_steps) == 1) { 32 | nrow <- DAT_df_steps 33 | if (binwidth == "stepsize") { 34 | stop(msg_sts) 35 | } 36 | } else { 37 | if (inherits(DAT_df_steps, "data.frame")) { 38 | nrow <- nrow(DAT_df_steps) 39 | } 40 | if (is.atomic(DAT_df_steps)) { 41 | nrow <- length(DAT_df_steps) 42 | } 43 | if (binwidth == "stepsize") { 44 | binwidth <- attributes(DAT_df_steps)$stepsize 45 | if (is.null(binwidth)) { 46 | stop(msg_sts) 47 | } 48 | } else if (!is.numeric(binwidth)) { 49 | stop("Argument 'binwidth' has to be either 'stepsize' or numeric.") 50 | } 51 | } 52 | histogramscale <- nrow * binwidth 53 | return(histogramscale) 54 | } 55 | -------------------------------------------------------------------------------- /R/scaleweight.R: -------------------------------------------------------------------------------- 1 | #' @title Scales the content of a column 2 | #' 3 | #' @description Requires a data.frame with one variable and one value column. 
4 | #' 5 | #' @param DAT_df a data.frame 6 | #' @param var index or name of the column that should be used 7 | #' as the group variable, OR "all" 8 | #' @param val index or name of the column that should be 9 | #' scaled (has to be numeric) 10 | #' 11 | #' @return the same data.frame, with the scaled values in the specified column 12 | #' 13 | #' @export scaleweight 14 | #' 15 | #' @examples 16 | #' data("Inscr_Bithynia") 17 | #' DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] 18 | #' DAT_df_steps <- datsteps(DAT_df, stepsize = 25) 19 | #' DAT_df_scaled <- scaleweight(DAT_df_steps, var = 2, val = 5) 20 | 21 | scaleweight <- function(DAT_df, var = "all", val = 5) { 22 | 23 | if (!is.numeric(val)) { 24 | val <- which(colnames(DAT_df) == val) 25 | if (length(val) == 0) { 26 | stop(paste("'val' needs to be a number", 27 | "(the index of the column that should be scaled)")) 28 | } 29 | } 30 | if (val > ncol(DAT_df)) { 31 | stop(paste("No column at index", val)) 32 | } 33 | if (!is.numeric(DAT_df[, val])) { 34 | stop(paste("Column", val, "is not numeric.")) 35 | } 36 | 37 | if (is.character(var) && var != "all") { 38 | var <- which(colnames(DAT_df) == var) 39 | if (length(var) == 0) { 40 | stop(paste("var needs to be either 'all' or the index of the", 41 | "column containing the variable", 42 | "that is to be used for scaling")) 43 | } 44 | } else if (is.numeric(var) && var > ncol(DAT_df)) { 45 | stop(paste("No column at index", var)) 46 | } 47 | 48 | 49 | if (var == "all") { 50 | DAT_df[, val] <- DAT_df[, val] / sum(DAT_df[, val]) 51 | new_desc <- paste(attr(DAT_df[, val], "descr"), 52 | "(scaled to sum of all objects)") 53 | } else { 54 | uvar <- unique(DAT_df[, var]) 55 | for (row in seq_len(length(uvar))) { 56 | index <- which(DAT_df[, var] == uvar[row]) 57 | DAT_df[index, val] <- DAT_df[index, val] / sum(DAT_df[index, val]) 58 | } 59 | new_desc <- paste0(attr(DAT_df[, val], "descr"), 60 | " (scaled to sum of objects grouped by column '", 61 | 
colnames(DAT_df)[var], "')") 62 | } 63 | attr(DAT_df[, val], "descr") <- new_desc 64 | 65 | return(DAT_df) 66 | } 67 | -------------------------------------------------------------------------------- /man/datsteps.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datsteps.R 3 | \name{datsteps} 4 | \alias{datsteps} 5 | \title{Create 'steps' of dates for each object in a data.frame} 6 | \usage{ 7 | datsteps( 8 | DAT_df, 9 | stepsize = 1, 10 | calc = "weight", 11 | cumulative = FALSE, 12 | verbose = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{DAT_df}{a data.frame with 4 variables: 17 | * `ID` : An identifier for each row, e.g. an Inventory number (ideally character). 18 | * `group` : A grouping variable, such as type or context (ideally factor). 19 | * `DAT_min` : minimum dating (int/num), the minimum dating boundary for a 20 | single object, i.e. the earliest year the object may be dated to. 21 | * `DAT_max` : maximum dating (int/num), the maximum dating boundary for a 22 | single object, i.e. the latest year the object may be dated to. 23 | The columns _must_ be in this order, column names are irrelevant; each row 24 | _must_ correspond to one datable entity / object.} 25 | 26 | \item{stepsize}{numeric, default is 1.
Number of years that should be used 27 | as an interval for creating dating steps.} 28 | 29 | \item{calc}{method of calculation to use; 30 | can be either one of "weight" (default) or "probability": 31 | * "weight": use the 32 | [published original calculation](https://doi.org/10.1017/aap.2021.8) 33 | for weights, 34 | * "probability": calculate year-wise probability instead (only reasonable 35 | when `stepsize = 1`)} 36 | 37 | \item{cumulative}{FALSE (default), TRUE: add a column containing the 38 | cumulative probability for each object (only reasonable when `stepsize = 1`, 39 | and will automatically use probability calculation)} 40 | 41 | \item{verbose}{TRUE / FALSE: Should the function issue additional 42 | messages pointing to possible inconsistencies and notify of methods?} 43 | } 44 | \value{ 45 | an expanded data.frame in which each row represents a dating 'step'. 46 | Added columns contain the value of each step, the 'weight' or 'probability'- 47 | value for each step, and (if chosen) the cumulative probability. 48 | } 49 | \description{ 50 | This function transforms a data.frame of dated objects with associated data 51 | to a new data.frame which contains a row for each dating 'step' for each 52 | object. Dating 'steps' can be single years (with `stepsize = 1`) or an 53 | arbitrary number that will be used as a guideline for the interval. 54 | It's expected that dates BCE are displayed as negative 55 | values while dates CE are positive values. Ignoring this will cause 56 | problems. If dates are provided in the wrong order in any number of 57 | rows they will automatically be switched. 58 | 59 | The function, along with a guide on how to use it and a case study, is published 60 | in [Steinmann -- Weissova 2021](https://doi.org/10.1017/aap.2021.8).
61 | } 62 | \examples{ 63 | data("Inscr_Bithynia") 64 | DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] 65 | DAT_df_steps <- datsteps(DAT_df, stepsize = 25) 66 | plot(density(DAT_df_steps$DAT_step)) 67 | } 68 | -------------------------------------------------------------------------------- /tests/testthat/test-datsteps.R: -------------------------------------------------------------------------------- 1 | data("DAT_df") 2 | DAT_df$ID <- as.character(DAT_df$ID) 3 | DAT_df$var <- as.factor(DAT_df$var) 4 | 5 | test_that("uses probability calculation when cumulative = TRUE", { 6 | expect_warning(datsteps(DAT_df[4:8, ], 7 | stepsize = 1, 8 | calc = "weight", 9 | cumulative = TRUE, 10 | verbose = FALSE), 11 | "cumulative") 12 | }) 13 | 14 | test_that("colnames are as expected", { 15 | test <- datsteps(DAT_df[4:8, ], stepsize = 1, 16 | calc = "probability", cumulative = TRUE, 17 | verbose = FALSE) 18 | expect_equal(colnames(test), c("ID", "variable", "DAT_min", 19 | "DAT_max", "probability", 20 | "DAT_step", "cumul_prob")) 21 | }) 22 | 23 | test_that("warns for unreasonable stepsize when using probability", { 24 | expect_warning(datsteps(DAT_df[4:8, ], 25 | stepsize = 20, 26 | calc = "probability", 27 | verbose = FALSE), 28 | "meaningful") 29 | }) 30 | 31 | 32 | test_that("calculation argument is guessed correctly from partial word", { 33 | expect_message(datsteps(DAT_df[4:5, ], 34 | stepsize = 1, 35 | calc = "pro", 36 | verbose = TRUE), 37 | "probability") 38 | expect_message(datsteps(DAT_df[4:5, ], 39 | stepsize = 1, 40 | calc = "we", 41 | verbose = TRUE), 42 | "weight") 43 | }) 44 | 45 | test_that("error for non-expected calc-argument", { 46 | expect_error(datsteps(DAT_df[4:5, ], stepsize = 1, 47 | calc = "börek"), 48 | "probability") 49 | }) 50 | 51 | 52 | test_that("warns about and removes rows with NA-values", { 53 | DAT_df[12, 3] <- NA 54 | expect_warning(datsteps(DAT_df[10:12, ], stepsize = 1, verbose = FALSE), 55 | "NA-values") 56 | test <-
suppressWarnings(datsteps(DAT_df[10:12, ], verbose = FALSE)$ID) 57 | expect_false("12" %in% test) 58 | }) 59 | 60 | 61 | 62 | test_that("stepsize = auto can be used", { 63 | expect_message(datsteps(DAT_df[3:4, ], stepsize = "auto", verbose = TRUE), "auto") 64 | test <- datsteps(DAT_df[3:4, ], stepsize = "auto", verbose = FALSE) 65 | expect_equal(attributes(test)$stepsize, 1) 66 | }) 67 | 68 | 69 | test_that("stepsize attribute is attached", { 70 | stepsize <- 2 71 | test <- suppressWarnings(datsteps(DAT_df[3:4, ], 72 | stepsize = stepsize, 73 | verbose = FALSE)) 74 | expect_equal(attributes(test)$stepsize, stepsize) 75 | }) 76 | 77 | 78 | test_that("stepsize attribute is attached", { 79 | stepsize <- "no" 80 | expect_error(datsteps(DAT_df[3:4, ], stepsize = stepsize, verbose = FALSE), 81 | "numeric") 82 | }) 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /inst/extdata/num_dating_edit.csv: -------------------------------------------------------------------------------- 1 | ,Dating,DAT_min,DAT_max 2 | 1,end of the 2nd c. AD,180,199 3 | 2,end of 1st c. BC - beg. of 1st c. AD,-20,20 4 | 3,end 2nd c. BC,-120,-100 5 | 4,early 1st c. AD,1,20 6 | 7,after 212 AD,212,222 7 | 8,AD 531,531,531 8 | 10,5th/6th century AD,400,599 9 | 14,3rd/4th century AD,200,399 10 | 19,2nd - 3rd c. AD,100,299 11 | 20,2nd - 1st c. BC,-199,-1 12 | 27,1st - beg. of the 2nd c. AD,1,120 13 | 28,1st - 2nd c. AD,1,199 14 | 35,2nd century AD,100,199 15 | 38,late 3rd - early 2nd c. BC,-220,-180 16 | 43,5th - 6th c. AD,400,599 17 | 50,3rd quarter of the 6th c. BC,-550,-525 18 | 51,3rd - 2nd c. BC,-299,-100 19 | 52,340/339 BC,-340,-339 20 | 54,27 BC-14 AD,-27,14 21 | 56,1st century AD,1,99 22 | 57,1st c. BC - 1st c. AD,-99,99 23 | 60,middle 2nd c. AD,140,160 24 | 61,end of the 3rd c. AD,280,299 25 | 62,end of the 2nd century AD,180,199 26 | 68,6th - 8th century AD,500,799 27 | 69,3rd century AD,200,299 28 | 70,3rd - 4th c. 
AD,200,399 29 | 71,2nd/early 3rd century AD,100,220 30 | 72,2nd/3rd century AD,100,299 31 | 73,2nd - 3rd century AD,100,299 32 | 80,145 or 204 AD,145,204 33 | 84,end of the 2nd - end of the 4th century AD,180,380 34 | 87,2nd century BC,-199,-100 35 | 88,2nd century AD ,100,199 36 | 89,1st/2nd century AD,1,199 37 | 92,mid. 2nd c. AD,140,160 38 | 93,mid. 1st c. AD,40,60 39 | 94,late 2nd c. AD,180,199 40 | 95,end of the 1st c. AD,80,99 41 | 96,AD 123,123,123 42 | 100,1st - 3rd century AD,1,299 43 | 102,2nd/3rd century AD or 4th - 5th century AD ,100,499 44 | 104,mid. 3rd c. AD,240,260 45 | 105,late 3rd century AD,280,299 46 | 106,late 2nd century AD,180,199 47 | 107,end of the 2nd/beg. of the 3rd century AD,180,220 48 | 108,after 98 AD,98,108 49 | 111,AD 209,209,209 50 | 113,6th century AD,500,599 51 | 114,5th - 6th century AD,400,599 52 | 115,4th century AD,300,399 53 | 117,3rd century AD ,200,299 54 | 118,3rd - 4th century AD ,200,399 55 | 119,3rd - 4th century AD,200,399 56 | 122,2nd - 3rd c. AD,100,299 57 | 146,Late 7th century AD,680,699 58 | 147,beg. of the 3rd c. AD,200,220 59 | 148,beg. of the 2nd c. AD,100,120 60 | 149,after ca. 70-79 AD,70,79 61 | 150,after 128 AD,128,138 62 | 152,AD 243/244,243,244 63 | 154,97/98-98/99 AD,97,99 64 | 158,5th/4th century BC,-499,-300 65 | 160,4th - 5th c. AD,300,499 66 | 168,2nd/3rd century AD ,100,299 67 | 169,2nd half of the 2nd c. AD,150,199 68 | 170,2nd - 3rd C. AD,100,299 69 | 183,1st century BC,-99,-1 70 | 200,102-114,102,114 71 | 201,early 3rd century AD,200,220 72 | 203,after 138 AD,138,148 73 | 207,3rd/4th century AD (or later),200,450 74 | 208,3rd or 4th century AD,200,399 75 | 209,3rd c. AD or before,180,299 76 | 211,2nd - beg. of the 3rd c. AD,100,220 77 | 222,2 BC or later,-2,20 78 | 223,1st half of the 2nd c. AD,100,150 79 | 232,128/129-130/131 & 184/185 AD,128,185 80 | 234,122/123-126/127 AD,122,127 81 | 237,early 3rd c. AD,200,220 82 | 238,ca. 
2nd century AD,100,199 83 | 239,before 212 AD,202,212 84 | 240,before 211 AD,201,211 85 | 241,after 202 AD,202,212 86 | 259,mid - end of 3rd c. AD,240,299 87 | 263,281 or 190 BC,-281,-190 88 | 270,AD 288/289,288,289 89 | 271,mid - end of 2nd c. AD,140,199 90 | 272,late 2nd century AD ,180,199 91 | 273,end of the 2nd - 3rd c. AD,180,299 92 | 274,beg. of the 2nd c. BC,-200,-180 93 | 275,"after 171 BC, ca. 188 BC",-188,-161 94 | 279,7th century AD,600,699 95 | 281,5th - 6th C AD,400,599 96 | 283,4th - 6th c. AD,300,599 97 | 284,3rd c. AD (after 211 AD),211,299 98 | 285,1st - early 2nd c. AD,1,120 99 | 288,123-131/132 AD,123,132 100 | 289,AD 140,140,140 101 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' datplot Testing data 2 | #' 3 | #' A test dataset containing a data.frame arranged as it ideally should be 4 | #' to work with datplot. Data are not real and illustrate some common problems 5 | #' such as lower and upper dating in the wrong columns. 6 | #' 7 | #' \itemize{ 8 | #' \item ID. Identifier of the Objects (has to be unique) 9 | #' \item var. Grouping variable, such as a Type or a Findspot 10 | #' \item DAT_min. Integer: lower range of the dating, BCE in negative numbers 11 | #' \item DAT_max. Integer: upper range of the dating, BCE in negative numbers 12 | #' } 13 | #' 14 | #' @docType data 15 | #' @keywords datasets 16 | #' @name DAT_df 17 | #' @usage data(DAT_df) 18 | #' @format A data frame with 5000 rows and 4 variables 19 | "DAT_df" 20 | NULL 21 | 22 | 23 | #' Beazley (sample of 1000) 24 | #' 25 | #' A test dataset containing a data.frame arranged as it ideally should be 26 | #' to work with datplot. Data are gathered from the Beazley Archive Pottery 27 | #' Database (BAPD) -- https://www.carc.ox.ac.uk/carc/pottery and 28 | #' transformed to work with datplot.
29 | #' 30 | #' \itemize{ 31 | #' \item Identifier (Vase.Number in BAPD) 32 | #' \item Technique: Sample contains only red- or blackfigured objects 33 | #' \item DAT_min. Integer: lower range of the dating, BCE in negative numbers 34 | #' \item DAT_max. Integer: upper range of the dating, BCE in negative numbers 35 | #' } 36 | #' 37 | #' @docType data 38 | #' @keywords datasets 39 | #' @name Beazley 40 | #' @usage data(Beazley) 41 | #' @format A data frame with 1000 rows and 4 variables 42 | #' @source https://www.carc.ox.ac.uk/carc/pottery 43 | "Beazley" 44 | NULL 45 | 46 | 47 | #' @title Inscr_Bithynia 48 | #' @description The data set was gathered by Barbora Weissova and published 49 | #' as part of her dissertation “Regional Economy, Settlement Patterns and the 50 | #' Road System in Bithynia (4th Century BC - 6th Century AD). Spatial and 51 | #' Quantitative Analysis.”. 52 | #' 53 | #' 54 | #' @format A data frame with 2878 rows and 9 variables: 55 | #' \describe{ 56 | #' \item{\code{ID}}{character COLUMN_DESCRIPTION} 57 | #' \item{\code{ikey}}{character ID at \url{https://inscriptions.packhum.org/} 58 | #' / \url{https://edh-www.adw.uni-heidelberg.de/home}, if available} 59 | #' \item{\code{Location}}{factor Findspot of the Inscription (City)} 60 | #' \item{\code{Source}}{character Corpus/Citation of the Inscription} 61 | #' \item{\code{Dating}}{character Original Chronological Assessment, 62 | #' may contain inconsistencies} 63 | #' \item{\code{Language}}{factor Language of the Inscription, 64 | #' can either be Latin, Greek, or both} 65 | #' \item{\code{uncertain_dating}}{logical TRUE if Dating is not certain, 66 | #' FALSE if dating is certain} 67 | #' \item{\code{DAT_min}}{integer lower border of the dating timespan, 68 | #' negative values for BCE, positive values for CE} 69 | #' \item{\code{DAT_max}}{integer upper border of the dating timespan, 70 | #' negative values for BCE, positive values for CE} 71 | #' \item{\code{URL}}{Link to the inscription (if 
available) at 72 | #' \url{https://inscriptions.packhum.org/} or 73 | #' \url{https://edh-www.adw.uni-heidelberg.de/home}} 74 | #'} 75 | #' @source Weissova, Barbora. 2019. “Regional Economy, Settlement Patterns and 76 | #' the Road System in Bithynia (4th Century BC - 6th Century AD). Spatial and 77 | #' Quantitative Analysis.” Dissertation, Berlin: Freie Universität Berlin. 78 | #' \url{https://refubium.fu-berlin.de/handle/fub188/23730}, 79 | #' partially after \url{https://inscriptions.packhum.org/} 80 | "Inscr_Bithynia" 81 | NULL 82 | 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![R-CMD-check](https://github.com/lsteinmann/datplot/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/lsteinmann/datplot/actions/workflows/R-CMD-check.yaml) 3 | [![codecov](https://codecov.io/gh/lsteinmann/datplot/branch/main/graph/badge.svg)](https://app.codecov.io/gh/lsteinmann/datplot) 4 | [![DOI](https://img.shields.io/badge/Publication-10.1017/aap.2021.8-green.svg)](https://doi.org/10.1017/aap.2021.8) 5 | [![CRAN status](https://www.r-pkg.org/badges/version/datplot)](https://CRAN.R-project.org/package=datplot) 6 | 7 | 8 | 9 | [datplot](https://lsteinmann.github.io/datplot/) 10 | ======= 11 | 12 | Converting date ranges into dating 'steps' eases the visualization of changes in e.g. pottery consumption, style and other variables over time. This package provides tools to process and prepare data for visualization. 13 | 14 | A rather common problem in archaeology is the fuzziness of dates assigned to objects. If one wants to visualize overall changes in - let's say - pottery consumption, bar charts often fall short in that regard. If, e.g., the phases a -- f are employed, some of the objects can be dated precisely to a, c, or f, but others will be classified as "a to c" or "b to c".
But how can these data still be used for examining changes in a large set of objects without completely disregarding the information added by providing multiple phases for one object? 15 | 16 | This package implements concepts of aoristic analysis to prepare archaeological data for visualization using density plots. An example is shown in the vignettes, which can be found at 17 | 18 | browseVignettes("datplot") 19 | 20 | or [on the pkgdown site](https://lsteinmann.github.io/datplot/articles/how-to.html) after installing the package, or on GitHub in the /vignettes/ directory. Density plots are easy to understand and are usually aesthetically pleasing. They do omit some information, such as individual counts, that bar histograms can communicate better. On the other hand, dating ranges can be incorporated into the visualization as well, reflecting the variety of timespans archaeological objects may be dated to. 21 | 22 | ![Attic Pottery from BAPD by Date](man/figures/demo_readme.png "Attic Pottery from BAPD by Date") 23 | 24 | Publication 25 | ------- 26 | The package at version 1.0.0 has been published along with a case study on inscriptions from Bithynia: [Steinmann, L., & Weissova, B. (2021). Datplot: A New R Package for the Visualization of Date Ranges in Archaeology. Advances in Archaeological Practice, 1-11. doi:10.1017/aap.2021.8](https://doi.org/10.1017/aap.2021.8). Data used in the case study is included in the package. 27 | 28 | Recommendation 29 | ------- 30 | People interested in employing this method should also consider taking a look at [ISAAKiel's package aoristAAR](https://github.com/ISAAKiel/aoristAAR/), or at [archSeries](https://github.com/davidcorton/archSeries), [tabula](https://github.com/tesselle/tabula), [rtfact](https://github.com/ahb108/rtfact), [aoristic-analysis (LimesLimits)](https://github.com/LimesLimits/aoristic-analysis) and (in the future) [baorista](https://github.com/ercrema/baorista).
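
Example
-------
A minimal sketch of the core workflow, using the bundled Beazley sample data (1000 dated objects from the BAPD; see the vignettes and ?datsteps for details):

    library(datplot)
    data(Beazley)
    # Beazley holds the four columns datsteps() expects, in this order:
    # an identifier, a grouping variable (Technique), DAT_min and DAT_max
    steps <- datsteps(Beazley, stepsize = 25)
    # each object is now represented by several weighted dating 'steps'
    plot(density(steps$DAT_step))
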
31 | 32 | 33 | Installation 34 | ------- 35 | 'datplot' can be installed from GitHub with devtools: 36 | 37 | devtools::install_github("lsteinmann/datplot") 38 | 39 | Or via downloading the latest release and installing from the file: 40 | 41 | devtools::install_local(path = "../datplot_1.x.x.tar.gz") 42 | 43 | In case you are unable to find the vignettes after installing from GitHub directly, try: 44 | 45 | devtools::install_github("lsteinmann/datplot", build_vignettes = TRUE) 46 | 47 | But you may have to install the vignette dependencies manually (see Suggests in the DESCRIPTION). Anyone who has the tidyverse installed should encounter no issues. 48 | 49 | Contact 50 | ------- 51 | 52 | Please feel free to use and change the code to your liking. We would be happy about any feedback on the package, and would appreciate it if you notified us of publications that use it! 53 | 54 | -------------------------------------------------------------------------------- /tests/testthat/test-get-step-sequence.R: -------------------------------------------------------------------------------- 1 | test_that("failure for non-numeric values", { 2 | expect_error(get.step.sequence(datmin = "min", 3 | datmax = 2, 4 | stepsize = 1), 5 | "numeric") 6 | expect_error(get.step.sequence(datmin = 1, 7 | datmax = "max", 8 | stepsize = 1), 9 | "numeric") 10 | expect_error(get.step.sequence(datmin = 1, 11 | datmax = 2, 12 | stepsize = "size"), 13 | "numeric") 14 | }) 15 | 16 | 17 | test_that("first and last values in sequence are correct", { 18 | min <- -100 19 | max <- -50 20 | test <- get.step.sequence(datmin = min, 21 | datmax = max, 22 | stepsize = 25) 23 | expect_equal(test[1], min) 24 | expect_equal(test[length(test)], max) 25 | }) 26 | 27 | test_that("stepsize 1 builds simple sequence", { 28 | min <- 1 29 | max <- 11 30 | stepsize <- 1 31 | seq <- get.step.sequence(datmin = min, 32 | datmax = max, 33 | stepsize = stepsize) 34 | expect_equal(length(seq), length(seq(min, max, stepsize))) 35 | }) 36 |
37 | test_that("timespan not divisible by stepsize, 38 | timespan exceeds 60% of stepsize, 39 | should return min, max and mean", { 40 | min <- 1 41 | max <- 21 42 | stepsize <- 30 43 | seq <- get.step.sequence(datmin = min, 44 | datmax = max, 45 | stepsize = stepsize) 46 | expect_equal(length(seq), 3) 47 | expect_equal(seq[1], min) 48 | expect_equal(seq[2], mean(c(min, max))) 49 | expect_equal(seq[3], max) 50 | }) 51 | 52 | 53 | test_that("timespan not divisible by stepsize, 54 | dated to only one year", { 55 | min <- 1 56 | max <- 1 57 | stepsize <- 30 58 | seq <- get.step.sequence(datmin = min, 59 | datmax = max, 60 | stepsize = stepsize) 61 | expect_equal(seq, min) 62 | expect_equal(seq, max) 63 | expect_equal(length(seq), 1) 64 | }) 65 | 66 | test_that("timespan not divisible by stepsize, 67 | timespan less than 60% of stepsize, 68 | should return only min and max date", { 69 | min <- 1 70 | max <- 10 71 | stepsize <- 30 72 | seq <- get.step.sequence(datmin = min, 73 | datmax = max, 74 | stepsize = stepsize) 75 | expect_equal(seq[1], min) 76 | expect_equal(seq[2], max) 77 | expect_equal(length(seq), 2) 78 | }) 79 | 80 | test_that("timespan divisible by stepsize, 81 | no residuals", { 82 | min <- 1 83 | max <- 11 84 | stepsize <- 2 85 | seq <- get.step.sequence(datmin = min, 86 | datmax = max, 87 | stepsize = stepsize) 88 | expect_equal(seq, seq(from = min, to = max, by = 2)) 89 | }) 90 | 91 | test_that("timespan divisible by stepsize, 92 | with residuals smaller than half the stepsize", { 93 | min <- 1 94 | max <- 17 95 | stepsize <- 3 96 | check <- seq(from = min, to = max, by = stepsize) 97 | seq <- get.step.sequence(datmin = min, 98 | datmax = max, 99 | stepsize = stepsize) 100 | expect_equal(length(seq), length(check)) 101 | expect_failure(expect_equal(seq, check)) 102 | }) 103 | 104 | 105 | test_that("timespan divisible by stepsize, 106 | with residuals larger than half the stepsize, 107 | modifies stepsize", { 108 | min <- 1 109 | max <- 49 110 |
stepsize <- 25 111 | seq <- get.step.sequence(datmin = min, 112 | datmax = max, 113 | stepsize = stepsize) 114 | expect_lt(diff(seq)[1], stepsize) 115 | }) 116 | 117 | -------------------------------------------------------------------------------- /inst/literatur.bib: -------------------------------------------------------------------------------- 1 | % Encoding: UTF-8 2 | 3 | @Www{BAPD, 4 | title = {Beazley Archive Pottery Database ({{BAPD}})}, 5 | author = {University of Oxford}, 6 | urldate = {2018-04-27}, 7 | url = {https://www.carc.ox.ac.uk/carc/pottery}, 8 | } 9 | 10 | @article{datplotarticle, 11 | title = {datplot: {{A}} new r-package for the visualization of date ranges in archaeology}, 12 | author = {Weissova, Barbora and Steinmann, Lisa}, 13 | date = {2021}, 14 | journaltitle = {Advances in Archaeological Practice}, 15 | volume = {9}, 16 | number = {7}, 17 | pages = {288-298}, 18 | doi = {10.1017/aap.2021.8} 19 | } 20 | 21 | 22 | 23 | @thesis{weissova2019, 24 | title = {Regional Economy, Settlement Patterns and the Road System in {{Bithynia}} (4th century {{BC}} - 6th century {{AD}}). 
{{Spatial}} and {{Quantitative Analysis}}}, 25 | author = {Weissova, Barbora}, 26 | date = {2019}, 27 | institution = {{Freie Universit\"at Berlin}}, 28 | location = {{Berlin}}, 29 | url = {https://refubium.fu-berlin.de/handle/fub188/23730}, 30 | type = {Dissertation} 31 | } 32 | 33 | 34 | 35 | @article{weissova_HinterlandNikaiaNicaea_2019, 36 | title = {The {{Hinterland}} of {{Nikaia}}/{{Nicaea}}/{{Iznik}}: {{Analyzing}} the {{Hellenistic Roman}} and {{Late Antique Bithynian Landscape}} through {{Remote Sensing}} and {{GIS Techniques}}}, 37 | shorttitle = {The {{Hinterland}} of {{Nikaia}}/{{Nicaea}}/{{Iznik}}}, 38 | author = {Weissova, Barbora and Brigand, Robin and Polla, Silvia}, 39 | date = {2019}, 40 | journaltitle = {eTopoi Journal for Ancient Studies}, 41 | volume = {8}, 42 | pages = {21--49}, 43 | doi = {10.17169/REFUBIUM-25618}, 44 | } 45 | 46 | 47 | @article{ratcliffe_Aoristicanalysisspatial_2000, 48 | title = {Aoristic analysis: the spatial interpretation of unspecific temporal events}, 49 | shorttitle = {Aoristic analysis}, 50 | author = {Ratcliffe, Jerry H.}, 51 | date = {2000-10}, 52 | journaltitle = {International Journal of Geographical Information Science}, 53 | volume = {14}, 54 | pages = {669--679}, 55 | doi = {10.1080/136588100424963}, 56 | number = {7} 57 | } 58 | 59 | 60 | 61 | @article{orton_CatchUnitResearch_2017, 62 | title = {Catch Per Unit Research Effort: Sampling Intensity, Chronological Uncertainty, and the Onset of Marine Fish Consumption in Historic London}, 63 | shorttitle = {Catch Per Unit Research Effort}, 64 | author = {Orton, David and Morris, James and Pipe, Alan}, 65 | date = {2017-01-20}, 66 | journaltitle = {Open Quaternary}, 67 | volume = {3}, 68 | pages = {1}, 69 | doi = {10.5334/oq.29} 70 | } 71 | 72 | @incollection{johnson_AoristicAnalysisSeeds_2004, 73 | title = {Aoristic Analysis: Seeds of a New Approach to Mapping Archaeological Distributions through Time}, 74 | booktitle = {2003 - Enter the Past. 
The E-way into the four Dimensions of Cultural Heritage. CAA 2003. Computer Applications and Quantitative Methods in Archaeology}, 75 | author = {Johnson, Ian}, 76 | editor = {B\"orner, W. and Goriany, M.}, 77 | date = {2004}, 78 | publisher = {Archaeopress}, 79 | location = {Oxford}, 80 | url = {https://publikationen.uni-tuebingen.de/xmlui/handle/10900/60663} 81 | } 82 | 83 | 84 | 85 | @article{baxter_ReinventingwheelModelling_2016, 86 | title = {Reinventing the Wheel? Modelling temporal uncertainty with applications to brooch distributions in Roman Britain}, 87 | shorttitle = {Reinventing the wheel?}, 88 | author = {Baxter, Mike J. and Cool, H.E.M.}, 89 | date = {2016-02}, 90 | journaltitle = {Journal of Archaeological Science}, 91 | volume = {66}, 92 | pages = {120--127}, 93 | doi = {10.1016/j.jas.2015.12.007} 94 | } 95 | 96 | @article{crema_probabilisticframeworkassessing_2010, 97 | title = {A probabilistic framework for assessing spatio-temporal point patterns in the archaeological record}, 98 | author = {Crema, Enrico R. and Bevan, Andrew and Lake, Mark W.}, 99 | date = {2010-05}, 100 | journaltitle = {Journal of Archaeological Science}, 101 | volume = {37}, 102 | pages = {1118--1130}, 103 | doi = {10.1016/j.jas.2009.12.012}, 104 | number = {5} 105 | } 106 | 107 | 108 | 109 | 110 | @book{shennan_Quantifyingarchaeology_1988, 111 | title = {Quantifying Archaeology}, 112 | author = {Shennan, Stephen}, 113 | date = {1988}, 114 | publisher = {Edinburgh University Press}, 115 | location = {Edinburgh} 116 | } 117 | 118 | 119 | @article{baxter_histogramimprovedapproachessimple_1996, 120 | title = {Beyond the Histogram -- Improved Approaches to Simple Data Display in Archaeology using Kernel Density Estimates}, 121 | author = {Baxter, Mike J. 
and Beardah, Christian C.}, 122 | date = {1996}, 123 | journaltitle = {Archeologia e calcolatori}, 124 | volume = {7}, 125 | pages = {397--408}, 126 | number = {1} 127 | } 128 | 129 | -------------------------------------------------------------------------------- /R/datsteps.R: -------------------------------------------------------------------------------- 1 | #' @title Create 'steps' of dates for each object in a data.frame 2 | #' 3 | #' @description 4 | #' This function transforms a data.frame of dated objects with associated data 5 | #' to a new data.frame which contains a row for each dating 'step' for each 6 | #' object. Dating 'steps' can be single years (with `stepsize = 1`) or an 7 | #' arbitrary number that will be used as a guideline for the interval. 8 | #' It's expected that dates BCE are displayed as negative 9 | #' values while dates CE are positive values. Ignoring this will cause 10 | #' problems. If dates are provided in the wrong order in any number of 11 | #' rows they will automatically be switched. 12 | #' 13 | #' The function, along with a guide on how to use it and a case study, is published 14 | #' in [Steinmann -- Weissova 2021](https://doi.org/10.1017/aap.2021.8). 15 | #' 16 | #' 17 | #' @param DAT_df a data.frame with 4 variables: 18 | #' * `ID` : An identifier for each row, e.g. an Inventory number (ideally character). 19 | #' * `group` : A grouping variable, such as type or context (ideally factor). 20 | #' * `DAT_min` : minimum dating (int/num), the minimum dating boundary for a 21 | #' single object, i.e. the earliest year the object may be dated to. 22 | #' * `DAT_max` : maximum dating (int/num), the maximum dating boundary for a 23 | #' single object, i.e. the latest year the object may be dated to. 24 | #' The columns _must_ be in this order, column names are irrelevant; each row 25 | #' _must_ correspond to one datable entity / object. 26 | #' @param stepsize numeric, default is 1.
Number of years that should be used 27 | #' as an interval for creating dating steps. 28 | #' @param calc method of calculation to use; 29 | #' can be either one of "weight" (default) or "probability": 30 | #' * "weight": use the 31 | #' [published original calculation](https://doi.org/10.1017/aap.2021.8) 32 | #' for weights, 33 | #' * "probability": calculate year-wise probability instead (only reasonable 34 | #' when `stepsize = 1`) 35 | #' @param cumulative FALSE (default), TRUE: add a column containing the 36 | #' cumulative probability for each object (only reasonable when `stepsize = 1`, 37 | #' and will automatically use probability calculation) 38 | #' @param verbose TRUE / FALSE: Should the function issue additional 39 | #' messages pointing to possible inconsistencies and notify of methods? 40 | #' 41 | #' @return an expanded data.frame in which each row represents a dating 'step'. 42 | #' Added columns contain the value of each step, the 'weight' or 'probability'- 43 | #' value for each step, and (if chosen) the cumulative probability.
44 | #' 45 | #' @export datsteps 46 | #' 47 | #' @examples 48 | #' data("Inscr_Bithynia") 49 | #' DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] 50 | #' DAT_df_steps <- datsteps(DAT_df, stepsize = 25) 51 | #' plot(density(DAT_df_steps$DAT_step)) 52 | datsteps <- function(DAT_df, 53 | stepsize = 1, 54 | calc = "weight", 55 | cumulative = FALSE, 56 | verbose = TRUE) { 57 | 58 | calc <- ifelse(grepl("weight", calc), 59 | "weight", 60 | calc) 61 | calc <- ifelse(grepl("prob", calc), 62 | "probability", 63 | calc) 64 | 65 | # redundand 66 | if (cumulative && calc != "probability") { 67 | warning("Switching to probability calculation to provide cumulative probability.") 68 | calc <- "probability" 69 | } 70 | if (stepsize != 1 && calc == "probability") { 71 | warning("Probability calculation is only meaningful for stepsize = 1.") 72 | } 73 | 74 | calc <- match.arg(calc, c("weight", "probability")) 75 | 76 | if (verbose) { 77 | switch(calc, 78 | weight = message(paste("Using 'weight'-calculation", 79 | "(see https://doi.org/10.1017/aap.2021.8).")), 80 | probability = message("Using step-wise probability calculation.")) 81 | } 82 | 83 | 84 | if (any(is.na(DAT_df))) { 85 | NA_rows <- c(which(is.na(DAT_df[, 3])), 86 | which(is.na(DAT_df[, 4]))) 87 | NA_rows <- unique(NA_rows) 88 | DAT_df <- DAT_df[-NA_rows, ] 89 | warning(paste0(length(NA_rows), " rows with NA-values in the ", 90 | "dating columns will be omitted.")) 91 | } 92 | 93 | DAT_df <- as.data.frame(DAT_df) 94 | # Checking the overall structure 95 | check.structure(DAT_df, verbose = verbose) 96 | 97 | colnames <- c("index", "datmin", "datmax", calc, "step") 98 | 99 | # check for Dating in wrong order and switch accordingly 100 | DAT_df <- switch.dating(DAT_df) 101 | 102 | # Prepare the Matrix to be used instead of the df for faster processing 103 | DAT_mat <- matrix(ncol = 5, nrow = nrow(DAT_df)) 104 | DAT_mat[, 1] <- seq_len(nrow(DAT_df)) 105 | DAT_mat[, 2] <- DAT_df[, 3] 106 | DAT_mat[, 3] <- 
DAT_df[, 4] 107 | 108 | colnames(DAT_mat) <- colnames 109 | 110 | # If not already set, set stepsize 111 | if (stepsize == "auto") { 112 | stepsize <- generate.stepsize(DAT_mat, verbose = verbose) 113 | } else if (!is.numeric(stepsize)) { 114 | stop("stepsize has to be either 'auto' or numeric.") 115 | } 116 | 117 | # calculate the weights or probabilities 118 | if (calc == "weight") { 119 | res <- get.weights(DAT_mat[, "datmin"], 120 | DAT_mat[, "datmax"], 121 | verbose = verbose) 122 | } else if (calc == "probability") { 123 | res <- get.probability(DAT_mat[, "datmin"], 124 | DAT_mat[, "datmax"]) 125 | } 126 | DAT_mat[, calc] <- res 127 | 128 | DAT_list <- as.data.frame(DAT_mat) 129 | rownames(DAT_list) <- DAT_list[,1] 130 | 131 | DAT_list <- unlist(apply(DAT_list, 1, list), recursive = FALSE) 132 | 133 | 134 | # Process the dating to create the steps 135 | DAT_res <- create.sub.objects(DAT_list, 136 | stepsize, 137 | calc, 138 | cumulative) 139 | 140 | # convert to data.frame again and store the variable and ID in the correct 141 | # order, using the matrix index as reference 142 | result <- as.data.frame(DAT_res) 143 | 144 | # names and attributes 145 | colnames <- c("ID", "variable", "DAT_min", "DAT_max", 146 | calc, "DAT_step") 147 | if(cumulative) { 148 | colnames <- c(colnames, "cumul_prob") 149 | } 150 | result <- as.data.frame(matrix(nrow = nrow(DAT_res), ncol = length(colnames))) 151 | colnames(result) <- colnames 152 | 153 | result$ID <- DAT_df[DAT_res[, 1], 1] 154 | result$variable <- DAT_df[DAT_res[, 1], 2] 155 | result$DAT_min <- DAT_res[, "datmin"] 156 | result$DAT_max <- DAT_res[, "datmax"] 157 | result[, calc] <- DAT_res[, calc] 158 | result$DAT_step <- DAT_res[, "step"] 159 | if(cumulative) { 160 | result$cumul_prob <- DAT_res[, "cumul_prob"] 161 | } 162 | 163 | attr(result$DAT_step, "descr") <- "step" 164 | switch(calc, 165 | weight = attr(result$weight, "descr") <- "Calculated weight of each object according to doi.org/10.1017/aap.2021.8", 166 
| probability = attr(result$probability, "descr") <- "Dating-Probability of each object") 167 | attr(result, "stepsize") <- stepsize 168 | 169 | return(result) 170 | } 171 | -------------------------------------------------------------------------------- /data-raw/Inscr_Bithynia.R: -------------------------------------------------------------------------------- 1 | ## code to prepare `Inscr_Bithynia` dataset goes here 2 | 3 | #' Since the conversion of the original Excel file to CRAN-compatible 4 | #' ASCII data, this script no longer converts all datings. 5 | #' I apologize for not updating this script, but it seems too irrelevant 6 | #' to spend more time on it than I already have, since the package 7 | #' already contains the clean data. Keep this as a reminder that 8 | #' data is complicated, and that things change. 9 | 10 | library(dplyr) 11 | library(stringr) 12 | library(forcats) 13 | 14 | inscriptions <- read.csv("inst/extdata/Bithynia_Inscriptions_ascii.csv") 15 | 16 | inscriptions <- inscriptions %>% 17 | mutate(ikey = na_if(ikey, "N / A"), 18 | ikey = na_if(ikey, ""), 19 | ikey = gsub("2PH", "PH", ikey), 20 | ikey = gsub("v", "PH", ikey), 21 | URL = NA) 22 | 23 | repl <- grep("HD", inscriptions$ikey) 24 | inscriptions$URL[repl] <- paste("https://edh-www.adw.uni-heidelberg.de/edh/inschrift/", 25 | gsub("HD", "", inscriptions$ikey[repl]), 26 | sep = "") 27 | repl <- grep("PH", inscriptions$ikey) 28 | inscriptions$URL[repl] <- paste("https://epigraphy.packhum.org/text/", 29 | gsub("PH", "", inscriptions$ikey[repl]), 30 | sep = "") 31 | 32 | inscriptions$ID <- paste("I_", seq_len(nrow(inscriptions)), sep = "") 33 | inscriptions <- inscriptions %>% 34 | rename(Dating = Chronological.Frame) %>% 35 | mutate(Language = replace(Language, Language == "Gr/Lat", "Greek/Latin"), 36 | Language = replace(Language, Language == "Gr / Lat", "Greek/Latin"), 37 | Language = factor(Language, levels = c("Greek", "Latin", 38 | "Greek/Latin")), 39 | Location =
replace(Location, str_detect(Location, "unknown"), 40 | "unknown"), 41 | Location = replace(Location, 42 | Location == "Prusias ad Mare (Keramed)", 43 | "Prusias ad Mare"), 44 | Location = factor(Location), 45 | Dating = na_if(Dating, "---")) 46 | 47 | inscriptions$uncertain_dating <- FALSE 48 | sel <- grep("\\?", inscriptions$Dating) 49 | inscriptions$uncertain_dating[sel] <- TRUE 50 | inscriptions$Dating <- gsub("\\?", "", inscriptions$Dating) 51 | 52 | sel <- grepl("[0-9]", inscriptions$Dating) 53 | periods <- data.frame("Dating" = unique(inscriptions$Dating[which(sel == FALSE)])) 54 | periods$DAT_min <- NA 55 | periods$DAT_max <- NA 56 | #write.csv(periods, file = "periods.csv", fileEncoding = "UTF-8") 57 | # .... Manual editing of the resulting table, saving it as "periods_edit.csv". 58 | join_dating <- read.csv(file = system.file('extdata', 'periods_edit.csv', 59 | package = 'datplot', 60 | mustWork = TRUE), 61 | row.names = 1, 62 | colClasses = c("character", "character", 63 | "integer", "integer"), 64 | encoding = "UTF-8") 65 | 66 | num_dating <- data.frame("Dating" = unique(inscriptions$Dating[which(sel == TRUE)])) 67 | num_dating$DAT_min <- NA 68 | num_dating$DAT_max <- NA 69 | 70 | sel <- grep("^[0-9]{1,3} AD$", num_dating$Dating) 71 | num_dating$DAT_min[sel] <- gsub(" AD", "", num_dating$Dating[sel]) 72 | num_dating$DAT_max[sel] <- gsub(" AD", "", num_dating$Dating[sel]) 73 | sel <- grep("^[0-9]{1,3} BC$", num_dating$Dating) 74 | num_dating$DAT_min[sel] <- paste("-", gsub(" BC", "", num_dating$Dating[sel]), 75 | sep = "") 76 | num_dating$DAT_max[sel] <- paste("-", gsub(" BC", "", num_dating$Dating[sel]), 77 | sep = "") 78 | 79 | join_dating <- rbind(join_dating, num_dating[!is.na(num_dating$DAT_min), ]) 80 | num_dating <- num_dating[which(is.na(num_dating$DAT_min)), ] 81 | 82 | num_dating$Dating <- as.character(num_dating$Dating) 83 | 84 | 85 | # Values like: 92-120 AD 86 | sel <- grep("^[0-9]{1,3}-[0-9]{1,3} AD", num_dating$Dating) 87 | for (r in sel) 
{ 88 | split <- strsplit(x = num_dating$Dating[r], split = "-| ") 89 | num_dating$DAT_min[r] <- split[[1]][1] 90 | num_dating$DAT_max[r] <- split[[1]][2] 91 | } 92 | # Values like: AD 92-120 93 | sel <- grep("^AD [0-9]{1,3}-[0-9]{1,3}$", num_dating$Dating) 94 | for (r in sel) { 95 | split <- strsplit(x = num_dating$Dating[r], split = "-| ") 96 | num_dating$DAT_min[r] <- split[[1]][2] 97 | num_dating$DAT_max[r] <- split[[1]][3] 98 | } 99 | # Values like: AD 92-120 100 | sel <- grep("^AD [0-9]{1,3}-[0-9]{1,3}$", num_dating$Dating) 101 | for (r in sel) { 102 | split <- strsplit(x = num_dating$Dating[r], split = "-| ") 103 | num_dating$DAT_min[r] <- split[[1]][2] 104 | num_dating$DAT_max[r] <- split[[1]][3] 105 | } 106 | # Values like: AD 92 - 120 107 | sel <- grep("^AD [0-9]{1,3} - [0-9]{1,3}", num_dating$Dating) 108 | for (r in sel) { 109 | split <- strsplit(x = num_dating$Dating[r], split = " - | ") 110 | num_dating$DAT_min[r] <- split[[1]][2] 111 | num_dating$DAT_max[r] <- split[[1]][3] 112 | } 113 | # Values like: 198/199 AD 114 | sel <- grep("^[0-9]{1,3}/[0-9]{1,3} AD", num_dating$Dating) 115 | for (r in sel) { 116 | split <- strsplit(x = num_dating$Dating[r], split = "/| ") 117 | num_dating$DAT_min[r] <- split[[1]][1] 118 | num_dating$DAT_max[r] <- split[[1]][2] 119 | } 120 | # Values like: 525-75 BC 121 | sel <- grep("^[0-9]{1,3}-[0-9]{1,3} BC", num_dating$Dating) 122 | for (r in sel) { 123 | split <- strsplit(x = num_dating$Dating[r], split = "-| ") 124 | num_dating$DAT_min[r] <- 0 - as.numeric(split[[1]][1]) 125 | num_dating$DAT_max[r] <- 0 - as.numeric(split[[1]][2]) 126 | } 127 | 128 | join_dating <- rbind(join_dating, num_dating[!is.na(num_dating$DAT_min), ]) 129 | num_dating <- num_dating[which(is.na(num_dating$DAT_min)), ] 130 | 131 | sel <- grep("^[0-9]{1}[a-z]{2} c\\. 
AD$", num_dating$Dating) 132 | for (r in sel) { 133 | split <- strsplit(x = num_dating$Dating[r], split = "[a-z]{2} c\\.") 134 | split <- as.numeric(split[[1]][1]) 135 | num_dating$DAT_min[r] <- ((split - 1) * 100) 136 | num_dating$DAT_max[r] <- ((split - 1) * 100) + 99 137 | } 138 | 139 | sel <- grep("^[0-9]{1}[a-z]{2} c\\. BC$", num_dating$Dating) 140 | for (r in sel) { 141 | split <- strsplit(x = num_dating$Dating[r], split = "[a-z]{2} c\\.") 142 | split <- as.numeric(split[[1]][1]) 143 | num_dating$DAT_min[r] <- 0 - (split * 100) + 1 144 | num_dating$DAT_max[r] <- 0 - ((split - 1) * 100) 145 | } 146 | 147 | join_dating <- rbind(join_dating, num_dating[!is.na(num_dating$DAT_min), ]) 148 | num_dating <- num_dating[which(is.na(num_dating$DAT_min)), ] 149 | 150 | sel <- grep("^ca\\. [0-9]{1,3} AD$", num_dating$Dating) 151 | for (r in sel) { 152 | split <- strsplit(x = num_dating$Dating[r], split = " ") 153 | split <- as.numeric(split[[1]][2]) 154 | num_dating$DAT_min[r] <- split - 10 155 | num_dating$DAT_max[r] <- split + 10 156 | } 157 | sel <- grep("^ca\\. 
[0-9]{1,3} BC$", num_dating$Dating) 158 | for (r in sel) { 159 | split <- strsplit(x = num_dating$Dating[r], split = " ") 160 | split <- 0 - as.numeric(split[[1]][2]) 161 | num_dating$DAT_min[r] <- split - 10 162 | num_dating$DAT_max[r] <- split + 10 163 | } 164 | 165 | 166 | join_dating <- rbind(join_dating, num_dating[!is.na(num_dating$DAT_min), ]) 167 | num_dating <- num_dating[which(is.na(num_dating$DAT_min)), ] 168 | #unique(num_dating$Dating)[1:20] 169 | 170 | join_dating$DAT_min[which(join_dating$DAT_min == 0)] <- 1 171 | join_dating$DAT_max[which(join_dating$DAT_max == 0)] <- -1 172 | 173 | 174 | 175 | #write.csv(num_dating, file = "num_dating.csv", fileEncoding = "UTF-8") 176 | num_dating <- read.csv(file = system.file('extdata', 'num_dating_edit.csv', 177 | package = 'datplot', 178 | mustWork = TRUE), 179 | encoding = "UTF-8", 180 | row.names = 1, 181 | colClasses = c("character", "character", 182 | "integer", "integer")) 183 | 184 | 185 | join_dating <- join_dating %>% 186 | mutate(DAT_min = as.integer(DAT_min), 187 | DAT_max = as.integer(DAT_max)) %>% 188 | rbind(num_dating) 189 | 190 | 191 | inscriptions <- left_join(inscriptions, join_dating, by = "Dating") 192 | 193 | 194 | # Manual error correction 195 | inscriptions[which(inscriptions$ID == "I_1162"), "DAT_max"] <- 63 196 | inscriptions[which(inscriptions$ID == "I_2725"), c("DAT_min", "DAT_max")] <- 197 | inscriptions[which(inscriptions$ID == "I_2725"), c("DAT_max", "DAT_min")] 198 | 199 | 200 | 201 | inscriptions <- inscriptions[, c("ID", "ikey", "Location", "Source", "Dating", 202 | "Language", "uncertain_dating", 203 | "DAT_min", "DAT_max", "URL")] 204 | 205 | 206 | 207 | 208 | 209 | attr(inscriptions, "contact") <- 210 | "Barbora Weissova (Barbora.Weissova@ruhr-uni-bochum.de), 211 | Lisa Steinmann (lisa.steinmann@rub.de)" 212 | attr(inscriptions, "time_created") <- Sys.Date() 213 | attr(inscriptions, "source") <- 214 | "Data: https://inscriptions.packhum.org/ and 215 | B. 
Weissova, Regional Economy, 216 | Settlement Patterns and the Road System in Bithynia 217 | (4th century BC - 6th century AD) (Diss. FU Berlin 2019)" 218 | attr(inscriptions, "source_repo") <- "https://github.com/lsteinmann/datplot" 219 | attr(inscriptions$ikey, "descr") <- "ID at https://inscriptions.packhum.org/" 220 | attr(inscriptions$Location, "descr") <- "Findspot" 221 | attr(inscriptions$Source, "descr") <- "Corpus/Citation of the Inscription" 222 | attr(inscriptions$Dating, "descr") <- "Original Chronological Assessment as chr" 223 | attr(inscriptions$Language, "descr") <- "Language of the Inscription" 224 | attr(inscriptions$uncertain_dating, "descr") <- 225 | "TRUE if Dating is not certain, FALSE if dating is certain" 226 | attr(inscriptions$DAT_min, "descr") <- "lower border of the dating timespan" 227 | attr(inscriptions$DAT_max, "descr") <- "upper border of the dating timespan" 228 | attr(inscriptions$URL, "descr") <- "Link to the Inscription at 229 | https://inscriptions.packhum.org/" 230 | 231 | #write.table(inscriptions, file = "inscriptions.csv", 232 | # fileEncoding = "UTF-8", sep = ";", row.names = FALSE) 233 | 234 | 235 | Inscr_Bithynia <- inscriptions 236 | 237 | #inscriptions[which(inscriptions$DAT_min == 0),c(1,2,4,5,8,9)] 238 | #inscriptions[which(inscriptions$DAT_max == 0),c(1,2,4,5,8,9)] 239 | 240 | usethis::use_data(Inscr_Bithynia, overwrite = TRUE) 241 | 242 | -------------------------------------------------------------------------------- /R/datplot_utility.R: -------------------------------------------------------------------------------- 1 | #' @title Determine stepsize (internal) 2 | #' 3 | #' @description Determines the stepsize as the smallest absolute difference 4 | #' between the upper and lower ends of all dating ranges. 5 | #' 6 | #' @param DAT_mat a matrix as prepared by [datsteps()], resp. a matrix with 7 | #' column names `datmin` and `datmax` containing numeric/integer values of the 8 | #' dating ranges.
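#'
#' For illustration (a hypothetical input, not run): with the dating
#' ranges (-100, -50) and (-20, -10), the timespans are 50 and 10 years,
#' so the smallest one, 10, is returned as stepsize.
#' ```
#' DAT_mat <- cbind(datmin = c(-100, -20), datmax = c(-50, -10))
#' generate.stepsize(DAT_mat)
#' ```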
9 | #' @inheritParams datsteps 10 | #' 11 | #' @return A single numeric value that can be used as minimal stepsize. 12 | #' 13 | #' @seealso [datsteps()] 14 | #' 15 | #' @keywords internal 16 | generate.stepsize <- function(DAT_mat, verbose = FALSE) { 17 | if (!is.numeric(DAT_mat[, "datmin"]) & !is.numeric(DAT_mat[, "datmax"])) { 18 | stop("Non-numeric values handed to generate.stepsize().") 19 | } 20 | 21 | timespans <- (abs(DAT_mat[, "datmin"] - DAT_mat[, "datmax"])) 22 | 23 | stepsize <- min(timespans) 24 | 25 | stepsize <- ifelse(stepsize == 0, 1, stepsize) 26 | 27 | if(verbose) { 28 | message(paste("Using stepsize = ", stepsize, " (auto).", sep = "")) 29 | } 30 | return(stepsize) 31 | } 32 | 33 | #' @title Switch values where dating is in wrong order (internal) 34 | #' 35 | #' @description Requires a data.frame with 2 numeric variables in the 36 | #' 3rd and 4th column: minimum date (int/numeric) and 37 | #' maximum date (int/numeric) as used in [datsteps()]. 38 | #' 39 | #' @param DAT_df a data.frame with 4 variables in this order: ID, group, 40 | #' minimum date (int/num), maximum date (int/num) 41 | #' 42 | #' @return The same data.frame with the dating values that were in the wrong 43 | #' order switched. 44 | #' 45 | #' @seealso [datsteps()] 46 | #' 47 | #' @keywords internal 48 | switch.dating <- function(DAT_df) { 49 | dat_wrong_order <- which(DAT_df[, 3] > DAT_df[, 4]) 50 | 51 | if (length(dat_wrong_order) > 0) { 52 | # Switch the Dating of Rows assumed to be in wrong order: 53 | DAT_df[dat_wrong_order, 3:4] <- DAT_df[dat_wrong_order, 4:3] 54 | # Notifying is important, because the data have been changed! 55 | warning(paste0("Dating seems to be in wrong order at ID ", 56 | paste(DAT_df[dat_wrong_order, 1], collapse = ", "), 57 | " (Index: ", paste(dat_wrong_order, collapse = ", "), 58 | ").
Dates have been switched, but be sure to check ", 59 | "your original data for possible mistakes.")) 60 | } 61 | 62 | return(DAT_df) 63 | } 64 | 65 | 66 | #' @title Calculate the weights for each dated object 67 | #' 68 | #' @description Calculates the weights from two vectors of minimum and maximum 69 | #' dating for each object. Returns a vector of weights. Objects that have the 70 | #' same value in both min and max dating are treated as if they were 71 | #' dated to a timespan of one year. See [publication](https://doi.org/10.1017/aap.2021.8) 72 | #' for information about how this is calculated. 73 | #' 74 | #' @param DAT_min a numeric vector containing the minimum date of each object 75 | #' @param DAT_max a numeric vector containing the maximum date of each object 76 | #' @inheritParams datsteps 77 | #' 78 | #' @seealso [datsteps()], [get.probability()] 79 | #' 80 | #' @return a vector of 'weight'-values for the datsteps-data.frame, that is a 81 | #' quantification of how well the object is dated (lesser value means object 82 | #' is dated to larger timespans, i.e. with less confidence) 83 | #' 84 | #' @export get.weights 85 | get.weights <- function(DAT_min, DAT_max, verbose = FALSE) { 86 | stopifnot(is.numeric(DAT_min)) 87 | stopifnot(is.numeric(DAT_max)) 88 | 89 | weights <- abs(DAT_min - DAT_max) 90 | 91 | if (any(weights == 0)) { 92 | if (verbose) { 93 | msg <- paste0("DAT_min and DAT_max at Index: ", 94 | paste(which(weights == 0), collapse = ", "), 95 | " have the same value! Is this correct?
", 96 | "If unsure, check your data for possible errors.") 97 | message(msg) 98 | } 99 | # set weight to 1 to treat objects with same min and max 100 | # dating (dated to one year precisely) as very influential 101 | # will have the same weight as objects dated to two years, 102 | # (which may also equal a span of 1 year) 103 | weights[which(weights == 0)] <- 1 104 | } 105 | # weights have to be below 1 106 | weights <- 1 / weights 107 | 108 | return(weights) 109 | } 110 | 111 | 112 | #' @title Calculate the probability for each year and each dated object 113 | #' 114 | #' @description Calculates the probability of each object being dated into 115 | #' each year / timeslot from two vectors of minimum and maximum 116 | #' dating. Returns a vector of probabilities. 117 | #' 118 | #' @inheritParams get.weights 119 | #' 120 | #' @return a vector of probabilities for each object being dated to any 121 | #' single year within the timespan (lesser value means object is dated to 122 | #' larger timespans, i.e. with less confidence). 123 | #' 124 | #' @seealso [datsteps()], [get.weights()] 125 | #' 126 | #' @export get.probability 127 | get.probability <- function(DAT_min, DAT_max) { 128 | stopifnot(is.numeric(DAT_min)) 129 | stopifnot(is.numeric(DAT_max)) 130 | 131 | # calculate the dating probability 132 | # (thanks to Christian Gugl for requesting this) 133 | prob <- abs(DAT_min - DAT_max) 134 | prob <- prob + 1 135 | prob <- 1 / prob 136 | 137 | return(prob) 138 | } 139 | 140 | 141 | #' @title Calculate the sequence of dating steps 142 | #' 143 | #' @description 144 | #' Produces an appropriate sequence of years between the minimum 145 | #' and maximum dating. 
146 | #' 147 | #' If they cannot be properly divided by the stepsize set 148 | #' beforehand, either three values are generated for objects that are dated to 149 | #' a range of more than 60% of the stepsize (min, mean, max), or two values for 150 | #' objects dated to a timespan of less than or equal to 60% of the stepsize. 151 | #' If they can be divided without residual, the normal sequence is returned. 152 | #' If there is a residual, the stepsize is modified depending on how large the 153 | #' residual is. 154 | #' 155 | #' @param datmin numeric value of the minimum dating of one object 156 | #' @param datmax numeric value of the maximum dating of one object 157 | #' @param stepsize the stepsize to be used 158 | #' 159 | #' @return sequence of steps to be created by [create.sub.objects()] 160 | #' 161 | #' @seealso [datsteps()], [create.sub.objects()] 162 | #' 163 | #' @export get.step.sequence 164 | #' 165 | #' @examples 166 | #' min_year <- -494 167 | #' max_year <- -334 168 | #' sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25) 169 | #' sequence 170 | #' 171 | #' min_year <- 1 172 | #' max_year <- 100 173 | #' sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25) 174 | #' sequence 175 | get.step.sequence <- function(datmin = 0, datmax = 100, stepsize = 25) { 176 | 177 | stopifnot(is.numeric(datmin)) 178 | stopifnot(is.numeric(datmax)) 179 | stopifnot(is.numeric(stepsize)) 180 | 181 | # Get the difference of the two dating values 182 | timespan <- datmax - datmin 183 | 184 | # First: If the stepsize is larger than the timespan, two different 185 | # strategies can be employed 186 | if (timespan %/% stepsize == 0) { 187 | if (timespan > (stepsize * 0.6)) { 188 | # If the timespan exceeds 60% of the stepsize, three steps will be 189 | # created corresponding to minimum, mean and maximum dating 190 | sequence <- c(datmin, round(((datmin + datmax) / 2), digits = 0), datmax) 191 | } else if (timespan == 0)
{ 192 | # for objects dated to one year, only use one year! 193 | sequence <- datmin 194 | } else { 195 | # if the timespan is less than 60% of the stepsize, just two values 196 | # corresponding to minimum and maximum dating will be returned 197 | sequence <- c(datmin, datmax) 198 | } 199 | } else { 200 | # If the timespan can be divided at least once, first generate the sequence 201 | sequence <- seq(from = datmin, to = datmax, by = stepsize) 202 | # then check how many years the maximum dating would be off 203 | resid <- datmax - sequence[length(sequence)] 204 | if (resid >= (stepsize / 2)) { 205 | # if the residual is larger than or equal to half the stepsize, the 206 | # stepsize is temporarily modified so that one additional step 207 | # fits into the same timespan 208 | stepsize_mod <- (datmax - datmin) / (length(sequence) + 1) 209 | sequence <- seq(datmin, datmax, stepsize_mod) 210 | # then round all values except the first and last, which need to stay as 211 | # minimum and maximum date 212 | sequence[-c(1, length(sequence))] <- 213 | round(sequence[-c(1, length(sequence))], 214 | digits = 0) 215 | } else if (resid != 0) { 216 | # if the residual is smaller but also not 0, the sequence values are moved 217 | # by an appropriate fraction 218 | move <- round(resid / (length(sequence) - 1), digits = 0) 219 | sequence[2:length(sequence)] <- sequence[2:length(sequence)] + move 220 | # and the end of the sequence is reset as the maximum dating 221 | sequence[length(sequence)] <- datmax 222 | # TODO: these two things do essentially the same?
I need to fix the first 223 | # one to use the largest possible division, maybe 224 | } else { 225 | # this implies that there was no residual, so the original 226 | # sequence can be used 227 | } 228 | } 229 | # returns the sequence 230 | return(sequence) 231 | } 232 | 233 | 234 | #' @title Create sub-objects for each object in a dataframe (internal) 235 | #' 236 | #' @description Requires a list with named vectors as [datsteps()] will 237 | #' hand to the function. 238 | #' 239 | #' @param DAT_list a list as prepared by [datsteps()] 240 | #' @inheritParams datsteps 241 | #' 242 | #' @return an expanded list of the same structure to be further processed by 243 | #' [datsteps()], with each object duplicated according to the number of steps required 244 | #' 245 | #' @keywords internal 246 | 247 | create.sub.objects <- function(DAT_list, 248 | stepsize, 249 | calc = "weight", 250 | cumulative = FALSE) { 251 | 252 | diffs <- unlist(lapply(DAT_list, function(x) x["datmax"] - x["datmin"])) 253 | 254 | switch(calc, 255 | weight = diffs[diffs == 0] <- 1, 256 | probability = diffs <- diffs + 1 257 | ) 258 | 259 | 260 | if (any(diffs < stepsize)) { 261 | warning(paste0("stepsize is larger than the range of the ", 262 | "closest dated object at Index = ", 263 | paste(which(diffs < stepsize), collapse = ", "), ". ", 264 | "This is not recommended.
", 265 | "For information see documentation of get.step.sequence().")) 266 | } 267 | 268 | DAT_list <- lapply(DAT_list, function(object) { 269 | sequence <- get.step.sequence(object["datmin"], object["datmax"], 270 | stepsize) 271 | new_object <- lapply(sequence, function(step) { 272 | new_object <- object 273 | new_object["step"] <- step 274 | return(new_object) 275 | }) 276 | names(new_object) <- NULL 277 | new_object <- do.call(rbind, new_object) 278 | if (cumulative) { 279 | cumul_prob <- cumsum(new_object[, calc]) 280 | new_object <- cbind(new_object, cumul_prob) 281 | } 282 | return(new_object) 283 | }) 284 | 285 | 286 | 287 | result <- do.call(rbind, DAT_list) 288 | 289 | switch(calc, 290 | weight = attr <- "Calculated weight of each object according to doi.org/10.1017/aap.2021.8", 291 | probability = attr <- "year-wise probability of each object") 292 | 293 | attributes(result)$calc <- c(calc, attr) 294 | 295 | return(result) 296 | } 297 | 298 | #' @title Check if the structure is compatible with [datsteps()] (internal) 299 | #' 300 | #' @description Checks if the object passed to [datsteps()] can be used for 301 | #' processing. 
302 | #' 303 | #' @param DAT_df An object to check 304 | #' @inheritParams datsteps 305 | #' 306 | #' @return TRUE if object can be processed by [datsteps()], error / FALSE if not 307 | #' 308 | #' @keywords internal 309 | 310 | check.structure <- function(DAT_df, verbose = FALSE) { 311 | dat_df_structure <- c(NA, NA, NA, NA, NA) 312 | names(dat_df_structure) <- c("is.df", "is.id", "is.var", 313 | "is.minDAT", "is.maxDAT") 314 | # Todo 315 | dat_df_structure["is.df"] <- is.data.frame(DAT_df) 316 | dat_df_structure["is.id"] <- is.character(DAT_df[, 1, drop = TRUE]) 317 | dat_df_structure["is.var"] <- is.factor(DAT_df[, 2, drop = TRUE]) 318 | dat_df_structure["is.minDAT"] <- is.numeric(DAT_df[, 3, drop = TRUE]) 319 | dat_df_structure["is.maxDAT"] <- is.numeric(DAT_df[, 4, drop = TRUE]) 320 | 321 | 322 | if (dat_df_structure[1] == FALSE) { 323 | result <- FALSE 324 | stop("datsteps requires an object of class data.frame") 325 | } else { 326 | result <- TRUE 327 | } 328 | if (any(dat_df_structure[c("is.minDAT", "is.maxDAT")] == FALSE)) { 329 | result <- FALSE 330 | stop("The 3rd and 4th columns of your data.frame have to be numeric.") 331 | } else { 332 | result <- TRUE 333 | } 334 | if (any(dat_df_structure[2:3] == FALSE) & verbose) { 335 | message(paste0("It is recommended to use ", 336 | "character vector for the 'ID'-column (1) ", 337 | "and ", 338 | "factor for the 'variable'-column (2).")) 339 | } 340 | return(result) 341 | } 342 | -------------------------------------------------------------------------------- /vignettes/how-to.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Density Plots for Dates" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{datplot: Visualizing chronological distribution} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | bibliography: ../inst/literatur.bib 9 | --- 10 | 11 | ```{r, include = FALSE} 12 | knitr::opts_chunk$set( 13 | 
collapse = TRUE, 14 | comment = "#>", 15 | fig.width = 7, 16 | fig.align = "center" 17 | ) 18 | 19 | library(knitr) 20 | library(ggplot2) 21 | 22 | Plot_Theme <- theme(panel.background = element_blank(), 23 | panel.grid.major = element_line(color = "grey60", linetype = "dashed"), 24 | panel.grid.minor = element_line(color = "grey80", linetype = "dashed"), 25 | legend.position = c(0.9, 0.85), 26 | legend.background = element_rect(fill = "white", color = "grey60")) 27 | Plot_Fill <- scale_fill_manual(name = "Technique", values = c("gray30", 28 | "tomato3")) 29 | ``` 30 | 31 | 32 | ## Why? 33 | 34 | A rather common problem in archaeology is the fuzziness of dates assigned to objects. If one wants to visualize overall changes in - let's say - pottery consumption, bar charts often fall short in that regard. If we have Phases a -- f, then some of the objects can usually be dated to a, c, and f, as an example, but others will be classified as "a to c" or "b to c". But how can these data still be used for examining changes in a large set of objects, respecting the quality of their dating? The answer lies in aoristic analysis, which was implemented in criminology to assess the general temporal occurrence of burglaries from the reported time span within which individual burglaries had happened [@ratcliffe_Aoristicanalysisspatial_2000]. This concept has been implemented in archaeology before (see @orton_CatchUnitResearch_2017, 3--5 for a recent overview; generally credited with the first implementation for archaeology is @johnson_AoristicAnalysisSeeds_2004). There are other R-packages that implement aoristic analysis in archaeology, usually with a less object-based focus and with differing algorithms (namely [aoristAAR](https://github.com/ISAAKiel/aoristAAR/), [archSeries](https://github.com/davidcorton/archSeries), [tabula](https://github.com/tesselle/tabula) and [rtefact](https://github.com/ahb108/rtfact)).
We wholeheartedly recommend trying out those packages as well to find the best fit for your data. This implementation (datplot) is kept rather simple. The case study with which the package is published [@datplotarticle] addresses classical archaeology, but the package itself is in no way restricted to any discipline. 35 | 36 | ## How? 37 | 38 | First, it is important to translate the phases into numbers. This should be easily possible for archaeological data, as there is usually an accepted date for each phase or period. While resources such as the [iDAI chronontology](https://chronontology.dainst.org/) do not directly provide numeric or absolute chronological values, they offer approaches for a unified terminology. Usually, this would depend on the discipline and geographical frame of research in particular and has to be chosen for each analysis. When a concordance for phases and their absolute dating exists, it should be easy to apply these to single objects. We have illustrated the process of cleaning and transforming real-world archaeological data in a very manual solution in the "Data Preparation and Visualization"-vignette to this package. Translating phases into numbers may cause other problems in the end, since such phases are often employed to avoid dates, but it is necessary as the aim is to visualize the distribution on a (pseudo-)continuous scale, for which numbers are needed. Also, this step may be reversed for the final visualization by supplementing or replacing the scale on the x-axis with the respective phases. An automated process has not been implemented yet. 39 | 40 | Ideally, one can produce a 'beginning' and 'end' date for each object, or let's say an earliest possible dating and a latest possible dating, e.g. corresponding to the beginning and end of each phase the object is dated to. 41 | 42 | To show and explain how this would work, we chose a random sample of Athenian pottery from the Beazley archive [@BAPD], as it is a large publicly available data set.
(Since the format provided by the BAPD is slightly different from that needed here, we converted the data beforehand to match requirements. No values have been changed. The sample data set is included in datplot. In a recent version the Inscriptions of Bithynia data set has been added to datplot as well. Its cleaning and formatting is detailed in the "data_preparation"-vignette as an example for scholars looking for approaches to the transformation of verbose and heterogeneous data. The same vignette also suggests some visualizations of said data set.) 43 | 44 | ```{r prep} 45 | library(datplot) 46 | data(Beazley) 47 | ``` 48 | ```{r preptable, echo = FALSE} 49 | knitr::kable(Beazley[sample(seq_len(nrow(Beazley)), 10, replace = FALSE), ]) 50 | ``` 51 | 52 | ## How to Display a Range? 53 | 54 | The table provides two dates for each object: the earliest possible dating (DAT_min) and the latest possible dating (DAT_max). We want to process this into a [density graph](https://en.wikipedia.org/wiki/Kernel_density_estimation), which is the most elegant means of visualization for continuous distributions (see also @crema_probabilisticframeworkassessing_2010, 1123 or @baxter_ReinventingwheelModelling_2016, 125--126). (At least if the goal is merely to evaluate changes over time and the probability of object groups dating to a certain point in time, and not to look at actual object counts, which will be obscured.) The scale we use is only pseudo-continuous, as the data actually consists of intervals (1 year, or 25 years, etc.), but we nonetheless treat it as a continuous one. 55 | 56 | Objects that can be dated with greater confidence should have a larger impact on the overall visualization. The core function of this package (`datsteps()`) produces a column named either 'weight' or 'probability' which contains a value that reflects the quality of dating ('weight') or the probability of an object being dated to a certain year ('probability').
For the calculation of 'weight', see [@datplotarticle]. The formula for probability calculation is simply '1 / ((max - min) + 1)'. 57 | 58 | In any case, the greater the time span, the lower the weight value or probability. This is a generally accepted method of aoristic analysis [@ratcliffe_Aoristicanalysisspatial_2000, 671--672 with Fig. 1] and explained in greater detail in [@datplotarticle]. In the case of a stepsize of 1, the weight value can be seen as an actual dating probability to the specific year. As an example, a coin dated between 36 and 37 CE will thus have a probability of 0.5 for the year 36, and 0.5 for the year 37. The calculation of probability does, however, not work with greater stepsizes. It is possible to switch between both variants of calculation by using `datsteps(x, calc = "weight")` or `datsteps(x, calc = "probability")`. When the probability is calculated, there is an option to add a cumulative probability column (`datsteps(x, calc = "probability", cumulative = TRUE)`). 59 | 60 | Secondly, every object is duplicated a number of times equal to the dating range divided by the stepsize-variable. Each duplicate has its own 'DAT_step' -- one single value between the two extremes. The above-mentioned weight variable is divided by the number of steps, so that each new fictional object or 'date step' counts only as a fraction of the actual object. With this, we hope to achieve a greater influence of closely dated objects and a higher emphasis on overlap. 61 | 62 | This method will not be useful for dating specific contexts, since any concept of _terminus post/ante quem_ is lost here, which is important on a smaller scale. It may, however, be applicable for contexts that are disturbed, as a noticeable overlap of objects could give an indication for the original _tpq_-date. The method is suitable also for the visualization of changes in _trends_ over time, e.g.
the popularity of pottery types, or overall peaks in occupation from survey data [@orton_CatchUnitResearch_2017, 5]. 63 | 64 | Other approaches, e.g. using the median date of each object, may in some cases produce similar outcomes, but create other problems. A lot of information is lost on the way when employing averaged or median data, as large amounts of loosely dated objects will produce peaks at unreasonable values. (Consider a large number of objects dated between 600 and 400 BCE all attributed to the year 500 BCE.) 65 | 66 | ```{r barplot} 67 | Beazley$DAT_mean <- (Beazley$DAT_max + Beazley$DAT_min) / 2 68 | library(ggplot2) 69 | ggplot(Beazley, aes(x = DAT_mean, fill = Technique)) + 70 | geom_histogram(binwidth = 25, position = "dodge") + Plot_Theme + Plot_Fill 71 | ``` 72 | 73 | Employing dating steps will even out unreasonable peaks. Note especially the gap between -425 and -300 in the plot above, that is -- in the plot below -- filled with a constant number of objects in each year. This is due to the data containing large amounts of objects dating from -400 to -300 BCE. Of course, due to duplicating each object numerous times (see table below), the counts represented on the y-axis now inform us of the maximum number of objects that might be dated to a given bin rather than the actual amount. 74 | 75 | The method datplot uses for partitioning steps can be explained as follows: If any given object had been dated as '475 to 425 BC' and steps of 25 years were used for each object, this would result in the creation of three sub-objects with the respective dating of: 475 BC, 450 BC, 425 BC. For an interval of 5 years, the same object would be partitioned into 11 sub-objects, each dating to 475 BC, 470 BC, 465 BC, 460 BC, 455 BC, 450 BC, 445 BC, 440 BC, 435 BC, 430 BC and 425 BC respectively. In order to keep the data comparable, the interval (expressed as the stepsize-variable in the R-package) must be chosen globally for all objects.
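The partitioning just described can be reproduced directly with the exported helper `get.step.sequence()` (a quick sketch; remember that BC years are given as negative numbers):

```r
library(datplot)

# an object dated '475 to 425 BC' with a stepsize of 25 years
# yields three sub-objects:
get.step.sequence(datmin = -475, datmax = -425, stepsize = 25)
#> [1] -475 -450 -425

# the same object with a stepsize of 5 years yields eleven sub-objects:
get.step.sequence(datmin = -475, datmax = -425, stepsize = 5)
#> [1] -475 -470 -465 -460 -455 -450 -445 -440 -435 -430 -425
```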
As the time span an object is dated to can in many cases not be divided without remainder by the value given as a stepsize (i.e. the modulus of the division is greater than 0), datplot resolves these conflicts by temporarily modifying the size of steps for the objects in question without deviating significantly from the size set beforehand, so that the output is representative and comparable. Objects which are dated to a time span undercutting the stepsize-variable can either be assigned to one year (for objects dated precisely to one year), to two years (for objects dated to a number of years less than or equal to 60% of the stepsize) or to three years (for objects dated to a number of years exceeding 60% of the stepsize). 76 | 77 | ```{r warning = FALSE} 78 | system.time(result <- datsteps(Beazley, stepsize = 25))[3] 79 | system.time(result <- datsteps(Beazley, stepsize = 1))[3] 80 | ``` 81 | 82 | Using larger step sizes is only recommended if the stepsize of 1 year leads to significant delays in processing while yielding no additional information. If one were to look at a total time span of 10000 years, stepsizes of 1 no longer seem reasonable or even necessary, as such detailed changes should not be visible or graspable at all, and using steps of 100 years will lead to the same outcome. For classical archaeologists like us, a duration of more than 1000 years mostly exceeds the area of study. We seem to get good results with stepsizes between 1 and 25. 83 | 84 | 85 | ```{r steps1} 86 | library(datplot) 87 | result <- datsteps(Beazley, stepsize = 25) 88 | ggplot(result, aes(x = DAT_step, fill = variable)) + 89 | geom_histogram(binwidth = 25, position = "dodge") + Plot_Theme + Plot_Fill 90 | ``` 91 | 92 | 93 | 94 | 95 | `datsteps()` can also calculate a stepsize on its own. It equals the closest possible dating of any object.
As the stepsize used is saved in an attribute, this can be recycled to dynamically choose the binwidth for histograms: 96 | 97 | ```{r steps2} 98 | result <- datsteps(Beazley, stepsize = "auto") 99 | ggplot(result, aes(x = DAT_step, fill = variable)) + Plot_Theme + Plot_Fill + 100 | geom_histogram(binwidth = attributes(result)$stepsize, position = "dodge") 101 | ``` 102 | 103 | 104 | ```{r stepstable, echo = FALSE} 105 | knitr::kable(head(result)) 106 | ``` 107 | 108 | As the sample we use here only encompasses objects dated with a resolution of 50 years, it would be best to also proceed with this stepsize. A smaller stepsize would not paint a more detailed picture; it just uses up computing time without any benefit. 109 | 110 | Due to the impossibility of displaying object counts as well, it is ideal to use kernel density estimates for visualization. The density plot below shows the result. The peak at around -500 indicates that this area has the highest overlap, so a large part of the objects in our sample have been dated around this time. The same distribution can also be seen in the bar plots above. This, however, is not yet very informative. 111 | 112 | ```{r density one} 113 | result <- datsteps(Beazley, stepsize = 25) 114 | dens <- result 115 | dens <- scaleweight(result, var = "all") 116 | dens <- density(x = dens$DAT_step, weights = dens$weight) 117 | plot(dens) 118 | ``` 119 | 120 | 121 | 122 | ## Scaling the Weight along Groups of Objects 123 | 124 | In order to display the objects separated into groups, the weights first have to be scaled along group membership, so that the sum of all weights in a group will equal 1. datplot's function `scaleweight()` does exactly that for a dataframe as it was returned by `datsteps()`. A column that contains the variables for group membership needs to be indicated.
125 | 126 | 127 | ```{r scaleweight} 128 | result <- scaleweight(result, var = 2) 129 | ``` 130 | 131 | ```{r scaleweighttable, echo = FALSE} 132 | knitr::kable(head(result)) 133 | ``` 134 | 135 | 136 | 137 | ## Plots for the Distribution of Objects across Time 138 | 139 | In the case of the Beazley archive's data [@BAPD] we can clearly see what we knew before: Black-figure pottery is older than red-figure pottery. (The data are from a random sample of Athenian pottery from the Beazley archive, n = 1000.) 140 | 141 | ```{r ggplot, warning=FALSE} 142 | ggplot(data = result, aes(x = DAT_step, 143 | fill = variable, 144 | weight = weight)) + 145 | geom_density(alpha = 0.5) + 146 | xlab("Dating") + Plot_Theme + Plot_Fill 147 | ``` 148 | 149 | In the case of this data, the changes between showing and omitting the weight are negligible. This, however, is **not** the case with more heterogeneously dated sets of objects (see the "Data Preparation and Visualization"-vignette). Please note that -- even with the Beazley-data -- the plot, however little, does change when the weights are omitted: 150 | 151 | ```{r ggplot without weight, warning=FALSE} 152 | ggplot(data = result, aes(x = DAT_step, 153 | fill = variable)) + 154 | geom_density(alpha = 0.5) + 155 | xlab("Dating") + Plot_Theme + Plot_Fill 156 | ``` 157 | 158 | When every step is valued equally, a lot of steps fall into the end of the 4th century (as mentioned above), since they were dated as e.g. "-400 to -300". The impact here is not very large, as the dating ranges of the objects do not vary greatly. However, the differences can be very dramatic for more heterogeneous data.
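A quick sanity check (a sketch, assuming the `result` data.frame from the chunks above) confirms what `scaleweight()` promises: after scaling, the weights sum to 1 within each group:

```r
# sum the scaled weights per group; each group should total (roughly) 1
aggregate(weight ~ variable, data = result, FUN = sum)
```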
159 | 160 | To quickly illustrate this, we can plot two versions of the density from the Inscriptions of Bithynia data included in the package: 161 | 162 | ```{r, warning = FALSE} 163 | data("Inscr_Bithynia") 164 | Inscr_Bithynia <- na.omit(Inscr_Bithynia[, c(1, 3, 8, 9)]) 165 | result_bith <- scaleweight(datsteps(Inscr_Bithynia, stepsize = "auto"), 166 | var = "all") 167 | 168 | ggplot(result_bith, aes(x = DAT_step)) + Plot_Theme + Plot_Fill + 169 | geom_density(alpha = 0.5, fill = "grey30") + xlab("Dating") 170 | 171 | ggplot(result_bith, aes(x = DAT_step, weight = weight)) + 172 | Plot_Theme + Plot_Fill + 173 | geom_density(alpha = 0.5, fill = "grey30") + xlab("Dating") 174 | ``` 175 | 176 | ## Including Histograms for an Impression of Quantity 177 | 178 | We also added a function that calculates the value needed to scale the density curve to the axis of a histogram of the dating steps. Please note that the histogram will not show the actual object counts, but the counts of the maximum possible objects dated to the corresponding year resp. bin. The value to scale the density curve for a combined plot with a histogram can be obtained via `get.histogramscale`: 179 | 180 | 181 | ```{r histogramscale, warning = FALSE, message = FALSE} 182 | histogramscale <- get.histogramscale(result) 183 | ``` 184 | 185 | 186 | ```{r ggplot-combination} 187 | ggplot(result, aes(x = DAT_step, fill = variable)) + Plot_Theme + Plot_Fill + 188 | stat_density(alpha = 0.5, position = "dodge", 189 | aes(y = (after_stat(density) * histogramscale), weight = weight)) + 190 | geom_histogram(alpha = 0.5, binwidth = attributes(result)$stepsize, 191 | position = "dodge") + 192 | labs(y = "maximum number of objects per year", x = "Dating") 193 | ``` 194 | 195 | The combination of density curve and histogram also shows the common problem of histograms. Their output depends significantly on where the first bin is placed and may show a skewed distribution especially for roughly dated objects.
Additionally, histograms also depend greatly on the binwidth and may not display any meaningful pattern when small bins are chosen [@shennan_Quantifyingarchaeology_1988, 25--26; @baxter_histogramimprovedapproachessimple_1996]. 196 | 197 | The smooth curves of kernel density estimates are a more realistic approach to dating. The production of objects was as continuous as their use, so it seems only reasonable to display it in a more continuous fashion on a flexible timescale. 198 | 199 | 200 | ## Weights and Cumulative Weights 201 | 202 | With the update to v1.1.0 there are new options for calculation. When using a stepsize of 1 and `calc = "probability"`, the weights now reflect the true probability for each object's dating into the specific year. With this, the probability may be used as an indicative value itself and not only as a helper for visualization. Many thanks to Christian Gugl for this suggestion. 203 | 204 | This now opens the possibility to calculate the cumulative probability for each successive year -- also suggested by Christian Gugl -- which is useful in the case of numismatic research, where it reflects the rising probability of minting. The cumulative probability has been introduced as an option in v1.1.0 and is calculated in a separate column when setting `cumulative = TRUE` in the `datsteps()`-function. Please note that the cumulative probability does not work (and does not mean anything) for stepsizes larger than 1! 205 | 206 | ```{r cumulative demo, fig.height = 10} 207 | data("Inscr_Bithynia") 208 | Inscr_Bithynia <- na.omit(Inscr_Bithynia[, c(1, 3, 8, 9)]) 209 | Inscr_Bithynia <- Inscr_Bithynia[sample(seq_len(nrow(Inscr_Bithynia)), 5), ] 210 | Inscr_Bithynia_steps <- datsteps(Inscr_Bithynia, 211 | stepsize = 1, 212 | calc = "probability", 213 | cumulative = TRUE) 214 | 215 | ggplot(Inscr_Bithynia_steps, aes(x = DAT_step, y = cumul_prob, fill = variable)) + 216 | geom_col() + facet_wrap(.
~ ID, ncol = 1) + 217 | labs(y = "Cumulative Probability", x = "Dating", fill = "Origin") + 218 | theme(legend.position = "bottom") 219 | ``` 220 | 221 | ## References 222 | 223 | -------------------------------------------------------------------------------- /vignettes/data_preparation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Data Preparation and Visualization" 3 | output: rmarkdown::html_vignette 4 | bibliography: ../inst/literatur.bib 5 | vignette: > 6 | %\VignetteIndexEntry{Data Preparation and Visualization} 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteEncoding{UTF-8} 9 | --- 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, fig.width = 7, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | Archaeological data as it can be found "in the wild" rarely conforms to the formats suitable for any kind of statistical analysis. This vignette is intended as a way of demonstrating a possibility of data cleaning on a data set as it can frequently be found in archaeological literature, or put together during archaeological research. The data is prepared for use with the `datplot`-package. An analysis based on this data can be found as a case study in the paper "datplot: A new R-Package for the Visualization of Date Ranges in Archaeology" [@datplotarticle] by the authors of this package. 18 | 19 | The inscriptions of Bithynia data set, that is included in this package, was compiled for a publication analyzing the settlement patterns and road systems in ancient Bithynia [@weissova2019]. The vignette is thus meant as guidance for archaeologists looking to employ the [datplot-package](https://github.com/lsteinmann/datplot), and finding themselves in need of reformatting their data without much experience in programming languages. It points to certain problems arising with data sets, which are often incarnated in the form of spreadsheets. 
The Bithynia data set is a typical example of the structure of data as it is used by many (classical) archaeologists. The process of cleaning it highlights solutions for the issues encountered with such spreadsheets, and may also be adapted to other data sets and fields. 20 | 21 | First, we attach the packages to be used in the vignette: 22 | 23 | ```{r message = FALSE} 24 | library(dplyr) 25 | library(stringr) 26 | library(forcats) 27 | library(ggplot2) 28 | library(knitr) 29 | library(datplot) 30 | library(ggridges) 31 | library(reshape2) 32 | ``` 33 | 34 | 35 | # The "Inscriptions of Bithynia" data set 36 | 37 | The manually curated data that B. Weissova prepared for her dissertation was saved as an Excel-spreadsheet. In order to comply with CRAN, we slightly reformatted it into a csv-file containing only ASCII-characters. We continue from this file, which is available [in this repository's](https://github.com/lsteinmann/datplot) "inst/extdata/"-sub-directory. It can be loaded into R without further complications using the `read.csv()`-function. 38 | 39 | *Please note:* Since the conversion of the original Excel file to CRAN-compatible ASCII-data this script does not convert all data anymore. I apologize for not updating this script, but it seems too irrelevant to spend more time on it than I already have, since the package contains the clean data already. I am keeping this as a lesson that data is complicated, and that things change. 40 | 41 | ```{r } 42 | inscriptions <- read.csv(system.file("extdata", 43 | "Bithynia_Inscriptions_ascii.csv", 44 | package = "datplot")) 45 | summary(inscriptions) 46 | ``` 47 | 48 | A large amount of the initial data was provided by the [Searchable Greek Inscriptions Tool of the Packard Humanities Institute](https://inscriptions.packhum.org/) and the [Epigraphische Datenbank Heidelberg](https://edh-www.adw.uni-heidelberg.de/home).
Those inscriptions (n = `r nrow(inscriptions[!is.na(inscriptions$ikey), ])`) can be referenced in the databases with their `ikey`. The data set was supplemented with `r nrow(inscriptions[is.na(inscriptions$ikey), ])` inscriptions that were manually gathered from different sources. The relevant citations are to be found in the `Source` column. Additional information on the creation and curation of this data set can be found in the publication mentioned above [@weissova2019]. 49 | 50 | The original file consists of five columns, with each row representing a single inscription: `ikey` contains the reference to the [Searchable Greek Inscriptions Tool of the Packard Humanities Institute](https://inscriptions.packhum.org/), indicated via `PH` as a prefix, or to the [Epigraphische Datenbank Heidelberg](https://edh-www.adw.uni-heidelberg.de/home), indicated via `HD` as a prefix. `Location` refers to the find spot of the inscription. `Source` states the source of the data. `Chronological Frame` contains the dating in a verbose format, such as "Roman Imperial Period". `Language` records the language in which the inscription was written, which can either be Latin, Greek, or both. 51 | 52 | 53 | # Data Preparation, Cleaning and Reformatting 54 | 55 | The data set is not yet suited for analysis, as some variables, especially the chronological frame, have many inconsistencies. For further processing, we should also be sure to include an identifier-column. As `r nrow(inscriptions) - length(unique(inscriptions$ikey))` inscriptions do not have an `ikey` value, which might have otherwise been a good candidate for identification, we chose to create a new automatically generated ID, so that every inscription is individually identifiable. 56 | 57 | ```{r } 58 | inscriptions$ID <- paste("I_", seq_len(nrow(inscriptions)), sep = "") 59 | ``` 60 | 61 | Two of the variables of this data set are almost ready for further use, i.e. Location and Language.
A look at their unique values reveals only small inconsistencies that can be easily fixed: 62 | 63 | ```{r } 64 | unique(inscriptions$Location) 65 | unique(inscriptions$Language) 66 | ``` 67 | 68 | Using functions from the `tidyverse` package family, we can easily transform the columns. We rename the "Chronological Frame" to `Dating`, as shorter names without spaces are more convenient to work with, and add proper NA-values if there is no chronological assessment. With `mutate()` and `replace()` we also clear out the redundant variable values from `Location` and `Language`: 69 | 70 | ```{r message=FALSE} 71 | inscriptions <- inscriptions %>% 72 | rename(Dating = Chronological.Frame) %>% 73 | mutate(Dating = na_if(Dating, "---"), 74 | Language = replace(Language, Language == "Gr/Lat", "Greek/Latin"), 75 | Language = replace(Language, Language == "Gr / Lat", "Greek/Latin"), 76 | Language = factor(Language, levels = c("Greek", "Latin", 77 | "Greek/Latin")), 78 | Location = replace(Location, str_detect(Location, "unknown"), 79 | "unknown"), 80 | Location = replace(Location, 81 | Location == "Prusias ad Mare (Keramed)", 82 | "Prusias ad Mare"), 83 | Location = factor(Location)) 84 | ``` 85 | 86 | 87 | This conversion leaves us with a more compact overview of the data set's contents: 88 | 89 | ```{r} 90 | summary(inscriptions) 91 | ``` 92 | 93 | ## Cleaning up the Dating-variable 94 | 95 | Some of the values in the `Dating`-variable contain question marks, indicating uncertainty in the chronological assessment. To keep this information, we store it in a new variable `uncertain_dating`, which contains `TRUE` if there was a question mark in the original assessment and `FALSE` if the dating was certain, so that the additional information can later be used to select or exclude uncertain values.
96 | 97 | ```{r } 98 | inscriptions$uncertain_dating <- FALSE 99 | sel <- grep("\\?", inscriptions$Dating) 100 | inscriptions$uncertain_dating[sel] <- TRUE 101 | inscriptions$Dating <- gsub("\\?", "", inscriptions$Dating) 102 | ``` 103 | 104 | 105 | ### Creating a Concordance for Periods 106 | 107 | The next step is sorting out values from the `Dating` variable that have to be manually entered, such as the "Roman Imperial Period". We achieve this by excluding all values that contain a number, preparing a table in which we can manually enter the desired dating span and saving it as a .csv-file. After adding the values manually, we reload the file. The manually edited concordance can be found in the "inst/extdata/"-sub-directory [of this repository](https://github.com/lsteinmann/datplot) in the file `periods_edit.csv` and corresponds with the chronological assessment of periods used in the original publication [@weissova2019, 42]. 108 | 109 | ```{r } 110 | sel <- grepl("[0-9]", inscriptions$Dating) 111 | periods <- data.frame("Dating" = unique(inscriptions$Dating[which(sel == FALSE)])) 112 | periods$DAT_min <- NA 113 | periods$DAT_max <- NA 114 | #write.csv(periods, file = "../data-raw/periods.csv", fileEncoding = "UTF-8") 115 | # .... Manual editing of the resulting table, saving it as "periods_edit.csv". 116 | join_dating <- read.csv(file = system.file('extdata', 'periods_edit.csv', 117 | package = 'datplot', 118 | mustWork = TRUE), 119 | row.names = 1, 120 | colClasses = c("character", "character", 121 | "integer", "integer"), 122 | encoding = "UTF-8") 123 | ``` 124 | 125 | It is of course possible to add the corresponding values in R. Since the process could hardly be automated, this way seemed more efficient to us, though it is - sadly - less transparent in this vignette. The values can be examined by the reader in the csv-Table mentioned above, or by loading the table via `system.file()` as seen in the code chunk. 
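To make the structure of such a concordance concrete, here is a purely illustrative sketch (the actual values live in `periods_edit.csv` and are not shown here): each verbose period is mapped to a numerical span, with BC years stored as negative numbers. The span used below is the one for the Roman Imperial Period (31 BC to 395 AD) mentioned later in this vignette; the object name `example_period` is ours and appears nowhere else in the package.

```{r }
# Hypothetical one-row concordance; BC years are negative, AD years positive.
example_period <- data.frame(Dating = "Roman Imperial Period",
                             DAT_min = -31, # 31 BC
                             DAT_max = 395) # 395 AD
example_period
```

A look-up table of this shape is what `join_dating` accumulates over the following steps.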
126 | 127 | ### Reformatting of Partially Numerical Dating Values 128 | 129 | There remain, however, a large number of values that are not covered in this concordance. We can easily automate some of the conversions as a series of steps that we store in another `data.frame` called `num_dating`, encompassing all the unique values that contain some form of numerical dating. 130 | 131 | ```{r } 132 | num_dating <- data.frame("Dating" = unique(inscriptions$Dating[which(sel == TRUE)])) 133 | num_dating$DAT_min <- NA 134 | num_dating$DAT_max <- NA 135 | ``` 136 | 137 | First, there are a number of inscriptions dated to a single year. We select these using a regular expression with `grep()`[^1] and can simply delete the character-part of the values so that only the specified year remains and is stored in both of the `DAT_`-variables. We do this separately for AD and BC-values since BC needs to be stored as a negative number. 138 | 139 | [^1]: For information and guidance on regular expressions see e.g. [https://regex101.com/](https://regex101.com/). However, R has a slightly different handling of escape characters. Information can be found in the R-Documentation under `?regex` or [here](https://stat.ethz.ch/R-manual/R-devel/library/base/html/regex.html).
140 | 141 | 142 | ```{r } 143 | sel <- grep("^[0-9]{1,3} AD$", num_dating$Dating) 144 | num_dating$DAT_min[sel] <- gsub(" AD", "", num_dating$Dating[sel]) 145 | num_dating$DAT_max[sel] <- gsub(" AD", "", num_dating$Dating[sel]) 146 | sel <- grep("^[0-9]{1,3} BC$", num_dating$Dating) 147 | num_dating$DAT_min[sel] <- paste("-", gsub(" BC", "", num_dating$Dating[sel]), 148 | sep = "") 149 | num_dating$DAT_max[sel] <- paste("-", gsub(" BC", "", num_dating$Dating[sel]), 150 | sep = "") 151 | ``` 152 | 153 | As a demonstration, this is the resulting table (`num_dating`) of the values converted up to this point: 154 | 155 | ```{r echo = FALSE} 156 | require(knitr) 157 | knitr::kable(na.omit(na.omit(num_dating)[sample(seq_len(nrow(na.omit(num_dating))), 158 | 10), ])) 159 | ``` 160 | 161 | Since we frequently check the values in `num_dating` to look for errors, we append the finished rows to `join_dating`, which we later use as a look-up table for our data set, and remove the finished rows from `num_dating`. 162 | 163 | ```{r } 164 | join_dating <- rbind(join_dating, num_dating[!is.na(num_dating$DAT_min), ]) 165 | num_dating <- num_dating[which(is.na(num_dating$DAT_min)), ] 166 | ``` 167 | 168 | We have to convert the `Dating` variable from `factor` to `character` now, so we can use the `strsplit()`-function on the values: 169 | 170 | ```{r } 171 | num_dating$Dating <- as.character(num_dating$Dating) 172 | ``` 173 | 174 | As some of the values are in the format year-year, e.g. "150-160 AD", we can easily grab both numbers from the string. To achieve this, we select all the rows containing the relevant format, then loop over each of these rows and split the character string along either "-" or spaces, and later "/", as we chose to treat values of the format "198/199 AD" as "198 - 199 AD". Selecting the numerical values from the resulting list according to their position gives us the desired values for `DAT_min` and `DAT_max`.
We need to do the same for the BC-values to make sure they return as negative numbers. 175 | 176 | ```{r } 177 | # Values like: 92-120 AD 178 | sel <- grep("^[0-9]{1,3}-[0-9]{1,3} AD", num_dating$Dating) 179 | for (r in sel) { 180 | split <- strsplit(x = num_dating$Dating[r], split = "-| ") 181 | num_dating$DAT_min[r] <- split[[1]][1] 182 | num_dating$DAT_max[r] <- split[[1]][2] 183 | } 184 | # Values like: AD 92-120 185 | sel <- grep("^AD [0-9]{1,3}-[0-9]{1,3}$", num_dating$Dating) 186 | for (r in sel) { 187 | split <- strsplit(x = num_dating$Dating[r], split = "-| ") 188 | num_dating$DAT_min[r] <- split[[1]][2] 189 | num_dating$DAT_max[r] <- split[[1]][3] 190 | } 191 | # Values like: AD 92 - 120 192 | sel <- grep("^AD [0-9]{1,3} - [0-9]{1,3}", num_dating$Dating) 193 | for (r in sel) { 194 | split <- strsplit(x = num_dating$Dating[r], split = " - | ") 195 | num_dating$DAT_min[r] <- split[[1]][2] 196 | num_dating$DAT_max[r] <- split[[1]][3] 197 | } 198 | # Values like: 198/199 AD 199 | sel <- grep("^[0-9]{1,3}/[0-9]{1,3} AD", num_dating$Dating) 200 | for (r in sel) { 201 | split <- strsplit(x = num_dating$Dating[r], split = "/| ") 202 | num_dating$DAT_min[r] <- split[[1]][1] 203 | num_dating$DAT_max[r] <- split[[1]][2] 204 | } 205 | # Values like: 525-75 BC 206 | sel <- grep("^[0-9]{1,3}-[0-9]{1,3} BC", num_dating$Dating) 207 | for (r in sel) { 208 | split <- strsplit(x = num_dating$Dating[r], split = "-| ") 209 | num_dating$DAT_min[r] <- 0 - as.numeric(split[[1]][1]) 210 | num_dating$DAT_max[r] <- 0 - as.numeric(split[[1]][2]) 211 | } 212 | ``` 213 | 214 | Another look at the data set can help us to check for possible errors. 
215 | 216 | ```{r echo = FALSE} 217 | knitr::kable(na.omit(na.omit(num_dating)[sample(seq_len(nrow(na.omit(num_dating))), 218 | 10), ])) 219 | ``` 220 | 221 | Putting aside the finished values again makes it easier to spot errors in the process: 222 | 223 | ```{r } 224 | join_dating <- rbind(join_dating, num_dating[!is.na(num_dating$DAT_min), ]) 225 | num_dating <- num_dating[which(is.na(num_dating$DAT_min)), ] 226 | ``` 227 | 228 | ### Reformatting of Inscriptions Dated to Complete Centuries 229 | 230 | Next, we separate values that identify complete centuries. As we want to express the dating in absolute numbers, we convert "1st c. AD" to a time span ranging from 0 to 99, and "1st c. BC" to -99 to 0 respectively. The regular expression selects all values, where a single number in the beginning of the string is followed by two letters (i.e. 2nd, 3rd, 1st) and "c. AD" resp. "c. BC". We subtract one and multiply the number by 100 to get the respective boundaries, again taking care to treat AD and BC differently. 231 | 232 | ```{r } 233 | sel <- grep("^[0-9]{1}[a-z]{2} c\\. AD$", num_dating$Dating) 234 | for (r in sel) { 235 | split <- strsplit(x = num_dating$Dating[r], split = "[a-z]{2} c\\.") 236 | split <- as.numeric(split[[1]][1]) 237 | num_dating$DAT_min[r] <- ((split - 1) * 100) 238 | num_dating$DAT_max[r] <- ((split - 1) * 100) + 99 239 | } 240 | 241 | sel <- grep("^[0-9]{1}[a-z]{2} c\\. 
BC$", num_dating$Dating) 242 | for (r in sel) { 243 | split <- strsplit(x = num_dating$Dating[r], split = "[a-z]{2} c\\.") 244 | split <- as.numeric(split[[1]][1]) 245 | num_dating$DAT_min[r] <- 0-(split * 100) + 1 246 | num_dating$DAT_max[r] <- 0-((split - 1) * 100) 247 | } 248 | 249 | ``` 250 | ```{r echo = FALSE} 251 | knitr::kable(na.omit(na.omit(num_dating)[sample(seq_len(nrow(na.omit(num_dating))), 252 | 10), ])) 253 | ``` 254 | 255 | Again, putting aside the finished values makes it easier to spot errors in further processing: 256 | 257 | ```{r } 258 | join_dating <- rbind(join_dating, num_dating[!is.na(num_dating$DAT_min), ]) 259 | num_dating <- num_dating[which(is.na(num_dating$DAT_min)), ] 260 | ``` 261 | 262 | ### Reformatting of Imprecise Dating Values 263 | 264 | For dates that are around a certain value, i.e. of the format "ca. 190 AD", we are not able to make a more informed decision than to guess what the researchers providing this assessment had in mind. This might also change from inscription to inscription. While a closer look at the individual inscriptions may yield a more sensible estimate, this does not seem feasible as part of the data preparation process. It seems that in most cases, if the dating can be as precise as a span of around 10 years, researchers tend to emphasize this. Therefore, we decided to take a total span of 20 years, i.e. 10 years before and 10 years after the mentioned value, reflecting some uncertainty on the precision and duration of the estimate. "ca. 190 AD" thus becomes "180--200 AD", with the same mechanism for BC in negative values. 265 | 266 | 267 | ```{r } 268 | sel <- grep("^ca\\. [0-9]{1,3} AD$", num_dating$Dating) 269 | for (r in sel) { 270 | split <- strsplit(x = num_dating$Dating[r], split = " ") 271 | split <- as.numeric(split[[1]][2]) 272 | num_dating$DAT_min[r] <- split - 10 273 | num_dating$DAT_max[r] <- split + 10 274 | } 275 | sel <- grep("^ca\\.
[0-9]{1,3} BC$", num_dating$Dating) 276 | for (r in sel) { 277 | split <- strsplit(x = num_dating$Dating[r], split = " ") 278 | split <- 0-as.numeric(split[[1]][2]) 279 | num_dating$DAT_min[r] <- split - 10 280 | num_dating$DAT_max[r] <- split + 10 281 | } 282 | ``` 283 | ```{r echo = FALSE} 284 | knitr::kable(na.omit(na.omit(num_dating))) 285 | ``` 286 | 287 | ### Creating a Second Concordance for Verbose Chronological Assessments 288 | 289 | Again, saving the finished values in `join_dating` leaves us with the list of not yet converted values as seen in `num_dating`'s `Dating`-variable. 290 | 291 | ```{r } 292 | join_dating <- rbind(join_dating, num_dating[!is.na(num_dating$DAT_min), ]) 293 | num_dating <- num_dating[which(is.na(num_dating$DAT_min)), ] 294 | unique(num_dating$Dating)[1:20] 295 | ``` 296 | 297 | Due to the heterogeneous nature of these `r nrow(num_dating)` values, we decided to convert them manually again. Our criteria for translating terms like "beginning of" are the following: "beginning of", "end of", "early", "late" are -- similarly to ca. -- all translated to 20 years, as we assume that more information would have been given if the time span were greater than a quarter century. For the other values, we employed the same criteria as seen above in the automated process. We decided to measure the 'beginning of a century' at e.g. -199 for BC values and 100 for AD values, and accordingly identify the 'end of a century' with the values e.g. -100 (BC) / 199 (AD). Since the data is epigraphical, in the case of "before" or "after" (i.e. terminus ante/post quem) dates, we assume some connection to a datable event, and therefore add or subtract a span of 10 years, which is, however, still rather arbitrary.
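The terminus ante/post quem rule just described can be sketched in code for clarity. This is only an illustration of the criterion we applied while editing the concordance by hand; the helper functions `ante_quem()` and `post_quem()` are hypothetical and not part of the package:

```{r }
# Hypothetical helpers illustrating the rule: "before X" spans the 10 years
# up to X, "after X" the 10 years following X (BC years as negative numbers).
ante_quem <- function(year) c(DAT_min = year - 10, DAT_max = year)
post_quem <- function(year) c(DAT_min = year, DAT_max = year + 10)
ante_quem(120) # "before 120 AD" -> 110 to 120
post_quem(-50) # "after 50 BC"  -> -50 to -40
```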
298 | 299 | As the last step, we switch the -- in some cases automatically assigned -- year 0, so that it will be treated as either 1 or -1: 300 | ```{r} 301 | join_dating$DAT_min[which(join_dating$DAT_min == 0)] <- 1 302 | join_dating$DAT_max[which(join_dating$DAT_max == 0)] <- -1 303 | ``` 304 | 305 | We then have to reload the corrected data. The values that we put aside in the `join_dating` `data.frame` beforehand serve as the basis for our new `data.frame` that we use as a look-up table. As the variables were treated as character-strings, we need to convert them to a numeric format first. We append the newly loaded manually assigned values to this data.frame. Again, to keep this process as transparent as possible, we included the *.csv-Table in the "inst/extdata/" sub-directory as "num_dating_edit.csv". 306 | 307 | ```{r } 308 | #write.csv(num_dating, file = "../data-raw/num_dating.csv", 309 | # fileEncoding = "UTF-8") 310 | num_dating <- read.csv(file = system.file('extdata', 'num_dating_edit.csv', 311 | package = 'datplot', mustWork = TRUE), 312 | encoding = "UTF-8", 313 | row.names = 1, 314 | colClasses = c("character", "character", 315 | "integer", "integer")) 316 | join_dating <- join_dating %>% 317 | mutate(DAT_min = as.integer(DAT_min), 318 | DAT_max = as.integer(DAT_max)) %>% 319 | rbind(num_dating) 320 | ``` 321 | 322 | 323 | ## Joining the Reformatted Data with the Original Data Set 324 | 325 | `left_join()` lets us add the `DAT_`-variables from the look-up table `join_dating` to our original data.frame. 
326 | 327 | ```{r } 328 | inscriptions <- left_join(inscriptions, join_dating, by = "Dating") 329 | ``` 330 | 331 | The `DAT_`-variables -- as we can now see -- contain the desired information: 332 | 333 | ```{r echo = FALSE} 334 | knitr::kable(na.omit(inscriptions)[sample(nrow(na.omit(inscriptions)), 15), 335 | c(6, 2, 4, 8, 9)]) 336 | ``` 337 | 338 | ## Fixing mistakes 339 | 340 | We can now make a 'test run' using datplot with the stepsize-value set to 5 in order to save as much time as possible[^2] and check for errors first. We have to select exactly 4 columns in the correct order: Identifier, Grouping Variable, Minimum Dating, Maximum Dating. We transform the output from the pipe operator to a `data.frame` before handing it to `datsteps()`, as **datplot** needs this format. 341 | 342 | 343 | [^2]: Using different stepsizes is a bit of an artifact left over from a less efficient version of datplot, where processing would take more than a minute if stepsize was set to 1, so I would usually start out with 100 or 25 just to look for errors and problems and to not lose much time. This, however, is technically not necessary in any way. The function now works faster, though there is a lot of room for improvement, as it may still take several seconds or longer, but generally not several minutes. We determined that the feature might still be useful, and therefore it was not removed. 344 | 345 | ```{r warning = TRUE, message = TRUE, out.lines = 20} 346 | inscr_steps <- inscriptions %>% 347 | select(ID, Location, DAT_min, DAT_max) %>% 348 | na.omit() %>% 349 | as.data.frame() %>% 350 | datplot::datsteps(stepsize = 5) 351 | ``` 352 | 353 | There are indeed problems in the data. The first of the three warnings issued by `datsteps()` informs us that we may have assigned some values wrong: "Warning: Dating seems to be in wrong order at ID I_1162, I_2725 (Index: 637, 1458). Dates have been switched, but be sure to check your original data for possible mistakes."
If dates are in the wrong order, **datplot** will automatically switch them before proceeding. This, however, might not always be the correct way of handling the situation, as other errors might have occurred as well. Therefore, we should check the data again using the Index-Values or IDs provided by datplot's warning: 354 | 355 | ```{r } 356 | inscriptions %>% 357 | select(ID, Location, Dating, uncertain_dating, DAT_min, DAT_max) %>% 358 | na.omit() %>% 359 | slice(637, 1458) %>% 360 | kable() 361 | ``` 362 | 363 | The problem here is twofold. In the first entry, our automated translation of the dating could not handle the format "62/3" and thus returned a wrong value for `DAT_max`. If we had a truly large data set, we should correct this in the original reformatting process. In this case, it is more efficient to fix this problem right now in the fastest possible way. 364 | 365 | ```{r } 366 | inscriptions[which(inscriptions$ID == "I_1162"),"DAT_max"] <- 63 367 | ``` 368 | 369 | In the other case, the Dating-column provided the BC-values in the wrong order. We can also fix this quickly right here, which is basically the same way **datplot** would handle both cases internally: 370 | 371 | ```{r } 372 | inscriptions[which(inscriptions$ID == "I_2725"),c("DAT_min", "DAT_max")] <- 373 | inscriptions[which(inscriptions$ID == "I_2725"),c("DAT_max", "DAT_min")] 374 | ``` 375 | 376 | Note that `datsteps()` changes the date and proceeds, but warns you of the possibility of errors. It is therefore recommended to check the data mentioned in that warning.
As we 'fixed' our data set, we should save it again, so that the same error will not occur twice: 377 | 378 | ```{r eval = FALSE} 379 | #write.table(inscriptions, file = "../data-raw/inscriptions.csv", 380 | # fileEncoding = "UTF-8", sep = ";", row.names = FALSE) 381 | ``` 382 | 383 | 384 | ## Storing the Prepared Data Set 385 | 386 | For later use and publication, we add explanations of the variables and metadata to the R-object and store them in the *.rda file. As a backup, we also save the finished table as a .csv-file, suitable for archiving. The .csv-Table, however, does not contain the additional information added as attributes. 387 | 388 | All files are available [in this repository](https://github.com/lsteinmann/datplot) and as part of this package for further use by other researchers. We kindly ask you to cite @weissova2019, the Packard Humanities Institute and this repository at [https://github.com/lsteinmann/datplot](https://github.com/lsteinmann/datplot) as sources. 389 | 390 | The dataset can be loaded into R by simply calling 391 | ```{r} 392 | #library(datplot) 393 | data("Inscr_Bithynia") 394 | ``` 395 | when `datplot` is loaded. 396 | 397 | # Selecting the Data for Further Analysis and using datplot 398 | 399 | As our aim is to get an overview of the spatio-temporal distribution of the inscriptions in Bithynia, we can only analyze inscriptions with known Location and Dating. Thus, we have to remove all the rows from the data set that do not contain this information. 400 | 401 | ```{r } 402 | inscr_clean <- Inscr_Bithynia %>% 403 | filter(Dating != "NA", 404 | Location != "unknown") %>% 405 | droplevels() 406 | ``` 407 | 408 | This means that we removed a total of `r nrow(inscriptions) - nrow(inscr_clean)` rows, which did not contain the information needed. 
The data set suitable for the analysis can be summarized as follows: 409 | 410 | ```{r } 411 | summary(inscr_clean) 412 | ``` 413 | 414 | # datplot 415 | 416 | We can now begin using the **datplot**-package... 417 | 418 | ```{r eval = FALSE} 419 | library(datplot) 420 | ``` 421 | 422 | ...and can actually try to use `datsteps()`, first with a `stepsize` of 25 years: 423 | 424 | ```{r warning=TRUE, out.lines = 13} 425 | inscr_steps <- inscr_clean %>% 426 | select(ID, Location, DAT_min, DAT_max) %>% 427 | as.data.frame() %>% 428 | datplot::datsteps(stepsize = 25) %>% 429 | datplot::scaleweight(var = 2) 430 | ``` 431 | 432 | `datsteps()` tells us that a number of objects are dated to one year, asking us if this is correct. We included this output to avoid errors occurring through faulty values, as objects dated to one year have a very high impact on the outcome. It might help to spot problems or discrepancies in datasets as well, as the warning about the order of dating above has already shown. 433 | 434 | In a second step, we scale the weights according to our grouping variable (Location), so that the sum of weights in each group equals 1, which is important for displaying the weight correctly in a density plot. 435 | 436 | # Visualizing the Output of `datsteps()` 437 | 438 | To get a general impression of the data set and the possibilities of visualization, we explore and recommend different plot methods below. The simplest and fastest way to plot the output with base R is the `plot()` function in combination with the `density()` function: 439 | 440 | ```{r} 441 | plot(density(inscr_steps$DAT_step)) 442 | ``` 443 | 444 | ## Using `ggplot2` 445 | 446 | A somewhat crowded overview containing the Locations of all Inscriptions can easily be achieved when using `ggplot2` and its `geom_density()`-method, which is based on the same procedure as the `density()`-function used above.
A result may look like this: 447 | 448 | ```{r } 449 | ggplot(data = inscr_steps, aes(x = DAT_step, fill = variable, 450 | weight = weight)) + 451 | geom_density(alpha = 0.3) 452 | ``` 453 | 454 | Note that the output without the weights calculated by `datsteps()` is very different: 455 | 456 | ```{r } 457 | ggplot(data = inscr_steps, aes(x = DAT_step, fill = variable)) + 458 | geom_density(alpha = 0.3) 459 | ``` 460 | 461 | The weights calculated by **datplot** (internally using the `get.weights()`-function) are a way of assigning an importance to the objects in question. They relate to the range an object is dated to (more detailed information on the process is available in [@datplotarticle]). Therefore, any objects dated to large time spans, such as the "Roman Imperial Period" (ranging from 31 BC to 395 AD in our data set), contribute to the curve significantly less than any object dated to one exact year. At times it can be very useful to look at both outputs and discuss them separately. We will look at a case study of a single city later on to clarify this. 462 | 463 | 464 | ## Using `ggplot2` and `ggridges` 465 | 466 | Since the graph is very crowded in this layout, we actually recommend using `geom_density_ridges` from the `ggridges`-package.
Since the package has no built-in support for assigning weight, we have to use `stat = "density"`, which will reference the `density()`-function to get the appropriate calculations: 467 | 468 | ```{r} 469 | ggplot(data = inscr_steps, 470 | aes(x = DAT_step, 471 | y = fct_rev(as_factor(variable)), 472 | fill = variable, 473 | weight = weight)) + 474 | geom_density_ridges(aes(height = after_stat(density)), 475 | stat = "density", alpha = 0.9) + 476 | scale_fill_discrete(guide = "none") 477 | ``` 478 | Styling will help to make the plots more readable: 479 | 480 | ```{r } 481 | bluegreen <- colorRampPalette(c("#8dae25", "#17365c")) 482 | 483 | ggplot(data = inscr_steps, 484 | aes(x = DAT_step, 485 | y = fct_rev(as_factor(variable)), 486 | fill = variable, 487 | weight = weight)) + 488 | geom_density_ridges(aes(height = after_stat(density)), stat = "density", alpha = 0.9) + 489 | scale_x_continuous(breaks = seq(from = -800, to = 800, by = 100), 490 | limits = c(-800,800), name = "") + 491 | geom_vline(xintercept = 0, alpha = 0.5, lwd = 1) + 492 | theme(axis.text.x = element_text(angle = 90, vjust = 0.5), 493 | panel.background = element_blank(), 494 | panel.grid.major.x = element_line(linetype = "dashed", 495 | color = "gray30"), 496 | panel.grid.minor.x = element_line(linetype = "dotted", 497 | color = "gray80")) + 498 | scale_fill_manual(guide = "none", 499 | values = bluegreen(length(unique(inscr_steps$variable)))) + 500 | labs(title = "Epigraphic Evidence from Bithynia", 501 | subtitle = "Spatio-temporal distribution", 502 | y = "administrative centres", 503 | caption = attributes(inscriptions)$source) 504 | ``` 505 | 506 | ## Using `ggplot2` and `facet_wrap()` 507 | 508 | Another option is to separate the variables with `facet_wrap`, which is not as condensed and does not support the option to scale the densities in the graph for maximum visibility, as `geom_density_ridges()` automatically does (see Documentation of `ggridges`).
In our case, this leads to a worse visibility of smaller density curves: 509 | 510 | ```{r fig.height = 10} 511 | ggplot(data = inscr_steps, aes(x = DAT_step, 512 | fill = variable, weight = weight)) + 513 | geom_density(alpha = 0.9) + 514 | scale_fill_discrete(guide = "none") + 515 | facet_wrap(variable ~ ., ncol = 1) 516 | ``` 517 | 518 | Styling also helps very much to improve the graph's readability: 519 | 520 | ```{r fig.height = 10} 521 | ggplot(data = inscr_steps, aes(x = DAT_step, 522 | fill = variable, weight = weight)) + 523 | geom_density(alpha = 0.9) + 524 | theme(panel.background = element_blank()) + 525 | scale_fill_manual(guide = "none", 526 | values = bluegreen(length(unique(inscr_steps$variable)))) + 527 | scale_x_continuous(breaks = seq(from = -800, to = 800, by = 100), 528 | limits = c(-800,800), name = "") + 529 | facet_wrap(variable ~ ., ncol = 1) + 530 | theme(strip.text.x = element_text(size=8), 531 | strip.background = element_blank(), 532 | axis.text.x = element_text(angle = 90, vjust = 0.5), 533 | axis.text.y = element_blank(), 534 | axis.ticks.y = element_blank(), 535 | axis.title.y = element_blank(), 536 | panel.background = element_blank(), 537 | panel.grid.major.x = element_line(linetype = "dashed", 538 | color = "gray30"), 539 | panel.grid.minor.x = element_line(linetype = "dotted", 540 | color = "gray80")) + 541 | labs(title = "Epigraphic Evidence from Bithynia", 542 | subtitle = "Spatio-temporal distribution", 543 | caption = attributes(inscriptions)$source) 544 | ``` 545 | 546 | # How Do datplot and Kernel Density Estimation Perform Compared to Histograms? 547 | 548 | In order to compare this output to generally more common visualizations, we can prepare a histogram of the original data. Here we have two possible approaches. We first prepare a two-part histogram for the upper and lower boundaries of the respective dating for each object.
We select only three locations to keep the visualization short: 549 | 550 | ```{r fig.height = 6} 551 | inscr_clean %>% 552 | select(ID, Location, DAT_min, DAT_max) %>% 553 | filter(Location %in% c("Prusias ad Hypium", "Nicomedia", "Apamea")) %>% 554 | reshape2::melt(id.vars = c("ID", "Location")) %>% 555 | ggplot(aes(x = value, fill = variable)) + 556 | geom_histogram(binwidth = 25, position = "dodge") + 557 | facet_wrap(. ~ Location, ncol = 1) + 558 | labs(title = "Distribution of Dated Inscriptions in Bithynia (selection)", 559 | x = "Dating", y = "Number of Inscriptions") + 560 | theme(legend.position = "top") 561 | ``` 562 | 563 | Alternatively, we could display the mean of the Dating range: 564 | 565 | ```{r fig.height = 6} 566 | inscr_clean %>% 567 | transmute(ID, Location, Language, DAT_mean = ((DAT_min + DAT_max)/2)) %>% 568 | filter(Location %in% c("Prusias ad Hypium", "Nicomedia", "Apamea")) %>% 569 | reshape2::melt(id.vars = c("ID", "Location", "Language")) %>% 570 | ggplot(aes(x = value, fill = Language)) + 571 | geom_histogram(binwidth = 25) + 572 | facet_wrap(. ~ Location, ncol = 1) + 573 | labs(title = "Distribution of Dated Inscriptions in Bithynia (selection)", 574 | x = "Dating", y = "Number of Inscriptions") 575 | ``` 576 | 577 | 578 | While this also gives us an impression of the distribution, it seems some information got lost on the way. Many inscriptions were dated to large time spans, and those have now been gathered into their mean value. The upside is that the histograms show the real count of inscriptions, which density graphs cannot. 579 | 580 | We have shown in this vignette how a data set, as it is often encountered by archaeologists, can be prepared and formatted in order to be processed by datplot. Furthermore, we have made some suggestions on visualization. This vignette hopes to guide researchers who are not yet very familiar with the processes of data cleaning toward solutions for their own projects.
581 |
582 |
583 | # References
584 | --------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | GNU General Public License
2 | ==========================
3 |
4 | _Version 3, 29 June 2007_
5 | _Copyright © 2007 Free Software Foundation, Inc. <https://fsf.org/>_
6 |
7 | Everyone is permitted to copy and distribute verbatim copies of this license
8 | document, but changing it is not allowed.
9 |
10 | ## Preamble
11 |
12 | The GNU General Public License is a free, copyleft license for software and other
13 | kinds of works.
14 |
15 | The licenses for most software and other practical works are designed to take away
16 | your freedom to share and change the works. By contrast, the GNU General Public
17 | License is intended to guarantee your freedom to share and change all versions of a
18 | program--to make sure it remains free software for all its users. We, the Free
19 | Software Foundation, use the GNU General Public License for most of our software; it
20 | applies also to any other work released this way by its authors. You can apply it to
21 | your programs, too.
22 |
23 | When we speak of free software, we are referring to freedom, not price. Our General
24 | Public Licenses are designed to make sure that you have the freedom to distribute
25 | copies of free software (and charge for them if you wish), that you receive source
26 | code or can get it if you want it, that you can change the software or use pieces of
27 | it in new free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you these rights or
30 | asking you to surrender the rights. Therefore, you have certain responsibilities if
31 | you distribute copies of the software, or if you modify it: responsibilities to
32 | respect the freedom of others.
33 | 34 | For example, if you distribute copies of such a program, whether gratis or for a fee, 35 | you must pass on to the recipients the same freedoms that you received. You must make 36 | sure that they, too, receive or can get the source code. And you must show them these 37 | terms so they know their rights. 38 | 39 | Developers that use the GNU GPL protect your rights with two steps: **(1)** assert 40 | copyright on the software, and **(2)** offer you this License giving you legal permission 41 | to copy, distribute and/or modify it. 42 | 43 | For the developers' and authors' protection, the GPL clearly explains that there is 44 | no warranty for this free software. For both users' and authors' sake, the GPL 45 | requires that modified versions be marked as changed, so that their problems will not 46 | be attributed erroneously to authors of previous versions. 47 | 48 | Some devices are designed to deny users access to install or run modified versions of 49 | the software inside them, although the manufacturer can do so. This is fundamentally 50 | incompatible with the aim of protecting users' freedom to change the software. The 51 | systematic pattern of such abuse occurs in the area of products for individuals to 52 | use, which is precisely where it is most unacceptable. Therefore, we have designed 53 | this version of the GPL to prohibit the practice for those products. If such problems 54 | arise substantially in other domains, we stand ready to extend this provision to 55 | those domains in future versions of the GPL, as needed to protect the freedom of 56 | users. 57 | 58 | Finally, every program is threatened constantly by software patents. States should 59 | not allow patents to restrict development and use of software on general-purpose 60 | computers, but in those that do, we wish to avoid the special danger that patents 61 | applied to a free program could make it effectively proprietary. 
To prevent this, the 62 | GPL assures that patents cannot be used to render the program non-free. 63 | 64 | The precise terms and conditions for copying, distribution and modification follow. 65 | 66 | ## TERMS AND CONDITIONS 67 | 68 | ### 0. Definitions 69 | 70 | “This License” refers to version 3 of the GNU General Public License. 71 | 72 | “Copyright” also means copyright-like laws that apply to other kinds of 73 | works, such as semiconductor masks. 74 | 75 | “The Program” refers to any copyrightable work licensed under this 76 | License. Each licensee is addressed as “you”. “Licensees” and 77 | “recipients” may be individuals or organizations. 78 | 79 | To “modify” a work means to copy from or adapt all or part of the work in 80 | a fashion requiring copyright permission, other than the making of an exact copy. The 81 | resulting work is called a “modified version” of the earlier work or a 82 | work “based on” the earlier work. 83 | 84 | A “covered work” means either the unmodified Program or a work based on 85 | the Program. 86 | 87 | To “propagate” a work means to do anything with it that, without 88 | permission, would make you directly or secondarily liable for infringement under 89 | applicable copyright law, except executing it on a computer or modifying a private 90 | copy. Propagation includes copying, distribution (with or without modification), 91 | making available to the public, and in some countries other activities as well. 92 | 93 | To “convey” a work means any kind of propagation that enables other 94 | parties to make or receive copies. Mere interaction with a user through a computer 95 | network, with no transfer of a copy, is not conveying. 
96 | 97 | An interactive user interface displays “Appropriate Legal Notices” to the 98 | extent that it includes a convenient and prominently visible feature that **(1)** 99 | displays an appropriate copyright notice, and **(2)** tells the user that there is no 100 | warranty for the work (except to the extent that warranties are provided), that 101 | licensees may convey the work under this License, and how to view a copy of this 102 | License. If the interface presents a list of user commands or options, such as a 103 | menu, a prominent item in the list meets this criterion. 104 | 105 | ### 1. Source Code 106 | 107 | The “source code” for a work means the preferred form of the work for 108 | making modifications to it. “Object code” means any non-source form of a 109 | work. 110 | 111 | A “Standard Interface” means an interface that either is an official 112 | standard defined by a recognized standards body, or, in the case of interfaces 113 | specified for a particular programming language, one that is widely used among 114 | developers working in that language. 115 | 116 | The “System Libraries” of an executable work include anything, other than 117 | the work as a whole, that **(a)** is included in the normal form of packaging a Major 118 | Component, but which is not part of that Major Component, and **(b)** serves only to 119 | enable use of the work with that Major Component, or to implement a Standard 120 | Interface for which an implementation is available to the public in source code form. 121 | A “Major Component”, in this context, means a major essential component 122 | (kernel, window system, and so on) of the specific operating system (if any) on which 123 | the executable work runs, or a compiler used to produce the work, or an object code 124 | interpreter used to run it. 
125 | 126 | The “Corresponding Source” for a work in object code form means all the 127 | source code needed to generate, install, and (for an executable work) run the object 128 | code and to modify the work, including scripts to control those activities. However, 129 | it does not include the work's System Libraries, or general-purpose tools or 130 | generally available free programs which are used unmodified in performing those 131 | activities but which are not part of the work. For example, Corresponding Source 132 | includes interface definition files associated with source files for the work, and 133 | the source code for shared libraries and dynamically linked subprograms that the work 134 | is specifically designed to require, such as by intimate data communication or 135 | control flow between those subprograms and other parts of the work. 136 | 137 | The Corresponding Source need not include anything that users can regenerate 138 | automatically from other parts of the Corresponding Source. 139 | 140 | The Corresponding Source for a work in source code form is that same work. 141 | 142 | ### 2. Basic Permissions 143 | 144 | All rights granted under this License are granted for the term of copyright on the 145 | Program, and are irrevocable provided the stated conditions are met. This License 146 | explicitly affirms your unlimited permission to run the unmodified Program. The 147 | output from running a covered work is covered by this License only if the output, 148 | given its content, constitutes a covered work. This License acknowledges your rights 149 | of fair use or other equivalent, as provided by copyright law. 150 | 151 | You may make, run and propagate covered works that you do not convey, without 152 | conditions so long as your license otherwise remains in force. 
You may convey covered 153 | works to others for the sole purpose of having them make modifications exclusively 154 | for you, or provide you with facilities for running those works, provided that you 155 | comply with the terms of this License in conveying all material for which you do not 156 | control copyright. Those thus making or running the covered works for you must do so 157 | exclusively on your behalf, under your direction and control, on terms that prohibit 158 | them from making any copies of your copyrighted material outside their relationship 159 | with you. 160 | 161 | Conveying under any other circumstances is permitted solely under the conditions 162 | stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 163 | 164 | ### 3. Protecting Users' Legal Rights From Anti-Circumvention Law 165 | 166 | No covered work shall be deemed part of an effective technological measure under any 167 | applicable law fulfilling obligations under article 11 of the WIPO copyright treaty 168 | adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention 169 | of such measures. 170 | 171 | When you convey a covered work, you waive any legal power to forbid circumvention of 172 | technological measures to the extent such circumvention is effected by exercising 173 | rights under this License with respect to the covered work, and you disclaim any 174 | intention to limit operation or modification of the work as a means of enforcing, 175 | against the work's users, your or third parties' legal rights to forbid circumvention 176 | of technological measures. 177 | 178 | ### 4. 
Conveying Verbatim Copies 179 | 180 | You may convey verbatim copies of the Program's source code as you receive it, in any 181 | medium, provided that you conspicuously and appropriately publish on each copy an 182 | appropriate copyright notice; keep intact all notices stating that this License and 183 | any non-permissive terms added in accord with section 7 apply to the code; keep 184 | intact all notices of the absence of any warranty; and give all recipients a copy of 185 | this License along with the Program. 186 | 187 | You may charge any price or no price for each copy that you convey, and you may offer 188 | support or warranty protection for a fee. 189 | 190 | ### 5. Conveying Modified Source Versions 191 | 192 | You may convey a work based on the Program, or the modifications to produce it from 193 | the Program, in the form of source code under the terms of section 4, provided that 194 | you also meet all of these conditions: 195 | 196 | * **a)** The work must carry prominent notices stating that you modified it, and giving a 197 | relevant date. 198 | * **b)** The work must carry prominent notices stating that it is released under this 199 | License and any conditions added under section 7. This requirement modifies the 200 | requirement in section 4 to “keep intact all notices”. 201 | * **c)** You must license the entire work, as a whole, under this License to anyone who 202 | comes into possession of a copy. This License will therefore apply, along with any 203 | applicable section 7 additional terms, to the whole of the work, and all its parts, 204 | regardless of how they are packaged. This License gives no permission to license the 205 | work in any other way, but it does not invalidate such permission if you have 206 | separately received it. 
207 | * **d)** If the work has interactive user interfaces, each must display Appropriate Legal 208 | Notices; however, if the Program has interactive interfaces that do not display 209 | Appropriate Legal Notices, your work need not make them do so. 210 | 211 | A compilation of a covered work with other separate and independent works, which are 212 | not by their nature extensions of the covered work, and which are not combined with 213 | it such as to form a larger program, in or on a volume of a storage or distribution 214 | medium, is called an “aggregate” if the compilation and its resulting 215 | copyright are not used to limit the access or legal rights of the compilation's users 216 | beyond what the individual works permit. Inclusion of a covered work in an aggregate 217 | does not cause this License to apply to the other parts of the aggregate. 218 | 219 | ### 6. Conveying Non-Source Forms 220 | 221 | You may convey a covered work in object code form under the terms of sections 4 and 222 | 5, provided that you also convey the machine-readable Corresponding Source under the 223 | terms of this License, in one of these ways: 224 | 225 | * **a)** Convey the object code in, or embodied in, a physical product (including a 226 | physical distribution medium), accompanied by the Corresponding Source fixed on a 227 | durable physical medium customarily used for software interchange. 
228 | * **b)** Convey the object code in, or embodied in, a physical product (including a 229 | physical distribution medium), accompanied by a written offer, valid for at least 230 | three years and valid for as long as you offer spare parts or customer support for 231 | that product model, to give anyone who possesses the object code either **(1)** a copy of 232 | the Corresponding Source for all the software in the product that is covered by this 233 | License, on a durable physical medium customarily used for software interchange, for 234 | a price no more than your reasonable cost of physically performing this conveying of 235 | source, or **(2)** access to copy the Corresponding Source from a network server at no 236 | charge. 237 | * **c)** Convey individual copies of the object code with a copy of the written offer to 238 | provide the Corresponding Source. This alternative is allowed only occasionally and 239 | noncommercially, and only if you received the object code with such an offer, in 240 | accord with subsection 6b. 241 | * **d)** Convey the object code by offering access from a designated place (gratis or for 242 | a charge), and offer equivalent access to the Corresponding Source in the same way 243 | through the same place at no further charge. You need not require recipients to copy 244 | the Corresponding Source along with the object code. If the place to copy the object 245 | code is a network server, the Corresponding Source may be on a different server 246 | (operated by you or a third party) that supports equivalent copying facilities, 247 | provided you maintain clear directions next to the object code saying where to find 248 | the Corresponding Source. Regardless of what server hosts the Corresponding Source, 249 | you remain obligated to ensure that it is available for as long as needed to satisfy 250 | these requirements. 
251 | * **e)** Convey the object code using peer-to-peer transmission, provided you inform 252 | other peers where the object code and Corresponding Source of the work are being 253 | offered to the general public at no charge under subsection 6d. 254 | 255 | A separable portion of the object code, whose source code is excluded from the 256 | Corresponding Source as a System Library, need not be included in conveying the 257 | object code work. 258 | 259 | A “User Product” is either **(1)** a “consumer product”, which 260 | means any tangible personal property which is normally used for personal, family, or 261 | household purposes, or **(2)** anything designed or sold for incorporation into a 262 | dwelling. In determining whether a product is a consumer product, doubtful cases 263 | shall be resolved in favor of coverage. For a particular product received by a 264 | particular user, “normally used” refers to a typical or common use of 265 | that class of product, regardless of the status of the particular user or of the way 266 | in which the particular user actually uses, or expects or is expected to use, the 267 | product. A product is a consumer product regardless of whether the product has 268 | substantial commercial, industrial or non-consumer uses, unless such uses represent 269 | the only significant mode of use of the product. 270 | 271 | “Installation Information” for a User Product means any methods, 272 | procedures, authorization keys, or other information required to install and execute 273 | modified versions of a covered work in that User Product from a modified version of 274 | its Corresponding Source. The information must suffice to ensure that the continued 275 | functioning of the modified object code is in no case prevented or interfered with 276 | solely because modification has been made. 
277 | 278 | If you convey an object code work under this section in, or with, or specifically for 279 | use in, a User Product, and the conveying occurs as part of a transaction in which 280 | the right of possession and use of the User Product is transferred to the recipient 281 | in perpetuity or for a fixed term (regardless of how the transaction is 282 | characterized), the Corresponding Source conveyed under this section must be 283 | accompanied by the Installation Information. But this requirement does not apply if 284 | neither you nor any third party retains the ability to install modified object code 285 | on the User Product (for example, the work has been installed in ROM). 286 | 287 | The requirement to provide Installation Information does not include a requirement to 288 | continue to provide support service, warranty, or updates for a work that has been 289 | modified or installed by the recipient, or for the User Product in which it has been 290 | modified or installed. Access to a network may be denied when the modification itself 291 | materially and adversely affects the operation of the network or violates the rules 292 | and protocols for communication across the network. 293 | 294 | Corresponding Source conveyed, and Installation Information provided, in accord with 295 | this section must be in a format that is publicly documented (and with an 296 | implementation available to the public in source code form), and must require no 297 | special password or key for unpacking, reading or copying. 298 | 299 | ### 7. Additional Terms 300 | 301 | “Additional permissions” are terms that supplement the terms of this 302 | License by making exceptions from one or more of its conditions. Additional 303 | permissions that are applicable to the entire Program shall be treated as though they 304 | were included in this License, to the extent that they are valid under applicable 305 | law. 
If additional permissions apply only to part of the Program, that part may be 306 | used separately under those permissions, but the entire Program remains governed by 307 | this License without regard to the additional permissions. 308 | 309 | When you convey a copy of a covered work, you may at your option remove any 310 | additional permissions from that copy, or from any part of it. (Additional 311 | permissions may be written to require their own removal in certain cases when you 312 | modify the work.) You may place additional permissions on material, added by you to a 313 | covered work, for which you have or can give appropriate copyright permission. 314 | 315 | Notwithstanding any other provision of this License, for material you add to a 316 | covered work, you may (if authorized by the copyright holders of that material) 317 | supplement the terms of this License with terms: 318 | 319 | * **a)** Disclaiming warranty or limiting liability differently from the terms of 320 | sections 15 and 16 of this License; or 321 | * **b)** Requiring preservation of specified reasonable legal notices or author 322 | attributions in that material or in the Appropriate Legal Notices displayed by works 323 | containing it; or 324 | * **c)** Prohibiting misrepresentation of the origin of that material, or requiring that 325 | modified versions of such material be marked in reasonable ways as different from the 326 | original version; or 327 | * **d)** Limiting the use for publicity purposes of names of licensors or authors of the 328 | material; or 329 | * **e)** Declining to grant rights under trademark law for use of some trade names, 330 | trademarks, or service marks; or 331 | * **f)** Requiring indemnification of licensors and authors of that material by anyone 332 | who conveys the material (or modified versions of it) with contractual assumptions of 333 | liability to the recipient, for any liability that these contractual assumptions 334 | directly impose on those 
licensors and authors. 335 | 336 | All other non-permissive additional terms are considered “further 337 | restrictions” within the meaning of section 10. If the Program as you received 338 | it, or any part of it, contains a notice stating that it is governed by this License 339 | along with a term that is a further restriction, you may remove that term. If a 340 | license document contains a further restriction but permits relicensing or conveying 341 | under this License, you may add to a covered work material governed by the terms of 342 | that license document, provided that the further restriction does not survive such 343 | relicensing or conveying. 344 | 345 | If you add terms to a covered work in accord with this section, you must place, in 346 | the relevant source files, a statement of the additional terms that apply to those 347 | files, or a notice indicating where to find the applicable terms. 348 | 349 | Additional terms, permissive or non-permissive, may be stated in the form of a 350 | separately written license, or stated as exceptions; the above requirements apply 351 | either way. 352 | 353 | ### 8. Termination 354 | 355 | You may not propagate or modify a covered work except as expressly provided under 356 | this License. Any attempt otherwise to propagate or modify it is void, and will 357 | automatically terminate your rights under this License (including any patent licenses 358 | granted under the third paragraph of section 11). 359 | 360 | However, if you cease all violation of this License, then your license from a 361 | particular copyright holder is reinstated **(a)** provisionally, unless and until the 362 | copyright holder explicitly and finally terminates your license, and **(b)** permanently, 363 | if the copyright holder fails to notify you of the violation by some reasonable means 364 | prior to 60 days after the cessation. 
365 | 366 | Moreover, your license from a particular copyright holder is reinstated permanently 367 | if the copyright holder notifies you of the violation by some reasonable means, this 368 | is the first time you have received notice of violation of this License (for any 369 | work) from that copyright holder, and you cure the violation prior to 30 days after 370 | your receipt of the notice. 371 | 372 | Termination of your rights under this section does not terminate the licenses of 373 | parties who have received copies or rights from you under this License. If your 374 | rights have been terminated and not permanently reinstated, you do not qualify to 375 | receive new licenses for the same material under section 10. 376 | 377 | ### 9. Acceptance Not Required for Having Copies 378 | 379 | You are not required to accept this License in order to receive or run a copy of the 380 | Program. Ancillary propagation of a covered work occurring solely as a consequence of 381 | using peer-to-peer transmission to receive a copy likewise does not require 382 | acceptance. However, nothing other than this License grants you permission to 383 | propagate or modify any covered work. These actions infringe copyright if you do not 384 | accept this License. Therefore, by modifying or propagating a covered work, you 385 | indicate your acceptance of this License to do so. 386 | 387 | ### 10. Automatic Licensing of Downstream Recipients 388 | 389 | Each time you convey a covered work, the recipient automatically receives a license 390 | from the original licensors, to run, modify and propagate that work, subject to this 391 | License. You are not responsible for enforcing compliance by third parties with this 392 | License. 393 | 394 | An “entity transaction” is a transaction transferring control of an 395 | organization, or substantially all assets of one, or subdividing an organization, or 396 | merging organizations. 
If propagation of a covered work results from an entity 397 | transaction, each party to that transaction who receives a copy of the work also 398 | receives whatever licenses to the work the party's predecessor in interest had or 399 | could give under the previous paragraph, plus a right to possession of the 400 | Corresponding Source of the work from the predecessor in interest, if the predecessor 401 | has it or can get it with reasonable efforts. 402 | 403 | You may not impose any further restrictions on the exercise of the rights granted or 404 | affirmed under this License. For example, you may not impose a license fee, royalty, 405 | or other charge for exercise of rights granted under this License, and you may not 406 | initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging 407 | that any patent claim is infringed by making, using, selling, offering for sale, or 408 | importing the Program or any portion of it. 409 | 410 | ### 11. Patents 411 | 412 | A “contributor” is a copyright holder who authorizes use under this 413 | License of the Program or a work on which the Program is based. The work thus 414 | licensed is called the contributor's “contributor version”. 415 | 416 | A contributor's “essential patent claims” are all patent claims owned or 417 | controlled by the contributor, whether already acquired or hereafter acquired, that 418 | would be infringed by some manner, permitted by this License, of making, using, or 419 | selling its contributor version, but do not include claims that would be infringed 420 | only as a consequence of further modification of the contributor version. For 421 | purposes of this definition, “control” includes the right to grant patent 422 | sublicenses in a manner consistent with the requirements of this License. 
423 | 424 | Each contributor grants you a non-exclusive, worldwide, royalty-free patent license 425 | under the contributor's essential patent claims, to make, use, sell, offer for sale, 426 | import and otherwise run, modify and propagate the contents of its contributor 427 | version. 428 | 429 | In the following three paragraphs, a “patent license” is any express 430 | agreement or commitment, however denominated, not to enforce a patent (such as an 431 | express permission to practice a patent or covenant not to sue for patent 432 | infringement). To “grant” such a patent license to a party means to make 433 | such an agreement or commitment not to enforce a patent against the party. 434 | 435 | If you convey a covered work, knowingly relying on a patent license, and the 436 | Corresponding Source of the work is not available for anyone to copy, free of charge 437 | and under the terms of this License, through a publicly available network server or 438 | other readily accessible means, then you must either **(1)** cause the Corresponding 439 | Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the 440 | patent license for this particular work, or **(3)** arrange, in a manner consistent with 441 | the requirements of this License, to extend the patent license to downstream 442 | recipients. “Knowingly relying” means you have actual knowledge that, but 443 | for the patent license, your conveying the covered work in a country, or your 444 | recipient's use of the covered work in a country, would infringe one or more 445 | identifiable patents in that country that you have reason to believe are valid. 
446 | 447 | If, pursuant to or in connection with a single transaction or arrangement, you 448 | convey, or propagate by procuring conveyance of, a covered work, and grant a patent 449 | license to some of the parties receiving the covered work authorizing them to use, 450 | propagate, modify or convey a specific copy of the covered work, then the patent 451 | license you grant is automatically extended to all recipients of the covered work and 452 | works based on it. 453 | 454 | A patent license is “discriminatory” if it does not include within the 455 | scope of its coverage, prohibits the exercise of, or is conditioned on the 456 | non-exercise of one or more of the rights that are specifically granted under this 457 | License. You may not convey a covered work if you are a party to an arrangement with 458 | a third party that is in the business of distributing software, under which you make 459 | payment to the third party based on the extent of your activity of conveying the 460 | work, and under which the third party grants, to any of the parties who would receive 461 | the covered work from you, a discriminatory patent license **(a)** in connection with 462 | copies of the covered work conveyed by you (or copies made from those copies), or **(b)** 463 | primarily for and in connection with specific products or compilations that contain 464 | the covered work, unless you entered into that arrangement, or that patent license 465 | was granted, prior to 28 March 2007. 466 | 467 | Nothing in this License shall be construed as excluding or limiting any implied 468 | license or other defenses to infringement that may otherwise be available to you 469 | under applicable patent law. 470 | 471 | ### 12. No Surrender of Others' Freedom 472 | 473 | If conditions are imposed on you (whether by court order, agreement or otherwise) 474 | that contradict the conditions of this License, they do not excuse you from the 475 | conditions of this License. 
If you cannot convey a covered work so as to satisfy 476 | simultaneously your obligations under this License and any other pertinent 477 | obligations, then as a consequence you may not convey it at all. For example, if you 478 | agree to terms that obligate you to collect a royalty for further conveying from 479 | those to whom you convey the Program, the only way you could satisfy both those terms 480 | and this License would be to refrain entirely from conveying the Program. 481 | 482 | ### 13. Use with the GNU Affero General Public License 483 | 484 | Notwithstanding any other provision of this License, you have permission to link or 485 | combine any covered work with a work licensed under version 3 of the GNU Affero 486 | General Public License into a single combined work, and to convey the resulting work. 487 | The terms of this License will continue to apply to the part which is the covered 488 | work, but the special requirements of the GNU Affero General Public License, section 489 | 13, concerning interaction through a network will apply to the combination as such. 490 | 491 | ### 14. Revised Versions of this License 492 | 493 | The Free Software Foundation may publish revised and/or new versions of the GNU 494 | General Public License from time to time. Such new versions will be similar in spirit 495 | to the present version, but may differ in detail to address new problems or concerns. 496 | 497 | Each version is given a distinguishing version number. If the Program specifies that 498 | a certain numbered version of the GNU General Public License “or any later 499 | version” applies to it, you have the option of following the terms and 500 | conditions either of that numbered version or of any later version published by the 501 | Free Software Foundation. If the Program does not specify a version number of the GNU 502 | General Public License, you may choose any version ever published by the Free 503 | Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU
General Public License can be used, that proxy's public statement of acceptance of a
version permanently authorizes you to choose that version for the Program.

Later license versions may give you additional or different permissions. However, no
additional obligations are imposed on any author or copyright holder as a result of
your choosing to follow a later version.

### 15. Disclaimer of Warranty

THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

### 16. Limitation of Liability

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY
COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS
PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE
OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

### 17. Interpretation of Sections 15 and 16

If the disclaimer of warranty and limitation of liability provided above cannot be
given local legal effect according to their terms, reviewing courts shall apply local
law that most closely approximates an absolute waiver of all civil liability in
connection with the Program, unless a warranty or assumption of liability accompanies
a copy of the Program in return for a fee.

_END OF TERMS AND CONDITIONS_

## How to Apply These Terms to Your New Programs

If you develop a new program, and you want it to be of the greatest possible use to
the public, the best way to achieve this is to make it free software which everyone
can redistribute and change under these terms.

To do so, attach the following notices to the program. It is safest to attach them
to the start of each source file to most effectively state the exclusion of warranty;
and each file should have at least the “copyright” line and a pointer to
where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year> <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short notice like this
when it starts in an interactive mode:

    <program> Copyright (C) <year> <name of author>
    This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type 'show c' for details.

The hypothetical commands `show w` and `show c` should show the appropriate parts of
the General Public License. Of course, your program's commands might be different;
for a GUI interface, you would use an “about box”.

You should also get your employer (if you work as a programmer) or school, if any, to
sign a “copyright disclaimer” for the program, if necessary. For more
information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.

The GNU General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may consider it
more useful to permit linking proprietary applications with the library. If this is
what you want to do, use the GNU Lesser General Public License instead of this
License. But first, please read
<https://www.gnu.org/philosophy/why-not-lgpl.html>.