├── .Rbuildignore ├── .gitattributes ├── static_plots └── neiman_general.jpeg ├── presentation_version ├── styles_presentation.css ├── README.md └── neiman1995_presentation.Rmd ├── run_neiman_simulation.R ├── styles.css ├── neiman1995.Rproj ├── DESCRIPTION ├── README.md ├── .gitignore ├── document_code ├── squared_euclidian_distance.R ├── multi_group_matrix_calculation.R └── group_drift_simulation.R ├── references.bib ├── simulation_code ├── 200_neiman_simulation_plot_basic.R ├── 300_run_neiman_simulation.R └── 100_general_neiman_simulation.R ├── LICENSE ├── .travis.yml └── neiman1995.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * linguist-vendored 2 | R/* linguist-vendored=false 3 | -------------------------------------------------------------------------------- /static_plots/neiman_general.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/neiman1995/master/static_plots/neiman_general.jpeg -------------------------------------------------------------------------------- /presentation_version/styles_presentation.css: -------------------------------------------------------------------------------- 1 | h2 { 2 | font-size: 22px; 3 | letter-spacing: 0; 4 | margin-bottom: -50px; 5 | } 6 | -------------------------------------------------------------------------------- /presentation_version/README.md: -------------------------------------------------------------------------------- 1 | This presentation version is just an early draft, but could be fleshed out to become solid course material in the future. 2 | -------------------------------------------------------------------------------- /run_neiman_simulation.R: -------------------------------------------------------------------------------- 1 | library(magrittr) 2 | 3 | empty <- lapply( 4 | list.files("simulation_code", full.names = TRUE), 5 | function(y) { 6 | message("\n###### ", y, " ######\n") 7 | source(y) 8 | rm(list = ls()) 9 | } 10 | ) 11 | -------------------------------------------------------------------------------- /styles.css: -------------------------------------------------------------------------------- 1 | /*create an equation counter variable */ 2 | 3 | body { 4 | counter-reset: EQU_COUNT 0 ; 5 | } 6 | 7 | .MathJax_Display:before { 8 | content: "(" counter(EQU_COUNT) ")\00a0\00a0\00a0"; 9 | counter-increment: EQU_COUNT; 10 | } 11 | -------------------------------------------------------------------------------- /neiman1995.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: placeholder 2 | Title: Does not matter. 
3 | Version: 0.0.1 4 | Imports: 5 | cowplot, 6 | dplyr, 7 | expm, 8 | ggplot2, 9 | knitr, 10 | latex2exp, 11 | magrittr, 12 | purrr, 13 | reshape2, 14 | rmarkdown, 15 | rsconnect, 16 | shiny, 17 | tibble, 18 | tidyr 19 | 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Travis-CI Build Status](https://travis-ci.org/nevrome/neiman1995.svg?branch=master)](https://travis-ci.org/nevrome/neiman1995) 2 | 3 | The [interactive document](https://rmarkdown.rstudio.com/authoring_shiny.html) [neiman1995.Rmd](https://nevrome.shinyapps.io/neiman1995) aims to visualize the model functions in 4 | 5 | Neiman, F. (1995). Stylistic Variation in Evolutionary Perspective: Inferences from Decorative Diversity and Interassemblage Distance in Illinois Woodland Ceramic Assemblages. American Antiquity, 60(1), 7-36. [doi:10.2307/282074](http://dx.doi.org/10.2307/282074) 6 | 7 | For study and teaching purposes. 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | .Rproj.user 35 | 36 | # rendered documents 37 | *.html 38 | 39 | # rsconnect deployment files 40 | rsconnect 41 | 42 | -------------------------------------------------------------------------------- /document_code/squared_euclidian_distance.R: -------------------------------------------------------------------------------- 1 | sed <- function(pi, pj) { 2 | pi <- pi / sum(pi) 3 | pj <- pj / sum(pj) 4 | sum((pi - pj)^2) 5 | } 6 | 7 | calculate_sed_for_group_drift_simulation_result <- function(pop_devel_sum, sim_run = 1) { 8 | 9 | groups <- pop_devel_sum %>% 10 | base::split(., .$group) 11 | 12 | A <- groups$A %>% 13 | base::split(., .$time) 14 | B <- groups$B %>% 15 | base::split(., .$time) 16 | 17 | A <- lapply(A, function(x) {x$individuals_with_variant}) 18 | B <- lapply(B, function(x) {x$individuals_with_variant}) 19 | 20 | sed_res <- purrr::map2_dbl(A, B, function(a, b) {sed(a, b)}) 21 | 22 | tibble::tibble( 23 | t = unique(pop_devel_sum$time), 24 | sed = sed_res, 25 | sim_run = sim_run 26 | ) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /references.bib: -------------------------------------------------------------------------------- 1 | @Article{neiman_stylistic_1995, 2 | title = {Stylistic {{Variation}} in {{Evolutionary Perspective}}: {{Inferences}} from {{Decorative Diversity}} and {{Interassemblage Distance}} in {{Illinois Woodland Ceramic Assemblages}}}, 3 | volume = {60}, 4 | issn = {0002-7316}, 5 | shorttitle = {Stylistic {{Variation}} in {{Evolutionary Perspective}}}, 6 | doi = {10.2307/282074}, 7 | number = {1}, 8 | journal = {American Antiquity}, 9 | author = {Fraser D. 
Neiman}, 10 | year = {1995}, 11 | pages = {7-36}, 12 | } 13 | @Article{dunnell1978style, 14 | title = {Style and {{Function}}: {{A Fundamental Dichotomy}}}, 15 | volume = {43}, 16 | doi = {10.2307/279244}, 17 | number = {2}, 18 | journal = {American Antiquity}, 19 | author = {Robert C. Dunnell}, 20 | year = {1978}, 21 | keywords = {Style transmission}, 22 | pages = {192-202}, 23 | } 24 | -------------------------------------------------------------------------------- /simulation_code/200_neiman_simulation_plot_basic.R: -------------------------------------------------------------------------------- 1 | #### specialized plot function #### 2 | 3 | plot_by_group <- function(x) { 4 | x %>% 5 | dplyr::filter( 6 | idea == 1 7 | ) %>% 8 | ggplot() + 9 | # geom_area(aes(x = timestep, y = proportion, fill = idea, group = idea)) + 10 | geom_line( 11 | aes(x = timestep, y = proportion, color = as.factor(model_id), group = model_id), 12 | size = 0.2 13 | ) + 14 | facet_wrap(~region, nrow = 8) + 15 | theme_bw() + 16 | theme( 17 | strip.background = element_blank(), 18 | strip.text.x = element_blank(), 19 | axis.title = element_blank(), 20 | axis.text.y = element_blank(), 21 | axis.text.x = element_text(size = 8, angle = 45, hjust = 1), 22 | axis.ticks.y = element_blank(), 23 | plot.margin = unit(c(1.4,0.2,0.2,0), "lines") 24 | ) + 25 | guides(color = FALSE) + 26 | scale_y_continuous( 27 | breaks = c(0, 0.5, 1), 28 | labels = c("0%", "50%", "100%") 29 | ) + 30 | scale_x_continuous( 31 | breaks = seq(0, 1400, 200), 32 | limits = c(0, 1400) 33 | ) + 34 | ggthemes::scale_colour_colorblind() 35 | } 36 | -------------------------------------------------------------------------------- /document_code/multi_group_matrix_calculation.R: -------------------------------------------------------------------------------- 1 | multi_group_matrizes <- function(Ne, t, Mk, mi_3) { 2 | 3 | k <- 1 4 | amount_groups <- nrow(Mk(k, mi_3)) 5 | 6 | group_matrix <- matrix(data = rep(0, amount_groups^2), nrow = amount_groups, ncol = amount_groups) 7 | 8 | M <- Mk 9 | 10 | U <- group_matrix 11 | diag(U) <- 1/Ne 12 | 13 | matrix_time_list <- list() 14 | 15 | for (pk in 0:t) { 16 | matrix_time_list[[pk + 1]] <- (M(k, mi_3) %^% pk) %*% U %*% t(M(k, mi_3) %^% pk) 17 | } 18 | 19 | V <- Reduce(`+`, matrix_time_list) 20 | 21 | # matrix of squared euclidian distance 22 | mosed <- group_matrix 23 | for (p1 in 1:nrow(V)) { 24 | for (p2 in 1:ncol(V)) { 25 | mosed[p1, p2] <- V[p1, p1] + V[p2, p2] - 2 * V[p1, p2] 26 | } 27 | } 28 | 29 | # mean squared distance of a group 30 | msd <- group_matrix 31 | for (p3 in 1:nrow(V)) { 32 | diag(msd)[p3] <- sum(mosed[p3,] / (amount_groups - 1)) 33 | } 34 | 35 | list( 36 | longM = reshape2::melt(M(k, mi_3)), 37 | longMt = reshape2::melt(t(M(k, mi_3))), 38 | longU = reshape2::melt(U), 39 | longV = reshape2::melt(V), 40 | longmosed = reshape2::melt(mosed), 41 | longmsd = reshape2::melt(msd) 42 | ) 43 | 44 | } 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 
7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | r: 4 | - oldrel 5 | 6 | install: 7 | - R -e 'install.packages("devtools")' 8 | - R -e 'devtools::install_deps(dep = T)' 9 | 10 | script: 11 | - R -e 'shinyappsiotoken <- Sys.getenv("shinyappsiotoken"); shinyappsiosecret <- Sys.getenv("shinyappsiosecret"); rsconnect::setAccountInfo(name = "nevrome", token = shinyappsiotoken, secret = shinyappsiosecret)' 12 | - R -e 'rsconnect::deployApp(appPrimaryDoc = "neiman1995.Rmd", launch.browser = FALSE)' 13 | 14 | env: 15 | global: 16 | - secure: u4L57PJLejoMsJV3+uZvYv34s4IeS/wMTtBVh58qCPS2LYEIvMVpvZJK1nMiplHmr0b4yMsphJtLjiu/ZJcLdIAp3j4HiXRDq+zHxfn1UfLWAWxrx04lCq3ozsvZmziOw3wqaafm6WvYAE6P/hnidTBvlUgCYxpgkofoMmO1kqQiw29BhG6lLT6MvB7fVx+m7GN+VjxeRWAyjExe0RTWVsFQe/gggcUm1PHZ2YwhGEBrB/PZOvpktAV0Ipqf+tx0xEYB1zYvCmqjnYN6+nescVBTFZPgZCQa8VVvDaoHHB3O8PXf1xKOA2ydWJuV4P/miJD4KdYPFNC50UHfa+n73AiDUiNVc7x4iO/BwPuLbQN1VHPHTvzmb6FKVuRlCo+cfRCl2CKLZ19mAeVyOQuDBb+OmLO84S/2g/UAVeZl8Sf+qS5JlVasK8xE8r6Z+v+pfGsxRvQMzWsL5bNkF+txgQ7D4MqHTEl6Gyhamu6JoAZx19JuPRvv3/4jFUnuJOc8/6JJ/sKcGAfLeNidsXTrCF+Xv0H14Bh8zxRwu3JefZzlpPjMMkEbka5egLBCwLEQuEghNeQI4DHGeg2OrDCrqBR27ICdW8pB74aGULTOkjiiem3df41usbkCiTcze33OViVCiHv/3/UvC82lXsXQVjhrX1jPx7JMvhF/9XUCnOg= 17 | - secure: iyYmM6Iu8SyMay2deCtZlfIviSxNMPmSinUcID7sqjhQGqk6TzH/YMJMG5qGn9a368fp+yKZ8fVgri9foWz7a0dtZyPmLfDlMG7+w/e1Mw2FwyNch62TTDJAK3Wk3bnNrg1Osbilzlvk2pTgYI93eiBVTWPC5rfamz+f3m55KkUAkbFgvAFJj4+zHdc446X62pzWrQQtV2iVjWL4waY1/5IlpXCg2CkoYLRrEKkx/QicRKI6IiDIF/7qeh4U5mN/JIP5IGPnQCOzvCp0L3u6yORZzLH3Gu4qwhsKv91PcQkD6ycdWIXBtBvsl9PiHoKPXBmGUbXebYVz8aDorJcnBSc4SLUWT9EnmTXzddPs/Jx8NXzxjNAkEgW0QWng+zuGZSpg/6DqAoc52fth5tpAuP5RgHXEdK7Ryq/fr5BqoRsYXHr6YKObBGBT6nwEfmbJ0vNmo793R5BWjlUYZbTCFTB+rLUZdy5jFqJw2MMY2Aah8R5Ee5Hqi/OEHYwnuyiyaPEMMnRBmzeDD5bQptFcieUBSKTp50XsV/+KthdTQ8H4UOnvNN18orkUxdKTGgD7By7Z9CPGQX4+hBoQj/C3B+B7y//IsnxOhdHYQOKQBwyI9w5SC/6MZgkw44LMsyaUn3VGQzxcVn9vY4EAqvCNmx2Pp0EzjMzROIcdC5oPxxo= 18 | -------------------------------------------------------------------------------- /document_code/group_drift_simulation.R: -------------------------------------------------------------------------------- 1 | group_drift_simulation <- function(k, N, t_final, mi) { 2 | 3 | population <- 1:N 4 | variants <- 1:k 5 | timesteps <- 2:t_final 6 | 7 | popA0 <- tibble::tibble( 8 | time = as.integer(0), 9 | individual = 1:N, 10 | variant = rep_len(1:k, N), 11 | group = 
"A" 12 | ) 13 | popB0 <- tibble::tibble( 14 | time = as.integer(0), 15 | individual = 1:N, 16 | variant = rep_len(1:k, N), 17 | group = "B" 18 | ) 19 | 20 | popA_devel <- list() 21 | popB_devel <- list() 22 | popA_devel[[1]] <- popA0 23 | popB_devel[[1]] <- popB0 24 | 25 | for (p1 in timesteps) { 26 | popA_new <- popA_devel[[p1 - 1]] 27 | popB_new <- popB_devel[[p1 - 1]] 28 | 29 | popA_new$time <- p1 - 1 30 | popB_new$time <- p1 - 1 31 | 32 | exchange_here <- sample( 33 | c(TRUE, FALSE), 34 | length(popA_new$variant), 35 | prob = c(mi, 1 - mi), 36 | replace = T 37 | ) 38 | 39 | popA_old_variant <- popA_new$variant 40 | # in group A 41 | popA_new$variant <- sample(popA_new$variant, length(popA_new$variant), replace = T) 42 | # ex group A 43 | popA_new$variant[exchange_here] <- sample(popB_new$variant, sum(exchange_here)) 44 | # in group B 45 | popB_new$variant <- sample(popB_new$variant, length(popB_new$variant), replace = T) 46 | # ex group B 47 | popB_new$variant[exchange_here] <- sample(popA_old_variant, sum(exchange_here)) 48 | 49 | popA_devel[[p1]] <- popA_new 50 | popB_devel[[p1]] <- popB_new 51 | } 52 | 53 | pop_devel <- append(popA_devel, popB_devel) 54 | 55 | pop_devel_df <- do.call(rbind, pop_devel) 56 | 57 | pop_devel_sum <- pop_devel_df %>% 58 | dplyr::group_by( 59 | time, variant, group 60 | ) %>% 61 | dplyr::summarise( 62 | individuals_with_variant = n() 63 | ) %>% 64 | dplyr::ungroup() %>% 65 | # that's just to fil gaps in the area plot 66 | tidyr::complete( 67 | time, 68 | variant, 69 | group, 70 | fill = list(individuals_with_variant = as.integer(0)) 71 | ) 72 | 73 | return(pop_devel_sum) 74 | } 75 | -------------------------------------------------------------------------------- /simulation_code/300_run_neiman_simulation.R: -------------------------------------------------------------------------------- 1 | #### setup settings grid #### 2 | 3 | config_matrix <- expand.grid( 4 | k = 2, 5 | N_g = c(10, 50, 200), 6 | t_final = 1400, 7 | mu = 0, 8 | g = 8, 9 | mi = c(0, 0.01, 0.1, 0.5 , 1), 10 | I = NA 11 | ) %>% 12 | tibble::as.tibble() %>% 13 | dplyr::mutate( 14 | model_group = 1:nrow(.) 15 | ) %>% 16 | tidyr::uncount(8) %>% 17 | dplyr::mutate( 18 | model_id = 1:nrow(.) 
19 | ) 20 | 21 | 22 | 23 | #### run simulation #### 24 | 25 | models <- pbapply::pblapply( 26 | 1:nrow(config_matrix), 27 | function(i, config_matrix) { 28 | neiman_simulation( 29 | config_matrix$k[i], 30 | config_matrix$N_g[i], 31 | config_matrix$t_final[i], 32 | config_matrix$mu[i], 33 | config_matrix$g[i], 34 | config_matrix$mi[i], 35 | config_matrix$I[i] 36 | ) %>% standardize_neiman_output %>% 37 | dplyr::mutate( 38 | model_id = config_matrix$model_id[i], 39 | model_group = config_matrix$model_group[i], 40 | region_population_size = config_matrix$N_g[i], 41 | degree_interregion_interaction = config_matrix$mi[i] 42 | ) 43 | }, 44 | config_matrix, 45 | cl = 2 46 | ) 47 | 48 | models_groups <- do.call(rbind, models) %>% 49 | base::split(.$model_group) 50 | 51 | #### create plots #### 52 | 53 | library(ggplot2) 54 | complete_plot <- cowplot::plot_grid( 55 | plotlist = lapply(models_groups, plot_by_group) %>% 56 | matrix(., 3, 5) %>% t %>% c(), 57 | labels = sapply( 58 | models_groups, function(x) { 59 | rps <- x$region_population_size[1] 60 | cui <- x$degree_interregion_interaction[1] 61 | paste0(LETTERS[x$model_group[1]], " - ", rps, ", ", cui) 62 | } 63 | ) %>% 64 | matrix(., 3, 5) %>% t %>% c(), 65 | label_x = 0, 66 | hjust = 0, 67 | label_size = 10, 68 | ncol = 5, 69 | nrow = 3, 70 | align = "hv" 71 | ) 72 | 73 | complete_plot %>% 74 | ggsave( 75 | "static_plots/neiman_general.jpeg", 76 | plot = ., 77 | device = "jpeg", 78 | scale = 1, 79 | dpi = 300, 80 | width = 210, height = 297, units = "mm", 81 | limitsize = F 82 | ) 83 | -------------------------------------------------------------------------------- /simulation_code/100_general_neiman_simulation.R: -------------------------------------------------------------------------------- 1 | #### simulation function #### 2 | 3 | #' neiman_simulation 4 | #' 5 | #' @param k Integer. Number of ideas at t = 0 6 | #' @param N_g Integer. Population per region 7 | #' @param t_final Integer. Final timestep 8 | #' @param mu Double. Innovation rate 9 | #' @param g Integer. Number of regions 10 | #' @param mi Double. Degree of interregion interaction 11 | #' @param I Doublematrix. Interregion interaction matrix. 
NA means equal interaction 12 | #' 13 | neiman_simulation <- function(k, N_g, t_final, mu, g, mi, I) { 14 | 15 | # define variables 16 | regions <- 1:g 17 | population <- 1:N_g 18 | ideas <- 1:k 19 | timesteps <- 2:t_final 20 | if (is.na(I)) { 21 | I <- matrix( 22 | rep(1, g*g), g, g 23 | ) 24 | diag(I) <- 0 25 | } 26 | 27 | # create starting populations 28 | pop0 <- lapply( 29 | regions, function(region, N, k) { 30 | tibble::tibble( 31 | timestep = as.integer(0), 32 | individual = population, 33 | idea = rep_len(ideas, max(population)), 34 | region = region 35 | ) 36 | }, 37 | k, population 38 | ) 39 | 40 | # create development list 41 | pop_devel <- list() 42 | pop_devel[[1]] <- pop0 43 | 44 | # determine number of ideas 45 | last_idea <- max(do.call(rbind, pop_devel[[1]])$idea) 46 | 47 | # simulation loop 48 | for (p1 in timesteps) { 49 | 50 | # new timestep list 51 | pop_old <- pop_devel[[p1 - 1]] 52 | pop_new <- pop_old 53 | 54 | # adjust time in new timestep list 55 | pop_new <- lapply( 56 | pop_new, function(x, p1) { 57 | x$timestep <- p1 - 1 58 | return(x) 59 | }, 60 | p1 61 | ) 62 | 63 | # intraregion learning 64 | pop_new <- lapply( 65 | pop_new, function(x) { 66 | x$idea <- sample(x$idea, length(x$idea), replace = T) 67 | return(x) 68 | } 69 | ) 70 | 71 | # interregion learning 72 | pop_new <- lapply( 73 | regions, function(i, pop_new, pop_old, mi, I, regions) { 74 | exchange_where <- which(sample(c(TRUE, FALSE), nrow(pop_new[[i]]), prob = c(mi, 1 - mi), replace = T)) 75 | exchange_with <- sample(regions, length(exchange_where), prob = I[,i], replace = T) 76 | pop_new[[i]]$idea[exchange_where] <- unlist(sapply( 77 | seq_along(exchange_where), 78 | function(j, pop_old, exchange_with, exchange_where) { 79 | v <- pop_old[[exchange_with[j]]]$idea 80 | return(v[exchange_where[j]]) 81 | }, 82 | pop_old, exchange_with, exchange_where 83 | )) 84 | return(pop_new[[i]]) 85 | }, 86 | pop_new, pop_old, mi, I, regions 87 | ) 88 | 89 | # innovation 90 | if(mu != 0) { 91 | for (i in seq_along(regions)) { 92 | innovate_where <- which(sample(c(TRUE, FALSE), nrow(pop_new[[i]]), prob = c(mu, 1 - mu), replace = T)) 93 | new_ideas <- seq(last_idea + 1, last_idea + length(innovate_where)) 94 | last_idea <- last_idea + length(innovate_where) 95 | pop_new[[i]]$idea[innovate_where] <- new_ideas 96 | } 97 | } 98 | 99 | pop_devel[[p1]] <- pop_new 100 | } 101 | 102 | # transform to data.frame 103 | pop_devel_time_dfs <- lapply( 104 | pop_devel, function(x) { 105 | do.call(rbind, x) 106 | } 107 | ) 108 | pop_devel_df <- do.call(rbind, pop_devel_time_dfs) 109 | 110 | return(pop_devel_df) 111 | } 112 | 113 | 114 | 115 | #### output preparation #### 116 | 117 | standardize_neiman_output <- function(x) { 118 | 119 | x %>% 120 | dplyr::group_by( 121 | timestep, idea, region 122 | ) %>% 123 | dplyr::summarise( 124 | number = n() 125 | ) %>% 126 | dplyr::ungroup() %>% 127 | # calculate proportion 128 | dplyr::group_by( 129 | timestep, region 130 | ) %>% 131 | dplyr::mutate( 132 | proportion = number/sum(number) 133 | ) %>% 134 | dplyr::ungroup() %>% 135 | # that's just to fill gaps in the area plot 136 | tidyr::complete( 137 | timestep, 138 | idea, 139 | region, 140 | fill = list(number = as.integer(0), proportion = as.double(0)) 141 | ) %>% 142 | dplyr::select( 143 | region, timestep, idea, proportion 144 | ) 145 | 146 | } 147 | -------------------------------------------------------------------------------- /neiman1995.Rmd: -------------------------------------------------------------------------------- 1 | --- 
2 | title: "Visualizing Neimans 'Stylistic variation in evolutionary perspective'" 3 | author: "Clemens Schmid" 4 | date: "April 2018" 5 | output: 6 | html_document: 7 | css: styles.css 8 | toc: true 9 | toc_depth: 4 10 | runtime: shiny 11 | bibliography: references.bib 12 | link-citations: true 13 | --- 14 | 15 | ```{r include=FALSE} 16 | # knitr chunk settings 17 | knitr::opts_chunk$set(echo = TRUE) 18 | ``` 19 | 20 | **Disclaimer** 21 | 22 | *This document visualizes the simulations and the general model functions in Fraser D. Neimans article Stylistic Variation in Evolutionary Perspective: Inferences from Decorative Diversity and Interassemblage Distance in Illinois Woodland Ceramic Assemblages [@neiman_stylistic_1995].* 23 | 24 | *It mirrors the structure of the article and should be read in conjunction with it. It can be used to deepen the understanding of the complex variable relations, but it doesn't attempt to retell the story: the case study about woodland pottery isn't relevant here. The small descriptive texts are compiled from the article and also contain sentences copied verbatimly. I focused on the segments and explanations I considered most important for understanding the models and parameters. This document does not attempt to reach the threshold of orginality but is meant as a mere tool for learning and teaching.* 25 | 26 | ```{r echo = FALSE} 27 | # load the packages, that are inconvinient to reference directly via namespace:: 28 | library(ggplot2) 29 | library(magrittr) 30 | suppressMessages(library(expm)) 31 | # load the functions in the source files 32 | purrr::walk(list.files("./document_code", full.names = T), function(x) {source(x)}) 33 | ``` 34 | 35 | ### Drift, Innovation and Diversity 36 | 37 | #### Temporal Dynamics of Drift *p.10* 38 | 39 | The first simulation shows the effect of drift in a population of $N$ individuals with $k$ variants over $t$ time steps. Drift is sampling error that accompanies all forms of cultural transmission and causes individual variants to vanish or dominate randomly. In the end one variant usually eradicates all the others. The speed with which the variation is destroyed increases as the population size decreases. 40 | 41 | Mechanics of the simulation: In each time step every individual randomly adopts a variant from somebody else. The probability of getting somebody else's variant is $(N - 1) / N$ while the probability of keeping the own variant is only $1 / N$. 42 | 43 | - $k$: Amount of variants in a population 44 | - $N_e$: Size of the effective population. 
The effective population consists of the individuals within a population that is actively involved in a cultural transmission process 45 | - $t$: Time / number of iterations 46 | 47 | ```{r echo=FALSE} 48 | inputPanel( 49 | sliderInput("k_drift_simulation", label = "k", 50 | min = 5, max = 15, value = 10, step = 1), 51 | sliderInput("Ne_drift_simulation", label = "Ne", 52 | min = 10, max = 100, value = 20, step = 10), 53 | sliderInput("t_drift_simulation", label = "t", 54 | min = 50, max = 200, value = 100, step = 50), 55 | actionButton("run_button_drift_simulation", "Run simulation") 56 | ) 57 | 58 | drift_simulation_data <- eventReactive( 59 | input$run_button_drift_simulation, { 60 | 61 | # read input 62 | k <- input$k_drift_simulation 63 | N <- input$Ne_drift_simulation 64 | time <- input$t_drift_simulation 65 | 66 | # calculate population parameters 67 | population <- 1:N 68 | variants <- 1:k 69 | timesteps <- 2:time 70 | 71 | # create initial population 72 | pop0 <- tibble::tibble( 73 | time = as.integer(0), 74 | individual = 1:N, 75 | variant = rep_len(1:k, N) 76 | ) 77 | 78 | # list to store population stages over time 79 | pop_devel <- list() 80 | pop_devel[[1]] <- pop0 81 | 82 | # simulation loop 83 | for (p1 in timesteps) { 84 | pop_new <- pop_devel[[p1 - 1]] 85 | pop_new$time <- p1 - 1 86 | pop_new$variant <- sample(pop_new$variant, length(pop_new$variant), replace = T) 87 | pop_devel[[p1]] <- pop_new 88 | } 89 | 90 | # bind individual population stages into data.frame 91 | pop_devel_df <- do.call(rbind, pop_devel) 92 | 93 | # calculate number of individuals per timestep and variant 94 | pop_devel_sum <- pop_devel_df %>% 95 | dplyr::group_by( 96 | time, variant 97 | ) %>% 98 | dplyr::summarise( 99 | individuals_with_variant = n() 100 | ) %>% 101 | dplyr::ungroup() %>% 102 | # complete (expand.grid) to fill gaps in the area plot 103 | tidyr::complete( 104 | time, 105 | variant, 106 | fill = list(individuals_with_variant = as.integer(0)) 107 | ) 108 | 109 | pop_devel_sum 110 | }, 111 | ignoreNULL = FALSE 112 | ) 113 | 114 | renderPlot({ 115 | 116 | drift_simulation_data() %>% 117 | ggplot() + 118 | geom_area(aes(x = time, y = individuals_with_variant, fill = variant, group = variant)) + 119 | geom_line(aes(x = time, y = individuals_with_variant, group = variant), position = "stack") + 120 | theme_bw() + 121 | xlab(expression(paste("t"))) + 122 | ylab("variants and their occurence in the population [%]") 123 | 124 | }) 125 | ``` 126 | 127 | #### Homogeneity Under Drift *p.10-12* **(1)** 128 | 129 | The theory of neutral alleles allows to describe the within-population homogeneity $F$ as a function of the effective population size $N_e$. $F$ is the probability that two randomly chosen individuals in the population carry variants that are copies of a common antecedent variant. In a given time period the probability of drawing an individual, who learned from the same model as some other randomly selected individual in the previous time period, is $1 / N$. The probability of the opposite is $1 - (1 / N)$. $F_{t - 1}$ is the probability that the model of the second individual learned from the same model as the model of the first individual in an earlier time step. $F_t$ can be calculated by taking all this probabilities into consideration. 130 | 131 | $$F_t = \frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}$$ 132 | 133 | As an effect of drift, $F_t$ approaches one as $t$ increases. 134 | 135 | - $F_t$: Within-population homogeneity. 
That's the probability that two randomly chosen individuals in the population carry variants that are copies of a common antecedent variant at a certain time step 136 | - $F_{t - 1}$: Probability that the model of the second individual learned from the same model as the model of the first individual in an earlier time step 137 | 138 | ```{r echo=FALSE} 139 | Ft <- function(F0, Ne, time) { 140 | if(time == 0) { return(F0) } 141 | 1/Ne + (1 - 1/Ne) * Ft(F0, Ne, time - 1) 142 | } 143 | 144 | inputPanel( 145 | sliderInput("F0_homogeneity_drift", label = "F0 (Ft for t == 0)", 146 | min = 0, max = 1, value = 0.5, step = 0.05), 147 | sliderInput("Ne_homogeneity_drift", label = "Ne", 148 | min = 10, max = 100, value = 20, step = 10), 149 | sliderInput("t_homogeneity_drift", label = "t", 150 | min = 50, max = 200, value = 100, step = 50) 151 | ) 152 | 153 | renderPlot({ 154 | 155 | # read input 156 | F0 <- input$F0_homogeneity_drift 157 | Ne <- input$Ne_homogeneity_drift 158 | time <- input$t_homogeneity_drift 159 | 160 | timesteps <- 0:time 161 | 162 | # apply function for all timesteps 163 | Ft_time <- sapply(timesteps, function(x) { Ft(F0, Ne, x) }) 164 | 165 | data.frame( 166 | t = timesteps, 167 | Ft = Ft_time 168 | ) %>% 169 | ggplot() + 170 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 171 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 172 | geom_line(aes(t, Ft), size = 1) + 173 | theme_bw() + 174 | xlab(latex2exp::TeX("t")) + 175 | ylab(latex2exp::TeX("F_{t}")) 176 | }) 177 | ``` 178 | 179 | #### Temporal Dynamics of Drift and Innovation *p.12-14* 180 | 181 | The following simulation is built upon the first simple drift simulation. Here the concept of innovation is added: Individuals can not just inherit but also create new variants with the probability $μ$. In this context $k$ is just the initial number of variants at $t = 0$. The effect of drift is still strongly visible, but with increasing $μ$ it becomes less relevant. New variants can form and replace the dominant one. 182 | 183 | Mechanics of the simulation: The simulation works like the first one, but now there's a chance of $μ$ for every individual in every time step to create a new variant. 184 | 185 | - $μ$: Innovation rate. 
Probability of the creation of a new variant 186 | 187 | ```{r echo=FALSE} 188 | inputPanel( 189 | sliderInput("k_drift_simulation_with_innovation", label = "k for t == 0", 190 | min = 5, max = 15, value = 10, step = 1), 191 | sliderInput("Ne_drift_simulation_with_innovation", label = "Ne", 192 | min = 10, max = 100, value = 20, step = 10), 193 | sliderInput("t_drift_simulation_with_innovation", label = "t", 194 | min = 50, max = 200, value = 100, step = 50), 195 | sliderInput("mu_drift_simulation_with_innovation", label = "μ", 196 | min = 0, max = 0.1, value = 0.01, step = 0.01), 197 | actionButton("run_button_drift_simulation_with_innovation", "Run simulation") 198 | ) 199 | 200 | drift_simulation_with_innovation_data <- eventReactive( 201 | input$run_button_drift_simulation_with_innovation, { 202 | 203 | # read input 204 | k <- input$k_drift_simulation_with_innovation 205 | N <- input$Ne_drift_simulation_with_innovation 206 | time <- input$t_drift_simulation_with_innovation 207 | mu <- input$mu_drift_simulation_with_innovation 208 | 209 | # prepare population parameters 210 | population <- 1:N 211 | variants <- 1:k 212 | timesteps <- 2:time 213 | 214 | # create starting population 215 | pop0 <- tibble::tibble( 216 | time = as.integer(0), 217 | individual = 1:N, 218 | variant = rep_len(1:k, N) 219 | ) 220 | 221 | # list to store population stages over time 222 | pop_devel <- list() 223 | pop_devel[[1]] <- pop0 224 | 225 | # simulation loop 226 | last_variant <- max(pop_devel[[1]]$variant) 227 | for (p1 in timesteps) { 228 | pop_new <- pop_devel[[p1 - 1]] 229 | pop_new$time <- p1 - 1 230 | pop_new$variant <- sample(pop_new$variant, length(pop_new$variant), replace = T) 231 | 232 | # innovation 233 | innovate_here <- sample( 234 | c(TRUE, FALSE), 235 | length(pop_new$variant), 236 | prob = c(mu, 1 - mu), 237 | replace = T 238 | ) 239 | new_variants <- seq(last_variant + 1, last_variant + sum(innovate_here)) 240 | last_variant <- last_variant + sum(innovate_here) 241 | pop_new$variant[innovate_here] <- new_variants 242 | 243 | pop_devel[[p1]] <- pop_new 244 | } 245 | 246 | # bind individual population stages into data.frame 247 | pop_devel_df <- do.call(rbind, pop_devel) 248 | 249 | # calculate number of individuals per timestep and variant 250 | pop_devel_sum <- pop_devel_df %>% 251 | dplyr::group_by( 252 | time, variant 253 | ) %>% 254 | dplyr::summarise( 255 | individuals_with_variant = n() 256 | ) %>% 257 | dplyr::ungroup() %>% 258 | tidyr::complete( 259 | time, 260 | variant, 261 | fill = list(individuals_with_variant = as.integer(0)) 262 | ) 263 | 264 | pop_devel_sum 265 | 266 | }, 267 | ignoreNULL = FALSE 268 | ) 269 | 270 | renderPlot({ 271 | drift_simulation_with_innovation_data() %>% 272 | ggplot() + 273 | geom_area(aes(x = time, y = individuals_with_variant, fill = variant, group = variant)) + 274 | geom_line(aes(x = time, y = individuals_with_variant, group = variant), position = "stack") + 275 | theme_bw() + 276 | xlab("t") + 277 | ylab("variants and their occurence in the population [%]") 278 | }) 279 | ``` 280 | 281 | #### Homogeneity Under Drift and Innovation *p.14* **(2)** **(3)** **(4)** **(5)** 282 | 283 | Drift and innovation are opposing forces: drift increases homogeneity and innovation decreases it. This causes an equilibrium after some time steps in the simulation. We can incorporate innovation into the calculation of $F$ by modifying equation (1). 
If innovation is possible, $F$ is no longer determined only by learning from yourself or from somebody else: there is also the possibility that a new variant is created. Both randomly chosen individuals have to be non-innovators for their variants to be copies of a common antecedent, which is why the term is multiplied by $(1 - μ)^2$.
284 | 
285 | $$F_t = \left(\frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}\right)(1 - μ)^2$$
286 | 
287 | ```{r echo=FALSE}
288 | Ft_innovation <- function(F0, Ne, time, mu) {
289 |   if(time == 0) {return(F0)}
290 |   (1/Ne + (1 - 1/Ne) * Ft_innovation(F0, Ne, time - 1, mu)) * (1 - mu)^2
291 | }
292 | 
293 | inputPanel(
294 |   sliderInput("F0_homogeneity_drift_with_innovation", label = "F0 (Ft for t == 0)",
295 |               min = 0, max = 1, value = 0.5, step = 0.05),
296 |   sliderInput("Ne_homogeneity_drift_with_innovation", label = "Ne",
297 |               min = 10, max = 100, value = 20, step = 10),
298 |   sliderInput("t_homogeneity_drift_with_innovation", label = "t",
299 |               min = 50, max = 200, value = 100, step = 50),
300 |   sliderInput("mu_homogeneity_drift_with_innovation", label = "μ",
301 |               min = 0, max = 1, value = 0.1, step = 0.1)
302 | )
303 | 
304 | renderPlot({
305 | 
306 |   # read input
307 |   F0 <- input$F0_homogeneity_drift_with_innovation
308 |   Ne <- input$Ne_homogeneity_drift_with_innovation
309 |   mu <- input$mu_homogeneity_drift_with_innovation
310 |   time <- input$t_homogeneity_drift_with_innovation
311 | 
312 |   timesteps <- 0:time
313 | 
314 |   # apply function for every timestep
315 |   Ft_innovation_time <- sapply(timesteps, function(x) {Ft_innovation(F0, Ne, x, mu)})
316 | 
317 |   data.frame(
318 |     t = timesteps,
319 |     Ft = Ft_innovation_time
320 |   ) %>%
321 |     ggplot() +
322 |     geom_hline(aes(yintercept = 1), color = "dodgerblue4") +
323 |     geom_hline(aes(yintercept = 0), color = "dodgerblue4") +
324 |     geom_line(aes(t, Ft), size = 1) +
325 |     theme_bw() +
326 |     xlab(latex2exp::TeX("t")) +
327 |     ylab(latex2exp::TeX("F_{t}"))
328 | })
329 | ```
330 | 
331 | The equilibrium is reached when $F_t = F_{t-1}$. Setting $F_t = F_{t-1}$ in equation (2) and solving for $\hat{F}$ gives:
332 | 
333 | $$\hat{F} = \frac{(1 - μ)^2}{N_e - (N_e - 1)(1 - μ)^2}$$
334 | 
335 | If we assume $μ$ is quite small, we can further simplify this definition.
336 | 
337 | $$\hat{F} \simeq \frac{1}{2 N_e μ + 1}$$
338 | 
339 | That means the equilibrium homogeneity of neutral variants within a population is inversely related to twice the effective population size times the innovation rate: $2 N_e μ$. This expression will be called $θ$.
340 | 341 | - $θ$: Twice the effective population size times the innovation rate ($2 N_e μ$) 342 | 343 | ```{r echo=FALSE} 344 | theta <- function(Ne, mu) { 345 | (2 * Ne * mu) 346 | } 347 | 348 | Fhat <- function(theta) { 349 | 1 / (theta + 1) 350 | } 351 | 352 | inputPanel( 353 | sliderInput("Ne_homogeneity_equilibrium", label = "Ne", 354 | min = 10, max = 100, value = 20, step = 10), 355 | sliderInput("mu_homogeneity_equilibrium", label = "μ", 356 | min = 0, max = 1, value = 0.1, step = 0.1) 357 | ) 358 | 359 | renderText({ 360 | paste0("1 * theta = ", theta(input$Ne_homogeneity_equilibrium, input$mu_homogeneity_equilibrium)) 361 | }) 362 | 363 | renderPlot({ 364 | Ne <- input$Ne_homogeneity_equilibrium 365 | mu <- input$mu_homogeneity_equilibrium 366 | 367 | tibble::tibble( 368 | theta = 0:100 * theta(Ne, mu), 369 | Fhat = sapply(theta, function(x) {Fhat(x)}) 370 | ) %>% 371 | ggplot() + 372 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 373 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 374 | geom_line(aes(theta, Fhat), size = 1) + 375 | theme_bw() + 376 | xlab(latex2exp::TeX("θ")) + 377 | ylab(latex2exp::TeX("\\hat{F}")) 378 | }) 379 | ``` 380 | 381 | The homogeneity $\hat{F}$ of a population can also be described as a function of the relative frequency of the variants in it: $p$. $p_i$ is the relative frequency of the $i$'th variant in the population, so $p$ is a vector of frequencies with the sum one. The probability of choosing a given variant at random is its relative frequency $p_i$, which is also the probability of picking another copy of this same variant on the second try. The probability of getting this variant twice in a row is therefore $p_i^2$. The total probability of getting any of the $i = 1$ to $k$ variants twice in a row is the sum of all these probabilities. 382 | 383 | For this plot $p$ is randomly drawn some hundred times from a uniform distribution for every value of $k$. The black line shows the mean result for $\hat{F}$ the ribbon the min and max results. 
384 | 385 | $$\hat{F} = \sum_{i=1}^{k} p^2_i$$ 386 | 387 | - $p_i$: relative frequency of the $i$'th variant in the population 388 | 389 | ```{r echo=FALSE} 390 | Fhat_relative_frequency <- function(amount_of_variants) { 391 | p <- runif(amount_of_variants) 392 | p <- p / sum(p) 393 | sum(p^2) 394 | } 395 | 396 | inputPanel( 397 | sliderInput("k_relative_frequency", label = "k", 398 | min = 50, max = 150, value = 100, step = 10), 399 | sliderInput("number_of_replications_relative_frequency", label = "Number of simulation runs", 400 | min = 100, max = 1000, value = 500, step = 100), 401 | actionButton("run_button_relative_frequency", "Run simulation") 402 | ) 403 | 404 | relative_frequency_data <- eventReactive( 405 | input$run_button_relative_frequency, { 406 | 407 | # read input 408 | number_of_replications <- input$number_of_replications_relative_frequency 409 | k <- input$k_relative_frequency 410 | 411 | # calculate Fhat 412 | tibble::tibble( 413 | k = 0:k, 414 | Fhat_min = sapply(k, function(x) { 415 | min(replicate(number_of_replications, Fhat_relative_frequency(x))) 416 | }), 417 | Fhat_max = sapply(k, function(x) { 418 | max(replicate(number_of_replications, Fhat_relative_frequency(x))) 419 | }), 420 | Fhat_mean = (Fhat_min + Fhat_max) / 2 421 | ) 422 | 423 | }, 424 | ignoreNULL = FALSE 425 | ) 426 | 427 | renderPlot({ 428 | relative_frequency_data() %>% 429 | ggplot() + 430 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 431 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 432 | geom_ribbon( 433 | aes(x = k, ymin = Fhat_min, ymax = Fhat_max), 434 | fill = "deepskyblue", alpha = 0.5 435 | ) + 436 | geom_line(aes(x = k, y = Fhat_mean), color = "black", size = 1) + 437 | theme_bw() + 438 | xlab(latex2exp::TeX("k")) + 439 | ylab(latex2exp::TeX("\\hat{F}")) 440 | }) 441 | ``` 442 | 443 | #### Diversity *p.14-15* **(6)** **(7)** 444 | 445 | Instead of looking at homogeneity we can also estimate the diversity inside a population. One measure of diversity is the "effective number" of variants $n_e$, which is the reciprocal of $\hat{F}$. Estimating diversity within a population in these terms means that the population contains the same amount of diversity as would be found in an imaginary population with the effective number of variants at equal frequency. $n_e$ scales linearly with $θ$. Larger populations contain more diversity at a given level of innovation. Greater diversity is also expected in populations into which new variants are being introduced at higher rates. 
446 | 447 | $$n_e = 2 N_e μ + 1$$ 448 | 449 | - $n_e$: Effective number of variants 450 | 451 | ```{r echo=FALSE} 452 | ne <- function(theta) { 453 | theta + 1 454 | } 455 | 456 | inputPanel( 457 | sliderInput("Ne_diversity_effective_number", label = "Ne", 458 | min = 10, max = 100, value = 20, step = 10), 459 | sliderInput("mu_diversity_effective_number", label = "μ", 460 | min = 0, max = 1, value = 0.01, step = 0.01) 461 | ) 462 | 463 | renderText({ 464 | paste0("1 * theta = ", theta(input$Ne_diversity_effective_number, input$mu_diversity_effective_number)) 465 | }) 466 | 467 | renderPlot({ 468 | 469 | # read input 470 | Ne <- input$Ne_diversity_effective_number 471 | mu <- input$mu_diversity_effective_number 472 | 473 | tibble::tibble( 474 | theta = 0:100 * theta(Ne, mu), 475 | ne = sapply(theta, function(x) {ne(x)}) 476 | ) %>% 477 | ggplot() + 478 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 479 | geom_line(aes(theta, ne), size = 1) + 480 | theme_bw() + 481 | xlab(latex2exp::TeX("θ")) + 482 | ylab(latex2exp::TeX("n_e")) 483 | }) 484 | ``` 485 | 486 | Furthermore, it is clear that $θ$ can be estimated empirically by computing the reciprocal of the sum of squares of variant frequencies in a population. We can call such estimates $t_F$, to distinguish them from the actual population values and to remind ourselves that they are based on the homogeneity $\hat{F}$ in Equation (5). With this equation we can get useful insights into variations in $θ$ among groups or demes. 487 | 488 | For this plot $p$ is randomly drawn some hundred times from a uniform distribution for every value of $k$. The black line shows the mean result for $t_F$, the ribbon the min and max results. 489 | 490 | $$t_F = \frac{1}{\sum_{i=1}^{k} p^2_i} - 1$$ 491 | 492 | - $t_F$: Estimated $θ$ by the sum of squares of variant frequencies 493 | 494 | ```{r echo=FALSE} 495 | theta_F <- function(amount_of_variants) { 496 | p <- runif(amount_of_variants) 497 | p <- p / sum(p) 498 | (1 / sum(p^2)) - 1 499 | } 500 | 501 | inputPanel( 502 | sliderInput("k_reciprocal_relative_frequency", label = "k", 503 | min = 50, max = 150, value = 100, step = 10), 504 | sliderInput("number_of_replications_reciprocal_relative_frequency", label = "Number of simulation runs", 505 | min = 100, max = 1000, value = 500, step = 100), 506 | actionButton("run_button_reciprocal_relative_frequency", "Run simulation") 507 | ) 508 | 509 | reciprocal_relative_frequency_data <- eventReactive( 510 | input$run_button_reciprocal_relative_frequency, { 511 | 512 | # read input 513 | number_of_replications <- input$number_of_replications_reciprocal_relative_frequency 514 | k <- input$k_reciprocal_relative_frequency 515 | 516 | # calculate Fhat 517 | tibble::tibble( 518 | k = 0:k, 519 | theta_F_min = sapply(k, function(x) { 520 | min(replicate(number_of_replications, theta_F(x))) 521 | }), 522 | theta_F_max = sapply(k, function(x) { 523 | max(replicate(number_of_replications, theta_F(x))) 524 | }), 525 | theta_F_mean = (theta_F_min + theta_F_max) / 2 526 | ) 527 | 528 | }, 529 | ignoreNULL = FALSE 530 | ) 531 | 532 | renderPlot({ 533 | reciprocal_relative_frequency_data() %>% 534 | ggplot() + 535 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 536 | geom_ribbon( 537 | aes(x = k, ymin = theta_F_min, ymax = theta_F_max), 538 | fill = "deepskyblue", alpha = 0.5 539 | ) + 540 | geom_line(aes(x = k, y = theta_F_mean), size = 1) + 541 | theme_bw() + 542 | xlab(latex2exp::TeX("k")) + 543 | ylab(latex2exp::TeX("t_F")) 544 | }) 545 | ``` 546 | 547 | 
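Before moving on to archaeological samples, here is a small, non-interactive sketch of how equation (7) could be applied to a single assemblage. The vector of variant counts is a made-up example, not data from the article; the applet above does the same calculation for randomly drawn frequency vectors.

```{r eval=FALSE}
# minimal sketch (hypothetical counts): estimating theta via t_F, equation (7)
variant_counts <- c(34, 21, 12, 5, 3, 1)    # made-up counts per decorative variant
p <- variant_counts / sum(variant_counts)   # relative variant frequencies
t_F <- 1 / sum(p^2) - 1                     # reciprocal of the sum of squares, minus one
t_F
```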
### Archaeological Application 548 | 549 | #### Diversity and Sample Size *p.15-16* **(8)** **(9)** 550 | 551 | One problem of the set of equations so far is that the frequency of variants in a population of social learners is not equal to the frequency preserved and documented in the archaeological assemblage. Under appropriate taphonomic circumstances, however, they are surprisingly similar. Another problem arises because we try to estimate diversity only on the basis of a sample derived from a population. The equations (5) and (7) are similar to the Shannon-Weaver information statistic, which is very sensitive to sample-size variation. 552 | 553 | $$H = -\sum_{i = 1}^{k} p_i \log(p_i)$$ 554 | 555 | - $H$: [Shannon's diversity index](http://www.tiem.utk.edu/~gross/bioed/bealsmodules/shannonDI.html) 556 | 557 | ```{r echo=FALSE} 558 | H <- function(amount_of_variants) { 559 | p <- runif(amount_of_variants) 560 | p <- p / sum(p) 561 | -sum(p * log(p)) 562 | } 563 | 564 | inputPanel( 565 | sliderInput("k_shannon", label = "k", 566 | min = 50, max = 150, value = 100, step = 10), 567 | sliderInput("number_of_replications_shannon", label = "Number of simulation runs", 568 | min = 100, max = 1000, value = 500, step = 100), 569 | actionButton("run_button_shannon", "Run simulation") 570 | ) 571 | 572 | shannon_data <- eventReactive( 573 | input$run_button_shannon, { 574 | 575 | # read input 576 | number_of_replications <- input$number_of_replications_shannon 577 | k <- input$k_shannon 578 | 579 | # calculate Fhat 580 | tibble::tibble( 581 | k = 0:k, 582 | H_min = sapply(k, function(x) { 583 | min(replicate(number_of_replications, H(x))) 584 | }), 585 | H_max = sapply(k, function(x) { 586 | max(replicate(number_of_replications, H(x))) 587 | }), 588 | H_mean = (H_min + H_max) / 2 589 | ) 590 | 591 | }, 592 | ignoreNULL = FALSE 593 | ) 594 | 595 | renderPlot({ 596 | shannon_data() %>% 597 | ggplot() + 598 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 599 | geom_ribbon( 600 | aes(x = k, ymin = H_min, ymax = H_max), 601 | fill = "deepskyblue", alpha = 0.5 602 | ) + 603 | geom_line(aes(x = k, y = H_mean), color = "black", size = 1) + 604 | theme_bw() + 605 | xlab(latex2exp::TeX("k")) + 606 | ylab(latex2exp::TeX("H")) 607 | }) 608 | ``` 609 | 610 | To deal with the problem of sample-size dependency in the calculation of $t_F$ with equation (7), we have to consider that $θ$ also controls a characteristic probability distribution of variants. (7) shows how $θ$ is related to the sum of squared variant frequencies in a population, so it should be possible to predict for a given value of $θ$ how many variants occur with certain frequencies. When $θ$ is low ($θ \ll 1$), we should expect that most of the time, a population will be dominated by a few variants, with other variants at low frequencies. When $θ$ is high ($θ \gg 1$), we are more likely to see a large number of variants at low to moderate frequencies. 611 | 612 | By combining this frequency distribution with the probability that a variant at a given frequency is represented at least once in a sample of a given size drawn at random from the population, it should be possible to deduce how many different variants we can expect to find in a sample of a given size. If the variants are selectively neutral, the expected number of different variants $k$ found in a sample drawn from a population is a function of it's sample size $n$ and $θ$ value. The number of variants will be larger when either $θ$ or $n$ are large. 
613 | 614 | $$E(k) = \sum_{i = 0}^{n - 1} \frac{θ}{θ + i}$$ 615 | 616 | - $E(k)$: Expected number of different variants 617 | - $n$: Sample size 618 | 619 | ```{r echo=FALSE} 620 | Ek <- function(n, theta) { 621 | ressum <- 0 622 | for (i in 0:(n-1)) { 623 | ressum = ressum + (theta / (theta + i)) 624 | } 625 | return(ressum) 626 | } 627 | 628 | inputPanel( 629 | sliderInput("Ne_expected_variants", label = "Ne", 630 | min = 10, max = 100, value = 20, step = 10), 631 | sliderInput("mu_expected_variants", label = "μ", 632 | min = 0, max = 1, value = 0.01, step = 0.01), 633 | sliderInput("n_expected_variants", label = "n", 634 | min = 0, max = 100, value = 10, step = 1) 635 | ) 636 | 637 | renderText({ 638 | paste0("1 * theta = ", theta(input$Ne_expected_variants, input$mu_expected_variants)) 639 | }) 640 | 641 | renderPlot({ 642 | 643 | # read input 644 | Ne <- input$Ne_expected_variants 645 | mu <- input$mu_expected_variants 646 | n <- input$n_expected_variants 647 | 648 | tibble::tibble( 649 | theta = 0:100 * theta(Ne, mu), 650 | n = rep(n, 101), 651 | Ek = purrr::map2_dbl(n, theta, function(a, b) {Ek(a, b)}) 652 | ) %>% 653 | ggplot() + 654 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 655 | geom_line(aes(theta, Ek), size = 1) + 656 | theme_bw() + 657 | xlab(latex2exp::TeX("θ")) + 658 | ylab(latex2exp::TeX("E(k)")) 659 | }) 660 | ``` 661 | 662 | Equation (9) could be used to produce a maximum likelihood estimate of $θ$, that is the value of $θ$ that maximizes the chances of drawing the observed number of variants $k$ in a sample of the size $n$. Unfortunately it doesn't have an analytic solution, but we can still use it to estimate $θ$ iteratively by changing its value until $E(k)$ equals the observed number of variants. We call the result of doing so $t_E$. Both $t_E$ and $t_F$ offer us special statistical summaries of assemblage diversity, but there are theoretical reasons to prefer $t_E$ if $n$ and $k$ (and not just $p$) are known. 663 | 664 | This applet allows to solve (9) for theta approximately. It starts at $θ = 10$, changes it in steps of $0.1$ and stops when $|E(k) - k| < 0.5$. 665 | 666 | ```{r echo=FALSE} 667 | inputPanel( 668 | sliderInput("n_iterative_theta", label = "n", 669 | min = 10, max = 100, value = 20, step = 10), 670 | sliderInput("k_iterative_theta", label = "k", 671 | min = 5, max = 15, value = 10, step = 1), 672 | actionButton("run_iterative_theta", "Approximate theta") 673 | ) 674 | 675 | iterative_theta_data <- eventReactive( 676 | input$run_iterative_theta, { 677 | 678 | # read input 679 | n <- input$n_iterative_theta 680 | k <- input$k_iterative_theta 681 | start_theta <- 10 682 | 683 | # iterative solving loop 684 | theta_iter <- start_theta 685 | while (TRUE) { 686 | diffi <- Ek(n, theta_iter) - k 687 | if (diffi < 0) { 688 | theta_iter <- theta_iter + 0.1 689 | } else if (diffi > 0) { 690 | theta_iter <- theta_iter - 0.1 691 | } 692 | if (abs(diffi) < 0.5) { 693 | break; 694 | } 695 | } 696 | 697 | theta_iter 698 | 699 | }, 700 | ignoreNULL = FALSE 701 | ) 702 | 703 | renderText({ 704 | paste0("theta ≈ ", round(iterative_theta_data(), 2)) 705 | }) 706 | ``` 707 | 708 | #### Archaeological Sample Size *p.16-17* 709 | 710 | $t_E$ may yield wrong estimates of $θ$ for very large archaeological samples that accumulated over long periods of time. Equation (9) implies that for a given number of variants $k$, $t_E$ will decline as the sample size $n$ increases. 
Usually $n$ is limited by the population size $N$, but in case of a long-term sample this limit can be exceeded: $n$ can grow beyond $N$ and causes the estimate of $θ$ to be too low. In this case $t_F$ can again be the better function to determine $θ$. Another solution is to look at the difference between both estimators ($t_F - t_E$) as a function of assemblage size. For the application in archaeological contexts it's always important to carefully examine the relationships among $t_F$ and $t_E$ and possible sample size effects. 711 | 712 | #### Inferences About Group Size and Innovation Rate *p.17-18* 713 | 714 | $θ$ is defined as twice the effective population size times the innovation rate ($2 N_e μ$). The innovation rate $μ$ includes the combined effects of both in situ innovation $v$ and the introduction of novel variants from other groups $m$. If $μ = v + m$, then $N_e μ = N_e v + N_e m$. $v$ should be roughly constant across demes, but $m$ is likely to be more variable in time and space. Under these circumstances, most of the variation in $θ$ associated with $μ$ will be caused by variation in intergroup transmission rates. $θ$ mostly depends on the number of times local group members learn from members of other groups: $N_e m$. That means that variation in $θ$ is an indicator for the absolute amount of cultural transmission among demes in a geographical region. 715 | 716 | ### Drift, Intergroup Transmission, and Interassemblage Distance 717 | 718 | #### Dynamics of Stylistic Distance between Two Groups *p.21-24* **(10)** 719 | 720 | To compare groups and track the cultural transmission we need a simple measure of between-group similarity. One possibility is to look at the squared Euclidean distance $d_{ij}^2$. It's the sum of squared differences in variant frequencies $p$ between two groups $i$ and $j$. 721 | 722 | $$d_{ij}^2 = \sum_{k = 1}^{n} (p_{ik} - p_{jk})^2$$ 723 | 724 | - $d_{ij}^2$: Squared Euclidean distance between two groups 725 | - $n$: total amount of variants (here not sample size) 726 | 727 | ```{r echo=FALSE} 728 | inputPanel( 729 | sliderInput("n_sed_general", label = "n", 730 | min = 10, max = 100, value = 50, step = 10), 731 | actionButton("run_sed_general", "Run simulation") 732 | ) 733 | 734 | sed_general_data <- eventReactive( 735 | input$run_sed_general, { 736 | 737 | # read input 738 | n <- input$n_sed_general 739 | 740 | # calculate 741 | tibble::tibble( 742 | amount_of_variants = 0:n, 743 | sed_min = sapply(amount_of_variants, function(x) { 744 | min(replicate(500, sed(runif(x), runif(x)))) 745 | }), 746 | sed_max = sapply(amount_of_variants, function(x) { 747 | max(replicate(500, sed(runif(x), runif(x)))) 748 | }), 749 | sed_mean = (sed_min + sed_max) / 2 750 | ) 751 | 752 | }, 753 | ignoreNULL = FALSE 754 | ) 755 | 756 | renderPlot({ 757 | sed_general_data() %>% 758 | ggplot() + 759 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 760 | geom_ribbon( 761 | aes(x = amount_of_variants, ymin = sed_min, ymax = sed_max), 762 | fill = "deepskyblue", alpha = 0.5 763 | ) + 764 | geom_line(aes(x = amount_of_variants, y = sed_mean), color = "black", size = 1) + 765 | theme_bw() + 766 | xlab(latex2exp::TeX("k")) + 767 | ylab(latex2exp::TeX("d_{ij}^2")) 768 | }) 769 | ``` 770 | 771 | The study of intergroup cultural transmission requires a model setup that contains the effect of drift but also exchange between groups. 
One promising approach comes from migration matrix models, which in population genetics have proven useful in studying the effects of drift and intergroup genetic transmission on genetic distances among small, localized demes. Such models reveal what happens to variant frequencies in a finite number of demes that are subject to the joint effects of drift, whose strength is controlled by the effective size of each population, and intergroup transmission, occurring between demes at constant pairwise rates. There is no role in these models for in situ innovation, but they offer a means of checking the assumption that variation in $θ$ is largely a function of intergroup transmission. 772 | 773 | The following simulation is constructed like the very first one about the effects of drift. Only in this case, there is a set probability, the intergroup transmission rate $m_i$, that the individual contacted is derived from the other group. If $m_i = 0$, both groups act independently, but if it increases they tend to develop roughly alike. 774 | 775 | The line chart below the variant frequency area plot shows the distance measure $d_{ij}^2$ calculated for the two groups at the respective point in model time. 776 | 777 | $m_i$: intergroup transmission rate 778 | 779 | ```{r echo=FALSE} 780 | inputPanel( 781 | sliderInput("k_two_groups", label = "k", 782 | min = 2, max = 10, value = 10, step = 1), 783 | sliderInput("N_two_groups", label = "N", 784 | min = 10, max = 100, value = 20, step = 10), 785 | sliderInput("t_two_groups", label = "t", 786 | min = 50, max = 200, value = 100, step = 50), 787 | sliderInput("mi_two_groups", label = "mi", 788 | min = 0, max = 1, value = 0.1, step = 0.01), 789 | actionButton("run_button_two_groups", "Run simulation") 790 | ) 791 | 792 | group_drift_simulation_data <- eventReactive( 793 | input$run_button_two_groups, { 794 | 795 | # run simulation (code in source file) 796 | group_drift_simulation( 797 | input$k_two_groups, 798 | input$N_two_groups, 799 | input$t_two_groups, 800 | input$mi_two_groups 801 | ) 802 | 803 | }, 804 | ignoreNULL = FALSE 805 | ) 806 | 807 | renderPlot({ 808 | A <- group_drift_simulation_data() %>% 809 | ggplot() + 810 | geom_area( 811 | aes(x = time, y = individuals_with_variant, fill = variant, group = variant) 812 | ) + 813 | geom_line( 814 | aes(x = time, y = individuals_with_variant, group = variant), 815 | position = "stack" 816 | ) + 817 | theme_bw() + 818 | xlab("t") + 819 | ylab("variants and their occurence in the population [%]") + 820 | facet_grid(group ~ .) 821 | 822 | B <- calculate_sed_for_group_drift_simulation_result( 823 | group_drift_simulation_data() 824 | ) %>% 825 | ggplot() + 826 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 827 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 828 | geom_line(aes(x = t, y = sed), color = "black", size = 1, alpha = 0.3) + 829 | geom_point(aes(x = t, y = sed), color = "black", size = 1) + 830 | theme_bw() + 831 | xlab(latex2exp::TeX("t")) + 832 | ylab(latex2exp::TeX("d_{ij}^2")) 833 | 834 | cowplot::plot_grid(A, B, nrow = 2, align = "v", axis = "lr") 835 | }) 836 | ``` 837 | 838 | We can run the above simulation many times to see different results random drift can produce. At the beginning the distance between the groups is always zero, because the variants and variant frequencies are equal in both. However, as time passes, the distance increases up to a quasi-stationary equilibrium. 
The equilibrium is only quasi-stationary because the groups are finite and there is no innovation: if the simulations are continued for a sufficient number of time periods, a single variant is fixed and as a result the between-group distance reverts to zero. The equilibrium holds over the period during which the opposing forces of drift, locally reducing variation within each group, and intergroup transmission, introducing potentially novel variation into each group from the other, are balanced. Later drift globally depletes all variation in both groups. 839 | 840 | Higher levels of intergroup transmission $m_i$ will lower the equilibrium level, while lower levels of intergroup transmission raise it. Lower effective sizes $N$ for either or both groups will raise the equilibrium since the effects of drift scale with $N$. The speed with which the equilibrium is achieved depends heavily on the modes of transmission. Horizontal transmission can cause rather fast changes, while vertical and oblique transmission may take centuries. The equilibrium is also approached in case of initially divergent groups with different variants and variant frequencies if there is intergroup transmission among them ($m_i > 0$). 841 | 842 | ```{r echo=FALSE} 843 | inputPanel( 844 | sliderInput("k_two_groups_many_runs", label = "k", 845 | min = 5, max = 15, value = 10, step = 1), 846 | sliderInput("N_two_groups_many_runs", label = "N", 847 | min = 10, max = 100, value = 20, step = 10), 848 | sliderInput("t_two_groups_many_runs", label = "t", 849 | min = 20, max = 200, value = 20, step = 20), 850 | sliderInput("mi_two_groups_many_runs", label = "mi", 851 | min = 0, max = 1, value = 0.1, step = 0.01), 852 | sliderInput("sim_runs_two_groups_many_runs", label = "Number of simulation runs", 853 | min = 1, max = 100, value = 20, step = 1), 854 | actionButton("run_button_two_groups_many_runs", "Run simulation") 855 | ) 856 | 857 | two_groups_many_runs_data <- eventReactive( 858 | input$run_button_two_groups_many_runs, { 859 | 860 | # run the simulation many times and store results in res_list 861 | res_list <- list() 862 | for (i in 1:input$sim_runs_two_groups_many_runs) { 863 | res_list[[i]] <- calculate_sed_for_group_drift_simulation_result( 864 | group_drift_simulation( 865 | input$k_two_groups_many_runs, 866 | input$N_two_groups_many_runs, 867 | input$t_two_groups_many_runs, 868 | input$mi_two_groups_many_runs 869 | ), 870 | sim_run = i 871 | ) 872 | } 873 | 874 | # rbind individual simulation results into one data.frame 875 | do.call(rbind, res_list) 876 | 877 | }, 878 | ignoreNULL = FALSE 879 | ) 880 | 881 | renderPlot({ 882 | two_groups_many_runs_data() %>% 883 | ggplot() + 884 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 885 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 886 | geom_line(aes(x = t, y = sed, group = sim_run), color = "black", size = 1, alpha = 0.3) + 887 | geom_point(aes(x = t, y = sed), color = "black", size = 1) + 888 | theme_bw() + 889 | xlab(latex2exp::TeX("t")) + 890 | ylab(latex2exp::TeX("d_{ij}^2")) 891 | }) 892 | ``` 893 | 894 | #### Stylistic Distances for Multiple Groups *p.24-25* **(11)** **(12)** 895 | 896 | The real world is better represented in terms of more than two groups learning from another. The multi-group case can be described in a matrix based model. The matrix $\mathbf{M}$ contains the intergroup transmission rates $m_{ij}$ that give the proportion of the $i$'th deme that learned from the $j$'th deme. 
The diagonal elements of $\mathbf{M}$ are the proportion of each deme that learned from its own members. The matrix $\mathbf{U}$ contains the reciprocals of the effective population sizes $1 / N_e$ of each group on its diagonal and $0$'s elsewhere. 
897 | 
898 | In this setup all groups initially have the same variant frequencies. This means that the expected frequencies for each group in any time period are identical and equal to the starting frequency. Drift-driven departures from this expectation for a given deme can be characterized analytically in terms of variance. Since there are multiple demes in the system, their joint evolution must be handled in terms of a matrix of variances and covariances: $\mathbf{V}$. Diagonal elements in $\mathbf{V}$ contain variances: the square of the departure of the variant frequency in the respective deme at time $t$ from the situation at the starting time. The other elements are covariances: the product of the departures of the variant frequencies in each pair of demes. 
899 | 
900 | $$\mathbf{V}^{(t)} = \sum_{r = 0}^{t - 1} \mathbf{M}^r \mathbf{U}(\mathbf{M}^r)^{\prime}$$ 
901 | 
902 | - $\mathbf{M}$: Matrix of intergroup transmission rates 
903 | - $\mathbf{V}^{(t)}$: Matrix of variances and covariances 
904 | - $r$: Index of the successive time periods 
905 | - $\mathbf{U}$: Matrix of reciprocals of the effective population sizes ($1 / N_e$). These are on the diagonal, the rest is $0$ 
906 | - $(\mathbf{M}^r)^{\prime}$: $\mathbf{M}^r$ transposed 
907 | 
908 | Equation (11) says that in the first time period, the covariance between two groups is a function of the sum of products of the intergroup transmission rates from all groups to those two groups and the reciprocal of their two effective population sizes. In later time periods, as individuals who learned from non-group members are in turn distributed among the other groups, the intergroup transmission rates from each group to *all* the others assume increasing importance. The resulting cumulative effect of the indirect movement of variants among groups is handled by successively powering the intergroup transmission matrix. 
909 | 
910 | The magnitude of the covariance between two groups will scale inversely with their sizes $N$ and positively with the intergroup transmission rates $m$. High levels of intergroup transmission will mean that whatever departures from the initial frequency occur, they will be similar and in the same direction, hence the covariance between the groups will be high. On the other hand, if either or both group sizes are low, causing drift to play a stronger role, variant frequencies are less likely to depart from initial frequencies in a similar fashion. The covariances will be low. 
911 | 
912 | The matrix of variances and covariances $\mathbf{V}$ can be converted to a matrix of squared Euclidean distances $d_{ij}^{2}$ (or $\mathbf{SED}$). 
913 | 
914 | $$d_{ij}^{2(t)} = v_{ii}^{(t)} + v_{jj}^{(t)} - 2 v_{ij}^{(t)}$$ 
915 | 
916 | - $v_{ii}^{(t)}$: Diagonal element of $\mathbf{V}^{(t)}$ -- a variance. The square of the departure of the variant frequency in the $i$'th deme at time $t$ from its starting frequency when the demes were identical, standardized by the variance of that starting frequency 
917 | - $v_{ij}^{(t)}$: Covariance. 
The product of the departures of the variant frequencies in each pair of demes $i$ and $j$, again standardized by the variance of the starting frequency 918 | 919 | ```{r echo = FALSE} 920 | inputPanel( 921 | sliderInput("Ne_matrix", label = "Ne", 922 | min = 10, max = 100, value = 20, step = 10), 923 | sliderInput("t_matrix", label = "t", 924 | min = 10, max = 100, value = 20, step = 10), 925 | actionButton("run_button_matrix_calculation", "Run matrix calculation") 926 | ) 927 | 928 | matrix_calculation_data <- eventReactive( 929 | input$run_button_matrix_calculation, { 930 | Mk <- function(k, mi_3) { 931 | M <- matrix(data = rep(0, 5^2), nrow = 5, ncol = 5) 932 | #M[] <- 0.2 933 | M[1,] <- c(0.4, 0.0, 0.2, 0.0, 0.4) 934 | M[2,] <- c(0.0, 0.4, 0.2, 0.4, 0.0) 935 | M[3,] <- c(0.2, 0.2, 0.2, 0.2, 0.2) 936 | M[4,] <- c(0.2, 0.1, 0.3, 0.2, 0.2) 937 | M[5,] <- c(0.2, 0.3, 0.1, 0.2, 0.2) 938 | M 939 | } 940 | 941 | multi_group_matrizes(input$Ne_matrix, input$t_matrix, Mk) 942 | }, 943 | ignoreNULL = FALSE 944 | ) 945 | 946 | renderPlot({ 947 | plotlist <- lapply(matrix_calculation_data(), function(x) { 948 | ggplot(x, aes(x = Var2, y = Var1)) + 949 | geom_raster(aes(fill=value)) + 950 | geom_text(aes(x = Var2, y = Var1, label = round(value, 3)), color = "white", size = 5) + 951 | scale_y_reverse(sec.axis = dup_axis()) + 952 | scale_x_continuous(sec.axis = dup_axis()) + 953 | xlab("group") + 954 | ylab("group") 955 | }) 956 | 957 | cowplot::plot_grid( 958 | plotlist = plotlist, 959 | labels = c("M", "M'", "U", "V", "SED", "MSD"), 960 | nrow = 3, ncol = 2 961 | ) 962 | }, 963 | height = 1200 964 | ) 965 | ``` 966 | 967 | ### Woodland Interassemblage Distance 968 | 969 | #### Trends in Interassemblage Distance *p.25-26* 970 | 971 | The migration matrix approach can be applied to cultural transmission contexts by computing a matrix of squared Euclidean distances $\mathbf{SED}$ between the groups for each of the variants. We expect to find that the overall level of differentiation among groups, as measured by the mean of the squared Euclidean distances $\mathbf{MSD}$, is the mirror image of our estimates of $θ$. 972 | 973 | #### Diversity and Distance for Individual Assemblages *p.26-27* **(13)** 974 | 975 | The mean of the squared Euclidean distance $\bar{d}_{i}^{2}$ (or $MSD$) can be calculated with (13). 
976 | 977 | $$\bar{d}_{i}^{2} = \sum_{j = 1}^{n} d_{ij}^{2} / (n - 1), i \neq j$$ 978 | 979 | ```{r echo = FALSE} 980 | inputPanel( 981 | sliderInput("k_two_groups_many_runs_with_msd", label = "k", 982 | min = 15, max = 25, value = 20, step = 1), 983 | sliderInput("N_two_groups_many_runs_with_msd", label = "N", 984 | min = 10, max = 100, value = 20, step = 10), 985 | sliderInput("t_two_groups_many_runs_with_msd", label = "t", 986 | min = 20, max = 200, value = 20, step = 20), 987 | sliderInput("mi_two_groups_many_runs_with_msd", label = "mi", 988 | min = 0, max = 1, value = 0.1, step = 0.01), 989 | sliderInput("sim_runs_two_groups_many_runs_with_msd", label = "Number of simulation runs", 990 | min = 1, max = 100, value = 20, step = 1), 991 | actionButton("run_button_two_groups_many_runs_with_msd", "Run simulation") 992 | ) 993 | 994 | two_groups_many_runs_with_msd_data <- eventReactive( 995 | input$run_button_two_groups_many_runs_with_msd, { 996 | 997 | # read input 998 | k <- input$k_two_groups_many_runs_with_msd 999 | N <- input$N_two_groups_many_runs_with_msd 1000 | t_final <- input$t_two_groups_many_runs_with_msd 1001 | mi <- input$mi_two_groups_many_runs_with_msd 1002 | sim_runs <- input$sim_runs_two_groups_many_runs_with_msd 1003 | 1004 | mixed_list <- list() 1005 | 1006 | # run group drift simulation 1007 | res_list <- list() 1008 | for (i in 1:sim_runs) { 1009 | res_list[[i]] <- calculate_sed_for_group_drift_simulation_result( 1010 | group_drift_simulation( 1011 | k, 1012 | N, 1013 | t_final, 1014 | mi 1015 | ), 1016 | sim_run = i 1017 | ) 1018 | } 1019 | 1020 | mixed_list[[1]] <- do.call(rbind, res_list) 1021 | 1022 | # calculate matrix and mean squared distance 1023 | Mk_msd <- function(k, mi) { 1024 | M <- matrix(data = rep(0, 2^2), nrow = 2, ncol = 2) 1025 | M[1,] <- c(1 - mi, mi) 1026 | M[2,] <- c(mi, 1 - mi) 1027 | M 1028 | } 1029 | 1030 | msd <- c() 1031 | for (t_m in 0:t_final) { 1032 | msd[t_m + 1] <- multi_group_matrizes( 1033 | N, t_m, Mk_msd, mi 1034 | )$longmosed$value[2] 1035 | } 1036 | 1037 | mixed_list[[2]] <- data.frame(t = 0:t_final, sed = msd) 1038 | 1039 | mixed_list 1040 | }, 1041 | ignoreNULL = FALSE 1042 | ) 1043 | 1044 | renderPlot({ 1045 | mixed_list <- two_groups_many_runs_with_msd_data() 1046 | 1047 | ggplot() + 1048 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 1049 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 1050 | geom_line( 1051 | data = mixed_list[[1]], 1052 | aes(x = t, y = sed, group = sim_run), 1053 | color = "black", size = 1, alpha = 0.3 1054 | ) + 1055 | geom_point( 1056 | data = mixed_list[[1]], 1057 | aes(x = t, y = sed), 1058 | color = "black", size = 1 1059 | ) + 1060 | geom_line( 1061 | data = mixed_list[[2]], 1062 | aes(x = t, y = sed), 1063 | color = "dodgerblue4", size = 2 1064 | ) + 1065 | theme_bw() + 1066 | xlab(latex2exp::TeX("t")) + 1067 | ylab(latex2exp::TeX("d_{ij}^2")) 1068 | }) 1069 | ``` 1070 | 1071 | ### Literature 1072 | -------------------------------------------------------------------------------- /presentation_version/neiman1995_presentation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "'Stylistic variation in evolutionary perspective'" 3 | author: "Clemens Schmid" 4 | date: "Juni 2018" 5 | output: 6 | ioslides_presentation: 7 | css: styles_presentation.css 8 | widescreen: true 9 | smaller: true 10 | runtime: shiny 11 | bibliography: ../references.bib 12 | link-citations: true 13 | --- 14 | 15 | ```{r include=FALSE} 16 | # start 
presentation from console: R -e "rmarkdown::run('~/neomod/neiman1995/neiman1995_presentation.Rmd', shiny_args = list(port = 8241, launch.browser = T))" 
17 | 
18 | # knitr chunk settings 
19 | knitr::opts_chunk$set(echo = TRUE) 
20 | ``` 
21 | 
22 | ```{r echo = FALSE} 
23 | # load the packages that are inconvenient to reference directly via namespace:: 
24 | library(ggplot2) 
25 | library(magrittr) 
26 | suppressMessages(library(expm)) 
27 | # load the functions in the source files 
28 | purrr::walk(list.files("../R", full.names = T), function(x) {source(x)}) 
29 | ``` 
30 | 
31 | ## Disclaimer 
32 | 
33 | *This document visualizes the simulations and the general model functions in Fraser D. Neiman's article Stylistic Variation in Evolutionary Perspective: Inferences from Decorative Diversity and Interassemblage Distance in Illinois Woodland Ceramic Assemblages [@neiman_stylistic_1995].* 
34 | 
35 | 
36 | 
37 | # Introduction 
38 | 
39 | ## Introduction: Cultural Evolution 
40 | 
41 | **Cultural Evolution theory** 
42 | The processes that drive the biological evolution of species also shape human cultural development. 
43 | 
44 |
45 | 46 | > Certain aspects of what archaeologists have traditionally called **stylistic variation** can be understood as the result of the introduction of **selectively neutral variation** into **social-learning populations** and the **sampling error** in the cultural transmission of that variation (**drift**). 47 | > 48 | > Simple mathematical models allow the deduction of expectation for the dynamics of these evolutionary mechanisms as monitored in the archaeological record through **assemblage diversity** and **interassemblage distance**. 49 | > 50 | > The models are applied to make inferences about the causes of change in decorative diversity and interassemblage distance for **Woodland ceramics from Illinois**. 51 | > 52 | > -- @neiman_stylistic_1995, Abstract. (boldface and paragraphs in this and in the following citations from me) 53 | 54 | ## Introduction: Stylistic vs. functional cultural traits 55 | 56 | > **Style** denotes those forms that do not have detectable selective values. **Function** is manifest as those forms that directly affect the Darwinian fitness of the populations in which they occur. 57 | > 58 | > [...] 59 | > 60 | > Traits that have discrete selective values over measurable amounts of time should be accountable by natural selection and a set of external conditions. **Traits identified as adaptively neutral will display a very different kind of behavior** because their frequencies in a population are not directly accountable in terms of selection and external contingencies. Their behavior should be more adequately accommodated by **stochastic processes** 61 | > 62 | > -- @dunnell1978style, 199. 63 | 64 | \ 65 | 66 | > In a neo-Darwinian framework these definitions imply that **variation in stylistic variant frequencies in time and space** will be affected by a subset of evolutionary forces that introduce selectively neutral cultural variation into populations of social learners, for example, **innovation** and **intergroup transmission** [(**flow**)], and then sort it stochastically (**drift**). 67 | > 68 | > -- @neiman_stylistic_1995, 8. 69 | 70 | \ 71 | 72 | The main mechanisms of diffusion of neutral variants are **innovation**, **drift** and **flow**. 73 | 74 | ## Introduction: Method? 75 | 76 | > The signal advantage of embedding style in a Darwinian framework is that it **becomes possible to make and evaluate inferences about what happened in history**. These twin goals are accomplished by developing **models of the operation of different evolutionary forces or mechanisms** defined by theory. Models in turn deliver **expectations concerning spatial and temporal distributions** of elements that result from the operation of different mechanisms. Given a suite of measurements of real-world phenomena or statistical summaries of them, we are in a position to make **inferences about the mechanisms that caused observed values**. The inferences can be checked by developing additional models and expectations, based on independent lines of reasoning. Over the long term, the agreement of independent inferences leads to historical knowledge. **Thus while model building is an abstract enterprise, the payoff is fundamentally pragmatic.** 77 | > 78 | > -- @neiman_stylistic_1995, 8. 
79 | 80 | \ 81 | 82 | > The chances of developing a deductively correct answer are increased if we work with **formal models**, capable of representation in either **mathematical formalism** or **computer simulation**, of the evolutionary mechanisms that govern the distribution of selectively neutral forms in time and space. 83 | > 84 | > -- @neiman_stylistic_1995, 9. 85 | 86 | # Drift, Innovation and Diversity 87 | 88 | ## Temporal Dynamics of Drift 89 | 90 | **Drift** is sampling error that accompanies all forms of cultural transmission and **causes individual variants to vanish or dominate randomly**. The speed with which the variation is destroyed increases as the population size decreases. 91 | 92 | Mechanics of the following simulation: 93 | In each time step every individual randomly adopts a variant from somebody else. The probability of getting somebody else's variant is $(N - 1) / N$ while the probability of keeping the own variant is only $1 / N$. 94 | 95 | 96 | - $k$: Amount of variants in a population 97 | - $N_e$: Size of the effective population. The effective population consists of the individuals within a population that is actively involved in a cultural transmission process 98 | - $t$: Time / number of iterations 99 | 100 | ## Temporal Dynamics of Drift 101 | 102 | ```{r echo=FALSE} 103 | inputPanel( 104 | sliderInput("k_drift_simulation", label = "k", 105 | min = 5, max = 15, value = 10, step = 1), 106 | sliderInput("Ne_drift_simulation", label = "Ne", 107 | min = 10, max = 100, value = 20, step = 10), 108 | sliderInput("t_drift_simulation", label = "t", 109 | min = 50, max = 200, value = 100, step = 50), 110 | actionButton("run_button_drift_simulation", "Run simulation") 111 | ) 112 | 113 | drift_simulation_data <- eventReactive( 114 | input$run_button_drift_simulation, { 115 | 116 | # read input 117 | k <- input$k_drift_simulation 118 | N <- input$Ne_drift_simulation 119 | time <- input$t_drift_simulation 120 | 121 | # calculate population parameters 122 | population <- 1:N 123 | variants <- 1:k 124 | timesteps <- 2:time 125 | 126 | # create initial population 127 | pop0 <- tibble::tibble( 128 | time = as.integer(0), 129 | individual = 1:N, 130 | variant = rep_len(variants, N) 131 | ) 132 | 133 | # list to store population stages over time 134 | pop_devel <- list() 135 | pop_devel[[1]] <- pop0 136 | 137 | # simulation loop 138 | for (p1 in timesteps) { 139 | pop_new <- pop_devel[[p1 - 1]] 140 | pop_new$time <- p1 - 1 141 | pop_new$variant <- sample(pop_new$variant, length(pop_new$variant), replace = T) 142 | pop_devel[[p1]] <- pop_new 143 | } 144 | 145 | # bind individual population stages into data.frame 146 | pop_devel_df <- do.call(rbind, pop_devel) 147 | 148 | # calculate number of individuals per timestep and variant 149 | pop_devel_sum <- pop_devel_df %>% 150 | dplyr::group_by( 151 | time, variant 152 | ) %>% 153 | dplyr::summarise( 154 | individuals_with_variant = n() 155 | ) %>% 156 | dplyr::ungroup() %>% 157 | # complete (expand.grid) to fill gaps in the area plot 158 | tidyr::complete( 159 | time, 160 | variant, 161 | fill = list(individuals_with_variant = as.integer(0)) 162 | ) 163 | 164 | pop_devel_sum 165 | }, 166 | ignoreNULL = FALSE 167 | ) 168 | 169 | renderPlot({ 170 | 171 | drift_simulation_data() %>% 172 | ggplot() + 173 | geom_area(aes(x = time, y = individuals_with_variant, fill = variant, group = variant)) + 174 | geom_line(aes(x = time, y = individuals_with_variant, group = variant), position = "stack") + 175 | theme_bw() + 176 | 
xlab(expression(paste("t"))) + 177 | ylab("variants and their occurence in the population [%]") 178 | 179 | }) 180 | ``` 181 | 182 | ## Homogeneity Under Drift 183 | 184 | The theory of neutral alleles allows to describe the **within-population homogeneity $F$** as a function of the effective population size $N_e$. 185 | 186 | $$F_t = \frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}$$ 187 | 188 | - $F_t$: Within-population homogeneity. Calculated as the probability that two randomly chosen individuals in the population carry variants that are copies of a common antecedent variant at a certain time step 189 | 190 | In a given time period the probability of drawing an individual, who learned from the same model as some other randomly selected individual in the previous time period, is $1 / N$. The probability of the opposite is $1 - (1 / N)$. 191 | 192 | - $F_{t - 1}$: Probability that the model of the second individual learned from the same model as the model of the first individual in an earlier time step 193 | 194 | As an effect of drift, $F_t$ approaches one as $t$ increases. 195 | 196 | ## Homogeneity Under Drift 197 | 198 | ```{r echo=FALSE} 199 | Ft <- function(F0, Ne, time) { 200 | if(time == 0) { return(F0) } 201 | 1/Ne + (1 - 1/Ne) * Ft(F0, Ne, time - 1) 202 | } 203 | 204 | inputPanel( 205 | sliderInput("F0_homogeneity_drift", label = "F0 (Ft for t == 0)", 206 | min = 0, max = 1, value = 0.5, step = 0.05), 207 | sliderInput("Ne_homogeneity_drift", label = "Ne", 208 | min = 10, max = 100, value = 20, step = 10), 209 | sliderInput("t_homogeneity_drift", label = "t", 210 | min = 50, max = 200, value = 100, step = 50) 211 | ) 212 | 213 | renderPlot({ 214 | 215 | # read input 216 | F0 <- input$F0_homogeneity_drift 217 | Ne <- input$Ne_homogeneity_drift 218 | time <- input$t_homogeneity_drift 219 | 220 | timesteps <- 0:time 221 | 222 | # apply function for all timesteps 223 | Ft_time <- sapply(timesteps, function(x) { Ft(F0, Ne, x) }) 224 | 225 | data.frame( 226 | t = timesteps, 227 | Ft = Ft_time 228 | ) %>% 229 | ggplot() + 230 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 231 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 232 | geom_line(aes(t, Ft), size = 1) + 233 | theme_bw() + 234 | xlab(latex2exp::TeX("t")) + 235 | ylab(latex2exp::TeX("F_{t}")) 236 | }) 237 | ``` 238 | 239 | ## Temporal Dynamics of Drift and Innovation 240 | 241 | The effect of **drift** on **homongeneity** shrinks if **innovation** is possible. 242 | 243 | Mechanics of the following simulation: 244 | The simulation works like the first one, but now there's a chance of $\mu$ for every individual in every time step to create a new variant. 245 | 246 | 247 | - $\mu$: Innovation rate. Probability of the creation of a new variant 248 | 249 | The effect of drift is still strongly visible, but with increasing $\mu$ it becomes less relevant. New variants can form and replace the dominant one. 
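
A minimal, non-interactive sketch of one such copying-plus-innovation step (the object names and parameter values are illustrative only; the full interactive simulation follows on the next slide):

```{r eval=FALSE}
# one time step of unbiased copying with innovation -- illustrative sketch only
N  <- 20                          # effective population size
mu <- 0.01                        # innovation rate
variants <- rep_len(1:10, N)      # variants carried by the N individuals at time t

# every individual copies the variant of a randomly chosen individual (drift)
variants_t1 <- sample(variants, N, replace = TRUE)

# each individual additionally innovates with probability mu,
# receiving a variant that did not exist before
innovators <- runif(N) < mu
variants_t1[innovators] <- max(variants) + seq_len(sum(innovators))

variants_t1                       # variants carried at time t + 1
```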
250 | 251 | ## Temporal Dynamics of Drift and Innovation 252 | 253 | ```{r echo=FALSE} 254 | inputPanel( 255 | sliderInput("k_drift_simulation_with_innovation", label = "k for t == 0", 256 | min = 5, max = 15, value = 10, step = 1), 257 | sliderInput("Ne_drift_simulation_with_innovation", label = "Ne", 258 | min = 10, max = 100, value = 20, step = 10), 259 | sliderInput("t_drift_simulation_with_innovation", label = "t", 260 | min = 50, max = 200, value = 100, step = 50), 261 | sliderInput("mu_drift_simulation_with_innovation", label = "μ", 262 | min = 0, max = 0.1, value = 0.01, step = 0.01), 263 | actionButton("run_button_drift_simulation_with_innovation", "Run simulation") 264 | ) 265 | 266 | drift_simulation_with_innovation_data <- eventReactive( 267 | input$run_button_drift_simulation_with_innovation, { 268 | 269 | # read input 270 | k <- input$k_drift_simulation_with_innovation 271 | N <- input$Ne_drift_simulation_with_innovation 272 | time <- input$t_drift_simulation_with_innovation 273 | mu <- input$mu_drift_simulation_with_innovation 274 | 275 | # prepare population parameters 276 | population <- 1:N 277 | variants <- 1:k 278 | timesteps <- 2:time 279 | 280 | # create starting population 281 | pop0 <- tibble::tibble( 282 | time = as.integer(0), 283 | individual = 1:N, 284 | variant = rep_len(1:k, N) 285 | ) 286 | 287 | # list to store population stages over time 288 | pop_devel <- list() 289 | pop_devel[[1]] <- pop0 290 | 291 | # simulation loop 292 | last_variant <- max(pop_devel[[1]]$variant) 293 | for (p1 in timesteps) { 294 | pop_new <- pop_devel[[p1 - 1]] 295 | pop_new$time <- p1 - 1 296 | pop_new$variant <- sample(pop_new$variant, length(pop_new$variant), replace = T) 297 | 298 | # innovation 299 | innovate_here <- sample( 300 | c(TRUE, FALSE), 301 | length(pop_new$variant), 302 | prob = c(mu, 1 - mu), 303 | replace = T 304 | ) 305 | new_variants <- seq(last_variant + 1, last_variant + sum(innovate_here)) 306 | last_variant <- last_variant + sum(innovate_here) 307 | pop_new$variant[innovate_here] <- new_variants 308 | 309 | pop_devel[[p1]] <- pop_new 310 | } 311 | 312 | # bind individual population stages into data.frame 313 | pop_devel_df <- do.call(rbind, pop_devel) 314 | 315 | # calculate number of individuals per timestep and variant 316 | pop_devel_sum <- pop_devel_df %>% 317 | dplyr::group_by( 318 | time, variant 319 | ) %>% 320 | dplyr::summarise( 321 | individuals_with_variant = n() 322 | ) %>% 323 | dplyr::ungroup() %>% 324 | tidyr::complete( 325 | time, 326 | variant, 327 | fill = list(individuals_with_variant = as.integer(0)) 328 | ) 329 | 330 | pop_devel_sum 331 | 332 | }, 333 | ignoreNULL = FALSE 334 | ) 335 | 336 | renderPlot({ 337 | drift_simulation_with_innovation_data() %>% 338 | ggplot() + 339 | geom_area(aes(x = time, y = individuals_with_variant, fill = variant, group = variant)) + 340 | geom_line(aes(x = time, y = individuals_with_variant, group = variant), position = "stack") + 341 | theme_bw() + 342 | xlab("t") + 343 | ylab("variants and their occurence in the population [%]") 344 | }) 345 | ``` 346 | 347 | ## Homogeneity Under Drift and Innovation 348 | 349 | Drift and innovation are opposing forces: **drift increases homogeneity and innovation decreases it**. 350 | 351 | This causes an **equilibrium** after some time steps in the simulation. 352 | 353 | We can incorporate innovation into the calculation of $F$ by modifying equation (1). 
354 | 
355 | $$F_t = \left(\frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}\right)(1 - \mu)^2$$ 
356 | 
357 | ## Homogeneity Under Drift and Innovation 
358 | 
359 | ```{r echo=FALSE} 
360 | Ft_innovation <- function(F0, Ne, time, mu) { 
361 |   if(time == 0) {return(F0)} 
362 |   (1/Ne + (1 - 1/Ne) * Ft_innovation(F0, Ne, time - 1, mu)) * (1 - mu)^2 
363 | } 
364 | 
365 | inputPanel( 
366 |   sliderInput("F0_homogeneity_drift_with_innovation", label = "F0 (Ft for t == 0)", 
367 |               min = 0, max = 1, value = 0.5, step = 0.05), 
368 |   sliderInput("Ne_homogeneity_drift_with_innovation", label = "Ne", 
369 |               min = 10, max = 100, value = 20, step = 10), 
370 |   sliderInput("t_homogeneity_drift_with_innovation", label = "t", 
371 |               min = 50, max = 200, value = 100, step = 50), 
372 |   sliderInput("mu_homogeneity_drift_with_innovation", label = "μ", 
373 |               min = 0, max = 1, value = 0.1, step = 0.1) 
374 | ) 
375 | 
376 | renderPlot({ 
377 | 
378 |   # read input 
379 |   F0 <- input$F0_homogeneity_drift_with_innovation 
380 |   Ne <- input$Ne_homogeneity_drift_with_innovation 
381 |   mu <- input$mu_homogeneity_drift_with_innovation 
382 |   time <- input$t_homogeneity_drift_with_innovation 
383 | 
384 |   timesteps <- 0:time 
385 | 
386 |   # apply function for every timestep 
387 |   Ft_innovation_time <- sapply(timesteps, function(x) {Ft_innovation(F0, Ne, x, mu)}) 
388 | 
389 |   data.frame( 
390 |     t = timesteps, 
391 |     Ft = Ft_innovation_time 
392 |   ) %>% 
393 |     ggplot() + 
394 |     geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 
395 |     geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 
396 |     geom_line(aes(t, Ft), size = 1) + 
397 |     theme_bw() + 
398 |     xlab(latex2exp::TeX("t")) + 
399 |     ylab(latex2exp::TeX("F_{t}")) 
400 | }) 
401 | ``` 
402 | 
403 | ## Homogeneity Under Drift and Innovation 
404 | 
405 | $$F_t = \left(\frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}\right)(1 - \mu)^2$$ 
406 | 
407 | The equilibrium is reached when $F_t = F_{t-1}$. Setting $F_t = F_{t-1}$ in equation (2) defines $\hat{F}$. 
408 | 
409 | $$\hat{F} = \frac{(1 - \mu)^2}{N_e - (N_e - 1)(1 - \mu)^2}$$ 
410 | 
411 | If we assume $\mu$ is quite small we can further simplify this definition. 
412 | 
413 | $$\hat{F} \simeq \frac{1}{2 N_e \mu + 1}$$ 
414 | 
415 | That means **the homogeneity of neutral variants within a population is inversely proportional to twice the effective population size times the innovation rate**: $2 N_e \mu$. This expression will be called $\theta$. 
416 | 417 | - $\theta$: Twice the effective population size times the innovation rate ($2 N_e \mu$) 418 | 419 | ## Homogeneity Under Drift and Innovation 420 | 421 | ```{r echo=FALSE} 422 | theta <- function(Ne, mu) { 423 | (2 * Ne * mu) 424 | } 425 | 426 | Fhat <- function(theta) { 427 | 1 / (theta + 1) 428 | } 429 | 430 | inputPanel( 431 | sliderInput("Ne_homogeneity_equilibrium", label = "Ne", 432 | min = 10, max = 100, value = 20, step = 10), 433 | sliderInput("mu_homogeneity_equilibrium", label = "μ", 434 | min = 0, max = 1, value = 0.1, step = 0.1) 435 | ) 436 | 437 | renderText({ 438 | paste0("1 * theta = ", theta(input$Ne_homogeneity_equilibrium, input$mu_homogeneity_equilibrium)) 439 | }) 440 | 441 | renderPlot({ 442 | Ne <- input$Ne_homogeneity_equilibrium 443 | mu <- input$mu_homogeneity_equilibrium 444 | 445 | tibble::tibble( 446 | theta = 0:100 * theta(Ne, mu), 447 | Fhat = sapply(theta, function(x) {Fhat(x)}) 448 | ) %>% 449 | ggplot() + 450 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 451 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 452 | geom_line(aes(theta, Fhat), size = 1) + 453 | theme_bw() + 454 | xlab(latex2exp::TeX("θ")) + 455 | ylab(latex2exp::TeX("\\hat{F}")) 456 | }) 457 | ``` 458 | 459 | ## Homogeneity Under Drift and Innovation 460 | 461 | The **homogeneity $\hat{F}$** of a population can also be described as a **function of the relative frequency of the variants in it**: $p$. 462 | 463 | $p_i$ is the relative frequency of the $i$'th variant in the population, so $p$ is a vector of frequencies with the sum one. The probability of choosing a given variant at random is its relative frequency $p_i$, which is also the probability of picking another copy of this same variant on the second try. The probability of getting this variant twice in a row is therefore $p_i^2$. 464 | 465 | The total probability of getting any of the $i = 1$ to $k$ variants twice in a row is the sum of all these probabilities. 466 | 467 | $$\hat{F} = \sum_{i=1}^{k} p^2_i$$ 468 | 469 | Mechanics of the following simulation: 470 | $p$ is randomly drawn some hundred times from a uniform distribution for every value of $k$. The black line shows the mean result for $\hat{F}$ the ribbon the min and max results. 
471 | 472 | 473 | - $p_i$: relative frequency of the $i$'th variant in the population 474 | 475 | ## Homogeneity Under Drift and Innovation 476 | 477 | ```{r echo=FALSE} 478 | Fhat_relative_frequency <- function(amount_of_variants) { 479 | p <- runif(amount_of_variants) 480 | p <- p / sum(p) 481 | sum(p^2) 482 | } 483 | 484 | inputPanel( 485 | sliderInput("k_relative_frequency", label = "k", 486 | min = 50, max = 150, value = 100, step = 10), 487 | sliderInput("number_of_replications_relative_frequency", label = "Number of simulation runs", 488 | min = 100, max = 1000, value = 500, step = 100), 489 | actionButton("run_button_relative_frequency", "Run simulation") 490 | ) 491 | 492 | relative_frequency_data <- eventReactive( 493 | input$run_button_relative_frequency, { 494 | 495 | # read input 496 | number_of_replications <- input$number_of_replications_relative_frequency 497 | k <- input$k_relative_frequency 498 | 499 | # calculate Fhat 500 | tibble::tibble( 501 | k = 0:k, 502 | Fhat_min = sapply(k, function(x) { 503 | min(replicate(number_of_replications, Fhat_relative_frequency(x))) 504 | }), 505 | Fhat_max = sapply(k, function(x) { 506 | max(replicate(number_of_replications, Fhat_relative_frequency(x))) 507 | }), 508 | Fhat_mean = (Fhat_min + Fhat_max) / 2 509 | ) 510 | 511 | }, 512 | ignoreNULL = FALSE 513 | ) 514 | 515 | renderPlot({ 516 | relative_frequency_data() %>% 517 | ggplot() + 518 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 519 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 520 | geom_ribbon( 521 | aes(x = k, ymin = Fhat_min, ymax = Fhat_max), 522 | fill = "deepskyblue", alpha = 0.5 523 | ) + 524 | geom_line(aes(x = k, y = Fhat_mean), color = "black", size = 1) + 525 | theme_bw() + 526 | xlab(latex2exp::TeX("k")) + 527 | ylab(latex2exp::TeX("\\hat{F}")) 528 | }) 529 | ``` 530 | 531 | ## Diversity 532 | 533 | Instead of looking at homogeneity we can also estimate the **diversity inside a population**. One measure of diversity is the **"effective number" of variants $n_e$**, which is the reciprocal of $\hat{F}$. 534 | 535 | $n_e$ scales linearly with $\theta$. Larger populations contain more diversity at a given level of innovation. Greater diversity is also expected in populations into which new variants are being introduced at higher rates. 
536 | 537 | $$n_e = 2 N_e \mu + 1$$ 538 | 539 | - $n_e$: Effective number of variants 540 | 541 | ## Diversity 542 | 543 | ```{r echo=FALSE} 544 | ne <- function(theta) { 545 | theta + 1 546 | } 547 | 548 | inputPanel( 549 | sliderInput("Ne_diversity_effective_number", label = "Ne", 550 | min = 10, max = 100, value = 20, step = 10), 551 | sliderInput("mu_diversity_effective_number", label = "μ", 552 | min = 0, max = 1, value = 0.01, step = 0.01) 553 | ) 554 | 555 | renderText({ 556 | paste0("1 * theta = ", theta(input$Ne_diversity_effective_number, input$mu_diversity_effective_number)) 557 | }) 558 | 559 | renderPlot({ 560 | 561 | # read input 562 | Ne <- input$Ne_diversity_effective_number 563 | mu <- input$mu_diversity_effective_number 564 | 565 | tibble::tibble( 566 | theta = 0:100 * theta(Ne, mu), 567 | ne = sapply(theta, function(x) {ne(x)}) 568 | ) %>% 569 | ggplot() + 570 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 571 | geom_line(aes(theta, ne), size = 1) + 572 | theme_bw() + 573 | xlab(latex2exp::TeX("θ")) + 574 | ylab(latex2exp::TeX("n_e")) 575 | }) 576 | ``` 577 | 578 | ## Diversity 579 | 580 | **$\theta$ can be estimated empirically by computing the reciprocal of the sum of squares of variant frequencies in a population**. 581 | 582 | $$t_F = \frac{1}{\sum_{i=1}^{k} p^2_i} - 1$$ 583 | 584 | We can call such estimates $t_F$, to distinguish them from the actual population values and to remind ourselves that they are based on the homogeneity $\hat{F}$ in Equation (5). 585 | 586 | $$\hat{F} = \sum_{i=1}^{k} p^2_i$$ 587 | 588 | With this equation we can get useful insights into variations in $\theta$ among groups or demes. 589 | 590 | Mechanics of the following simulation: 591 | $p$ is randomly drawn some hundred times from a uniform distribution for every value of $k$. The black line shows the mean result for $t_F$, the ribbon the min and max results. 
592 | 593 | 594 | - $t_F$: Estimated $\theta$ by the sum of squares of variant frequencies 595 | 596 | ## Diversity 597 | 598 | ```{r echo=FALSE} 599 | theta_F <- function(amount_of_variants) { 600 | p <- runif(amount_of_variants) 601 | p <- p / sum(p) 602 | (1 / sum(p^2)) - 1 603 | } 604 | 605 | inputPanel( 606 | sliderInput("k_reciprocal_relative_frequency", label = "k", 607 | min = 50, max = 150, value = 100, step = 10), 608 | sliderInput("number_of_replications_reciprocal_relative_frequency", label = "Number of simulation runs", 609 | min = 100, max = 1000, value = 500, step = 100), 610 | actionButton("run_button_reciprocal_relative_frequency", "Run simulation") 611 | ) 612 | 613 | reciprocal_relative_frequency_data <- eventReactive( 614 | input$run_button_reciprocal_relative_frequency, { 615 | 616 | # read input 617 | number_of_replications <- input$number_of_replications_reciprocal_relative_frequency 618 | k <- input$k_reciprocal_relative_frequency 619 | 620 | # calculate Fhat 621 | tibble::tibble( 622 | k = 0:k, 623 | theta_F_min = sapply(k, function(x) { 624 | min(replicate(number_of_replications, theta_F(x))) 625 | }), 626 | theta_F_max = sapply(k, function(x) { 627 | max(replicate(number_of_replications, theta_F(x))) 628 | }), 629 | theta_F_mean = (theta_F_min + theta_F_max) / 2 630 | ) 631 | 632 | }, 633 | ignoreNULL = FALSE 634 | ) 635 | 636 | renderPlot({ 637 | reciprocal_relative_frequency_data() %>% 638 | ggplot() + 639 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 640 | geom_ribbon( 641 | aes(x = k, ymin = theta_F_min, ymax = theta_F_max), 642 | fill = "deepskyblue", alpha = 0.5 643 | ) + 644 | geom_line(aes(x = k, y = theta_F_mean), size = 1) + 645 | theme_bw() + 646 | xlab(latex2exp::TeX("k")) + 647 | ylab(latex2exp::TeX("t_F")) 648 | }) 649 | ``` 650 | 651 | # Archaeological Application 652 | 653 | ## Diversity and Sample Size 654 | 655 | The **frequency of variants** in a population of social learners is not equal to the **frequency preserved and documented** in the **archaeological assemblage**. Under appropriate taphonomic circumstances, however, they are surprisingly similar. 656 | 657 | Another problem arises because we try to estimate diversity only on the basis of a **sample derived from a population**. The equations (5) and (7) are similar to the Shannon-Weaver information statistic, which is very sensitive to sample-size variation. 658 | 659 | $$\hat{F} = \sum_{i=1}^{k} p^2_i$$ 660 | 661 | $$t_F = \frac{1}{\sum_{i=1}^{k} p^2_i} - 1$$ 662 | 663 | $$H = -\sum_{i = 1}^{k} p_i \log(p_i)$$ 664 | 665 | - $H$: [Shannon's diversity index](http://www.tiem.utk.edu/~gross/bioed/bealsmodules/shannonDI.html) 666 | 667 | ## Diversity and Sample Size 668 | 669 | $\theta$ is related to the sum of squared variant frequencies in a population, so it should be possible to **predict for a given value of $\theta$ how many variants occur with certain frequencies**. 670 | 671 | $$t_F = \frac{1}{\sum_{i=1}^{k} p^2_i} - 1$$ 672 | 673 | When $\theta$ is low ($\theta \ll 1$), we should expect that most of the time, a population will be dominated by a few variants, with other variants at low frequencies. When $\theta$ is high ($\theta \gg 1$), we are more likely to see a large number of variants at low to moderate frequencies. 674 | 675 | If the variants are selectively neutral, the expected number of different variants $k$ found in a sample drawn from a population is a function of it's sample size $n$ and $\theta$ value. 
The number of variants will be larger when either $\theta$ or $n$ are large. 676 | 677 | $$E(k) = \sum_{i = 0}^{n - 1} \frac{\theta}{\theta + i}$$ 678 | 679 | - $E(k)$: Expected number of different variants 680 | - $n$: Sample size 681 | 682 | ## Diversity and Sample Size 683 | 684 | ```{r echo=FALSE} 685 | Ek <- function(n, theta) { 686 | ressum <- 0 687 | for (i in 0:(n-1)) { 688 | ressum = ressum + (theta / (theta + i)) 689 | } 690 | return(ressum) 691 | } 692 | 693 | inputPanel( 694 | sliderInput("Ne_expected_variants", label = "Ne", 695 | min = 10, max = 100, value = 20, step = 10), 696 | sliderInput("mu_expected_variants", label = "μ", 697 | min = 0, max = 1, value = 0.01, step = 0.01), 698 | sliderInput("n_expected_variants", label = "n", 699 | min = 0, max = 100, value = 10, step = 1) 700 | ) 701 | 702 | renderText({ 703 | paste0("1 * theta = ", theta(input$Ne_expected_variants, input$mu_expected_variants)) 704 | }) 705 | 706 | renderPlot({ 707 | 708 | # read input 709 | Ne <- input$Ne_expected_variants 710 | mu <- input$mu_expected_variants 711 | n <- input$n_expected_variants 712 | 713 | tibble::tibble( 714 | theta = 0:100 * theta(Ne, mu), 715 | n = rep(n, 101), 716 | Ek = purrr::map2_dbl(n, theta, function(a, b) {Ek(a, b)}) 717 | ) %>% 718 | ggplot() + 719 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 720 | geom_line(aes(theta, Ek), size = 1) + 721 | theme_bw() + 722 | xlab(latex2exp::TeX("θ")) + 723 | ylab(latex2exp::TeX("E(k)")) 724 | }) 725 | ``` 726 | 727 | ## Diversity and Sample Size 728 | 729 | This could be used to produce a **maximum likelihood estimate of $\theta$**, that is the value of $\theta$ that maximizes the chances of drawing the observed number of variants $k$ in a sample of the size $n$. 730 | 731 | Unfortunately it doesn't have an analytic solution, but we can still use it to estimate $\theta$ iteratively by changing its value until $E(k)$ equals the observed number of variants. 732 | 733 | The following applet allows to solve (9) for theta approximately. It starts at $\theta = 10$, changes it in steps of $0.1$ and stops when $|E(k) - k| < 0.5$. 734 | 735 | We call the result of doing so $t_E$. Both $t_E$ and $t_F$ offer us special statistical summaries of assemblage diversity, but there are theoretical reasons to prefer $t_E$ if $n$ and $k$ (and not just $p$) are known. 
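
The step-wise search described above can also be replaced by a standard root finder. A minimal sketch, assuming illustrative observed values `n_obs` and `k_obs` (not data from the paper):

```{r eval=FALSE}
# expected number of variants E(k) in a sample of size n for a given theta (equation 9)
Ek <- function(n, theta) {
  sum(theta / (theta + 0:(n - 1)))
}

# illustrative observed values
n_obs <- 20    # sample size
k_obs <- 10    # observed number of variants

# t_E: the theta for which E(k) equals the observed number of variants
t_E <- uniroot(
  function(theta) Ek(n_obs, theta) - k_obs,
  interval = c(1e-6, 1000)
)$root

t_E
```

This avoids having to pick a starting value and a step width by hand.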
736 | 737 | ## Diversity and Sample Size 738 | 739 | ```{r echo=FALSE} 740 | inputPanel( 741 | sliderInput("n_iterative_theta", label = "n", 742 | min = 10, max = 100, value = 20, step = 10), 743 | sliderInput("k_iterative_theta", label = "k", 744 | min = 5, max = 15, value = 10, step = 1), 745 | actionButton("run_iterative_theta", "Approximate theta") 746 | ) 747 | 748 | iterative_theta_data <- eventReactive( 749 | input$run_iterative_theta, { 750 | 751 | # read input 752 | n <- input$n_iterative_theta 753 | k <- input$k_iterative_theta 754 | start_theta <- 10 755 | 756 | # iterative solving loop 757 | theta_iter <- start_theta 758 | while (TRUE) { 759 | diffi <- Ek(n, theta_iter) - k 760 | if (diffi < 0) { 761 | theta_iter <- theta_iter + 0.1 762 | } else if (diffi > 0) { 763 | theta_iter <- theta_iter - 0.1 764 | } 765 | if (abs(diffi) < 0.5) { 766 | break; 767 | } 768 | } 769 | 770 | theta_iter 771 | 772 | }, 773 | ignoreNULL = FALSE 774 | ) 775 | 776 | renderText({ 777 | paste0("theta ≈ ", round(iterative_theta_data(), 2)) 778 | }) 779 | ``` 780 | 781 | ## Archaeological Sample Size 782 | 783 | $t_E$ may yield wrong estimates of $\theta$ for very large archaeological samples that accumulated over long periods of time. 784 | 785 | Equation (9) implies that for a given number of variants $k$, $t_E$ will decline as the sample size $n$ increases. Usually $n$ is limited by the population size $N$, but in case of a long-term sample this limit can be exceeded: $n$ can grow beyond $N$ and causes the estimate of $\theta$ to be too low. 786 | 787 | In this case $t_F$ can again be the better function to determine $\theta$. Another solution is to look at the difference between both estimators ($t_F - t_E$) as a function of assemblage size. For the application in archaeological contexts it's always important to carefully examine the relationships among $t_F$ and $t_E$ and possible sample size effects. 788 | 789 | ## Inferences About Group Size and Innovation Rate 790 | 791 | $\theta$ is defined as twice the effective population size times the innovation rate ($2 N_e \mu$). 792 | 793 | The innovation rate $\mu$ includes the combined effects of both in situ innovation $v$ and the introduction of novel variants from other groups $m$. If $\mu = v + m$, then $N_e \mu = N_e v + N_e m$. 794 | 795 | $v$ should be roughly constant across demes, but $m$ is likely to be more variable in time and space. Under these circumstances, most of the variation in $\theta$ associated with $\mu$ will be caused by variation in intergroup transmission rates. 796 | 797 | $\theta$ mostly depends on the number of times local group members learn from members of other groups: $N_e m$. That means that variation in $\theta$ is an indicator for the absolute amount of cultural transmission among demes in a geographical region. 798 | 799 | # Drift, Intergroup Transmission, and Interassemblage Distance 800 | 801 | ## Dynamics of Stylistic Distance between Two Groups 802 | 803 | To compare groups and track the cultural transmission we need a simple **measure of between-group similarity**. One possibility is to look at the **squared Euclidean distance $d_{ij}^2$**. It's the sum of squared differences in variant frequencies $p$ between two groups $i$ and $j$. 
804 | 805 | $$d_{ij}^2 = \sum_{k = 1}^{n} (p_{ik} - p_{jk})^2$$ 806 | 807 | - $d_{ij}^2$: Squared Euclidean distance between two groups 808 | - $n$: total amount of variants (here not sample size) 809 | 810 | ## Dynamics of Stylistic Distance between Two Groups 811 | 812 | ```{r echo=FALSE} 813 | inputPanel( 814 | sliderInput("n_sed_general", label = "n", 815 | min = 10, max = 100, value = 50, step = 10), 816 | actionButton("run_sed_general", "Run simulation") 817 | ) 818 | 819 | sed_general_data <- eventReactive( 820 | input$run_sed_general, { 821 | 822 | # read input 823 | n <- input$n_sed_general 824 | 825 | # calculate 826 | tibble::tibble( 827 | amount_of_variants = 0:n, 828 | sed_min = sapply(amount_of_variants, function(x) { 829 | min(replicate(500, sed(runif(x), runif(x)))) 830 | }), 831 | sed_max = sapply(amount_of_variants, function(x) { 832 | max(replicate(500, sed(runif(x), runif(x)))) 833 | }), 834 | sed_mean = (sed_min + sed_max) / 2 835 | ) 836 | 837 | }, 838 | ignoreNULL = FALSE 839 | ) 840 | 841 | renderPlot({ 842 | sed_general_data() %>% 843 | ggplot() + 844 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 845 | geom_ribbon( 846 | aes(x = amount_of_variants, ymin = sed_min, ymax = sed_max), 847 | fill = "deepskyblue", alpha = 0.5 848 | ) + 849 | geom_line(aes(x = amount_of_variants, y = sed_mean), color = "black", size = 1) + 850 | theme_bw() + 851 | xlab(latex2exp::TeX("k")) + 852 | ylab(latex2exp::TeX("d_{ij}^2")) 853 | }) 854 | ``` 855 | 856 | ## Dynamics of Stylistic Distance between Two Groups 857 | 858 | The study of intergroup cultural transmission requires a model setup that contains the effect of drift but also exchange between groups. One promising approach comes from **migration matrix models**, which in population genetics have proven useful. 859 | 860 | Such models reveal what happens to **variant frequencies** in a **finite number of demes** that are subject to the joint effects of **drift**, whose strength is controlled by the effective size of each population, and **intergroup transmission**, occurring between demes at constant pairwise rates. 861 | 862 | There is **no** role in these models for **in situ innovation**, but they offer a means of checking the assumption that variation in $\theta$ is largely a function of intergroup transmission. 863 | 864 | Mechanics of the following simulation: 865 | This simulation is constructed like the very first one about the effects of drift. Only in this case, there is a set probability, the intergroup transmission rate $m_i$, that the individual contacted is derived from the other group. If $m_i = 0$, both groups act independently, but if it increases they tend to develop roughly alike. 866 | 867 | 868 | 869 | The line chart below the variant frequency area plot shows the distance measure $d_{ij}^2$ calculated for the two groups at the respective point in model time. 
870 | 871 | 872 | $m_i$: intergroup transmission rate 873 | 874 | ## Dynamics of Stylistic Distance between Two Groups 875 | 876 | ```{r echo=FALSE} 877 | inputPanel( 878 | sliderInput("k_two_groups", label = "k", 879 | min = 5, max = 15, value = 10, step = 1), 880 | sliderInput("N_two_groups", label = "N", 881 | min = 10, max = 100, value = 20, step = 10), 882 | sliderInput("t_two_groups", label = "t", 883 | min = 50, max = 200, value = 100, step = 50), 884 | sliderInput("mi_two_groups", label = "mi", 885 | min = 0, max = 1, value = 0.1, step = 0.01), 886 | actionButton("run_button_two_groups", "Run simulation") 887 | ) 888 | 889 | group_drift_simulation_data <- eventReactive( 890 | input$run_button_group_drift_simulation, { 891 | 892 | # run simulation (code in source file) 893 | group_drift_simulation( 894 | input$k_two_groups, 895 | input$N_two_groups, 896 | input$t_two_groups, 897 | input$mi_two_groups 898 | ) 899 | 900 | }, 901 | ignoreNULL = FALSE 902 | ) 903 | 904 | renderPlot({ 905 | A <- group_drift_simulation_data() %>% 906 | ggplot() + 907 | geom_area( 908 | aes(x = time, y = individuals_with_variant, fill = variant, group = variant) 909 | ) + 910 | geom_line( 911 | aes(x = time, y = individuals_with_variant, group = variant), 912 | position = "stack" 913 | ) + 914 | theme_bw() + 915 | xlab("t") + 916 | ylab("variants and their occurence in the population [%]") + 917 | facet_grid(group ~ .) 918 | 919 | B <- calculate_sed_for_group_drift_simulation_result( 920 | group_drift_simulation_data() 921 | ) %>% 922 | ggplot() + 923 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 924 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 925 | geom_line(aes(x = t, y = sed), color = "black", size = 1, alpha = 0.3) + 926 | geom_point(aes(x = t, y = sed), color = "black", size = 1) + 927 | theme_bw() + 928 | xlab(latex2exp::TeX("t")) + 929 | ylab(latex2exp::TeX("d_{ij}^2")) 930 | 931 | cowplot::plot_grid(A, B, nrow = 2, align = "v", axis = "lr") 932 | }) 933 | ``` 934 | 935 | ## Dynamics of Stylistic Distance between Two Groups 936 | 937 | We can run the above simulation many times to see different results random drift can produce. 938 | 939 | At the **beginning** the **distance between the groups is always zero**, because the variants and variant frequencies are equal in both. However, as time passes, the **distance increases up to a quasi-stationary equilibrium**. 940 | 941 | The equilibrium is only quasi-stationary because the groups are finite and there is no innovation: if the simulations are continued for a sufficient number of time periods, a single variant is fixed and as a result the between-group distance reverts to zero. 942 | 943 | The equilibrium holds over the period during which the opposing forces of drift, locally reducing variation within each group, and intergroup transmission, introducing potentially novel variation into each group from the other, are balanced. Later drift globally depletes all variation in both groups. 944 | 945 | Higher levels of intergroup transmission $m_i$ will lower the equilibrium level, while lower levels of intergroup transmission raise it. Lower effective sizes $N$ for either or both groups will raise the equilibrium since the effects of drift scale with $N$. 946 | 947 | The speed with which the equilibrium is achieved depends heavily on the modes of transmission. Horizontal transmission can cause rather fast changes, while vertical and oblique transmission may take centuries. 
The equilibrium is also approached in case of initially divergent groups with different variants and variant frequencies if there is intergroup transmission among them ($m_i > 0$). 948 | 949 | ## Dynamics of Stylistic Distance between Two Groups 950 | 951 | ```{r echo=FALSE} 952 | inputPanel( 953 | sliderInput("k_two_groups_many_runs", label = "k", 954 | min = 5, max = 15, value = 10, step = 1), 955 | sliderInput("N_two_groups_many_runs", label = "N", 956 | min = 10, max = 100, value = 20, step = 10), 957 | sliderInput("t_two_groups_many_runs", label = "t", 958 | min = 20, max = 200, value = 20, step = 20), 959 | sliderInput("mi_two_groups_many_runs", label = "mi", 960 | min = 0, max = 1, value = 0.1, step = 0.01), 961 | sliderInput("sim_runs_two_groups_many_runs", label = "Number of simulation runs", 962 | min = 1, max = 100, value = 20, step = 1), 963 | actionButton("run_button_two_groups_many_runs", "Run simulation") 964 | ) 965 | 966 | two_groups_many_runs_data <- eventReactive( 967 | input$run_button_two_groups_many_runs, { 968 | 969 | # run the simulation many times and store results in res_list 970 | res_list <- list() 971 | for (i in 1:input$sim_runs_two_groups_many_runs) { 972 | res_list[[i]] <- calculate_sed_for_group_drift_simulation_result( 973 | group_drift_simulation( 974 | input$k_two_groups_many_runs, 975 | input$N_two_groups_many_runs, 976 | input$t_two_groups_many_runs, 977 | input$mi_two_groups_many_runs 978 | ), 979 | sim_run = i 980 | ) 981 | } 982 | 983 | # rbind individual simulation results into one data.frame 984 | do.call(rbind, res_list) 985 | 986 | }, 987 | ignoreNULL = FALSE 988 | ) 989 | 990 | renderPlot({ 991 | two_groups_many_runs_data() %>% 992 | ggplot() + 993 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 994 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 995 | geom_line(aes(x = t, y = sed, group = sim_run), color = "black", size = 1, alpha = 0.3) + 996 | geom_point(aes(x = t, y = sed), color = "black", size = 1) + 997 | theme_bw() + 998 | xlab(latex2exp::TeX("t")) + 999 | ylab(latex2exp::TeX("d_{ij}^2")) 1000 | }) 1001 | ``` 1002 | 1003 | ## Stylistic Distances for Multiple Groups 1004 | 1005 | The real world is better represented in terms of more than two groups learning from another. The multi-group case can be described in a matrix based model. The matrix $\mathbf{M}$ contains the intergroup transmission rates $m_{ij}$ that give the proportion of the $i$'th deme that learned from the $j$'th deme. The diagonal elements of $\mathbf{M}$ are the proportion of each deme that learned from its own members. The matrix $\mathbf{U}$ contains the reciprocals of the effective population sizes $1 / N_e$ of each group on its diagonal and $0$'s elsewhere. 1006 | 1007 | In this setup all groups initially have the same variant frequencies. This means that the expected frequencies for each group in any time period are identical and equal to the starting frequency. Drift-driven departures from this expectation for a given deme can be characterized analytically in terms of variance. Since there are multiple demes in the system, their joint evolution must be handled in terms of a matrix of variances and covariances: $\mathbf{V}$. Diagonal elements in $\mathbf{V}$ contain variances: the square of the departure of the variant frequency in the respective deme at time $t$ from the situation at the starting time. The other elements are covariances: the product of the departures of the variant frequencies in each pair of demes. 
1008 | 1009 | $$\mathbf{V}^{(t)} = \sum_{r = 0}^{t - 1} \mathbf{M}^r \mathbf{U}(\mathbf{M}^r)^{\prime}$$ 1010 | 1011 | - $\mathbf{M}$: Matrix of intergroup transmission rates 1012 | - $\mathbf{V}^{(t)}$: Matrix of variances and covariances 1013 | - $r$: Index of the successive time periods 1014 | - $\mathbf{U}$ Matrix of reciprocals of the effective population sizes ($1 / N_e$). These are on the diagonal, the rest is $0$ 1015 | - $(\mathbf{M}^r)^{\prime}$: M transposed 1016 | 1017 | Equation (11) says that in the first time period, the covariance between two groups is a function of the sum of products of the intergroup transmission rates from all groups to those two groups and the reciprocal of their two effective population sizes. In later time periods, as individuals who learned from non-group members are in turn distributed among the other groups, the intergroup transmission rates from each group to *all* the others assumes increasing importance. The resulting cumulative effect of the indirect movement of variants among groups is handled by successively powering the intergroup transmission matrix. 1018 | 1019 | The magnitude of the covariance between two groups will scale inversely with their sizes $N$ and positively with the intergroup transmission rates $m$. High levels of intergroup transmission will mean that whatever departures from the initial frequency occur, they will be similar and in the same direction, hence the covariance between the groups will be high. On the other hand, if either or both group sizes are low, causing drift to play a stronger role, variant frequencies are less likely to depart from initial frequencies in a similar fashion. The covariances will be low. 1020 | 1021 | The matrix of variances and covariances $\mathbf{V}$ can be converted to a matrix of squared Euclidean distances $d_{ij}^{2}$ (or $\mathbf{SED}$). 1022 | 1023 | $$d_{ij}^{2(t)} = v_{ii}^{(t)} + v_{jj}^{(t)} - 2 v_{ij}^{(t)}$$ 1024 | 1025 | - $v_{ii}^{(t)}$: Diagonal element of $\mathbf{V}^{(t)}$ -- a variance. The square of of the departure of the variant frequency in the $i$'th deme at time $t$ from its starting frequency when the demes were identical, standardized by the variance of that starting frequency 1026 | - $v_{ij}^{(t)}$: Covariance. 
1027 | 1028 | ## Stylistic Distances for Multiple Groups 1029 | 1030 | ```{r echo = FALSE} 1031 | inputPanel( 1032 | sliderInput("Ne_matrix", label = "Ne", 1033 | min = 10, max = 100, value = 20, step = 10), 1034 | sliderInput("t_matrix", label = "t", 1035 | min = 10, max = 100, value = 20, step = 10), 1036 | actionButton("run_button_matrix_calculation", "Run matrix calculation") 1037 | ) 1038 | 1039 | matrix_calculation_data <- eventReactive( 1040 | input$run_button_matrix_calculation, { 1041 | Mk <- function(k, mi_3) { 1042 | M <- matrix(data = rep(0, 5^2), nrow = 5, ncol = 5) 1043 | # fixed 5x5 transmission matrix; each row sums to 1 (k and mi_3 are unused here) 1044 | M[1,] <- c(0.4, 0.0, 0.2, 0.0, 0.4) 1045 | M[2,] <- c(0.0, 0.4, 0.2, 0.4, 0.0) 1046 | M[3,] <- c(0.2, 0.2, 0.2, 0.2, 0.2) 1047 | M[4,] <- c(0.2, 0.1, 0.3, 0.2, 0.2) 1048 | M[5,] <- c(0.2, 0.3, 0.1, 0.2, 0.2) 1049 | M 1050 | } 1051 | 1052 | multi_group_matrizes(input$Ne_matrix, input$t_matrix, Mk) 1053 | }, 1054 | ignoreNULL = FALSE 1055 | ) 1056 | 1057 | renderPlot({ 1058 | plotlist <- lapply(matrix_calculation_data(), function(x) { 1059 | ggplot(x, aes(x = Var2, y = Var1)) + 1060 | geom_raster(aes(fill = value)) + 1061 | geom_text(aes(x = Var2, y = Var1, label = round(value, 3)), color = "white", size = 5) + 1062 | scale_y_reverse(sec.axis = dup_axis()) + 1063 | scale_x_continuous(sec.axis = dup_axis()) + 1064 | xlab("group") + 1065 | ylab("group") 1066 | }) 1067 | 1068 | cowplot::plot_grid( 1069 | plotlist = plotlist, 1070 | labels = c("M", "M'", "U", "V", "SED", "MSD"), 1071 | nrow = 3, ncol = 2 1072 | ) 1073 | }, 1074 | height = 1200 1075 | ) 1076 | ``` 1077 | 1078 | # Woodland Interassemblage Distance 1079 | 1080 | ## Trends in Interassemblage Distance 1081 | 1082 | The migration matrix approach can be applied to cultural transmission contexts by computing a matrix of squared Euclidean distances $\mathbf{SED}$ between the groups for each of the variants. We expect to find that the overall level of differentiation among groups, as measured by the mean of the squared Euclidean distances $\mathbf{MSD}$, is the mirror image of our estimates of $\theta$. 1083 | 1084 | ## Diversity and Distance for Individual Assemblages 1085 | 1086 | The mean of the squared Euclidean distances $\bar{d}_{i}^{2}$ (or $MSD$) can be calculated with equation (13).
1087 | 1088 | $$\bar{d}_{i}^{2} = \sum_{j = 1}^{n} d_{ij}^{2} / (n - 1), i \neq j$$ 1089 | 1090 | ## Diversity and Distance for Individual Assemblages 1091 | 1092 | ```{r echo = FALSE} 1093 | inputPanel( 1094 | sliderInput("k_two_groups_many_runs_with_msd", label = "k", 1095 | min = 15, max = 25, value = 20, step = 1), 1096 | sliderInput("N_two_groups_many_runs_with_msd", label = "N", 1097 | min = 10, max = 100, value = 20, step = 10), 1098 | sliderInput("t_two_groups_many_runs_with_msd", label = "t", 1099 | min = 20, max = 200, value = 20, step = 20), 1100 | sliderInput("mi_two_groups_many_runs_with_msd", label = "mi", 1101 | min = 0, max = 1, value = 0.1, step = 0.01), 1102 | sliderInput("sim_runs_two_groups_many_runs_with_msd", label = "Number of simulation runs", 1103 | min = 1, max = 100, value = 20, step = 1), 1104 | actionButton("run_button_two_groups_many_runs_with_msd", "Run simulation") 1105 | ) 1106 | 1107 | two_groups_many_runs_with_msd_data <- eventReactive( 1108 | input$run_button_two_groups_many_runs_with_msd, { 1109 | 1110 | # read input 1111 | k <- input$k_two_groups_many_runs_with_msd 1112 | N <- input$N_two_groups_many_runs_with_msd 1113 | t_final <- input$t_two_groups_many_runs_with_msd 1114 | mi <- input$mi_two_groups_many_runs_with_msd 1115 | sim_runs <- input$sim_runs_two_groups_many_runs_with_msd 1116 | 1117 | mixed_list <- list() 1118 | 1119 | # run group drift simulation 1120 | res_list <- list() 1121 | for (i in 1:sim_runs) { 1122 | res_list[[i]] <- calculate_sed_for_group_drift_simulation_result( 1123 | group_drift_simulation( 1124 | k, 1125 | N, 1126 | t_final, 1127 | mi 1128 | ), 1129 | sim_run = i 1130 | ) 1131 | } 1132 | 1133 | mixed_list[[1]] <- do.call(rbind, res_list) 1134 | 1135 | # calculate matrix and mean squared distance 1136 | Mk_msd <- function(k, mi) { 1137 | M <- matrix(data = rep(0, 2^2), nrow = 2, ncol = 2) 1138 | M[1,] <- c(1 - mi, mi) 1139 | M[2,] <- c(mi, 1 - mi) 1140 | M 1141 | } 1142 | 1143 | msd <- c() 1144 | for (t_m in 0:t_final) { 1145 | msd[t_m + 1] <- multi_group_matrizes( 1146 | N, t_m, Mk_msd, mi 1147 | )$longmosed$value[2] 1148 | } 1149 | 1150 | mixed_list[[2]] <- data.frame(t = 0:t_final, sed = msd) 1151 | 1152 | mixed_list 1153 | }, 1154 | ignoreNULL = FALSE 1155 | ) 1156 | 1157 | renderPlot({ 1158 | mixed_list <- two_groups_many_runs_with_msd_data() 1159 | 1160 | ggplot() + 1161 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 1162 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 1163 | geom_line( 1164 | data = mixed_list[[1]], 1165 | aes(x = t, y = sed, group = sim_run), 1166 | color = "black", size = 1, alpha = 0.3 1167 | ) + 1168 | geom_point( 1169 | data = mixed_list[[1]], 1170 | aes(x = t, y = sed), 1171 | color = "black", size = 1 1172 | ) + 1173 | geom_line( 1174 | data = mixed_list[[2]], 1175 | aes(x = t, y = sed), 1176 | color = "dodgerblue4", size = 2 1177 | ) + 1178 | theme_bw() + 1179 | xlab(latex2exp::TeX("t")) + 1180 | ylab(latex2exp::TeX("d_{ij}^2")) 1181 | }) 1182 | ``` 1183 | 1184 | ## References 1185 | --------------------------------------------------------------------------------