├── .Rbuildignore ├── .gitattributes ├── static_plots └── neiman_general.jpeg ├── presentation_version ├── styles_presentation.css ├── README.md └── neiman1995_presentation.Rmd ├── run_neiman_simulation.R ├── styles.css ├── neiman1995.Rproj ├── DESCRIPTION ├── README.md ├── .gitignore ├── document_code ├── squared_euclidian_distance.R ├── multi_group_matrix_calculation.R └── group_drift_simulation.R ├── references.bib ├── simulation_code ├── 200_neiman_simulation_plot_basic.R ├── 300_run_neiman_simulation.R └── 100_general_neiman_simulation.R ├── LICENSE ├── .travis.yml └── neiman1995.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * linguist-vendored 2 | R/* linguist-vendored=false 3 | -------------------------------------------------------------------------------- /static_plots/neiman_general.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/neiman1995/master/static_plots/neiman_general.jpeg -------------------------------------------------------------------------------- /presentation_version/styles_presentation.css: -------------------------------------------------------------------------------- 1 | h2 { 2 | font-size: 22px; 3 | letter-spacing: 0; 4 | margin-bottom: -50px; 5 | } 6 | -------------------------------------------------------------------------------- /presentation_version/README.md: -------------------------------------------------------------------------------- 1 | This presentation version is just an early draft, but could be fleshed out to become solid course material in the future. 2 | -------------------------------------------------------------------------------- /run_neiman_simulation.R: -------------------------------------------------------------------------------- 1 | library(magrittr) 2 | 3 | empty <- lapply( 4 | list.files("simulation_code", full.names = TRUE), 5 | function(y) { 6 | message("\n###### ", y, " ######\n") 7 | source(y) 8 | rm(list = ls()) 9 | } 10 | ) 11 | -------------------------------------------------------------------------------- /styles.css: -------------------------------------------------------------------------------- 1 | /*create an equation counter variable */ 2 | 3 | body { 4 | counter-reset: EQU_COUNT 0 ; 5 | } 6 | 7 | .MathJax_Display:before { 8 | content: "(" counter(EQU_COUNT) ")\00a0\00a0\00a0"; 9 | counter-increment: EQU_COUNT; 10 | } 11 | -------------------------------------------------------------------------------- /neiman1995.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: placeholder 2 | Title: Does not matter. 
3 | Version: 0.0.1 4 | Imports: 5 | cowplot, 6 | dplyr, 7 | expm, 8 | ggplot2, 9 | knitr, 10 | latex2exp, 11 | magrittr, 12 | purrr, 13 | reshape2, 14 | rmarkdown, 15 | rsconnect, 16 | shiny, 17 | tibble, 18 | tidyr 19 | 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Travis-CI Build Status](https://travis-ci.org/nevrome/neiman1995.svg?branch=master)](https://travis-ci.org/nevrome/neiman1995) 2 | 3 | The [interactive document](https://rmarkdown.rstudio.com/authoring_shiny.html) [neiman1995.Rmd](https://nevrome.shinyapps.io/neiman1995) aims to visualize the model functions in 4 | 5 | Neiman, F. (1995). Stylistic Variation in Evolutionary Perspective: Inferences from Decorative Diversity and Interassemblage Distance in Illinois Woodland Ceramic Assemblages. American Antiquity, 60(1), 7-36. [doi:10.2307/282074](http://dx.doi.org/10.2307/282074) 6 | 7 | For study and teaching purposes. 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | .Rproj.user 35 | 36 | # rendered documents 37 | *.html 38 | 39 | # rsconnect deployment files 40 | rsconnect 41 | 42 | -------------------------------------------------------------------------------- /document_code/squared_euclidian_distance.R: -------------------------------------------------------------------------------- 1 | sed <- function(pi, pj) { 2 | pi <- pi / sum(pi) 3 | pj <- pj / sum(pj) 4 | sum((pi - pj)^2) 5 | } 6 | 7 | calculate_sed_for_group_drift_simulation_result <- function(pop_devel_sum, sim_run = 1) { 8 | 9 | groups <- pop_devel_sum %>% 10 | base::split(., .$group) 11 | 12 | A <- groups$A %>% 13 | base::split(., .$time) 14 | B <- groups$B %>% 15 | base::split(., .$time) 16 | 17 | A <- lapply(A, function(x) {x$individuals_with_variant}) 18 | B <- lapply(B, function(x) {x$individuals_with_variant}) 19 | 20 | sed_res <- purrr::map2_dbl(A, B, function(a, b) {sed(a, b)}) 21 | 22 | tibble::tibble( 23 | t = unique(pop_devel_sum$time), 24 | sed = sed_res, 25 | sim_run = sim_run 26 | ) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /references.bib: -------------------------------------------------------------------------------- 1 | @Article{neiman_stylistic_1995, 2 | title = {Stylistic {{Variation}} in {{Evolutionary Perspective}}: {{Inferences}} from {{Decorative Diversity}} and {{Interassemblage Distance}} in {{Illinois Woodland Ceramic Assemblages}}}, 3 | volume = {60}, 4 | issn = {0002-7316}, 5 | shorttitle = {Stylistic {{Variation}} in {{Evolutionary Perspective}}}, 6 | doi = {10.2307/282074}, 7 | number = {1}, 8 | journal = {American Antiquity}, 9 | author = {Fraser D. 
Neiman}, 10 | year = {1995}, 11 | pages = {7-36}, 12 | } 13 | @Article{dunnell1978style, 14 | title = {Style and {{Function}}: {{A Fundamental Dichotomy}}}, 15 | volume = {43}, 16 | doi = {10.2307/279244}, 17 | number = {2}, 18 | journal = {American Antiquity}, 19 | author = {Robert C. Dunnell}, 20 | year = {1978}, 21 | keywords = {Style transmission}, 22 | pages = {192-202}, 23 | } 24 | -------------------------------------------------------------------------------- /simulation_code/200_neiman_simulation_plot_basic.R: -------------------------------------------------------------------------------- 1 | #### specialized plot function #### 2 | 3 | plot_by_group <- function(x) { 4 | x %>% 5 | dplyr::filter( 6 | idea == 1 7 | ) %>% 8 | ggplot() + 9 | # geom_area(aes(x = timestep, y = proportion, fill = idea, group = idea)) + 10 | geom_line( 11 | aes(x = timestep, y = proportion, color = as.factor(model_id), group = model_id), 12 | size = 0.2 13 | ) + 14 | facet_wrap(~region, nrow = 8) + 15 | theme_bw() + 16 | theme( 17 | strip.background = element_blank(), 18 | strip.text.x = element_blank(), 19 | axis.title = element_blank(), 20 | axis.text.y = element_blank(), 21 | axis.text.x = element_text(size = 8, angle = 45, hjust = 1), 22 | axis.ticks.y = element_blank(), 23 | plot.margin = unit(c(1.4,0.2,0.2,0), "lines") 24 | ) + 25 | guides(color = FALSE) + 26 | scale_y_continuous( 27 | breaks = c(0, 0.5, 1), 28 | labels = c("0%", "50%", "100%") 29 | ) + 30 | scale_x_continuous( 31 | breaks = seq(0, 1400, 200), 32 | limits = c(0, 1400) 33 | ) + 34 | ggthemes::scale_colour_colorblind() 35 | } 36 | -------------------------------------------------------------------------------- /document_code/multi_group_matrix_calculation.R: -------------------------------------------------------------------------------- 1 | multi_group_matrizes <- function(Ne, t, Mk, mi_3) { 2 | 3 | k <- 1 4 | amount_groups <- nrow(Mk(k, mi_3)) 5 | 6 | group_matrix <- matrix(data = rep(0, amount_groups^2), nrow = amount_groups, ncol = amount_groups) 7 | 8 | M <- Mk 9 | 10 | U <- group_matrix 11 | diag(U) <- 1/Ne 12 | 13 | matrix_time_list <- list() 14 | 15 | for (pk in 0:t) { 16 | matrix_time_list[[pk + 1]] <- (M(k, mi_3) %^% pk) %*% U %*% t(M(k, mi_3) %^% pk) 17 | } 18 | 19 | V <- Reduce(`+`, matrix_time_list) 20 | 21 | # matrix of squared euclidian distance 22 | mosed <- group_matrix 23 | for (p1 in 1:nrow(V)) { 24 | for (p2 in 1:ncol(V)) { 25 | mosed[p1, p2] <- V[p1, p1] + V[p2, p2] - 2 * V[p1, p2] 26 | } 27 | } 28 | 29 | # mean squared distance of a group 30 | msd <- group_matrix 31 | for (p3 in 1:nrow(V)) { 32 | diag(msd)[p3] <- sum(mosed[p3,] / (amount_groups - 1)) 33 | } 34 | 35 | list( 36 | longM = reshape2::melt(M(k, mi_3)), 37 | longMt = reshape2::melt(t(M(k, mi_3))), 38 | longU = reshape2::melt(U), 39 | longV = reshape2::melt(V), 40 | longmosed = reshape2::melt(mosed), 41 | longmsd = reshape2::melt(msd) 42 | ) 43 | 44 | } 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 
7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | r: 4 | - oldrel 5 | 6 | install: 7 | - R -e 'install.packages("devtools")' 8 | - R -e 'devtools::install_deps(dep = T)' 9 | 10 | script: 11 | - R -e 'shinyappsiotoken <- Sys.getenv("shinyappsiotoken"); shinyappsiosecret <- Sys.getenv("shinyappsiosecret"); rsconnect::setAccountInfo(name = "nevrome", token = shinyappsiotoken, secret = shinyappsiosecret)' 12 | - R -e 'rsconnect::deployApp(appPrimaryDoc = "neiman1995.Rmd", launch.browser = FALSE)' 13 | 14 | env: 15 | global: 16 | - secure: u4L57PJLejoMsJV3+uZvYv34s4IeS/wMTtBVh58qCPS2LYEIvMVpvZJK1nMiplHmr0b4yMsphJtLjiu/ZJcLdIAp3j4HiXRDq+zHxfn1UfLWAWxrx04lCq3ozsvZmziOw3wqaafm6WvYAE6P/hnidTBvlUgCYxpgkofoMmO1kqQiw29BhG6lLT6MvB7fVx+m7GN+VjxeRWAyjExe0RTWVsFQe/gggcUm1PHZ2YwhGEBrB/PZOvpktAV0Ipqf+tx0xEYB1zYvCmqjnYN6+nescVBTFZPgZCQa8VVvDaoHHB3O8PXf1xKOA2ydWJuV4P/miJD4KdYPFNC50UHfa+n73AiDUiNVc7x4iO/BwPuLbQN1VHPHTvzmb6FKVuRlCo+cfRCl2CKLZ19mAeVyOQuDBb+OmLO84S/2g/UAVeZl8Sf+qS5JlVasK8xE8r6Z+v+pfGsxRvQMzWsL5bNkF+txgQ7D4MqHTEl6Gyhamu6JoAZx19JuPRvv3/4jFUnuJOc8/6JJ/sKcGAfLeNidsXTrCF+Xv0H14Bh8zxRwu3JefZzlpPjMMkEbka5egLBCwLEQuEghNeQI4DHGeg2OrDCrqBR27ICdW8pB74aGULTOkjiiem3df41usbkCiTcze33OViVCiHv/3/UvC82lXsXQVjhrX1jPx7JMvhF/9XUCnOg= 17 | - secure: iyYmM6Iu8SyMay2deCtZlfIviSxNMPmSinUcID7sqjhQGqk6TzH/YMJMG5qGn9a368fp+yKZ8fVgri9foWz7a0dtZyPmLfDlMG7+w/e1Mw2FwyNch62TTDJAK3Wk3bnNrg1Osbilzlvk2pTgYI93eiBVTWPC5rfamz+f3m55KkUAkbFgvAFJj4+zHdc446X62pzWrQQtV2iVjWL4waY1/5IlpXCg2CkoYLRrEKkx/QicRKI6IiDIF/7qeh4U5mN/JIP5IGPnQCOzvCp0L3u6yORZzLH3Gu4qwhsKv91PcQkD6ycdWIXBtBvsl9PiHoKPXBmGUbXebYVz8aDorJcnBSc4SLUWT9EnmTXzddPs/Jx8NXzxjNAkEgW0QWng+zuGZSpg/6DqAoc52fth5tpAuP5RgHXEdK7Ryq/fr5BqoRsYXHr6YKObBGBT6nwEfmbJ0vNmo793R5BWjlUYZbTCFTB+rLUZdy5jFqJw2MMY2Aah8R5Ee5Hqi/OEHYwnuyiyaPEMMnRBmzeDD5bQptFcieUBSKTp50XsV/+KthdTQ8H4UOnvNN18orkUxdKTGgD7By7Z9CPGQX4+hBoQj/C3B+B7y//IsnxOhdHYQOKQBwyI9w5SC/6MZgkw44LMsyaUn3VGQzxcVn9vY4EAqvCNmx2Pp0EzjMzROIcdC5oPxxo= 18 | -------------------------------------------------------------------------------- /document_code/group_drift_simulation.R: -------------------------------------------------------------------------------- 1 | group_drift_simulation <- function(k, N, t_final, mi) { 2 | 3 | population <- 1:N 4 | variants <- 1:k 5 | timesteps <- 2:t_final 6 | 7 | popA0 <- tibble::tibble( 8 | time = as.integer(0), 9 | individual = 1:N, 10 | variant = rep_len(1:k, N), 11 | group = 
"A" 12 | ) 13 | popB0 <- tibble::tibble( 14 | time = as.integer(0), 15 | individual = 1:N, 16 | variant = rep_len(1:k, N), 17 | group = "B" 18 | ) 19 | 20 | popA_devel <- list() 21 | popB_devel <- list() 22 | popA_devel[[1]] <- popA0 23 | popB_devel[[1]] <- popB0 24 | 25 | for (p1 in timesteps) { 26 | popA_new <- popA_devel[[p1 - 1]] 27 | popB_new <- popB_devel[[p1 - 1]] 28 | 29 | popA_new$time <- p1 - 1 30 | popB_new$time <- p1 - 1 31 | 32 | exchange_here <- sample( 33 | c(TRUE, FALSE), 34 | length(popA_new$variant), 35 | prob = c(mi, 1 - mi), 36 | replace = T 37 | ) 38 | 39 | popA_old_variant <- popA_new$variant 40 | # in group A 41 | popA_new$variant <- sample(popA_new$variant, length(popA_new$variant), replace = T) 42 | # ex group A 43 | popA_new$variant[exchange_here] <- sample(popB_new$variant, sum(exchange_here)) 44 | # in group B 45 | popB_new$variant <- sample(popB_new$variant, length(popB_new$variant), replace = T) 46 | # ex group B 47 | popB_new$variant[exchange_here] <- sample(popA_old_variant, sum(exchange_here)) 48 | 49 | popA_devel[[p1]] <- popA_new 50 | popB_devel[[p1]] <- popB_new 51 | } 52 | 53 | pop_devel <- append(popA_devel, popB_devel) 54 | 55 | pop_devel_df <- do.call(rbind, pop_devel) 56 | 57 | pop_devel_sum <- pop_devel_df %>% 58 | dplyr::group_by( 59 | time, variant, group 60 | ) %>% 61 | dplyr::summarise( 62 | individuals_with_variant = n() 63 | ) %>% 64 | dplyr::ungroup() %>% 65 | # that's just to fil gaps in the area plot 66 | tidyr::complete( 67 | time, 68 | variant, 69 | group, 70 | fill = list(individuals_with_variant = as.integer(0)) 71 | ) 72 | 73 | return(pop_devel_sum) 74 | } 75 | -------------------------------------------------------------------------------- /simulation_code/300_run_neiman_simulation.R: -------------------------------------------------------------------------------- 1 | #### setup settings grid #### 2 | 3 | config_matrix <- expand.grid( 4 | k = 2, 5 | N_g = c(10, 50, 200), 6 | t_final = 1400, 7 | mu = 0, 8 | g = 8, 9 | mi = c(0, 0.01, 0.1, 0.5 , 1), 10 | I = NA 11 | ) %>% 12 | tibble::as.tibble() %>% 13 | dplyr::mutate( 14 | model_group = 1:nrow(.) 15 | ) %>% 16 | tidyr::uncount(8) %>% 17 | dplyr::mutate( 18 | model_id = 1:nrow(.) 
19 | ) 20 | 21 | 22 | 23 | #### run simulation #### 24 | 25 | models <- pbapply::pblapply( 26 | 1:nrow(config_matrix), 27 | function(i, config_matrix) { 28 | neiman_simulation( 29 | config_matrix$k[i], 30 | config_matrix$N_g[i], 31 | config_matrix$t_final[i], 32 | config_matrix$mu[i], 33 | config_matrix$g[i], 34 | config_matrix$mi[i], 35 | config_matrix$I[i] 36 | ) %>% standardize_neiman_output %>% 37 | dplyr::mutate( 38 | model_id = config_matrix$model_id[i], 39 | model_group = config_matrix$model_group[i], 40 | region_population_size = config_matrix$N_g[i], 41 | degree_interregion_interaction = config_matrix$mi[i] 42 | ) 43 | }, 44 | config_matrix, 45 | cl = 2 46 | ) 47 | 48 | models_groups <- do.call(rbind, models) %>% 49 | base::split(.$model_group) 50 | 51 | #### create plots #### 52 | 53 | library(ggplot2) 54 | complete_plot <- cowplot::plot_grid( 55 | plotlist = lapply(models_groups, plot_by_group) %>% 56 | matrix(., 3, 5) %>% t %>% c(), 57 | labels = sapply( 58 | models_groups, function(x) { 59 | rps <- x$region_population_size[1] 60 | cui <- x$degree_interregion_interaction[1] 61 | paste0(LETTERS[x$model_group[1]], " - ", rps, ", ", cui) 62 | } 63 | ) %>% 64 | matrix(., 3, 5) %>% t %>% c(), 65 | label_x = 0, 66 | hjust = 0, 67 | label_size = 10, 68 | ncol = 5, 69 | nrow = 3, 70 | align = "hv" 71 | ) 72 | 73 | complete_plot %>% 74 | ggsave( 75 | "static_plots/neiman_general.jpeg", 76 | plot = ., 77 | device = "jpeg", 78 | scale = 1, 79 | dpi = 300, 80 | width = 210, height = 297, units = "mm", 81 | limitsize = F 82 | ) 83 | -------------------------------------------------------------------------------- /simulation_code/100_general_neiman_simulation.R: -------------------------------------------------------------------------------- 1 | #### simulation function #### 2 | 3 | #' neiman_simulation 4 | #' 5 | #' @param k Integer. Number of ideas at t = 0 6 | #' @param N_g Integer. Population per region 7 | #' @param t_final Integer. Final timestep 8 | #' @param mu Double. Innovation rate 9 | #' @param g Integer. Number of regions 10 | #' @param mi Double. Degree of interregion interaction 11 | #' @param I Doublematrix. Interregion interaction matrix. 
NA means equal interaction 12 | #' 13 | neiman_simulation <- function(k, N_g, t_final, mu, g, mi, I) { 14 | 15 | # define variables 16 | regions <- 1:g 17 | population <- 1:N_g 18 | ideas <- 1:k 19 | timesteps <- 2:t_final 20 | if (is.na(I)) { 21 | I <- matrix( 22 | rep(1, g*g), g, g 23 | ) 24 | diag(I) <- 0 25 | } 26 | 27 | # create starting populations 28 | pop0 <- lapply( 29 | regions, function(region, N, k) { 30 | tibble::tibble( 31 | timestep = as.integer(0), 32 | individual = population, 33 | idea = rep_len(ideas, max(population)), 34 | region = region 35 | ) 36 | }, 37 | k, population 38 | ) 39 | 40 | # create development list 41 | pop_devel <- list() 42 | pop_devel[[1]] <- pop0 43 | 44 | # determine number of ideas 45 | last_idea <- max(do.call(rbind, pop_devel[[1]])$idea) 46 | 47 | # simulation loop 48 | for (p1 in timesteps) { 49 | 50 | # new timestep list 51 | pop_old <- pop_devel[[p1 - 1]] 52 | pop_new <- pop_old 53 | 54 | # adjust time in new timestep list 55 | pop_new <- lapply( 56 | pop_new, function(x, p1) { 57 | x$timestep <- p1 - 1 58 | return(x) 59 | }, 60 | p1 61 | ) 62 | 63 | # intraregion learning 64 | pop_new <- lapply( 65 | pop_new, function(x) { 66 | x$idea <- sample(x$idea, length(x$idea), replace = T) 67 | return(x) 68 | } 69 | ) 70 | 71 | # interregion learning 72 | pop_new <- lapply( 73 | regions, function(i, pop_new, pop_old, mi, I, regions) { 74 | exchange_where <- which(sample(c(TRUE, FALSE), nrow(pop_new[[i]]), prob = c(mi, 1 - mi), replace = T)) 75 | exchange_with <- sample(regions, length(exchange_where), prob = I[,i], replace = T) 76 | pop_new[[i]]$idea[exchange_where] <- unlist(sapply( 77 | seq_along(exchange_where), 78 | function(j, pop_old, exchange_with, exchange_where) { 79 | v <- pop_old[[exchange_with[j]]]$idea 80 | return(v[exchange_where[j]]) 81 | }, 82 | pop_old, exchange_with, exchange_where 83 | )) 84 | return(pop_new[[i]]) 85 | }, 86 | pop_new, pop_old, mi, I, regions 87 | ) 88 | 89 | # innovation 90 | if(mu != 0) { 91 | for (i in seq_along(regions)) { 92 | innovate_where <- which(sample(c(TRUE, FALSE), nrow(pop_new[[i]]), prob = c(mu, 1 - mu), replace = T)) 93 | new_ideas <- seq(last_idea + 1, last_idea + length(innovate_where)) 94 | last_idea <- last_idea + length(innovate_where) 95 | pop_new[[i]]$idea[innovate_where] <- new_ideas 96 | } 97 | } 98 | 99 | pop_devel[[p1]] <- pop_new 100 | } 101 | 102 | # transform to data.frame 103 | pop_devel_time_dfs <- lapply( 104 | pop_devel, function(x) { 105 | do.call(rbind, x) 106 | } 107 | ) 108 | pop_devel_df <- do.call(rbind, pop_devel_time_dfs) 109 | 110 | return(pop_devel_df) 111 | } 112 | 113 | 114 | 115 | #### output preparation #### 116 | 117 | standardize_neiman_output <- function(x) { 118 | 119 | x %>% 120 | dplyr::group_by( 121 | timestep, idea, region 122 | ) %>% 123 | dplyr::summarise( 124 | number = n() 125 | ) %>% 126 | dplyr::ungroup() %>% 127 | # calculate proportion 128 | dplyr::group_by( 129 | timestep, region 130 | ) %>% 131 | dplyr::mutate( 132 | proportion = number/sum(number) 133 | ) %>% 134 | dplyr::ungroup() %>% 135 | # that's just to fill gaps in the area plot 136 | tidyr::complete( 137 | timestep, 138 | idea, 139 | region, 140 | fill = list(number = as.integer(0), proportion = as.double(0)) 141 | ) %>% 142 | dplyr::select( 143 | region, timestep, idea, proportion 144 | ) 145 | 146 | } 147 | -------------------------------------------------------------------------------- /neiman1995.Rmd: -------------------------------------------------------------------------------- 1 | --- 
2 | title: "Visualizing Neimans 'Stylistic variation in evolutionary perspective'" 3 | author: "Clemens Schmid" 4 | date: "April 2018" 5 | output: 6 | html_document: 7 | css: styles.css 8 | toc: true 9 | toc_depth: 4 10 | runtime: shiny 11 | bibliography: references.bib 12 | link-citations: true 13 | --- 14 | 15 | ```{r include=FALSE} 16 | # knitr chunk settings 17 | knitr::opts_chunk$set(echo = TRUE) 18 | ``` 19 | 20 | **Disclaimer** 21 | 22 | *This document visualizes the simulations and the general model functions in Fraser D. Neimans article Stylistic Variation in Evolutionary Perspective: Inferences from Decorative Diversity and Interassemblage Distance in Illinois Woodland Ceramic Assemblages [@neiman_stylistic_1995].* 23 | 24 | *It mirrors the structure of the article and should be read in conjunction with it. It can be used to deepen the understanding of the complex variable relations, but it doesn't attempt to retell the story: the case study about woodland pottery isn't relevant here. The small descriptive texts are compiled from the article and also contain sentences copied verbatimly. I focused on the segments and explanations I considered most important for understanding the models and parameters. This document does not attempt to reach the threshold of orginality but is meant as a mere tool for learning and teaching.* 25 | 26 | ```{r echo = FALSE} 27 | # load the packages, that are inconvinient to reference directly via namespace:: 28 | library(ggplot2) 29 | library(magrittr) 30 | suppressMessages(library(expm)) 31 | # load the functions in the source files 32 | purrr::walk(list.files("./document_code", full.names = T), function(x) {source(x)}) 33 | ``` 34 | 35 | ### Drift, Innovation and Diversity 36 | 37 | #### Temporal Dynamics of Drift *p.10* 38 | 39 | The first simulation shows the effect of drift in a population of $N$ individuals with $k$ variants over $t$ time steps. Drift is sampling error that accompanies all forms of cultural transmission and causes individual variants to vanish or dominate randomly. In the end one variant usually eradicates all the others. The speed with which the variation is destroyed increases as the population size decreases. 40 | 41 | Mechanics of the simulation: In each time step every individual randomly adopts a variant from somebody else. The probability of getting somebody else's variant is $(N - 1) / N$ while the probability of keeping the own variant is only $1 / N$. 42 | 43 | - $k$: Amount of variants in a population 44 | - $N_e$: Size of the effective population. 
The effective population consists of the individuals within a population that is actively involved in a cultural transmission process 45 | - $t$: Time / number of iterations 46 | 47 | ```{r echo=FALSE} 48 | inputPanel( 49 | sliderInput("k_drift_simulation", label = "k", 50 | min = 5, max = 15, value = 10, step = 1), 51 | sliderInput("Ne_drift_simulation", label = "Ne", 52 | min = 10, max = 100, value = 20, step = 10), 53 | sliderInput("t_drift_simulation", label = "t", 54 | min = 50, max = 200, value = 100, step = 50), 55 | actionButton("run_button_drift_simulation", "Run simulation") 56 | ) 57 | 58 | drift_simulation_data <- eventReactive( 59 | input$run_button_drift_simulation, { 60 | 61 | # read input 62 | k <- input$k_drift_simulation 63 | N <- input$Ne_drift_simulation 64 | time <- input$t_drift_simulation 65 | 66 | # calculate population parameters 67 | population <- 1:N 68 | variants <- 1:k 69 | timesteps <- 2:time 70 | 71 | # create initial population 72 | pop0 <- tibble::tibble( 73 | time = as.integer(0), 74 | individual = 1:N, 75 | variant = rep_len(1:k, N) 76 | ) 77 | 78 | # list to store population stages over time 79 | pop_devel <- list() 80 | pop_devel[[1]] <- pop0 81 | 82 | # simulation loop 83 | for (p1 in timesteps) { 84 | pop_new <- pop_devel[[p1 - 1]] 85 | pop_new$time <- p1 - 1 86 | pop_new$variant <- sample(pop_new$variant, length(pop_new$variant), replace = T) 87 | pop_devel[[p1]] <- pop_new 88 | } 89 | 90 | # bind individual population stages into data.frame 91 | pop_devel_df <- do.call(rbind, pop_devel) 92 | 93 | # calculate number of individuals per timestep and variant 94 | pop_devel_sum <- pop_devel_df %>% 95 | dplyr::group_by( 96 | time, variant 97 | ) %>% 98 | dplyr::summarise( 99 | individuals_with_variant = n() 100 | ) %>% 101 | dplyr::ungroup() %>% 102 | # complete (expand.grid) to fill gaps in the area plot 103 | tidyr::complete( 104 | time, 105 | variant, 106 | fill = list(individuals_with_variant = as.integer(0)) 107 | ) 108 | 109 | pop_devel_sum 110 | }, 111 | ignoreNULL = FALSE 112 | ) 113 | 114 | renderPlot({ 115 | 116 | drift_simulation_data() %>% 117 | ggplot() + 118 | geom_area(aes(x = time, y = individuals_with_variant, fill = variant, group = variant)) + 119 | geom_line(aes(x = time, y = individuals_with_variant, group = variant), position = "stack") + 120 | theme_bw() + 121 | xlab(expression(paste("t"))) + 122 | ylab("variants and their occurence in the population [%]") 123 | 124 | }) 125 | ``` 126 | 127 | #### Homogeneity Under Drift *p.10-12* **(1)** 128 | 129 | The theory of neutral alleles allows to describe the within-population homogeneity $F$ as a function of the effective population size $N_e$. $F$ is the probability that two randomly chosen individuals in the population carry variants that are copies of a common antecedent variant. In a given time period the probability of drawing an individual, who learned from the same model as some other randomly selected individual in the previous time period, is $1 / N$. The probability of the opposite is $1 - (1 / N)$. $F_{t - 1}$ is the probability that the model of the second individual learned from the same model as the model of the first individual in an earlier time step. $F_t$ can be calculated by taking all this probabilities into consideration. 130 | 131 | $$F_t = \frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}$$ 132 | 133 | As an effect of drift, $F_t$ approaches one as $t$ increases. 134 | 135 | - $F_t$: Within-population homogeneity. 
That's the probability that two randomly chosen individuals in the population carry variants that are copies of a common antecedent variant at a certain time step 136 | - $F_{t - 1}$: Probability that the model of the second individual learned from the same model as the model of the first individual in an earlier time step 137 | 138 | ```{r echo=FALSE} 139 | Ft <- function(F0, Ne, time) { 140 | if(time == 0) { return(F0) } 141 | 1/Ne + (1 - 1/Ne) * Ft(F0, Ne, time - 1) 142 | } 143 | 144 | inputPanel( 145 | sliderInput("F0_homogeneity_drift", label = "F0 (Ft for t == 0)", 146 | min = 0, max = 1, value = 0.5, step = 0.05), 147 | sliderInput("Ne_homogeneity_drift", label = "Ne", 148 | min = 10, max = 100, value = 20, step = 10), 149 | sliderInput("t_homogeneity_drift", label = "t", 150 | min = 50, max = 200, value = 100, step = 50) 151 | ) 152 | 153 | renderPlot({ 154 | 155 | # read input 156 | F0 <- input$F0_homogeneity_drift 157 | Ne <- input$Ne_homogeneity_drift 158 | time <- input$t_homogeneity_drift 159 | 160 | timesteps <- 0:time 161 | 162 | # apply function for all timesteps 163 | Ft_time <- sapply(timesteps, function(x) { Ft(F0, Ne, x) }) 164 | 165 | data.frame( 166 | t = timesteps, 167 | Ft = Ft_time 168 | ) %>% 169 | ggplot() + 170 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 171 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 172 | geom_line(aes(t, Ft), size = 1) + 173 | theme_bw() + 174 | xlab(latex2exp::TeX("t")) + 175 | ylab(latex2exp::TeX("F_{t}")) 176 | }) 177 | ``` 178 | 179 | #### Temporal Dynamics of Drift and Innovation *p.12-14* 180 | 181 | The following simulation is built upon the first simple drift simulation. Here the concept of innovation is added: Individuals can not just inherit but also create new variants with the probability $μ$. In this context $k$ is just the initial number of variants at $t = 0$. The effect of drift is still strongly visible, but with increasing $μ$ it becomes less relevant. New variants can form and replace the dominant one. 182 | 183 | Mechanics of the simulation: The simulation works like the first one, but now there's a chance of $μ$ for every individual in every time step to create a new variant. 184 | 185 | - $μ$: Innovation rate. 
Probability of the creation of a new variant 186 | 187 | ```{r echo=FALSE} 188 | inputPanel( 189 | sliderInput("k_drift_simulation_with_innovation", label = "k for t == 0", 190 | min = 5, max = 15, value = 10, step = 1), 191 | sliderInput("Ne_drift_simulation_with_innovation", label = "Ne", 192 | min = 10, max = 100, value = 20, step = 10), 193 | sliderInput("t_drift_simulation_with_innovation", label = "t", 194 | min = 50, max = 200, value = 100, step = 50), 195 | sliderInput("mu_drift_simulation_with_innovation", label = "μ", 196 | min = 0, max = 0.1, value = 0.01, step = 0.01), 197 | actionButton("run_button_drift_simulation_with_innovation", "Run simulation") 198 | ) 199 | 200 | drift_simulation_with_innovation_data <- eventReactive( 201 | input$run_button_drift_simulation_with_innovation, { 202 | 203 | # read input 204 | k <- input$k_drift_simulation_with_innovation 205 | N <- input$Ne_drift_simulation_with_innovation 206 | time <- input$t_drift_simulation_with_innovation 207 | mu <- input$mu_drift_simulation_with_innovation 208 | 209 | # prepare population parameters 210 | population <- 1:N 211 | variants <- 1:k 212 | timesteps <- 2:time 213 | 214 | # create starting population 215 | pop0 <- tibble::tibble( 216 | time = as.integer(0), 217 | individual = 1:N, 218 | variant = rep_len(1:k, N) 219 | ) 220 | 221 | # list to store population stages over time 222 | pop_devel <- list() 223 | pop_devel[[1]] <- pop0 224 | 225 | # simulation loop 226 | last_variant <- max(pop_devel[[1]]$variant) 227 | for (p1 in timesteps) { 228 | pop_new <- pop_devel[[p1 - 1]] 229 | pop_new$time <- p1 - 1 230 | pop_new$variant <- sample(pop_new$variant, length(pop_new$variant), replace = T) 231 | 232 | # innovation 233 | innovate_here <- sample( 234 | c(TRUE, FALSE), 235 | length(pop_new$variant), 236 | prob = c(mu, 1 - mu), 237 | replace = T 238 | ) 239 | new_variants <- seq(last_variant + 1, last_variant + sum(innovate_here)) 240 | last_variant <- last_variant + sum(innovate_here) 241 | pop_new$variant[innovate_here] <- new_variants 242 | 243 | pop_devel[[p1]] <- pop_new 244 | } 245 | 246 | # bind individual population stages into data.frame 247 | pop_devel_df <- do.call(rbind, pop_devel) 248 | 249 | # calculate number of individuals per timestep and variant 250 | pop_devel_sum <- pop_devel_df %>% 251 | dplyr::group_by( 252 | time, variant 253 | ) %>% 254 | dplyr::summarise( 255 | individuals_with_variant = n() 256 | ) %>% 257 | dplyr::ungroup() %>% 258 | tidyr::complete( 259 | time, 260 | variant, 261 | fill = list(individuals_with_variant = as.integer(0)) 262 | ) 263 | 264 | pop_devel_sum 265 | 266 | }, 267 | ignoreNULL = FALSE 268 | ) 269 | 270 | renderPlot({ 271 | drift_simulation_with_innovation_data() %>% 272 | ggplot() + 273 | geom_area(aes(x = time, y = individuals_with_variant, fill = variant, group = variant)) + 274 | geom_line(aes(x = time, y = individuals_with_variant, group = variant), position = "stack") + 275 | theme_bw() + 276 | xlab("t") + 277 | ylab("variants and their occurence in the population [%]") 278 | }) 279 | ``` 280 | 281 | #### Homogeneity Under Drift and Innovation *p.14* **(2)** **(3)** **(4)** **(5)** 282 | 283 | Drift and innovation are opposing forces: drift increases homogeneity and innovation decreases it. This causes an equilibrium after some time steps in the simulation. We can incorporate innovation into the calculation of $F$ by modifying equation (1). 
If innovation is possible, $F$ is no longer determined only by learning from yourself or from somebody else: there is also the possibility that a new variant is created. Both randomly chosen individuals have to be non-innovators for their variants to be copies of a common antecedent, which is why the term is multiplied by $(1 - μ)^2$.
284 | 
285 | $$F_t = \left(\frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}\right)(1 - μ)^2$$
286 | 
287 | ```{r echo=FALSE}
288 | Ft_innovation <- function(F0, Ne, time, mu) {
289 |   if(time == 0) {return(F0)}
290 |   (1/Ne + (1 - 1/Ne) * Ft_innovation(F0, Ne, time - 1, mu)) * (1 - mu)^2
291 | }
292 | 
293 | inputPanel(
294 |   sliderInput("F0_homogeneity_drift_with_innovation", label = "F0 (Ft for t == 0)",
295 |               min = 0, max = 1, value = 0.5, step = 0.05),
296 |   sliderInput("Ne_homogeneity_drift_with_innovation", label = "Ne",
297 |               min = 10, max = 100, value = 20, step = 10),
298 |   sliderInput("t_homogeneity_drift_with_innovation", label = "t",
299 |               min = 50, max = 200, value = 100, step = 50),
300 |   sliderInput("mu_homogeneity_drift_with_innovation", label = "μ",
301 |               min = 0, max = 1, value = 0.1, step = 0.1)
302 | )
303 | 
304 | renderPlot({
305 | 
306 |   # read input
307 |   F0 <- input$F0_homogeneity_drift_with_innovation
308 |   Ne <- input$Ne_homogeneity_drift_with_innovation
309 |   mu <- input$mu_homogeneity_drift_with_innovation
310 |   time <- input$t_homogeneity_drift_with_innovation
311 | 
312 |   timesteps <- 0:time
313 | 
314 |   # apply function for every timestep
315 |   Ft_innovation_time <- sapply(timesteps, function(x) {Ft_innovation(F0, Ne, x, mu)})
316 | 
317 |   data.frame(
318 |     t = timesteps,
319 |     Ft = Ft_innovation_time
320 |   ) %>%
321 |     ggplot() +
322 |     geom_hline(aes(yintercept = 1), color = "dodgerblue4") +
323 |     geom_hline(aes(yintercept = 0), color = "dodgerblue4") +
324 |     geom_line(aes(t, Ft), size = 1) +
325 |     theme_bw() +
326 |     xlab(latex2exp::TeX("t")) +
327 |     ylab(latex2exp::TeX("F_{t}"))
328 | })
329 | ```
330 | 
331 | The equilibrium is reached when $F_t = F_{t-1}$. Setting $F_t = F_{t-1}$ in equation (2) and solving for $\hat{F}$ gives:
332 | 
333 | $$\hat{F} = \frac{(1 - μ)^2}{N_e - (N_e - 1)(1 - μ)^2}$$
334 | 
335 | If we assume $μ$ is quite small, we can further simplify this definition.
336 | 
337 | $$\hat{F} \simeq \frac{1}{2 N_e μ + 1}$$
338 | 
339 | That means the equilibrium homogeneity of neutral variants within a population is inversely related to twice the effective population size times the innovation rate: $2 N_e μ$. This expression will be called $θ$.
340 | 341 | - $θ$: Twice the effective population size times the innovation rate ($2 N_e μ$) 342 | 343 | ```{r echo=FALSE} 344 | theta <- function(Ne, mu) { 345 | (2 * Ne * mu) 346 | } 347 | 348 | Fhat <- function(theta) { 349 | 1 / (theta + 1) 350 | } 351 | 352 | inputPanel( 353 | sliderInput("Ne_homogeneity_equilibrium", label = "Ne", 354 | min = 10, max = 100, value = 20, step = 10), 355 | sliderInput("mu_homogeneity_equilibrium", label = "μ", 356 | min = 0, max = 1, value = 0.1, step = 0.1) 357 | ) 358 | 359 | renderText({ 360 | paste0("1 * theta = ", theta(input$Ne_homogeneity_equilibrium, input$mu_homogeneity_equilibrium)) 361 | }) 362 | 363 | renderPlot({ 364 | Ne <- input$Ne_homogeneity_equilibrium 365 | mu <- input$mu_homogeneity_equilibrium 366 | 367 | tibble::tibble( 368 | theta = 0:100 * theta(Ne, mu), 369 | Fhat = sapply(theta, function(x) {Fhat(x)}) 370 | ) %>% 371 | ggplot() + 372 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 373 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 374 | geom_line(aes(theta, Fhat), size = 1) + 375 | theme_bw() + 376 | xlab(latex2exp::TeX("θ")) + 377 | ylab(latex2exp::TeX("\\hat{F}")) 378 | }) 379 | ``` 380 | 381 | The homogeneity $\hat{F}$ of a population can also be described as a function of the relative frequency of the variants in it: $p$. $p_i$ is the relative frequency of the $i$'th variant in the population, so $p$ is a vector of frequencies with the sum one. The probability of choosing a given variant at random is its relative frequency $p_i$, which is also the probability of picking another copy of this same variant on the second try. The probability of getting this variant twice in a row is therefore $p_i^2$. The total probability of getting any of the $i = 1$ to $k$ variants twice in a row is the sum of all these probabilities. 382 | 383 | For this plot $p$ is randomly drawn some hundred times from a uniform distribution for every value of $k$. The black line shows the mean result for $\hat{F}$ the ribbon the min and max results. 
384 | 385 | $$\hat{F} = \sum_{i=1}^{k} p^2_i$$ 386 | 387 | - $p_i$: relative frequency of the $i$'th variant in the population 388 | 389 | ```{r echo=FALSE} 390 | Fhat_relative_frequency <- function(amount_of_variants) { 391 | p <- runif(amount_of_variants) 392 | p <- p / sum(p) 393 | sum(p^2) 394 | } 395 | 396 | inputPanel( 397 | sliderInput("k_relative_frequency", label = "k", 398 | min = 50, max = 150, value = 100, step = 10), 399 | sliderInput("number_of_replications_relative_frequency", label = "Number of simulation runs", 400 | min = 100, max = 1000, value = 500, step = 100), 401 | actionButton("run_button_relative_frequency", "Run simulation") 402 | ) 403 | 404 | relative_frequency_data <- eventReactive( 405 | input$run_button_relative_frequency, { 406 | 407 | # read input 408 | number_of_replications <- input$number_of_replications_relative_frequency 409 | k <- input$k_relative_frequency 410 | 411 | # calculate Fhat 412 | tibble::tibble( 413 | k = 0:k, 414 | Fhat_min = sapply(k, function(x) { 415 | min(replicate(number_of_replications, Fhat_relative_frequency(x))) 416 | }), 417 | Fhat_max = sapply(k, function(x) { 418 | max(replicate(number_of_replications, Fhat_relative_frequency(x))) 419 | }), 420 | Fhat_mean = (Fhat_min + Fhat_max) / 2 421 | ) 422 | 423 | }, 424 | ignoreNULL = FALSE 425 | ) 426 | 427 | renderPlot({ 428 | relative_frequency_data() %>% 429 | ggplot() + 430 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 431 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 432 | geom_ribbon( 433 | aes(x = k, ymin = Fhat_min, ymax = Fhat_max), 434 | fill = "deepskyblue", alpha = 0.5 435 | ) + 436 | geom_line(aes(x = k, y = Fhat_mean), color = "black", size = 1) + 437 | theme_bw() + 438 | xlab(latex2exp::TeX("k")) + 439 | ylab(latex2exp::TeX("\\hat{F}")) 440 | }) 441 | ``` 442 | 443 | #### Diversity *p.14-15* **(6)** **(7)** 444 | 445 | Instead of looking at homogeneity we can also estimate the diversity inside a population. One measure of diversity is the "effective number" of variants $n_e$, which is the reciprocal of $\hat{F}$. Estimating diversity within a population in these terms means that the population contains the same amount of diversity as would be found in an imaginary population with the effective number of variants at equal frequency. $n_e$ scales linearly with $θ$. Larger populations contain more diversity at a given level of innovation. Greater diversity is also expected in populations into which new variants are being introduced at higher rates. 
446 | 447 | $$n_e = 2 N_e μ + 1$$ 448 | 449 | - $n_e$: Effective number of variants 450 | 451 | ```{r echo=FALSE} 452 | ne <- function(theta) { 453 | theta + 1 454 | } 455 | 456 | inputPanel( 457 | sliderInput("Ne_diversity_effective_number", label = "Ne", 458 | min = 10, max = 100, value = 20, step = 10), 459 | sliderInput("mu_diversity_effective_number", label = "μ", 460 | min = 0, max = 1, value = 0.01, step = 0.01) 461 | ) 462 | 463 | renderText({ 464 | paste0("1 * theta = ", theta(input$Ne_diversity_effective_number, input$mu_diversity_effective_number)) 465 | }) 466 | 467 | renderPlot({ 468 | 469 | # read input 470 | Ne <- input$Ne_diversity_effective_number 471 | mu <- input$mu_diversity_effective_number 472 | 473 | tibble::tibble( 474 | theta = 0:100 * theta(Ne, mu), 475 | ne = sapply(theta, function(x) {ne(x)}) 476 | ) %>% 477 | ggplot() + 478 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 479 | geom_line(aes(theta, ne), size = 1) + 480 | theme_bw() + 481 | xlab(latex2exp::TeX("θ")) + 482 | ylab(latex2exp::TeX("n_e")) 483 | }) 484 | ``` 485 | 486 | Furthermore, it is clear that $θ$ can be estimated empirically by computing the reciprocal of the sum of squares of variant frequencies in a population. We can call such estimates $t_F$, to distinguish them from the actual population values and to remind ourselves that they are based on the homogeneity $\hat{F}$ in Equation (5). With this equation we can get useful insights into variations in $θ$ among groups or demes. 487 | 488 | For this plot $p$ is randomly drawn some hundred times from a uniform distribution for every value of $k$. The black line shows the mean result for $t_F$, the ribbon the min and max results. 489 | 490 | $$t_F = \frac{1}{\sum_{i=1}^{k} p^2_i} - 1$$ 491 | 492 | - $t_F$: Estimated $θ$ by the sum of squares of variant frequencies 493 | 494 | ```{r echo=FALSE} 495 | theta_F <- function(amount_of_variants) { 496 | p <- runif(amount_of_variants) 497 | p <- p / sum(p) 498 | (1 / sum(p^2)) - 1 499 | } 500 | 501 | inputPanel( 502 | sliderInput("k_reciprocal_relative_frequency", label = "k", 503 | min = 50, max = 150, value = 100, step = 10), 504 | sliderInput("number_of_replications_reciprocal_relative_frequency", label = "Number of simulation runs", 505 | min = 100, max = 1000, value = 500, step = 100), 506 | actionButton("run_button_reciprocal_relative_frequency", "Run simulation") 507 | ) 508 | 509 | reciprocal_relative_frequency_data <- eventReactive( 510 | input$run_button_reciprocal_relative_frequency, { 511 | 512 | # read input 513 | number_of_replications <- input$number_of_replications_reciprocal_relative_frequency 514 | k <- input$k_reciprocal_relative_frequency 515 | 516 | # calculate Fhat 517 | tibble::tibble( 518 | k = 0:k, 519 | theta_F_min = sapply(k, function(x) { 520 | min(replicate(number_of_replications, theta_F(x))) 521 | }), 522 | theta_F_max = sapply(k, function(x) { 523 | max(replicate(number_of_replications, theta_F(x))) 524 | }), 525 | theta_F_mean = (theta_F_min + theta_F_max) / 2 526 | ) 527 | 528 | }, 529 | ignoreNULL = FALSE 530 | ) 531 | 532 | renderPlot({ 533 | reciprocal_relative_frequency_data() %>% 534 | ggplot() + 535 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 536 | geom_ribbon( 537 | aes(x = k, ymin = theta_F_min, ymax = theta_F_max), 538 | fill = "deepskyblue", alpha = 0.5 539 | ) + 540 | geom_line(aes(x = k, y = theta_F_mean), size = 1) + 541 | theme_bw() + 542 | xlab(latex2exp::TeX("k")) + 543 | ylab(latex2exp::TeX("t_F")) 544 | }) 545 | ``` 546 | 547 | 
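Before moving on to archaeological samples, here is a small, non-interactive sketch of how equation (7) could be applied to a single assemblage. The vector of variant counts is a made-up example, not data from the article; the applet above does the same calculation for randomly drawn frequency vectors.

```{r eval=FALSE}
# minimal sketch (hypothetical counts): estimating theta via t_F, equation (7)
variant_counts <- c(34, 21, 12, 5, 3, 1)    # made-up counts per decorative variant
p <- variant_counts / sum(variant_counts)   # relative variant frequencies
t_F <- 1 / sum(p^2) - 1                     # reciprocal of the sum of squares, minus one
t_F
```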
### Archaeological Application 548 | 549 | #### Diversity and Sample Size *p.15-16* **(8)** **(9)** 550 | 551 | One problem of the set of equations so far is that the frequency of variants in a population of social learners is not equal to the frequency preserved and documented in the archaeological assemblage. Under appropriate taphonomic circumstances, however, they are surprisingly similar. Another problem arises because we try to estimate diversity only on the basis of a sample derived from a population. The equations (5) and (7) are similar to the Shannon-Weaver information statistic, which is very sensitive to sample-size variation. 552 | 553 | $$H = -\sum_{i = 1}^{k} p_i \log(p_i)$$ 554 | 555 | - $H$: [Shannon's diversity index](http://www.tiem.utk.edu/~gross/bioed/bealsmodules/shannonDI.html) 556 | 557 | ```{r echo=FALSE} 558 | H <- function(amount_of_variants) { 559 | p <- runif(amount_of_variants) 560 | p <- p / sum(p) 561 | -sum(p * log(p)) 562 | } 563 | 564 | inputPanel( 565 | sliderInput("k_shannon", label = "k", 566 | min = 50, max = 150, value = 100, step = 10), 567 | sliderInput("number_of_replications_shannon", label = "Number of simulation runs", 568 | min = 100, max = 1000, value = 500, step = 100), 569 | actionButton("run_button_shannon", "Run simulation") 570 | ) 571 | 572 | shannon_data <- eventReactive( 573 | input$run_button_shannon, { 574 | 575 | # read input 576 | number_of_replications <- input$number_of_replications_shannon 577 | k <- input$k_shannon 578 | 579 | # calculate Fhat 580 | tibble::tibble( 581 | k = 0:k, 582 | H_min = sapply(k, function(x) { 583 | min(replicate(number_of_replications, H(x))) 584 | }), 585 | H_max = sapply(k, function(x) { 586 | max(replicate(number_of_replications, H(x))) 587 | }), 588 | H_mean = (H_min + H_max) / 2 589 | ) 590 | 591 | }, 592 | ignoreNULL = FALSE 593 | ) 594 | 595 | renderPlot({ 596 | shannon_data() %>% 597 | ggplot() + 598 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 599 | geom_ribbon( 600 | aes(x = k, ymin = H_min, ymax = H_max), 601 | fill = "deepskyblue", alpha = 0.5 602 | ) + 603 | geom_line(aes(x = k, y = H_mean), color = "black", size = 1) + 604 | theme_bw() + 605 | xlab(latex2exp::TeX("k")) + 606 | ylab(latex2exp::TeX("H")) 607 | }) 608 | ``` 609 | 610 | To deal with the problem of sample-size dependency in the calculation of $t_F$ with equation (7), we have to consider that $θ$ also controls a characteristic probability distribution of variants. (7) shows how $θ$ is related to the sum of squared variant frequencies in a population, so it should be possible to predict for a given value of $θ$ how many variants occur with certain frequencies. When $θ$ is low ($θ \ll 1$), we should expect that most of the time, a population will be dominated by a few variants, with other variants at low frequencies. When $θ$ is high ($θ \gg 1$), we are more likely to see a large number of variants at low to moderate frequencies. 611 | 612 | By combining this frequency distribution with the probability that a variant at a given frequency is represented at least once in a sample of a given size drawn at random from the population, it should be possible to deduce how many different variants we can expect to find in a sample of a given size. If the variants are selectively neutral, the expected number of different variants $k$ found in a sample drawn from a population is a function of it's sample size $n$ and $θ$ value. The number of variants will be larger when either $θ$ or $n$ are large. 
613 | 614 | $$E(k) = \sum_{i = 0}^{n - 1} \frac{θ}{θ + i}$$ 615 | 616 | - $E(k)$: Expected number of different variants 617 | - $n$: Sample size 618 | 619 | ```{r echo=FALSE} 620 | Ek <- function(n, theta) { 621 | ressum <- 0 622 | for (i in 0:(n-1)) { 623 | ressum = ressum + (theta / (theta + i)) 624 | } 625 | return(ressum) 626 | } 627 | 628 | inputPanel( 629 | sliderInput("Ne_expected_variants", label = "Ne", 630 | min = 10, max = 100, value = 20, step = 10), 631 | sliderInput("mu_expected_variants", label = "μ", 632 | min = 0, max = 1, value = 0.01, step = 0.01), 633 | sliderInput("n_expected_variants", label = "n", 634 | min = 0, max = 100, value = 10, step = 1) 635 | ) 636 | 637 | renderText({ 638 | paste0("1 * theta = ", theta(input$Ne_expected_variants, input$mu_expected_variants)) 639 | }) 640 | 641 | renderPlot({ 642 | 643 | # read input 644 | Ne <- input$Ne_expected_variants 645 | mu <- input$mu_expected_variants 646 | n <- input$n_expected_variants 647 | 648 | tibble::tibble( 649 | theta = 0:100 * theta(Ne, mu), 650 | n = rep(n, 101), 651 | Ek = purrr::map2_dbl(n, theta, function(a, b) {Ek(a, b)}) 652 | ) %>% 653 | ggplot() + 654 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 655 | geom_line(aes(theta, Ek), size = 1) + 656 | theme_bw() + 657 | xlab(latex2exp::TeX("θ")) + 658 | ylab(latex2exp::TeX("E(k)")) 659 | }) 660 | ``` 661 | 662 | Equation (9) could be used to produce a maximum likelihood estimate of $θ$, that is the value of $θ$ that maximizes the chances of drawing the observed number of variants $k$ in a sample of the size $n$. Unfortunately it doesn't have an analytic solution, but we can still use it to estimate $θ$ iteratively by changing its value until $E(k)$ equals the observed number of variants. We call the result of doing so $t_E$. Both $t_E$ and $t_F$ offer us special statistical summaries of assemblage diversity, but there are theoretical reasons to prefer $t_E$ if $n$ and $k$ (and not just $p$) are known. 663 | 664 | This applet allows to solve (9) for theta approximately. It starts at $θ = 10$, changes it in steps of $0.1$ and stops when $|E(k) - k| < 0.5$. 665 | 666 | ```{r echo=FALSE} 667 | inputPanel( 668 | sliderInput("n_iterative_theta", label = "n", 669 | min = 10, max = 100, value = 20, step = 10), 670 | sliderInput("k_iterative_theta", label = "k", 671 | min = 5, max = 15, value = 10, step = 1), 672 | actionButton("run_iterative_theta", "Approximate theta") 673 | ) 674 | 675 | iterative_theta_data <- eventReactive( 676 | input$run_iterative_theta, { 677 | 678 | # read input 679 | n <- input$n_iterative_theta 680 | k <- input$k_iterative_theta 681 | start_theta <- 10 682 | 683 | # iterative solving loop 684 | theta_iter <- start_theta 685 | while (TRUE) { 686 | diffi <- Ek(n, theta_iter) - k 687 | if (diffi < 0) { 688 | theta_iter <- theta_iter + 0.1 689 | } else if (diffi > 0) { 690 | theta_iter <- theta_iter - 0.1 691 | } 692 | if (abs(diffi) < 0.5) { 693 | break; 694 | } 695 | } 696 | 697 | theta_iter 698 | 699 | }, 700 | ignoreNULL = FALSE 701 | ) 702 | 703 | renderText({ 704 | paste0("theta ≈ ", round(iterative_theta_data(), 2)) 705 | }) 706 | ``` 707 | 708 | #### Archaeological Sample Size *p.16-17* 709 | 710 | $t_E$ may yield wrong estimates of $θ$ for very large archaeological samples that accumulated over long periods of time. Equation (9) implies that for a given number of variants $k$, $t_E$ will decline as the sample size $n$ increases. 
Usually $n$ is limited by the population size $N$, but in case of a long-term sample this limit can be exceeded: $n$ can grow beyond $N$ and causes the estimate of $θ$ to be too low. In this case $t_F$ can again be the better function to determine $θ$. Another solution is to look at the difference between both estimators ($t_F - t_E$) as a function of assemblage size. For the application in archaeological contexts it's always important to carefully examine the relationships among $t_F$ and $t_E$ and possible sample size effects. 711 | 712 | #### Inferences About Group Size and Innovation Rate *p.17-18* 713 | 714 | $θ$ is defined as twice the effective population size times the innovation rate ($2 N_e μ$). The innovation rate $μ$ includes the combined effects of both in situ innovation $v$ and the introduction of novel variants from other groups $m$. If $μ = v + m$, then $N_e μ = N_e v + N_e m$. $v$ should be roughly constant across demes, but $m$ is likely to be more variable in time and space. Under these circumstances, most of the variation in $θ$ associated with $μ$ will be caused by variation in intergroup transmission rates. $θ$ mostly depends on the number of times local group members learn from members of other groups: $N_e m$. That means that variation in $θ$ is an indicator for the absolute amount of cultural transmission among demes in a geographical region. 715 | 716 | ### Drift, Intergroup Transmission, and Interassemblage Distance 717 | 718 | #### Dynamics of Stylistic Distance between Two Groups *p.21-24* **(10)** 719 | 720 | To compare groups and track the cultural transmission we need a simple measure of between-group similarity. One possibility is to look at the squared Euclidean distance $d_{ij}^2$. It's the sum of squared differences in variant frequencies $p$ between two groups $i$ and $j$. 721 | 722 | $$d_{ij}^2 = \sum_{k = 1}^{n} (p_{ik} - p_{jk})^2$$ 723 | 724 | - $d_{ij}^2$: Squared Euclidean distance between two groups 725 | - $n$: total amount of variants (here not sample size) 726 | 727 | ```{r echo=FALSE} 728 | inputPanel( 729 | sliderInput("n_sed_general", label = "n", 730 | min = 10, max = 100, value = 50, step = 10), 731 | actionButton("run_sed_general", "Run simulation") 732 | ) 733 | 734 | sed_general_data <- eventReactive( 735 | input$run_sed_general, { 736 | 737 | # read input 738 | n <- input$n_sed_general 739 | 740 | # calculate 741 | tibble::tibble( 742 | amount_of_variants = 0:n, 743 | sed_min = sapply(amount_of_variants, function(x) { 744 | min(replicate(500, sed(runif(x), runif(x)))) 745 | }), 746 | sed_max = sapply(amount_of_variants, function(x) { 747 | max(replicate(500, sed(runif(x), runif(x)))) 748 | }), 749 | sed_mean = (sed_min + sed_max) / 2 750 | ) 751 | 752 | }, 753 | ignoreNULL = FALSE 754 | ) 755 | 756 | renderPlot({ 757 | sed_general_data() %>% 758 | ggplot() + 759 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 760 | geom_ribbon( 761 | aes(x = amount_of_variants, ymin = sed_min, ymax = sed_max), 762 | fill = "deepskyblue", alpha = 0.5 763 | ) + 764 | geom_line(aes(x = amount_of_variants, y = sed_mean), color = "black", size = 1) + 765 | theme_bw() + 766 | xlab(latex2exp::TeX("k")) + 767 | ylab(latex2exp::TeX("d_{ij}^2")) 768 | }) 769 | ``` 770 | 771 | The study of intergroup cultural transmission requires a model setup that contains the effect of drift but also exchange between groups. 
One promising approach comes from migration matrix models, which in population genetics have proven useful in studying the effects of drift and intergroup genetic transmission on genetic distances among small, localized demes. Such models reveal what happens to variant frequencies in a finite number of demes that are subject to the joint effects of drift, whose strength is controlled by the effective size of each population, and intergroup transmission, occurring between demes at constant pairwise rates. There is no role in these models for in situ innovation, but they offer a means of checking the assumption that variation in $θ$ is largely a function of intergroup transmission. 772 | 773 | The following simulation is constructed like the very first one about the effects of drift. Only in this case, there is a set probability, the intergroup transmission rate $m_i$, that the individual contacted is derived from the other group. If $m_i = 0$, both groups act independently, but if it increases they tend to develop roughly alike. 774 | 775 | The line chart below the variant frequency area plot shows the distance measure $d_{ij}^2$ calculated for the two groups at the respective point in model time. 776 | 777 | $m_i$: intergroup transmission rate 778 | 779 | ```{r echo=FALSE} 780 | inputPanel( 781 | sliderInput("k_two_groups", label = "k", 782 | min = 2, max = 10, value = 10, step = 1), 783 | sliderInput("N_two_groups", label = "N", 784 | min = 10, max = 100, value = 20, step = 10), 785 | sliderInput("t_two_groups", label = "t", 786 | min = 50, max = 200, value = 100, step = 50), 787 | sliderInput("mi_two_groups", label = "mi", 788 | min = 0, max = 1, value = 0.1, step = 0.01), 789 | actionButton("run_button_two_groups", "Run simulation") 790 | ) 791 | 792 | group_drift_simulation_data <- eventReactive( 793 | input$run_button_two_groups, { 794 | 795 | # run simulation (code in source file) 796 | group_drift_simulation( 797 | input$k_two_groups, 798 | input$N_two_groups, 799 | input$t_two_groups, 800 | input$mi_two_groups 801 | ) 802 | 803 | }, 804 | ignoreNULL = FALSE 805 | ) 806 | 807 | renderPlot({ 808 | A <- group_drift_simulation_data() %>% 809 | ggplot() + 810 | geom_area( 811 | aes(x = time, y = individuals_with_variant, fill = variant, group = variant) 812 | ) + 813 | geom_line( 814 | aes(x = time, y = individuals_with_variant, group = variant), 815 | position = "stack" 816 | ) + 817 | theme_bw() + 818 | xlab("t") + 819 | ylab("variants and their occurence in the population [%]") + 820 | facet_grid(group ~ .) 821 | 822 | B <- calculate_sed_for_group_drift_simulation_result( 823 | group_drift_simulation_data() 824 | ) %>% 825 | ggplot() + 826 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 827 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 828 | geom_line(aes(x = t, y = sed), color = "black", size = 1, alpha = 0.3) + 829 | geom_point(aes(x = t, y = sed), color = "black", size = 1) + 830 | theme_bw() + 831 | xlab(latex2exp::TeX("t")) + 832 | ylab(latex2exp::TeX("d_{ij}^2")) 833 | 834 | cowplot::plot_grid(A, B, nrow = 2, align = "v", axis = "lr") 835 | }) 836 | ``` 837 | 838 | We can run the above simulation many times to see different results random drift can produce. At the beginning the distance between the groups is always zero, because the variants and variant frequencies are equal in both. However, as time passes, the distance increases up to a quasi-stationary equilibrium. 
The equilibrium is only quasi-stationary because the groups are finite and there is no innovation: if the simulations are continued for a sufficient number of time periods, a single variant is fixed and as a result the between-group distance reverts to zero. The equilibrium holds over the period during which the opposing forces of drift, locally reducing variation within each group, and intergroup transmission, introducing potentially novel variation into each group from the other, are balanced. Later drift globally depletes all variation in both groups. 839 | 840 | Higher levels of intergroup transmission $m_i$ will lower the equilibrium level, while lower levels of intergroup transmission raise it. Lower effective sizes $N$ for either or both groups will raise the equilibrium since the effects of drift scale with $N$. The speed with which the equilibrium is achieved depends heavily on the modes of transmission. Horizontal transmission can cause rather fast changes, while vertical and oblique transmission may take centuries. The equilibrium is also approached in case of initially divergent groups with different variants and variant frequencies if there is intergroup transmission among them ($m_i > 0$). 841 | 842 | ```{r echo=FALSE} 843 | inputPanel( 844 | sliderInput("k_two_groups_many_runs", label = "k", 845 | min = 5, max = 15, value = 10, step = 1), 846 | sliderInput("N_two_groups_many_runs", label = "N", 847 | min = 10, max = 100, value = 20, step = 10), 848 | sliderInput("t_two_groups_many_runs", label = "t", 849 | min = 20, max = 200, value = 20, step = 20), 850 | sliderInput("mi_two_groups_many_runs", label = "mi", 851 | min = 0, max = 1, value = 0.1, step = 0.01), 852 | sliderInput("sim_runs_two_groups_many_runs", label = "Number of simulation runs", 853 | min = 1, max = 100, value = 20, step = 1), 854 | actionButton("run_button_two_groups_many_runs", "Run simulation") 855 | ) 856 | 857 | two_groups_many_runs_data <- eventReactive( 858 | input$run_button_two_groups_many_runs, { 859 | 860 | # run the simulation many times and store results in res_list 861 | res_list <- list() 862 | for (i in 1:input$sim_runs_two_groups_many_runs) { 863 | res_list[[i]] <- calculate_sed_for_group_drift_simulation_result( 864 | group_drift_simulation( 865 | input$k_two_groups_many_runs, 866 | input$N_two_groups_many_runs, 867 | input$t_two_groups_many_runs, 868 | input$mi_two_groups_many_runs 869 | ), 870 | sim_run = i 871 | ) 872 | } 873 | 874 | # rbind individual simulation results into one data.frame 875 | do.call(rbind, res_list) 876 | 877 | }, 878 | ignoreNULL = FALSE 879 | ) 880 | 881 | renderPlot({ 882 | two_groups_many_runs_data() %>% 883 | ggplot() + 884 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 885 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 886 | geom_line(aes(x = t, y = sed, group = sim_run), color = "black", size = 1, alpha = 0.3) + 887 | geom_point(aes(x = t, y = sed), color = "black", size = 1) + 888 | theme_bw() + 889 | xlab(latex2exp::TeX("t")) + 890 | ylab(latex2exp::TeX("d_{ij}^2")) 891 | }) 892 | ``` 893 | 894 | #### Stylistic Distances for Multiple Groups *p.24-25* **(11)** **(12)** 895 | 896 | The real world is better represented in terms of more than two groups learning from another. The multi-group case can be described in a matrix based model. The matrix $\mathbf{M}$ contains the intergroup transmission rates $m_{ij}$ that give the proportion of the $i$'th deme that learned from the $j$'th deme. 
The diagonal elements of $\mathbf{M}$ are the proportion of each deme that learned from its own members. The matrix $\mathbf{U}$ contains the reciprocals of the effective population sizes $1 / N_e$ of each group on its diagonal and $0$'s elsewhere. 
897 | 
898 | In this setup all groups initially have the same variant frequencies. This means that the expected frequencies for each group in any time period are identical and equal to the starting frequency. Drift-driven departures from this expectation for a given deme can be characterized analytically in terms of variance. Since there are multiple demes in the system, their joint evolution must be handled in terms of a matrix of variances and covariances: $\mathbf{V}$. Diagonal elements in $\mathbf{V}$ contain variances: the square of the departure of the variant frequency in the respective deme at time $t$ from the situation at the starting time. The other elements are covariances: the product of the departures of the variant frequencies in each pair of demes. 
899 | 
900 | $$\mathbf{V}^{(t)} = \sum_{r = 0}^{t - 1} \mathbf{M}^r \mathbf{U}(\mathbf{M}^r)^{\prime}$$ 
901 | 
902 | - $\mathbf{M}$: Matrix of intergroup transmission rates 
903 | - $\mathbf{V}^{(t)}$: Matrix of variances and covariances 
904 | - $r$: Index of the successive time periods 
905 | - $\mathbf{U}$: Matrix of reciprocals of the effective population sizes ($1 / N_e$). These are on the diagonal, the rest is $0$ 
906 | - $(\mathbf{M}^r)^{\prime}$: $\mathbf{M}^r$ transposed 
907 | 
908 | Equation (11) says that in the first time period, the covariance between two groups is a function of the sum of products of the intergroup transmission rates from all groups to those two groups and the reciprocal of their two effective population sizes. In later time periods, as individuals who learned from non-group members are in turn distributed among the other groups, the intergroup transmission rates from each group to *all* the others assume increasing importance. The resulting cumulative effect of the indirect movement of variants among groups is handled by successively powering the intergroup transmission matrix. 
909 | 
910 | The magnitude of the covariance between two groups will scale inversely with their sizes $N$ and positively with the intergroup transmission rates $m$. High levels of intergroup transmission will mean that whatever departures from the initial frequency occur, they will be similar and in the same direction, hence the covariance between the groups will be high. On the other hand, if either or both group sizes are low, causing drift to play a stronger role, variant frequencies are less likely to depart from initial frequencies in a similar fashion. The covariances will be low. 
911 | 
912 | The matrix of variances and covariances $\mathbf{V}$ can be converted to a matrix of squared Euclidean distances $d_{ij}^{2}$ (or $\mathbf{SED}$). 
913 | 
914 | $$d_{ij}^{2(t)} = v_{ii}^{(t)} + v_{jj}^{(t)} - 2 v_{ij}^{(t)}$$ 
915 | 
916 | - $v_{ii}^{(t)}$: Diagonal element of $\mathbf{V}^{(t)}$ -- a variance. The square of the departure of the variant frequency in the $i$'th deme at time $t$ from its starting frequency when the demes were identical, standardized by the variance of that starting frequency 
917 | - $v_{ij}^{(t)}$: Covariance. 
The product of the departures of the variant frequencies in each pair of demes $i$ and $j$, again standardized by the variance of the starting frequency 918 | 919 | ```{r echo = FALSE} 920 | inputPanel( 921 | sliderInput("Ne_matrix", label = "Ne", 922 | min = 10, max = 100, value = 20, step = 10), 923 | sliderInput("t_matrix", label = "t", 924 | min = 10, max = 100, value = 20, step = 10), 925 | actionButton("run_button_matrix_calculation", "Run matrix calculation") 926 | ) 927 | 928 | matrix_calculation_data <- eventReactive( 929 | input$run_button_matrix_calculation, { 930 | Mk <- function(k, mi_3) { 931 | M <- matrix(data = rep(0, 5^2), nrow = 5, ncol = 5) 932 | #M[] <- 0.2 933 | M[1,] <- c(0.4, 0.0, 0.2, 0.0, 0.4) 934 | M[2,] <- c(0.0, 0.4, 0.2, 0.4, 0.0) 935 | M[3,] <- c(0.2, 0.2, 0.2, 0.2, 0.2) 936 | M[4,] <- c(0.2, 0.1, 0.3, 0.2, 0.2) 937 | M[5,] <- c(0.2, 0.3, 0.1, 0.2, 0.2) 938 | M 939 | } 940 | 941 | multi_group_matrizes(input$Ne_matrix, input$t_matrix, Mk) 942 | }, 943 | ignoreNULL = FALSE 944 | ) 945 | 946 | renderPlot({ 947 | plotlist <- lapply(matrix_calculation_data(), function(x) { 948 | ggplot(x, aes(x = Var2, y = Var1)) + 949 | geom_raster(aes(fill=value)) + 950 | geom_text(aes(x = Var2, y = Var1, label = round(value, 3)), color = "white", size = 5) + 951 | scale_y_reverse(sec.axis = dup_axis()) + 952 | scale_x_continuous(sec.axis = dup_axis()) + 953 | xlab("group") + 954 | ylab("group") 955 | }) 956 | 957 | cowplot::plot_grid( 958 | plotlist = plotlist, 959 | labels = c("M", "M'", "U", "V", "SED", "MSD"), 960 | nrow = 3, ncol = 2 961 | ) 962 | }, 963 | height = 1200 964 | ) 965 | ``` 966 | 967 | ### Woodland Interassemblage Distance 968 | 969 | #### Trends in Interassemblage Distance *p.25-26* 970 | 971 | The migration matrix approach can be applied to cultural transmission contexts by computing a matrix of squared Euclidean distances $\mathbf{SED}$ between the groups for each of the variants. We expect to find that the overall level of differentiation among groups, as measured by the mean of the squared Euclidean distances $\mathbf{MSD}$, is the mirror image of our estimates of $θ$. 972 | 973 | #### Diversity and Distance for Individual Assemblages *p.26-27* **(13)** 974 | 975 | The mean of the squared Euclidean distance $\bar{d}_{i}^{2}$ (or $MSD$) can be calculated with (13). 
976 | 977 | $$\bar{d}_{i}^{2} = \sum_{j = 1}^{n} d_{ij}^{2} / (n - 1), i \neq j$$ 978 | 979 | ```{r echo = FALSE} 980 | inputPanel( 981 | sliderInput("k_two_groups_many_runs_with_msd", label = "k", 982 | min = 15, max = 25, value = 20, step = 1), 983 | sliderInput("N_two_groups_many_runs_with_msd", label = "N", 984 | min = 10, max = 100, value = 20, step = 10), 985 | sliderInput("t_two_groups_many_runs_with_msd", label = "t", 986 | min = 20, max = 200, value = 20, step = 20), 987 | sliderInput("mi_two_groups_many_runs_with_msd", label = "mi", 988 | min = 0, max = 1, value = 0.1, step = 0.01), 989 | sliderInput("sim_runs_two_groups_many_runs_with_msd", label = "Number of simulation runs", 990 | min = 1, max = 100, value = 20, step = 1), 991 | actionButton("run_button_two_groups_many_runs_with_msd", "Run simulation") 992 | ) 993 | 994 | two_groups_many_runs_with_msd_data <- eventReactive( 995 | input$run_button_two_groups_many_runs_with_msd, { 996 | 997 | # read input 998 | k <- input$k_two_groups_many_runs_with_msd 999 | N <- input$N_two_groups_many_runs_with_msd 1000 | t_final <- input$t_two_groups_many_runs_with_msd 1001 | mi <- input$mi_two_groups_many_runs_with_msd 1002 | sim_runs <- input$sim_runs_two_groups_many_runs_with_msd 1003 | 1004 | mixed_list <- list() 1005 | 1006 | # run group drift simulation 1007 | res_list <- list() 1008 | for (i in 1:sim_runs) { 1009 | res_list[[i]] <- calculate_sed_for_group_drift_simulation_result( 1010 | group_drift_simulation( 1011 | k, 1012 | N, 1013 | t_final, 1014 | mi 1015 | ), 1016 | sim_run = i 1017 | ) 1018 | } 1019 | 1020 | mixed_list[[1]] <- do.call(rbind, res_list) 1021 | 1022 | # calculate matrix and mean squared distance 1023 | Mk_msd <- function(k, mi) { 1024 | M <- matrix(data = rep(0, 2^2), nrow = 2, ncol = 2) 1025 | M[1,] <- c(1 - mi, mi) 1026 | M[2,] <- c(mi, 1 - mi) 1027 | M 1028 | } 1029 | 1030 | msd <- c() 1031 | for (t_m in 0:t_final) { 1032 | msd[t_m + 1] <- multi_group_matrizes( 1033 | N, t_m, Mk_msd, mi 1034 | )$longmosed$value[2] 1035 | } 1036 | 1037 | mixed_list[[2]] <- data.frame(t = 0:t_final, sed = msd) 1038 | 1039 | mixed_list 1040 | }, 1041 | ignoreNULL = FALSE 1042 | ) 1043 | 1044 | renderPlot({ 1045 | mixed_list <- two_groups_many_runs_with_msd_data() 1046 | 1047 | ggplot() + 1048 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 1049 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 1050 | geom_line( 1051 | data = mixed_list[[1]], 1052 | aes(x = t, y = sed, group = sim_run), 1053 | color = "black", size = 1, alpha = 0.3 1054 | ) + 1055 | geom_point( 1056 | data = mixed_list[[1]], 1057 | aes(x = t, y = sed), 1058 | color = "black", size = 1 1059 | ) + 1060 | geom_line( 1061 | data = mixed_list[[2]], 1062 | aes(x = t, y = sed), 1063 | color = "dodgerblue4", size = 2 1064 | ) + 1065 | theme_bw() + 1066 | xlab(latex2exp::TeX("t")) + 1067 | ylab(latex2exp::TeX("d_{ij}^2")) 1068 | }) 1069 | ``` 1070 | 1071 | ### Literature 1072 | -------------------------------------------------------------------------------- /presentation_version/neiman1995_presentation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "'Stylistic variation in evolutionary perspective'" 3 | author: "Clemens Schmid" 4 | date: "Juni 2018" 5 | output: 6 | ioslides_presentation: 7 | css: styles_presentation.css 8 | widescreen: true 9 | smaller: true 10 | runtime: shiny 11 | bibliography: ../references.bib 12 | link-citations: true 13 | --- 14 | 15 | ```{r include=FALSE} 16 | # start 
presentation from console: R -e "rmarkdown::run('~/neomod/neiman1995/neiman1995_presentation.Rmd', shiny_args = list(port = 8241, launch.browser = T))" 
17 | 
18 | # knitr chunk settings 
19 | knitr::opts_chunk$set(echo = TRUE) 
20 | ``` 
21 | 
22 | ```{r echo = FALSE} 
23 | # load the packages that are inconvenient to reference directly via namespace:: 
24 | library(ggplot2) 
25 | library(magrittr) 
26 | suppressMessages(library(expm)) 
27 | # load the functions in the source files 
28 | purrr::walk(list.files("../R", full.names = T), function(x) {source(x)}) 
29 | ``` 
30 | 
31 | ## Disclaimer 
32 | 
33 | *This document visualizes the simulations and the general model functions in Fraser D. Neiman's article Stylistic Variation in Evolutionary Perspective: Inferences from Decorative Diversity and Interassemblage Distance in Illinois Woodland Ceramic Assemblages [@neiman_stylistic_1995].* 
34 | 
35 | 
36 | 
37 | # Introduction 
38 | 
39 | ## Introduction: Cultural Evolution 
40 | 
41 | **Cultural Evolution theory** 
42 | The processes that drive the biological evolution of species also shape human cultural development. 
43 | 
44 |
45 | 46 | > Certain aspects of what archaeologists have traditionally called **stylistic variation** can be understood as the result of the introduction of **selectively neutral variation** into **social-learning populations** and the **sampling error** in the cultural transmission of that variation (**drift**). 47 | > 48 | > Simple mathematical models allow the deduction of expectation for the dynamics of these evolutionary mechanisms as monitored in the archaeological record through **assemblage diversity** and **interassemblage distance**. 49 | > 50 | > The models are applied to make inferences about the causes of change in decorative diversity and interassemblage distance for **Woodland ceramics from Illinois**. 51 | > 52 | > -- @neiman_stylistic_1995, Abstract. (boldface and paragraphs in this and in the following citations from me) 53 | 54 | ## Introduction: Stylistic vs. functional cultural traits 55 | 56 | > **Style** denotes those forms that do not have detectable selective values. **Function** is manifest as those forms that directly affect the Darwinian fitness of the populations in which they occur. 57 | > 58 | > [...] 59 | > 60 | > Traits that have discrete selective values over measurable amounts of time should be accountable by natural selection and a set of external conditions. **Traits identified as adaptively neutral will display a very different kind of behavior** because their frequencies in a population are not directly accountable in terms of selection and external contingencies. Their behavior should be more adequately accommodated by **stochastic processes** 61 | > 62 | > -- @dunnell1978style, 199. 63 | 64 | \ 65 | 66 | > In a neo-Darwinian framework these definitions imply that **variation in stylistic variant frequencies in time and space** will be affected by a subset of evolutionary forces that introduce selectively neutral cultural variation into populations of social learners, for example, **innovation** and **intergroup transmission** [(**flow**)], and then sort it stochastically (**drift**). 67 | > 68 | > -- @neiman_stylistic_1995, 8. 69 | 70 | \ 71 | 72 | The main mechanisms of diffusion of neutral variants are **innovation**, **drift** and **flow**. 73 | 74 | ## Introduction: Method? 75 | 76 | > The signal advantage of embedding style in a Darwinian framework is that it **becomes possible to make and evaluate inferences about what happened in history**. These twin goals are accomplished by developing **models of the operation of different evolutionary forces or mechanisms** defined by theory. Models in turn deliver **expectations concerning spatial and temporal distributions** of elements that result from the operation of different mechanisms. Given a suite of measurements of real-world phenomena or statistical summaries of them, we are in a position to make **inferences about the mechanisms that caused observed values**. The inferences can be checked by developing additional models and expectations, based on independent lines of reasoning. Over the long term, the agreement of independent inferences leads to historical knowledge. **Thus while model building is an abstract enterprise, the payoff is fundamentally pragmatic.** 77 | > 78 | > -- @neiman_stylistic_1995, 8. 
79 | 80 | \ 81 | 82 | > The chances of developing a deductively correct answer are increased if we work with **formal models**, capable of representation in either **mathematical formalism** or **computer simulation**, of the evolutionary mechanisms that govern the distribution of selectively neutral forms in time and space. 83 | > 84 | > -- @neiman_stylistic_1995, 9. 85 | 86 | # Drift, Innovation and Diversity 87 | 88 | ## Temporal Dynamics of Drift 89 | 90 | **Drift** is sampling error that accompanies all forms of cultural transmission and **causes individual variants to vanish or dominate randomly**. The speed with which the variation is destroyed increases as the population size decreases. 91 | 92 | Mechanics of the following simulation: 93 | In each time step every individual randomly adopts a variant from somebody else. The probability of getting somebody else's variant is $(N - 1) / N$ while the probability of keeping the own variant is only $1 / N$. 94 | 95 | 96 | - $k$: Amount of variants in a population 97 | - $N_e$: Size of the effective population. The effective population consists of the individuals within a population that is actively involved in a cultural transmission process 98 | - $t$: Time / number of iterations 99 | 100 | ## Temporal Dynamics of Drift 101 | 102 | ```{r echo=FALSE} 103 | inputPanel( 104 | sliderInput("k_drift_simulation", label = "k", 105 | min = 5, max = 15, value = 10, step = 1), 106 | sliderInput("Ne_drift_simulation", label = "Ne", 107 | min = 10, max = 100, value = 20, step = 10), 108 | sliderInput("t_drift_simulation", label = "t", 109 | min = 50, max = 200, value = 100, step = 50), 110 | actionButton("run_button_drift_simulation", "Run simulation") 111 | ) 112 | 113 | drift_simulation_data <- eventReactive( 114 | input$run_button_drift_simulation, { 115 | 116 | # read input 117 | k <- input$k_drift_simulation 118 | N <- input$Ne_drift_simulation 119 | time <- input$t_drift_simulation 120 | 121 | # calculate population parameters 122 | population <- 1:N 123 | variants <- 1:k 124 | timesteps <- 2:time 125 | 126 | # create initial population 127 | pop0 <- tibble::tibble( 128 | time = as.integer(0), 129 | individual = 1:N, 130 | variant = rep_len(variants, N) 131 | ) 132 | 133 | # list to store population stages over time 134 | pop_devel <- list() 135 | pop_devel[[1]] <- pop0 136 | 137 | # simulation loop 138 | for (p1 in timesteps) { 139 | pop_new <- pop_devel[[p1 - 1]] 140 | pop_new$time <- p1 - 1 141 | pop_new$variant <- sample(pop_new$variant, length(pop_new$variant), replace = T) 142 | pop_devel[[p1]] <- pop_new 143 | } 144 | 145 | # bind individual population stages into data.frame 146 | pop_devel_df <- do.call(rbind, pop_devel) 147 | 148 | # calculate number of individuals per timestep and variant 149 | pop_devel_sum <- pop_devel_df %>% 150 | dplyr::group_by( 151 | time, variant 152 | ) %>% 153 | dplyr::summarise( 154 | individuals_with_variant = n() 155 | ) %>% 156 | dplyr::ungroup() %>% 157 | # complete (expand.grid) to fill gaps in the area plot 158 | tidyr::complete( 159 | time, 160 | variant, 161 | fill = list(individuals_with_variant = as.integer(0)) 162 | ) 163 | 164 | pop_devel_sum 165 | }, 166 | ignoreNULL = FALSE 167 | ) 168 | 169 | renderPlot({ 170 | 171 | drift_simulation_data() %>% 172 | ggplot() + 173 | geom_area(aes(x = time, y = individuals_with_variant, fill = variant, group = variant)) + 174 | geom_line(aes(x = time, y = individuals_with_variant, group = variant), position = "stack") + 175 | theme_bw() + 176 | 
xlab(expression(paste("t"))) + 177 | ylab("variants and their occurence in the population [%]") 178 | 179 | }) 180 | ``` 181 | 182 | ## Homogeneity Under Drift 183 | 184 | The theory of neutral alleles allows to describe the **within-population homogeneity $F$** as a function of the effective population size $N_e$. 185 | 186 | $$F_t = \frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}$$ 187 | 188 | - $F_t$: Within-population homogeneity. Calculated as the probability that two randomly chosen individuals in the population carry variants that are copies of a common antecedent variant at a certain time step 189 | 190 | In a given time period the probability of drawing an individual, who learned from the same model as some other randomly selected individual in the previous time period, is $1 / N$. The probability of the opposite is $1 - (1 / N)$. 191 | 192 | - $F_{t - 1}$: Probability that the model of the second individual learned from the same model as the model of the first individual in an earlier time step 193 | 194 | As an effect of drift, $F_t$ approaches one as $t$ increases. 195 | 196 | ## Homogeneity Under Drift 197 | 198 | ```{r echo=FALSE} 199 | Ft <- function(F0, Ne, time) { 200 | if(time == 0) { return(F0) } 201 | 1/Ne + (1 - 1/Ne) * Ft(F0, Ne, time - 1) 202 | } 203 | 204 | inputPanel( 205 | sliderInput("F0_homogeneity_drift", label = "F0 (Ft for t == 0)", 206 | min = 0, max = 1, value = 0.5, step = 0.05), 207 | sliderInput("Ne_homogeneity_drift", label = "Ne", 208 | min = 10, max = 100, value = 20, step = 10), 209 | sliderInput("t_homogeneity_drift", label = "t", 210 | min = 50, max = 200, value = 100, step = 50) 211 | ) 212 | 213 | renderPlot({ 214 | 215 | # read input 216 | F0 <- input$F0_homogeneity_drift 217 | Ne <- input$Ne_homogeneity_drift 218 | time <- input$t_homogeneity_drift 219 | 220 | timesteps <- 0:time 221 | 222 | # apply function for all timesteps 223 | Ft_time <- sapply(timesteps, function(x) { Ft(F0, Ne, x) }) 224 | 225 | data.frame( 226 | t = timesteps, 227 | Ft = Ft_time 228 | ) %>% 229 | ggplot() + 230 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 231 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 232 | geom_line(aes(t, Ft), size = 1) + 233 | theme_bw() + 234 | xlab(latex2exp::TeX("t")) + 235 | ylab(latex2exp::TeX("F_{t}")) 236 | }) 237 | ``` 238 | 239 | ## Temporal Dynamics of Drift and Innovation 240 | 241 | The effect of **drift** on **homongeneity** shrinks if **innovation** is possible. 242 | 243 | Mechanics of the following simulation: 244 | The simulation works like the first one, but now there's a chance of $\mu$ for every individual in every time step to create a new variant. 245 | 246 | 247 | - $\mu$: Innovation rate. Probability of the creation of a new variant 248 | 249 | The effect of drift is still strongly visible, but with increasing $\mu$ it becomes less relevant. New variants can form and replace the dominant one. 
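
A minimal, non-interactive sketch of one such copying-plus-innovation step (the object names and parameter values are illustrative only; the full interactive simulation follows on the next slide):

```{r eval=FALSE}
# one time step of unbiased copying with innovation -- illustrative sketch only
N  <- 20                          # effective population size
mu <- 0.01                        # innovation rate
variants <- rep_len(1:10, N)      # variants carried by the N individuals at time t

# every individual copies the variant of a randomly chosen individual (drift)
variants_t1 <- sample(variants, N, replace = TRUE)

# each individual additionally innovates with probability mu,
# receiving a variant that did not exist before
innovators <- runif(N) < mu
variants_t1[innovators] <- max(variants) + seq_len(sum(innovators))

variants_t1                       # variants carried at time t + 1
```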
250 | 251 | ## Temporal Dynamics of Drift and Innovation 252 | 253 | ```{r echo=FALSE} 254 | inputPanel( 255 | sliderInput("k_drift_simulation_with_innovation", label = "k for t == 0", 256 | min = 5, max = 15, value = 10, step = 1), 257 | sliderInput("Ne_drift_simulation_with_innovation", label = "Ne", 258 | min = 10, max = 100, value = 20, step = 10), 259 | sliderInput("t_drift_simulation_with_innovation", label = "t", 260 | min = 50, max = 200, value = 100, step = 50), 261 | sliderInput("mu_drift_simulation_with_innovation", label = "μ", 262 | min = 0, max = 0.1, value = 0.01, step = 0.01), 263 | actionButton("run_button_drift_simulation_with_innovation", "Run simulation") 264 | ) 265 | 266 | drift_simulation_with_innovation_data <- eventReactive( 267 | input$run_button_drift_simulation_with_innovation, { 268 | 269 | # read input 270 | k <- input$k_drift_simulation_with_innovation 271 | N <- input$Ne_drift_simulation_with_innovation 272 | time <- input$t_drift_simulation_with_innovation 273 | mu <- input$mu_drift_simulation_with_innovation 274 | 275 | # prepare population parameters 276 | population <- 1:N 277 | variants <- 1:k 278 | timesteps <- 2:time 279 | 280 | # create starting population 281 | pop0 <- tibble::tibble( 282 | time = as.integer(0), 283 | individual = 1:N, 284 | variant = rep_len(1:k, N) 285 | ) 286 | 287 | # list to store population stages over time 288 | pop_devel <- list() 289 | pop_devel[[1]] <- pop0 290 | 291 | # simulation loop 292 | last_variant <- max(pop_devel[[1]]$variant) 293 | for (p1 in timesteps) { 294 | pop_new <- pop_devel[[p1 - 1]] 295 | pop_new$time <- p1 - 1 296 | pop_new$variant <- sample(pop_new$variant, length(pop_new$variant), replace = T) 297 | 298 | # innovation 299 | innovate_here <- sample( 300 | c(TRUE, FALSE), 301 | length(pop_new$variant), 302 | prob = c(mu, 1 - mu), 303 | replace = T 304 | ) 305 | new_variants <- seq(last_variant + 1, last_variant + sum(innovate_here)) 306 | last_variant <- last_variant + sum(innovate_here) 307 | pop_new$variant[innovate_here] <- new_variants 308 | 309 | pop_devel[[p1]] <- pop_new 310 | } 311 | 312 | # bind individual population stages into data.frame 313 | pop_devel_df <- do.call(rbind, pop_devel) 314 | 315 | # calculate number of individuals per timestep and variant 316 | pop_devel_sum <- pop_devel_df %>% 317 | dplyr::group_by( 318 | time, variant 319 | ) %>% 320 | dplyr::summarise( 321 | individuals_with_variant = n() 322 | ) %>% 323 | dplyr::ungroup() %>% 324 | tidyr::complete( 325 | time, 326 | variant, 327 | fill = list(individuals_with_variant = as.integer(0)) 328 | ) 329 | 330 | pop_devel_sum 331 | 332 | }, 333 | ignoreNULL = FALSE 334 | ) 335 | 336 | renderPlot({ 337 | drift_simulation_with_innovation_data() %>% 338 | ggplot() + 339 | geom_area(aes(x = time, y = individuals_with_variant, fill = variant, group = variant)) + 340 | geom_line(aes(x = time, y = individuals_with_variant, group = variant), position = "stack") + 341 | theme_bw() + 342 | xlab("t") + 343 | ylab("variants and their occurence in the population [%]") 344 | }) 345 | ``` 346 | 347 | ## Homogeneity Under Drift and Innovation 348 | 349 | Drift and innovation are opposing forces: **drift increases homogeneity and innovation decreases it**. 350 | 351 | This causes an **equilibrium** after some time steps in the simulation. 352 | 353 | We can incorporate innovation into the calculation of $F$ by modifying equation (1). 
354 | 
355 | $$F_t = \left(\frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}\right)(1 - \mu)^2$$ 
356 | 
357 | ## Homogeneity Under Drift and Innovation 
358 | 
359 | ```{r echo=FALSE} 
360 | Ft_innovation <- function(F0, Ne, time, mu) { 
361 |   if(time == 0) {return(F0)} 
362 |   (1/Ne + (1 - 1/Ne) * Ft_innovation(F0, Ne, time - 1, mu)) * (1 - mu)^2 
363 | } 
364 | 
365 | inputPanel( 
366 |   sliderInput("F0_homogeneity_drift_with_innovation", label = "F0 (Ft for t == 0)", 
367 |               min = 0, max = 1, value = 0.5, step = 0.05), 
368 |   sliderInput("Ne_homogeneity_drift_with_innovation", label = "Ne", 
369 |               min = 10, max = 100, value = 20, step = 10), 
370 |   sliderInput("t_homogeneity_drift_with_innovation", label = "t", 
371 |               min = 50, max = 200, value = 100, step = 50), 
372 |   sliderInput("mu_homogeneity_drift_with_innovation", label = "μ", 
373 |               min = 0, max = 1, value = 0.1, step = 0.1) 
374 | ) 
375 | 
376 | renderPlot({ 
377 | 
378 |   # read input 
379 |   F0 <- input$F0_homogeneity_drift_with_innovation 
380 |   Ne <- input$Ne_homogeneity_drift_with_innovation 
381 |   mu <- input$mu_homogeneity_drift_with_innovation 
382 |   time <- input$t_homogeneity_drift_with_innovation 
383 | 
384 |   timesteps <- 0:time 
385 | 
386 |   # apply function for every timestep 
387 |   Ft_innovation_time <- sapply(timesteps, function(x) {Ft_innovation(F0, Ne, x, mu)}) 
388 | 
389 |   data.frame( 
390 |     t = timesteps, 
391 |     Ft = Ft_innovation_time 
392 |   ) %>% 
393 |     ggplot() + 
394 |     geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 
395 |     geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 
396 |     geom_line(aes(t, Ft), size = 1) + 
397 |     theme_bw() + 
398 |     xlab(latex2exp::TeX("t")) + 
399 |     ylab(latex2exp::TeX("F_{t}")) 
400 | }) 
401 | ``` 
402 | 
403 | ## Homogeneity Under Drift and Innovation 
404 | 
405 | $$F_t = \left(\frac{1}{N_e} + \left(1 - \frac{1}{N_e} \right) F_{t-1}\right)(1 - \mu)^2$$ 
406 | 
407 | The equilibrium is reached when $F_t = F_{t-1}$. Setting $F_t = F_{t-1}$ in equation (2) defines $\hat{F}$. 
408 | 
409 | $$\hat{F} = \frac{(1 - \mu)^2}{N_e - (N_e - 1)(1 - \mu)^2}$$ 
410 | 
411 | If we assume $\mu$ is quite small we can further simplify this definition. 
412 | 
413 | $$\hat{F} \simeq \frac{1}{2 N_e \mu + 1}$$ 
414 | 
415 | That means **the homogeneity of neutral variants within a population is inversely proportional to twice the effective population size times the innovation rate**: $2 N_e \mu$. This expression will be called $\theta$. 
416 | 417 | - $\theta$: Twice the effective population size times the innovation rate ($2 N_e \mu$) 418 | 419 | ## Homogeneity Under Drift and Innovation 420 | 421 | ```{r echo=FALSE} 422 | theta <- function(Ne, mu) { 423 | (2 * Ne * mu) 424 | } 425 | 426 | Fhat <- function(theta) { 427 | 1 / (theta + 1) 428 | } 429 | 430 | inputPanel( 431 | sliderInput("Ne_homogeneity_equilibrium", label = "Ne", 432 | min = 10, max = 100, value = 20, step = 10), 433 | sliderInput("mu_homogeneity_equilibrium", label = "μ", 434 | min = 0, max = 1, value = 0.1, step = 0.1) 435 | ) 436 | 437 | renderText({ 438 | paste0("1 * theta = ", theta(input$Ne_homogeneity_equilibrium, input$mu_homogeneity_equilibrium)) 439 | }) 440 | 441 | renderPlot({ 442 | Ne <- input$Ne_homogeneity_equilibrium 443 | mu <- input$mu_homogeneity_equilibrium 444 | 445 | tibble::tibble( 446 | theta = 0:100 * theta(Ne, mu), 447 | Fhat = sapply(theta, function(x) {Fhat(x)}) 448 | ) %>% 449 | ggplot() + 450 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 451 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 452 | geom_line(aes(theta, Fhat), size = 1) + 453 | theme_bw() + 454 | xlab(latex2exp::TeX("θ")) + 455 | ylab(latex2exp::TeX("\\hat{F}")) 456 | }) 457 | ``` 458 | 459 | ## Homogeneity Under Drift and Innovation 460 | 461 | The **homogeneity $\hat{F}$** of a population can also be described as a **function of the relative frequency of the variants in it**: $p$. 462 | 463 | $p_i$ is the relative frequency of the $i$'th variant in the population, so $p$ is a vector of frequencies with the sum one. The probability of choosing a given variant at random is its relative frequency $p_i$, which is also the probability of picking another copy of this same variant on the second try. The probability of getting this variant twice in a row is therefore $p_i^2$. 464 | 465 | The total probability of getting any of the $i = 1$ to $k$ variants twice in a row is the sum of all these probabilities. 466 | 467 | $$\hat{F} = \sum_{i=1}^{k} p^2_i$$ 468 | 469 | Mechanics of the following simulation: 470 | $p$ is randomly drawn some hundred times from a uniform distribution for every value of $k$. The black line shows the mean result for $\hat{F}$ the ribbon the min and max results. 
471 | 472 | 473 | - $p_i$: relative frequency of the $i$'th variant in the population 474 | 475 | ## Homogeneity Under Drift and Innovation 476 | 477 | ```{r echo=FALSE} 478 | Fhat_relative_frequency <- function(amount_of_variants) { 479 | p <- runif(amount_of_variants) 480 | p <- p / sum(p) 481 | sum(p^2) 482 | } 483 | 484 | inputPanel( 485 | sliderInput("k_relative_frequency", label = "k", 486 | min = 50, max = 150, value = 100, step = 10), 487 | sliderInput("number_of_replications_relative_frequency", label = "Number of simulation runs", 488 | min = 100, max = 1000, value = 500, step = 100), 489 | actionButton("run_button_relative_frequency", "Run simulation") 490 | ) 491 | 492 | relative_frequency_data <- eventReactive( 493 | input$run_button_relative_frequency, { 494 | 495 | # read input 496 | number_of_replications <- input$number_of_replications_relative_frequency 497 | k <- input$k_relative_frequency 498 | 499 | # calculate Fhat 500 | tibble::tibble( 501 | k = 0:k, 502 | Fhat_min = sapply(k, function(x) { 503 | min(replicate(number_of_replications, Fhat_relative_frequency(x))) 504 | }), 505 | Fhat_max = sapply(k, function(x) { 506 | max(replicate(number_of_replications, Fhat_relative_frequency(x))) 507 | }), 508 | Fhat_mean = (Fhat_min + Fhat_max) / 2 509 | ) 510 | 511 | }, 512 | ignoreNULL = FALSE 513 | ) 514 | 515 | renderPlot({ 516 | relative_frequency_data() %>% 517 | ggplot() + 518 | geom_hline(aes(yintercept = 1), color = "dodgerblue4") + 519 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 520 | geom_ribbon( 521 | aes(x = k, ymin = Fhat_min, ymax = Fhat_max), 522 | fill = "deepskyblue", alpha = 0.5 523 | ) + 524 | geom_line(aes(x = k, y = Fhat_mean), color = "black", size = 1) + 525 | theme_bw() + 526 | xlab(latex2exp::TeX("k")) + 527 | ylab(latex2exp::TeX("\\hat{F}")) 528 | }) 529 | ``` 530 | 531 | ## Diversity 532 | 533 | Instead of looking at homogeneity we can also estimate the **diversity inside a population**. One measure of diversity is the **"effective number" of variants $n_e$**, which is the reciprocal of $\hat{F}$. 534 | 535 | $n_e$ scales linearly with $\theta$. Larger populations contain more diversity at a given level of innovation. Greater diversity is also expected in populations into which new variants are being introduced at higher rates. 
536 | 537 | $$n_e = 2 N_e \mu + 1$$ 538 | 539 | - $n_e$: Effective number of variants 540 | 541 | ## Diversity 542 | 543 | ```{r echo=FALSE} 544 | ne <- function(theta) { 545 | theta + 1 546 | } 547 | 548 | inputPanel( 549 | sliderInput("Ne_diversity_effective_number", label = "Ne", 550 | min = 10, max = 100, value = 20, step = 10), 551 | sliderInput("mu_diversity_effective_number", label = "μ", 552 | min = 0, max = 1, value = 0.01, step = 0.01) 553 | ) 554 | 555 | renderText({ 556 | paste0("1 * theta = ", theta(input$Ne_diversity_effective_number, input$mu_diversity_effective_number)) 557 | }) 558 | 559 | renderPlot({ 560 | 561 | # read input 562 | Ne <- input$Ne_diversity_effective_number 563 | mu <- input$mu_diversity_effective_number 564 | 565 | tibble::tibble( 566 | theta = 0:100 * theta(Ne, mu), 567 | ne = sapply(theta, function(x) {ne(x)}) 568 | ) %>% 569 | ggplot() + 570 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 571 | geom_line(aes(theta, ne), size = 1) + 572 | theme_bw() + 573 | xlab(latex2exp::TeX("θ")) + 574 | ylab(latex2exp::TeX("n_e")) 575 | }) 576 | ``` 577 | 578 | ## Diversity 579 | 580 | **$\theta$ can be estimated empirically by computing the reciprocal of the sum of squares of variant frequencies in a population**. 581 | 582 | $$t_F = \frac{1}{\sum_{i=1}^{k} p^2_i} - 1$$ 583 | 584 | We can call such estimates $t_F$, to distinguish them from the actual population values and to remind ourselves that they are based on the homogeneity $\hat{F}$ in Equation (5). 585 | 586 | $$\hat{F} = \sum_{i=1}^{k} p^2_i$$ 587 | 588 | With this equation we can get useful insights into variations in $\theta$ among groups or demes. 589 | 590 | Mechanics of the following simulation: 591 | $p$ is randomly drawn some hundred times from a uniform distribution for every value of $k$. The black line shows the mean result for $t_F$, the ribbon the min and max results. 
592 | 593 | 594 | - $t_F$: Estimated $\theta$ by the sum of squares of variant frequencies 595 | 596 | ## Diversity 597 | 598 | ```{r echo=FALSE} 599 | theta_F <- function(amount_of_variants) { 600 | p <- runif(amount_of_variants) 601 | p <- p / sum(p) 602 | (1 / sum(p^2)) - 1 603 | } 604 | 605 | inputPanel( 606 | sliderInput("k_reciprocal_relative_frequency", label = "k", 607 | min = 50, max = 150, value = 100, step = 10), 608 | sliderInput("number_of_replications_reciprocal_relative_frequency", label = "Number of simulation runs", 609 | min = 100, max = 1000, value = 500, step = 100), 610 | actionButton("run_button_reciprocal_relative_frequency", "Run simulation") 611 | ) 612 | 613 | reciprocal_relative_frequency_data <- eventReactive( 614 | input$run_button_reciprocal_relative_frequency, { 615 | 616 | # read input 617 | number_of_replications <- input$number_of_replications_reciprocal_relative_frequency 618 | k <- input$k_reciprocal_relative_frequency 619 | 620 | # calculate Fhat 621 | tibble::tibble( 622 | k = 0:k, 623 | theta_F_min = sapply(k, function(x) { 624 | min(replicate(number_of_replications, theta_F(x))) 625 | }), 626 | theta_F_max = sapply(k, function(x) { 627 | max(replicate(number_of_replications, theta_F(x))) 628 | }), 629 | theta_F_mean = (theta_F_min + theta_F_max) / 2 630 | ) 631 | 632 | }, 633 | ignoreNULL = FALSE 634 | ) 635 | 636 | renderPlot({ 637 | reciprocal_relative_frequency_data() %>% 638 | ggplot() + 639 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 640 | geom_ribbon( 641 | aes(x = k, ymin = theta_F_min, ymax = theta_F_max), 642 | fill = "deepskyblue", alpha = 0.5 643 | ) + 644 | geom_line(aes(x = k, y = theta_F_mean), size = 1) + 645 | theme_bw() + 646 | xlab(latex2exp::TeX("k")) + 647 | ylab(latex2exp::TeX("t_F")) 648 | }) 649 | ``` 650 | 651 | # Archaeological Application 652 | 653 | ## Diversity and Sample Size 654 | 655 | The **frequency of variants** in a population of social learners is not equal to the **frequency preserved and documented** in the **archaeological assemblage**. Under appropriate taphonomic circumstances, however, they are surprisingly similar. 656 | 657 | Another problem arises because we try to estimate diversity only on the basis of a **sample derived from a population**. The equations (5) and (7) are similar to the Shannon-Weaver information statistic, which is very sensitive to sample-size variation. 658 | 659 | $$\hat{F} = \sum_{i=1}^{k} p^2_i$$ 660 | 661 | $$t_F = \frac{1}{\sum_{i=1}^{k} p^2_i} - 1$$ 662 | 663 | $$H = -\sum_{i = 1}^{k} p_i \log(p_i)$$ 664 | 665 | - $H$: [Shannon's diversity index](http://www.tiem.utk.edu/~gross/bioed/bealsmodules/shannonDI.html) 666 | 667 | ## Diversity and Sample Size 668 | 669 | $\theta$ is related to the sum of squared variant frequencies in a population, so it should be possible to **predict for a given value of $\theta$ how many variants occur with certain frequencies**. 670 | 671 | $$t_F = \frac{1}{\sum_{i=1}^{k} p^2_i} - 1$$ 672 | 673 | When $\theta$ is low ($\theta \ll 1$), we should expect that most of the time, a population will be dominated by a few variants, with other variants at low frequencies. When $\theta$ is high ($\theta \gg 1$), we are more likely to see a large number of variants at low to moderate frequencies. 674 | 675 | If the variants are selectively neutral, the expected number of different variants $k$ found in a sample drawn from a population is a function of it's sample size $n$ and $\theta$ value. 
The number of variants will be larger when either $\theta$ or $n$ are large. 676 | 677 | $$E(k) = \sum_{i = 0}^{n - 1} \frac{\theta}{\theta + i}$$ 678 | 679 | - $E(k)$: Expected number of different variants 680 | - $n$: Sample size 681 | 682 | ## Diversity and Sample Size 683 | 684 | ```{r echo=FALSE} 685 | Ek <- function(n, theta) { 686 | ressum <- 0 687 | for (i in 0:(n-1)) { 688 | ressum = ressum + (theta / (theta + i)) 689 | } 690 | return(ressum) 691 | } 692 | 693 | inputPanel( 694 | sliderInput("Ne_expected_variants", label = "Ne", 695 | min = 10, max = 100, value = 20, step = 10), 696 | sliderInput("mu_expected_variants", label = "μ", 697 | min = 0, max = 1, value = 0.01, step = 0.01), 698 | sliderInput("n_expected_variants", label = "n", 699 | min = 0, max = 100, value = 10, step = 1) 700 | ) 701 | 702 | renderText({ 703 | paste0("1 * theta = ", theta(input$Ne_expected_variants, input$mu_expected_variants)) 704 | }) 705 | 706 | renderPlot({ 707 | 708 | # read input 709 | Ne <- input$Ne_expected_variants 710 | mu <- input$mu_expected_variants 711 | n <- input$n_expected_variants 712 | 713 | tibble::tibble( 714 | theta = 0:100 * theta(Ne, mu), 715 | n = rep(n, 101), 716 | Ek = purrr::map2_dbl(n, theta, function(a, b) {Ek(a, b)}) 717 | ) %>% 718 | ggplot() + 719 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 720 | geom_line(aes(theta, Ek), size = 1) + 721 | theme_bw() + 722 | xlab(latex2exp::TeX("θ")) + 723 | ylab(latex2exp::TeX("E(k)")) 724 | }) 725 | ``` 726 | 727 | ## Diversity and Sample Size 728 | 729 | This could be used to produce a **maximum likelihood estimate of $\theta$**, that is the value of $\theta$ that maximizes the chances of drawing the observed number of variants $k$ in a sample of the size $n$. 730 | 731 | Unfortunately it doesn't have an analytic solution, but we can still use it to estimate $\theta$ iteratively by changing its value until $E(k)$ equals the observed number of variants. 732 | 733 | The following applet allows to solve (9) for theta approximately. It starts at $\theta = 10$, changes it in steps of $0.1$ and stops when $|E(k) - k| < 0.5$. 734 | 735 | We call the result of doing so $t_E$. Both $t_E$ and $t_F$ offer us special statistical summaries of assemblage diversity, but there are theoretical reasons to prefer $t_E$ if $n$ and $k$ (and not just $p$) are known. 
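
The step-wise search described above can also be replaced by a standard root finder. A minimal sketch, assuming illustrative observed values `n_obs` and `k_obs` (not data from the paper):

```{r eval=FALSE}
# expected number of variants E(k) in a sample of size n for a given theta (equation 9)
Ek <- function(n, theta) {
  sum(theta / (theta + 0:(n - 1)))
}

# illustrative observed values
n_obs <- 20    # sample size
k_obs <- 10    # observed number of variants

# t_E: the theta for which E(k) equals the observed number of variants
t_E <- uniroot(
  function(theta) Ek(n_obs, theta) - k_obs,
  interval = c(1e-6, 1000)
)$root

t_E
```

This avoids having to pick a starting value and a step width by hand.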
736 | 737 | ## Diversity and Sample Size 738 | 739 | ```{r echo=FALSE} 740 | inputPanel( 741 | sliderInput("n_iterative_theta", label = "n", 742 | min = 10, max = 100, value = 20, step = 10), 743 | sliderInput("k_iterative_theta", label = "k", 744 | min = 5, max = 15, value = 10, step = 1), 745 | actionButton("run_iterative_theta", "Approximate theta") 746 | ) 747 | 748 | iterative_theta_data <- eventReactive( 749 | input$run_iterative_theta, { 750 | 751 | # read input 752 | n <- input$n_iterative_theta 753 | k <- input$k_iterative_theta 754 | start_theta <- 10 755 | 756 | # iterative solving loop 757 | theta_iter <- start_theta 758 | while (TRUE) { 759 | diffi <- Ek(n, theta_iter) - k 760 | if (diffi < 0) { 761 | theta_iter <- theta_iter + 0.1 762 | } else if (diffi > 0) { 763 | theta_iter <- theta_iter - 0.1 764 | } 765 | if (abs(diffi) < 0.5) { 766 | break; 767 | } 768 | } 769 | 770 | theta_iter 771 | 772 | }, 773 | ignoreNULL = FALSE 774 | ) 775 | 776 | renderText({ 777 | paste0("theta ≈ ", round(iterative_theta_data(), 2)) 778 | }) 779 | ``` 780 | 781 | ## Archaeological Sample Size 782 | 783 | $t_E$ may yield wrong estimates of $\theta$ for very large archaeological samples that accumulated over long periods of time. 784 | 785 | Equation (9) implies that for a given number of variants $k$, $t_E$ will decline as the sample size $n$ increases. Usually $n$ is limited by the population size $N$, but in case of a long-term sample this limit can be exceeded: $n$ can grow beyond $N$ and causes the estimate of $\theta$ to be too low. 786 | 787 | In this case $t_F$ can again be the better function to determine $\theta$. Another solution is to look at the difference between both estimators ($t_F - t_E$) as a function of assemblage size. For the application in archaeological contexts it's always important to carefully examine the relationships among $t_F$ and $t_E$ and possible sample size effects. 788 | 789 | ## Inferences About Group Size and Innovation Rate 790 | 791 | $\theta$ is defined as twice the effective population size times the innovation rate ($2 N_e \mu$). 792 | 793 | The innovation rate $\mu$ includes the combined effects of both in situ innovation $v$ and the introduction of novel variants from other groups $m$. If $\mu = v + m$, then $N_e \mu = N_e v + N_e m$. 794 | 795 | $v$ should be roughly constant across demes, but $m$ is likely to be more variable in time and space. Under these circumstances, most of the variation in $\theta$ associated with $\mu$ will be caused by variation in intergroup transmission rates. 796 | 797 | $\theta$ mostly depends on the number of times local group members learn from members of other groups: $N_e m$. That means that variation in $\theta$ is an indicator for the absolute amount of cultural transmission among demes in a geographical region. 798 | 799 | # Drift, Intergroup Transmission, and Interassemblage Distance 800 | 801 | ## Dynamics of Stylistic Distance between Two Groups 802 | 803 | To compare groups and track the cultural transmission we need a simple **measure of between-group similarity**. One possibility is to look at the **squared Euclidean distance $d_{ij}^2$**. It's the sum of squared differences in variant frequencies $p$ between two groups $i$ and $j$. 
804 | 805 | $$d_{ij}^2 = \sum_{k = 1}^{n} (p_{ik} - p_{jk})^2$$ 806 | 807 | - $d_{ij}^2$: Squared Euclidean distance between two groups 808 | - $n$: total amount of variants (here not sample size) 809 | 810 | ## Dynamics of Stylistic Distance between Two Groups 811 | 812 | ```{r echo=FALSE} 813 | inputPanel( 814 | sliderInput("n_sed_general", label = "n", 815 | min = 10, max = 100, value = 50, step = 10), 816 | actionButton("run_sed_general", "Run simulation") 817 | ) 818 | 819 | sed_general_data <- eventReactive( 820 | input$run_sed_general, { 821 | 822 | # read input 823 | n <- input$n_sed_general 824 | 825 | # calculate 826 | tibble::tibble( 827 | amount_of_variants = 0:n, 828 | sed_min = sapply(amount_of_variants, function(x) { 829 | min(replicate(500, sed(runif(x), runif(x)))) 830 | }), 831 | sed_max = sapply(amount_of_variants, function(x) { 832 | max(replicate(500, sed(runif(x), runif(x)))) 833 | }), 834 | sed_mean = (sed_min + sed_max) / 2 835 | ) 836 | 837 | }, 838 | ignoreNULL = FALSE 839 | ) 840 | 841 | renderPlot({ 842 | sed_general_data() %>% 843 | ggplot() + 844 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 845 | geom_ribbon( 846 | aes(x = amount_of_variants, ymin = sed_min, ymax = sed_max), 847 | fill = "deepskyblue", alpha = 0.5 848 | ) + 849 | geom_line(aes(x = amount_of_variants, y = sed_mean), color = "black", size = 1) + 850 | theme_bw() + 851 | xlab(latex2exp::TeX("k")) + 852 | ylab(latex2exp::TeX("d_{ij}^2")) 853 | }) 854 | ``` 855 | 856 | ## Dynamics of Stylistic Distance between Two Groups 857 | 858 | The study of intergroup cultural transmission requires a model setup that contains the effect of drift but also exchange between groups. One promising approach comes from **migration matrix models**, which in population genetics have proven useful. 859 | 860 | Such models reveal what happens to **variant frequencies** in a **finite number of demes** that are subject to the joint effects of **drift**, whose strength is controlled by the effective size of each population, and **intergroup transmission**, occurring between demes at constant pairwise rates. 861 | 862 | There is **no** role in these models for **in situ innovation**, but they offer a means of checking the assumption that variation in $\theta$ is largely a function of intergroup transmission. 863 | 864 | Mechanics of the following simulation: 865 | This simulation is constructed like the very first one about the effects of drift. Only in this case, there is a set probability, the intergroup transmission rate $m_i$, that the individual contacted is derived from the other group. If $m_i = 0$, both groups act independently, but if it increases they tend to develop roughly alike. 866 | 867 | 868 | 869 | The line chart below the variant frequency area plot shows the distance measure $d_{ij}^2$ calculated for the two groups at the respective point in model time. 
870 | 871 | 872 | $m_i$: intergroup transmission rate 873 | 874 | ## Dynamics of Stylistic Distance between Two Groups 875 | 876 | ```{r echo=FALSE} 877 | inputPanel( 878 | sliderInput("k_two_groups", label = "k", 879 | min = 5, max = 15, value = 10, step = 1), 880 | sliderInput("N_two_groups", label = "N", 881 | min = 10, max = 100, value = 20, step = 10), 882 | sliderInput("t_two_groups", label = "t", 883 | min = 50, max = 200, value = 100, step = 50), 884 | sliderInput("mi_two_groups", label = "mi", 885 | min = 0, max = 1, value = 0.1, step = 0.01), 886 | actionButton("run_button_two_groups", "Run simulation") 887 | ) 888 | 889 | group_drift_simulation_data <- eventReactive( 890 | input$run_button_group_drift_simulation, { 891 | 892 | # run simulation (code in source file) 893 | group_drift_simulation( 894 | input$k_two_groups, 895 | input$N_two_groups, 896 | input$t_two_groups, 897 | input$mi_two_groups 898 | ) 899 | 900 | }, 901 | ignoreNULL = FALSE 902 | ) 903 | 904 | renderPlot({ 905 | A <- group_drift_simulation_data() %>% 906 | ggplot() + 907 | geom_area( 908 | aes(x = time, y = individuals_with_variant, fill = variant, group = variant) 909 | ) + 910 | geom_line( 911 | aes(x = time, y = individuals_with_variant, group = variant), 912 | position = "stack" 913 | ) + 914 | theme_bw() + 915 | xlab("t") + 916 | ylab("variants and their occurence in the population [%]") + 917 | facet_grid(group ~ .) 918 | 919 | B <- calculate_sed_for_group_drift_simulation_result( 920 | group_drift_simulation_data() 921 | ) %>% 922 | ggplot() + 923 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 924 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 925 | geom_line(aes(x = t, y = sed), color = "black", size = 1, alpha = 0.3) + 926 | geom_point(aes(x = t, y = sed), color = "black", size = 1) + 927 | theme_bw() + 928 | xlab(latex2exp::TeX("t")) + 929 | ylab(latex2exp::TeX("d_{ij}^2")) 930 | 931 | cowplot::plot_grid(A, B, nrow = 2, align = "v", axis = "lr") 932 | }) 933 | ``` 934 | 935 | ## Dynamics of Stylistic Distance between Two Groups 936 | 937 | We can run the above simulation many times to see different results random drift can produce. 938 | 939 | At the **beginning** the **distance between the groups is always zero**, because the variants and variant frequencies are equal in both. However, as time passes, the **distance increases up to a quasi-stationary equilibrium**. 940 | 941 | The equilibrium is only quasi-stationary because the groups are finite and there is no innovation: if the simulations are continued for a sufficient number of time periods, a single variant is fixed and as a result the between-group distance reverts to zero. 942 | 943 | The equilibrium holds over the period during which the opposing forces of drift, locally reducing variation within each group, and intergroup transmission, introducing potentially novel variation into each group from the other, are balanced. Later drift globally depletes all variation in both groups. 944 | 945 | Higher levels of intergroup transmission $m_i$ will lower the equilibrium level, while lower levels of intergroup transmission raise it. Lower effective sizes $N$ for either or both groups will raise the equilibrium since the effects of drift scale with $N$. 946 | 947 | The speed with which the equilibrium is achieved depends heavily on the modes of transmission. Horizontal transmission can cause rather fast changes, while vertical and oblique transmission may take centuries. 
The equilibrium is also approached in case of initially divergent groups with different variants and variant frequencies if there is intergroup transmission among them ($m_i > 0$). 948 | 949 | ## Dynamics of Stylistic Distance between Two Groups 950 | 951 | ```{r echo=FALSE} 952 | inputPanel( 953 | sliderInput("k_two_groups_many_runs", label = "k", 954 | min = 5, max = 15, value = 10, step = 1), 955 | sliderInput("N_two_groups_many_runs", label = "N", 956 | min = 10, max = 100, value = 20, step = 10), 957 | sliderInput("t_two_groups_many_runs", label = "t", 958 | min = 20, max = 200, value = 20, step = 20), 959 | sliderInput("mi_two_groups_many_runs", label = "mi", 960 | min = 0, max = 1, value = 0.1, step = 0.01), 961 | sliderInput("sim_runs_two_groups_many_runs", label = "Number of simulation runs", 962 | min = 1, max = 100, value = 20, step = 1), 963 | actionButton("run_button_two_groups_many_runs", "Run simulation") 964 | ) 965 | 966 | two_groups_many_runs_data <- eventReactive( 967 | input$run_button_two_groups_many_runs, { 968 | 969 | # run the simulation many times and store results in res_list 970 | res_list <- list() 971 | for (i in 1:input$sim_runs_two_groups_many_runs) { 972 | res_list[[i]] <- calculate_sed_for_group_drift_simulation_result( 973 | group_drift_simulation( 974 | input$k_two_groups_many_runs, 975 | input$N_two_groups_many_runs, 976 | input$t_two_groups_many_runs, 977 | input$mi_two_groups_many_runs 978 | ), 979 | sim_run = i 980 | ) 981 | } 982 | 983 | # rbind individual simulation results into one data.frame 984 | do.call(rbind, res_list) 985 | 986 | }, 987 | ignoreNULL = FALSE 988 | ) 989 | 990 | renderPlot({ 991 | two_groups_many_runs_data() %>% 992 | ggplot() + 993 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 994 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 995 | geom_line(aes(x = t, y = sed, group = sim_run), color = "black", size = 1, alpha = 0.3) + 996 | geom_point(aes(x = t, y = sed), color = "black", size = 1) + 997 | theme_bw() + 998 | xlab(latex2exp::TeX("t")) + 999 | ylab(latex2exp::TeX("d_{ij}^2")) 1000 | }) 1001 | ``` 1002 | 1003 | ## Stylistic Distances for Multiple Groups 1004 | 1005 | The real world is better represented in terms of more than two groups learning from another. The multi-group case can be described in a matrix based model. The matrix $\mathbf{M}$ contains the intergroup transmission rates $m_{ij}$ that give the proportion of the $i$'th deme that learned from the $j$'th deme. The diagonal elements of $\mathbf{M}$ are the proportion of each deme that learned from its own members. The matrix $\mathbf{U}$ contains the reciprocals of the effective population sizes $1 / N_e$ of each group on its diagonal and $0$'s elsewhere. 1006 | 1007 | In this setup all groups initially have the same variant frequencies. This means that the expected frequencies for each group in any time period are identical and equal to the starting frequency. Drift-driven departures from this expectation for a given deme can be characterized analytically in terms of variance. Since there are multiple demes in the system, their joint evolution must be handled in terms of a matrix of variances and covariances: $\mathbf{V}$. Diagonal elements in $\mathbf{V}$ contain variances: the square of the departure of the variant frequency in the respective deme at time $t$ from the situation at the starting time. The other elements are covariances: the product of the departures of the variant frequencies in each pair of demes. 
1008 | 1009 | $$\mathbf{V}^{(t)} = \sum_{r = 0}^{t - 1} \mathbf{M}^r \mathbf{U}(\mathbf{M}^r)^{\prime}$$ 1010 | 1011 | - $\mathbf{M}$: Matrix of intergroup transmission rates 1012 | - $\mathbf{V}^{(t)}$: Matrix of variances and covariances 1013 | - $r$: Index of the successive time periods 1014 | - $\mathbf{U}$ Matrix of reciprocals of the effective population sizes ($1 / N_e$). These are on the diagonal, the rest is $0$ 1015 | - $(\mathbf{M}^r)^{\prime}$: M transposed 1016 | 1017 | Equation (11) says that in the first time period, the covariance between two groups is a function of the sum of products of the intergroup transmission rates from all groups to those two groups and the reciprocal of their two effective population sizes. In later time periods, as individuals who learned from non-group members are in turn distributed among the other groups, the intergroup transmission rates from each group to *all* the others assumes increasing importance. The resulting cumulative effect of the indirect movement of variants among groups is handled by successively powering the intergroup transmission matrix. 1018 | 1019 | The magnitude of the covariance between two groups will scale inversely with their sizes $N$ and positively with the intergroup transmission rates $m$. High levels of intergroup transmission will mean that whatever departures from the initial frequency occur, they will be similar and in the same direction, hence the covariance between the groups will be high. On the other hand, if either or both group sizes are low, causing drift to play a stronger role, variant frequencies are less likely to depart from initial frequencies in a similar fashion. The covariances will be low. 1020 | 1021 | The matrix of variances and covariances $\mathbf{V}$ can be converted to a matrix of squared Euclidean distances $d_{ij}^{2}$ (or $\mathbf{SED}$). 1022 | 1023 | $$d_{ij}^{2(t)} = v_{ii}^{(t)} + v_{jj}^{(t)} - 2 v_{ij}^{(t)}$$ 1024 | 1025 | - $v_{ii}^{(t)}$: Diagonal element of $\mathbf{V}^{(t)}$ -- a variance. The square of of the departure of the variant frequency in the $i$'th deme at time $t$ from its starting frequency when the demes were identical, standardized by the variance of that starting frequency 1026 | - $v_{ij}^{(t)}$: Covariance. 
1027 | 1028 | ## Stylistic Distances for Multiple Groups 1029 | 1030 | ```{r echo = FALSE} 1031 | inputPanel( 1032 | sliderInput("Ne_matrix", label = "Ne", 1033 | min = 10, max = 100, value = 20, step = 10), 1034 | sliderInput("t_matrix", label = "t", 1035 | min = 10, max = 100, value = 20, step = 10), 1036 | actionButton("run_button_matrix_calculation", "Run matrix calculation") 1037 | ) 1038 | 1039 | matrix_calculation_data <- eventReactive( 1040 | input$run_button_matrix_calculation, { 1041 | Mk <- function(k, mi_3) { 1042 | M <- matrix(data = rep(0, 5^2), nrow = 5, ncol = 5) 1043 | # fixed 5x5 transmission matrix; each row sums to 1 (k and mi_3 are unused here) 1044 | M[1,] <- c(0.4, 0.0, 0.2, 0.0, 0.4) 1045 | M[2,] <- c(0.0, 0.4, 0.2, 0.4, 0.0) 1046 | M[3,] <- c(0.2, 0.2, 0.2, 0.2, 0.2) 1047 | M[4,] <- c(0.2, 0.1, 0.3, 0.2, 0.2) 1048 | M[5,] <- c(0.2, 0.3, 0.1, 0.2, 0.2) 1049 | M 1050 | } 1051 | 1052 | multi_group_matrizes(input$Ne_matrix, input$t_matrix, Mk) 1053 | }, 1054 | ignoreNULL = FALSE 1055 | ) 1056 | 1057 | renderPlot({ 1058 | plotlist <- lapply(matrix_calculation_data(), function(x) { 1059 | ggplot(x, aes(x = Var2, y = Var1)) + 1060 | geom_raster(aes(fill = value)) + 1061 | geom_text(aes(x = Var2, y = Var1, label = round(value, 3)), color = "white", size = 5) + 1062 | scale_y_reverse(sec.axis = dup_axis()) + 1063 | scale_x_continuous(sec.axis = dup_axis()) + 1064 | xlab("group") + 1065 | ylab("group") 1066 | }) 1067 | 1068 | cowplot::plot_grid( 1069 | plotlist = plotlist, 1070 | labels = c("M", "M'", "U", "V", "SED", "MSD"), 1071 | nrow = 3, ncol = 2 1072 | ) 1073 | }, 1074 | height = 1200 1075 | ) 1076 | ``` 1077 | 1078 | # Woodland Interassemblage Distance 1079 | 1080 | ## Trends in Interassemblage Distance 1081 | 1082 | The migration matrix approach can be applied to cultural transmission contexts by computing a matrix of squared Euclidean distances $\mathbf{SED}$ between the groups for each of the variants. We expect to find that the overall level of differentiation among groups, as measured by the mean of the squared Euclidean distances $\mathbf{MSD}$, is the mirror image of our estimates of $\theta$. 1083 | 1084 | ## Diversity and Distance for Individual Assemblages 1085 | 1086 | The mean of the squared Euclidean distances $\bar{d}_{i}^{2}$ (or $MSD$) can be calculated with equation (13).
1087 | 1088 | $$\bar{d}_{i}^{2} = \sum_{j = 1}^{n} d_{ij}^{2} / (n - 1), i \neq j$$ 1089 | 1090 | ## Diversity and Distance for Individual Assemblages 1091 | 1092 | ```{r echo = FALSE} 1093 | inputPanel( 1094 | sliderInput("k_two_groups_many_runs_with_msd", label = "k", 1095 | min = 15, max = 25, value = 20, step = 1), 1096 | sliderInput("N_two_groups_many_runs_with_msd", label = "N", 1097 | min = 10, max = 100, value = 20, step = 10), 1098 | sliderInput("t_two_groups_many_runs_with_msd", label = "t", 1099 | min = 20, max = 200, value = 20, step = 20), 1100 | sliderInput("mi_two_groups_many_runs_with_msd", label = "mi", 1101 | min = 0, max = 1, value = 0.1, step = 0.01), 1102 | sliderInput("sim_runs_two_groups_many_runs_with_msd", label = "Number of simulation runs", 1103 | min = 1, max = 100, value = 20, step = 1), 1104 | actionButton("run_button_two_groups_many_runs_with_msd", "Run simulation") 1105 | ) 1106 | 1107 | two_groups_many_runs_with_msd_data <- eventReactive( 1108 | input$run_button_two_groups_many_runs_with_msd, { 1109 | 1110 | # read input 1111 | k <- input$k_two_groups_many_runs_with_msd 1112 | N <- input$N_two_groups_many_runs_with_msd 1113 | t_final <- input$t_two_groups_many_runs_with_msd 1114 | mi <- input$mi_two_groups_many_runs_with_msd 1115 | sim_runs <- input$sim_runs_two_groups_many_runs_with_msd 1116 | 1117 | mixed_list <- list() 1118 | 1119 | # run group drift simulation 1120 | res_list <- list() 1121 | for (i in 1:sim_runs) { 1122 | res_list[[i]] <- calculate_sed_for_group_drift_simulation_result( 1123 | group_drift_simulation( 1124 | k, 1125 | N, 1126 | t_final, 1127 | mi 1128 | ), 1129 | sim_run = i 1130 | ) 1131 | } 1132 | 1133 | mixed_list[[1]] <- do.call(rbind, res_list) 1134 | 1135 | # calculate matrix and mean squared distance 1136 | Mk_msd <- function(k, mi) { 1137 | M <- matrix(data = rep(0, 2^2), nrow = 2, ncol = 2) 1138 | M[1,] <- c(1 - mi, mi) 1139 | M[2,] <- c(mi, 1 - mi) 1140 | M 1141 | } 1142 | 1143 | msd <- c() 1144 | for (t_m in 0:t_final) { 1145 | msd[t_m + 1] <- multi_group_matrizes( 1146 | N, t_m, Mk_msd, mi 1147 | )$longmosed$value[2] 1148 | } 1149 | 1150 | mixed_list[[2]] <- data.frame(t = 0:t_final, sed = msd) 1151 | 1152 | mixed_list 1153 | }, 1154 | ignoreNULL = FALSE 1155 | ) 1156 | 1157 | renderPlot({ 1158 | mixed_list <- two_groups_many_runs_with_msd_data() 1159 | 1160 | ggplot() + 1161 | geom_hline(aes(yintercept = 0), color = "dodgerblue4") + 1162 | geom_hline(aes(yintercept = 2), color = "dodgerblue4") + 1163 | geom_line( 1164 | data = mixed_list[[1]], 1165 | aes(x = t, y = sed, group = sim_run), 1166 | color = "black", size = 1, alpha = 0.3 1167 | ) + 1168 | geom_point( 1169 | data = mixed_list[[1]], 1170 | aes(x = t, y = sed), 1171 | color = "black", size = 1 1172 | ) + 1173 | geom_line( 1174 | data = mixed_list[[2]], 1175 | aes(x = t, y = sed), 1176 | color = "dodgerblue4", size = 2 1177 | ) + 1178 | theme_bw() + 1179 | xlab(latex2exp::TeX("t")) + 1180 | ylab(latex2exp::TeX("d_{ij}^2")) 1181 | }) 1182 | ``` 1183 | 1184 | ## References 1185 | --------------------------------------------------------------------------------