├── data
└── .placeholder
├── plots
└── .placeholder
├── schemata
├── interaction_schema
│ └── interaction_schema.pdf
├── overview_and_package_by_michelle_oreilly
│ ├── schema_package.pdf
│ ├── AI
│ │ ├── MPI-CS-Package.ai
│ │ └── MPI-CS-Overview_Schema.ai
│ ├── schema_overview.pdf
│ ├── EPS
│ │ ├── MPI-CS-Package.eps
│ │ └── MPI-CS-Overview_Schema.eps
│ ├── PDF
│ │ ├── MPI-CS-Package.pdf
│ │ └── MPI-CS-Overview_Schema.pdf
│ ├── schema_package.svg
│ └── SVG
│ │ └── MPI-CS-Package.svg
├── README.md
└── overview_and_package_by_clemens_schmid
│ ├── paper_overview_schema.svg
│ ├── isba_poster_overview_schema.svg
│ └── popgen_toolbox_schema.svg
├── poseidon.analysis.2024.Rproj
├── osf
├── prepare_upload.sh
├── repos.txt
└── to_upload
│ └── README.md
├── data_tracked
├── author_submitted_packages.txt
└── bibkey_mapping.csv
├── .gitignore
├── code
├── presentation
│ ├── samples_per_year_barplot.R
│ └── spatiotemporal_distribution_paa.R
├── publication_coverage_comparison.R
├── archive_data_preparation.R
├── spatiotemporal_distribution.R
└── archive_comparison_barplots.R
└── README.md
/data/.placeholder:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/plots/.placeholder:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/schemata/interaction_schema/interaction_schema.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/poseidon.analysis.2024/main/schemata/interaction_schema/interaction_schema.pdf
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/schema_package.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/poseidon.analysis.2024/main/schemata/overview_and_package_by_michelle_oreilly/schema_package.pdf
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/AI/MPI-CS-Package.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/poseidon.analysis.2024/main/schemata/overview_and_package_by_michelle_oreilly/AI/MPI-CS-Package.ai
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/schema_overview.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/poseidon.analysis.2024/main/schemata/overview_and_package_by_michelle_oreilly/schema_overview.pdf
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/EPS/MPI-CS-Package.eps:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/poseidon.analysis.2024/main/schemata/overview_and_package_by_michelle_oreilly/EPS/MPI-CS-Package.eps
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/PDF/MPI-CS-Package.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/poseidon.analysis.2024/main/schemata/overview_and_package_by_michelle_oreilly/PDF/MPI-CS-Package.pdf
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/AI/MPI-CS-Overview_Schema.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/poseidon.analysis.2024/main/schemata/overview_and_package_by_michelle_oreilly/AI/MPI-CS-Overview_Schema.ai
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/EPS/MPI-CS-Overview_Schema.eps:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/poseidon.analysis.2024/main/schemata/overview_and_package_by_michelle_oreilly/EPS/MPI-CS-Overview_Schema.eps
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/PDF/MPI-CS-Overview_Schema.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/poseidon.analysis.2024/main/schemata/overview_and_package_by_michelle_oreilly/PDF/MPI-CS-Overview_Schema.pdf
--------------------------------------------------------------------------------
/poseidon.analysis.2024.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/osf/prepare_upload.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Download a zip snapshot of every repository listed in repos.txt (one
4 | # "name,url" pair per line) to to_upload/<name>-<YYYY-MM-DD>.zip.
5 | # Exits with a non-zero status if any download failed.
6 |
7 | # repos.txt and to_upload/ are relative paths, so run from the script's directory
8 | cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1
9 | mkdir -p to_upload
10 |
11 | # Get the current date in the format of YYYY-MM-DD
12 | current_date=$(date +%Y-%m-%d)
13 |
14 | failed=0
15 |
16 | # Read the file line by line; the '|| [ -n "$name" ]' part also processes a
17 | # last line that is not terminated by a newline
18 | while IFS=, read -r name url || [ -n "$name" ]
19 | do
20 |   # Skip blank or incomplete lines instead of calling wget without a URL
21 |   if [ -z "$name" ] || [ -z "$url" ]; then
22 |     continue
23 |   fi
24 |
25 |   # Append the current_date to the name
26 |   new_filename="to_upload/${name}-${current_date}.zip"
27 |
28 |   # Download and rename the zip file; wget -O creates the output file even
29 |   # when the download fails, so remove the unusable leftover in that case
30 |   if ! wget -O "$new_filename" "$url"; then
31 |     echo "Download failed: $url" >&2
32 |     rm -f "$new_filename"
33 |     failed=1
34 |   fi
35 | done < repos.txt
36 |
37 | exit "$failed"
38 |
--------------------------------------------------------------------------------
/data_tracked/author_submitted_packages.txt:
--------------------------------------------------------------------------------
1 | 2018_Lamnidis_Fennoscandia
2 | 2020_Rivollat_FranceGermany
3 | 2020_Skourtanioti_NearEast
4 | 2020_Yu_NorthRussia
5 | 2021_CarlhoffNature
6 | 2021_Freilich_Croatia
7 | 2021_GnecchiRuscone_KazakhSteppe
8 | 2022_Altinisik_UpperMesopotamia
9 | 2022_GnecchiRuscone_CarpathianBasin
10 | 2022_Gretzinger_AngloSaxons
11 | 2022_Yu_VanDeLoosdrecht_Sicily
12 | 2023_Carlhoff_Thailand
13 | 2023_Koptekin_SouthwestAsia
14 | 2023_Peltola_VolgaOka
15 | 2023_Penske_EarlyContact
16 | 2024_GnecchiRuscone_CarpathianBasinAvarPedigrees
17 | 2024_Gretzinger_EarlyCelts
18 | 2024_Wang_ViennaBasinAvarPeriod
--------------------------------------------------------------------------------
/schemata/README.md:
--------------------------------------------------------------------------------
1 | convert .svg to .pdf:
2 |
3 | ```bash
4 | inkscape -d 300 --export-area-drawing interaction_schema/interaction_schema.svg -o interaction_schema/interaction_schema.pdf
5 | inkscape -d 300 --export-area-drawing overview_and_package_by_michelle_oreilly/schema_overview.svg -o overview_and_package_by_michelle_oreilly/schema_overview.pdf
6 | inkscape -d 300 --export-area-drawing overview_and_package_by_michelle_oreilly/schema_package.svg -o overview_and_package_by_michelle_oreilly/schema_package.pdf
7 | ```
8 |
9 | convert .svg to .png:
10 |
11 | ```bash
12 | convert poseidon_overview.svg -background none -size 2048x2048 poseidon_overview.png
13 | ```
14 |
15 |
--------------------------------------------------------------------------------
/osf/repos.txt:
--------------------------------------------------------------------------------
1 | poseidon-schema,https://github.com/poseidon-framework/poseidon-schema/archive/refs/heads/master.zip
2 | poseidon-hs,https://github.com/poseidon-framework/poseidon-hs/archive/refs/heads/master.zip
3 | poseidon-analysis-hs,https://github.com/poseidon-framework/poseidon-analysis-hs/archive/refs/heads/main.zip
4 | qjanno,https://github.com/poseidon-framework/qjanno/archive/refs/heads/main.zip
5 | janno,https://github.com/poseidon-framework/janno/archive/refs/heads/master.zip
6 | poseidon-framework.github.io,https://github.com/poseidon-framework/poseidon-framework.github.io/archive/refs/heads/master.zip
7 | poseidon-blog,https://github.com/poseidon-framework/poseidon-blog/archive/refs/heads/main.zip
8 | aadr2poseidon,https://github.com/poseidon-framework/aadr2poseidon/archive/refs/heads/main.zip
9 | poseidon.analysis.2024,https://github.com/nevrome/poseidon.analysis.2024/archive/refs/heads/main.zip
10 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # OSF upload
2 | osf/to_upload/
3 |
4 | # Output
5 | *.png
6 | plots/
7 | data/
8 |
9 | # History files
10 | .Rhistory
11 | .Rapp.history
12 |
13 | # Session Data files
14 | .RData
15 | .RDataTmp
16 |
17 | # User-specific files
18 | .Ruserdata
19 |
20 | # Example code in package build process
21 | *-Ex.R
22 |
23 | # Output files from R CMD build
24 | /*.tar.gz
25 |
26 | # Output files from R CMD check
27 | /*.Rcheck/
28 |
29 | # RStudio files
30 | .Rproj.user/
31 |
32 | # produced vignettes
33 | vignettes/*.html
34 | vignettes/*.pdf
35 |
36 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
37 | .httr-oauth
38 |
39 | # knitr and R markdown default cache directories
40 | *_cache/
41 | /cache/
42 |
43 | # Temporary files created by R markdown
44 | *.utf8.md
45 | *.knit.md
46 |
47 | # R Environment Variables
48 | .Renviron
49 |
50 | # pkgdown site
51 | docs/
52 |
53 | # translation temp files
54 | po/*~
55 |
56 | # RStudio Connect folder
57 | rsconnect/
58 |
--------------------------------------------------------------------------------
/code/presentation/samples_per_year_barplot.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 | library(ggplot2)
3 |
4 | #### load janno data ####
5 |
6 | load("data/janno_data.RData")
7 | load("data/bib_data.RData")
8 |
9 | #### count individual-proxy per year ####
10 |
11 | individuals_with_year <- paa %>%
12 | dplyr::filter(Date_Type != "modern") %>%
13 | # dplyr::select(Approx_Individual_ID, Publication) %>%
14 | # dplyr::mutate(Publication = purrr::map_chr(Publication, \(x) x[[1]])) %>%
15 | # dplyr::left_join(paa_bib, by = c("Publication" = "bibtexkey")) %>%
16 | # dplyr::select(Approx_Individual_ID, year) %>%
17 | # dplyr::arrange(dplyr::desc(year)) %>%
18 | # dplyr::distinct(Approx_Individual_ID, .keep_all = TRUE)
19 | dplyr::transmute(Approx_Individual_ID, year = as.integer(AADR_Year_First_Publication)) %>%
20 | dplyr::arrange(dplyr::desc(year)) %>%
21 | dplyr::distinct(Approx_Individual_ID, .keep_all = TRUE)
22 |
23 |
24 | individuals_per_year <- individuals_with_year %>%
25 | dplyr::group_by(year) %>%
26 | dplyr::summarise(n = dplyr::n())
27 |
28 | # control with the AADR_Year_First_Publication column
29 | # paa %>%
30 | # dplyr::filter(Date_Type != "modern") %>%
31 | # dplyr::select(Poseidon_ID, AADR_Year_First_Publication) %>%
32 | # dplyr::group_by(AADR_Year_First_Publication) %>%
33 | # dplyr::summarise(dplyr::n())
34 |
35 | #### prepare plot ####
36 |
37 | p <- individuals_per_year %>%
38 | dplyr::filter(year < 2024) %>%
39 | ggplot() +
40 | geom_col(aes(x = year, y = n)) +
41 | geom_text(
42 | aes(
43 | x = year, y = n,
44 | label = dplyr::case_when(n < 10 ~ as.character(n), n >= 10 ~ paste0("≈", round(n, -1)))
45 | ),
46 | vjust = -0.25, size = 3
47 | ) +
48 | scale_x_continuous(breaks = 2010:2023) +
49 | theme_bw() +
50 | theme(axis.title.x = element_blank()) +
51 | ylab("Approx. nr of ancient individuals")
52 |
53 | # plots/ is not tracked by Git and ggsave() does not create missing
54 | # directories by default, so make sure the output directory exists
55 | dir.create("plots/presentation", recursive = TRUE, showWarnings = FALSE)
56 |
57 | # write the barplot as a 300 dpi PNG
58 | ggsave(
59 |   "plots/presentation/samples_per_year.png",
60 |   plot = p,
61 |   device = "png",
62 |   scale = 0.4,
63 |   dpi = 300,
64 |   width = 500, height = 220, units = "mm",
65 |   limitsize = FALSE,
66 |   bg = "white"
67 | )
68 |
--------------------------------------------------------------------------------
/osf/to_upload/README.md:
--------------------------------------------------------------------------------
1 | This OSF compendium contains copies of multiple repositories with specification and code developed for and in the Poseidon framework for archaeogenetic human genotype data management. It was compiled for this publication:
2 |
3 | to be announced
4 |
5 | Here is a list of the repositories:
6 |
7 | | repository | description | GitHub URL |
8 | |------------------------------|-------------------------------------------------------------------|--------------------------------------------------------------------|
9 | | aadr2poseidon | Code and resources to transform AADR datasets to Poseidon format | https://github.com/poseidon-framework/aadr2poseidon |
10 | | janno | Code of the janno R package | https://github.com/poseidon-framework/janno |
11 | | poseidon-analysis-hs | Code of the xerxes software tool | https://github.com/poseidon-framework/poseidon-analysis-hs |
12 | | poseidon-blog | Code and content for the Poseidon blog website | https://github.com/poseidon-framework/poseidon-blog |
13 | | poseidon-framework.github.io | Code and content for the main Poseidon website | https://github.com/poseidon-framework/poseidon-framework.github.io |
14 | | poseidon-hs | Code of the trident software tool | https://github.com/poseidon-framework/poseidon-hs |
15 | | poseidon-schema | Specification of the Poseidon package file format | https://github.com/poseidon-framework/poseidon-schema |
16 | | poseidon.analysis.2024 | Code and figures supporting the Poseidon publication | https://github.com/nevrome/poseidon.analysis.2024 |
17 | | qjanno | Code of the qjanno software tool | https://github.com/poseidon-framework/qjanno |
18 |
--------------------------------------------------------------------------------
/code/publication_coverage_comparison.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 | library(ggplot2)
3 |
4 | #### load janno data ####
5 |
6 | load("data/janno_data.RData")
7 | load("data/bib_data.RData")
8 | load("data/bibkey_lookup_hashmap.RData")
9 |
10 | pca_bib_linked_to_samples <- pca %>%
11 | dplyr::select(Publication) %>%
12 | tidyr::unnest(cols = c("Publication")) %>%
13 | dplyr::distinct() %>%
14 | dplyr::left_join(
15 | pca_bib, by = c("Publication" = "bibtexkey")
16 | ) %>%
17 | dplyr::mutate(
18 | Publication = lookup_paa_key(Publication)
19 | )
20 |
21 | paa_bib_linked_to_samples <- paa %>%
22 | dplyr::select(Publication) %>%
23 | tidyr::unnest(cols = c("Publication")) %>%
24 | dplyr::distinct() %>%
25 | dplyr::left_join(
26 | paa_bib, by = c("Publication" = "bibtexkey")
27 | )
28 |
29 | publication_overlap <- dplyr::full_join(
30 | pca_bib_linked_to_samples,
31 | paa_bib_linked_to_samples,
32 | by = c("Publication", "year"),
33 | suffix = c("_PCA", "_PAA")
34 | ) %>%
35 | dplyr::arrange(year, Publication) %>%
36 | dplyr::mutate(
37 | plot_group = sort(rep_len(c("A", "B", "C", "D"), length.out = dplyr::n()))
38 | ) %>%
39 | tidyr::pivot_longer(
40 | cols = c("archive_PCA", "archive_PAA"),
41 | values_to = "archive"
42 | ) %>%
43 | dplyr::filter(!is.na(archive)) %>%
44 | dplyr::arrange(year, Publication) %>%
45 | dplyr::mutate(
46 | Publication = factor(Publication, levels = rev(unique(Publication)))
47 | )
48 |
49 | # table_for_stephan <- dplyr::full_join(
50 | # pca_bib_linked_to_samples,
51 | # paa_bib_linked_to_samples,
52 | # by = c("Publication", "year"),
53 | # suffix = c("_PCA", "_PAA")
54 | # ) %>%
55 | # dplyr::group_by(Publication) %>%
56 | # dplyr::summarize(
57 | # year = min(year),
58 | # doi = na.omit(c(doi_PAA, doi_PCA))[1],
59 | # archive_PCA = any(!is.na(archive_PCA)),
60 | # archive_PAA = any(!is.na(archive_PAA))
61 | # ) %>%
62 | # dplyr::arrange(year, Publication) %>%
63 | # dplyr::filter(!is.na(doi))
64 | #
65 | # readr::write_csv(table_for_stephan, file = "publication_list.csv")
66 |
67 | # Draw one panel of the publication overlap figure.
68 | # x: data frame with the columns Publication and archive.
69 | # Returns a ggplot with one filled square (shape 15) per Publication/archive
70 | # row and the Publication labels on the right-hand side.
71 | make_one_figure <- function(x) {
72 |   panel_theme <- theme(
73 |     axis.text.y = element_text(size = 8, hjust = 0),
74 |     axis.text.x = element_text(size = 8, angle = 90),
75 |     panel.grid.major = element_line(linewidth = 0.5),
76 |     axis.title = element_blank()
77 |   )
78 |   squares <- geom_point(aes(y = Publication, x = archive), shape = 15)
79 |
80 |   ggplot(x) +
81 |     squares +
82 |     theme_minimal() +
83 |     panel_theme +
84 |     scale_y_discrete(position = "right")
85 | }
86 |
87 | p_list <- purrr::map(
88 | c("A", "B", "C", "D"),
89 | function(x) {
90 | publication_overlap %>%
91 | dplyr::filter(plot_group == x) %>%
92 | make_one_figure()
93 | }
94 | )
95 |
96 | p <- cowplot::plot_grid(
97 | plotlist = p_list, nrow = 1, ncol = 4,
98 | align = "v"
99 | )
100 |
101 | ggsave(
102 | paste0("plots/figure_publication_overlap.pdf"),
103 | plot = p,
104 | device = "pdf",
105 | scale = 0.7,
106 | dpi = 300,
107 | width = 500, height = 280, units = "mm",
108 | limitsize = F,
109 | bg = "white"
110 | )
111 |
112 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Data analysis code for 'Poseidon -- A framework for archaeogenetic human genotype data management'
2 |
3 | ### Published in:
4 |
5 | to be announced
6 |
7 | **Preprint (2024-04-16)**
8 |
9 | Schmid, C., Ghalichi, A., Lamnidis, T. C., Mudiyanselage, D. B. A., Haak, W., & Schiffels, S. (2024). Poseidon – A framework for archaeogenetic human genotype data management. bioRxiv. https://doi.org/10.1101/2024.04.12.589180
10 |
11 | ### Compendium DOI (long-term archive):
12 |
13 | https://doi.org/10.17605/OSF.IO/ZUQGB at Open Science Framework (OSF)
14 |
15 | The files in this archived storage will generate the results as found in the publication. The files hosted on GitHub are the development versions and may have changed since the paper was published.
16 |
17 | ### Authors of this repository:
18 |
19 | Clemens Schmid [ORCID: 0000-0003-3448-5715](http://orcid.org/0000-0003-3448-5715)
20 |
21 | ### Overview of contents:
22 |
23 | This repository contains the following main top level directories:
24 |
25 | - `code`: The R scripts necessary to reproduce the analysis and create the figures. Some scripts provide code beyond what is required to reproduce figures and results in the publication (e.g. scripts to create didactic figures for presentations).
26 | - `data`: Intermediate data output by the scripts, not tracked by Git.
27 | - `data_tracked`: Input data files manually or only semi-automatically created for this analysis.
28 | - `osf`: Scripts to prepare the data upload to the Open Science Framework (OSF) long-term archive.
29 | - `plots`: Rendered versions of the plots for the publication, not tracked by Git.
30 | - `schemata`: Schematic drawings created for the paper.
31 |
32 | ### Reproducing the results:
33 |
34 | The following versions of the Poseidon public archives downloaded on 2024-03-15 were analysed:
35 |
36 | |archive |Git commit | Corresponding package versions |
37 | |:-----------|:----------|:----------------------------------|
38 | |PCA |[b159991](https://github.com/poseidon-framework/community-archive/tree/b159991)|[archive.chron](https://github.com/poseidon-framework/community-archive/blob/b159991/archive.chron)|
39 | |PAA |[60ddda0](https://github.com/poseidon-framework/aadr-archive/tree/60ddda0)|[archive.chron](https://github.com/poseidon-framework/aadr-archive/blob/60ddda0/archive.chron)|
40 |
41 | The archive.chron files include all present and past package versions, but only the latest ones were considered here.
42 |
43 | The analysis was performed with R v4.3.2 using the following packages and package versions as available on CRAN (Comprehensive R Archive Network) on 2024-03-13:
44 |
45 |
46 | Generated with this code
47 |
48 | ```r
49 | tibble::tibble(
50 | package = rrtools::add_dependencies_to_description(just_packages = T),
51 | version = purrr::map_chr(package, \(x) utils::packageVersion(x) |> as.character())
52 | ) |> knitr::kable()
53 | ```
54 |
55 |
56 | |package |version |
57 | |:-----------|:---------|
58 | |bib2df |1.1.1 |
59 | |cowplot |1.1.3 |
60 | |dplyr |1.1.4 |
61 | |ggpattern |1.0.1 |
62 | |ggplot2 |3.5.0 |
63 | |ggrepel |0.9.4 |
64 | |ggsankey |0.0.99999 |
65 | |giscoR |0.4.1 |
66 | |hash |2.2.6.3 |
67 | |janno |1.0.0 |
68 | |magrittr |2.0.3 |
69 | |purrr |1.0.2 |
70 | |readr |2.1.5 |
71 | |scales |1.3.0 |
72 | |sf |1.0.15 |
73 | |stringr |1.5.1 |
74 | |tibble |3.2.1 |
75 | |tidyr |1.3.1 |
76 | |tidyselect |1.2.1 |
77 | |wesanderson |0.3.7 |
78 |
79 | ### Licenses:
80 |
81 | [MIT](http://choosealicense.com/licenses/mit/) year: 2024, copyright holder: Clemens Schmid
82 |
--------------------------------------------------------------------------------
/code/presentation/spatiotemporal_distribution_paa.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 | library(ggplot2)
3 |
4 | #### load janno data ####
5 |
6 | load("data/janno_data.RData")
7 |
8 | #### space-time figure ####
9 |
10 | # filter datasets to spatiotemporally informed subset
11 |
12 | paa_ancient_with_coords <- paa %>%
13 | dplyr::filter(
14 | Date_Type %in% c("C14", "contextual"),
15 | !is.na(Date_BC_AD_Start) & !is.na(Date_BC_AD_Stop)
16 | ) %>%
17 | dplyr::filter(!is.na(Latitude) & !is.na(Longitude)) %>%
18 | tibble::as_tibble()
19 |
20 | # make sample data spatial
21 |
22 | paa_ancient_sf_6933 <- paa_ancient_with_coords %>%
23 | sf::st_as_sf(coords = c('Longitude', 'Latitude'), crs = 4326) %>%
24 | sf::st_transform(6933)
25 |
26 | # prepare spatial context data
27 |
28 | world <- giscoR::gisco_get_countries()
29 |
30 | world_6933 <- world %>%
31 | sf::st_transform(6933) %>%
32 | sf::st_union() %>%
33 | sf::st_simplify(dTolerance = 20000)
34 |
35 | extent_world_6933 <- world_6933 %>%
36 | sf::st_bbox() %>%
37 | sf::st_as_sfc() %>%
38 | sf::st_segmentize(dfMaxLength = 10000)
39 |
40 | world_grid_6933_basic <- sf::st_make_grid(
41 | world_6933,
42 | n = c(72,36), #c(54, 27), #c(36,18),
43 | what = 'polygons',
44 | flat_topped = TRUE
45 | ) %>% sf::st_as_sf() %>%
46 | dplyr::mutate(
47 | area_id = seq_along(x)
48 | )
49 |
50 | world_grid_6933 <- world_grid_6933_basic %>%
51 | sf::st_segmentize(dfMaxLength = 10000)
52 |
53 | # perform counting in spatial bins:
54 | # for each cell of world_grid_6933, the number of features in x intersecting it
55 | inter_world <- function(x) {
56 |   lengths(sf::st_intersects(world_grid_6933, x))
57 | }
58 |
59 | world_with_count <- world_grid_6933 %>%
60 | dplyr::mutate(
61 | PAA = paa_ancient_sf_6933 %>% inter_world()
62 | ) %>%
63 | tidyr::pivot_longer(
64 | tidyselect::one_of("PAA"),
65 | names_to = "database", values_to = "count"
66 | ) %>%
67 | dplyr::filter(count != 0)
68 |
69 | # construct map figure
70 |
71 | map_plot <- ggplot() +
72 | geom_sf(data = extent_world_6933, fill = "#c2eeff", color = "black", alpha = 0.5) +
73 | geom_sf(data = world_6933, fill = "white", color = NA) +
74 | geom_sf(
75 | data = world_with_count,
76 | aes(fill = count),
77 | color = NA,
78 | alpha = 0.5
79 | ) +
80 | scale_fill_viridis_c(
81 | option = "magma", direction = -1, limits = c(0, 800), oob = scales::squish
82 | ) +
83 | guides(fill = guide_colorbar(barwidth = 20, barheight = 1)) +
84 | geom_sf(
85 | data = world_with_count %>%
86 | dplyr::group_by(area_id) %>%
87 | dplyr::slice_head(n = 1) %>%
88 | dplyr::ungroup(),
89 | color = "black", size = 0.1, fill = NA
90 | ) +
91 | geom_sf(data = world_6933, fill = NA, color = "black", cex = 0.2) +
92 | coord_sf(expand = F, crs = "+proj=natearth") +
93 | theme_minimal() +
94 | theme(
95 | legend.position = "bottom",
96 | panel.grid.major = element_line(colour = "grey", linewidth = 0.3),
97 | axis.title = element_blank(),
98 | axis.text = element_blank(),
99 | plot.title = element_text(face = "bold", size = 14)
100 | )
101 |
102 | # time histogram
103 |
104 | samples_with_mean_age <- paa_ancient_with_coords %>%
105 | dplyr::select(Approx_Individual_ID, tidyselect::starts_with("Date_BC_AD"), package, source) %>%
106 | dplyr::mutate(
107 | Date_BC_AD_Median = dplyr::case_when(
108 | is.na(Date_BC_AD_Median) ~ (Date_BC_AD_Start + Date_BC_AD_Stop) / 2,
109 | TRUE ~ Date_BC_AD_Median
110 | ),
111 | age_cut = cut(
112 | Date_BC_AD_Median,
113 | breaks = c(
114 | min(Date_BC_AD_Median),
115 | seq(-10000, 2000, 500)
116 | ),
117 | labels = c("< -10000", paste0("> ", seq(-10000, 1500, 500))),
118 | include.lowest = T
119 | )
120 | )
121 |
122 | age_groups_count <- samples_with_mean_age %>%
123 | dplyr::group_by(age_cut) %>%
124 | dplyr::summarise(n = dplyr::n(), .groups = "drop")
125 |
126 | time_hist_plot <- ggplot() +
127 | geom_bar(
128 | data = age_groups_count,
129 | mapping = aes(x = age_cut, y = n, fill = n),
130 | stat = "identity",
131 | alpha = 0.7,
132 | color = "black",
133 | linewidth = 0.2
134 | ) +
135 | scale_fill_viridis_c(option = "magma", direction = -1, limits = c(0, 800), oob = scales::squish) +
136 | ylim(-100, 1500) +
137 | theme_bw() +
138 | theme(
139 | legend.position = "none",
140 | axis.text.y = element_text(angle = 20, hjust = 1, vjust = 0.5),
141 | axis.title.x = element_blank()
142 | ) +
143 | coord_flip() +
144 | xlab("age BC/AD")
145 |
146 | # combine plots
147 |
148 | p <- cowplot::plot_grid(
149 | time_hist_plot, map_plot,
150 | ncol = 2,
151 | #labels = c("A", "B"),
152 | rel_widths = c(0.3, 1)
153 | )
154 |
155 | # plots/ is not tracked by Git and ggsave() does not create missing
156 | # directories by default, so make sure the output directory exists
157 | dir.create("plots/presentation", recursive = TRUE, showWarnings = FALSE)
158 |
159 | # write the combined time histogram + map figure as a 300 dpi PNG
160 | ggsave(
161 |   "plots/presentation/figure_spacetime_paa.png",
162 |   plot = p,
163 |   device = "png",
164 |   scale = 0.6,
165 |   dpi = 300,
166 |   width = 500, height = 220, units = "mm",
167 |   limitsize = FALSE,
168 |   bg = "white"
169 | )
170 |
--------------------------------------------------------------------------------
/code/archive_data_preparation.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 |
3 | #### prepare .janno file data ####
4 |
5 | pca_raw <- janno::read_janno("~/agora/community-archive", validate = F)
6 | pca_author_packages <- readr::read_lines("data_tracked/author_submitted_packages.txt")
7 | paa_raw <- list.files("~/agora/aadr-archive", pattern = "62", full.names = TRUE) %>%
8 | janno::read_janno(validate = F)
9 |
10 | cleaningPatterns <- c(
11 | "\\.HO",
12 | "\\.DG",
13 | "\\.SG",
14 | "\\.SDG",
15 | "\\.AG",
16 | "\\_WGA",
17 | "\\_noUDG",
18 | "\\_udg",
19 | "\\.WGC",
20 | "\\_d",
21 | "\\_old",
22 | "\\_new",
23 | "\\_alt",
24 | "\\.EC",
25 | "\\_genotyping",
26 | "\\_enhanced",
27 | "\\_snpAD",
28 | "\\_merged",
29 | "\\.merged",
30 | "\\_merge",
31 | "\\_lib",
32 | "\\_renamed",
33 | "\\_in.preparation",
34 | "\\_contam",
35 | "\\.cont",
36 | "\\.A0101",
37 | "\\.A",
38 | "\\.B0101",
39 | "\\.B",
40 | "\\_v54.1_addback",
41 | "\\_petrous",
42 | "\\_published",
43 | "\\_all",
44 | "\\_provisional",
45 | "\\_final",
46 | "-ALL_DATA",
47 | "\\.minus",
48 | "\\.bam",
49 | "\\.sorted",
50 | "\\.fixedHeader",
51 | "\\_oEEF",
52 | "\\_LC",
53 | "\\_ss",
54 | "_original"
55 | ) %>% paste0(collapse = "|")
56 |
57 | # add Approx_Individual_ID: Poseidon_ID without cleaningPatterns matches and with "+" replaced
58 | cleanPoseidonIDs <- function(x) {
59 |   dplyr::mutate(
60 |     x,
61 |     Approx_Individual_ID = stringr::str_replace_all(
62 |       stringr::str_remove_all(Poseidon_ID, cleaningPatterns), "\\+", "\\_"
63 |     ),
64 |     .before = "Poseidon_ID"
65 |   )
66 | }
67 |
68 | pca <- pca_raw %>%
69 | tibble::as_tibble() %>%
70 | dplyr::mutate(
71 | archive = factor("PCA", levels = c("PAA", "PCA") %>% rev()),
72 | package = source_file %>% dirname(),
73 | source = dplyr::case_when(
74 | package %in% pca_author_packages ~ "Submitted by author",
75 | purrr::map_lgl(Publication, \(x) "AADRv424" %in% x) ~ "AADR v42.4",
76 | purrr::map_lgl(Publication, \(x) "AADRv443" %in% x) ~ "AADR v44.3",
77 | purrr::map_lgl(Publication, \(x) "AADRv50" %in% x) ~ "AADR v50",
78 | TRUE ~ "Extracted from paper"
79 | ),
80 | main_publication = purrr::map_chr(Publication, \(x) x[[1]]),
81 | .before = "Poseidon_ID"
82 | ) %>%
83 | cleanPoseidonIDs()
84 |
85 | #janno::write_janno(pca, "poseidon_community_archive_2024-07-15.tsv")
86 |
87 | paa <- paa_raw %>%
88 | tibble::as_tibble() %>%
89 | dplyr::mutate(
90 | archive = factor("PAA", levels = c("PAA", "PCA") %>% rev()),
91 | package = source_file %>% dirname(),
92 | source = "AADR v62.0",
93 | main_publication = purrr::map_chr(Publication, \(x) x[[1]])
94 | ) %>%
95 | cleanPoseidonIDs()
96 |
97 | paa %>%
98 | dplyr::mutate(main_id = purrr::map_chr(Alternative_IDs, \(x) x[[1]]), .after = "Approx_Individual_ID") %>%
99 | dplyr::filter(
100 | Approx_Individual_ID != main_id
101 | )
102 |
103 | # check for avoidable mismatches in Approx_Individual_ID
104 | # dplyr::bind_rows(pca, paa) %>%
105 | # dplyr::select(Approx_Individual_ID, archive) %>%
106 | # dplyr::distinct() %>%
107 | # dplyr::group_by(Approx_Individual_ID) %>%
108 | # dplyr::mutate(n = dplyr::n()) %>%
109 | # dplyr::filter(n == 1) %>%
110 | # dplyr::ungroup() %>%
111 | # dplyr::arrange(Approx_Individual_ID) %>%
112 | # dplyr::filter(dplyr::lag(as.character(archive), n = 1, default = "") != as.character(archive)) %>%
113 | # View()
114 | # very similar entries should appear right after each other
115 |
116 | # set levels of source factor (every value assigned to `source` above must be listed, others become NA)
117 | source_order <- c(
118 |   "AADR v42.4", "AADR v44.3", "AADR v50", "AADR v54.1.p1", "AADR v62.0",
119 |   "Extracted from paper", "Submitted by author"
120 | ) %>% rev()
121 | pca$source <- factor(pca$source, levels = source_order)
122 | paa$source <- factor(paa$source, levels = source_order)
123 |
124 | save(
125 | pca, paa,
126 | file = "data/janno_data.RData"
127 | )
128 |
129 | #### prepare .bib file data ####
130 |
131 | pca_bib_raw <- purrr::map_dfr(
132 | list.files(
133 | "~/agora/community-archive",
134 | pattern = "\\.bib$",
135 | full.names = T,
136 | recursive = T
137 | ),
138 | function(bib_path) {
139 | bib_df <- bib2df::bib2df(bib_path)
140 | bib_df %>% dplyr::mutate(
141 | package = bib_path %>% dirname() %>% basename(),
142 | archive = "PCA"
143 | )
144 | }
145 | )
146 |
147 | pca_bib <- pca_bib_raw %>%
148 | dplyr::transmute(
149 | bibtexkey = BIBTEXKEY,
150 | doi = DOI,
151 | year = YEAR,
152 | archive = "PCA"
153 | ) %>%
154 | dplyr::group_by(bibtexkey) %>%
155 | dplyr::arrange(doi) %>%
156 | dplyr::slice_head(n = 1) %>%
157 | dplyr::ungroup()
158 |
159 | paa_bib_raw <- purrr::map_dfr(
160 | list.files(
161 | "~/agora/aadr-archive",
162 | pattern = "\\.bib$",
163 | full.names = T,
164 | recursive = T
165 | ),
166 | function(bib_path) {
167 | bib_df <- bib2df::bib2df(bib_path)
168 | bib_df %>% dplyr::mutate(
169 | package = bib_path %>% dirname() %>% basename(),
170 | archive = "PAA"
171 | )
172 | }
173 | )
174 |
175 | paa_bib <- paa_bib_raw %>%
176 | dplyr::transmute(
177 | bibtexkey = BIBTEXKEY,
178 | doi = DOI,
179 | year = YEAR,
180 | archive = "PAA"
181 | ) %>%
182 | dplyr::distinct(.keep_all = T)
183 |
184 | save(
185 | pca_bib, paa_bib,
186 | file = "data/bib_data.RData"
187 | )
188 |
189 | #### prepare bib key lookup PCA -> PAA ####
190 |
191 | # The following code is commented out, because it was only run once to prepare the
192 | # original template for "data_tracked/bibkey_mapping.csv", which was then manually
193 | # cleaned
194 |
195 | # pca_bib_linked_to_samples <- pca %>%
196 | # dplyr::select(Publication) %>%
197 | # tidyr::unnest(cols = c("Publication")) %>%
198 | # dplyr::distinct() %>%
199 | # dplyr::left_join(
200 | # pca_bib, by = c("Publication" = "bibtexkey")
201 | # )
202 | #
203 | # paa_bib_linked_to_samples <- paa %>%
204 | # dplyr::select(Publication) %>%
205 | # tidyr::unnest(cols = c("Publication")) %>%
206 | # dplyr::distinct() %>%
207 | # dplyr::left_join(
208 | # paa_bib, by = c("Publication" = "bibtexkey")
209 | # )
210 | #
211 | # publication_overlap <- dplyr::full_join(
212 | # pca_bib_linked_to_samples,
213 | # paa_bib_linked_to_samples,
214 | # by = c("Publication", "year"),
215 | # suffix = c("_PCA", "_PAA"),
216 | # keep = TRUE
217 | # )
218 | #
219 | # publication_overlap %>%
220 | # dplyr::select(Publication_PAA, doi_PAA, Publication_PCA, doi_PCA) %>%
221 | # readr::write_csv("data_tracked/bibkey_mapping.csv")
222 |
223 | bibkey_lookup_table <- readr::read_csv("data_tracked/bibkey_mapping.csv") %>%
224 | dplyr::filter(!is.na(Publication_PCA)) # rows without a PCA key cannot be looked up
225 |
226 | bibkey_lookup_hashmap <- hash::hash( # PCA bibtex key -> PAA bibtex key (may be NA)
227 | bibkey_lookup_table$Publication_PCA,
228 | bibkey_lookup_table$Publication_PAA
229 | )
230 |
231 | lookup_paa_key <- function(pca_keys) {
232 |   # translate one PCA bibtex key; returns the key itself if no PAA key is known
233 |   translate_one <- function(key) {
234 |     # keys absent from the lookup table pass through unchanged
235 |     if (!hash::has.key(key, bibkey_lookup_hashmap)) {
236 |       return(key)
237 |     }
238 |     mapped <- hash::values(bibkey_lookup_hashmap, key)
239 |     # a table entry without PAA counterpart is stored as NA: keep the PCA key
240 |     if (is.na(mapped)) key else mapped
241 |   }
242 |   # character vector of the same length as pca_keys
243 |   purrr::map_chr(pca_keys, translate_one)
244 | }
245 |
246 | save( # the function reads bibkey_lookup_hashmap as a global, so both are stored
247 | bibkey_lookup_hashmap, lookup_paa_key,
248 | file = "data/bibkey_lookup_hashmap.RData"
249 | )
250 |
--------------------------------------------------------------------------------
/code/spatiotemporal_distribution.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 | library(ggplot2)
3 |
4 | #### load janno data ####
5 |
6 | load("data/janno_data.RData")
7 | pca_author_packages <- readr::read_lines("data_tracked/author_submitted_packages.txt")
8 |
9 | #### space-time figure ####
10 |
11 | # filter datasets to spatiotemporally informed subset
12 |
13 | # keep samples that are C14- or contextually dated, have both ends of the
14 | # dating range, and have coordinates
15 | keep_spatiotemporal <- function(janno) {
16 |   janno %>%
17 |     dplyr::filter(
18 |       Date_Type %in% c("C14", "contextual"),
19 |       !is.na(Date_BC_AD_Start), !is.na(Date_BC_AD_Stop),
20 |       !is.na(Latitude), !is.na(Longitude)
21 |     ) %>%
22 |     tibble::as_tibble()
23 | }
24 |
25 | pca_ancient_with_coords <- keep_spatiotemporal(pca)
26 |
27 | paa_ancient_with_coords <- keep_spatiotemporal(paa)
28 |
29 | # reduce selection from all Poseidon_IDs to approx. individual count:
30 | # per Approx_Individual_ID keep only the entry with the most SNPs
31 | pca_individuals <- pca_ancient_with_coords %>%
32 |   dplyr::arrange(dplyr::desc(Nr_SNPs)) %>%
33 |   dplyr::group_by(Approx_Individual_ID) %>%
34 |   dplyr::slice_head(n = 1) %>%
35 |   dplyr::ungroup()
36 |
37 | paa_individuals <- paa_ancient_with_coords %>%
38 |   dplyr::arrange(dplyr::desc(Nr_SNPs)) %>%
39 |   dplyr::group_by(Approx_Individual_ID) %>%
40 |   dplyr::slice_head(n = 1) %>%
41 |   dplyr::ungroup()
42 |
43 | # make sample data spatial: lon/lat (EPSG:4326) reprojected to EPSG:6933
44 |
45 | pca_ancient_sf_6933 <- sf::st_transform(
46 |   sf::st_as_sf(pca_individuals, coords = c("Longitude", "Latitude"), crs = 4326),
47 |   crs = 6933
48 | )
49 | pca_ancient_author_submitted_sf_6933 <- pca_ancient_sf_6933 %>%
50 |   dplyr::filter(package %in% pca_author_packages)
51 | paa_ancient_sf_6933 <- sf::st_transform(
52 |   sf::st_as_sf(paa_individuals, coords = c("Longitude", "Latitude"), crs = 4326),
53 |   crs = 6933
54 | )
55 |
56 | # prepare spatial context data: land outline, map frame and counting grid
57 |
58 | world <- giscoR::gisco_get_countries() # country geometries via giscoR
59 |
60 | world_6933 <- world %>%
61 | sf::st_transform(6933) %>%
62 | sf::st_union() %>% # merge all countries into a single geometry
63 | sf::st_simplify(dTolerance = 20000) # tolerance presumably in CRS units (m)
64 |
65 | extent_world_6933 <- world_6933 %>%
66 | sf::st_bbox() %>%
67 | sf::st_as_sfc() %>% # bounding box of the land outline as a polygon
68 | sf::st_segmentize(dfMaxLength = 10000) # extra vertices; presumably so edges bend when plotted in another CRS
69 |
70 | world_grid_6933_basic <- sf::st_make_grid(
71 | world_6933,
72 | n = c(72,36), #c(54, 27), #c(36,18),
73 | what = 'polygons',
74 | flat_topped = TRUE
75 | ) %>% sf::st_as_sf() %>%
76 | dplyr::mutate(
77 | area_id = seq_along(x) # running id per grid cell; x is the geometry column
78 | )
79 |
80 | world_grid_6933 <- world_grid_6933_basic %>%
81 | sf::st_segmentize(dfMaxLength = 10000)
82 |
83 | world_grid_6933_top_triangles <- world_grid_6933_basic %>%
84 | dplyr::mutate(x = purrr::map(x, function(y) { # rebuild every cell geometry
85 | sf::st_polygon(list(y[[1]][-2,])) # drop the 2nd vertex row of the ring; presumably leaves a triangle
86 | }) %>% sf::st_sfc(crs = sf::st_crs(world_grid_6933_basic))) %>%
87 | sf::st_segmentize(dfMaxLength = 10000)
88 |
89 | world_grid_6933_bottom_triangles <- world_grid_6933_basic %>%
90 | dplyr::mutate(x = purrr::map(x, function(y) { # rebuild every cell geometry
91 | sf::st_polygon(list(y[[1]][-4,])) # drop the 4th vertex row of the ring; presumably the complementary triangle
92 | }) %>% sf::st_sfc(crs = sf::st_crs(world_grid_6933_basic))) %>%
93 | sf::st_segmentize(dfMaxLength = 10000)
94 |
95 | # perform counting in spatial bins
96 |
97 | inter_world <- function(x) { # per grid cell: number of features of x it intersects
98 |   lengths(sf::st_intersects(world_grid_6933, x))
99 | }
100 |
101 | world_with_count <- world_grid_6933 %>% # long table: one row per cell and database
102 |   dplyr::mutate(
103 |     PCA = pca_ancient_sf_6933 %>% inter_world(),
104 |     PCA_authors_submitted = pca_ancient_author_submitted_sf_6933 %>% inter_world(),
105 |     PAA = paa_ancient_sf_6933 %>% inter_world()
106 |   ) %>%
107 |   tidyr::pivot_longer(
108 |     tidyselect::all_of(c("PCA", "PAA", "PCA_authors_submitted")), # one_of() is superseded
109 |     names_to = "database", values_to = "count"
110 |   ) %>%
111 |   dplyr::filter(count != 0) # keep only cells that contain samples
112 |
113 | centroid_pca_author_submitted <- sf::st_centroid(
114 |   dplyr::filter(world_with_count, database == "PCA_authors_submitted")
115 | ) # centroids of the cells that hold author-submitted PCA samples
116 | # attach the PCA counts to the bottom triangle of every occupied grid cell
117 | triangles_pca <- dplyr::inner_join(
118 |   world_grid_6933_bottom_triangles,
119 |   world_with_count %>%
120 |     sf::st_drop_geometry() %>%
121 |     dplyr::filter(database == "PCA") %>%
122 |     dplyr::select(area_id, database, count),
123 |   by = "area_id"
124 | )
125 | # attach the PAA counts to the top triangle of every occupied grid cell
126 | triangles_paa <- dplyr::inner_join(
127 |   world_grid_6933_top_triangles,
128 |   world_with_count %>%
129 |     sf::st_drop_geometry() %>%
130 |     dplyr::filter(database == "PAA") %>%
131 |     dplyr::select(area_id, database, count),
132 |   by = "area_id"
133 | )
134 |
135 | # construct map figure; layers are drawn bottom-up in the order given
136 |
137 | map_plot <- ggplot() +
138 |   geom_sf(data = extent_world_6933, fill = "#c2eeff", color = "black", alpha = 0.5) +
139 |   geom_sf(data = world_6933, fill = "white", color = NA) +
140 |   geom_sf( # PAA: top triangles
141 |     data = triangles_paa,
142 |     fill = "#f37748",
143 |     color = NA,
144 |     alpha = 0.5
145 |   ) +
146 |   geom_sf( # PCA: bottom triangles
147 |     data = triangles_pca,
148 |     fill = "#095256",
149 |     color = NA,
150 |     alpha = 0.5
151 |   ) +
152 |   geom_sf( # outline of every occupied cell, drawn once per area_id
153 |     data = world_with_count %>%
154 |       dplyr::group_by(area_id) %>%
155 |       dplyr::slice_head(n = 1) %>%
156 |       dplyr::ungroup(),
157 |     color = "black", size = 0.1, fill = NA
158 |   ) +
159 |   geom_sf(data = world_6933, fill = NA, color = "black", cex = 0.2) + # NOTE(review): cex is base-graphics naming; confirm outline width
160 |   geom_sf( # diamond marker on cells with author-submitted PCA samples
161 |     data = centroid_pca_author_submitted,
162 |     color = "#042325",
163 |     shape = 18, size = 2.5
164 |   ) +
165 |   coord_sf(expand = FALSE, crs = "+proj=natearth") +
166 |   theme_minimal() +
167 |   theme(
168 |     legend.position = "bottom",
169 |     legend.background = element_blank(),
170 |     panel.grid.major = element_line(colour = "grey", linewidth = 0.3),
171 |     axis.title = element_blank(),
172 |     axis.text = element_blank(),
173 |     plot.title = element_text(face = "bold", size = 14)
174 |   ) +
175 |   ggtitle(
176 |     "Spatial and temporal distribution of ancient human individuals in PCA and PAA",
177 |     # https://stackoverflow.com/questions/35957129/r-ggplot2-evaluate-object-inside-expression
178 |     as.expression(bquote( # subtitle with counts rounded to the nearest hundred
179 |       "World in Natural Earth projection, "~
180 |       "Number of ancient individuals with spatial and temporal data:"~
181 |       "PCA"%~~%.(round(nrow(pca_individuals), -2))~"&"~
182 |       "PAA"%~~%.(round(nrow(paa_individuals), -2))
183 |     ))
184 |   )
185 |
186 | # time histogram: bin the median age of every individual into 500-year classes
187 |
188 | samples_with_mean_age <- dplyr::bind_rows(pca_individuals, paa_individuals) %>%
189 |   dplyr::select(
190 |     Approx_Individual_ID,
191 |     tidyselect::starts_with("Date_BC_AD"),
192 |     package, archive, source
193 |   ) %>%
194 |   dplyr::mutate(
195 |     Date_BC_AD_Median = dplyr::case_when( # fall back to the range midpoint
196 |       is.na(Date_BC_AD_Median) ~ (Date_BC_AD_Start + Date_BC_AD_Stop) / 2,
197 |       TRUE ~ Date_BC_AD_Median
198 |     ),
199 |     age_cut = cut(
200 |       Date_BC_AD_Median,
201 |       breaks = c(
202 |         -Inf, # fixed lower bound: cut() sorts breaks, so a data minimum >= -10000 would shift all labels
203 |         seq(-10000, 2000, 500)
204 |       ),
205 |       labels = c("< -10000", paste0("> ", seq(-10000, 1500, 500))),
206 |       include.lowest = TRUE
207 |     )
208 |   )
209 |
210 | # age classes that contain at least one author-submitted PCA sample
211 | age_groups_author_submitted <- samples_with_mean_age %>%
212 |   dplyr::filter(archive == "PCA" & source == "Submitted by author") %>%
213 |   dplyr::group_by(age_cut) %>%
214 |   dplyr::summarise()
215 |
216 | # number of individuals per archive and age class
217 | age_groups_count <- dplyr::summarise(
218 |   dplyr::group_by(samples_with_mean_age, archive, age_cut),
219 |   n = dplyr::n(), .groups = "drop"
220 | )
221 |
222 | time_hist_plot <- ggplot() + # dodged bars per age class and archive, flipped to horizontal
223 | geom_bar(
224 | data = age_groups_count,
225 | mapping = aes(x = age_cut, y = n, fill = archive),
226 | stat = "identity", # n is precomputed, so bars show it as-is
227 | position = "dodge",
228 | alpha = 0.7
229 | ) +
230 | geom_point( # marker at a fixed negative count, i.e. beside the bars' baseline
231 | data = age_groups_author_submitted,
232 | mapping = aes(x = age_cut, y = -80, color = "Author-submitted samples (PCA)"),
233 | shape = 18, size = 2.5
234 | ) +
235 | scale_color_manual("", values = c("Author-submitted samples (PCA)" = "black")) +
236 | ylim(-100, 1500) + # NOTE(review): counts above 1500 would fall outside the scale limits - confirm none exist
237 | theme_bw() +
238 | theme(
239 | legend.position = c(.75,0.10), # legend placed inside the panel
240 | legend.box.background = element_rect(colour = "black", fill = "white"),
241 | legend.box.margin = margin(0,0,0,0),
242 | legend.background = element_rect(fill = NA),
243 | #legend.box = "vertical",
244 | legend.spacing.y = unit(-0.9, "cm"), # negative spacing pulls the two legends together
245 | axis.text.y = element_text(angle = 20, hjust = 1, vjust = 0.5),
246 | axis.title.x = element_blank()
247 | ) +
248 | scale_fill_manual(
249 | values = c("PAA" = "#f37748", "PCA" = "#095256") # same colours as the map triangles
250 | ) +
251 | guides(
252 | fill = guide_legend(title = "Archive", direction = "horizontal", order = 2)
253 | #color = guide_legend()
254 | ) +
255 | coord_flip() +
256 | xlab("age BC/AD")
257 |
258 | # combine plots: narrow time histogram on the left, map on the right
259 |
260 | p <- cowplot::plot_grid(
261 |   time_hist_plot, map_plot,
262 |   ncol = 2,
263 |   #labels = c("A", "B"),
264 |   rel_widths = c(0.3, 1)
265 | )
266 |
267 | ggsave(
268 |   "plots/figure_spacetime.pdf",
269 |   plot = p,
270 |   device = "pdf",
271 |   scale = 0.7,
272 |   dpi = 300,
273 |   width = 500, height = 220, units = "mm",
274 |   limitsize = FALSE,
275 |   bg = "white"
276 | )
277 |
--------------------------------------------------------------------------------
/data_tracked/bibkey_mapping.csv:
--------------------------------------------------------------------------------
1 | Publication_PAA,doi_PAA,Publication_PCA,doi_PCA
2 | RasmussenNature2010,10.1038/nature08835,RasmussenNature2010,10.1038/nature08835
3 | AADR,10.1101/2023.04.06.535797,AADR,10.1101/2023.04.06.535797
4 | NA,NA,AADRv424,10.7910/DVN/FFIDCW
5 | KellerNatureCommunications2012,10.1038/ncomms1701,KellerNatureCommunications2012,10.1038/ncomms1701
6 | MeyerScience2012,10.1126/science.1224344,MeyerScience2012,10.1126/science.1224344
7 | PattersonGenetics2012,10.1534/genetics.112.145037,PattersonGenetics2012,10.1534/genetics.112.145037
8 | PickrellNatureCommunications2012,10.1038/ncomms2140,PickrellNatureCommunications2012,10.1038/ncomms2140
9 | FuNature2014,10.1038/nature13810,FuNature2014,10.1038/nature13810
10 | GambaNatureCommunications2014,10.1038/ncomms6257,GambaNatureCommunications2014,10.1038/ncomms6257
11 | LazaridisNature2014,10.1038/nature13673,LazaridisNature2014,10.1038/nature13673
12 | MalaspinasCurrentBiology2014,10.1016/j.cub.2014.09.078,MalaspinasCurrentBiology2014,10.1016/j.cub.2014.09.078
13 | OlaldeNature2014,10.1038/nature12960,OlaldeNature2014,10.1038/nature12960
14 | RaghavanNature2014,10.1038/nature12736,RaghavanNature2014,10.1038/nature12736
15 | RaghavanScience2014,10.1126/science.1255832,RaghavanScience2014,10.1126/science.1255832
16 | RasmussenNature2014,10.1038/nature13025,RasmussenNature2014,10.1038/nature13025
17 | OrlandoScience2014,10.1126/science.aaa0114,SeguinOrlandoScience2014,10.1126/science.aaa0114
18 | SkoglundScience2014,10.1126/science.1253448,SkoglundScience2014,10.1126/science.1253448
19 | 1KGPhase3,10.1038/nature15393,1KGPhase3,NA
20 | AllentoftNature2015,10.1038/nature14507,AllentoftNature2015,10.1038/nature14507
21 | CassidyPNAS2016,10.1073/pnas.1518445113,CassidyPNAS2015,10.1073/pnas.1518445113
22 | FuNature2015,10.1038/nature14558,FuNature2015,10.1038/nature14558
23 | GuntherPNAS2015,10.1073/pnas.1509851112,GuntherPNAS2015,10.1073/pnas.1509851112
24 | JonesNatureCommunications2015,10.1038/ncomms9912,JonesNatureCommunications2015,10.1038/ncomms9912
25 | LlorenteScience2015,10.1126/science.aad2879,LlorenteScience2015,10.1126/science.aad2879
26 | MathiesonNature2015,10.1038/nature16152,MathiesonNature2015,10.1038/nature16152
27 | OlaldeMBE2015,10.1093/molbev/msv181,OlaldeMBE2015,10.1093/molbev/msv181
28 | RaghavanScience2015,10.1126/science.aab3884,RaghavanScience2015,10.1126/science.aab3884
29 | RasmussenNature2015,10.1038/nature14625,RasmussenNature2015,10.1038/nature14625
30 | BroushakiScience2016,10.1126/science.aaf7943,BroushakiScience2016,10.1126/science.aaf7943
31 | FuNature2016,10.1038/nature17993,FuNature2016,10.1038/nature17993
32 | HofmanovaPNAS2016,10.1073/pnas.1523951113,HofmanovaPNAS2016,10.1073/pnas.1523951113
33 | JeongPNAS2016,10.1073/pnas.1520844113,JeongPNAS2016,10.1073/pnas.1520844113
34 | KanzawaKiriyamaJHG2016,10.1038/jhg.2016.110,KanzawaJHG2016,10.1038/jhg.2016.110
35 | KilincCurrentBiology2016,10.1016/j.cub.2016.07.057,KilincCurrentBiology2016,10.1016/j.cub.2016.07.057
36 | LazaridisNature2016,10.1038/nature19310,LazaridisNature2016,10.1038/nature19310
37 | MallickNature2016,10.1038/nature18964,MallickNature2016,NA
38 | SkoglundNature2015,10.1038/nature14895,SkoglundNature2015,NA
39 | NA,NA,PrueferNature2014,NA
40 | MartinianoNatureCommunications2016,10.1038/ncomms10326,MartinianoNatureCommunications2016,10.1038/ncomms10326
41 | MondalNatureGenetics2016,10.1038/ng.3621,MondalNatureGenetics2016,10.1038/ng.3621
42 | OmrakCurrentBiology2016,10.1016/j.cub.2015.12.019,OmrakCurrentBiology2016,10.1016/j.cub.2015.12.019
43 | SchiffelsNatureCommunications2016,10.1038/ncomms10408,SchiffelsNatureCommunications2016,10.1038/ncomms10408
44 | SkoglundNature2016,10.1038/nature19844,SkoglundNature2016,10.1038/nature19844
45 | GonzalesFortesCurrentBiology2017,10.1016/j.cub.2017.05.023,GonzalesFortesCurrentBiology2017,10.1016/j.cub.2017.05.023
46 | HaberAJHG2017,10.1016/j.ajhg.2017.06.013,HaberAJHG2017,10.1016/j.ajhg.2017.06.013
47 | JonesCurrentBiology2017,10.1016/j.cub.2016.12.060,JonesCurrentBiology2017,10.1016/j.cub.2016.12.060
48 | KennettNatureCommunications2017,10.1038/ncomms14115,KennettNatureCommunications2017,10.1038/ncomms14115
49 | LazaridisNature2017,10.1038/nature23310,LazaridisNature2017,10.1038/nature23310
50 | LindoPNAS2017,10.1073/pnas.1620410114,LindoPNAS2017,10.1073/pnas.1620410114
51 | LipsonNature2017,10.1038/nature24476,LipsonNature2017,10.1038/nature24476
52 | MartinianoPLoSGenetics2017,10.1371/journal.pgen.1006852,MartinianoPLoSGenetics2017,10.1371/journal.pgen.1006852
53 | NA,NA,PrueferScience2017,10.1126/science.aao1887
54 | RodriguezVarelaCurrentBiology2017,10.1016/j.cub.2017.09.059,RodriguezVarelaCurrentBiology2017,10.1016/j.cub.2017.09.059
55 | SaagCurrentBiology2017,10.1016/j.cub.2017.06.022,SaagCurrentBiology2017,10.1016/j.cub.2017.06.022
56 | SchlebuschScience2017,10.1126/science.aao6266,SchlebuschScience2017,10.1126/science.aao6266
57 | ScheunemannNatureCommunications2017,10.1038/ncomms15694,ScheunemannNatureCommunications2017,NA
58 | SikoraScience2017,10.1126/science.aao1807,SikoraScience2017,10.1126/science.aao1807
59 | SkoglundCell2017,10.1016/j.cell.2017.08.049,SkoglundCell2017,10.1016/j.cell.2017.08.049
60 | UnterlanderNatureCommunications2017,10.1038/ncomms14615,UnterlanderNatureCommunications2017,10.1038/ncomms14615
61 | VanDenBrink2017,10.1080/00758914.2017.1368204,VanDenBrink2017,10.1080/00758914.2017.1368204
62 | VyasAJPA2017,10.1002/ajpa.23312,VyasAJPA2017,10.1002/ajpa.23312
63 | YangCurrentBiology2017,10.1016/j.cub.2017.09.030,YangCurrentBiology2017,10.1016/j.cub.2017.09.030
64 | AmorimNatureCommunications2018,10.1038/s41467-018-06024-4,AmorimNatureCommunications2018,10.1038/s41467-018-06024-4
65 | DamgaardNature2018,10.1038/s41586-018-0094-2,DamgaardNature2018,10.1038/s41586-018-0094-2
66 | DamgaardScience2018,10.1126/science.aar7711,DamgaardScience2018,10.1126/science.aar7711
67 | delaFuentePNAS2018,10.1073/pnas.1715688115,delaFuentePNAS2018,10.1073/pnas.1715688115
68 | EbenesersdottirScience2018,10.1126/science.aar2625,EbenesersdottirScience2018,NA
69 | FernandesScientificReports2018,10.1038/s41598-018-33067-w,FernandesScientificReports2018,10.1038/s41598-018-33067-w
70 | FregelPNAS2018,10.1073/pnas.1800851115,FregelPNAS2018,10.1073/pnas.1800851115
71 | GuntherPLoSBiology2018,10.1371/journal.pbio.2003703,GuntherPLoSBiology2018,10.1371/journal.pbio.2003703
72 | HarneyMayNatureCommunications2018,10.1038/s41467-018-05649-9,HarneyMayNatureCommunications2018,10.1038/s41467-018-05649-9
73 | JeongPNAS2018,10.1073/pnas.1813608115,JeongPNAS2018,NA
74 | KrzewinskaCurrentBiology2018,10.1016/j.cub.2018.06.053,KrzewinskaCurrentBiology2018,10.1016/j.cub.2018.06.053
75 | KrzewinskaScienceAdvances2018,10.1126/sciadv.aat4457,KrzewinskaScienceAdvances2018,10.1126/sciadv.aat4457
76 | LamnidisNatureCommunications2018,10.1038/s41467-018-07483-5,LamnidisNatureCommunications2018,NA
77 | LindoScienceAdvances2018,10.1126/sciadv.aau4921,LindoScienceAdvances2018,10.1126/sciadv.aau4921
78 | LipsonCurrentBiology2018,10.1016/j.cub.2018.02.051,LipsonCurrentBiology2018,10.1016/j.cub.2018.02.051
79 | LipsonScience2018,10.1126/science.aat3188,LipsonScience2018,10.1126/science.aat3188
80 | MathiesonNature2018,10.1038/nature25778,MathiesonNature2018,10.1038/nature25778
81 | McCollScience2018,10.1126/science.aat3628,McCollScience2018,10.1126/science.aat3628
82 | MittnikNatureCommunications2018,10.1038/s41467-018-02825-9,MittnikNatureCommunications2018,NA
83 | MorenoMayarNature2017,10.1038/nature25173,MorenoMayarNature2018,10.1038/nature25173
84 | MorenoMayarScience2018,10.1126/science.aav2621,MorenoMayarScience2018,10.1126/science.aav2621
85 | OlaldeNature2018,10.1038/nature25738,OlaldeNature2018,10.1038/nature25738
86 | PosthNakatsukaCell2018,10.1016/j.cell.2018.10.027,PosthNakatsukaCell2018,NA
87 | PosthNatureEcologyEvolution2018,10.1038/s41559-018-0498-2,PosthNatureEcologyEvolution2018,10.1038/s41559-018-0498-2
88 | ScheibScience2018,10.1126/science.aar6851,ScheibScience2018,10.1126/science.aar6851
89 | SchroederPNAS2018,10.1073/pnas.1716839115,SchroederPNAS2018,10.1073/pnas.1716839115
90 | ValdioseraPNAS2018,10.1073/pnas.1717762115,ValdioseraPNAS2018,10.1073/pnas.1717762115
91 | vandeLoosdrechtScience2018,10.1126/science.aar8380,vandeLoosdrechtScience2018,NA
92 | VeeramahPNAS2018,10.1073/pnas.1719880115,VeeramahPNAS2018,10.1073/pnas.1719880115
93 | ZallouaScientificReports2018,10.1038/s41598-018-35667-y,ZallouaScientificReports2018,NA
94 | AntonioGaoMootsScience2019,10.1126/science.aay6826,AntonioGaoMootsScience2019,NA
95 | BiaginiEJHG2019,10.1038/s41431-019-0361-1,BiaginiEJHG2019,10.1038/s41431-019-0361-1
96 | BraceDiekmannNatureEcologyEvolution2019,10.1038/s41559-019-0871-9,BraceNatureEcologyEvolution2019,10.1038/s41559-019-0871-9
97 | FeldmanNatureCommunications2019,10.1038/s41467-019-09209-7,FeldmanNatureCommunications2019,NA
98 | FeldmanScienceAdvances2019,10.1126/sciadv.aax0061,FeldmanScienceAdvances2019,NA
99 | FlegontovNature2019,10.1038/s41586-019-1251-y,FlegontovNature2019,10.1038/s41586-019-1251-y
100 | GonzalesFortesProcRoyalSocB2019,10.1098/rspb.2018.2288,GonzalesFortesProcRoyalSocB2019,NA
101 | HaberAJHG2018,10.1016/j.ajhg.2019.03.015,HaberAJHG2019,NA
102 | HarneyNatureCommunications2019,10.1038/s41467-019-11357-9,HarneyNatureCommunications2019,NA
103 | JarveCurrentBiology2019,10.1016/j.cub.2019.06.019,JarveCurrentBiology2019,NA
104 | JeongNatureEcologyEvolution2019,10.1038/s41559-019-0878-2,JeongNatureEcologyEvolution2019,10.1038/s41559-019-0878-2
105 | MalmstromProcBiolSci2019,10.1098/rspb.2019.1528,MalmstromProcBiolSci2019,NA
106 | MittnikScience2019,10.1126/science.aax6219,MittnikScience2019,NA
107 | NarasimhanPattersonScience2019,10.1126/science.aat7487,NarasimhanPattersonScience2019,10.1126/science.aat7487
108 | NikitinScientificReports2019,10.1038/s41598-019-56029-2,NikitinScientificReports2019,NA
109 | NingCurrentBiology2019,10.1016/j.cub.2019.06.044,NingCurrentBiology2019,NA
110 | OlaldeScience2019,10.1126/science.aav4040,OlaldeScience2019,10.1126/science.aav4040
111 | PrendergastLipsonSawchukScience2019,10.1126/science.aaw6275,PrendergastLipsonSawchukScience2019,10.1126/science.aaw6275
112 | SaagCurrentBiology2019,10.1016/j.cub.2019.04.026,SaagCurrentBiology2019,NA
113 | SanchezQuintoPNAS2019,10.1073/pnas.1818037116,SanchezQuintoPNAS2019,NA
114 | SchroederPNAS2019,10.1073/pnas.1820210116,SchroederPNAS2019,NA
115 | ShindeNarasimhanCell2019,10.1016/j.cell.2019.08.048,ShindeNarasimhanCell2019,NA
116 | SikoraNature2019,10.1038/s41586-019-1279-z,SikoraNature2019,NA
117 | VillalbaMoucoCurrentBiology2019,10.1016/j.cub.2019.02.006,VillalbaMoucoCurrentBiology2019,NA
118 | WangNatureCommunications2019,10.1038/s41467-018-08220-8,WangNatureCommunications2019,NA
119 | AgranatTamirCell2020,10.1016/j.cell.2020.04.024,AgranatTamirCell2020,10.1016/j.cell.2020.04.024
120 | NA,NA,AADRv443,10.7910/DVN/FFIDCW
121 | NA,NA,BarqueraCurrBiol2020,10.1016/j.cub.2020.04.002
122 | BergstromScience2020,10.1126/science.aay5012,BergstromScience2020,10.1126/science.aay5012
123 | BongersPNAS2020,10.1073/pnas.2005965117,BongersPNAS2020,10.1073/pnas.2005965117
124 | BrunelPNAS2020,10.1073/pnas.1918034117,BrunelPNAS2020,10.1073/pnas.1918034117
125 | WegmannCurBio2020,10.1016/j.cub.2020.08.033,Burger2020,10.1016/j.cub.2020.08.033
126 | CassidyNature2020,10.1038/s41586-020-2378-6,CassidyNature2020,10.1038/s41586-020-2378-6
127 | CoutinhoJakobssonAJPA2020,10.1002/ajpa.24079,CoutinhoJakobssonAJPA2020,10.1002/ajpa.24079
128 | FernandesSirakNature2020,10.1038/s41586-020-03053-2,FernandesSirakNature2020,10.1038/s41586-020-03053-2
129 | FernandesNatureEcologyEvolution2020,10.1038/s41559-020-1102-0,FernandesNatureEcologyEvolution2020,10.1038/s41559-020-1102-0
130 | FurtwanglerNatureCommunications2020,10.2144/btn-2020-0100,FurtwaenglerNatCom2020,NA
131 | GokhmanNatureCommunications2020,10.1038/s41467-020-15020-6,GokhmanNatureCommunications2020,NA
132 | HaberAJHG2020,10.1016/j.ajhg.2020.05.008,HaberAJHG2020,10.1016/j.ajhg.2020.05.008
133 | NA,NA,ImmelScientificReports2020,10.1038/s41598-020-61190-0
134 | JeongCell2020,10.1016/j.cell.2020.10.015,JeongCell2020,10.1016/j.cell.2020.10.015
135 | LinderholmNatureScientificReports2020,10.1038/s41598-020-63138-w,LinderholmScientificReports2020,10.1038/s41598-020-63138-w
136 | LipsonNature2020,10.1038/s41586-020-1929-1,LipsonNature2020,10.1038/s41586-020-1929-1
137 | MafessoniPNAS2020,10.1073/pnas.2004944117,MafessoniPNAS2020,10.1073/pnas.2004944117
138 | MarcusNatureCommunications2020,10.1038/s41467-020-14523-6,MarcusNatureCommunications2020,10.1038/s41467-020-14523-6
139 | MargaryanWillerslevNature2020,10.1038/s41586-020-2688-8,MargaryanWillerslevNature2020,10.1038/s41586-020-2688-8
140 | MassilaniPaaboScience2020,10.1126/science.abc1166,MassilaniPaaboScience2020,10.1126/science.abc1166
141 | NagelePosthScience2020,10.1126/science.aba8697,NagelePosthScience2020,10.1126/science.aba8697
142 | NakatsukaCell2020,10.1016/j.cell.2020.04.015,NakatsukaCell2020,10.1016/j.cell.2020.04.015
143 | NakatsukaLuisiNatureCommunications2020,10.1038/s41467-020-17656-w,NakatsukaLuisiNatureCommunications2020,10.1038/s41467-020-17656-w
144 | ColonMolecularBiologyandEvolution2020,10.1093/molbev/msz267,Nieves_ColonMolecularBiologyandEvolution2020,10.1093/molbev/msz267
145 | NingNatureCommunications2020,10.1038/s41467-020-16557-2,Ning2020NatureCommunications,10.1038/s41467-020-16557-2
146 | RivollatScienceAdvance2020,10.1126/sciadv.aaz5344,RivollatScienceAdvances2020,10.1126/sciadv.aaz5344
147 | SkourtaniotiCell2020,10.1016/j.cell.2020.04.044,SkourtaniotiCell2020,10.1016/j.cell.2020.04.044
148 | PugachPNAS2020,10.1073/pnas.2022112118,StonekingPNAS2020,10.1073/pnas.2022112118
149 | TeschlerNicolaCommunicationsBiology2020,10.1038/s42003-020-01372-8,TeschlerNicolaCommunicationsBiology2020,10.1038/s42003-020-01372-8
150 | WangSciAdv2020,10.1126/sciadv.aaz0183,WangGoldstein2020ScienceAdvances,10.1126/sciadv.aaz0183
151 | YangScience2020,10.1126/science.aba0909,Yang2020Science,10.1126/science.aba0909
152 | YuCell2020,10.1016/j.cell.2020.04.037,YuCell2020,10.1016/j.cell.2020.04.037
153 | BortoliniBenazziCurBio2021,10.1016/j.cub.2021.03.078,BortoliniBenazziCurBio2021,10.1016/j.cub.2021.03.078
154 | NA,NA,AADRv50,10.7910/DVN/FFIDCW
155 | CapodiferroAchilliCell2021,10.1016/j.cell.2021.02.040,CapodiferroAchilliCell2021,10.1016/j.cell.2021.02.040
156 | CarlhoffBrummNature2021,10.1038/s41586-021-03823-6,CarlhoffNature2021,10.1038/s41586-021-03823-6
157 | ClementeCell2021,10.1016/j.cell.2021.03.039,ClementeCell2021,10.1016/j.cell.2021.03.039
158 | EgfjordAllentoftPLOS2021,10.1371/journal.pone.0244872,EgfjordAllentoftPLOS2021,10.1371/journal.pone.0244872
159 | FreilichPinhasiScientificReports2021,10.1038/s41598-021-94932-9,FreilichSciRep2021,https://doi.org/10.1038/s41598-021-94932-9
160 | GnecchiRusconeScienceAdvances2021,10.1126/sciadv.abe4414,GnecchiRuscone2021,10.1126/sciadv.abe4414
161 | HarneyCheronetGenomeResearch2021,10.1101/gr.267534.120,HarneyCheronetGenomeResearch2021,10.1101/gr.267534.120
162 | KilincSciAdv2021,10.1126/sciadv.abc4587,KilincScienceAdvances2021,10.1126/sciadv.abc4587
163 | LarenaJakobbsonPNAS2021,10.1073/pnas.2026132118,LarenaJakobbsonPNAS2021,10.1073/pnas.2026132118
164 | MaoFuCell2021,10.1016/j.cell.2021.04.040,MaoFuCell2021,10.1016/j.cell.2021.04.040
165 | NovakPLoSOne2021,10.1371/journal.pone.0247332,NovakPLoSOne2021,10.1371/journal.pone.0247332
166 | PapacScienceAdvances2021,10.1126/sciadv.abi6941,PapacScienceAdvances2021,10.1126/sciadv.abi6941
167 | PattersonNature2021,10.1038/s41586-021-04287-4,PattersonNature2021,NA
168 | HaakLazaridis2015,10.1038/nature14317,HaakLazaridisNature2015,NA
169 | SaagMetspaluScience2021,10.1126/sciadv.abd6535,SaagScienceAdvances2021,10.1126/sciadv.abd6535
170 | SaupeScheibCurrBio2021,10.1016/j.cub.2021.04.022,SaupeCurrBiol2021,NA
171 | SeguinOrlandoCurrBio2021,10.1016/j.cub.2020.12.015,SeguinOrlandoCurrBio2021,10.1016/j.cub.2020.12.015
172 | SvenssonJakobssonCurrBio2021,10.1016/j.cub.2021.04.045,SvenssonJakobssonCurrBio2021,10.1016/j.cub.2021.04.045
173 | WangNature2021,10.1038/s41586-021-03336-2,WangNature2021,10.1038/s41586-021-03336-2
174 | YakaSomelCurrBio2021,10.1016/j.cub.2021.03.050,YakaSomelCurrBio2021,10.1016/j.cub.2021.03.050
175 | ZegaracBurgerbioRxiv2021,10.1101/2020.05.18.101337,Zegarac2021SciReports,10.1038/s41598-021-89090-x
176 | GretzingerNature2022,10.1038/s41586-022-05247-2,Gretzinger2022,10.1038/s41586-022-05247-2
177 | Pruefer2017,10.1126/science.aao1887,Pruefer2017,10.1126/science.aao1887
178 | PrueferNature2013,10.1038/nature12886,PrueferNature2013,10.1038/nature12886
179 | SlonNature2018,10.1038/s41586-018-0455-x,SlonNature2018,10.1038/s41586-018-0455-x
180 | HajdinjakNature2018,10.1038/nature26151,HajdinjakNature2018,10.1038/nature26151
181 | NA,NA,GRC_for_GRCh37_e71,10.1371/journal.pbio.1001091
182 | 1KGPhase3,10.1038/nature15393,1000_Genomes_Project_Consortium_2015,10.1038/nature15393
183 | NA,NA,WatersonNature2005,10.1038/nature04072
184 | NA,NA,GordonScience2016,10.1126/science.aae0344
185 | AADRv541p1,10.7910/DVN/FFIDCW,NA,NA
186 | LazaridisAlpaslanRoodenbergScience2022,10.1126/science.abm4247,NA,NA
187 | KennettLipsonPruferNatureCommunications2022,10.1038/s41467-022-29158-y,NA,NA
188 | LipsonSawchukNature2022,10.1038/s41586-022-04430-9,NA,NA
189 | ReichNature2010,10.1038/nature09710,NA,NA
190 | LiuScience2022,10.1126/science.abm6536,NA,NA
191 | UllingerNearEasternArchaeology2022,10.1086/720748,NA,NA
192 | CookeNakagomeSciAdv2021,10.1126/sciadv.abh2419,NA,NA
193 | TieslerSedigAntiquity2022,10.15184/aqy.2022.79,NA,NA
194 | WangbioRxiv2020,10.1101/2020.03.25.004606,NA,NA
195 | WohnsScience2022,10.1126/science.abi8264,NA,NA
196 | CsakySciRep2020,10.1038/s41598-020-75910-z,NA,NA
197 | SirakNatureCommunications2021,10.1038/s41467-021-27356-8,NA,NA
198 | LipsonCurrentBiology2020,10.1016/j.cub.2020.09.035,NA,NA
199 | IngmanStockhammerPLoS2021,10.1371/journal.pone.0241883,NA,NA
200 | KumarFuScience2022,10.1126/science.abk1534,NA,NA
201 | LiuJeongNatComm2022,10.1038/s41467-022-28827-2,NA,NA
202 | PopovicBacaSciAdv2021,10.1126/sciadv.abg7261,NA,NA
203 | RobbeetsNingNature2021,10.1038/s41586-021-04108-8,NA,NA
204 | AneliPaganiMolBioEvol2022,10.1093/molbev/msac014,NA,NA
205 | SpyrouNature2022,10.1038/s41586-022-04800-3,NA,NA
206 | SrigyanValdioseraCommBio2022,10.1038/s42003-022-03508-4,NA,NA
207 | LindoFigueiroPNASNexus2022,10.1093/pnasnexus/pgac047,NA,NA
208 | OliveiraNatureEcologyEvolution2022,10.1038/s41559-022-01775-2,NA,NA
209 | WangCell2021,10.1016/j.cell.2021.05.018,NA,NA
210 | ZhangNature2021,10.1038/s41586-021-04052-7,NA,NA
211 | AntoniobioRxiv2022,10.1101/2022.05.15.491973,NA,NA
212 | GelabertCurrBio2022,10.1016/j.cub.2022.06.004,NA,NA
213 | MarotiTorokCurrBio2022,10.1016/j.cub.2022.04.093,NA,NA
214 | FowlerOlaldeNature2021,10.1038/s41586-021-04241-4,NA,NA
215 | ScheibAnnHumBio2019,10.1080/03014460.2019.1623912,NA,NA
216 | SullivanScienceAdvances2018,10.1126/sciadv.aao1262,NA,NA
217 | GnecchiRusconeCell2022,10.1016/j.cell.2022.03.007,2022_GnecchiRuscone_CarpathianBasin,10.1016/j.cell.2022.03.007
218 | PosthScienceAdvances2021,10.1126/sciadv.abi7673,NA,NA
219 | VillalbaMoucoSciAdv2021,10.1126/sciadv.abi7038,NA,NA
220 | GreenScience2010,10.1126/science.1188021,NA,NA
221 | HajdinjakPaaboNature2021,10.1038/s41586-021-03335-3,NA,NA
222 | JensenSchroederNatureCommunications2019,10.1038/s41467-019-13549-9,NA,NA
223 | PrueferNatureEcologyEvolution2021,10.1038/s41559-021-01443-x,NA,NA
224 | YuvandeLoosdrechtiScience2022,10.1016/j.isci.2022.104244,YuiScience2022,10.1016/j.isci.2022.104244
225 | ChildebayevaHaakMolBioEvo2022,10.1093/molbev/msac108,NA,NA
226 | DeAngelisRickardsGenes2022,10.3390/genes13010136,NA,NA
227 | RivollatDeguillouxPNAS2022,10.1073/pnas.2120786119,NA,NA
228 | ScorranoMacciardiSciRep2022,10.1038/s41598-022-10899-1,NA,NA
229 | MarchExcoffierCell2022,10.1016/j.cell.2022.04.008,NA,NA
230 | DuliasPNAS2022,10.1073/pnas.2108001119,NA,NA
231 | GelabertSciRep2022,10.1038/s41598-022-11117-8,NA,NA
232 | SilvaRichardsSciRep2021,10.1038/s41598-021-95996-3,NA,NA
233 | MootsbioRxiv2022,10.1101/2022.03.13.483276,NA,NA
234 | LiScience2008,10.1126/science.1153717,NA,NA
235 | JakobssonNature2008,10.1038/nature06742,NA,NA
236 | ChangmaiPLoSGenetics2022,10.1371/journal.pgen.1010036,NA,NA
237 | VyasDryadDigitalRepository2017,10.5061/DRYAD.1PM3R,NA,NA
238 |
--------------------------------------------------------------------------------
/code/archive_comparison_barplots.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 | library(ggplot2)
3 |
4 | #### load janno data ####
5 |
6 | load("data/janno_data.RData")
7 | load("data/bib_data.RData")
8 | load("data/bibkey_lookup_hashmap.RData")
9 |
10 | #### A: publications barplot ####
11 |
12 | publication_per_package <- dplyr::bind_rows(pca, paa) %>%
13 | dplyr::group_by(archive, package, main_publication) %>%
14 | dplyr::mutate(main_publication = lookup_paa_key(main_publication)) %>% # NOTE(review): rewrites a grouping column - confirm the installed dplyr allows this
15 | dplyr::summarise(.groups = "drop") # no summary columns: one row per group
16 |
17 | summarized_multi_counting <- publication_per_package %>%
18 | dplyr::group_by(archive, main_publication) %>%
19 | dplyr::summarise(
20 | n = dplyr::n(), # number of packages listing this publication
21 | package = dplyr::case_when(
22 | n == 1 ~ package[1],
23 | n > 1 ~ "_" # appears in multiple packages
24 | ),
25 | .groups = "drop"
26 | ) %>%
27 | dplyr::arrange(dplyr::desc(n)) %>%
28 | dplyr::select(-n)
29 |
30 | publication_count <- summarized_multi_counting %>%
31 | dplyr::group_by(archive, package) %>%
32 | dplyr::summarise(n = dplyr::n(), .groups = "drop") %>% # publications per package
33 | dplyr::group_by(archive) %>%
34 | dplyr::arrange(archive, package) %>%
35 | dplyr::mutate(package = factor(package, levels = package %>% unique)) %>% # levels in sorted order
36 | # alternating colouring of packages
37 | dplyr::mutate(colour_group = rep_len(c("A", "B"), dplyr::n())) %>% # A, B, A, ... within each archive
38 | dplyr::mutate(colour_group = ifelse(package == "_", "A", colour_group)) %>% # the multi-package bucket is always "A"
39 | dplyr::ungroup()
40 |
41 | package_publication_plot <- publication_count %>% # one stacked bar per archive
42 |   ggplot() +
43 |   ggpattern::geom_col_pattern(
44 |     mapping = aes(
45 |       x = archive,
46 |       y = n,
47 |       group = package,
48 |       fill = colour_group,
49 |       pattern = package == "_" # TRUE for publications spread over several packages
50 |     ),
51 |     pattern_fill = "lightgrey",
52 |     pattern_colour = NA,
53 |     pattern_density = 0.3
54 |   ) +
55 |   ggpattern::scale_pattern_manual(
56 |     values = c("TRUE" = "stripe", "FALSE" = "none"),
57 |     guide = "none"
58 |   ) +
59 |   scale_fill_manual(values = c("A" = "darkgrey", "B" = "lightgrey")) +
60 |   guides(fill = guide_legend(
61 |     title = "Alternating colours for packages: ",
62 |     label = FALSE, # TRUE/FALSE spelled out: T and F can be reassigned
63 |     override.aes = list(pattern = "none")
64 |   )) +
65 |   coord_flip() +
66 |   theme_bw() +
67 |   theme(
68 |     axis.title.y = element_blank(),
69 |     legend.position = "top",
70 |     legend.margin = margin(t = -0.1, b = -0.3, unit = "cm"),
71 |     legend.justification = "right",
72 |     legend.spacing.x = unit(0, "cm"),
73 |     plot.title = element_text(size = 11)
74 |   ) +
75 |   ylab(
76 |     "Number of main data publications per package"
77 |   )
78 |
79 | ggsave(
80 | paste0("plots/figure_barplots_A.pdf"),
81 | plot = package_publication_plot,
82 | device = "pdf",
83 | scale = 0.7,
84 | dpi = 300,
85 | width = 250, height = 70, units = "mm",
86 | limitsize = F,
87 | bg = "white"
88 | )
89 |
90 | #### B: publication comparison barcode plot ####
91 | # one publication year per bibtexkey, taken from the combined pca_bib/paa_bib tables
92 | keys_with_years <- dplyr::bind_rows(pca_bib, paa_bib) %>%
93 | dplyr::select(archive, bibtexkey, year) %>%
94 | dplyr::distinct() %>%
95 | # select paa key if available, otherwise the pca key
96 | dplyr::group_by(bibtexkey) %>%
97 | dplyr::arrange(archive) %>%
98 | dplyr::slice_head() %>%
99 | dplyr::ungroup() %>%
100 | dplyr::select(-archive)
101 | # individuals per archive and publication (keys matching "AADR" excluded); where the counts differ between archives, the archive with fewer individuals gets an extra "no" row holding the difference
102 | samples_per_publication <- dplyr::bind_rows(pca, paa) %>%
103 | # dplyr::select(Approx_Individual_ID, archive, Publication = main_publication) %>%
104 | dplyr::select(Approx_Individual_ID, archive, Publication) %>%
105 | tidyr::unnest(cols = "Publication") %>%
106 | dplyr::filter(!grepl("AADR", Publication)) %>%
107 | dplyr::distinct() %>%
108 | dplyr::mutate(Publication = lookup_paa_key(Publication)) %>%
109 | dplyr::group_by(archive, Publication) %>%
110 | dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
111 | tidyr::complete(archive, Publication, fill = list(n = 0)) %>% # archive/publication pairs without individuals get n = 0
112 | dplyr::left_join(keys_with_years, by = c("Publication" = "bibtexkey")) %>%
113 | dplyr::group_split(Publication) %>%
114 | purrr::map_dfr(
115 | function(x) {
116 | if (x$n %>% unique() %>% length() == 1) { # same count in every archive
117 | x %>% dplyr::mutate(availability = "yes")
118 | } else {
119 | less <- x %>% dplyr::slice_min(order_by = n)
120 | more <- x %>% dplyr::slice_max(order_by = n)
121 | dplyr::bind_rows(
122 | x %>% dplyr::mutate(availability = "yes", year = more$year),
123 | less %>% dplyr::mutate(n = more$n - less$n, year = more$year, availability = "no")
124 | )
125 | }
126 | }
127 | ) %>%
128 | dplyr::arrange(year) %>%
129 | dplyr::mutate(
130 | Publication = factor(Publication, levels = rev(unique(Publication))),
131 | archive = factor(archive, levels = c("PCA", "PAA"))
132 | )
133 | # cumulative PAA individual counts per year, relabelled with the following year; used to position the year markers in panel B
134 | year_separators <- samples_per_publication %>%
135 | dplyr::filter(archive == "PAA") %>%
136 | dplyr::group_by(year) %>%
137 | dplyr::summarise(n = sum(n)) %>%
138 | dplyr::arrange(year) %>%
139 | dplyr::mutate(n = cumsum(n)) %>%
140 | dplyr::mutate(year = dplyr::lead(year, n = 1, default = NA)) %>%
141 | dplyr::filter(year >= 2012) # also drops the rows whose year is NA
142 |
143 | # samples_per_publication %>% dplyr::filter(availability == "no") %>%
144 | # dplyr::arrange(dplyr::desc(n)) %>% View()
145 | # panel B: flipped stacked bars of individuals per publication for each archive, filled by availability; triangles and labels mark the positions from year_separators
146 | publication_barcode_plot <- samples_per_publication %>%
147 | ggplot() +
148 | geom_bar(
149 | aes(x = archive, y = n, group = Publication, fill = availability),
150 | stat = "identity"
151 | ) +
152 | ggrepel::geom_text_repel(
153 | data = year_separators %>% dplyr::mutate(x = "PCA"),
154 | mapping = aes(x = x, y = n, label= year),
155 | position = position_nudge(x = -0.5),
156 | angle = 90, segment.color = 'transparent',
157 | direction = "x",
158 | hjust = -0.2,
159 | size = 2.4,
160 | box.padding = 0.1
161 | ) +
162 | geom_point(
163 | data = year_separators %>% dplyr::mutate(x = "PCA"),
164 | mapping = aes(x = x, y = n),
165 | position = position_nudge(x = -0.5),
166 | shape = 17, size = 0.7
167 | ) +
168 | coord_flip() +
169 | theme_bw() +
170 | theme(
171 | legend.position = "top",
172 | axis.title.y = element_blank(),
173 | legend.margin = margin(t = -0.1, b = -0.3, unit='cm'),
174 | legend.justification = "right",
175 | plot.title = element_text(size = 11)
176 | ) +
177 | scale_fill_manual(values = c("yes" = "lightgrey", "no" = "darkgrey")) +
178 | scale_y_continuous(breaks = seq(0,18000,3000)) +
179 | guides(fill = guide_legend(title = "Is the respective individual in the archive?")) +
180 | ylab(
181 | "Number of individuals per publication by year"
182 | )
183 | # write panel B as a standalone PDF
184 | ggsave(
185 | "plots/figure_barplots_B.pdf",
186 | plot = publication_barcode_plot,
187 | device = "pdf",
188 | scale = 0.7,
189 | dpi = 300,
190 | width = 250, height = 70, units = "mm",
191 | limitsize = FALSE,
192 | bg = "white"
193 | )
194 |
195 | #### C: source barplot ####
196 | # rows with a distinct Poseidon_ID per archive, counted by source
197 | source_count_poseidon_id <- dplyr::bind_rows(pca, paa) %>%
198 | dplyr::distinct(archive, Poseidon_ID, .keep_all = TRUE) %>%
199 | dplyr::group_by(archive, source) %>%
200 | dplyr::summarise(n = dplyr::n(), .groups = "drop")
201 | # rows with a distinct Approx_Individual_ID per archive, counted by source; .keep_all keeps the first row of each individual
202 | source_count_approx_ind_id <- dplyr::bind_rows(pca, paa) %>%
203 | dplyr::distinct(archive, Approx_Individual_ID, .keep_all = TRUE) %>%
204 | dplyr::group_by(archive, source) %>%
205 | dplyr::summarise(n = dplyr::n(), .groups = "drop")
206 | # the individual counts again, with every source matching "AADR" merged into one "AADR" category
207 | source_count_approx_ind_id_simple <- source_count_approx_ind_id %>%
208 | dplyr::mutate(
209 | source = dplyr::case_when(
210 | grepl("AADR", source) ~ "AADR",
211 | .default = source
212 | ) %>%
213 | factor(levels = c("Submitted by author", "Extracted from paper", "AADR"))
214 | ) %>%
215 | dplyr::group_by(archive, source) %>%
216 | dplyr::summarise(n = sum(n), .groups = "drop")
217 |
218 | # simple version of the plot: individuals per archive, stacked by the merged source categories
219 | source_plot_simple <- source_count_approx_ind_id_simple %>%
220 | ggplot() +
221 | geom_col(
222 | mapping = aes(x = archive, y = n, fill = source)
223 | ) +
224 | coord_flip() +
225 | scale_fill_manual(
226 | values = c(
227 | "AADR" = "#CCBA72",
228 | "Extracted from paper" = "#9986A5",
229 | "Submitted by author" = "lightblue"
230 | )
231 | ) + #wesanderson::wes_palette("IsleofDogs1")
232 | guides(
233 | fill = guide_legend(
234 | title = "Original data source",
235 | reverse = TRUE
236 | )
237 | ) +
238 | theme_bw() +
239 | theme(
240 | legend.position = "top",
241 | axis.title.y = element_blank(),
242 | legend.margin = margin(t = -0.1, b = -0.3, unit='cm'),
243 | legend.justification = "right",
244 | plot.title = element_text(size = 11)
245 | ) +
246 | ylab(
247 | "Number of individuals per source & primary mechanism of origin"
248 | )
249 | # write the simple panel C as a standalone PDF
250 | ggsave(
251 | "plots/figure_barplots_C_simple.pdf",
252 | plot = source_plot_simple,
253 | device = "pdf",
254 | scale = 0.7,
255 | dpi = 300,
256 | width = 250, height = 70, units = "mm",
257 | limitsize = FALSE,
258 | bg = "white"
259 | )
260 | # per archive and source: the individual count plus the surplus of Poseidon_ID rows over individuals (n_diff), in long format
261 | source_count <- dplyr::full_join(
262 | source_count_poseidon_id, source_count_approx_ind_id,
263 | by = c("archive", "source"), suffix = c("_poseidon_id", "_approx_ind_id")
264 | ) %>%
265 | dplyr::transmute( # a group missing on one side of the full join counts as 0 instead of NA
266 | archive, source,
267 | n_approx_ind_id = dplyr::coalesce(n_approx_ind_id, 0L),
268 | n_diff = dplyr::coalesce(n_poseidon_id, 0L) - n_approx_ind_id
269 | ) %>%
270 | tidyr::pivot_longer(
271 | cols = tidyselect::starts_with("n_"),
272 | names_to = "count_type",
273 | values_to = "n"
274 | ) %>%
275 | dplyr::mutate(
276 | count_type = factor(count_type, levels = c("n_diff", "n_approx_ind_id"))
277 | )
278 | # bars are filled by the unmerged source; the n_diff part is drawn striped
279 | # full version of the plot
280 | source_plot <- source_count %>%
281 | ggplot() +
282 | ggpattern::geom_col_pattern(
283 | mapping = aes(x = archive, y = n, fill = source, pattern = count_type),
284 | pattern_color = "white",
285 | pattern_fill = "white"
286 | ) +
287 | coord_flip() +
288 | scale_fill_manual(
289 | values = c(
290 | "AADR v42.4" = "#795043",
291 | "AADR v44.3" = "#1C181A",
292 | "AADR v50" = "#D9D0D3",
293 | "AADR v54.1.p1" = "#CCBA72",
294 | "Extracted from paper" = "#9986A5",
295 | "Submitted by author" = "lightblue"
296 | )
297 | ) +
298 | ggpattern::scale_pattern_manual(
299 | values = c("n_approx_ind_id" = "none", "n_diff" = "stripe"),
300 | guide = "none"
301 | ) +
302 | guides(
303 | fill = guide_legend(
304 | title = "Original data source",
305 | reverse = TRUE,
306 | override.aes = list(pattern = "none")
307 | )
308 | ) +
309 | theme_bw() +
310 | theme(
311 | legend.position = "top",
312 | axis.title.y = element_blank(),
313 | legend.margin = margin(t = -0.1, b = -0.3, unit='cm'),
314 | legend.justification = "right",
315 | plot.title = element_text(size = 11)
316 | ) +
317 | ylab(
318 | "Number of individuals/samples per source & primary mechanism of origin"
319 | )
320 | # write the full panel C as a standalone PDF
321 | ggsave(
322 | "plots/figure_barplots_C.pdf",
323 | plot = source_plot,
324 | device = "pdf",
325 | scale = 0.7,
326 | dpi = 300,
327 | width = 250, height = 70, units = "mm",
328 | limitsize = FALSE,
329 | bg = "white"
330 | )
331 |
332 | #### D: sankey sources ####
333 | # level order of the simplified sources; it also decides which source is kept when an individual has several rows in one archive (arrange + slice_head below)
334 | simple_levels <- c(
335 | "Submitted by author",
336 | "Extracted from paper",
337 | "AADR",
338 | "not in archive"
339 | )
340 | # one source per individual and archive, spread into PCA/PAA columns (absent = "not in archive") and reshaped with ggsankey::make_long()
341 | sankey_sources_input_simple <- dplyr::bind_rows(pca, paa) %>%
342 | dplyr::select(Poseidon_ID, Approx_Individual_ID, archive, source) %>%
343 | dplyr::mutate(
344 | source = dplyr::case_when(
345 | grepl("AADR", source) ~ "AADR",
346 | .default = source
347 | ) %>%
348 | factor(levels = simple_levels)
349 | ) %>%
350 | dplyr::group_by(Approx_Individual_ID, archive) %>%
351 | dplyr::arrange(source) %>%
352 | dplyr::slice_head(n = 1) %>%
353 | dplyr::ungroup() %>%
354 | tidyr::pivot_wider(
355 | id_cols = "Approx_Individual_ID", names_from = "archive", values_from = "source"
356 | ) %>%
357 | tidyr::replace_na(list(PCA = "not in archive", PAA = "not in archive")) %>%
358 | ggsankey::make_long(PCA, PAA)
359 |
360 | # panel D (simple): alluvial plot of the simplified sources across PCA and PAA; only "not in archive" nodes carry a text label
361 | sources_sankey_plot_simple <- sankey_sources_input_simple %>%
362 | # to add a label only for missing values
363 | dplyr::mutate(
364 | label = dplyr::case_match(
365 | node,
366 | "not in archive" ~ "missing",
367 | .default = ""
368 | )
369 | ) %>%
370 | ggplot(
371 | aes(
372 | x = x,
373 | next_x = next_x,
374 | node = node,
375 | next_node = next_node,
376 | fill = factor(node, levels = simple_levels),
377 | label = label
378 | )
379 | ) +
380 | ggsankey::geom_alluvial(
381 | flow.alpha = .7,
382 | width = 0.1
383 | #space = 200
384 | ) +
385 | ggsankey::geom_alluvial_text(
386 | size = 2, color = "white"
387 | ) +
388 | labs(x = NULL) +
389 | scale_fill_manual(
390 | values = c(
391 | "AADR" = "#CCBA72",
392 | "Extracted from paper" = "#9986A5",
393 | "Submitted by author" = "lightblue"
394 | ),
395 | na.value = "darkgrey"
396 | ) +
397 | scale_x_discrete(expand = c(0.1, 0.1)) +
398 | guides(fill = guide_legend(title = "Original data source")) +
399 | theme_bw() +
400 | theme(
401 | legend.position = "none",
402 | plot.title = element_text(size = 11)
403 | ) +
404 | coord_flip() +
405 | ylab(
406 | "Number of individuals that match across archives per data source"
407 | )
408 | # write the simple panel D as a standalone PDF
409 | ggsave(
410 | "plots/figure_barplots_D_simple.pdf",
411 | plot = sources_sankey_plot_simple,
412 | device = "pdf",
413 | scale = 0.7,
414 | dpi = 300,
415 | width = 250, height = 70, units = "mm",
416 | limitsize = FALSE,
417 | bg = "white"
418 | )
419 | # like sankey_sources_input_simple, but keeping the existing levels of source (assumes source is a factor - TODO confirm) with "not in archive" appended as last level
420 | sankey_sources_input <- dplyr::bind_rows(pca, paa) %>%
421 | dplyr::select(Poseidon_ID, Approx_Individual_ID, archive, source) %>%
422 | dplyr::mutate(source = factor(source, levels = c(levels(source), "not in archive"))) %>%
423 | dplyr::group_by(Approx_Individual_ID, archive) %>%
424 | dplyr::arrange(source) %>%
425 | dplyr::slice_head(n = 1) %>%
426 | dplyr::ungroup() %>%
427 | tidyr::pivot_wider(
428 | id_cols = "Approx_Individual_ID", names_from = "archive", values_from = "source"
429 | ) %>%
430 | tidyr::replace_na(list(PCA = "not in archive", PAA = "not in archive")) %>%
431 | ggsankey::make_long(PCA, PAA)
432 | # panel D (full): alluvial plot of the unmerged sources; "not in archive" is not among levels(pca$source), so its fill becomes NA and takes na.value
433 | sources_sankey_plot <- sankey_sources_input %>%
434 | # to add a label only for missing values
435 | dplyr::mutate(
436 | label = dplyr::case_match(
437 | node,
438 | "not in archive" ~ "missing",
439 | .default = ""
440 | )
441 | ) %>%
442 | ggplot(
443 | aes(
444 | x = x,
445 | next_x = next_x,
446 | node = node,
447 | next_node = next_node,
448 | fill = factor(node, levels = levels(pca$source)),
449 | label = label
450 | )
451 | ) +
452 | ggsankey::geom_alluvial(
453 | flow.alpha = .7,
454 | width = 0.1
455 | #space = 200
456 | ) +
457 | ggsankey::geom_alluvial_text(
458 | size = 2, color = "white"
459 | ) +
460 | labs(x = NULL) +
461 | scale_fill_manual(
462 | values = c(
463 | "AADR v42.4" = "#795043",
464 | "AADR v44.3" = "#1C181A",
465 | "AADR v50" = "#D9D0D3",
466 | "AADR v54.1.p1" = "#CCBA72",
467 | "Extracted from paper" = "#9986A5",
468 | "Submitted by author" = "lightblue"
469 | ),
470 | na.value = "darkgrey"
471 | ) +
472 | scale_x_discrete(expand = c(0.1, 0.1)) +
473 | guides(fill = guide_legend(title = "Original data source")) +
474 | theme_bw() +
475 | theme(
476 | legend.position = "none",
477 | plot.title = element_text(size = 11)
478 | ) +
479 | coord_flip() +
480 | ylab(
481 | "Number of individuals that match across archives per data source"
482 | )
483 | # write the full panel D as a standalone PDF
484 | ggsave(
485 | "plots/figure_barplots_D.pdf",
486 | plot = sources_sankey_plot,
487 | device = "pdf",
488 | scale = 0.7,
489 | dpi = 300,
490 | width = 250, height = 70, units = "mm",
491 | limitsize = FALSE,
492 | bg = "white"
493 | )
494 |
495 | #### E: dating barplot ####
496 | # rows with a distinct Approx_Individual_ID per archive, counted by Date_Type; an NA Date_Type is relabelled "missing"
497 | dating_count_approx_ind_id <- dplyr::bind_rows(pca, paa) %>%
498 | dplyr::distinct(archive, Approx_Individual_ID, .keep_all = TRUE) %>%
499 | dplyr::group_by(archive, Date_Type) %>%
500 | dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
501 | tidyr::replace_na(list(Date_Type = "missing")) %>%
502 | dplyr::mutate(
503 | Date_Type = factor(
504 | Date_Type,
505 | levels = c("modern", "C14", "contextual", "missing") %>% rev()
506 | )
507 | )
508 |
509 | # simple version of the plot: individuals per archive, stacked by Date_Type
510 | dating_plot_simple <- dating_count_approx_ind_id %>%
511 | ggplot() +
512 | geom_col(
513 | mapping = aes(x = archive, y = n, fill = Date_Type)
514 | ) +
515 | coord_flip() +
516 | scale_fill_manual(values = c("darkgrey", wesanderson::wes_palette("IsleofDogs2")[1:3])) + # unnamed values: matched in order with the scale limits
517 | guides(
518 | fill = guide_legend(
519 | title = "Age information",
520 | reverse = TRUE
521 | )
522 | ) +
523 | theme_bw() +
524 | theme(
525 | legend.position = "top",
526 | axis.title.y = element_blank(),
527 | legend.margin = margin(t = -0.1, b = -0.3, unit='cm'),
528 | legend.justification = "right",
529 | plot.title = element_text(size = 11)
530 | ) +
531 | ylab(
532 | "Number of individuals with different types of archaeological age information"
533 | )
534 | # write the simple panel E as a standalone PDF
535 | ggsave(
536 | "plots/figure_barplots_E_simple.pdf",
537 | plot = dating_plot_simple,
538 | device = "pdf",
539 | scale = 0.7,
540 | dpi = 300,
541 | width = 250, height = 70, units = "mm",
542 | limitsize = FALSE,
543 | bg = "white"
544 | )
545 |
546 | # complete version of the plot: rows with a distinct Poseidon_ID per archive, counted by Date_Type
547 | dating_count_poseidon_id <- dplyr::bind_rows(pca, paa) %>%
548 | dplyr::distinct(archive, Poseidon_ID, .keep_all = TRUE) %>%
549 | dplyr::group_by(archive, Date_Type) %>%
550 | dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
551 | tidyr::replace_na(list(Date_Type = "missing")) %>%
552 | dplyr::mutate(
553 | Date_Type = factor(
554 | Date_Type,
555 | levels = c("modern", "C14", "contextual", "missing") %>% rev()
556 | )
557 | )
558 | # per archive and Date_Type: the individual count plus the surplus of Poseidon_ID rows over individuals (n_diff), in long format
559 | dating_count <- dplyr::full_join(
560 | dating_count_poseidon_id, dating_count_approx_ind_id,
561 | by = c("archive", "Date_Type"), suffix = c("_poseidon_id", "_approx_ind_id")
562 | ) %>%
563 | dplyr::transmute( # a group missing on one side of the full join counts as 0 instead of NA
564 | archive, Date_Type,
565 | n_approx_ind_id = dplyr::coalesce(n_approx_ind_id, 0L),
566 | n_diff = dplyr::coalesce(n_poseidon_id, 0L) - n_approx_ind_id
567 | ) %>%
568 | tidyr::pivot_longer(
569 | cols = tidyselect::starts_with("n_"),
570 | names_to = "count_type",
571 | values_to = "n"
572 | ) %>%
573 | dplyr::mutate(
574 | count_type = factor(count_type, levels = c("n_diff", "n_approx_ind_id"))
575 | )
576 | # panel E (full): bars filled by Date_Type; the n_diff part is drawn striped
577 | dating_plot <- dating_count %>%
578 | ggplot() +
579 | ggpattern::geom_col_pattern(
580 | mapping = aes(x = archive, y = n, fill = Date_Type, pattern = count_type),
581 | pattern_color = "white", pattern_fill = "white"
582 | ) +
583 | coord_flip() +
584 | scale_fill_manual(values = c("darkgrey", wesanderson::wes_palette("IsleofDogs2")[1:3])) + # unnamed values: matched in order with the scale limits
585 | ggpattern::scale_pattern_manual(
586 | values = c("n_approx_ind_id" = "none", "n_diff" = "stripe"),
587 | guide = "none"
588 | ) +
589 | guides(
590 | fill = guide_legend(
591 | title = "Age information",
592 | reverse = TRUE,
593 | override.aes = list(pattern = "none")
594 | )
595 | ) +
596 | theme_bw() +
597 | theme(
598 | legend.position = "top",
599 | axis.title.y = element_blank(),
600 | legend.margin = margin(t = -0.1, b = -0.3, unit='cm'),
601 | legend.justification = "right",
602 | plot.title = element_text(size = 11)
603 | ) +
604 | ylab(
605 | "Number of individuals/samples with different types of archaeological age information"
606 | )
607 | # write the full panel E as a standalone PDF
608 | ggsave(
609 | "plots/figure_barplots_E.pdf",
610 | plot = dating_plot,
611 | device = "pdf",
612 | scale = 0.7,
613 | dpi = 300,
614 | width = 250, height = 70, units = "mm",
615 | limitsize = FALSE,
616 | bg = "white"
617 | )
618 |
619 | #### F: coords barplot ####
620 | # rows with a distinct Approx_Individual_ID per archive, counted by whether both Latitude and Longitude are non-NA
621 | coord_count_approx_ind_id <- dplyr::bind_rows(pca, paa) %>%
622 | dplyr::distinct(archive, Approx_Individual_ID, .keep_all = TRUE) %>%
623 | dplyr::mutate(
624 | has_coordinates = dplyr::case_when(
625 | !is.na(Latitude) & !is.na(Longitude) ~ "available",
626 | TRUE ~ "missing"
627 | ) %>% factor(., levels = c("missing", "available"))
628 | ) %>%
629 | dplyr::group_by(archive, has_coordinates) %>%
630 | dplyr::summarise(n = dplyr::n(), .groups = "drop")
631 | # panel F (simple): individuals per archive, stacked by coordinate availability
632 | coord_plot_simple <- coord_count_approx_ind_id %>%
633 | ggplot() +
634 | geom_col(
635 | mapping = aes(x = archive, y = n, fill = has_coordinates)
636 | ) +
637 | coord_flip() +
638 | scale_fill_manual(values = c("darkgrey", "lightgrey")) +
639 | guides(
640 | fill = guide_legend(
641 | title = "Coordinate information",
642 | reverse = TRUE
643 | )
644 | ) +
645 | theme_bw() +
646 | theme(
647 | legend.position = "top",
648 | axis.title.y = element_blank(),
649 | legend.margin = margin(t = -0.1, b = -0.3, unit='cm'),
650 | legend.justification = "right",
651 | plot.title = element_text(size = 11)
652 | ) +
653 | ylab(
654 | "Number of individuals with latitude and longitude coordinates"
655 | )
656 | # write the simple panel F as a standalone PDF
657 | ggsave(
658 | "plots/figure_barplots_F_simple.pdf",
659 | plot = coord_plot_simple,
660 | device = "pdf",
661 | scale = 0.7,
662 | dpi = 300,
663 | width = 250, height = 70, units = "mm",
664 | limitsize = FALSE,
665 | bg = "white"
666 | )
667 | # rows with a distinct Poseidon_ID per archive, counted by whether both Latitude and Longitude are non-NA
668 | coord_count_poseidon_id <- dplyr::bind_rows(pca, paa) %>%
669 | dplyr::distinct(archive, Poseidon_ID, .keep_all = TRUE) %>%
670 | dplyr::mutate(
671 | has_coordinates = dplyr::case_when(
672 | !is.na(Latitude) & !is.na(Longitude) ~ "available",
673 | TRUE ~ "missing"
674 | ) %>% factor(., levels = c("missing", "available"))
675 | ) %>%
676 | dplyr::group_by(archive, has_coordinates) %>%
677 | dplyr::summarise(n = dplyr::n(), .groups = "drop")
678 | # per archive and availability: the individual count plus the surplus of Poseidon_ID rows over individuals (n_diff), in long format
679 | coord_count <- dplyr::full_join(
680 | coord_count_poseidon_id, coord_count_approx_ind_id,
681 | by = c("archive", "has_coordinates"), suffix = c("_poseidon_id", "_approx_ind_id")
682 | ) %>%
683 | dplyr::transmute( # a group missing on one side of the full join counts as 0 instead of NA
684 | archive, has_coordinates,
685 | n_approx_ind_id = dplyr::coalesce(n_approx_ind_id, 0L),
686 | n_diff = dplyr::coalesce(n_poseidon_id, 0L) - n_approx_ind_id
687 | ) %>%
688 | tidyr::pivot_longer(
689 | cols = tidyselect::starts_with("n_"),
690 | names_to = "count_type",
691 | values_to = "n"
692 | ) %>%
693 | dplyr::mutate(
694 | count_type = factor(count_type, levels = c("n_diff", "n_approx_ind_id"))
695 | )
696 | # panel F (full): bars filled by coordinate availability; the n_diff part is drawn striped
697 | coord_plot <- coord_count %>%
698 | ggplot() +
699 | ggpattern::geom_col_pattern(
700 | mapping = aes(x = archive, y = n, fill = has_coordinates, pattern = count_type),
701 | pattern_color = "white", pattern_fill = "white"
702 | ) +
703 | coord_flip() +
704 | #scale_fill_manual(values = wesanderson::wes_palette("GrandBudapest1")) +
705 | scale_fill_manual(values = c("darkgrey", "lightgrey")) +
706 | ggpattern::scale_pattern_manual(
707 | values = c("n_approx_ind_id" = "none", "n_diff" = "stripe"),
708 | guide = "none"
709 | ) +
710 | guides(
711 | fill = guide_legend(
712 | title = "Coordinate information",
713 | reverse = TRUE,
714 | override.aes = list(pattern = "none")
715 | )
716 | ) +
717 | theme_bw() +
718 | theme(
719 | legend.position = "top",
720 | axis.title.y = element_blank(),
721 | legend.margin = margin(t = -0.1, b = -0.3, unit='cm'),
722 | legend.justification = "right",
723 | plot.title = element_text(size = 11)
724 | ) +
725 | ylab(
726 | #"Samples with spatial coordinates",
727 | "Number of individuals/samples with latitude and longitude coordinates"
728 | )
729 | # write the full panel F as a standalone PDF
730 | ggsave(
731 | "plots/figure_barplots_F.pdf",
732 | plot = coord_plot,
733 | device = "pdf",
734 | scale = 0.7,
735 | dpi = 300,
736 | width = 250, height = 70, units = "mm",
737 | limitsize = FALSE,
738 | bg = "white"
739 | )
740 |
741 | # combine plots
742 | # 3 x 2 grid of panels A-F, using the simple variants of C-F
743 | p_panel <- cowplot::plot_grid(
744 | package_publication_plot, publication_barcode_plot,
745 | source_plot_simple, sources_sankey_plot_simple,
746 | dating_plot_simple, coord_plot_simple,
747 | nrow = 3, ncol = 2, align = "v", axis = "tb",
748 | labels = c("A", "B", "C", "D", "E", "F")
749 | )
750 | # bold figure title drawn on its own canvas
751 | p_title <- cowplot::ggdraw() +
752 | cowplot::draw_label(
753 | "Comparison of PCA and PAA by different metrics",
754 | fontface = "bold",
755 | hjust = 1.51
756 | )
757 | # title stacked above the panel grid
758 | p <- cowplot::plot_grid(p_title, p_panel, ncol = 1, rel_heights = c(0.05, 1))
759 | # write the combined figure
760 | ggsave(
761 | "plots/figure_barplots.pdf",
762 | plot = p,
763 | device = "pdf",
764 | scale = 0.7,
765 | dpi = 300,
766 | width = 500, height = 220, units = "mm",
767 | limitsize = FALSE,
768 | bg = "white"
769 | )
770 |
771 |
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_clemens_schmid/paper_overview_schema.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
453 |
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_clemens_schmid/isba_poster_overview_schema.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
555 |
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_clemens_schmid/popgen_toolbox_schema.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
555 |
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/schema_package.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
557 |
--------------------------------------------------------------------------------
/schemata/overview_and_package_by_michelle_oreilly/SVG/MPI-CS-Package.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
505 |
--------------------------------------------------------------------------------