├── NAMESPACE ├── report ├── binford_main.pdf ├── preamble-latex.tex ├── binford_main.Rmd ├── binford_end.Rmd ├── binford_datasets.Rmd ├── binford_introduction.Rmd ├── bibliography.bib ├── binford_discussion.Rmd ├── deutsches-archaologisches-institut.csl └── binford_tale_1.Rmd ├── .Rbuildignore ├── binfordanalysis.Rproj ├── playground ├── default_model.R ├── get_a_feeling.R ├── model_test.R └── area.R ├── .gitignore ├── .travis.yml ├── README.md ├── DESCRIPTION ├── LICENSE └── data-raw └── LRBkey.csv /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | -------------------------------------------------------------------------------- /report/binford_main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/binford-analysis/master/report/binford_main.pdf -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^report$ 4 | ^playground$ 5 | ^data-raw$ 6 | ^\.travis\.yml$ 7 | -------------------------------------------------------------------------------- /report/preamble-latex.tex: -------------------------------------------------------------------------------- 1 | \usepackage{float} 2 | \let\origfigure\figure 3 | \let\endorigfigure\endfigure 4 | \renewenvironment{figure}[1][2] { 5 | \expandafter\origfigure\expandafter[htb] 6 | } { 7 | \endorigfigure 8 | } 9 | 10 | \let\origtable\table 11 | \let\endorigtable\endtable 12 | \renewenvironment{table}[1][2] { 13 | \expandafter\origtable\expandafter[htb] 14 | } { 15 | \endorigtable 16 | } 17 | -------------------------------------------------------------------------------- /binfordanalysis.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 
| RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | PackageRoxygenize: rd,collate,namespace 21 | -------------------------------------------------------------------------------- /playground/default_model.R: -------------------------------------------------------------------------------- 1 | default_model <- function(df, relation){ 2 | 3 | # create model function 4 | model_func <- function(df){ 5 | lm(relation, data = df) 6 | } 7 | 8 | # check if input data.frame is grouped 9 | # if not, add artificial grouping into one 10 | # big group 11 | if(!is.grouped_df(df)){ 12 | df %<>% mutate(id = 1) %>% 13 | group_by(id) 14 | } 15 | 16 | # calculate model and parameters 17 | df %<>% nest %>% 18 | mutate(model = map(data, model_func)) %>% 19 | mutate( 20 | predictions = map2(data, model, add_predictions), 21 | resids = map2(data, model, add_residuals), 22 | glance = map(model, broom::glance) 23 | ) 24 | 25 | return(df) 26 | } 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | 
/*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | .Rproj.user 35 | 36 | # Rendered documents and knitr cache 37 | report/*.pdf 38 | report/*_cache 39 | report/*_files 40 | !report/binford_main.pdf 41 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: true 5 | cache: packages 6 | 7 | # take packages from MRAN snapshot 8 | repos: 9 | MRAN: "https://mran.microsoft.com/snapshot/2017-08-29" 10 | 11 | before_install: 12 | # install tex german language package 13 | - tlmgr install babel-german 14 | 15 | before_script: 16 | # install microsoft fonts 17 | - echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | sudo debconf-set-selections 18 | - sudo apt-get install ttf-mscorefonts-installer 19 | # rebuild font cache 20 | - sudo fc-cache 21 | 22 | script: 23 | # install package 24 | - Rscript -e "devtools::install()" 25 | # render document 26 | - Rscript -e "rmarkdown::render('report/binford_main.Rmd')" 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is a paper (in German language) I wrote for the seminar [Der Mensch in der Landschaft. Archäologisch-geographische Modelle](http://univis.uni-kiel.de/form?__s=2&dsc=anew/lecture_view&lvs=gemei/instit_2/zentr/dermen&anonymous=1&lang=en&ref=tlecture&sem=2017s&tdir=philos/fachwi/_urund/bachel/haupts&__e=505) by Priv.-Doz. Dr. Oliver Nakoinz [\@OliverNakoinz](https://github.com/OliverNakoinz) and Dr. rer. nat. Daniel Knitter [\@dakni](https://github.com/dakni) in the summer semester 2017. 
2 | 3 | I tried to understand, reproduce and improve one specific model from Lewis Binford's book *Constructing Frames of Reference: An Analytical Method for Archaeological Theory Building using Ethnographic and Environmental Data Sets* from 2001. 4 | 5 | **Reproduce me!** 6 | 7 | 1. Clone the repo 8 | 2. Install the R package 9 | 3. Knit *report/binford_main.Rmd* 10 | 4. Solve some nasty TeX dependency problems (see *.travis.yml*) 11 | 5. Back to step 3 12 | 6. Enjoy! 13 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: binfordanalysis 2 | Type: Package 3 | Title: Seminar Paper "Reconstructing Frames of Reference" 4 | Version: 0.1.0 5 | Authors@R: person("Clemens", "Schmid", email = "clemens@nevrome.de", role = c("aut", "cre")) 6 | Maintainer: Clemens Schmid 7 | Description: Paper to understand, reproduce and improve one specific model from Lewis Binford's book *Constructing Frames of Reference: An Analytical Method for Archaeological Theory Building using Ethnographic and Environmental Data Sets* from 2001. 8 | Date: 2017-08-08 9 | License: GPL-2 | file LICENSE 10 | Encoding: UTF-8 11 | LazyData: true 12 | Imports: 13 | ggplot2 (>= 2.2.1), 14 | magrittr (>= 1.5), 15 | tidyverse (>= 1.1.1), 16 | binford (>= 0.1.0), 17 | broom (>= 0.4.2), 18 | cowplot (>= 0.8.0), 19 | dplyr (>= 0.7.2), 20 | kableExtra (>= 0.4.0), 21 | knitr (>= 1.17), 22 | MASS (>= 7.3-47), 23 | purrr (>= 0.2.3), 24 | readr (>= 1.1.1), 25 | reshape2 (>= 1.4.2), 26 | tibble (>= 1.3.3), 27 | tidyr (>= 0.6.3) 28 | RoxygenNote: 6.0.1 29 | Depends: 30 | R (>= 3.4.0) 31 | -------------------------------------------------------------------------------- /report/binford_main.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Reconstructing Frames of Reference" 3 | subtitle: "Auf den Spuren der Datenanalyse Lewis R. 
Binfords" 4 | author: "Clemens Schmid" 5 | date: "SS 17" 6 | header-includes: 7 | - \usepackage{fancyhdr} 8 | - \pagestyle{fancy} 9 | - \usepackage[font=small,labelfont=bf]{caption} 10 | - \usepackage[ngerman]{babel} 11 | output: 12 | pdf_document: 13 | pandoc_args: [ 14 | "-V", "classoption=twocolumn" 15 | ] 16 | toc: yes 17 | toc_depth: 4 18 | fig_caption: yes 19 | latex_engine: xelatex 20 | includes: 21 | in_header: preamble-latex.tex 22 | #keep_tex: yes 23 | geometry: "left=0.90cm,right=0.90cm,top=1.45cm,bottom=1.5cm" 24 | bibliography: bibliography.bib 25 | mainfont: "Arial" 26 | csl: https://www.zotero.org/styles/chicago-author-date-de 27 | --- 28 | 29 | \fancyhead{} 30 | \fancyfoot{} 31 | \fancyhead[R]{\thepage} 32 | \fancyhead[L]{\leftmark} 33 | 34 | 35 | \parskip 4pt 36 | \setlength{\textfloatsep}{10pt plus 1.0pt minus 2.0pt} 37 | 38 | ```{r global chunk options, include=FALSE} 39 | knitr::opts_chunk$set(echo = FALSE) 40 | ``` 41 | 42 | ```{r child = 'binford_introduction.Rmd'} 43 | ``` 44 | 45 | 46 | ```{r child = 'binford_datasets.Rmd'} 47 | ``` 48 | 49 | 50 | ```{r child = 'binford_tale_1.Rmd'} 51 | ``` 52 | 53 | \newpage 54 | 55 | ```{r child = 'binford_end.Rmd'} 56 | ``` 57 | 58 | \section{Literatur} 59 | -------------------------------------------------------------------------------- /report/binford_end.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | pdf_document: 4 | toc: yes 5 | fig_caption: yes 6 | latex_engine: xelatex 7 | bibliography: bibliography.bib 8 | mainfont: "Arial" 9 | csl: deutsches-archaologisches-institut.csl 10 | --- 11 | 12 | # Abschließende Gedanken 13 | 14 | Die Ziele, die ich mir für diese Arbeit gesetzt habe, sind weitestgehend erreicht worden. 
Die Methode der Multiplen Regression habe ich besser verstanden und sinnvoll zur Anwendung bringen können -- obgleich wie zu erwarten viele Fragen offen geblieben sind und ich gerne mehr Zeit in die Weiterentwicklung auf der erreichten Grundlage investiert hätte. Ich konnte ein Modell formulieren, das nach objektiven, statistischen Kriterien besser zur Erklärung der Variable *area* geeignet ist, als das von Binford vorgeschlagene. Ich erlaube mir jedoch kein Urteil darüber, ob Binfords Modell, in das mittels händischer Auswahl von Variablen bewusst oder unbewusst Fachwissen verarbeitet wurde, dennoch das wissenschaftlich wertvollere ist. Sicher bin ich jedoch, dass Binfords Vorgehen zur Erstellung des Modells nicht ausreichend dokumentiert wurde, um eine exakte Reproduktion zu ermöglichen. Das ist in Anbetracht der Tatsache, dass andere Teile des Buches dahingehend wesentlich besser vorbereitet sind, schade. Auch deswegen lohnt es sich, weiter mit *Constructing Frames of Reference* zu arbeiten und aus dem reichen Schatz an Hypothesen jene herauszuarbeiten, die tatsächlich als solide Grundlage für weitere Forschung dienen können. 
15 | -------------------------------------------------------------------------------- /playground/get_a_feeling.R: -------------------------------------------------------------------------------- 1 | library(binford) 2 | library(purrr) 3 | library(dplyr) 4 | library(magrittr) 5 | library(reshape2) 6 | 7 | main <- binford::LRB 8 | key <- binford::LRBkey 9 | 10 | unique(key$class) 11 | unique(key$Fclass) 12 | unique(key$type) 13 | 14 | 15 | # automatic type classification 16 | # level_info <- list(key$Fclass, key$type) %>% purrr::pmap_chr( 17 | # function(x, y) { 18 | # if (x == "numeric" & y == "categorical") { return("nominal") } 19 | # if (x == "numeric" & y == "ordinal" ) { return("ratio") } 20 | # if (x == "integer" & y == "categorical") { return("nominal") } 21 | # if (x == "integer" & y == "ordinal" ) { return("nominal") } # case not existend 22 | # if (x == "factor" & y == "categorical") { return("nominal") } 23 | # if (x == "factor" & y == "ordinal" ) { return("ordinal") } 24 | # } 25 | # ) 26 | 27 | # ratio_vars_index <- which(level_info == "ratio") 28 | # 29 | # ratio_vars <- main[, ratio_vars_index] 30 | 31 | ratio_vars_names <- key %>% dplyr::filter( 32 | class == "numeric", 33 | Fclass == "numeric", 34 | type == "ordinal" 35 | ) %$% X 36 | 37 | ratio_vars <- main[, colnames(main) %in% ratio_vars_names] 38 | 39 | cor_matrix <- cor(ratio_vars, use = "pairwise.complete.obs", method = "pearson") 40 | 41 | cor_matrix[lower.tri(cor_matrix, diag = TRUE)] <- NA 42 | 43 | cor_list <- melt(cor_matrix) %>% 44 | dplyr::filter( 45 | value %>% is.na %>% `!` 46 | ) 47 | 48 | cor_sel <- cor_list %>% dplyr::filter( 49 | Var1 != Var2, 50 | value != 1, 51 | abs(value) >= 0.9 52 | ) %>% dplyr::arrange( 53 | value %>% abs %>% desc 54 | ) 55 | 56 | cor_sel2 <- cor_sel %>% dplyr::left_join( 57 | y = key[c("X", "description")], by = c("Var1" = "X") 58 | ) %>% dplyr::left_join( 59 | y = key[c("X", "description")], by = c("Var2" = "X") 60 | ) 61 | 
-------------------------------------------------------------------------------- /playground/model_test.R: -------------------------------------------------------------------------------- 1 | library(binford) 2 | library(magrittr) 3 | library(modelr) 4 | library(tidyverse) 5 | library(cowplot) 6 | library(broom) 7 | 8 | main <- binford::LRB 9 | key <- binford::LRBkey 10 | 11 | #key[grep("dismov", key$variable), ]$description 12 | 13 | main_sel <- main[, c("subpop", "density", "dismov", "hunting")] %>% 14 | dplyr::filter(!is.na(dismov)) 15 | 16 | main_sel <- main_sel %>% dplyr::mutate( 17 | ldensity = log10(density) 18 | ) 19 | 20 | main_sel %>% ggplot(aes(x = ldensity, y = dismov)) + 21 | geom_hex(bins = 30) 22 | 23 | mod_pop <- main_sel %>% lm(dismov ~ ldensity, data = .) 24 | 25 | grid <- main_sel %>% 26 | data_grid(density = seq_range(density, 15)) %>% 27 | mutate(ldensity = log10(density)) %>% 28 | add_predictions(mod_pop, "dismov") 29 | 30 | main_sel %>% ggplot(aes(x = density, y = dismov)) + 31 | geom_hex(bins = 30) + 32 | geom_line(data = grid, color = "red", size = 1) 33 | 34 | main_sel <- main_sel %>% 35 | modelr::add_residuals(mod_pop, "lresid") 36 | 37 | main_sel %>% ggplot(aes(ldensity, lresid)) + 38 | geom_hex(bins = 30) 39 | 40 | #### 41 | 42 | main_sel <- main[, c("subsp.1", "nagp", "density")] %>% 43 | mutate(lnagp = log10(nagp), ldensity = log10(density)) %>% 44 | group_by(subsp.1) 45 | 46 | main_sel %>% ggplot(aes(x = lnagp, y = ldensity)) + 47 | geom_hex(bins = 20) + 48 | facet_wrap(~subsp.1) 49 | 50 | 51 | relation <- ldensity ~ lnagp 52 | 53 | by_subsp <- default_model(main_sel, relation) 54 | 55 | predictions <- by_subsp %>% unnest(predictions) 56 | resids <- by_subsp %>% unnest(resids) 57 | glance <- by_subsp %>% unnest(glance) 58 | 59 | predictions_plot <- predictions %>% ggplot(aes(lnagp, ldensity)) + 60 | geom_point() + 61 | geom_line(aes(x = lnagp, y = pred), size = 1, colour = "red") + 62 | facet_wrap(~subsp.1) 63 | 64 | resids_plot <- 
resids %>% ggplot(aes(lnagp, resid)) + 65 | geom_point() + 66 | facet_wrap(~subsp.1) + 67 | geom_ref_line(h = 0, size = 1, colour = "black") 68 | 69 | plot_grid(predictions_plot, resids_plot, labels = c("model", "residuals"), ncol = 1, nrow = 2) 70 | -------------------------------------------------------------------------------- /playground/area.R: -------------------------------------------------------------------------------- 1 | library(binford) 2 | library(magrittr) 3 | library(modelr) 4 | library(tidyverse) 5 | library(cowplot) 6 | library(broom) 7 | 8 | #### load data #### 9 | #main <- binford::LRB 10 | main <- binford::LRBfact 11 | key <- binford::LRBkey 12 | 13 | #key[grep("runoff", key$variable), ]$description 14 | 15 | #### data selection #### 16 | main_sel <- main[, c( 17 | "area", "hunting", "lbio5", "lcoklm", 18 | "lrunoff", "watrgrc", "medstab", "perwltg", 19 | "rlow", "rungrc", "sdtemp" 20 | )] 21 | 22 | #### create model ##### 23 | area_relation <- area ~ hunting + lbio5 + lcoklm + 24 | lrunoff + watrgrc + medstab + perwltg + 25 | rlow + rungrc + sdtemp 26 | 27 | mod <- main_sel %>% default_model(relation = area_relation) 28 | 29 | predictions <- mod %>% unnest(predictions) 30 | resids <- mod %>% unnest(resids) 31 | glance <- mod %>% unnest(glance) 32 | 33 | predictions %>% ggplot(aes(hunting, area)) + 34 | geom_point() + 35 | geom_line(aes(x = hunting, y = pred), size = 1, colour = "red") 36 | 37 | predictions %>% ggplot(aes(watrgrc, area)) + 38 | geom_point() + 39 | geom_line(aes(x = watrgrc, y = pred), size = 1, colour = "red") 40 | 41 | predictions %>% ggplot(aes(lcoklm, area)) + 42 | geom_point() + 43 | geom_line(aes(x = lcoklm, y = pred), size = 1, colour = "red") 44 | 45 | predictions %>% ggplot(aes(rlow, area)) + 46 | geom_point() + 47 | geom_line(aes(x = rlow, y = pred), size = 1, colour = "red") 48 | 49 | #### Model analysis #### 50 | 51 | mod$model[[1]] -> fit 52 | 53 | # 54 | fit %>% car::avPlots() 55 | 56 | # 57 | MASS::stepAIC(fit, 
direction="both") 58 | 59 | # 60 | leaps <- leaps::regsubsets(area_relation, data = main_sel, nbest=10) 61 | plot(leaps, scale="r2") 62 | 63 | # 64 | car::subsets(leaps, statistic="rsq") 65 | 66 | # 67 | relaimpo::calc.relimp( 68 | fit, type=c("lmg","last","first","pratt"), 69 | rela=TRUE 70 | ) 71 | 72 | boot <- relaimpo::boot.relimp( 73 | fit, b = 1000, type = c("lmg","last", "first", "pratt"), rank = TRUE, 74 | diff = TRUE, rela = TRUE 75 | ) 76 | relaimpo::booteval.relimp(boot) # print result 77 | plot(relaimpo::booteval.relimp(boot,sort=TRUE)) # plot result 78 | 79 | # 80 | plot(fit) 81 | 82 | # 83 | area_relation2 <- area ~ hunting + lbio5 + 84 | lrunoff + watrgrc + perwltg + 85 | rungrc + sdtemp 86 | 87 | fit2 <- main_sel %>% default_model(relation = area_relation2) %$% model 88 | 89 | anova(fit, fit2) 90 | 91 | # 92 | library(GGally) 93 | ggpairs(main_sel) 94 | -------------------------------------------------------------------------------- /report/binford_datasets.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | pdf_document: 4 | toc: yes 5 | fig_caption: yes 6 | latex_engine: xelatex 7 | bibliography: bibliography.bib 8 | mainfont: "Arial" 9 | csl: deutsches-archaologisches-institut.csl 10 | --- 11 | 12 | # Datensätze 13 | 14 | Binfords komplexes Unterfangen erfordert Forschungsdaten, die sowohl für Kultur- als auch Naturphänomene ein möglichst breites Set an Observationen enthalten. Dabei muss einerseits die Anzahl als auch die Variabilität der Beobachtungen ausreichend groß, andererseits auch ein sinnvolles Set an Kennwerten und Proxies erfasst sein. Die Zusammenstellung der anthropologischen Daten ist besonders problematisch, da der Vergleich eine selten angewandte Systematisierung ethnographischer Datenaufnahme erfordert. 15 | 16 | > It took me two years to develop the data bases dealing with the world's environments and the geographical distribution of documented hunter-gatherers. 
Once this aspect of the work was completed, it became clear that the limited range of hunter-gatherer characteristics upon which cross-cultural studies had focused was not really relevant to most of the issues that I hoped to address in my book. 17 | > 18 | > -- [@binford_constructing_2001, 2.] 19 | 20 | Ein großer Teil des Buches beschäftigt sich mit der Beschreibung und Kontextualisierung von Variablen, die Binford aus der geographischen oder anthropologischen Literatur entnommen und anschließend gesammelt oder gegebenenfalls berechnet hat. Das Ergebnis ist ein komplexer Datenbestand, der im Verlauf des Buches immer weiter in die Breite wächst. Es ist Amber Johnson, Doug White und Anthon Eff zu verdanken, dass der Datensatz heute in gegenüber der abgedruckten Version noch einmal deutlich erweiterter Form digitalisiert und leicht zugänglich vorliegt ^[http://ajohnson.sites.truman.edu/data-and-program/ [14.8.2017]]. Über ein Paket der Statistikprogrammiersprache R, das Ben Marwick zusammengestellt hat, lässt sich auf die Daten besonders bequem zugreifen ^[@marwick_binford:_2016]. 21 | 22 | Binford hat mit zwei Hauptdatensätzen gearbeitet: Eine Tabelle mit Informationen zu 339 ethnographisch aufgenommenen Jäger- und Sammlergruppen und eine Tabelle mit Atmosphärendaten von 1429 weltweit verteilten Wetterstationen, die anhand ihrer Position in verschiedenen Vegetationszonen ausgewählt wurden. Johnson, White und Eff haben ersteren Datensatz strukturell überarbeitet und eine Auswahl von 507 gesammelten und berechneten Variablen zusammengestellt. Dieser Datensatz **LRB** (im folgenden auch "Gruppendatensatz") liegt entsprechend in Form einer .csv-Tabelle mit 339 Zeilen und 507 Spalten vor. Der Metadatensatz **LRBkey** steht ebenfalls als .csv-Tabelle zur Verfügung und enthält zu jeder der 507 Variablen Informationen wie semantische Kurzbeschreibung, Skalenniveau und Fehlstellen. Auch der Wetterstationendatensatz ist in dieser Form zugänglich. 
23 | 24 | Gerade letzterer ist relativ einfach systematisch erweiterbar: Für die Berechnung der von Binford hinzugefügten, abhängigen Größen sowohl im Wetterstationen- als auch im Gruppendatensatz kann auf die Java-Software EnvCalc2.1 zurückgegriffen werden. Sie ist seit 2001 aus dem von Binfords Arbeitsgruppe entwickelten Programmcode hervorgegangen und wurde zuletzt 2014 aktualisiert. 25 | -------------------------------------------------------------------------------- /report/binford_introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | pdf_document: 4 | pandoc_args: [ 5 | "-V", "classoption=twocolumn" 6 | ] 7 | toc: yes 8 | fig_caption: yes 9 | latex_engine: xelatex 10 | bibliography: bibliography.bib 11 | mainfont: "Arial" 12 | csl: deutsches-archaologisches-institut.csl 13 | --- 14 | 15 | # Einleitung und Kontextualisierung 16 | 17 | Lewis R. Binfords umstrittenes Spätwerk *Constructing Frames of Reference* ist eines der wichtigsten Standardwerke der New Archaeology. Es versteht sich selbst als Methodikstudie zur induktiven Ableitung allgemeiner Regeln menschlichen Verhaltens aus ethnographischen Umwelt- und Sozialdatensätzen, erarbeitet aber am Fallbeispiel eines Datensatzes zu Jäger- und Sammlergruppen gleichermaßen inhaltlich relevante Ergebnisse. 18 | 19 | Binford fasst seine methodischen Ambitionen folgendermaßen zusammen: 20 | 21 | > I cannot emphasize strongly enough, that the major problem this book addresses is *the development of a method for productively using ethnographic data in the service of archaeological goals*. 22 | > [...] 23 | > This book is unapologetically written from a scientific perspective. It is largely an exercise in inductive reasoning, in that it asks questions regarding the character of the world of organized variability among ethnographically documented hunter-gatherer groups. 24 | > [...] 
25 | > And, since one of the goals of this book is to explain variability among hunter-gatherers, the explanatory theory that I have developed is available for archaeologists to use deductively by reasoning to or simulating changing conditions and thereby providing patterns of change that can be expected to occur in the archaeological record at specific locations. 26 | > 27 | > -- @binford_constructing_2001, 2-3. 28 | 29 | Die systematische Suche nach übergeordneten, wiederkehrenden Strukturen in der Mensch-Umwelt- und Mensch-Mensch-Beziehung ist eines der wesentlichen Themen des Buches. Die Komplexität dieser Aufgabe erklärt seinen bemerkenswerten Umfang und den dennoch teilweise fragmenthaften Charakter. Binford konstruiert allgemeine Lehrsätze, die für alle Jäger- und Sammlergruppen Gültigkeit beanspruchen. Erarbeitet auf Grundlage eines begrenzten Datensatzes und eines limitierten, analytischen Methodensets sind diese Axiome sowie qualitativen und quantitativen Thesen wissenschaftliche Aussagen. Die ihnen zugrundeliegenden Analysen sollten reproduzierbar und falsifizierbar sein. Zur Illustration ein Beispiel einer zufällig ausgewählten These, die im zehnten Kapitel formuliert wird: 30 | 31 | > Proposition 10.19: As packing increases, groups that are dependent upon aquatic resources should resort to more complex subsistence technology. Increasing complexity in the design of weapons should also be associated with hunter-gatherer groups that are not primarily dependent upon aquatic resources as a function of their more specialized exploitation of a reduced number of high-yield species (see generalizations 10.15 and 10.16). 32 | > 33 | > -- @binford_constructing_2001, 392. 
34 | 35 | Hier wird zunächst ein Rahmen definiert ("groups that are dependent upon aquatic resources") und für den Fall eines Veränderungsprozesses ("as packing increases") eine Vorhersage ("should resort to more complex subsistence technology") getroffen, die dann noch weiter kontextualisiert und präzisiert wird. 36 | 37 | Da sowohl die Daten als auch eine -- unterschiedlich ausführliche -- Beschreibung des Methodensets publiziert sind, die der Entwicklung dieser und aller anderen Aussagen in *Constructing Frames of Reference* zugrunde liegen, sollte es möglich sein, ... 38 | 39 | 1. ... auf Grundlage derselben Daten unter Anwendung derselben Methode zu den gleichen Aussagen zu kommen. 40 | 41 | 2. ... Aussagen mit anderen Daten und anderen Methoden zu rekonstruieren und gegebenenfalls durch verbesserte Aussagen zu ersetzen. 42 | 43 | Der vorliegende Aufsatz dient auch dazu, Binfords wissenschaftlichen Selbstanspruch zu prüfen. Wie gut ist das Buch für einen zeitgemäßen Reproducible Research Workflow zugänglich? Gleichzeitig muss der Schwerpunkt dieser Arbeit ein didaktischer sein: Ich werde den Umgang mit Grundlagen des wissenschaftlichen Arbeitens üben und -- im Kontext des Hauptseminars -- meine Methodenkompetenz zur Modelleinpassung mittels Regressionsanalyse^[@nakoinz_modelling_2016, 87-105.] ausbauen. 44 | 45 | Vor diesem Hintergrund möchte ich weder versuchen die archäologischen oder archäologietheoretischen Leitgedanken des Buches nachzuzeichnen ^[Für einen kurzen Abriss siehe z.B. @donald_pate_review_2005 oder @browman_constructing_2005], noch seine kontroverse Rezeption aufzugreifen. Kritiker haben Binford neben übermäßigem Naturdeterminismus und Funktionalismus u.a. die Verwendung widersprüchlicher und mangelhaft definierter Neologismen, Rechenfehler und Mängel in der Datenaufnahme bis hin zum Übersehen zentraler Trends vorgeworfen ^[@doi:10.1086/jar.58.3.3631188, @10.2307/4128427]. 
Die fachtheoretischen Grundlagen des Buches in Kulturökologie und Middle-Range-Theorie gehören in ein ohnehin umstrittenes Feld. Eine Auseinandersetzung mit dem Gesamtwerk ist im zeitlich stark begrenzten Rahmen einer Seminararbeit nicht sinnvoll möglich. Stattdessen möchte ich das Opus höchst selektiv betrachten, berechtigte Kritik vorerst beiseite lassen und mich voll auf einen kleinen Aspekt der explorativen Datenanalyse konzentrieren. Eine erste Abenteuerreise auf den Spuren Binfords, wie sie noch häufig unternommen werden sollte. 46 | 47 | > Does it [Binfords *Constructing Frames of Reference*] help us to better suppose hunter-gatherer variability, and to conceptualize variability beyond what we currently know? The answer to the first question is a definite yes, but its generalizations require rigorous testing and replication, the normal scientific process that Binford himself has advocated for four decades. 48 | > 49 | > @10.2307/4128427, 372. 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /report/bibliography.bib: -------------------------------------------------------------------------------- 1 | @Book{binford_constructing_2001, 2 | location = {{Berkeley/Los Angeles}}, 3 | title = {Constructing Frames of Reference: An Analytical Method for Archaeological Theory Building Using Hunter-Gatherer and Environmental Data Sets}, 4 | timestamp = {2017-08-08T15:24:17Z}, 5 | author = {L. R. 
Binford}, 6 | date = {2001}, 7 | groups = {Theorie}, 8 | } 9 | @Book{nakoinz_modelling_2016, 10 | title = {Modelling {{Human Behaviour}} in {{Landscapes}}: {{Basic Concepts}} and {{Modelling Elements}}}, 11 | isbn = {978-3-319-29538-1 978-3-319-29536-7}, 12 | shorttitle = {Modelling {{Human Behaviour}} in {{Landscapes}}}, 13 | timestamp = {2017-08-14T12:59:37Z}, 14 | langid = {english}, 15 | author = {Oliver Nakoinz and Daniel Knitter}, 16 | date = {2016}, 17 | note = {OCLC: 951603781}, 18 | groups = {Landschaftsarchäologie}, 19 | } 20 | @Software{marwick_binford:_2017, 21 | title = {Binford: {{Binford}}'s {{Hunter}}-{{Gatherer Data}}}, 22 | url = {http://github.com/benmarwick/binford}, 23 | timestamp = {2017-08-14T14:11:49Z}, 24 | author = {B. Marwick and A. Johnson and D. White and E. A. Eff}, 25 | date = {2017}, 26 | groups = {Landschaftsarchäologie}, 27 | } 28 | @Software{marwick_binford:_2016, 29 | title = {Binford: {{Binford}}'s {{Hunter}}-{{Gatherer Data}}}, 30 | url = {http://github.com/benmarwick/binford}, 31 | timestamp = {2017-08-14T14:13:10Z}, 32 | author = {B. Marwick and A. Johnson and D. White and E. A. Eff}, 33 | date = {2016}, 34 | groups = {Landschaftsarchäologie}, 35 | } 36 | @Collection{backhaus_multivariate_2008, 37 | location = {{Berlin}}, 38 | edition = {12., vollst. überarb. 
Aufl}, 39 | title = {Multivariate Analysemethoden: eine anwendungsorientierte Einführung}, 40 | isbn = {978-3-540-85044-1}, 41 | shorttitle = {Multivariate Analysemethoden}, 42 | pagetotal = {575}, 43 | timestamp = {2017-08-15T12:00:50Z}, 44 | langid = {german}, 45 | series = {Springer-Lehrbuch}, 46 | publisher = {{Springer}}, 47 | editor = {Klaus Backhaus and Bernd Erichson and Wulff Plinke and Rolf Weiber}, 48 | date = {2008}, 49 | note = {OCLC: 263459647}, 50 | keywords = {Cluster,Conjoint,Diskiminanz,Faktoren,Konfirmatorisch,Kontingenz,Korrespondenz,Kreuztabellierung,Lehrbuch,Methods,Multivariate Analyse,Multivariate Analysis,Netze,Neuronale,Regression,SPSS,Statistik,Statistische Methodenlehre,Strukturgleichung,Varianz,Zeitreihe}, 51 | file = {Table of Contents PDF:/home/clemens/.mozilla/firefox/manjaro.default/zotero/storage/PDV8N8QJ/Backhaus et al. - 2008 - Multivariate Analysemethoden eine anwendungsorien.pdf:application/pdf}, 52 | groups = {R und Statistik}, 53 | } 54 | @Book{venables_modern_2002, 55 | address = {New York}, 56 | edition = {4}, 57 | title = {Modern {{Applied Statistics}} with {{S}}}, 58 | timestamp = {2017-08-30T13:08:07Z}, 59 | publisher = {{Springer}}, 60 | author = {Venables, W. N. and Ripley, B. D.}, 61 | year = {2002}, 62 | note = {ISBN 0-387-95457-0} 63 | } 64 | @Article{donald_pate_review_2005, 65 | title = {Review of ‘{{Constructing Frames}} of {{Reference}}: {{An Analytical Method}} for {{Archaeological Theory Building}} Using {{Ethnographic}} and {{Environmental Data Sets}}’ by {{Lewis R}}. {{Binford}}}, 66 | volume = {60}, 67 | timestamp = {2017-08-23T09:57:47Z}, 68 | journaltitle = {Australian Archaeology}, 69 | author = {F. 
{Donald Pate}}, 70 | date = {2005}, 71 | pages = {82--83}, 72 | groups = {Landschaftsarchäologie,Theorie}, 73 | } 74 | @Article{browman_constructing_2005, 75 | title = {Constructing Frames of Reference: An Analytical Method for Archaeological Theory Building Using Hunter-Gatherer and Environmental Data Sets}, 76 | volume = {107}, 77 | timestamp = {2017-08-23T10:15:50Z}, 78 | number = {2}, 79 | journaltitle = {American Anthropologist}, 80 | author = {D. L. Browman}, 81 | date = {2005}, 82 | pages = {277--279}, 83 | groups = {Theorie}, 84 | } 85 | @Article{doi:10.1086/jar.58.3.3631188, 86 | title = {Constructing {{Frames}} of {{Reference}}: {{An Analytical Method}} for {{Archeological Theory Building Using Ethnographic}} and {{Environmental Data Sets}}. {{Lewis R}}. {{Binford}}}, 87 | volume = {58}, 88 | doi = {10.1086/jar.58.3.3631188}, 89 | timestamp = {2017-08-30T13:13:20Z}, 90 | number = {3}, 91 | journal = {Journal of Anthropological Research}, 92 | author = {Hill, K.}, 93 | year = {2002}, 94 | pages = {416--419} 95 | } 96 | @Article{10.2307/4128427, 97 | title = {Supposing {{Hunter}}-{{Gatherer Variability}}}, 98 | volume = {69}, 99 | issn = {00027316}, 100 | timestamp = {2017-08-23T13:11:11Z}, 101 | eprinttype = {jstor}, 102 | eprint = {4128427}, 103 | number = {2}, 104 | journaltitle = {American Antiquity}, 105 | author = {Kenneth M. Ames}, 106 | date = {2004}, 107 | pages = {364--374}, 108 | groups = {Theorie}, 109 | reviewed-author = {Lewis R. Binford and Susan Kent and Catherine Panter-Brick and Robert H. Layton and Peter Rowler-Conwy}, 110 | } 111 | @InCollection{Jann2010, 112 | address = {Wiesbaden}, 113 | title = {Robuste {{Regression}}}, 114 | isbn = {978-3-531-92038-2}, 115 | abstract = {Die Kleinste-Quadrate-Regression geh{\"o}rt zu den in der sozialwissenschaftlichen Forschung am h{\"a}ufigsten eingesetzten statistischen Verfahren, ist aber leider in verschiedener Hinsicht als nicht ,,robust`` zu bezeichnen. 
So k{\"o}nnen Regressionsergebnisse beispielsweise ganz entscheidend von nur einigen wenigen extremen Datenpunkten (,,Ausrei{\ss}ern``) abh{\"a}ngen. Weiterhin ist die vorteilhafte statistische Effizienz, die die Popularit{\"a}t der Kleinste-Quadrate-Regression mitbegr{\"u}ndet, nur unter restriktiven Annahmen {\"u}ber die Verteilung des Fehlerterms erf{\"u}llt. Robuste Regressionsverfahren, die weniger durch Ausrei{\ss}er beeinflusst werden und auch unter alternativen Fehlerverteilungen g{\"u}nstige Eigenschaften aufweisen, sind verf{\"u}gbar, werden aber in der angewandten Forschung bislang eher selten eingesetzt. Zwar hat das Bewusstsein zugenommen, dass Modellannahmen und die ,,Robustheit`` von Regressionsergebnissen gepr{\"u}ft werden sollten, die eingesetzten diagnostischen Mittel beschr{\"a}nken sich aber meistens auf klassische Methoden der Residuenanalyse. Die robuste Regression geht hier einen etwas anderen Weg, indem Modelle gesch{\"a}tzt werden, die von Natur aus gewisse Robustheitskriterien erf{\"u}llen. Diese robusten Ergebnisse k{\"o}nnen dann zu diagnostischen Zwecken mit den Ergebnissen herk{\"o}mmlicher Verfahren verglichen werden, und so zu einem besseren Verst{\"a}ndnis der durch die Daten abgebildeten Prozesse beitragen. Das vorliegende Kapitel soll eine Einf{\"u}hrung in die Methoden der robusten Regression geben. Zentrale Konzepte der robusten Statistik werden erl{\"a}utert und verschiedene robuste Regressionsverfahren wie zum Beispiel die M-und die MM-Sch{\"a}tzung vorgestellt. Die Anwendung der besprochenen Methoden wird an einem Beispiel mit Daten aus dem ALLBUS 2006 illustriert.}, 116 | timestamp = {2017-08-30T13:11:32Z}, 117 | booktitle = {Handbuch Der Sozialwissenschaftlichen {{Datenanalyse}}}, 118 | publisher = {{VS Verlag f{\"u}r Sozialwissenschaften}}, 119 | author = {Jann, B.}, 120 | editor = {Wolf, C. 
and Best, H.}, 121 | year = {2010}, 122 | pages = {707--740} 123 | } 124 | @Book{gordon2015regression, 125 | title = {Regression {{Analysis}} for the {{Social Sciences}}}, 126 | isbn = {978-1-317-60711-3}, 127 | timestamp = {2017-08-28T14:34:57Z}, 128 | publisher = {{Taylor \& Francis}}, 129 | author = {R.A. Gordon}, 130 | date = {2015}, 131 | groups = {R und Statistik}, 132 | } 133 | @Book{de_veaux_stats:_2012, 134 | location = {{Upper Saddle River, NJ}}, 135 | edition = {3}, 136 | title = {Stats: Data and Models}, 137 | isbn = {978-0-321-69255-9 978-0-321-75372-4}, 138 | shorttitle = {Stats}, 139 | timestamp = {2017-08-30T12:53:08Z}, 140 | langid = {english}, 141 | publisher = {{Addison-Wesley}}, 142 | author = {R. D. {De Veaux} and P. F. Velleman and D. Bock}, 143 | date = {2012}, 144 | note = {OCLC: 747602883}, 145 | groups = {R und Statistik}, 146 | } 147 | -------------------------------------------------------------------------------- /report/binford_discussion.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | pdf_document: 4 | pandoc_args: [ 5 | "-V", "classoption=twocolumn" 6 | ] 7 | toc: yes 8 | fig_caption: yes 9 | latex_engine: xelatex 10 | bibliography: bibliography.bib 11 | mainfont: "Arial" 12 | csl: deutsches-archaologisches-institut.csl 13 | --- 14 | 15 | ## Diskussion 16 | 17 | ```{r binford individual variable plots, cache=TRUE, message=FALSE, warning=FALSE, fig.height=17, fig.cap="\\label{fig:binford_indi_plots}Bivariate Relationen der abhängigen Variable *larea* und allen unabhängigen Variablen in Binfords Ergebnismodell. 
Mit angegeben ist der Koeffizient der jeweiligen Variable auf fünf Nachkommastellen gerundet."} 18 | # predictor variables of Binford's result model (selected below as columns of sel3) 19 | binford_selection <- c( 20 | "hunting", "lbio5", "lcoklm", 21 | "lrunoff", "watrgrc", "medstab", "perwltg", 22 | "rlow", "rungrc", "sdtemp" 23 | ) 24 | 25 | # extract the coefficients of binford_model into a vector 26 | binford_coeffi <- coefficients(binford_model) 27 | #binford_intercept <- binford_coeffi[1] 28 | 29 | # create a data frame that contains only the selected columns of sel3 30 | sel6 <- sel3 %>% dplyr::select( 31 | binford_selection 32 | ) 33 | 34 | # loop to create all scatterplots of larea with the independent variables; the axis label shows the coefficient picked by position (x + 1) - NOTE(review): this presumably skips the intercept and assumes the model terms follow the order of binford_selection; aes() also receives the one-column data frame sel6[x] instead of a column name - confirm both against binford_model and the installed ggplot2 35 | binford_indi_plots <- lapply( 36 | seq_along(sel6), 37 | function(x){ 38 | ggplot(data = sel3, aes(sel6[x], larea)) + 39 | geom_point() + 40 | geom_smooth(color = "red") + 41 | xlab(paste0(names(sel6[x]), " (", round(binford_coeffi[x + 1], 5), ")")) + 42 | theme_bw() 43 | } 44 | ) 45 | 46 | # merge the individual plots into one big figure (two-column plot matrix) 47 | cowplot::plot_grid(plotlist = binford_indi_plots, ncol = 2) 48 | ``` 49 | 50 | ```{r final individual variable plots, cache=TRUE, message=FALSE, warning=FALSE, fig.height=17, fig.cap="\\label{fig:final_indi_plots}Bivariate Relationen der abhängigen Variable *larea* und allen unabhängigen Variablen in meinem finalen Ergebnismodell.
Mit angegeben ist der Koeffizient der jeweiligen Variable auf fünf Nachkommastellen gerundet."} 51 | final_selection <- c( 52 | "temp", "medstab", "perwret", 53 | "perwltg", "lnagp", "lnpop", "gatherin", 54 | "kmov", "nicheff", "lati" 55 | ) 56 | 57 | final_coeffi <- coefficients(model4) 58 | 59 | sel7 <- sel3 %>% dplyr::select( 60 | final_selection 61 | ) 62 | 63 | final_indi_plots <- lapply( 64 | seq_along(sel7), 65 | function(x){ 66 | ggplot(data = sel3, aes(sel7[x], larea)) + 67 | geom_point() + 68 | geom_smooth(color = "red") + 69 | xlab(paste0(names(sel7[x]), " (", round(final_coeffi[x + 1], 5), ")")) + 70 | theme_bw() 71 | } 72 | ) 73 | 74 | cowplot::plot_grid(plotlist = final_indi_plots, ncol = 2) 75 | ``` 76 | 77 | Binford ist zuversichtlich, mit Gleichung \ref{eq:area_final} ein sinnvolles und relevantes Modell formuliert zu haben. Das Narrativ, das er zu seiner Erklärung konstruiert, entbehrt allerdings noch jener axiomatischen Schlüsse, die er im weiteren Verlauf des Buches generieren wird. Ich möchte seine Interpretation kurz wiedergeben (siehe dazu Abbildung \ref{fig:binford_indi_plots} und Tabelle \ref{tab:variable_description_1}): 78 | 79 | Die Variablen *hunting* und *lcoklm* sind negativ mit einer Abhängigkeit von marinen Ressourcen und positiv mit einer Abhängigkeit von landgebundenem Jagdwild verknüpft. Man kann nun schließen, dass die Nutzung terrestrischer Nahrungsquellen größere Streifgebiete für die Jäger- und Sammlergruppen erfordert. Damit wären *hunting* und *lcoklm* Anzeiger für die Arealgröße. Dieser Zusammenhang offenbart sich beispielhaft in Küstenarealen z.B. in Mexiko, Australien und der Nordamerikanischen Westküste. Dort konnten Jäger- und Sammlergemeinschaften mit kleinen Verbreitungsarealen beobachtet werden, deren Subsistenz stark von marinen Ressourcen abhängt. Zum Landesinneren hin nehmen die Arealgrößen zu. 80 | 81 | Die primäre Biomasse, die in der Variablen *lbio5* gemessen wird, erhöht sich mit der Niederschlagsmenge. 
Niederschlagsüberschuss, wie er sich in *lrunoff* und *rungrc* abbildet, ist ein Indikator für ausreichende Wasserverfügbarkeit. Hohe Werte von *lbio5*, *lrunoff* und *rungrc* sind damit Anzeiger für eine Umgebung, in der Jäger- und Sammlergruppen sich aufgrund der hohen Dichte verfügbarer Biomasse aus nur kleinen Arealen versorgen können. In diesem Kontext lässt sich auch die Negativkorrelation von *larea* mit *medstab* und *perwltg* verstehen. Wasserversorgung ist essentiell für Aufbau und stabile Verfügbarkeit von Biomasse und erlaubt damit kleinere Streifgebiete. Stabilität drückt sich auch in einer geringen Standardabweichung der Monatstemperatur *sdtemp* aus. Höhere Werte der niederschlagsgebundenen Variablen *watrgrc* und *rlow* deuten darauf hin, dass es im Untersuchungsareal keine echte, jahreszeitliche Trockenphase gibt. 82 | 83 | > The factors that appear correlated with small ethnic areas are the presence of marine coasts in the region, high plant biomass, and environmental stability in seasonality of temperature and rainfall variability. When these factors all have negative values indicating opposite conditions, large ethnic areas are unlikely. 84 | > 85 | > @binford_constructing_2001, 155. 86 | 87 | Die von mir entwickelte Modellgleichung \ref{eq:area_final} erlaubt ebenfalls einen solchen Interpretationsversuch. Die Variablen *medstab* und *perwltg* finden auch in diesem Modell mit in Vorzeichen und Größenordnung gleichem Koeffizienten Berücksichtigung. Entsprechend lässt sich die von Binford vorgeschlagene, kausale Deutung zur Anwendung bringen. Die Variablen *temp*, *perwret* und *lnagp* passen gut in dieses Narrativ. Es liegt auf der Hand, inwiefern *lnpop* und *kmov* eine Vorhersage der Arealgröße von Jäger- und Sammlergruppen erlauben. Abbildung \ref{fig:final_indi_plots} legt nahe, dass diese beiden Variablen nicht geringen Anteil an der gegenüber dem von Binford erhöhten Güte dieses Modells haben.
Die negative Korrelation mit *lati* überrascht zunächst, da auf der Nordhalbkugel insgesamt mehr nutzbare Landfläche zur Verfügung steht. Ein genauerer Blick auf den arithmetischen Mittelwert der Variable (`r round(mean(sel3$lati), 2)`) eröffnet allerdings die Perspektive, dass ein überwiegender Teil der in die Analyse aufgenommenen Gruppen eben von der Nordhalbkugel stammt und *lati* dadurch als Indikator für Äquatornähe zu verstehen ist. Binford hat diesen Zusammenhang ebenfalls beobachtet: 88 | 89 | > In both graphs [figure 5.14], hunter-gatherer cases occupying small ethnic areas are clustered in low latitudes that are characterized by high plant productivity. 90 | > 91 | > @binford_constructing_2001, 155. 92 | 93 | Weder Binfords noch mein Modell können alle Aspekte erklären oder auch nur benennen, die die Größe des Ausbreitungsareals einer Jäger- und Sammlergruppe beeinflussen. Einige Trends, wie die Verringerung in Küstennähe, in durchsatzreichen Ökosystemen in niederen Breiten oder bei kleinen Bevölkerungszahlen lassen sich aber gut erkennen. Der Versuch, Binfords Modell zu rekonstruieren, hat also zur Bestätigung einiger Grundüberlegungen geführt. Freilich ist weitere Forschung erforderlich, um den Einfluss dieser Größen besser quantitativ zu fassen. 94 | 95 | Wie im Rahmen der Modellbildung oben angedeutet, ist weder der von Binford noch der von mir applizierte Algorithmus zufriedenstellend. Eine Vorauswahl nach den oben eingeführten vier Kriterien (Linearität, Unabhängigkeit, Varianzäquivalenz und Normalität) wäre sicher sinnvoll, um völlig ungeeignete Variablen auszuschließen. Zumindest die Variablen im Ergebnismodell hätte ich gerne einer solchen Prüfung unterzogen, ich musste aber aus Zeitgründen darauf verzichten. Im Prozess der schrittweisen Vereinfachung des Ausgangsmodells führt die Reduktion wie ich sie vorgenommen habe (und Binford lässt nicht erkennen, dass er dieses Problem besser gelöst hätte) zu einem "Ziehen-ohne-Zurücklegen". 
Diese selektive Tiefensuche im Baum der Variablenkombinationen kann den Verlust wesentlicher Einflussgrößen aus dem Analysekontext zur Konsequenz haben. `stepAIC()` ist zwar theoretisch in der Lage, ein "Ziehen-mit-Zurücklegen" durchzuführen, in meinen beiden Durchläufen hat die Funktion allerdings in keinem Schritt die Entscheidung getroffen, eine vormals entfernte Variable wieder hinzuzufügen. Möglicherweise würde sich das bei einer besseren Vorauswahl der Variablen verändern. Abbildung \ref{fig:pearson_indi_plots} eröffnet den Blick dafür, dass sich schon mit einfachen Korrelationsmaßen leicht potentiell vielversprechende Variablen für die Regressionsanalyse identifizieren lassen. 96 | 97 | ```{r pearson, cache=TRUE, message=FALSE, warning=FALSE, fig.height=17, fig.cap="\\label{fig:pearson_indi_plots}Bivariate Relationen der Variable *larea* und den 10 Variablen, die mit *larea* den höchsten Korrelationskoeffizienten nach Pearson teilen. Mit angegeben ist der Koeffizient auf fünf Nachkommastellen gerundet."} 98 | # helper function to reduce correlation matrix 99 | diagi <- function(x){x[lower.tri(x)] <- NA; return(x)} 100 | 101 | # calculate Pearson's coefficient for every variable relation but reduce the 102 | # collection to the ten variables with the biggest value 103 | high_cor <- sel5 %>% 104 | cor() %>% 105 | as.matrix() %>% 106 | diagi() %>% 107 | reshape2::melt(.) %>% 108 | dplyr::arrange(-abs(value)) %>% 109 | dplyr::filter( 110 | !is.na(value) & 111 | (Var1 == "larea" | Var2 == "larea") & 112 | !(Var1 == "larea" & Var2 == "larea") 113 | ) %>% 114 | magrittr::extract(1:10,) 115 | 116 | # extract individual partner variables of larea 117 | pearson_selection <- c(as.character(high_cor$Var1), as.character(high_cor$Var2)) %>% 118 | unique %>% 119 | magrittr::extract(.
!= "larea") 120 | 121 | # get correct correlation coefficient values for the variables 122 | pearson_vals <- lapply(pearson_selection, function(x){ 123 | high_cor[,3][which(high_cor[1] == x | high_cor[2] == x)] 124 | }) %>% unlist 125 | 126 | # create df only with selection 127 | sel8 <- sel3 %>% dplyr::select( 128 | pearson_selection 129 | ) 130 | 131 | # loop to create all scatterplots of larea with the other variables 132 | pearson_indi_plots <- lapply( 133 | seq_along(sel8), 134 | function(x){ 135 | ggplot(data = sel3, aes(sel8[x], larea)) + 136 | geom_point() + 137 | geom_smooth(color = "red") + 138 | xlab(paste0(names(sel8[x]), " (", round(pearson_vals[x], 5), ")")) + 139 | theme_bw() 140 | } 141 | ) 142 | 143 | # merge individual plots to big figure (plot matrix) 144 | cowplot::plot_grid(plotlist = pearson_indi_plots, ncol = 2) 145 | ``` 146 | -------------------------------------------------------------------------------- /report/deutsches-archaologisches-institut.csl: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. 
This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 
49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /report/binford_tale_1.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | pdf_document: 4 | pandoc_args: [ 5 | "-V", "classoption=twocolumn" 6 | ] 7 | toc: yes 8 | fig_caption: yes 9 | latex_engine: xelatex 10 | bibliography: bibliography.bib 11 | mainfont: "Arial" 12 | csl: deutsches-archaologisches-institut.csl 13 | --- 14 | 15 | ```{r load libraries, echo=FALSE} 16 | library(magrittr) 17 | library(ggplot2) 18 | # only magrittr and ggplot should be loaded here - everything else should be explicitly mentioned via package::function 19 | ``` 20 | 21 | # Modelle zur Beschreibung der Ausbreitungsarealgröße von Jäger- und Sammlergruppen 22 | 23 | ## Problemstellung 24 | 25 | Im 5. Kapitel "Designing Frames of Reference and Exploring Projections" beschreibt Binford unter anderem eine Methode, Vorhersagen zu Attributen von Jäger- und Sammlergruppen in globalem Maßstab auf Grundlage von ethnographischen und naturräumlichen Daten treffen und über Projektion auf Karten visualisieren zu können. Im Abschnitt "Projecting Hunter-Gatherer Populations to the Entire Earth" gibt es wiederum einen Unterabschnitt "Using Relational Projections as Frames of Reference", der das Vorgehen anhand eines Beispiels illustriert. 
Binford schreibt: 26 | 27 | > If I can develop continuously scaled equations that summarize the relationship between the properties of hunter-gatherer systems and suites of environmental variables, it is likely that these equations could be used to project estimates for habitats from which there are few, if any, actual cases of hunter-gatherers documented in the recent past. But since such equations summarize interactive ecological relationships that are not confined to particular time periods, they may furnish strong clues about hunter-gatherer organizational variability that will provide a strong platform for subsequent theory building. 28 | > 29 | > -- @binford_constructing_2001, 154. 30 | 31 | Das Beispiel konzentriert sich auf die Variable *area* -- die Größe des Areals, das von einer Jäger- und Sammlergruppe relativ exklusiv genutzt wird, gemessen in Vielfachen von 100km². Mittels multipler Regression auf Grundlage des Gruppendatensatzes kommt Binford zu folgender Gleichung \ref{eq:area_multi} (bzw.
umgeformt Gleichung \ref{eq:area_multi_log}), die die abhängige Variable *area* in Relation zu mehreren unabhängigen Variablen (siehe Tabelle \ref{tab:variable_description_1}) beschreibt:

\begin{equation} \label{eq:area_multi}
\begin{aligned}
\mathit{area} = \\
& 10 \mathbin{\char`\^} [3.421431 + \\
& (0.004732 * \mathit{hunting}) + \\
& (-0.387229 * \mathit{lbio5}) + \\
& (0.186574 * \mathit{lcoklm}) + \\
& (-0.110286 * \mathit{lrunoff}) + \\
& (0.175157 * \mathit{watrgrc}) + \\
& (-0.164604 * \mathit{medstab}) + \\
& (-0.743144 * \mathit{perwltg}) + \\
& (0.004706 * \mathit{rlow}) + \\
& (-0.080339 * \mathit{rungrc}) + \\
& (0.024755 * \mathit{sdtemp})]
\end{aligned}
\end{equation}

```{r table of variables in binfords model}
# Static lookup table describing the variables used in the models below.
# Columns: variable name (rendered with a blank header), description,
# unit and the page reference given for the variable.
tibble::tribble(
  ~colA, ~Beschreibung, ~Einheit, ~Referenz,
  "area", "Größe des Areals, das von einer Jäger- und Sammlergruppe relativ exklusiv genutzt wird", "100km²", "117",
  "larea", "siehe area", "log10(100km²)", "",
  "lbio5", "Primäre (pflanzliche) Biomasse", "log(kg/m^2)", "85",
  "lcoklm", "Distanz zur nächstgelegenen, marinen Küste", "log10(km)", "154",
  "gatherin", "Ernährungsanteil pflanzlicher, terrestrischer Ressourcen", "%", "117",
  "hunting", "Ernährungsanteil tierischer, terrestrischer Ressourcen", "%", "117",
  "kmov", "Summe der Distanz, die eine durchschnittliche Familieneinheit in einem Jahr zurücklegt", "km/yr", "117",
  "lati", "Breitengrad auf einer Idealkugelprojektion (rectifying latitude)", "°", "",
  "rlow", "Niederschlagsmenge im trockensten Monat des Jahres", "Millimeter", "70",
  "medstab", "Indikatorgröße für die Ähnlichkeit zu mediterranem Klima berechnet aus Proxies zu Temperatur und Niederschlag", "keine Einheit", "72",
  "lnagp", "Net above-ground productivity -- Zuwachs der Biomasse in einem Habitat durch Photosynthese und Wachstum", "g/m^2/yr", "79",
  "nicheff", "Niche effectiveness -- Verhältnis der tatsächlichen Bevölkerungsdichte zu einer modellbasierten Vorhersage der Bevölkerungsdichte, die Binford in Kapitel 10 formuliert", "keine Einheit", "373",
  "lnpop", "Größe der untersuchten Bevölkerung", "log(Anzahl)", "117",
  "perwret", "Anteil des Wachstumszeitraum in dem der Boden Wasser gespeichert vorhält", "%", "79",
  "perwltg", "Anteil des Wachstumszeitraum in dem die Wasserverfügbarkeit unter dem pflanzlichen Welkepunkt liegt", "%", "79",
  "rungrc", "Anzahl der Monate im Wachstumszeitraum in denen der RUNOFF-Wert > 0", "Anzahl", "79",
  "lrunoff", "Wasser, das durch Abfluss für die Nutzung durch Pflanzen verloren geht", "log(mm)", "79",
  "sdtemp", "Standardabweichung der mittleren Monatstemperatur", "keine Einheit", "70",
  "temp", "Temperateness -- Indikatorgröße für die Ausgeglichenheit der Monatstemperatur", "keine Einheit", "59",
  "watrgrc", "Anzahl der Monate im Wachstumszeitraum, in denen Wasser im Boden gespeichert bleibt", "Anzahl", "79"
) %>%
  # the first column gets a blank header in the printed table
  dplyr::rename(
    " " = "colA"
  ) %>%
  # table setup and settings
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:variable_description_1}Kurzbeschreibung der Variablen in Binfords Ergebnismodell und dem von mir erarbeiteten, finalen Modell. Die Spalte Referenz enthält die Seitenzahl in \\textit{Constructing Frames of Reference}, wo die jeweilige Variable eingeführt wird.",
    # TRUE instead of T: T is an ordinary variable and can be reassigned
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE) %>%
  kableExtra::column_spec(2, width = "14em") %>%
  kableExtra::kable_styling(font_size = 9)
```

Binford hat seine Analyse in SPSS (Version 6.1.2) ausgeführt. Ein Skriptprotokoll der Analysesession liegt mir nicht vor. Um die Ergebnis in Form der Modellgleichung zu reproduzieren, werde ich nun also zunächst versuchen, das Vorgehen so gut wie möglich nachzuvollziehen.
Dafür steht mir eine hoffentlich gleiche oder zumindest hochgradig ähnliche Version des oben beschriebenen Gruppendatensatzes zur Verfügung. Ich weiß weiterhin, dass Binford sein Modell mit der Methode schrittweiser, Multipler Regression ermittelt hat. Unabhängige Variablen, die sich kollinear zu anderen unabhängigen Variablen verhalten, hat er entfernt. Die abschließende Entscheidung über das beste Modell hat er unter Beachtung der Indikatorgrößen $R^2$ und Standardfehler getroffen. Diese Angaben sind leider nicht hinreichend präzise. 90 | 91 | ## Datensatz 92 | 93 | Ein wesentliches Wissensdefizit besteht bezüglich der Information, in welcher Reihenfolge und mit welcher Rechtfertigung in den Schritten der Multiplen Regression Variablen jeweils entfernt wurden. Schon die Angabe, welche Variablen im Ausgangsdatensatz berücksichtigt wurden, ist unscharf. Immerhin: Da Multiple Regression nur auf Variablen der Intervall- oder Verhältnisskala (zusammen auch Kardinalskala oder metrisch skalierte Variablen) anwendbar ist, ist eine erste Eingrenzung möglich. Der Metadatensatz LRBkey verfügt hierzu über die Spalte *type*, die zu jeder Variable ein Skalenniveau angibt. Leider wird nur zwischen "categorical" und "ordinal" unterschieden. Dabei werden alle Variablen jenseits der Nominalskala als "ordinal" angesprochen. Das genügt nicht, um automatisiert alle intervall- und verhältnisskalierten Variablen auszuwählen. Aus diesem Grund habe ich selbst die Spalte *type_exp* im Metadatensatz hinzugefügt, und nach meiner Einschätzung auf Grundlage des Wertebereichs und der Beschreibung eine Zuordnung zu einem der vier Skalenniveaus "nominal", "ordinal", "interval" und "ratio" vorgenommen. Tabelle \ref{tab:variable_key_example} illustriert die Unterschiede zwischen der vorhandenen und meiner neu unternommenen Zuordnung für ein paar zufällig ausgewählte Variablen.
Abbildung \ref{fig:level_of_meas_bar} zeigt, wie sich die Reevaluation durch die in *type_exp* deutlich akzentuiertere Verteilung der Skalenniveaus auswirkt.

```{r load binford data, cache=TRUE, message=FALSE, warning=FALSE}
# load data: `key` is the metadata table (one row per variable),
# `main` the group dataset shipped with the binford package
# TODO: setup correct connections when all datasets are where they are supposed to be
key <- readr::read_csv("../data-raw/LRBkey.csv") # %>%
# dplyr::rename("variable" = "X1")
main <- binford::LRB
```

```{r variable type example table, cache=TRUE}
# show a fixed excerpt (rows 250 to 262) of the key table
key[250:262, ] %>%
  # select showvars
  dplyr::select(variable, description, type, type_exp) %>%
  # add decorative ... at the beginning and end
  rbind(
    data.frame(variable = "...", description = "...", type = "...", type_exp = "..."),
    .,
    data.frame(variable = "...", description = "...", type = "...", type_exp = "...")
  ) %>%
  # table setup and settings
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:variable_key_example}Auszug aus der Metatabelle zum Gruppendatensatz. Skalenniveauklassifizierung in den Spalten \\textit{type} und \\textit{type\\_exp}.",
    # TRUE instead of T: T is an ordinary variable and can be reassigned
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE) %>%
  kableExtra::column_spec(2, width = "14em") %>%
  kableExtra::kable_styling(font_size = 9)
```

```{r type classification distribution plot, cache=TRUE, fig.cap="\\label{fig:level_of_meas_bar}Verteilung der Skalenniveauzuordung in den Variablen *type* und *type_exp* des Metatdatensatzes. *type_exp* habe ich hinzugefügt, um Variablen automatisiert nach ihrer Skalenniveauzuordnung auswählen zu können. Die Klassenzuordnung in *type* ist farblich auf *type_exp* abgetragen."}
# prepare factor levels of type_exp column (fixes the bar order on the x axis)
key$type_exp <- factor(
  key$type_exp,
  levels = c("nominal", "ordinal", "interval", "ratio", "unknown")
)

# barplot of count of type_exp, filled by the original type classification
type_exp_bar <- key %>%
  ggplot(aes(x = type_exp, fill = type)) +
  geom_bar() +
  theme_bw(base_size = 16) +
  scale_fill_manual(values = c("red", "black")) +
  ylab("") +
  # "none" hides the legend; the logical form FALSE is deprecated
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

# prepare factor levels of type column
# (named argument `levels =`; the former `levels <- ...` only worked because
# the value happened to land in the second position and it left a stray
# global variable called `levels` behind)
key$type <- factor(
  key$type,
  levels = c("categorical", "ordinal")
)

# barplot of count of type
type_bar <- key %>%
  ggplot(aes(x = type, fill = type)) +
  geom_bar() +
  theme_bw(base_size = 16) +
  scale_fill_manual(values = c("red", "black")) +
  ylab("Anzahl der Variablen") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

# combine plots
cowplot::plot_grid(type_bar, type_exp_bar)
```

```{r apply selection, cache=TRUE}
# get interval and ratio variables
inter_ratio_vars <- key %>%
  dplyr::filter(
    type_exp %in% c("interval", "ratio")
  ) %$%
  variable

# select only them for further analysis
sel1 <- main %>%
  dplyr::select(dplyr::one_of(inter_ratio_vars))
```

Auf dieser Grundlage ist es jetzt also möglich, einen Ausgangsdatensatz zusammenstellen, der zwar alle `r nrow(sel1)` Gruppen aber nur die `r ncol(sel1)` interval- und ratioskalierten Variablen enthält. Hier fällt allerdings gleich ein erstes Defizit dieser Selektion auf: Einige Variablen haben sehr wenige Einträge, d.h. der Wert der entsprechenden Variable wurde nur für wenige Gruppen aufgenommen.
Abbildung \ref{fig:is_na_bar} enthält ein Histogramm der Fehlstellenanzahl.

```{r na histogram, warning=FALSE, fig.cap="\\label{fig:is_na_bar}Verteilung der Fehlstellenanzahl in metrisch skalierten Variablen und den zugehörigen Beobachtungen (Gruppen). Die Klassenbreite der Histogramme beträgt 15. Variablen und Gruppen ohne Fehlstellen wurden für die Visualisierung ausgeschlossen. Die horizontale, rote Linie im Variablenhistogramm markiert die Grenze oberhalb der Variablen aus der weiteren Analyse entfernt wurden."}
# get data.frame with number of na per variable
# (key = column name of sel1, value = number of missing entries);
# the names are taken straight from sel1 so that they can be matched
# against its columns again when variables are removed below
na_pro_var <- data.frame(
  key = colnames(sel1),
  value = unname(colSums(is.na(sel1))),
  stringsAsFactors = FALSE
) %>%
  # remove variables with zero na
  dplyr::filter(
    value != 0
  )

# get data.frame with number of na per observation (row of sel1)
na_pro_obs <- data.frame(
  key = as.character(seq_len(nrow(sel1))),
  value = unname(rowSums(is.na(sel1))),
  stringsAsFactors = FALSE
) %>%
  # remove observations with zero na
  dplyr::filter(
    value != 0
  )

# define threshold above which variables will be removed below
na_vars_removal_threshold <- 1/3

# NOTE(review): the cut-off is computed as threshold * ncol(sel1), i.e.
# relative to the number of variables. The accompanying text speaks of
# variables where more than 1/3 of the values are missing, which would be
# threshold * nrow(sel1). Left unchanged because it determines which
# variables enter all later models - please confirm which one is intended.

# plot histogram for variables
na_vars_plot <- na_pro_var %>%
  ggplot(aes(x = value)) +
  geom_histogram(binwidth = 15, fill = "black") +
  geom_vline(aes(xintercept = na_vars_removal_threshold * ncol(sel1)), color = "red", size = 1) +
  theme_bw(base_size = 16) +
  ylab("Anzahl der Variablen") +
  xlab("Anzahl der Fehlstellen") +
  coord_flip() +
  xlim(-20, 340)

# plot histogram for observations
na_obs_plot <- na_pro_obs %>%
  ggplot(aes(x = value)) +
  geom_histogram(binwidth = 15, fill = "black") +
  theme_bw(base_size = 16) +
  ylab("Anzahl der Gruppen") +
  xlab("") +
  coord_flip() +
  xlim(-20, 340)

# combine plots
cowplot::plot_grid(na_vars_plot, na_obs_plot)
```

```{r prepare na values to be printed}
# calculate proportions for text:
# number and share of variables without any missing value
without_na <- ncol(sel1) - nrow(na_pro_var)
without_na_percent <- round(without_na / ncol(sel1) * 100)
```

```{r remove na variables}
# remove vars with more na values than 1/3 * total amount of variables
# (same cut-off as the red line in the histogram above)
na_vars_part <- na_pro_var %>%
  dplyr::filter(
    value > (na_vars_removal_threshold) * ncol(sel1)
  )

sel2 <- sel1 %>%
  dplyr::select(
    -dplyr::one_of(na_vars_part$key)
  )
```

Immerhin `r without_na` der `r ncol(sel1)` ($\approx$ `r without_na_percent`%) Variablen besitzen überhaupt keine Lehrstellen. Der Datensatz ist bemerkenswert vollständig. Ich habe mich entschieden, alle `r nrow(na_vars_part)` Variablen, bei denen mehr als `r paste(MASS::fractions(na_vars_removal_threshold))` der Werte fehlen aus der Analyse auszuschließen, um Problemen bei der Regressionsanalyse vorzubeugen. In Abbildung \ref{fig:is_na_bar} ist die Demarkationslinie rot eingetragen. Bei den gruppenbezogenen Beobachtungen ist das Bild insgesamt ausgeglichener: Für alle Gruppen liegt eine große Menge an Werten vor. Hier sind keine Änderungen erforderlich.

In einem letzten Schritt muss nun noch die Variable *area* durch die Variable *larea* ausgetauscht werden. Binford gibt im Buch zwar die Modellergebnisgleichung für *area* an, das errechnete Modell bezieht sich aber auf den Logarithmus zur Basis 10 von *area* (siehe Umformung von Gleichung \ref{eq:area_multi} zu \ref{eq:area_multi_log}). Der damit vorbereitete Arbeitsdatensatz wird im folgenden als `sel3` bezeichnet.

\begin{equation} \label{eq:area_multi_log}
\begin{aligned}
\log _{10} \mathit{area} = \\
\mathit{larea} = \\
& 3.421431 + \\
& (0.004732 * \mathit{hunting}) + \\
& (-0.387229 * \mathit{lbio5}) + \\
& (0.186574 * \mathit{lcoklm}) + \\
& (-0.110286 * \mathit{lrunoff}) + \\
& (0.175157 * \mathit{watrgrc}) + \\
& (-0.164604 * \mathit{medstab}) + \\
& (-0.743144 * \mathit{perwltg}) + \\
& (0.004706 * \mathit{rlow}) + \\
& (-0.080339 * \mathit{rungrc}) + \\
& (0.024755 * \mathit{sdtemp})
\end{aligned}
\end{equation}

```{r}
# working dataset sel3: area is log10-transformed in place and then
# renamed to larea, so the column keeps its position in the table
sel3 <- sel2 %>%
  dplyr::mutate(
    area = log10(area)
  ) %>%
  dplyr::rename(
    "larea" = "area"
  )
```

## Multiple Regression

Multiple Lineare Regression ist ein Verfahren der Multivariaten Statistik, das die Erklärung und Vorhersage einer abhängigen Variable durch mehrere unabhängige Variablen erlaubt ^[@backhaus_multivariate_2008, 52-53/64-65., @de_veaux_stats:_2012, 784-812.]. Die Regressionsparameter (Koeffizienten) können wie bei der Einfachen Regressionsanalyse durch Reduktion der Fehlerquadrate berechnet werden. In einem ersten Schritt möchte ich das Modell von Binford nachstellen, indem ich seine Auswahl an Eingabevariablen übernehme und die Regression mittels der Funktion `lm()` aus dem R Basispaket stats darauf anwende.

```{r recreate binford model, echo=TRUE, cache=TRUE}
binford_model <- lm(
  larea ~ hunting + lbio5 + lcoklm +
    lrunoff + watrgrc + medstab + perwltg +
    rlow + rungrc + sdtemp,
  data = sel3
)
```

```{r binford model estimate table, cache=TRUE, dependson="recreate binford model"}
# create nice data.frame with model coefficient information
var_vals <- binford_model %>%
  broom::tidy() %>%
  # round all numeric columns to three digits
  # (function + extra argument instead of the deprecated dplyr::funs())
  dplyr::mutate_if(
    is.numeric,
    round, digits = 3
  ) %>%
  dplyr::rename(
    " " = "term"
  )

# prepare output table with extensive description
var_vals %>%
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:binford_model_result}Ergebniszusammenfassung des mit Binfords Variablenauswahl reproduzierten Modells für die einzelnen Koeffizienten. Alle Werte sind auf drei Nachkommastellen gerundet \\newline \\textbf{estimate -- Koeffizient:} Koeffizienten(schätzung) des Ergebnismodells (Intercept und Variablenslopes). \\newline \\textbf{std.error -- Standardfehler:} Maß für die Präzision der Schätzung für den Wert des Koeffizienten. Bei einer Variable, die gut für die Vorhersage der abhängigen Variable geeignet ist, sollte der Standardfehler in Relation zum Koeffizienten klein sein.\\newline \\textbf{statistic -- t-Wert:} Anzahl der Standardabweichungen, die den Koeffizienten von Null trennt. Der Betrag des Wertes sollte (auch in Relation zum Standardfehler) groß sein um die Nullhypothese 'keine Relation der Variablen' verwerfen zu können.\\newline \\textbf{p.value -- p-Wert:} p-Wert der t-Statistik. Wahrscheinlichkeit, dass eine Beobachtung auftritt, die gleich oder größer als der t-Wert ist. Ein kleiner p-Wert zeigt an, dass die Wahrscheinlichkeit einer zufälligen Enstehung dieses Modellergebnisses gering ist. Die Relation zwischen abhängiger und unabhängiger Variable ist damit signifikant.",
    # TRUE instead of T: T is an ordinary variable and can be reassigned
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE)
```

```{r binford model estimate table 2, cache=TRUE, dependson="recreate binford model"}
# create nice data.frame with model quality information:
# the one-row glance() result is turned into name/value pairs and then
# folded into two side-by-side name/value column pairs of five rows each
mod_vals <- binford_model %>%
  broom::glance() %>%
  tidyr::gather(var, value) %>%
  dplyr::mutate(
    # entries 7 to 11 become the right-hand column pair; the padding length
    # follows the actual number of rows instead of assuming exactly eleven
    # statistics, so an additional glance() column does not break the chunk
    var2 = c(var[7:11], rep(NA, dplyr::n() - 5)),
    value2 = c(value[7:11], rep(NA, dplyr::n() - 5))
  ) %>%
  # value df is removed like this - is useless here anyway
  magrittr::extract(1:5, ) %>%
  # round to three digits (without the deprecated dplyr::funs()) ...
  dplyr::mutate_if(
    is.numeric,
    round, digits = 3
  ) %>%
  # ... and convert to text for printing
  dplyr::mutate_if(
    is.numeric,
    as.character
  ) %>%
  # NOTE(review): as written, " " is used as new name twice and "value"
  # already exists as a column. Possibly the names differed in whitespace
  # originally - confirm, since duplicated column names may be rejected.
  dplyr::rename(
    " " = "var",
    " " = "var2",
    "value" = "value2"
  )

# vestige from when there were two empty cells due to magrittr::extract(1:6,)
mod_vals[is.na(mod_vals)] <- ""

# prepare output table with extensive description
mod_vals %>%
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:binford_model_result_2}Ergebniszusammenfassung des mit Binfords Variablenauswahl reproduzierten Modells für das Gesamtmodell. Alle Werte sind auf drei Nachkommastellen gerundet. \\newline \\textbf{r.squared -- $R^2$ -- Bestimmtheitsmaß:} $R^2$ ist ein Maß für die Güte der Modelleinpassung. Es gibt den Anteil der Varianz der abhängigen Variablen wieder, der durch das lineare Modell erklärt wird. Der Wert liegt zwischen 0 (kein linearer Zusammenhang) und 1 (perfekter linearer Zusammenhang). Ein hoher Wert ist ein Indikator für ein gutes Modell.\\newline \\textbf{adj.r.squared -- korrigiertes Bestimmtheitsmaß:} $R^2$ korrigiert unter Beachtung der Anzahl unabhängiger Variablen. $R^2$ wird bei zunehmender Anzahl an Variablen größer und muss entsprechend bei Multipler Regression normiert werden. \\newline \\textbf{sigma -- Residual Standard Error:} Durchschnittliche Abweichung der Modellvorhersage für die abhängige Variable von den tatsächlich gemessenen Werten. Bei einem guten Modell sollte der Wert gering sein. \\newline \\textbf{statistic und p.value} Siehe Tabelle \\ref{tab:binford_model_result}. Hier beziehen sich die Werte auf das Gesamtmodell. \\newline \\textbf{logLik -- Log\\-Likelihood, AIC -- Akaike Information Criterion und BIC -- Bayesian Information Criterion} Maße für die Güte der Modelleinpassung. \\newline \\textbf{deviance} Maß für die Distance zweier Modelle. Hier als Maß für die Güte der Modelleinpassung indem das Ergebnismodell mit dem Null\\--Modell (s.u.) verglichen wird. \\newline \\textbf{df.residual -- Anzahl der Freiheitsgrade} Berechnet sich aus der Anzahl der Beobachtungen abzüglich der Anzahl der schätzbaren Koeffizienten. Insofern handelt es sich um die Anzahl der 'überflüssigen' Messwerte, die zur Berechnung der Modellparameter nicht erforderlich wären.",
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE) %>%
  kableExtra::column_spec(3, bold = TRUE)
```

```{r plot binford model, warning=FALSE, cache=TRUE, dependson="recreate binford model", fig.height=7, fig.cap="\\label{fig:binford_model_plot}Diagnostische Plots für das mit Binfords Variablenauswahl reproduzierte Modell. \\newline \\textbf{Residuals vs Fitted:} Streuung der Residuen in Abhängigkeit von der Modellvorhersage. Kann als Indikator für nicht lineare Trends dienen. \\newline \\textbf{Normal Q-Q:} Sortierte Residuenwerte abgetragen auf die theoretischen Quantile einer Normalverteilung. Zeigt, inwiefern die Residuenverteilung der Normalverteilung entspricht. \\newline \\textbf{Scale-Location:} Vergleiche Residuals vs Fitted. Die Umskalierung erlaubt es, die Homogenität der Residuenvarianz (Homoskedastizität) besser zu beurteilen. \\newline \\textbf{Residuals vs Leverage:} Standardisierte Residuenwerte abgetragen auf *Leverage*, ein Maß zur Einschätzung des Einflusses auf das Modellergebnis. Dient zum Identifizieren von einflussreichen Ausreißern. ^[Zum Verständnis der diagnostischen Plots: http://data.library.virginia.edu/diagnostic-plots [16.8.2017]]"}
# diagnostic plots: 2 x 2 layout, previous graphics settings are restored
opar <- par(mfrow = c(2, 2))
plot(binford_model)
par(opar)
```

```{r identify bad observations for binford model}
# visual observations at the plots allow identification of outliers
# (hard-coded row indices; they have to be re-checked whenever the data
# or the model changes):
outies_binford_model <- c(3, 14, 45, 338)
# look up the entries of column X of the main dataset for these rows
outies_binford_model_names <- main$X[outies_binford_model]
```

Tabelle \ref{tab:binford_model_result} enthält Koeffizienten und zugehörige Kennwerte des erzeugten Modells ^[Ich habe versucht möglichst viele Kennwerte nachzuvollziehen. Siehe dazu für die Tabellen \ref{tab:binford_model_result} und \ref{tab:binford_model_result_2} @de_veaux_stats:_2012, 785 & 792-794. und \url{https://feliperego.github.io/blog/2015/10/23/Interpreting-Model-Output-In-R} [23.8.2017] sowie für Tabelle \ref{tab:dropterm_example} @gordon2015regression und @de_veaux_stats:_2012, 792-793.]. Ein Vergleich der Werte mit jenen in Gleichung \ref{eq:area_multi_log} ergibt, dass die Koeffizienten von den von Binford errechneten abweichen. Ich nehme an, dass der Regressionsalgorithmus in SPSS geringfügig anders implementiert ist als derjenigen in `lm()` oder SPSS die Regression nicht mittels einfacher Reduktion der Fehlerquadrate durchführt. Unter dem Hyperonym *Robuste Regression* werden verschiedene andere Verfahren diskutiert ^[@Jann2010]. Da die Werte nur geringfügig divergieren und Größenordnung sowie Vorzeichen übereinstimmen, gehe ich von einer nicht relevanten Abweichung aus, die ich hier nicht weiter diskutieren möchte.
Tabelle \ref{tab:binford_model_result_2} gibt einige Kennwerte der Modellgüte und Abbildung \ref{fig:binford_model_plot} die vier diagnostischen Standardplots wieder, die die Funktionen `summary.lm()` und `plot.lm()` aus stats bereitstellen. Das Modell scheint ein hohes Erklärungspotential zu besitzen, die abhängige Variable *area* also gut zu beschreiben. Starke Ausreißer gibt es nicht -- die überwiegende Mehrzahl der Beobachtungen wird durch das Modell gut erklärt. Nur einzelne Beobachtungen zeigen Auffälligkeiten: `r paste0(paste0(outies_binford_model, ": ", outies_binford_model_names), collapse = ", ")`. *watrgrc* ist nicht gut zur Beschreibung von *area* geeignet. 369 | 370 | Ich möchte nun versuchen, selbst ein Modell für die Variable *area* zu erstellen. Dabei sind eigentlich einige Aspekte zu beachten ^[@de_veaux_stats:_2012, 788-790.], mit denen ich in diesem Experiment aber bewusst sehr inkonsequent umgehen möchte: 371 | 372 | 1. **Linearität:** Zwischen der abhängigen und jeder einzelnen unabhängigen Variable sollte eine lineare, geradlinige Beziehung bestehen ("straight enough condition"). 373 | 2. **Unabhängigkeit:** Die Fehlerverteilung der unabhängigen Variablen sollte unabhängig voneinander sein ("randomization condition"). 374 | 3. **Varianzäquivalenz:** Die Variabilität der Fehler innerhalb der Beobachtungen der unabhängigen Variablen sollte näherungsweise gleich sein. Die Fehler sollten gleichmäßig streuen und keine Trends ausbilden. 375 | 4. **Normalität:** Die Abweichungen der Messwerte sollten rund um das eingepasste Ergebnismodell normalverteilt sein ("nearly normal condition"). 376 | 377 | Für jedes dieser Kriterien gibt es diagnostische Plots, die visuell ausgewertet werden müssen. Auf dieser Grundlage kann dann eine Entscheidung über Variablen getroffen werden, die in das Modell aufgenommen werden sollen. 
Mir stehen allerdings `r ncol(sel3) - 1` potentielle Eingabevariablen zur Verfügung und die Vorabprüfung dieser Variablen hätte viel Zeit in Anspruch genommen. Hinzu kommt, dass das Ergebnis der Multiplen Regression zwar tatsächlich im wesentlichen von den Eingabevariablen abhängig ist, andererseits aber auch -- zumindest wenn sie nicht vollständig unkorreliert sind -- von deren Eingabereihenfolge. Da im Fall der vorliegenden Analyse keine theoretischen oder sachlogischen Überlegungen Eingang finden sollen, die die Variablenauswahl determinieren würden, müssten eigentlich alle Permutationen von Auswahl und Reihenfolge betrachtet werden. Bei `r ncol(sel3) - 1` unabhängigen Variablen ist schon die Anzahl der Permutationen weit größer als praktisch in irgendeiner Form verarbeitbar ($200! \approx 7.887*10^{374}$). 378 | 379 | Ich habe mich entschieden, hier auf eine händische Variablenvorauswahl komplett zu verzichten und stattdessen auf ein automatisches Verfahren der Schrittweisen Regressionsanalyse zurückzugreifen. Diese treffen mittels Prüfgrößen selbständig und in verhältnismäßig wenigen Iterationsschritten eine Variablenauswahl ^[@backhaus_multivariate_2008, 100-105.]. 380 | 381 | Die Funktion `stepAIC()` des R Pakets MASS^[@venables_modern_2002, 172-177.] ist eine Implementierung eines solchen Verfahrens. `stepAIC()` erreicht die Modellreduktion durch schrittweise Minimierung der Prüfgröße AIC (Akaike Information Criterion -- $\mathit{AIC} = -2\ *\ \mathit{maximierte}\ \mathit{LogLikelihood}\ +\ 2\ *\ \# Parameter$), die man vereinfacht als Maß für die Passgenauigkeit eines statistischen Modells verstehen kann. `stepAIC()` benötigt dafür ein berechnetes Eingangsmodell, das nach Möglichkeit nah am besten Ergebnismodell liegen sollte, sowie Modelldefinitionen jeweils eines maximal und minimal komplexen Ergebnismodells. 
Da das Ausgangsmodell mit `r nrow(sel3)` Beobachtungen und `r ncol(sel3)` Variablen in diesem Kontext als komplex gelten darf, möchte ich mich an folgende Empfehlung der Autoren halten, und dem Algorithmus stattdessen nur dieses Initialmodell zur Verfügung stellen.

> If a large model is selected as the starting point, the scope and scale arguments have generally reasonable defaults, but for a small model where the process is probably to be one of adding terms, they will usually need both to be supplied.
>
> -- @venables_modern_2002, 175.

Zunächst erfolgt die Berechnung des Ausgangsmodells:

```{r create initial model, echo = TRUE, cache=TRUE}
initial_model <- lm(larea ~ ., data = sel3)
```

```{r plot initial model, warning=FALSE, cache=TRUE, dependson="create initial model", fig.height=7, fig.cap="\\label{fig:initial_model_plot}Diagnostische Plots für das Ausgangsmodell mit allen Variablen."}
# diagnostic plots: 2 x 2 layout, previous graphics settings are restored
opar <- par(mfrow = c(2, 2))
plot(initial_model)
par(opar)
```

Abbildung \ref{fig:initial_model_plot} und Tabelle \ref{tab:initial_model_result_2} zeigen, dass dieses Modell auf Grundlage aller Variablen herausragende Vorhersagefähigkeiten für die Variable *area* besitzt. Gleichermaßen entbehrt es jedoch jeder fachwissenschaftlichen Aussage, da sich eine Gesamtheit von `r ncol(sel3)` semantisch höchst unterschiedlichen Variablen jeder integrativen Interpretation entzieht. Das Modell ist nicht geeignet, ein besseres Verständnis der zugrundeliegenden naturräumlichen, kulturellen und sozioökonomischen Zusammenhänge zu generieren. Erst die Vereinfachung des Modells wird die Ableitung klarer, prüfbarer Hypothesen ermöglichen.

Entsprechend nun also ein erster Durchlauf der automatischen, schrittweisen Modellreduktion:

```{r stepwise multiple regression, cache=TRUE, dependson="create initial model", message=FALSE, echo=TRUE}
model1 <- MASS::stepAIC(
  initial_model,
  # trace option: don't show intermediate steps
  trace = FALSE
)
```

```{r plot model1, warning=FALSE, cache=TRUE, dependson="stepwise multiple regression", fig.height=7, fig.cap="\\label{fig:model1_plot}Diagnostische Plots für das Modell nach dem ersten Durchlauf von `stepAIC()`."}
# diagnostic plots: 2 x 2 layout, previous graphics settings are restored
opar <- par(mfrow = c(2, 2))
plot(model1)
par(opar)
```

```{r anova table of stepAIC, cache=TRUE, dependson="stepwise multiple regression"}
# nice output table of stepAIC() steps, built from the anova component
# of the fitted object
model1$anova %>%
  as.data.frame() %>%
  # shorter column header for the printed table
  dplyr::rename("Dev" = "Deviance") %>%
  dplyr::select(-Df) %>%
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:model1_anova}ANOVA Komponente des Ergebnismodelldatentyps von \\texttt{stepAIC()}. Zeigt die schrittweise Entfernung von Variablen zur Reduktion der Modellkomplexität und zugehörige, diagnostische Prüfgrößen. \\newline \\textbf{Step -- Reduktionsschritt:} Variable, die in diesem Schritt entfernt oder wieder hinzugefügt wurde. \\newline \\textbf{Dev -- Deviance, Resid. Df -- Anzahl der Freiheitsgrade, AIC -- Akaike information criterion:} Siehe Tabelle \\ref{tab:binford_model_result_2}. \\newline \\textbf{Resid. Dev -- residual deviance:} Eine Konstante abzüglich zwei mal die maximierte Log-Likelihood. Ist nur bei gesättigten Modellen aussagekräftig und kann hier ignoriert werden.",
    # TRUE instead of T: T is an ordinary variable and can be reassigned
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE) %>%
  kableExtra::kable_styling(font_size = 9)
```

In Tabelle \ref{tab:model1_anova} werden die Reduktionsschritte zeilenweise dokumentiert.
Mit dem Entfernen von Variablen nimmt die Anzahl der Freiheitsgrade schrittweise zu, während der AIC-Wert langsam abnimmt. `r nrow(model1$anova) - 1` Variablen wurden von `stepAIC()` entfernt, dann allerdings kam der Prozess in diesem Durchlauf zum Halten. Der Vergleich des *Residuals vs Leverage* Plots in Abbildung \ref{fig:initial_model_plot} und Abbildung \ref{fig:model1_plot} legt nahe, dass sich die durchschnittliche Wirkung individueller Beobachtungen auf das Gesamtergebnis verringert hat. Anzunehmen ist, dass Variablen mit großer Variabilität entfernt wurden. Abbildung \ref{fig:binford_model_plot} lässt die Vermutung zu, dass sich dieser Trend bei weiterer Reduktion des Modells fortsetzen wird. `r ncol(sel3) - nrow(model1$anova)` Variablen sind tatsächlich angesichts der deskriptiven Einfachheit des Binford-Modells hier noch kein befriedigendes Ergebnis. Die Minimierung des AIC-Werts ist scheinbar kein ausreichend starkes Optimierungskriterium:

<!-- The step table model1$anova has one row for the initial model plus one row per removal step; the number of removed variables is therefore its row count minus one. The count of remaining predictors (columns of sel3 minus the response, minus the removed variables) simplifies to ncol(sel3) - nrow(model1$anova). -->

> This suggests, correctly, that selecting terms on the basis of AIC can be somewhat permissive in its choice of terms, being roughly equivalent to choosing an F-cutoff of 2.
>
> -- @venables_modern_2002, 176.

Wir können nun fortfahren, indem wir entweder das k-Attribut in `stepAIC()` (*the multiple of the number of degrees of freedom used for the penalty*) erhöhen und den Prozess erneut starten, oder direkt mittels F-Statistik schrittweise jene Variablen entfernen, die nicht signifikant mit der abhängigen Variable verknüpft sind. Um diese zu identifizieren steht in MASS die Funktion `dropterm.lm()` bereit, die die Modelleinpassung für alle möglichen Modelle durchführt, die eine Variable weniger inkorporieren als das Ausgangsmodell.

```{r dropterm example code, eval=FALSE, echo=TRUE}
MASS::dropterm(model1, test = "F")
```

Tabelle \ref{tab:dropterm_example} gibt Einblick in die mit `dropterm(test = "F")` berechneten Prüfgrößen.
Offensichtlich verändert sich die Modellgüte je nachdem, welche Variablen ausgeschlossen werden. Die F-Statistik gibt Antwort auf die Frage, ob es gegenüber dem Null-Modell -- also dem Modell, in dem alle Koeffizienten außer dem Y-Achsenabschnitt (Intercept) auf Null gesetzt werden -- einen Vorteil für die Vorhersagequalität des Ergebnismodells bringt, die jeweilige Variable mit in das Modell aufzunehmen. Große Werte für *Pr(F)* deuten also auf Variablen hin, die wahrscheinlich nicht in einer direkten Relation zur abhängigen Variablen stehen und entfernt werden können ^[@venables_modern_2002, 176., @de_veaux_stats:_2012, 792-793.].

```{r dropterm example table, cache=TRUE, dependson="stepwise multiple regression"}
# nice output table of one dropterm() step
MASS::dropterm(model1, test = "F") %>%
  as.data.frame() %>%
  # show the first 40 rows only; unlike [1:40, ], head() does not pad the
  # table with NA rows if it is shorter than that
  utils::head(40) %>%
  # the term names are stored as row names
  tibble::rownames_to_column() %>%
  dplyr::select(
    -Df
  ) %>%
  # round everything to four digits, the AIC to whole numbers
  dplyr::mutate_at(
    c("Sum of Sq", "RSS", "F Value", "Pr(F)"),
    round, 4
  ) %>%
  dplyr::mutate_at(
    c("AIC"),
    round, 0
  ) %>%
  # character columns are needed to append the "..." row below
  dplyr::mutate_all(
    as.character
  ) %>%
  # add decorative ... at the end
  rbind(
    .,
    data.frame(
      rowname = "...", `Sum of Sq` = "...", RSS = "...", AIC = "...",
      `F Value` = "...", `Pr(F)` = "...",
      check.names = FALSE, stringsAsFactors = FALSE
    )
  ) %>%
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:dropterm_example}dropterm Tabelle: Maße für die Qualität des Beitrags einzelner Variablen zum Gesamtmodell. Alle Werte sind auf vier Nachkommastellen gerundet, der AIC-Wert auf ganze Zahlen. \\newline \\textbf{Sum of Sq -- ESS -- explained sum of squares:} Summe der Abweichungsquadrate der Modellvorhersage zum arithmetischen Mittel der abhängigen Variable für das Modell ohne diese Variable. \\newline \\textbf{RSS -- residual sum of squares:} Wie ESS, hier aber Summe der Abweichungsquadrate der Modellvorhersage zu \\textit{allen} Werten der abhängigen Variable. \\newline \\textbf{AIC -- Akaike information criterion:} Siehe Tabelle \\ref{tab:binford_model_result_2}. \\newline \\textbf{F Value:} Eingangswert des F-Tests. \\newline \\textbf{Pr(F) -- probability of F-Value:} Maß für die Wahrscheinlichkeit (p-Wert) des F-Werts im F-Test der Gesamtsignifikanz. Der F-Test basiert auf dem Vergleich mit einem Modell, in dem alle Koeffizienten außer dem Intercept und dem der aktuellen Variable den Wert Null annehmen. Ist die Wahrscheinlichkeit klein, kann die Nullhypothese, dass kein wirklicher Zusammenhang zwischen abhängiger und unabhängigen Variablen besteht, verworfen werden.",
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE)
```

```{r initial model estimate table 2, cache=TRUE, dependson="create initial model"}
# one-row model summary, reshaped to one row per statistic
init_mod_vals <- initial_model %>%
  broom::glance() %>%
  tidyr::gather(var, value) %>%
  # two-column layout: statistics 7-11 are placed next to statistics 1-5.
  # The index is padded with NA up to the number of rows, so this also
  # works if glance() returns more than eleven statistics.
  dplyr::mutate(
    var2 = var[c(7:11, rep(NA, length(var) - 5))],
    value2 = value[c(7:11, rep(NA, length(value) - 5))]
  ) %>%
  magrittr::extract(1:5,) %>%
  dplyr::mutate_if(
    is.numeric,
    round, 3
  ) %>% dplyr::mutate_if(
    is.numeric,
    as.character
  ) %>%
  # NOTE(review): this deliberately creates duplicated column names for the
  # table header -- confirm that the installed dplyr version accepts that
  dplyr::rename(
    " " = "var",
    " " = "var2",
    "value" = "value2"
  )

# print empty cells instead of NA
init_mod_vals[is.na(init_mod_vals)] <- ""

init_mod_vals %>%
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:initial_model_result_2}Ergebniszusammenfassung des initialen Modells. Alle Werte sind auf drei Nachkommastellen gerundet.",
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE) %>%
  kableExtra::column_spec(3, bold = TRUE)
```

Ich möchte das Vorgehen, mit `dropterm()` schrittweise Variablen zu entfernen, automatisieren und dann so oft wiederholt zur Anwendung bringen, bis die Anzahl der Variablen im Ergebnismodell der in Binfords Modell entspricht. Dafür habe ich einen simplen Algorithmus formuliert, der in einer Schleife die `dropterm()`-Funktion ausführt, die Variable mit dem größten p-Wert identifiziert und diese dann für den nächsten Schleifendurchlauf aus dem immer einfacheren Modell entfernt.

```{r dropterm loop, echo=TRUE, cache=TRUE, dependson="stepwise multiple regression"}
# duplicate model object
model2 <- model1

# determine number of vars to drop
to_drop <- (ncol(sel3) - nrow(model1$anova) - 10)

# drop loop (seq_len() skips it entirely
# if there is nothing left to drop)
for (i in seq_len(max(to_drop, 0))) {
  # determine variable with highest
  # p-value of the F-Test
  victimvar <- MASS::dropterm(
    model2, test = "F"
  ) %>%
    # plain data.frame: keeps the term
    # names stored as row names
    as.data.frame() %>%
    tibble::rownames_to_column() %>%
    # exactly one row; rows without
    # p-value (<none>) are ignored
    dplyr::slice(
      which.max(`Pr(F)`)
    )

  # remove this variable from the model
  model2 <- update(
    model2,
    as.formula(paste(
      ". ~ . - ", victimvar$rowname
    ))
  )
}
```

```{r plot model2, warning=FALSE, cache=TRUE, dependson="dropterm loop", fig.height=7, fig.cap="\\label{fig:model2_plot}Diagnostische Plots für das Modell nach Anwendung des `dropterm()`-Algorithmus."}
# 2 x 2 grid of lm diagnostic plots; restore the graphics settings afterwards
opar <- par(mfrow = c(2, 2))
plot(model2)
par(opar)
```

```{r, cache=TRUE, dependson="dropterm loop"}
# coefficient table of the reduced model, rounded to three digits
model2 %>%
  broom::tidy() %>%
  dplyr::mutate_if(
    is.numeric,
    round, 3
  ) %>%
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:model2_result}Ergebniszusammenfassung für die einzelnen Koeffizienten des mittels \\texttt{stepAIC()} und \\texttt{dropterm()} aus der Gesamtvariablenmenge reduzierten Modells. Alle Werte sind auf drei Nachkommastellen gerundet.",
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE)
```

```{r bad variables, cache=TRUE}
# get variable descriptions
# hu <- key %>%
#   dplyr::filter(
#     variable %in% colnames(sel3)
#   ) %>%
#   dplyr::select(
#     variable, description
#   )

# manually identify variables, that are directly linked to area
bad_vars <- c("density", "packinx", "prindx", "lden", "lpackinx")
```

Das Modell, das aus diesem Algorithmus hervorgeht, erscheint im ersten Moment äußerst vielversprechend. Abbildung \ref{fig:model2_plot} zeigt, dass es äußerst präzise Vorhersagen für *larea* erlaubt und -- abgesehen von wenigen, starken Ausreißern -- die Mehrzahl der Beobachtungen hervorragend erklärt. Ein Blick in Tabelle \ref{tab:model2_result} offenbart jedoch, dass das Modell im Wesentlichen auf den beiden Variablen *lnpop* und *lpackinx* beruht.
Obgleich die Koeffizientenwerte bei der Multiplen Regression nicht so unmittelbar verstanden werden können, wie das bei der Einfachen Regression möglich ist ^[@de_veaux_stats:_2012, 787-788 & 794.], ist doch klar, dass die anderen Variablen verschwindend wenig Einfluss auf das Ergebnis haben. *packinx* ist eine umskalierte Variante der Bevölkerungsdichte ^[@binford_constructing_2001, 117.], die sich selbst als abhängige Größe aus Bevölkerungszahl und Arealgröße definiert. Es ist also nicht verwunderlich, dass sich die logarithmisch skalierte Variante *lpackinx* gut zur Vorhersage von *larea* eignet. Um ein Ergebnis zu erhalten, das mehr wissenschaftliche Relevanz besitzt, muss ich jene Variablen aus dem Ausgangsdatensatz entfernen, die direkt von der Arealgröße abhängig sind. Eine kurze Durchsicht der Schlüsseldatei LRBkey reduziert auf die oben getroffene Selektion metrisch skalierter Variablen ergibt dazu folgende Auswahl: `r paste(paste0("*", bad_vars, "*"), collapse = ", ")`. Freilich könnte man argumentieren, dass auch die Beziehung zwischen *larea* und der logarithmisch skalierten Bevölkerungszahl *lnpop*, die in Tabelle \ref{tab:model2_result} ebenfalls deutlich als relevante Vorhersagegröße angeführt wird, trivial ist und entsprechend ausgeschlossen werden könnte. Das führt allerdings zu einer fortgeschrittenen, manuellen Variablenvorauswahl. Eine solche kann fragestellungsbezogen durchaus sinnvoll sein, wurde hier aber bewusst vermieden. Die Abwesenheit von *lpackinx* und *lnpop* in Binfords Ergebnismodell (siehe Gleichung \ref{eq:area_multi_log}) spricht dafür, dass Binford hier Hand angelegt hat, ohne das explizit zu kommunizieren. Möglicherweise waren diese Variablen aber auch überhaupt nicht Teil der Arbeitsversion des Gruppendatensatzes, die ihm zum Zeitpunkt der Erstellung dieses Modells zur Verfügung stand.

Hier nun also ein Blick auf das Modell, das sich ergibt, wenn man die beschriebene Variablenvorauswahl trifft und noch einmal die Arbeitsschritte des `stepAIC()`- und `dropterm()`-Algorithmus wiederholt.

```{r final model, cache=TRUE, dependson="bad variables"}
# remove those variables
sel5 <- sel3 %>%
  dplyr::select(
    -dplyr::one_of(bad_vars)
  )

# create initial model with new variable selection
initial_model2 <- lm(larea ~ ., data = sel5)

# reduce model with stepAIC()
model3 <- MASS::stepAIC(
  initial_model2,
  trace = FALSE
)

# duplicate model object
model4 <- model3

# determine number of vars to drop
to_drop <- (ncol(sel5) - nrow(model3$anova) - 10)

# drop loop (seq_len() skips it entirely if there is nothing left to drop)
for (i in seq_len(max(to_drop, 0))) {
  # determine variable with highest
  # p-value of the F-Test
  victimvar <- MASS::dropterm(
    model4, test = "F"
  ) %>%
    # plain data.frame: keeps the term names stored as row names
    as.data.frame() %>%
    tibble::rownames_to_column() %>%
    # exactly one row; rows without p-value (<none>) are ignored
    dplyr::slice(
      which.max(`Pr(F)`)
    )

  # remove this variable from the model
  model4 <- update(
    model4,
    as.formula(paste(
      ". ~ . - ", victimvar$rowname
    ))
  )
}
```

```{r plot model4, warning=FALSE, cache=TRUE, dependson="final model", fig.height=7, fig.cap="\\label{fig:model4_plot}Diagnostische Plots für das finale Modell."}
# model4 is created in the chunk "final model", so the cache depends on it
opar <- par(mfrow = c(2, 2))
plot(model4)
par(opar)
```

```{r final model estimate table, cache=TRUE, dependson="final model"}
# coefficient table of the final model, rounded to three digits
model4 %>%
  broom::tidy() %>%
  dplyr::mutate_if(
    is.numeric,
    round, 3
  ) %>%
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:model4_result}Ergebniszusammenfassung für die einzelnen Koeffizienten des finalen Modells, das durch erneute Anwendung des \\texttt{stepAIC()}- und \\texttt{dropterm()}-Algorithmus auf ein Modell mit leicht reduzierter Variablenauswahl ermittelt wurde. Alle Werte sind auf drei Nachkommastellen gerundet.",
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE)
```

```{r final model estimate table 2, cache=TRUE, dependson="final model"}
# one-row model summary, reshaped to one row per statistic
fin_mod_vals <- model4 %>%
  broom::glance() %>%
  tidyr::gather(var, value) %>%
  # two-column layout: statistics 7-11 are placed next to statistics 1-5.
  # The index is padded with NA up to the number of rows, so this also
  # works if glance() returns more than eleven statistics.
  dplyr::mutate(
    var2 = var[c(7:11, rep(NA, length(var) - 5))],
    value2 = value[c(7:11, rep(NA, length(value) - 5))]
  ) %>%
  magrittr::extract(1:5,) %>%
  dplyr::mutate_if(
    is.numeric,
    round, 3
  ) %>% dplyr::mutate_if(
    is.numeric,
    as.character
  ) %>%
  # NOTE(review): this deliberately creates duplicated column names for the
  # table header -- confirm that the installed dplyr version accepts that
  dplyr::rename(
    " " = "var",
    " " = "var2",
    "value" = "value2"
  )

# print empty cells instead of NA
fin_mod_vals[is.na(fin_mod_vals)] <- ""

fin_mod_vals %>%
  knitr::kable(
    format = "latex",
    caption = "\\label{tab:model4_result_2}Ergebniszusammenfassung des finalen Modells. Alle Werte sind auf drei Nachkommastellen gerundet.",
    booktabs = TRUE
  ) %>%
  kableExtra::column_spec(1, bold = TRUE) %>%
  kableExtra::column_spec(3, bold = TRUE)
```

\begin{equation} \label{eq:area_final}
\begin{aligned}
\log _{10} \mathit{area} = \\
\mathit{larea} = \\
& 2.439 + \\
& (-0.021 * \mathit{temp}) + \\
& (-0.132 * \mathit{medstab}) + \\
& (0.259 * \mathit{perwret}) + \\
& (-0.498 * \mathit{perwltg}) + \\
& (-0.760 * \mathit{lnagp}) + \\
& (0.372 * \mathit{lnpop}) + \\
& (0.005 * \mathit{gatherin}) + \\
& (0.001 * \mathit{kmov}) + \\
& (-0.002 * \mathit{nicheff}) + \\
& (-0.006 * \mathit{lati})
\end{aligned}
\end{equation}

Aus Tabelle \ref{tab:model4_result} (und Gleichung \ref{eq:area_final}) wird ersichtlich, dass dieses Modell nun auf einer wesentlich diverseren Auswahl unabhängiger Variablen fußt. Abbildung \ref{fig:model4_plot} und Tabelle \ref{tab:model4_result_2} belegen, dass es sich um ein solides Modell handelt. Den Kennwerten in Tabelle \ref{tab:binford_model_result_2} nach zu urteilen, scheint es wesentlich bessere Vorhersagen treffen zu können als Binfords Modell: Während das Modell, das ich auf Grundlage von Binfords Variablenauswahl ermittelt habe, nur ($R^2 \approx$) `r round(summary(binford_model)$r.squared * 100, 1)`% der Variabilität in *larea* erklären konnte, kommt dieses Modell auf `r round(summary(model4)$r.squared * 100, 1)`%! Dieses Ergebnis ist eindeutig -- selbst wenn man die Unzulänglichkeiten von $R^2$ und $\mathit{adjusted} R^2$ für den Modellvergleich in Betracht zieht ^[@de_veaux_stats:_2012, 799-800.]. Ich möchte damit also den Reproduktionsversuch abschließen und die Ergebnisse diskutieren.
710 | 711 | ```{r child = 'binford_discussion.Rmd'} 712 | ``` 713 | -------------------------------------------------------------------------------- /data-raw/LRBkey.csv: -------------------------------------------------------------------------------- 1 | ,NOTmissing,class,nUniqVals,FNOTmissing,Fclass,FnUniqVals,variable,ds,description,type,type_exp,usevb,source,comments,topcomments,sectbrk 2 | seq339,339,numeric,339,339,numeric,339,seq339,LRB,Sequence number,categorical,nominal,auxiliary,96,NA,NA,NA 3 | groupno,339,numeric,339,339,numeric,339,groupno,LRB,Group number,categorical,nominal,auxiliary,96,NA,NA,NA 4 | name,339,character,339,339,factor,339,name,LRB,Version 1 of society name,categorical,nominal,auxiliary,96,NA,NA,NA 5 | year,339,numeric,59,339,numeric,59,year,LRB,Year of ethnography,categorical,nominal,auxiliary,96,NA,NA,NA 6 | ethref,235,character,197,235,factor,197,ethref,LRB,Ethnographies used as source of forager data. The pinpointing date is in parentheses.,categorical,nominal,auxiliary,96,NA,NA,NA 7 | eanumber,207,numeric,207,207,numeric,207,eanumber,LRB,Ethnographic Atlas society number (manual coding),categorical,nominal,auxiliary,96,NA,NA,NA 8 | eafolk,212,character,209,212,factor,209,eafolk,LRB,Ethnographic Atlas society name (manual coding),categorical,nominal,auxiliary,96,NA,NA,NA 9 | wnainumb,84,numeric,84,84,numeric,84,wnainumb,LRB,Western North American Indian ID number (manual coding),categorical,nominal,auxiliary,96,NA,NA,NA 10 | wnaisociety,84,character,84,84,factor,84,wnaisociety,LRB,Western North American Indian society name (manual coding),categorical,nominal,auxiliary,96,NA,NA,NA 11 | badlangiso,339,numeric,2,339,factor,2,badlangiso,LRB,Flag: possibly inaccurate language classification and code,categorical,nominal,auxiliary,96,NA,NA,NA 12 | wldsec,339,character,5,339,factor,5,wldsec,LRB,World sector,categorical,nominal,data,96,NA,NA,NA 13 | secno,339,character,10,339,factor,10,secno,LRB,Detailed world sector (North America 
subdivided),categorical,nominal,data,96,NA,NA,NA 14 | wc.area,339,character,10,339,factor,10,wc.area,LRB,Culture area,categorical,nominal,data,96,NA,NA,NA 15 | wcont,124,character,5,124,factor,5,wcont,LRB,Continent of society,categorical,nominal,data,96,NA,NA,NA 16 | wlocation,339,character,67,339,factor,67,wlocation,LRB,Location of society,categorical,nominal,auxiliary,96,NA,NA,NA 17 | longitude,339,numeric,319,339,numeric,319,longitude,LRB,Longitude (decimal degrees),ordinal,interval,data,96,NA,NA,NA 18 | latitude,339,numeric,324,339,numeric,324,latitude,LRB,Latitude (decimal degrees),ordinal,interval,data,96,NA,NA,NA 19 | vegclass,339,numeric,12,339,factor,12,vegclass,LRB,Classification by vegetation type--gross classification,categorical,nominal,data,96,NA,NA,NA 20 | vegnu,339,numeric,24,339,factor,24,vegnu,LRB,Classification by vegetation type ; (Table: 5.01); (Binford 2001:117),categorical,nominal,data,96,NA,NA,NA 21 | soil,339,character,10,339,factor,10,soil,LRB,Primary soil type characteristic of a group's range; (Table: 5.01); (Binford 2001:117),categorical,nominal,data,96,NA,NA,NA 22 | setting,339,character,4,339,factor,4,setting,LRB,Drainage type for land area; (Table: 6.01); (Binford 2001:168),categorical,nominal,data,96,NA,NA,NA 23 | dposit,339,numeric,225,339,numeric,83,dposit,LRB,Position in drainage system (distance from territory to headwaters of drainage system)/(length of drainage system) ; (Equation: headwat/drain); (Binford 2001:168),ordinal,ratio,data,96,NA,NA,NA 24 | headwat,339,numeric,127,339,numeric,127,headwat,LRB,Distance from water source to the headwaters of the drainage (measured along the stream),ordinal,ratio,data,96,NA,NA,NA 25 | drain,339,numeric,161,339,numeric,161,drain,LRB,Total length (in miles) of drainage system.,ordinal,ratio,data,96,NA,NA,NA 26 | h10,339,numeric,192,339,numeric,192,h10,LRB,Measure of altitudinal maximum at a circle with a 10 mile radius.,ordinal,interval,data,96,NA,NA,NA 27 | 
h25,339,numeric,207,339,numeric,207,h25,LRB,Measure of altitudinal maximum at a circle with a 25 mile radius.,ordinal,interval,data,96,NA,NA,NA 28 | h50,339,numeric,211,339,numeric,211,h50,LRB,Measure of altitudinal maximum at a circle with a 50 mile radius.,ordinal,interval,data,96,NA,NA,NA 29 | l10,339,numeric,146,339,numeric,146,l10,LRB,Measure of altitudinal minimum at a circle with a 10 mile radius.,ordinal,interval,data,96,NA,NA,NA 30 | l25,339,numeric,154,339,numeric,154,l25,LRB,Measure of altitudinal minimum at a circle with a 25 mile radius.,ordinal,interval,data,96,NA,NA,NA 31 | l50,339,numeric,133,339,numeric,133,l50,LRB,Measure of altitudinal minimum at a circle with a 50 mile radius.,ordinal,interval,data,96,NA,NA,NA 32 | maxrange,339,numeric,265,339,numeric,265,maxrange,LRB,Difference between the lowest recorded point of elevation and the highest recorded point of elevation,ordinal,interval,data,96,NA,NA,NA 33 | latgroup,339,numeric,2,339,factor,2,latgroup,LRB,Binary indicator of proximity to equator,ordinal,ordinal,data,96,NA,NA,NA 34 | et,339,numeric,336,339,numeric,282,et,LRB,Effective temperature; (Equation: 4.01); (Table: 4.01); (Binford 2001:58),ordinal,interval,data,96,NA,NA,NA 35 | cmat,339,numeric,336,339,numeric,320,cmat,LRB,Mean annual temperature (degrees C); (Table: 4.01); (Binford 2001:58),ordinal,interval,data,96,NA,NA,NA 36 | mcm,339,numeric,314,339,numeric,314,mcm,LRB,Mean temperature of coldest month (degrees C); (Table: 4.01); (Binford 2001:59),ordinal,interval,data,96,NA,NA,NA 37 | mwm,339,numeric,275,339,numeric,276,mwm,LRB,Mean temperature of warmest month (degrees C); (Table: 4.01); (Binford 2001:59),ordinal,interval,data,96,NA,NA,NA 38 | temp,339,numeric,336,339,numeric,319,temp,LRB,Temperateness; (Equation: 4.02); (Binford 2001:59),ordinal,interval,data,96,NA,NA,NA 39 | mtemp,339,numeric,336,339,numeric,326,mtemp,LRB,Evenness in temperature across seasons; (Equation: 4.03); (Binford 2001:68),ordinal,interval,data,96,NA,NA,NA 
40 | clim,339,numeric,7,339,factor,7,clim,LRB,Ordination of the Earth's climates by effective temperature; (Table: 4.02); (Binford 2001:70),ordinal,ordinal,data,96,NA,NA,NA 41 | crr,339,numeric,339,339,numeric,339,crr,LRB,Annual rainfall (mm); (Table: 4.01); (Binford 2001:70),ordinal,ratio,data,96,NA,NA,NA 42 | rhigh,339,numeric,329,339,numeric,329,rhigh,LRB,Mean rainfall of wettest month (mm); (Table: 4.01); (Binford 2001:70),ordinal,ratio,data,96,NA,NA,NA 43 | rlow,339,numeric,253,339,numeric,245,rlow,LRB,Mean rainfall of driest month (mm); (Table: 4.01); (Binford 2001:70),ordinal,ratio,data,96,NA,NA,NA 44 | reven,339,numeric,339,339,numeric,183,reven,LRB,Unevenness in rainfall across seasons; (Equation: 4.04); (Binford 2001:70),ordinal,ratio,data,96,NA,NA,NA 45 | mrain,339,numeric,338,339,numeric,292,mrain,LRB,Evenness in rainfall across seasons (higher values => more even); (Equation: 4.06); (Binford 2001:72),ordinal,ratio,data,96,NA,NA,NA 46 | avwat,339,numeric,8,339,factor,8,avwat,LRB,Moisture ordination of climates; (Table: 4.05); (Binford 2001:79),ordinal,ordinal,data,96,NA,NA,NA 47 | sdtemp,339,numeric,337,339,numeric,293,sdtemp,LRB,Standard deviation of mean monthly temperature; (Binford 2001:70),ordinal,ratio,data,96,NA,NA,NA 48 | sdrain,339,numeric,339,339,numeric,328,sdrain,LRB,Standard deviation of mean monthly rainfall; (Binford 2001:70),ordinal,ratio,data,96,NA,NA,NA 49 | rrcorr,339,numeric,15,339,numeric,15,rrcorr,LRB,"Difference between month with greatest rainfall and that with highest temp. 
Using the warmest month as a reference point, the number of months (or parts thereof), positive or negative, that separate the wettest month from the warmest month; (Binford 2001:71)",ordinal,interval,data,96,NA,NA,NA 50 | rrcorr2,339,numeric,15,339,numeric,15,rrcorr2,LRB,"Positive scale for rrcorr: add 4.5 to rrcorr; if any subsequent values are negative, they are added to 12; (Table: 4.01); (Binford 2001:71)",ordinal,interval,data,96,NA,NA,NA 51 | rrcorr3,339,numeric,15,339,numeric,15,rrcorr3,LRB,"Like rrcorr2, except that in environments with a 12-month growing season, the value is set to 4.5 ; (Binford 2001:71)",ordinal,interval,data,96,NA,NA,NA 52 | season,339,numeric,4,339,factor,4,season,LRB,Season with greatest rainfall (derived from rrcorr2); (Binford 2001:71),categorical,nominal,data,96,NA,NA,NA 53 | medstab,339,numeric,336,339,numeric,139,medstab,LRB,Mediterranean climate indicator (higher values => more Mediterranean); (Equation: 4.05); (Binford 2001:72),ordinal,ratio,data,96,NA,NA,NA 54 | growc,339,numeric,13,339,numeric,13,growc,LRB,Effective growing season (number of months in growing season); (Table: 4.07); (Binford 2001:73),ordinal,ratio,data,96,NA,NA,NA 55 | pet,339,numeric,339,339,numeric,338,pet,LRB,Potential evapotranspiration; (Table: 4.07); (Binford 2001:75),ordinal,ratio,data,96,NA,NA,NA 56 | ae,339,numeric,339,339,numeric,339,ae,LRB,Actual evapotranspiration; (Table: 4.07); (Binford 2001:74),ordinal,ratio,data,96,NA,NA,NA 57 | watret,339,numeric,303,339,numeric,303,watret,LRB,Quantity of water actually present in the soil; (Table: 4.07); (Binford 2001:75),ordinal,ratio,data,96,NA,NA,NA 58 | watd,339,numeric,314,339,numeric,314,watd,LRB,Water deficit; (Table: 4.07); (Binford 2001:75),ordinal,ratio,data,96,NA,NA,NA 59 | snowac,339,numeric,141,339,numeric,140,snowac,LRB,Snow accumulation; (Table: 4.07); (Binford 2001:75),ordinal,ratio,data,96,NA,NA,NA 60 | ptoae,339,numeric,314,339,numeric,181,ptoae,LRB,Potential evapotranspiration relative to 
actual evapotranspiration (low values have higher plant growth potential); (Equation: pet/(1+ae)); (Binford 2001:78),ordinal,ratio,data,96,NA,NA,NA 61 | hirx,339,numeric,339,339,numeric,210,hirx,LRB,"Potential evapotranspiration relative to annual rainfall. Values less than 1 tend to be forested, greater than 1 arid, and around 1 savannas and steppes.; (Equation: pet/ccr); (Binford 2001:78)",ordinal,ratio,data,96,NA,NA,NA 62 | ptowatd,339,numeric,339,339,numeric,239,ptowatd,LRB,Potential evapotranspiration relative to water deficit (high values imply high plant growth potential); (Equation: pet/(1+watd)); (Binford 2001:78),ordinal,ratio,data,96,NA,NA,NA 63 | watdgrc,339,numeric,13,339,numeric,13,watdgrc,LRB,Number of months during growing season in which water deficit occurs; (Binford 2001:79),ordinal,ratio,data,96,NA,NA,NA 64 | defper,339,numeric,35,339,numeric,38,defper,LRB,Percentage of growing season with a water deficit; (Equation: 4.08); (Table: 4.07); (Binford 2001:79),ordinal,interval,data,96,NA,NA,NA 65 | perwret,339,numeric,35,339,numeric,35,perwret,LRB,Percentage of growing season with water stored in soil; (Equation: 4.07); (Binford 2001:79),ordinal,interval,data,96,NA,NA,NA 66 | perwltg,339,numeric,34,339,numeric,34,perwltg,LRB,Percentage of growing season for which water available is below the wilting point for plants; (Equation: 4.09); (Binford 2001:79),ordinal,interval,data,96,NA,NA,NA 67 | wltgrc,339,numeric,13,339,numeric,13,wltgrc,LRB,Number of months plants would wilt because of water deficit; (Table: 4.07); (Binford 2001:79),ordinal,ratio,data,96,NA,NA,NA 68 | nagp,339,numeric,339,339,numeric,339,nagp,LRB,Net above-ground productivity: new cell life added to a habitat as a result of photosynthesis and growth (g/sqm/yr); (Equation: 4.10); (Table: 4.07); (Binford 2001:79),ordinal,ratio,data,96,NA,NA,NA 69 | lnagp,339,numeric,339,339,numeric,157,lnagp,LRB,Log 10 of nagp; (Binford 2001:79),ordinal,ratio,data,96,NA,NA,NA 70 | 
bio5,339,numeric,339,339,numeric,339,bio5,LRB,Primary biomass; (Table: 4.07); (Binford 2001:85),ordinal,ratio,data,96,NA,NA,NA 71 | lbio5,339,numeric,339,339,numeric,174,lbio5,LRB,Log 10 of bio5; (Binford 2001:85),ordinal,ratio,data,96,NA,NA,NA 72 | bar5,339,numeric,339,339,numeric,325,bar5,LRB,Biomass accumulation ratio; (Equation: 4.15); (Table: 4.07); (Binford 2001:85),ordinal,ratio,data,96,NA,NA,NA 73 | lexprey,339,numeric,339,339,numeric,158,lexprey,LRB,(Log of) Secondary animal biomass; (Binford 2001:109),ordinal,ratio,data,96,NA,NA,NA 74 | sucstab2,339,numeric,339,339,numeric,229,sucstab2,LRB,Modified successional stability (a measure of likelihood of vegetation-clearing fires); (Binford 2001:171),ordinal,ratio,data,96,NA,NA,NA 75 | tlpop,339,numeric,249,339,numeric,249,tlpop,LRB,Total number of persons to whom the ethnographic description applies; (Table: 5.01); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 76 | area,339,numeric,295,339,numeric,295,area,LRB,Ethnographers' estimate of total land area occupied by the group (100 sqkm); (Table: 5.01); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 77 | density,339,numeric,302,339,numeric,302,density,LRB,Population density (==tlpop/area); (Table: 5.01); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 78 | group1,227,numeric,78,227,numeric,79,group1,LRB,Size of smallest group that regularly cooperates for subsistence; smallest self-sufficient group; (Table: 5.01 & 8.01); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 79 | group2,297,numeric,124,297,numeric,123,group2,LRB,The mean size of the consumer group that regularly camps together during the most aggregated phase of the yearly economic cycles; (Table: 5.01 & 8.01); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 80 | group3,216,numeric,114,216,numeric,114,group3,LRB,"The mean size of multigroup encampments that may aggregate periodically, but not necessarily annually, for immediate subsistence-related activities; (Table: 5.01 & 8.01); (Binford 
2001:117)",ordinal,ratio,data,96,NA,NA,NA 81 | lden,339,numeric,216,339,numeric,191,lden,LRB,Log 10 of density; (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 82 | lnpop,339,numeric,248,339,numeric,248,lnpop,LRB,Natural log of tlpop;,ordinal,ratio,data,96,NA,NA,NA 83 | diasz,339,numeric,255,339,numeric,255,diasz,LRB,Number of people in dialect group,ordinal,ratio,data,96,NA,NA,NA 84 | subpop,339,character,2,339,factor,2,subpop,LRB,Designates normal (n) vs exceptional (x) cases; (Table: 5.01); (Binford 2001:117),ordinal,nominal,data,96,NA,NA,NA 85 | wsubpop,339,character,2,339,factor,2,wsubpop,LRB,Flags societies that are suspected to be not pure foragers (Binford 2001:117),ordinal,nominal,data,96,NA,NA,NA 86 | hunting,339,numeric,48,339,numeric,48,hunting,LRB,Percent dependence on terrestrial animals; (Table: 5.01); (Binford 2001:117),ordinal,interval,data,96,NA,NA,NA 87 | gatherin,339,numeric,39,339,numeric,39,gatherin,LRB,Percent dependence on terrestrial plants; (Table: 5.01); (Binford 2001:117),ordinal,interval,data,96,NA,NA,NA 88 | fishing,339,numeric,60,339,numeric,60,fishing,LRB,Percent dependence on aquatic organisms ; (Table: 5.01); (Binford 2001:117),ordinal,interval,data,96,NA,NA,NA 89 | subsp,339,numeric,3,339,factor,3,subsp,LRB,"Which food type (hunted, gathered, fished) provides majority of nutritional intake; (Table: 5.01); (Binford 2001:117)",categorical,nominal,data,96,NA,NA,NA 90 | store,337,numeric,3,337,factor,3,store,LRB,Dependence upon storage; (Binford 2001:388),ordinal,nominal,data,96,NA,NA,NA 91 | subsp.1,339,character,3,339,factor,3,subsp.1,LRB,Primary source of food (character version of subsp); (Binford 2001:388),categorical,nominal,data,96,NA,NA,NA 92 | qtstor,337,numeric,4,337,factor,4,qtstor,LRB,Quantity of food stored ; (Binford 2001:389),ordinal,ordinal,data,96,NA,NA,NA 93 | subdiv2,339,numeric,92,339,numeric,92,subdiv2,LRB,"Subsistence diversity; (Equation: 100-stddev(""hunting"",""gatherin"",""fishing"") ); (Binford 
2001:403,fn2)",ordinal,interval,data,96,NA,NA,NA 94 | sudivord,339,numeric,5,339,factor,5,sudivord,LRB,Ordinal simplification (5 categories) of subdiv2.; (Binford 2001:404),ordinal,ordinal,data,96,NA,NA,NA 95 | noantrap,41,numeric,17,41,numeric,17,noantrap,LRB,The number of (types of) animal traps documented for the group.,ordinal,ratio,data,96,NA,NA,NA 96 | noantrapgrp,41,numeric,4,41,factor,4,noantrapgrp,LRB,Ordinal simplification of noantrap,ordinal,ordinal,data,96,NA,NA,NA 97 | hunt,339,numeric,6,339,factor,6,hunt,LRB,"Degree to which males conduct hunting, relative to females.",ordinal,ordinal,data,96,NA,NA,NA 98 | gath,339,numeric,6,339,factor,6,gath,LRB,"Degree to which males conduct gathering, relative to females.",ordinal,ordinal,data,96,NA,NA,NA 99 | fish,339,numeric,7,339,factor,7,fish,LRB,"Degree to which males conduct fishing, relative to females.",ordinal,ordinal,data,96,NA,NA,NA 100 | mdivlab,333,numeric,105,333,numeric,105,mdivlab,LRB,Percentage of total diet derived from male labor; (Binford 2001:280),ordinal,ordinal,data,96,NA,NA,NA 101 | housex,339,numeric,5,339,factor,5,housex,LRB,"The sexual division of labor in the construction of houses. 
Does not address the procurement of materials, only construction of the highest investment houses.",ordinal,nominal,data,96,NA,NA,NA 102 | forcol,339,numeric,3,339,factor,3,forcol,LRB,Code for forager-collector distinction; (Binford 2001:304),categorical,nominal,data,96,NA,NA,NA 103 | grppat,339,numeric,2,339,factor,2,grppat,LRB,Degree of mobility; (Table: 5.01); (Binford 2001:117),ordinal,ordinal,data,96,NA,NA,NA 104 | nomov,261,numeric,39,261,numeric,39,nomov,LRB,Total number of annual moves in residence of a household unit; (Table: 5.01 & 8.04); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 105 | dismov,236,numeric,110,236,numeric,110,dismov,LRB,Total distance (in miles) residence moved in a year (sum of all moves); (Table: 5.01 & 8.04); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 106 | dspmov,236,numeric,147,236,numeric,147,dspmov,LRB,Distance per move (in miles) (dspmov=dismov/nomov); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 107 | kspmov,255,numeric,143,255,numeric,143,kspmov,LRB,Average distance per move (in km) (kspmov=kmov/nomov); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 108 | lkmov,261,numeric,92,261,numeric,92,lkmov,LRB,Log 10 of kmov; (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 109 | lkspmov,255,numeric,85,255,numeric,85,lkspmov,LRB,Log 10 of kspmov; (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 110 | kmov,261,numeric,111,261,numeric,111,kmov,LRB,Summed distance (in km) moved per year by average household (kmov=dismov/.6214); (Binford 2001:117),ordinal,ratio,data,96,NA,NA,NA 111 | mhs,232,numeric,184,232,numeric,184,mhs,LRB,Mean household size ; (Table: 8.08); (Binford 2001:147),ordinal,ratio,data,96,NA,NA,NA 112 | famsz,136,numeric,102,136,numeric,102,famsz,LRB,Mean family size (total group size divided by number of married men); (Table: 8.08); (Binford 2001:286),ordinal,ratio,data,96,NA,NA,NA 113 | hhtype,115,character,21,115,factor,21,hhtype,LRB,Household type; (Table: 8.08); (Binford 2001: 
297),categorical,nominal,data,96,NA,NA,NA 114 | commun,335,numeric,5,335,factor,5,commun,LRB,"Community organization. The prevalence of local endogamy, agamy, & exogamy coded together with the presence or absence of localized kin groups. Can be interpreted as ordinal measure of endogamy; (Table: 9.01); (Binford 2001:320)",categorical,nominal,data,96,NA,NA,NA 115 | comstfun,339,numeric,7,339,factor,7,comstfun,LRB,"The functions and properties of structures with specific community-wide functions. These are not residences, nor are they multifunctional residences.; (Table: 9.01); (Binford 2001:320)",categorical,nominal,data,96,NA,NA,NA 116 | famhous,112,numeric,87,112,numeric,87,famhous,LRB,Average number of families per household (==mhs/famsz); (Table: 9.01); (Binford 2001:320),ordinal,ratio,data,96,NA,NA,NA 117 | packinx,339,numeric,227,339,numeric,227,packinx,LRB,Density rescaled to fit population packing concept: packinx=density/9.098; (Binford 2001:374),ordinal,ratio,data,96,NA,NA,NA 118 | lpackinx,339,numeric,191,339,numeric,191,lpackinx,LRB,Log 10 of packinx; (Binford 2001:375),ordinal,ratio,data,96,NA,NA,NA 119 | packord,339,numeric,4,339,factor,4,packord,LRB,"Ordinal measure for packing. 
The packing threshold is the value of population density at which there is one minimal group per foraging radius (20.47 persons/225 sq km = 9.1 persons), thus indicating a density value at which there is no longer unoccupied space into which mobile hunter-gatherers could move; (Binford 2001:375)",ordinal,ordinal,data,96,NA,NA,NA 120 | packing,339,numeric,2,339,factor,2,packing,LRB,Binary variable for population packing,ordinal,nominal,data,96,NA,NA,NA 121 | systate3recod,339,numeric,7,339,factor,7,systate3recod,LRB,Recode of systate3 by DR White; (Binford 2001:375),categorical,nominal,data,96,NA,NA,NA 122 | systate3,338,numeric,7,338,factor,7,systate3,LRB,Classification of foragers: system's state; (Table: 9.01); (Binford 2001:375),categorical,nominal,data,96,NA,NA,NA 123 | huntfil2,339,numeric,2,339,factor,2,huntfil2,LRB,Identifies mounted hunters. A binary variable produced from systate3; (Binford 2001:417),ordinal,nominal,data,96,NA,NA,NA 124 | spacing,163,numeric,3,163,factor,3,spacing,LRB,A normative estimate of the distance between houses in the settlement.,ordinal,ordinal,data,96,NA,NA,NA 125 | house80,339,numeric,5,339,factor,5,house80,LRB,House shape: Codes the shape of the ground plan of the most common type of shelter.,categorical,nominal,data,96,NA,NA,NA 126 | house81,339,numeric,4,339,factor,4,house81,LRB,Codes the character of the floor level within the house,categorical,nominal,data,96,NA,NA,NA 127 | house82,339,numeric,6,339,factor,6,house82,LRB,Codes the character of the wall material used in constructing the house,categorical,nominal,data,96,NA,NA,NA 128 | house83,339,numeric,5,339,factor,5,house83,LRB,Codes the shape of the roof or of walls and roof when the former are not distinct,categorical,nominal,data,96,NA,NA,NA 129 | house84,339,numeric,6,339,factor,6,house84,LRB,Codes the material used in roofing for the dominant house form,categorical,nominal,data,96,NA,NA,NA 130 | house85,243,numeric,5,243,factor,5,house85,LRB,Codes the shape of the 
ground plan of the most common type of shelter,categorical,nominal,data,96,NA,NA,NA 131 | house86,241,numeric,4,241,factor,4,house86,LRB,Codes the character of the floor level within the shelter,categorical,nominal,data,96,NA,NA,NA 132 | house87,242,numeric,5,242,factor,5,house87,LRB,Codes the character of the wall material used in constructing the shelter,categorical,nominal,data,96,NA,NA,NA 133 | house88,242,numeric,5,242,factor,5,house88,LRB,Codes the shape of the shelters roof or of walls and roof when the former are not distinct,categorical,nominal,data,96,NA,NA,NA 134 | house89,244,numeric,5,244,factor,5,house89,LRB,Codes the material used in roofing for the dominant shelter form,categorical,nominal,data,96,NA,NA,NA 135 | sed,339,numeric,4,339,factor,4,sed,LRB,Degree of sedentism,ordinal,ordinal,data,96,NA,NA,NA 136 | mobpat,339,numeric,6,339,factor,6,mobpat,LRB,Codes the mobility-group organizations of the subsistence-settlement pattern,categorical,nominal,data,96,NA,NA,NA 137 | mobp2,339,numeric,4,339,factor,4,mobp2,LRB,Routed foraging where the group feeds between target locations that are annually visited for purposes of obtaining raw materials to maintain technology.,categorical,nominal,data,96,NA,NA,NA 138 | sz1fam,143,numeric,104,143,numeric,104,sz1fam,LRB,Size of a single family dwelling [size is reported as the diameter (m) of a circle with area equal to the area of the dwelling],ordinal,ratio,data,96,NA,NA,NA 139 | szjoint,104,numeric,80,104,numeric,80,szjoint,LRB,Size of a joint family dwelling [size reported as diameter (m) of a circle with area equal to the area of the dwelling],ordinal,ratio,data,96,NA,NA,NA 140 | szcomu,56,numeric,53,56,numeric,53,szcomu,LRB,Size of a communal dwelling [size reported as diameter (m) of a circle with area equal to the area of the dwelling],ordinal,ratio,data,96,NA,NA,NA 141 | szmean,206,numeric,170,206,numeric,170,szmean,LRB,Mean dwelling size [size reported as diameter (m) of a circle with area equal to the 
area of the dwelling],ordinal,ratio,data,96,NA,NA,NA 142 | hhtyp1,115,numeric,8,115,factor,8,hhtyp1,LRB,"Primary household type, based on initial capital letter in code for hhtype; (Table: 8.08); (Binford 2001: 297)",categorical,nominal,data,96,NA,NA,NA 143 | g2mhs,218,numeric,192,218,numeric,192,g2mhs,LRB,Number of households per group2 (==group2/mhs); (Table: 9.01); (Binford 2001:320),ordinal,ratio,data,96,NA,NA,NA 144 | prevalue,218,numeric,148,218,numeric,148,prevalue,LRB,Variable is in Table 9.01. initial attempt at calculating G2MHS. unclear meaning.; (Table: 9.01); (Binford 2001:320),ordinal,unknown,data,96,NA,NA,NA 145 | mhsset,219,numeric,5,219,numeric,5,mhsset,LRB,Classification by values of mhs and hougrp2. Initial version. (Table: 9.01); (Binford 2001:320),categorical,ordinal,data,96,NA,NA,NA 146 | mhset2,218,numeric,5,218,numeric,5,mhset2,LRB,Mean Household Size Set: the classification of the relationship between mhs and hougrp2; (Table: 9.01); (Binford 2001:320),categorical,ordinal,data,96,NA,NA,NA 147 | predg2mh,211,numeric,141,211,numeric,141,predg2mh,LRB,Initial attempt at calculating G2MHS. unclear meaning. 
(Table: 9.01); (Binford 2001:320),ordinal,unknown,data,96,NA,NA,NA 148 | g2famsz,120,numeric,114,120,numeric,114,g2famsz,LRB,Number of families per group2 (==group2/famsz); (Table: 9.01); (Binford 2001:320),ordinal,ratio,data,96,NA,NA,NA 149 | g1famsz,110,numeric,100,110,numeric,100,g1famsz,LRB,Number of families per group1 (==group1/famsz); (Equation: group1/famsz); (Table: 9.01); (Binford 2001:333),ordinal,ratio,data,96,NA,NA,NA 150 | g1mhs,174,numeric,146,174,numeric,146,g1mhs,LRB,Number of households per group1 (==group1/mhs); (Equation: group1/mhs); (Table: 9.01); (Binford 2001:333),ordinal,ratio,data,96,NA,NA,NA 151 | g2g1,217,numeric,136,217,numeric,136,g2g1,LRB,Number of group1 units in a group2 unit; (Equation: group2/group1); (Binford 2001:336),ordinal,ratio,data,96,NA,NA,NA 152 | g2mhset2,219,numeric,8,219,factor,8,g2mhset2,LRB,Classification based on relationship between mhs and group2; (Table: 9.01); (Binford 2001:337),categorical,nominal,data,96,NA,NA,NA 153 | g2mhset3,217,numeric,4,217,factor,4,g2mhset3,LRB,Simplified version of g2mhset2; (Table: 9.01); (Binford 2001:340),categorical,nominal,data,96,NA,NA,NA 154 | g2basord,291,numeric,12,291,numeric,13,g2basord,LRB,Basal unit size for group2 --ordinal classification; (Table: 9.01); (Binford 2001:348),ordinal,ordinal,data,96,NA,NA,NA 155 | hougrp2,222,numeric,212,222,numeric,215,hougrp2,LRB,Mean number of houses per group2 unit. 
(Equation: group2/mhs); (Binford 2001:344),ordinal,ratio,data,96,NA,NA,NA 156 | money,339,numeric,4,339,factor,4,money,LRB,Codes the presence or absence of the use of money within the society; (Table: 9.01); (Binford 2001:320),ordinal,nominal,data,96,NA,NA,NA 157 | occspe,339,numeric,4,339,factor,4,occspe,LRB,Occupational specialties reported which are not tied to the sexual division of labor or tendencies for role differences.; (Table: 9.01); (Binford 2001:320),ordinal,ordinal,data,96,NA,NA,NA 158 | owners,339,numeric,4,339,factor,4,owners,LRB,Ownership of resource locations ; (Table: 9.01); (Binford 2001:426),categorical,nominal,data,96,NA,NA,NA 159 | indtrd,317,numeric,6,317,factor,6,indtrd,LRB,The organization of individual to individual trade in manufactured goods or raw materials across local group boundaries.,categorical,nominal,data,96,NA,NA,NA 160 | indtfo,287,numeric,4,287,factor,4,indtfo,LRB,The presence and form of exchanges where FOOD is one of the materials involved in exchanges across local group boundaries.,categorical,nominal,data,96,NA,NA,NA 161 | grptrd,296,numeric,5,296,factor,5,grptrd,LRB,The way of organizing group to group trade and exchange within a region,categorical,nominal,data,96,NA,NA,NA 162 | orgfair,311,numeric,7,311,factor,7,orgfair,LRB,Manner of organization of trade fairs or special trade events in which food is the primary commodity that is desired in exchange.,categorical,nominal,data,96,NA,NA,NA 163 | excorg,291,numeric,6,291,factor,6,excorg,LRB,"The characteristic exchange relationships between host unit and guest unit in intercommunity events when goods are supplied to guests, the group tabulated is the host and it is an event where exchange is a conventionally included part of the activities.",categorical,nominal,data,96,NA,NA,NA 164 | perogat,337,numeric,5,337,factor,5,perogat,LRB,The social and economic prerogatives which accompany the leadership role.; (Table: 9.01); (Binford 2001:338),ordinal,nominal,data,96,NA,NA,NA 165 | 
leader,339,numeric,2,339,factor,2,leader,LRB,Predominant type of leadership; (Binford 2001:388),categorical,nominal,data,96,NA,NA,NA 166 | polyscal,339,numeric,4,339,factor,4,polyscal,LRB,Measure of scale for the demographic and spatial scope of political organization.; (Table: 9.01); (Binford 2001:252),ordinal,nominal,data,96,NA,NA,NA 167 | class,339,numeric,3,339,factor,3,class,LRB,Type of social class distinction; (Table: 9.01); (Binford 2001:320),categorical,nominal,data,96,NA,NA,NA 168 | polpos,339,numeric,3,339,factor,3,polpos,LRB,Code refers to the political and economic position of the group relative to other groups in the region; (Table: 9.01); (Binford 2001:345),categorical,nominal,data,96,NA,NA,NA 169 | intform,339,numeric,2,339,factor,2,intform,LRB,Form of integration.,categorical,nominal,data,96,NA,NA,NA 170 | headm,339,numeric,3,339,factor,3,headm,LRB,The patterns of succession of acknowledged leaders in the maximal politically integrated unit represented by the case.,categorical,nominal,data,96,NA,NA,NA 171 | shaman,313,numeric,4,313,factor,4,shaman,LRB,"Codes the presence and scale of shaman's rituals as a social and organized event beyond their functioning as curers, healers, etc. at the personal or family level. 
This also excludes rituals conducted only for shamans that may qualify as secret societies",ordinal,ordinal,data,96,NA,NA,NA 172 | intcon,327,numeric,3,327,factor,3,intcon,LRB,Frequency of internal conflict.,ordinal,ordinal,data,96,NA,NA,NA 173 | intres,324,numeric,4,324,factor,4,intres,LRB,"The means culturally available for resolving conflict and/or deducing punishments, if any, within GP2.",categorical,nominal,data,96,NA,NA,NA 174 | gpgpcon,333,numeric,4,333,factor,4,gpgpcon,LRB,Scale of inter-group violence which is conducted with groups beyond the definition of GP2 and/or GP3,ordinal,ordinal,data,96,NA,NA,NA 175 | gpgpres,337,numeric,4,337,factor,4,gpgpres,LRB,The means available for resolving conflicts between groups as coded in GPGPCON.,categorical,nominal,data,96,NA,NA,NA 176 | war1,339,numeric,5,339,factor,5,war1,LRB,Scale of intensity of warfare. How frequent and how widespread it may be regionally.,ordinal,ordinal,data,96,NA,NA,NA 177 | conpos,336,numeric,3,336,factor,3,conpos,LRB,The posture of the particular group relative to the intensity of warfare within the region as coded in WAR.,ordinal,ordinal,data,96,NA,NA,NA 178 | enemy,315,numeric,4,315,factor,4,enemy,LRB,The treatment of the bodies of those killed in group-group conflicts.,ordinal,ordinal,data,96,NA,NA,NA 179 | prison,316,numeric,3,316,factor,3,prison,LRB,"Codes the treatment of prisoners upon return to the ""home"" group",ordinal,ordinal,data,96,NA,NA,NA 180 | slave,339,numeric,3,339,factor,3,slave,LRB,Status of slaves in the society.,ordinal,ordinal,data,96,NA,NA,NA 181 | warlead,331,numeric,3,331,factor,3,warlead,LRB,Manner of choosing a leader in group-group conflict situations.,ordinal,nominal,data,96,NA,NA,NA 182 | boyseg38,317,numeric,3,317,factor,3,boyseg38,LRB,Codes the segregation of adolescent boys prior to initiation or at the time of puberty,ordinal,ordinal,data,96,NA,NA,NA 183 | initm,317,numeric,5,317,factor,5,initm,LRB,Scale and elaborateness of male puberty 
rituals.,ordinal,ordinal,data,96,NA,NA,NA 184 | initexm,300,numeric,5,300,factor,5,initexm,LRB,The degree of exclusiveness of the male centered rituals. (Do women observe or participate?),ordinal,ordinal,data,96,NA,NA,NA 185 | initf,313,numeric,5,313,factor,5,initf,LRB,The scale and social investments in female puberty rites.,categorical,ordinal,data,96,NA,NA,NA 186 | dom1,312,numeric,6,312,factor,6,dom1,LRB,The collapsed classes of rituals as outlined in sections a-f above for the dominant ritual.,categorical,nominal,data,96,NA,NA,NA 187 | dom2,302,numeric,6,302,factor,6,dom2,LRB,The collapsed classes of rituals as outlined in sections a-f above for the second-most dominant ritual.,categorical,nominal,data,96,NA,NA,NA 188 | deadav,299,numeric,3,299,factor,3,deadav,LRB,The presence/absence of taboos against speaking the name of the deceased after death.,ordinal,ordinal,data,96,NA,NA,NA 189 | death,295,numeric,3,295,factor,3,death,LRB,Codes the response to death as regards housing and the use of the camp,ordinal,nominal,data,96,NA,NA,NA 190 | discomp,292,numeric,4,292,factor,4,discomp,LRB,The disposal event sequence regarding only the treatment of the corpse.,ordinal,ordinal,data,96,NA,NA,NA 191 | discomp2,303,numeric,2,303,factor,2,discomp2,LRB,The number of major steps relating to the disposal of the corpse,ordinal,ordinal,data,96,NA,NA,NA 192 | disloc,296,numeric,3,296,factor,3,disloc,LRB,Codes the location of the final step in the disposal sequence.,ordinal,nominal,data,96,NA,NA,NA 193 | bodyt,311,numeric,3,311,factor,3,bodyt,LRB,The state of the skeletal remains on final disposal.,ordinal,nominal,data,96,NA,NA,NA 194 | dispc,318,numeric,6,318,factor,6,dispc,LRB,"This code has reference to the mode of disposal for all persons or persons in the prime of life only. There may be alternative modes of disposal for the very young, aged, or for special conditions of death. 
This has primary reference to the situation of the decay of the body or the use of soft parts",categorical,nominal,data,96,NA,NA,NA 195 | disdiff,323,numeric,4,323,factor,4,disdiff,LRB,Differences in body disposal modes across persons,categorical,nominal,data,96,NA,NA,NA 196 | dritual,293,numeric,3,293,factor,3,dritual,LRB,The sequential complexity of mortuary rituals regularly performed after a death.,ordinal,ordinal,data,96,NA,NA,NA 197 | ritscal,298,numeric,4,298,factor,4,ritscal,LRB,The scale of non-immediate family involvement in the mortuary rituals.,ordinal,ordinal,data,96,NA,NA,NA 198 | ritfocus,258,numeric,3,258,factor,3,ritfocus,LRB,The goals of the mortuary ritual or its ideological tone.,categorical,nominal,data,96,NA,NA,NA 199 | caudeath,294,numeric,4,294,factor,4,caudeath,LRB,"The emic beliefs regarding the causes of death insofar as they are related to aspects of mortuary behavior. Can be interpreted as ordinal measure of the degree to which death is considered emically ""unnatural"".",categorical,ordinal,data,96,NA,NA,NA 200 | divmor,288,numeric,3,288,factor,3,divmor,LRB,Role and importance of divination in mortuary ritual.,ordinal,ordinal,data,96,NA,NA,NA 201 | usebody,299,numeric,5,299,factor,5,usebody,LRB,The use of body parts from the deceased during the mortuary rituals.,categorical,nominal,data,96,NA,NA,NA 202 | gcont,299,numeric,3,299,factor,3,gcont,LRB,The treatment of the durable goods which were brought for the use during the mortuary rituals. 
Contributed goods are not the goods of the deceased.,categorical,nominal,data,96,NA,NA,NA 203 | gdist,293,numeric,2,293,factor,2,gdist,LRB,"The treatment of the majority of the durable items which could be considered as associated with or ""owned"" by the deceased.",categorical,nominal,data,96,NA,NA,NA 204 | gfur,291,numeric,3,291,factor,3,gfur,LRB,The placement of personalities with the corpse in its final resting place.,categorical,nominal,data,96,NA,NA,NA 205 | revres,251,numeric,3,251,factor,3,revres,LRB,The organization of revenge activity which is generally conducted during the mourning period of the mortuary rites.,ordinal,ordinal,data,96,NA,NA,NA 206 | agem,179,numeric,26,179,numeric,26,agem,LRB,Average age of males at the time of their first marriage.; (Table: 8.07); (Binford 2001:281),ordinal,ratio,data,96,NA,NA,NA 207 | agef,205,numeric,26,205,numeric,26,agef,LRB,Average age of females at the time of their first marriage.; (Table: 8.07); (Binford 2001:281),ordinal,ratio,data,96,NA,NA,NA 208 | agedif,177,numeric,30,177,numeric,30,agedif,LRB,The difference between mean age at first marriage of husbands and wives. 
; (Binford 2001:299),ordinal,ratio,data,96,NA,NA,NA 209 | polyg,211,numeric,52,211,numeric,52,polyg,LRB,"Percentage of males married polygynously, with some additional values added to those found in table; (Table: 8.07); (Binford 2001:281)",ordinal,interval,data,96,NA,NA,NA 210 | polygrecod,221,numeric,53,221,numeric,53,polygrecod,LRB,Percentage of males married polygynously (polyg) recoded by Doug White to distinguish monogamy from missing values; (Table: 8.07); (Binford 2001:281),ordinal,interval,data,96,NA,NA,NA 211 | wx.polygny,191,numeric,57,191,numeric,57,wx.polygny,LRB,"Percentage of males married polygynously, as-is from table; (Table: 8.07); (Binford 2001:281)",ordinal,interval,data,96,NA,NA,NA 212 | agecom,332,numeric,4,332,factor,4,agecom,LRB,"The average difference between males and females at the time of initial marriage of each (i.e., this is not the age difference between the marriage partners).",ordinal,ratio,data,96,NA,NA,NA 213 | res1,334,numeric,5,334,factor,5,res1,LRB,This code has reference to the immediately post-marital residence of the married couple,categorical,nominal,data,96,NA,NA,NA 214 | fres1,310,numeric,3,310,factor,3,fres1,LRB,The residential association of the married couple after birth of children when they are an established family.,categorical,nominal,data,96,NA,NA,NA 215 | fres2,310,numeric,5,310,factor,5,fres2,LRB,The residential association of the married couple after the birth of their first child when they are established as a family. This complements the initial residence code.,categorical,nominal,data,96,NA,NA,NA 216 | levira,275,numeric,2,275,factor,2,levira,LRB,The presence or absence of the passing of wives of a deceased male to close male relatives of the deceased. Binary variable.,ordinal,nominal,data,96,NA,NA,NA 217 | sorora,262,numeric,2,262,factor,2,sorora,LRB,The presence or absence of simultaneous sororal polygyny. 
Binary variable.,ordinal,nominal,data,96,NA,NA,NA 218 | kinmar,330,numeric,4,330,factor,4,kinmar,LRB,The form of the exogamic practices/ preferences.,categorical,nominal,data,96,NA,NA,NA 219 | marsel,334,numeric,4,334,factor,4,marsel,LRB,The manner in which marriages are negotiated and arranged within the society.,categorical,nominal,data,96,NA,NA,NA 220 | marrycer,334,numeric,4,334,factor,4,marrycer,LRB,The scale of investment in marriage ceremonies both in terms of the scale of participation and the goods and labor involved.,ordinal,ordinal,data,96,NA,NA,NA 221 | polygn,325,numeric,7,325,factor,7,polygn,LRB,Codes on an ordinal scale an estimate of the percentage of males married multiple wives,ordinal,ordinal,data,96,NA,NA,NA 222 | mardir,323,numeric,4,323,factor,4,mardir,LRB,"This code scales the relative balance of the flow of goods and services in marriage arrangements. This code does not scale the absolute quantity of goods and services, only the relative imbalance in terms of the standards of the community itself.",ordinal,ordinal,data,96,NA,NA,NA 223 | marprop,322,numeric,4,322,factor,4,marprop,LRB,The investments in property exchanges made by the kinsmen of the prospective bride and groom.?,ordinal,ordinal,data,96,NA,NA,NA 224 | marinv,322,numeric,3,322,factor,3,marinv,LRB,The types of investment made in marriages.,categorical,nominal,data,96,NA,NA,NA 225 | divorce,339,numeric,3,339,factor,3,divorce,LRB,Codes the difficulty of obtaining a sanctioned divorce within the society.,ordinal,ordinal,data,96,NA,NA,NA 226 | kincon,339,numeric,2,339,factor,2,kincon,LRB,The basic convention for identifying kin for an unmarried child.,categorical,nominal,data,96,NA,NA,NA 227 | kinstr,339,numeric,5,339,factor,5,kinstr,LRB,The basic structure as used of the kinship conventions.,categorical,nominal,data,96,NA,NA,NA 228 | kinbia1,339,numeric,3,339,factor,3,kinbia1,LRB,Records the bias in the kinship structure as regards the side which is extended more or keyed upon 
cognitively for discussing exogamy,categorical,nominal,data,96,NA,NA,NA 229 | augment,285,numeric,9,285,factor,9,augment,LRB,Codes the ways of augmenting the kinspersons of an individual that are not done through birth or marriage,categorical,nominal,data,96,NA,NA,NA 230 | augmen2,333,numeric,3,333,factor,3,augmen2,LRB,The number of strategies used to augment kinship [in HGKIN printouts from 9/25/94 with LRB handwritten notes re recoding],categorical,ordinal,data,96,NA,NA,NA 231 | sodal,328,numeric,6,328,factor,6,sodal,LRB,Sodalities: collapsed version of adjun [in HGKIN printouts from 9/25/94 with LRB handwritten notes re recoding],categorical,nominal,data,96,NA,NA,NA 232 | kinder,339,numeric,3,339,factor,3,kinder,LRB,Kinship derived units are classes of kinspersons that are identifiable as higher order classes which are consistent with the cognitive conventions of the kinship system,categorical,nominal,data,96,NA,NA,NA 233 | kinbia2,339,numeric,3,339,factor,3,kinbia2,LRB,"Codes the bias in the (KINDER) variable as regards the filiation of persons in kindreds, sibs, clans, etc",categorical,nominal,data,96,NA,NA,NA 234 | elabor,322,numeric,9,322,factor,9,elabor,LRB,"Codes for cognitive elaborations or more general classes which are derivable from the underlying system of kin cognition. Most of the time these elaborations serve no organizational function beyond providing rules of thumb for discussing exogamy and using these general categories for placing ""strangers"" into the web of kinship of the speaker. The coding of cases here implies no additional functions for these units beyond those discussed here.",categorical,nominal,data,96,NA,NA,NA 235 | elabor2,321,numeric,4,321,factor,4,elabor2,LRB,"The situation where elaborations are given social functions beyond those listed above such that they imply more than marriage arrangements, etc. 
[in HGKIN printouts from 9/25/94 with LRB handwritten notes re recoding]",categorical,nominal,data,96,NA,NA,NA 236 | elabor3,321,numeric,4,321,factor,4,elabor3,LRB,Revised version of elabor2 [in HGKIN printouts from 9/25/94 with LRB handwritten notes re recoding],categorical,nominal,data,96,NA,NA,NA 237 | elabor4,321,numeric,3,321,factor,3,elabor4,LRB,Revised version of elabor3 [in HGKIN printouts from 9/25/94 with LRB handwritten notes re recoding],categorical,nominal,data,96,NA,NA,NA 238 | diffocus,323,numeric,4,323,factor,4,diffocus,LRB,Codes the domain within which the elaborations and adjunct differentiations primarily function. Whether they contribute primarily to the internal complexity of the system or whether they provide the organizational basis for relating to and interacting in an expansive manner with adjacent groups is the focus of this code,categorical,nominal,data,96,NA,NA,NA 239 | adjun,325,numeric,11,325,factor,11,adjun,LRB,Codes the presence of organizational features which are discussed cognitively in kinship terms or refer to behaviors normally conventionalized by kinship but which are not derivable from the underlying properties of the kinship cognition itself,categorical,nominal,data,96,NA,NA,NA 240 | adjun2,336,numeric,4,336,factor,4,adjun2,LRB,Number of forms of adjunct kin [in HGKIN printouts from 9/25/94 with LRB handwritten notes re recoding],categorical,nominal,data,96,NA,NA,NA 241 | adjun3,324,numeric,3,324,factor,3,adjun3,LRB,Types of adjunct kin strategy [in HGKIN printouts from 9/25/94 with LRB handwritten notes re recoding],categorical,ordinal,data,96,NA,NA,NA 242 | adjun4,334,numeric,6,334,factor,6,adjun4,LRB,Collapse of adjun [in HGKIN printouts from 9/25/94 with LRB handwritten notes re recoding],categorical,nominal,data,96,NA,NA,NA 243 | kinexo,339,numeric,4,339,factor,4,kinexo,LRB,Codes the limitation of kin extension characteristic of the society. 
The code scales the lateral extension of the exogamic practice in the group,ordinal,nominal,data,96,NA,NA,NA 244 | dkinex,339,numeric,3,339,factor,3,dkinex,LRB,The situation where there are marked differences in the exogamic practices of the leaders and/or elite and the general population in the patterns of exogamic extension or restriction.,ordinal,ordinal,data,96,NA,NA,NA 245 | nenept,339,numeric,3,339,factor,3,nenept,LRB,The kinship terminology for parallel nephews and nieces.,categorical,nominal,data,96,NA,NA,NA 246 | nenext,338,numeric,4,338,factor,4,nenext,LRB,The kinship terminology for cross nephews and nieces when they are distinguished from parallel nephews and nieces.,categorical,nominal,data,96,NA,NA,NA 247 | kinterm2,314,numeric,4,314,factor,4,kinterm2,LRB,The type of terminology used for first cousins.,categorical,nominal,data,96,NA,NA,NA 248 | aunt,313,numeric,4,313,factor,4,aunt,LRB,Kin terminology employed for aunts.,categorical,nominal,data,96,NA,NA,NA 249 | gpaterm,252,numeric,5,252,factor,5,gpaterm,LRB,Kin terms for grandparents.,categorical,nominal,data,96,NA,NA,NA 250 | ggpater,322,numeric,4,322,factor,4,ggpater,LRB,Kin terms for great grandparents.,categorical,nominal,data,96,NA,NA,NA 251 | minlaw,332,numeric,2,332,factor,2,minlaw,LRB,The presence or absence of behavioral mother-in-law avoidance and other restrictions on behavior. 
Binary variable.,ordinal,nominal,data,96,NA,NA,NA 252 | male.mm,151,numeric,96,147,numeric,96,male.mm,LRB,Male height in millimeters; (Table: 6.03); (Binford 2001:183),ordinal,ratio,data,96,NA,NA,NA 253 | female.mm,113,numeric,79,110,numeric,78,female.mm,LRB,Female height in millimeters; (Table: 6.03); (Binford 2001:183),ordinal,ratio,data,96,NA,NA,NA 254 | male.kg,40,numeric,33,34,numeric,31,male.kg,LRB,Male weight in kilograms; (Table: 6.03); (Binford 2001:183),ordinal,ratio,data,96,NA,NA,NA 255 | female.kg,20,numeric,16,15,numeric,14,female.kg,LRB,Female weight in kilograms; (Table: 6.03); (Binford 2001:183),ordinal,ratio,data,96,NA,NA,NA 256 | termhnt,339,numeric,330,339,numeric,330,termhnt,LRB,Proportion of food in a-cultural terrestrial model from hunting,ordinal,interval,data,96,NA,NA,NA 257 | termgath,329,numeric,329,329,numeric,329,termgath,LRB,Proportion of food in a-cultural terrestrial model from gathering,ordinal,interval,data,96,NA,NA,NA 258 | termh2,339,numeric,339,339,numeric,329,termh2,LRB,Terrestrial model hunting density: Number of persons per 100 sqkm who could be supported from ungulate resources alone; (Equation: 6.13); (Binford 2001:187),ordinal,ratio,data,96,NA,NA,NA 259 | termg2,339,numeric,339,329,numeric,324,termg2,LRB,Terrestrial model gathering density: Number of persons per 100 sqkm who could be supported from terrestrial plant foods alone; (Equation: 6.14); (Binford 2001:187),ordinal,ratio,data,96,NA,NA,NA 260 | termd2,339,numeric,339,339,numeric,337,termd2,LRB,"Terrestrial model population density: Population density (adjusted for body size) expected at a particular location, based on terrestrial model (persons per 100 sqkm); (Equation: 6.15); (Binford 2001:187)",ordinal,ratio,data,96,NA,NA,NA 261 | subspx,339,character,4,339,factor,4,subspx,LRB,Dominant food source predicted by Binford's Terrestrial Model; (Binford 2001:203),categorical,nominal,data,96,NA,NA,NA 262 | nicheffg,339,numeric,290,339,numeric,290,nicheffg,LRB,Ratio of 
niche effectiveness (measured density compared to terrestrial model density) in exploitation of terrestrial plant resources; (Equation: 10.02); (Binford 2001:373),ordinal,ratio,data,96,NA,NA,NA 263 | nicheffh,339,numeric,311,339,numeric,311,nicheffh,LRB,Ratio of niche effectiveness (measured density compared to terrestrial model density) in exploitation of terrestrial animal resources; (Equation: 10.03); (Binford 2001:373),ordinal,ratio,data,96,NA,NA,NA 264 | nicheff,339,numeric,339,339,numeric,339,nicheff,LRB,Ratio of niche effectiveness (measured density compared to terrestrial model density); (Equation: 10.01); (Binford 2001:373),ordinal,ratio,data,96,NA,NA,NA 265 | prindx,339,numeric,229,339,numeric,229,prindx,LRB,"Ratio of population density to TERMD2, the terrestrial model population density - a very simple measure of pressure on terrestrial resources = how many times more densely populated than a cultural terrestrial carrying capacity are observed HGs (Equation: density/termd2)",ordinal,ratio,data,96,NA,NA,NA 266 | cvtemp,339,numeric,337,339,numeric,337,cvtemp,LRB,Coefficient of variation of monthly temperature array.,ordinal,interval,data,96,Binford 2001: 71,NA,NA 267 | elev,339,numeric,261,339,numeric,261,elev,LRB,Elevation at group's central point.,ordinal,interval,data,96,NA,NA,NA 268 | lbar5,339,numeric,339,339,numeric,339,lbar5,LRB,Log10 value of BAR5,ordinal,ratio,data,96,NA,NA,NA 269 | lcoklm,339,numeric,170,339,numeric,170,lcoklm,LRB,Log10 of COKLM,ordinal,ratio,data,96,Binford 2001: 154,NA,NA 270 | lcvtemp,339,numeric,337,339,numeric,337,lcvtemp,LRB,Log10 value of CVTEMP,ordinal,interval,data,96,NA,NA,NA 271 | lptoae,339,numeric,314,339,numeric,314,lptoae,LRB,Log10 value of PTOAE,ordinal,ratio,data,96,NA,NA,NA 272 | lptorun,339,numeric,339,339,numeric,339,lptorun,LRB,Log10 value of PTORUN,ordinal,ratio,data,96,NA,NA,NA 273 | lsnowac,339,numeric,141,339,numeric,141,lsnowac,LRB,Log10 value of SNOWAC,ordinal,ratio,data,96,NA,NA,NA 274 | 
lwaccess,339,numeric,59,339,numeric,59,lwaccess,LRB,Log10 value of WACCESS,ordinal,ratio,data,96,NA,NA,NA 275 | rungrc,339,numeric,13,339,numeric,13,rungrc,LRB,Number of months in the growing season in which runoff is greater than zero.,ordinal,ratio,data,96,Binford 2001: 79,NA,NA 276 | wret,339,numeric,310,339,numeric,310,wret,LRB,Water retention: if CRR is greater than PET then the excess water may have been added to the water already stored in the soil.,ordinal,ratio,data,96,Binford 2001: 75,NA,NA 277 | ptorun,339,numeric,339,339,numeric,339,ptorun,LRB,PTORUN = PET/(RUNOFF+1). Ratio of potential evapotranspiration to runoff. Predictor of all true forest plant associations.,ordinal,ratio,data,96,Binford 2001: 79,NA,NA 278 | ldefper,339,numeric,35,339,numeric,35,ldefper,LRB,Log10 value of DEFPER,ordinal,interval,data,96,NA,NA,NA 279 | lgather,339,numeric,39,339,numeric,39,lgather,LRB,Log10 value of GATHERIN,ordinal,interval,data,96,NA,NA,NA 280 | trange,339,numeric,313,339,numeric,313,trange,LRB,Range of temperatures TRANGE = (MWM-MCM),ordinal,ratio,data,96,Binford 2001: 59,NA,NA 281 | watrgrc,339,numeric,13,339,numeric,13,watrgrc,LRB,Number of months in the growing season in which water is retained in the soil.,ordinal,ratio,data,96,NA,NA,NA 282 | lmeanelev,339,numeric,318,339,numeric,318,lmeanelev,LRB,Log10 value of MEANELEV,ordinal,interval,data,96,NA,NA,NA 283 | lwatrgrc,339,numeric,13,339,numeric,13,lwatrgrc,LRB,Log10 value of WATRGRC,ordinal,ratio,data,96,NA,NA,NA 284 | pgrow,339,numeric,34,339,numeric,34,pgrow,LRB,Weighted measure of the simultaneous presence of both water and solar radiation available to the plant community. 
Scales from 0-36; 0 indicates no chance for plant growth; 36 indicates good growing conditions year round.,ordinal,interval,data,96,Binford 2001: 85,NA,NA 285 | lrunoff,339,numeric,303,339,numeric,303,lrunoff,LRB,Log10 value of RUNOFF,ordinal,ratio,data,96,NA,NA,NA 286 | lsstab2,339,numeric,339,339,numeric,339,lsstab2,LRB,Log10 value of SUCSTAB2,ordinal,ratio,data,96,NA,NA,NA 287 | waccess,339,numeric,59,339,numeric,59,waccess,LRB,Calculates plant access to water during the growing season.,ordinal,ratio,data,96,NA,NA,NA 288 | lfishing,339,numeric,55,339,numeric,55,lfishing,LRB,Log10 value of FISHING,ordinal,interval,data,96,NA,NA,NA 289 | hg142,339,numeric,2,339,numeric,2,hg142,LRB,Sample of hunter-gatherer cases selected by vegetation type proportional to Earth's area in each vegetation type,ordinal,nominal,data,96,NA,Binford 2001: 144-154,NA 290 | setn11,339,numeric,2,339,numeric,2,setn11,LRB,"Marker for subset of hunter-gatherer cases for which SUBPOP = N, GRPPAT = 1, and SUBSP=1",ordinal,nominal,data,96,NA,NA,NA 291 | setn12,339,numeric,2,339,numeric,2,setn12,LRB,"Marker for subset of hunter-gatherer cases for which SUBPOP = N, GRPPAT = 1, and SUBSP=2",ordinal,nominal,data,96,NA,NA,NA 292 | setn13,339,numeric,2,339,numeric,2,setn13,LRB,"Marker for subset of hunter-gatherer cases for which SUBPOP = N, GRPPAT = 1, and SUBSP=3",ordinal,nominal,data,96,NA,NA,NA 293 | setn21,339,numeric,2,339,numeric,2,setn21,LRB,"Marker for subset of hunter-gatherer cases for which SUBPOP = N, GRPPAT = 2, and SUBSP=1",ordinal,nominal,data,96,NA,NA,NA 294 | setn22,339,numeric,2,339,numeric,2,setn22,LRB,"Marker for subset of hunter-gatherer cases for which SUBPOP = N, GRPPAT = 2, and SUBSP=2",ordinal,nominal,data,96,NA,NA,NA 295 | setn23,339,numeric,2,339,numeric,2,setn23,LRB,"Marker for subset of hunter-gatherer cases for which SUBPOP = N, GRPPAT = 2, and SUBSP=3",ordinal,nominal,data,96,NA,NA,NA 296 | pathogen,339,numeric,13,339,numeric,13,pathogen,LRB,Pathogen prevalence (Fincher 
and Thornhill 2008),ordinal,ratio,data,98,NA,NA,NA 297 | numfam,128,numeric,126,128,numeric,126,numfam,LRB,Number of families in society (Equation: tlpop/famsz) (Hamilton et al. 2007),ordinal,ratio,data,97,NA,NA,NA 298 | numg1,227,numeric,196,227,numeric,196,numg1,LRB,Number of group1 units in society (Equation: tlpop/group1) (Hamilton et al. 2007),ordinal,ratio,data,97,NA,NA,NA 299 | numg2,297,numeric,217,297,numeric,217,numg2,LRB,Number of group2 units in society (Equation: tlpop/group2) (Hamilton et al. 2007),ordinal,ratio,data,97,NA,NA,NA 300 | numg3,216,numeric,117,216,numeric,117,numg3,LRB,Number of group3 units in society (Equation: tlpop/group3) (Hamilton et al. 2007),ordinal,ratio,data,97,NA,NA,NA 301 | branchrat,339,numeric,208,339,numeric,208,branchrat,LRB,"Horton-Strahler branching ratio, produced as slope in regression (Hamilton et al. 2007:2197)",ordinal,ratio,data,97,NA,NA,NA 302 | reg,339,character,2,339,factor,2,reg,GIS,Classification into Old World or New World. Used for calculation of linguistic proximity matrix.,categorical,nominal,auxiliary,75,NA,NA,E. 
Anthon Eff's supplementary data 303 | iso639.3,339,character,250,339,factor,250,iso639.3,GIS,"ISO 639-3 language code, used on Ethnologue (http://www.ethnologue.com/)",categorical,nominal,auxiliary,75,NA,NA,NA 304 | lang,339,character,251,339,factor,251,lang,GIS,"Language name, from Ethnologue (http://www.ethnologue.com/)",categorical,nominal,auxiliary,75,NA,NA,NA 305 | phyl,339,character,179,339,factor,179,phyl,GIS,"Language phylogenetic classification, from Ethnologue (http://www.ethnologue.com/)",categorical,nominal,auxiliary,75,NA,NA,NA 306 | alt,339,numeric,285,339,numeric,285,alt,GIS,BIOCLIM: Altitude (m),ordinal,interval,data,76,NA,NA,NA 307 | alt.sd,339,numeric,338,339,numeric,338,alt.sd,GIS,BIOCLIM: Altitude (m),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 308 | alt.flag,339,numeric,2,339,numeric,2,alt.flag,GIS,BIOCLIM: Altitude (m),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 309 | meanalt,339,numeric,338,339,numeric,338,meanalt,GIS,BIOCLIM: Mean altitude within 20 km radius (m),ordinal,interval,data,76,NA,NA,NA 310 | sdalt,339,numeric,338,339,numeric,338,sdalt,GIS,BIOCLIM: Standard deviation of altitude within 20 km radius (m),ordinal,ratio,data,76,NA,NA,NA 311 | bio.1,339,numeric,218,339,numeric,218,bio.1,GIS,BIOCLIM: Annual Mean Temperature (dgC*10),ordinal,interval,data,76,NA,NA,NA 312 | bio.1.sd,339,numeric,338,339,numeric,338,bio.1.sd,GIS,BIOCLIM: Annual Mean Temperature (dgC*10),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 313 | bio.1.flag,339,numeric,2,339,numeric,2,bio.1.flag,GIS,BIOCLIM: Annual Mean Temperature (dgC*10),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 314 | bio.2,339,numeric,118,339,numeric,118,bio.2,GIS,BIOCLIM: Mean Diurnal Range (Mean of monthly (max temp - min temp)) (dgC*10),ordinal,ratio,data,76,NA,NA,NA 315 | bio.2.sd,339,numeric,338,339,numeric,338,bio.2.sd,GIS,BIOCLIM: Mean Diurnal Range (Mean 
of monthly (max temp - min temp)) (dgC*10),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 316 | bio.2.flag,339,numeric,2,339,numeric,2,bio.2.flag,GIS,BIOCLIM: Mean Diurnal Range (Mean of monthly (max temp - min temp)) (dgC*10),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 317 | bio.3,339,numeric,68,339,numeric,68,bio.3,GIS,BIOCLIM: Isothermality (bio_2/bio_7) (* 100),ordinal,ratio,data,76,NA,NA,NA 318 | bio.3.sd,339,numeric,326,339,numeric,326,bio.3.sd,GIS,BIOCLIM: Isothermality (bio_2/bio_7) (* 100),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 319 | bio.3.flag,339,numeric,2,339,numeric,2,bio.3.flag,GIS,BIOCLIM: Isothermality (bio_2/bio_7) (* 100),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 320 | bio.4,339,numeric,332,339,numeric,332,bio.4,GIS,BIOCLIM: Temperature Seasonality (standard deviation *100),ordinal,ratio,data,76,NA,NA,NA 321 | bio.4.sd,339,numeric,338,339,numeric,338,bio.4.sd,GIS,BIOCLIM: Temperature Seasonality (standard deviation *100),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 322 | bio.4.flag,339,numeric,2,339,numeric,2,bio.4.flag,GIS,BIOCLIM: Temperature Seasonality (standard deviation *100),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 323 | bio.5,339,numeric,190,339,numeric,190,bio.5,GIS,BIOCLIM: Max Temperature of Warmest Month (dgC*10),ordinal,interval,data,76,NA,NA,NA 324 | bio.5.sd,339,numeric,338,339,numeric,338,bio.5.sd,GIS,BIOCLIM: Max Temperature of Warmest Month (dgC*10),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 325 | bio.5.flag,339,numeric,2,339,numeric,2,bio.5.flag,GIS,BIOCLIM: Max Temperature of Warmest Month (dgC*10),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 326 | bio.6,339,numeric,250,339,numeric,250,bio.6,GIS,BIOCLIM: Min Temperature of Coldest Month 
(dgC*10),ordinal,interval,data,76,NA,NA,NA 327 | bio.6.sd,339,numeric,338,339,numeric,338,bio.6.sd,GIS,BIOCLIM: Min Temperature of Coldest Month (dgC*10),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 328 | bio.6.flag,339,numeric,2,339,numeric,2,bio.6.flag,GIS,BIOCLIM: Min Temperature of Coldest Month (dgC*10),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 329 | bio.7,339,numeric,235,339,numeric,235,bio.7,GIS,BIOCLIM: Temperature Annual Range (bio_5-bio_6) (dgC*10),ordinal,ratio,data,76,NA,NA,NA 330 | bio.7.sd,339,numeric,338,339,numeric,338,bio.7.sd,GIS,BIOCLIM: Temperature Annual Range (bio_5-bio_6) (dgC*10),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 331 | bio.7.flag,339,numeric,2,339,numeric,2,bio.7.flag,GIS,BIOCLIM: Temperature Annual Range (bio_5-bio_6) (dgC*10),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 332 | bio.8,339,numeric,210,339,numeric,210,bio.8,GIS,BIOCLIM: Mean Temperature of Wettest Quarter (dgC*10),ordinal,interval,data,76,NA,NA,NA 333 | bio.8.sd,339,numeric,338,339,numeric,338,bio.8.sd,GIS,BIOCLIM: Mean Temperature of Wettest Quarter (dgC*10),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 334 | bio.8.flag,339,numeric,2,339,numeric,2,bio.8.flag,GIS,BIOCLIM: Mean Temperature of Wettest Quarter (dgC*10),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 335 | bio.9,339,numeric,220,339,numeric,220,bio.9,GIS,BIOCLIM: Mean Temperature of Driest Quarter (dgC*10),ordinal,interval,data,76,NA,NA,NA 336 | bio.9.sd,339,numeric,338,339,numeric,338,bio.9.sd,GIS,BIOCLIM: Mean Temperature of Driest Quarter (dgC*10),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 337 | bio.9.flag,339,numeric,2,339,numeric,2,bio.9.flag,GIS,BIOCLIM: Mean Temperature of Driest Quarter (dgC*10),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer 
accuracy",NA,NA 338 | bio.10,339,numeric,198,339,numeric,198,bio.10,GIS,BIOCLIM: Mean Temperature of Warmest Quarter (dgC*10),ordinal,interval,data,76,NA,NA,NA 339 | bio.10.sd,339,numeric,338,339,numeric,338,bio.10.sd,GIS,BIOCLIM: Mean Temperature of Warmest Quarter (dgC*10),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 340 | bio.10.flag,339,numeric,2,339,numeric,2,bio.10.flag,GIS,BIOCLIM: Mean Temperature of Warmest Quarter (dgC*10),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 341 | bio.11,339,numeric,252,339,numeric,252,bio.11,GIS,BIOCLIM: Mean Temperature of Coldest Quarter (dgC*10),ordinal,interval,data,76,NA,NA,NA 342 | bio.11.sd,339,numeric,338,339,numeric,338,bio.11.sd,GIS,BIOCLIM: Mean Temperature of Coldest Quarter (dgC*10),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 343 | bio.11.flag,339,numeric,2,339,numeric,2,bio.11.flag,GIS,BIOCLIM: Mean Temperature of Coldest Quarter (dgC*10),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 344 | bio.12,339,numeric,298,339,numeric,298,bio.12,GIS,BIOCLIM: Annual Precipitation (mm),ordinal,ratio,data,76,NA,NA,NA 345 | bio.12.sd,339,numeric,338,339,numeric,338,bio.12.sd,GIS,BIOCLIM: Annual Precipitation (mm),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 346 | bio.12.flag,339,numeric,2,339,numeric,2,bio.12.flag,GIS,BIOCLIM: Annual Precipitation (mm),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 347 | bio.13,339,numeric,199,339,numeric,199,bio.13,GIS,BIOCLIM: Precipitation of Wettest Month (mm),ordinal,ratio,data,76,NA,NA,NA 348 | bio.13.sd,339,numeric,338,339,numeric,338,bio.13.sd,GIS,BIOCLIM: Precipitation of Wettest Month (mm),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 349 | bio.13.flag,339,numeric,2,339,numeric,2,bio.13.flag,GIS,BIOCLIM: Precipitation of Wettest Month (mm),ordinal,nominal,auxiliary,76,"flag 
for data quality, higher values poorer accuracy",NA,NA 350 | bio.14,339,numeric,77,339,numeric,77,bio.14,GIS,BIOCLIM: Precipitation of Driest Month (mm),ordinal,ratio,data,76,NA,NA,NA 351 | bio.14.sd,339,numeric,314,339,numeric,314,bio.14.sd,GIS,BIOCLIM: Precipitation of Driest Month (mm),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 352 | bio.14.flag,339,numeric,2,339,numeric,2,bio.14.flag,GIS,BIOCLIM: Precipitation of Driest Month (mm),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 353 | bio.15,339,numeric,101,339,numeric,101,bio.15,GIS,BIOCLIM: Precipitation Seasonality (Coefficient of Variation),ordinal,ratio,data,76,NA,NA,NA 354 | bio.15.sd,339,numeric,338,339,numeric,338,bio.15.sd,GIS,BIOCLIM: Precipitation Seasonality (Coefficient of Variation),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 355 | bio.15.flag,339,numeric,2,339,numeric,2,bio.15.flag,GIS,BIOCLIM: Precipitation Seasonality (Coefficient of Variation),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 356 | bio.16,339,numeric,283,339,numeric,283,bio.16,GIS,BIOCLIM: Precipitation of Wettest Quarter (mm),ordinal,ratio,data,76,NA,NA,NA 357 | bio.16.sd,339,numeric,338,339,numeric,338,bio.16.sd,GIS,BIOCLIM: Precipitation of Wettest Quarter (mm),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 358 | bio.16.flag,339,numeric,2,339,numeric,2,bio.16.flag,GIS,BIOCLIM: Precipitation of Wettest Quarter (mm),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 359 | bio.17,339,numeric,157,339,numeric,157,bio.17,GIS,BIOCLIM: Precipitation of Driest Quarter (mm),ordinal,ratio,data,76,NA,NA,NA 360 | bio.17.sd,339,numeric,332,339,numeric,332,bio.17.sd,GIS,BIOCLIM: Precipitation of Driest Quarter (mm),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 361 | bio.17.flag,339,numeric,2,339,numeric,2,bio.17.flag,GIS,BIOCLIM: 
Precipitation of Driest Quarter (mm),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 362 | bio.18,339,numeric,238,339,numeric,238,bio.18,GIS,BIOCLIM: Precipitation of Warmest Quarter (mm),ordinal,ratio,data,76,NA,NA,NA 363 | bio.18.sd,339,numeric,338,339,numeric,338,bio.18.sd,GIS,BIOCLIM: Precipitation of Warmest Quarter (mm),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 364 | bio.18.flag,339,numeric,2,339,numeric,2,bio.18.flag,GIS,BIOCLIM: Precipitation of Warmest Quarter (mm),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 365 | bio.19,339,numeric,228,339,numeric,228,bio.19,GIS,BIOCLIM: Precipitation of Coldest Quarter (mm),ordinal,ratio,data,76,NA,NA,NA 366 | bio.19.sd,339,numeric,335,339,numeric,335,bio.19.sd,GIS,BIOCLIM: Precipitation of Coldest Quarter (mm),ordinal,ratio,data,76,standard deviation within 20km radius,NA,NA 367 | bio.19.flag,339,numeric,2,339,numeric,2,bio.19.flag,GIS,BIOCLIM: Precipitation of Coldest Quarter (mm),ordinal,nominal,auxiliary,76,"flag for data quality, higher values poorer accuracy",NA,NA 368 | mnnpp,339,numeric,337,339,numeric,337,mnnpp,GIS,Mean Net Primary Production within 50 km radius. (scaled) (Imhoff et al. 2004),ordinal,ratio,data,77,NA,NA,NA 369 | long,339,numeric,322,339,numeric,322,long,GIS,Longitude (decimal degrees)--rectified,ordinal,interval,data,78,NA,NA,NA 370 | lati,339,numeric,323,339,numeric,323,lati,GIS,Latitude (decimal degrees)--rectified,ordinal,interval,data,78,NA,NA,NA 371 | ecoregion,339,character,141,339,factor,141,ecoregion,GIS,WWF ecoregion. (Olson et al. 2001),categorical,nominal,data,80,NA,NA,NA 372 | mht.name,339,character,14,339,factor,14,mht.name,GIS,WWF major habitat type. (Olson et al. 2001),categorical,nominal,data,80,NA,NA,NA 373 | koeppengei,339,character,21,339,factor,21,koeppengei,GIS,Koeppen-Geiger climate classification code. (Kottek et al. 
2006),categorical,nominal,data,81,NA,NA,NA 374 | koepdesc,339,character,21,339,factor,21,koepdesc,GIS,Koeppen-Geiger code description. (Kottek et al. 2006),categorical,nominal,data,81,NA,NA,NA 375 | iso,339,character,33,339,factor,33,iso,GIS,ISO 3166-1 alpha-3 country codes (GADM-2 data from http://www.gadm.org/),categorical,nominal,auxiliary,82,NA,NA,NA 376 | name.0,339,character,33,339,factor,33,name.0,GIS,Name of Country (GADM-2 data from http://www.gadm.org/),categorical,nominal,auxiliary,82,NA,NA,NA 377 | name.1,325,character,92,339,factor,99,name.1,GIS,Name of first subnational administrative level (GADM-2 data from http://www.gadm.org/),categorical,nominal,auxiliary,82,NA,NA,NA 378 | hasc.1,336,character,96,336,factor,96,hasc.1,GIS,Hierarchical administrative subdivision code--level 1 (GADM-2 data from http://www.gadm.org/),categorical,nominal,auxiliary,82,NA,NA,NA 379 | name.2,321,character,238,339,factor,251,name.2,GIS,Name of second subnational administrative level (GADM-2 data from http://www.gadm.org/),categorical,nominal,auxiliary,82,NA,NA,NA 380 | hasc.2,298,character,222,298,factor,222,hasc.2,GIS,Hierarchical administrative subdivision code--level 2 (GADM-2 data from http://www.gadm.org/),categorical,nominal,auxiliary,82,NA,NA,NA 381 | name.3,96,character,77,97,factor,78,name.3,GIS,Name of third subnational administrative level (GADM-2 data from http://www.gadm.org/),categorical,nominal,auxiliary,82,NA,NA,NA 382 | continent,339,character,5,339,factor,5,continent,GIS,Continent,categorical,nominal,data,83,NA,NA,NA 383 | region,339,character,11,339,factor,11,region,GIS,Major sub-continental geographic region,categorical,nominal,data,83,NA,NA,NA 384 | flag1,339,numeric,2,339,numeric,2,flag1,GIS,Data quality flag 1,categorical,nominal,auxiliary,93,"Dummy==1 where imputed values used for: ""noaddprop"", ""gelic"", ""petric"", ""vertic""",NA,NA 385 | noaddprop,339,numeric,15,339,numeric,15,noaddprop,GIS,"No additional soil properties (not gelic, petric, or 
vertic)",ordinal,unknown,data,93,NA,NA,NA 386 | gelic,339,numeric,9,339,numeric,9,gelic,GIS,Gelic properties refer to soils having permafrost within 200 cm from the soil surface.,ordinal,unknown,data,93,NA,NA,NA 387 | petric,339,numeric,4,339,numeric,4,petric,GIS,Petric properties refer to strongly cemented or indurated layer starting within 100 cm from the soil surface.,ordinal,unknown,data,93,NA,NA,NA 388 | vertic,339,numeric,7,339,numeric,7,vertic,GIS,Vertic properties refer to cracks of more than 1 cm wide occurring in the upper part of the soil.,ordinal,unknown,data,93,NA,NA,NA 389 | flag2,339,numeric,2,339,numeric,2,flag2,GIS,Data quality flag 2,categorical,nominal,auxiliary,93,"Dummy==1 where imputed values used for: ""clayheavy"", ""claylight"", ""clayloam"", ""loam"", ""loamysand"", ""sand"", ""sandyclay"", ""sandyclayloam"", ""sandyloam"", ""siltloam"", ""siltyclay"", ""siltyclayloam""",NA,NA 390 | clayheavy,339,numeric,6,339,numeric,6,clayheavy,GIS,"Percent of soil in soil texture category 1, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 391 | siltyclay,339,numeric,4,339,numeric,4,siltyclay,GIS,"Percent of soil in soil texture category 2, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 392 | claylight,339,numeric,16,339,numeric,16,claylight,GIS,"Percent of soil in soil texture category 3, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 393 | siltyclayloam,339,numeric,3,339,numeric,3,siltyclayloam,GIS,"Percent of soil in soil texture category 4, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 394 | clayloam,339,numeric,6,339,numeric,6,clayloam,GIS,"Percent of soil in soil texture category 5, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 395 | siltloam,339,numeric,12,339,numeric,12,siltloam,GIS,"Percent of soil in soil texture category 7, where 1 is finest soil and 13 is the 
coarsest.",ordinal,interval,data,93,NA,NA,NA 396 | sandyclay,339,numeric,2,339,numeric,2,sandyclay,GIS,"Percent of soil in soil texture category 8, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 397 | loam,339,numeric,20,339,numeric,20,loam,GIS,"Percent of soil in soil texture category 9, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 398 | sandyclayloam,339,numeric,15,339,numeric,15,sandyclayloam,GIS,"Percent of soil in soil texture category 10, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 399 | sandyloam,339,numeric,19,339,numeric,19,sandyloam,GIS,"Percent of soil in soil texture category 11, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 400 | loamysand,339,numeric,15,339,numeric,15,loamysand,GIS,"Percent of soil in soil texture category 12, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 401 | sand,339,numeric,14,339,numeric,14,sand,GIS,"Percent of soil in soil texture category 13, where 1 is finest soil and 13 is the coarsest.",ordinal,interval,data,93,NA,NA,NA 402 | flag3,339,numeric,1,339,numeric,1,flag3,GIS,Data quality flag 3,categorical,nominal,auxiliary,93,"Dummy==1 where imputed values used for: ""issoil"", ""t_texture"", ""drainage"", ""awc_class"", ""t_gravel"", ""t_sand"", ""t_silt"", ""t_clay"", ""t_usda_tex_class"", ""t_ref_bulk_density"", ""t_bulk_density"", ""t_oc"", ""t_ph_h2o"", ""t_cec_clay"", ""t_cec_soil"", ""t_bs"", ""t_teb"", ""t_caco3"", ""t_caso4"", ""t_esp"", ""t_ece"", ""s_gravel"", ""s_sand"", ""s_silt"", ""s_clay"", ""s_usda_tex_class"", ""s_ref_bulk_density"", ""s_bulk_density"", ""s_oc"", ""s_ph_h2o"", ""s_cec_clay"", ""s_cec_soil"", ""s_bs"", ""s_teb"", ""s_caco3"", ""s_caso4"", ""s_esp"", ""s_ece"",""su_symbol"", ""su_value""",NA,NA 403 | issoil,339,numeric,2,339,numeric,2,issoil,GIS,Proportion of sampled area that is soil,ordinal,unknown,data,93,NA,NA,NA 
404 | t_texture,339,numeric,37,339,numeric,37,t_texture,GIS,Texture of topsoil (higher values coarser texture),ordinal,interval,data,93,NA,NA,NA 405 | drainage,339,numeric,53,339,numeric,53,drainage,GIS,Drainage of soil (higher values better drainage),ordinal,interval,data,93,NA,NA,NA 406 | awc_class,339,numeric,43,339,numeric,43,awc_class,GIS,Available Water storage Capacity (higher values less water storage capacity),ordinal,interval,data,93,NA,NA,NA 407 | t_gravel,339,numeric,23,339,numeric,23,t_gravel,GIS,Percent of topsoil consisting of gravel,ordinal,interval,data,93,NA,NA,NA 408 | t_sand,339,numeric,61,339,numeric,61,t_sand,GIS,Percent of topsoil consisting of sand,ordinal,interval,data,93,NA,NA,NA 409 | t_silt,339,numeric,44,339,numeric,44,t_silt,GIS,Percent of topsoil consisting of silt,ordinal,interval,data,93,NA,NA,NA 410 | t_clay,339,numeric,43,339,numeric,43,t_clay,GIS,Percent of topsoil consisting of clay,ordinal,interval,data,93,NA,NA,NA 411 | t_usda_tex_class,339,numeric,79,339,numeric,79,t_usda_tex_class,GIS,Measure of topsoil texture (higher values coarser texture),ordinal,interval,data,93,NA,NA,NA 412 | t_ref_bulk_density,339,numeric,151,339,numeric,151,t_ref_bulk_density,GIS,Bulk density = topsoil mass/topsoil volume. Calculations based on texture only.,ordinal,ratio,data,93,Bulk density is inversely related to porosity.,NA,NA 413 | t_bulk_density,339,numeric,161,339,numeric,161,t_bulk_density,GIS,"Bulk density = topsoil mass/topsoil volume. Calculations based on texture, organic matter content, and porosity.",ordinal,ratio,data,93,Bulk density is inversely related to porosity.,NA,NA 414 | t_oc,339,numeric,191,339,numeric,191,t_oc,GIS,Percentage of organic carbon in topsoil.,ordinal,interval,data,93,"Organic Carbon is together with pH, the best simple indicator of the health status of the soil. 
Moderate to high amounts of organic carbon are associated with fertile soils with a good structure.",NA,NA 415 | t_ph_h2o,339,numeric,159,339,numeric,159,t_ph_h2o,GIS,pH of topsoil.,ordinal,interval,data,93,Low values are acidic; high values alkaline. Best soils for most crops lie in 5.5 to 7.2 range.,NA,NA 416 | t_cec_clay,339,numeric,176,339,numeric,176,t_cec_clay,GIS,Cation exchange capacity of the clay fraction in topsoil.,ordinal,ratio,data,93,The total nutrient fixing capacity of a soil is well expressed by its Cation Exchange Capacity. Soils with low CEC have little resilience and can not build up stores of nutrients.,NA,NA 417 | t_cec_soil,339,numeric,147,339,numeric,147,t_cec_soil,GIS,Cation exchange capacity in topsoil.,ordinal,ratio,data,93,The total nutrient fixing capacity of a soil is well expressed by its Cation Exchange Capacity. Soils with low CEC have little resilience and can not build up stores of nutrients.,NA,NA 418 | t_bs,339,numeric,156,339,numeric,156,t_bs,GIS,Base saturation in topsoil.,ordinal,interval,data,93,"The base saturation measures the sum of exchangeable cations (nutrients) Na, Ca, Mg and K as a percentage of the overall exchange capacity of the soil (including the same cations plus H and Al). 
The value often shows a near linear correlation with pH.",NA,NA 419 | t_teb,339,numeric,190,339,numeric,190,t_teb,GIS,Total exchangeable bases in topsoil.,ordinal,ratio,data,93,"Total exchangeable bases stand for the sum of exchangeable cations in a soil: sodium (Na), calcium (Ca), magnesium (Mg) and Potassium (K).",NA,NA 420 | t_caco3,339,numeric,95,339,numeric,95,t_caco3,GIS,Calcium carbonate (lime) content in topsoil.,ordinal,interval,data,93,Low levels of calcium carbonate enhance soil structure and are generally beneficial for crop production but at higher concentrations they may induce iron deficiency and when cemented limit the water storage capacity of soils.,NA,NA 421 | t_caso4,339,numeric,23,339,numeric,23,t_caso4,GIS,Calcium sulphate (gypsum) content in topsoil,ordinal,interval,data,93,"Up to 2 percent gypsum in the soil favours plant growth, between 2 and 25 percent has little or no adverse effect if in powdery form, but more than 25 percent can cause substantial reduction in yields.",NA,NA 422 | t_esp,339,numeric,73,339,numeric,73,t_esp,GIS,Exchangeable sodium percentage in the topsoil,ordinal,interval,data,93,Exchangeable sodium percentage has been used to indicate levels of sodium in soils. It is calculated as the ratio of Na in the CEC (or sum of cations) ESP= Na*100/CECsoil,NA,NA 423 | t_ece,339,numeric,56,339,numeric,56,t_ece,GIS,Electrical conductivity of topsoil,ordinal,ratio,data,93,"Salt content of a soil can be roughly estimated from the Electrical Conductivity of the soil (EC, expressed in dS m-1). Crops vary considerably in their resistance and response to salt in soils.
Some crops will suffer at values as little as 2 dS m-1 (Spinach) others can stand up to 16 dS m-1 (Date palm).",NA,NA 424 | s_gravel,339,numeric,27,339,numeric,27,s_gravel,GIS,Percent of subsoil consisting of gravel,ordinal,interval,data,93,NA,NA,NA 425 | s_sand,339,numeric,64,339,numeric,64,s_sand,GIS,Percent of subsoil consisting of sand,ordinal,interval,data,93,NA,NA,NA 426 | s_silt,339,numeric,45,339,numeric,45,s_silt,GIS,Percent of subsoil consisting of silt,ordinal,interval,data,93,NA,NA,NA 427 | s_clay,339,numeric,48,339,numeric,48,s_clay,GIS,Percent of subsoil consisting of clay,ordinal,interval,data,93,NA,NA,NA 428 | s_usda_tex_class,339,numeric,93,339,numeric,93,s_usda_tex_class,GIS,Measure of subsoil texture (higher values coarser texture),ordinal,interval,data,93,NA,NA,NA 429 | s_ref_bulk_density,339,numeric,170,339,numeric,170,s_ref_bulk_density,GIS,Bulk density = subsoil mass/subsoil volume. Calculations based on texture only.,ordinal,ratio,data,93,Bulk density is inversely related to porosity.,NA,NA 430 | s_bulk_density,339,numeric,175,339,numeric,175,s_bulk_density,GIS,"Bulk density = subsoil mass/subsoil volume. Calculations based on texture, organic matter content, and porosity.",ordinal,ratio,data,93,Bulk density is inversely related to porosity.,NA,NA 431 | s_oc,339,numeric,175,339,numeric,175,s_oc,GIS,Percentage of organic carbon in subsoil.,ordinal,interval,data,93,"Organic Carbon is together with pH, the best simple indicator of the health status of the soil. Moderate to high amounts of organic carbon are associated with fertile soils with a good structure.",NA,NA 432 | s_ph_h2o,339,numeric,163,339,numeric,163,s_ph_h2o,GIS,pH of subsoil.,ordinal,interval,data,93,Low values are acidic; high values alkaline. 
Best soils for most crops lie in 5.5 to 7.2 range.,NA,NA 433 | s_cec_clay,339,numeric,176,339,numeric,176,s_cec_clay,GIS,Cation exchange capacity of the clay fraction in subsoil.,ordinal,ratio,data,93,The total nutrient fixing capacity of a soil is well expressed by its Cation Exchange Capacity. Soils with low CEC have little resilience and can not build up stores of nutrients.,NA,NA 434 | s_cec_soil,339,numeric,149,339,numeric,149,s_cec_soil,GIS,Cation exchange capacity in subsoil.,ordinal,ratio,data,93,The total nutrient fixing capacity of a soil is well expressed by its Cation Exchange Capacity. Soils with low CEC have little resilience and can not build up stores of nutrients.,NA,NA 435 | s_bs,339,numeric,155,339,numeric,155,s_bs,GIS,Base saturation in subsoil.,ordinal,interval,data,93,"The base saturation measures the sum of exchangeable cations (nutrients) Na, Ca, Mg and K as a percentage of the overall exchange capacity of the soil (including the same cations plus H and Al). The value often shows a near linear correlation with pH.",NA,NA 436 | s_teb,339,numeric,191,339,numeric,191,s_teb,GIS,Total exchangeable bases in subsoil.,ordinal,ratio,data,93,"Total exchangeable bases stand for the sum of exchangeable cations in a soil: sodium (Na), calcium (Ca), magnesium (Mg) and Potassium (K).",NA,NA 437 | s_caco3,339,numeric,98,339,numeric,98,s_caco3,GIS,Calcium carbonate (lime) content in subsoil.,ordinal,interval,data,93,Low levels of calcium carbonate enhance soil structure and are generally beneficial for crop production but at higher concentrations they may induce iron deficiency and when cemented limit the water storage capacity of soils.,NA,NA 438 | s_caso4,339,numeric,34,339,numeric,34,s_caso4,GIS,Calcium sulphate (gypsum) content in subsoil,ordinal,interval,data,93,"Up to 2 percent gypsum in the soil favours plant growth, between 2 and 25 percent has little or no adverse effect if in powdery form, but more than 25 percent can cause substantial reduction in
yields.",NA,NA 439 | s_esp,339,numeric,84,339,numeric,84,s_esp,GIS,Exchangeable sodium percentage in the subsoil,ordinal,interval,data,93,Exchangeable sodium percentage has been used to indicate levels of sodium in soils. It is calculated as the ratio of Na in the CEC (or sum of cations) ESP= Na*100/CECsoil,NA,NA 440 | s_ece,339,numeric,77,339,numeric,77,s_ece,GIS,Electrical conductivity of subsoil,ordinal,ratio,data,93,"Salt content of a soil can be roughly estimated from the Electrical Conductivity of the soil (EC, expressed in dS m-1). Crops vary considerably in their resistance and response to salt in soils. Some crops will suffer at values as little as 2 dS m-1 (Spinach) others can stand up to 16 dS m-1 (Date palm).",NA,NA 441 | su_symbol,339,character,28,339,factor,28,su_symbol,GIS,Symbol for HWSD spatially dominant major soil group.,categorical,nominal,data,93,NA,NA,NA 442 | su_value,339,character,28,339,factor,28,su_value,GIS,Name of HWSD spatially dominant major soil group.,categorical,nominal,data,93,NA,NA,NA 443 | sq1,339,numeric,8,339,numeric,8,sq1,GIS,Nutrient availability,ordinal,ordinal,data,101,"Soil texture, soil organic carbon, soil pH, total exchangeable bases",NA,NA 444 | sq2,339,numeric,8,339,numeric,8,sq2,GIS,Nutrient retention capacity,ordinal,ordinal,data,101,"Soil Organic carbon, Soil texture, base saturation, cation exchange capacity of soil and of clay fraction",NA,NA 445 | sq3,339,numeric,8,339,numeric,8,sq3,GIS,Rooting conditions,ordinal,ordinal,data,101,"Soil textures, bulk density, coarse fragments, vertic soil properties and soil phases affecting root penetration and soil depth and soil volume",NA,NA 446 | sq4,339,numeric,8,339,numeric,8,sq4,GIS,Oxygen availability to roots,ordinal,ordinal,data,101,Soil drainage and soil phases affecting soil drainage,NA,NA 447 | sq5,339,numeric,8,339,numeric,8,sq5,GIS,Excess salts.,ordinal,ordinal,data,101,"Soil salinity, soil sodicity and soil phases influencing salt conditions",NA,NA 448 | 
sq6,339,numeric,8,339,numeric,8,sq6,GIS,Toxicity,ordinal,ordinal,data,101,Calcium carbonate and gypsum,NA,NA 449 | sq7,339,numeric,8,339,numeric,8,sq7,GIS,Workability (constraining field management),ordinal,ordinal,data,101,"Soil texture, effective soil depth/volume, and soil phases constraining soil management (soil depth, rock outcrop, stoniness, gravel/concretions and hardpans)",NA,NA 450 | dicgsh1a,339,numeric,278,339,numeric,278,dicgsh1a,GIS,Distance to the coast line in km based on GSHHS,ordinal,ratio,data,102,Negative values show distance across land to coastline; Positive values show distance across water to coastline (very small islands have positive values),Grids and descriptions obtained from http://worldgrids.org. Resolution 1 km gridcell.,NA 451 | dicgsh1a.flag,339,numeric,1,339,numeric,1,dicgsh1a.flag,GIS,Distance to the coast line in km based on GSHHS,ordinal,nominal,auxiliary,102,data quality flag--higher numbers are less accurate,NA,NA 452 | etmnts2a,339,numeric,331,339,numeric,331,etmnts2a,GIS,Long-term MODIS-estimated Evapotranspiration (MOD16) (mm/year),ordinal,ratio,data,103,NA,Grids and descriptions obtained from http://worldgrids.org. Resolution 1 km gridcell.,NA 453 | etmnts2a.flag,339,numeric,3,339,numeric,3,etmnts2a.flag,GIS,Long-term MODIS-estimated Evapotranspiration (MOD16) (mm/year),ordinal,nominal,auxiliary,103,data quality flag--higher numbers are less accurate,NA,NA 454 | g12igb3a,339,numeric,17,339,numeric,17,g12igb3a,GIS,Land cover type 1 (IGBP) for year 2012,categorical,nominal,data,104,NA,Grids and descriptions obtained from http://worldgrids.org. Resolution 1 km gridcell.,NA 455 | g12igb3a.flag,339,numeric,1,339,numeric,1,g12igb3a.flag,GIS,Land cover type 1 (IGBP) for year 2012,categorical,nominal,auxiliary,104,data quality flag--higher numbers are less accurate,NA,NA 456 | twisre3a,339,numeric,87,339,numeric,87,twisre3a,GIS,SAGA GIS Topographic wetness index,ordinal,ratio,data,105,"Sorensen, R., Zinko, U., & Seibert, J. (2006). 
On the calculation of the topographic wetness index: evaluation of different methods based on field observations. Hydrol. Earth Syst. Sci., 10(1), 101-112. doi:10.5194/hess-10-101-2006",Grids and descriptions obtained from http://worldgrids.org. Resolution 1 km gridcell.,NA 457 | twisre3a.flag,339,numeric,2,339,numeric,2,twisre3a.flag,GIS,SAGA GIS Topographic wetness index,ordinal,nominal,auxiliary,105,data quality flag--higher numbers are less accurate,NA,NA 458 | l3pobi3b,339,numeric,7,339,factor,7,l3pobi3b,GIS,Physiographic landform units Level 3 (SCALA project),categorical,ordinal,data,106,NA,Grids and descriptions obtained from http://worldgrids.org. Resolution 1 km gridcell.,NA 459 | l3pobi3b.flag,339,numeric,3,339,numeric,3,l3pobi3b.flag,GIS,Physiographic landform units Level 3 (SCALA project),categorical,nominal,auxiliary,106,data quality flag--higher numbers are less accurate,NA,NA 460 | l3pobi3b.navn,339,character,7,339,factor,7,l3pobi3b.navn,GIS,Physiographic landform units Level 3 (SCALA project),categorical,ordinal,data,106,names of categories,NA,NA 461 | opisre2a,339,numeric,103,339,numeric,103,opisre2a,GIS,SAGA GIS Topographic Openness Index,ordinal,ratio,data,107,"Topographic openness developed by Yokoyama R, Shirasawa M and Pike RJ, 2002, Visualizing topography by Openness: A new application of image processing to digital elevation models. Photogrammetric Engineering and Remote Sensing, 68(3): 257-265.",Grids and descriptions obtained from http://worldgrids.org. Resolution 1 km gridcell.,NA 462 | opisre2a.flag,339,numeric,1,339,numeric,1,opisre2a.flag,GIS,SAGA GIS Topographic Openness Index,ordinal,nominal,auxiliary,107,data quality flag--higher numbers are less accurate,NA,NA 463 | geaisg3a,339,numeric,40,339,factor,40,geaisg3a,GIS,Geological ages based on the surface geology,categorical,ordinal,data,108,NA,Grids and descriptions obtained from http://worldgrids.org. 
Resolution 1 km gridcell.,NA 464 | geaisg3a.flag,339,numeric,3,339,numeric,3,geaisg3a.flag,GIS,Geological ages based on the surface geology,categorical,nominal,auxiliary,108,data quality flag--higher numbers are less accurate,NA,NA 465 | geaisg3a.navn,339,character,40,339,factor,40,geaisg3a.navn,GIS,Geological ages based on the surface geology,categorical,ordinal,data,108,names of categories,NA,NA 466 | glcjrc3a,339,numeric,21,339,factor,21,glcjrc3a,GIS,Global Land Cover map for the year 2000 (GLC2000),categorical,nominal,data,109,NA,Grids and descriptions obtained from http://worldgrids.org. Resolution 1 km gridcell.,NA 467 | glcjrc3a.flag,339,numeric,1,339,numeric,1,glcjrc3a.flag,GIS,Global Land Cover map for the year 2000 (GLC2000),categorical,nominal,auxiliary,109,data quality flag--higher numbers are less accurate,NA,NA 468 | glcjrc3a.navn,339,character,21,339,factor,21,glcjrc3a.navn,GIS,Global Land Cover map for the year 2000 (GLC2000),categorical,nominal,data,109,names of categories,NA,NA 469 | inmsre3a,339,numeric,19,339,numeric,19,inmsre3a,GIS,Mean potential incoming solar radiation (8-day average) derived in SAGA GIS,ordinal,ratio,data,110,NA,Grids and descriptions obtained from http://worldgrids.org. Resolution 1 km gridcell.,NA 470 | inmsre3a.flag,339,numeric,2,339,numeric,2,inmsre3a.flag,GIS,Mean potential incoming solar radiation (8-day average) derived in SAGA GIS,ordinal,nominal,auxiliary,110,data quality flag--higher numbers are less accurate,NA,NA 471 | inssre2a,339,numeric,320,339,numeric,320,inssre2a,GIS,Standard deviation of the potential incoming solar radiation derived in SAGA GIS,ordinal,ratio,data,111,NA,Grids and descriptions obtained from http://worldgrids.org. 
Resolution 1 km gridcell.,NA 472 | inssre2a.flag,339,numeric,3,339,numeric,3,inssre2a.flag,GIS,Standard deviation of the potential incoming solar radiation derived in SAGA GIS,ordinal,nominal,auxiliary,111,data quality flag--higher numbers are less accurate,NA,NA 473 | evmmod2a,339,numeric,329,339,numeric,329,evmmod2a,GIS,Mean value of the monthly MODIS EVI time series data,ordinal,ratio,data,112,Enhanced Vegetation Index: https://lpdaac.usgs.gov/products/modis_products_table/mod13q1,Grids and descriptions obtained from http://worldgrids.org. Resolution 1 km gridcell.,NA 474 | evmmod2a.flag,339,numeric,1,339,numeric,1,evmmod2a.flag,GIS,Mean value of the monthly MODIS EVI time series data,ordinal,nominal,auxiliary,112,data quality flag--higher numbers are less accurate,NA,NA 475 | lammod3a,339,numeric,49,339,numeric,49,lammod3a,GIS,Mean value of the monthly MODIS LAI time series data,ordinal,ratio,data,113,Leaf Area Index: https://lpdaac.usgs.gov/products/modis_products_table/mod15a2,Grids and descriptions obtained from http://worldgrids.org. 
Resolution 1 km gridcell.,NA 476 | lammod3a.flag,339,numeric,5,339,numeric,5,lammod3a.flag,GIS,Mean value of the monthly MODIS LAI time series data,ordinal,nominal,auxiliary,113,data quality flag--higher numbers are less accurate,NA,NA 477 | anntotprecip,339,numeric,10,339,numeric,10,anntotprecip,GIS,Average Annual Total Precipitation,ordinal,ratio,data,117,The total amount of precipitation that falls during an average year (1960 to 1990) in units of millimeters (mm3 per year / mm2 = mm / year).,NA,NA 478 | anntotprecip.flag,339,numeric,2,339,numeric,2,anntotprecip.flag,GIS,Average Annual Total Precipitation,ordinal,nominal,auxiliary,117,data quality flag--higher numbers are less accurate,NA,NA 479 | avgannrh,339,numeric,64,339,numeric,64,avgannrh,GIS,Average Annual Relative Humidity,ordinal,ratio,data,117,"Relative humidity is defined as the ratio of the actual vapor pressure of water vapor to the saturation vapor pressure of water, in other words how much water is in the air divided by the most water that could possibly be there.",NA,NA 480 | avgannrh.flag,339,numeric,2,339,numeric,2,avgannrh.flag,GIS,Average Annual Relative Humidity,ordinal,nominal,auxiliary,117,data quality flag--higher numbers are less accurate,NA,NA 481 | avgannrunoff,339,numeric,197,339,numeric,197,avgannrunoff,GIS,The total volume of water running over the surface in a year-averaged over the gridcell.,ordinal,ratio,data,117,the volume of water running over this gridcell divided by the area of the gridcell itself (mm3 per year / mm2 over 1 yr = mm/yr),NA,NA 482 | avgannrunoff.flag,339,numeric,4,339,numeric,4,avgannrunoff.flag,GIS,The total volume of water running over the surface in a year-averaged over the gridcell.,ordinal,nominal,auxiliary,117,data quality flag--higher numbers are less accurate,NA,NA 483 | evapotrans,339,numeric,328,339,numeric,328,evapotrans,GIS,Evapotranspiration,ordinal,ratio,data,114,"The amount of water removed from the surface through evaporation (due to 
temperature, humidity, radiation...) and through transpiration (the process by which plants move water up from their roots and out through their leaves). Evapotranspiration can be used to estimate the amount of water the local vegetation needs to remain in equilibrium with the ground water supply.","Grids and variable descriptions are downloaded from the Atlas of the Biosphere, a product of the Center for Sustainability and the Global Environment (SAGE), part of the Nelson Institute for Environmental Studies at the University of Wisconsin - Madison (http://www.sage.wisc.edu/atlas/). Resolution is 30 arc minutes.",NA 484 | evapotrans.flag,339,numeric,4,339,numeric,4,evapotrans.flag,GIS,Evapotranspiration,ordinal,nominal,auxiliary,114,data quality flag--higher numbers are less accurate,NA,NA 485 | gdd,339,numeric,328,339,numeric,328,gdd,GIS,Growing Degree Days,ordinal,ratio,data,117,"The Growing Degree Day (GDD) is based on the fact that if temperature is too hot or cold, plant growth will slow. GDD is calculated for a range specific to a particular plant or suite of plants. Taking corn as an example, it grows with a minimum temperature of 10°C and a maximum temperature of 30°C. So if one day the maximum temperature is 21°C and the minimum temperature is 15°C, the average temperature of that day was 18°C. To determine how many GDD were in that day, simply subtract the base temperature from the average temperature (18°C-10°C) remembering that this all happened on one day. Thus for this example, there were 8 GDD accumulated by the corn during that day. Had the base temperature been 5°C, there would have been 13 GDD accumulated. Annual GDD is the average number of GDD accumulated in a particular area under normal climatic conditions. 
In this dataset, a base temperature of 5°C was used to compute monthly GDD and those monthly values were then totaled to yield an annual number.","Grids and variable descriptions are downloaded from the Atlas of the Biosphere, a product of the Center for Sustainability and the Global Environment (SAGE), part of the Nelson Institute for Environmental Studies at the University of Wisconsin - Madison (http://www.sage.wisc.edu/atlas/). Resolution is 30 arc minutes.",NA 486 | gdd.flag,339,numeric,2,339,numeric,2,gdd.flag,GIS,Growing Degree Days,ordinal,nominal,auxiliary,117,data quality flag--higher numbers are less accurate,NA,NA 487 | npp,339,numeric,241,339,numeric,241,npp,GIS,Net Primary Productivity,ordinal,ratio,data,116,"NPP is calculated by taking the gross primary productivity (the total amount of energy/mass taken in by the plant) and subtracting the plant's respiration (the total amount of energy/mass lost by the plant as it breathes). NPP is measured in units of kg-Carbon per square meter per year, or the net amount of carbon the plants in an average square meter of the gridcell take up during an average year.","Grids and variable descriptions are downloaded from the Atlas of the Biosphere, a product of the Center for Sustainability and the Global Environment (SAGE), part of the Nelson Institute for Environmental Studies at the University of Wisconsin - Madison (http://www.sage.wisc.edu/atlas/). Resolution is 30 arc minutes.",NA 488 | npp.flag,339,numeric,3,339,numeric,3,npp.flag,GIS,Net Primary Productivity,ordinal,nominal,auxiliary,116,data quality flag--higher numbers are less accurate,NA,NA 489 | pevapotrans,339,numeric,325,339,numeric,325,pevapotrans,GIS,Potential evapotranspiration,ordinal,ratio,data,114,"The amount of water that would be removed from the surface by evaporation and transpiration, if the amount of water already present were not a limiting factor. 
In other words, the potential evapotranspiration over the Sahara desert is very large because the amount of evaporation that could take place there is huge. However, because there isn't any water there to be evaporated, the evapotranspiration that actually takes place is quite small.",NA,NA 490 | pevapotrans.flag,339,numeric,4,339,numeric,4,pevapotrans.flag,GIS,Potential evapotranspiration,ordinal,nominal,auxiliary,114,data quality flag--higher numbers are less accurate,NA,NA 491 | potentialveg,339,numeric,13,339,factor,13,potentialveg,GIS,Potential Vegetation,categorical,nominal,data,115,"Potential vegetation is the vegetation that would exist at a given location had human forms of land use never existed. In other words, if humans weren't around, this would be an accurate description of the planet's land cover.","Grids and variable descriptions are downloaded from the Atlas of the Biosphere, a product of the Center for Sustainability and the Global Environment (SAGE), part of the Nelson Institute for Environmental Studies at the University of Wisconsin - Madison (http://www.sage.wisc.edu/atlas/). 
Resolution is 30 arc minutes.",NA 492 | potentialveg.flag,339,numeric,4,339,numeric,4,potentialveg.flag,GIS,Potential Vegetation,categorical,nominal,auxiliary,115,data quality flag--higher numbers are less accurate,NA,NA 493 | potentialveg.navn,339,character,13,339,factor,13,potentialveg.navn,GIS,Potential Vegetation,categorical,nominal,data,115,names of categories,NA,NA 494 | snowdepth,339,numeric,178,339,numeric,178,snowdepth,GIS,Snow depth,ordinal,ratio,data,114,"The average annual snow depth--natural log scale, range is from 10^-20mm (trace) to 10^10mm (greater than a kilometer).",NA,NA 495 | snowdepth.flag,339,numeric,5,339,numeric,5,snowdepth.flag,GIS,Snow depth,ordinal,nominal,auxiliary,114,data quality flag--higher numbers are less accurate,NA,NA 496 | soilmoisture,339,numeric,328,339,numeric,328,soilmoisture,GIS,Soil Moisture,ordinal,ratio,data,114,The average amount of water in the soil,NA,NA 497 | soilmoisture.flag,339,numeric,4,339,numeric,4,soilmoisture.flag,GIS,Soil Moisture,ordinal,nominal,auxiliary,114,data quality flag--higher numbers are less accurate,NA,NA 498 | suit,339,numeric,208,339,numeric,208,suit,GIS,Suitability for agriculture,ordinal,ratio,data,119,The fraction of each grid cell that is suitable to be used for agriculture. It is based on the temperature and soil conditions of each grid cell.,"Grids and variable descriptions are downloaded from the Atlas of the Biosphere, a product of the Center for Sustainability and the Global Environment (SAGE), part of the Nelson Institute for Environmental Studies at the University of Wisconsin - Madison (http://www.sage.wisc.edu/atlas/). 
Resolution is 30 arc minutes.",NA 499 | suit.flag,339,numeric,4,339,numeric,4,suit.flag,GIS,Suitability for agriculture,ordinal,nominal,auxiliary,119,NA,NA,NA 500 | eaid,207,numeric,207,207,numeric,207,eaid,GIS,"Ethnographic Atlas ID number, if society is in the EA",categorical,nominal,auxiliary,83,NA,NA,NA 501 | lrbid,339,numeric,339,339,numeric,339,lrbid,GIS,"Binford foragers ID number, if society is in the LRB",categorical,nominal,auxiliary,83,NA,NA,NA 502 | sccsid,37,numeric,37,37,numeric,37,sccsid,GIS,"Standard Cross Cultural Sample ID number, if society is in the SCCS",categorical,nominal,auxiliary,83,NA,NA,NA 503 | wnaiid,90,numeric,90,90,numeric,90,wnaiid,GIS,"Western North American Indian ID number, if society is in the WNAI",categorical,nominal,auxiliary,83,NA,NA,NA 504 | xcid,207,numeric,207,207,integer,207,xcid,GIS,"XC ID number, if society is in the XC",categorical,nominal,auxiliary,83,NA,NA,NA 505 | awc,115,character,115,115,factor,115,awc,GIS,Atlas of World Cultures ID number,categorical,nominal,auxiliary,100,assignments by Doug White,NA,NA 506 | society,339,character,339,339,factor,339,society,GIS,Ethnographic Atlas society name,categorical,nominal,auxiliary,1,NA,NA,NA 507 | dxid,339,numeric,339,339,numeric,339,dxid,GIS,"Binford foragers ID number, if society is in the LRB",categorical,nominal,auxiliary,83,NA,NA,NA 508 | --------------------------------------------------------------------------------