├── .gitignore ├── .github ├── .gitignore └── workflows │ └── pkgdown.yaml ├── R ├── sysdata.rda ├── test_login.R ├── wiesbaden-package.R ├── download_csv.R ├── save_credentials.R ├── read_header_genesis.R ├── retrieve_metadata.R ├── retrieve_varinfo.R ├── retrieve_valuelabel.R ├── retrieve_datalist.R ├── helper.R ├── read_gv100.R └── retrieve_data.R ├── .Rbuildignore ├── data-raw ├── GV100ADkey.csv ├── GV100NADfwf.csv ├── GV100ADfwf.csv └── make_data.R ├── _pkgdown.yml ├── man ├── test_login.Rd ├── save_credentials.Rd ├── wiesbaden-package.Rd ├── retrieve_metadata.Rd ├── retrieve_varinfo.Rd ├── download_csv.Rd ├── retrieve_valuelabel.Rd ├── retrieve_datalist.Rd ├── read_header_genesis.Rd ├── read_gv100.Rd └── retrieve_data.Rd ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── README.md └── vignettes └── wiesbaden.Rmd /.gitignore: -------------------------------------------------------------------------------- 1 | docs 2 | install.R -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sumtxt/wiesbaden/HEAD/R/sysdata.rda -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^data-raw 2 | ^docs 3 | ^README\.md$ 4 | ^cran-comments\.md$ 5 | ^\.DS_Store 6 | \.git 7 | ^CRAN-RELEASE$ 8 | ^DS_Store$ 9 | ^doc$ 10 | ^Meta$ 11 | ^_pkgdown\.yml$ 12 | ^pkgdown$ 13 | ^docs$ 14 | ^\.github$ 15 | ^CRAN-SUBMISSION$ 16 | install.R 17 | -------------------------------------------------------------------------------- /data-raw/GV100ADkey.csv: 
-------------------------------------------------------------------------------- 1 | schluessel,typ 2 | 41,Kreisfreie Stadt 3 | 42,Stadtkreis 4 | 43,Kreis 5 | 44,Landkreis 6 | 45,Regionalverband 7 | 50,verbandsfreie Gemeinde 8 | 51,Amt 9 | 52,Samtgemeinde 10 | 53,Verbandsgemeinde 11 | 54,Verwaltungsgemeinschaft 12 | 55,Kirchspielslandgemeinde 13 | 56,Verwaltungsverband 14 | 57,VG Trägermodell 15 | 58,Erfüllende Gemeinde 16 | 60,Markt 17 | 61,Kreisfreie Stadt 18 | 62,Stadtkreis 19 | 63,Stadt 20 | 64,Kreisangehörige Gemeinde 21 | 65,gemeindefreies Gebiet bewohnt 22 | 66,gemeindefreies Gebiet unbewohnt 23 | 67,große Kreisstadt -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | home: 2 | title: wiesbaden - Access Databases from the Federal Statistical Office of Germany 3 | 4 | authors: 5 | Moritz Marbach: 6 | href: "http://moritz-marbach.com/" 7 | 8 | reference: 9 | - title: "Main functions" 10 | - contents: 11 | - retrieve_datalist 12 | - retrieve_data 13 | - retrieve_metadata 14 | - retrieve_valuelabel 15 | - retrieve_varinfo 16 | - title: "Utilities" 17 | - contents: 18 | - save_credentials 19 | - test_login 20 | - download_csv 21 | - read_gv100 22 | - read_header_genesis 23 | 24 | url: https://sumtxt.github.io/wiesbaden/ 25 | 26 | template: 27 | bootstrap: 5 28 | 29 | -------------------------------------------------------------------------------- /man/test_login.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/test_login.R 3 | \name{test_login} 4 | \alias{test_login} 5 | \title{Tests Login in GENESIS Databases} 6 | \usage{ 7 | test_login(genesis = NULL, ...) 
8 | } 9 | \arguments{ 10 | \item{genesis}{to authenticate a user and set the database (see below).} 11 | 12 | \item{...}{other arguments send to the httr::GET request.} 13 | } 14 | \value{ 15 | a \code{string} with the server return message. 16 | } 17 | \description{ 18 | \code{test_login} tests if the login works. 19 | } 20 | \examples{ 21 | 22 | \dontrun{ 23 | 24 | test_login(genesis=c(db="regio") ) 25 | 26 | } 27 | 28 | 29 | 30 | 31 | } 32 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: wiesbaden 2 | Title: Access Databases from the Federal Statistical Office of Germany 3 | Version: 1.2.10 4 | Authors@R: c(person("Moritz", "Marbach", role = c("aut", "cre"), email="m.marbach@ucl.ac.uk", comment = c(ORCID = "0000-0002-7101-2821"))) 5 | Description: Retrieve and import data from different databases of the Federal Statistical Office of Germany (DESTATIS) using their SOAP XML web service . 
6 | Depends: 7 | R (>= 3.3.1) 8 | License: GPL-3 9 | URL: https://github.com/sumtxt/wiesbaden/ 10 | BugReports: https://github.com/sumtxt/wiesbaden/issues 11 | Encoding: UTF-8 12 | Imports: 13 | httr (>= 1.2.1), 14 | xml2 (>= 1.0.0), 15 | stringr (>= 1.1.0), 16 | stringi (>= 1.4.0), 17 | readr (>= 1.0.0), 18 | jsonlite (>= 1.6.0), 19 | keyring (>= 1.1.0) 20 | RoxygenNote: 7.2.3 21 | Suggests: 22 | knitr, 23 | rmarkdown 24 | VignetteBuilder: knitr 25 | -------------------------------------------------------------------------------- /man/save_credentials.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/save_credentials.R 3 | \name{save_credentials} 4 | \alias{save_credentials} 5 | \title{Saves database credentials} 6 | \usage{ 7 | save_credentials(db, user, password) 8 | } 9 | \arguments{ 10 | \item{db}{database name, either 'nrw', 'regio', 'de' or 'bm'.} 11 | 12 | \item{user}{your user name.} 13 | 14 | \item{password}{your password.} 15 | } 16 | \description{ 17 | \code{save_credentials} saves a set of database credentials using the \code{keyring} package. 18 | } 19 | \details{ 20 | User/password are stored in Keychain on macOS, Credential Store on Windows or Secret Service API on Linux. 21 | If a user/password pair for a database already exists, it is silently replaced with the new pair. 22 | This function relies on the \code{\link{keyring}} package. 23 | } 24 | \seealso{ 25 | \code{\link{wiesbaden}}, \code{\link[keyring:keyring]{keyring}} 26 | } 27 | -------------------------------------------------------------------------------- /R/test_login.R: -------------------------------------------------------------------------------- 1 | #' Tests Login in GENESIS Databases 2 | #' 3 | #' \code{test_login} tests if the login works. 4 | #' 5 | #' 6 | #' @param genesis to authenticate a user and set the database (see below). 7 | #' @param ... 
other arguments sent to the httr::GET request.
8 | #'
9 | #'
10 | #' @return a \code{string} with the server return message.
11 | #'
12 | #'
13 | #'
14 | #' @examples
15 | #'
16 | #' \dontrun{
17 | #'
18 | #' test_login(genesis=c(db="regio") )
19 | #'
20 | #' }
21 | #'
22 | #'
23 | #'
24 | #'
25 | #' @export
26 | test_login <- function(genesis=NULL, ... ) {
27 |
28 |   # make_genesis() supplies the vector from which 'db', 'user' and
29 |   # 'password' are read below
30 |   credentials <- make_genesis(genesis)
31 |
32 |   # Base URL of the selected database followed by the test service name
33 |   service_url <- paste0(set_db(db=credentials['db']), "TestService_2010")
34 |
35 |   query <- list(
36 |     method = 'logonoff',
37 |     kennung = credentials['user'],
38 |     passwort = credentials['password'])
39 |
40 |   response <- GET(service_url, query = query, ... )
41 |
42 |   # Parse the reply as XML and hand back its text content
43 |   xml_text(content(response, type='text/xml', encoding="UTF-8"))
44 | }
45 |
--------------------------------------------------------------------------------
/data-raw/GV100NADfwf.csv:
--------------------------------------------------------------------------------
1 | begin,end,col_names,satzart
2 | 0,2,satzart,41
3 | 2,10,gebietsstand,41
4 | 10,15,id,41
5 | 29,39,regionsgrundtyp_stand,41
6 | 39,40,regionsgrundtyp,41
7 | 51,61,kreistyp_stand,41
8 | 61,62,kreistyp,41
9 | 62,72,arbeitsmarktregion_stand,41
10 | 72,75,arbeitsmarktregion,41
11 | 75,85,raumordnungsregion_stand,41
12 | 85,88,raumordnungsregion,41
13 | 88,98,regionstyp_stand,41
14 | 98,100,regionstyp,41
15 | 0,2,satzart,61
16 | 2,10,gebietsstand,61
17 | 10,18,id,61
18 | 30,40,planungsregion_stand,61
19 | 40,42,planungsregion,61
20 | 42,52,gemeindetypneu_stand,61
21 | 52,54,gemeindetypneu,61
22 | 66,76,verdichtungsraeume_stand,61
23 | 76,78,verdichtungsraeume,61
24 | 90,100,verstaedterung_stand,61
25 | 100,102,verstaedterung,61
26 | 102,112,zentralitaet_stand,61
27 | 112,117,zentralitaet,61
28 | 129,139,reisegebiet_stand,61
29 | 139,144,reisegebiet,61
30 | 148,158,bik_stand,61
31 | 158,161,biknr,61
32 | 164,165,biktyp5,61
33 | 165,166,bikstrukturtyp5,61
34 | 166,167,bikgroesse7,61
35 | 167,168,bikgroesse10,61
-------------------------------------------------------------------------------- /data-raw/GV100ADfwf.csv: -------------------------------------------------------------------------------- 1 | begin,end,col_names,satzart 2 | 0,2,satzart,10 3 | 0,2,satzart,20 4 | 0,2,satzart,30 5 | 0,2,satzart,40 6 | 0,2,satzart,50 7 | 0,2,satzart,60 8 | 2,10,gebietsstand,10 9 | 2,10,gebietsstand,20 10 | 2,10,gebietsstand,30 11 | 2,10,gebietsstand,40 12 | 2,10,gebietsstand,50 13 | 2,10,gebietsstand,60 14 | 10,12,id,10 15 | 10,13,id,20 16 | 10,14,id,30 17 | 10,15,id,40 18 | 10,15,id,50 19 | 10,18,id,60 20 | 18,22,id_vbm,50 21 | 18,22,id_vbm,60 22 | 22,72,bez,10 23 | 22,72,bez,20 24 | 22,72,bez,30 25 | 22,72,bez,40 26 | 22,72,bez,50 27 | 22,72,bez,60 28 | 72,122,verwaltungssitz,10 29 | 72,122,verwaltungssitz,20 30 | 72,122,verwaltungssitz,30 31 | 72,122,verwaltungssitz,40 32 | 72,122,verwaltungssitz,50 33 | 122,124,schluessel,40 34 | 122,124,schluessel,50 35 | 122,124,schluessel,60 36 | 128,139,flaeche,60 37 | 139,150,bev,60 38 | 150,161,bev_m,60 39 | 165,170,plz,60 40 | 170,175,plz_eindeutig,60 41 | 177,181,finanzamts_bezirk,60 42 | 181,185,gerichtsbarkeit,60 43 | 185,190,arbeitsagentur_bezirk,60 44 | 190,193,bundestagswahlkreise_von,60 45 | 193,196,bundestagswahlkreise_bis,60 -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(download_csv) 4 | export(read_gv100) 5 | export(read_header_genesis) 6 | export(retrieve_data) 7 | export(retrieve_datalist) 8 | export(retrieve_metadata) 9 | export(retrieve_valuelabel) 10 | export(retrieve_varinfo) 11 | export(save_credentials) 12 | export(test_login) 13 | import(httr) 14 | import(xml2) 15 | importFrom(jsonlite,fromJSON) 16 | importFrom(jsonlite,toJSON) 17 | importFrom(keyring,key_get) 18 | importFrom(keyring,key_list) 19 | 
importFrom(keyring,key_set_with_value) 20 | importFrom(readr,col_character) 21 | importFrom(readr,cols) 22 | importFrom(readr,locale) 23 | importFrom(readr,read_csv) 24 | importFrom(readr,read_csv2) 25 | importFrom(readr,read_delim) 26 | importFrom(readr,read_file) 27 | importFrom(readr,read_fwf) 28 | importFrom(readr,read_lines) 29 | importFrom(stats,na.omit) 30 | importFrom(stringi,stri_encode) 31 | importFrom(stringi,stri_trans_general) 32 | importFrom(stringr,str_detect) 33 | importFrom(stringr,str_replace_all) 34 | importFrom(stringr,str_split) 35 | importFrom(stringr,str_to_lower) 36 | importFrom(stringr,str_trim) 37 | importFrom(utils,read.csv2) 38 | -------------------------------------------------------------------------------- /man/wiesbaden-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wiesbaden-package.R 3 | \docType{package} 4 | \name{wiesbaden-package} 5 | \alias{wiesbaden-package} 6 | \alias{wiesbaden} 7 | \title{Client to access the data from the Federal Statistical Office, Germany} 8 | \description{ 9 | Data retrieval client for Federal Statistical Office of Germany 10 | } 11 | \details{ 12 | To authenticate, supply a vector with your user name, password, and database 13 | shortcut ("regio", "de", "nrw", "bm") as an argument for the \code{genesis} 14 | parameter whenever you call a \code{retrieve_*} function: 15 | \code{c(user="your-username", password="your-password", db="database-shortname")} 16 | 17 | Alternatively, store the credentials on your computer using the \code{\link{save_credentials}} function. This function 18 | relies on the \code{\link[keyring:keyring]{keyring}} package. 19 | 20 | Available databases are regionalstatistik.de (shortname: "regio"), landesdatenbank.nrw.de ("nrw"), 21 | www-genesis.destatis.de ("de") and bildungsmonitoring.de ("bm"). 
22 | } 23 | \author{ 24 | Moritz Marbach \email{moritz.marbach@tamu.edu} 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/retrieve_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/retrieve_metadata.R 3 | \name{retrieve_metadata} 4 | \alias{retrieve_metadata} 5 | \title{Retrieves Meta Data from GENESIS Databases} 6 | \usage{ 7 | retrieve_metadata(tablename, language = "de", genesis = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{tablename}{name of the table to retrieve.} 11 | 12 | \item{language}{retrieve information in German "de" (default) or in English "en" if available.} 13 | 14 | \item{genesis}{to authenticate a user and set the database (see below).} 15 | 16 | \item{...}{other arguments send to the httr::GET request.} 17 | } 18 | \value{ 19 | a \code{data.frame}. 20 | } 21 | \description{ 22 | \code{retrieve_metadata} retrieves meta data. 23 | } 24 | \details{ 25 | See the package description (\code{\link{wiesbaden}}) for details about setting the login and database. 26 | } 27 | \examples{ 28 | 29 | \dontrun{ 30 | # Meta data contain the explanations to the variable names for the table 31 | # federal election results on the county level. 
32 | # Assumes that user/password are stored via save_credentials() 33 | 34 | metadata <- retrieve_metadata(tablename="14111KJ002", genesis=c(db="regio") ) 35 | } 36 | 37 | 38 | 39 | 40 | } 41 | \seealso{ 42 | \code{\link{wiesbaden}} 43 | } 44 | -------------------------------------------------------------------------------- /man/retrieve_varinfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/retrieve_varinfo.R 3 | \name{retrieve_varinfo} 4 | \alias{retrieve_varinfo} 5 | \title{Retrieves further information on a variable from GENESIS Databases} 6 | \usage{ 7 | retrieve_varinfo(variablename, genesis = NULL, language = "de", ...) 8 | } 9 | \arguments{ 10 | \item{variablename}{name of the variable} 11 | 12 | \item{genesis}{to authenticate a user and set the database (see below).} 13 | 14 | \item{language}{retrieve information in German "de" (default) or in English "en" if available.} 15 | 16 | \item{...}{other arguments send to the httr::GET request.} 17 | } 18 | \value{ 19 | a \code{data.frame}. 20 | } 21 | \description{ 22 | \code{retrieve_varinfo} retrieves further information. 23 | } 24 | \details{ 25 | See the package description (\code{\link{wiesbaden}}) for details about setting the login and database. 
26 | } 27 | \examples{ 28 | 29 | \dontrun{ 30 | # Variable information 'AI2105' (Anteil der Empfänger von Arbeitslosengeld II im Alter 31 | # von 15 bis 24 Jahren an der Bevölkerung gleichen Alters) 32 | # Assumes that user/password are stored via save_credentials() 33 | 34 | metadata <- retrieve_varinfo(variablename="AI2105", genesis=c(db="regio") ) 35 | } 36 | 37 | 38 | 39 | 40 | } 41 | \seealso{ 42 | \code{\link{retrieve_datalist}} \code{\link{wiesbaden}} 43 | } 44 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::pkgdown, local::. 
34 | needs: website 35 | 36 | - name: Build site 37 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 38 | shell: Rscript {0} 39 | 40 | - name: Deploy to GitHub pages 🚀 41 | if: github.event_name != 'pull_request' 42 | uses: JamesIves/github-pages-deploy-action@4.1.4 43 | with: 44 | clean: false 45 | branch: gh-pages 46 | folder: docs 47 | -------------------------------------------------------------------------------- /man/download_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/download_csv.R 3 | \name{download_csv} 4 | \alias{download_csv} 5 | \title{Download the csv-file of a table} 6 | \usage{ 7 | download_csv( 8 | tablename, 9 | startyear = "", 10 | endyear = "", 11 | ..., 12 | genesis_db = "de", 13 | save = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{tablename}{name of the table to retrieve.} 18 | 19 | \item{startyear}{only retrieve values for years equal or larger to \code{startyear}. Default: "".} 20 | 21 | \item{endyear}{only retrieve values for years smaller or equal to \code{endyear}. Default: "".} 22 | 23 | \item{...}{further parameters supplied as URL parameter in the GENESIS database call} 24 | 25 | \item{genesis_db}{name of the database (default: 'de').} 26 | 27 | \item{save}{write string to a text file (default: TRUE)} 28 | } 29 | \description{ 30 | \code{download_csv()} downloads the csv for a table 31 | } 32 | \details{ 33 | Downloads the csv file either to the working directory \code{getwd()} or outputs it as a string. 34 | This is an alternative approach to the retrieve_*() functions. This is designed for \url{https://www-genesis.destatis.de/genesis/online} as it does not require a login. It might not work as expected for the other databases. 
35 | } 36 | \examples{ 37 | \dontrun{ 38 | 39 | download_csv("12411-0004.csv") 40 | 41 | } 42 | 43 | 44 | 45 | 46 | } 47 | \seealso{ 48 | \code{\link{read_header_genesis}}. 49 | } 50 | -------------------------------------------------------------------------------- /man/retrieve_valuelabel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/retrieve_valuelabel.R 3 | \name{retrieve_valuelabel} 4 | \alias{retrieve_valuelabel} 5 | \title{Retrieves Value Labels from GENESIS Databases} 6 | \usage{ 7 | retrieve_valuelabel( 8 | variablename, 9 | valuelabel = "*", 10 | genesis = NULL, 11 | language = "de", 12 | ... 13 | ) 14 | } 15 | \arguments{ 16 | \item{variablename}{name of the variable} 17 | 18 | \item{valuelabel}{"*" (default) retrieves all value labels.} 19 | 20 | \item{genesis}{to authenticate a user and set the database (see below).} 21 | 22 | \item{language}{retrieve information in German "de" (default) or in English "en" if available.} 23 | 24 | \item{...}{other arguments send to the httr::GET request.} 25 | } 26 | \value{ 27 | a \code{data.frame}. 28 | } 29 | \description{ 30 | \code{retrieve_valuelabel} retrieves value labels for variable 31 | } 32 | \details{ 33 | See the package description (\code{\link{wiesbaden}}) for details about setting the login and database. 34 | } 35 | \examples{ 36 | 37 | \dontrun{ 38 | # Value labels contain for the variable 'PART04' in the table with the 39 | # federal election results on the county level. 
40 | # Assumes that user/password are stored via save_credentials() 41 | 42 | metadata <- retrieve_valuelabel(variablename="PART04", genesis=c(db="regio") ) 43 | } 44 | 45 | 46 | 47 | 48 | } 49 | \seealso{ 50 | \code{\link{retrieve_datalist}} \code{\link{wiesbaden}} 51 | } 52 | -------------------------------------------------------------------------------- /man/retrieve_datalist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/retrieve_datalist.R 3 | \name{retrieve_datalist} 4 | \alias{retrieve_datalist} 5 | \title{Retrieves List of Tables from GENESIS Databases} 6 | \usage{ 7 | retrieve_datalist(tableseries, genesis = NULL, language = "de", ...) 8 | } 9 | \arguments{ 10 | \item{tableseries}{name of series for which tables should be retrieved.} 11 | 12 | \item{genesis}{to authenticate a user and set the database (see below).} 13 | 14 | \item{language}{retrieve information in German "de" (default) or in English "en" if available.} 15 | 16 | \item{...}{other arguments send to the httr::GET request.} 17 | } 18 | \value{ 19 | a \code{data.frame} 20 | } 21 | \description{ 22 | \code{retrieve_datalist} retrieves a list of available data tables in a series. 23 | } 24 | \details{ 25 | See the package description (\code{\link{wiesbaden}}) for details about setting the login and database. 26 | To retrieve a list of all available data use tableseries="*" or combine the wildcard character * with a prefix (see below for an example). 27 | } 28 | \examples{ 29 | 30 | \dontrun{ 31 | # Retrieves list of available tables for the table series 14111 32 | # which contains the federal election results. 
33 | # Assumes that user/password are stored via save_credentials() 34 | 35 | d <- retrieve_datalist(tableseries="14111*", genesis=c(db="regio") ) 36 | } 37 | 38 | 39 | 40 | 41 | } 42 | \seealso{ 43 | \code{\link{retrieve_data}} \code{\link{wiesbaden}} 44 | } 45 | -------------------------------------------------------------------------------- /R/wiesbaden-package.R: -------------------------------------------------------------------------------- 1 | #' 2 | #' Data retrieval client for Federal Statistical Office of Germany 3 | #' 4 | #' 5 | #' 6 | #' To authenticate, supply a vector with your user name, password, and database 7 | #' shortcut ("regio", "de", "nrw", "bm") as an argument for the \code{genesis} 8 | #' parameter whenever you call a \code{retrieve_*} function: 9 | #' \code{c(user="your-username", password="your-password", db="database-shortname")} 10 | #' 11 | #' Alternatively, store the credentials on your computer using the \code{\link{save_credentials}} function. This function 12 | #' relies on the \code{\link[keyring:keyring]{keyring}} package. 13 | #' 14 | #' Available databases are regionalstatistik.de (shortname: "regio"), landesdatenbank.nrw.de ("nrw"), 15 | #' www-genesis.destatis.de ("de") and bildungsmonitoring.de ("bm"). 
16 | #' 17 | #' 18 | #' 19 | #' @name wiesbaden-package 20 | #' 21 | #' @docType package 22 | #' @aliases wiesbaden 23 | #' @title Client to access the data from the Federal Statistical Office, Germany 24 | #' @author Moritz Marbach \email{moritz.marbach@tamu.edu} 25 | #' 26 | #' @keywords internal 27 | #' 28 | #' @import httr 29 | #' @import xml2 30 | #' @importFrom keyring key_set_with_value key_list key_get 31 | #' @importFrom stringr str_detect str_split str_replace_all str_trim str_to_lower 32 | #' @importFrom readr read_csv read_csv2 read_fwf read_delim read_file read_lines locale cols col_character 33 | #' @importFrom stringi stri_trans_general stri_encode 34 | #' @importFrom stats na.omit 35 | #' @importFrom utils read.csv2 36 | #' @importFrom jsonlite fromJSON toJSON 37 | NULL 38 | 39 | -------------------------------------------------------------------------------- /man/read_header_genesis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_header_genesis.R 3 | \name{read_header_genesis} 4 | \alias{read_header_genesis} 5 | \title{Read Header of a GENESIS csv} 6 | \usage{ 7 | read_header_genesis( 8 | ..., 9 | start, 10 | lines = 2, 11 | readr_locale = locale(encoding = "windows-1252"), 12 | replacer = NULL, 13 | clean_letters = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{...}{arguments to \code{read_csv2}} 18 | 19 | \item{start}{number of the first line of the header} 20 | 21 | \item{lines}{number of header lines} 22 | 23 | \item{readr_locale}{definition of locale() to be passed to read_csv2()} 24 | 25 | \item{replacer}{a vector that is used as the first K column-names} 26 | 27 | \item{clean_letters}{make proper variable names? (default: TRUE)} 28 | 29 | \item{locale}{default encoding is 'windows-1252'} 30 | } 31 | \value{ 32 | a \code{vector} of column names. 
33 | }
34 | \description{
35 | \code{read_header_genesis} reads the header of a GENESIS csv.
36 | }
37 | \details{
38 | To generate valid column names, the function replaces all special characters (e.g. German öüä) with ASCII letters
39 | and removes whitespaces. Multi-line headers are joined but separated with a '_'.
40 | }
41 | \examples{
42 | \dontrun{
43 |
44 | library(readr)
45 |
46 | download_csv(tablename="12411-0004")
47 |
48 | d <- read_header_genesis('12411-0004.csv', start=6, replacer=c("STAG"))
49 | data <- read_csv2('12411-0004.csv', skip=6, n_max=30-6+1,
50 | na="-", locale=locale(encoding="windows-1252") )
51 | colnames(data) <- d
52 | }
53 |
54 |
55 |
56 |
57 | }
58 | \seealso{
59 | \code{\link[readr:read_csv2]{read_csv2}}
60 | }
61 |
--------------------------------------------------------------------------------
/data-raw/make_data.R:
--------------------------------------------------------------------------------
1 | library(readr)
2 |
3 | # The definition of the GV100 file (width of fields and keys)
4 | # to be made available inside the package environment only
5 |
6 | # GV100AD
7 | ##########
8 |
9 | GV100ADfwf <- read_csv("./data-raw/GV100ADfwf.csv")
10 | GV100ADkey <- read_csv("./data-raw/GV100ADkey.csv")
11 |
12 | # Base R on purpose: only readr is attached in this script, so the
13 | # former `%>% mutate()` call could not be resolved.
14 | GV100ADkey$typ <- factor(GV100ADkey$typ)
15 |
16 | GV100ADcol <- cols(
17 |   .default = col_character(),
18 |   satzart = col_integer(),
19 |   gebietsstand = col_date(format="%Y%m%d"),
20 |   schluessel = col_integer(),
21 |   flaeche = col_integer(),
22 |   bev = col_integer(),
23 |   bev_m = col_integer()
24 | )
25 |
26 | GV100NADfwf <- read_csv("./data-raw/GV100NADfwf.csv")
27 |
28 | GV100NADcol <- cols(
29 |   .default = col_character(),
30 |   gebietsstand = col_date(format="%Y%m%d"),
31 |   # regionsgrundtyp_stand = col_date(),
32 |   # kreistyp_stand = col_date(),
33 |   # arbeitsmarktregion_stand = col_date(),
34 |   # raumordnungsregion_stand = col_date(),
35 |   # regionstyp_stand = col_date(),
36 |   # planungsregion_stand = col_date(),
37 |   # gemeindetypneu_stand = col_date(),
38 |   # verdichtungsraeume_stand = col_date(),
39 |   # verstaedterung_stand = col_date(),
40 |   # zentralitaet_stand = col_date(),
41 |   # reisegebiet_stand = col_date(),
42 |   # bik_stand = col_date(),
43 |   biktyp5 = col_integer(),
44 |   bikstrukturtyp5 = col_integer(),
45 |   bikgroesse7 = col_integer(),
46 |   bikgroesse10= col_integer(),
47 |   regionsgrundtyp = col_integer(),
48 |   kreistyp = col_integer()
49 | )
50 |
51 | # Field definitions, keys and column types bundled per GV100 flavour
52 | gv100 <- list('ad'=list(fwf=GV100ADfwf, key=GV100ADkey, col=GV100ADcol),
53 |   'nad'=list(fwf=GV100NADfwf, col=GV100NADcol))
54 |
55 | # use_data() moved from devtools to usethis; internal=TRUE writes R/sysdata.rda
56 | usethis::use_data(gv100, overwrite=TRUE, internal=TRUE)
57 |
--------------------------------------------------------------------------------
/R/download_csv.R:
--------------------------------------------------------------------------------
1 | #' Download the csv-file of a table
2 | #'
3 | #' \code{download_csv()} downloads the csv for a table
4 | #'
5 | #' @param tablename name of the table to retrieve.
6 | #' @param startyear only retrieve values for years equal or larger to \code{startyear}. Default: "".
7 | #' @param endyear only retrieve values for years smaller or equal to \code{endyear}. Default: "".
8 | #' @param ... further parameters supplied as URL parameter in the GENESIS database call
9 | #' @param genesis_db name of the database (default: 'de').
10 | #' @param save write string to a text file (default: TRUE)
11 | #'
12 | #' @details
13 | #' Downloads the csv file either to the working directory \code{getwd()} or outputs it as a string.
14 | #' This is an alternative approach to the retrieve_*() functions. This is designed for \url{https://www-genesis.destatis.de/genesis/online} as it does not require a login. It might not work as expected for the other databases.
15 | #'
16 | #'
17 | #' @seealso \code{\link{read_header_genesis}}.
18 | #'
19 | #' @return If \code{save=TRUE} the csv is written to \code{<tablename>.csv} and
20 | #' \code{NULL} is returned invisibly; otherwise the downloaded csv as a string.
21 | #'
22 | #' @examples
23 | #' \dontrun{
24 | #'
25 | #' # Pass the table name without extension; ".csv" is appended when saving
26 | #' download_csv("12411-0004")
27 | #'
28 | #' }
29 | #'
30 | #'
31 | #'
32 | #'
33 | #' @export
34 | download_csv <- function(tablename, startyear="", endyear="", ..., genesis_db="de", save=TRUE){
35 |   # Evaluate the extra arguments so that variables passed through `...`
36 |   # reach the query as values instead of unevaluated symbols.
37 |   extra <- list(...)
38 |   baseurl <- set_db2(db=genesis_db)
39 |   param <- list(
40 |     sequenz='tabelleDownload',
41 |     selectionname=tablename,
42 |     startjahr = startyear,
43 |     endjahr = endyear,
44 |     format = 'csv')
45 |   param <- c(param,extra)
46 |   httrdata <- GET(baseurl, query = param)
47 |   str <- content(httrdata, encoding="windows-1252", as = "text")
48 |   if( !save ) return(str)
49 |   # Hand writeLines() the path itself: it opens and closes the file on its
50 |   # own, so no connection object is left behind.
51 |   writeLines(str, paste0(tablename,".csv"))
52 | }
53 |
--------------------------------------------------------------------------------
/R/save_credentials.R:
--------------------------------------------------------------------------------
1 | #' Saves database credentials
2 | #'
3 | #' \code{save_credentials} saves a set of database credentials using the \code{keyring} package.
4 | #'
5 | #' @param db database name, either 'nrw', 'regio', 'de' or 'bm'.
6 | #' @param user your user name.
7 | #' @param password your password.
8 | #'
9 | #' @details
10 | #' User/password are stored in Keychain on macOS, Credential Store on Windows or Secret Service API on Linux.
11 | #' If a user/password pair for a database already exists, it is silently replaced with the new pair.
12 | #' This function relies on the \code{\link{keyring}} package.
13 | #'
14 | #' @seealso \code{\link{wiesbaden}}, \code{\link[keyring:keyring]{keyring}}
15 | #'
16 | #'
17 | #'
18 | #' @export
19 | save_credentials <- function(db, user, password){
20 |   # keyring service name used for each database shortcut
21 |   services <- c(
22 |     regio = "regionalstatistik",
23 |     nrw = "landesdatenbank-nrw",
24 |     bm = "bildungsmonitoring",
25 |     de = "destatis",
26 |     by = "landesdatenbank-by",
27 |     st = "landesdatenbank-st")
28 |   # status message emitted after storing, per database shortcut
29 |   confirmations <- c(
30 |     regio = "Successfully added credentials.",
31 |     nrw = "Successfully added credentials.",
32 |     bm = "Successfully added credentials.",
33 |     de = "Successfully saved credentials.",
34 |     by = "Successfully saved credentials.",
35 |     st = "Successfully saved credentials.")
36 |   if ( !(db %in% names(services)) ) stop(paste("Database '", db, "' unknown.",sep=""))
37 |   shortcut <- as.character(db)
38 |   key_set_with_value(services[[shortcut]], username=user, password=password)
39 |   message(confirmations[[shortcut]])
40 | }
41 |
42 |
--------------------------------------------------------------------------------
/R/read_header_genesis.R:
--------------------------------------------------------------------------------
1 | #' Read Header of a GENESIS csv
2 | #'
3 | #' \code{read_header_genesis} reads the header of a GENESIS csv.
4 | #'
5 | #' @param ... arguments to \code{read_csv2}
6 | #' @param start number of the first line of the header
7 | #' @param lines number of header lines
8 | #' @param locale default encoding is 'windows-1252'
9 | #' @param replacer a vector that is used as the first K column-names
10 | #' @param clean_letters make proper variable names?
#' Read Header of a GENESIS csv
#'
#' \code{read_header_genesis} reads the header of a GENESIS csv.
#'
#' @param ... arguments to \code{read_csv2}
#' @param start number of the first line of the header
#' @param lines number of header lines
#' @param readr_locale definition of locale() to be passed to read_csv2(). The default uses the encoding 'windows-1252'.
#' @param replacer a vector that is used as the first K column-names
#' @param clean_letters make proper variable names? (default: TRUE)
#'
#' @details
#' To generate valid column names, the function replaces all special characters (e.g. German öüä) with ASCII letters
#' and removes whitespaces. Multi-line headers are joined but separated with a '_'.
#' If \code{clean_letters=FALSE}, the header lines are joined with a single space and left otherwise untouched.
#'
#'
#' @return a \code{vector} of column names.
#'
#' @seealso \code{\link[readr:read_csv2]{read_csv2}}
#'
#' @examples
#' \dontrun{
#'
#' library(readr)
#'
#' download_csv(tablename="12411-0004")
#'
#' d <- read_header_genesis('12411-0004.csv', start=6, replacer=c("STAG"))
#' data <- read_csv2('12411-0004.csv', skip=6, n_max=30-6+1,
#'    na="-", locale=locale(encoding="windows-1252") )
#' colnames(data) <- d
#' }
#'
#'
#'
#'
#' @export
read_header_genesis <- function(..., start, lines=2, readr_locale=locale(encoding="windows-1252"), replacer=NULL, clean_letters=TRUE){
  # Read only the header lines; every cell is kept as character.
  h <- read_csv2(..., col_names=FALSE, skip=start-1, n_max=lines,
    col_types=cols( .default = col_character() ), locale=readr_locale )

  # Choose how the (possibly multi-line) header cells of one column are
  # collapsed into a single name.
  if ( clean_letters==TRUE ) {
    join_lines <- get_character_vec
  } else {
    join_lines <- function(x) paste(unlist(na.omit(x), use.names=FALSE), collapse=" ")
  }

  # Iterate over the columns directly instead of apply(), which would
  # first coerce the data frame to a matrix.
  h <- vapply(h, join_lines, character(1))

  # seq_along() keeps a zero-length replacer from producing the index c(1, 0).
  if ( length(replacer) > 0 ) h[seq_along(replacer)] <- replacer
  return(h)
}
#' Retrieves Meta Data from GENESIS Databases
#'
#' \code{retrieve_metadata} retrieves meta data.
#'
#' @param tablename name of the table to retrieve.
#' @param genesis to authenticate a user and set the database (see below).
#' @param language retrieve information in German "de" (default) or in English "en" if available.
#' @param ... other arguments sent to the httr::GET request.
#'
#'
#' @details
#' See the package description (\code{\link{wiesbaden}}) for details about setting the login and database.
#'
#' @return a \code{data.frame} with the columns \code{name}, \code{description} and \code{unit}.
#' If the response contains no variable entries, the text of the response is returned instead.
#'
#' @seealso \code{\link{wiesbaden}}
#'
#' @examples
#'
#' \dontrun{
#' # Meta data contain the explanations to the variable names for the table
#' # federal election results on the county level.
#' # Assumes that user/password are stored via save_credentials()
#'
#' metadata <- retrieve_metadata(tablename="14111KJ002", genesis=c(db="regio") )
#' }
#'
#'
#'
#'
#' @export
retrieve_metadata <- function(
  tablename, language='de',
  genesis=NULL, ... ) {

  genesis <- make_genesis(genesis)
  endpoint <- paste0(set_db(db=genesis['db']), "ExportService_2010")

  query <- list(
    method = 'DatenAufbau',
    kennung = genesis['user'],
    passwort = genesis['password'],
    namen = tablename,
    bereich = 'Alle',
    sprache = language)

  response <- GET(endpoint, query = query, ... )
  doc <- content(response, type='text/xml', encoding="UTF-8")
  nodes <- xml_find_all(doc, '//merkmale')

  # No variable nodes in the response: hand back its text content.
  if ( length(nodes)==0 ) return( xml_text(doc) )

  # One character vector per variable node, stacked row-wise.
  rows <- lapply(nodes, function(node) {
    xml_text(xml_find_all(node, './code|./inhalt|./masseinheit'))
  })
  d <- as.data.frame(do.call(rbind, rows))

  if ( ncol(d)==0 ) return("No results found.")

  colnames(d) <- c("name", "description", "unit")
  d
}


#' Retrieves further information on a variable from GENESIS Databases
#'
#' \code{retrieve_varinfo} retrieves further information.
#'
#' @param variablename name of the variable
#' @param genesis to authenticate a user and set the database (see below).
#' @param language retrieve information in German "de" (default) or in English "en" if available.
#' @param ... other arguments sent to the httr::GET request.
#'
#' @details
#' See the package description (\code{\link{wiesbaden}}) for details about setting the login and database.
#'
#' @return a \code{data.frame} with one column named after \code{variablename} and a \code{description} column.
#' If the response contains no matching entry, the text of the response is returned instead.
#'
#' @seealso \code{\link{retrieve_datalist}} \code{\link{wiesbaden}}
#'
#' @examples
#'
#' \dontrun{
#' # Variable information 'AI2105' (Anteil der Empfänger von Arbeitslosengeld II im Alter
#' # von 15 bis 24 Jahren an der Bevölkerung gleichen Alters)
#' # Assumes that user/password are stored via save_credentials()
#'
#' metadata <- retrieve_varinfo(variablename="AI2105", genesis=c(db="regio") )
#' }
#'
#'
#'
#'
#' @export
retrieve_varinfo <- function(
  variablename,
  genesis=NULL, language='de', ... ) {

  genesis <- make_genesis(genesis)
  endpoint <- paste0(set_db(db=genesis['db']), "ExportService_2010")

  query <- list(
    method = 'MerkmalInformation',
    kennung = genesis['user'],
    passwort = genesis['password'],
    name = variablename,
    bereich = 'Alle',
    sprache = language)

  response <- GET(endpoint, query = query, ... )
  doc <- content(response, type='text/xml', encoding="UTF-8")
  nodes <- xml_find_all(doc, '//MerkmalInformationReturn')

  # No information node in the response: hand back its text content.
  if ( length(nodes)==0 ) return( xml_text(doc) )

  rows <- lapply(nodes, function(node) {
    xml_text(xml_find_all(node, './code|./information'))
  })
  d <- as.data.frame(do.call(rbind, rows))

  if ( ncol(d)==0 ) return("No results found.")

  colnames(d) <- c(variablename, "description")
  d
}
22 | } 23 | \description{ 24 | The GV100 format is used by DESTATIS to publish the German municipality register 25 | } 26 | \details{ 27 | The Gemeindeverzeichnis (municipality register) is published 28 | in a fixed width file refered to as "GV1000 ASCII Format" by 29 | DESTATIS. The register features the list of municipality and 30 | higher order administrative units. The function is a wrapper 31 | around \code{\link[readr:read_fwf]{read_fwf}}. 32 | 33 | There are two types of files: One feature the administrative 34 | information (\code{version="AD"}) and one with non-administrative 35 | (\code{version="NAD"}). If \code{version=NULL}, read_gv100() guess the 36 | type based on the file name. 37 | 38 | To select a particular administrative 39 | unit use the stzrt argument (Satzart). For the 40 | AD version, the following choices are possible: 41 | 42 | 10 - Länder (states) 43 | 20 - Regierungsbezirke 44 | 30 - Regionsdaten (only Baden-Württemberg) 45 | 40 - Kreise (counties) 46 | 50 - Gemeindeverbandsdaten 47 | 60 - Gemeinden (municipalities) 48 | 49 | For the NAD version only: 50 | 51 | 41 - Kreise (counties) 52 | 61 - Gemeinden (municipalities) 53 | 54 | Since about 2019, the Gemeindeverzeichnis is using UTF-8 encoding rather 55 | than ISO-8859-1. See also DESTATIS Website: \href{https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/_inhalt.html}{GV-ISys} 56 | } 57 | \examples{ 58 | \dontrun{ 59 | 60 | d <- read_gv100("GV100NAD31122016.asc", stzrt=60) 61 | 62 | } 63 | 64 | 65 | 66 | } 67 | \seealso{ 68 | \code{\link[readr:read_fwf]{read_fwf}} 69 | } 70 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # wiesbaden 2 | 3 | # Version 1.2.10 (2024-07-22) 4 | 5 | * read_gv100() now uses stri_encode() instead of iconv(). This seems to work better with the most recent DESTATIS files. 
6 | * Updated broken links in the documentation. 7 | 8 | # Version 1.2.9 (2022-01-17) 9 | 10 | * Removed `tidyverse` package suggestion 11 | 12 | # Version 1.2.8 (2022-12-17) 13 | 14 | * New Parameter inhalte for retrieve_data 15 | * Revised help files to include tips on dealing with large tables 16 | 17 | # Version 1.2.6 (2022-02-14) 18 | 19 | * Fixed bug in read_gv100() that led to an error when reading UTF-8 files 20 | * Allow to save credentials for databases from Bavaria and Saxony-Anhalt 21 | 22 | # Version 1.2.5 (2022-01-03) 23 | 24 | * Add connection to databases from Bavaria and Saxony-Anhalt 25 | 26 | # Version 1.2.4 (2021-03-15) 27 | 28 | * Change to TestService_2010 29 | * More options to supply sachmerkmal 30 | * Fixed bugs 31 | 32 | # Version 1.2.3 (2020-06-17) 33 | 34 | * Fixed bug when using the regionalschluessel parameter in retrieve_data() 35 | * Revised vignette. 36 | 37 | 38 | # Version 1.2.2 (2020-02-14) 39 | 40 | * Allow to switch language between German and English. 41 | * Allow to supply sachmerkmal and sachschluessel as parameter 42 | * Revised the help files. 43 | 44 | # Version 1.2.1 (2020-01-26) 45 | 46 | * Fixed a bug that leads `keyring` to fail when trying to retrieve the credentials on a Windows machine. 47 | * Fixed some bugs in the package documentation/vignette. 48 | * Anticipating DESTATIS API changes in February, increase the default value to 25000 for the number of retrievable value labels via `retrieve_valuelabel()`. 49 | 50 | # Version 1.2.0 (2019-10-14) 51 | 52 | * Database usernames and passwords are now stored securely via keyring package instead of a file in the root directory. 53 | 54 | # Version 1.1.1 (2019-10-14) 55 | 56 | * Allow to retrieve 2500 value labels when using `retrieve_valuelabel()` (instead of only 500) 57 | 58 | # Version 1.1.0 (2019-10-13) 59 | 60 | * Added a vignette and revised documentation for some functions.
#' Retrieves Value Labels from GENESIS Databases
#'
#' \code{retrieve_valuelabel} retrieves value labels for variable
#'
#' @param variablename name of the variable
#' @param valuelabel "*" (default) retrieves all value labels.
#' @param genesis to authenticate a user and set the database (see below).
#' @param language retrieve information in German "de" (default) or in English "en" if available.
#' @param ... other arguments send to the httr::GET request.
#'
#' @details
#' See the package description (\code{\link{wiesbaden}}) for details about setting the login and database.
#' At most 2500 value labels are requested per call; a warning is issued when that many are returned,
#' because the list may then be incomplete.
#'
#' @return a \code{data.frame} with one column named after \code{variablename} and a \code{description} column.
#' If the response contains no catalogue entries, the text of the response is returned instead.
#'
#' @seealso \code{\link{retrieve_datalist}} \code{\link{wiesbaden}}
#'
#' @examples
#'
#' \dontrun{
#' # Value labels contain for the variable 'PART04' in the table with the
#' # federal election results on the county level.
#' # Assumes that user/password are stored via save_credentials()
#'
#' metadata <- retrieve_valuelabel(variablename="PART04", genesis=c(db="regio") )
#' }
#'
#'
#'
#'
#' @export
retrieve_valuelabel <- function(
  variablename,
  valuelabel="*",
  genesis=NULL, language='de', ... ) {

  genesis <- make_genesis(genesis)

  baseurl <- paste(set_db(db=genesis['db']), "RechercheService_2010", sep="")

  # listenLaenge: 2500 is the max for this API
  max_entries <- 2500

  param <- list(
    method = 'MerkmalAuspraegungenKatalog',
    kennung = genesis['user'],
    passwort = genesis['password'],
    namen = variablename,
    auswahl = valuelabel,
    kriterium = '',
    bereich = 'Alle',
    listenLaenge = max_entries,
    sprache = language)

  datenaufbau <- GET(baseurl, query = param, ... )
  datenaufbau <- content(datenaufbau, type='text/xml', encoding="UTF-8")
  entries <- xml_find_all(datenaufbau, '//merkmalAuspraegungenKatalogEintraege')

  # No catalogue entries in the response: hand back its text content.
  if ( length(entries)==0 ) return( xml_text(datenaufbau) )

  entries <- lapply(entries, function(x) xml_text(xml_find_all(x, './code|./inhalt')) )
  d <- as.data.frame(do.call(rbind, entries))

  if ( ncol(d)==0 ) return("No results found.")

  colnames(d) <- c(variablename, "description")

  # Mirror retrieve_datalist(): hitting the cap exactly suggests that
  # the result was truncated, so tell the user instead of staying silent.
  if ( nrow(d) == max_entries ) warning("The selected variable might have more value labels. The maximum number of results was retrieved (N=2500).\n")

  return(d)
}
#' Retrieves List of Tables from GENESIS Databases
#'
#' \code{retrieve_datalist} retrieves a list of available data tables in a series.
#'
#' @param tableseries name of series for which tables should be retrieved.
#' @param genesis to authenticate a user and set the database (see below).
#' @param language retrieve information in German "de" (default) or in English "en" if available.
#' @param ... other arguments send to the httr::GET request.
#'
#'
#' @details
#' See the package description (\code{\link{wiesbaden}}) for details about setting the login and database.
#' To retrieve a list of all available data use tableseries="*" or combine the wildcard character * with a prefix (see below for an example).
#' At most 500 entries are requested per call; a warning is issued when that many are returned.
#'
#' @return a \code{data.frame} with the columns \code{tablename} and \code{description}.
#' If the response contains no catalogue entries, the text of the response is returned instead.
#'
#' @seealso \code{\link{retrieve_data}} \code{\link{wiesbaden}}
#'
#' @examples
#'
#' \dontrun{
#' # Retrieves list of available tables for the table series 14111
#' # which contains the federal election results.
#' # Assumes that user/password are stored via save_credentials()
#'
#' d <- retrieve_datalist(tableseries="14111*", genesis=c(db="regio") )
#' }
#'
#'
#'
#'
#' @export
retrieve_datalist <- function(tableseries,
  genesis=NULL, language='de', ... ) {

  genesis <- make_genesis(genesis)

  baseurl <- paste(set_db(db=genesis['db']), "RechercheService_2010", sep="")

  param <- list(
    method = 'DatenKatalog',
    kennung = genesis['user'],
    passwort = genesis['password'],
    bereich = 'Alle',
    filter = tableseries,
    listenLaenge = '500',
    sprache = language)

  httrdata <- GET(baseurl, query = param, ... )
  xmldata <- content(httrdata, type='text/xml', encoding="UTF-8")
  entries <- xml_find_all(xmldata, '//datenKatalogEintraege')

  # No catalogue entries in the response: hand back its text content.
  if ( length(entries)==0 ) return( xml_text(xmldata) )

  # rev() puts the table code first and the label second.
  entries <- lapply(entries, function(x) rev(xml_text(xml_find_all(x, './code|./beschriftungstext'))) )
  d <- as.data.frame(do.call(rbind, entries))

  if ( ncol(d)==0 ) return("No results found.")

  # Cleanup: keep only the text after the first comma of the label and
  # flatten line breaks.
  colnames(d) <- c("tablename", "description")
  d$description <- unlist(lapply(str_split(d$description, pattern=",", n=2), function(x) x[2] ))
  d$description <- str_trim(str_replace_all(d$description, "\n", " "))

  if ( nrow(d) == 500 ) warning("The selected series might contain more data. The maximum number of results was retrieved (N=500).\n")
  return(d)
}


# Internal helpers ----

# Validates the `genesis` login vector and, when user or password is
# missing, fills both in from the credentials stored via keyring.
make_genesis <- function(genesis){
  # Database shortcut -> keyring service name.
  services <- c(
    regio = "regionalstatistik",
    nrw   = "landesdatenbank-nrw",
    bm    = "bildungsmonitoring",
    st    = "landesdatenbank-st",
    by    = "landesdatenbank-by",
    de    = "destatis")

  if ( is.null(genesis['db']) ) {
    stop("genesis['db'] missing/unrecognized.")
  }
  if ( !(genesis['db'] %in% names(services)) ){
    stop("genesis['db'] missing/unrecognized.")
  }
  # Scalar condition, hence `||` rather than the elementwise `|`.
  if ( is.na(genesis['user']) || is.na(genesis['password']) ){
    genesis <- key_user_pw(genesis, services[[as.character(genesis['db'])]])
  }
  return(genesis)
}

# Looks up user name and password stored in the keyring under `service`
# and writes them into `genesis`.
key_user_pw <- function(genesis,service){
  stored <- key_list(service=service)
  if ( nrow(stored) == 0 ) {
    stop(paste0("No credentials stored for service '", service,
      "'. Use save_credentials() or supply genesis['user'] and genesis['password']."))
  }
  # Use the first stored user; coercing the whole column would yield a
  # deparsed string when more than one user is stored.
  genesis["user"] <- as.character(stored[["username"]][1])
  genesis["password"] <- as.character(key_get(service=service,
    username=genesis["user"]))
  return(genesis)
}

# genesis_error_check <- function(xml){
#
# 	if ( length(xml)==0 ) {
# 		error <- xml_find_all(xml, './/faultstring/text()')
# 		if ( length(error) !=0 ) stop(as.character(error))
# 	}
#
# 	if ( length(xml)==1 ){
# 		if ( xml_has_attr(xml, 'nil')==TRUE ) {
# 			stop("No results found.") }
# 		}
#
# 	}

# Parses a semicolon-separated string (decimal comma) into a data.frame
# without header, optionally skipping leading lines.
readstr_csv <- function(string,skip=0){
  con <- textConnection(string)
  # A text connection is created open and is not closed by read.csv2(),
  # so release it explicitly, also when parsing fails.
  on.exit(close(con), add=TRUE)
  tab <- read.csv2(con, header=FALSE, stringsAsFactors=FALSE, skip=skip)
  return(tab)
}

# Base URL of the web service for the database shortcut `db`.
set_db <- function(db){
  urls <- c(
    nrw   = "https://www.landesdatenbank.nrw.de/ldbnrwws/services/",
    regio = "https://www.regionalstatistik.de/genesisws/services/",
    de    = "https://www-genesis.destatis.de/genesisWS/web/",
    bm    = "https://www.bildungsmonitoring.de/bildungws/services/",
    st    = "https://genesis.sachsen-anhalt.de/webservice/services/",
    by    = "https://www.statistikdaten.bayern.de/genesisWS/services/")
  db <- as.character(db)
  if ( length(db) == 1 && db %in% names(urls) ) return(urls[[db]])
  stop("DB: Currently not implemented.")
}

# URL of the online web application (used for csv downloads) for the
# database shortcut `db`.
set_db2 <- function(db){
  urls <- c(
    de    = "https://www-genesis.destatis.de/genesis/online",
    by    = "https://www.statistikdaten.bayern.de/genesis/online",
    regio = "https://www.regionalstatistik.de/genesis/online/")
  db <- as.character(db)
  if ( length(db) == 1 && db %in% names(urls) ) return(urls[[db]])
  stop("DB: Currently not implemented.")
}


# Collapses the header cells of one column into a single ASCII name:
# non-missing cells are joined with "_", non-ASCII letters are
# transliterated, and every character outside [a-zA-Z0-9_] is dropped.
get_character_vec <- function(x){
  x <- paste(unlist(na.omit(x), use.names=FALSE), collapse="_")
  x <- stri_trans_general(x, "Latin-ASCII")
  x <- str_replace_all(x, " +", "")
  x <- str_replace_all(x, "[^a-zA-Z0-9_]", "")
  return(x)
}
#' Reads the DESTATIS GV100 Format
#'
#' The GV100 format is used by DESTATIS to publish the German municipality register
#'
#'
#' @param file path to file
#' @param stzrt integer to select the administrative level (see details)
#' @param version which GV100 version. If NULL the version is guessed based on the file name.
#' @param encoding encoding of the file
#' @param ... other parameters passed to \code{read_fwf}
#'
#'
#' @details
#' The Gemeindeverzeichnis (municipality register) is published
#' in a fixed width file referred to as "GV100 ASCII Format" by
#' DESTATIS. The register features the list of municipalities and
#' higher order administrative units. The function is a wrapper
#' around \code{\link[readr:read_fwf]{read_fwf}}.
#'
#' There are two types of files: One features the administrative
#' information (\code{version="AD"}) and one the non-administrative
#' information (\code{version="NAD"}). If \code{version=NULL}, read_gv100()
#' guesses the type based on the file name.
#'
#' To select a particular administrative
#' unit use the stzrt argument (Satzart). For the
#' AD version, the following choices are possible:
#'
#' 10 - Länder (states)
#' 20 - Regierungsbezirke
#' 30 - Regionsdaten (only Baden-Württemberg)
#' 40 - Kreise (counties)
#' 50 - Gemeindeverbandsdaten
#' 60 - Gemeinden (municipalities)
#'
#' For the NAD version only:
#'
#' 41 - Kreise (counties)
#' 61 - Gemeinden (municipalities)
#'
#' Since about 2019, the Gemeindeverzeichnis is using UTF-8 encoding rather
#' than ISO-8859-1. See also DESTATIS Website: \href{https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/_inhalt.html}{GV-ISys}
#'
#' @return a \code{data.frame}.
#'
#'
#' @seealso \code{\link[readr:read_fwf]{read_fwf}}
#'
#'
#'
#' @examples
#' \dontrun{
#'
#' d <- read_gv100("GV100NAD31122016.asc", stzrt=60)
#'
#' }
#'
#'
#'
#' @export
read_gv100 <- function(file, stzrt,
    version=NULL,
    encoding="iso-8859-1",
    ...){

  if ( is.null(version) ) {
    # Look at the file name only, so that a directory whose name happens
    # to contain "NAD" does not influence the guess.
    version <- if ( str_detect(basename(file), "NAD") ) "NAD" else "AD"
  }

  # Pick the column specification for the version and keep the rows that
  # describe the requested Satzart.
  spec <- if ( version=="AD" ) gv100$ad else gv100$nad
  spec_fwf <- spec$fwf[spec$fwf$satzart==stzrt,]

  # NOTE(review): assumes spec$fwf is a data frame with one row per
  # column definition, as its two-dimensional indexing above suggests.
  if ( nrow(spec_fwf)==0 ) {
    stop(paste0("No column specification for stzrt=", stzrt,
      " in GV100 version '", version, "'."))
  }

  if(str_to_lower(encoding)=="utf-8"){

    # Workaround: https://github.com/sumtxt/wiesbaden/issues/13
    # DESTATIS states that including the Sorbian spelling in the
    # official municipality names made it necessary to encode the
    # data in UTF-8. Latin-2 (ISO8859-2) can accommodate Sorbian
    # (Latin-1 cannot).
    # `...` is documented as arguments for read_fwf() and is therefore
    # only forwarded there; forwarding it to read_lines() as well would
    # e.g. apply `skip` twice.
    x <- read_lines(file=file,
      locale = locale(encoding = "UTF-8"))
    x <- stri_encode(x, from = "UTF-8", to = "ISO8859-2")

    d <- withCallingHandlers(
      read_fwf(
        file=I(x),
        col_positions=spec_fwf,
        col_types=spec$col,
        locale = locale(encoding = "iso-8859-2"),
        ...),
      warning = h)

  } else {

    d <- withCallingHandlers(
      read_fwf(
        file=file,
        col_positions=spec_fwf,
        col_types=spec$col,
        locale = locale(encoding = encoding),
        ...),
      warning = h)

  }

  # For these AD record types, replace the numeric key in `schluessel`
  # by its label from the key table.
  if (stzrt %in% c(40,50,60) && version=="AD"){
    d <- merge(d, spec$key, by="schluessel", all.y=FALSE, all.x=TRUE)
    d$schluessel <- d$typ
    d$typ <- NULL
  }

  d <- d[d$satzart==stzrt,]

  return(as.data.frame(d))
}

# Suppress expected specific warning
h <- function(w) {
  expected <- "The following named parsers don't match the column names"
  # Match against the warning message itself rather than the whole
  # condition object.
  if ( grepl(expected, conditionMessage(w), fixed=TRUE) ) invokeRestart( "muffleWarning" )
}
Default: "".} 31 | 32 | \item{regionalmerkmal}{key for Regionalklassifikation. See details for more information. Default: "".} 33 | 34 | \item{regionalschluessel}{only retrieve values for particular regional units. See details for more information. Default: "".} 35 | 36 | \item{sachmerkmal, sachmerkmal2, sachmerkmal3}{key for Sachklassifikation. Default: "".} 37 | 38 | \item{sachschluessel, sachschluessel2, sachschluessel3}{value for Sachklassifikation. Default: "".} 39 | 40 | \item{inhalte}{retrieve only selected variables. Default is to retrieve all.} 41 | 42 | \item{genesis}{to authenticate a user and set the database (see below).} 43 | 44 | \item{language}{retrieve information in German "de" (default) or in English "en" if available.} 45 | 46 | \item{...}{other arguments send to the httr::GET request.} 47 | } 48 | \value{ 49 | a \code{data.frame}. Value variables (_val) come with three additional variables (_qual, _lock, _err). The exact nature 50 | of these variables is unknown, but _qual appears to indicate if _val is a valid value. If _qual=="e" the value in _val is 51 | valid while if _qual!="e" (then _qual = ("-","/", ".", "x", ... ) ) it is typically zero should/might be set to NA. 52 | } 53 | \description{ 54 | \code{retrieve_data} retrieves a single data table. 55 | } 56 | \details{ 57 | Use \code{\link{retrieve_datalist}} to find the \code{tablename} based on the table series you are interested in. See the 58 | package description (\code{\link{wiesbaden}}) for details about setting the login and database. 59 | 60 | The parameter \code{regionalschluessel} can either be a single value (a single Amtlicher Gemeindeschlüssel) or a 61 | comma-separated list of values supplied as string (no whitespaces). Wildcard character "*" is allowed. 62 | If \code{regionalschluessel} is set, the parameter \code{regionalmerkmal} must also be set to GEMEIN, KREISE, 63 | REGBEZ, or DLAND. 
The same logic applies to the parameter combination \code{sachmerkmal} and \code{sachschluessel*}. 64 | The parameter \code{inhalte} takes a 1-6 character long name of a variable in the table. If choosing multiple variables, 65 | delimit by ",", e.g. "STNW01,STNW02" (no whitespaces). 66 | 67 | Limiting the data request to particular years (via the \code{*year} parameters), geographical units (via the \code{regional*} parameters) 68 | attributes (via the \code{sach*} parameters) or selected variables (via the \code{inhalte} parameter) is necessary if the API request 69 | fails to return any data. If you are not able to download the table because of size, inspect the metadata first 70 | (using \link{retrieve_metadata} or \link{retrieve_valuelabel}) and then limit the data request accordingly. See also examples below. 71 | } 72 | \examples{ 73 | 74 | \dontrun{ 75 | # Retrieve values for the table 14111KJ002 which contains the 76 | # federal election results on the county level. 77 | # Assumes that user/password are stored via save_credentials() 78 | 79 | data <- retrieve_data(tablename="14111KJ002", genesis=c(db="regio") ) 80 | 81 | # ... only the values for the AfD. 82 | 83 | data <- retrieve_data(tablename="14111KJ002", sachmerkmal="PART04", 84 | sachschluessel="AFD", genesis=c(db="regio") ) 85 | 86 | # ... or only values from Saxony 87 | 88 | data <- retrieve_data(tablename="14111KJ002", regionalmerkmal="KREISE", 89 | regionalschluessel="14*", genesis=c(db="regio") ) 90 | 91 | # Limiting the number of data points is in particular important for 92 | # large tables. For example, this data request fails: 93 | 94 | data <- retrieve_data(tablename="33111GJ005", genesis=c(db='regio')) 95 | 96 | # But after limiting the request to one year, the data is returned: 97 | 98 | data <- retrieve_data(tablename="33111GJ005", genesis=c(db='regio'), startyear=2019, endyear=2019) 99 | 100 | # An alternative strategy is to only request a subset of the variables. 
101 | # For example, this data request fails: 102 | 103 | data <- retrieve_data("12711GJ002", genesis=c(db="regio")) 104 | 105 | # But when requesting only one instead of all variables, the data is returned: 106 | 107 | data <- retrieve_data("12711GJ002", inhalte="BEV081", genesis=c(db="regio")) 108 | 109 | # Example using the sachschluessel: Number of refugees from Afghanistan (ST423) 110 | # and Egypt (ST287) by district (Kreis) in 2022. 111 | 112 | data <- retrieve_data("12531KJ003", startyear = 2022, endyear = 2022, 113 | sachmerkmal="STAAG5", sachschluessel = "ST423, ST287", genesis=c(db='de')) 114 | 115 | } 116 | 117 | } 118 | \seealso{ 119 | \code{\link{retrieve_datalist}} \code{\link{wiesbaden}} 120 | } 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wiesbaden 2 | 3 | 4 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/wiesbaden)](https://cran.r-project.org/package=wiesbaden) 5 | [![Downloads](http://cranlogs.r-pkg.org/badges/wiesbaden)](https://CRAN.R-project.org/package=wiesbaden) 6 | [![Downloads](http://cranlogs.r-pkg.org/badges/grand-total/wiesbaden)](https://CRAN.R-project.org/package=wiesbaden) 7 | 8 | 9 | > [!CAUTION] 10 | The Federal Statistical Office of Germany (DESTATIS) will discontinue the old API endpoint currently used by this R package, which means the package will stop working for [genesis.destatis.de](https://www-genesis.destatis.de/genesis/online) in mid-2025. The old API endpoints for the other databases—[regionalstatistik.de](https://www.regionalstatistik.de/genesis/online), [bildungsmonitoring.de](https://www.bildungsmonitoring.de/bildung/online/logon), and [landesdatenbank.nrw.de](https://www.landesdatenbank.nrw.de)—will also be shut down in the near future. 
**There are no plans to update this package to support the new API endpoints**, so users are encouraged to switch to the R package [`restatis`](https://github.com/CorrelAid/restatis) which provides similar functionality compared to `wiesbaden` and uses the modern API endpoints. 11 | 12 | Since 2016 the R package `wiesbaden` provides functions to directly retrieve data from databases maintained by the Federal Statistical Office of Germany (DESTATIS) in Wiesbaden. The package uses the SOAP XML web service from DESTATIS [(PDF Documentation)](https://www-genesis.destatis.de/genesis/online?Menu=Webservice). 13 | 14 | Access to the following databases is implemented: 15 | 16 | * [regionalstatistik.de](https://www.regionalstatistik.de/genesis/online) 17 | * [genesis.destatis.de](https://www-genesis.destatis.de/genesis/online) 18 | * [bildungsmonitoring.de](https://www.bildungsmonitoring.de/bildung/online/logon) 19 | * [landesdatenbank.nrw.de](https://www.landesdatenbank.nrw.de) 20 | * ~~statistikdaten.bayern.de~~ (data retrieval API disabled) 21 | * ~~genesis.sachsen-anhalt.de~~ (data retrieval API disabled) 22 | * ~~statistik.sachsen.de~~ (API disabled) 23 | * ~~ergebnisse.zensus2022.de~~ (not implemented) 24 | 25 | Note, to access any of the databases using this package, you need to register on the respective website to get a personal login name and password. The registration is free. 26 | 27 | ### Installation 28 | 29 | You can install the package directly from CRAN: 30 | 31 | ```R 32 | install.packages("wiesbaden") 33 | ``` 34 | 35 | Or install the latest version from Github using: 36 | 37 | ```R 38 | remotes::install_github("sumtxt/wiesbaden", force=TRUE) 39 | ``` 40 | 41 | 42 | ### Usage 43 | 44 | The package helps with retrieving the data cubes which are used to construct the data tables available as `csv` files via the web application of each database. The data cubes are long format data tables that are much easier to process as compared to the `csv` files. 
For details on how to use the package: [Getting Started 45 | with wiesbaden](https://sumtxt.github.io/wiesbaden/articles/wiesbaden.html). 46 | 47 | The package also helps with importing the [German municipality register files](https://www.destatis.de/DE/ZahlenFakten/LaenderRegionen/Regionales/Gemeindeverzeichnis/Gemeindeverzeichnis.html) via the function `read_gv100()`. For more information see the help file of this function. 48 | 49 | Users that wish to work with the `csv` files might find the `download_csv()` and `read_header_genesis()` in this package helpful. The former can be used to automate downloads and the latter facilitates importing downloaded files. Users might also wish to check the R package `destatiscleanr` [github.com/cutterkom/destatiscleanr](https://github.com/cutterkom/destatiscleanr). 50 | 51 | 52 | ### FAQ 53 | 54 | * Does this package work with a proxy? _Yes. Set the proxy globally before calling any package command, e.g.:_ 55 | 56 | ```R 57 | httr::set_config(httr::use_proxy( 58 | "your.proxy", port = 1234, auth = "basic")) 59 | 60 | data <- retrieve_data(tablename="14111KJ002", 61 | genesis=c(db="regio")) 62 | ``` 63 | 64 | 65 | ### Similar and Complementary Packages 66 | 67 | * The R package `destatiscleanr` [github.com/cutterkom/destatiscleanr](https://github.com/cutterkom/destatiscleanr) provides functions to help importing `csv` files downloaded via the web application. 68 | 69 | * The R package `restatis` [github.com/CorrelAid/restatis](https://github.com/CorrelAid/restatis) provides similar functions to access [genesis.destatis.de](https://www-genesis.destatis.de/genesis/online), [regionalstatistik.de](https://www.regionalstatistik.de/genesis/online) and the [Zensus 2022](https://ergebnisse.zensus2022.de/datenbank/online/). 
70 | 71 | * The R package `bonn` [github.com/sumtxt/bonn](https://github.com/sumtxt/bonn) provides functions to retrieve data from the [INKAR](https://www.inkar.de/) database maintained by the Federal Office for Building and Regional Planning (BBSR) in Bonn. 72 | 73 | * The R package `AGS` [github.com/sumtxt/ags](https://github.com/sumtxt/ags) provides functions to work with the [Amtlicher Gemeindeschlüssel (AGS)](https://de.wikipedia.org/wiki/Amtlicher_Gemeindeschl%C3%BCssel), e.g. construct time series of statistics for Germany's municipalities and districts. 74 | 75 | * The Python packages [github.com/WZBSocialScienceCenter/gemeindeverzeichnis](https://github.com/WZBSocialScienceCenter/gemeindeverzeichnis) and [rohablog.wordpress.com/2011/11/22/gv100-parser-python/](https://rohablog.wordpress.com/2011/11/22/gv100-parser-python/) provide functions to read the GV100 format. 76 | 77 | * The node.js package [https://github.com/yetzt/node-gv100json](https://github.com/yetzt/node-gv100json) provides functions to read the GV100 format. 78 | 79 | * The Python package [github.com/pudo/regenesis](https://github.com/pudo/regenesis) provides a function to bulk download data from [regionalstatistik.de](https://www.regionalstatistik.de/genesis/online). 
80 | 81 | 82 | -------------------------------------------------------------------------------- /vignettes/wiesbaden.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting Data from DESTATIS via R" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Getting Data from DESTATIS via R} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | The R package `wiesbaden` provides functions to directly retrieve data from databases maintained by the Federal Statistical Office of Germany (DESTATIS) in Wiesbaden. 18 | 19 | Access to the following databases is implemented: 20 | 21 | * [regionalstatistik.de](https://www.regionalstatistik.de/genesis/online) 22 | * [genesis.destatis.de](https://www-genesis.destatis.de/genesis/online) 23 | * [landesdatenbank.nrw.de](https://www.landesdatenbank.nrw.de) 24 | * [bildungsmonitoring.de](https://www.bildungsmonitoring.de/bildung/online/logon) 25 | 26 | To access any of the databases using this package, you need to register on the respective website to get a personal login name and password. The registration is free. 27 | 28 | To authenticate, supply a vector with your user name, password, and database shortcut ("regio", "de", "nrw", "bm") as an argument for the `genesis` parameter whenever you call a `retrieve_*` function: 29 | 30 | c(user="your-username", password="your-password", db="database-shortname") 31 | 32 | Alternatively, you can use `save_credentials()` to store the credentials on your computer. This function relies on the [keyring package](https://github.com/r-lib/keyring). For more details about how credentials are stored by this package, see the keyring package documentation. 
33 | 34 | Use the function `test_login()` to check if your login/password combination allows you to access the respective database (and if the server is functioning properly). 35 | 36 | 37 | ```{r,eval = FALSE} 38 | library(wiesbaden) 39 | 40 | # Assuming credentials are stored via save_credentials() 41 | test_login(genesis=c(db='regio')) 42 | #> [1] "Sie wurden erfolgreich an- und abgemeldet." 43 | 44 | # ... or supply password/username 45 | test_login(genesis=c(db='regio', user="your-username", password="your-password")) 46 | #> [1] "Sie wurden erfolgreich an- und abgemeldet." 47 | ``` 48 | 49 | The available data are organized by themes ("Themen") and subthemes. To get a list of all available themes go to the respective database website (links above) and click on "Themen". Each theme typically comes with multiple subthemes. 50 | 51 | Suppose we want to download the federal election results on the county level from [regionalstatistik.de](https://www.regionalstatistik.de/genesis/online). This data is available in the theme "Wahlen" which has the code `14`. The federal election results are available in subtheme `141`. 52 | 53 | Using `retrieve_datalist()`, download a `data.frame` of all available data cubes in theme `141`: 54 | 55 | ```{r,eval = FALSE} 56 | d <- retrieve_datalist(tableseries="141*", genesis=c(db='regio')) 57 | ``` 58 | 59 | Note, we are assuming that credentials are stored via `save_credentials()`. 
60 | 61 | Use `grepl` (or `str_detect()` from the `stringr` package) to filter cubes with a description that contains the word "Kreise" (county): 62 | 63 | ```{r,eval = FALSE} 64 | subset(d, grepl("Kreise", description)) 65 | #> tablename 66 | #> 1 14111KJ001 67 | #> 2 14111KJ002 68 | #> description 69 | #> 1 Wahlberechtigte, Wahlbeteiligung, Gültige Zweitstimmen, Kreise und kreisfreie Städte, Stichtag 70 | #> 2 Gültige Zweitstimmen, Kreise und kreisfreie Städte, Parteien, Stichtag 71 | ``` 72 | 73 | Having identified the correct data cube, call `retrieve_data()` to download the data: 74 | 75 | ```{r,eval = FALSE} 76 | data <- retrieve_data(tablename="14111KJ002", genesis=c(db='regio')) 77 | ``` 78 | 79 | ```{r,eval = FALSE} 80 | head(data) 81 | #> id14111 KREISE PART04 STAG WAHL09_val WAHL09_qual WAHL09_lock 82 | #> 1 D 01001 AFD 22.09.2013 1855 e NA 83 | #> 2 D 01001 AFD 24.09.2017 3702 e NA 84 | #> 3 D 01001 B90-GRUENE 16.10.1994 4651 e NA 85 | #> 4 D 01001 B90-GRUENE 27.09.1998 3815 e NA 86 | #> 5 D 01001 B90-GRUENE 22.09.2002 5556 e NA 87 | #> 6 D 01001 B90-GRUENE 18.09.2005 5028 e NA 88 | #> WAHL09_err 89 | #> 1 0 90 | #> 2 0 91 | #> 3 0 92 | #> 4 0 93 | #> 5 0 94 | #> 6 0 95 | ``` 96 | 97 | The data are organized in long format: For each combination of `KREISE` (county), `PART04` (political party) and `STAG` (election date) there is a vote count (`WAHL09_val`). Please see the help file for information on the additional variables (\*\_qual, \*\_lock, \*\_err). 
98 | 99 | To get the metadata for each variable, call `retrieve_metadata()`: 100 | 101 | ```{r,eval = FALSE} 102 | retrieve_metadata(tablename="14111KJ002", genesis=c(db='regio')) 103 | #> name description unit 104 | #> 1 WAHL09 Gültige Zweitstimmen Anzahl 105 | #> 2 STAG Stichtag 106 | #> 3 PART04 Parteien 107 | #> 4 KREISE Kreise und kreisfreie Städte 108 | ``` 109 | To get the value labels for the variable `PART04`, call `retrieve_valuelabel()`: 110 | 111 | ```{r,eval = FALSE} 112 | retrieve_valuelabel("PART04", genesis=c(db='regio')) 113 | #> PART04 description 114 | #> 1 AFD AfD 115 | #> 2 B90-GRUENE GRÜNE 116 | #> 3 CDU CDU/CSU 117 | #> 4 DIELINKE DIE LINKE 118 | #> 5 FDP FDP 119 | #> 6 SONSTIGE Sonstige Parteien 120 | #> 7 SPD SPD 121 | ``` 122 | 123 | This function also works with the other variables (e.g., `KREISE`). 124 | 125 | -------------------------------------------------------------------------------- /R/retrieve_data.R: -------------------------------------------------------------------------------- 1 | #' Retrieves Data from GENESIS Databases 2 | #' 3 | #' \code{retrieve_data} retrieves a single data table. 4 | #' 5 | #' 6 | #' @param tablename name of the table to retrieve. 7 | #' @param startyear only retrieve values for years equal to or larger than \code{startyear}. Default: "". 8 | #' @param endyear only retrieve values for years smaller than or equal to \code{endyear}. Default: "". 9 | #' @param regionalschluessel only retrieve values for particular regional units. See details for more information. Default: "". 10 | #' @param regionalmerkmal key for Regionalklassifikation. See details for more information. Default: "". 11 | #' @param sachmerkmal,sachmerkmal2,sachmerkmal3 key for Sachklassifikation. Default: "". 12 | #' @param sachschluessel,sachschluessel2,sachschluessel3 value for Sachklassifikation. Default: "". 13 | #' @param inhalte retrieve only selected variables. Default is to retrieve all. 
14 | #' @param genesis to authenticate a user and set the database (see below). 15 | #' @param language retrieve information in German "de" (default) or in English "en" if available. 16 | #' @param ... other arguments sent to the httr::GET request. 17 | #' 18 | #' 19 | #' 20 | #' @details 21 | #' Use \code{\link{retrieve_datalist}} to find the \code{tablename} based on the table series you are interested in. See the 22 | #' package description (\code{\link{wiesbaden}}) for details about setting the login and database. 23 | #' 24 | #' The parameter \code{regionalschluessel} can either be a single value (a single Amtlicher Gemeindeschlüssel) or a 25 | #' comma-separated list of values supplied as string (no whitespaces). Wildcard character "*" is allowed. 26 | #' If \code{regionalschluessel} is set, the parameter \code{regionalmerkmal} must also be set to GEMEIN, KREISE, 27 | #' REGBEZ, or DLAND. The same logic applies to the parameter combination \code{sachmerkmal} and \code{sachschluessel*}. 28 | #' The parameter \code{inhalte} takes a 1-6 character long name of a variable in the table. If choosing multiple variables, 29 | #' delimit by ",", e.g. "STNW01,STNW02" (no whitespaces). 30 | #' 31 | #' Limiting the data request to particular years (via the \code{*year} parameters), geographical units (via the \code{regional*} parameters), 32 | #' attributes (via the \code{sach*} parameters) or selected variables (via the \code{inhalte} parameter) is necessary if the API request 33 | #' fails to return any data. If you are not able to download the table because of size, inspect the metadata first 34 | #' (using \link{retrieve_metadata} or \link{retrieve_valuelabel}) and then limit the data request accordingly. See also examples below. 35 | #' 36 | #' @return a \code{data.frame}. Value variables (_val) come with three additional variables (_qual, _lock, _err). The exact nature 37 | #' of these variables is unknown, but _qual appears to indicate if _val is a valid value. 
If _qual=="e" the value in _val is 38 | #' valid while if _qual!="e" (then _qual = ("-","/", ".", "x", ... ) ) it is typically zero and should/might be set to NA. 39 | #' 40 | #' 41 | #' 42 | #' 43 | #' @seealso \code{\link{retrieve_datalist}} \code{\link{wiesbaden}} 44 | #' 45 | #' @examples 46 | #' 47 | #' \dontrun{ 48 | #' # Retrieve values for the table 14111KJ002 which contains the 49 | #' # federal election results on the county level. 50 | #' # Assumes that user/password are stored via save_credentials() 51 | #' 52 | #' data <- retrieve_data(tablename="14111KJ002", genesis=c(db="regio") ) 53 | #' 54 | #' # ... only the values for the AfD. 55 | #' 56 | #' data <- retrieve_data(tablename="14111KJ002", sachmerkmal="PART04", 57 | #' sachschluessel="AFD", genesis=c(db="regio") ) 58 | #' 59 | #' 60 | #' # ... or only values from Saxony 61 | #' 62 | #' data <- retrieve_data(tablename="14111KJ002", regionalmerkmal="KREISE", 63 | #' regionalschluessel="14*", genesis=c(db="regio") ) 64 | #' 65 | #' # Limiting the number of data points is in particular important for 66 | #' # large tables. For example, this data request fails: 67 | #' 68 | #' data <- retrieve_data(tablename="33111GJ005", genesis=c(db='regio')) 69 | #' 70 | #' # But after limiting the request to one year, the data is returned: 71 | #' 72 | #' data <- retrieve_data(tablename="33111GJ005", genesis=c(db='regio'), startyear=2019, endyear=2019) 73 | #' 74 | #' # An alternative strategy is to only request a subset of the variables. 75 | #' # For example, this data request fails: 76 | #' 77 | #' data <- retrieve_data("12711GJ002", genesis=c(db="regio")) 78 | #' 79 | #' # But when requesting only one instead of all variables, the data is returned: 80 | #' 81 | #' data <- retrieve_data("12711GJ002", inhalte="BEV081", genesis=c(db="regio")) 82 | #' 83 | #' # Example using the sachschluessel: Number of refugees from Afghanistan (ST423) 84 | #' # and Egypt (ST287) by district (Kreis) in 2022. 
#'
#' data <- retrieve_data("12531KJ003", startyear = 2022, endyear = 2022,
#'  sachmerkmal="STAAG5", sachschluessel = "ST423, ST287", genesis=c(db='de'))
#'
#' }
#'
#' @export
retrieve_data <- function(
  tablename,
  startyear = "",
  endyear = "",
  regionalmerkmal = "",
  regionalschluessel = "",
  sachmerkmal = "",
  sachschluessel = "",
  sachmerkmal2 = "",
  sachschluessel2 = "",
  sachmerkmal3 = "",
  sachschluessel3 = "",
  inhalte = "",
  genesis = NULL, language = 'de', ...) {

  # Normalise the `genesis` argument through the package helper
  # (defined elsewhere in the package).
  genesis <- make_genesis(genesis)

  # Endpoint of the export service for the selected database.
  baseurl <- paste0(set_db(db = genesis['db']), "ExportService_2010")

  # Query parameters of the 'DatenExport' method. Filters left at ""
  # are sent as empty strings.
  param <- list(
    method = 'DatenExport',
    kennung = genesis['user'],
    passwort = genesis['password'],
    namen = tablename,
    bereich = 'Alle',
    format = 'csv',
    werte = 'true',
    metadaten = 'false',
    zusatz = 'false',
    startjahr = as.character(startyear),
    endjahr = as.character(endyear),
    zeitscheiben = '',
    inhalte = inhalte,
    regionalmerkmal = regionalmerkmal,
    regionalschluessel = regionalschluessel,
    sachmerkmal = sachmerkmal,
    sachschluessel = sachschluessel,
    sachmerkmal2 = sachmerkmal2,
    sachschluessel2 = sachschluessel2,
    sachmerkmal3 = sachmerkmal3,
    sachschluessel3 = sachschluessel3,
    stand = '',
    sprache = language)

  httrdata <- GET(baseurl, query = param, progress(), ...)
  xmldata <- content(httrdata, type = 'text/xml', options = "HUGE", encoding = "UTF-8")
  entries <- xml_find_all(xmldata, './/quaderDaten')

  # No <quaderDaten> node: hand the raw response text back to the
  # caller (it carries the server's message) instead of a data.frame.
  if (length(entries) == 0) return(xml_text(xmldata))

  # The payload is split on "\nK"; as in the original code, only the
  # sections of the first node are used. Sections 3-6 are read below to
  # build the column names and section 7 holds the values.
  sections <- str_split(xml_text(entries), '\nK')[[1]]

  if (sections[1] == "") return("No results found.")

  # Check for the data section BEFORE touching the header sections, so
  # a truncated response produces this message rather than a
  # subscript-out-of-bounds error further down.
  if (is.na(sections[7])) {
    stop(paste(
      "The API has returned a response without data.",
      "This might indicate that you requested too much data. Consider only",
      "requesting a subset of the data. See package documentation for guidance."))
  }

  # Only sections 3-6 are needed for the header; the (possibly large)
  # data section is parsed once, by read_delim() below.
  meta <- lapply(sections[3:6], readstr_csv)

  # Construct header.
  # `[-1]` drops the first row of each section and, unlike
  # `2:nrow(x)`, yields an empty vector for a one-row section
  # (`2:1` would select rows 2 and 1).
  # NOTE(review): DQERH/DQA/DQZ/DQI are the original variable names,
  # presumably record types of the GENESIS cube format - confirm.
  DQERH <- paste0("id", meta[[1]]$V2[2])
  DQA <- meta[[2]]$V2[-1]
  DQZ <- meta[[3]]$V2[-1]
  DQI <- meta[[4]]$V2[-1]

  # Every DQI variable expands into four columns: <name>_val,
  # <name>_qual, <name>_lock and <name>_err.
  DQIexpd <- c("val", "qual", "lock", "err")
  DQIcom <- unlist(lapply(DQI, function(x) paste(x, DQIexpd, sep = "_")))

  header <- c(DQERH, DQA, DQZ, DQIcom)

  # The first line of the data section is skipped; column names come
  # from the header assembled above.
  data <- read_delim(sections[7], skip = 1, col_names = header, delim = ';')

  as.data.frame(data)
}