├── .gitignore ├── LICENSE ├── README.md ├── R_scripts ├── 0_own_finctions │ ├── ik_an_extract.lm.R │ ├── ik_an_multiple.y.lm.list.R │ ├── ik_dm_fill.missings.R │ ├── ik_gg_gghole.R │ ├── ik_gg_population.pyramid.compare.R │ ├── ik_map_eu.base.R │ ├── ik_phd_an_theil.decomposition.R │ ├── ik_phd_estimate.conv.models.dec.R │ ├── ik_phd_gg_align.6.plots.R │ └── ik_ut_columns.classes.R ├── 1_preparation │ ├── 1.01_install_required_packages.R │ ├── 1.02_load_own_functions.R │ └── 1.03_prepare_ALL_supplementary.R ├── 2_data_manipulation │ ├── 2.01_download_geodata.R │ ├── 2.02_download&prepare_OBS_data.R │ ├── 2.03_download&prepare_PROJ_data.R │ ├── 2.04_missing_download&unzip.R │ ├── 2.05_missing_DE.R │ ├── 2.06_missing_DK.R │ ├── 2.07_missing_SI.R │ ├── 2.08_missing_RO_smooth.R │ ├── 2.09_missing_INSERT.R │ ├── 2.10_TSR_decomposition_OBS.R │ ├── 2.11_TSR_decomposition_PROJ.R │ ├── 2.12_TSR_decomposition_n2dec0342.R │ └── 2.13_TSR_2043.R ├── 3_analysis │ ├── 3.01_fig1_maps_TSR_growth_4decades.R │ ├── 3.02_fig2_TSR_subregions_0342.R │ ├── 3.03_fig3+A1+A2+A3+A4_maps_decomposition.R │ ├── 3.04_fig4_decomposed_descriptive.R │ ├── 3.05_fig5_model_estimates_0342.R │ ├── 3.06_tabA1_summary_stat_by_country.R │ ├── 3.07_figA5_pyramid_London.R │ └── 3.08_figA6_pyramid_Eastern_Europe.R └── master_script.R ├── _output └── placeholder.txt ├── data0_supplementary ├── EU_nuts │ ├── EU28.CSV.GZ │ ├── idn0.csv.gz │ ├── idn1.csv.gz │ ├── idn2.csv.gz │ └── idn3.csv.gz └── Roboto_Condensed │ ├── LICENSE.txt │ ├── RobotoCondensed-Bold.ttf │ ├── RobotoCondensed-BoldItalic.ttf │ ├── RobotoCondensed-Italic.ttf │ ├── RobotoCondensed-Light.ttf │ ├── RobotoCondensed-LightItalic.ttf │ └── RobotoCondensed-Regular.ttf ├── data1_raw ├── Eurostat │ ├── observed │ │ └── placeholder.txt │ ├── placeholder.txt │ └── projected │ │ └── placeholder.txt └── Missing_data │ └── placeholder.txt ├── data2_prepared └── placeholder.txt ├── data3_calculated └── placeholder.txt ├── genus-2017.Rproj └── geo_data └── placeholder.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | .Rproj.user 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Ilya Kashnitsky 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 
in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![doi](https://img.shields.io/badge/DOI-10.1186%2Fs41118--017--0018--2-lightgrey.svg?style=for-the-badge)][doi] 2 | [![osf](https://img.shields.io/badge/OSF-d4hjx-lightgrey.svg?style=for-the-badge)][osf] 3 | 4 | # Reproducibility materials for the paper 5 | >Kashnitsky, I., Beer, J. de, & Wissen, L. van. (2017). Decomposition of regional convergence in population aging across Europe. Genus, 73(1), 2. [https://doi.org/10.1186/s41118-017-0018-2][doi] 6 | 7 | ## REQUIREMENTS AND PRELIMINARY NOTES 8 | The analysis and the necessary data preparation were conducted using [R, a language and environment for statistical computing][r], version 3.2.4. 9 | A valid internet connection is necessary. To ensure full reproducibility of the analysis, i.e. proper execution of each R script, all the required R packages will be downloaded in the versions of 2016-03-14. For this reason we use R package for reproducible research called "checkpoint". Also, to replicate the results of the analysis, the requited data have to be downloaded; thus, you will need a valid internet connection. About 35MB of data will be downloaded. 10 | It is recommended (but not obligatory) to use [RStudio (IDE for R)][rs] (the used version is 1.0.136), and open the project file (genus-2017.Rproj). Then, there is no need to set working directory, as the R project recognizes relative paths. If, for some reason, you prefer not to use RStudio, working directory should be set to the top-level parental folder (genus-2017). 11 | Each time a fresh R session is started, the preparation steps should be taken: 12 | 1) working directory set to the top-level parental folder (genus-2017) [not necessary if you use RStudio and open the project file (genus-2017.Rproj)]; 13 | 2) required packages loaded (script "R_scripts/1_preparation/1.01_install_required_packages.R"); 14 | 3) and self-written functions loaded ("R_scripts/1_preparation/1.02_load_own_functions.R"). 15 | 16 | ## REPLICATION. HOW TO 17 | 1. Fork this repository or [unzip the archive][arch]. 18 | 2. Open "genus-2017.Rproj" file in the main project directory. 19 | 3. Run the "R_scripts/master_script.R" file. 20 | Wait. That's it. 21 | The results are stored in the directory "_output". 22 | 23 | ## LOGIC OF THE PROCESS 24 | The whole process is split into three parts, which is reflected in the structure of R scripts. First, the steps required for reproducibility are taken. Second, all data manipulations and calculations are performed. Finally, at the third step, the analysis is done, and the outputs are stored in "_output" directory. 25 | The names of the scripts are quite indicative, and each script is reasonably commented. 26 | 27 | ## CONTENTS OF THE REPRODUCIBILITY PACKAGE 28 | Directories and sub-directories in alphabetical order. 
29 | = "_output" the results are stored here 30 | = "data0_supplementary" 31 | === "EU_nuts" NUTS-2 classification for EU-28, version 2010 32 | = "data1_raw" raw data downloaded here 33 | === "Eurostat" official data from Eurostat 34 | ===== "observed" data for the observed period, 2003-2012 35 | ===== "projected" projected data, EUROPOP2013 regional, 2013-2042 36 | === "Missing_data" data from national statistical offices and HMD needed to fill the missings and harmonize the data (sub-directories will appear after the execution of script "2.04_missing_download&unzip.R") 37 | ===== "raw_DE" data for Germany 38 | ===== "raw_DK" data for Denmark 39 | ===== "raw_SI" data for Slovenia 40 | = "data2_prepared" prepared for the analysis raw data 41 | = "data3_calculated" ready for the analysis and visualization data are stored here 42 | = "geo_data" spatial objects prepared for R, needed to map the results 43 | = "R_packages" a directory where the package "checkpoint" and its dependences will be installed 44 | === ".checkpoint" directory for all other required packages, handled by "checkpoint" package in a reproducible way. 45 | = "R_scripts" 46 | === "0_own_finctions" 47 | === "1_preparation" 48 | === "2_data_manipulation" 49 | === "3_analysis" 50 | === "master_script.R" - !!! this is the main script to be run !!! 51 | = "genus-2017.Rproj" RStudio project file 52 | 53 | ## SEE ALSO 54 | - **My PhD project -- Regional demographic convergence in Europe**: [at OSF][osf], [at NIDI][nidi] 55 | - [NIDI working paper, the draft of the next PhD paper][econ] 56 | - [Blog post on the reconstruction of the data for Dannish regions][dan] 57 | - [Blog post on the composite maps (Figure 3 in the paper)][align] 58 | - [Another blog post on composite maps in R][stamp] 59 | 60 | 61 | [doi]: https://doi.org/10.1186/s41118-017-0018-2 62 | [r]: https://cran.r-project.org/ 63 | [rs]: https://www.rstudio.com/products/rstudio/download/ 64 | [arch]: https://ikashnitsky.github.io/doc/misc/genus-2017.zip 65 | [osf]: https://osf.io/d4hjx/ 66 | [nidi]: http://nidi.nl/en/research/al/270rdc 67 | [econ]: http://www.nidi.nl/shared/content/output/papers/nidi-wp-2017-02.pdf 68 | [stamp]: https://ikashnitsky.github.io/2017/subplots-in-maps/ 69 | [align]: https://ikashnitsky.github.io/2017/align-six-maps/ 70 | [dan]: https://ikashnitsky.github.io/2017/denmark-nuts-reconstruction/ 71 | -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_an_extract.lm.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Function. 
Extract estimates of simple lm models 5 | # NOTE: the function is designed for just a simple linear regression with just 6 | # 1 regressor 7 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 8 | # 9 | ################################################################################ 10 | 11 | 12 | ik_an_extract.lm <- function(list, round=T){ 13 | 14 | extract <- data.frame(matrix(0,length(list),7)) 15 | names(extract) <- c('model_y.x','coef','conf2.5','conf97.5','pValue','Rsq','cor') 16 | 17 | for (i in 1:length(list)){ 18 | 19 | modeli <- list[[i]] 20 | 21 | namei <- names(list)[i] 22 | coef <- coef(modeli)[[2]] 23 | ci1 <- confint(modeli)[2,1] 24 | ci2 <- confint(modeli)[2,2] 25 | p.value <- summary(modeli)$coefficients[2,4] 26 | Rsq <- summary(modeli)$r.squared 27 | cor <- cor(modeli$model)[2,1] 28 | 29 | extract[i,1] <- namei 30 | extract[i,2:7] <- c(coef,ci1,ci2,p.value,Rsq,cor) 31 | 32 | } 33 | 34 | if (round==T) {extract[,2:7] <- apply(extract[,2:7],2,round,3)} else {} 35 | 36 | return(extract) 37 | } -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_an_multiple.y.lm.list.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Function. Run several simple lm models with various regressands (yi) and the 5 | # same regressor (x) 6 | # NOTE: the function is designed for just a simple linear regression with just 7 | # 1 regressor; input is a data.farme; x and y are defined as numbers 8 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 9 | # 10 | ################################################################################ 11 | 12 | ik_an_multiple.y.lm.list <- function(data, y, x, scaled=F, log.x=F){ 13 | 14 | require(dplyr) 15 | 16 | models <- list() 17 | for (i in 1:length(y)){ 18 | 19 | var.x <- unlist(select(data,x)) 20 | var.y <- unlist(select(data,y[i])) 21 | 22 | if (scaled==F & log.x==F){modeli <- lm(var.y~var.x)} 23 | else if (scaled==F & log.x==T){modeli <- lm(var.y~log(var.x))} 24 | else if (scaled==T & log.x==F) {modeli <- lm(scale(var.y)~scale(var.x))} 25 | else {modeli <- lm(scale(var.y)~scale(log(var.x)))} 26 | 27 | models[[i]] <- modeli 28 | names(models)[i] <- paste(names(data)[y[i]],names(data)[x],sep='.') 29 | } 30 | return(models) 31 | } 32 | -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_dm_fill.missings.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Function. 
Replace slots in x by corresponding values from y 5 | # NOTE: input requites two data frames (x and y) with at least 1 common id field 6 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 7 | # 8 | ################################################################################ 9 | 10 | ik_dm_fill.missings <- function(x,y,by){ 11 | x <- merge(x,y,by=by,all.x=T,all.y=F) 12 | x$value.y[ is.na(x$value.y) ] <- x$value.x[ is.na(x$value.y) ] 13 | x$value <- x$value.y 14 | x <- subset(x, select=-c(value.x,value.y)) 15 | return(x) 16 | } 17 | -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_gg_gghole.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Function. Fix the problem of mapping ppolygons with holes using ggplot2 5 | # NOTE: inpot is a fortified spatialPolygons object 6 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 7 | # 8 | ################################################################################ 9 | 10 | # NOTE: The solution comes from an SO question 11 | # http://stackoverflow.com/questions/21748852/choropleth-map-in-ggplot-with-polygons-that-have-holes/32186989 12 | 13 | # How to use the function? 14 | # The output is a list with 2 elements. 15 | # The layer plygons and holes should be plotted twice using the data from boths 16 | # objects of the gghole output. For example: 17 | 18 | # gg_map <- ggplot() + 19 | # geom_polygon(data = gghole(FORT)[[1]], 20 | # aes_string(x='long', y='lat', group='group',fill='VAR'), 21 | # color='grey30',size=.1)+ 22 | # geom_polygon(data = gghole(FORT)[[2]], 23 | # aes_string(x='long', y='lat', group='group',fill='VAR'), 24 | # color='grey30',size=.1) 25 | 26 | # Where: FORT is a fortified spatialPolygons, VAR is some variable to define fill 27 | # colors 28 | 29 | gghole <- function(fort){ 30 | poly <- fort[fort$id %in% fort[fort$hole,]$id,] 31 | hole <- fort[!fort$id %in% fort[fort$hole,]$id,] 32 | out <- list(poly,hole) 33 | names(out) <- c('poly','hole') 34 | return(out) 35 | } -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_gg_population.pyramid.compare.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Function. 
Compare population pyramids at two points in time 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | 10 | ik_gg_population.pyramid.compare <- function(df, t1, t2, 11 | year='year', age='age', sex='sex', value='value', 12 | level.male='m',level.female='f', 13 | base_family = "",base_size=15) { 14 | 15 | require(ggplot2) 16 | require(ggthemes) 17 | require(dplyr) 18 | 19 | df <- df[,match(c(year,age,sex,value),colnames(df))] 20 | colnames(df) <- c('year','age','sex','value') 21 | 22 | t1s <- paste0('y',t1) 23 | t2s <- paste0('y',t2) 24 | 25 | df <- filter(df, year%in%c(t1s,t2s),sex%in%c(level.female,level.male)) 26 | 27 | # characteristics that will differntiate between 1-year and 5-year pyramids 28 | n.age.groups <- length(unique(paste(df$age))) 29 | age.group.size <- if(n.age.groups>30){1}else{5} 30 | 31 | 32 | max.f.t1 <- filter(df,sex==level.female,year==t1s) %>% mutate(value=value/sum(value)*100) %>% with(max(value)) 33 | max.m.t1 <- filter(df,sex==level.male,year==t1s) %>% mutate(value=value/sum(value)*100) %>% with(max(value)) 34 | max.f.t2 <- filter(df,sex==level.female,year==t2s) %>% mutate(value=value/sum(value)*100) %>% with(max(value)) 35 | max.m.t2 <- filter(df,sex==level.male,year==t2s) %>% mutate(value=value/sum(value)*100) %>% with(max(value)) 36 | max.y <- max(max.f.t1,max.m.t1,max.f.t2,max.m.t2) 37 | max <- ceiling(max.y*10)/10 38 | step <- if(n.age.groups>30){.5}else{2} 39 | 40 | labels.y <- seq(0,max,step) 41 | labels.y <- c(-rev(labels.y),labels.y) 42 | labels.y <- labels.y[!is.null(labels.y)] 43 | 44 | breaks <- unique(paste(df$age)) 45 | #breaks <- c(breaks,s) 46 | breaks[-(seq(0,(length(breaks)-1),5)+1)] <- NA 47 | 48 | labels <- paste(0:(length(breaks)-1)) 49 | labels[-(seq(0,(length(breaks)-1),5)+1)] <- NA 50 | 51 | 52 | 53 | gg <- ggplot(data=df,aes(x=age)) + 54 | geom_path(data=filter(df,sex==level.female,year==t1s),aes(y=value/sum(value)*100,group='identity', 55 | color='black'),alpha=.5) + 56 | geom_path(data=filter(df,sex==level.male,year==t1s),aes(y=value*(-1)/sum(value)*100,group='identity', 57 | color='yellow'),alpha=.5) + 58 | geom_path(data=filter(df,sex==level.female,year==t2s),aes(y=value/sum(value)*100,group='identity', 59 | color='black'),linetype=2) + 60 | geom_path(data=filter(df,sex==level.male,year==t2s),aes(y=value*(-1)/sum(value)*100,group='identity', 61 | color='yellow'),linetype=2) + 62 | geom_vline(xintercept=c(16,66),alpha=.3)+ 63 | geom_hline(yintercept=0)+ 64 | #add the boundaries lines 65 | geom_hline(yintercept = c(-max,max),color='white')+ 66 | coord_flip(xlim = c(1,n.age.groups+1/step*10))+ 67 | scale_color_manual('Sex',labels=c('Female','Male','Female','Male'), 68 | values=c('magenta','deepskyblue','magenta','deepskyblue'))+ 69 | #scale_fill_manual('Sex',labels=c('Female','Male'),values=c('magenta','deepskyblue'))+ 70 | scale_x_discrete(breaks=breaks,labels=labels)+ 71 | scale_y_continuous(breaks=labels.y, 72 | labels=labels.y)+ 73 | ylab('Percent of total population')+ 74 | xlab('Age')+ 75 | theme_few(base_size=base_size, base_family = base_family)+ 76 | theme(aspect.ratio=1, 77 | legend.position="none") 78 | 79 | 80 | 81 | #annotate to create legend 82 | 83 | gg <- gg + annotate('text', x=n.age.groups+15*(1/age.group.size), y=-max, 84 | label=c('Males'),hjust=0,size=5, family = base_family)+ 85 | annotate('text', x=n.age.groups+15*(1/age.group.size), y=max-step*1.5, 86 | label=c('Females'),hjust=0,size=5, family = base_family)+ 87 | 88 | 
annotate('segment', x=n.age.groups+10*(1/age.group.size), 89 | xend=n.age.groups+10*(1/age.group.size), 90 | y=-max, 91 | yend=-max+step, 92 | color='deepskyblue')+ 93 | annotate('segment', x=n.age.groups+5*(1/age.group.size), 94 | xend=n.age.groups+5*(1/age.group.size), 95 | y=-max, 96 | yend=-max+step, 97 | color='deepskyblue',linetype=2)+ 98 | 99 | annotate('segment', x=n.age.groups+10*(1/age.group.size), 100 | xend=n.age.groups+10*(1/age.group.size), 101 | y=max-step*1.5, 102 | yend=max-step*.5, 103 | color='magenta')+ 104 | annotate('segment', x=n.age.groups+5*(1/age.group.size), 105 | xend=n.age.groups+5*(1/age.group.size), 106 | y=max-step*1.5, 107 | yend=max-step*.5, 108 | color='magenta',linetype=2)+ 109 | 110 | annotate('text', x=n.age.groups+10*(1/age.group.size), y=-max+step, 111 | label=paste(t1),hjust=-.2,size=4, family = base_family)+ 112 | annotate('text', x=n.age.groups+5*(1/age.group.size), y=-max+step, 113 | label=paste(t2),hjust=-.2,size=4, family = base_family)+ 114 | 115 | annotate('text', x=n.age.groups+10*(1/age.group.size), y=max-step*.5, 116 | label=paste(t1),hjust=-.2,size=4, family = base_family)+ 117 | annotate('text', x=n.age.groups+5*(1/age.group.size), y=max-step*.5, 118 | label=paste(t2),hjust=-.2,size=4, family = base_family) 119 | 120 | 121 | return(gg) 122 | } 123 | -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_map_eu.base.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Function. Create a canvas map of Europe 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # NOTE: the function requires certain spatial objects to be loaded in Global 10 | # Environment. They can be downloaded here: 11 | # https://dx.doi.org/10.6084/m9.figshare.3100657 12 | 13 | ik_map_eu.base <- function(background_color='grey90', family = ""){ 14 | 15 | require(ggplot2) 16 | require(ggthemes) 17 | require(sp) 18 | require(rgdal) 19 | require(maptools) 20 | 21 | 22 | if (exists('Sneighbors')==F){ 23 | load('/Jottacloud/PhD/R/DATA/zzz.geodata/shp.neighbors.RData') 24 | } else {} 25 | 26 | map <- ggplot()+ 27 | geom_polygon(data=fortify(Sneighbors),aes(x=long, y=lat, group=group),fill=background_color,color=background_color)+ 28 | coord_equal(ylim=c(1350000,5450000), xlim=c(2500000, 6600000))+ 29 | guides(fill = guide_colorbar(barwidth = 1.5, barheight = 20))+ 30 | 31 | theme_map(base_family = family)+ 32 | theme(panel.border=element_rect(color = 'black',size=.5,fill = NA), 33 | legend.position = c(1, 1), 34 | legend.justification = c(1, 1), 35 | legend.background = element_rect(colour = NA, fill = NA), 36 | legend.title = element_text(size=15), 37 | legend.text = element_text(size=15))+ 38 | scale_x_continuous(expand=c(0,0)) + 39 | scale_y_continuous(expand=c(0,0)) + 40 | labs(x = NULL, y = NULL) 41 | 42 | return(map) 43 | } 44 | -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_phd_an_theil.decomposition.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 
2017-05-26 4 | # Decompose THEIL inequality index for between and within groups components 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Data organization! 10 | # Input should be any data.frame with at least 3 columns representing 11 | # (1) unique records (default is 'id') 12 | # (2) groups (default is 'group') 13 | # (3) values to be analyzed for inequality (default is 'value') 14 | # Data frame may be prepared according to default organization OR 15 | # the correct colums could be defined 16 | 17 | # Output is a list of 2 data farmes 18 | # 1. theil - global, between, within 19 | # 2. theil.each.group.impact 20 | 21 | ik_phd_an_theil.decomposition <- function(df, id='id', group='group', value='value'){ 22 | 23 | require(dplyr) 24 | require(tidyr) 25 | 26 | df <- df[,match(c(id,group,value),colnames(df))] 27 | colnames(df) <- c('id','group','value') 28 | 29 | df <- df %>% 30 | mutate(sum_value = sum(value,na.rm=T), 31 | ni = length(value), 32 | si = value/sum_value, 33 | theil.glob = sum(si*log(ni*si),na.rm = T)) 34 | 35 | 36 | theil.glob <- df %>% 37 | summarise_each(funs(mean)) %>% 38 | select(theil.glob) 39 | 40 | theil.each.group <- df %>% 41 | group_by(group) %>% 42 | mutate(sij = value/sum(value,na.rm = T), 43 | s_gj = sum(value/sum_value,na.rm = T), 44 | nij=length(value), 45 | theil.between = s_gj*log(ni/nij*s_gj), 46 | theil.within = s_gj*sum(sij*log(nij*sij),na.rm =T)) %>% 47 | summarise_each(funs(mean)) %>% 48 | select(group,theil.between,theil.within) 49 | 50 | theil.between.within <- theil.each.group %>% 51 | select(-group) %>% 52 | summarise_each(funs(sum)) 53 | 54 | theil <- cbind(theil.glob, theil.between.within) %>% 55 | gather('theil','value',1:3) 56 | 57 | theil.each.group.impact <- theil.each.group %>% 58 | gather('variable','value',2:3) 59 | 60 | 61 | out <- list() 62 | out[[1]] <- theil 63 | names(out)[1] <- 'theil' 64 | out[[2]] <- theil.each.group.impact 65 | names(out)[2] <- 'theil.each.group.impact' 66 | 67 | return(out) 68 | } 69 | 70 | -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_phd_estimate.conv.models.dec.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Function. Estimate convergence models from decomposed data 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | # NOTE: this funcrion depends on other self-defined functions: 8 | # ik_an_multiple.y.lm.list() AND ik_an_extract.lm() 9 | # 10 | ################################################################################ 11 | 12 | 13 | ik_phd_estimate.conv.models.dec <- function(df,years){ 14 | 15 | require(dplyr) 16 | require(tidyr) 17 | require(data.table) 18 | 19 | jan1.years <- paste0('y',years) 20 | fullyears <- jan1.years[-length(jan1.years)] 21 | 22 | est.l <- list() 23 | 24 | for (i in 1:length(fullyears)) { 25 | di <- filter(df, year==fullyears[i]) 26 | modelsi <- ik_an_multiple.y.lm.list(di,y=3:8,x=10) 27 | tablei <- ik_an_extract.lm(modelsi,round = F) %>% 28 | mutate(year = fullyears[i]) %>% 29 | gather('variable','value',2:7) 30 | tablei$model_y.x <- factor(c("(A). Level 1. Overall model","(B). Level 2. Non-working age","(C). Level 2. Working age", 31 | "(F). Level 3. Mortality","(E). Level 3. Migration","(D). Level 3. 
Cohort turnover")) 32 | 33 | est.l[[i]] <- tablei 34 | names(est.l)[i] <- fullyears[i] 35 | } 36 | 37 | est <- rbind_all(est.l) %>% 38 | spread(variable,value) %>% 39 | mutate(year=factor(year)) %>% 40 | group_by(model_y.x) %>% 41 | mutate(group.mean = mean(coef)) %>% 42 | ungroup() 43 | 44 | return(est) 45 | 46 | } 47 | -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_phd_gg_align.6.plots.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Function. Align decomposed plots neatly on A4 canvas 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | # See also: https://ikashnitsky.github.io/2017/align-six-maps/ 8 | # 9 | ################################################################################ 10 | 11 | 12 | ik_phd_gg_align.6.plots <- function(list.plots, family = "", 13 | labels=LETTERS[1:6], labels.size=8){ 14 | 15 | require(tidyverse) 16 | require(gridExtra) 17 | 18 | gg <- ggplot()+ 19 | coord_equal(xlim = c(0, 21), ylim = c(0, 30), expand = c(0,0))+ 20 | 21 | annotation_custom(ggplotGrob(list.plots[[1]]), 22 | xmin = 0.5, xmax = 8.5, ymin = 21, ymax = 29)+ 23 | 24 | annotation_custom(ggplotGrob(list.plots[[2]]), 25 | xmin = 12.5, xmax = 20.5, ymin = 19.5, ymax = 27.5)+ 26 | annotation_custom(ggplotGrob(list.plots[[3]]), 27 | xmin = 12.5,xmax = 20.5,ymin = 10.5,ymax = 18.5)+ 28 | 29 | annotation_custom(ggplotGrob(list.plots[[4]]), 30 | xmin = 0.5, xmax = 8.5, ymin = 9,ymax = 17)+ 31 | annotation_custom(ggplotGrob(list.plots[[5]]), 32 | xmin = 0.5, xmax = 8.5, ymin = 0, ymax = 8)+ 33 | annotation_custom(ggplotGrob(list.plots[[6]]), 34 | xmin = 12.5,xmax = 20.5, ymin = 0, ymax = 8)+ 35 | 36 | labs(x = NULL, y = NULL)+ 37 | theme_void() 38 | 39 | 40 | # DF with the coordinates of the 5 arrows 41 | df.arrows <- data.frame(id=1:5, 42 | x=c(8.5,8.5,12.5,12.5,12.5), 43 | y=c(21,21,10.5,10.5,10.5), 44 | xend=c(12.5,12.5,8.5,8.5,12.5), 45 | yend=c(20.5,17.5,10,7,7)) 46 | 47 | # add arrows 48 | gg <- gg + 49 | geom_curve(data = df.arrows %>% filter(id==1), 50 | aes(x=x,y=y,xend=xend,yend=yend), 51 | curvature = 0.1, 52 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) + 53 | geom_curve(data = df.arrows %>% filter(id==2), 54 | aes(x=x,y=y,xend=xend,yend=yend), 55 | curvature = -0.1, 56 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) + 57 | geom_curve(data = df.arrows %>% filter(id==3), 58 | aes(x=x,y=y,xend=xend,yend=yend), 59 | curvature = -0.15, 60 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) + 61 | geom_curve(data = df.arrows %>% filter(id==4), 62 | aes(x=x,y=y,xend=xend,yend=yend), 63 | curvature = 0, 64 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) + 65 | geom_curve(data = df.arrows %>% filter(id==5), 66 | aes(x=x,y=y,xend=xend,yend=yend), 67 | curvature = 0.3, 68 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) 69 | 70 | # add labes 71 | gg <- gg + annotate('text',label = labels, 72 | x=c(.5,12.5,12.5,.5,.5,12.5)+.5, 73 | y=c(29,27.5,18.5,17,8,8)+.1, 74 | size=labels.size,hjust=0, vjust=0, family = family) 75 | 76 | return(gg) 77 | } 78 | 79 | 80 | -------------------------------------------------------------------------------- /R_scripts/0_own_finctions/ik_ut_columns.classes.R: -------------------------------------------------------------------------------- 1 | 
################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Function. Fast reclass of the columns 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | ik_ut_colclass_factorize <- function(df, cols = 1:ncol(df)) { 10 | for(i in cols){ 11 | df[,i] <- factor(paste(df[,i])) 12 | } 13 | return(df) 14 | } 15 | 16 | ik_ut_colclass_numeralize <- function(df, cols = 1:ncol(df)) { 17 | for(i in cols){ 18 | df[,i] <- as.numeric(paste(df[,i])) 19 | } 20 | return(df) 21 | } 22 | 23 | ik_ut_colclass_characterize <- function(df, cols = 1:ncol(df)) { 24 | for(i in cols){ 25 | df[,i] <- paste(df[,i]) 26 | } 27 | return(df) 28 | } 29 | 30 | -------------------------------------------------------------------------------- /R_scripts/1_preparation/1.01_install_required_packages.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Install required packages 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | 10 | # This script will make sure that there will be no error in the replication of 11 | # the analysis due to R packages absence or version missmatch. 12 | # For this reason we use the special package for reproducible research called 13 | # "checkpoint". 14 | # This package installes all the packages used in a project precisely as they 15 | # were at a specific date, the date of abalysis. 16 | # More info: https://github.com/RevolutionAnalytics/checkpoint/wiki 17 | 18 | 19 | # install "checkpoint" package with all its dependencies 20 | # install only if it was not previously installed 21 | # solution by Sacha Epskamp from: http://stackoverflow.com/a/9341833/4638884 22 | if (!require('checkpoint',character.only = TRUE)) 23 | { 24 | install.packages('checkpoint',dep=TRUE) 25 | if(!require('checkpoint',character.only = TRUE)) stop("Package not found") 26 | } 27 | 28 | # load "checkpoint" package 29 | library(checkpoint) 30 | 31 | # create the system directory for `checkpont` to avoid being asked about that 32 | ifelse(dir.exists('~/.checkpoint'),yes = print('directory already exists'),no = dir.create('~/.checkpoint')) 33 | 34 | # Set the checkpoint date for reproducibility: 2017-02-09 35 | # Now quite a long process starts: "checkpoint" scans through all the scripts in 36 | # the project and finds all the packages that should be installed to run the scripts. 37 | # The packages will be installed precisely as the were on 2016-03-14. 38 | checkpoint('2017-02-09') 39 | 40 | 41 | 42 | # Let's thank the authors of these packages we use! 43 | 44 | # 'dplyr', Hadley Wickham and Romain Francois (2015). dplyr: A Grammar of Data Manipulation. R package version 0.4.3. 45 | # 'tidyr', Hadley Wickham (2016). tidyr: Easily Tidy Data with `spread()` and `gather()` Functions. R package version 0.4.1. 46 | # 'readr', Hadley Wickham and Romain Francois (2015). readr: Read Tabular Data. R package version 0.2.2. 47 | # 'data.table', M Dowle, A Srinivasan, T Short, S Lianoglou with contributions from R Saporta and E Antonyan (2015). data.table: Extension of Data.frame. R package version 1.9.6. 48 | # 'ggplot2', H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2009. 49 | # 'ggthemes', Jeffrey B. 
Arnold (2016). ggthemes: Extra Themes, Scales and Geoms for 'ggplot2'. R package version 3.0.2. 50 | # 'viridis', Simon Garnier (2016). viridis: Default Color Maps from 'matplotlib'. R package version 0.3.4. 51 | # 'cowplot', Claus O. Wilke (2015). cowplot: Streamlined Plot Theme and Plot Annotations for 'ggplot2'. R package version 0.6.1. 52 | # 'gridExtra', Baptiste Auguie (2015). gridExtra: Miscellaneous Functions for "Grid" Graphics. R package version 2.1.0. 53 | # 'sp', Pebesma, E.J., R.S. Bivand, 2005. Classes and methods for spatial data in R. R News 5 (2) 54 | # 'maptools', Roger Bivand and Nicholas Lewin-Koh (2016). maptools: Tools for Reading and Handling Spatial Objects. R package version 0.8-39. 55 | # 'rgdal', Roger Bivand, Tim Keitt and Barry Rowlingson (2015). rgdal: Bindings for the Geospatial Data Abstraction Library. R package version 1.1-3 56 | # 'rgeos' Roger Bivand and Colin Rundel (2015). rgeos: Interface to Geometry Engine - Open Source (GEOS). R package version 0.3-15. 57 | 58 | # load required packages 59 | library(dplyr) 60 | library(tidyr) 61 | library(readr) 62 | library(data.table) 63 | library(ggplot2) 64 | library(ggthemes) 65 | library(viridis) 66 | library(cowplot) 67 | library(gridExtra) 68 | library(sp) 69 | library(maptools) 70 | library(rgdal) 71 | library(rgeos) 72 | library(extrafont) 73 | library(RColorBrewer) 74 | 75 | 76 | 77 | # make sure everythins goes ok 78 | print(sessionInfo()) 79 | 80 | ### 81 | # Report finish of the script execution 82 | print('Done: script 1.01') 83 | -------------------------------------------------------------------------------- /R_scripts/1_preparation/1.02_load_own_functions.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Load and save own functions 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # Logic behind functions names. 
13 | # 'ik' - my initials 14 | # Second part defines the function's specification 15 | # 'an' - analysis 16 | # 'dm' - data manipulations 17 | # 'gg' - graphics (named after ggplot2 package) 18 | # 'map' - mapping 19 | # 'phd' - PhD project specific functions 20 | # 'ut' - utilites 21 | 22 | # Load all self-written functions 23 | 24 | source('R_scripts/0_own_finctions/ik_an_extract.lm.R') 25 | source('R_scripts/0_own_finctions/ik_an_multiple.y.lm.list.R') 26 | source('R_scripts/0_own_finctions/ik_dm_fill.missings.R') 27 | source('R_scripts/0_own_finctions/ik_gg_gghole.R') 28 | source('R_scripts/0_own_finctions/ik_gg_population.pyramid.compare.R') 29 | source('R_scripts/0_own_finctions/ik_map_eu.base.R') 30 | source('R_scripts/0_own_finctions/ik_phd_an_theil.decomposition.R') 31 | source('R_scripts/0_own_finctions/ik_phd_estimate.conv.models.dec.R') 32 | source('R_scripts/0_own_finctions/ik_phd_gg_align.6.plots.R') 33 | source('R_scripts/0_own_finctions/ik_ut_columns.classes.R') 34 | 35 | save.image('data0_supplementary/own_functions.RData') 36 | 37 | 38 | 39 | ### 40 | # Report finish of the script execution 41 | print('Done: script 1.02') -------------------------------------------------------------------------------- /R_scripts/1_preparation/1.03_prepare_ALL_supplementary.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Prepare supplementary data (mainly, classifications) 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # load own functions 13 | load('data0_supplementary/own_functions.RData') 14 | 15 | 16 | 17 | ################################################################################ 18 | # Lists of NUTS regions 19 | 20 | idn0 <- read.csv('data0_supplementary/EU_nuts/idn0.csv.gz',colClasses='character') 21 | idn0 <- unique(idn0$idn0) 22 | 23 | idn1 <- read.csv('data0_supplementary/EU_nuts/idn1.csv.gz',colClasses='character') 24 | idn1 <- unique(idn1$idn1) 25 | 26 | idn2 <- read.csv('data0_supplementary/EU_nuts/idn2.csv.gz',colClasses='character') 27 | idn2 <- unique(idn2$idn2) 28 | 29 | idn3 <- read.csv('data0_supplementary/EU_nuts/idn3.csv.gz',colClasses='character') 30 | idn3 <- unique(idn3$idn3) 31 | 32 | idn0eu28 <- read.csv('data0_supplementary/EU_nuts/EU28.CSV.GZ',header=F,colClasses='character') 33 | idn0eu28 <- idn0eu28$V1 34 | 35 | idn1eu28 <- idn1[substr(idn1,1,2)%in%idn0eu28] 36 | idn2eu28 <- idn2[substr(idn2,1,2)%in%idn0eu28] 37 | idn3eu28 <- idn3[substr(idn3,1,2)%in%idn0eu28] 38 | 39 | save(list = c('idn0','idn1','idn2','idn3','idn0eu28','idn1eu28','idn2eu28','idn3eu28'), 40 | file = 'data0_supplementary/idn0123.RData') 41 | 42 | 43 | 44 | ################################################################################ 45 | # EuroVoc definition of European regions 46 | # http://eurovoc.europa.eu/drupal/?q=request&mturi=http://eurovoc.europa.eu/100277&language=en&view=mt&ifacelang=en 47 | 48 | EU28 <- idn0eu28 49 | EUN <- c('IE','DK','SE','FI','EE','LT','LV') 50 | EUW <- c('UK','FR','DE','BE','NL','LU','AT') 51 | EUS <- c('PT','ES','IT','EL','SI','HR','CY','MT') 52 | EUE <- c('CZ','PL','SK','HU','BG','RO') 53 | 54 | EU28.df <- data.frame(country=EU28,subregion=NA) 55 | 56 | EU28.df$subregion[EU28.df$country%in%EUE] <- 'E' 57 | 
EU28.df$subregion[EU28.df$country%in%EUN] <- 'N' 58 | EU28.df$subregion[EU28.df$country%in%EUS] <- 'S' 59 | EU28.df$subregion[EU28.df$country%in%EUW] <- 'W' 60 | 61 | EU28.df$subregion <- factor(EU28.df$subregion) 62 | 63 | save(EU28,EUN,EUW,EUS,EUE,EU28.df, file = 'data0_supplementary/subregions.EuroVoc.countries.RData') 64 | 65 | 66 | # classification of nuts2 regions 67 | df <- data.frame(id=idn2eu28) %>% 68 | mutate(country=substr(id,1,2), 69 | subregion='NA') 70 | 71 | df[which(substr(df$id,1,2)%in%EUN),'subregion'] <- 'N' 72 | df[which(substr(df$id,1,2)%in%EUW),'subregion'] <- 'W' 73 | df[which(substr(df$id,1,2)%in%EUS),'subregion'] <- 'S' 74 | df[which(substr(df$id,1,2)%in%EUE),'subregion'] <- 'E' 75 | 76 | idn2sub <- ik_ut_colclass_factorize(df) 77 | 78 | save(file = 'data0_supplementary/idn2sub.Rdata',idn2sub) 79 | 80 | 81 | 82 | ################################################################################ 83 | # Remote NUTS-2 regions to be removed 84 | 85 | # first step. Remove remote areas 86 | # Remove non-European territories of France, Spain and Portugal 87 | remove.n1 <- c('ES7','FR9','PT2','PT3') 88 | remove.n2 <- c(paste0('ES',c(63,64,70)),paste('FR',91:94,sep=''),'PT20','PT30') 89 | remove.remote <- c(remove.n1,remove.n2) 90 | 91 | save(file = 'data0_supplementary/remove.remote.Rdata',remove.remote) 92 | 93 | 94 | # load prefered font for graphics - "Roboto Condensed" 95 | extrafont::ttf_import('data0_supplementary/Roboto_Condensed') 96 | 97 | 98 | 99 | ### 100 | # Report finish of the script execution 101 | print('Done: script 1.03') -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.01_download_geodata.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Prepare. Download geodata 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # download cached geodata [2.21 MB] 13 | url <- 'https://ndownloader.figshare.com/files/4817635' 14 | path <- 'geo_data/shp.n2eu28.RData.zip' 15 | 16 | ifelse(file.exists(path), yes = 'file alredy exists', no = download.file(url, path,mode="wb")) 17 | # If there are problems downloading the data automatically, please download them manually from 18 | # https://dx.doi.org/10.6084/m9.figshare.3100657.v2 19 | 20 | unzip(zipfile = path,exdir = 'geo_data') 21 | 22 | ### 23 | # Report finish of the script execution 24 | print('Done: script 2.01') 25 | -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.02_download&prepare_OBS_data.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 
2017-05-26 4 | # Prepare data for inserting missings 5 | # NUTS-2, 1-year age structure, 100 years, OBSERVED period 6 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 7 | # 8 | ################################################################################ 9 | 10 | # Erase all objects in memory 11 | rm(list = ls(all = TRUE)) 12 | 13 | # load own functions 14 | load('data0_supplementary/own_functions.RData') 15 | 16 | load('data0_supplementary/idn0123.RData') 17 | 18 | 19 | ################################################################################ 20 | # Population structure 21 | 22 | # download cached Eurostat data [7.35 MB] 23 | url1 <- 'https://ndownloader.figshare.com/files/4795327' 24 | path1 <- 'data1_raw/Eurostat/observed/demo_r_d2jan.tsv.gz' 25 | 26 | ifelse(file.exists(path1), yes = 'file alredy exists', no = download.file(url1, path1,mode="wb")) 27 | # If there are problems downloading the data automatically, please download them manually from 28 | # https://dx.doi.org/10.6084/m9.figshare.3084394.v1 29 | 30 | # read the raw data in 31 | n2p1.raw <- read.table(path1, sep = c('\t'), header = T,as.is = T) 32 | 33 | 34 | ##### 35 | # CORRECTIONS 36 | 37 | #correct the first column 38 | n2p1 <- separate(n2p1.raw, sex.age.geo.time, c('sex','age','id'), sep = ',') 39 | 40 | n2p1 <- ik_ut_colclass_factorize(n2p1,1:3) 41 | 42 | 43 | # reshape to long data format 44 | n2p1 <- n2p1 %>% gather('year','value',4:28) 45 | 46 | 47 | #!!! remove EU flags from the value column 48 | 49 | # b break in time series c confidential d definition differs, see metadata 50 | # e estimated f forecast i see metadata (phased out) 51 | # n not significant p provisional r revised 52 | # s Eurostat estimate (phased out) u low reliability z not applicable 53 | 54 | # in our data set only these flags are present: b, c, e, p 55 | 56 | n2p1$value <- gsub('b','',n2p1$value) 57 | n2p1$value <- gsub('c','',n2p1$value) 58 | n2p1$value <- gsub('e','',n2p1$value) 59 | n2p1$value <- gsub('p','',n2p1$value) 60 | 61 | # change ':' symbol for NA 62 | n2p1$value[n2p1$value==':'] <- NA 63 | n2p1$value[n2p1$value==': '] <- NA 64 | 65 | # transform the value column to numeric format 66 | n2p1$value <- as.numeric(n2p1$value) 67 | 68 | # correct years 69 | n2p1$year <- gsub('X','y',n2p1$year) 70 | n2p1$year <- factor(n2p1$year) 71 | 72 | ### FILTER ONLY needed (but keep sex for now) 73 | # filter NUTS-2, 74 | n2p1 <- filter(n2p1, id%in%idn2eu28, year%in%paste0('y',2003:2013)) %>% 75 | droplevels() 76 | 77 | # Correct the levels of factor variables: age and sex 78 | # correct ages, levels 79 | for (i in 1:nlevels(n2p1$age)){ 80 | agei <- gsub('Y','',levels(n2p1$age)[i]) 81 | subi <- if(nchar(agei)==1){ 82 | paste0('a00',agei) 83 | }else if(nchar(agei)==2){ 84 | paste0('a0',agei) 85 | }else if(nchar(agei)==3){ 86 | paste0('a',agei) # we will summarize everything greater than 100 years 87 | }else{agei} 88 | levels(n2p1$age)[i] <- subi 89 | #print(paste(i,'out of',nlevels(n2p1$age))) 90 | } 91 | 92 | n2p1$age <- paste(n2p1$age) 93 | n2p1$sex <- paste(n2p1$sex) 94 | 95 | n2p1$age[n2p1$age=='TOTAL'] <- 'total' 96 | n2p1$age[n2p1$age=='_LT1'] <- 'a000' 97 | n2p1$age[n2p1$age%in%c('_OPEN','_GE100','aUNK',paste0('a',100:109))] <- 'open' 98 | 99 | n2p1$sex[n2p1$sex=='T'] <- 'b' 100 | n2p1$sex[n2p1$sex=='F'] <- 'f' 101 | n2p1$sex[n2p1$sex=='M'] <- 'm' 102 | 103 | n2p1$age <- factor(paste(n2p1$age)) 104 | n2p1$sex <- factor(paste(n2p1$sex)) 105 | 106 | # change NAs for zeros 107 | n2p1[is.na(n2p1)] <- 0 108 | 109 | 110 | # summarize open 
age group 111 | n2p1 <- n2p1 %>% group_by(year,id,sex,age) %>% 112 | summarise(value=sum(value)) %>% 113 | ungroup() %>% droplevels() 114 | 115 | # remove now unneccessary extra digit in age levels 116 | levels(n2p1$age) <- gsub('a0','a',levels(n2p1$age)) 117 | 118 | 119 | 120 | 121 | 122 | 123 | ### 124 | # Remove remote regions 125 | load('data0_supplementary/remove.remote.Rdata') 126 | 127 | n2p1 <- n2p1 %>% filter(!id%in%remove.remote) %>% 128 | droplevels() 129 | 130 | # 131 | 132 | ##### 133 | # save the data before inserting the missing data 134 | save(n2p1, file = 'data1_raw/n2p1.missings.RData') 135 | ################################################################################ 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | ################################################################################ 144 | # Deaths 145 | 146 | # download cached Eurostat data [3.59 MB] 147 | url2 <- 'https://ndownloader.figshare.com/files/4795345' 148 | path2 <- 'data1_raw/Eurostat/observed/demo_r_magec.tsv.gz' 149 | 150 | ifelse(file.exists(path2), yes = 'file alredy exists', no = download.file(url2, path2,mode="wb")) 151 | # If there are problems downloading the data automatically, please download them manually from 152 | # https://dx.doi.org/10.6084/m9.figshare.3084418 153 | 154 | 155 | # read the raw data in 156 | n2d1.raw <- read.table(path2, sep = c('\t'), header = T,as.is = T) 157 | 158 | ##### 159 | # CORRECTIONS 160 | 161 | #correct the first column 162 | n2d1 <- separate(n2d1.raw, sex.age.geo.time, c('sex','age','id'), sep = ',') 163 | 164 | #### 165 | n2d1 <- ik_ut_colclass_factorize(n2d1,1:3) 166 | 167 | # reshape to long data format 168 | n2d1 <- n2d1 %>% gather('year','value',4:27) 169 | 170 | 171 | #!!! remove EU flags from the value column 172 | 173 | # b break in time series c confidential d definition differs, see metadata 174 | # e estimated f forecast i see metadata (phased out) 175 | # n not significant p provisional r revised 176 | # s Eurostat estimate (phased out) u low reliability z not applicable 177 | 178 | # in our data set only these flags are present: b, c, e, p 179 | 180 | n2d1$value <- gsub('b','',n2d1$value) 181 | n2d1$value <- gsub('c','',n2d1$value) 182 | n2d1$value <- gsub('e','',n2d1$value) 183 | n2d1$value <- gsub('p','',n2d1$value) 184 | 185 | # change ':' symbol for NA 186 | n2d1$value[n2d1$value==':'] <- NA 187 | n2d1$value[n2d1$value==': '] <- NA 188 | 189 | # transform the value column to numeric format 190 | n2d1$value <- as.numeric(n2d1$value) 191 | 192 | # correct years 193 | n2d1$year <- gsub('X','y',n2d1$year) 194 | n2d1$year <- factor(n2d1$year) 195 | 196 | ### FILTER ONLY needed (but keep sex for now) 197 | # filter NUTS-2, 198 | n2d1 <- filter(n2d1, id%in%idn2eu28, year%in%paste0('y',2003:2012)) %>% 199 | droplevels() 200 | 201 | # Correct the levels of factor variables: age and sex 202 | # correct ages, levels 203 | for (i in 1:nlevels(n2d1$age)){ 204 | agei <- gsub('Y','',levels(n2d1$age)[i]) 205 | subi <- if(nchar(agei)==1){ 206 | paste0('a00',agei) 207 | }else if(nchar(agei)==2){ 208 | paste0('a0',agei) 209 | }else if(nchar(agei)==3){ 210 | paste0('a',agei) # we will summarize everything greater than 100 years 211 | }else{agei} 212 | levels(n2d1$age)[i] <- subi 213 | #print(paste(i,'out of',nlevels(n2d1$age))) 214 | } 215 | 216 | n2d1$age <- paste(n2d1$age) 217 | n2d1$sex <- paste(n2d1$sex) 218 | 219 | n2d1$age[n2d1$age=='TOTAL'] <- 'total' 220 | n2d1$age[n2d1$age=='_LT1'] <- 'a000' 221 | 
n2d1$age[n2d1$age%in%c('_OPEN','_GE100','aUNK',paste0('a',100:109))] <- 'open' 222 | 223 | n2d1$sex[n2d1$sex=='T'] <- 'b' 224 | n2d1$sex[n2d1$sex=='F'] <- 'f' 225 | n2d1$sex[n2d1$sex=='M'] <- 'm' 226 | 227 | n2d1$age <- factor(paste(n2d1$age)) 228 | n2d1$sex <- factor(paste(n2d1$sex)) 229 | 230 | # summarize open age group 231 | n2d1 <- n2d1 %>% group_by(year,id,sex,age) %>% 232 | summarise(value=sum(value, na.rm=T)) %>% 233 | ungroup() %>% droplevels() 234 | 235 | # remove now unneccessary extra digit in age levels 236 | levels(n2d1$age) <- gsub('a0','a',levels(n2d1$age)) 237 | 238 | 239 | ### 240 | # Remove remote regions 241 | load('data0_supplementary/remove.remote.Rdata') 242 | 243 | n2d1 <- n2d1 %>% filter(!id%in%remove.remote) %>% 244 | droplevels() 245 | 246 | 247 | ##### 248 | # save the data before inserting the missing data 249 | save(n2d1, file = 'data1_raw/n2d1.missings.RData') 250 | ################################################################################ 251 | 252 | 253 | ### 254 | # Report finish of the script execution 255 | print('Done: script 2.02') 256 | -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.03_download&prepare_PROJ_data.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Read and prepare regional population projections data 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # load own functions 13 | load('data0_supplementary/own_functions.RData') 14 | 15 | load('data0_supplementary/idn0123.RData') 16 | load('data0_supplementary/remove.remote.Rdata') 17 | 18 | 19 | 20 | ################################################################################ 21 | # Population structure 22 | 23 | # download cached Eurostat data [16.28 MB] 24 | url1 <- 'http://ndownloader.figshare.com/files/4795531' 25 | path1 <- 'data1_raw/Eurostat/projected/proj_13rpms.tsv.gz' 26 | 27 | ifelse(file.exists(path1), yes = 'file alredy exists', no = download.file(url1, path1,mode="wb")) 28 | # If there are problems downloading the data automatically, please download them manually from 29 | # https://dx.doi.org/10.6084/m9.figshare.3084556 30 | 31 | # read the raw data in 32 | pop.raw <- read.table(path1, sep = c('\t'), header = T) 33 | 34 | #correct the first column 35 | pop <- separate(pop.raw, age.sex.geo.time, c('age','sex','id'), sep = ',') 36 | 37 | pop <- ik_ut_colclass_factorize(pop, 1:3) 38 | 39 | 40 | # convert to long data format 41 | p1 <- gather(pop, year, value, X2080:X2013) 42 | 43 | p1$year <- gsub('X','y',p1$year) 44 | p1$year <- factor(p1$year) 45 | levels(p1$sex) <- c('f','m','b') 46 | p1$sex <- factor(paste(p1$sex)) 47 | 48 | 49 | p1 <- filter(p1, id%in%idn2eu28, year%in%paste0('y',2013:2043)) %>% 50 | droplevels() 51 | 52 | #correct ages 53 | ages <- levels(p1$age) 54 | ages[nchar(ages)==2] <- gsub('Y','a0',ages[nchar(ages)==2]) 55 | ages[nchar(ages)==3] <- gsub('Y','a',ages[nchar(ages)==3]) 56 | ages[1:3] <- c('total','open','a00') 57 | 58 | levels(p1$age) <- ages 59 | p1$age <- factor(paste(p1$age)) 60 | 61 | p1 <- p1 %>% select(year,id,sex,age,value) %>% 62 | arrange(sex,year,id,age) 63 | 64 | 65 | 66 | 
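################################################################################
# OPTIONAL sanity check -- a minimal sketch, not part of the original pipeline.
# It relies only on the object p1 created above and on dplyr (already loaded by
# script 1.01). It flags duplicated year/id/sex/age combinations and remaining
# NA values, either of which would signal a problem in the reshaping and
# filtering steps above.

chk <- p1 %>%
    group_by(year, id, sex, age) %>%
    summarise(n = n()) %>%
    ungroup()

print(paste('duplicated year/id/sex/age combinations:', sum(chk$n > 1)))
print(paste('missing values in p1$value:', sum(is.na(p1$value))))
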
################################################################################ 67 | # Migration data 68 | 69 | # download cached Eurostat data [4.54 MB] 70 | url2 <- 'https://ndownloader.figshare.com/files/4795534' 71 | path2 <- 'data1_raw/Eurostat/projected/proj_13ranmig.tsv.gz' 72 | 73 | ifelse(file.exists(path2), yes = 'file alredy exists', no = download.file(url2, path2,mode="wb")) 74 | # If there are problems downloading the data automatically, please download them manually from 75 | # https://dx.doi.org/10.6084/m9.figshare.3084559 76 | 77 | # read the raw data in 78 | mig.raw <- read.table(path2, sep = c('\t'), header = T) 79 | 80 | #correct the first column 81 | mig.raw <- separate(mig.raw, age.sex.geo.time, c('age','sex','id'), sep = ',') 82 | 83 | mig.raw <- ik_ut_colclass_factorize(mig.raw, 1:3) 84 | 85 | 86 | # convert to long data format 87 | m1 <- gather(mig.raw, year, value, X2080:X2013) 88 | 89 | m1$year <- gsub('X','y',m1$year) 90 | m1$year <- factor(m1$year) 91 | levels(m1$sex) <- c('f','m','b') 92 | m1$sex <- factor(paste(m1$sex)) 93 | 94 | # filter only relevant data 95 | m1 <- filter(m1, id%in%idn2eu28, year%in%paste0('y',2013:2042)) %>% 96 | droplevels() 97 | 98 | #correct ages 99 | ages <- levels(m1$age) 100 | ages[nchar(ages)==2] <- gsub('Y','a0',ages[nchar(ages)==2]) 101 | ages[nchar(ages)==3] <- gsub('Y','a',ages[nchar(ages)==3]) 102 | ages[1:3] <- c('total','open','a00') 103 | ages[ages=='Y100'] <- 'open' 104 | 105 | levels(m1$age) <- ages 106 | m1$age <- factor(paste(m1$age)) 107 | 108 | # summarize the two 'open' age groups 109 | m1 <- m1 %>% group_by(year,id,sex,age) %>% 110 | summarise(value=sum(value)) %>% 111 | ungroup() %>% 112 | arrange(sex,year,id,age) 113 | 114 | 115 | 116 | 117 | ################################################################################ 118 | # Remove remote regions 119 | 120 | p1 <- p1 %>% filter(!id%in%remove.remote) %>% droplevels() 121 | m1 <- m1 %>% filter(!id%in%remove.remote) %>% droplevels() 122 | 123 | 124 | ################################################################################ 125 | # Save the ready data 126 | 127 | n2p1proj <- p1 128 | n2m1proj <- m1 129 | 130 | save(n2p1proj,n2m1proj,file = 'data2_prepared/n2.PROJ.RData') 131 | save(n2p1proj,file = 'data2_prepared/n2p1proj.RData') 132 | save(n2m1proj,file = 'data2_prepared/n2m1proj.RData') 133 | 134 | 135 | 136 | 137 | ### 138 | # Report finish of the script execution 139 | print('Done: script 2.03') -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.04_missing_download&unzip.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Download and unzip the missing data. DE, DK, SI. 
NUTS2 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | 13 | 14 | # download the cached data, downloaded from national Statistical Offices [795.4 KB] 15 | url <- 'https://ndownloader.figshare.com/files/4819075' 16 | path <- 'data1_raw/Missing_data/missing_data_raw.zip' 17 | 18 | 19 | ifelse(file.exists(path), yes = 'file alredy exists', no = download.file(url, path,mode="wb")) 20 | # If there are problems downloading the data automatically, please download them manually from 21 | # https://dx.doi.org/10.6084/m9.figshare.3100111.v2 22 | 23 | unzip(zipfile = path,exdir = 'data1_raw/Missing_data') 24 | 25 | 26 | 27 | ### 28 | # Report finish of the script execution 29 | print('Done: script 2.04') 30 | -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.05_missing_DE.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Prepare missing data. Germany. NUTS-2 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | 10 | # What is missing? 11 | # Germany. DED4 and DED5 (Chemnitz and Leipzig) + NUTS-3 12 | # Deaths and pop str. for 2003-2013 13 | 14 | 15 | # Erase all objects in memory 16 | rm(list = ls(all = TRUE)) 17 | 18 | # load own functions 19 | load('data0_supplementary/own_functions.RData') 20 | 21 | 22 | 23 | 24 | p1 <- read.csv('data1_raw/Missing_data/raw_DE/173-33-4-B.csv.gz',colClasses = 'character') 25 | 26 | p1$age <- rep(c(paste0('a0',0:9),paste0('a',10:74),rep('open',3),'total')) 27 | 28 | # summarise open age group 29 | p1 <- p1 %>% gather('sex','value',4:6) %>% 30 | mutate(value=as.numeric(value)) %>% 31 | group_by(year,id,sex,age) %>% 32 | summarise(value=sum(value)) %>% 33 | ungroup() %>% 34 | filter(age!='total') 35 | 36 | # !!! 
add 2003 equal to 2004 a year earlier 37 | p1.03 <- p1 %>% filter(year=='2004', !age%in%c('a00')) 38 | p1.03$year <- '2003' 39 | p1.03$age <- c(paste0('a0',0:9),paste0('a',10:73),'open') 40 | p1.03.a74 <- p1.03 %>% filter(age=='a73') 41 | p1.03.a74$age <- 'a74' 42 | p1.03 <- bind_rows(p1.03,p1.03.a74) 43 | 44 | # add 2003 to the main data set 45 | p1 <- bind_rows(p1.03,p1) %>% arrange(sex,year,id,age) 46 | 47 | 48 | # smooth the old ages 49 | open <- p1 %>% filter(age=='open') %>% droplevels() %>% select(-age) 50 | 51 | # load HMD data 52 | hmd <- read.table('data1_raw/Missing_data/raw_DE/HMD_DE_exposure_1x1.txt.gz',header=T,skip=2,as.is = T) 53 | names(hmd) <- c('year','age','f','m','b') 54 | hmd.weights <- hmd %>% 55 | filter(age%in%75:99,year%in%2003:2010) %>% droplevels() %>% 56 | ik_ut_colclass_numeralize(3:5) %>% 57 | gather('sex','value',3:5) %>% 58 | group_by(year,sex) %>% 59 | mutate(value=value/sum(value)) %>% 60 | ungroup() 61 | hmd.weights$age <- paste0('a',hmd.weights$age) 62 | hmd.weights$year <- paste(hmd.weights$year) 63 | 64 | old <- left_join(hmd.weights,open,c('year','sex')) %>% 65 | mutate(value=value.x*value.y) %>% 66 | select(year,id,sex,age,value) 67 | 68 | # add old back to the main data set 69 | p1 <- bind_rows(p1,old) 70 | 71 | # zeros to open age group 72 | p1[p1$age=='open',5] <- 0 73 | 74 | 75 | p1 <- arrange(p1, sex,year,id,age) 76 | p1$year <- paste0('y',p1$year) 77 | 78 | 79 | 80 | 81 | 82 | ### 83 | # deaths recreate for 1-year age groups using HMD mortality ratios for Germany 84 | 85 | mx <- read.table('data1_raw/Missing_data/raw_DE/HMD_DE_Mx_1x1.txt.gz',header=T,skip=2,colClasses = 'character') 86 | names(mx) <- c('year','age','f','m','b') 87 | mx <- mx %>% filter(year%in%2003:2010,age%in%0:99) %>% 88 | gather('sex','value',3:5) 89 | mx$age <- c(paste0('a0',0:9),paste0('a',10:99)) 90 | mx$year <- paste0('y',mx$year) 91 | mx$value <- as.numeric(mx$value) 92 | 93 | d1 <- left_join(filter(p1,!age%in%c('open','total')),mx,c('year','sex','age')) %>% 94 | mutate(value=value.x*value.y) %>% 95 | select(year,id,sex,age,value) %>% 96 | arrange(sex,year,id,age) 97 | 98 | # add a zero open age category 99 | dopen <- d1 %>% filter(age=='a00') %>% 100 | mutate(age='open',value=0) 101 | 102 | # add to the main data set 103 | d1 <- bind_rows(d1,dopen) %>% arrange(sex,year,id,age) 104 | 105 | 106 | 107 | 108 | # correct population structure in 2003 for those died in 2003 109 | p1[p1$year=='y2003',5] <- p1[p1$year=='y2003',5]-d1[d1$year=='y2003',5] 110 | 111 | 112 | 113 | ### 114 | #convert to factors 115 | p1 <- p1 %>% mutate(year=factor(year),id=factor(id),sex=factor(sex),age=factor(age)) 116 | d1 <- d1 %>% mutate(year=factor(year),id=factor(id),sex=factor(sex),age=factor(age)) 117 | 118 | ### 119 | # add 'total' age proup 120 | 121 | ptot <- p1 %>% mutate(age=factor('total')) %>% 122 | group_by(year,id,sex,age) %>% 123 | summarise(value=sum(value)) %>% 124 | ungroup() 125 | 126 | n2p1 <- suppressWarnings(bind_rows(p1,ptot)) %>% 127 | mutate(age=factor(age)) 128 | 129 | 130 | dtot <- d1 %>% mutate(age=factor('total')) %>% 131 | group_by(year,id,sex,age) %>% 132 | summarise(value=sum(value)) %>% 133 | ungroup() 134 | 135 | n2d1 <- suppressWarnings(bind_rows(d1,dtot)) %>% 136 | mutate(age=factor(age)) 137 | 138 | 139 | 140 | ################################################################################ 141 | # save properly 142 | n2d1mDE <- n2d1 143 | n2p1mDE <- n2p1 144 | save(n2d1mDE,n2p1mDE,file = 'data1_raw/Missing_data/ready.missing.DE.RData') 145 | 146 | 147 | 148 
| ### 149 | # Report finish of the script execution 150 | print('Done: script 2.05') 151 | -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.06_missing_DK.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Prepare missing data. Denmark. NUTS-2 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | # See also: https://ikashnitsky.github.io/2017/denmark-nuts-reconstruction/ 8 | # 9 | ################################################################################ 10 | 11 | # What is missing? 12 | # Population structures (before 2007) and deaths (before 2006) for all NUTS-2 13 | # regions of Denmark. There was an administrative reform in 2007 that changed 14 | # the whole municipal and regional division in the country. 15 | # 16 | # Strategy: I will aggregate data from munucupality level at NUTS-2 level 17 | # The matching between mun and NUTS-2 is done manually using GIS, i.e. I 18 | # checked what municipalities of the division before 2007 were located in the 19 | # present day NUTS-2 and NUTS-3 regions. 20 | 21 | 22 | # Erase all objects in memory 23 | rm(list = ls(all = TRUE)) 24 | 25 | # load own functions 26 | load('data0_supplementary/own_functions.RData') 27 | 28 | match <- read.csv('data1_raw/Missing_data/raw_DK/MATCHED.csv') 29 | 30 | ################################################################################ 31 | # population 32 | p1 <- read.csv('data1_raw/Missing_data/raw_DK/BEF1A.csv.gz') 33 | 34 | p1 <- p1 %>% gather('year','value',4:7) 35 | levels(p1$age) <- c(paste('a0',0:9,sep=''),paste('a',10:99,sep=''),rep('open',26)) 36 | # summarize by age 37 | p1 <- p1 %>% group_by(code,sex,age,year) %>% 38 | summarise(value=sum(value)) %>% ungroup() 39 | 40 | #summarize ny NUTS-2 41 | n2p1 <- inner_join(p1,match,'code') %>% 42 | group_by(year,idn2,sex,age) %>% 43 | summarise(value=sum(value)) %>% 44 | ungroup() %>% 45 | rename(id=idn2) %>% 46 | select(year,id,sex,age,value) %>% 47 | arrange(sex,year,id,age) %>% 48 | mutate(year=factor(year)) 49 | 50 | # add both sex 51 | n2p1b <- n2p1 %>% mutate(sex=factor('b')) %>% 52 | group_by(year,id,sex,age) %>% 53 | summarise(value=sum(value)) %>% 54 | ungroup() 55 | 56 | n2p1 <- suppressWarnings(bind_rows(n2p1b,n2p1)) %>% 57 | mutate(sex=factor(sex), 58 | value=as.numeric(value)) 59 | 60 | # add age 'total' 61 | ptot <- n2p1 %>% mutate(age=factor('total')) %>% 62 | group_by(year,id,sex,age) %>% 63 | summarise(value=sum(value)) %>% 64 | ungroup() 65 | 66 | n2p1 <- suppressWarnings(bind_rows(n2p1,ptot)) %>% 67 | mutate(age=factor(age)) 68 | 69 | 70 | 71 | 72 | 73 | ################################################################################ 74 | # deaths 75 | d1 <- read.csv('data1_raw/Missing_data/raw_DK/FOD2.csv.gz') 76 | 77 | d1 <- d1 %>% gather('year','value',4:6) 78 | levels(d1$age) <- c(paste('a0',0:9,sep=''),paste('a',10:99,sep=''),rep('open',26)) 79 | # summarize by age 80 | d1 <- d1 %>% group_by(code,sex,age,year) %>% 81 | summarise(value=sum(value)) %>% ungroup() 82 | 83 | #summarize ny NUTS-2 84 | n2d1 <- inner_join(d1,match,'code') %>% 85 | group_by(year,idn2,sex,age) %>% 86 | summarise(value=sum(value)) %>% 87 | ungroup() %>% 88 | rename(id=idn2) %>% 89 | select(year,id,sex,age,value) %>% 90 | arrange(sex,year,id,age) %>% 91 | mutate(year=factor(year)) 92 | 93 | # add both sex 94 | n2d1b <- n2d1 %>% 
mutate(sex=factor('b')) %>% 95 | group_by(year,id,sex,age) %>% 96 | summarise(value=sum(value)) %>% 97 | ungroup() 98 | 99 | n2d1 <- suppressWarnings(bind_rows(n2d1b,n2d1)) %>% 100 | mutate(sex=factor(sex), 101 | value=as.numeric(value)) 102 | 103 | # add age 'total' 104 | ptot <- n2d1 %>% mutate(age=factor('total')) %>% 105 | group_by(year,id,sex,age) %>% 106 | summarise(value=sum(value)) %>% 107 | ungroup() 108 | 109 | n2d1 <- suppressWarnings(bind_rows(n2d1,ptot)) %>% 110 | mutate(age=factor(age)) 111 | 112 | 113 | 114 | 115 | # save 116 | n2d1mDK <- n2d1 117 | n2p1mDK <- n2p1 118 | 119 | save(n2d1mDK,n2p1mDK, file = 'data1_raw/Missing_data/ready.missing.DK.RData') 120 | 121 | 122 | 123 | ### 124 | # Report finish of the script execution 125 | print('Done: script 2.06') 126 | -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.07_missing_SI.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Prepare missing data. Slovenia. NUTS-2 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # What is missing? 10 | # Population structure for 2003 and 2004 11 | 12 | 13 | 14 | 15 | # Erase all objects in memory 16 | rm(list = ls(all = TRUE)) 17 | 18 | # load own functions 19 | load('data0_supplementary/own_functions.RData') 20 | 21 | 22 | 23 | p5 <- read.csv('data1_raw/Missing_data/raw_SI/05C1004E.csv.gz') 24 | 25 | colnames(p5) <- c('sex','year','age','name','value') 26 | levels(p5$sex) <- c('m','b','f') # ! scpecific naming in the original dataset 27 | 28 | # # check regional differences (optional) 29 | # dataplot <- subset(p5, sex=='b'&year%in%paste(2003:2007,'H1',sep='')) 30 | # dp.split <- split(dataplot, dataplot$age) 31 | # dp.shares <- list() 32 | # for (i in 5:25){ 33 | # di <- dp.split[[i]] 34 | # stri <- di[,5]/dp.split[[1]][,5] 35 | # oi <- data.frame(di[,1:4],stri) 36 | # dp.shares[[i-4]] <- oi 37 | # } 38 | # dp.shares <- rbindlist(dp.shares) 39 | # 40 | # plot <- ggplot()+ 41 | # geom_line(data=dp.shares, aes(x=age,y=stri,group = name,color=name))+ 42 | # facet_grid(year~.) 43 | # plot 44 | # 45 | # plot2 <- ggplot()+ 46 | # geom_line(data=dp.shares, aes(x=age,y=stri,group = year,color=year))+ 47 | # facet_grid(name~.) 48 | # plot2 49 | 50 | # result of a check. Significant differences could be seen through time, by not across regions. 
51 | # Hence, I can take the pop structure for Slovenia and produce pop structures for regions 52 | 53 | 54 | # load pop str for Slovenia from HMD 55 | hmd <- read.table('data1_raw/Missing_data/raw_SI/HMD.SI.x1pop.txt.gz', header=T) 56 | names(hmd) <- c('year','age','f','m','b') 57 | hmd <- hmd %>% 58 | filter(year%in%c('2003','2004')) %>% 59 | gather('sex','value',3:5) 60 | 61 | # summarize by ages 62 | hmd$age <- c(paste('a0',0:9,sep=''),paste('a',10:99,sep=''),rep('open',11)) 63 | hmd <- hmd %>% group_by(year,age,sex) %>% 64 | summarise(value=sum(value)) %>% 65 | ungroup() 66 | hmd.ps <- hmd %>% group_by(year,sex) %>% 67 | mutate(value=value/sum(value)) %>% 68 | ungroup() 69 | hmd.ps$year <- paste0('y',hmd.ps$year) 70 | 71 | 72 | ptot <- filter(p5, year%in%c('2003H1','2004H1'),age=='Age - TOTAL') %>% droplevels() %>% 73 | rename(id=name) %>% select(-age) 74 | levels(ptot$year) <- c('y2003','y2004') 75 | levels(ptot$id) <- c('SI','SI01','SI02') 76 | 77 | 78 | 79 | # calculate pop structures 80 | p1 <- suppressWarnings(full_join(hmd.ps,ptot,by=c('year','sex'))) %>% 81 | mutate(value=value.x*value.y) %>% 82 | select(year,id,sex,age,value) %>% 83 | arrange(sex,year,id,age) %>% 84 | mutate(year=factor(year), 85 | sex=factor(sex), 86 | age=factor(age)) 87 | 88 | 89 | ### 90 | # add 'total' age proup 91 | 92 | ptot <- p1 %>% mutate(age=factor('total')) %>% 93 | group_by(year,id,sex,age) %>% 94 | summarise(value=sum(value)) %>% 95 | ungroup() 96 | 97 | n2p1 <- suppressWarnings(bind_rows(p1,ptot)) %>% 98 | mutate(age=factor(age)) %>% 99 | filter(id!='SI') %>% 100 | droplevels() 101 | 102 | 103 | # save 104 | n2p1mSI <- n2p1 105 | 106 | save(n2p1mSI, file = 'data1_raw/Missing_data/ready.missing.SI.RData') 107 | 108 | 109 | 110 | 111 | ### 112 | # Report finish of the script execution 113 | print('Done: script 2.07') 114 | -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.08_missing_RO_smooth.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Reconstruct the population data for Romania, NUTS-2 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Why? 10 | # There was a Census in Romania in 2011 that registered large, and previously 11 | # underestimated, decrease in population size. Evidently, the outmigration from 12 | # Romania was underreported. Yet, no rollback corrections were made, and Eurostat 13 | # provides non-harmonized data for Romanian regions. Thus, we harmonized the 14 | # population figures for Romanian regions. 
15 | 16 | 17 | # Erase all objects in memory 18 | rm(list = ls(all = TRUE)) 19 | 20 | # load own functions 21 | load('data0_supplementary/own_functions.RData') 22 | 23 | 24 | 25 | load('data1_raw/n2p1.missings.RData') 26 | load('data1_raw/n2d1.missings.RData') 27 | 28 | n2p1RO <- n2p1 %>% filter(substr(paste(id),1,2)=='RO') %>% droplevels() 29 | n2d1RO <- n2d1 %>% filter(substr(paste(id),1,2)=='RO') %>% droplevels() 30 | 31 | 32 | ################################################################################ 33 | # Exploratory data analysis 34 | ################################################################################ 35 | 36 | # plot to see the discrepancy 37 | gg.pop <- ggplot(n2p1RO %>% group_by(year,id,sex) %>% summarise(value=sum(value)))+ 38 | geom_point(aes(x=year,y=value/10^6))+ 39 | facet_wrap(id~sex,scales = "free",ncol = 3)+ 40 | scale_x_discrete('',labels=2003:2013)+ 41 | theme_few(base_size = 15) 42 | 43 | ggsave('_output/Romania_check_discrepancy.png',gg.pop,width = 12,height = 18,dpi=192) 44 | 45 | # check the age effect 46 | df.ch.age <- n2p1RO %>% 47 | filter(sex=='b') %>% 48 | mutate(a10 = factor(paste0(substr(paste(age),2,2),'0s'))) %>% 49 | group_by(year,id,a10) %>% 50 | summarise(value=sum(value)) 51 | 52 | 53 | gg.pop.b.10 <- ggplot(filter(df.ch.age,a10!='o0s'))+ 54 | geom_point(aes(x=year,y=value/10^6))+ 55 | geom_path(aes(x=year,y=value/10^6,group=1))+ 56 | facet_grid(id~a10,scales = "free")+ 57 | scale_x_discrete('',labels=3:13)+ 58 | theme_few(base_size = 10) 59 | 60 | ggsave('_output/Romania_check_discrepancy_by_age.png',gg.pop.b.10,width = 18,height = 12,dpi=192) 61 | 62 | 63 | 64 | 65 | 66 | 67 | gg.death <- ggplot(n2d1RO %>% group_by(year,id,sex) %>% summarise(value=sum(value)))+ 68 | geom_point(aes(x=year,y=value/10^3))+ 69 | geom_path(aes(x=year,y=value/10^3,group=1))+ 70 | facet_wrap(id~sex,scales = "free",ncol=3)+ 71 | scale_x_discrete('',labels=2003:2013)+ 72 | theme_few(base_size = 10) 73 | 74 | 75 | df.ch.age.death <- n2d1RO %>% 76 | filter(sex=='b') %>% 77 | mutate(a10 = factor(paste0(substr(paste(age),2,2),'0s'))) %>% 78 | group_by(year,id,a10) %>% 79 | summarise(value=sum(value)) 80 | 81 | gg.pop.b.10 <- ggplot(filter(df.ch.age.death,a10!='o0s'))+ 82 | geom_point(aes(x=year,y=value/10^3))+ 83 | geom_path(aes(x=year,y=value/10^3,group=1))+ 84 | facet_grid(id~a10,scales = "free")+ 85 | scale_x_discrete('',labels=3:13)+ 86 | theme_few(base_size = 10) 87 | 88 | # nothing particularly strange here 89 | 90 | 91 | ################################################################################ 92 | # Let's correct it. Cohort-wise 93 | # STRATEGY: 94 | # 1. Calculate pseudo-observed migration numbers using mortality figures and 95 | # unharmonized population 96 | # 2. Forward-roll the 2003 population structure using the mortality and observed migration data 97 | # 3. Calculate the discrepancy between the forward-rolled population estimate 98 | # and the observed population in 2012 99 | # 4. Distribute the discrepancy by years taking into account the pseudo-observed migration 100 | # In short, the idea is to redistribute the excess (shortage) of population 101 | # assuming that the migration record captured the real trends.
102 | # *assumption: everybody is born on 01-01-yyyy 103 | # *assumption: migration in 2011 equals 2010 104 | ################################################################################ 105 | 106 | # drop OPEN and TOTAL age groups and year 2013 107 | df.p <- n2p1RO %>% filter(!age%in%c('open','total'),year!='y2013') %>% droplevels() 108 | df.d <- n2d1RO %>% filter(!age%in%c('open','total'),!year%in%c('y2012','y2013')) %>% droplevels() 109 | 110 | 111 | # create cohort variable and spread using cohorts 112 | # we also drop all the cohorts with not full data + 2003:2001 - kids 113 | # dfc means data frame cohort 114 | dfc.p <- df.p %>% 115 | mutate(cohort = factor(paste0('c',2003+as.numeric(year)-as.numeric(age))), 116 | age=NULL) %>% 117 | spread(year,value) %>% 118 | arrange(sex,id,desc(cohort)) %>% 119 | filter(cohort%in%paste0('c',2003:1928)) %>% 120 | droplevels() 121 | 122 | dfc.d <- df.d %>% 123 | mutate(cohort = factor(paste0('c',2003+as.numeric(year)-as.numeric(age))), 124 | age=NULL) %>% 125 | spread(year,value) %>% 126 | arrange(sex,id,desc(cohort)) %>% 127 | filter(cohort%in%paste0('c',2003:1928))%>% 128 | droplevels() 129 | 130 | 131 | 132 | # calculate cohort change 133 | dfc.cc <- data.frame(dfc.p[,1:3], 134 | dfc.p[,5:13]-dfc.p[,4:12]) 135 | names(dfc.cc)[4:12] <- names(dfc.p)[4:12] 136 | 137 | # calculate observed migration 138 | dfc.mg <- data.frame(dfc.cc[,1:3], 139 | dfc.cc[,4:11]+dfc.d[,4:11]) # + because of mortality 140 | 141 | 142 | # calculate cumulated annual excessive migration for each year 143 | dfc.ex.2 <- dfc.p %>% 144 | transmute(id=id,sex=sex,cohort=cohort, 145 | ex03=((y2012-y2003) - 146 | apply(dfc.mg[,4:11],1,sum) + 147 | apply(dfc.d[,4:11],1,sum)) / 9, # divide by 9 years 148 | ex04=ex03*2,ex05=ex03*3,ex06=ex03*4,ex07=ex03*5, 149 | ex08=ex03*6,ex09=ex03*7,ex10=ex03*8) 150 | 151 | dfc.p.cor <- data.frame(dfc.p[,1:4], dfc.p[,5:12]+dfc.ex.2[,4:11],y2012=dfc.p[,13]) 152 | 153 | 154 | 155 | # now we need to transform back from cohorts to ages 156 | cor <- dfc.p.cor %>% 157 | gather('year','value',y2003:y2012) %>% 158 | mutate(tmp.coh=as.numeric(substr(paste(cohort),2,5)), 159 | tmp.year=as.numeric(substr(paste(year),2,5)), 160 | age = factor(paste0('a',tmp.year-tmp.coh)), 161 | tmp.coh=NULL, 162 | tmp.year=NULL) 163 | #correct the levels of age 164 | levels(cor$age)[nchar(levels(cor$age))==2] <- gsub('a','a0',levels(cor$age)[nchar(levels(cor$age))==2]) 165 | cor$year <- factor(cor$year) 166 | 167 | cor <- arrange(cor, year,id,sex,age) %>% select(4,1,2,6,3,5) 168 | 169 | # finally, insert the new values to the initial data set 170 | n2p1mRO <- ik_dm_fill.missings(n2p1RO,cor,by=1:4) 171 | 172 | save(n2p1mRO,file = 'data1_raw/Missing_data/ready.missing.RO.RData') 173 | 174 | 175 | 176 | ################################################################################ 177 | # check the harmonization 178 | 179 | # plot the garmonized data 180 | df.ch.age.cor <- n2p1mRO %>% 181 | filter(sex=='b') %>% 182 | mutate(a10 = factor(paste0(substr(paste(age),2,2),'0s'))) %>% 183 | group_by(year,id,a10) %>% 184 | summarise(value=sum(value)) 185 | 186 | 187 | gg.pop.b.10.cor <- ggplot(filter(df.ch.age.cor,a10!='o0s'))+ 188 | geom_point(aes(x=year,y=value/10^6))+ 189 | geom_path(aes(x=year,y=value/10^6,group=1))+ 190 | facet_grid(id~a10,scales = "free")+ 191 | scale_x_discrete('',labels=3:13)+ 192 | theme_few(base_size = 10) 193 | 194 | 195 | ggsave('_output/Romania_check_harmonized_data_by_age.png',width = 18,height = 12,dpi=192) 196 | 197 | 198 | 199 | ### 200 | # Report 
finish of the script execution 201 | print('Done: script 2.08') -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.09_missing_INSERT.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Insert missing population data. n2p1 and n2d1 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # load own functions 13 | load('data0_supplementary/own_functions.RData') 14 | 15 | 16 | 17 | load('data1_raw/n2p1.missings.RData') 18 | load('data1_raw/n2d1.missings.RData') 19 | 20 | 21 | 22 | # insert Germany 23 | load('data1_raw/Missing_data/ready.missing.DE.RData') 24 | n2p1 <- ik_dm_fill.missings(n2p1, n2p1mDE, by=1:4) 25 | n2d1 <- ik_dm_fill.missings(n2d1, n2d1mDE, by=1:4) 26 | 27 | # insert Denmark 28 | load('data1_raw/Missing_data/ready.missing.DK.RData') 29 | n2p1 <- ik_dm_fill.missings(n2p1, n2p1mDK, by=1:4) 30 | n2d1 <- ik_dm_fill.missings(n2d1, n2d1mDK, by=1:4) 31 | 32 | # insert Slovenia 33 | load('data1_raw/Missing_data/ready.missing.SI.RData') 34 | n2p1 <- ik_dm_fill.missings(n2p1, n2p1mSI, by=1:4) 35 | 36 | 37 | # insert Romania - harmonized data 38 | load('data1_raw/Missing_data/ready.missing.RO.RData') 39 | n2p1mRO <- n2p1mRO %>% select(-cohort) 40 | n2p1 <- ik_dm_fill.missings(n2p1, n2p1mRO, by=1:4) 41 | 42 | 43 | 44 | ### 45 | # FINAL SAVE!!! 46 | save(n2p1, file = 'data2_prepared/n2p1.RData') 47 | save(n2d1, file = 'data2_prepared/n2d1.RData') 48 | save(n2d1,n2p1,file = 'data2_prepared/n2.OBS.RData') 49 | 50 | 51 | 52 | 53 | ### 54 | # Report finish of the script execution 55 | print('Done: script 2.09') -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.10_TSR_decomposition_OBS.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Calculate TSR decomposition for the OBS period 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # load own functions 13 | load('data0_supplementary/own_functions.RData') 14 | 15 | 16 | 17 | # ################################################################################ 18 | # LOGIC of the computations 19 | # We only need 1-year population structure and 1-year numbers of deaths. 20 | # Cohort turnover is calculated from the population structure; numbers of migrants 21 | # are calculated from the demographic balance formula.
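# To spell out the balance used below: for the cohort aged x at the start of a
# year, P(x+1, end of year) = P(x, start of year) - D(x) + M(x), so net
# migration is recovered as the residual
#   M(x) = [P(x+1, end) - P(x, start)] + D(x)
# A minimal commented-out sketch with made-up numbers, for illustration only:
# p_start <- 1000; p_end <- 985; deaths <- 25
# net_migration <- (p_end - p_start) + deaths   # = 10, a net inflow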
22 | 23 | 24 | 25 | load('data2_prepared/n2.OBS.RData') 26 | 27 | 28 | ### 29 | # calculate cohort turnover for working ages (15-64) 30 | # The idea is to account for migration and mortality at the ages 14 and 64 31 | p.14.64 <- filter(n2p1, age%in%c('a14','a64'),sex=='b',year!='y2013') %>% 32 | droplevels() %>% select(-sex) %>% 33 | spread(age,value) %>% 34 | rename(p14=a14,p64=a64) 35 | 36 | # correct for those died at ages 14 and 64 37 | mt.14.64 <- n2d1 %>% mutate(MT=-value) %>% select(-value) %>% 38 | filter(age%in%c('a14','a64'),sex=='b') %>% 39 | spread(age,MT) %>% select(-sex) %>% 40 | rename(mt14=a14,mt64=a64) 41 | 42 | # correct for those migrated at ages 14 and 64 43 | p.14.64.begin <- filter(n2p1, year!='y2013',age%in%c('a14','a64'),sex=='b') %>% 44 | droplevels() %>% select(-sex) %>% 45 | spread(age,value) 46 | p.14.64.end <- filter(n2p1, year!='y2003',age%in%c('a15','a65'),sex=='b') %>% 47 | droplevels() %>% select(-sex)%>% 48 | spread(age,value) 49 | p.14.64.end$year <- p.14.64.begin$year 50 | 51 | pg.14.64 <- left_join(p.14.64.begin,p.14.64.end,by=c('year','id')) %>% # pg means 'population growth' 52 | mutate(pg14=a15-a14, 53 | pg64=a65-a64) %>% 54 | select(year,id,pg14,pg64) 55 | 56 | mg.14.64 <- left_join(pg.14.64,mt.14.64,by=c('year','id')) %>% 57 | mutate(mg14=pg14-mt14, 58 | mg64=pg64-mt64) %>% 59 | select(year,id,mg14,mg64) 60 | 61 | 62 | CT <- left_join(p.14.64,mt.14.64,by=c('year','id')) 63 | CT <- left_join(CT,mg.14.64,by=c('year','id')) 64 | CT <- CT %>% 65 | mutate(CT = (p14-p64) -.5*(mt14-mt64) -.5*(mg14-mg64)) %>% 66 | select(year,id,CT) 67 | 68 | 69 | ### 70 | # calculate mortality at working ages (15-64) 71 | MT <- n2d1 %>% mutate(MT=-value) %>% select(-value) 72 | levels(MT$age) <- c(rep('PY',15),rep('PW',50),rep('PO',36),'PT') 73 | MT <- MT %>% filter(sex=='b') %>% select(-sex) %>% 74 | group_by(id,year,age) %>% 75 | summarise(MT=sum(MT)) %>% 76 | ungroup()%>% 77 | filter(age=='PW') %>% 78 | droplevels() %>% 79 | select(-age) 80 | 81 | 82 | 83 | ### 84 | # now aggreagte for broad age groups 85 | 86 | p3 <- n2p1 87 | levels(p3$age) <- c(rep('PY',15),rep('PW',50),rep('PO',36),'PT') 88 | p3 <- p3 %>% filter(sex=='b') %>% select(-sex) %>% 89 | group_by(id,year,age) %>% 90 | summarise(value=sum(value)) %>% 91 | ungroup()%>% 92 | spread(age,value) %>% 93 | mutate(PN = PY+PO) %>% 94 | select(-PO,-PY) 95 | 96 | p3begin <- filter(p3, year!='y2013') %>% droplevels() 97 | p3end <- filter(p3, year!='y2003') 98 | p3end$year <- p3begin$year 99 | 100 | df <- left_join(p3begin,p3end,by=c('id','year')) 101 | 102 | df <- left_join(df,CT, by=c('id','year')) 103 | df <- left_join(df,MT, by=c('id','year')) 104 | 105 | df <- df %>% mutate(MG = PW.y-PW.x-CT-MT) 106 | 107 | 108 | 109 | ################################################################################ 110 | # Calculate decomposition 111 | 112 | 113 | dec <- transmute(df, year=year, id=id, 114 | g = PW.y/PN.y - PW.x/PN.x, 115 | nw = .5*(PW.y+PW.x)*(1/PN.y-1/PN.x), 116 | w = .5*(1/PN.y+1/PN.x)*(PW.y-PW.x), 117 | mt = .5*(1/PN.y+1/PN.x)*MT, 118 | mg = .5*(1/PN.y+1/PN.x)*MG, 119 | ct = .5*(1/PN.y+1/PN.x)*CT, 120 | tsr.year.begin = PW.x/PN.x) 121 | 122 | #calculate TSR for the periods beginings 123 | tsr03 <- df %>% 124 | filter(year%in%c('y2003')) %>% 125 | transmute(id=id, 126 | tsr03=PW.x/PN.x) 127 | 128 | 129 | n2obs.dec <- left_join(dec,tsr03,'id') 130 | 131 | save(n2obs.dec,file = 'data3_calculated/n2obs.dec.Rdata') 132 | 133 | 134 | ### 135 | # Report finish of the script execution 136 | print('Done: script 2.10') 
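###
# Note: by construction the six components defined above add up exactly:
# g = nw + w and w = ct + mt + mg for every region-year (MG is obtained as the
# residual of the balance). A quick optional sanity check, commented out:
# with(n2obs.dec, all.equal(g, nw + w))
# with(n2obs.dec, all.equal(w, ct + mt + mg))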
137 | -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.11_TSR_decomposition_PROJ.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Calculate TSR decomposition for the PROJ period 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # load own functions 13 | load('data0_supplementary/own_functions.RData') 14 | 15 | 16 | 17 | # ################################################################################ 18 | # LOGIC of the computations 19 | # We only need 1-year population structure and the projected numbers of migrants. 20 | # Cohort turnover is calculated from the population structure; numbers of deaths 21 | # are calculated form the demographic balance formula. 22 | 23 | 24 | 25 | load('data2_prepared/n2.PROJ.RData') 26 | 27 | 28 | ### 29 | # calculate cohort turnover for working ages (15-64) 30 | # The idea is to account for migration and mortality at the ages 14 and 64 31 | p.14.64 <- filter(n2p1proj, age%in%c('a14','a64'),sex=='b',year!='y2043') %>% 32 | droplevels() %>% select(-sex) %>% 33 | spread(age,value) %>% 34 | rename(p14=a14,p64=a64) 35 | 36 | # correct for those MIGRATED at ages 14 and 64 37 | mg.14.64 <- n2m1proj %>% 38 | filter(age%in%c('a14','a64'),sex=='b') %>% 39 | spread(age,value) %>% select(-sex) %>% 40 | rename(mg14=a14,mg64=a64) 41 | 42 | # correct for those DIED at ages 14 and 64 43 | p.14.64.begin <- filter(n2p1proj, year!='y2043',age%in%c('a14','a64'),sex=='b') %>% 44 | droplevels() %>% select(-sex) %>% 45 | spread(age,value) 46 | p.14.64.end <- filter(n2p1proj, year!='y2013',age%in%c('a15','a65'),sex=='b') %>% 47 | droplevels() %>% select(-sex)%>% 48 | spread(age,value) 49 | p.14.64.end$year <- p.14.64.begin$year 50 | 51 | pg.14.64 <- left_join(p.14.64.begin,p.14.64.end,by=c('year','id')) %>% # pg means 'population growth' 52 | mutate(pg14=a15-a14, 53 | pg64=a65-a64) %>% 54 | select(year,id,pg14,pg64) 55 | 56 | mt.14.64 <- left_join(pg.14.64,mg.14.64,by=c('year','id')) %>% 57 | mutate(mt14=pg14-mg14, 58 | mt64=pg64-mg64) %>% 59 | select(year,id,mt14,mt64) 60 | 61 | 62 | CT <- left_join(p.14.64,mt.14.64,by=c('year','id')) 63 | CT <- left_join(CT,mg.14.64,by=c('year','id')) 64 | CT <- CT %>% 65 | mutate(CT = (p14-p64) -.5*(mt14-mt64) -.5*(mg14-mg64)) %>% 66 | select(year,id,CT) 67 | 68 | 69 | ### 70 | # calculate migration at working ages (15-64) 71 | MG <- n2m1proj 72 | levels(MG$age) <- c(rep('PY',15),rep('PW',50),rep('PO',36),'PT') 73 | MG <- MG %>% filter(sex=='b') %>% select(-sex) %>% 74 | group_by(id,year,age) %>% 75 | summarise(MG=sum(value)) %>% 76 | ungroup()%>% 77 | filter(age=='PW') %>% 78 | droplevels() %>% 79 | select(-age) 80 | 81 | 82 | 83 | ### 84 | # now aggreagte for broad age groups 85 | 86 | p3 <- n2p1proj 87 | levels(p3$age) <- c(rep('PY',15),rep('PW',50),rep('PO',36),'PT') 88 | p3 <- p3 %>% filter(sex=='b') %>% select(-sex) %>% 89 | group_by(id,year,age) %>% 90 | summarise(value=sum(value)) %>% 91 | ungroup()%>% 92 | spread(age,value) %>% 93 | mutate(PN = PY+PO) %>% 94 | select(-PO,-PY) 95 | 96 | p3begin <- filter(p3, year!='y2043') %>% droplevels() 97 | p3end <- filter(p3, year!='y2013') 98 | p3end$year <- p3begin$year 99 | 100 | df <- 
left_join(p3begin,p3end,by=c('id','year')) 101 | 102 | df <- left_join(df,CT, by=c('id','year')) 103 | df <- left_join(df,MG, by=c('id','year')) 104 | 105 | df <- df %>% mutate(MT = PW.y-PW.x-CT-MG) 106 | 107 | 108 | ################################################################################ 109 | # Calculate decomposition 110 | 111 | dec <- transmute(df, year=year, id=id, 112 | g = PW.y/PN.y - PW.x/PN.x, 113 | nw = .5*(PW.y+PW.x)*(1/PN.y-1/PN.x), 114 | w = .5*(1/PN.y+1/PN.x)*(PW.y-PW.x), 115 | mt = .5*(1/PN.y+1/PN.x)*MT, 116 | mg = .5*(1/PN.y+1/PN.x)*MG, 117 | ct = .5*(1/PN.y+1/PN.x)*CT, 118 | tsr.year.begin = PW.x/PN.x) 119 | 120 | #calculate TSR for the periods beginings 121 | tsr0 <- df %>% 122 | filter(year%in%c('y2013','y2023','y2033')) %>% 123 | transmute(year=year, id=id, 124 | tsr=PW.x/PN.x) %>% 125 | spread(year,tsr) 126 | names(tsr0)[2:4] <- gsub('y20','tsr',names(tsr0)[2:4]) 127 | 128 | 129 | n2proj.dec <- left_join(dec,tsr0,'id') 130 | 131 | save(n2proj.dec,file = 'data3_calculated/n2proj.dec.Rdata') 132 | 133 | 134 | ### 135 | # Report finish of the script execution 136 | print('Done: script 2.11') 137 | -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.12_TSR_decomposition_n2dec0342.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Calculate decomposition for 263 NUTS-2 regions for 2003-2042 OBS+PROJ 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # load own functions 13 | load('data0_supplementary/own_functions.RData') 14 | 15 | 16 | 17 | load('data3_calculated/n2proj.dec.Rdata') 18 | load('data3_calculated/n2obs.dec.Rdata') 19 | load('data0_supplementary/idn2sub.Rdata') 20 | load('data0_supplementary/idn0123.RData') 21 | 22 | 23 | n2dec0342 <- rbind(n2obs.dec[,1:9],n2proj.dec[,1:9]) 24 | 25 | tsr03 <- n2dec0342 %>% filter(year%in%c('y2003')) %>% 26 | select(2,9) %>% rename(tsr03=tsr.year.begin) 27 | 28 | n2dec0342 <- left_join(n2dec0342,tsr03,'id') 29 | 30 | n2dec0342 <- n2dec0342 %>% rename(tsr=tsr.year.begin) 31 | 32 | # add country and subregion factors 33 | n2dec0342 <- suppressWarnings(left_join(n2dec0342,idn2sub,'id')) 34 | n2dec0342$id <- factor(n2dec0342$id) 35 | 36 | 37 | save(file = 'data3_calculated/n2dec0342.RData', n2dec0342) 38 | 39 | 40 | 41 | 42 | ### 43 | # Report finish of the script execution 44 | print('Done: script 2.12') 45 | -------------------------------------------------------------------------------- /R_scripts/2_data_manipulation/2.13_TSR_2043.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 
2017-05-26 4 | # Calculate TSR in 2043 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # load own functions 13 | load('data0_supplementary/own_functions.RData') 14 | 15 | load('data2_prepared/n2p1proj.RData') 16 | 17 | 18 | p3 <- n2p1proj 19 | levels(p3$age) <- c(rep('PY',15),rep('PW',50),rep('PO',36),'PT') 20 | p3 <- p3 %>% filter(sex=='b') %>% select(-sex) %>% 21 | group_by(id,year,age) %>% 22 | summarise(value=sum(value)) %>% 23 | ungroup()%>% 24 | spread(age,value) %>% 25 | mutate(PN = PY+PO) %>% 26 | select(-PO,-PY) 27 | 28 | n2tsr43 <- p3 %>% filter(year=='y2043') %>% 29 | transmute(id=id,tsr43=PW/PN) 30 | 31 | save(n2tsr43, file = 'data3_calculated/n2tsr43.RData') 32 | 33 | 34 | 35 | ### 36 | # Report finish of the script execution 37 | print('Done: script 2.13') -------------------------------------------------------------------------------- /R_scripts/3_analysis/3.01_fig1_maps_TSR_growth_4decades.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Figure 1. TSR in 2003 and 2043 + change over 4 decades - map results 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # load own functions 13 | load('data0_supplementary/own_functions.RData') 14 | 15 | # load data 16 | load('geo_data/shp.n2eu28.all.RData') 17 | load('data3_calculated/n2dec0342.RData') 18 | 19 | myfont <- "Roboto Condensed" 20 | 21 | basemap <- ik_map_eu.base() 22 | 23 | bord <- fortify(Sborders) 24 | fort <- fortify(Sn2, region = 'id') 25 | 26 | ################################################################################ 27 | # maps TSR 2003 and 2043 - fixed color scales 28 | df.tsr03 <- n2dec0342 %>% filter(year == 'y2003') 29 | 30 | fort.tsr03 <- left_join(x=fort, y=df.tsr03, by = 'id') 31 | 32 | tsr03 <- basemap + 33 | geom_polygon(data = gghole(fort.tsr03)[[1]], aes_string(x='long', y='lat', group='group', fill='tsr03'), 34 | color='grey30',size=.1)+ 35 | geom_polygon(data = gghole(fort.tsr03)[[2]], aes_string(x='long', y='lat', group='group', fill='tsr03'), 36 | color='grey30',size=.1)+ 37 | scale_fill_viridis('TSR\n2003',limits=c(.868,2.765), option = 'B')+ 38 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 39 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 40 | 41 | 42 | load('data3_calculated/n2tsr43.RData') 43 | 44 | fort.tsr43 <- left_join(x=fort, y=n2tsr43, by = 'id') 45 | 46 | tsr43 <- basemap + 47 | geom_polygon(data = gghole(fort.tsr43)[[1]], aes_string(x='long', y='lat', group='group', fill='tsr43'), 48 | color='grey30',size=.1)+ 49 | geom_polygon(data = gghole(fort.tsr43)[[2]], aes_string(x='long', y='lat', group='group', fill='tsr43'), 50 | color='grey30',size=.1)+ 51 | scale_fill_viridis('TSR\n2043',limits=c(.868,2.765), option = 'B')+ 52 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 53 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 54 | 55 | 56 | 57 | 58 | 59 | 60 | ################################################################################ 61 | # change maps 4 periods - fixed color scales 62 | 63 |
df.g1 <- n2dec0342 %>% 64 | filter(as.numeric(factor(year))%in%1:10) %>% 65 | group_by(id) %>% 66 | select(-1,-9,-10,-11,-12) %>% 67 | summarise_each(funs(sum)) 68 | 69 | fort.g1 <- left_join(x=fort, y=df.g1, by = 'id') 70 | 71 | g1 <- basemap + 72 | geom_polygon(data = gghole(fort.g1)[[1]], aes_string(x='long', y='lat', group='group', fill='g'), 73 | color='grey30',size=.1)+ 74 | geom_polygon(data = gghole(fort.g1)[[2]], aes_string(x='long', y='lat', group='group', fill='g'), 75 | color='grey30',size=.1)+ 76 | scale_fill_viridis('TSR change\n2003-2012',limits=c(-.7238,.3275))+ 77 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 78 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 79 | 80 | 81 | df.g2 <- n2dec0342 %>% 82 | filter(as.numeric(factor(year))%in%11:20) %>% 83 | group_by(id) %>% 84 | select(-1,-9,-10,-11,-12) %>% 85 | summarise_each(funs(sum)) 86 | 87 | fort.g2 <- left_join(x=fort, y=df.g2, by = 'id') 88 | 89 | g2 <- basemap + 90 | geom_polygon(data = gghole(fort.g2)[[1]], aes_string(x='long', y='lat', group='group', fill='g'), 91 | color='grey30',size=.1)+ 92 | geom_polygon(data = gghole(fort.g2)[[2]], aes_string(x='long', y='lat', group='group', fill='g'), 93 | color='grey30',size=.1)+ 94 | scale_fill_viridis('TSR change\n2013-2022',limits=c(-.7238,.3275))+ 95 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 96 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 97 | 98 | 99 | df.g3 <- n2dec0342 %>% 100 | filter(as.numeric(factor(year))%in%21:30) %>% 101 | group_by(id) %>% 102 | select(-1,-9,-10,-11,-12) %>% 103 | summarise_each(funs(sum)) 104 | 105 | fort.g3 <- left_join(x=fort, y=df.g3, by = 'id') 106 | 107 | g3 <- basemap + 108 | geom_polygon(data = gghole(fort.g3)[[1]], aes_string(x='long', y='lat', group='group', fill='g'), 109 | color='grey30',size=.1)+ 110 | geom_polygon(data = gghole(fort.g3)[[2]], aes_string(x='long', y='lat', group='group', fill='g'), 111 | color='grey30',size=.1)+ 112 | scale_fill_viridis('TSR change\n2023-2032',limits=c(-.7238,.3275))+ 113 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 114 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 115 | 116 | 117 | df.g4 <- n2dec0342 %>% 118 | filter(as.numeric(factor(year))%in%31:40) %>% 119 | group_by(id) %>% 120 | select(-1,-9,-10,-11,-12) %>% 121 | summarise_each(funs(sum)) 122 | 123 | fort.g4 <- left_join(x=fort, y=df.g4, by = 'id') 124 | 125 | g4 <- basemap + 126 | geom_polygon(data = gghole(fort.g4)[[1]], aes_string(x='long', y='lat', group='group', fill='g'), 127 | color='grey30',size=.1)+ 128 | geom_polygon(data = gghole(fort.g4)[[2]], aes_string(x='long', y='lat', group='group', fill='g'), 129 | color='grey30',size=.1)+ 130 | scale_fill_viridis('TSR change\n2033-2042',limits=c(-.7238,.3275))+ 131 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 132 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 133 | 134 | 135 | 136 | 137 | 138 | 139 | ################################################################################ 140 | # align all 6 maps with arrows 141 | 142 | require(gridExtra) 143 | 144 | list.plots <- list(tsr03,g1,g2,g3,g4,tsr43) 145 | 146 | 147 | gg <- ggplot()+ 148 | coord_equal(xlim = c(0, 21), ylim = c(0, 30), expand = c(0,0))+ 149 | 150 | annotation_custom(ggplotGrob(list.plots[[1]]), 151 | xmin = 0,xmax = 10,ymin = 20,ymax = 30)+ 152 | 153 | annotation_custom(ggplotGrob(list.plots[[2]]), 154 | xmin = 12,xmax = 21,ymin = 
20,ymax = 29)+ 155 | annotation_custom(ggplotGrob(list.plots[[3]]), 156 | xmin = 12,xmax = 21,ymin = 10.5,ymax = 19.5)+ 157 | 158 | annotation_custom(ggplotGrob(list.plots[[4]]), 159 | xmin = 0,xmax = 9,ymin = 10.5,ymax = 19.5)+ 160 | annotation_custom(ggplotGrob(list.plots[[5]]), 161 | xmin = 0,xmax = 9,ymin = 1,ymax = 10)+ 162 | annotation_custom(ggplotGrob(list.plots[[6]]), 163 | xmin = 11,xmax = 21,ymin = 0,ymax = 10)+ 164 | 165 | labs(x = NULL, y = NULL)+ 166 | theme_void() 167 | 168 | 169 | # DF with the coordinates of the 5 arrows 170 | df.arrows <- data.frame(id=1:5, 171 | x=c(10,12,12,9,9), 172 | y=c(25,23,15,13,5), 173 | xend=c(12,12,9,9,11), 174 | yend=c(25,17,15,7,5)) 175 | 176 | # add arrows 177 | gg <- gg + geom_curve(data = df.arrows %>% filter(id==1), 178 | aes(x=x,y=y,xend=xend,yend=yend), 179 | curvature = 0, 180 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) + 181 | geom_curve(data = df.arrows %>% filter(id==2), 182 | aes(x=x,y=y,xend=xend,yend=yend), 183 | curvature = 0.3, 184 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) + 185 | geom_curve(data = df.arrows %>% filter(id==3), 186 | aes(x=x,y=y,xend=xend,yend=yend), 187 | curvature = 0, 188 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) + 189 | geom_curve(data = df.arrows %>% filter(id==4), 190 | aes(x=x,y=y,xend=xend,yend=yend), 191 | curvature = -0.3, 192 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) + 193 | geom_curve(data = df.arrows %>% filter(id==5), 194 | aes(x=x,y=y,xend=xend,yend=yend), 195 | curvature = 0, 196 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) 197 | 198 | # add labes 199 | gg <- gg + annotate('text',label = LETTERS[1:6], 200 | x=c(0,12,12,0,0,11)+.5, 201 | y=c(30.5,29.5,20,20,10.5,10.5)-1, 202 | size=10,hjust=0, vjust=1, family = myfont) 203 | 204 | 205 | ggsave('_output/fig1_maps_TSR_change_4decades.png',gg,width = 12,height = 18,dpi = 192) 206 | 207 | 208 | 209 | ### 210 | # Report finish of the script execution 211 | print('Done: script 3.01') 212 | -------------------------------------------------------------------------------- /R_scripts/3_analysis/3.02_fig2_TSR_subregions_0342.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Figure 2. Plot TSR dynamics. 
OBS + PROJ for subregions 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | myfont <- "Roboto Condensed" 13 | 14 | # load own functions 15 | load('data0_supplementary/own_functions.RData') 16 | 17 | # load data 18 | load('data3_calculated/n2dec0342.RData') 19 | 20 | years <- paste0('y',2003:2043) 21 | fullyears <- paste0('y',2003:2042) 22 | 23 | year.labels <- 2003:2042 24 | year.labels[-c(seq(1,36,5),40)] <- '' 25 | year.ticks <- (as.numeric(nchar(year.labels))+4)/8 26 | 27 | 28 | brbg3 <- brewer.pal(11, "BrBG")[c(8,2,11)] 29 | 30 | 31 | ################################################################################ 32 | 33 | df.sub <- n2dec0342 %>% 34 | select(1,3:9,12) %>% 35 | group_by(year,subregion) %>% 36 | summarise_each(funs(mean)) %>% 37 | ungroup() 38 | 39 | df.mean <- n2dec0342 %>% 40 | select(1,3:9) %>% 41 | group_by(year) %>% 42 | summarise_each(funs(mean)) %>% 43 | ungroup() 44 | 45 | gg.tsr.dist <- ggplot()+ 46 | 47 | annotate('rect',xmin=-Inf,xmax=11,ymin=-Inf,ymax=Inf,fill='grey95')+ 48 | 49 | geom_jitter(data=n2dec0342, 50 | aes(x=year,y=tsr,color=subregion), 51 | size=2,alpha=.75,width = .5) + 52 | 53 | geom_line(data=df.sub, 54 | aes(x=year,y=tsr,group=subregion), 55 | size=3, color='white',lineend='round') + 56 | geom_line(data=df.sub, 57 | aes(x=year,y=tsr,group=subregion), 58 | size=2, color='black',lineend='round') + 59 | geom_line(data=df.sub, 60 | aes(x=year,y=tsr,group=subregion,color=subregion), 61 | size=1.8,lineend='round') + 62 | 63 | geom_line(data=df.mean, 64 | aes(x=year,y=tsr,group=1), 65 | size=3, color='white',lineend='round') + 66 | geom_line(data=df.mean, 67 | aes(x=year,y=tsr,group=1), 68 | size=2, color='black',lineend='round') + 69 | geom_point(data=df.mean, 70 | aes(x=year,y=tsr), 71 | color='black',size=1.8) + 72 | 73 | scale_color_manual(values = rev(brbg3))+ 74 | 75 | xlab(NULL)+ 76 | scale_x_discrete(labels=year.labels)+ 77 | scale_y_continuous(breaks = seq(1,2.75,.25))+ 78 | ylab('Total support ratio')+ 79 | theme_few(base_size = 20, base_family = myfont)+ 80 | theme(legend.position='none', 81 | axis.text.x = element_text(angle = 0, vjust = 0.5), 82 | axis.ticks = element_line(size = year.ticks))+ 83 | 84 | geom_text(data = data_frame(x= c(19.5,26.5,21.5), 85 | y=c(2.4,2.25,1.05), 86 | label = c('Eastern Europe','Southern Europe','Western Europe')), 87 | aes(x=x,y=y,label=label), 88 | color = rev(brbg3),family = myfont, 89 | size=8, hjust=0.5, vjust=0) + 90 | 91 | annotate('text',x=10,y=2.9,label='London',family = myfont, 92 | color=brbg3[1],size=6,hjust=0, vjust=1,fontface=3)+ 93 | annotate('text',x=32,y=.87,label='Eastern Germany',family = myfont, 94 | color=brbg3[1],size=6,hjust=0, vjust=1,fontface=3)+ 95 | 96 | annotate('text',x=1,y=1.5,label='Mean EU',color='black',size=8,hjust=0, vjust=1,family = myfont)+ 97 | geom_curve(data = data.frame(x = 3.5, y = 1.5, xend = 3.5, yend = 2.03), 98 | aes(x=x,y=y,xend=xend,yend=yend), 99 | color='black', size=.5, curvature = 0, 100 | arrow = arrow(type="closed",length = unit(0.25,"cm")))+ 101 | geom_curve(data = data.frame(x=c(19.5,26.5,21.5), 102 | xend=c(19.5,26.5,21.5), 103 | y=c(2.38,2.23,1.1), 104 | yend=c(1.85,1.65,1.63)), 105 | aes(x=x,y=y,xend=xend,yend=yend), 106 | color='black', size=.5, curvature = 0, 107 | arrow = arrow(type="closed",length = unit(0.25,"cm"))) 108 | 109 | 110 | 
ggsave('_output/fig2_TSR_0342_jitter+lines.png',gg.tsr.dist,width = 12,height = 12,dpi=192) 111 | 112 | 113 | 114 | ### 115 | # Report finish of the script execution 116 | print('Done: script 3.02') -------------------------------------------------------------------------------- /R_scripts/3_analysis/3.03_fig3+A1+A2+A3+A4_maps_decomposition.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Figure 3. Map decomposition for the whole period 2003-2043 5 | # Also: figures A1, A2, A3, A4 (for each of the 4 decades) 6 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 7 | # 8 | ################################################################################ 9 | 10 | # Erase all objects in memory 11 | rm(list = ls(all = TRUE)) 12 | 13 | myfont <- "Roboto Condensed" 14 | 15 | # load own functions 16 | load('data0_supplementary/own_functions.RData') 17 | 18 | # load data 19 | load('geo_data/shp.n2eu28.all.RData') 20 | load('data3_calculated/n2dec0342.RData') 21 | 22 | basemap <- ik_map_eu.base(family = myfont) 23 | 24 | bord <- fortify(Sborders) 25 | 26 | variables <- c('g','nw','w','ct','mg','mt') 27 | 28 | labels.decomposed <- paste(paste0(LETTERS[1:6],'.'), 29 | c('Change in TSR', 30 | 'Non-working age', 31 | 'Working age', 32 | 'Cohort turnover', 33 | 'Migration (15-64)', 34 | 'Mortality (15-64)')) 35 | 36 | 37 | dfm0312 <- n2dec0342 %>% 38 | filter(as.numeric(factor(year))%in%1:10) %>% 39 | group_by(id) %>% 40 | select(-1,-9,-10,-11,-12) %>% 41 | summarise_each(funs(sum)) 42 | 43 | fort.0312 <- fortify(Sn2, region = 'id') 44 | fort.0312 <- left_join(x=fort.0312, y=dfm0312, by = 'id') 45 | 46 | maps0312 <- list() 47 | 48 | for (i in 1:length(variables)){ 49 | vari <- variables[i] 50 | mapi <- basemap + 51 | geom_polygon(data = gghole(fort.0312)[[1]], aes_string(x='long', y='lat', group='group', fill=vari), 52 | color='grey30',size=.2)+ 53 | geom_polygon(data = gghole(fort.0312)[[2]], aes_string(x='long', y='lat', group='group', fill=vari), 54 | color='grey30',size=.2)+ 55 | scale_fill_viridis(vari)+ 56 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 57 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 58 | 59 | maps0312[[i]] <- mapi 60 | } 61 | 62 | maps0312[[1]] 63 | 64 | map.0312 <- ik_phd_gg_align.6.plots(maps0312, labels = labels.decomposed, family = myfont) + 65 | annotate('text',x=20.5,y=29.1,label='2003 - 2012',size=12,fontface=2,hjust=1, vjust=0, family = myfont) 66 | 67 | ggsave('_output/figA1_map_decomposed_0312.png',map.0312,width=12,height=18,dpi=192) 68 | 69 | 70 | 71 | ################################################################################ 72 | # the same 6 maps for 2013 - 2022 73 | 74 | dfm1322 <- n2dec0342 %>% 75 | filter(as.numeric(factor(year))%in%11:20) %>% 76 | group_by(id) %>% 77 | select(-1,-9,-10,-11,-12) %>% 78 | summarise_each(funs(sum)) 79 | 80 | fort.1322 <- fortify(Sn2, region = 'id') 81 | fort.1322 <- left_join(x=fort.1322, y=dfm1322, by = 'id') 82 | 83 | maps1322 <- list() 84 | 85 | for (i in 1:length(variables)){ 86 | vari <- variables[i] 87 | mapi <- basemap + 88 | geom_polygon(data = gghole(fort.1322)[[1]], aes_string(x='long', y='lat', group='group', fill=vari), 89 | color='grey30',size=.2)+ 90 | geom_polygon(data = gghole(fort.1322)[[2]], aes_string(x='long', y='lat', group='group', fill=vari), 91 | color='grey30',size=.2)+ 92 | scale_fill_viridis(vari)+ 93 
| geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 94 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 95 | 96 | maps1322[[i]] <- mapi 97 | } 98 | 99 | 100 | map.1322 <- ik_phd_gg_align.6.plots(maps1322, labels = labels.decomposed, family = myfont)+ 101 | annotate('text',x=20.5,y=29.1,label='2013 - 2022',size=12,fontface=2,hjust=1, vjust=0, family = myfont) 102 | 103 | ggsave('_output/figA2_map_decomposed_1322.png',map.1322,width=12,height=18,dpi=192) 104 | 105 | 106 | 107 | ################################################################################ 108 | # the same 6 maps for 2023 - 2032 109 | 110 | dfm2332 <- n2dec0342 %>% 111 | filter(as.numeric(factor(year))%in%21:30) %>% 112 | group_by(id) %>% 113 | select(-1,-9,-10,-11,-12) %>% 114 | summarise_each(funs(sum)) 115 | 116 | fort.2332 <- fortify(Sn2, region = 'id') 117 | fort.2332 <- left_join(x=fort.2332, y=dfm2332, by = 'id') 118 | 119 | maps2332 <- list() 120 | 121 | for (i in 1:length(variables)){ 122 | vari <- variables[i] 123 | mapi <- basemap + 124 | geom_polygon(data = gghole(fort.2332)[[1]], aes_string(x='long', y='lat', group='group', fill=vari), 125 | color='grey30',size=.2)+ 126 | geom_polygon(data = gghole(fort.2332)[[2]], aes_string(x='long', y='lat', group='group', fill=vari), 127 | color='grey30',size=.2)+ 128 | scale_fill_viridis(vari)+ 129 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 130 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 131 | 132 | maps2332[[i]] <- mapi 133 | } 134 | 135 | 136 | map.2332 <- ik_phd_gg_align.6.plots(maps2332, labels = labels.decomposed, family = myfont)+ 137 | annotate('text',x=20.5,y=29.1,label='2023 - 2032',size=12,fontface=2,hjust=1, vjust=0, family = myfont) 138 | 139 | ggsave('_output/figA3_map_decomposed_2332.png',map.2332,width=12,height=18,dpi=192) 140 | 141 | 142 | 143 | 144 | 145 | ################################################################################ 146 | # the same 6 maps for 2033 - 2042 147 | 148 | dfm3342 <- n2dec0342 %>% 149 | filter(as.numeric(factor(year))%in%31:40) %>% 150 | group_by(id) %>% 151 | select(-1,-9,-10,-11,-12) %>% 152 | summarise_each(funs(sum)) 153 | 154 | fort.3342 <- fortify(Sn2, region = 'id') 155 | fort.3342 <- left_join(x=fort.3342, y=dfm3342, by = 'id') 156 | 157 | maps3342 <- list() 158 | 159 | for (i in 1:length(variables)){ 160 | vari <- variables[i] 161 | mapi <- basemap + 162 | geom_polygon(data = gghole(fort.3342)[[1]], aes_string(x='long', y='lat', group='group', fill=vari), 163 | color='grey30',size=.2)+ 164 | geom_polygon(data = gghole(fort.3342)[[2]], aes_string(x='long', y='lat', group='group', fill=vari), 165 | color='grey30',size=.2)+ 166 | scale_fill_viridis(vari)+ 167 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 168 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 169 | 170 | maps3342[[i]] <- mapi 171 | } 172 | 173 | 174 | map.3342 <- ik_phd_gg_align.6.plots(maps3342, labels = labels.decomposed, family = myfont)+ 175 | annotate('text',x=20.5,y=29.1,label='2033 - 2042',size=12,fontface=2,hjust=1, vjust=0, family = myfont) 176 | 177 | ggsave('_output/figA4_map_decomposed_3342.png',map.3342,width=12,height=18,dpi=192) 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | ################################################################################ 186 | # THE WHOLE PERIOD. 
for 2003 - 2042 187 | 188 | dfm0342 <- n2dec0342 %>% 189 | #filter(as.numeric(factor(year))%in%1:40) %>% 190 | group_by(id) %>% 191 | select(-1,-9,-10,-11,-12) %>% 192 | summarise_each(funs(sum)) 193 | 194 | fort.0342 <- fortify(Sn2, region = 'id') 195 | fort.0342 <- left_join(x=fort.0342, y=dfm0342, by = 'id') 196 | 197 | maps0342 <- list() 198 | 199 | for (i in 1:length(variables)){ 200 | vari <- variables[i] 201 | mapi <- basemap + 202 | geom_polygon(data = gghole(fort.0342)[[1]], aes_string(x='long', y='lat', group='group', fill=vari), 203 | color='grey30',size=.2)+ 204 | geom_polygon(data = gghole(fort.0342)[[2]], aes_string(x='long', y='lat', group='group', fill=vari), 205 | color='grey30',size=.2)+ 206 | scale_fill_viridis(vari)+ 207 | geom_path(data=bord,aes(x=long, y=lat, group=group),color='grey20',size=.5)+ 208 | guides(fill = guide_colorbar(barwidth = 1, barheight = 9)) 209 | 210 | maps0342[[i]] <- mapi 211 | } 212 | 213 | maps0342[[1]] 214 | 215 | 216 | map.0342 <- ik_phd_gg_align.6.plots(maps0342, labels = labels.decomposed, family = myfont) + 217 | annotate('text',x=20.5,y=29.1,label='2003 - 2042',size=12,fontface=2,hjust=1, vjust=0, family = myfont) 218 | 219 | ggsave('_output/fig3_map_decomposed_0342.png',map.0342,width=12,height=18,dpi=192) 220 | 221 | 222 | 223 | ### 224 | # Report finish of the script execution 225 | print('Done: script 3.03') 226 | -------------------------------------------------------------------------------- /R_scripts/3_analysis/3.04_fig4_decomposed_descriptive.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Figure 4. Descriptive analysis of the decomposed TSR 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | myfont <- "Roboto Condensed" 13 | brbg3 <- brewer.pal(11, "BrBG")[c(8,2,11)] 14 | 15 | # load own functions 16 | load('data0_supplementary/own_functions.RData') 17 | 18 | # load data 19 | load('data3_calculated/n2dec0342.RData') 20 | 21 | 22 | 23 | plots <- list() 24 | 25 | vars <- c('g','nw','w','ct','mg','mt') 26 | 27 | year.labels <- paste(2003:2042) 28 | year.labels[-c(1,11,21,31,40)] <- '' 29 | 30 | df.sub <- n2dec0342 %>% 31 | select(1,3:9,12) %>% 32 | group_by(year,subregion) %>% 33 | summarise_each(funs(mean)) %>% 34 | ungroup() 35 | 36 | df.mean <- n2dec0342 %>% 37 | select(1,3:9) %>% 38 | group_by(year) %>% 39 | summarise_each(funs(mean)) %>% 40 | ungroup() 41 | 42 | # old colors 43 | # viridis(100)[c(10,60,95)] 44 | 45 | for (i in 1:length(vars)){ 46 | ylimi <- quantile(unlist(n2dec0342[,vars[i]]), probs=c(.001,.999)) 47 | if(ylimi[2]<0){ylimi[2] <- 0} 48 | labi <- seq(round(ylimi[1],2),ylimi[2],.01) 49 | 50 | ggi <- ggplot()+ 51 | geom_vline(xintercept = 1:40,colour = 'grey80',size=.5)+ 52 | 53 | geom_jitter(data = n2dec0342,aes_string(x='year',y=vars[i],color='subregion'), 54 | alpha=1,size=.1,width = .5)+ 55 | 56 | geom_line(data = df.sub, aes_string(x='year',y=vars[i],color='subregion',group='subregion'), 57 | size=3,color='white')+ 58 | geom_line(data = df.sub, aes_string(x='year',y=vars[i],color='subregion',group='subregion'), 59 | size=1.8,color='black')+ 60 | geom_line(data = df.sub, aes_string(x='year',y=vars[i],color='subregion',group='subregion'), 61 | size=1.5,lineend='round')+ 62 | 63 | 
geom_line(data = df.mean, aes_string(x='year',y=vars[i],group=1), 64 | size=3,color='white')+ 65 | geom_line(data = df.mean, aes_string(x='year',y=vars[i],group=1.2), 66 | size=1.5,color='black')+ 67 | #geom_point(data = df.mean, aes_string(x='year',y=vars[i]),size=2.5,color='white')+ 68 | geom_point(data = df.mean, aes_string(x='year',y=vars[i]),size=2.2,color='black')+ 69 | 70 | geom_hline(yintercept=0,color='black')+ 71 | 72 | 73 | coord_cartesian(ylim = ylimi)+ 74 | scale_color_manual(values = rev(brbg3))+ 75 | scale_y_continuous('',breaks = labi,labels = labi)+ 76 | scale_x_discrete('',labels= year.labels)+ 77 | theme_few(base_size = 15, base_family = myfont)+ 78 | theme(axis.text.x = element_text(angle = 0, vjust = 0.5), 79 | legend.position='none') 80 | 81 | 82 | 83 | plots[[i]] <- ggi 84 | } 85 | 86 | 87 | # align 6 plots in a fancy way 88 | labels.decomposed <- paste(paste0(LETTERS[1:6],'.'), 89 | c('Change in TSR', 90 | 'Non-working age', 91 | 'Working age', 92 | 'Cohort turnover', 93 | 'Migration (15-64)', 94 | 'Mortality (15-64)')) 95 | 96 | 97 | gg.six <- ik_phd_gg_align.6.plots(plots, labels = labels.decomposed, family = myfont) + 98 | annotate('rect',xmin = 9,xmax = 12.3,ymin = 24.5,ymax = 29.5,color='black',size=.5,fill=NA)+ 99 | annotate('point',x=9.5,y=seq(28.5,25.5,length.out = 3),size=10,color=rev(brbg3))+ 100 | annotate('text',x=10,y=seq(28.5,25.5,length.out = 3),hjust=0,vjust=0.5,size=7,color='black', 101 | label=c('Eastern\nEurope','Southern\nEurope','Western\nEurope'), family = myfont) 102 | 103 | ggsave('_output/fig4_decomposed_descriptive.png',gg.six,width = 12,height = 12*1.5,dpi=192) 104 | 105 | 106 | 107 | ### 108 | # Report finish of the script execution 109 | print('Done: script 3.04') 110 | -------------------------------------------------------------------------------- /R_scripts/3_analysis/3.05_fig5_model_estimates_0342.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Figure 5. 
Plot decomposed betas, 2003-2042, dependent variable: TSR 2003 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | myfont <- "Roboto Condensed" 13 | 14 | # load own functions 15 | load('data0_supplementary/own_functions.RData') 16 | 17 | # load data 18 | load('data3_calculated/n2dec0342.RData') 19 | 20 | 21 | years <- paste0('y',2003:2043) 22 | fullyears <- paste0('y',2003:2042) 23 | 24 | year.labels <- 2003:2042 25 | year.labels[-c(seq(1,36,5),40)] <- '' 26 | year.ticks <- (as.numeric(nchar(year.labels))+4)/8 27 | 28 | est0342 <- ik_phd_estimate.conv.models.dec(n2dec0342, years=2003:2043) 29 | 30 | est0342 <- est0342 %>% mutate(size=ifelse(pValue<.05,1,.1)) 31 | 32 | 33 | g0a <- ggplot(est0342,aes(x=year,y=coef))+ 34 | 35 | annotate('rect',xmin=-Inf,xmax=11,ymin=-Inf,ymax=Inf,fill='grey95')+ 36 | geom_hline(yintercept=0)+ 37 | 38 | geom_line(data=filter(est0342,as.numeric(model_y.x)%in%c(2)), 39 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1.5)+ 40 | geom_point(data=filter(est0342,as.numeric(model_y.x)%in%c(2)), 41 | aes(group=model_y.x,color=model_y.x),size=3,shape=21,fill='white')+ 42 | 43 | geom_line(data=filter(est0342,as.numeric(model_y.x)%in%c(3)), 44 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1.5)+ 45 | geom_point(data=filter(est0342,as.numeric(model_y.x)%in%c(3)), 46 | aes(group=model_y.x,color=model_y.x),size=3,shape=16)+ 47 | 48 | geom_line(data=filter(est0342,as.numeric(model_y.x)%in%c(1)), 49 | aes(group=model_y.x,color=model_y.x),stat='identity',size=2)+ 50 | geom_point(data=filter(est0342,as.numeric(model_y.x)%in%c(1)), 51 | aes(group=model_y.x,color=model_y.x),size=4)+ 52 | 53 | scale_color_manual(values = viridis(6)[1:3]) + 54 | scale_x_discrete(labels= year.labels)+ 55 | scale_y_continuous(limits=c(-.05,.03))+ 56 | xlab(NULL)+ 57 | ylab('Beta-coefficient')+ 58 | theme_few(base_size = 20, base_family = myfont)+ 59 | theme(legend.position='none', 60 | axis.text.x = element_text(angle = 0, vjust = 0.5), 61 | axis.ticks = element_line(size = year.ticks))+ 62 | annotate('text',family = myfont, 63 | x=c(25,6), 64 | y=c(.017,.007), 65 | label=c('Non-working age','Working age'), 66 | color=viridis(6)[c(2:3)], 67 | size=7, hjust=0, vjust=1)+ 68 | annotate('text', family = myfont, 69 | x=c(17), 70 | y=c(-.041), 71 | label=c('Overall model'), 72 | color=viridis(6)[c(1)], 73 | size=8, hjust=0, vjust=1) 74 | 75 | 76 | 77 | g0b <- ggplot(est0342,aes(x=year,y=coef))+ 78 | 79 | annotate('rect',xmin=-Inf,xmax=11,ymin=-Inf,ymax=Inf,fill='grey95')+ 80 | geom_hline(yintercept=0)+ 81 | 82 | geom_line(data=filter(est0342,as.numeric(model_y.x)%in%c(6)), 83 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1)+ 84 | geom_point(data=filter(est0342,as.numeric(model_y.x)%in%c(6)), 85 | aes(group=model_y.x,color=model_y.x),size=2,shape=16)+ 86 | 87 | geom_line(data=filter(est0342,as.numeric(model_y.x)%in%c(4)), 88 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1)+ 89 | geom_point(data=filter(est0342,as.numeric(model_y.x)%in%c(4)), 90 | aes(group=model_y.x,color=model_y.x),size=2,shape=21,fill='white')+ 91 | 92 | geom_line(data=filter(est0342,as.numeric(model_y.x)%in%c(5)), 93 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1)+ 94 | geom_point(data=filter(est0342,as.numeric(model_y.x)%in%c(5)), 95 | aes(group=model_y.x,color=model_y.x),size=2,shape=16)+ 96 | 97 |
geom_line(data=filter(est0342,as.numeric(model_y.x)%in%c(3)), 98 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1.5)+ 99 | geom_point(data=filter(est0342,as.numeric(model_y.x)%in%c(3)), 100 | aes(group=model_y.x,color=model_y.x),size=3,shape=16)+ 101 | 102 | scale_color_manual(values = viridis(6)[3:6]) + 103 | scale_x_discrete(labels= year.labels)+ 104 | scale_y_continuous(limits=c(-.05,.03))+ 105 | xlab(NULL)+ 106 | ylab('Beta-coefficient')+ 107 | theme_few(base_size = 20, base_family = myfont)+ 108 | theme(legend.position='none', 109 | axis.text.x = element_text(angle = 0, vjust = 0.5), 110 | axis.ticks = element_line(size = year.ticks))+ 111 | annotate('text', family = myfont, 112 | x=c(1,14,25), 113 | y=c(.017,.008,-.006), 114 | label=c('Cohort turnover','Migration','Mortality'), 115 | color=viridis(6)[c(4:6)], 116 | size=6, hjust=0, vjust=1)+ 117 | annotate('text', family = myfont, 118 | x=c(15), 119 | y=c(-.02), 120 | label=c('Working age'), 121 | color=viridis(6)[c(3)], 122 | size=7, hjust=0, vjust=1) 123 | 124 | 125 | 126 | #################################################################################################### 127 | # plot accumulated betas 128 | 129 | 130 | est0342cum <- est0342 %>% 131 | group_by(model_y.x) %>% 132 | mutate(coef.cum = cumsum(coef)) %>% 133 | ungroup() %>% 134 | arrange(model_y.x,year) 135 | 136 | gcum.a <- ggplot(est0342cum,aes(x=year,y=coef.cum))+ 137 | 138 | annotate('rect',xmin=-Inf,xmax=11,ymin=-Inf,ymax=Inf,fill='grey95')+ 139 | geom_hline(yintercept=0)+ 140 | 141 | geom_line(data=filter(est0342cum,as.numeric(model_y.x)%in%c(2)), 142 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1.5)+ 143 | geom_point(data=filter(est0342cum,as.numeric(model_y.x)%in%c(2)), 144 | aes(group=model_y.x,color=model_y.x),size=3,shape=21,fill='white')+ 145 | 146 | geom_line(data=filter(est0342cum,as.numeric(model_y.x)%in%c(3)), 147 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1.5)+ 148 | geom_point(data=filter(est0342cum,as.numeric(model_y.x)%in%c(3)), 149 | aes(group=model_y.x,color=model_y.x),size=3,shape=16)+ 150 | 151 | geom_line(data=filter(est0342cum,as.numeric(model_y.x)%in%c(1)), 152 | aes(group=model_y.x,color=model_y.x),stat='identity',size=2)+ 153 | geom_point(data=filter(est0342cum,as.numeric(model_y.x)%in%c(1)), 154 | aes(group=model_y.x,color=model_y.x),size=4)+ 155 | 156 | scale_color_manual(values = viridis(6)[1:3]) + 157 | scale_x_discrete(labels= year.labels)+ 158 | coord_cartesian(ylim = c(-.75,.05))+ 159 | xlab(NULL)+ 160 | ylab('Cumulative beta-coefficient')+ 161 | theme_few(base_size = 20, base_family = myfont)+ 162 | theme(legend.position='none', 163 | axis.text.x = element_text(angle = 0, vjust = 0.5), 164 | axis.ticks = element_line(size = year.ticks))+ 165 | annotate('text',family = myfont, 166 | x=c(20,16), 167 | y=c(-.34,-.1), 168 | label=c('Non-working age','Working age'), 169 | color=viridis(6)[c(2:3)], 170 | size=7, hjust=0, vjust=1)+ 171 | annotate('text',family = myfont, 172 | x=c(23), 173 | y=c(-.55), 174 | label=c('Overall model'), 175 | color=viridis(6)[c(1)], 176 | size=8, hjust=0, vjust=1) 177 | 178 | 179 | 180 | 181 | gcum.b <- ggplot(est0342cum,aes(x=year,y=coef.cum))+ 182 | 183 | annotate('rect',xmin=-Inf,xmax=11,ymin=-Inf,ymax=Inf,fill='grey95')+ 184 | geom_hline(yintercept=0)+ 185 | 186 | geom_line(data=filter(est0342cum,as.numeric(model_y.x)%in%c(4)), 187 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1)+ 188 | 
geom_point(data=filter(est0342cum,as.numeric(model_y.x)%in%c(4)), 189 | aes(group=model_y.x,color=model_y.x),size=2,shape=21,fill='white')+ 190 | 191 | geom_line(data=filter(est0342cum,as.numeric(model_y.x)%in%c(5)), 192 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1)+ 193 | geom_point(data=filter(est0342cum,as.numeric(model_y.x)%in%c(5)), 194 | aes(group=model_y.x,color=model_y.x),size=2,shape=16)+ 195 | 196 | geom_line(data=filter(est0342cum,as.numeric(model_y.x)%in%c(6)), 197 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1)+ 198 | geom_point(data=filter(est0342cum,as.numeric(model_y.x)%in%c(6)), 199 | aes(group=model_y.x,color=model_y.x),size=2,shape=16)+ 200 | 201 | geom_line(data=filter(est0342cum,as.numeric(model_y.x)%in%c(3)), 202 | aes(group=model_y.x,color=model_y.x),stat='identity',size=1.5)+ 203 | geom_point(data=filter(est0342cum,as.numeric(model_y.x)%in%c(3)), 204 | aes(group=model_y.x,color=model_y.x),size=3,shape=16)+ 205 | 206 | scale_color_manual(values = viridis(6)[3:6]) + 207 | scale_x_discrete(labels= year.labels)+ 208 | coord_cartesian(ylim = c(-.75,.05))+ 209 | xlab(NULL)+ 210 | ylab('Cumulative beta-coefficient')+ 211 | theme_few(base_size = 20, base_family = myfont)+ 212 | theme(legend.position='none', 213 | axis.text.x = element_text(angle = 0, vjust = 0.5), 214 | axis.ticks = element_line(size = year.ticks))+ 215 | annotate('text',family = myfont, 216 | x=c(1,33,26), 217 | y=c(.07,.07,-.14), 218 | label=c('Cohort turnover','Migration','Mortality'), 219 | color=viridis(6)[c(4:6)], 220 | size=6, hjust=0, vjust=1)+ 221 | annotate('text',family = myfont, 222 | x=c(19), 223 | y=c(-.27), 224 | label=c('Working age'), 225 | color=viridis(6)[c(3)], 226 | size=7, hjust=0, vjust=1) 227 | 228 | 229 | #################################################################################################### 230 | # save both plots 231 | 232 | gg <- cowplot::plot_grid(g0a,g0b,gcum.a,gcum.b,labels = LETTERS[1:4],ncol = 2,align = 'hv',label_size = 20, hjust = -1) 233 | 234 | ggsave('_output/fig5_model_estimates_0342_norm+cum.png',gg,width = 15,height = 11,dpi=192) 235 | 236 | 237 | 238 | ### 239 | # Report finish of the script execution 240 | print('Done: script 3.05') 241 | -------------------------------------------------------------------------------- /R_scripts/3_analysis/3.06_tabA1_summary_stat_by_country.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Table A1. Summary table Appendix. 
Statistics by country 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | # load own functions 13 | load('data0_supplementary/own_functions.RData') 14 | 15 | # load data 16 | load('data3_calculated/n2dec0342.RData') 17 | load('data0_supplementary/subregions.EuroVoc.countries.RData') 18 | load('data0_supplementary/idn2sub.Rdata') 19 | 20 | tsr <- n2dec0342 %>% filter(year %in% paste0('y',c(2003,2013,2023,2033,2043))) %>% 21 | group_by(year,country) %>% 22 | summarise(tsr=mean(tsr)) %>% 23 | ungroup() %>% 24 | spread(year,tsr) 25 | 26 | 27 | dec <- n2dec0342 %>% select(-(9:12)) %>% 28 | mutate(decade=factor(ceiling(as.numeric(year)/10))) %>% 29 | select(-1) %>% 30 | group_by(decade,id) %>% 31 | summarise_each(funs(sum)) %>% 32 | ungroup() %>% 33 | mutate(id=factor(substr(paste(id),1,2))) %>% 34 | group_by(decade,id) %>% 35 | summarise_each(funs(mean))%>% 36 | ungroup() 37 | 38 | dec0312 <- dec %>% filter(decade==1) %>% select(-1) %>% rename(country=id) 39 | 40 | nreg <- idn2sub[,1:2] %>% group_by(country) %>% summarise(nreg=n()) 41 | 42 | 43 | load('data2_prepared/n2p1.RData') 44 | 45 | pop <- n2p1 %>% filter(sex=='b',year%in%c('y2003','y2013'),age=='total') %>% droplevels() %>% 46 | mutate(country=factor(substr(paste(id),1,2))) %>% 47 | group_by(year,country) %>% summarise(pop.cnt=sum(value),mean.pop=mean(value)) %>% 48 | ungroup() 49 | 50 | pop.cnt <- pop %>% select(-mean.pop) %>% spread(year,pop.cnt) 51 | names(pop.cnt)[2:3] <- paste0(names(pop.cnt)[2:3],'pop.cnt') 52 | 53 | pop.mean.pop <- pop %>% select(-pop.cnt) %>% spread(year,mean.pop) 54 | names(pop.mean.pop)[2:3] <- paste0(names(pop.mean.pop)[2:3],'mean.pop') 55 | 56 | 57 | 58 | join <- suppressWarnings(left_join(tsr,dec0312,by='country')) 59 | join <- suppressWarnings(left_join(join,EU28.df,by='country')) 60 | join <- suppressWarnings(left_join(join,nreg,by='country')) 61 | join <- suppressWarnings(left_join(join,pop.cnt,by='country')) 62 | join <- suppressWarnings(left_join(join,pop.mean.pop,by='country')) 63 | 64 | join <- join %>% arrange(subregion,country) %>% 65 | select(-y2023,-y2033) %>% 66 | select(subregion,country,nreg,y2003pop.cnt,y2003mean.pop,y2013pop.cnt,y2013mean.pop, 67 | y2003,g,nw,w,ct,mg,mt,y2013) 68 | 69 | write.csv(join,file = '_output/tabA1_summary_stat_by_country.csv',row.names = F) 70 | 71 | 72 | ### 73 | # Report finish of the script execution 74 | print('Done: script 3.06') 75 | -------------------------------------------------------------------------------- /R_scripts/3_analysis/3.07_figA5_pyramid_London.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Figure A5. Appendix. Population pyramids. 
London 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | myfont <- "Roboto Condensed" 13 | 14 | # load own functions 15 | load('data0_supplementary/own_functions.RData') 16 | 17 | # load data 18 | load('data2_prepared/n2p1.RData') 19 | 20 | 21 | df.london <- filter(n2p1,id=='UKI1',age%in%c(paste0('a0',0:9),paste0('a',10:90)),sex!='b') %>% 22 | droplevels() 23 | 24 | # simulate age 90 25 | a90 <- filter(df.london,age=='a89') %>% 26 | mutate(age='a90', 27 | value=.9*value) 28 | 29 | df.london[is.null(df.london)] <- NA 30 | 31 | df.london <- ik_dm_fill.missings(df.london,a90,by=c('year','id','sex','age')) 32 | 33 | 34 | 35 | gg <- ik_gg_population.pyramid.compare(df = df.london,t1 = 2003,t2 = 2013, base_family = myfont) 36 | 37 | ggsave('_output/figA5_pyramid_London.png',gg,width = 7, height = 7,dpi=192) 38 | 39 | 40 | ### 41 | # Report finish of the script execution 42 | print('Done: script 3.07') 43 | -------------------------------------------------------------------------------- /R_scripts/3_analysis/3.08_figA6_pyramid_Eastern_Europe.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # Figure A6. Appendix. Population pyramids. Eastern Europe 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | myfont <- "Roboto Condensed" 13 | 14 | # load own functions 15 | load('data0_supplementary/own_functions.RData') 16 | 17 | # load data 18 | load('data2_prepared/n2p1.RData') 19 | load('data2_prepared/n2p1proj.RData') 20 | 21 | load('data0_supplementary/subregions.EuroVoc.countries.RData') 22 | 23 | df.ee.obs <- filter(n2p1,substr(paste(id),1,2)%in%EUE, 24 | age%in%c(paste0('a0',0:9),paste0('a',10:84)),sex!='b',year!='y2013') %>% 25 | droplevels() %>% 26 | group_by(year,sex,age) %>% 27 | summarise(value=sum(value)) %>% 28 | ungroup() 29 | 30 | df.ee.proj <- filter(n2p1proj,substr(paste(id),1,2)%in%EUE, 31 | age%in%c(paste0('a0',0:9),paste0('a',10:84)),sex!='total') %>% 32 | droplevels() %>% 33 | group_by(year,sex,age) %>% 34 | summarise(value=sum(value)) %>% 35 | ungroup() 36 | 37 | 38 | df.ee <- bind_rows(df.ee.obs,df.ee.proj) 39 | df.ee$year <- factor(df.ee$year) 40 | 41 | gg1 <- ik_gg_population.pyramid.compare(df = df.ee,t1 = 2003,t2 = 2013, base_family = myfont) 42 | gg2 <- ik_gg_population.pyramid.compare(df = df.ee,t1 = 2013,t2 = 2023, base_family = myfont) 43 | gg3 <- ik_gg_population.pyramid.compare(df = df.ee,t1 = 2023,t2 = 2033, base_family = myfont) 44 | gg4 <- ik_gg_population.pyramid.compare(df = df.ee,t1 = 2033,t2 = 2043, base_family = myfont) 45 | 46 | 47 | gg <- cowplot::plot_grid(gg1,gg2,gg3,gg4,ncol = 2,label_size = 20, 48 | labels=c('A. 2003 and 2013', 49 | 'B. 2013 and 2023', 50 | 'C. 2023 and 2033', 51 | 'D. 
2033 and 2043')) 52 | 53 | 54 | ggsave('_output/figA6_pyramid_EastEU.png',gg,width = 12, height = 14,dpi=192) 55 | 56 | 57 | 58 | ### 59 | # Report finish of the script execution 60 | print('Done: script 3.08') 61 | -------------------------------------------------------------------------------- /R_scripts/master_script.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Genus 2017. 2017-05-26 4 | # MASTER SCRIPT 5 | # Ilya Kashnitsky, ilya.kashnitsky@gmail.com 6 | # 7 | ################################################################################ 8 | 9 | # Erase all objects in memory 10 | rm(list = ls(all = TRUE)) 11 | 12 | 13 | # This is a master script. 14 | # Running just this one script, one may replicate all the results of the paper. 15 | # This script runs all other scripts in a sequential order performing all steps 16 | # of the data manipulation and analysis. 17 | 18 | # After the execution of each script, the message of the form 19 | # > [1] "Done: script x.xx" 20 | # will appear, indicating the overall progress 21 | 22 | # The reference execution time of all scripts 23 | # on a win7 intel core i5 machine with 4GB RAM is 10 minutes. 24 | # Half of the time is consumed by installation of the packages. 25 | # That is only required during the first execution of the scripts. 26 | # The speed may vary considerably depending on your internet connection. 27 | # In total, the scripts download 34.8 MB of data + all the packages 28 | 29 | 30 | # STEP 1. Preparations 31 | source('R_scripts/1_preparation/1.01_install_required_packages.R') # very long! 32 | # now we need to re-open R session and run the 1.01 script again 33 | .rs.restartR() 34 | source('R_scripts/1_preparation/1.01_install_required_packages.R') # quick this time 35 | source('R_scripts/1_preparation/1.02_load_own_functions.R') 36 | source('R_scripts/1_preparation/1.03_prepare_ALL_supplementary.R') 37 | 38 | # STEP 2. Data manipulations 39 | source('R_scripts/2_data_manipulation/2.01_download_geodata.R') 40 | source('R_scripts/2_data_manipulation/2.02_download&prepare_OBS_data.R') 41 | source('R_scripts/2_data_manipulation/2.03_download&prepare_PROJ_data.R') 42 | source('R_scripts/2_data_manipulation/2.04_missing_download&unzip.R') 43 | source('R_scripts/2_data_manipulation/2.05_missing_DE.R') 44 | source('R_scripts/2_data_manipulation/2.06_missing_DK.R') 45 | source('R_scripts/2_data_manipulation/2.07_missing_SI.R') 46 | source('R_scripts/2_data_manipulation/2.08_missing_RO_smooth.R') 47 | source('R_scripts/2_data_manipulation/2.09_missing_INSERT.R') 48 | source('R_scripts/2_data_manipulation/2.10_TSR_decomposition_OBS.R') 49 | source('R_scripts/2_data_manipulation/2.11_TSR_decomposition_PROJ.R') 50 | source('R_scripts/2_data_manipulation/2.12_TSR_decomposition_n2dec0342.R') 51 | source('R_scripts/2_data_manipulation/2.13_TSR_2043.R') 52 | 53 | # STEP 3. 
Analysis 54 | source('R_scripts/3_analysis/3.01_fig1_maps_TSR_growth_4decades.R') 55 | source('R_scripts/3_analysis/3.02_fig2_TSR_subregions_0342.R') 56 | source('R_scripts/3_analysis/3.03_fig3+A1+A2+A3+A4_maps_decomposition.R') 57 | source('R_scripts/3_analysis/3.04_fig4_decomposed_descriptive.R') 58 | source('R_scripts/3_analysis/3.05_fig5_model_estimates_0342.R') 59 | source('R_scripts/3_analysis/3.06_tabA1_summary_stat_by_country.R') 60 | source('R_scripts/3_analysis/3.07_figA5_pyramid_London.R') 61 | source('R_scripts/3_analysis/3.08_figA6_pyramid_Eastern_Europe.R') 62 | 63 | # find all the results in '_output/' -------------------------------------------------------------------------------- /_output/placeholder.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/_output/placeholder.txt -------------------------------------------------------------------------------- /data0_supplementary/EU_nuts/EU28.CSV.GZ: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/EU_nuts/EU28.CSV.GZ -------------------------------------------------------------------------------- /data0_supplementary/EU_nuts/idn0.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/EU_nuts/idn0.csv.gz -------------------------------------------------------------------------------- /data0_supplementary/EU_nuts/idn1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/EU_nuts/idn1.csv.gz -------------------------------------------------------------------------------- /data0_supplementary/EU_nuts/idn2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/EU_nuts/idn2.csv.gz -------------------------------------------------------------------------------- /data0_supplementary/EU_nuts/idn3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/EU_nuts/idn3.csv.gz -------------------------------------------------------------------------------- /data0_supplementary/Roboto_Condensed/LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /data0_supplementary/Roboto_Condensed/RobotoCondensed-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/Roboto_Condensed/RobotoCondensed-Bold.ttf -------------------------------------------------------------------------------- /data0_supplementary/Roboto_Condensed/RobotoCondensed-BoldItalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/Roboto_Condensed/RobotoCondensed-BoldItalic.ttf -------------------------------------------------------------------------------- /data0_supplementary/Roboto_Condensed/RobotoCondensed-Italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/Roboto_Condensed/RobotoCondensed-Italic.ttf -------------------------------------------------------------------------------- /data0_supplementary/Roboto_Condensed/RobotoCondensed-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/Roboto_Condensed/RobotoCondensed-Light.ttf -------------------------------------------------------------------------------- /data0_supplementary/Roboto_Condensed/RobotoCondensed-LightItalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/Roboto_Condensed/RobotoCondensed-LightItalic.ttf -------------------------------------------------------------------------------- /data0_supplementary/Roboto_Condensed/RobotoCondensed-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data0_supplementary/Roboto_Condensed/RobotoCondensed-Regular.ttf -------------------------------------------------------------------------------- /data1_raw/Eurostat/observed/placeholder.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data1_raw/Eurostat/observed/placeholder.txt -------------------------------------------------------------------------------- /data1_raw/Eurostat/placeholder.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data1_raw/Eurostat/placeholder.txt 
-------------------------------------------------------------------------------- /data1_raw/Eurostat/projected/placeholder.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data1_raw/Eurostat/projected/placeholder.txt -------------------------------------------------------------------------------- /data1_raw/Missing_data/placeholder.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data1_raw/Missing_data/placeholder.txt -------------------------------------------------------------------------------- /data2_prepared/placeholder.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data2_prepared/placeholder.txt -------------------------------------------------------------------------------- /data3_calculated/placeholder.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/data3_calculated/placeholder.txt -------------------------------------------------------------------------------- /genus-2017.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 8 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /geo_data/placeholder.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikashnitsky/genus-2017/31be7ae9058c37e27df7af326527d6c1b197d01d/geo_data/placeholder.txt --------------------------------------------------------------------------------