├── CRAN-RELEASE ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── apply_constrained.R ├── apply_pop_density.R ├── calculate_zs_covariates.R ├── calculate_zs_parallel.R ├── check_result.R ├── create_covariates_list.R ├── download_file.R ├── get_aval_memory.R ├── get_blocks_need.R ├── initial_check.R ├── load_pop.R ├── masking_out.R ├── merge_covariates.R ├── os_system.R ├── popRF.R ├── popRFdemo.R ├── popfit.R ├── popfit_final.R ├── popfit_init_tuning.R ├── popfit_quant.R ├── progress_message.R ├── rasterize_parallel.R ├── rf_prediction.R ├── rf_prediction_parallel.R └── utility_functions.R ├── README.md ├── cran-comments.md ├── inst └── CITATION └── man ├── figures └── example_ppp_ECU_v2.jpg ├── popRF.Rd └── popRFdemo.Rd /CRAN-RELEASE: -------------------------------------------------------------------------------- 1 | This package was submitted to CRAN on 2021-07-21. 2 | Once it is accepted, delete this file and tag the release (commit 7e88512). 3 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: popRF 2 | Type: Package 3 | Title: Random Forest-Informed Population Disaggregation 4 | Version: 1.0.0 5 | Authors@R: c( 6 | person( 7 | given = "Maksym", 8 | family = "Bondarenko", 9 | role = c("aut", "cre", "cph"), 10 | email = "mb4@soton.ac.uk", 11 | comment = c(ORCID = "0000-0003-4958-6551") 12 | ), 13 | person( 14 | given = "Jeremiah J", 15 | family = "Nieves", 16 | role = c("aut"), 17 | email = "j.j.nieves@liverpool.ac.uk", 18 | comment = c(ORCID = "0000-0002-7423-1341") 19 | ), 20 | person( 21 | given = "Forrest R.", 22 | family = "Stevens", 23 | role = c("aut"), 24 | email = "forrest.stevens@louisville.edu" 25 | ), 26 | person( 27 | given = "Andrea E.", 28 | family = "Gaughan", 29 | role = c("aut"), 30 | email = "ae.gaughan@louisville.edu" 31 | ), 32 | person( 33 | given = "Chris", 34 | family = "Jochem", 35 | role = c("ctb"), 36 | email = "W.C.Jochem@soton.ac.uk", 37 | comment = c(ORCID = "0000-0003-2192-5988") 38 | ), 39 | person( 40 | given = "David", 41 | family = "Kerr", 42 | role = c("ctb"), 43 | email = "dk2n16@soton.ac.uk" 44 | ), 45 | person( 46 | given = "Alessandro", 47 | family = "Sorichetta", 48 | role = c("ctb"), 49 | email = "as1v13@soton.ac.uk", 50 | comment = c(ORCID = "0000-0002-3576-5826") 51 | ) 52 | ) 53 | Maintainer: Maksym Bondarenko 54 | Description: Disaggregating census-based areal population counts to finer 55 | gridded population surfaces using Random Forest algorithm to determine 56 | the target area weights 57 | (see _Stevens, et al._ (2015) ). 
58 | URL: https://github.com/wpgp/popRF 59 | BugReports: https://github.com/wpgp/popRF/issues 60 | Imports: 61 | doParallel, 62 | parallel, 63 | raster, 64 | methods, 65 | stats, 66 | foreach, 67 | terra, 68 | randomForest, 69 | quantregForest, 70 | plyr 71 | Depends: R (>= 3.2.0) 72 | License: GPL-3 73 | Encoding: UTF-8 74 | Roxygen: list(markdown = TRUE) 75 | RoxygenNote: 7.2.3 76 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(popRF) 4 | export(popRFdemo) 5 | importFrom(doParallel,registerDoParallel) 6 | importFrom(foreach,'%dopar%') 7 | importFrom(foreach,foreach) 8 | importFrom(methods,is) 9 | importFrom(parallel,clusterEvalQ) 10 | importFrom(parallel,clusterExport) 11 | importFrom(parallel,detectCores) 12 | importFrom(parallel,makeCluster) 13 | importFrom(parallel,stopCluster) 14 | importFrom(plyr,join) 15 | importFrom(quantregForest,quantregForest) 16 | importFrom(randomForest,combine) 17 | importFrom(randomForest,importance) 18 | importFrom(randomForest,randomForest) 19 | importFrom(randomForest,tuneRF) 20 | importFrom(randomForest,varImpPlot) 21 | importFrom(raster,beginCluster) 22 | importFrom(raster,blockSize) 23 | importFrom(raster,calc) 24 | importFrom(raster,compareRaster) 25 | importFrom(raster,endCluster) 26 | importFrom(raster,getCluster) 27 | importFrom(raster,getValues) 28 | importFrom(raster,hasValues) 29 | importFrom(raster,ncell) 30 | importFrom(raster,nlayers) 31 | importFrom(raster,raster) 32 | importFrom(raster,rasterTmpFile) 33 | importFrom(raster,returnCluster) 34 | importFrom(raster,values) 35 | importFrom(raster,writeRaster) 36 | importFrom(raster,writeStart) 37 | importFrom(raster,writeStop) 38 | importFrom(raster,writeValues) 39 | importFrom(raster,zonal) 40 | importFrom(stats,aggregate) 41 | importFrom(stats,complete.cases) 42 | importFrom(stats,na.omit) 43 | importFrom(stats,predict) 44 | importFrom(stats,sd) 45 | importFrom(terra,merge) 46 | importFrom(terra,rast) 47 | importFrom(terra,sprc) 48 | importFrom(utils,getFromNamespace) 49 | importFrom(utils,object.size) 50 | importFrom(utils,read.csv) 51 | importFrom(utils,stack) 52 | importFrom(utils,write.csv) 53 | importFrom(utils,write.table) 54 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # popRF 1.0.0 2 | 3 | * Random Forest-informed Population Disaggregation R package 4 | * Demo function `popRFdemo` was added to generate a population layer using the WorldPop geospatial covariates 5 | -------------------------------------------------------------------------------- /R/apply_constrained.R: -------------------------------------------------------------------------------- 1 | #' Apply population density. 2 | #' 3 | #' @details masking out the prediction layer and L1 using mask. 4 | #' 5 | #' @rdname apply_constrained 6 | #' @param pop the name of the file which the administrative ID and the 7 | #' population values are to be read from. The file should contain two 8 | #' columns comma-separated with the value of administrative ID and 9 | #' population without columns names. If it does not contain an absolute 10 | #' path, the file name is relative to the current working directory. 
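#' @examples
#' \dontrun{
#' ## Illustrative sketch only: the country tag "npl" and all file paths below
#' ## are hypothetical placeholders, not files shipped with popRF. The call
#' ## assumes predict_density_rf_pred_npl.tif has already been written to
#' ## output_dir by the preceding prediction step.
#' apply_constrained(pop = list(npl = "/data/npl/npl_population.csv"),
#'                   mastergrid_filename = "/data/npl/npl_mastergrid.tif",
#'                   const = "/data/npl/npl_builtsettlement_mask.tif",
#'                   output_dir = "/output/npl",
#'                   cores = 4,
#'                   rfg.countries.tag = "npl",
#'                   quant = TRUE,
#'                   verbose = TRUE,
#'                   log = FALSE)
#' }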
11 | #' @param mastergrid_filename census mask Path FileName 12 | #' @param const Path to the mask to constrained population layer. 13 | #' Mask file should be a raster file with 0 and Nodata values. 14 | #' @param output_dir Path to the folder to save the outputs. 15 | #' @param cores is a integer. Number of cores to use when executing the 16 | #' function, which defaults to 4. If set to 0 or NULL max number of cores 17 | #' will be used based on as many processors as the hardware and RAM 18 | #' allow. 19 | #' @param rfg.countries.tag character of tag 20 | #' @param quant logical. If FALSE then quant will not be calculated 21 | #' @param blocks number of blocks sugesting for processing raster file. 22 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 23 | #' intermediate output from the function on the console, which might be 24 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 25 | #' @param log is logical. TRUE or FALSE: flag indicating whether to print 26 | #' intermediate output from the function on the log.txt file. 27 | #' Default is \code{log} = FALSE. 28 | #' @importFrom raster getValues writeRaster values calc 29 | #' @importFrom plyr join 30 | #' @importFrom utils read.csv 31 | #' @return raster objects 32 | #' @noRd 33 | apply_constrained <- function(pop, 34 | mastergrid_filename, 35 | const, 36 | output_dir, 37 | cores = NULL, 38 | rfg.countries.tag, 39 | quant = TRUE, 40 | blocks = NULL, 41 | verbose = TRUE, 42 | log = FALSE) { 43 | 44 | log_info("MSG", paste0("Start creating a constrained population layer"), 45 | verbose=verbose, 46 | log=log) 47 | 48 | 49 | rfg.predict.density.rf.pred <- file.path(output_dir, 50 | paste0("predict_density_rf_pred_", 51 | rfg.countries.tag, ".tif")) 52 | 53 | rfg.predict.density.rf.pred.const <- file.path(output_dir, 54 | paste0("predict_density_rf_pred_", 55 | rfg.countries.tag, 56 | "_const.tif")) 57 | 58 | mastergrid.basename = basename(mastergrid_filename) 59 | 60 | mastergrid.fl.name = substr(basename(mastergrid.basename), 61 | 1, 62 | nchar(basename(mastergrid.basename)) - 4) 63 | 64 | mastergrid.const <- file.path(output_dir, 65 | paste0(mastergrid.fl.name, "_const.tif")) 66 | 67 | 68 | fun_const = function(x) { 69 | if (any(!is.na(x))) { 70 | if (is.na(x[1]) & !is.na(x[2])) { 71 | return(NA) 72 | } else if (x[1]==0 & !is.na(x[2])) { 73 | return(x[2]) 74 | }else{ 75 | return(NA) 76 | } 77 | } else{ 78 | return(NA) 79 | } 80 | } 81 | 82 | log_info("MSG", paste0("Start preparing data to constrain"), 83 | verbose=verbose, 84 | log=log) 85 | 86 | # if we have multi cores then do masking in parallel 87 | if ( cores > 1 ){ 88 | 89 | mask_ppd_stack <- raster::stack(raster(const), 90 | raster(rfg.predict.density.rf.pred)) 91 | 92 | if (is.null(blocks)) { 93 | 94 | blocks <- get_blocks_size(mask_ppd_stack, 95 | cores, 96 | verbose = verbose) 97 | 98 | } 99 | 100 | npoc_blocks <- ifelse(blocks$n < cores, blocks$n, cores) 101 | 102 | log_info("MSG", paste0("Constraining prediction layer"), 103 | verbose=verbose, 104 | log=log) 105 | 106 | density_mask <- masking_out(mask_ppd_stack, 107 | fun = fun_const, 108 | filename = rfg.predict.density.rf.pred.const, 109 | NAflag=-99999, 110 | datatype="FLT4S", 111 | overwrite = TRUE, 112 | cores = npoc_blocks, 113 | blocks = blocks, 114 | cblk = 2, 115 | silent = ifelse(verbose, FALSE, TRUE)) 116 | 117 | 118 | mastergrid_stack <- raster::stack(raster(const), 119 | raster(mastergrid_filename)) 120 | 121 | 122 | log_info("MSG", paste0("Constraining 
mastergrid"), verbose=verbose, log=log) 123 | 124 | mastergrid_mask <- masking_out(mastergrid_stack, 125 | fun = fun_const, 126 | filename = mastergrid.const, 127 | NAflag=-99999, 128 | datatype="FLT4S", 129 | overwrite = TRUE, 130 | cores = npoc_blocks, 131 | blocks = blocks, 132 | cblk = 2, 133 | silent = ifelse(verbose, FALSE, TRUE)) 134 | 135 | 136 | }else{ 137 | 138 | density_mask <- calc(mask_ppd_stack, fun_const) 139 | 140 | 141 | writeRaster(density_mask, 142 | filename=rfg.predict.density.rf.pred.const, 143 | format="GTiff", 144 | overwrite=TRUE, 145 | NAflag=-99999, 146 | datatype='FLT4S', 147 | options=c("COMPRESS=LZW") 148 | ) 149 | 150 | rm(density_mask) 151 | 152 | mastergrid_mask <- calc(mastergrid_stack, fun_const) 153 | 154 | 155 | writeRaster(mastergrid_mask, 156 | filename=mastergrid.const, 157 | format="GTiff", 158 | overwrite=TRUE, 159 | NAflag=-99999, 160 | datatype='FLT4S', 161 | options=c("COMPRESS=LZW") 162 | ) 163 | 164 | rm(mastergrid_mask) 165 | 166 | } # cores > 1 167 | 168 | 169 | 170 | 171 | apply_pop_density_constrained(pop, 172 | mastergrid.const, 173 | output_dir, 174 | cores=npoc_blocks, 175 | rfg.countries.tag, 176 | blocks=blocks, 177 | verbose=verbose, 178 | log=log) 179 | 180 | } -------------------------------------------------------------------------------- /R/apply_pop_density.R: -------------------------------------------------------------------------------- 1 | #' Apply population density. 2 | #' 3 | #' @details Apply population density to a final RF prediction 4 | #' (RF_pred +L1_pop)/RF_pred_ZS_sum. 5 | #' 6 | #' @rdname apply_pop_density 7 | #' @param pop the name of the file which the administrative ID and the population 8 | #' values are to be read from. The file should contain two columns 9 | #' comma-separated with the value of administrative ID and population 10 | #' without columns names. If it does not contain an absolute path, the 11 | #' file name is relative to the current working directory. 12 | #' @param mastergrid_filename census mask Path FileName 13 | #' @param output_dir Path to the folder to save the outputs. 14 | #' @param cores is a integer. Number of cores to use when executing the function, 15 | #' which defaults to 4. If set to 0 or NULL max number of cores will be 16 | #' used based on as many processors as the hardware and RAM allow. 17 | #' @param rfg.countries.tag character of tag 18 | #' @param quant logical. If FALSE then quant will not be calculated 19 | #' @param blocks number of blocks suggesting for processing raster file. 20 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 21 | #' intermediate output from the function on the console, which might be 22 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 23 | #' @param log is logical. TRUE or FALSE: flag indicating whether to print intermediate 24 | #' output from the function on the log.txt file. 25 | #' Default is \code{log} = FALSE. 
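#' @examples
#' \dontrun{
#' ## Illustrative sketch only: the country tag and file paths are hypothetical
#' ## placeholders. The function expects predict_density_rf_pred_<tag>.tif to
#' ## already exist in output_dir.
#' apply_pop_density(pop = list(npl = "/data/npl/npl_population.csv"),
#'                   mastergrid_filename = "/data/npl/npl_mastergrid.tif",
#'                   output_dir = "/output/npl",
#'                   cores = 4,
#'                   rfg.countries.tag = "npl",
#'                   verbose = TRUE,
#'                   log = FALSE)
#' }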
26 | #' @importFrom raster getValues writeRaster values 27 | #' @importFrom plyr join 28 | #' @importFrom utils stack 29 | #' @return raster objects 30 | #' @noRd 31 | apply_pop_density <- function(pop, 32 | mastergrid_filename, 33 | output_dir, 34 | cores=NULL, 35 | rfg.countries.tag, 36 | quant = TRUE, 37 | blocks=NULL, 38 | verbose=TRUE, 39 | log=FALSE) { 40 | 41 | 42 | silent <- if (verbose) FALSE else TRUE 43 | 44 | # rasterising pop table 45 | rfg.rst.pop.census.tif <- file.path(output_dir, 46 | paste0("pop_census_mask_", 47 | rfg.countries.tag, ".tif")) 48 | 49 | rfg.rst.zonal.stats.rf.pred.tif<- file.path(output_dir, 50 | paste0("predict_density_rf_pred_", 51 | rfg.countries.tag, 52 | "_ZS_sum.tif")) 53 | 54 | 55 | rfg.predict.density.rf.pred <- file.path(output_dir, 56 | paste0("predict_density_rf_pred_", 57 | rfg.countries.tag, ".tif")) 58 | 59 | rst.predict.density.rf.pred <- raster(rfg.predict.density.rf.pred) 60 | 61 | df <- get_pop_census_all(pop) 62 | zonal_raster <- raster(mastergrid_filename) 63 | 64 | if (is.null(cores) | cores < 2 ){ 65 | 66 | v <- data.frame( raster::getValues(zonal_raster) ) 67 | colnames(v) <- c("v1") 68 | colnames(df) <- c("v1","v2") 69 | out <- plyr::join(v, df, type="left",by = "v1")[-1] 70 | 71 | 72 | out.rst.pop.census.raster <- zonal_raster 73 | 74 | raster::values(out.rst.pop.census.raster) <- out[[1]] 75 | 76 | rst.pop.census <- raster::writeRaster(out.rst.pop.census.raster, 77 | filename=rfg.rst.pop.census.tif, 78 | format="GTiff", 79 | datatype='FLT4S', 80 | overwrite=TRUE, 81 | options=c("COMPRESS=LZW"), 82 | NAflag=-99999) 83 | 84 | rm(out.rst.pop.census.raster) 85 | 86 | 87 | zonal.stats.rf.pred.sum <- zonal(rst.predict.density.rf.pred, 88 | zonal_raster, 89 | fun="sum") 90 | 91 | 92 | ## Adjust column names: 93 | colnames(zonal.stats.rf.pred.sum) <- c("ADMINID", "sum") 94 | ## Sort the stats by Admin ID: 95 | zonal.stats.rf.pred.sum <- zonal.stats.rf.pred.sum[sort.list(zonal.stats.rf.pred.sum[,1]), ] 96 | 97 | ## Return the zonal stats excluding "Admin ID 0": 98 | zonal.stats.rf.pred.sum <- as.data.frame(zonal.stats.rf.pred.sum[zonal.stats.rf.pred.sum[,1] != 0, ]) 99 | 100 | colnames(zonal.stats.rf.pred.sum) <- c("v1","v2") 101 | out.zonal.stats.rf.pred.sum <- plyr::join(v, zonal.stats.rf.pred.sum, 102 | type="left",by = "v1")[-1] 103 | 104 | out.zonal.stats.rf.pred.sum.raster <- zonal_raster 105 | 106 | raster::values(out.zonal.stats.rf.pred.sum.raster) <- out.zonal.stats.rf.pred.sum[[1]] 107 | 108 | rst.zonal.stats.rf.pred <- raster::writeRaster(out.zonal.stats.rf.pred.sum.raster, 109 | filename=rfg.rst.zonal.stats.rf.pred.tif, 110 | format="GTiff", 111 | datatype='FLT4S', 112 | overwrite=TRUE, 113 | options=c("COMPRESS=LZW"), 114 | NAflag=-99999) 115 | 116 | rm(out.zonal.stats.rf.pred.sum.raster) 117 | 118 | }else{ 119 | 120 | 121 | colnames(df) <- c("ADMINID", "ADMINPOP") 122 | 123 | log_info("MSG", paste0("Rasterizing input population data."), 124 | verbose=verbose, log=log) 125 | 126 | rst.pop.census <- rasterize_parallel(zonal_raster, 127 | df, 128 | cores=cores, 129 | blocks=blocks, 130 | NAflag=NULL, 131 | datatype=NULL, 132 | filename=rfg.rst.pop.census.tif, 133 | overwrite=TRUE, 134 | silent=silent) 135 | 136 | 137 | 138 | log_info("MSG", 139 | paste0("Computing zonal statistics of final RF prediction layer."), 140 | verbose=verbose, log=log) 141 | 142 | out.zonal.stats.rf.pred.sum <- calculate_zs_parallel(rst.predict.density.rf.pred, 143 | zonal_raster, 144 | fun="sum", 145 | cores=cores, 146 | blocks=blocks, 147 | 
silent=silent) 148 | 149 | 150 | ## Adjust column names: 151 | colnames(out.zonal.stats.rf.pred.sum) <- c("ADMINID", "sum") 152 | ## Sort the stats by Admin ID: 153 | out.zonal.stats.rf.pred.sum <- out.zonal.stats.rf.pred.sum[sort.list(out.zonal.stats.rf.pred.sum[,1]), ] 154 | 155 | ## Return the zonal stats excluding "Admin ID 0": 156 | out.zonal.stats.rf.pred.sum <- as.data.frame(out.zonal.stats.rf.pred.sum[out.zonal.stats.rf.pred.sum[,1] != 0, ]) 157 | 158 | 159 | log_info("MSG", paste0("Rasterizing the results of zonal statistics."), 160 | verbose=verbose, log=log) 161 | rst.zonal.stats.rf.pred <- rasterize_parallel(zonal_raster, 162 | out.zonal.stats.rf.pred.sum, 163 | cores=cores, 164 | blocks=blocks, 165 | NAflag=NULL, 166 | datatype=NULL, 167 | filename=rfg.rst.zonal.stats.rf.pred.tif, 168 | overwrite=TRUE, 169 | silent=silent) 170 | rm(out.zonal.stats.rf.pred.sum) 171 | 172 | } 173 | 174 | 175 | rfg.predict.density.rf.pred.final <- file.path(output_dir, 176 | paste0("ppp_",rfg.countries.tag, 177 | ".tif")) 178 | 179 | r_calc <- (rst.predict.density.rf.pred * rst.pop.census)/rst.zonal.stats.rf.pred 180 | 181 | writeRaster(r_calc, 182 | filename=rfg.predict.density.rf.pred.final, 183 | format="GTiff", 184 | overwrite=TRUE, 185 | NAflag=-99999, 186 | datatype='FLT4S', 187 | options=c("COMPRESS=LZW") 188 | ) 189 | 190 | return(r_calc) 191 | } 192 | 193 | 194 | 195 | 196 | 197 | #' Apply population density constrained. 198 | #' 199 | #' @details Apply population density to a final constrained RF prediction 200 | #' (RF_pred +L1_pop)/RF_pred_ZS_sum. 201 | #' 202 | #' @rdname apply_pop_density_constrained 203 | #' @param pop the name of the file which the administrative ID and the 204 | #' population values are to be read from. The file should contain two 205 | #' columns comma-separated with the value of administrative ID and 206 | #' population without columns names. If it does not contain an absolute 207 | #' path, the file name is relative to the current working directory. 208 | #' @param mastergrid_filename census mask Path FileName 209 | #' @param output_dir Path to the folder to save the outputs. 210 | #' @param cores is a integer. Number of cores to use when executing the function, 211 | #' which defaults to 4. If set to 0 or NULL max number of cores will be 212 | #' used based on as many processors as the hardware and RAM allow. 213 | #' @param rfg.countries.tag character of tag 214 | #' @param blocks number of blocks sugesting for processing raster file. 215 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 216 | #' intermediate output from the function on the console, which might be 217 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 218 | #' @param log is logical. TRUE or FALSE: flag indicating whether to print 219 | #' intermediate output from the function on the log.txt file. 220 | #' Default is \code{log} = FALSE. 
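#' @examples
#' \dontrun{
#' ## Illustrative sketch only: paths and the country tag are hypothetical
#' ## placeholders. mastergrid_filename is expected to be the constrained
#' ## mastergrid written by apply_constrained(), and
#' ## predict_density_rf_pred_<tag>_const.tif must already exist in output_dir.
#' apply_pop_density_constrained(pop = list(npl = "/data/npl/npl_population.csv"),
#'                               mastergrid_filename = "/output/npl/npl_mastergrid_const.tif",
#'                               output_dir = "/output/npl",
#'                               cores = 4,
#'                               rfg.countries.tag = "npl",
#'                               verbose = TRUE,
#'                               log = FALSE)
#' }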
221 | #' @importFrom raster getValues writeRaster values 222 | #' @importFrom plyr join 223 | #' @importFrom utils stack 224 | #' @return raster objects 225 | #' @noRd 226 | apply_pop_density_constrained <- function(pop, 227 | mastergrid_filename, 228 | output_dir, 229 | cores=NULL, 230 | rfg.countries.tag, 231 | blocks=NULL, 232 | verbose=TRUE, 233 | log=FALSE) { 234 | 235 | 236 | silent <- if (verbose) FALSE else TRUE 237 | 238 | # rasterising pop table 239 | rfg.rst.pop.census.tif <- file.path(output_dir, 240 | paste0("pop_census_mask_", 241 | rfg.countries.tag, "_const.tif")) 242 | 243 | rfg.rst.zonal.stats.rf.pred.tif<- file.path(output_dir, 244 | paste0("predict_density_rf_pred_", 245 | rfg.countries.tag, 246 | "_ZS_sum_const.tif")) 247 | 248 | 249 | rfg.predict.density.rf.pred <- file.path(output_dir, 250 | paste0("predict_density_rf_pred_", 251 | rfg.countries.tag, 252 | "_const.tif")) 253 | 254 | rst.predict.density.rf.pred <- raster(rfg.predict.density.rf.pred) 255 | 256 | df <- get_pop_census_all(pop) 257 | zonal_raster <- raster(mastergrid_filename) 258 | 259 | if (is.null(cores) | cores < 2 ){ 260 | 261 | v <- data.frame( raster::getValues(zonal_raster) ) 262 | colnames(v) <- c("v1") 263 | colnames(df) <- c("v1","v2") 264 | out <- plyr::join(v, df, type="left",by = "v1")[-1] 265 | 266 | 267 | out.rst.pop.census.raster <- zonal_raster 268 | 269 | raster::values(out.rst.pop.census.raster) <- out[[1]] 270 | 271 | rst.pop.census <- raster::writeRaster(out.rst.pop.census.raster, 272 | filename=rfg.rst.pop.census.tif, 273 | format="GTiff", 274 | datatype='FLT4S', 275 | overwrite=TRUE, 276 | options=c("COMPRESS=LZW"), 277 | NAflag=-99999) 278 | 279 | rm(out.rst.pop.census.raster) 280 | 281 | 282 | zonal.stats.rf.pred.sum <- zonal(rst.predict.density.rf.pred, 283 | zonal_raster, 284 | fun="sum") 285 | 286 | 287 | ## Adjust column names: 288 | colnames(zonal.stats.rf.pred.sum) <- c("ADMINID", "sum") 289 | ## Sort the stats by Admin ID: 290 | zonal.stats.rf.pred.sum <- zonal.stats.rf.pred.sum[sort.list(zonal.stats.rf.pred.sum[,1]), ] 291 | 292 | ## Return the zonal stats excluding "Admin ID 0": 293 | zonal.stats.rf.pred.sum <- as.data.frame(zonal.stats.rf.pred.sum[zonal.stats.rf.pred.sum[,1] != 0, ]) 294 | 295 | colnames(zonal.stats.rf.pred.sum) <- c("v1","v2") 296 | out.zonal.stats.rf.pred.sum <- plyr::join(v, zonal.stats.rf.pred.sum, 297 | type="left",by = "v1")[-1] 298 | 299 | out.zonal.stats.rf.pred.sum.raster <- zonal_raster 300 | 301 | raster::values(out.zonal.stats.rf.pred.sum.raster) <- out.zonal.stats.rf.pred.sum[[1]] 302 | 303 | rst.zonal.stats.rf.pred <- raster::writeRaster(out.zonal.stats.rf.pred.sum.raster, 304 | filename=rfg.rst.zonal.stats.rf.pred.tif, 305 | format="GTiff", 306 | datatype='FLT4S', 307 | overwrite=TRUE, 308 | options=c("COMPRESS=LZW"), 309 | NAflag=-99999) 310 | 311 | rm(out.zonal.stats.rf.pred.sum.raster) 312 | 313 | 314 | 315 | }else{ 316 | 317 | 318 | colnames(df) <- c("ADMINID", "ADMINPOP") 319 | 320 | log_info("MSG", 321 | paste0("Rasterizing input population data using constarined mastegrid"), 322 | verbose=verbose, log=log) 323 | 324 | rst.pop.census <- rasterize_parallel(zonal_raster, 325 | df, 326 | cores=cores, 327 | blocks=blocks, 328 | NAflag=NULL, 329 | datatype=NULL, 330 | filename=rfg.rst.pop.census.tif, 331 | overwrite=TRUE, 332 | silent=silent) 333 | 334 | 335 | 336 | log_info("MSG", 337 | paste0("Computing zonal statistics of constarined final RF prediction layer."), 338 | verbose=verbose, log=log) 339 | 340 | out.zonal.stats.rf.pred.sum 
<- calculate_zs_parallel(rst.predict.density.rf.pred, 341 | zonal_raster, 342 | fun="sum", 343 | cores=cores, 344 | blocks=blocks, 345 | silent=silent) 346 | 347 | 348 | ## Adjust column names: 349 | colnames(out.zonal.stats.rf.pred.sum) <- c("ADMINID", "sum") 350 | ## Sort the stats by Admin ID: 351 | out.zonal.stats.rf.pred.sum <- out.zonal.stats.rf.pred.sum[sort.list(out.zonal.stats.rf.pred.sum[,1]), ] 352 | 353 | ## Return the zonal stats excluding "Admin ID 0": 354 | out.zonal.stats.rf.pred.sum <- as.data.frame(out.zonal.stats.rf.pred.sum[out.zonal.stats.rf.pred.sum[,1] != 0, ]) 355 | 356 | 357 | log_info("MSG", 358 | paste0("Rasterizing the results of zonal statistics."), 359 | verbose=verbose, log=log) 360 | rst.zonal.stats.rf.pred <- rasterize_parallel(zonal_raster, 361 | out.zonal.stats.rf.pred.sum, 362 | cores=cores, 363 | blocks=blocks, 364 | NAflag=NULL, 365 | datatype=NULL, 366 | filename=rfg.rst.zonal.stats.rf.pred.tif, 367 | overwrite=TRUE, 368 | silent=silent) 369 | rm(out.zonal.stats.rf.pred.sum) 370 | 371 | } 372 | 373 | 374 | rfg.predict.density.rf.pred.final <- file.path(output_dir, 375 | paste0("ppp_", 376 | rfg.countries.tag, 377 | "_const.tif")) 378 | 379 | r_calc <- (rst.predict.density.rf.pred * rst.pop.census)/rst.zonal.stats.rf.pred 380 | 381 | writeRaster(r_calc, 382 | filename=rfg.predict.density.rf.pred.final, 383 | format="GTiff", 384 | overwrite=TRUE, 385 | NAflag=-99999, 386 | datatype='FLT4S', 387 | options=c("COMPRESS=LZW") 388 | ) 389 | 390 | return(r_calc) 391 | } -------------------------------------------------------------------------------- /R/calculate_zs_covariates.R: -------------------------------------------------------------------------------- 1 | #' Calculation of covariates zonal statistics 2 | #' 3 | #' @param x Input covariates list 4 | #' @param y path to save results 5 | #' @param pop the name of the file which the administrative ID and the population 6 | #' values are to be read from. The file should contain two columns 7 | #' comma-separated with the value of administrative ID and population 8 | #' without columns names. If it does not contain an absolute path, the 9 | #' file name is relative to the current working directory. 10 | #' @param save_zst is logical. TRUE or FALSE: flag indicating whether output of 11 | #' zonal statistics will be sved into the file. 12 | #' Default is \code{save_zst} = TRUE. 13 | #' @param cores is a integer. Number of cores to use when executing the function. 14 | #' @param blocks number of blocks suggesting for processing raster file. 15 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 16 | #' intermediate output from the function on the console, which might be 17 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 18 | #' @param log is logical. TRUE or FALSE: flag indicating whether to print intermediate 19 | #' output from the function on the log.txt file. 20 | #' Default is \code{log} = FALSE. 
21 | #' @importFrom raster raster zonal 22 | #' @importFrom utils write.csv read.csv 23 | #' @rdname calculate_zonal_stats_covariates 24 | #' @return A data.frame compiled census_data 25 | #' @examples 26 | #' \dontrun{ 27 | #' calculate_zonal_stats_covariates(x, y, pop) 28 | #' } 29 | #' @noRd 30 | calculate_zonal_stats_covariates <- function(x, 31 | y, 32 | pop, 33 | save_zst=TRUE, 34 | cores=NULL, 35 | blocks=NULL, 36 | verbose=FALSE, 37 | log=FALSE, ...){ 38 | 39 | 40 | log_info("MSG", 41 | paste0("Start calculating zonal-statistics for all covariates"), 42 | verbose=verbose, log=log) 43 | 44 | tag <- paste(names(x), collapse="_") 45 | 46 | ## Pre allocate the matrix frameworks we'll use: 47 | census_data <- matrix(nrow=0, ncol = 2) 48 | POP_TABLE <- matrix(nrow=0, ncol = 2) 49 | colnames(POP_TABLE) <- c("ADMINID", "ADMINPOP") 50 | 51 | for ( icountry in names(x) ) { 52 | # icountry <- "BTN" 53 | ## Declare the path to the raster containing the zonal information: 54 | zonal_raster_path <- x[[icountry]][["mastergrid"]][["dataset_path"]] 55 | ## Bring in the zonal raster: 56 | zonal_raster <- raster(zonal_raster_path) 57 | 58 | ## Set up the matrix to hold the census data for that country: 59 | census_data.country <- matrix(nrow=0, ncol = 2) 60 | 61 | 62 | #get blokcs for parallel calculation of zonal stats 63 | 64 | blocks <- get_blocks_size(zonal_raster, 65 | cores, 66 | verbose=verbose, ...) 67 | 68 | npoc_blocks <- ifelse(blocks$n < cores, blocks$n, cores) 69 | 70 | for ( icvr in 1:length(x[[icountry]]) ){ 71 | 72 | 73 | 74 | # icvr <- 1 75 | ## Skip water mask and L1 in covariates based upon names of covariates: 76 | if( names(x[[icountry]][icvr]) %in% 77 | c("watermask","mastergrid") ){next} 78 | 79 | ## Retrieve the name of the covariate: 80 | var_name <- names(x[[icountry]][icvr]) 81 | ## Retrieve the corresponding attributes of the covariate: 82 | covariate <- x[[icountry]][[icvr]] 83 | ## Retrieve the dataset summary: 84 | dataset_summary <- covariate$dataset_summary 85 | ## Retrieve the corresponding dataset class: 86 | var_name_class <- covariate$dataset_class 87 | ## Retrieve the raster path of the covariate: 88 | raster_path <- covariate$dataset_path 89 | 90 | 91 | ## Load the actual covariate raster: 92 | dataset_raster <- raster(raster_path) 93 | 94 | ## Explicitly retrieve what the covariate is meant to represent: 95 | covariates.var.names <- covariate$dataset_class 96 | 97 | fname <- paste0(tolower(icountry),"_", 98 | var_name_class,"_ZS_",dataset_summary,".csv") 99 | file.path.csv <- file.path(y, fname) 100 | 101 | 102 | if(!file.exists(file.path.csv )){ 103 | 104 | 105 | if (!is.null(cores)){ 106 | 107 | ## Determine the minimum number of blocks needed for processing: 108 | # blocks <- get_blocks_size(dataset_raster, 109 | # cores, 110 | # nl=2, 111 | # nt=1, 112 | # verbose = verbose) 113 | 114 | # npoc_blocks <- ifelse(blocks$n < cores, blocks$n, cores) 115 | 116 | ## Calculate the stats in parallel: 117 | output_stats <- calculate_zs_parallel(dataset_raster, 118 | zonal_raster, 119 | fun=dataset_summary, 120 | cores=npoc_blocks, 121 | blocks=blocks) 122 | }else{ 123 | 124 | output_stats <- zonal(dataset_raster, 125 | zonal_raster, fun=dataset_summary) 126 | 127 | } 128 | 129 | 130 | ## Adjust the column names: 131 | colnames(output_stats) <- c("ADMINID", var_name_class) 132 | ## Sort the stats: 133 | output_stats.sorted <- output_stats[sort.list(output_stats[,1]), ] 134 | ##Return the stats which do not correspond to "admin ID 0": 135 | output_stats.sorted <- 
output_stats.sorted[output_stats.sorted[,1] != 0, ] 136 | ## Saving zonal statiscs per country for each covariate: 137 | if (save_zst){ 138 | write.csv( as.data.frame(output_stats.sorted), 139 | file = file.path.csv, row.names=FALSE ) 140 | } 141 | 142 | }else{ 143 | ## If the zonal stats already exist locally: 144 | 145 | # log_info("MSG", paste0("Working on ", 146 | # var_name_class, 147 | # " for ", 148 | # icountry, 149 | # " ", 150 | # dataset_summary ), 151 | # verbose=verbose, log=log) 152 | # 153 | # log_info("MSG", paste0("Zonal stats has been calculated before for a covariat. Loading..."), 154 | # verbose=verbose, log=log) 155 | 156 | ## Read in the file: 157 | output_stats <- read.csv( file.path.csv ) 158 | ## Adjust column names: 159 | colnames(output_stats) <- c("ADMINID", var_name_class) 160 | ## Sort the stats: 161 | output_stats.sorted <- output_stats[sort.list(output_stats[,1]), ] 162 | } 163 | 164 | ## If this is the first iteration: 165 | if (icvr == 1 ) { 166 | census_data.country <- output_stats.sorted 167 | } else { 168 | ## Merge with the previous iterations: 169 | census_data.country <- merge(as.data.frame(census_data.country), 170 | as.data.frame(output_stats.sorted), 171 | by="ADMINID", 172 | sort=FALSE) 173 | } 174 | 175 | if (verbose){ 176 | if ( icvr != length(x[[icountry]])){ 177 | 178 | progress_message(x=icvr, 179 | max=length(x[[icountry]]), 180 | label=paste0(" ", 181 | icountry, 182 | ": ", 183 | var_name_class 184 | ) 185 | ) 186 | 187 | }else{ 188 | 189 | progress_message(x=icvr, 190 | max=length(x[[icountry]]), 191 | label=paste0(" ", 192 | icountry, 193 | " " 194 | ) 195 | ) 196 | 197 | } 198 | } 199 | 200 | 201 | } 202 | 203 | census_data.country <- merge(as.data.frame(census_data.country), 204 | as.data.frame(load_pop(icountry, 205 | pop 206 | )), 207 | by="ADMINID", 208 | sort=FALSE) 209 | 210 | # Merging census_data all countries: 211 | census_data <- rbind(census_data, census_data.country) 212 | 213 | 214 | } 215 | 216 | 217 | ## Convert our calculated admin unit areas into hectares and add them to the 218 | ## census data: 219 | census_data$AREA_HA <- census_data$px_area / 10000 220 | 221 | ## Finally calculate our population density in people per hectare for use as 222 | ## our model's outcome of interest: 223 | census_data$POPD_PPHA <- census_data$ADMINPOP / census_data$AREA_HA 224 | 225 | ## Save the compiled census data as a new file in the temporary output 226 | ## folder: 227 | fln.csv <- paste0(tag,"_census_data.csv") 228 | file.path.csv <- file.path(y, fln.csv) 229 | write.csv(as.data.frame(census_data), file = file.path.csv, row.names=FALSE ) 230 | 231 | ## Convert that data to a dataframe for continuted use: 232 | census_data <- as.data.frame(census_data) 233 | 234 | log_info("MSG", 235 | paste0("Complited calculation zonal-statistics for all covariates"), 236 | verbose=verbose, log=log) 237 | 238 | return(census_data) 239 | } 240 | -------------------------------------------------------------------------------- /R/calculate_zs_parallel.R: -------------------------------------------------------------------------------- 1 | #' @title Function compute zonal statistics. 2 | #' That is, cross-tabulate the values of a Raster* object based on a 3 | #' "zones" RasterLayer. NA values are removed. 4 | #' Function uses \code{\link[doParallel]{registerDoParallel}} 5 | #' library to work with a big raster data. 
6 | #' 7 | #' @author Maksym Bondarenko and 8 | #' Chris Jochem 9 | #' @param x Raster* object 10 | #' @param y RasterLayer object with codes representing zones 11 | #' @param fun The function to be applied. Either as character: 'mean', 12 | #' 'min', 'max' and 'sum' 13 | #' @param cores Integer. Number of cores for parallel calculation 14 | #' @param blocks number of blocks sugesting for processing raster file. 15 | #' @param na.rm using na.rm = TRUE for missing data 16 | #' @param silent If FALSE then the progress will be shown 17 | #' @rdname calculate_zs_parallel 18 | #' @return A data.frame with a value for each zone (unique value in zones) 19 | #' @importFrom doParallel registerDoParallel 20 | #' @importFrom parallel detectCores makeCluster stopCluster 21 | #' @importFrom raster compareRaster hasValues getValues blockSize 22 | #' @importFrom stats aggregate 23 | #' @importFrom foreach '%dopar%' foreach 24 | #' @examples 25 | #' \dontrun{ 26 | #' calculate_zs_parallel( x=rasterObj1, y=rasterObj2, cores=2) 27 | #' } 28 | #' @noRd 29 | calculate_zs_parallel <- function(x, 30 | y, 31 | fun='mean', 32 | cores=NULL, 33 | blocks=NULL, 34 | na.rm=TRUE, 35 | silent=TRUE) { 36 | 37 | fun <- tolower(fun) 38 | if(length(fun) > 1){ 39 | fun <- fun[1] 40 | } 41 | 42 | if (! fun %in% c('sum', 'mean', 'sd', 'min', 'max', 'count')) { 43 | stop("fun can be 'sum', 'mean', 'sd', 'min', 'max', or 'count'") 44 | } 45 | 46 | # get real physical cores in a computer 47 | max.cores <- detectCores(logical = TRUE) 48 | 49 | if (is.null(cores)) { 50 | cores <- max.cores - 1 51 | } 52 | 53 | if (cores > max.cores) { 54 | stop(paste0("Number of cores ",cores," more then real physical cores in PC ",max.cores )) 55 | } 56 | 57 | if (is.null(blocks)) { 58 | 59 | blocks <- get_blocks_size(x, 60 | cores, 61 | verbose = T) 62 | 63 | cores <- ifelse(blocks$n < cores, blocks$n, cores) 64 | } 65 | 66 | compareRaster(c(x, y)) 67 | stopifnot(hasValues(x)) 68 | stopifnot(hasValues(y)) 69 | 70 | layernames <- names(x) 71 | 72 | 73 | tStart <- Sys.time() 74 | 75 | cl <- makeCluster(cores) 76 | 77 | # broadcast the data and functions to all worker 78 | # processes by clusterExport 79 | # clusterExport(cl, c(x,"y", "blocks")) 80 | 81 | registerDoParallel(cl) 82 | 83 | i <- 0 84 | result <- foreach(i = 1:blocks$n, .combine = rbind, .packages='raster') %dopar% 85 | { 86 | 87 | df.x <- data.frame( getValues(x, row=blocks$row[i], nrows=blocks$nrows[i]) ) 88 | df.y <- data.frame( getValues(y, row=blocks$row[i], nrows=blocks$nrows[i]) ) 89 | 90 | 91 | if ( fun == 'mean' | fun == 'sd' ) { 92 | 93 | df.fun <- aggregate(x = (df.x), by = list(df.y[,1]), 94 | FUN = function(x, na.rm = TRUE) sum(as.numeric(x), na.rm = na.rm), na.rm=na.rm) 95 | df.length <- aggregate(x = (df.x), by = list(df.y[,1]), 96 | FUN = function(x, na.rm=na.rm) length(stats::na.omit(x)), na.rm=na.rm) 97 | 98 | colnames(df.length) <- c(layernames,'length') 99 | colnames(df.fun) <- c(layernames,'sum') 100 | 101 | df <- merge(df.fun, df.length, all = TRUE, by = layernames) 102 | 103 | if (fun == 'sd'){ 104 | 105 | df.sq <- aggregate(x = (df.x^2), by = list(df.y[,1]), 106 | FUN = function(x, na.rm = TRUE) sum(as.numeric(x), na.rm = na.rm), na.rm=na.rm) 107 | colnames(df.sq) <- c(layernames,'sq') 108 | df <- merge(df, df.sq, all=TRUE, by=layernames) 109 | 110 | } 111 | 112 | } else if ( fun == 'count') { 113 | 114 | df <- aggregate(x = (df.x), by = list(df.y[,1]), 115 | FUN = function(x, na.rm=na.rm) length(stats::na.omit(x)), na.rm=na.rm) 116 | 117 | colnames(df) <- 
c(layernames,'count')
118 | 
119 | } else if ( fun == 'sum') { 
120 | 
121 | df <- aggregate(x = (df.x), by = list(df.y[,1]), 
122 | FUN = function(x, na.rm = TRUE) sum(as.numeric(x), na.rm = na.rm), na.rm=na.rm) 
123 | 
124 | colnames(df) <- c(layernames,'sum') 
125 | 
126 | 
127 | } else { 
128 | 
129 | df <- aggregate(x = (df.x), by = list(df.y[,1]), FUN = fun, na.rm=na.rm) 
130 | 
131 | colnames(df) <- c(layernames,fun) 
132 | } 
133 | 
134 | return(df) 
135 | } 
136 | 
137 | stopCluster(cl) 
138 | 
139 | if ( fun == 'mean' | fun == 'sd') { 
140 | 
141 | df1 <- aggregate(x = result$sum, 
142 | by = list(result[[1]]), FUN = 'sum', na.rm=na.rm) 
143 | df2 <- aggregate(x = result$length, 
144 | by = list(result[[1]]), FUN = 'sum', na.rm=na.rm) 
145 | df1$x <- df1$x / df2$x 
146 | 
147 | if (fun == 'sd'){ 
148 | 
149 | df3 <- aggregate(x = result$sq, 
150 | by = list(result[[1]]), FUN = 'sum', na.rm=na.rm) 
151 | df1$x <- sqrt(( (df3$x / df2$x) - (df1$x)^2 ) * (df2$x / (df2$x - 1))) 
152 | colnames(df1) <- c(layernames, 'sd') 
153 | 
154 | } else{ 
155 | 
156 | colnames(df1) <- c(layernames,'mean') 
157 | 
158 | } 
159 | 
160 | } else if ( fun == 'count') { 
161 | 
162 | df1 <- aggregate(x = result[[2]], 
163 | by = list(result[[1]]), FUN = 'sum', na.rm=na.rm) 
164 | 
165 | colnames(df1) <- c(layernames,'count') 
166 | 
167 | } else if ( fun == 'sum') { 
168 | 
169 | df1 <- aggregate(x = result[[2]], 
170 | by = list(result[[1]]), 
171 | FUN = function(x, na.rm = TRUE) sum(as.numeric(x), na.rm = na.rm), na.rm=na.rm) 
172 | 
173 | colnames(df1) <- c(layernames,'sum') 
174 | 
175 | } else{ 
176 | 
177 | df1 <- aggregate(x = result[[2]], 
178 | by = list(result[[1]]), 
179 | FUN = fun, na.rm=na.rm) 
180 | 
181 | colnames(df1) <- c(layernames,fun) 
182 | 
183 | } 
184 | 
185 | tEnd <- Sys.time() 
186 | 
187 | if (!silent) print(paste("Elapsed Processing Time:", tmDiff(tStart,tEnd))) 
188 | 
189 | return(df1) 
190 | } 
191 | 
-------------------------------------------------------------------------------- /R/check_result.R: -------------------------------------------------------------------------------- 
1 | 
2 | #' check_result compares input with output 
3 | #' 
4 | #' @param input_poptables Input list of pop tables 
5 | #' @param censusmaskPathFileName path to census mask raster file 
6 | #' @param rfg.output.path.countries output directory 
7 | #' @param nrpoc number of processors 
8 | #' @param rfg.countries.tag tag of the project 
9 | #' @param blocks number of blocks suggested for processing the raster file.
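#' @examples
#' \dontrun{
#' ## Illustrative sketch only: paths and the "npl" tag are hypothetical
#' ## placeholders. Compares the input admin totals against zonal sums of the
#' ## final ppp_<tag>.tif raster and returns the mean absolute difference.
#' check_result(input_poptables = list(npl = "/data/npl/npl_population.csv"),
#'              censusmaskPathFileName = "/data/npl/npl_mastergrid.tif",
#'              rfg.output.path.countries = "/output/npl",
#'              nrpoc = 4,
#'              rfg.countries.tag = "npl",
#'              verbose = TRUE,
#'              log = FALSE)
#' }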
10 | #' @param verbose is logical. If TRUE then intermediate output from the function is printed on the console 
11 | #' @param log is logical. If TRUE then intermediate output from the function is written to the log.txt file 
12 | #' @rdname check_result 
13 | #' @return Numeric. Mean absolute difference between the input population totals and the final prediction totals 
14 | #' @noRd 
15 | check_result <- function(input_poptables, 
16 | censusmaskPathFileName, 
17 | rfg.output.path.countries, 
18 | nrpoc, 
19 | rfg.countries.tag, 
20 | blocks=NULL, 
21 | verbose=TRUE, 
22 | log=FALSE) { 
23 | 
24 | 
25 | silent <- if (verbose) FALSE else TRUE 
26 | 
27 | census_data <- get_pop_census_all(input_poptables) 
28 | census_data <- census_data[sort.list(census_data[,1]), ] 
29 | 
30 | 
31 | ## Load final results 
32 | rfg.predict.density.rf.pred.final <- file.path(rfg.output.path.countries, 
33 | paste0("ppp_", 
34 | rfg.countries.tag, 
35 | ".tif")) 
36 | 
37 | dataset_raster <- raster(rfg.predict.density.rf.pred.final) 
38 | 
39 | 
40 | ## Load the zonal raster: 
41 | zonal_raster <- raster(censusmaskPathFileName) 
42 | 
43 | 
44 | if (is.null(nrpoc) || nrpoc < 2 ){ 
45 | 
46 | output_stats <- zonal(dataset_raster, zonal_raster, fun="sum") 
47 | 
48 | 
49 | }else{ 
50 | 
51 | if (is.null(blocks)) { 
52 | 
53 | blocks <- get_blocks_size(dataset_raster, 
54 | nrpoc, 
55 | verbose = ifelse(silent, FALSE, TRUE)) 
56 | } 
57 | 
58 | npoc_blocks <- ifelse(blocks$n < nrpoc, blocks$n, nrpoc) 
59 | 
60 | output_stats <- calculate_zs_parallel(dataset_raster, 
61 | zonal_raster, 
62 | fun="sum", 
63 | cores=nrpoc, 
64 | blocks=blocks, 
65 | silent=silent) 
66 | } 
67 | 
68 | 
69 | colnames(output_stats) <- c("ADMINID", "PPP_FINAL_RES") 
70 | output_stats.sorted <- output_stats[sort.list(output_stats[,1]), ] 
71 | output_stats.sorted <- output_stats.sorted[output_stats.sorted[,1] != 0, ] 
72 | 
73 | df <- merge( as.data.frame(census_data), 
74 | as.data.frame(output_stats.sorted), 
75 | by="ADMINID", 
76 | sort=FALSE) 
77 | 
78 | 
79 | 
80 | df <- cbind( df, abs(df$ADMINPOP - df$PPP_FINAL_RES)) 
81 | 
82 | colnames(df) <- c("ADMINID", "ADMINPOP", "PPP", "DIFFERENCE") 
83 | 
84 | 
85 | file.path.csv <- file.path(rfg.output.path.countries, 
86 | paste0("check_result_prj_",rfg.countries.tag,".csv")) 
87 | 
88 | write.csv( as.data.frame(df), file = file.path.csv, row.names=FALSE ) 
89 | 
90 | return(mean(df[,4])) 
91 | } 
92 | 
93 | 
94 | 
95 | #' check_result_constrained compares input with output 
96 | #' 
97 | #' @param input_poptables Input list of pop tables 
98 | #' @param censusmaskPathFileName path to census mask raster file 
99 | #' @param rfg.output.path.countries output directory 
100 | #' @param nrpoc number of processors 
101 | #' @param rfg.countries.tag tag of the project 
102 | #' @param blocks number of blocks suggested for processing the raster file.
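#' @examples
#' \dontrun{
#' ## Illustrative sketch only: paths and the "npl" tag are hypothetical
#' ## placeholders. Compares the input admin totals against zonal sums of the
#' ## constrained ppp_<tag>_const.tif raster.
#' check_result_constrained(input_poptables = list(npl = "/data/npl/npl_population.csv"),
#'                          censusmaskPathFileName = "/data/npl/npl_mastergrid.tif",
#'                          rfg.output.path.countries = "/output/npl",
#'                          nrpoc = 4,
#'                          rfg.countries.tag = "npl",
#'                          verbose = TRUE,
#'                          log = FALSE)
#' }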
103 | #' @param verbose If FALSE then the progress will be shown 104 | #' @param log If FALSE then the progress will be shown 105 | #' @rdname check_result_constrained 106 | #' @return Number of mean difference 107 | #' @noRd 108 | check_result_constrained <- function(input_poptables, 109 | censusmaskPathFileName, 110 | rfg.output.path.countries, 111 | nrpoc, 112 | rfg.countries.tag, 113 | blocks=NULL, 114 | verbose=TRUE, 115 | log=FALSE) { 116 | 117 | 118 | mastergrid.basename = basename(censusmaskPathFileName) 119 | 120 | mastergrid.fl.name = substr(basename(mastergrid.basename), 121 | 1, 122 | nchar(basename(mastergrid.basename)) - 4) 123 | 124 | mastergrid.const <- file.path(rfg.output.path.countries, 125 | paste0(mastergrid.fl.name, "_const.tif")) 126 | 127 | 128 | silent <- if (verbose) FALSE else TRUE 129 | 130 | census_data <- get_pop_census_all(input_poptables) 131 | census_data <- census_data[sort.list(census_data[,1]), ] 132 | 133 | 134 | ## Load final results 135 | rfg.predict.density.rf.pred.final <- file.path(rfg.output.path.countries, 136 | paste0("ppp_", 137 | rfg.countries.tag, 138 | "_const.tif")) 139 | 140 | dataset_raster <- raster(rfg.predict.density.rf.pred.final) 141 | 142 | 143 | ## Load the zonal raster: 144 | zonal_raster <- raster(censusmaskPathFileName) 145 | 146 | 147 | if (is.null(nrpoc) | nrpoc < 2 ){ 148 | 149 | output_stats <- zonal(dataset_raster, zonal_raster, fun="sum") 150 | 151 | 152 | }else{ 153 | 154 | if (is.null(blocks)) { 155 | 156 | blocks <- get_blocks_size(dataset_raster, 157 | nrpoc, 158 | verbose = ifelse(silent, FALSE, TRUE)) 159 | } 160 | 161 | npoc_blocks <- ifelse(blocks$n < nrpoc, blocks$n, nrpoc) 162 | 163 | output_stats <- calculate_zs_parallel(dataset_raster, 164 | zonal_raster, 165 | fun="sum", 166 | cores=nrpoc, 167 | blocks=blocks, 168 | silent=silent) 169 | } 170 | 171 | 172 | colnames(output_stats) <- c("ADMINID", "PPP_FINAL_RES") 173 | output_stats.sorted <- output_stats[sort.list(output_stats[,1]), ] 174 | output_stats.sorted <- output_stats.sorted[output_stats.sorted[,1] != 0, ] 175 | 176 | df <- merge( as.data.frame(census_data), 177 | as.data.frame(output_stats.sorted), 178 | by="ADMINID", 179 | sort=FALSE) 180 | 181 | 182 | 183 | df <- cbind( df, abs(df$ADMINPOP - df$PPP_FINAL_RES)) 184 | 185 | colnames(df) <- c("ADMINID", "ADMINPOP", "PPP", "DIFFERENCE") 186 | 187 | 188 | file.path.csv <- file.path(rfg.output.path.countries, 189 | paste0("check_result_prj_", 190 | rfg.countries.tag,"_const.csv")) 191 | 192 | write.csv( as.data.frame(df), file = file.path.csv, row.names=FALSE ) 193 | 194 | return(mean(df[,4])) 195 | } -------------------------------------------------------------------------------- /R/create_covariates_list.R: -------------------------------------------------------------------------------- 1 | #' creating a covariates list 2 | #' 3 | #' @param input_covariates Input list of covariates 4 | #' @param input_mastergrid Input list to mastergrid 5 | #' @param input_watermask Input list to watermask 6 | #' @param input_px_area Input list to population 7 | #' @rdname create_covariates_list 8 | #' @return A list of covariates 9 | #' @examples 10 | #' \dontrun{ 11 | #' create_covariates_list( input_covariates, 12 | #' input_covariates, 13 | #' input_watermask, 14 | #' input_px_area ) 15 | #' } 16 | #' @noRd 17 | create_covariates_list <- function(input_covariates, 18 | input_mastergrid, 19 | input_watermask, 20 | input_px_area){ 21 | 22 | 23 | covariates <- list() 24 | 25 | # i = "GHA" 26 | # j = "gha_dist_cities" 27 
| for ( i in names(input_covariates) ) { 28 | 29 | for (j in names(input_covariates[[i]]) ){ 30 | 31 | fc <- input_covariates[[i]][[j]] 32 | 33 | 34 | #covariates[[i]][[j]][["path"]] <- fc 35 | 36 | covariates[[i]][[j]] <- list( 37 | dataset_folder = dirname(fc), 38 | dataset_filename = basename(fc), 39 | dataset_description = j, 40 | dataset_summary = "mean", 41 | dataset_country = i, 42 | dataset_class = j, 43 | dataset_path = fc 44 | ) 45 | 46 | 47 | 48 | } 49 | } 50 | 51 | 52 | # input_mastergrid 53 | # 54 | for ( i in names(input_mastergrid) ) { 55 | 56 | for (j in 1:length(input_mastergrid[[i]]) ){ 57 | 58 | fc <- input_mastergrid[[i]][[j]] 59 | 60 | 61 | #covariates[[i]][["mastergrid"]][["path"]] <- fc 62 | 63 | covariates[[i]][["mastergrid"]] <- list( 64 | dataset_folder = dirname(fc), 65 | dataset_filename = basename(fc), 66 | dataset_description = "mastergrid", 67 | dataset_summary = "sum", 68 | dataset_country = i, 69 | dataset_class = "mastergrid", 70 | dataset_path = fc 71 | ) 72 | } 73 | } 74 | 75 | 76 | 77 | # rfg.water.mask 78 | # 79 | for ( i in names(input_watermask) ) { 80 | 81 | for (j in 1:length(input_watermask[[i]]) ){ 82 | 83 | fc <- input_watermask[[i]][[j]] 84 | 85 | 86 | #covariates[[i]][["watermask"]][["path"]] <- fc 87 | 88 | covariates[[i]][["watermask"]] <- list( 89 | dataset_folder = dirname(fc), 90 | dataset_filename = basename(fc), 91 | dataset_description = "watermask", 92 | dataset_summary = "sum", 93 | dataset_country = i, 94 | dataset_class = "watermask", 95 | dataset_path = fc 96 | ) 97 | } 98 | } 99 | 100 | # input_px_area 101 | # 102 | for ( i in names(input_px_area) ) { 103 | 104 | for (j in 1:length(input_px_area[[i]]) ){ 105 | 106 | fc <- input_px_area[[i]][[j]] 107 | 108 | 109 | #covariates[[i]][["px_area"]][["path"]] <- fc 110 | 111 | covariates[[i]][["px_area"]] <- list( 112 | dataset_folder = dirname(fc), 113 | dataset_filename = basename(fc), 114 | dataset_description = "px_area", 115 | dataset_summary = "sum", 116 | dataset_country = i, 117 | dataset_class = "px_area", 118 | dataset_path = fc 119 | ) 120 | } 121 | } 122 | 123 | 124 | return(covariates) 125 | 126 | } 127 | 128 | 129 | #' creating a covariates list as input for RF model 130 | #' 131 | #' @param covariates Input list of covariates 132 | #' @return A list of covariates 133 | #' @examples 134 | #' \dontrun{ 135 | #' create_covariates_list_for_RF( covariates ) 136 | #' } 137 | #' @noRd 138 | create_covariates_list_for_RF <- function(covariates){ 139 | 140 | covariates.new <- list() 141 | 142 | cname <- names(covariates[[1]]) 143 | 144 | for (i in cname) { 145 | 146 | dstfile <- covariates[[1]][[i]]$dataset_path 147 | rtype <- covariates[[1]][[i]]$dataset_summary 148 | 149 | covariates.new[[ i ]] <- list( 150 | dataset_folder = dirname(dstfile), 151 | dataset_name = i, 152 | dataset_description = i, 153 | dataset_summary = rtype, 154 | path = dstfile 155 | ) 156 | 157 | 158 | } 159 | 160 | 161 | return(covariates.new) 162 | 163 | } 164 | 165 | #' get_covariates_var_names This function retrieves variable names 166 | #' from the given covariates. 
Create a vector to hold the covariate 167 | #' variable names: 168 | #' 169 | #' @param x Input list of covariates 170 | #' @return A list of covariates 171 | #' @examples 172 | #' \dontrun{ 173 | #' get_covariates_var_names( covariates ) 174 | #' } 175 | #' @noRd 176 | get_covariates_var_names <- function(x){ 177 | 178 | covariates.var.names <- c() 179 | 180 | ## For every covariate in the covariates object: 181 | for ( icvritm in 1:length(x) ) { 182 | ## Retrieve that covariate object: 183 | covariate <- x[[icvritm]] 184 | ## Retrieve the dataset_name attribute: 185 | var_name_class <- covariate[['dataset_name']] 186 | ## Append that variable name to the covariates.var.names vector: 187 | covariates.var.names <- c(covariates.var.names, var_name_class) 188 | } 189 | ## Sort those names: 190 | sort(covariates.var.names) 191 | ## Return the names vector: 192 | return(covariates.var.names) 193 | } 194 | -------------------------------------------------------------------------------- /R/download_file.R: -------------------------------------------------------------------------------- 1 | #' Function to download file from ftp server 2 | #' 3 | #' @param file_remote is a url to a remoute file 4 | #' @param dest_file is a path where downloaded file will be stored 5 | #' @param quiet If TRUE, suppress status messages (if any), and the progress bar. 6 | #' @param method Method to be used for downloading files. 7 | #' Current download methods are "internal", "wininet" (Windows only) "libcurl", 8 | #' "wget" and "curl", and there is a value "auto" 9 | #' @rdname download_file 10 | #' @importFrom utils read.csv 11 | #' @noRd 12 | download_file <- function(file_remote, dest_file, quiet, method="auto") { 13 | 14 | tmStartDw <- Sys.time() 15 | 16 | checkStatus <- tryCatch( 17 | { 18 | utils::download.file(file_remote, destfile=dest_file,mode="wb", 19 | quiet=quiet, method=method) 20 | }, 21 | error=function(cond){ 22 | message(paste("URL does not seem to exist:", file_remote)) 23 | message("Here's the original error message:") 24 | message(cond) 25 | }, 26 | warning=function(cond){ 27 | message(paste("URL caused a warning:", file_remote)) 28 | message("Here's the original warning message:") 29 | message(cond) 30 | }, 31 | finally={ 32 | if (!quiet){ 33 | tmEndDw <- Sys.time() 34 | #message(paste("Processed URL:", file_remote)) 35 | message(paste("It took ", tmDiff(tmStartDw ,tmEndDw,frm="hms"), "to download" )) 36 | } 37 | } 38 | ) 39 | 40 | if(inherits(checkStatus, "error") | inherits(checkStatus, "warning")){ 41 | return(NULL) 42 | } else{ 43 | return(1) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /R/get_aval_memory.R: -------------------------------------------------------------------------------- 1 | # Authors: Maksym Bondarenko mb4@soton.ac.uk 2 | # Date : October 2017 3 | # Version 0.1 4 | # 5 | #' get_aval_memory function will return avalible 6 | #' of the system memory in GB 7 | #' Tested on Windows 10 8 | #' 9 | #' @rdname get_aval_memory 10 | #' @return numeric 11 | #' @noRd 12 | get_aval_memory <- function(){ 13 | 14 | OS = tolower(get_OS_system()) 15 | 16 | if(OS == 'windows'){ 17 | memavail = shell('wmic OS get FreePhysicalMemory /Value',intern=T) 18 | memavail = memavail[grep('FreePhysicalMemory', memavail)] 19 | memavail = as.numeric(gsub('FreePhysicalMemory=','',memavail)) 20 | }else if (OS == 'osx'){ 21 | memavail = as.numeric(unlist(strsplit(system("sysctl hw.memsize", 22 | intern = T), 23 | split = ' '))[2])/1e3 24 | }else{ 25 | 
memavail = as.numeric(system(" awk '/MemTotal/ {print $2}' /proc/meminfo", 26 | intern=T)) 27 | } 28 | 29 | return(memavail/ (1024 * 1024)) 30 | } -------------------------------------------------------------------------------- /R/get_blocks_need.R: -------------------------------------------------------------------------------- 1 | #' @param nrows number in raster object 2 | #' @param n number of chancks 3 | #' @rdname get_blocks 4 | #' @return list 5 | #' @examples 6 | #' \dontrun{ 7 | #' get_blocks(nrows, n=2) 8 | #' } 9 | #' @noRd 10 | #' 11 | get_blocks <- function(nrows, n){ 12 | 13 | chunk2 <- function(x,n) split(x, cut(seq_along(x), n, labels = FALSE)) 14 | 15 | mblocks <- list() 16 | 17 | if (n == 1) { 18 | 19 | mblocks$row[1] <- 1 20 | mblocks$nrows[1] <- nrows 21 | mblocks$n <- 1 22 | 23 | return(mblocks) 24 | 25 | } 26 | 27 | ch <- chunk2(1:nrows,n) 28 | 29 | 30 | for (i in 1:n){ 31 | 32 | if (i == 1){ 33 | mblocks$row[i] <- 1 34 | }else{ 35 | mblocks$row[i] <- mblocks$row[[i-1]] + length(ch[[i-1]]) 36 | } 37 | 38 | mblocks$nrows[i] <- length(ch[[i]]) 39 | 40 | } 41 | 42 | mblocks$n <- n 43 | 44 | return(mblocks) 45 | 46 | } 47 | 48 | 49 | #' @param memavail total memory avalible 50 | #' @param totalmem_need total memory need 51 | #' @param blks number of blocks 52 | #' @param maxmem.mg integer. The maximum amount of memory (in Mb) to use for a 53 | #' given operation, defaults to 5 billion bytes (4.66 GB) 54 | #' @param verbose Logical vector indicating whether to print 55 | #' intermediate output from the function to the console, which might be 56 | #' helpful for model debugging. Default is \code{verbose} = TRUE 57 | #' @rdname log_mem_info 58 | #' @examples 59 | #' \dontrun{ 60 | #' log_mem_info(nrows, n=2) 61 | #' } 62 | #' @noRd 63 | #' 64 | log_mem_info <- function(memavail, maxmem.mg, totalmem_need, blks, verbose){ 65 | 66 | log <- TRUE 67 | 68 | log_info("MSG", paste(replicate(48, "-"), collapse = ""), verbose=verbose, log=log) 69 | 70 | if (blks==1){ 71 | 72 | log_info("MSG", paste0("Total avalible memory: ", 73 | round(memavail, 2), " Mb" ), 74 | verbose=verbose, 75 | log=log) 76 | 77 | log_info("MSG", paste0("Max memory to use for operation: ", 78 | round(maxmem.mg, 2), " Mb" ), 79 | verbose=verbose, 80 | log=log) 81 | 82 | log_info("MSG", paste0("Memory needs to proccess raster: ", 83 | round(totalmem_need, 2), " Mb" ), 84 | verbose=verbose, 85 | log=log) 86 | 87 | 88 | }else{ 89 | 90 | log_info("MSG", paste0("Total avalible memory per core: ", 91 | round(memavail, 2), " Mb" ), 92 | verbose=verbose, 93 | log=log) 94 | 95 | log_info("MSG", paste0("Max memory to use for operation: ", 96 | round(maxmem.mg, 2), " Mb" ), 97 | verbose=verbose, 98 | log=log) 99 | 100 | log_info("MSG", paste0("Memory needs per block of data: ", 101 | round(totalmem_need, 2), " Mb" ), 102 | verbose=verbose, 103 | log=log) 104 | 105 | } 106 | 107 | 108 | 109 | log_info("MSG", paste(replicate(48, "-"), collapse = ""), 110 | verbose=verbose, log=log) 111 | log_info("MSG", paste0("Total number of blocks: ", blks), 112 | verbose=verbose, log=log) 113 | log_info("MSG", paste(replicate(48, "-"), collapse = ""), 114 | verbose=verbose, log=log) 115 | 116 | } 117 | 118 | 119 | 120 | 121 | #' Function will return a number of blocks suggested for processing raster file. 122 | #' Indicates the minimum number of blocks to break the processing extent into 123 | #' for parallel processing. 
It will take into consideration number of layers, 124 | #' cells, cores and avalible memory on computer 125 | #' binc number parameter to increase requrement of the raster 126 | #' @param x raster 127 | #' @param cores number of cores 128 | #' @param nt number of trees used in RF training 129 | #' @param n number of layers 130 | #' @param verbose Logical vector indicating whether to print 131 | #' intermediate output from the function to the console, which might be 132 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 133 | #' @importFrom utils object.size getFromNamespace 134 | #' @importFrom raster nlayers ncell 135 | #' @rdname get_blocks_size 136 | #' @return integer 137 | #' @examples 138 | #' \dontrun{ 139 | #' get_blocks_size(x, cores=2) 140 | #' } 141 | #' @noRd 142 | get_blocks_size <- function(x, 143 | cores, 144 | nt=1, 145 | n=1, 146 | verbose=T, 147 | ...){ 148 | 149 | args <- list(...); 150 | 151 | if ("binc" %in% names(args)){ 152 | binc <- args[["binc"]] 153 | }else{ 154 | binc <- 1 155 | } 156 | 157 | if ("boptimise" %in% names(args)){ 158 | boptimise <- args[["boptimise"]] 159 | }else{ 160 | boptimise <- FALSE 161 | } 162 | 163 | if ("bsoft" %in% names(args)){ 164 | bsoft <- args[["bsoft"]] 165 | }else{ 166 | bsoft <- FALSE 167 | } 168 | 169 | # importing .availableRAM and .maxmemory function from raster package 170 | # 171 | maxmemory.rf <- getFromNamespace(".maxmemory", "raster") 172 | availableRAM.rf <- getFromNamespace(".availableRAM", "raster") 173 | 174 | 175 | # maxmem.mg: integer. The maximum amount of memory (in bytes) to use for a 176 | # given operation, defaults to 5 billion bytes (4.66 GB) 177 | 178 | if ("maxmem.mg" %in% names(args)){ 179 | maxmem.mg <- args[["maxmem.mg"]] 180 | }else{ 181 | #maxmem.mg <- 4768.372 182 | #maxmem.mg <- raster:::.maxmemory() /(1024 * 1024 ) 183 | maxmem.mg <- maxmemory.rf() /(1024 * 1024 ) 184 | } 185 | 186 | 187 | blocks <- list() 188 | # get aval memory in bytes 189 | rs <- as.integer(object.size(x)) 190 | 191 | 192 | #.availableRAM(maxmem) 193 | #memavail.bytes <- (get_aval_memory() * 1073741824 ) - (rs*cores) 194 | 195 | memavail.bytes <- (availableRAM.rf(maxmemory.rf())) - (rs*cores) 196 | 197 | memavail.mg <- memavail.bytes /(1024 * 1024 ) 198 | 199 | # to be on safe side 65% is applied to a total avalible memory on PC 200 | if (boptimise){ 201 | memavail.mg <- (memavail.mg * 65 )/100 202 | } 203 | 204 | # memory aval per core 205 | memavail_core.mg <- ceiling(memavail.mg /(cores)) 206 | 207 | #nl = n * nlayers(x) 208 | 209 | nl = n 210 | 211 | totalmem_need.mg <- ((ncell(x) * 8 * nl * nt * binc)/(1024 * 1024 )) 212 | 213 | 214 | if (bsoft & totalmem_need.mg < maxmem.mg & totalmem_need.mg < memavail_core.mg){ 215 | 216 | blocks$row[1] <- 1 217 | blocks$nrows[1] <- nrow(x) 218 | blocks$n <- 1 219 | 220 | if (verbose){ 221 | log_mem_info(memavail_core.mg, 222 | maxmem.mg, 223 | totalmem_need.mg, 224 | 1, 225 | verbose=verbose) 226 | } 227 | 228 | return(blocks) 229 | 230 | } 231 | 232 | can_be_run <- FALSE 233 | 234 | if (bsoft){ 235 | bl <- 2 236 | }else{ 237 | bl <- cores 238 | } 239 | 240 | 241 | while(!can_be_run){ 242 | 243 | blocks_nrows <- nrow(x)/bl 244 | 245 | nc <- ceiling(blocks_nrows) * ncol(x) 246 | 247 | memneed <- ( (nc * 8 * nl * nt * binc) / (1024 * 1024 ) ) 248 | 249 | memavail <- min(memavail_core.mg, maxmem.mg) 250 | 251 | if (memneed < memavail) { 252 | can_be_run <- TRUE 253 | }else{ 254 | bl <- bl + 1 255 | } 256 | 257 | if (bl == nrow(x)) can_be_run <- TRUE 258 | 259 | } 260 | 261 | 
blocks <- get_blocks(nrow(x), bl) 262 | 263 | if (verbose){ 264 | 265 | log_mem_info(memavail_core.mg, 266 | maxmem.mg, 267 | memneed, 268 | blocks$n, 269 | verbose=verbose) 270 | 271 | } 272 | 273 | 274 | return(blocks) 275 | } 276 | 277 | 278 | 279 | 280 | -------------------------------------------------------------------------------- /R/initial_check.R: -------------------------------------------------------------------------------- 1 | #' Function to check the input arguments of popRF.It will be checked that 2 | #' input raster files exists. 3 | #' @importFrom methods is 4 | #' @rdname initial_check 5 | #' @param cov List contains a list of elements. Each element of a 6 | #' list is another list object with a given name of the country, the 7 | #' element of this list is the input covariates with the name of the 8 | #' covariates and the path to them. 9 | #' @param mastergrid List with each element of a list is 10 | #' another object with a given name of the country, the element of 11 | #' this list is the input mastergrid with the path to the raster file. 12 | #' @param watermask List with each element of a list is 13 | #' another object with a given name of the country, the element of 14 | #' this list is the input watermask with the path to the raster file. 15 | #' @param px_area List with each element of a list is 16 | #' another object with a given name of the country, the element of 17 | #' this list is the input px_area with the path to the raster file. 18 | #' @param pop the name of the file which the administrative ID and the population 19 | #' values are to be read from. The file should contain two columns 20 | #' comma-separated with the value of administrative ID and population 21 | #' without columns names. If it does not contain an absolute path, the 22 | #' file name is relative to the current working directory. 23 | #' @param output_dir Path to the folder to save the outputs. 24 | #' @param cores is a integer. Number of cores to use when executing the function. 25 | #' @param minblocks Integer. if \code{minblocks} is NULL then \code{minblocks} 26 | #' for cluster prediction parallesation will be calculated based on 27 | #' available memory. 28 | #' @return TRUE if no errors found otherwise error message will be returned. 29 | #' @noRd 30 | initial_check <- function(cov, 31 | mastergrid, 32 | watermask, 33 | px_area, 34 | pop, 35 | output_dir, 36 | cores, 37 | minblocks){ 38 | 39 | 40 | # if ( !is(cores, "numeric") & !is.null(cores) ){ 41 | # 42 | # msg <- paste0("Error :: cores " , cores , " should be integer or NULL value") 43 | # return(msg) 44 | # 45 | # } 46 | 47 | if ( !is(cores, "numeric") ){ 48 | 49 | msg <- paste0("Error :: cores value ", cores ," should be integer value.") 50 | return(msg) 51 | 52 | } 53 | 54 | # if ( is(cores, "numeric")){ 55 | # if ( cores < 1 ){ 56 | # msg <- paste0("Error :: cores " , cores , " should be integer equal 1 or 2.. value") 57 | # return(msg) 58 | # } 59 | # } 60 | 61 | 62 | 63 | # if ( !is(minblocks, "numeric") & !is.null(minblocks)){ 64 | # 65 | # msg <- paste0("Error :: minblocks ", minblocks , " should be integer or NULL value") 66 | # return(msg) 67 | # 68 | # } 69 | # 70 | # if ( is(minblocks, "numeric")){ 71 | # if ( minblocks < 1 ){ 72 | # msg <- paste0("Error :: minblocks " , minblocks , " should be integer equal 1 or 2.. value") 73 | # return(msg) 74 | # } 75 | # } 76 | 77 | 78 | if (is.null(output_dir) | !dir.exists(output_dir)) { 79 | 80 | msg <- paste0("Error :: Output directory ",output_dir, 81 | " does not exsit. 
Please choose a different directory.") 82 | return(msg) 83 | 84 | } 85 | 86 | if(!is.list( cov )) { 87 | 88 | msg <- paste0("Error :: Input parameter 'cov' should be a list.") 89 | return(msg) 90 | 91 | } 92 | 93 | if(!is.list( mastergrid )) { 94 | 95 | msg <- paste0("Error :: Input parameter 'mastergrid' should be a list.") 96 | return(msg) 97 | 98 | } 99 | 100 | if(!is.list( watermask )) { 101 | 102 | msg <- paste0("Error :: Input parameter 'watermask' should be a list.") 103 | return(msg) 104 | 105 | } 106 | 107 | if(!is.list( px_area )) { 108 | 109 | msg <- paste0("Error :: Input parameter 'px_area' should be a list.") 110 | return(msg) 111 | 112 | } 113 | 114 | 115 | for ( i in names(cov) ) { 116 | 117 | for (ii in 1:length(cov[[i]]) ){ 118 | fc <- cov[[i]][[ii]] 119 | if(!file.exists( fc )) { 120 | 121 | msg <- paste0("Error :: Covariates file ", fc ," does not exist.") 122 | return(msg) 123 | } 124 | } 125 | } 126 | 127 | for ( i in names(mastergrid) ) { 128 | 129 | for (ii in 1:length(mastergrid[[i]]) ){ 130 | fc <- mastergrid[[i]][[ii]] 131 | if(!file.exists( fc )) { 132 | 133 | msg <- paste0("Error :: Mastergrid file ", fc ," does not exist.") 134 | return(msg) 135 | } 136 | } 137 | } 138 | 139 | for ( i in names(watermask) ) { 140 | 141 | for (ii in 1:length(watermask[[i]]) ){ 142 | fc <- watermask[[i]][[ii]] 143 | if(!file.exists( fc )) { 144 | 145 | msg <- paste0("Error :: Watermask file ", fc ," does not exist.") 146 | return(msg) 147 | } 148 | } 149 | } 150 | 151 | for ( i in names(px_area) ) { 152 | 153 | for (ii in 1:length(px_area[[i]]) ){ 154 | fc <- px_area[[i]][[ii]] 155 | if(!file.exists( fc )) { 156 | 157 | msg <- paste0("Error :: Watermask file ", fc ," does not exist.") 158 | return(msg) 159 | } 160 | } 161 | } 162 | 163 | 164 | for ( i in names(pop) ) { 165 | 166 | fc <- watermask[[i]] 167 | if(!file.exists( fc )) { 168 | 169 | msg <- paste0("Error :: Population data file ", fc ," does not exist.") 170 | return(msg) 171 | } 172 | 173 | } 174 | 175 | 176 | 177 | return(TRUE) 178 | 179 | } 180 | -------------------------------------------------------------------------------- /R/load_pop.R: -------------------------------------------------------------------------------- 1 | # 2 | #' Load population data from input 3 | #' 4 | #' @param icountry ISO of the country 5 | #' @param input_poptables Path to the csv file with pop data 6 | #' @rdname load_pop 7 | #' @return A data.frame with the populations for each zone 8 | #' @noRd 9 | load_pop <- function(icountry,input_poptables){ 10 | 11 | file_local <- input_poptables[[icountry]] 12 | df <- utils::read.csv(file_local, stringsAsFactors=FALSE, header = FALSE) 13 | colnames(df) <- c("ADMINID", "ADMINPOP") 14 | 15 | return(df) 16 | 17 | } 18 | 19 | # 20 | #' get_pop_census_all Load population data from input 21 | #' 22 | #' @param x input poptables 23 | #' @rdname get_pop_census_all 24 | #' @return A data.frame with the populations 25 | #' @noRd 26 | get_pop_census_all <- function(x) { 27 | 28 | c <- names(x) 29 | 30 | census_data <- load_pop(c[[1]], x) 31 | 32 | for ( i in 1:length(c) ) { 33 | if (i==1) next() 34 | census_data <- rbind(census_data, load_pop(c[[i]], x)) 35 | } 36 | 37 | return(census_data) 38 | } -------------------------------------------------------------------------------- /R/masking_out.R: -------------------------------------------------------------------------------- 1 | #' masking_out function to mask raster 2 | #' 3 | #' @param x RasterStack object 4 | #' @param fun is typically a function that can 
take a single vector as input 5 | #' @param filename File of a new raster file. 6 | #' @param NAflag NO data value will be used for a new raster 7 | #' @param datatype Type of raster. Avalible are INT1S/INT2S/INT4S/FLT4S/LOG1S/INT1U/INT2U/INT4U/FLT8S 8 | #' @param overwrite Overwrite existing file 9 | #' @param cores Integer. Number of cores for parallel calculation 10 | #' @param blocks number of blocks sugesting for processing raster file. 11 | #' @param silent If FALSE then the progress will be shown 12 | #' @importFrom utils getFromNamespace 13 | #' @rdname masking_out_start 14 | #' @return Raster* object 15 | #' @noRd 16 | masking_out_start <- function(x, 17 | fun, 18 | filename, 19 | NAflag, 20 | datatype, 21 | overwrite, 22 | cores, 23 | blocks, 24 | silent) { 25 | 26 | 27 | 28 | tStart <- Sys.time() 29 | 30 | verbose <- if (silent) FALSE else TRUE 31 | 32 | log_info("MSG", 33 | paste0("Rasterizing using total blocks ",blocks$n ), 34 | verbose=verbose, 35 | log=FALSE) 36 | 37 | 38 | recvOneData <- getFromNamespace("recvOneData", "parallel") 39 | sendCall <- getFromNamespace("sendCall", "parallel") 40 | 41 | cl <- raster::getCluster() 42 | 43 | nodes <- length(cl) 44 | 45 | 46 | clusterExport(cl, c("blocks", "x","fun"), envir=environment()) 47 | clusterExport(cl, c("recvOneData", "sendCall"), envir=environment()) 48 | 49 | 50 | clwpRasterCalcFun <- function(i) { 51 | 52 | tryCatch({ 53 | v <- raster::getValues(x, row=blocks$row[i], nrows=blocks$nrows[i]) 54 | 55 | res <- apply(v, 1, fun) 56 | 57 | }, error = function(e) stop(paste0("The block '", blocks$row[i], "'", 58 | " caused the error: '", e, "'"))) 59 | 60 | return(res) 61 | } 62 | 63 | 64 | for (i in 1:nodes) { 65 | sendCall(cl[[i]], clwpRasterCalcFun, i, tag=i) 66 | } 67 | 68 | 69 | out <- x[[1]] 70 | 71 | out <- raster::writeStart(out, 72 | filename=filename, 73 | format="GTiff", 74 | datatype=datatype, 75 | overwrite=overwrite, 76 | options=c("COMPRESS=LZW"), 77 | NAflag=NAflag) 78 | 79 | for (i in 1:blocks$n) { 80 | 81 | d <- recvOneData(cl) 82 | 83 | if (! d$value$success ) { 84 | stop('cluster error') 85 | } 86 | 87 | tEnd <- Sys.time() 88 | 89 | b <- d$value$tag 90 | 91 | if ((silent == FALSE) & (i%%10 == 0) ) { 92 | 93 | progress_message(x=i, 94 | max=blocks$n, 95 | label=paste0("received block ", 96 | i, 97 | " Processing Time: ", 98 | tmDiff(tStart,tEnd) 99 | ) 100 | ) 101 | 102 | }else if (i==blocks$n) { 103 | 104 | progress_message(x=blocks$n, 105 | max=blocks$n, 106 | label=paste0("received block ", 107 | i, 108 | " Processing Time: ", 109 | tmDiff(tStart,tEnd) 110 | ) 111 | ) 112 | 113 | } 114 | 115 | out <- raster::writeValues(out, d$value$value, blocks$row[b]) 116 | 117 | # need to send more data 118 | # 119 | ni <- nodes + i 120 | if (ni <= blocks$n) { 121 | sendCall(cl[[d$node]], clwpRasterCalcFun, ni, tag=ni) 122 | } 123 | } 124 | 125 | out <- raster::writeStop(out) 126 | 127 | return(out) 128 | } 129 | 130 | 131 | 132 | 133 | #' masking_out function to mask raster 134 | #' 135 | #' @param x RasterStack object 136 | #' @param fun is typically a function that can take a single vector as input 137 | #' @param filename File of a new raster file. 138 | #' @param NAflag NO data value will be used for a new raster 139 | #' @param datatype Type of raster. Avalible are INT1S/INT2S/INT4S/FLT4S/LOG1S/INT1U/INT2U/INT4U/FLT8S 140 | #' @param overwrite Overwrite existing file 141 | #' @param cores Integer. 
Number of cores for parallel calculation 142 | #' @param blocks number of blocks sugesting for processing raster file. 143 | #' @param cblk Integer. param to controle min number of blocks during paralisation 144 | #' @param silent If FALSE then the progress will be shown 145 | #' @param na.rm Optional 146 | #' @importFrom raster nlayers 147 | #' @rdname masking_out 148 | #' @return Raster* object 149 | #' @noRd 150 | masking_out <- function(x, 151 | fun='mean', 152 | filename=rasterTmpFile(), 153 | NAflag=NULL, 154 | datatype=NULL, 155 | overwrite=TRUE, 156 | cores=NULL, 157 | blocks=NULL, 158 | cblk=NULL, 159 | silent=TRUE, 160 | na.rm) { 161 | 162 | 163 | if (!file.exists(dirname(filename))){ 164 | stop(paste0("Directory ",dirname(filename)," for file ", 165 | basename(filename) ," does not exist")) 166 | } 167 | 168 | if (is.null(NAflag)) NAflag=-99999 169 | if (is.null(datatype)) datatype='FLT4S' 170 | if (is.null(cblk)) cblk=1 171 | 172 | if (!is(NAflag, "numeric")) stop(paste0("NAflag should be numeric")) 173 | if (!is(overwrite, "logical")) stop(paste0("overwrite should be logical (e.g., TRUE, FALSE)")) 174 | if (!is(silent, "logical")) stop(paste0("silent should be logical (e.g., TRUE, FALSE)")) 175 | 176 | datatype <- toupper(datatype) 177 | 178 | if (!(datatype %in% c('INT1S', 'INT2S', 'INT4S', 'FLT4S', 'LOG1S', 'INT1U', 179 | 'INT2U', 'INT4U', 'FLT8S'))) { 180 | stop('not a valid data type. Avalible are INT1S/INT2S/INT4S/FLT4S/LOG1S/INT1U/INT2U/INT4U/FLT8S') 181 | } 182 | 183 | if (!cblk%%1==0) stop(paste0("cblk should be integer")) 184 | 185 | 186 | 187 | if (!missing(na.rm)) { 188 | if (!is(na.rm, "logical")) stop(paste0("na.rm should be logical (e.g., TRUE, FALSE)")) 189 | } 190 | 191 | if ( file.exists(filename) & overwrite==FALSE) { 192 | stop(paste0("File ",filename," exist. Use option overwrite=TRUE")) 193 | } else{ 194 | if ( file.exists(filename) ) file.remove(filename) 195 | } 196 | 197 | stopifnot(hasValues(x)) 198 | 199 | if ( (class(x)[1] != 'RasterStack')) { 200 | stop("Argument x should be 'RasterStack'") 201 | } 202 | 203 | nl <- nlayers(x) 204 | 205 | if ( nl < 2 ) { 206 | stop("'RasterStack' should have atleast two Raster layers") 207 | } 208 | 209 | 210 | if (class(fun) == 'character') { 211 | 212 | fun <- tolower(fun) 213 | if (! fun %in% c('sum', 'mean', 'min', 'max')) { 214 | stop("fun can be 'sum', 'mean', 'min', or 'max'") 215 | } 216 | 217 | }else{ 218 | 219 | testMatrix = matrix( seq(1, nl, by = 1), 220 | nrow=1, 221 | ncol=nl, 222 | byrow = TRUE) 223 | 224 | if (!missing(na.rm)) { 225 | 226 | test <- try( apply(testMatrix, 1, fun, na.rm=na.rm), silent=TRUE) 227 | 228 | if (class(test) == 'try-error') { 229 | 230 | test <- try(fun(testMatrix, na.rm=na.rm), silent=TRUE) 231 | 232 | if (class(test) == 'try-error') { 233 | stop("Cannot use this function. Perhaps add '...' 
or 'na.rm' to the function arguments?") 234 | } 235 | } 236 | } else { 237 | test <- try( apply(testMatrix, 1, fun), silent=TRUE) 238 | if (class(test) == 'try-error') { 239 | 240 | test <- try(fun(testMatrix), silent=TRUE) 241 | 242 | if (class(test) == 'try-error') { 243 | stop("cannot use this function") 244 | } 245 | } 246 | } 247 | } 248 | 249 | 250 | # get real physical cores in a computer 251 | max.cores <- parallel::detectCores(logical = TRUE) 252 | 253 | if (is.null(cores)) { 254 | cores <- max.cores - 1 255 | } 256 | 257 | if (cores > max.cores) { 258 | stop(paste0("Number of cores ",cores, 259 | " more then real physical cores in PC ",max.cores )) 260 | } 261 | 262 | 263 | if (is.null(blocks)) { 264 | 265 | blocks <- get_blocks_size(x, 266 | cores, 267 | verbose = ifelse(silent, FALSE, TRUE)) 268 | } 269 | 270 | npoc_blocks <- ifelse(blocks$n < cores, blocks$n, cores) 271 | 272 | beginCluster(n=npoc_blocks) 273 | 274 | out <- masking_out_start(x, 275 | fun, 276 | filename, 277 | NAflag, 278 | datatype, 279 | overwrite, 280 | npoc_blocks, 281 | blocks, 282 | silent) 283 | 284 | endCluster() 285 | 286 | return(out) 287 | } 288 | -------------------------------------------------------------------------------- /R/merge_covariates.R: -------------------------------------------------------------------------------- 1 | #' Merging covariates. 2 | #' 3 | #' @param input.countries list countries 4 | #' @param covariates.var.names list of covariates names 5 | #' @param covariates details of the covariates 6 | #' @param rfg.countries.tag tag for the project 7 | #' @param rfg.countries.merged directory to save merged rasters 8 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 9 | #' intermediate output from the function on the console, which might be 10 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 11 | #' @param log is logical. TRUE or FALSE: flag indicating whether to print 12 | #' intermediate output from the function on the log.txt file. 13 | #' Default is \code{log} = FALSE. 
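#' @details For every covariate name the per-country GeoTIFFs are collected
#'   into a SpatRasterCollection and mosaicked with \code{terra::merge()},
#'   keeping the first non-NA value (\code{first = TRUE}, \code{na.rm = TRUE})
#'   and writing one LZW-compressed, tiled GeoTIFF per covariate into
#'   \code{rfg.countries.merged}. Existing merged files are reused rather than
#'   rebuilt. A minimal sketch of the underlying call (file names are
#'   hypothetical):
#'   \preformatted{
#'   terra::merge(terra::sprc(c("GIN_cov.tif", "LBR_cov.tif")),
#'                first = TRUE, na.rm = TRUE,
#'                filename = "merged_cov.tif", overwrite = TRUE)
#'   }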
14 | #' @importFrom terra sprc 15 | #' @importFrom terra rast 16 | #' @importFrom terra merge 17 | #' @noRd 18 | #' @rdname merge_covariates 19 | #' @return A data.frame merged covariates 20 | #' @examples 21 | #' \dontrun{ 22 | #' merge_covariates( covariates.var.names, 23 | #' covariates, 24 | #' rfg.countries.tag, 25 | #' rfg.countries.merged) 26 | #' } 27 | merge_covariates <- function(input.countries, 28 | covariates.var.names, 29 | covariates, 30 | rfg.countries.tag, 31 | rfg.countries.merged, 32 | verbose = FALSE, 33 | log = FALSE){ 34 | 35 | 36 | rfg.input.countries <- input.countries 37 | 38 | log_info("MSG", 39 | paste0("Start merging covariates"), 40 | verbose=verbose, 41 | log=log) 42 | 43 | covariates_merged <- list() 44 | 45 | 46 | for ( i in 1:length(covariates.var.names) ) { 47 | 48 | cname <- covariates.var.names[[i]] 49 | 50 | list_of_tiffs <- c() 51 | for ( ic in 1:length(rfg.input.countries) ) { 52 | 53 | country <- rfg.input.countries[[ic]] 54 | rst <- paste0(covariates[[country]][[cname]][["dataset_path"]]) 55 | list_of_tiffs <- append(list_of_tiffs, rst, 1) 56 | rtype <- covariates[[country]][[cname]][["dataset_summary"]] 57 | 58 | } 59 | 60 | dstfile <- file.path(rfg.countries.merged, 61 | paste0(rfg.countries.tag, 62 | "_", 63 | cname, 64 | ".tif")) 65 | 66 | if (!file.exists(dstfile)){ 67 | 68 | 69 | # gdalwarp(srcfile=list_of_tiffs, 70 | # of = "GTiff", 71 | # dstfile=dstfile, 72 | # co=c("COMPRESS=LZW", "BLOCKXSIZE=512", 73 | # "BLOCKYSIZE=512", "TILED=YES", "BIGTIFF=YES"), 74 | # s_srs=crs(raster(rst)), 75 | # output_Raster=TRUE, 76 | # overwrite=TRUE, 77 | # verbose=FALSE) 78 | 79 | terra::merge(x = terra::sprc(list_of_tiffs), 80 | first = TRUE, 81 | na.rm = TRUE, 82 | filename = dstfile, 83 | overwrite = TRUE, 84 | wopt = list(filetype = "GTiff", 85 | gdal = c("COMPRESS=LZW", 86 | "BLOCKXSIZE=512", 87 | "BLOCKYSIZE=512", 88 | "TILED=YES", 89 | "BIGTIFF=YES"))) 90 | 91 | 92 | 93 | } 94 | 95 | #covariates_merged[[rfg.countries.tag]][[cname]][["path"]] <- dstfile 96 | covariates_merged[[rfg.countries.tag]][[cname]] <- list(dataset_folder = dirname(dstfile), 97 | dataset_filename = basename(dstfile), 98 | dataset_description = cname, 99 | dataset_summary = rtype, 100 | dataset_country = cname, 101 | dataset_class = cname, 102 | dataset_path = dstfile 103 | ) 104 | 105 | 106 | if (verbose){ 107 | if ( i != length(covariates.var.names)){ 108 | 109 | progress_message(x=i, 110 | max=length(covariates.var.names), 111 | label=paste0("Merging ", cname)) 112 | 113 | }else{ 114 | 115 | progress_message(x=i, 116 | max=length(covariates.var.names), 117 | label=paste0("Merging complete " )) 118 | 119 | } 120 | } 121 | 122 | 123 | 124 | } 125 | 126 | #log_info("MSG", paste0("Completed merging covariates"), 127 | # verbose=verbose, log=log) 128 | return(covariates_merged) 129 | 130 | } 131 | -------------------------------------------------------------------------------- /R/os_system.R: -------------------------------------------------------------------------------- 1 | # Authors: Maksym Bondarenko mb4@soton.ac.uk 2 | # Date : October 2017 3 | # Version 0.1 4 | # 5 | #' get_OS_system function will return a string with OS 6 | #' of the system 7 | #' Tested on Windows 10 8 | 9 | #' @rdname get_OS_system 10 | #' @return string 11 | #' @noRd 12 | get_OS_system <- function(){ 13 | 14 | sysinf <- Sys.info() 15 | 16 | if (!is.null(sysinf)){ 17 | 18 | OS <- tolower(sysinf['sysname']) 19 | 20 | if(OS == 'windows'){ 21 | 22 | return('windows') 23 | 24 | } else if (OS == 'darwin') { 25 | 
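      # Sys.info()[["sysname"]] reports "Darwin" on macOS (lower-cased above),
      # so it is returned here as 'osx' for consistency.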
26 | return('osx') 27 | 28 | } else if (OS == 'linux') { 29 | 30 | return('linux') 31 | 32 | } 33 | 34 | } else { ## other OS 35 | OS <- .Platform$OS.type 36 | if (grepl("^darwin", R.version$os)) 37 | return('osx') 38 | if (grepl("linux-gnu", R.version$os)) 39 | return('linux') 40 | } 41 | 42 | } -------------------------------------------------------------------------------- /R/popRFdemo.R: -------------------------------------------------------------------------------- 1 | #' @title Function to demo the popRF package using WorldPop input data. 2 | #' 3 | #' @description This function allows the user to generate a population layer 4 | #' using the \href{https://www.worldpop.org}{WorldPop} geospatial covariates and 5 | #' subnational census-based population estimates for 230 countries. 6 | #' All input datasets use a geographical coordinate system (GCS) with WGS 1984 7 | #' datum (EPSG:4326) in Geotiff format at a resolution of 3 arc-second 8 | #' (0.00083333333 decimal degree, approximately 100m at the equator). 9 | #' Mastergrid of sub-national administrative unit boundary was rasterised 10 | #' by \href{http://www.ciesin.org}{CIESIN}. 11 | #' 12 | #' Following covariates will be downloaded and used to disaggregat population 13 | #' (2020 year) from census units into grid cells. 14 | #' \itemize{ 15 | #' \item subnational_admin_2000_2020.tif - sub-national units provided by nationalEAs 16 | #' \item esaccilc_dst011_2015.tif - Distance to ESA-CCI-LC cultivated area edges 2015. 17 | #' \item esaccilc_dst040_2015.tif - Distance to ESA-CCI-LC woody-tree area edges 2015. 18 | #' \item esaccilc_dst130_2015.tif - Distance to ESA-CCI-LC shrub area edges 2015. 19 | #' \item esaccilc_dst140_2015.tif - Distance to ESA-CCI-LC herbaceous area edges 2015. 20 | #' \item esaccilc_dst150_2015.tif - Distance to ESA-CCI-LC sparse vegetation area edges 2015. 21 | #' \item esaccilc_dst160_2015.tif - Distance to ESA-CCI-LC aquatic vegetation area edges 2015. 22 | #' \item esaccilc_dst190_2015.tif - Distance to ESA-CCI-LC artificial surface edges 2015. 23 | #' \item esaccilc_dst200_2015.tif - Distance to ESA-CCI-LC bare area edges 2015. 24 | #' \item esaccilc_dst_water_100m_2000_2012.tif - ESA-CCI-LC inland waterbodies 2000-2012. 25 | #' \item coastline_100m_2000_2020.tif - Distance to coastline 2000-2020. 26 | #' \item dst_roadintersec_100m_2016.tif - Distance to OSM major road intersections. 27 | #' \item dst_waterway_100m_2016.tif - Distance to OSM major waterways. 28 | #' \item dst_road_100m_2016.tif - Distance to OSM major roads. 29 | #' \item px_area.tif - Grid-cell surface areas. 30 | #' \item srtm_slope_100m.tif - SRTM-based slope 2000 (SRTM is Shuttle Radar Topography Mission). 31 | #' \item srtm_topo_100m.tif - SRTM elevation 2000. 32 | #' \item viirs_100m_2016.tif - VIIRS night-time lights 2015 (VIIRS is Visible Infrared Imaging Radiometer Suite). 33 | #' \item wdpa_dst_cat1_100m_2017.tif - Distance to IUCN strict nature reserve and wilderness area edges 2017. 34 | #' \item dst_bsgme_100m_2020.tif - Distance to predicted built-settlement extents in 2020. 35 | #' } 36 | #' All downloaded files will be saved into subdirectory \code{covariates}. 37 | #' @references 38 | #' \itemize{ 39 | #' \item Global spatio-temporally harmonised datasets for producing high-resolution 40 | #' gridded population distribution datasets \doi{10.1080/20964471.2019.1625151}. 
41 | #' \item WorldPop (www.worldpop.org - School of Geography and Environmental Science, 42 | #' University of Southampton; Department of Geography and Geosciences, 43 | #' University of Louisville; Departement de Geographie, Universite de Namur) 44 | #' and Center for International Earth Science Information Network (CIESIN), 45 | #' Columbia University (2018). Global High Resolution Population Denominators 46 | #' Project - Funded by The Bill and Melinda Gates Foundation (OPP1134076) 47 | #' \doi{10.5258/SOTON/WP00649}. 48 | #' } 49 | #' @usage 50 | #' popRFdemo(project_dir, 51 | #' country="NPL", 52 | #' cores=0, 53 | #' quant=TRUE, 54 | #' ftp=TRUE, 55 | #' verbose=TRUE, 56 | #' log=TRUE, ...) 57 | #' 58 | #' @param project_dir Path to the folder to save the outputs. 59 | #' @param country character. ISO of the country 60 | #' (see \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3}{country codes}). 61 | #' Default one is NPL (Nepal) 62 | #' @param cores is a integer. Number of cores to use when executing the function. 63 | #' If set to 0 \code{(max_number_of_cores - 1)} will be used based on as 64 | #' many processors as the hardware and RAM allow. 65 | #' Default is \code{cores} = 0. 66 | #' @param quant If FALSE then quant will not be calculated 67 | #' @param ftp is logical. TRUE or FALSE: flag indicating whether 68 | #' [FTP](ftp://ftp.worldpop.org) or [HTTPS](https://data.worldpop.org) of 69 | #' \href{https://sdi.worldpop.org/wpdata}{WorldPop data} hub server will be used. 70 | #' Default is \code{ftp} = TRUE. 71 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 72 | #' intermediate output from the function on the console, which might be 73 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 74 | #' @param log is logical. TRUE or FALSE: flag indicating whether to print intermediate 75 | #' output from the function on the log.txt file. 76 | #' Default is \code{log} = FALSE. 77 | #' @param ... Additional arguments:\cr 78 | #' \code{binc}: Numeric. Increase number of blocks sugesting for 79 | #' processing raster file.\cr 80 | #' \code{boptimise}: Logical. Optimize total memory requires to 81 | #' processing raster file by reducing the memory need to 35%.\cr 82 | #' \code{bsoft}: Numeric. If raster can be processed on less 83 | #' then \code{cores} it will be foresed to use less number 84 | #' of \code{cores}.\cr 85 | #' \code{nodesize}: Minimum size of terminal nodes. Setting this number larger 86 | #' causes smaller trees to be grown (and thus take less time). See 87 | #' \code{\link[randomForest]{randomForest}} for more details. Default 88 | #' is \code{nodesize} = NULL and will be calculated 89 | #' as \code{length(y_data)/1000}.\cr 90 | #' \code{maxnodes} Maximum number of terminal nodes trees in the forest can have. 91 | #' If not given, trees are grown to the maximum possible (subject to 92 | #' limits by nodesize). If set larger than maximum possible, a warning is 93 | #' issued. See \code{\link[randomForest]{randomForest}} for more details. 94 | #' Default is \code{maxnodes} = NULL.\cr 95 | #' \code{ntree} Number of variables randomly sampled as candidates at each split. 96 | #' See \code{\link[randomForest]{randomForest}} for more details. 97 | #' Default is \code{ntree} = NULL and \code{ntree} will be used 98 | #' \code{popfit$ntree}\cr 99 | #' \code{mtry} Number of trees to grow. This should not be set to too small a 100 | #' number, to ensure that every input row gets predicted at least a few 101 | #' times. 
See \code{\link[randomForest]{randomForest}} for more details. 102 | #' Default is \code{ntree} = NULL and \code{ntree} will be used 103 | #' \code{popfit$mtry}. 104 | #' @importFrom utils write.table read.csv 105 | #' @rdname popRFdemo 106 | #' @return Raster* object of gridded population surfaces. 107 | #' @export 108 | #' @examples 109 | #' \dontrun{ 110 | #' popRFdemo(project_dir="/home/user/demo", 111 | #' country="NPL", 112 | #' cores=0) 113 | #' } 114 | popRFdemo <- function(project_dir, 115 | country="NPL", 116 | cores=0, 117 | quant=TRUE, 118 | ftp=TRUE, 119 | verbose=TRUE, 120 | log=TRUE,...){ 121 | 122 | iso.list <- c('ABW','AFG','AGO','AIA','ALA','ALB','AND','ARE','ARG', 123 | 'ARM','ASM','ATG','AUS','AUT','AZE','BDI','BEL','BEN','BES','BFA','BGD','BGR','BHR','BHS', 124 | 'BIH','BLM','BLR','BLZ','BMU','BOL','BRA','BRB','BRN','BTN','BWA','CAF','CAN','CHE','CHL', 125 | 'CIV','CMR','COD','COG','COK','COL','COM','CPV','CRI','CUB','CUW','CYM','CZE','DEU','DJI', 126 | 'DMA','DNK','DOM','DZA','ECU','EGY','ERI','ESH','ESP','EST','ETH','FIN','FJI','FLK','FRA', 127 | 'FRO','FSM','GAB','GBR','GEO','GGY','GHA','GIB','GIN','GLP','GMB','GNB','GNQ','GRC','GRD', 128 | 'GRL','GTM','GUF','GUM','GUY','HKG','HND','HRV','HTI','IDN','IMN','IRL','IRN','IRQ','ISL', 129 | 'ITA','JAM','JOR','JPN','KAZ','KGZ','KHM','KIR','KNA','KOR','KOS','KWT','LAO','LBN','LBR', 130 | 'LBY','LCA','LIE','LKA','LSO','LTU','LUX','LVA','MAC','MAF','MAR','MCO','MDA','MDG','MDV', 131 | 'MHL','MKD','MLI','MLT','MMR','MNE','MNG','MNP','MOZ','MRT','MSR','MUS','MYS','MYT','NAM', 132 | 'NCL','NER','NFK','NGA','NIC','NIU','NLD','NOR','NPL','NRU','NZL','OMN','PAK','PAN','PCN', 133 | 'PER','PHL','PLW','PNG','PRI','PRK','PRT','PRY','PSE','PYF','QAT','REU','ROU','RWA', 134 | 'SAU','SDN','SEN','SGP','SHN','SJM','SLB','SLE','SLV','SMR','SOM','SPM','SPR','SSD','STP', 135 | 'SUR','SVN','SWE','SWZ','SXM','SYC','SYR','TCA','TCD','TGO','THA','TJK','TKL','TKM','TLS', 136 | 'TON','TTO','TUN','TUR','TUV','TWN','TZA','UGA','UKR','URY', 'UZB','VAT','VCT','VEN', 137 | 'VGB','VIR','VNM','VUT','WLF','WSM','YEM','ZAF','ZMB','ZWE') 138 | 139 | is.populated <- function(x, xlist) x %in% xlist 140 | 141 | iso.s <- tolower(country) 142 | country <- toupper(country) 143 | 144 | if (!is.populated(country, iso.list)) { 145 | stop(paste0("Error: ",country," does not exist in this demo.\n")) 146 | } 147 | 148 | quiet <- ifelse(verbose, FALSE, TRUE) 149 | 150 | output_dir <- file.path(project_dir, country, "covariates") 151 | 152 | if(!file.exists(output_dir)){ 153 | 154 | if (verbose){ 155 | message("Info :: Creating dir ", output_dir) 156 | } 157 | dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) 158 | } 159 | 160 | 161 | url_prefix <- "https://data.worldpop.org" 162 | if (ftp){ 163 | url_prefix <- "ftp://ftp.worldpop.org" 164 | } 165 | ptcov <- paste0(url_prefix,"/GIS/Covariates/Global_2000_2020/",toupper(country)) 166 | 167 | input_covariates <- list( 168 | country = list( 169 | "esaccilc_dst011_100m_2015"=paste0(ptcov,"/ESA_CCI_Annual/2015/",iso.s,"_esaccilc_dst011_100m_2015.tif"), 170 | "esaccilc_dst040_100m_2015"=paste0(ptcov,"/ESA_CCI_Annual/2015/",iso.s,"_esaccilc_dst040_100m_2015.tif"), 171 | "esaccilc_dst130_100m_2015"=paste0(ptcov,"/ESA_CCI_Annual/2015/",iso.s,"_esaccilc_dst130_100m_2015.tif"), 172 | "esaccilc_dst140_100m_2015"=paste0(ptcov,"/ESA_CCI_Annual/2015/",iso.s,"_esaccilc_dst140_100m_2015.tif"), 173 | "esaccilc_dst140_100m_2015"=paste0(ptcov,"/ESA_CCI_Annual/2015/",iso.s,"_esaccilc_dst140_100m_2015.tif"), 174 | 
"esaccilc_dst160_100m_2015"=paste0(ptcov,"/ESA_CCI_Annual/2015/",iso.s,"_esaccilc_dst160_100m_2015.tif"), 175 | "esaccilc_dst190_100m_2015"=paste0(ptcov,"/ESA_CCI_Annual/2015/",iso.s,"_esaccilc_dst190_100m_2015.tif"), 176 | "esaccilc_dst200_100m_2015"=paste0(ptcov,"/ESA_CCI_Annual/2015/",iso.s,"_esaccilc_dst200_100m_2015.tif"), 177 | "esaccilc_dst_water_100m_2000_2012"= paste0(ptcov,"/ESA_CCI_Water/DST/",iso.s,"_esaccilc_dst_water_100m_2000_2012.tif"), 178 | "dst_bsgme_100m_2020"= paste0(ptcov,"/BSGM/2020/DTE/",iso.s,"_dst_bsgme_100m_2020.tif"), 179 | "dst_ghslesaccilc_100m_2000"= paste0(ptcov,"/BuiltSettlement/2000/DTE/",iso.s,"_dst_ghslesaccilc_100m_2000.tif"), 180 | "osm_dst_roadintersec_100m_2016"= paste0(ptcov,"/OSM/DST/",iso.s,"_osm_dst_roadintersec_100m_2016.tif"), 181 | "osm_dst_waterway_100m_2016"=paste0(ptcov,"/OSM/DST/",iso.s,"_osm_dst_waterway_100m_2016.tif"), 182 | "osm_dst_road_100m_2016"=paste0(ptcov,"/OSM/DST/",iso.s,"_osm_dst_road_100m_2016.tif"), 183 | "srtm_slope_100m"=paste0(ptcov,"/Slope/",iso.s,"_srtm_slope_100m.tif"), 184 | "srtm_topo_100m"=paste0(ptcov,"/Topo/",iso.s,"_srtm_topo_100m.tif"), 185 | "dst_coastline_100m_2000_2020"=paste0(ptcov,"/Coastline/DST/",iso.s,"_dst_coastline_100m_2000_2020.tif"), 186 | "viirs_100m_2016"=paste0(ptcov,"/VIIRS/",iso.s,"_viirs_100m_2016.tif"), 187 | "wdpa_dst_cat1_100m_2017"=paste0(ptcov,"/WDPA/WDPA_1/",iso.s,"_wdpa_dst_cat1_100m_2017.tif") 188 | ) 189 | ) 190 | names(input_covariates) <- c(country) 191 | 192 | ptcov <- paste0(url_prefix,"/GIS/Mastergrid/Global_2000_2020/",toupper(country)) 193 | 194 | input_mastergrid <- list( 195 | country = paste0(ptcov,"/Subnational/",iso.s,"_subnational_admin_2000_2020.tif") 196 | ) 197 | names(input_mastergrid) <- c(country) 198 | 199 | ptcov <- paste0(url_prefix,"/GIS/Covariates/Global_2000_2020/",toupper(country)) 200 | input_watermask <- list( 201 | country = paste0(ptcov,"/ESA_CCI_Water/Binary/",iso.s,"_esaccilc_water_100m_2000_2012.tif") 202 | ) 203 | names(input_watermask) <- c(country) 204 | 205 | ptcov <- paste0(url_prefix,"/GIS/Pixel_area/Global_2000_2020/",toupper(country)) 206 | input_px_area <- list( 207 | country = paste0(ptcov,"/",iso.s,"_px_area_100m.tif") 208 | ) 209 | names(input_px_area) <- c(country) 210 | 211 | 212 | countries <- c() 213 | 214 | for ( i in names(input_covariates) ) { 215 | countries <- append(countries, i, 1) 216 | } 217 | 218 | for( i in countries){ 219 | 220 | covariates <- names(input_covariates[[i]]) 221 | 222 | if (verbose){ 223 | cat("\n------------------------------------------------\n") 224 | cat("------------------------------------------------\n") 225 | cat(paste0("Following covariates will be downloaded to \n",output_dir,"\n")) 226 | cat("------------------------------------------------\n") 227 | cat(paste0("",covariates,"\n")) 228 | cat("------------------------------------------------\n") 229 | } 230 | 231 | for (c in covariates){ 232 | file_remote <- input_covariates[[i]][[c]] 233 | 234 | output_file <- file.path(output_dir, paste0(c,".tif")) 235 | if (!file.exists(output_file)){ 236 | if (verbose){ 237 | cat(paste0("Downloading... ", c ,"\n")) 238 | } 239 | download_file(file_remote, output_file, quiet, method="auto") 240 | 241 | } 242 | } 243 | 244 | } 245 | 246 | cat(paste0("\n")) 247 | output_px_area <- file.path(output_dir, paste0("px_area_100m.tif")) 248 | file_remote_px_area <- input_px_area[[country]] 249 | if (!file.exists(output_px_area)){ 250 | if (verbose){ 251 | cat(paste0("Downloading... 
px_area px_area_100m\n")) 252 | } 253 | download_file(file_remote_px_area, output_px_area, quiet, method="auto") 254 | } 255 | 256 | 257 | output_watermask <- file.path(output_dir, paste0("esaccilc_water_100m_2000_2012.tif")) 258 | file_remote_watermask <- input_watermask[[country]] 259 | if (!file.exists(output_watermask)){ 260 | if (verbose){ 261 | cat(paste0("Downloading... watermask esaccilc_water_100m_2000_2012\n")) 262 | } 263 | download_file(file_remote_watermask, output_watermask, quiet, method="auto") 264 | } 265 | 266 | 267 | output_mastergrid <- file.path(output_dir, paste0("subnational_admin_2000_2020.tif")) 268 | file_remote_mastergrid <- input_mastergrid[[country]] 269 | if (!file.exists(output_mastergrid)){ 270 | if (verbose){ 271 | cat(paste0("Downloading... mastergrid subnational_admin_2000_2020\n")) 272 | } 273 | download_file(file_remote_mastergrid, output_mastergrid, quiet, method="auto") 274 | } 275 | 276 | #### 277 | 278 | 279 | input_covariates <- list( 280 | country = list( 281 | "esaccilc_dst011_100m_2015"=file.path(output_dir,"esaccilc_dst011_100m_2015.tif"), 282 | "esaccilc_dst040_100m_2015"=file.path(output_dir,"esaccilc_dst040_100m_2015.tif"), 283 | "esaccilc_dst130_100m_2015"=file.path(output_dir,"esaccilc_dst130_100m_2015.tif"), 284 | "esaccilc_dst140_100m_2015"=file.path(output_dir,"esaccilc_dst140_100m_2015.tif"), 285 | "esaccilc_dst140_100m_2015"=file.path(output_dir,"esaccilc_dst140_100m_2015.tif"), 286 | "esaccilc_dst160_100m_2015"=file.path(output_dir,"esaccilc_dst160_100m_2015.tif"), 287 | "esaccilc_dst190_100m_2015"=file.path(output_dir,"esaccilc_dst190_100m_2015.tif"), 288 | "esaccilc_dst200_100m_2015"=file.path(output_dir,"esaccilc_dst200_100m_2015.tif"), 289 | "esaccilc_dst_water_100m_2000_2012"= file.path(output_dir,"esaccilc_dst_water_100m_2000_2012.tif"), 290 | "dst_bsgme_100m_2020"= file.path(output_dir,"dst_bsgme_100m_2020.tif"), 291 | "dst_ghslesaccilc_100m_2000"= file.path(output_dir,"dst_ghslesaccilc_100m_2000.tif"), 292 | "osm_dst_roadintersec_100m_2016"= file.path(output_dir,"osm_dst_roadintersec_100m_2016.tif"), 293 | "osm_dst_waterway_100m_2016"=file.path(output_dir,"osm_dst_waterway_100m_2016.tif"), 294 | "osm_dst_road_100m_2016"=file.path(output_dir,"osm_dst_road_100m_2016.tif"), 295 | "srtm_slope_100m"=file.path(output_dir,"srtm_slope_100m.tif"), 296 | "srtm_topo_100m"=file.path(output_dir,"srtm_topo_100m.tif"), 297 | "dst_coastline_100m_2000_2020"=file.path(output_dir,"dst_coastline_100m_2000_2020.tif"), 298 | "viirs_100m_2016"=file.path(output_dir,"viirs_100m_2016.tif"), 299 | "wdpa_dst_cat1_100m_2017"=file.path(output_dir,"wdpa_dst_cat1_100m_2017.tif") 300 | ) 301 | ) 302 | names(input_covariates) <- c(country) 303 | 304 | 305 | 306 | 307 | input_mastergrid <- list( 308 | country = file.path(output_dir,"subnational_admin_2000_2020.tif") 309 | ) 310 | names(input_mastergrid) <- c(country) 311 | 312 | input_watermask <- list( 313 | country = file.path(output_dir,"esaccilc_water_100m_2000_2012.tif") 314 | ) 315 | names(input_watermask) <- c(country) 316 | 317 | 318 | input_px_area <- list( 319 | country = file.path(output_dir,"px_area_100m.tif") 320 | ) 321 | names(input_px_area) <- c(country) 322 | 323 | if (verbose){ 324 | cat( paste0("Saving input covariates, watermask, px_area and mastergrid") ) 325 | cat( paste0("\nas R objects RData in :",output_dir,"\n") ) 326 | } 327 | 328 | save(input_covariates, file=file.path(output_dir,"input_covariates.RData")) 329 | save(input_mastergrid, 
file=file.path(output_dir,"input_mastergrid.RData")) 330 | save(input_watermask, file=file.path(output_dir,"input_watermask.RData")) 331 | save(input_px_area, file=file.path(output_dir,"input_px_area.RData")) 332 | 333 | 334 | output_mastergrid <- file.path(output_dir, paste0("subnational_admin_2000_2020.tif")) 335 | 336 | dpop_file <- file.path(output_dir, paste0(iso.s, "_population.csv")) 337 | 338 | if (!file.exists(dpop_file)){ 339 | if (verbose){ 340 | cat( paste0("\nDownloading and saving population table for ",country) ) 341 | cat( paste0(" in ", paste0(iso.s, "_population.csv"), "\n" ,dpop_file,"\n") ) 342 | } 343 | 344 | dpop <- read.csv(file.path(url_prefix, 345 | "GIS/Population/Global_2000_2020/CensusTables", 346 | paste0(iso.s,"_population_2000_2020.csv") 347 | ) 348 | ) 349 | 350 | dpop <- dpop[,c("GID","P_2020")] 351 | 352 | write.table(dpop, dpop_file, sep=",", col.names=FALSE, row.names=FALSE) 353 | 354 | } 355 | 356 | pop_tmp <- read.csv(dpop_file) 357 | 358 | if ( nrow(pop_tmp) < 20 ){ 359 | 360 | if (verbose){ 361 | 362 | cat("\n------------------------------------------------\n") 363 | cat("------------------------------------------------\n\n") 364 | cat( paste0("Country ",country," has only ",nrow(pop_tmp)," admin units.\n") ) 365 | cat( paste0("This amount of admin units will not be enought to train the model\n") ) 366 | cat( paste0("For the purpers of the demo please choose another country\n\n") ) 367 | cat("------------------------------------------------\n") 368 | cat("------------------------------------------------\n") 369 | 370 | opt <- options(show.error.messages = FALSE) 371 | on.exit(options(opt)) 372 | stop() 373 | 374 | }else{ 375 | 376 | stop(paste0("Country ",country," has only ",nrow(pop_tmp)," admin units.")) 377 | 378 | } 379 | 380 | } 381 | 382 | input_poptables <- list( 383 | country=dpop_file 384 | ) 385 | names(input_poptables) <- c(country) 386 | 387 | 388 | fset <- NULL 389 | fset_incl <- FALSE 390 | fset_cutoff <- 20 391 | 392 | pop <- popRF(input_poptables, 393 | input_covariates, 394 | input_mastergrid, 395 | input_watermask, 396 | input_px_area, 397 | project_dir, 398 | cores=cores, 399 | fset=fset, 400 | fset_incl=fset_incl, 401 | fset_cutoff=fset_cutoff, 402 | check_result=TRUE, 403 | verbose=verbose, 404 | log=log, ...) 405 | 406 | 407 | return(pop) 408 | } -------------------------------------------------------------------------------- /R/popfit.R: -------------------------------------------------------------------------------- 1 | #' get_popfit optimize the model 2 | #' 3 | #' @rdname get_popfit 4 | #' @param x_data matrix or data frame of predictor variables. 5 | #' See \code{\link[randomForest]{tuneRF}} for more details. 6 | #' @param y_data response vector. 7 | #' @param proximity is logical. TRUE or FALSE: flag indicating whether proximity 8 | #' measures among the rows be computed? Default is \code{proximity} = TRUE. 9 | #' See \code{\link[randomForest]{randomForest}} for more details.4 10 | #' @param set_seed Integer, set the seed. Default is \code{set_seed} = 2010 11 | #' @param init_popfit randomForest object with the optimal mtry. 12 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 13 | #' intermediate output from the function on the console, which might be 14 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 15 | #' @param log is logical. TRUE or FALSE: flag indicating whether to print intermediate 16 | #' output from the function on the log.txt file. 
17 | #' Default is \code{log} = FALSE 18 | #' @importFrom randomForest tuneRF importance 19 | #' @importFrom stats na.omit 20 | #' @return it returns the randomForest object produced with the optimal mtry. 21 | #' See \code{\link[randomForest]{tuneRF}} for more details. 22 | #' @seealso \code{\link{randomForest}} 23 | #' @noRd 24 | get_popfit <- function(x_data, 25 | y_data, 26 | init_popfit=NULL, 27 | proximity=TRUE, 28 | set_seed=2010, 29 | verbose=FALSE, 30 | log=FALSE) { 31 | 32 | 33 | start_time <- Sys.time() 34 | # set.seed(set_seed) 35 | ## Now we will optimize the model by iteratively removing any 36 | ## covariates with negative increases in node purity: 37 | 38 | ## Get list of covariates that have an importance score greater than 0: 39 | importance_scores <- importance(init_popfit)[order(importance(init_popfit)[,1], decreasing=TRUE),] 40 | pos_importance <- rownames(importance_scores)[importance_scores[,1] > 0] 41 | 42 | if (length(pos_importance) == length(importance_scores[,1])) { 43 | 44 | x_data <- x_data[pos_importance] 45 | 46 | popfit = tuneRF(x=x_data, 47 | y=y_data, 48 | plot=TRUE, 49 | mtryStart=length(x_data)/3, 50 | ntreeTry=length(y_data)/20, 51 | improve=0.0001, 52 | stepFactor=1.20, 53 | trace=verbose, 54 | doBest=TRUE, 55 | nodesize=length(y_data)/1000, 56 | na.action=na.omit, 57 | importance=TRUE, 58 | proximity=proximity, 59 | sampsize=min(c(length(y_data), 1000)), 60 | replace=TRUE) 61 | 62 | }else{ 63 | 64 | while (length(pos_importance) < length(importance_scores[,1])) { 65 | 66 | log_info("MSG", 67 | paste(" Jumping into the [while (length(pos_importance) < length(importance_scores[,1])) ] ... "), 68 | verbose=verbose, 69 | log=log 70 | ) 71 | ## Subset our x_data to just those columns having positive scores: 72 | x_data <- x_data[pos_importance] 73 | 74 | popfit = tuneRF(x=x_data, 75 | y=y_data, 76 | plot=TRUE, 77 | mtryStart=length(x_data)/3, 78 | ntreeTry=length(y_data)/20, 79 | improve=0.0001, 80 | stepFactor=1.20, 81 | trace=verbose, 82 | doBest=TRUE, 83 | nodesize=length(y_data)/1000, 84 | na.action=na.omit, 85 | importance=TRUE, 86 | proximity=proximity, 87 | sampsize=min(c(length(y_data), 1000)), 88 | replace=TRUE) 89 | 90 | ## Re-check importance scores: 91 | importance_scores <- importance(popfit)[order(importance(popfit)[,1], decreasing=TRUE),] 92 | pos_importance <- rownames(importance_scores)[importance_scores[,1] > 0] 93 | 94 | if (verbose) print(popfit) 95 | 96 | } ## End of while loop 97 | } 98 | 99 | end_time <- Sys.time() 100 | log_info("MSG", paste0("Elapsed Fitting Time: ", tmDiff(start_time,end_time)), verbose=verbose, log=log) 101 | 102 | return(popfit) 103 | 104 | } 105 | -------------------------------------------------------------------------------- /R/popfit_final.R: -------------------------------------------------------------------------------- 1 | #' get_popfit_final Another alternative is to use Quantile Regression Forests to generate 2 | #' prediction intervals. We'll fit a quantile regression using 3 | #' the tuning parameters pulled from the popfit object. 4 | #' 5 | #' @rdname get_popfit_final 6 | #' @param x_data matrix or data frame of predictor variables 7 | #' @param y_data response vector (factor for classification, numeric for 8 | #' regression) 9 | #' @param nodesize Minimum size of terminal nodes. Setting this number larger 10 | #' causes smaller trees to be grown (and thus take less time). See 11 | #' \code{\link[randomForest]{randomForest}} for more details. 
Default 12 | #' is \code{nodesize} = NULL and will be calculated 13 | #' as \code{length(y_data)/1000}. 14 | #' @param maxnodes Maximum number of terminal nodes trees in the forest can have. 15 | #' If not given, trees are grown to the maximum possible (subject to 16 | #' limits by nodesize). If set larger than maximum possible, a warning is 17 | #' issued. See \code{\link[randomForest]{randomForest}} for more details. 18 | #' Default is \code{maxnodes} = NULL. 19 | #' @param ntree Number of variables randomly sampled as candidates at each split. 20 | #' See \code{\link[randomForest]{randomForest}} for more details. 21 | #' Default is \code{ntree} = NULL and \code{ntree} will be used 22 | #' \code{popfit$ntree} 23 | #' @param mtry Number of trees to grow. This should not be set to too small a 24 | #' number, to ensure that every input row gets predicted at least a few 25 | #' times. See \code{\link[randomForest]{randomForest}} for more details. 26 | #' Default is \code{ntree} = NULL and \code{ntree} will be used 27 | #' \code{popfit$mtry} 28 | #' @param set_seed Integer, set the seed. Default is \code{set_seed} = 2010 29 | #' @param popfit the randomForest object produced with the optimal mtry. 30 | #' See \code{\link[randomForest]{tuneRF}} for more details. 31 | #' @param popfit_fln path to save \code{popfit} objects 32 | #' @param proximity is logical. TRUE or FALSE: flag indicating whether proximity 33 | #' measures among the rows be computed? Default is \code{proximity} = TRUE. 34 | #' See \code{\link[randomForest]{randomForest}} for more details. 35 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 36 | #' intermediate output from the function on the console, which might be 37 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 38 | #' @param log is logical. TRUE or FALSE: flag indicating whether to print 39 | #' intermediate output from the function on the log.txt file. 
40 | #' Default is \code{log} = FALSE 41 | #' @importFrom randomForest randomForest 42 | #' @return constructed n object of class randomForest, 43 | #' see \code{\link[randomForest]{randomForest}} for more details 44 | #' @noRd 45 | get_popfit_final <- function(x_data, 46 | y_data, 47 | nodesize=NULL, 48 | maxnodes=NULL, 49 | ntree=NULL, 50 | mtry=NULL, 51 | set_seed=2010, 52 | popfit, 53 | popfit_fln, 54 | proximity=TRUE, 55 | verbose=FALSE, 56 | log=FALSE) { 57 | 58 | if (file.exists(popfit_fln)) { 59 | 60 | log_info("MSG", paste0("Loading popfit object from ",popfit_fln), 61 | verbose=verbose, log=log) 62 | load(file=popfit_fln) 63 | 64 | }else{ 65 | 66 | set.seed(set_seed) 67 | 68 | rf_nodesize <- ifelse(is.null(nodesize), length(y_data)/1000, nodesize) 69 | 70 | if (is.null(maxnodes)){ 71 | rf_maxnodes <- NULL 72 | }else{ 73 | rf_maxnodes <- maxnodes 74 | } 75 | 76 | rf_ntree <- ifelse(is.null(ntree), popfit$ntree, ntree) 77 | rf_mtry <- ifelse(is.null(mtry), popfit$mtry, mtry) 78 | 79 | # popfit_final <- randomForest(x=x_data, 80 | # y=y_data, 81 | # mtry=popfit$mtry, 82 | # ntree=popfit$ntree, 83 | # nodesize=length(y_data)/1000, 84 | # importance=TRUE, 85 | # proximity=proximity, 86 | # do.trace=F) 87 | 88 | popfit_final <- randomForest(x=x_data, 89 | y=y_data, 90 | ntree=rf_ntree, 91 | mtry=rf_mtry, 92 | nodesize=rf_nodesize, 93 | maxnodes=rf_maxnodes, 94 | importance=TRUE, 95 | proximity=proximity, 96 | do.trace=F) 97 | 98 | log_info("MSG", paste0("Saving popfit_final object ",popfit_fln), 99 | verbose=verbose, log=log) 100 | save(popfit_final, file=popfit_fln) 101 | 102 | } 103 | 104 | return(popfit_final) 105 | 106 | } 107 | 108 | 109 | 110 | 111 | #' get_popfit_final_old exit program without error 112 | #' 113 | #' @rdname get_popfit_final_old 114 | #' @param fset list of the path to popfit.RData objects 115 | #' @param only.names if true return only names 116 | #' @param proximity Should proximity measures be computed? 117 | #' @param verbose logical. Should report extra information on progress? 118 | #' @param log logical. Should report on progress be saved in log file? 119 | #' @importFrom randomForest importance combine 120 | #' @return constructed n object of class randomForest, 121 | #' see \code{\link[randomForest]{randomForest}} for more details 122 | #' @noRd 123 | get_popfit_final_old <- function(fset, 124 | only.names=FALSE, 125 | proximity=TRUE, 126 | verbose=FALSE, 127 | log=FALSE) { 128 | ## Function which retrieves previously constructed popfit.RData objects. 
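  ## The .Rdata files found in fset$final are loaded one by one, their bulky
  ## proximity/predicted slots are dropped, and the forests are merged into a
  ## single ensemble with randomForest::combine(). A minimal stand-alone
  ## sketch of that combining step (toy data, not part of popRF):
  if (FALSE) {
    rf_a  <- randomForest::randomForest(Species ~ ., data = iris, ntree = 50)
    rf_b  <- randomForest::randomForest(Species ~ ., data = iris, ntree = 50)
    rf_ab <- randomForest::combine(rf_a, rf_b)
    rf_ab$ntree  # 100: trees from both fits are pooled
  }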
129 | err_mess <- "" 130 | err_bool <- FALSE 131 | 132 | list.of.old.popfits.final <- list.files(fset$final, 133 | pattern=paste0("\\.Rdata$"), 134 | full.names=TRUE) 135 | 136 | 137 | 138 | log_info("MSG", 139 | paste("Loading old popfit final from: ", fset$final), 140 | verbose=verbose, log=log) 141 | 142 | if ( length(list.of.old.popfits.final) == 0 ){ 143 | err_mess <- paste0('There is no old popfit Please check the folder : ', 144 | fset$final) 145 | stop(err_mess) 146 | } 147 | 148 | ## Load it: 149 | 150 | log_info("MSG", 151 | paste("Loading", basename(list.of.old.popfits.final[[1]]) ), 152 | verbose=verbose, log=log) 153 | 154 | local_env.Popfit_final = local({load(file=list.of.old.popfits.final[[1]]);environment()}) 155 | 156 | popfit.final.old <- local_env.Popfit_final$popfit_final 157 | popfit.final.old$proximity <- NULL 158 | popfit.final.old$predicted <- 0 159 | 160 | if (only.names){ 161 | 162 | fixed.predictors <- row.names(importance(popfit.final.old)) 163 | return(fixed.predictors) 164 | } 165 | 166 | for ( i in 1:length(list.of.old.popfits.final) ) { 167 | 168 | if (i==1) next() 169 | 170 | local_env.Popfit_final = local({load(file=list.of.old.popfits.final[[i]]);environment()}) 171 | 172 | local_env.Popfit_final$popfit_final$proximity <- NULL 173 | local_env.Popfit_final$popfit_final$predicted <- 0 174 | 175 | ## Combine it with the other popfit finals: 176 | log_info("MSG", paste("'Combine popfit ", 177 | basename(list.of.old.popfits.final[[i]]) ), 178 | verbose=verbose, log=log) 179 | 180 | popfit.final.old <- combine( popfit.final.old, local_env.Popfit_final$popfit_final ) 181 | } 182 | 183 | ## Return it: 184 | return(popfit.final.old) 185 | 186 | } -------------------------------------------------------------------------------- /R/popfit_init_tuning.R: -------------------------------------------------------------------------------- 1 | #' Tuning of our randomForest population density regression 2 | #' @rdname popfit_init_tuning 3 | #' @param x matrix or data frame of predictor variables 4 | #' @param y response vector (factor for classification, numeric for regression) 5 | #' @param proximity Should proximity measures be computed? 6 | #' @param verbose logical. Should report extra information on progress? 7 | #' @param log logical. Should report on progress be saved in log file? 8 | #' @importFrom randomForest tuneRF 9 | #' @importFrom stats na.omit 10 | #' @return it returns a matrix whose first column contains the mtry values 11 | #' searched, and the second column the corresponding OOB error 12 | #' @noRd 13 | popfit_init_tuning <- function(x, 14 | y, 15 | proximity=TRUE, 16 | verbose=FALSE, 17 | log=FALSE) { 18 | 19 | # x_data = x 20 | # y_data = y 21 | 22 | log_info("MSG", 23 | paste0("Start tuning of our randomForest population density regression."), 24 | verbose=verbose, log=log) 25 | 26 | start_time <- Sys.time() 27 | 28 | init_popfit = tuneRF(x=x, 29 | y=y, 30 | plot=TRUE, 31 | mtryStart=length(x)/3, 32 | ntreeTry=length(y)/20, 33 | improve=0.0001, 34 | stepFactor=1.20, 35 | trace=verbose, 36 | doBest=TRUE, 37 | nodesize=length(y)/1000, 38 | na.action=na.omit, 39 | importance=TRUE, 40 | proximity=proximity, 41 | sampsize=min(c(length(y), 1000)), 42 | replace=TRUE) 43 | 44 | 45 | end_time <- Sys.time() 46 | log_info("MSG", paste("End tuning RF. 
Elapsed Fitting Time:", 47 | tmDiff(start_time,end_time)), 48 | verbose=verbose, log=log) 49 | 50 | return(init_popfit) 51 | } 52 | -------------------------------------------------------------------------------- /R/popfit_quant.R: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | # 3 | # 4 | #' get_popfit_quant Another alternative is to use Quantile Regression Forests to 5 | #' generate prediction intervals. We'll fit a quantile regression using 6 | #' the tuning parameters pulled from the popfit object above: 7 | #' 8 | #' @rdname get_popfit_quant 9 | #' @param x_data x data for randomForest 10 | #' @param y_data y data for randomForest 11 | #' @param nodesize Minimum size of terminal nodes. Setting this number larger 12 | #' causes smaller trees to be grown (and thus take less time). See 13 | #' \code{\link[randomForest]{randomForest}} for more details. Default 14 | #' is \code{nodesize} = NULL and will be calculated 15 | #' as \code{length(y_data)/1000}. 16 | #' @param maxnodes Maximum number of terminal nodes trees in the forest can have. 17 | #' If not given, trees are grown to the maximum possible (subject to 18 | #' limits by nodesize). If set larger than maximum possible, a warning is 19 | #' issued. See \code{\link[randomForest]{randomForest}} for more details. 20 | #' Default is \code{maxnodes} = NULL. 21 | #' @param ntree Number of variables randomly sampled as candidates at each split. 22 | #' See \code{\link[randomForest]{randomForest}} for more details. 23 | #' Default is \code{ntree} = NULL and \code{ntree} will be used 24 | #' \code{popfit$ntree} 25 | #' @param mtry Number of trees to grow. This should not be set to too small a 26 | #' number, to ensure that every input row gets predicted at least a few 27 | #' times. See \code{\link[randomForest]{randomForest}} for more details. 28 | #' Default is \code{ntree} = NULL and \code{ntree} will be used 29 | #' \code{popfit$mtry} 30 | #' @param set_seed Integer, set the seed. 
Default is \code{set_seed} = 2010 31 | #' @param popfit popfit objects 32 | #' @param rfg.popfit.quant.RData path to load/save popfit objects 33 | #' @param proximity proximity 34 | #' @param verbose If FALSE then the progress will be shown 35 | #' @param log If FALSE then the progress will be shown 36 | #' @importFrom quantregForest quantregForest 37 | #' @return constructed popfit objects 38 | #' @noRd 39 | get_popfit_quant <- function(x_data, 40 | y_data, 41 | nodesize=NULL, 42 | maxnodes=NULL, 43 | ntree=NULL, 44 | mtry=NULL, 45 | set_seed=2010, 46 | popfit, 47 | rfg.popfit.quant.RData, 48 | proximity=TRUE, 49 | verbose=FALSE, 50 | log=FALSE) { 51 | 52 | if (file.exists(rfg.popfit.quant.RData)) { 53 | 54 | log_info("MSG", paste0("Loading popfit object from ", 55 | rfg.popfit.quant.RData), 56 | verbose=verbose, log=log) 57 | load(file=rfg.popfit.quant.RData) 58 | 59 | }else{ 60 | 61 | set.seed(set_seed) 62 | 63 | rf_nodesize <- ifelse(is.null(nodesize), length(y_data)/1000, nodesize) 64 | 65 | if (is.null(maxnodes)){ 66 | rf_maxnodes <- NULL 67 | }else{ 68 | rf_maxnodes <- maxnodes 69 | } 70 | 71 | rf_ntree <- ifelse(is.null(ntree), popfit$ntree, ntree) 72 | rf_mtry <- ifelse(is.null(mtry), popfit$mtry, mtry) 73 | 74 | # popfit_quant <- quantregForest(x=x_data, 75 | # y=y_data, 76 | # mtry=popfit$mtry, 77 | # ntree=popfit$ntree, 78 | # nodesize=length(y_data)/1000) 79 | 80 | popfit_quant <- quantregForest(x=x_data, 81 | y=y_data, 82 | mtry=rf_mtry, 83 | ntree=rf_ntree, 84 | nodesize=rf_nodesize, 85 | maxnodes=rf_maxnodes) 86 | 87 | log_info("MSG", 88 | paste0("Saving popfit_quant object ", 89 | rfg.popfit.quant.RData), 90 | verbose=verbose, log=log) 91 | save(popfit_quant, file=rfg.popfit.quant.RData) 92 | 93 | } 94 | 95 | return(popfit_quant) 96 | 97 | } 98 | 99 | 100 | 101 | 102 | 103 | #' get_popfit_quant_old exit program without error 104 | #' 105 | #' @rdname get_popfit_quant_old 106 | #' @param fset list of the path to popfit.RData objects 107 | #' @param only.names if true return only names 108 | #' @param proximity proximity 109 | #' @param verbose If FALSE then the progress will be shown 110 | #' @param log If FALSE then the progress will be shown 111 | #' @importFrom randomForest importance combine 112 | #' @return previously constructed popfit.RData objects 113 | #' @noRd 114 | get_popfit_quant_old <- function(fset, 115 | only.names=FALSE, 116 | proximity=TRUE, 117 | verbose=FALSE, 118 | log=FALSE) { 119 | ## Function which retrieves previously constructed popfit.RData objects. 
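  ## As in get_popfit_final_old(), but for the quantile regression forests:
  ## each popfit_quant object is loaded, its proximity/predicted slots are
  ## dropped, and the forests are pooled with randomForest::combine(). A
  ## minimal stand-alone sketch of how such a quantile forest yields
  ## prediction intervals (toy data, not part of popRF):
  if (FALSE) {
    qrf <- quantregForest::quantregForest(x = iris[, 1:3],
                                          y = iris$Petal.Width,
                                          ntree = 100)
    # per-observation 5th, 50th and 95th percentiles of the prediction
    predict(qrf, newdata = iris[1:5, 1:3], what = c(0.05, 0.50, 0.95))
  }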
120 | err_mess <- "" 121 | err_bool <- FALSE 122 | 123 | list.of.old.popfits.quant <- list.files(fset$quant, 124 | pattern=paste0("\\.Rdata$"), 125 | full.names=TRUE) 126 | 127 | 128 | log_info("MSG", paste("Loading old popfit quant from: ", fset$quant), 129 | verbose=verbose, log=log) 130 | 131 | 132 | if ( length(list.of.old.popfits.quant) == 0 ){ 133 | err_mess <- paste0('There is no old popfit Please check the folder : ', 134 | fset$quant) 135 | stop(err_mess) 136 | } 137 | 138 | ## Load it: 139 | log_info("MSG", 140 | paste("Loading ", basename(list.of.old.popfits.quant[[1]])), 141 | verbose=verbose, log=log) 142 | local_env.Popfit_quant = local({load(file=list.of.old.popfits.quant[[1]]);environment()}) 143 | 144 | popfit.quant.old <- local_env.Popfit_quant$popfit_quant 145 | popfit.quant.old$proximity <- NULL 146 | popfit.quant.old$predicted <- 0 147 | 148 | if (only.names){ 149 | 150 | fixed.predictors <- row.names(importance(popfit.quant.old)) 151 | 152 | return(fixed.predictors) 153 | } 154 | 155 | for ( i in 1:length(list.of.old.popfits.quant) ) { 156 | 157 | if (i==1) next() 158 | 159 | local_env.Popfit_quantl = local({load(file=list.of.old.popfits.quant[[i]]);environment()}) 160 | 161 | local_env.Popfit_quant$popfit_quant$proximity <- NULL 162 | local_env.Popfit_quant$popfit_quant$predicted <- 0 163 | 164 | ## Combine it with the other popfit quant: 165 | log_info("MSG", 166 | paste("Combine popfit ", basename(list.of.old.popfits.quant[[i]])), 167 | verbose=verbose, log=log) 168 | popfit.quant.old <- combine( popfit.quant.old, local_env.Popfit_quant$popfit_quant ) 169 | } 170 | 171 | ## Return it: 172 | return(popfit.quant.old) 173 | 174 | } -------------------------------------------------------------------------------- /R/progress_message.R: -------------------------------------------------------------------------------- 1 | #' Function will return progress plotting a progress bar 2 | #' 3 | #' @param x integer, current level 4 | #' @param max maximum for progress bar 5 | #' @param label additional text for progress bar 6 | #' @rdname progress_message 7 | #' @return character 8 | #' @examples 9 | #' \dontrun{ 10 | #' progress_message( x=10, max = 200, label="Progress message" ) 11 | #' } 12 | #' @noRd 13 | progress_message <- function (x, max = 100, label=NULL) { 14 | 15 | if (is.null(label)) label='' 16 | if (x != max) ar = '>' else ar='' 17 | 18 | percent <- x / max * 100 19 | cat(sprintf('\r[%-50s] %d%% %s', 20 | paste(paste(rep('=', percent / 2), collapse = ''),'',sep = ar), 21 | floor(percent), 22 | label)) 23 | if (x == max) 24 | cat('\n') 25 | } 26 | -------------------------------------------------------------------------------- /R/rasterize_parallel.R: -------------------------------------------------------------------------------- 1 | #' @title rasterize_parallel_start function will return a string with OS 2 | #' @param x Raster* object 3 | #' @param df data.frame of points 4 | #' @param blocks number of blocks sugesting for processing raster file. 5 | #' @param NAflag NO data value will be used for a new raster 6 | #' @param datatype Type of raster. Available are 7 | #' INT1S/INT2S/INT4S/FLT4S/LOG1S/INT1U/INT2U/INT4U/FLT8S 8 | #' @param filename File of a new raster file. 
9 | #' @param overwrite Overwrite existing file 10 | #' @param silent If FALSE then the progress will be shown 11 | #' @importFrom utils getFromNamespace 12 | #' @rdname rasterize_parallel_start 13 | #' @return Raster* object 14 | #' @noRd 15 | rasterize_parallel_start <- function(x, 16 | df, 17 | blocks, 18 | NAflag, 19 | datatype, 20 | filename, 21 | overwrite=TRUE, 22 | silent=TRUE) { 23 | 24 | 25 | 26 | 27 | tStart <- Sys.time() 28 | 29 | layernames <- names(x) 30 | 31 | verbose <- if (silent) FALSE else TRUE 32 | 33 | log_info("MSG", paste0("Rasterizing using total blocks ",blocks$n ), 34 | verbose=verbose, log=FALSE) 35 | 36 | 37 | recvOneData <- getFromNamespace("recvOneData", "parallel") 38 | sendCall <- getFromNamespace("sendCall", "parallel") 39 | 40 | cl <- raster::getCluster() 41 | 42 | #on.exit( returnCluster() ) 43 | nodes <- length(cl) 44 | 45 | 46 | clusterExport(cl, c("blocks", "x","df", "silent"), envir=environment()) 47 | clusterExport(cl, c("recvOneData", "sendCall"), envir=environment()) 48 | 49 | clRasteriseFun <- function(i) { 50 | # tryCatch({ 51 | v <- data.frame( raster::getValues(x, 52 | row=blocks$row[i], 53 | nrows=blocks$nrows[i]) ) 54 | colnames(v) <- c("v1") 55 | colnames(df) <- c("v1","v2") 56 | v <- plyr::join(v,df,type="left",by = "v1")[-1] 57 | 58 | return(v[[1]]) 59 | } 60 | 61 | # get all nodes going 62 | for (i in 1:nodes) { 63 | sendCall(cl[[i]], clRasteriseFun, i, tag=i) 64 | } 65 | 66 | #out <- raster:::setValues(x, 0) 67 | out <- x 68 | 69 | out <- raster::writeStart(out, 70 | filename=filename, 71 | format="GTiff", 72 | datatype=datatype, 73 | overwrite=overwrite, 74 | options=c("COMPRESS=LZW"), 75 | NAflag=NAflag) 76 | 77 | for (i in 1:blocks$n) { 78 | 79 | d <- recvOneData(cl) 80 | 81 | if (!d$value$success) { 82 | stop('cluster error') 83 | } 84 | 85 | tEnd <- Sys.time() 86 | 87 | b <- d$value$tag 88 | 89 | if ((silent == FALSE) & (i%%10 == 0) ) { 90 | 91 | progress_message(x=i, 92 | max=blocks$n, 93 | label=paste0("received block ", 94 | i, 95 | " Processing Time: ", 96 | tmDiff(tStart,tEnd) 97 | ) 98 | ) 99 | 100 | } 101 | 102 | out <- raster::writeValues(out, d$value$value, blocks$row[b]) 103 | 104 | # need to send more data 105 | # 106 | ni <- nodes + i 107 | if (ni <= blocks$n) { 108 | sendCall(cl[[d$node]], clRasteriseFun, ni, tag=ni) 109 | } 110 | } 111 | 112 | out <- raster::writeStop(out) 113 | 114 | return(out) 115 | } 116 | 117 | 118 | #' @title Function will transfer values associated with 'object' type spatial 119 | #' data (data.frame) to raster cells. Function is using parallel library to work 120 | #' with a big raster data. The raster file will be split to blocks and 121 | #' processed per block. 122 | #' 123 | #' @author Maksym Bondarenko and 124 | #' Chris Jochem 125 | #' @usage rasterize_parallel(x, df, cores=NULL, blocks=NULL, NAflag=NULL, 126 | #' datatype=NULL, filename=rasterTmpFile(), 127 | #' overwrite=TRUE, silent=TRUE) 128 | #' @param x Raster* object. 129 | #' @param df data.frame of points. 130 | #' @param cores is a integer. Number of cores to use when executing the 131 | #' function in paralle. 132 | #' @param blocks number of blocks sugesting for processing raster file. 133 | #' @param NAflag NO data value will be used for a new raster. 134 | #' @param datatype Type of raster. Available are 135 | #' INT1S/INT2S/INT4S/FLT4S/LOG1S/INT1U/INT2U/INT4U/FLT8S. 136 | #' @param filename the name of the raster file. 137 | #' @param overwrite Overwrite existing file. 138 | #' @param silent is logical. 
TRUE or FALSE: flag indicating whether to print 139 | #' intermediate output from the function on the console, which might be 140 | #' helpful for model debugging. If FALSE then the progress will be shown. 141 | #' Default is \code{silent} = TRUE. 142 | #' @importFrom raster getValues writeRaster writeStart writeStop compareRaster 143 | #' hasValues writeValues blockSize getCluster returnCluster 144 | #' endCluster beginCluster rasterTmpFile 145 | #' @importFrom stats complete.cases predict sd aggregate 146 | #' @importFrom utils stack 147 | #' @importFrom doParallel registerDoParallel 148 | #' @importFrom parallel detectCores 149 | #' @importFrom foreach '%dopar%' foreach 150 | #' @rdname rasterize_parallel 151 | #' @return Raster* object 152 | #' @examples 153 | #' \dontrun{ 154 | #' rasterize_parallel(x=rasterObj, df=df, cores=2,NAflag=-99999,datatype='INT1U' ) 155 | #' } 156 | #' @noRd 157 | rasterize_parallel <- function(x, 158 | df, 159 | cores=NULL, 160 | blocks=NULL, 161 | NAflag=NULL, 162 | datatype=NULL, 163 | filename=rasterTmpFile(), 164 | overwrite=TRUE, 165 | silent=TRUE) { 166 | 167 | stopifnot(hasValues(x)) 168 | 169 | # get real physical cores 170 | max.cores <- detectCores(logical = TRUE) 171 | 172 | if (is.null(NAflag)) NAflag=-99999 173 | if (is.null(datatype)) datatype='FLT4S' 174 | 175 | # if user did not tell how many cores to use then max-1 will be taken 176 | # 177 | if (is.null(cores)) { 178 | cores <- max.cores - 1 179 | } 180 | 181 | 182 | if (cores > max.cores) { 183 | stop(paste0("Number of cores ",cores, 184 | " more then real physical cores in PC ",max.cores )) 185 | } 186 | 187 | if (!is.data.frame(df)) stop(paste0("df should be a data.frame")) 188 | if (!is(NAflag, "numeric")) stop(paste0("NAflag should be numeric")) 189 | #if (!is(cores, "integer")) stop(paste0("cores should be integer value")) 190 | if (!is(overwrite, "logical")) stop(paste0("overwrite should be logical (e.g., TRUE, FALSE)")) 191 | if (!is(silent, "logical")) stop(paste0("silent should be logical (e.g., TRUE, FALSE)")) 192 | 193 | datatype <- toupper(datatype) 194 | 195 | if (!(datatype %in% c('INT1S', 'INT2S', 'INT4S', 'FLT4S', 196 | 'LOG1S', 'INT1U', 'INT2U', 'INT4U', 'FLT8S'))) { 197 | stop('Not a valid data type. Avalible are INT1S/INT2S/INT4S/FLT4S/LOG1S/INT1U/INT2U/INT4U/FLT8S') 198 | } 199 | 200 | if (!file.exists(dirname(filename))){ 201 | stop(paste0("Directory ",dirname(filename)," for file ", basename(filename) ," does not exist")) 202 | } 203 | 204 | if ( file.exists(filename) & overwrite==FALSE) { 205 | stop(paste0("File ",filename," exists. 
Use option overwrite=TRUE")) 206 | } else{ 207 | if ( file.exists(filename) ) file.remove(filename) 208 | } 209 | 210 | if (cores > max.cores) { 211 | stop(paste0("Number of cores ",cores, 212 | " more then real physical cores in PC ",max.cores )) 213 | } 214 | 215 | # if user did not tell how many blocks to use then blocks will 216 | # calculated by get_blocks_need() function 217 | # 218 | if (is.null(blocks)) { 219 | 220 | blocks <- get_blocks_size(x, 221 | cores, 222 | verbose = ifelse(silent, FALSE, TRUE)) 223 | } 224 | 225 | npoc_blocks <- ifelse(blocks$n < cores, blocks$n, cores) 226 | 227 | beginCluster(n=npoc_blocks) 228 | 229 | out <- rasterize_parallel_start(x, df, blocks, NAflag, datatype, filename, 230 | overwrite, silent) 231 | 232 | endCluster() 233 | 234 | return(out) 235 | } 236 | -------------------------------------------------------------------------------- /R/rf_prediction.R: -------------------------------------------------------------------------------- 1 | #' Predict for gridded covariates 2 | #' 3 | #' @rdname rf_prediction 4 | #' @param covariates covariates list 5 | #' @param census_mask census_mask 6 | #' @param water_raster water_raster 7 | #' @param popfit_final popfit final objects 8 | #' @param popfit_quant popfit quant objects 9 | #' @param outdir path to load/save popfit objects 10 | #' @param tag proximity 11 | #' @param quant proximity 12 | #' @param verbose If FALSE then the progress will be shown 13 | #' @param log If FALSE then the progress will be shown 14 | #' @importFrom raster getValues writeRaster 15 | #' @importFrom stats complete.cases predict sd 16 | #' @importFrom utils stack 17 | #' @return raster objects 18 | #' @noRd 19 | rf_prediction <- function(covariates, 20 | census_mask, 21 | water_raster, 22 | popfit_final, 23 | popfit_quant, 24 | outdir, 25 | tag, 26 | quant=TRUE, 27 | verbose=FALSE, 28 | log=FALSE) { 29 | 30 | 31 | tStart <- Sys.time() 32 | log_info("MSG", paste0("Start prediction for gridded covariates"), 33 | verbose=verbose, 34 | log=log) 35 | 36 | rfg.predict.density.rf.pred <- file.path(outdir, 37 | paste0("predict_density_rf_pred_", 38 | tag, ".tif")) 39 | rfg.predict.density.rf.sd <- file.path(outdir, 40 | paste0("predict_density_rf_sd_", 41 | tag, ".tif")) 42 | 43 | 44 | rfg.predict.density.rf.pred_05 <- file.path(outdir, 45 | paste0("predict_density_rf_pred_05_", 46 | tag , ".tif")) 47 | rfg.predict.density.rf.pred_50 <- file.path(outdir, 48 | paste0("predict_density_rf_pred_50_", 49 | tag , ".tif")) 50 | rfg.predict.density.rf.pred_95 <- file.path(outdir, 51 | paste0("predict_density_rf_pred_90_", 52 | tag , ".tif")) 53 | 54 | 55 | # Stack all of our covariates and masks together: 56 | # 57 | for (i in 1:length(names(popfit_final$forest$xlevels))){ 58 | 59 | var_name <- names(popfit_final$forest$xlevels)[i] 60 | r <- raster( covariates[[var_name]]$path ) 61 | names(r) <- var_name 62 | 63 | if (i == 1) { 64 | covariate_stack <- r 65 | }else{ 66 | covariate_stack <- raster::addLayer(covariate_stack, r) 67 | } 68 | 69 | } 70 | 71 | ## Append the census mask and the water mask to that list: 72 | names(census_mask) <- "census_mask" 73 | covariate_stack <- raster::addLayer(covariate_stack, census_mask) 74 | names(water_raster) <- "water_raster" 75 | covariate_stack <- raster::addLayer(covariate_stack, water_raster) 76 | 77 | rm(r) 78 | # 79 | # 80 | 81 | row_data <- data.frame(getValues(covariate_stack)) 82 | 83 | 84 | 85 | ## Convert field names to something more manageable and 86 | ## that matches our popfit variable list: 87 
| ## Full covariate stack: 88 | # 89 | names(row_data) <- c(names(popfit_final$forest$xlevels), 90 | "census_mask", 91 | "water_raster") 92 | 93 | ## Detect if we have any NA or Inf values, and that the values are 94 | ## covered by our census administrative units: 95 | # 96 | na_present <- apply(is.na(row_data), 1, any) 97 | inf_present <- apply(row_data == -Inf | row_data == Inf, 1, any) 98 | census_mask <- (is.na(row_data$census_mask)) 99 | water_mask <- (row_data$water_raster == 1) 100 | 101 | ## Use the first if you want to mask out water pixels, this can greatly 102 | ## speed up predictions over areas with a lot of water, however, you 103 | ## run the risk of having no predictions in the resulting dataset 104 | ## if you have a census block small enough that it might only have 105 | ## water cover (GeoCover/GlobCover is what determines the water mask): 106 | # 107 | roi_subset <- (!na_present & !inf_present & !census_mask & !water_mask) 108 | 109 | ## Create a set of predictions based on our covariates: 110 | # 111 | predictions <- numeric(length = length(row_data[, 1])) 112 | predictions[] <- NA 113 | 114 | 115 | 116 | if (quant) { 117 | predictions <- data.frame( 118 | "rf_pred" = predictions, 119 | "rf_sd" = predictions, 120 | "rf_05" = predictions, 121 | "rf_50" = predictions, 122 | "rf_95" = predictions 123 | ) 124 | #prinitialising rasters 125 | predictions_rf_pred <- covariate_stack$census_mask 126 | predictions_rf_sd <- covariate_stack$census_mask 127 | predictions_rf_05 <- covariate_stack$census_mask 128 | predictions_rf_50 <- covariate_stack$census_mask 129 | predictions_rf_95 <- covariate_stack$census_mask 130 | 131 | } else{ 132 | predictions <- data.frame("rf_pred" = predictions, 133 | "rf_sd" = predictions) 134 | 135 | #prinitialising rasters 136 | predictions_rf_pred <- covariate_stack$census_mask 137 | predictions_rf_sd <- covariate_stack$census_mask 138 | } 139 | 140 | rm(covariate_stack) 141 | 142 | ## I f we have data where NAs or Inf values are not present then we 143 | ## predict for those cells (where we subset our data according to the 144 | ## roi_subset and remove the census zone and water mask columns (length(row_data) - 2): 145 | # 146 | if (sum(roi_subset) > 0) { 147 | prediction_set <- predict(popfit_final, 148 | newdata = row_data[roi_subset, 1:(length(row_data) - 2)], 149 | predict.all = TRUE) 150 | 151 | 152 | raster::values(predictions_rf_pred) <- transY(apply(prediction_set$individual, 153 | MARGIN = 1, 154 | mean), 155 | inverse = TRUE) 156 | 157 | raster::values(predictions_rf_sd) <- apply(prediction_set$individual, 158 | MARGIN = 1, 159 | sd) 160 | 161 | # predictions$rf_pred[roi_subset] <- 162 | # transY(apply(prediction_set$individual, MARGIN = 1, mean), 163 | # inverse = TRUE) 164 | # 165 | # predictions$rf_sd[roi_subset] <- 166 | # apply(prediction_set$individual, MARGIN = 1, sd) 167 | 168 | if (quant) { 169 | 170 | prediction_set <- predict(popfit_quant, 171 | newdata = row_data[roi_subset, 1:(length(row_data) - 2)], 172 | quantiles = c(0.05, 0.50, 0.95)) 173 | 174 | raster::values(predictions_rf_05) <- transY(prediction_set[, 1], inverse = TRUE) 175 | raster::values(predictions_rf_50) <- transY(prediction_set[, 2], inverse = TRUE) 176 | raster::values(predictions_rf_95) <- transY(prediction_set[, 3], inverse = TRUE) 177 | 178 | # 179 | # predictions$rf_05[roi_subset] <- 180 | # transY(prediction_set[, 1], inverse = TRUE) 181 | # predictions$rf_50[roi_subset] <- 182 | # transY(prediction_set[, 2], inverse = TRUE) 183 | # 
predictions$rf_95[roi_subset] <- 184 | # transY(prediction_set[, 3], inverse = TRUE) 185 | 186 | } 187 | } 188 | 189 | 190 | prediction_raster <- writeRaster(predictions_rf_pred, 191 | filename=rfg.predict.density.rf.pred, 192 | format="GTiff", 193 | datatype="FLT4S", 194 | overwrite=TRUE, 195 | options=c("COMPRESS=LZW")) 196 | 197 | 198 | writeRaster(predictions_rf_sd, 199 | filename=rfg.predict.density.rf.sd, 200 | format="GTiff", 201 | datatype="FLT4S", 202 | overwrite=TRUE, 203 | options=c("COMPRESS=LZW")) 204 | 205 | 206 | if (quant) { 207 | 208 | writeRaster(predictions_rf_05, 209 | filename=rfg.predict.density.rf.pred_05, 210 | format="GTiff", 211 | datatype="FLT4S", 212 | overwrite=TRUE, 213 | options=c("COMPRESS=LZW")) 214 | 215 | 216 | writeRaster(predictions_rf_50, 217 | filename=rfg.predict.density.rf.pred_50, 218 | format="GTiff", 219 | datatype="FLT4S", 220 | overwrite=TRUE, 221 | options=c("COMPRESS=LZW")) 222 | 223 | 224 | writeRaster(predictions_rf_95, 225 | filename=rfg.predict.density.rf.pred_95, 226 | format="GTiff", 227 | datatype="FLT4S", 228 | overwrite=TRUE, 229 | options=c("COMPRESS=LZW")) 230 | } 231 | 232 | 233 | tEnd <- Sys.time() 234 | log_info("MSG", 235 | paste0("Processing Time: ", 236 | tmDiff(tStart,tEnd)), 237 | verbose=verbose, log=log) 238 | 239 | return(prediction_raster) 240 | } -------------------------------------------------------------------------------- /R/rf_prediction_parallel.R: -------------------------------------------------------------------------------- 1 | #' rf_prediction_parallel Predict for gridded covariates 2 | #' 3 | #' @rdname rf_prediction_parallel 4 | #' @param covariates covariates list 5 | #' @param census_mask census mask raster 6 | #' @param water_raster water mask raster 7 | #' @param popfit_final popfit final object 8 | #' @param popfit_quant popfit quant object 9 | #' @param outdir path to the output directory for the prediction rasters 10 | #' @param nrpoc number of processes (cores) to use 11 | #' @param tag character tag used in the output file names 12 | #' @param quant if TRUE, quantile (5th, 50th and 95th percentile) predictions are also produced 13 | #' @param blocks number of blocks suggested for processing the raster file.
14 | #' @param verbose If FALSE then the progress will be shown 15 | #' @param log If FALSE then the progress will be shown 16 | #' @importFrom raster getValues writeRaster writeStart writeStop 17 | #' compareRaster hasValues writeValues blockSize getCluster returnCluster 18 | #' @importFrom stats complete.cases predict sd aggregate 19 | #' @importFrom utils stack getFromNamespace 20 | #' @importFrom doParallel registerDoParallel 21 | #' @importFrom parallel detectCores clusterEvalQ clusterExport 22 | #' @importFrom foreach '%dopar%' foreach 23 | #' @return raster objects 24 | #' @noRd 25 | rf_prediction_parallel <- function(covariates, 26 | census_mask, 27 | water_raster, 28 | popfit_final, 29 | popfit_quant, 30 | outdir, 31 | nrpoc, 32 | tag, 33 | quant = TRUE, 34 | blocks=NULL, 35 | verbose=TRUE, 36 | log=FALSE) { 37 | 38 | 39 | prediction_raster <- census_mask 40 | 41 | log_info("MSG", paste0("Start predict for gridded covariates"), 42 | verbose=verbose, log=log) 43 | 44 | rfg.predict.density.rf.pred <- file.path(outdir, 45 | paste0("predict_density_rf_pred_", 46 | tag, ".tif")) 47 | rfg.predict.density.rf.sd <- file.path(outdir, 48 | paste0("predict_density_rf_sd_", 49 | tag, ".tif")) 50 | 51 | 52 | rfg.predict.density.rf.pred_05 <- file.path(outdir, 53 | paste0("predict_density_rf_pred_05_", 54 | tag , ".tif")) 55 | rfg.predict.density.rf.pred_50 <- file.path(outdir, 56 | paste0("predict_density_rf_pred_50_", 57 | tag , ".tif")) 58 | rfg.predict.density.rf.pred_95 <- file.path(outdir, 59 | paste0("predict_density_rf_pred_90_", 60 | tag , ".tif")) 61 | 62 | 63 | 64 | recvOneData <- getFromNamespace("recvOneData", "parallel") 65 | sendCall <- getFromNamespace("sendCall", "parallel") 66 | 67 | 68 | tStart <- Sys.time() 69 | 70 | 71 | 72 | # Stack all of our covariates and masks together: 73 | # 74 | for (i in 1:length(names(popfit_final$forest$xlevels))){ 75 | 76 | var_name <- names(popfit_final$forest$xlevels)[i] 77 | r <- raster( covariates[[var_name]]$path ) 78 | names(r) <- var_name 79 | 80 | if (i == 1) { 81 | covariate_stack <- r 82 | }else{ 83 | covariate_stack <- raster::addLayer(covariate_stack, r) 84 | } 85 | 86 | } 87 | 88 | ## Append the census mask and the water mask to that list: 89 | names(census_mask) <- "census_mask" 90 | covariate_stack <- raster::addLayer(covariate_stack, census_mask) 91 | names(water_raster) <- "water_raster" 92 | covariate_stack <- raster::addLayer(covariate_stack, water_raster) 93 | 94 | rm(r) 95 | # 96 | # 97 | 98 | 99 | # # set additional param for geting min blocks for cluster 100 | # if (quant) nmb=60 else nmb=30 101 | # 102 | # ## Stack all of our covariates and masks together: 103 | # #covariate_stack <- creat_raster_stack(covariates, popfit_final) 104 | # 105 | # if (is.null(minblocks)) { 106 | # minblocks <- get_blocks_need(covariate_stack, cores=nrpoc, n=nmb) 107 | # } 108 | 109 | 110 | 111 | cl <- getCluster() 112 | on.exit( returnCluster() ) 113 | 114 | nodes <- length(cl) 115 | # blocks <- blockSize(prediction_raster,minblocks=minblocks) 116 | 117 | log_info("MSG", paste0("covariate_stack will be divided to ",blocks$n," blocks"), verbose=verbose, log=log) 118 | 119 | clusterEvalQ(cl, { 120 | require(raster) 121 | require(randomForest) 122 | }) 123 | 124 | if (quant) { 125 | clusterExport(cl, c("popfit_final", "popfit_quant", "covariate_stack", "transY"), envir=environment()) 126 | } else { 127 | clusterExport(cl, c("popfit_final", "covariate_stack", "transY"), envir=environment()) 128 | } 129 | 130 | clusterExport(cl, "blocks", 
envir=environment()) 131 | clusterExport(cl, "quant", envir=environment()) 132 | clusterExport(cl, "verbose", envir=environment()) 133 | clusterExport(cl, c("recvOneData", "sendCall"), envir=environment()) 134 | 135 | ################################################################################# 136 | ################################################################################# 137 | ## Define the function that will be run on each cluster to do the predictions: 138 | # 139 | call_predictions <- function (i) { 140 | 141 | row_data <- data.frame( getValues(covariate_stack, 142 | row=blocks$row[i], 143 | nrows=blocks$nrows[i]) ) 144 | 145 | ## Convert field names to something more manageable and 146 | ## that matches our popfit variable list: 147 | ## Full covariate stack: 148 | # 149 | names(row_data) <- c(names(popfit_final$forest$xlevels), "census_mask", "water_raster") 150 | 151 | ## Detect if we have any NA or Inf values, and that the values are 152 | ## covered by our census administrative units: 153 | # 154 | na_present <- apply(is.na(row_data), 1, any) 155 | inf_present <- apply(row_data == -Inf | row_data == Inf, 1, any) 156 | census_mask <- (is.na(row_data$census_mask)) 157 | water_mask <- (row_data$water_raster == 1) 158 | 159 | ## Use the first if you want to mask out water pixels, this can greatly 160 | ## speed up predictions over areas with a lot of water, however, you 161 | ## run the risk of having no predictions in the resulting dataset 162 | ## if you have a census block small enough that it might only have 163 | ## water cover (GeoCover/GlobCover is what determines the water mask): 164 | # 165 | roi_subset <- (!na_present & !inf_present & !census_mask & !water_mask) 166 | 167 | 168 | ## Create a set of predictions based on our covariates: 169 | # 170 | predictions <- numeric(length=length(row_data[,1])) 171 | predictions[] <- NA 172 | 173 | if (quant) { 174 | predictions <- data.frame("rf_pred"=predictions, 175 | "rf_sd"=predictions, 176 | "rf_05"=predictions, 177 | "rf_50"=predictions, 178 | "rf_95"=predictions) 179 | }else{ 180 | predictions <- data.frame("rf_pred"=predictions, 181 | "rf_sd"=predictions) 182 | } 183 | 184 | 185 | ## I f we have data where NAs or Inf values are not present then we 186 | ## predict for those cells (where we subset our data according to the 187 | ## roi_subset and remove the census zone and water mask columns (length(row_data) - 2): 188 | # 189 | if (sum(roi_subset) > 0) { 190 | 191 | prediction_set <- predict(popfit_final, 192 | newdata=row_data[roi_subset,1:(length(row_data)-2)], 193 | predict.all=TRUE) 194 | 195 | predictions$rf_pred[roi_subset] <- transY(apply(prediction_set$individual, 196 | MARGIN=1, 197 | mean), 198 | inverse=TRUE) 199 | 200 | predictions$rf_sd[roi_subset] <- apply(prediction_set$individual, 201 | MARGIN=1, 202 | sd) 203 | 204 | if (quant) { 205 | 206 | prediction_set <- predict(popfit_quant, 207 | newdata=row_data[roi_subset,1:(length(row_data)-2)], 208 | quantiles=c(0.05, 0.50, 0.95)) 209 | 210 | predictions$rf_05[roi_subset] <- transY(prediction_set[,1], inverse=TRUE) 211 | predictions$rf_50[roi_subset] <- transY(prediction_set[,2], inverse=TRUE) 212 | predictions$rf_95[roi_subset] <- transY(prediction_set[,3], inverse=TRUE) 213 | } 214 | } 215 | 216 | return(predictions) 217 | } 218 | # 219 | ## 220 | ################################################################################# 221 | ################################################################################# 222 | 223 | 224 | 225 | ## Start all 
nodes on a prediction: 226 | for (i in 1:nodes) { 227 | sendCall(cl[[i]], call_predictions, i, tag=i) 228 | } 229 | 230 | ## Start the raster writer object so we can store our results as they 231 | ## come back from our cluster: 232 | # 233 | prediction_raster <- writeStart(prediction_raster, 234 | filename=rfg.predict.density.rf.pred, 235 | format="GTiff", 236 | datatype="FLT4S", 237 | overwrite=TRUE, 238 | options=c("COMPRESS=LZW")) 239 | 240 | 241 | sd_raster <- prediction_raster 242 | sd_raster <- writeStart(sd_raster, 243 | filename=rfg.predict.density.rf.sd, 244 | format="GTiff", 245 | datatype="FLT4S", 246 | overwrite=TRUE, 247 | options=c("COMPRESS=LZW")) 248 | 249 | 250 | if (quant) { 251 | prediction_raster_05 <- prediction_raster 252 | prediction_raster_05 <- writeStart(prediction_raster_05, 253 | filename=rfg.predict.density.rf.pred_05, 254 | format="GTiff", 255 | datatype="FLT4S", 256 | overwrite=TRUE, 257 | options=c("COMPRESS=LZW")) 258 | 259 | prediction_raster_50 <- prediction_raster 260 | prediction_raster_50 <- writeStart(prediction_raster_50, 261 | filename=rfg.predict.density.rf.pred_50, 262 | format="GTiff", 263 | datatype="FLT4S", 264 | overwrite=TRUE, 265 | options=c("COMPRESS=LZW")) 266 | 267 | prediction_raster_95 <- prediction_raster 268 | prediction_raster_95 <- writeStart(prediction_raster_95, 269 | filename=rfg.predict.density.rf.pred_95, 270 | format="GTiff", 271 | datatype="FLT4S", 272 | overwrite=TRUE, 273 | options=c("COMPRESS=LZW")) 274 | } 275 | 276 | 277 | 278 | ######################################################################## 279 | ## 280 | ## Create our primary cluster processing loop, recalling that we already 281 | ## have clusters running: 282 | # 283 | 284 | 285 | for (i in 1:blocks$n) { 286 | 287 | ## Receive results from a node: 288 | predictions <- recvOneData(cl) 289 | 290 | ## Check if there was an error: 291 | if (!predictions$value$success) { 292 | stop("ERROR: Cluster barfed...\n\n", predictions) 293 | } 294 | 295 | ## Which block are we processing: 296 | block <- predictions$value$tag 297 | 298 | 299 | prediction_raster <- writeValues(prediction_raster, 300 | predictions$value$value$rf_pred, 301 | blocks$row[block]) 302 | sd_raster <- writeValues(sd_raster, 303 | predictions$value$value$rf_sd, 304 | blocks$row[block]) 305 | 306 | 307 | if (quant) { 308 | 309 | prediction_raster_05 <- writeValues(prediction_raster_05, 310 | predictions$value$value$rf_05, 311 | blocks$row[block]) 312 | 313 | prediction_raster_50 <- writeValues(prediction_raster_50, 314 | predictions$value$value$rf_50, 315 | blocks$row[block]) 316 | 317 | prediction_raster_95 <- writeValues(prediction_raster_95, 318 | predictions$value$value$rf_95, 319 | blocks$row[block]) 320 | } 321 | 322 | 323 | ## Check to see if we are at the end of our block list: 324 | ni <- nodes + i 325 | if (ni <= blocks$n) { 326 | sendCall(cl[[predictions$node]], call_predictions, ni, tag=ni) 327 | } 328 | tEnd <- Sys.time() 329 | 330 | if (verbose){ 331 | progress_message(x=i, 332 | max=blocks$n, 333 | label=paste0("received block ", 334 | ni, 335 | " Processing Time: ", 336 | tmDiff(tStart,tEnd)) 337 | ) 338 | } 339 | } 340 | 341 | prediction_raster <- writeStop(prediction_raster) 342 | sd_raster <- writeStop(sd_raster) 343 | 344 | if (quant) { 345 | prediction_raster_05 <- writeStop(prediction_raster_05) 346 | prediction_raster_50 <- writeStop(prediction_raster_50) 347 | prediction_raster_95 <- writeStop(prediction_raster_95) 348 | } 349 | 350 | 351 | return(prediction_raster) 352 | } 
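## A minimal, self-contained sketch (illustrative only, not called by the
## package) of the block-wise write pattern used above: values are read per
## block with getValues() and streamed back out with writeStart(),
## writeValues() and writeStop(). The toy raster and the doubling of its
## values are assumptions made just for this example.
# library(raster)
# r   <- raster(nrows = 100, ncols = 100, vals = runif(100 * 100))
# bl  <- blockSize(r)
# out <- raster(r)
# out <- writeStart(out, filename = rasterTmpFile(), overwrite = TRUE)
# for (i in seq_len(bl$n)) {
#   v   <- getValues(r, row = bl$row[i], nrows = bl$nrows[i])
#   out <- writeValues(out, v * 2, bl$row[i])  # write the processed block back
# }
# out <- writeStop(out)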
353 | -------------------------------------------------------------------------------- /R/utility_functions.R: -------------------------------------------------------------------------------- 1 | # Authors: Maksym Bondarenko mb4@soton.ac.uk Jeremiah J. Nieves 2 | # Date : April 2023 3 | # Version 0.2 4 | # 5 | #' Exit program quietly without error 6 | #' @noRd 7 | #' @rdname stop_quietly 8 | stop_quietly <- function() { 9 | 10 | opt <- options(show.error.messages = FALSE) 11 | on.exit(options(opt)) 12 | stop() 13 | 14 | } 15 | 16 | # 17 | #' The default is to log transform the x argument: 18 | #' 19 | #' @param x vector 20 | #' @param inverse if TRUE inverse 21 | #' @rdname transY 22 | #' @return A data.frame merged covariates 23 | #' @noRd 24 | transY <- function(x, 25 | inverse=FALSE) { 26 | if (!inverse) { 27 | return( log(x) ) 28 | } else { 29 | ## Otherwise we backtransform it by exponentiating it: 30 | return( exp(x) ) 31 | } 32 | } 33 | 34 | 35 | # 36 | #' Print text into log.txt file 37 | #' @param prank type of log 38 | #' @param stext Text to log 39 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 40 | #' intermediate output from the function on the console, which might be 41 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 42 | #' @param log is logical. TRUE or FALSE: flag indicating whether to print 43 | #' intermediate output to file 44 | #' @rdname log_info 45 | #' @noRd 46 | log_info <- function(prank, 47 | stext, 48 | verbose=FALSE, 49 | log=FALSE){ 50 | 51 | if (verbose){ 52 | cat(stext) 53 | cat("\n") 54 | } 55 | 56 | if (log){ 57 | if (file.exists(getOption("pj.output.dir"))){ 58 | log_con <- file(file.path(getOption("pj.output.dir"), 59 | "logs.txt"), 60 | open="a") 61 | cat(paste0( format(Sys.time(), "%d %X"), 62 | " :: ", 63 | paste0(prank, " :: ", stext)), 64 | file = log_con, sep="\n") 65 | close(log_con) 66 | } 67 | } 68 | } 69 | 70 | 71 | 72 | 73 | #' Creating a folder in specified location. 74 | #' @param x name of the folder to be created 75 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 76 | #' intermediate output from the function on the console, which might be 77 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 78 | #' @rdname create_dir 79 | #' @noRd 80 | create_dir <- function(x, 81 | verbose){ 82 | 83 | x.covariates <- file.path(x, "covariates") 84 | 85 | if(!file.exists( x.covariates )) { 86 | 87 | msg <- paste0("Log :: Creating directory ", x.covariates,".") 88 | 89 | dir.create(x.covariates, 90 | recursive = TRUE, 91 | showWarnings = FALSE) 92 | 93 | if (verbose) message(msg) 94 | } 95 | 96 | } 97 | 98 | 99 | 100 | #' Function which takes time objects and calculates the difference between 101 | #' the start and end time point. Returned is the formatted time. 102 | #' @param start Start time. 103 | #' @param end End time. 104 | #' @param frm format of time difference. 105 | #' @rdname tmDiff 106 | #' @return Returned is the formatted time. 
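#' @note The example below is an illustrative sketch only.
#' @examples
#' \dontrun{
#' t0 <- Sys.time()
#' t1 <- t0 + 3661            # one hour, one minute and one second later
#' tmDiff(t0, t1)             # "01:01:01"
#' tmDiff(t0, t1, frm = "h")  # any value other than "hms" returns whole hours: 1
#' }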
107 | #' @noRd 108 | tmDiff <- function(start, 109 | end, 110 | frm = "hms") { 111 | 112 | dsec <- as.numeric(difftime(end, start, units = c("secs"))) 113 | hours <- floor(dsec / 3600) 114 | 115 | if (frm == "hms" ){ 116 | minutes <- floor((dsec - 3600 * hours) / 60) 117 | seconds <- dsec - 3600 * hours - 60 * minutes 118 | 119 | out=paste0( 120 | sapply(c(hours, minutes, seconds), function(x) { 121 | formatC(x, width = 2, format = "d", flag = "0") 122 | }), collapse = ":") 123 | 124 | return(out) 125 | }else{ 126 | return(hours) 127 | } 128 | } 129 | 130 | 131 | 132 | #' create_raster_stack Create a raster stack from all covariates from 133 | #' popfit and census_mask and water_raster. 134 | #' @param covariates covariates list 135 | #' @param popfit_final list of names used in the RF 136 | #' @param census_mask raster of census mask 137 | #' @param water_raster raster of water mask 138 | #' @importFrom utils stack 139 | #' @importFrom raster raster 140 | #' @rdname creat_raster_stack 141 | #' @return raster stack 142 | #' @noRd 143 | create_raster_stack <- function(covariates, 144 | popfit_final, 145 | census_mask, 146 | water_raster) { 147 | ## Create an empty list to hold the rasters: 148 | list_ras <- list() 149 | 150 | ## For every raster name in the list of names used in the RF: 151 | for (i in 1:length(names(popfit_final$forest$xlevels))){ 152 | ## Retrieve the name: 153 | var_name <- names(popfit_final$forest$xlevels)[i] 154 | r <- raster( covariates[[var_name]]$path ) 155 | names(r) <- var_name 156 | list_ras[[i]] <- r 157 | } 158 | ## Append the census mask and the water mask to that list: 159 | names(census_mask) <- "census_mask" 160 | list_ras[[length(list_ras) + 1]] <- census_mask 161 | names(water_raster) <- "water_raster" 162 | list_ras[[length(list_ras) + 1]] <- water_raster 163 | 164 | ## Stack all the rasters we just retrieved: 165 | ras_stack <- stack(list_ras) 166 | 167 | ## Return the raster stack object: 168 | return(ras_stack) 169 | } 170 | 171 | 172 | #' This function allows for precision decimal formatting. 173 | #' @param x number 174 | #' @param k format 175 | #' @rdname specify_decimal 176 | #' @return number 177 | #' @noRd 178 | specify_decimal <- function(x, k) format(round(x, k), nsmall=k) 179 | 180 | 181 | #' Checking extent of two rasters 182 | #' @param x the name of the raster file. 183 | #' @param y the name of the raster file. 184 | #' @rdname check_raster_extent 185 | #' @return logical 186 | #' @noRd 187 | check_raster_extent <- function(x, 188 | y){ 189 | 190 | r1 <- raster::raster(x) 191 | r2 <- raster::raster(y) 192 | xmin.r1 <- specify_decimal( raster::bbox(r1)[1,1],5 ) 193 | xmax.r1 <- specify_decimal( raster::bbox(r1)[1,2],5 ) 194 | ymin.r1 <- specify_decimal( raster::bbox(r1)[2,1],5 ) 195 | ymax.r1 <- specify_decimal( raster::bbox(r1)[2,2],5 ) 196 | 197 | xmin.r2 <- specify_decimal( raster::bbox(r2)[1,1],5 ) 198 | xmax.r2 <- specify_decimal( raster::bbox(r2)[1,2],5 ) 199 | ymin.r2 <- specify_decimal( raster::bbox(r2)[2,1],5 ) 200 | ymax.r2 <- specify_decimal( raster::bbox(r2)[2,2],5 ) 201 | 202 | if ( (xmin.r1) != (xmin.r2) | (xmax.r1) != (xmax.r2) | (ymin.r1) != (ymin.r2) | (ymax.r1) != (ymax.r2) ) { 203 | return(FALSE) 204 | }else{ 205 | return(TRUE) 206 | } 207 | 208 | } 209 | 210 | 211 | #' Changing extent of two rasters. 212 | #' @param srcfile the name of the raster file. 213 | #' @param dstfile the name of the raster file. 214 | #' @param verbose logical. Should report extra information on progress? 
215 | #' @param overwrite logical to overwrite or not the output file. 216 | #' @rdname check_raster_extent 217 | #' @return logical 218 | #' @noRd 219 | #' @importFrom terra sprc 220 | #' @importFrom terra rast 221 | #' @importFrom terra merge 222 | change_raster_extent <- function(srcfile, 223 | dstfile, 224 | verbose = FALSE, 225 | overwrite = FALSE){ 226 | 227 | 228 | r1 <- raster::raster(srcfile) 229 | ## Changing extent of two rasters using terra::merge. 230 | xmin <- raster::bbox(r1)[1,1] 231 | xmax <- raster::bbox(r1)[1,2] 232 | ymin <- raster::bbox(r1)[2,1] 233 | ymax <- raster::bbox(r1)[2,2] 234 | 235 | rFileName <- basename(dstfile) 236 | rPath <- dirname(dstfile) 237 | 238 | te <- paste0(' ',xmin,' ',ymin,' ',xmax,' ',ymax) 239 | 240 | rToPath <- file.path(rPath, paste0("tmp_",rFileName)) 241 | # gdalwarp(dstfile, 242 | # te=te, 243 | # co=c("COMPRESS=LZW","BLOCKXSIZE=512","BLOCKYSIZE=512", "TILED=YES", "BIGTIFF=YES"), 244 | # s_srs=crs(r1), 245 | # rToPath, 246 | # output_Raster=TRUE, 247 | # overwrite=TRUE, 248 | # verbose=verbose) 249 | terra::merge(x = rast(dstfile), 250 | first = TRUE, 251 | na.rm = TRUE, 252 | filename = rToPath, 253 | overwrite = TRUE, 254 | wopt = list(filetype = "GTiff", 255 | gdal = c("COMPRESS=LZW", 256 | "BLOCKXSIZE=512", 257 | "BLOCKYSIZE=512", 258 | "TILED=YES", 259 | "BIGTIFF=YES"))) 260 | 261 | 262 | if(file.exists(dstfile)){ unlink(dstfile, 263 | recursive = TRUE, 264 | force = FALSE)} 265 | 266 | file.rename(from=rToPath, to=dstfile) 267 | } 268 | 269 | 270 | 271 | #' Create project directory based on tag of the countries. 272 | #' @param input.countries input list of the countries. 273 | #' @param output_dir path to project dir 274 | #' @param verbose is logical. TRUE or FALSE: flag indicating whether to print 275 | #' intermediate output from the function on the console, which might be 276 | #' helpful for model debugging. Default is \code{verbose} = TRUE. 277 | #' @param log is logical. 
TRUE or FALSE: flag indicating whether to write 278 | #' intermediate output to the log file. 279 | #' @rdname create_dirs_for_prj 280 | #' @return list of project directories 281 | #' @noRd 282 | create_dirs_for_prj <- function(input.countries, 283 | output_dir, 284 | verbose = FALSE, 285 | log = FALSE){ 286 | 287 | 288 | countries_tag_output <- paste(input.countries, collapse = "_") 289 | 290 | subDir.country <- file.path(output_dir, countries_tag_output, "tmp") 291 | 292 | if(!file.exists(subDir.country)){ 293 | log_info("Info", paste0("Creating dir ", subDir.country), 294 | verbose = verbose, 295 | log = log) 296 | dir.create(subDir.country, recursive = TRUE, showWarnings = FALSE) 297 | } 298 | 299 | subDir.country.output.zst <- file.path(output_dir, 300 | countries_tag_output, 301 | "zonal_stats") 302 | 303 | if(!file.exists(subDir.country.output.zst)){ 304 | log_info("Info", 305 | paste0("Creating dir ", 306 | subDir.country.output.zst), 307 | verbose = verbose, 308 | log = log) 309 | dir.create(subDir.country.output.zst, 310 | recursive = TRUE, 311 | showWarnings = FALSE) 312 | } 313 | 314 | subDir.countrys.merged <- file.path(output_dir, 315 | countries_tag_output, 316 | "merged") 317 | 318 | if (length(input.countries) > 1){ 319 | 320 | if(!file.exists(subDir.countrys.merged)){ 321 | log_info("Info", paste0("Creating dir ", 322 | subDir.countrys.merged), 323 | verbose = verbose, 324 | log = log) 325 | dir.create(subDir.countrys.merged, recursive = TRUE, showWarnings = FALSE) 326 | } 327 | } 328 | 329 | 330 | ## Return a list of objects which represent the output path, the data path, 331 | ## and the country(ies) tag: 332 | return(list(countries_tag = countries_tag_output, 333 | output = file.path(output_dir, countries_tag_output), 334 | tmp = subDir.country, 335 | data_cvr = subDir.country.output.zst, 336 | data_merged = subDir.countrys.merged 337 | )) 338 | 339 | } 340 | 341 | #' Checking if covariate extents match the mastergrid. 342 | #' @param covariates list of covariates 343 | #' @param fix_cov logical. Whether to fix covariate extents that do not match the mastergrid 344 | #' @param verbose logical. Should report extra information on progress? 345 | #' @param log logical. Should progress reports be saved in the log file? 346 | #' @rdname check_cov 347 | #' @return logical. TRUE or FALSE; an error is raised if an extent mismatch is found and \code{fix_cov} is FALSE.
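#' @note The example below is an illustrative sketch only; the folder and file
#'   names are hypothetical and show the expected nested-list structure.
#' @examples
#' \dontrun{
#' covs <- list(
#'   "NPL" = list(
#'     "mastergrid" = list(dataset_folder   = "/data/NPL",
#'                         dataset_filename = "npl_mastergrid.tif"),
#'     "cov1"       = list(dataset_folder   = "/data/NPL",
#'                         dataset_filename = "covariate1.tif")
#'   )
#' )
#' check_cov(covs, fix_cov = FALSE)
#' }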
348 | #' @noRd 349 | check_cov <- function(covariates, 350 | fix_cov, 351 | verbose = FALSE, 352 | log = FALSE){ 353 | 354 | for ( i in names(covariates) ) { 355 | 356 | mdir <- covariates[[i]]$mastergrid$dataset_folder 357 | mfile <- covariates[[i]]$mastergrid$dataset_filename 358 | mpath <- file.path(mdir, mfile) 359 | 360 | 361 | for (j in names(covariates[[i]]) ){ 362 | 363 | if (j == "mastergrid") next 364 | 365 | fdir <- covariates[[i]][[j]]$dataset_folder 366 | ffile <- covariates[[i]][[j]]$dataset_filename 367 | fpath <- file.path(fdir,ffile) 368 | 369 | 370 | if (!check_raster_extent(mpath, fpath)){ 371 | 372 | if (fix_cov){ 373 | log_info("Warning", 374 | paste0("Will try to fix ", 375 | ffile ," extent to match mastergrid extent."), 376 | verbose = verbose, log = log) 377 | change_raster_extent(mpath,fpath) 378 | return(FALSE) 379 | }else{ 380 | 381 | #log_info("Error", paste0("Covariate ", ffile , 382 | # "extent is not match mastergrid extent."), 383 | # verbose = verbose, 384 | # log = log) 385 | stop(paste0("Covariate ", ffile , 386 | " extent does not match mastergrid extent.")) 387 | 388 | return(TRUE) 389 | } 390 | } 391 | 392 | } 393 | } 394 | 395 | return(FALSE) 396 | 397 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## popRF: Random Forest-informed Population Disaggregation R package 2 | 3 | High resolution, recent data on human population distributions are important for measuring impacts of population growth, monitoring human-environment interactions and for planning and policy development. Many methods are used to disaggregate census data and predict population densities for finer scale, gridded population data sets. 4 | `popRF` is a population modelling R package utilizing Random Forests to inform a dasymetric redistribution of census-based population count data. A description of using Random Forests machine learning method in `popRF` is described in [Stevens et al](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0107042). 5 | 6 | ### Installation 7 | The `popRF` package can be installed directly from Github. 8 | 9 | ``` r 10 | install.packages("devtools") 11 | devtools::install_github("wpgp/popRF") 12 | ``` 13 | 14 | ### Demo 15 | 16 | The `popRF` package has a demo function `popRFdemo` to generate a population layer 17 | using the [WorldPop](https://www.worldpop.org) geospatial covariates and 18 | subnational census-based population estimates for 230 countries. 19 | All necessary covariates will be downloaded and used to disaggregat population. 20 | All input datasets use a geographical coordinate system (GCS) with WGS 1984 21 | datum (EPSG:4326) in Geotiff format at a resolution of 3 arc-second 22 | (0.00083333333 decimal degree, approximately 100m at the equator). 23 | 24 | The following script will produce a population layer for Nepal (NPL) using 4 cores. 25 | 26 | ``` r 27 | library("popRF") 28 | 29 | popRFdemo(project_dir="/home/user/demo", 30 | country="NPL", 31 | cores=4) 32 | 33 | ``` 34 | 35 | ### Basic Usage 36 | 37 | ``` r 38 | library("popRF") 39 | 40 | # Specifying a name of the file from which the unique area ID and corresponding 41 | # population values are to be read from. The file should contain two columns 42 | # comma-separated with the value of administrative ID and population without 43 | # columns names. 
If it does not contain an absolute path, the file name is 44 | # relative to the current working directory 45 | 46 | pop_table <- list("NPL"="/user/npl_population.csv") 47 | 48 | 49 | # Specifying a nested list of named list(s), i.e. where each element of the 50 | # first list is a named list object with atomic elements. The name of 51 | # each named list corresponds to the 3-letter ISO code of a specified 52 | # country. The elements within each named list define the specified 53 | # input covariates to be used in the random forest model, i.e. the name 54 | # of the covariates and the corresponding, if applicable and local, path 55 | # to them. If the path is not a full path, it is assumed to be relative 56 | # to the current working directory 57 | 58 | input_cov <- list( 59 | "NPL"= list( 60 | "cov1" = "covariate1.tif", 61 | "cov2" = "covariate2.tif" 62 | ) 63 | ) 64 | 65 | # Specifying a named list where each element of the list defines the 66 | # path to the input mastergrid(s), i.e. the template gridded raster(s) 67 | # that contains the unique area IDs as their value. The name(s) 68 | # corresponds to the 3-letter ISO code(s) of a specified country(ies). 69 | # Each corresponding element defines the path to the mastergrid(s). If 70 | # the path is local and not a full path, it is assumed to be relative to 71 | # the current working directory 72 | 73 | input_mastergrid <- list("NPL" = "npl_mastergrid.tif") 74 | 75 | # Specifying a named list where each element of the list defines the path 76 | # to the input country-specific watermask. The name corresponds to the 77 | # 3-letter ISO code of a specified country. Each corresponding element 78 | # defines the path to the watermask, i.e. the binary raster that 79 | # delineates the presence of water (1) and non-water (0), that is used 80 | # to mask out areas from modelling. If the path is local and not a full 81 | # path, it is assumed to be relative to the current working directory. 82 | 83 | 84 | input_watermask <- list("NPL" = "npl_watermask.tif") 85 | 86 | # Specifying a named list where each element of the list defines the path 87 | # to the input raster(s) containing the pixel area. The name corresponds 88 | # to the 3-letter ISO code of a specified country. Each corresponding 89 | # element defines the path to the raster whose values indicate the area 90 | # of each unprojected (WGS84) pixel. If the path is local and not a full 91 | # path, it is assumed to be relative to the current working directory. 92 | 93 | input_px_area <- list("NPL" = "npl_px_area.tif") 94 | 95 | # Running a model 96 | 97 | res <- popRF(pop=pop_table, 98 | cov=input_cov, 99 | mastergrid=input_mastergrid, 100 | watermask=input_watermask, 101 | px_area=input_px_area, 102 | output_dir="/user/output", 103 | cores=4) 104 | 105 | # Plot populataion raster 106 | plot(res$pop) 107 | 108 | # Plot Error via Trees 109 | plot(res$popfit) 110 | ``` 111 | 112 | 113 | 114 | ### Outputs 115 | 116 | Population raster layer in GeoTiff format. 117 | 118 | ## Contributions 119 | 120 | Contributions are welcome. Please raise or respond to an issue, or create a new 121 | branch to develop a feature/modification and submit a pull request. 122 | 123 | ## Acknowledgements 124 | 125 | ``` r 126 | #> citation("popRF") 127 | 128 | #> To cite popRF in publications use: 129 | #> 130 | #> Bondarenko M., Nieves J.J., Forrest R.S., Andrea E.G., Jochem C., Kerr D., and Sorichetta A. 
(2021): popRF: Random Forest-informed Population 131 | #> Disaggregation R package, _Comprehensive R Archive Network (CRAN)_, url:https://cran.r-project.org/package=popRF. 132 | #> 133 | #> A BibTeX entry for LaTeX users is 134 | #> 135 | #> @Manual{, 136 | #> title = {popRF: Random Forest-informed Population Disaggregation R package.}, 137 | #> author = {Maksym Bondarenko and Jeremiah J Nieves and Forrest R. Stevens and Andrea E. Gaughan and Chris Jochem and David Kerr and Alessandro Sorichetta}, 138 | #> year = {2021}, 139 | #> journal = {Comprehensive R Archive Network (CRAN)}, 140 | #> url = {https://cran.r-project.org/package=popRF}, 141 | #> language = {English}, 142 | #> } 143 | 144 | ``` 145 | 146 | ### License 147 | [GNU General Public License v3.0 (GNU GPLv3)](https://www.gnu.org/licenses/gpl-3.0.en.html) -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## 24 July 2021 2 | * Included a references describing the methods into the package DESCRIPTION file 3 | * Cleaned up the code to make sure there is no console output if "verbose" is FALSE. popRF is the package to work with a raster files, therefore calculation can take some time and for a user convenience parameter "verbose" is TRUE by default . 4 | Only the output which can not be suppressed in popRF package is from tuneRF function from randomeForest package. Even if "trace" paramenter is "False" in tuneRF function, there is a place where tuneRF function is using "cat" to print text (cat(Improve, improve, "\n")). 5 | * There were no ERRORs or WARNINGs after "R CMD check results" 6 | 7 | ## 22 July 2021 8 | * Omitted the licence file and the reference as it was advised by CRAN team member after first submution 9 | * Removed DOI: 10.5258/SOTON/WP00715 which was "Not Found" 10 | * There were no ERRORs or WARNINGs after "R CMD check results" 11 | 12 | ## Tested on environments 13 | * Red Hat Enterprise Linux Server release 7.9 R-version 3.6.2 14 | * win-builder (devel and release) 15 | * Ubuntu 20.04.2 LTS (GNU/Linux 5.8.0-50-generic x86_64) R-version 4.0.5 16 | * Windows 10/64 R-version 4.0.5 17 | * Southampton University HPC cluster Iridis5 RH8 R-version 3.6.2 18 | 19 | ## Tested on R-hub building platforms 20 | * Debian Linux, R-devel, clang, ISO-8859-15 locale (debian-clang-devel) 21 | * Debian Linux, R-devel, GCC (debian-gcc-devel) 22 | * Debian Linux, R-release, GCC (debian-gcc-release) 23 | * Fedora Linux, R-devel, clang, gfortran (fedora-clang-devel) 24 | * CentOS 8, stock R from EPEL (linux-x86_64-centos-epel) 25 | * Debian Linux, R-devel, GCC ASAN/UBSAN (linux-x86_64-rocker-gcc-san) 26 | * macOS 10.13.6 High Sierra, R-release, brew (macos-highsierra-release) 27 | * macOS 10.13.6 High Sierra, R-release, CRAN's setup (macos-highsierra-release-cran) 28 | * Ubuntu Linux 20.04.1 LTS, R-devel, GCC (ubuntu-gcc-devel) 29 | * Windows Server 2008 R2 SP1, R-devel, 32/64 bit (windows-x86_64-devel) 30 | * Windows Server 2008 R2 SP1, R-release, 32/64 bit (windows-x86_64-release) 31 | 32 | ## R CMD check results 33 | There were no ERRORs or WARNINGs. 34 | 35 | ## Downstream dependencies 36 | There are no downstream dependencies for this package. 
-------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite popRF in publications use:") 2 | 3 | citEntry(entry = "Manual", 4 | title = "popRF: Random Forest-informed Population Disaggregation R package.", 5 | author = c( 6 | person( 7 | given = "Maksym", 8 | family = "Bondarenko", 9 | role = c("aut", "cre", "cph"), 10 | email = "mb4@soton.ac.uk", 11 | comment = c(ORCID = "0000-0003-4958-6551") 12 | ), 13 | person( 14 | given = "Jeremiah J", 15 | family = "Nieves", 16 | role = c("aut"), 17 | email = "jeremiah.j.nieves@outlook.com", 18 | comment = c(ORCID = "0000-0002-7423-1341") 19 | ), 20 | person( 21 | given = "Forrest R.", 22 | family = "Stevens", 23 | role = c("aut"), 24 | email = "forrest.stevens@louisville.edu" 25 | ), 26 | person( 27 | given = "Andrea E.", 28 | family = "Gaughan", 29 | role = c("aut"), 30 | email = "ae.gaughan@louisville.edu" 31 | ), 32 | person( 33 | given = "Chris", 34 | family = "Jochem", 35 | role = c("ctb"), 36 | email = "W.C.Jochem@soton.ac.uk", 37 | comment = c(ORCID = "0000-0003-2192-5988") 38 | ), 39 | person( 40 | given = "David", 41 | family = "Kerr", 42 | role = c("ctb"), 43 | email = "dk2n16@soton.ac.uk" 44 | ), 45 | person( 46 | given = "Alessandro", 47 | family = "Sorichetta", 48 | role = c("ctb"), 49 | email = "as1v13@soton.ac.uk", 50 | comment = c(ORCID = "0000-0002-3576-5826") 51 | )), 52 | year = "2021", 53 | journal = "Comprehensive R Archive Network (CRAN)", 54 | url = "https://cran.r-project.org/package=popRF", 55 | language = "English", 56 | 57 | textVersion = 58 | paste("Bondarenko M., Nieves J.J., Forrest R.S., Andrea E.G., Jochem C., Kerr D., and Sorichetta A. (2021):", 59 | "popRF: Random Forest-informed Population Disaggregation R package,", 60 | "_Comprehensive R Archive Network (CRAN)_,", 61 | "url:https://cran.r-project.org/package=popRF") 62 | ) -------------------------------------------------------------------------------- /man/figures/example_ppp_ECU_v2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wpgp/popRF/bee3af9f86512f10e7609fd733f51aee9c73b04d/man/figures/example_ppp_ECU_v2.jpg -------------------------------------------------------------------------------- /man/popRF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/popRF.R 3 | \name{popRF} 4 | \alias{popRF} 5 | \title{Disaggregating Census Data for Population Mapping Using Random Forests 6 | with Remotely-Sensed and Ancillary Data.} 7 | \usage{ 8 | popRF(pop, cov, mastergrid, watermask, px_area, output_dir, cores=0, 9 | quant=FALSE, set_seed=2010, fset=NULL, fset_incl=FALSE, 10 | fset_cutoff=20, fix_cov=FALSE, check_result=TRUE, verbose=TRUE, 11 | log=FALSE, ...) 12 | } 13 | \arguments{ 14 | \item{pop}{Character vector containing the name of the file from which the 15 | unique area ID and corresponding population values are to be read 16 | from. The file should contain two columns comma-separated with the 17 | value of administrative ID and population without columns names. 18 | If it does not contain an absolute path, the file name is relative to 19 | the current working directory.} 20 | 21 | \item{cov}{A nested list of named list(s), i.e. where each element of the 22 | first list is a named list object with atomic elements. 
The name of 23 | each named list corresponds to the 3-letter ISO code of a specified 24 | country. The elements within each named list define the specified 25 | input covariates to be used in the random forest model, i.e. the name 26 | of the covariates and the corresponding, if applicable and local, path 27 | to them. If the path is not a full path, it is assumed to be relative 28 | to the current working directory. 29 | Example for Nepal (NPL): 30 | 31 | \if{html}{\out{
}}\preformatted{list( 32 | "NPL"=list( 33 | "covariate1" = "covariate1.tif", 34 | "covariate2" = "covariate2.tif" 35 | ) 36 | ) 37 | #> $NPL 38 | #> $NPL$covariate1 39 | #> [1] "covariate1.tif" 40 | #> 41 | #> $NPL$covariate2 42 | #> [1] "covariate2.tif" 43 | }\if{html}{\out{
}}} 44 | 45 | \item{mastergrid}{A named list where each element of the list defines the 46 | path to the input mastergrid(s), i.e. the template gridded raster(s) 47 | that contains the unique area IDs as their value. The name(s) 48 | corresponds to the 3-letter ISO code(s) of a specified country(ies). 49 | Each corresponding element defines the path to the mastergrid(s). If 50 | the path is local and not a full path, it is assumed to be relative to 51 | the current working directory. 52 | Example: 53 | 54 | \if{html}{\out{
}}\preformatted{list( 55 | "NPL" = "npl_mastergrid.tif" 56 | ) 57 | }\if{html}{\out{
}}} 58 | 59 | \item{watermask}{A named list where each element of the list defines the path 60 | to the input country-specific watermask. The name corresponds to the 61 | 3-letter ISO code of a specified country. Each corresponding element 62 | defines the path to the watermask, i.e. the binary raster that 63 | delineates the presence of water (1) and non-water (0), that is used 64 | to mask out areas from modelling. If the path is local and not a full 65 | path, it is assumed to be relative to the current working directory. 66 | Example: 67 | 68 | \if{html}{\out{
}}\preformatted{list( 69 | "NPL" = "npl_watermask.tif" 70 | ) 71 | }\if{html}{\out{
}}} 72 | 73 | \item{px_area}{A named list where each element of the list defines the path 74 | to the input raster(s) containing the pixel area. The name corresponds 75 | to the 3-letter ISO code of a specified country. Each corresponding 76 | element defines the path to the raster whose values indicate the area 77 | of each unprojected (WGS84) pixel. If the path is local and not a full 78 | path, it is assumed to be relative to the current working directory. 79 | Example: 80 | 81 | \if{html}{\out{
}}\preformatted{list( 82 | "NPL" = "npl_px_area.tif" 83 | ) 84 | #> $NPL 85 | #> [1] "npl_px_area.tif" 86 | }\if{html}{\out{
}}} 87 | 88 | \item{output_dir}{Character vector containing the path to the directory for 89 | writing output files. Default is the temp directory.} 90 | 91 | \item{cores}{Integer vector containing an integer. Indicates the number of 92 | cores to use in parallel when executing the function. If set to 0 93 | \code{(max_number_of_cores - 1)} will be used based on as many 94 | processors as the hardware and RAM allow. Default is \code{cores} = 0.} 95 | 96 | \item{quant}{Logical vector indicating whether to produce the quantile 97 | regression forests (TRUE) to generate prediction intervals. 98 | Default is \code{quant} = TRUE.} 99 | 100 | \item{set_seed}{Integer, set the seed. Default is \code{set_seed} = 2010} 101 | 102 | \item{fset}{Named list containing character vector elements that give the 103 | path to the directory(ies) containing the random forest model objects 104 | (.RData) with which we are using as a "fixed set" in this modeling, 105 | i.e. are we parameterizing, in part or in full, this RF model run upon 106 | another country's(ies') RF model object. The list should have two 107 | named character vectors, "final" and "quant", with the character 108 | vectors corresponding to the directory paths of the corresponding 109 | folders that hold the random forest model objects and the quantile 110 | regression random forest model objects, respectively. 111 | Numerous model objects can be in each folder "./final/" and "./quant/" 112 | representing numerous countries with the understanding that the model 113 | being run will incorporate all model objects in the folder, e.g. if 114 | a model object for Mexico and} 115 | 116 | \item{fset_incl}{Logical vector indicating whether the RF model object 117 | will or will not be combined with another RF model run upon another 118 | country's(ies') RF model object. Default is \code{fset_incl} = FALSE} 119 | 120 | \item{fset_cutoff}{Numeric vector containing an integer. This parameter is 121 | only used if \code{fset_incl} is TRUE. If the country has less than 122 | \code{fset_cutoff} admin units, then RF popfit will not be combined 123 | with the RF model run upon another country's(ies') RF model object. 124 | Default is \code{fset_cutoff} = 20.} 125 | 126 | \item{fix_cov}{Logical vector indicating whether the raster extent of the 127 | covariates will be corrected if the extent does not match mastergrid. 128 | Default is \code{fix_cov} = FALSE.} 129 | 130 | \item{check_result}{Logical vector indicating whether the results will be 131 | compared with input data. Default is \code{check_result} = TRUE.} 132 | 133 | \item{verbose}{Logical vector indicating whether to print 134 | intermediate output from the function to the console, which might be 135 | helpful for model debugging. Default is \code{verbose} = TRUE.} 136 | 137 | \item{log}{Logical vector indicating whether to print intermediate 138 | output from the function to the log.txt file. 139 | Default is \code{log} = FALSE.} 140 | 141 | \item{...}{Additional arguments:\cr 142 | \code{binc}: Numeric. Increase number of blocks sugesting for 143 | processing raster file.\cr 144 | \code{boptimise}: Logical. Optimize total memory requires to 145 | processing raster file by reducing the memory need to 35\%.\cr 146 | \code{bsoft}: Numeric. If raster can be processed on less 147 | then \code{cores} it will be foresed to use less number 148 | of \code{cores}.\cr 149 | \code{nodesize}: Minimum size of terminal nodes. 
Setting this number larger 150 | causes smaller trees to be grown (and thus take less time). See 151 | \code{\link[randomForest]{randomForest}} for more details. Default 152 | is \code{nodesize} = NULL and will be calculated 153 | as \code{length(y_data)/1000}.\cr 154 | \code{maxnodes}: Maximum number of terminal nodes trees in the forest can have. 155 | If not given, trees are grown to the maximum possible (subject to 156 | limits by nodesize). If set larger than maximum possible, a warning is 157 | issued. See \code{\link[randomForest]{randomForest}} for more details. 158 | Default is \code{maxnodes} = NULL.\cr 159 | \code{ntree}: Number of trees to grow. This should not be set to too small a 160 | number, to ensure that every input row gets predicted at least a few 161 | times. See \code{\link[randomForest]{randomForest}} for more details. 162 | Default is \code{ntree} = NULL and \code{popfit$ntree} will be used.\cr 163 | \code{mtry}: Number of variables randomly sampled as candidates at each split. 164 | See \code{\link[randomForest]{randomForest}} for more details. 165 | Default is \code{mtry} = NULL and 166 | 167 | \code{popfit$mtry} will be used.\cr 168 | \code{proximity}: Logical vector indicating whether proximity measures among 169 | the rows should be computed. Default is \code{proximity} = TRUE. 170 | See \code{\link[randomForest]{randomForest}} for more details.\cr 171 | \code{const}: Character vector containing the name of the file from which the 172 | mask will be used to constrain the population layer. The mask file should 173 | have value \code{0} as a mask. If it does not contain an absolute path, 174 | the file name is relative to the current working directory.} 175 | } 176 | \value{ 177 | Raster* object of gridded population. 178 | } 179 | \description{ 180 | Disaggregating Census Data for Population Mapping Using Random Forests 181 | with Remotely-Sensed and Ancillary Data. 182 | } 183 | \details{ 184 | This function produces gridded population density estimates using 185 | a Random Forest model as described in \emph{Stevens, et al. (2015)} 186 | \doi{10.1371/journal.pone.0107042}. 187 | The unit-average log-transformed population density and covariate 188 | summary values for each census unit are then used to train a 189 | Random Forest model (\doi{10.1023/A:1010933404324}) 190 | to predict log population density. Random Forest models are an 191 | ensemble, nonparametric modelling approach that grows a "forest" of 192 | individual classification or regression trees and improves upon 193 | bagging by using the best of a random selection of predictors at 194 | each node in each tree. The Random Forest is used to produce grid, 195 | i.e. pixel, level population density estimates that are used as 196 | unit-relative weights to dasymetrically redistribute the census 197 | based areal population counts. This function also allows for 198 | modelling based upon a 199 | regional parameterisation (\doi{10.1080/17538947.2014.965761}) 200 | of other previously run models as well as the creation of models based 201 | upon multiple countries at once (\doi{10.1016/j.compenvurbsys.2019.01.006}). 202 | This function assumes that all data is unprojected and is in the 203 | WGS84 coordinate system.
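In essence (a simplified sketch of the dasymetric step, not the verbatim
implementation), if \eqn{w_i} denotes the Random Forest-predicted population
density of pixel \eqn{i} multiplied by its area and \eqn{C_u} the census count
of unit \eqn{u}, each pixel receives
\deqn{P_i = C_u \frac{w_i}{\sum_{j \in u} w_j}}{P_i = C_u * w_i / sum(w_j for j in u)}
so that the pixel-level estimates within a unit always sum back to the input
census total of that unit.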
204 | }
205 | \examples{
206 | \dontrun{
207 | 
208 | library("popRF")
209 | 
210 | pop_table <- list("NPL"="/user/npl_population.csv")
211 | 
212 | input_cov <- list(
213 |     "NPL"=list(
214 |          "cov1" = "covariate1.tif",
215 |          "cov2" = "covariate2.tif"))
216 | 
217 | 
218 | input_mastergrid <- list("NPL" = "npl_mastergrid.tif")
219 | input_watermask <- list("NPL" = "npl_watermask.tif")
220 | input_px_area <- list("NPL" = "npl_px_area.tif")
221 | 
222 | res <- popRF(pop=pop_table,
223 |              cov=input_cov,
224 |              mastergrid=input_mastergrid,
225 |              watermask=input_watermask,
226 |              px_area=input_px_area,
227 |              output_dir="/user/output",
228 |              cores=4)
229 | 
230 | # Plot population raster
231 | plot(res$pop)
232 | 
233 | # Plot error vs. number of trees
234 | plot(res$popfit)
235 | 
236 | }
237 | }
238 | \references{
239 | \itemize{
240 | \item Stevens, F. R., Gaughan, A. E., Linard, C. & A. J. Tatem. 2015.
241 | Disaggregating Census Data for Population Mapping Using Random Forests
242 | with Remotely-Sensed and Ancillary Data. PLoS ONE, 10: e0107042.
243 | \doi{10.1371/journal.pone.0107042}
244 | \item Breiman, L. 2001. Random Forests. Machine Learning, 45: 5-32.
245 | \doi{10.1023/A:1010933404324}
246 | \item Gaughan, A. E., Stevens, F. R., Linard, C., Patel, N. N., & A. J. Tatem.
247 | 2015. Exploring Nationally and Regionally Defined Models for Large Area
248 | Population Mapping. International Journal of Digital Earth, 8(12):
249 | 989-1006. \doi{10.1080/17538947.2014.965761}
250 | \item Sinha, P., Gaughan, A. E., Stevens, F. R., Nieves, J. J., Sorichetta, A.,
251 | & A. J. Tatem. 2019. Assessing the Spatial Sensitivity of a Random
252 | Forest Model: Application in Gridded Population Modeling. Computers,
253 | Environment and Urban Systems, 75: 132-145.
254 | \doi{10.1016/j.compenvurbsys.2019.01.006}
255 | }
256 | }
257 | \author{
258 | Maksym Bondarenko \href{mailto:mb4@soton.ac.uk}{mb4@soton.ac.uk},
259 | Jeremiah J. Nieves \href{mailto:J.J.Nieves@liverpool.ac.uk}{J.J.Nieves@liverpool.ac.uk},
260 | Forrest R. Stevens \href{mailto:forrest.stevens@louisville.edu}{forrest.stevens@louisville.edu},
261 | Andrea E. Gaughan \href{mailto:ae.gaughan@louisville.edu}{ae.gaughan@louisville.edu},
262 | David Kerr \href{mailto:dk2n16@soton.ac.uk}{dk2n16@soton.ac.uk},
263 | Chris Jochem \href{mailto:W.C.Jochem@soton.ac.uk}{W.C.Jochem@soton.ac.uk} and
264 | Alessandro Sorichetta \href{mailto:as1v13@soton.ac.uk}{as1v13@soton.ac.uk}
265 | }
266 | 
--------------------------------------------------------------------------------
/man/popRFdemo.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/popRFdemo.R
3 | \name{popRFdemo}
4 | \alias{popRFdemo}
5 | \title{Function to demo the popRF package using WorldPop input data.}
6 | \usage{
7 | popRFdemo(project_dir,
8 |           country="NPL",
9 |           cores=0,
10 |           quant=TRUE,
11 |           ftp=TRUE,
12 |           verbose=TRUE,
13 |           log=TRUE, ...)
14 | }
15 | \arguments{
16 | \item{project_dir}{Path to the folder to save the outputs.}
17 | 
18 | \item{country}{Character. ISO 3166-1 alpha-3 code of the country
19 | (see \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3}{country codes}).
20 | Default is NPL (Nepal).}
21 | 
22 | \item{cores}{Integer. Number of cores to use when executing the function.
23 | If set to 0, \code{(max_number_of_cores - 1)} cores will be used, based on as
24 | many processors as the hardware and RAM allow.
25 | Default is \code{cores} = 0.}
26 | 
27 | \item{quant}{Logical. If FALSE, the quantile regression forests will not be produced.}
28 | 
29 | \item{ftp}{Logical. TRUE or FALSE: flag indicating whether the
30 | \href{ftp://ftp.worldpop.org}{FTP} or \href{https://data.worldpop.org}{HTTPS} endpoint of the
31 | \href{https://sdi.worldpop.org/wpdata}{WorldPop data} hub server will be used.
32 | Default is \code{ftp} = TRUE.}
33 | 
34 | \item{verbose}{Logical. TRUE or FALSE: flag indicating whether to print
35 | intermediate output from the function to the console, which might be
36 | helpful for model debugging. Default is \code{verbose} = TRUE.}
37 | 
38 | \item{log}{Logical. TRUE or FALSE: flag indicating whether to print intermediate
39 | output from the function to the log.txt file.
40 | Default is \code{log} = FALSE.}
41 | 
42 | \item{...}{Additional arguments:\cr
43 | \code{binc}: Numeric. Increases the suggested number of blocks for
44 | processing the raster files.\cr
45 | \code{boptimise}: Logical. Optimize the total memory required to
46 | process the raster files by reducing the memory need to 35\%.\cr
47 | \code{bsoft}: Numeric. If the raster can be processed using fewer
48 | than \code{cores} cores, it will be forced to use a smaller number
49 | of cores.\cr
50 | \code{nodesize}: Minimum size of terminal nodes. Setting this number larger
51 | causes smaller trees to be grown (and thus take less time). See
52 | \code{\link[randomForest]{randomForest}} for more details. Default
53 | is \code{nodesize} = NULL, in which case it will be calculated
54 | as \code{length(y_data)/1000}.\cr
55 | \code{maxnodes}: Maximum number of terminal nodes that trees in the forest can have.
56 | If not given, trees are grown to the maximum possible (subject to
57 | limits by nodesize). If set larger than the maximum possible, a warning is
58 | issued. See \code{\link[randomForest]{randomForest}} for more details.
59 | Default is \code{maxnodes} = NULL.\cr
60 | \code{ntree}: Number of trees to grow. This should not be set to too small a
61 | number, to ensure that every input row gets predicted at least a few times.
62 | See \code{\link[randomForest]{randomForest}} for more details. Default is
63 | \code{ntree} = NULL, in which case \code{popfit$ntree} will be used.\cr
64 | \code{mtry}: Number of variables randomly sampled as candidates
65 | at each split.
66 | See \code{\link[randomForest]{randomForest}} for more details.
67 | Default is \code{mtry} = NULL, in which case
68 | \code{popfit$mtry} will be used.}
69 | }
70 | \value{
71 | Raster* object of gridded population surfaces.
72 | }
73 | \description{
74 | This function allows the user to generate a population layer
75 | using the \href{https://www.worldpop.org}{WorldPop} geospatial covariates and
76 | subnational census-based population estimates for 230 countries.
77 | All input datasets use a geographical coordinate system (GCS) with the WGS 1984
78 | datum (EPSG:4326), in GeoTIFF format, at a resolution of 3 arc-seconds
79 | (0.00083333333 decimal degrees, approximately 100 m at the equator).
80 | The mastergrid of sub-national administrative unit boundaries was rasterised
81 | by \href{http://www.ciesin.org}{CIESIN}.
82 | 
83 | The following covariates will be downloaded and used to disaggregate the
84 | population (for the year 2020) from census units into grid cells.
85 | \itemize{
86 | \item subnational_admin_2000_2020.tif - sub-national units provided by nationalEAs
87 | \item esaccilc_dst011_2015.tif - Distance to ESA-CCI-LC cultivated area edges 2015.
88 | \item esaccilc_dst040_2015.tif - Distance to ESA-CCI-LC woody-tree area edges 2015.
89 | \item esaccilc_dst130_2015.tif - Distance to ESA-CCI-LC shrub area edges 2015.
90 | \item esaccilc_dst140_2015.tif - Distance to ESA-CCI-LC herbaceous area edges 2015.
91 | \item esaccilc_dst150_2015.tif - Distance to ESA-CCI-LC sparse vegetation area edges 2015.
92 | \item esaccilc_dst160_2015.tif - Distance to ESA-CCI-LC aquatic vegetation area edges 2015.
93 | \item esaccilc_dst190_2015.tif - Distance to ESA-CCI-LC artificial surface edges 2015.
94 | \item esaccilc_dst200_2015.tif - Distance to ESA-CCI-LC bare area edges 2015.
95 | \item esaccilc_dst_water_100m_2000_2012.tif - ESA-CCI-LC inland waterbodies 2000-2012.
96 | \item coastline_100m_2000_2020.tif - Distance to coastline 2000-2020.
97 | \item dst_roadintersec_100m_2016.tif - Distance to OSM major road intersections.
98 | \item dst_waterway_100m_2016.tif - Distance to OSM major waterways.
99 | \item dst_road_100m_2016.tif - Distance to OSM major roads.
100 | \item px_area.tif - Grid-cell surface areas.
101 | \item srtm_slope_100m.tif - SRTM-based slope 2000 (SRTM is Shuttle Radar Topography Mission).
102 | \item srtm_topo_100m.tif - SRTM elevation 2000.
103 | \item viirs_100m_2016.tif - VIIRS night-time lights 2015 (VIIRS is Visible Infrared Imaging Radiometer Suite).
104 | \item wdpa_dst_cat1_100m_2017.tif - Distance to IUCN strict nature reserve and wilderness area edges 2017.
105 | \item dst_bsgme_100m_2020.tif - Distance to predicted built-settlement extents in 2020.
106 | }
107 | All downloaded files will be saved into the \code{covariates} subdirectory.
108 | }
109 | \examples{
110 | \dontrun{
111 | popRFdemo(project_dir="/home/user/demo",
112 |           country="NPL",
113 |           cores=0)
114 | }
115 | }
116 | \references{
117 | \itemize{
118 | \item Global spatio-temporally harmonised datasets for producing high-resolution
119 | gridded population distribution datasets. \doi{10.1080/20964471.2019.1625151}.
120 | \item WorldPop (www.worldpop.org - School of Geography and Environmental Science,
121 | University of Southampton; Department of Geography and Geosciences,
122 | University of Louisville; Departement de Geographie, Universite de Namur)
123 | and Center for International Earth Science Information Network (CIESIN),
124 | Columbia University (2018). Global High Resolution Population Denominators
125 | Project - Funded by The Bill and Melinda Gates Foundation (OPP1134076)
126 | \doi{10.5258/SOTON/WP00649}.
127 | }
128 | }
129 | 
--------------------------------------------------------------------------------
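A minimal sketch of how the fset, fset_incl and fset_cutoff arguments documented
in man/popRF.Rd could be supplied when reusing model objects saved from a previous
run. All paths, the country code and the folder layout below are illustrative
placeholders, not values shipped with the package:

    library("popRF")

    # Hypothetical directories holding previously saved RF model objects (.RData):
    # "final" holds the popfit objects, "quant" the quantile regression forests.
    fset_dirs <- list(final = "/user/models/final/",
                      quant = "/user/models/quant/")

    res <- popRF(pop         = list("NPL" = "/user/npl_population.csv"),
                 cov         = list("NPL" = list("cov1" = "covariate1.tif",
                                                 "cov2" = "covariate2.tif")),
                 mastergrid  = list("NPL" = "npl_mastergrid.tif"),
                 watermask   = list("NPL" = "npl_watermask.tif"),
                 px_area     = list("NPL" = "npl_px_area.tif"),
                 output_dir  = "/user/output",
                 cores       = 4,
                 fset        = fset_dirs,   # reuse the fixed-set model objects
                 fset_incl   = TRUE,        # combine them with the locally fitted forest
                 fset_cutoff = 20)          # skip combining below 20 admin units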