├── 2.1_make_country_lon_tz_offset_files.R
├── 0_convert_country_boundary_to_coordinate_lines.R
├── 8_make_VIIRS_average_raster.R
├── 2.3_make_prp_set_per_cell.R
├── 2.2_get_pop_per_1as_cell.R
├── 4.2_extract_xy_landcover_values.R
├── 4.1_find_overlapping_landcover_tiles.R
├── 10.4_reg_resids.R
├── 10.5_make_reg_set_lit_rasters.R
├── 10.3_multiyear_reg.R
├── 10.2_prep_set_dat_for_r9_regs.R
├── 1_make_settlement_rasters.R
├── 9.1_make_set_lit_rasters_wb_erate.R
├── 7_gen_r9_musd.R
├── 10.6_comp_reg_w_wb_erate.R
├── 10.0_find_nearby_set_cells.R
├── 6_find_good_times.R
├── 9.0_gen_set_elec_based_on_year_wb_val.R
├── 3_find_overlapping_VIIRS_TIFFs.R
├── 2.0_gen_country_VIIRS_XYs.R
├── 10.1_prep_nset_dat_for_r9_regs.R
├── 10.1_prep_nset_dat_for_r9_regs_bigcountry.R
├── 5_extract_good_daily_VIIRS_data.R
└── README.md

/2.1_make_country_lon_tz_offset_files.R:
--------------------------------------------------------------------------------
 1 | # load the following packages
 2 | sapply(c("data.table","bit64"),require,character.only=T)
 3 |
 4 | # get the following variables from the environment
 5 | (ctry<-Sys.getenv("ctry")) # country
 6 | (wd<-Sys.getenv("wd")) # working directory
 7 | # set working directory
 8 | setwd(wd)
 9 |
10 | # read in data.table of 15as cells
11 | xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))
12 | # keep ID and x value
13 | (xyDT<-xyDT[,list(id,x)])
14 | # create time offset in seconds by multiplying the x value (longitude in degrees) by 240 (86400 seconds per day / 360 degrees of longitude)
15 | set(xyDT,NULL,'secOffset',xyDT[['x']]*240)
16 |
17 | # save as RDS
18 | saveRDS(xyDT[,list(id,secOffset)],paste0("data/",ctry,"/",ctry,"_id_secOffset.rds"))
--------------------------------------------------------------------------------
/0_convert_country_boundary_to_coordinate_lines.R:
--------------------------------------------------------------------------------
 1 | # load the following required packages
 2 | sapply(c("data.table","raster","rgdal","rgeos"),require,character.only=T)
 3 |
 4 | # get the following variables from the environment:
 5 | (wd<-Sys.getenv("wd")) # working directory
 6 | (ctry<-Sys.getenv("ctry")) # country
 7 | # set working directory
 8 | setwd(wd)
 9 |
10 | # directory of country shapefile
11 | (shpDir<-dir(paste0('data/',ctry,'/shapefiles'),'gadm36_[A-Z][A-Z][A-Z]_shp',full.names=T))
12 | # find top-level GADM shapefile
13 | (shp<-list.files(shpDir,'gadm36_[A-Z][A-Z][A-Z]_0\\.shp',full.names=T))
14 | # fail if not found
15 | stopifnot(length(shp)==1)
16 |
17 | # read in list of countries
18 | (FBctries<-fread('data/FB/FB_countries.csv'))
19 | # get longform name of the country
20 | (country<-FBctries[country_short==ctry][['country_long']])
21 |
22 | # read country shapefile
23 | (Shp<-shapefile(shp))
24 | # disaggregate
25 | (Shp<-disaggregate(Shp))
26 | # simplify
27 | (Shp<-gSimplify(Shp,.01,T))
28 | # assign country name to shapefile
29 | Shp$id<-rep(country,length(Shp))
30 | # write shapefile
31 | writeOGR(Shp,paste0("data/",ctry,"/shapefiles"),paste0(ctry,"_noproj_disag_simp"),driver="ESRI Shapefile",overwrite_layer=T)
--------------------------------------------------------------------------------
/8_make_VIIRS_average_raster.R:
--------------------------------------------------------------------------------
 1 | # load the following required packages
 2 | sapply(c("data.table","bit64","raster"),require,character.only=T)
 3 | # get the following variables from environment
 4 | (YYYY<-as.integer(Sys.getenv('YYYY'))) # year
 5 | (ctry<-Sys.getenv("ctry")) # country
 6 | (wd<-Sys.getenv("wd")) # working directory
 7 |
# set working directory 8 | setwd(wd) 9 | 10 | # open 15as country cell raster (to be filled later) 11 | (rast<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_15as.tif'),band=1)) 12 | # set all cells to empty 13 | rast[]<-NA_real_;gc() 14 | # read the 15as id, coordinate, and cell data for the country 15 | (xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))) 16 | # read in the file with mean logged rade9 17 | r9dt<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_good_r9_musd_",YYYY,".rds")) 18 | # merge the mean logged rade9 data with the 15as cell position data by id 19 | (r9dt<-merge(r9dt[,list(id,r9lm)],xyDT[,list(id,cell_resamp)],by='id'));gc() 20 | # fill cell positions with mean logged rade9 data in raster 21 | rast[r9dt[['cell_resamp']]]<-r9dt[['r9lm']] 22 | # remove data.table 23 | rm(r9dt);gc() 24 | # save raster as GeoTIFF 25 | writeRaster(rast,paste0('data/',ctry,'/GeoTIFFs/',ctry,'_rade9lnmu_',YYYY,'.tif'),format="GTiff",overwrite=T,options=c("COMPRESS=LZW")) -------------------------------------------------------------------------------- /2.3_make_prp_set_per_cell.R: -------------------------------------------------------------------------------- 1 | # load following required packages 2 | sapply(c("data.table","bit64"),require,character.only=T) 3 | 4 | # get following variables from environment 5 | (ctry<-Sys.getenv("ctry")) # country 6 | (wd<-Sys.getenv("wd")) # working directory 7 | # set working directory 8 | setwd(wd) 9 | 10 | # read data.table of 15as cells with ID and xy info 11 | (xyDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds'))) 12 | # read data.table of 15as-1as cell matches 13 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 14 | 15 | # get number of 1as settlement cells within each 15as cell 16 | (setNDT<-setmatchDT[,list(N_set_cells=.N),by='cell_resamp']) 17 | # calculate proportion of 15as cell filled with 1as settlement cells by dividing by 225 (the max) 18 | set(setNDT,NULL,'prp_sets',setNDT[['N_set_cells']]/225) 19 | 20 | # merge proportion of settlement cells with 15as cell ID information 21 | (xyDT2<-merge(xyDT[,list(id,cell_resamp)],setNDT[,list(cell_resamp,prp_sets)],all.x=T)) 22 | # if missing values, fill with 0, because that means there are no settlement cells in the cell 23 | set(xyDT2,xyDT2[,.I[is.na(prp_sets)]],which(names(xyDT2)=='prp_sets'),0) 24 | # remove cell_resamp column 25 | xyDT2[,cell_resamp:=NULL] 26 | 27 | # save as RDS 28 | saveRDS(xyDT2,paste0('data/',ctry,'/',ctry,'_resamp_country_cell_id_prp_sets.rds')) -------------------------------------------------------------------------------- /2.2_get_pop_per_1as_cell.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64","raster"),require,character.only=T) 3 | # get the following variables from environment 4 | (ctry<-Sys.getenv("ctry")) # country 5 | (wd<-Sys.getenv("wd")) # working directory 6 | # set working directory 7 | setwd(wd) 8 | 9 | # find high resolution settlement layer file 10 | (fbHRSL<-list.files(paste0('data/',ctry,'/FB'),'population_[a-z][a-z][a-z]_20[0-9][0-9]-[0-9][0-9]-[0-9][0-9]\\.tif$',full.names=T)) 11 | # if file not found, stop 12 | stopifnot(length(fbHRSL)==1) 13 | 14 | # load data.table of 1as to 15as cell matches 15 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 16 | # open 1as raster 17 | 
(rast_1as<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_1as.tif'),band=1)) 18 | # open original FB raster 19 | (rast_pop<-raster(fbHRSL,band=1)) 20 | 21 | # get 1as cells with settlements 22 | rast_1as_cell<-setmatchDT[['cell_orig']] 23 | # get xy coordinates 24 | rast_1as_XY<-xyFromCell(rast_1as,rast_1as_cell) 25 | # get cells from FB raster using xy values 26 | rast_pop_cell<-cellFromXY(rast_pop,rast_1as_XY) 27 | # get values of cells (which are population estimates) 28 | rast_pop_vals<-rast_pop[rast_pop_cell] 29 | 30 | # create data.table of 1as cells and their corresponding population estimates 31 | (popDT<-data.table(cell_orig=rast_1as_cell,pop=rast_pop_vals)) 32 | 33 | # save as RDS 34 | saveRDS(popDT,paste0('data/',ctry,'/',ctry,'_cell_orig_pop.rds')) -------------------------------------------------------------------------------- /4.2_extract_xy_landcover_values.R: -------------------------------------------------------------------------------- 1 | # load following required packages 2 | sapply(c("data.table","bit64","parallel","foreach","doMC","raster"),require,character.only=T) 3 | 4 | # get following variables from environment 5 | (ncores<-as.integer(Sys.getenv("ncores"))) # number of cores 6 | (ctry<-Sys.getenv("ctry")) # country 7 | (wd<-Sys.getenv("wd")) # working directory 8 | # set working directory 9 | setwd(wd) 10 | # register cores for parallel operations 11 | registerDoMC(ncores) 12 | 13 | # specify year of landcover type data 14 | LCyr<-2012L 15 | # make path to landcover file directory 16 | (LCdir<-paste0('data/LandCover/LandCoverTiles',LCyr,'/')) 17 | 18 | # read intersecting landcover tyle information 19 | (inttiles<-readRDS(paste0('data/',ctry,'/',ctry,'_intersecting_landcover_tiles_',LCyr,'.rds'))) 20 | 21 | # read long-lat information for 15as cells in country 22 | (xys<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds'))) 23 | # convert x-y coords to matrix 24 | xyMat<-as.matrix(xys[,list(x,y)]) 25 | # store IDs as vector 26 | ids<-xys[['id']] 27 | rm(xys);gc() 28 | 29 | # for each of the intersecting tiles, to the following, collecting results in a data.table 30 | lc_dt<-rbindlist(foreach(it=inttiles,.options.multicore=list(preschedule=F))%dopar%{ 31 | # open tile raster 32 | (rast<-raster(paste0(LCdir,it),band=1)) 33 | # create data.table of IDs and their corresponding landcover type extracted from raster 34 | (dt<-data.table(id=ids,lc_type=extract(rast,xyMat,method="simple",cellnumbers=F))) 35 | # remove missing 36 | return(na.omit(dt)) 37 | },fill=T) 38 | # get unique values 39 | (lc_dt<-unique(lc_dt)) 40 | 41 | # save result 42 | saveRDS(lc_dt,paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_landcover_values_',LCyr,'.rds')) -------------------------------------------------------------------------------- /4.1_find_overlapping_landcover_tiles.R: -------------------------------------------------------------------------------- 1 | # load following required packages 2 | sapply(c("data.table","bit64","parallel","foreach","doMC","raster",'sf'),require,character.only=T) 3 | 4 | # get following variables from environment 5 | (ncores<-as.integer(Sys.getenv("ncores"))) # number of cores for parallel operations 6 | (ctry<-Sys.getenv("ctry")) # country 7 | (wd<-Sys.getenv("wd")) # working directory 8 | # set working directory 9 | setwd(wd) 10 | # register number of cores for parallel operations 11 | registerDoMC(ncores) 12 | 13 | # year of land classification data 14 | LCyr<-2012L 15 | # directory of land cover tiles 16 | 
(LCdir<-paste0('data/LandCover/LandCoverTiles',LCyr,'/')) 17 | # list of land cover TIFFs 18 | (LCfiles<-list.files(LCdir,'\\.tif$',full.names=F)) 19 | 20 | # for each file 21 | LCrelist<-foreach(f=LCfiles,.inorder=T)%dopar%{ 22 | # open raster 23 | rast<-raster(paste0(LCdir,f),band=1) 24 | # get and return extent 25 | rect<-st_as_sfc(st_bbox(rast)) 26 | return(rect) 27 | } 28 | 29 | # specify country shapefile path 30 | (country_shp<-paste0("data/",ctry,"/shapefiles/",ctry,"_noproj_disag_simp.shp")) 31 | # open shapefile 32 | (countryShp<-st_read(country_shp)) 33 | # project shapefile using CRS from landcover extent 34 | (countryShpTrans<-st_transform(countryShp,st_crs(LCrelist[[1]]))) 35 | 36 | # for each tile 37 | inttiles<-foreach(i=1:length(LCrelist),.combine=c,.inorder=T,.options.multicore=list(preschedule=F))%dopar%{ 38 | # test whether extent of landcover tile overlaps with country shapefile 39 | reint<-st_intersects(countryShpTrans,LCrelist[[i]]) 40 | reint<-ifelse(any(lengths(reint)!=0L),LCfiles[i],NA_character_) 41 | return(reint) 42 | } 43 | # remove missing 44 | (inttiles<-c(na.omit(inttiles))) 45 | 46 | # save result as RDS 47 | saveRDS(inttiles,paste0('data/',ctry,'/',ctry,'_intersecting_landcover_tiles_',LCyr,'.rds')) -------------------------------------------------------------------------------- /10.4_reg_resids.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | # export wd=/nfs/brianmin/work/zokeeffe/current/; export ctry=Brunei; export ncores=6; R 4 | 5 | sapply(c("data.table","lme4"),require,character.only=T) 6 | 7 | (YYYY<-as.integer(Sys.getenv("YYYY"))) 8 | (ctry<-Sys.getenv("ctry")) 9 | (wd<-Sys.getenv("wd")) 10 | # ctry<-'Nepal' 11 | # ncores<-12L 12 | # wd<-'/nfs/brianmin/work/zokeeffe/current/' 13 | setwd(wd) 14 | 15 | conflevs<-c(.85,.9,.95) 16 | (confthreshes<-qnorm(conflevs,mean=0,sd=1)) 17 | (conflevnamestmp<-paste0('lit_conf',conflevs*100)) 18 | (conflevnamesfin<-paste0('prplit_conf',conflevs*100)) 19 | 20 | (rdsDir<-paste0("data/",ctry,"/VIIRS/daily/")) 21 | 22 | (LCt2<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_dropoutfull_LCdt.rds"))) 23 | (dateDT<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_monthfac_dateDT.rds"))) 24 | 25 | mod<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_mod_final.rds")) 26 | mod_sig<-sigma(mod) 27 | 28 | (setfiles<-list.files(rdsDir,paste0(ctry,'_regdat_sets_',YYYY,'[0-1][0-9]\\.rds'),full.names=F)) 29 | (nsetfiles<-length(setfiles)) 30 | 31 | setrmuDT<-vector('list',nsetfiles) 32 | for(i in 1:nsetfiles){ 33 | f<-setfiles[i] 34 | (setDT<-readRDS(paste0(rdsDir,f))) 35 | (setDT<-merge(setDT,dateDT,by='locdatechar')) 36 | (setDT<-merge(setDT,LCt2,by='lc_type')) 37 | r9preds<-predict(mod,newdata=setDT) 38 | set(setDT,NULL,'r9rs',(setDT[['r9']]-r9preds)/mod_sig) 39 | setDT<-setDT[,list(id,r9rs)] 40 | setrmuDT[[i]]<-setDT 41 | rm(setDT,r9preds);gc() 42 | } 43 | (setrmuDT<-rbindlist(setrmuDT,fill=T)) 44 | 45 | for(x in 1:length(conflevs)){ 46 | set(setrmuDT,NULL,conflevnamestmp[x],as.integer(setrmuDT[['r9rs']]>confthreshes[x])) 47 | } 48 | setrmuDT[,(c('zscore',conflevnamesfin)):=lapply(.SD,mean),.SDcols=c('r9rs',conflevnamestmp),by='id'] 49 | set(setrmuDT,NULL,'lightscore',(pnorm(setrmuDT[['zscore']])-.5)/.5) 50 | 
set(setrmuDT,setrmuDT[,.I[lightscore<0]],which(names(setrmuDT)=='lightscore'),0) 51 | 52 | (setrmuDTf<-unique(setrmuDT[,c('id','zscore','lightscore',conflevnamesfin),with=F])) 53 | 54 | saveRDS(setrmuDTf,paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_my_sets_predvals_",YYYY,".rds")) 55 | -------------------------------------------------------------------------------- /10.5_make_reg_set_lit_rasters.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | sapply(c("data.table","bit64","raster","rgdal"),require,character.only=T) 4 | 5 | (YYYY<-as.integer(Sys.getenv("YYYY"))) 6 | (ctry<-Sys.getenv("ctry")) 7 | (wd<-Sys.getenv("wd")) 8 | # ctry<-'Nepal' 9 | # wd<-'/victor/Work/Brian/current/' 10 | setwd(wd) 11 | 12 | fstem<-'_RE_reg_my_sets_predvals_' 13 | conflevs<-c(.85,.9,.95) 14 | (lscorevars<-c('zscore','lightscore',paste0('prplit_conf',conflevs*100))) 15 | (fintifstems<-paste0('_set_',lscorevars,'_')) 16 | 17 | (xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))) 18 | (xyDT<-xyDT[grep('s',id),list(id,cell_resamp)]) 19 | (cellDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 20 | (cellDT<-merge(xyDT,cellDT,by='cell_resamp')) 21 | set(cellDT,NULL,'cell_resamp',NULL) 22 | rm(xyDT);gc() 23 | 24 | (rast<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_1as.tif'))) 25 | dataType(rast)<-'FLT4S' 26 | rast[]<-NA;gc() 27 | 28 | # read in light score data 29 | (nvaldt<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,fstem,YYYY,".rds"))) 30 | # merge with cell numbers 31 | (nvaldt<-merge(nvaldt[,c('id',lscorevars),with=F],cellDT,by='id')) 32 | # remove ID variable 33 | set(nvaldt,NULL,'id',NULL) 34 | # create index vector 35 | trowvec<-1:nrow(nvaldt) 36 | # split index vector 37 | splits<-split(trowvec,ceiling(seq_along(trowvec)/500000)) 38 | rm(trowvec);gc() 39 | for(rnum in 1:length(lscorevars)){ 40 | # rnum<-2L 41 | (lscorevar<-lscorevars[rnum]) 42 | message(paste("working on var",lscorevar)) 43 | (fin_file<-paste0(wd,'data/',ctry,'/GeoTIFFs/',ctry,fintifstems[rnum],YYYY,'.tif')) 44 | trast<-copy(rast) 45 | for(part in splits){ 46 | nvaldttmp<-nvaldt[part,c('cell_orig',lscorevar),with=F] 47 | trast[nvaldttmp[['cell_orig']]]<-nvaldttmp[[lscorevar]] 48 | rm(nvaldttmp);gc() 49 | } 50 | writeRaster(trast,fin_file,format="GTiff",overwrite=T,options=c("COMPRESS=LZW"),datatype='FLT4S') 51 | rm(trast);gc() 52 | ## resample to 5 as 53 | # (file_5as<-sub(paste0('_',YYYY,'\\.tif$'),paste0('_5as_',YYYY,'.tif'),fin_file)) 54 | # (call1<-paste('gdalwarp -tr 0.00138888888 -0.00138888888 -r average -overwrite -co "COMPRESS=LZW" -multi',fin_file,file_5as)) 55 | # system(call1) 56 | # rm(fin_file,file_5as,call1);gc() 57 | } 58 | -------------------------------------------------------------------------------- /10.3_multiyear_reg.R: -------------------------------------------------------------------------------- 1 | sapply(c("data.table","bit64","parallel","foreach","doMC","lme4"),require,character.only=T) 2 | 3 | (ctry<-Sys.getenv("ctry")) 4 | (ncores<-as.integer(Sys.getenv("ncores"))) 5 | (wd<-Sys.getenv("wd")) 6 | # ctry<-'Nepal' 7 | # ncores<-12L 8 | # wd<-'/nfs/brianmin/work/zokeeffe/current/' 9 | setwd(wd) 10 | registerDoMC(ncores) 11 | 12 | (LCt<-fread('data/LandCover/landcover_classification_modis_composites.csv')) 13 | 14 | 
(rdsDir<-paste0("data/",ctry,"/VIIRS/daily/")) 15 | 16 | (nsetfiles<-list.files(rdsDir,paste0(ctry,'_regdat_iso_nsets_20[0-1][0-9][0-1][0-9]\\.rds'),full.names=F)) 17 | stopifnot(length(nsetfiles)==69L) 18 | 19 | (nsetDT1<-rbindlist(foreach(f=nsetfiles,.inorder=F,.options.multicore=list(preschedule=F))%do%{ 20 | readRDS(paste0(rdsDir,f)) 21 | },fill=T)) 22 | set(nsetDT1,NULL,'r9l',log(nsetDT1[['r9']]+2.5)) 23 | (r9med<-median(nsetDT1[['r9l']])) 24 | (r9sd<-sd(nsetDT1[['r9l']])) 25 | (nsetDT1<-nsetDT1[r9l<=(r9med+4*r9sd),!'r9l',with=F]) 26 | 27 | lcdates<-unique(nsetDT1[,list(lc_type,locdatechar)]) 28 | setorder(lcdates,lc_type,locdatechar) 29 | ## removing outliers ## 30 | (nsetDT2<-rbindlist(foreach(r=1:nrow(lcdates),.inorder=F,.options.multicore=list(preschedule=F))%dopar%{ 31 | # r<-1L 32 | (tdt<-nsetDT1[lc_type==lcdates[r][['lc_type']]&locdatechar==lcdates[r][['locdatechar']]]) 33 | if(nrow(tdt)>=5L){ 34 | r9m<-mean(tdt[['r9']]) 35 | r9s<-sd(tdt[['r9']]) 36 | tdt<-tdt[r9<=(r9m+4*r9s)] # remove if above 4 standard deviations from the mean 37 | } 38 | tdt 39 | },fill=T)) 40 | rm(nsetDT1);gc() 41 | 42 | (LCt2<-LCt[lc_type%in%lcdates[['lc_type']],list(lc_type,label)]) 43 | set(LCt2,NULL,'lc_type_fac',factor(LCt2[['lc_type']],levels=LCt2[[1L]],labels=LCt2[[2L]])) 44 | LCt2<-LCt2[,list(lc_type,lc_type_fac)] 45 | saveRDS(LCt2,paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_dropoutfull_LCdt.rds")) 46 | 47 | dateDT<-unique(nsetDT2[,list(locdatechar)]) 48 | set(dateDT,NULL,'monthfac',factor(as.integer(substr(dateDT[['locdatechar']],6L,7L)),levels=1:12,labels=month.abb)) 49 | saveRDS(dateDT,paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_monthfac_dateDT.rds")) 50 | ## 51 | (nsetDT2<-merge(nsetDT2,LCt2,by='lc_type')) 52 | (nsetDT2<-merge(nsetDT2,dateDT,by='locdatechar')) 53 | 54 | (mod<-lmer(r9~lirescale+timehour+monthfac+lc_type_fac+lc_type_fac:lirescale+(1|locdatechar),data=nsetDT2,REML=F)) 55 | saveRDS(mod,paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_mod_final.rds")) 56 | -------------------------------------------------------------------------------- /10.2_prep_set_dat_for_r9_regs.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | sapply(c("data.table","bit64"),require,character.only=T) 3 | 4 | (ctry<-Sys.getenv("ctry")) 5 | (wd<-Sys.getenv("wd")) 6 | # ctry<-'Nepal' 7 | # wd<-'/nfs/brianmin/work/zokeeffe/current/' 8 | setwd(wd) 9 | 10 | rdsDir<-"/VIIRS/daily/CSVs/" 11 | rdsStem<-"_daily_VIIRS_values_good_resamp_" 12 | lithresh<-.001 13 | LCyr<-2012L 14 | 15 | ### CONSTANT INFO ### 16 | # x-y information 17 | xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds")) 18 | if(ctry=='Rwanda'){ 19 | cellsinctry<-readRDS('data/Rwanda/Rwanda_resamp_set_cells_in_country.rds') 20 | (xyDT<-xyDT[!(grepl('s',id)&!(cell_resamp%in%cellsinctry))]) 21 | } 22 | (matchDT<-xyDT[grep('s',id),list(id)]) 23 | 24 | # land cover info 25 | (LCDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_landcover_values_',LCyr,'.rds'))) 26 | (lcs2keep<-sort(intersect(unique(LCDT[grep('s',id)][['lc_type']]),unique(LCDT[grep('n',id)][['lc_type']])))) 27 | (lcs2keep<-lcs2keep[lcs2keep<=16]) 28 | (matchDT<-merge(matchDT,LCDT[lc_type%in%lcs2keep],by='id')) 29 | setkey(matchDT,id) 30 | rm(xyDT,LCDT);gc() 31 | 32 | 
(rdsfs<-list.files(paste0("data/",ctry,rdsDir),paste0(ctry,"_daily_VIIRS_values_good_resamp_201[2-7][0-1][0-9]_id\\.rds"),full.names=F)) 33 | (tifmonths<-sub("(.*?)(201[0-9][0-1][0-9]?)_id\\.rds$","\\2",rdsfs)) 34 | stopifnot(length(tifmonths)==69L) 35 | for(m in tifmonths){ 36 | # m<-tifmonths[1L] 37 | dt<-data.table(id=readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_id.rds"))) 38 | set(dt,NULL,'r9',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_rade9.rds"))) 39 | set(dt,NULL,'li',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_li.rds"))) 40 | set(dt,NULL,'mtimeloc',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_mtimeloc.rds"))) 41 | timebad<-readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_timebadset.rds")) 42 | if(length(timebad)!=0L){ 43 | dt<-dt[-timebad] 44 | } 45 | setkey(dt,id) 46 | dt<-dt[matchDT] 47 | dt<-dt[li<=lithresh] 48 | set(dt,NULL,'lirescale',dt[['li']]/lithresh) 49 | set(dt,NULL,'li',NULL) 50 | set(dt,NULL,'locdatechar',strftime(dt[['mtimeloc']],'%Y-%m-%d')) 51 | set(dt,NULL,'timehour',hour(dt[['mtimeloc']])+minute(dt[['mtimeloc']])/60+second(dt[['mtimeloc']])/3600) 52 | set(dt,NULL,'mtimeloc',NULL) 53 | saveRDS(dt,paste0("data/",ctry,"/VIIRS/daily/",ctry,'_regdat_sets_',m,".rds")) 54 | rm(dt);gc() 55 | message(paste('finished',m)) 56 | } 57 | rm(matchDT);gc() 58 | -------------------------------------------------------------------------------- /1_make_settlement_rasters.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c('raster','parallel'),require,character.only=T) 3 | # get the following variables from the environment 4 | (ctry<-Sys.getenv("ctry")) # country 5 | (wd<-Sys.getenv("wd")) # working directory 6 | (ncores<-as.integer(Sys.getenv("ncores"))) # number of cores 7 | # set working directory 8 | setwd(wd) 9 | # set number of cores for parallelization 10 | options(mc.cores=ncores) 11 | 12 | # specify and create new directory for rasters 13 | (outdir<-paste0('data/',ctry,'/GeoTIFFs/')) 14 | dir.create(outdir,F,T) 15 | 16 | # find Facebook population settlement raster corresponding to the country 17 | (fbHRSL<-list.files(paste0('data/',ctry,'/FB'),'population_[a-z][a-z][a-z]_20[0-9][0-9]-[0-9][0-9]-[0-9][0-9]\\.tif$',full.names=T)) 18 | # fail if not found 19 | stopifnot(length(fbHRSL)==1) 20 | 21 | # load the raster 22 | (setrast<-raster(fbHRSL)) 23 | 24 | # get the number of cells from the raster 25 | (Ncells<-ncell(setrast)) 26 | 27 | # if the raster is too large 28 | if(Ncells>.Machine$integer.max){ 29 | # get the values by row using mclapply 30 | rastvals<-mclapply(1:nrow(setrast),function(X) getValues(setrast,X,1),mc.preschedule=F) 31 | rastvals<-unlist(rastvals) 32 | } else { 33 | # otherwise just extract them directly 34 | rastvals<-setrast[] 35 | } 36 | # find settlement cells (non-empty ones) 37 | setcells<-which(!is.na(rastvals)) 38 | # get long-lat information from settlement cells 39 | setXYs<-xyFromCell(setrast,setcells) 40 | 41 | # read in disaggregated shapefile 42 | (shp<-shapefile(paste0("data/",ctry,"/shapefiles/",ctry,"_noproj_disag_simp"))) 43 | # create empty raster with same resolution based on the extent of the country 44 | (rast1<-raster(x=extent(shp),resolution=c(0.0002777778,0.0002777778),crs='+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0')) 45 | # set datatype 46 | dataType(rast1)<-'INT2S' 47 | # populate with NAs 48 | rast1[]<-NA 49 | # find cells corresponding to settlement coordinates 50 | 
setCells1<-cellFromXY(rast1,setXYs) 51 | # set these to 1 52 | rast1[setCells1]<-1L 53 | # write new 1 arcsecond raster 54 | writeRaster(rast1,paste0(outdir,ctry,'_sets_1as.tif'),overwrite=TRUE,options=c("COMPRESS=LZW")) 55 | 56 | # create a 15 arcsecond raster in a similar fashion 57 | (rast15<-raster(x=extent(shp),resolution=c(0.0041666667,0.0041666667),crs='+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0')) 58 | dataType(rast15)<-'INT2S' 59 | rast15[]<-NA 60 | setCells15<-cellFromXY(rast15,setXYs) 61 | # get unique cells because multiple 1as coordinates should fit in cell 62 | setCells15<-unique(setCells15) 63 | # set cells to 1 64 | rast15[setCells15]<-1L 65 | # write new 15 arcsecond settlement raster 66 | writeRaster(rast15,paste0(outdir,ctry,'_sets_15as.tif'),overwrite=TRUE,options=c("COMPRESS=LZW")) 67 | -------------------------------------------------------------------------------- /9.1_make_set_lit_rasters_wb_erate.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64","raster"),require,character.only=T) 3 | # get the following variables from the environment 4 | (ctry<-Sys.getenv("ctry")) # country 5 | (wd<-Sys.getenv("wd")) # working directory 6 | # set working directory 7 | setwd(wd) 8 | 9 | # specify years to work on 10 | YYYYs<-2012:2017 11 | # specify number of cells to split raster by 12 | splitrastby<-500000 13 | 14 | # read in country cell, id, and xy info 15 | (xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))) 16 | # read in the 15 to 1as cell match data 17 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 18 | # merge two data.tables together 19 | (matchDT<-merge(xyDT[,list(id,cell_resamp)],setmatchDT,by='cell_resamp')) 20 | # remove unnecessary 21 | rm(xyDT,setmatchDT);gc() 22 | 23 | # read in 1as raster 24 | (rast<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_1as.tif'),band=1)) 25 | # fill raster with missing 26 | rast[]<-NA;gc() 27 | # for each year 28 | for(YYYY in YYYYs){ 29 | # announce the year being worked on 30 | message(paste("working on year",YYYY)) 31 | # create name for raster to be created 32 | (fin_file<-paste0(wd,'data/',ctry,'/GeoTIFFs/',ctry,'_wbnaterate_sets_lit_',YYYY,'.tif')) 33 | # specify name of World Bank electrification data to read in 34 | (in_file<-paste0('data/',ctry,'/VIIRS/thresh_lit/',ctry,'_set_id_lit_wb_erate_pop_',YYYY,'.rds')) 35 | # read in WB erate data 36 | (r9lmdtr<-readRDS(in_file)) 37 | # merge with cell information 38 | (r9lmdtr<-merge(r9lmdtr,matchDT,by='id')) 39 | # get settlement cells that are not considered lit 40 | cnl<-r9lmdtr[lit==0L][['cell_orig']] 41 | # get settlement cells that are considered lit 42 | cl<-r9lmdtr[lit==1L][['cell_orig']] 43 | rm(r9lmdtr);gc() 44 | # copy empty raster 45 | trast<-copy(rast);gc() 46 | # if not all settlement cells are unlit, 47 | if(length(cnl)!=0L){ 48 | # split the vector of unlit cell values into groups 49 | cnl<-split(cnl,ceiling(seq_along(cnl)/splitrastby)) 50 | # for each group, set those cells to 0 in the raster 51 | for(part in cnl){trast[part]<-0;gc()} 52 | rm(cnl);gc() 53 | } 54 | # if not all settlement cells are lit, 55 | if(length(cl)!=0L){ 56 | # split the vector of lit cell values into groups 57 | cl<-split(cl,ceiling(seq_along(cl)/splitrastby)) 58 | # for each group, set those cells to 1 in the raster 59 | for(part in cl){trast[part]<-1;gc()} 60 | rm(cl);gc() 61 | } 62 | # 
save the raster as a GeoTIFF
63 | writeRaster(trast,fin_file,format="GTiff",overwrite=T,options=c("COMPRESS=LZW"),datatype='INT2S')
64 | # remove unnecessary
65 | rm(trast,fin_file);gc()
66 | # announce finish
67 | message(paste("finished",YYYY))
68 | }
69 | rm(rast);gc()
70 |
--------------------------------------------------------------------------------
/7_gen_r9_musd.R:
--------------------------------------------------------------------------------
 1 | # load following required packages
 2 | sapply(c("data.table","bit64"),require,character.only=T)
 3 |
 4 | # get following variables from environment
 5 | (YYYY<-as.integer(Sys.getenv('YYYY'))) # year
 6 | (ctry<-Sys.getenv('ctry')) # country
 7 | (wd<-Sys.getenv("wd")) # working directory
 8 | # set working directory
 9 | setwd(wd)
10 |
11 | # name of final file to create
12 | (fin_file<-paste0("data/",ctry,"/VIIRS/daily/",ctry,"_good_r9_musd_",YYYY,".rds"))
13 | # get id files corresponding to that year
14 | (rdsfs<-list.files(paste0("data/",ctry,"/VIIRS/daily/CSVs/"),paste0(ctry,"_daily_VIIRS_values_good_resamp_",YYYY,"[0-1][0-9]_id\\.rds"),full.names=F))
15 | # get months of data
16 | (tifmonths<-sub("(.*?)(201[0-9][0-1][0-9]?)_id\\.rds$","\\2",rdsfs))
17 | # say the months
18 | message(paste('TIFF months are:',paste(tifmonths,collapse=", ")))
19 | # if the year is 2012
20 | if(YYYY==2012L){
21 | # stop if not 9 files
22 | stopifnot(length(tifmonths)==9L)
23 | } else {
24 | # stop if not 12 files
25 | stopifnot(length(tifmonths)==12L)
26 | }
27 | # number of tiffs
28 | ntms<-length(tifmonths)
29 | # empty list for results
30 | fulldt<-vector('list',ntms)
31 | # for each index of tiffs
32 | for(i in 1:ntms){
33 | # get the month
34 | m<-tifmonths[i]
35 | # read in id as column in data.table
36 | dt<-data.table(id=readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_id.rds")))
37 | # read in rade9 as column
38 | set(dt,NULL,'r9',readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_rade9.rds")))
39 | # read in li as column
40 | set(dt,NULL,'li',readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_li.rds")))
41 | # read in index of values to drop based on time
42 | timebad<-readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_timebadset.rds"))
43 | timebadnset<-readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_timebadnset.rds"))
44 | # combine the settlement and nonsettlement indices
45 | timebad<-c(timebad,timebadnset)
46 | # if there are any values that should be dropped
47 | if(length(timebad)!=0L){
48 | # drop them
49 | dt<-dt[-timebad]
50 | }
51 | # remove unnecessary
52 | rm(timebadnset,timebad);gc()
53 | # keep data if li is less than .0005
54 | (dt<-dt[li<.0005,!'li',with=F])
55 | # assign data to list slot
56 | fulldt[[i]]<-dt
57 | # remove
58 | rm(dt,m);gc()
59 | }
60 | # bind results together in data.table
61 | (fulldt<-rbindlist(fulldt,fill=T))
62 | rm(rdsfs,tifmonths);gc()
63 | # create logged version of rade9 by adding 2.5 and logging
64 | set(fulldt,NULL,'r9l',log(fulldt[['r9']]+2.5))
65 | # this is to ensure no failure in the following
66 | options(datatable.optimize=1)
67 | # create new data.table that contains mean and standard deviation of rade9 and logged rade9 by cell
68 | (fulldt<-fulldt[,list(r9m=mean(r9),r9s=sd(r9),r9lm=mean(r9l),r9ls=sd(r9l)),by='id'])
69 | # save result
70 | saveRDS(fulldt,fin_file)
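
# --- Illustrative aside (toy example, not one of the pipeline scripts): a minimal,
# --- self-contained sketch of the per-cell summary computed above in 7_gen_r9_musd.R.
# --- The data in toy_dt are invented and the object names toy_dt/toy_mu are made up for
# --- illustration; only the column names (id, r9) mirror the real extracted VIIRS data.
library(data.table)
toy_dt<-data.table(id=c('s1','s1','s1','s2','s2'),r9=c(-0.1,0.4,0.2,3.2,2.8))
# offset by 2.5 before logging so small or slightly negative radiances stay defined
set(toy_dt,NULL,'r9l',log(toy_dt[['r9']]+2.5))
# per-cell mean and standard deviation of raw and logged radiance, mirroring the grouped summary in 7_gen_r9_musd.R
(toy_mu<-toy_dt[,list(r9m=mean(r9),r9s=sd(r9),r9lm=mean(r9l),r9ls=sd(r9l)),by='id'])
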
-------------------------------------------------------------------------------- /10.6_comp_reg_w_wb_erate.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | # export wd=/nfs/brianmin/work/zokeeffe/current/; export ctry=Cambodia; export ncores=6; R 4 | 5 | sapply(c("data.table","bit64","parallel","foreach","doMC"),require,character.only=T) 6 | 7 | (ncores<-as.integer(Sys.getenv("ncores"))) 8 | (ctry<-Sys.getenv("ctry")) 9 | (wd<-Sys.getenv("wd")) 10 | # ncores<-12L 11 | # ctry<-'Nepal' 12 | # country<-'Nepal' 13 | # wd<-'/victor/Work/Brian/current/' 14 | setwd(wd) 15 | registerDoMC(ncores) 16 | 17 | YYYYs<-2012:2017 18 | 19 | # WB electrification data 20 | #eratedat<-fread('data/WorldBank/pct_pop_w_electricity_1990-2016.csv',header=T) 21 | #(FBctries<-fread('data/FB/FB_countries.csv')) 22 | #(country<-FBctries[country_short==ctry][['country_long']]) 23 | eratedat<-fread('data/WorldBank/pct_pop_w_electricity_1990-2017.csv',header=T) 24 | (eratedat<-eratedat[ctry_short==ctry]) 25 | stopifnot(nrow(eratedat)==1) 26 | 27 | conflevs<-c(.85,.9,.95) 28 | (confthreshes<-qnorm(conflevs,mean=0,sd=1)) 29 | (plclnames<-paste0('prplit_conf',conflevs*100)) 30 | prplitthreshes<-c(.25,.5,.75) 31 | (plclgpnames<-unlist(lapply(plclnames,function(x) paste0(x,'_gr',prplitthreshes*100,'pct')))) 32 | (lsnames<-paste0('zscore_conf',conflevs*100)) 33 | 34 | xyDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds')) 35 | xyDT<-xyDT[grep('s',id),list(id,cell_resamp)] 36 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 37 | (popDT<-readRDS(paste0('data/',ctry,'/',ctry,'_cell_orig_pop.rds'))) 38 | # (popDT2<-merge(setmatchDT,popDT,by='cell_orig')) 39 | # could actually just cbind because they are already sorted but w/e, for robustness: 40 | setkey(setmatchDT,cell_orig) 41 | setkey(popDT,cell_orig) 42 | (popDT2<-popDT[setmatchDT]) 43 | (popDT2<-popDT2[,list(totpop=ifelse(all(is.na(pop)),NA_real_,sum(pop,na.rm=T))),by='cell_resamp']) 44 | (matchDT<-merge(xyDT,na.omit(popDT2),by='cell_resamp')) 45 | rm(xyDT,popDT2,popDT);gc() 46 | 47 | (erate_comp<-rbindlist(foreach(YYYY=YYYYs,.inorder=T)%dopar%{ 48 | # YYYY<-2012L 49 | #if(YYYY==2017L){(natl_elec_rate<-eratedat[`Country Name`==country][['2016']])} else {(natl_elec_rate<-eratedat[`Country Name`==country][[as.character(YYYY)]])} 50 | (natl_elec_rate<-eratedat[[as.character(YYYY)]]) 51 | (setrmuDT<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_my_sets_predvals_",YYYY,".rds"))) 52 | (setrmuDT<-merge(setrmuDT,matchDT,by='id')) 53 | ttotpop<-sum(setrmuDT[['totpop']]) 54 | (lsvals<-round(100*foreach(a=confthreshes,.inorder=T,.combine=c)%do%{sum(setrmuDT[zscore>a][['totpop']])/ttotpop},2)) 55 | (plclvals<-foreach(var=plclnames,.inorder=T,.combine=c)%do%{ 56 | round(100*foreach(a=prplitthreshes,.inorder=T,.combine=c)%do%{sum(setrmuDT[get(var)>a][['totpop']])/ttotpop},2) 57 | }) 58 | (dt<-data.table(country=ctry,year=YYYY,erate_wb=round(natl_elec_rate,2))) 59 | for(x in 1:length(lsvals)){set(dt,NULL,lsnames[x],lsvals[x])} 60 | for(x in 1:length(plclvals)){set(dt,NULL,plclgpnames[x],plclvals[x])} 61 | dt 62 | },fill=T)) 63 | 64 | fwrite(erate_comp,paste0("data/",ctry,"/",ctry,"_RE_reg_my_res_v_WB_erate.csv")) 65 | -------------------------------------------------------------------------------- 
/10.0_find_nearby_set_cells.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | sapply(c("data.table","bit64","raster"),require,character.only=T) 4 | 5 | (ctry<-Sys.getenv("ctry")) 6 | (wd<-Sys.getenv("wd")) 7 | # ctry<-'Nepal' 8 | # wd<-'/victor/Work/Brian/current/' 9 | setwd(wd) 10 | 11 | (rast<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_15as.tif'),band=1)) 12 | 13 | (xyDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds'))) 14 | 15 | ## settlement stuff to match on later 16 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 17 | (popDT<-readRDS(paste0('data/',ctry,'/',ctry,'_cell_orig_pop.rds'))) 18 | (prpset<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_id_prp_sets.rds'))) 19 | (prpset<-prpset[,list(set_id_match=id,N_1as_sets=as.integer(round(prp_sets*225)))]) 20 | (finsetdatmatch<-merge(xyDT[grepl('s',id),list(set_id_match=id,cell_resamp)],setmatchDT,by='cell_resamp')) 21 | (finsetdatmatch<-merge(finsetdatmatch,popDT,by='cell_orig',all.x=T)) 22 | (finsetdatmatch<-merge(finsetdatmatch,prpset,by='set_id_match',all.x=T)) 23 | set(finsetdatmatch,NULL,'cell_orig',NULL) 24 | set(finsetdatmatch,NULL,'cell_resamp',NULL) 25 | (finsetdatmatch<-finsetdatmatch[,list(pop=sum(pop,na.rm=T),N_1as_sets=N_1as_sets[1L]),by='set_id_match']) 26 | rm(setmatchDT,popDT,prpset);gc() 27 | ## 28 | 29 | cellDTf<-xyDT[,list(id,cell_resamp)] 30 | set(cellDTf,NULL,'row',rowFromCell(rast,cellDTf[['cell_resamp']])) 31 | set(cellDTf,NULL,'col',colFromCell(rast,cellDTf[['cell_resamp']])) 32 | (rastNrow<-nrow(rast)) 33 | (rastNcol<-ncol(rast)) 34 | (rastNcell<-ncell(rast)) 35 | rm(rast,xyDT);gc() 36 | set(cellDTf,NULL,'set',as.integer(grepl('s',cellDTf[['id']]))) 37 | 38 | set(cellDTf,NULL,'edge',FALSE) 39 | set(cellDTf,cellDTf[,.I[row==1L|row==rastNrow|col==1L|col==rastNcol]],'edge',TRUE) 40 | cellDTf 41 | 42 | (cellmatchfix<-cellDTf[,list(id,edge)]) 43 | 44 | (cellDT1<-cellDTf[set==1L,list(set_id_match=id,col=col-1,row=row-1)]) 45 | (cellDT2<-cellDTf[set==1L,list(set_id_match=id,col=col,row=row-1)]) 46 | (cellDT3<-cellDTf[set==1L,list(set_id_match=id,col=col+1,row=row-1)]) 47 | (cellDT4<-cellDTf[set==1L,list(set_id_match=id,col=col-1,row=row)]) 48 | (cellDT5<-cellDTf[set==1L,list(set_id_match=id,col=col+1,row=row)]) 49 | (cellDT6<-cellDTf[set==1L,list(set_id_match=id,col=col-1,row=row+1)]) 50 | (cellDT7<-cellDTf[set==1L,list(set_id_match=id,col=col,row=row+1)]) 51 | (cellDT8<-cellDTf[set==1L,list(set_id_match=id,col=col+1,row=row+1)]) 52 | (cellDTMatch<-rbindlist(list(cellDT1,cellDT2,cellDT3,cellDT4,cellDT5,cellDT6,cellDT7,cellDT8),fill=T)) 53 | rm(cellDT1,cellDT2,cellDT3,cellDT4,cellDT5,cellDT6,cellDT7,cellDT8);gc() 54 | 55 | (fincellmatch<-merge(cellDTf[,list(id,row,col)],cellDTMatch,by=c('row','col'))) 56 | set(fincellmatch,NULL,'row',NULL) 57 | set(fincellmatch,NULL,'col',NULL) 58 | rm(cellDTf);gc() 59 | 60 | (finDT<-merge(fincellmatch[,list(id,set_id_match)],finsetdatmatch,by='set_id_match')) 61 | (finDT<-finDT[,list(N_15as_set_dist1=.N,N_1as_set_dist1=sum(N_1as_sets),pop_dist1=sum(pop,na.rm=T)),by='id']) 62 | (finDT<-merge(cellmatchfix,finDT,by='id',all.x=T)) 63 | set(finDT,finDT[,.I[is.na(N_15as_set_dist1)]],'N_15as_set_dist1',0L) 64 | set(finDT,finDT[,.I[is.na(N_1as_set_dist1)]],'N_1as_set_dist1',0L) 65 | 
set(finDT,finDT[,.I[is.na(pop_dist1)]],'pop_dist1',0L)
66 | finDT
67 |
68 | saveRDS(finDT,paste0('data/',ctry,'/',ctry,'_id_set_stats_15as_cell_dist1.rds'))
69 |
70 | rm(finDT,fincellmatch,finsetdatmatch,cellmatchfix);gc()
71 |
--------------------------------------------------------------------------------
/6_find_good_times.R:
--------------------------------------------------------------------------------
 1 | # load the following required packages
 2 | sapply(c("data.table","bit64"),require,character.only=T)
 3 |
 4 | # get the following variables from the environment
 5 | (YYYY<-as.integer(Sys.getenv('YYYY'))) # year
 6 | (ctry<-Sys.getenv("ctry")) # country
 7 | (wd<-Sys.getenv("wd")) # working directory
 8 | # set working directory
 9 | setwd(wd)
10 |
11 | # open file with VIIRS TIFF time matches
12 | tgzdt<-readRDS('data/VIIRS/VIIRS_tiff_time_lookup.rds')
13 | # keep the month, day, start time, and mean time values
14 | (tgzdt<-tgzdt[,list(YYYYMM,day,stime,meantime)])
15 |
16 | # specify stem where extracted data are stored
17 | csvDir<-"/VIIRS/daily/CSVs/"
18 | # specify stem of extracted data
19 | fstem<-"_daily_VIIRS_values_good_resamp_"
20 |
21 | # read the country's id-second offset file
22 | (offsetDT<-readRDS(paste0("data/",ctry,"/",ctry,"_id_secOffset.rds")))
23 | # if year was specified
24 | if(!is.na(YYYY)){
25 | # list the id files corresponding to that year
26 | rdsfs<-list.files(paste0("data/",ctry,"/VIIRS/daily/CSVs/"),paste0(ctry,"_daily_VIIRS_values_good_resamp_",YYYY,"[0-1][0-9]_id\\.rds"),full.names=F)
27 | # otherwise
28 | } else {
29 | # list the id files corresponding to all years
30 | rdsfs<-list.files(paste0("data/",ctry,"/VIIRS/daily/CSVs/"),paste0(ctry,"_daily_VIIRS_values_good_resamp_201[0-9][0-1][0-9]_id\\.rds"),full.names=F)
31 | }
32 | # get list of months from id files
33 | (YYYYMMs<-sub("(.*?)(201[0-9][0-1][0-9]?)_id\\.rds$","\\2",rdsfs))
34 | # display months
35 | message(paste('YYYYMMs are:',paste(YYYYMMs,collapse=", ")))
36 | # for each month
37 | for(tYYYYMM in YYYYMMs){
38 | # message which month we're working on
39 | message(paste("working on ",tYYYYMM))
40 | # store the id data from that month as a column in a data.table
41 | dt<-data.table(id=readRDS(paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_id.rds")))
42 | # get number of rows of data
43 | totN<-nrow(dt)
44 | # create index vector
45 | set(dt,NULL,'index',1:totN)
46 | # assign month as column
47 | set(dt,NULL,'YYYYMM',tYYYYMM)
48 | # read day in as column
49 | set(dt,NULL,'day',readRDS(paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_day.rds")))
50 | # read start time in as column
51 | set(dt,NULL,'stime',readRDS(paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_stime.rds")))
52 | # merge the data.table with the data containing other time information
53 | dt<-merge(dt,tgzdt,by=c('YYYYMM','day','stime'))
54 | # merge data.table with second offset information
55 | dt<-merge(dt,offsetDT,by='id')
56 | # stop if any rows were dropped
57 | stopifnot(nrow(dt)==totN)
58 | # create the mean time in "local" time by adding the second offset to the value
59 | set(dt,NULL,'mtimeloc',dt[['meantime']]+dt[['secOffset']])
60 | # reorder by original index
61 | setorder(dt,index)
62 | # save the result
63 | saveRDS(dt[['mtimeloc']],paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_mtimeloc.rds"))
64 | # get the date of the "local" time
65 | set(dt,NULL,'mdateloc',as.Date(dt[['mtimeloc']]))
66 | # format the local time as an HH:MM:SS character string (for ordering observations within each date)
67 |
set(dt,NULL,'mtimelocconst',strftime(dt[['mtimeloc']],format="%H:%M:%S",tz='UTC',usetz=F)) 68 | # reorder by id, then local date, then local time 69 | setorder(dt,id,mdateloc,mtimelocconst) 70 | # assign index by each date 71 | dt[,mdatelocpos:=1:.N,by=c('id','mdateloc')] 72 | # keep settlement data indices corresponding to the earliest observation that day 73 | timebadset<-dt[grepl('s',id)&mdatelocpos!=1L][['index']] 74 | # save result 75 | saveRDS(timebadset,paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_timebadset.rds")) 76 | # do the same but for non-settlement cells 77 | timebadnset<-dt[grepl('n',id)&mdatelocpos!=1L][['index']] 78 | saveRDS(timebadnset,paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_timebadnset.rds")) 79 | # remove unnecessary and announce we are done for the month 80 | rm(dt,timebadset,timebadnset,totN);gc() 81 | message(paste("finished",tYYYYMM)) 82 | } 83 | -------------------------------------------------------------------------------- /9.0_gen_set_elec_based_on_year_wb_val.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64"),require,character.only=T) 3 | 4 | # get the following variables from environment 5 | (ctry<-Sys.getenv("ctry")) # country 6 | (wd<-Sys.getenv("wd")) # working directory 7 | # set working directory 8 | setwd(wd) 9 | 10 | # read in electrification rate data from the World Bank 11 | (eratedat<-fread('data/WorldBank/pct_pop_w_electricity_1990-2017.csv',header=T)) 12 | # read in list of countries 13 | (FBctries<-fread('data/FB/FB_countries.csv')) 14 | # get "longform" name of country 15 | (country<-FBctries[country_short==ctry][['country_long']]) 16 | 17 | # read in 15as cell information 18 | (xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))) 19 | # create data.table of settlement value ids and cell positions 20 | (matchdt<-xyDT[grep('s',id),list(id,cell_resamp)]) 21 | 22 | # read in 15 to 1as cell match data.table 23 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 24 | # read in 1as population data 25 | (popDT<-readRDS(paste0('data/',ctry,'/',ctry,'_cell_orig_pop.rds'))) 26 | # merge the population data with the 15as cells 27 | (popDT2<-merge(setmatchDT,popDT,by='cell_orig')) 28 | # for each 15as cell, sum the population; if none, return missing 29 | (popDT2<-popDT2[,list(pop=ifelse(all(is.na(pop)),NA_real_,sum(pop,na.rm=T))),by='cell_resamp']) 30 | # merge population data with ID information 31 | (matchdt<-merge(matchdt,popDT2,all.x=T,by='cell_resamp')) 32 | 33 | # remove unnecessary 34 | rm(xyDT,setmatchDT,popDT,popDT2);gc() 35 | 36 | # specify directory for saving data 37 | (outdir<-paste0('data/',ctry,'/VIIRS/thresh_lit/')) 38 | # create directory 39 | dir.create(outdir,showWarnings=F,recursive=T) 40 | 41 | # for each year 42 | for(YYYY in 2012:2017){ 43 | # state which year we're working on 44 | message(paste("working on",YYYY)) 45 | # get the national electrification rate for the country from the World Bank data 46 | (natl_elec_rate<-eratedat[`Country Name`==country][[as.character(YYYY)]]/100) 47 | # read in the mean rade9 data 48 | (r9lmdtrf<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_good_r9_musd_",YYYY,".rds"))) 49 | # keep only settlement data and the means 50 | (r9lmdtrf<-r9lmdtrf[grep('s',id),list(id,r9m,r9lm)]) 51 | # if the national electrification rate is less than 1 52 | if(natl_elec_rate<1){ 53 | # merge population data with rade9 mean data 54 
| (r9lmdtr<-merge(na.omit(matchdt[,list(id,pop)]),r9lmdtrf,by='id'));gc() 55 | # order by mean logged rade9 56 | setorder(r9lmdtr,r9lm) 57 | # generate percentage of population for each cell 58 | set(r9lmdtr,NULL,'prppop',r9lmdtr[['pop']]/sum(r9lmdtr[['pop']])) 59 | # create a running sum vector 60 | set(r9lmdtr,NULL,'prppopcs',cumsum(r9lmdtr[['prppop']])) 61 | # create index vector of whether the percentage of the population lit exceeds 1 - the national electrification rate 62 | set(r9lmdtr,NULL,'prppopcsgrerate',r9lmdtr[['prppopcs']]>(1-natl_elec_rate)) 63 | # create a cumulative sum of this vector 64 | set(r9lmdtr,NULL,'prppopcsgreratecs',cumsum(r9lmdtr[['prppopcsgrerate']])) 65 | # get the minimum mean logged rade9 that corresponds to being electrified 66 | (lit_thresh_r9lm<-r9lmdtr[prppopcsgreratecs==1L][['r9lm']]) 67 | # create "lit" vector of 0s 68 | set(r9lmdtrf,NULL,'lit',0L) 69 | # set "lit" to 1 if it exceeds the minimum mean loged rade9 to be considered lit 70 | set(r9lmdtrf,r9lmdtrf[,.I[r9lm>lit_thresh_r9lm]],which(names(r9lmdtrf)=='lit'),1L) 71 | # save data of id and lit status 72 | saveRDS(r9lmdtrf[,list(id,lit)],paste0(outdir,ctry,'_set_id_lit_wb_erate_pop_',YYYY,'.rds')) 73 | # create a data.table of various statistics 74 | (erdt_natl<-data.table(natl_e_rate=natl_elec_rate,prp_15as_set_lit=sum(r9lmdtrf[['lit']])/nrow(r9lmdtrf),r9m_lit_thresh=r9lmdtr[prppopcsgreratecs==1L][['r9m']],r9lm_lit_thresh=lit_thresh_r9lm)) 75 | rm(r9lmdtr) 76 | # otherwise 77 | } else { 78 | # define all settlement cells as lit 79 | set(r9lmdtrf,NULL,'lit',1L) 80 | # save this information 81 | saveRDS(r9lmdtrf[,list(id,lit)],paste0(outdir,ctry,'_set_id_lit_wb_erate_pop_',YYYY,'.rds')) 82 | # create the statistics data.table 83 | (erdt_natl<-data.table(natl_e_rate=1,prp_15as_set_lit=1,r9m_lit_thresh=min(r9lmdtrf[['r9m']]),r9lm_lit_thresh=min(r9lmdtrf[['r9lm']]))) 84 | } 85 | # save the statistics data.table 86 | saveRDS(erdt_natl,paste0(outdir,ctry,'_natl_e_rate_',YYYY,'.rds')) 87 | # remove unnecessary and announce that year is done 88 | rm(erdt_natl,r9lmdtrf);gc() 89 | message(paste('finished',YYYY)) 90 | } 91 | -------------------------------------------------------------------------------- /3_find_overlapping_VIIRS_TIFFs.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64","parallel","foreach","doMC","raster","rgeos"),require,character.only=T) 3 | # set working directory 4 | setwd("/nfs/brianmin/work/zokeeffe/current/") 5 | # specify path where VIIRS GeoTIFF metadata are stored 6 | viirs_dat_path<-"/nfs/brianmin/VIIRS/" 7 | # get number of cores for parallel operations from environment 8 | (ncores<-as.integer(Sys.getenv("ncores"))) 9 | # get country from environment 10 | (ctry<-Sys.getenv("ctry")) 11 | # specify a single thread for DT operations, then set cores for parallel operations 12 | setDTthreads(1L);registerDoMC(ncores) 13 | 14 | # specify directory with GDAL information extracted from vflag files 15 | (gdaldir<-paste0(viirs_dat_path,"gdalinfo/vflag/")) 16 | # get list of directories with metadata 17 | (YYYYMMs<-list.dirs(gdaldir,recursive=F,full.names=F)) 18 | 19 | # specify six monthly file tiles 20 | MVTypes<-c("00N060E","75N060E","00N060W","75N060W","00N180W","75N180W") 21 | MVexl<-list() 22 | # for each type, open an example TIFF and get the extent 23 | for(mvt in MVTypes){ 24 | 
tmp<-raster(paste0(viirs_dat_path,'monthly/201204/vcmcfg/SVDNB_npp_20120401-20120430_',mvt,'_vcmcfg_v10_c201605121456.cf_cvg.tif')) 25 | MVexl[[mvt]]<-extent(tmp) 26 | rm(tmp);gc() 27 | } 28 | 29 | # specify country shapefile path 30 | (country_shp<-paste0("data/",ctry,"/shapefiles/",ctry,"_noproj_disag_simp.shp")) 31 | # open the shapefile 32 | (CountryShp<-shapefile(country_shp)) 33 | # get the extent 34 | (CountryExtent<-extent(CountryShp)) 35 | # specify new directory for where to store data 36 | (tiff_list_outdir<-paste0("data/",ctry,"/VIIRS/daily/")) 37 | # create directory 38 | dir.create(tiff_list_outdir,recursive=T,showWarnings=F) 39 | 40 | # create an empty list the length of the number of months of data 41 | FinDT<-vector('list',length(YYYYMMs)) 42 | # for each month of data, 43 | for(i in 1:length(YYYYMMs)){ 44 | YYYYMM<-YYYYMMs[i] 45 | message(paste('working on',YYYYMM)) 46 | # get the files of metadata 47 | FileList<-list.files(path=paste0(gdaldir,YYYYMM),pattern="*.info$") 48 | # try to read textfiles that list rasters with "bad data" so these can be ignored; if they don't exist, ignore 49 | try({ 50 | (BadFileList<-fread(paste0(viirs_dat_path,"bad_rasters/",YYYYMM,"_bad_rasters.txt"),sep="/",header=F)) 51 | if(ncol(BadFileList)==3L){ 52 | badfiles<-paste0(BadFileList[[2L]],".info") 53 | FileList<-setdiff(FileList,badfiles) 54 | } 55 | }) 56 | # start the overlap checking process for each file 57 | OverlapCheck<-foreach(f=FileList,.inorder=T,.options.multicore=list(preschedule=F))%dopar%{ 58 | tiff_name<-sub(".info","",f,fixed=T) 59 | # read in metadata from file 60 | info<-readLines(paste0(viirs_dat_path,"gdalinfo/vflag/",YYYYMM,"/",f)) 61 | # get upper left coordinates and remove surrounding text 62 | (ULCoords<-grep("^Upper Left \\(",info,value=T)) 63 | (ULCoords<-sub("(^Upper Left )(\\(.*?\\))(.*)$","\\2",ULCoords)) 64 | (ULCoords<-trimws(gsub("[[:space:]]+","",gsub("\\(|\\)","",ULCoords)))) 65 | # convert to numeric 66 | (ULXY<-as.numeric(unlist(strsplit(ULCoords,",")))) 67 | # get lower right coordinates and remove surrounding text 68 | (LRCoords<-grep("^Lower Right \\(",info,value=T)) 69 | (LRCoords<-sub("(^Lower Right )(\\(.*?\\))(.*)$","\\2",LRCoords)) 70 | (LRCoords<-trimws(gsub("[[:space:]]+","",gsub("\\(|\\)","",LRCoords)))) 71 | # convert to numeric 72 | (LRXY<-as.numeric(unlist(strsplit(LRCoords,",")))) 73 | # create an extent using the extracted coordinates 74 | tiff_extent<-extent(ULXY[1L],LRXY[1L],LRXY[2L],ULXY[2L]) 75 | # determine whether TIFF overlaps with country extent 76 | ext_overlap<-as.integer(!is.null(intersect(CountryExtent,tiff_extent))) 77 | # create data.table that states whether the TIFF in question overlaps with the country extent 78 | data.table(ext_overlap=ext_overlap,tiff_name=tiff_name) 79 | } 80 | # row bind all the results together 81 | OverlapCheck<-rbindlist(OverlapCheck,fill=T) 82 | # only keep overlapping TIFFs 83 | (OverlapCheck<-OverlapCheck[ext_overlap==1L,list(tiff_name)]) 84 | set(OverlapCheck,NULL,1L,paste(YYYYMM,OverlapCheck[[1L]],sep="/")) 85 | # assign results to empty list slot 86 | FinDT[[i]]<-OverlapCheck 87 | } 88 | # row bind the results 89 | (FinDT<-rbindlist(FinDT,fill=T)) 90 | 91 | # specify the file name to save under 92 | fname<-paste0(tiff_list_outdir,ctry,"_daily_VIIRS_overlapping_imgs.txt") 93 | # write the file out 94 | fwrite(FinDT,fname,col.names=F,quote=F,sep=",",na="",nThread=ncores) 95 | rm(FinDT);gc() 96 | 97 | # now see which of the six month tiles overlaps with the country extent 98 | 
MVOv<-rbindlist(foreach(mvt=MVTypes,.inorder=T)%dopar%{ 99 | ext_overlap<-as.integer(!is.null(intersect(CountryExtent,MVexl[[mvt]]))) 100 | data.table(ext_overlap=ext_overlap,mvt=mvt) 101 | },fill=T) 102 | (MVOv<-MVOv[ext_overlap==1L][["mvt"]]) 103 | # save the results as a text file 104 | writeLines(MVOv,paste0("data/",ctry,"/VIIRS/",ctry,"_monthly_VIIRS_overlapping_tiles.txt"),sep="\n") -------------------------------------------------------------------------------- /2.0_gen_country_VIIRS_XYs.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64","parallel","foreach","doMC","raster",'sf',"rgdal","rgeos"),require,character.only=T) 3 | # get the following variables from the environment 4 | (ncores<-as.integer(Sys.getenv("ncores"))) # number of cores for parallel operations 5 | (ctry<-Sys.getenv("ctry")) # country 6 | (wd<-Sys.getenv("wd")) # working directory 7 | # set working directory 8 | setwd(wd) 9 | 10 | # register cores for parallel operations 11 | registerDoMC(ncores) 12 | 13 | # path to edited country shapefile 14 | (country_shp<-paste0("data/",ctry,"/shapefiles/",ctry,"_noproj_disag_simp.shp")) 15 | # path to 1 arc second settlement raster 16 | (set_path_1as<-paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_1as.tif')) 17 | # path to 15 arc second settlement raster 18 | (set_path_15as<-paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_15as.tif')) 19 | 20 | # load country shapefile 21 | (countryShp<-st_read(country_shp)) 22 | # load resampled settlement raster 23 | (setrast_15as<-raster(set_path_15as,band=1)) 24 | # load original settlement raster 25 | (setrast_1as<-raster(set_path_1as,band=1)) 26 | 27 | # get number of rows from each raster 28 | (nrowSRr<-nrow(setrast_15as)) 29 | (nrowSRo<-nrow(setrast_1as)) 30 | # get number of columns from each raster 31 | (ncolSRr<-ncol(setrast_15as)) 32 | (ncolSRo<-ncol(setrast_1as)) 33 | 34 | # vector from one to number of rows of 1as raster 35 | rowso<-1:nrowSRo 36 | # specify number of rows per group 37 | rowspergrp<-400L 38 | # split rows into groups 39 | rowso_split<-split(rowso,ceiling(seq_along(rowso)/rowspergrp)) 40 | 41 | # create data.table of cells matching 1as to 15as raster in parallel, working on each row group 42 | cell_lookup_dt<-rbindlist(foreach(rg=1:length(rowso_split),.inorder=T)%dopar%{ 43 | # get temporary rows to work with 44 | trows<-rowso_split[[rg]] 45 | # get cells from 1as raster 46 | cells_o<-cellFromRow(setrast_1as,trows) 47 | # get values corresponding to those cells 48 | values_o<-values(setrast_1as,row=trows[1L],nrows=length(trows)) 49 | # remove missing 50 | valuesNoNA_o<-which(!is.na(values_o)) 51 | # if not all missing, 52 | if(length(valuesNoNA_o)!=0L){ 53 | # get cells corresponding to nonmissing values (settlement cells) 54 | cells_o2<-cells_o[valuesNoNA_o] 55 | # get xy values from cells 56 | xys_o<-xyFromCell(setrast_1as,cells_o2) 57 | # get cell from 15as raster corresponding to those xy values 58 | cells_r<-cellFromXY(setrast_15as,xys_o) 59 | # return data.table of cells from 1as (cell_orig) and 15as (cell_resamp) 60 | return(data.table(cell_orig=cells_o2,cell_resamp=cells_r)) 61 | } else { 62 | return(NULL) 63 | } 64 | },fill=T) 65 | # remove missing 66 | (cell_lookup_dt<-na.omit(cell_lookup_dt)) 67 | # save data.table as RDS file 68 | saveRDS(cell_lookup_dt,paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds')) 69 | 70 | # get cells in 15as raster that have settlements 71 | 
agg_set_cells<-sort(unique(cell_lookup_dt[['cell_resamp']])) 72 | # get xy values from those cells 73 | agg_set_xys<-xyFromCell(setrast_15as,agg_set_cells) 74 | # create data.table of cells and xy values 75 | (agg_set_cxy<-data.table(cell_resamp=agg_set_cells,agg_set_xys)) 76 | # save as RDS file 77 | saveRDS(agg_set_cxy,paste0('data/',ctry,'/',ctry,'_resamp_set_cell_xy.rds')) 78 | 79 | # create vector from one to number of rows in 15as raster 80 | rowsr<-1:nrowSRr 81 | 82 | # set rows per group 83 | rowspergrp<-50L 84 | # split vector into groups of rows 85 | rowsr_split<-split(rowsr,ceiling(seq_along(rowsr)/rowspergrp)) 86 | 87 | # get cells in 15as raster intersecting with country boundary that do not contain settlements by working on row groups in parallel 88 | nonset_cells<-rbindlist(foreach(rg=1:length(rowsr_split),.inorder=T)%dopar%{ 89 | # get temporary row set 90 | trows<-rowsr_split[[rg]] 91 | # get cells in those rows 92 | cells_o<-cellFromRow(setrast_15as,trows) 93 | # return cells that do not have settlements 94 | cells_o<-setdiff(cells_o,agg_set_cells) 95 | # if not empty, 96 | if(length(cells_o)!=0L){ 97 | # get xy values from those cells 98 | xys_o<-xyFromCell(setrast_15as,cells_o) 99 | # make data.table of cell and xy values 100 | (cxy_dt<-data.table(cell_resamp=cells_o,xys_o)) 101 | # convert data.table to spatial object 102 | points<-st_as_sf(cxy_dt,coords=c('x','y'),crs=st_crs(countryShp)) 103 | # intersect with county shapefile 104 | point_ctry_int<-st_intersects(points,countryShp) 105 | # determine which points lie within the country boundary 106 | nonempty<-unlist(lapply(point_ctry_int,function(xx) length(xx)!=0)) 107 | nonempty_cells<-which(nonempty) 108 | # return the non-settlement cells that are inside the country boundary 109 | return(cxy_dt[nonempty_cells]) 110 | } else { 111 | return(NULL) 112 | } 113 | },fill=T) 114 | # remove missing 115 | (nonset_cells<-na.omit(nonset_cells)) 116 | # save as RDS 117 | saveRDS(nonset_cells,paste0('data/',ctry,'/',ctry,'_resamp_nonset_cell_xy.rds')) 118 | 119 | # reorder 15as cell data.table of settlement cells 120 | setorder(agg_set_cxy,cell_resamp) 121 | # assign new settlement identifier 122 | set(agg_set_cxy,NULL,'id',paste0('s',1:nrow(agg_set_cxy))) 123 | # reorder 15as cell data.table of non-settlement cells 124 | setorder(nonset_cells,cell_resamp) 125 | # assign new non-settlement identifier 126 | set(nonset_cells,NULL,'id',paste0('n',1:nrow(nonset_cells))) 127 | 128 | # bind settlement and non-settlement cell information 129 | (ctry_cxys<-rbindlist(list(agg_set_cxy,nonset_cells),fill=T)) 130 | # save as RDS 131 | saveRDS(ctry_cxys,paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds')) -------------------------------------------------------------------------------- /10.1_prep_nset_dat_for_r9_regs.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | # export wd=/nfs/brianmin/work/zokeeffe/current/; export ctry=Brunei; R 4 | 5 | sapply(c("data.table","bit64","foreach","doMC","stackoverflow"),require,character.only=T) 6 | 7 | options(datatable.optimize=1) 8 | 9 | (ctry<-Sys.getenv("ctry")) 10 | (ncores<-as.integer(Sys.getenv("ncores"))) 11 | (wd<-Sys.getenv("wd")) 12 | # ctry<-'Nepal' 13 | # ncores<-12L 14 | # wd<-'/nfs/brianmin/work/zokeeffe/current/' 15 | setwd(wd) 16 | 17 | registerDoMC(ncores) 18 | 19 | 
rdsDir<-"/VIIRS/daily/CSVs/" 20 | rdsStem<-"_daily_VIIRS_values_good_resamp_" 21 | lithresh<-.001 22 | LCyr<-2012L 23 | maxsampN<-500L 24 | totNmonths<-69L 25 | avgNpmonththresh<-5 26 | 27 | # x-y information 28 | xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds")) 29 | (matchDT<-xyDT[grep('n',id),list(id)]) 30 | # land cover info 31 | (LCDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_landcover_values_',LCyr,'.rds'))) 32 | (lcs2keep<-sort(intersect(unique(LCDT[grep('s',id)][['lc_type']]),unique(LCDT[grep('n',id)][['lc_type']])))) 33 | (lcs2keep<-lcs2keep[lcs2keep<=16]) 34 | (matchDT<-merge(matchDT,LCDT[lc_type%in%lcs2keep],by='id')) 35 | # nearby settlement cells 36 | (setdatDT<-readRDS(paste0('data/',ctry,'/',ctry,'_id_set_stats_15as_cell_dist1.rds'))) 37 | (matchDT<-merge(matchDT,setdatDT[edge==F&N_15as_set_dist1==0,list(id)],by='id')) 38 | ids2keep<-matchDT[['id']] 39 | rm(setdatDT,LCDT,xyDT);gc() 40 | 41 | (rdsfs<-list.files(paste0("data/",ctry,rdsDir),paste0(ctry,"_daily_VIIRS_values_good_resamp_201[2-7][0-1][0-9]_id\\.rds"),full.names=F)) 42 | (tifmonths<-sub("(.*?)(201[0-9][0-1][0-9]?)_id\\.rds$","\\2",rdsfs)) 43 | (ntifmonths<-length(tifmonths)) 44 | stopifnot(ntifmonths==69L) 45 | 46 | nsmsDT<-vector('list',69L) 47 | for(i in 1:ntifmonths){ 48 | m<-tifmonths[i] 49 | dt<-data.table(id=readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_id.rds"))) 50 | set(dt,NULL,'r9',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_rade9.rds"))) 51 | set(dt,NULL,'li',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_li.rds"))) 52 | dt<-dt[li<=lithresh,list(id,r9)] 53 | dt<-dt[id%chin%ids2keep] 54 | nsmsDT[[i]]<-dt 55 | } 56 | # nsmsDT<-foreach(m=tifmonths,.inorder=F,.options.multicore=list(preschedule=F))%dopar%{ 57 | # dt<-data.table(id=readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_id.rds"))) 58 | # set(dt,NULL,'r9',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_rade9.rds"))) 59 | # set(dt,NULL,'li',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_li.rds"))) 60 | # dt<-dt[li<=lithresh,list(id,r9)] 61 | # dt<-dt[id%chin%ids2keep] 62 | # return(dt) 63 | # } 64 | gc() 65 | # get total number of rows in case too large 66 | (Nrows<-sum(as.numeric(unlist(lapply(nsmsDT,nrow))))) 67 | if(Nrows>.Machine$integer.max){ 68 | (Nsplits<-ceiling(Nrows/.Machine$integer.max)) 69 | idchunks<-chunk2(ids2keep,Nsplits) 70 | nsmsDT2<-vector('list',Nsplits) 71 | for(chun in 1:Nsplits){ 72 | message(paste('working on',chun)) 73 | tmpDT<-rbindlist(lapply(nsmsDT,function(xx) xx[id%chin%idchunks[[chun]]]),fill=T) 74 | tmpDT<-tmpDT[,list(r9m=mean(r9),r9s=sd(r9),N=.N),by='id'] 75 | nsmsDT2[[chun]]<-tmpDT 76 | rm(tmpDT);gc() 77 | } 78 | nsmsDT<-rbindlist(nsmsDT2,fill=T) 79 | rm(ids2keep,idchunks,Nsplits,nsmsDT2);gc() 80 | } else { 81 | (nsmsDT<-rbindlist(nsmsDT,fill=T)) 82 | rm(ids2keep);gc() 83 | (nsmsDT<-nsmsDT[,list(r9m=mean(r9),r9s=sd(r9),N=.N),by='id']) 84 | } 85 | saveRDS(nsmsDT,paste0("data/",ctry,"/VIIRS/daily/",ctry,'_regdat_iso_nsets_musd.rds')) 86 | 87 | (nsmsDT<-na.omit(nsmsDT[N>=(totNmonths*avgNpmonththresh)])) 88 | gc() 89 | 90 | set.seed(48105) 91 | tmplist<-foreach(i=1:length(lcs2keep),.inorder=F,.options.multicore=list(preschedule=F,mc.set.seed=F))%dopar%{ 92 | # i<-8L 93 | (lctt<-lcs2keep[i]) 94 | tids<-matchDT[lc_type==lctt][['id']] 95 | (tnsmsdt<-nsmsDT[id%chin%tids]) 96 | (tmquants<-quantile(tnsmsdt[['r9m']],c(.01,.5),na.rm=T)) 97 | (tmquantdif<-tmquants[[2]]-tmquants[[1]]) 98 | (tmthreshes<-c(tmquants[1L],tmquants[2L]+tmquantdif)) 99 | 
(tsquants<-quantile(tnsmsdt[['r9s']],c(.01,.5),na.rm=T)) 100 | (tsquantdif<-tsquants[[2]]-tsquants[[1]]) 101 | (tsthreshes<-c(tsquants[1L],tsquants[2L]+tsquantdif)) 102 | threshdt<-data.table(lc_type=lctt,r9m_lo=tmthreshes[1L],r9m_hi=tmthreshes[2L],r9m_q99=quantile(tnsmsdt[['r9m']],.99,na.rm=T), 103 | r9s_lo=tsthreshes[1L],r9s_hi=tsthreshes[2L],r9s_q99=quantile(tnsmsdt[['r9s']],.99,na.rm=T)) 104 | tids2keep<-tnsmsdt[r9m>tmthreshes[1L]&r9mtsthreshes[1]&r9s1]) 58 | set(aggdt,NULL,'r9m',aggdt[['r9sum']]/aggdt[['N']]) 59 | set(aggdt,NULL,'r9sum',NULL) 60 | aggdt 61 | 62 | saveRDS(aggdt,paste0("data/",ctry,"/VIIRS/daily/",ctry,'_regdat_iso_nsets_muN.rds')) 63 | 64 | 65 | tmplist<-vector('list',ntifmonths) 66 | ids2keep<-aggdt[['id']] 67 | 68 | for(i in 1:ntifmonths){ 69 | # i<-1L 70 | m<-tifmonths[i] 71 | dt<-data.table(id=readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_id.rds"))) 72 | set(dt,NULL,'r9',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_rade9.rds"))) 73 | set(dt,NULL,'li',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_li.rds"))) 74 | dt<-dt[li<=lithresh,list(id,r9)] 75 | dt<-merge(dt,aggdt[,list(id,r9m)],by='id') 76 | set(dt,NULL,'r9s_tmp',(dt[['r9']]-dt[['r9m']])^2) 77 | dt<-dt[,list(r9s_tmp=sum(r9s_tmp)),by='id'] 78 | tmplist[[i]]<-dt 79 | rm(dt);gc() 80 | message(paste('finished reading',m)) 81 | } 82 | message('finished reading nonset data') 83 | 84 | (tmplist<-rbindlist(tmplist,fill=T)) 85 | (tmplist<-tmplist[,list(r9s_tmp=sum(r9s_tmp)),by='id']) 86 | 87 | (nsmsDT<-merge(aggdt,tmplist,by='id')) 88 | rm(tmplist,aggdt);gc() 89 | set(nsmsDT,NULL,'r9s',sqrt(nsmsDT[['r9s_tmp']]/(nsmsDT[['N']]-1))) 90 | set(nsmsDT,NULL,'r9s_tmp',NULL) 91 | setcolorder(nsmsDT,c('id','r9m','r9s','N')) 92 | nsmsDT 93 | 94 | saveRDS(nsmsDT,paste0("data/",ctry,"/VIIRS/daily/",ctry,'_regdat_iso_nsets_musd.rds')) 95 | 96 | (nsmsDT<-na.omit(nsmsDT[N>=(totNmonths*avgNpmonththresh)])) 97 | rm(Nrows);gc() 98 | 99 | message('finished summarizing nonset data') 100 | 101 | fids2keep<-threshes<-vector('list',length(lcs2keep)) 102 | set.seed(48105) 103 | for(i in 1:length(lcs2keep)){ 104 | # i<-1L 105 | (lctt<-lcs2keep[i]) 106 | tids<-matchDT[lc_type==lctt][['id']] 107 | # remove if r9s is missing, which is done if N>1. but also in case anything weird happened? 
108 | # also, require that there be on average at least 5 "good" observations per month 109 | (tnsmsdt<-nsmsDT[id%chin%tids]) 110 | (tmquants<-quantile(tnsmsdt[['r9m']],c(.01,.5))) 111 | (tmquantdif<-tmquants[[2]]-tmquants[[1]]) 112 | (tmthreshes<-c(tmquants[1L],tmquants[2L]+tmquantdif)) 113 | (tsquants<-quantile(tnsmsdt[['r9s']],c(.01,.5))) 114 | (tsquantdif<-tsquants[[2]]-tsquants[[1]]) 115 | (tsthreshes<-c(tsquants[1L],tsquants[2L]+tsquantdif)) 116 | threshes[[i]]<-data.table(lc_type=lctt,r9m_lo=tmthreshes[1L],r9m_hi=tmthreshes[2L],r9m_q99=quantile(tnsmsdt[['r9m']],.99), 117 | r9s_lo=tsthreshes[1L],r9s_hi=tsthreshes[2L],r9s_q99=quantile(tnsmsdt[['r9s']],.99)) 118 | tids2keep<-tnsmsdt[r9m>tmthreshes[1L]&r9mtsthreshes[1]&r9s(-1.5)];gc() 114 | # only keep if LI is below .025 115 | val_dt<-val_dt[li<.025];gc() 116 | # assign the day value pulled from the vflag TIFF name 117 | set(val_dt,NULL,"day",as.integer(substr(vfn,12,13))) 118 | # assign the time of the overpass by extracting it from the vflag TIFF name 119 | set(val_dt,NULL,"stime",substr(vfn,16L,22L)) 120 | # return the data.table 121 | return(val_dt) 122 | } 123 | # bind results into a list 124 | res<-rbindlist(res,fill=T) 125 | # store the results in the empty slot of the list created above 126 | FinDT[[tifgrp]]<-res 127 | rm(res,tmptiffs2);gc() 128 | } 129 | # rowbind data.tables together 130 | FinDT<-rbindlist(FinDT,fill=T) 131 | # remove unnecessary 132 | rm(tmptiffs,ntmptiffs,otntmptiffs,tifgrps,ntifgrps);gc() 133 | # if the data.table is empty 134 | if(nrow(FinDT)==0L){ 135 | # report no data 136 | message(paste("no data extracted for",m));flush(stdout()) 137 | # otherwise 138 | } else { 139 | # for each column in the data.table 140 | for(col in fincols2write){ 141 | # save the column as a separate vector 142 | saveRDS(FinDT[[col]],paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_",col,".rds")) 143 | # remove the column 144 | set(FinDT,NULL,which(names(FinDT)==col),NULL) 145 | } 146 | } 147 | # cleanup 148 | rm(FinDT);gc() 149 | message(paste("finished",m)) 150 | } 151 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # High Resolution Electricity Access (HREA) Indicators 2 | ### New Methods to Estimate Electricity Access Using Nightly VIIRS Satellite Imagery 3 | 4 | Brian Min (brianmin@umich.edu) 5 | 6 | Zachary O’Keeffe (zokeeffe@umich.edu) 7 | 8 | University of Michigan 9 | 10 | 11 | Rev. September 2020 12 | 13 | # Introduction 14 | 15 | We introduce a new method to generate likelihood estimates of electricity access for all areas with human settlements within a country by identifying statistical anomalies in brightness values that are plausibly associated with electricity use, and unlikely to be due to exogenous factors. 16 | 17 | On every night, the VIIRS DNB sensor collects data on the observed brightness over all locations within a country, including over electrified and unelectrified areas, and populated and unpopulated areas. Our objective is to classify populated areas as electrified or not using all the brightness data over a country. But the challenge is that light output can be due to multiple sources unrelated to electricity use. Notably, the VIIRS sensor is so sensitive that it picks up light from overglow, atmospheric interactions, moonlight, and variations in surface reflectivity across types of land cover. 
We refer collectively to these exogenous sources as background noise, which must be accounted for to classify whether an area is brighter than expected. 18 | 19 | We use data on light output detected over areas with no settlements or buildings to train a statistical model of background noise. The model can be used to generate an expected brightness value on every given night for every given location. We then compare the observed brightness on each night against the expected baseline brightness value. Areas with human settlements with brighter light output than expected are assumed to have access to electricity on that night. We classify all settlements on all nights and then average the estimates and generate an "Artificial Light Score" for each calendar year for all settlement areas. Areas that are much brighter than would be expected on most nights have the highest probability of being electrified. Areas that are as dim as areas with no settlements have the lowest probability of being electrified. And areas that are a little brighter on some nights have middling scores. 20 | 21 | The advantage of this process is that it uses all available nightly data from the VIIRS data stream while taking into account sources of known noise and variability. The process also allows for the identification of areas where the likelihood of electricity access and use is uncertain (the areas with middling scores). This is significant given that traditional binary measures of access do not account for variations in levels of use or reliability of power supply, even across areas that are all nominally electrified. These data may therefore be helpful in identifying baseline variations in access and reliability within countries, consistent with the objectives of the Multi-tier Framework for measuring energy access (ESMAP 2015). 22 | 23 | We explain the process in more detail below: 24 | 25 | 1. **Select random sample of locations with no settlements to measure background noise.** 26 | We select a stratified random sample of isolated non-settlement pixels, which are identified by overlaying the 1 arcsecond (as) settlement pixel grid on the 15 as VIIRS grid. Candidate 15 as pixels are those that 1) contain zero 1 as settlement pixels; and 2) are not adjacent to any 15 as cells containing settlement cells. We stratify based on the type of land, selecting up to 500 pixels per category. As there are 17 categories, the theoretical maximum is 8,500. 27 | 1. **Select observations.** 28 | Following NOAA guidelines and their data quality flags, we drop bad quality data, including those with heavy cloud cover and excessive sensor noise. NOAA also drops many nights with high lunar illumination. We relax this threshold slightly and keep nightly observations where lunar illumination is below .001 lux. Furthermore, on nights with multiple overpasses, we use data with the earliest local timestamp for settlement points, but allow multiple observations for non-settlement points. 29 | 1. **Remove outliers.** 30 | To generate a reliable estimate of background noise, we need to exclude outliers. Presumably, unusually high brightness values in unsettled areas are not due to background noise. To accomplish this, we first apply a logarithmic transformation to observed brightness to make the distribution more normal. Then we calculate the median and standard deviation. Observations are removed if they are above 4 standard deviations from the median on this metric. 
Then we stratify by land type and date and, using the original scale, calculate the mean and standard deviation. Observations more than 4 standard deviations above the mean are removed. A minimal sketch of this screen appears at the end of this introduction. 31 | 1. **Create statistical model of background noise.** 32 | We fit a linear mixed effects model to learn the impacts of exogenous factors, using all non-settlement data from all good quality nights across all years (2012–2017). There is a single random effect: date. There are five fixed effects: lunar illumination, local time, calendar month, land type, and the interaction between land type and lunar illumination (plus an intercept). Notably, the regression diagnostics are excellent. Outliers are not present, the linear relationships specified hold well, and heteroskedasticity is not an issue. The residual distributions look approximately normal, the variance is roughly constant, and the fitted relationships appear linear. 33 | Using the statistical parameters learned from data on non-settlement areas, we then calculate the expected level of light output for all areas with settlements. These predicted values represent a counterfactual estimate of how much light would be expected on that specific day on that type of land, if the only sources of light were from background noise and other exogenous factors. Areas with consistently higher observed light output than expected are assumed to have electricity access. 34 | 1. **Identify electrified settlement areas on each night.** 35 | We compare observed levels of light output against the statistically estimated baseline light output level for every settlement pixel on every night. This generates residuals, which we standardize by dividing by the sigma from the model (the standard deviation of residuals, with a degrees of freedom adjustment) to generate z-scores for each pixel on each night. Higher z-scores imply much higher light output than expected. We assume that higher scores are correlated with a higher likelihood that a settlement is using electricity on that specific night. 36 | 1. **Aggregate nightly estimates to generate "Artificial Light Score" values for all settlement areas for each year.** 37 | For each year, we average all nightly z-scores for each settlement cell. We then calculate the corresponding quantile value assuming a standard normal distribution, which transforms the average z-score to be between 0 and 1. We then subtract .5 from this value, divide the result by .5, and set negative values to 0. A minimal sketch of this scoring step appears at the end of this introduction. This produces annual Artificial Light Scores for each pixel, which also lie between 0 and 1. On this scale, .95 roughly corresponds to having observed light scores that are on average above 2 standard deviations from expected. Meanwhile, 0 corresponds to having observed radiance values that are on average lower than the expected values for comparable isolated non-settlement pixels. 38 | 39 | This GitHub repository contains several R scripts that are used to produce these high resolution settlement electrification estimates for a given country. Additional output includes annual composite GeoTIFFs of visible light. Many of the scripts pull variables from the environment (the required variables are specified at the top of the scripts). They are written in a way that allows the user to specify the time and country from the command line, and then submit the script as a job to a computer cluster (these scripts require substantial memory and processing power, and may take a long time to run). Descriptions of required files and what each script does can be found below. 
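To make the outlier screen in step 3 concrete, here is a minimal sketch in R under stated assumptions: the data.table `obs` and its columns (`rade9`, `lc_type`, `date`) are hypothetical stand-ins for the non-settlement observations, the log offset is illustrative, and the first cut is shown as symmetric around the median ("from the median"); the actual scripts may differ in these details.

```r
library(data.table)
set.seed(1)

# hypothetical non-settlement observations: one row per pixel-night
obs <- data.table(rade9   = rexp(10000, rate = 2),
                  lc_type = sample(1:17, 10000, replace = TRUE),
                  date    = sample(as.IDate("2014-01-01") + 0:30, 10000, replace = TRUE))

# stage 1: log-transform, then drop observations more than 4 SD from the median on the log scale
obs[, lr9 := log(rade9 + 1)]          # illustrative offset; raw radiances can be near zero
med <- median(obs$lr9)
s   <- sd(obs$lr9)
obs <- obs[abs(lr9 - med) <= 4 * s]

# stage 2: within land type and date, drop observations more than 4 SD above the mean (original scale)
obs[, `:=`(mu = mean(rade9), sdev = sd(rade9)), by = .(lc_type, date)]
obs <- obs[rade9 <= mu + 4 * sdev]
```

Grouping by `lc_type` and `date` mirrors the stratification described above; the second cut is one-sided because only unusually bright non-settlement pixels are treated as contamination.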
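The scoring arithmetic in steps 5 and 6 is likewise small enough to sketch directly; the vectors below (`resid`, `model_sigma`, `year`) are hypothetical inputs standing in for the nightly regression residuals of a single settlement pixel, the model's residual standard deviation, and the calendar year of each observation.

```r
# hypothetical inputs for one settlement pixel
resid       <- c(0.4, 2.1, 1.7, -0.2, 3.0)     # nightly regression residuals
model_sigma <- 1.1                             # residual SD (sigma) from the mixed model
year        <- c(2014, 2014, 2014, 2015, 2015)

# step 5: standardize residuals into nightly z-scores
z <- resid / model_sigma

# step 6: average z-scores by year, map through the standard normal CDF,
# then subtract .5, divide by .5, and floor negative values at 0
mean_z <- tapply(z, year, mean)
als    <- pmax((pnorm(mean_z) - 0.5) / 0.5, 0)
als    # annual Artificial Light Scores, each between 0 and 1
```

As a check on the interpretation above, an average z-score of 2 gives pnorm(2) ≈ 0.977, and (0.977 - 0.5) / 0.5 ≈ 0.95.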
40 | 41 | # Required Files 42 | 43 | The scripts here assume the user has access to several files. These include: 44 | 45 | ## FB_countries.csv 46 | 47 | This matches "short" names of countries to "long" names. The short names of countries are the names of the country folders, while the long names are the names used in the World Bank electrification estimates spreadsheet. 48 | 49 | ## pct_pop_w_electricity_1990-2017.csv 50 | 51 | This CSV contains estimates of access to electricity as a percentage of the population per country, from 1990-2017. Data are obtained from the World Bank's Sustainable Energy for All database: https://data.worldbank.org/indicator/EG.ELC.ACCS.ZS. 52 | 53 | ## new_good_vflag_ints_no_li.txt 54 | 55 | This is a text file with all the VIIRS flag integers corresponding to "good" values (the values that are to be kept in the extraction process). 56 | 57 | ## Country Boundary Shapefile (filename varies by country) 58 | 59 | This is an ESRI Shapefile of the country boundary. It is used to create edited settlement GeoTIFFs, as well as determine which VIIRS images overlap with the country. Files are taken from GADM: https://gadm.org/download_country_v3.html. 60 | 61 | ## High Resolution Population Density Maps (filename varies by country) 62 | 63 | These 1 arcsecond (as) GeoTIFFs identify where people live. These "settlement" layers also contain population estimates per cell. Data are provided by Facebook: https://data.humdata.org/organization/facebook?res_format=zipped%20geotiff&q=&ext_page_size=25. 64 | 65 | # Description of Scripts 66 | 67 | The following describes the files contained on this GitHub repository. The files should be executed in the order they are described; subsequent scripts require output from previous ones. 68 | 69 | ## 0_convert_country_boundary_to_coordinate_lines.R 70 | 71 | Reformats country boundary shapefile. 72 | 73 | ## 1_make_settlement_rasters.R 74 | 75 | Creates 1 and 15 arc second settlement rasters using the Facebook High Resolution Settlement Layer files and country boundary shapefile. Cells with settlements are populated with 1s, with the rest of the cells coded as empty. 76 | 77 | ## 2.0_gen_country_VIIRS_XYs.R 78 | 79 | Creates data.tables of cell information. Specifically, one is created that matches 1as settlement cells to 15as cells. Two others store long-lat information associated with 15as cells, for settlement and non-settlement cells. A final data.table is created that includes both settlement and non-settlement cells, their long-lat information, and unique identifiers. 80 | 81 | ## 2.1_make_country_lon_tz_offset_files.R 82 | 83 | Creates a data.table of "local" time offsets for each 15as cell in the country based on longitude. 84 | 85 | ## 2.2_get_pop_per_1as_cell.R 86 | 87 | Extracts population estimates from original FB raster for each 1as settlement cell. 88 | 89 | ## 2.3_make_prp_set_per_cell.R 90 | 91 | Calculates the proportion of each 15as cell that is populated with 1as settlement cells. 92 | 93 | ## 3_find_overlapping_VIIRS_TIFFs.R 94 | 95 | Determines which VIIRS GeoTIFFs overlap with the country boundary. 96 | 97 | ## 4.1_find_overlapping_landcover_tiles.R 98 | 99 | Determines which landcover tiles overlap with the country boundary. 100 | 101 | ## 4.2_extract_xy_landcover_values.R 102 | 103 | Extracts land type values for each 15as cell in the country. 104 | 105 | ## 5_extract_good_daily_VIIRS_data.R 106 | 107 | For a given month, extracts data from the VIIRS GeoTIFFs for all country cells and stores them as RDS files. 
Specifically, it extracts the li (lunar illumination), vflag (quality flag), rade9 (visible radiance), rad (infrared radiance), and sample position values. "High-quality" data are returned by month and saved as separate files. In the end, for each month of data, each of the following data.table columns is saved as its own file: id, day, stime (start time of image), rade9, rad, li, and sample. 108 | 109 | ## 6_find_good_times.R 110 | 111 | Converts image capture start times to "local" times. Along with storing these data by month, it also creates vectors that identify, for each date, the earliest timestamped image in case of overlap. 112 | 113 | ## 7_gen_r9_musd.R 114 | 115 | For each year, reads in the data and subsets it. Data are dropped if the timestamp is "bad" (see previous script) or if the lunar illumination (LI) is above .0005. A logged version of the visible light (rade9) is created. Then, for each 15as cell, the mean and standard deviation of rade9 and its logged version are calculated and saved as a data.table. These values are used in the following script to generate 15as annual composite GeoTIFFs of visible light. 116 | 117 | ## 8_make_VIIRS_average_raster.R 118 | 119 | For a year, creates a GeoTIFF of the 15as country cells with the values of the mean logged rade9 (mean logged rade9 is chosen over mean rade9 because of the highly skewed nature of observed light). 120 | 121 | ## 9.0_gen_set_elec_based_on_year_wb_val.R 122 | 123 | Generates files that identify whether a settlement cell should be classified as "lit" or not based on World Bank classifications, combined with population and visible light data. Also creates tables with the following statistics: electrification rate according to World Bank data, proportion of 15as cells that are considered electrified, and the minimum mean logged rade9 and rade9 values that count as electrified. 124 | 125 | ## 9.1_make_set_lit_rasters_wb_erate.R 126 | 127 | Creates 1as country settlement cell GeoTIFFs for each year, where 0 represents an unelectrified settlement cell, and 1 represents an electrified settlement cell, based on World Bank electrification estimates and interpolated population. 128 | 129 | ## 10.0_find_nearby_set_cells.R 130 | 131 | Determines, for each cell, how many settlement cells are adjacent. The purpose is to find isolated non-settlement cells, which are presumed to be dark. 132 | 133 | ## 10.1_prep_nset_dat_for_r9_regs.R 134 | 135 | Prepares nonsettlement cell data for the regression. Data are subsetted based on lunar illumination, and outliers are removed based on their mean and standard deviation. 136 | 137 | ## 10.1_prep_nset_dat_for_r9_regs_bigcountry.R 138 | 139 | This is a different version of the previous script that works with large countries (the previous one will fail for certain large countries). 140 | 141 | ## 10.2_prep_set_dat_for_r9_regs.R 142 | 143 | Prepares settlement data for the regressions. Data are subsetted based on lunar illumination and timestamp. 144 | 145 | ## 10.3_multiyear_reg.R 146 | 147 | Further removes outliers from the nonsettlement data and fits a linear mixed-effects model predicting light output from the timestamp, date, lunar illumination, and land type. 148 | 149 | ## 10.4_reg_resids.R 150 | 151 | Predicts the light output of settlement cells assuming they behaved like nonsettlement cells. The differences (the regression residuals) form the basis for determining whether cells are considered electrified or not. 
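For orientation, the structure of the model described in 10.3 (and in step 4 of the introduction) can be sketched with lme4 as below. Everything here is illustrative: `nset` and its columns (`lr9`, `li`, `stime`, `month`, `lc_type`, `date`) are hypothetical placeholders generated for the example, and the response variable, predictor coding, and exact formula in 10.3_multiyear_reg.R may differ. The final lines mirror the 10.4 step of predicting expected background brightness for settlement observations and standardizing the residuals by the model sigma.

```r
# a minimal sketch of the modeling structure, not the script itself
library(lme4)
library(data.table)
set.seed(1)

# hypothetical non-settlement training data: one row per pixel-night
nset <- data.table(li      = runif(2000, 0, .001),
                   stime   = runif(2000, 0, 24),
                   month   = sample(1:12, 2000, replace = TRUE),
                   lc_type = sample(1:5, 2000, replace = TRUE),
                   date    = sample(format(as.Date("2014-01-01") + 0:199), 2000, replace = TRUE))
nset[, lr9 := 0.1 + 50 * li + rnorm(.N, sd = 0.05)]   # fake logged radiance

# random intercept for date; fixed effects for lunar illumination, local time,
# calendar month, land type, and the land type x lunar illumination interaction
fit <- lmer(lr9 ~ li + stime + factor(month) + factor(lc_type) + factor(lc_type):li + (1 | date),
            data = nset)

# 10.4-style use: predicted background brightness for (stand-in) settlement observations,
# with residuals standardized by the model sigma to give nightly z-scores
setdat   <- nset[1:5]
expected <- predict(fit, newdata = setdat, allow.new.levels = TRUE)
z        <- (setdat$lr9 - expected) / sigma(fit)
```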
152 | 153 | ## 10.5_make_reg_set_lit_rasters.R 154 | 155 | Creates 1as GeoTIFFs of settlement cells with values corresponding to their electrification scores. Multiple metrics are employed. 156 | 157 | ## 10.6_comp_reg_w_wb_erate.R 158 | 159 | Compares the population-weighted electrification status of settlement cells, as calculated from the regressions, with the values reported by the World Bank for each year for a given country. 160 | --------------------------------------------------------------------------------