├── 2.1_make_country_lon_tz_offset_files.R
├── 0_convert_country_boundary_to_coordinate_lines.R
├── 8_make_VIIRS_average_raster.R
├── 2.3_make_prp_set_per_cell.R
├── 2.2_get_pop_per_1as_cell.R
├── 4.2_extract_xy_landcover_values.R
├── 4.1_find_overlapping_landcover_tiles.R
├── 10.4_reg_resids.R
├── 10.5_make_reg_set_lit_rasters.R
├── 10.3_multiyear_reg.R
├── 10.2_prep_set_dat_for_r9_regs.R
├── 1_make_settlement_rasters.R
├── 9.1_make_set_lit_rasters_wb_erate.R
├── 7_gen_r9_musd.R
├── 10.6_comp_reg_w_wb_erate.R
├── 10.0_find_nearby_set_cells.R
├── 6_find_good_times.R
├── 9.0_gen_set_elec_based_on_year_wb_val.R
├── 3_find_overlapping_VIIRS_TIFFs.R
├── 2.0_gen_country_VIIRS_XYs.R
├── 10.1_prep_nset_dat_for_r9_regs.R
├── 10.1_prep_nset_dat_for_r9_regs_bigcountry.R
├── 5_extract_good_daily_VIIRS_data.R
└── README.md

/2.1_make_country_lon_tz_offset_files.R:
--------------------------------------------------------------------------------
 1 | # load the following packages
 2 | sapply(c("data.table","bit64"),require,character.only=T)
 3 |
 4 | # get the following variables from the environment
 5 | (ctry<-Sys.getenv("ctry")) # country
 6 | (wd<-Sys.getenv("wd")) # working directory
 7 | # set working directory
 8 | setwd(wd)
 9 |
10 | # read in data.table of 15as cells
11 | xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))
12 | # keep ID and x value
13 | (xyDT<-xyDT[,list(id,x)])
14 | # create time offset in seconds by multiplying the x value (longitude in degrees) by 240 (86400 seconds per day / 360 degrees of longitude)
15 | set(xyDT,NULL,'secOffset',xyDT[['x']]*240)
16 |
17 | # save as RDS
18 | saveRDS(xyDT[,list(id,secOffset)],paste0("data/",ctry,"/",ctry,"_id_secOffset.rds"))
--------------------------------------------------------------------------------
/0_convert_country_boundary_to_coordinate_lines.R:
--------------------------------------------------------------------------------
 1 | # load the following required packages
 2 | sapply(c("data.table","raster","rgdal","rgeos"),require,character.only=T)
 3 |
 4 | # get the following variables from the environment:
 5 | (wd<-Sys.getenv("wd")) # working directory
 6 | (ctry<-Sys.getenv("ctry")) # country
 7 | # set working directory
 8 | setwd(wd)
 9 |
10 | # directory of country shapefile
11 | (shpDir<-dir(paste0('data/',ctry,'/shapefiles'),'gadm36_[A-Z][A-Z][A-Z]_shp',full.names=T))
12 | # find top-level GADM shapefile
13 | (shp<-list.files(shpDir,'gadm36_[A-Z][A-Z][A-Z]_0\\.shp',full.names=T))
14 | # fail if not found
15 | stopifnot(length(shp)==1)
16 |
17 | # read in list of countries
18 | (FBctries<-fread('data/FB/FB_countries.csv'))
19 | # get longform name of the country
20 | (country<-FBctries[country_short==ctry][['country_long']])
21 |
22 | # read country shapefile
23 | (Shp<-shapefile(shp))
24 | # disaggregate
25 | (Shp<-disaggregate(Shp))
26 | # simplify
27 | (Shp<-gSimplify(Shp,.01,T))
28 | # assign country name to shapefile
29 | Shp$id<-rep(country,length(Shp))
30 | # write shapefile
31 | writeOGR(Shp,paste0("data/",ctry,"/shapefiles"),paste0(ctry,"_noproj_disag_simp"),driver="ESRI Shapefile",overwrite_layer=T)
--------------------------------------------------------------------------------
/8_make_VIIRS_average_raster.R:
--------------------------------------------------------------------------------
 1 | # load the following required packages
 2 | sapply(c("data.table","bit64","raster"),require,character.only=T)
 3 | # get the following variables from environment
 4 | (YYYY<-as.integer(Sys.getenv('YYYY'))) # year
 5 | (ctry<-Sys.getenv("ctry")) # country
 6 | (wd<-Sys.getenv("wd")) # working directory
 7 |
# set working directory 8 | setwd(wd) 9 | 10 | # open 15as country cell raster (to be filled later) 11 | (rast<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_15as.tif'),band=1)) 12 | # set all cells to empty 13 | rast[]<-NA_real_;gc() 14 | # read the 15as id, coordinate, and cell data for the country 15 | (xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))) 16 | # read in the file with mean logged rade9 17 | r9dt<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_good_r9_musd_",YYYY,".rds")) 18 | # merge the mean logged rade9 data with the 15as cell position data by id 19 | (r9dt<-merge(r9dt[,list(id,r9lm)],xyDT[,list(id,cell_resamp)],by='id'));gc() 20 | # fill cell positions with mean logged rade9 data in raster 21 | rast[r9dt[['cell_resamp']]]<-r9dt[['r9lm']] 22 | # remove data.table 23 | rm(r9dt);gc() 24 | # save raster as GeoTIFF 25 | writeRaster(rast,paste0('data/',ctry,'/GeoTIFFs/',ctry,'_rade9lnmu_',YYYY,'.tif'),format="GTiff",overwrite=T,options=c("COMPRESS=LZW")) -------------------------------------------------------------------------------- /2.3_make_prp_set_per_cell.R: -------------------------------------------------------------------------------- 1 | # load following required packages 2 | sapply(c("data.table","bit64"),require,character.only=T) 3 | 4 | # get following variables from environment 5 | (ctry<-Sys.getenv("ctry")) # country 6 | (wd<-Sys.getenv("wd")) # working directory 7 | # set working directory 8 | setwd(wd) 9 | 10 | # read data.table of 15as cells with ID and xy info 11 | (xyDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds'))) 12 | # read data.table of 15as-1as cell matches 13 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 14 | 15 | # get number of 1as settlement cells within each 15as cell 16 | (setNDT<-setmatchDT[,list(N_set_cells=.N),by='cell_resamp']) 17 | # calculate proportion of 15as cell filled with 1as settlement cells by dividing by 225 (the max) 18 | set(setNDT,NULL,'prp_sets',setNDT[['N_set_cells']]/225) 19 | 20 | # merge proportion of settlement cells with 15as cell ID information 21 | (xyDT2<-merge(xyDT[,list(id,cell_resamp)],setNDT[,list(cell_resamp,prp_sets)],all.x=T)) 22 | # if missing values, fill with 0, because that means there are no settlement cells in the cell 23 | set(xyDT2,xyDT2[,.I[is.na(prp_sets)]],which(names(xyDT2)=='prp_sets'),0) 24 | # remove cell_resamp column 25 | xyDT2[,cell_resamp:=NULL] 26 | 27 | # save as RDS 28 | saveRDS(xyDT2,paste0('data/',ctry,'/',ctry,'_resamp_country_cell_id_prp_sets.rds')) -------------------------------------------------------------------------------- /2.2_get_pop_per_1as_cell.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64","raster"),require,character.only=T) 3 | # get the following variables from environment 4 | (ctry<-Sys.getenv("ctry")) # country 5 | (wd<-Sys.getenv("wd")) # working directory 6 | # set working directory 7 | setwd(wd) 8 | 9 | # find high resolution settlement layer file 10 | (fbHRSL<-list.files(paste0('data/',ctry,'/FB'),'population_[a-z][a-z][a-z]_20[0-9][0-9]-[0-9][0-9]-[0-9][0-9]\\.tif$',full.names=T)) 11 | # if file not found, stop 12 | stopifnot(length(fbHRSL)==1) 13 | 14 | # load data.table of 1as to 15as cell matches 15 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 16 | # open 1as raster 17 | 
(rast_1as<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_1as.tif'),band=1)) 18 | # open original FB raster 19 | (rast_pop<-raster(fbHRSL,band=1)) 20 | 21 | # get 1as cells with settlements 22 | rast_1as_cell<-setmatchDT[['cell_orig']] 23 | # get xy coordinates 24 | rast_1as_XY<-xyFromCell(rast_1as,rast_1as_cell) 25 | # get cells from FB raster using xy values 26 | rast_pop_cell<-cellFromXY(rast_pop,rast_1as_XY) 27 | # get values of cells (which are population estimates) 28 | rast_pop_vals<-rast_pop[rast_pop_cell] 29 | 30 | # create data.table of 1as cells and their corresponding population estimates 31 | (popDT<-data.table(cell_orig=rast_1as_cell,pop=rast_pop_vals)) 32 | 33 | # save as RDS 34 | saveRDS(popDT,paste0('data/',ctry,'/',ctry,'_cell_orig_pop.rds')) -------------------------------------------------------------------------------- /4.2_extract_xy_landcover_values.R: -------------------------------------------------------------------------------- 1 | # load following required packages 2 | sapply(c("data.table","bit64","parallel","foreach","doMC","raster"),require,character.only=T) 3 | 4 | # get following variables from environment 5 | (ncores<-as.integer(Sys.getenv("ncores"))) # number of cores 6 | (ctry<-Sys.getenv("ctry")) # country 7 | (wd<-Sys.getenv("wd")) # working directory 8 | # set working directory 9 | setwd(wd) 10 | # register cores for parallel operations 11 | registerDoMC(ncores) 12 | 13 | # specify year of landcover type data 14 | LCyr<-2012L 15 | # make path to landcover file directory 16 | (LCdir<-paste0('data/LandCover/LandCoverTiles',LCyr,'/')) 17 | 18 | # read intersecting landcover tyle information 19 | (inttiles<-readRDS(paste0('data/',ctry,'/',ctry,'_intersecting_landcover_tiles_',LCyr,'.rds'))) 20 | 21 | # read long-lat information for 15as cells in country 22 | (xys<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds'))) 23 | # convert x-y coords to matrix 24 | xyMat<-as.matrix(xys[,list(x,y)]) 25 | # store IDs as vector 26 | ids<-xys[['id']] 27 | rm(xys);gc() 28 | 29 | # for each of the intersecting tiles, to the following, collecting results in a data.table 30 | lc_dt<-rbindlist(foreach(it=inttiles,.options.multicore=list(preschedule=F))%dopar%{ 31 | # open tile raster 32 | (rast<-raster(paste0(LCdir,it),band=1)) 33 | # create data.table of IDs and their corresponding landcover type extracted from raster 34 | (dt<-data.table(id=ids,lc_type=extract(rast,xyMat,method="simple",cellnumbers=F))) 35 | # remove missing 36 | return(na.omit(dt)) 37 | },fill=T) 38 | # get unique values 39 | (lc_dt<-unique(lc_dt)) 40 | 41 | # save result 42 | saveRDS(lc_dt,paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_landcover_values_',LCyr,'.rds')) -------------------------------------------------------------------------------- /4.1_find_overlapping_landcover_tiles.R: -------------------------------------------------------------------------------- 1 | # load following required packages 2 | sapply(c("data.table","bit64","parallel","foreach","doMC","raster",'sf'),require,character.only=T) 3 | 4 | # get following variables from environment 5 | (ncores<-as.integer(Sys.getenv("ncores"))) # number of cores for parallel operations 6 | (ctry<-Sys.getenv("ctry")) # country 7 | (wd<-Sys.getenv("wd")) # working directory 8 | # set working directory 9 | setwd(wd) 10 | # register number of cores for parallel operations 11 | registerDoMC(ncores) 12 | 13 | # year of land classification data 14 | LCyr<-2012L 15 | # directory of land cover tiles 16 | 
(LCdir<-paste0('data/LandCover/LandCoverTiles',LCyr,'/')) 17 | # list of land cover TIFFs 18 | (LCfiles<-list.files(LCdir,'\\.tif$',full.names=F)) 19 | 20 | # for each file 21 | LCrelist<-foreach(f=LCfiles,.inorder=T)%dopar%{ 22 | # open raster 23 | rast<-raster(paste0(LCdir,f),band=1) 24 | # get and return extent 25 | rect<-st_as_sfc(st_bbox(rast)) 26 | return(rect) 27 | } 28 | 29 | # specify country shapefile path 30 | (country_shp<-paste0("data/",ctry,"/shapefiles/",ctry,"_noproj_disag_simp.shp")) 31 | # open shapefile 32 | (countryShp<-st_read(country_shp)) 33 | # project shapefile using CRS from landcover extent 34 | (countryShpTrans<-st_transform(countryShp,st_crs(LCrelist[[1]]))) 35 | 36 | # for each tile 37 | inttiles<-foreach(i=1:length(LCrelist),.combine=c,.inorder=T,.options.multicore=list(preschedule=F))%dopar%{ 38 | # test whether extent of landcover tile overlaps with country shapefile 39 | reint<-st_intersects(countryShpTrans,LCrelist[[i]]) 40 | reint<-ifelse(any(lengths(reint)!=0L),LCfiles[i],NA_character_) 41 | return(reint) 42 | } 43 | # remove missing 44 | (inttiles<-c(na.omit(inttiles))) 45 | 46 | # save result as RDS 47 | saveRDS(inttiles,paste0('data/',ctry,'/',ctry,'_intersecting_landcover_tiles_',LCyr,'.rds')) -------------------------------------------------------------------------------- /10.4_reg_resids.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | # export wd=/nfs/brianmin/work/zokeeffe/current/; export ctry=Brunei; export ncores=6; R 4 | 5 | sapply(c("data.table","lme4"),require,character.only=T) 6 | 7 | (YYYY<-as.integer(Sys.getenv("YYYY"))) 8 | (ctry<-Sys.getenv("ctry")) 9 | (wd<-Sys.getenv("wd")) 10 | # ctry<-'Nepal' 11 | # ncores<-12L 12 | # wd<-'/nfs/brianmin/work/zokeeffe/current/' 13 | setwd(wd) 14 | 15 | conflevs<-c(.85,.9,.95) 16 | (confthreshes<-qnorm(conflevs,mean=0,sd=1)) 17 | (conflevnamestmp<-paste0('lit_conf',conflevs*100)) 18 | (conflevnamesfin<-paste0('prplit_conf',conflevs*100)) 19 | 20 | (rdsDir<-paste0("data/",ctry,"/VIIRS/daily/")) 21 | 22 | (LCt2<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_dropoutfull_LCdt.rds"))) 23 | (dateDT<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_monthfac_dateDT.rds"))) 24 | 25 | mod<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_mod_final.rds")) 26 | mod_sig<-sigma(mod) 27 | 28 | (setfiles<-list.files(rdsDir,paste0(ctry,'_regdat_sets_',YYYY,'[0-1][0-9]\\.rds'),full.names=F)) 29 | (nsetfiles<-length(setfiles)) 30 | 31 | setrmuDT<-vector('list',nsetfiles) 32 | for(i in 1:nsetfiles){ 33 | f<-setfiles[i] 34 | (setDT<-readRDS(paste0(rdsDir,f))) 35 | (setDT<-merge(setDT,dateDT,by='locdatechar')) 36 | (setDT<-merge(setDT,LCt2,by='lc_type')) 37 | r9preds<-predict(mod,newdata=setDT) 38 | set(setDT,NULL,'r9rs',(setDT[['r9']]-r9preds)/mod_sig) 39 | setDT<-setDT[,list(id,r9rs)] 40 | setrmuDT[[i]]<-setDT 41 | rm(setDT,r9preds);gc() 42 | } 43 | (setrmuDT<-rbindlist(setrmuDT,fill=T)) 44 | 45 | for(x in 1:length(conflevs)){ 46 | set(setrmuDT,NULL,conflevnamestmp[x],as.integer(setrmuDT[['r9rs']]>confthreshes[x])) 47 | } 48 | setrmuDT[,(c('zscore',conflevnamesfin)):=lapply(.SD,mean),.SDcols=c('r9rs',conflevnamestmp),by='id'] 49 | set(setrmuDT,NULL,'lightscore',(pnorm(setrmuDT[['zscore']])-.5)/.5) 50 | 
set(setrmuDT,setrmuDT[,.I[lightscore<0]],which(names(setrmuDT)=='lightscore'),0) 51 | 52 | (setrmuDTf<-unique(setrmuDT[,c('id','zscore','lightscore',conflevnamesfin),with=F])) 53 | 54 | saveRDS(setrmuDTf,paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_my_sets_predvals_",YYYY,".rds")) 55 | -------------------------------------------------------------------------------- /10.5_make_reg_set_lit_rasters.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | sapply(c("data.table","bit64","raster","rgdal"),require,character.only=T) 4 | 5 | (YYYY<-as.integer(Sys.getenv("YYYY"))) 6 | (ctry<-Sys.getenv("ctry")) 7 | (wd<-Sys.getenv("wd")) 8 | # ctry<-'Nepal' 9 | # wd<-'/victor/Work/Brian/current/' 10 | setwd(wd) 11 | 12 | fstem<-'_RE_reg_my_sets_predvals_' 13 | conflevs<-c(.85,.9,.95) 14 | (lscorevars<-c('zscore','lightscore',paste0('prplit_conf',conflevs*100))) 15 | (fintifstems<-paste0('_set_',lscorevars,'_')) 16 | 17 | (xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))) 18 | (xyDT<-xyDT[grep('s',id),list(id,cell_resamp)]) 19 | (cellDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 20 | (cellDT<-merge(xyDT,cellDT,by='cell_resamp')) 21 | set(cellDT,NULL,'cell_resamp',NULL) 22 | rm(xyDT);gc() 23 | 24 | (rast<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_1as.tif'))) 25 | dataType(rast)<-'FLT4S' 26 | rast[]<-NA;gc() 27 | 28 | # read in light score data 29 | (nvaldt<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,fstem,YYYY,".rds"))) 30 | # merge with cell numbers 31 | (nvaldt<-merge(nvaldt[,c('id',lscorevars),with=F],cellDT,by='id')) 32 | # remove ID variable 33 | set(nvaldt,NULL,'id',NULL) 34 | # create index vector 35 | trowvec<-1:nrow(nvaldt) 36 | # split index vector 37 | splits<-split(trowvec,ceiling(seq_along(trowvec)/500000)) 38 | rm(trowvec);gc() 39 | for(rnum in 1:length(lscorevars)){ 40 | # rnum<-2L 41 | (lscorevar<-lscorevars[rnum]) 42 | message(paste("working on var",lscorevar)) 43 | (fin_file<-paste0(wd,'data/',ctry,'/GeoTIFFs/',ctry,fintifstems[rnum],YYYY,'.tif')) 44 | trast<-copy(rast) 45 | for(part in splits){ 46 | nvaldttmp<-nvaldt[part,c('cell_orig',lscorevar),with=F] 47 | trast[nvaldttmp[['cell_orig']]]<-nvaldttmp[[lscorevar]] 48 | rm(nvaldttmp);gc() 49 | } 50 | writeRaster(trast,fin_file,format="GTiff",overwrite=T,options=c("COMPRESS=LZW"),datatype='FLT4S') 51 | rm(trast);gc() 52 | ## resample to 5 as 53 | # (file_5as<-sub(paste0('_',YYYY,'\\.tif$'),paste0('_5as_',YYYY,'.tif'),fin_file)) 54 | # (call1<-paste('gdalwarp -tr 0.00138888888 -0.00138888888 -r average -overwrite -co "COMPRESS=LZW" -multi',fin_file,file_5as)) 55 | # system(call1) 56 | # rm(fin_file,file_5as,call1);gc() 57 | } 58 | -------------------------------------------------------------------------------- /10.3_multiyear_reg.R: -------------------------------------------------------------------------------- 1 | sapply(c("data.table","bit64","parallel","foreach","doMC","lme4"),require,character.only=T) 2 | 3 | (ctry<-Sys.getenv("ctry")) 4 | (ncores<-as.integer(Sys.getenv("ncores"))) 5 | (wd<-Sys.getenv("wd")) 6 | # ctry<-'Nepal' 7 | # ncores<-12L 8 | # wd<-'/nfs/brianmin/work/zokeeffe/current/' 9 | setwd(wd) 10 | registerDoMC(ncores) 11 | 12 | (LCt<-fread('data/LandCover/landcover_classification_modis_composites.csv')) 13 | 14 | 
(rdsDir<-paste0("data/",ctry,"/VIIRS/daily/")) 15 | 16 | (nsetfiles<-list.files(rdsDir,paste0(ctry,'_regdat_iso_nsets_20[0-1][0-9][0-1][0-9]\\.rds'),full.names=F)) 17 | stopifnot(length(nsetfiles)==69L) 18 | 19 | (nsetDT1<-rbindlist(foreach(f=nsetfiles,.inorder=F,.options.multicore=list(preschedule=F))%do%{ 20 | readRDS(paste0(rdsDir,f)) 21 | },fill=T)) 22 | set(nsetDT1,NULL,'r9l',log(nsetDT1[['r9']]+2.5)) 23 | (r9med<-median(nsetDT1[['r9l']])) 24 | (r9sd<-sd(nsetDT1[['r9l']])) 25 | (nsetDT1<-nsetDT1[r9l<=(r9med+4*r9sd),!'r9l',with=F]) 26 | 27 | lcdates<-unique(nsetDT1[,list(lc_type,locdatechar)]) 28 | setorder(lcdates,lc_type,locdatechar) 29 | ## removing outliers ## 30 | (nsetDT2<-rbindlist(foreach(r=1:nrow(lcdates),.inorder=F,.options.multicore=list(preschedule=F))%dopar%{ 31 | # r<-1L 32 | (tdt<-nsetDT1[lc_type==lcdates[r][['lc_type']]&locdatechar==lcdates[r][['locdatechar']]]) 33 | if(nrow(tdt)>=5L){ 34 | r9m<-mean(tdt[['r9']]) 35 | r9s<-sd(tdt[['r9']]) 36 | tdt<-tdt[r9<=(r9m+4*r9s)] # remove if above 4 standard deviations from the mean 37 | } 38 | tdt 39 | },fill=T)) 40 | rm(nsetDT1);gc() 41 | 42 | (LCt2<-LCt[lc_type%in%lcdates[['lc_type']],list(lc_type,label)]) 43 | set(LCt2,NULL,'lc_type_fac',factor(LCt2[['lc_type']],levels=LCt2[[1L]],labels=LCt2[[2L]])) 44 | LCt2<-LCt2[,list(lc_type,lc_type_fac)] 45 | saveRDS(LCt2,paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_dropoutfull_LCdt.rds")) 46 | 47 | dateDT<-unique(nsetDT2[,list(locdatechar)]) 48 | set(dateDT,NULL,'monthfac',factor(as.integer(substr(dateDT[['locdatechar']],6L,7L)),levels=1:12,labels=month.abb)) 49 | saveRDS(dateDT,paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_monthfac_dateDT.rds")) 50 | ## 51 | (nsetDT2<-merge(nsetDT2,LCt2,by='lc_type')) 52 | (nsetDT2<-merge(nsetDT2,dateDT,by='locdatechar')) 53 | 54 | (mod<-lmer(r9~lirescale+timehour+monthfac+lc_type_fac+lc_type_fac:lirescale+(1|locdatechar),data=nsetDT2,REML=F)) 55 | saveRDS(mod,paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_multiyear_mod_final.rds")) 56 | -------------------------------------------------------------------------------- /10.2_prep_set_dat_for_r9_regs.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | sapply(c("data.table","bit64"),require,character.only=T) 3 | 4 | (ctry<-Sys.getenv("ctry")) 5 | (wd<-Sys.getenv("wd")) 6 | # ctry<-'Nepal' 7 | # wd<-'/nfs/brianmin/work/zokeeffe/current/' 8 | setwd(wd) 9 | 10 | rdsDir<-"/VIIRS/daily/CSVs/" 11 | rdsStem<-"_daily_VIIRS_values_good_resamp_" 12 | lithresh<-.001 13 | LCyr<-2012L 14 | 15 | ### CONSTANT INFO ### 16 | # x-y information 17 | xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds")) 18 | if(ctry=='Rwanda'){ 19 | cellsinctry<-readRDS('data/Rwanda/Rwanda_resamp_set_cells_in_country.rds') 20 | (xyDT<-xyDT[!(grepl('s',id)&!(cell_resamp%in%cellsinctry))]) 21 | } 22 | (matchDT<-xyDT[grep('s',id),list(id)]) 23 | 24 | # land cover info 25 | (LCDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_landcover_values_',LCyr,'.rds'))) 26 | (lcs2keep<-sort(intersect(unique(LCDT[grep('s',id)][['lc_type']]),unique(LCDT[grep('n',id)][['lc_type']])))) 27 | (lcs2keep<-lcs2keep[lcs2keep<=16]) 28 | (matchDT<-merge(matchDT,LCDT[lc_type%in%lcs2keep],by='id')) 29 | setkey(matchDT,id) 30 | rm(xyDT,LCDT);gc() 31 | 32 | 
(rdsfs<-list.files(paste0("data/",ctry,rdsDir),paste0(ctry,"_daily_VIIRS_values_good_resamp_201[2-7][0-1][0-9]_id\\.rds"),full.names=F)) 33 | (tifmonths<-sub("(.*?)(201[0-9][0-1][0-9]?)_id\\.rds$","\\2",rdsfs)) 34 | stopifnot(length(tifmonths)==69L) 35 | for(m in tifmonths){ 36 | # m<-tifmonths[1L] 37 | dt<-data.table(id=readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_id.rds"))) 38 | set(dt,NULL,'r9',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_rade9.rds"))) 39 | set(dt,NULL,'li',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_li.rds"))) 40 | set(dt,NULL,'mtimeloc',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_mtimeloc.rds"))) 41 | timebad<-readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_timebadset.rds")) 42 | if(length(timebad)!=0L){ 43 | dt<-dt[-timebad] 44 | } 45 | setkey(dt,id) 46 | dt<-dt[matchDT] 47 | dt<-dt[li<=lithresh] 48 | set(dt,NULL,'lirescale',dt[['li']]/lithresh) 49 | set(dt,NULL,'li',NULL) 50 | set(dt,NULL,'locdatechar',strftime(dt[['mtimeloc']],'%Y-%m-%d')) 51 | set(dt,NULL,'timehour',hour(dt[['mtimeloc']])+minute(dt[['mtimeloc']])/60+second(dt[['mtimeloc']])/3600) 52 | set(dt,NULL,'mtimeloc',NULL) 53 | saveRDS(dt,paste0("data/",ctry,"/VIIRS/daily/",ctry,'_regdat_sets_',m,".rds")) 54 | rm(dt);gc() 55 | message(paste('finished',m)) 56 | } 57 | rm(matchDT);gc() 58 | -------------------------------------------------------------------------------- /1_make_settlement_rasters.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c('raster','parallel'),require,character.only=T) 3 | # get the following variables from the environment 4 | (ctry<-Sys.getenv("ctry")) # country 5 | (wd<-Sys.getenv("wd")) # working directory 6 | (ncores<-as.integer(Sys.getenv("ncores"))) # number of cores 7 | # set working directory 8 | setwd(wd) 9 | # set number of cores for parallelization 10 | options(mc.cores=ncores) 11 | 12 | # specify and create new directory for rasters 13 | (outdir<-paste0('data/',ctry,'/GeoTIFFs/')) 14 | dir.create(outdir,F,T) 15 | 16 | # find Facebook population settlement raster corresponding to the country 17 | (fbHRSL<-list.files(paste0('data/',ctry,'/FB'),'population_[a-z][a-z][a-z]_20[0-9][0-9]-[0-9][0-9]-[0-9][0-9]\\.tif$',full.names=T)) 18 | # fail if not found 19 | stopifnot(length(fbHRSL)==1) 20 | 21 | # load the raster 22 | (setrast<-raster(fbHRSL)) 23 | 24 | # get the number of cells from the raster 25 | (Ncells<-ncell(setrast)) 26 | 27 | # if the raster is too large 28 | if(Ncells>.Machine$integer.max){ 29 | # get the values by row using mclapply 30 | rastvals<-mclapply(1:nrow(setrast),function(X) getValues(setrast,X,1),mc.preschedule=F) 31 | rastvals<-unlist(rastvals) 32 | } else { 33 | # otherwise just extract them directly 34 | rastvals<-setrast[] 35 | } 36 | # find settlement cells (non-empty ones) 37 | setcells<-which(!is.na(rastvals)) 38 | # get long-lat information from settlement cells 39 | setXYs<-xyFromCell(setrast,setcells) 40 | 41 | # read in disaggregated shapefile 42 | (shp<-shapefile(paste0("data/",ctry,"/shapefiles/",ctry,"_noproj_disag_simp"))) 43 | # create empty raster with same resolution based on the extent of the country 44 | (rast1<-raster(x=extent(shp),resolution=c(0.0002777778,0.0002777778),crs='+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0')) 45 | # set datatype 46 | dataType(rast1)<-'INT2S' 47 | # populate with NAs 48 | rast1[]<-NA 49 | # find cells corresponding to settlement coordinates 50 | 
setCells1<-cellFromXY(rast1,setXYs) 51 | # set these to 1 52 | rast1[setCells1]<-1L 53 | # write new 1 arcsecond raster 54 | writeRaster(rast1,paste0(outdir,ctry,'_sets_1as.tif'),overwrite=TRUE,options=c("COMPRESS=LZW")) 55 | 56 | # create a 15 arcsecond raster in a similar fashion 57 | (rast15<-raster(x=extent(shp),resolution=c(0.0041666667,0.0041666667),crs='+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0')) 58 | dataType(rast15)<-'INT2S' 59 | rast15[]<-NA 60 | setCells15<-cellFromXY(rast15,setXYs) 61 | # get unique cells because multiple 1as coordinates should fit in cell 62 | setCells15<-unique(setCells15) 63 | # set cells to 1 64 | rast15[setCells15]<-1L 65 | # write new 15 arcsecond settlement raster 66 | writeRaster(rast15,paste0(outdir,ctry,'_sets_15as.tif'),overwrite=TRUE,options=c("COMPRESS=LZW")) 67 | -------------------------------------------------------------------------------- /9.1_make_set_lit_rasters_wb_erate.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64","raster"),require,character.only=T) 3 | # get the following variables from the environment 4 | (ctry<-Sys.getenv("ctry")) # country 5 | (wd<-Sys.getenv("wd")) # working directory 6 | # set working directory 7 | setwd(wd) 8 | 9 | # specify years to work on 10 | YYYYs<-2012:2017 11 | # specify number of cells to split raster by 12 | splitrastby<-500000 13 | 14 | # read in country cell, id, and xy info 15 | (xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))) 16 | # read in the 15 to 1as cell match data 17 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 18 | # merge two data.tables together 19 | (matchDT<-merge(xyDT[,list(id,cell_resamp)],setmatchDT,by='cell_resamp')) 20 | # remove unnecessary 21 | rm(xyDT,setmatchDT);gc() 22 | 23 | # read in 1as raster 24 | (rast<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_1as.tif'),band=1)) 25 | # fill raster with missing 26 | rast[]<-NA;gc() 27 | # for each year 28 | for(YYYY in YYYYs){ 29 | # announce the year being worked on 30 | message(paste("working on year",YYYY)) 31 | # create name for raster to be created 32 | (fin_file<-paste0(wd,'data/',ctry,'/GeoTIFFs/',ctry,'_wbnaterate_sets_lit_',YYYY,'.tif')) 33 | # specify name of World Bank electrification data to read in 34 | (in_file<-paste0('data/',ctry,'/VIIRS/thresh_lit/',ctry,'_set_id_lit_wb_erate_pop_',YYYY,'.rds')) 35 | # read in WB erate data 36 | (r9lmdtr<-readRDS(in_file)) 37 | # merge with cell information 38 | (r9lmdtr<-merge(r9lmdtr,matchDT,by='id')) 39 | # get settlement cells that are not considered lit 40 | cnl<-r9lmdtr[lit==0L][['cell_orig']] 41 | # get settlement cells that are considered lit 42 | cl<-r9lmdtr[lit==1L][['cell_orig']] 43 | rm(r9lmdtr);gc() 44 | # copy empty raster 45 | trast<-copy(rast);gc() 46 | # if not all settlement cells are unlit, 47 | if(length(cnl)!=0L){ 48 | # split the vector of unlit cell values into groups 49 | cnl<-split(cnl,ceiling(seq_along(cnl)/splitrastby)) 50 | # for each group, set those cells to 0 in the raster 51 | for(part in cnl){trast[part]<-0;gc()} 52 | rm(cnl);gc() 53 | } 54 | # if not all settlement cells are lit, 55 | if(length(cl)!=0L){ 56 | # split the vector of lit cell values into groups 57 | cl<-split(cl,ceiling(seq_along(cl)/splitrastby)) 58 | # for each group, set those cells to 1 in the raster 59 | for(part in cl){trast[part]<-1;gc()} 60 | rm(cl);gc() 61 | } 62 | # 
save the raster as a GeoTIFF
63 | writeRaster(trast,fin_file,format="GTiff",overwrite=T,options=c("COMPRESS=LZW"),datatype='INT2S')
64 | # remove unnecessary
65 | rm(trast,fin_file);gc()
66 | # announce finish
67 | message(paste("finished",YYYY))
68 | }
69 | rm(rast);gc()
70 |
--------------------------------------------------------------------------------
/7_gen_r9_musd.R:
--------------------------------------------------------------------------------
 1 | # load following required packages
 2 | sapply(c("data.table","bit64"),require,character.only=T)
 3 |
 4 | # get following variables from environment
 5 | (YYYY<-as.integer(Sys.getenv('YYYY'))) # year
 6 | (ctry<-Sys.getenv('ctry')) # country
 7 | (wd<-Sys.getenv("wd")) # working directory
 8 | # set working directory
 9 | setwd(wd)
10 |
11 | # name of final file to create
12 | (fin_file<-paste0("data/",ctry,"/VIIRS/daily/",ctry,"_good_r9_musd_",YYYY,".rds"))
13 | # get id files corresponding to that year
14 | (rdsfs<-list.files(paste0("data/",ctry,"/VIIRS/daily/CSVs/"),paste0(ctry,"_daily_VIIRS_values_good_resamp_",YYYY,"[0-1][0-9]_id\\.rds"),full.names=F))
15 | # get months of data
16 | (tifmonths<-sub("(.*?)(201[0-9][0-1][0-9]?)_id\\.rds$","\\2",rdsfs))
17 | # say the months
18 | message(paste('TIFF months are:',paste(tifmonths,collapse=", ")))
19 | # if the year is 2012
20 | if(YYYY==2012L){
21 | # stop if not 9 files
22 | stopifnot(length(tifmonths)==9L)
23 | } else {
24 | # stop if not 12 files
25 | stopifnot(length(tifmonths)==12L)
26 | }
27 | # number of tiffs
28 | ntms<-length(tifmonths)
29 | # empty list for results
30 | fulldt<-vector('list',ntms)
31 | # for each index of tiffs
32 | for(i in 1:ntms){
33 | # get the month
34 | m<-tifmonths[i]
35 | # read in id as column in data.table
36 | dt<-data.table(id=readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_id.rds")))
37 | # read in rade9 as column
38 | set(dt,NULL,'r9',readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_rade9.rds")))
39 | # read in li as column
40 | set(dt,NULL,'li',readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_li.rds")))
41 | # read in index of values to drop based on time
42 | timebad<-readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_timebadset.rds"))
43 | timebadnset<-readRDS(paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_timebadnset.rds"))
44 | # combine the settlement and nonsettlement indices
45 | timebad<-c(timebad,timebadnset)
46 | # if there are any values that should be dropped
47 | if(length(timebad)!=0L){
48 | # drop them
49 | dt<-dt[-timebad]
50 | }
51 | # remove unnecessary
52 | rm(timebadnset,timebad);gc()
53 | # keep data if li is less than .0005
54 | (dt<-dt[li<.0005,!'li',with=F])
55 | # assign data to list slot
56 | fulldt[[i]]<-dt
57 | # remove
58 | rm(dt,m);gc()
59 | }
60 | # bind results together in data.table
61 | (fulldt<-rbindlist(fulldt,fill=T))
62 | rm(rdsfs,tifmonths);gc()
63 | # create logged version of rade9 by adding 2.5 and logging
64 | set(fulldt,NULL,'r9l',log(fulldt[['r9']]+2.5))
65 | # this is to ensure no failure in the following
66 | options(datatable.optimize=1)
67 | # create new data.table that contains mean and standard deviation of rade9 and logged rade9 by cell
68 | (fulldt<-fulldt[,list(r9m=mean(r9),r9s=sd(r9),r9lm=mean(r9l),r9ls=sd(r9l)),by='id'])
69 | # save result
70 | saveRDS(fulldt,fin_file)
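
# --- Illustrative aside (toy example, not one of the pipeline scripts): a minimal,
# --- self-contained sketch of the per-cell summary computed above in 7_gen_r9_musd.R.
# --- The data in toy_dt are invented and the object names toy_dt/toy_mu are made up for
# --- illustration; only the column names (id, r9) mirror the real extracted VIIRS data.
library(data.table)
toy_dt<-data.table(id=c('s1','s1','s1','s2','s2'),r9=c(-0.1,0.4,0.2,3.2,2.8))
# offset by 2.5 before logging so small or slightly negative radiances stay defined
set(toy_dt,NULL,'r9l',log(toy_dt[['r9']]+2.5))
# per-cell mean and standard deviation of raw and logged radiance, mirroring the grouped summary in 7_gen_r9_musd.R
(toy_mu<-toy_dt[,list(r9m=mean(r9),r9s=sd(r9),r9lm=mean(r9l),r9ls=sd(r9l)),by='id'])
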
-------------------------------------------------------------------------------- /10.6_comp_reg_w_wb_erate.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | # export wd=/nfs/brianmin/work/zokeeffe/current/; export ctry=Cambodia; export ncores=6; R 4 | 5 | sapply(c("data.table","bit64","parallel","foreach","doMC"),require,character.only=T) 6 | 7 | (ncores<-as.integer(Sys.getenv("ncores"))) 8 | (ctry<-Sys.getenv("ctry")) 9 | (wd<-Sys.getenv("wd")) 10 | # ncores<-12L 11 | # ctry<-'Nepal' 12 | # country<-'Nepal' 13 | # wd<-'/victor/Work/Brian/current/' 14 | setwd(wd) 15 | registerDoMC(ncores) 16 | 17 | YYYYs<-2012:2017 18 | 19 | # WB electrification data 20 | #eratedat<-fread('data/WorldBank/pct_pop_w_electricity_1990-2016.csv',header=T) 21 | #(FBctries<-fread('data/FB/FB_countries.csv')) 22 | #(country<-FBctries[country_short==ctry][['country_long']]) 23 | eratedat<-fread('data/WorldBank/pct_pop_w_electricity_1990-2017.csv',header=T) 24 | (eratedat<-eratedat[ctry_short==ctry]) 25 | stopifnot(nrow(eratedat)==1) 26 | 27 | conflevs<-c(.85,.9,.95) 28 | (confthreshes<-qnorm(conflevs,mean=0,sd=1)) 29 | (plclnames<-paste0('prplit_conf',conflevs*100)) 30 | prplitthreshes<-c(.25,.5,.75) 31 | (plclgpnames<-unlist(lapply(plclnames,function(x) paste0(x,'_gr',prplitthreshes*100,'pct')))) 32 | (lsnames<-paste0('zscore_conf',conflevs*100)) 33 | 34 | xyDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds')) 35 | xyDT<-xyDT[grep('s',id),list(id,cell_resamp)] 36 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 37 | (popDT<-readRDS(paste0('data/',ctry,'/',ctry,'_cell_orig_pop.rds'))) 38 | # (popDT2<-merge(setmatchDT,popDT,by='cell_orig')) 39 | # could actually just cbind because they are already sorted but w/e, for robustness: 40 | setkey(setmatchDT,cell_orig) 41 | setkey(popDT,cell_orig) 42 | (popDT2<-popDT[setmatchDT]) 43 | (popDT2<-popDT2[,list(totpop=ifelse(all(is.na(pop)),NA_real_,sum(pop,na.rm=T))),by='cell_resamp']) 44 | (matchDT<-merge(xyDT,na.omit(popDT2),by='cell_resamp')) 45 | rm(xyDT,popDT2,popDT);gc() 46 | 47 | (erate_comp<-rbindlist(foreach(YYYY=YYYYs,.inorder=T)%dopar%{ 48 | # YYYY<-2012L 49 | #if(YYYY==2017L){(natl_elec_rate<-eratedat[`Country Name`==country][['2016']])} else {(natl_elec_rate<-eratedat[`Country Name`==country][[as.character(YYYY)]])} 50 | (natl_elec_rate<-eratedat[[as.character(YYYY)]]) 51 | (setrmuDT<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_RE_reg_my_sets_predvals_",YYYY,".rds"))) 52 | (setrmuDT<-merge(setrmuDT,matchDT,by='id')) 53 | ttotpop<-sum(setrmuDT[['totpop']]) 54 | (lsvals<-round(100*foreach(a=confthreshes,.inorder=T,.combine=c)%do%{sum(setrmuDT[zscore>a][['totpop']])/ttotpop},2)) 55 | (plclvals<-foreach(var=plclnames,.inorder=T,.combine=c)%do%{ 56 | round(100*foreach(a=prplitthreshes,.inorder=T,.combine=c)%do%{sum(setrmuDT[get(var)>a][['totpop']])/ttotpop},2) 57 | }) 58 | (dt<-data.table(country=ctry,year=YYYY,erate_wb=round(natl_elec_rate,2))) 59 | for(x in 1:length(lsvals)){set(dt,NULL,lsnames[x],lsvals[x])} 60 | for(x in 1:length(plclvals)){set(dt,NULL,plclgpnames[x],plclvals[x])} 61 | dt 62 | },fill=T)) 63 | 64 | fwrite(erate_comp,paste0("data/",ctry,"/",ctry,"_RE_reg_my_res_v_WB_erate.csv")) 65 | -------------------------------------------------------------------------------- 
/10.0_find_nearby_set_cells.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | sapply(c("data.table","bit64","raster"),require,character.only=T) 4 | 5 | (ctry<-Sys.getenv("ctry")) 6 | (wd<-Sys.getenv("wd")) 7 | # ctry<-'Nepal' 8 | # wd<-'/victor/Work/Brian/current/' 9 | setwd(wd) 10 | 11 | (rast<-raster(paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_15as.tif'),band=1)) 12 | 13 | (xyDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds'))) 14 | 15 | ## settlement stuff to match on later 16 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 17 | (popDT<-readRDS(paste0('data/',ctry,'/',ctry,'_cell_orig_pop.rds'))) 18 | (prpset<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_id_prp_sets.rds'))) 19 | (prpset<-prpset[,list(set_id_match=id,N_1as_sets=as.integer(round(prp_sets*225)))]) 20 | (finsetdatmatch<-merge(xyDT[grepl('s',id),list(set_id_match=id,cell_resamp)],setmatchDT,by='cell_resamp')) 21 | (finsetdatmatch<-merge(finsetdatmatch,popDT,by='cell_orig',all.x=T)) 22 | (finsetdatmatch<-merge(finsetdatmatch,prpset,by='set_id_match',all.x=T)) 23 | set(finsetdatmatch,NULL,'cell_orig',NULL) 24 | set(finsetdatmatch,NULL,'cell_resamp',NULL) 25 | (finsetdatmatch<-finsetdatmatch[,list(pop=sum(pop,na.rm=T),N_1as_sets=N_1as_sets[1L]),by='set_id_match']) 26 | rm(setmatchDT,popDT,prpset);gc() 27 | ## 28 | 29 | cellDTf<-xyDT[,list(id,cell_resamp)] 30 | set(cellDTf,NULL,'row',rowFromCell(rast,cellDTf[['cell_resamp']])) 31 | set(cellDTf,NULL,'col',colFromCell(rast,cellDTf[['cell_resamp']])) 32 | (rastNrow<-nrow(rast)) 33 | (rastNcol<-ncol(rast)) 34 | (rastNcell<-ncell(rast)) 35 | rm(rast,xyDT);gc() 36 | set(cellDTf,NULL,'set',as.integer(grepl('s',cellDTf[['id']]))) 37 | 38 | set(cellDTf,NULL,'edge',FALSE) 39 | set(cellDTf,cellDTf[,.I[row==1L|row==rastNrow|col==1L|col==rastNcol]],'edge',TRUE) 40 | cellDTf 41 | 42 | (cellmatchfix<-cellDTf[,list(id,edge)]) 43 | 44 | (cellDT1<-cellDTf[set==1L,list(set_id_match=id,col=col-1,row=row-1)]) 45 | (cellDT2<-cellDTf[set==1L,list(set_id_match=id,col=col,row=row-1)]) 46 | (cellDT3<-cellDTf[set==1L,list(set_id_match=id,col=col+1,row=row-1)]) 47 | (cellDT4<-cellDTf[set==1L,list(set_id_match=id,col=col-1,row=row)]) 48 | (cellDT5<-cellDTf[set==1L,list(set_id_match=id,col=col+1,row=row)]) 49 | (cellDT6<-cellDTf[set==1L,list(set_id_match=id,col=col-1,row=row+1)]) 50 | (cellDT7<-cellDTf[set==1L,list(set_id_match=id,col=col,row=row+1)]) 51 | (cellDT8<-cellDTf[set==1L,list(set_id_match=id,col=col+1,row=row+1)]) 52 | (cellDTMatch<-rbindlist(list(cellDT1,cellDT2,cellDT3,cellDT4,cellDT5,cellDT6,cellDT7,cellDT8),fill=T)) 53 | rm(cellDT1,cellDT2,cellDT3,cellDT4,cellDT5,cellDT6,cellDT7,cellDT8);gc() 54 | 55 | (fincellmatch<-merge(cellDTf[,list(id,row,col)],cellDTMatch,by=c('row','col'))) 56 | set(fincellmatch,NULL,'row',NULL) 57 | set(fincellmatch,NULL,'col',NULL) 58 | rm(cellDTf);gc() 59 | 60 | (finDT<-merge(fincellmatch[,list(id,set_id_match)],finsetdatmatch,by='set_id_match')) 61 | (finDT<-finDT[,list(N_15as_set_dist1=.N,N_1as_set_dist1=sum(N_1as_sets),pop_dist1=sum(pop,na.rm=T)),by='id']) 62 | (finDT<-merge(cellmatchfix,finDT,by='id',all.x=T)) 63 | set(finDT,finDT[,.I[is.na(N_15as_set_dist1)]],'N_15as_set_dist1',0L) 64 | set(finDT,finDT[,.I[is.na(N_1as_set_dist1)]],'N_1as_set_dist1',0L) 65 | 
set(finDT,finDT[,.I[is.na(pop_dist1)]],'pop_dist1',0L)
66 | finDT
67 |
68 | saveRDS(finDT,paste0('data/',ctry,'/',ctry,'_id_set_stats_15as_cell_dist1.rds'))
69 |
70 | rm(finDT,fincellmatch,finsetdatmatch,cellmatchfix);gc()
71 |
--------------------------------------------------------------------------------
/6_find_good_times.R:
--------------------------------------------------------------------------------
 1 | # load the following required packages
 2 | sapply(c("data.table","bit64"),require,character.only=T)
 3 |
 4 | # get the following variables from the environment
 5 | (YYYY<-as.integer(Sys.getenv('YYYY'))) # year
 6 | (ctry<-Sys.getenv("ctry")) # country
 7 | (wd<-Sys.getenv("wd")) # working directory
 8 | # set working directory
 9 | setwd(wd)
10 |
11 | # open file with VIIRS TIFF time matches
12 | tgzdt<-readRDS('data/VIIRS/VIIRS_tiff_time_lookup.rds')
13 | # keep the month, day, start time, and mean time values
14 | (tgzdt<-tgzdt[,list(YYYYMM,day,stime,meantime)])
15 |
16 | # specify stem where extracted data are stored
17 | csvDir<-"/VIIRS/daily/CSVs/"
18 | # specify stem of extracted data
19 | fstem<-"_daily_VIIRS_values_good_resamp_"
20 |
21 | # read the country's id-second offset file
22 | (offsetDT<-readRDS(paste0("data/",ctry,"/",ctry,"_id_secOffset.rds")))
23 | # if year was specified
24 | if(!is.na(YYYY)){
25 | # list the id files corresponding to that year
26 | rdsfs<-list.files(paste0("data/",ctry,"/VIIRS/daily/CSVs/"),paste0(ctry,"_daily_VIIRS_values_good_resamp_",YYYY,"[0-1][0-9]_id\\.rds"),full.names=F)
27 | # otherwise
28 | } else {
29 | # list the id files corresponding to all years
30 | rdsfs<-list.files(paste0("data/",ctry,"/VIIRS/daily/CSVs/"),paste0(ctry,"_daily_VIIRS_values_good_resamp_201[0-9][0-1][0-9]_id\\.rds"),full.names=F)
31 | }
32 | # get list of months from id files
33 | (YYYYMMs<-sub("(.*?)(201[0-9][0-1][0-9]?)_id\\.rds$","\\2",rdsfs))
34 | # display months
35 | message(paste('YYYYMMs are:',paste(YYYYMMs,collapse=", ")))
36 | # for each month
37 | for(tYYYYMM in YYYYMMs){
38 | # message which month we're working on
39 | message(paste("working on ",tYYYYMM))
40 | # store the id data from that month as a column in a data.table
41 | dt<-data.table(id=readRDS(paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_id.rds")))
42 | # get number of rows of data
43 | totN<-nrow(dt)
44 | # create index vector
45 | set(dt,NULL,'index',1:totN)
46 | # assign month as column
47 | set(dt,NULL,'YYYYMM',tYYYYMM)
48 | # read day in as column
49 | set(dt,NULL,'day',readRDS(paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_day.rds")))
50 | # read start time in as column
51 | set(dt,NULL,'stime',readRDS(paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_stime.rds")))
52 | # merge the data.table with the data containing other time information
53 | dt<-merge(dt,tgzdt,by=c('YYYYMM','day','stime'))
54 | # merge data.table with second offset information
55 | dt<-merge(dt,offsetDT,by='id')
56 | # stop if any rows were dropped
57 | stopifnot(nrow(dt)==totN)
58 | # create the mean time in "local" time by adding the second offset to the value
59 | set(dt,NULL,'mtimeloc',dt[['meantime']]+dt[['secOffset']])
60 | # reorder by original index
61 | setorder(dt,index)
62 | # save the result
63 | saveRDS(dt[['mtimeloc']],paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_mtimeloc.rds"))
64 | # get the date of the "local" time
65 | set(dt,NULL,'mdateloc',as.Date(dt[['mtimeloc']]))
66 | # format the local time as an HH:MM:SS character string (for ordering observations within each date)
67 |
set(dt,NULL,'mtimelocconst',strftime(dt[['mtimeloc']],format="%H:%M:%S",tz='UTC',usetz=F)) 68 | # reorder by id, then local date, then local time 69 | setorder(dt,id,mdateloc,mtimelocconst) 70 | # assign index by each date 71 | dt[,mdatelocpos:=1:.N,by=c('id','mdateloc')] 72 | # keep settlement data indices corresponding to the earliest observation that day 73 | timebadset<-dt[grepl('s',id)&mdatelocpos!=1L][['index']] 74 | # save result 75 | saveRDS(timebadset,paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_timebadset.rds")) 76 | # do the same but for non-settlement cells 77 | timebadnset<-dt[grepl('n',id)&mdatelocpos!=1L][['index']] 78 | saveRDS(timebadnset,paste0("data/",ctry,csvDir,ctry,fstem,tYYYYMM,"_timebadnset.rds")) 79 | # remove unnecessary and announce we are done for the month 80 | rm(dt,timebadset,timebadnset,totN);gc() 81 | message(paste("finished",tYYYYMM)) 82 | } 83 | -------------------------------------------------------------------------------- /9.0_gen_set_elec_based_on_year_wb_val.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64"),require,character.only=T) 3 | 4 | # get the following variables from environment 5 | (ctry<-Sys.getenv("ctry")) # country 6 | (wd<-Sys.getenv("wd")) # working directory 7 | # set working directory 8 | setwd(wd) 9 | 10 | # read in electrification rate data from the World Bank 11 | (eratedat<-fread('data/WorldBank/pct_pop_w_electricity_1990-2017.csv',header=T)) 12 | # read in list of countries 13 | (FBctries<-fread('data/FB/FB_countries.csv')) 14 | # get "longform" name of country 15 | (country<-FBctries[country_short==ctry][['country_long']]) 16 | 17 | # read in 15as cell information 18 | (xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds"))) 19 | # create data.table of settlement value ids and cell positions 20 | (matchdt<-xyDT[grep('s',id),list(id,cell_resamp)]) 21 | 22 | # read in 15 to 1as cell match data.table 23 | (setmatchDT<-readRDS(paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds'))) 24 | # read in 1as population data 25 | (popDT<-readRDS(paste0('data/',ctry,'/',ctry,'_cell_orig_pop.rds'))) 26 | # merge the population data with the 15as cells 27 | (popDT2<-merge(setmatchDT,popDT,by='cell_orig')) 28 | # for each 15as cell, sum the population; if none, return missing 29 | (popDT2<-popDT2[,list(pop=ifelse(all(is.na(pop)),NA_real_,sum(pop,na.rm=T))),by='cell_resamp']) 30 | # merge population data with ID information 31 | (matchdt<-merge(matchdt,popDT2,all.x=T,by='cell_resamp')) 32 | 33 | # remove unnecessary 34 | rm(xyDT,setmatchDT,popDT,popDT2);gc() 35 | 36 | # specify directory for saving data 37 | (outdir<-paste0('data/',ctry,'/VIIRS/thresh_lit/')) 38 | # create directory 39 | dir.create(outdir,showWarnings=F,recursive=T) 40 | 41 | # for each year 42 | for(YYYY in 2012:2017){ 43 | # state which year we're working on 44 | message(paste("working on",YYYY)) 45 | # get the national electrification rate for the country from the World Bank data 46 | (natl_elec_rate<-eratedat[`Country Name`==country][[as.character(YYYY)]]/100) 47 | # read in the mean rade9 data 48 | (r9lmdtrf<-readRDS(paste0("data/",ctry,"/VIIRS/daily/",ctry,"_good_r9_musd_",YYYY,".rds"))) 49 | # keep only settlement data and the means 50 | (r9lmdtrf<-r9lmdtrf[grep('s',id),list(id,r9m,r9lm)]) 51 | # if the national electrification rate is less than 1 52 | if(natl_elec_rate<1){ 53 | # merge population data with rade9 mean data 54 
| (r9lmdtr<-merge(na.omit(matchdt[,list(id,pop)]),r9lmdtrf,by='id'));gc() 55 | # order by mean logged rade9 56 | setorder(r9lmdtr,r9lm) 57 | # generate percentage of population for each cell 58 | set(r9lmdtr,NULL,'prppop',r9lmdtr[['pop']]/sum(r9lmdtr[['pop']])) 59 | # create a running sum vector 60 | set(r9lmdtr,NULL,'prppopcs',cumsum(r9lmdtr[['prppop']])) 61 | # create index vector of whether the percentage of the population lit exceeds 1 - the national electrification rate 62 | set(r9lmdtr,NULL,'prppopcsgrerate',r9lmdtr[['prppopcs']]>(1-natl_elec_rate)) 63 | # create a cumulative sum of this vector 64 | set(r9lmdtr,NULL,'prppopcsgreratecs',cumsum(r9lmdtr[['prppopcsgrerate']])) 65 | # get the minimum mean logged rade9 that corresponds to being electrified 66 | (lit_thresh_r9lm<-r9lmdtr[prppopcsgreratecs==1L][['r9lm']]) 67 | # create "lit" vector of 0s 68 | set(r9lmdtrf,NULL,'lit',0L) 69 | # set "lit" to 1 if it exceeds the minimum mean loged rade9 to be considered lit 70 | set(r9lmdtrf,r9lmdtrf[,.I[r9lm>lit_thresh_r9lm]],which(names(r9lmdtrf)=='lit'),1L) 71 | # save data of id and lit status 72 | saveRDS(r9lmdtrf[,list(id,lit)],paste0(outdir,ctry,'_set_id_lit_wb_erate_pop_',YYYY,'.rds')) 73 | # create a data.table of various statistics 74 | (erdt_natl<-data.table(natl_e_rate=natl_elec_rate,prp_15as_set_lit=sum(r9lmdtrf[['lit']])/nrow(r9lmdtrf),r9m_lit_thresh=r9lmdtr[prppopcsgreratecs==1L][['r9m']],r9lm_lit_thresh=lit_thresh_r9lm)) 75 | rm(r9lmdtr) 76 | # otherwise 77 | } else { 78 | # define all settlement cells as lit 79 | set(r9lmdtrf,NULL,'lit',1L) 80 | # save this information 81 | saveRDS(r9lmdtrf[,list(id,lit)],paste0(outdir,ctry,'_set_id_lit_wb_erate_pop_',YYYY,'.rds')) 82 | # create the statistics data.table 83 | (erdt_natl<-data.table(natl_e_rate=1,prp_15as_set_lit=1,r9m_lit_thresh=min(r9lmdtrf[['r9m']]),r9lm_lit_thresh=min(r9lmdtrf[['r9lm']]))) 84 | } 85 | # save the statistics data.table 86 | saveRDS(erdt_natl,paste0(outdir,ctry,'_natl_e_rate_',YYYY,'.rds')) 87 | # remove unnecessary and announce that year is done 88 | rm(erdt_natl,r9lmdtrf);gc() 89 | message(paste('finished',YYYY)) 90 | } 91 | -------------------------------------------------------------------------------- /3_find_overlapping_VIIRS_TIFFs.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64","parallel","foreach","doMC","raster","rgeos"),require,character.only=T) 3 | # set working directory 4 | setwd("/nfs/brianmin/work/zokeeffe/current/") 5 | # specify path where VIIRS GeoTIFF metadata are stored 6 | viirs_dat_path<-"/nfs/brianmin/VIIRS/" 7 | # get number of cores for parallel operations from environment 8 | (ncores<-as.integer(Sys.getenv("ncores"))) 9 | # get country from environment 10 | (ctry<-Sys.getenv("ctry")) 11 | # specify a single thread for DT operations, then set cores for parallel operations 12 | setDTthreads(1L);registerDoMC(ncores) 13 | 14 | # specify directory with GDAL information extracted from vflag files 15 | (gdaldir<-paste0(viirs_dat_path,"gdalinfo/vflag/")) 16 | # get list of directories with metadata 17 | (YYYYMMs<-list.dirs(gdaldir,recursive=F,full.names=F)) 18 | 19 | # specify six monthly file tiles 20 | MVTypes<-c("00N060E","75N060E","00N060W","75N060W","00N180W","75N180W") 21 | MVexl<-list() 22 | # for each type, open an example TIFF and get the extent 23 | for(mvt in MVTypes){ 24 | 
tmp<-raster(paste0(viirs_dat_path,'monthly/201204/vcmcfg/SVDNB_npp_20120401-20120430_',mvt,'_vcmcfg_v10_c201605121456.cf_cvg.tif')) 25 | MVexl[[mvt]]<-extent(tmp) 26 | rm(tmp);gc() 27 | } 28 | 29 | # specify country shapefile path 30 | (country_shp<-paste0("data/",ctry,"/shapefiles/",ctry,"_noproj_disag_simp.shp")) 31 | # open the shapefile 32 | (CountryShp<-shapefile(country_shp)) 33 | # get the extent 34 | (CountryExtent<-extent(CountryShp)) 35 | # specify new directory for where to store data 36 | (tiff_list_outdir<-paste0("data/",ctry,"/VIIRS/daily/")) 37 | # create directory 38 | dir.create(tiff_list_outdir,recursive=T,showWarnings=F) 39 | 40 | # create an empty list the length of the number of months of data 41 | FinDT<-vector('list',length(YYYYMMs)) 42 | # for each month of data, 43 | for(i in 1:length(YYYYMMs)){ 44 | YYYYMM<-YYYYMMs[i] 45 | message(paste('working on',YYYYMM)) 46 | # get the files of metadata 47 | FileList<-list.files(path=paste0(gdaldir,YYYYMM),pattern="*.info$") 48 | # try to read textfiles that list rasters with "bad data" so these can be ignored; if they don't exist, ignore 49 | try({ 50 | (BadFileList<-fread(paste0(viirs_dat_path,"bad_rasters/",YYYYMM,"_bad_rasters.txt"),sep="/",header=F)) 51 | if(ncol(BadFileList)==3L){ 52 | badfiles<-paste0(BadFileList[[2L]],".info") 53 | FileList<-setdiff(FileList,badfiles) 54 | } 55 | }) 56 | # start the overlap checking process for each file 57 | OverlapCheck<-foreach(f=FileList,.inorder=T,.options.multicore=list(preschedule=F))%dopar%{ 58 | tiff_name<-sub(".info","",f,fixed=T) 59 | # read in metadata from file 60 | info<-readLines(paste0(viirs_dat_path,"gdalinfo/vflag/",YYYYMM,"/",f)) 61 | # get upper left coordinates and remove surrounding text 62 | (ULCoords<-grep("^Upper Left \\(",info,value=T)) 63 | (ULCoords<-sub("(^Upper Left )(\\(.*?\\))(.*)$","\\2",ULCoords)) 64 | (ULCoords<-trimws(gsub("[[:space:]]+","",gsub("\\(|\\)","",ULCoords)))) 65 | # convert to numeric 66 | (ULXY<-as.numeric(unlist(strsplit(ULCoords,",")))) 67 | # get lower right coordinates and remove surrounding text 68 | (LRCoords<-grep("^Lower Right \\(",info,value=T)) 69 | (LRCoords<-sub("(^Lower Right )(\\(.*?\\))(.*)$","\\2",LRCoords)) 70 | (LRCoords<-trimws(gsub("[[:space:]]+","",gsub("\\(|\\)","",LRCoords)))) 71 | # convert to numeric 72 | (LRXY<-as.numeric(unlist(strsplit(LRCoords,",")))) 73 | # create an extent using the extracted coordinates 74 | tiff_extent<-extent(ULXY[1L],LRXY[1L],LRXY[2L],ULXY[2L]) 75 | # determine whether TIFF overlaps with country extent 76 | ext_overlap<-as.integer(!is.null(intersect(CountryExtent,tiff_extent))) 77 | # create data.table that states whether the TIFF in question overlaps with the country extent 78 | data.table(ext_overlap=ext_overlap,tiff_name=tiff_name) 79 | } 80 | # row bind all the results together 81 | OverlapCheck<-rbindlist(OverlapCheck,fill=T) 82 | # only keep overlapping TIFFs 83 | (OverlapCheck<-OverlapCheck[ext_overlap==1L,list(tiff_name)]) 84 | set(OverlapCheck,NULL,1L,paste(YYYYMM,OverlapCheck[[1L]],sep="/")) 85 | # assign results to empty list slot 86 | FinDT[[i]]<-OverlapCheck 87 | } 88 | # row bind the results 89 | (FinDT<-rbindlist(FinDT,fill=T)) 90 | 91 | # specify the file name to save under 92 | fname<-paste0(tiff_list_outdir,ctry,"_daily_VIIRS_overlapping_imgs.txt") 93 | # write the file out 94 | fwrite(FinDT,fname,col.names=F,quote=F,sep=",",na="",nThread=ncores) 95 | rm(FinDT);gc() 96 | 97 | # now see which of the six month tiles overlaps with the country extent 98 | 
MVOv<-rbindlist(foreach(mvt=MVTypes,.inorder=T)%dopar%{ 99 | ext_overlap<-as.integer(!is.null(intersect(CountryExtent,MVexl[[mvt]]))) 100 | data.table(ext_overlap=ext_overlap,mvt=mvt) 101 | },fill=T) 102 | (MVOv<-MVOv[ext_overlap==1L][["mvt"]]) 103 | # save the results as a text file 104 | writeLines(MVOv,paste0("data/",ctry,"/VIIRS/",ctry,"_monthly_VIIRS_overlapping_tiles.txt"),sep="\n") -------------------------------------------------------------------------------- /2.0_gen_country_VIIRS_XYs.R: -------------------------------------------------------------------------------- 1 | # load the following required packages 2 | sapply(c("data.table","bit64","parallel","foreach","doMC","raster",'sf',"rgdal","rgeos"),require,character.only=T) 3 | # get the following variables from the environment 4 | (ncores<-as.integer(Sys.getenv("ncores"))) # number of cores for parallel operations 5 | (ctry<-Sys.getenv("ctry")) # country 6 | (wd<-Sys.getenv("wd")) # working directory 7 | # set working directory 8 | setwd(wd) 9 | 10 | # register cores for parallel operations 11 | registerDoMC(ncores) 12 | 13 | # path to edited country shapefile 14 | (country_shp<-paste0("data/",ctry,"/shapefiles/",ctry,"_noproj_disag_simp.shp")) 15 | # path to 1 arc second settlement raster 16 | (set_path_1as<-paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_1as.tif')) 17 | # path to 15 arc second settlement raster 18 | (set_path_15as<-paste0('data/',ctry,'/GeoTIFFs/',ctry,'_sets_15as.tif')) 19 | 20 | # load country shapefile 21 | (countryShp<-st_read(country_shp)) 22 | # load resampled settlement raster 23 | (setrast_15as<-raster(set_path_15as,band=1)) 24 | # load original settlement raster 25 | (setrast_1as<-raster(set_path_1as,band=1)) 26 | 27 | # get number of rows from each raster 28 | (nrowSRr<-nrow(setrast_15as)) 29 | (nrowSRo<-nrow(setrast_1as)) 30 | # get number of columns from each raster 31 | (ncolSRr<-ncol(setrast_15as)) 32 | (ncolSRo<-ncol(setrast_1as)) 33 | 34 | # vector from one to number of rows of 1as raster 35 | rowso<-1:nrowSRo 36 | # specify number of rows per group 37 | rowspergrp<-400L 38 | # split rows into groups 39 | rowso_split<-split(rowso,ceiling(seq_along(rowso)/rowspergrp)) 40 | 41 | # create data.table of cells matching 1as to 15as raster in parallel, working on each row group 42 | cell_lookup_dt<-rbindlist(foreach(rg=1:length(rowso_split),.inorder=T)%dopar%{ 43 | # get temporary rows to work with 44 | trows<-rowso_split[[rg]] 45 | # get cells from 1as raster 46 | cells_o<-cellFromRow(setrast_1as,trows) 47 | # get values corresponding to those cells 48 | values_o<-values(setrast_1as,row=trows[1L],nrows=length(trows)) 49 | # remove missing 50 | valuesNoNA_o<-which(!is.na(values_o)) 51 | # if not all missing, 52 | if(length(valuesNoNA_o)!=0L){ 53 | # get cells corresponding to nonmissing values (settlement cells) 54 | cells_o2<-cells_o[valuesNoNA_o] 55 | # get xy values from cells 56 | xys_o<-xyFromCell(setrast_1as,cells_o2) 57 | # get cell from 15as raster corresponding to those xy values 58 | cells_r<-cellFromXY(setrast_15as,xys_o) 59 | # return data.table of cells from 1as (cell_orig) and 15as (cell_resamp) 60 | return(data.table(cell_orig=cells_o2,cell_resamp=cells_r)) 61 | } else { 62 | return(NULL) 63 | } 64 | },fill=T) 65 | # remove missing 66 | (cell_lookup_dt<-na.omit(cell_lookup_dt)) 67 | # save data.table as RDS file 68 | saveRDS(cell_lookup_dt,paste0('data/',ctry,'/',ctry,'_orig_v_resamp_set_cell_match.rds')) 69 | 70 | # get cells in 15as raster that have settlements 71 | 
agg_set_cells<-sort(unique(cell_lookup_dt[['cell_resamp']])) 72 | # get xy values from those cells 73 | agg_set_xys<-xyFromCell(setrast_15as,agg_set_cells) 74 | # create data.table of cells and xy values 75 | (agg_set_cxy<-data.table(cell_resamp=agg_set_cells,agg_set_xys)) 76 | # save as RDS file 77 | saveRDS(agg_set_cxy,paste0('data/',ctry,'/',ctry,'_resamp_set_cell_xy.rds')) 78 | 79 | # create vector from one to number of rows in 15as raster 80 | rowsr<-1:nrowSRr 81 | 82 | # set rows per group 83 | rowspergrp<-50L 84 | # split vector into groups of rows 85 | rowsr_split<-split(rowsr,ceiling(seq_along(rowsr)/rowspergrp)) 86 | 87 | # get cells in 15as raster intersecting with country boundary that do not contain settlements by working on row groups in parallel 88 | nonset_cells<-rbindlist(foreach(rg=1:length(rowsr_split),.inorder=T)%dopar%{ 89 | # get temporary row set 90 | trows<-rowsr_split[[rg]] 91 | # get cells in those rows 92 | cells_o<-cellFromRow(setrast_15as,trows) 93 | # return cells that do not have settlements 94 | cells_o<-setdiff(cells_o,agg_set_cells) 95 | # if not empty, 96 | if(length(cells_o)!=0L){ 97 | # get xy values from those cells 98 | xys_o<-xyFromCell(setrast_15as,cells_o) 99 | # make data.table of cell and xy values 100 | (cxy_dt<-data.table(cell_resamp=cells_o,xys_o)) 101 | # convert data.table to spatial object 102 | points<-st_as_sf(cxy_dt,coords=c('x','y'),crs=st_crs(countryShp)) 103 | # intersect with county shapefile 104 | point_ctry_int<-st_intersects(points,countryShp) 105 | # determine which points lie within the country boundary 106 | nonempty<-unlist(lapply(point_ctry_int,function(xx) length(xx)!=0)) 107 | nonempty_cells<-which(nonempty) 108 | # return the non-settlement cells that are inside the country boundary 109 | return(cxy_dt[nonempty_cells]) 110 | } else { 111 | return(NULL) 112 | } 113 | },fill=T) 114 | # remove missing 115 | (nonset_cells<-na.omit(nonset_cells)) 116 | # save as RDS 117 | saveRDS(nonset_cells,paste0('data/',ctry,'/',ctry,'_resamp_nonset_cell_xy.rds')) 118 | 119 | # reorder 15as cell data.table of settlement cells 120 | setorder(agg_set_cxy,cell_resamp) 121 | # assign new settlement identifier 122 | set(agg_set_cxy,NULL,'id',paste0('s',1:nrow(agg_set_cxy))) 123 | # reorder 15as cell data.table of non-settlement cells 124 | setorder(nonset_cells,cell_resamp) 125 | # assign new non-settlement identifier 126 | set(nonset_cells,NULL,'id',paste0('n',1:nrow(nonset_cells))) 127 | 128 | # bind settlement and non-settlement cell information 129 | (ctry_cxys<-rbindlist(list(agg_set_cxy,nonset_cells),fill=T)) 130 | # save as RDS 131 | saveRDS(ctry_cxys,paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_id.rds')) -------------------------------------------------------------------------------- /10.1_prep_nset_dat_for_r9_regs.R: -------------------------------------------------------------------------------- 1 | # rsync -rltgoDuvhh --progress /victor/Work/Brian/current/R_code/FBHRSL/ zokeeffe@flux-xfer.arc-ts.umich.edu:/nfs/brianmin/work/zokeeffe/current/R_code/FBHRSL/ 2 | 3 | # export wd=/nfs/brianmin/work/zokeeffe/current/; export ctry=Brunei; R 4 | 5 | sapply(c("data.table","bit64","foreach","doMC","stackoverflow"),require,character.only=T) 6 | 7 | options(datatable.optimize=1) 8 | 9 | (ctry<-Sys.getenv("ctry")) 10 | (ncores<-as.integer(Sys.getenv("ncores"))) 11 | (wd<-Sys.getenv("wd")) 12 | # ctry<-'Nepal' 13 | # ncores<-12L 14 | # wd<-'/nfs/brianmin/work/zokeeffe/current/' 15 | setwd(wd) 16 | 17 | registerDoMC(ncores) 18 | 19 | 
rdsDir<-"/VIIRS/daily/CSVs/" 20 | rdsStem<-"_daily_VIIRS_values_good_resamp_" 21 | lithresh<-.001 22 | LCyr<-2012L 23 | maxsampN<-500L 24 | totNmonths<-69L 25 | avgNpmonththresh<-5 26 | 27 | # x-y information 28 | xyDT<-readRDS(paste0("data/",ctry,"/",ctry,"_resamp_country_cell_xy_id.rds")) 29 | (matchDT<-xyDT[grep('n',id),list(id)]) 30 | # land cover info 31 | (LCDT<-readRDS(paste0('data/',ctry,'/',ctry,'_resamp_country_cell_xy_landcover_values_',LCyr,'.rds'))) 32 | (lcs2keep<-sort(intersect(unique(LCDT[grep('s',id)][['lc_type']]),unique(LCDT[grep('n',id)][['lc_type']])))) 33 | (lcs2keep<-lcs2keep[lcs2keep<=16]) 34 | (matchDT<-merge(matchDT,LCDT[lc_type%in%lcs2keep],by='id')) 35 | # nearby settlement cells 36 | (setdatDT<-readRDS(paste0('data/',ctry,'/',ctry,'_id_set_stats_15as_cell_dist1.rds'))) 37 | (matchDT<-merge(matchDT,setdatDT[edge==F&N_15as_set_dist1==0,list(id)],by='id')) 38 | ids2keep<-matchDT[['id']] 39 | rm(setdatDT,LCDT,xyDT);gc() 40 | 41 | (rdsfs<-list.files(paste0("data/",ctry,rdsDir),paste0(ctry,"_daily_VIIRS_values_good_resamp_201[2-7][0-1][0-9]_id\\.rds"),full.names=F)) 42 | (tifmonths<-sub("(.*?)(201[0-9][0-1][0-9]?)_id\\.rds$","\\2",rdsfs)) 43 | (ntifmonths<-length(tifmonths)) 44 | stopifnot(ntifmonths==69L) 45 | 46 | nsmsDT<-vector('list',69L) 47 | for(i in 1:ntifmonths){ 48 | m<-tifmonths[i] 49 | dt<-data.table(id=readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_id.rds"))) 50 | set(dt,NULL,'r9',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_rade9.rds"))) 51 | set(dt,NULL,'li',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_li.rds"))) 52 | dt<-dt[li<=lithresh,list(id,r9)] 53 | dt<-dt[id%chin%ids2keep] 54 | nsmsDT[[i]]<-dt 55 | } 56 | # nsmsDT<-foreach(m=tifmonths,.inorder=F,.options.multicore=list(preschedule=F))%dopar%{ 57 | # dt<-data.table(id=readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_id.rds"))) 58 | # set(dt,NULL,'r9',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_rade9.rds"))) 59 | # set(dt,NULL,'li',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_li.rds"))) 60 | # dt<-dt[li<=lithresh,list(id,r9)] 61 | # dt<-dt[id%chin%ids2keep] 62 | # return(dt) 63 | # } 64 | gc() 65 | # get total number of rows in case too large 66 | (Nrows<-sum(as.numeric(unlist(lapply(nsmsDT,nrow))))) 67 | if(Nrows>.Machine$integer.max){ 68 | (Nsplits<-ceiling(Nrows/.Machine$integer.max)) 69 | idchunks<-chunk2(ids2keep,Nsplits) 70 | nsmsDT2<-vector('list',Nsplits) 71 | for(chun in 1:Nsplits){ 72 | message(paste('working on',chun)) 73 | tmpDT<-rbindlist(lapply(nsmsDT,function(xx) xx[id%chin%idchunks[[chun]]]),fill=T) 74 | tmpDT<-tmpDT[,list(r9m=mean(r9),r9s=sd(r9),N=.N),by='id'] 75 | nsmsDT2[[chun]]<-tmpDT 76 | rm(tmpDT);gc() 77 | } 78 | nsmsDT<-rbindlist(nsmsDT2,fill=T) 79 | rm(ids2keep,idchunks,Nsplits,nsmsDT2);gc() 80 | } else { 81 | (nsmsDT<-rbindlist(nsmsDT,fill=T)) 82 | rm(ids2keep);gc() 83 | (nsmsDT<-nsmsDT[,list(r9m=mean(r9),r9s=sd(r9),N=.N),by='id']) 84 | } 85 | saveRDS(nsmsDT,paste0("data/",ctry,"/VIIRS/daily/",ctry,'_regdat_iso_nsets_musd.rds')) 86 | 87 | (nsmsDT<-na.omit(nsmsDT[N>=(totNmonths*avgNpmonththresh)])) 88 | gc() 89 | 90 | set.seed(48105) 91 | tmplist<-foreach(i=1:length(lcs2keep),.inorder=F,.options.multicore=list(preschedule=F,mc.set.seed=F))%dopar%{ 92 | # i<-8L 93 | (lctt<-lcs2keep[i]) 94 | tids<-matchDT[lc_type==lctt][['id']] 95 | (tnsmsdt<-nsmsDT[id%chin%tids]) 96 | (tmquants<-quantile(tnsmsdt[['r9m']],c(.01,.5),na.rm=T)) 97 | (tmquantdif<-tmquants[[2]]-tmquants[[1]]) 98 | (tmthreshes<-c(tmquants[1L],tmquants[2L]+tmquantdif)) 99 | 
(tsquants<-quantile(tnsmsdt[['r9s']],c(.01,.5),na.rm=T)) 100 | (tsquantdif<-tsquants[[2]]-tsquants[[1]]) 101 | (tsthreshes<-c(tsquants[1L],tsquants[2L]+tsquantdif)) 102 | threshdt<-data.table(lc_type=lctt,r9m_lo=tmthreshes[1L],r9m_hi=tmthreshes[2L],r9m_q99=quantile(tnsmsdt[['r9m']],.99,na.rm=T), 103 | r9s_lo=tsthreshes[1L],r9s_hi=tsthreshes[2L],r9s_q99=quantile(tnsmsdt[['r9s']],.99,na.rm=T)) 104 | tids2keep<-tnsmsdt[r9m>tmthreshes[1L]&r9mtsthreshes[1]&r9s1]) 58 | set(aggdt,NULL,'r9m',aggdt[['r9sum']]/aggdt[['N']]) 59 | set(aggdt,NULL,'r9sum',NULL) 60 | aggdt 61 | 62 | saveRDS(aggdt,paste0("data/",ctry,"/VIIRS/daily/",ctry,'_regdat_iso_nsets_muN.rds')) 63 | 64 | 65 | tmplist<-vector('list',ntifmonths) 66 | ids2keep<-aggdt[['id']] 67 | 68 | for(i in 1:ntifmonths){ 69 | # i<-1L 70 | m<-tifmonths[i] 71 | dt<-data.table(id=readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_id.rds"))) 72 | set(dt,NULL,'r9',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_rade9.rds"))) 73 | set(dt,NULL,'li',readRDS(paste0("data/",ctry,rdsDir,ctry,rdsStem,m,"_li.rds"))) 74 | dt<-dt[li<=lithresh,list(id,r9)] 75 | dt<-merge(dt,aggdt[,list(id,r9m)],by='id') 76 | set(dt,NULL,'r9s_tmp',(dt[['r9']]-dt[['r9m']])^2) 77 | dt<-dt[,list(r9s_tmp=sum(r9s_tmp)),by='id'] 78 | tmplist[[i]]<-dt 79 | rm(dt);gc() 80 | message(paste('finished reading',m)) 81 | } 82 | message('finished reading nonset data') 83 | 84 | (tmplist<-rbindlist(tmplist,fill=T)) 85 | (tmplist<-tmplist[,list(r9s_tmp=sum(r9s_tmp)),by='id']) 86 | 87 | (nsmsDT<-merge(aggdt,tmplist,by='id')) 88 | rm(tmplist,aggdt);gc() 89 | set(nsmsDT,NULL,'r9s',sqrt(nsmsDT[['r9s_tmp']]/(nsmsDT[['N']]-1))) 90 | set(nsmsDT,NULL,'r9s_tmp',NULL) 91 | setcolorder(nsmsDT,c('id','r9m','r9s','N')) 92 | nsmsDT 93 | 94 | saveRDS(nsmsDT,paste0("data/",ctry,"/VIIRS/daily/",ctry,'_regdat_iso_nsets_musd.rds')) 95 | 96 | (nsmsDT<-na.omit(nsmsDT[N>=(totNmonths*avgNpmonththresh)])) 97 | rm(Nrows);gc() 98 | 99 | message('finished summarizing nonset data') 100 | 101 | fids2keep<-threshes<-vector('list',length(lcs2keep)) 102 | set.seed(48105) 103 | for(i in 1:length(lcs2keep)){ 104 | # i<-1L 105 | (lctt<-lcs2keep[i]) 106 | tids<-matchDT[lc_type==lctt][['id']] 107 | # remove if r9s is missing, which is done if N>1. but also in case anything weird happened? 
108 | # also, require that there be on average at least 5 "good" observations per month 109 | (tnsmsdt<-nsmsDT[id%chin%tids]) 110 | (tmquants<-quantile(tnsmsdt[['r9m']],c(.01,.5))) 111 | (tmquantdif<-tmquants[[2]]-tmquants[[1]]) 112 | (tmthreshes<-c(tmquants[1L],tmquants[2L]+tmquantdif)) 113 | (tsquants<-quantile(tnsmsdt[['r9s']],c(.01,.5))) 114 | (tsquantdif<-tsquants[[2]]-tsquants[[1]]) 115 | (tsthreshes<-c(tsquants[1L],tsquants[2L]+tsquantdif)) 116 | threshes[[i]]<-data.table(lc_type=lctt,r9m_lo=tmthreshes[1L],r9m_hi=tmthreshes[2L],r9m_q99=quantile(tnsmsdt[['r9m']],.99), 117 | r9s_lo=tsthreshes[1L],r9s_hi=tsthreshes[2L],r9s_q99=quantile(tnsmsdt[['r9s']],.99)) 118 | tids2keep<-tnsmsdt[r9m>tmthreshes[1L]&r9mtsthreshes[1]&r9s(-1.5)];gc() 114 | # only keep if LI is below .025 115 | val_dt<-val_dt[li<.025];gc() 116 | # assign the day value pulled from the vflag TIFF name 117 | set(val_dt,NULL,"day",as.integer(substr(vfn,12,13))) 118 | # assign the time of the overpass by extracting it from the vflag TIFF name 119 | set(val_dt,NULL,"stime",substr(vfn,16L,22L)) 120 | # return the data.table 121 | return(val_dt) 122 | } 123 | # bind results into a list 124 | res<-rbindlist(res,fill=T) 125 | # store the results in the empty slot of the list created above 126 | FinDT[[tifgrp]]<-res 127 | rm(res,tmptiffs2);gc() 128 | } 129 | # rowbind data.tables together 130 | FinDT<-rbindlist(FinDT,fill=T) 131 | # remove unnecessary 132 | rm(tmptiffs,ntmptiffs,otntmptiffs,tifgrps,ntifgrps);gc() 133 | # if the data.table is empty 134 | if(nrow(FinDT)==0L){ 135 | # report no data 136 | message(paste("no data extracted for",m));flush(stdout()) 137 | # otherwise 138 | } else { 139 | # for each column in the data.table 140 | for(col in fincols2write){ 141 | # save the column as a separate vector 142 | saveRDS(FinDT[[col]],paste0("data/",ctry,"/VIIRS/daily/CSVs/",ctry,"_daily_VIIRS_values_good_resamp_",m,"_",col,".rds")) 143 | # remove the column 144 | set(FinDT,NULL,which(names(FinDT)==col),NULL) 145 | } 146 | } 147 | # cleanup 148 | rm(FinDT);gc() 149 | message(paste("finished",m)) 150 | } 151 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # High Resolution Electricity Access (HREA) Indicators 2 | ### New Methods to Estimate Electricity Access Using Nightly VIIRS Satellite Imagery 3 | 4 | Brian Min (brianmin@umich.edu) 5 | 6 | Zachary O’Keeffe (zokeeffe@umich.edu) 7 | 8 | University of Michigan 9 | 10 | 11 | Rev. September 2020 12 | 13 | # Introduction 14 | 15 | We introduce a new method to generate likelihood estimates of electricity access for all areas with human settlements within a country by identifying statistical anomalies in brightness values that are plausibly associated with electricity use, and unlikely to be due to exogenous factors. 16 | 17 | On every night, the VIIRS DNB sensor collects data on the observed brightness over all locations within a country, including over electrified and unelectrified areas, and populated and unpopulated areas. Our objective is to classify populated areas as electrified or not using all the brightness data over a country. But the challenge is that light output can be due to multiple sources unrelated to electricity use. Notably, the VIIRS sensor is so sensitive that it picks up light from overglow, atmospheric interactions, moonlight, and variations in surface reflectivity across types of land cover. 
We refer collectively to these exogenous sources as background noise, which must be accounted for to classify whether an area is brighter than expected. 18 | 19 | We use data on light output detected over areas with no settlements or buildings to train a statistical model of background noise. The model can be used to generate an expected brightness value on every given night for every given location. We then compare the observed brightness on each night against the expected baseline brightness value. Areas with human settlements with brighter light output than expected are assumed to have access to electricity on that night. We classify all settlements on all nights and then average the estimates and generate an "Artificial Light Score" for each calendar year for all settlement areas. Areas that are much brighter than would be expected on most nights have the highest probability of being electrified. Areas that are as dim as areas with no settlements have the lowest probability of being electrified. And areas that are a little brighter on some nights have middling scores. 20 | 21 | The advantage of this process is that it uses all available nightly data from the VIIRS data stream while taking into account sources of known noise and variability. The process also allows for the identification of areas where the likelihood of electricity access and use is uncertain (the areas with middling scores). This is significant given that traditional binary measures of access do not account for variations in levels of use or reliability of power supply, even across areas that are all nominally electrified. These data may therefore be helpful in identifying baseline variations in access and reliability within countries, consistent with the objectives of the Multi-tier Framework for measuring energy access (ESMAP 2015). 22 | 23 | We explain the process in more detail below: 24 | 25 | 1. **Select random sample of locations with no settlements to measure background noise.** 26 | We select a stratified random sample of isolated non-settlement pixels, which are identified by overlaying the 1 arcsecond (as) settlement pixel grid on the 15 as VIIRS grid. Candidate 15 as pixels are those that 1) contain zero 1 as settlement pixels; and 2) are not adjacent to any 15 as cells containing settlement cells. We stratify based on the type of land, selecting up to 500 pixels per category. As there are 17 categories, the theoretical maximum is 8,500. 27 | 1. **Select observations.** 28 | Following NOAA guidelines and their data quality flags, we drop bad quality data, including those with heavy cloud cover and excessive sensor noise. NOAA also drops many nights with high lunar illumination. We relax this threshold slightly and keep nightly observations where lunar illumination is below .001 lux. Furthermore, on nights with multiple overpasses, we use data with the earliest local timestamp for settlement points, but allow multiple observations for non-settlement points. 29 | 1. **Remove outliers.** 30 | To generate a reliable estimate of background noise, we need to exclude outliers. Presumably, unusually high brightness values in unsettled areas are not due to background noise. To accomplish this, we first apply a logarithmic transformation to observed brightness to make the distribution more normal. Then we calculate the median and standard deviation. Observations are removed if they are above 4 standard deviations from the median on this metric. 
Then we stratify by land type and date and, using the original scale, calculate the mean and standard deviation. Observations more than 4 standard deviations above the mean are removed. A minimal sketch of this screen appears at the end of this introduction. 31 | 1. **Create statistical model of background noise.** 32 | We fit a linear mixed effects model to learn the impacts of exogenous factors, using all non-settlement data from all good quality nights across all years (2012–2017). There is a single random effect: date. There are five fixed effects: lunar illumination, local time, calendar month, land type, and the interaction between land type and lunar illumination (plus an intercept). Notably, the regression diagnostics are excellent. Outliers are not present, the linear relationships specified hold well, and heteroskedasticity is not an issue. The residual distributions look approximately normal, the variance is roughly constant, and the fitted relationships appear linear. 33 | Using the statistical parameters learned from data on non-settlement areas, we then calculate the expected level of light output for all areas with settlements. These predicted values represent a counterfactual estimate of how much light would be expected on that specific day on that type of land, if the only sources of light were from background noise and other exogenous factors. Areas with consistently higher observed light output than expected are assumed to have electricity access. 34 | 1. **Identify electrified settlement areas on each night.** 35 | We compare observed levels of light output against the statistically estimated baseline light output level for every settlement pixel on every night. This generates residuals, which we standardize by dividing by the sigma from the model (the standard deviation of residuals, with a degrees of freedom adjustment) to generate z-scores for each pixel on each night. Higher z-scores imply much higher light output than expected. We assume that higher scores are correlated with a higher likelihood that a settlement is using electricity on that specific night. 36 | 1. **Aggregate nightly estimates to generate "Artificial Light Score" values for all settlement areas for each year.** 37 | For each year, we average all nightly z-scores for each settlement cell. We then calculate the corresponding quantile value assuming a standard normal distribution, which transforms the average z-score to be between 0 and 1. We then subtract .5 from this value, divide the result by .5, and set negative values to 0. A minimal sketch of this scoring step appears at the end of this introduction. This produces annual Artificial Light Scores for each pixel, which also lie between 0 and 1. On this scale, .95 roughly corresponds to having observed light scores that are on average above 2 standard deviations from expected. Meanwhile, 0 corresponds to having observed radiance values that are on average lower than the expected values for comparable isolated non-settlement pixels. 38 | 39 | This GitHub repository contains several R scripts that are used to produce these high resolution settlement electrification estimates for a given country. Additional output includes annual composite GeoTIFFs of visible light. Many of the scripts pull variables from the environment (the required variables are specified at the top of the scripts). They are written in a way that allows the user to specify the time and country from the command line, and then submit the script as a job to a computer cluster (these scripts require substantial memory and processing power, and may take a long time to run). Descriptions of required files and what each script does can be found below. 
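To make the outlier screen in step 3 concrete, here is a minimal sketch in R under stated assumptions: the data.table `obs` and its columns (`rade9`, `lc_type`, `date`) are hypothetical stand-ins for the non-settlement observations, the log offset is illustrative, and the first cut is shown as symmetric around the median ("from the median"); the actual scripts may differ in these details.

```r
library(data.table)
set.seed(1)

# hypothetical non-settlement observations: one row per pixel-night
obs <- data.table(rade9   = rexp(10000, rate = 2),
                  lc_type = sample(1:17, 10000, replace = TRUE),
                  date    = sample(as.IDate("2014-01-01") + 0:30, 10000, replace = TRUE))

# stage 1: log-transform, then drop observations more than 4 SD from the median on the log scale
obs[, lr9 := log(rade9 + 1)]          # illustrative offset; raw radiances can be near zero
med <- median(obs$lr9)
s   <- sd(obs$lr9)
obs <- obs[abs(lr9 - med) <= 4 * s]

# stage 2: within land type and date, drop observations more than 4 SD above the mean (original scale)
obs[, `:=`(mu = mean(rade9), sdev = sd(rade9)), by = .(lc_type, date)]
obs <- obs[rade9 <= mu + 4 * sdev]
```

Grouping by `lc_type` and `date` mirrors the stratification described above; the second cut is one-sided because only unusually bright non-settlement pixels are treated as contamination.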
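The scoring arithmetic in steps 5 and 6 is likewise small enough to sketch directly; the vectors below (`resid`, `model_sigma`, `year`) are hypothetical inputs standing in for the nightly regression residuals of a single settlement pixel, the model's residual standard deviation, and the calendar year of each observation.

```r
# hypothetical inputs for one settlement pixel
resid       <- c(0.4, 2.1, 1.7, -0.2, 3.0)     # nightly regression residuals
model_sigma <- 1.1                             # residual SD (sigma) from the mixed model
year        <- c(2014, 2014, 2014, 2015, 2015)

# step 5: standardize residuals into nightly z-scores
z <- resid / model_sigma

# step 6: average z-scores by year, map through the standard normal CDF,
# then subtract .5, divide by .5, and floor negative values at 0
mean_z <- tapply(z, year, mean)
als    <- pmax((pnorm(mean_z) - 0.5) / 0.5, 0)
als    # annual Artificial Light Scores, each between 0 and 1
```

As a check on the interpretation above, an average z-score of 2 gives pnorm(2) ≈ 0.977, and (0.977 - 0.5) / 0.5 ≈ 0.95.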
40 | 41 | # Required Files 42 | 43 | The scripts here assume the user has access to several files. These include: 44 | 45 | ## FB_countries.csv 46 | 47 | This matches "short" names of countries to "long" names. The short names of countries are the names of the country folders, while the long names are the names used in the World Bank electrification estimates spreadsheet. 48 | 49 | ## pct_pop_w_electricity_1990-2017.csv 50 | 51 | This CSV contains estimates of access to electricity as a percentage of the population per country, from 1990-2017. Data are obtained from the World Bank's Sustainable Energy for All database: https://data.worldbank.org/indicator/EG.ELC.ACCS.ZS. 52 | 53 | ## new_good_vflag_ints_no_li.txt 54 | 55 | This is a text file with all the VIIRS flag integers corresponding to "good" values (the values that are to be kept in the extraction process). 56 | 57 | ## Country Boundary Shapefile (filename varies by country) 58 | 59 | This is an ESRI Shapefile of the country boundary. It is used to create edited settlement GeoTIFFs, as well as determine which VIIRS images overlap with the country. Files are taken from GADM: https://gadm.org/download_country_v3.html. 60 | 61 | ## High Resolution Population Density Maps (filename varies by country) 62 | 63 | These 1 arcsecond (as) GeoTIFFs identify where people live. These "settlement" layers also contain population estimates per cell. Data are provided by Facebook: https://data.humdata.org/organization/facebook?res_format=zipped%20geotiff&q=&ext_page_size=25. 64 | 65 | # Description of Scripts 66 | 67 | The following describes the files contained on this GitHub repository. The files should be executed in the order they are described; subsequent scripts require output from previous ones. 68 | 69 | ## 0_convert_country_boundary_to_coordinate_lines.R 70 | 71 | Reformats country boundary shapefile. 72 | 73 | ## 1_make_settlement_rasters.R 74 | 75 | Creates 1 and 15 arc second settlement rasters using the Facebook High Resolution Settlement Layer files and country boundary shapefile. Cells with settlements are populated with 1s, with the rest of the cells coded as empty. 76 | 77 | ## 2.0_gen_country_VIIRS_XYs.R 78 | 79 | Creates data.tables of cell information. Specifically, one is created that matches 1as settlement cells to 15as cells. Two others store long-lat information associated with 15as cells, for settlement and non-settlement cells. A final data.table is created that includes both settlement and non-settlement cells, their long-lat information, and unique identifiers. 80 | 81 | ## 2.1_make_country_lon_tz_offset_files.R 82 | 83 | Creates a data.table of "local" time offsets for each 15as cell in the country based on longitude. 84 | 85 | ## 2.2_get_pop_per_1as_cell.R 86 | 87 | Extracts population estimates from original FB raster for each 1as settlement cell. 88 | 89 | ## 2.3_make_prp_set_per_cell.R 90 | 91 | Calculates the proportion of each 15as cell that is populated with 1as settlement cells. 92 | 93 | ## 3_find_overlapping_VIIRS_TIFFs.R 94 | 95 | Determines which VIIRS GeoTIFFs overlap with the country boundary. 96 | 97 | ## 4.1_find_overlapping_landcover_tiles.R 98 | 99 | Determines which landcover tiles overlap with the country boundary. 100 | 101 | ## 4.2_extract_xy_landcover_values.R 102 | 103 | Extracts land type values for each 15as cell in the country. 104 | 105 | ## 5_extract_good_daily_VIIRS_data.R 106 | 107 | For a given month, extracts data from the VIIRS GeoTIFFs for all country cells and stores them as RDS files. 
Specifically, it extracts the li (lunar illumination), vflag (quality flag), rade9 (visible radiance), rad (infrared radiance), and sample position values. "High-quality" data are returned by month and saved as separate files. In the end, for each month of data, each of the following data.table columns is saved as its own file: id, day, stime (start time of image), rade9, rad, li, and sample. 108 | 109 | ## 6_find_good_times.R 110 | 111 | Converts image capture start times to "local" times. Along with storing these data by month, it also creates vectors that identify, for each date, the earliest timestamped image in case of overlap. 112 | 113 | ## 7_gen_r9_musd.R 114 | 115 | For each year, reads in the data and subsets it. Data are dropped if the timestamp is "bad" (see previous script) or if the lunar illumination (LI) is above .0005. A logged version of the visible light (rade9) is created. Then, for each 15as cell, the mean and standard deviation of rade9 and its logged version are calculated and saved as a data.table. These values are used in the following script to generate 15as annual composite GeoTIFFs of visible light. 116 | 117 | ## 8_make_VIIRS_average_raster.R 118 | 119 | For a year, creates a GeoTIFF of the 15as country cells with the values of the mean logged rade9 (mean logged rade9 is chosen over mean rade9 because of the highly skewed nature of observed light). 120 | 121 | ## 9.0_gen_set_elec_based_on_year_wb_val.R 122 | 123 | Generates files that identify whether a settlement cell should be classified as "lit" or not based on World Bank classifications, combined with population and visible light data. Also creates tables with the following statistics: electrification rate according to World Bank data, proportion of 15as cells that are considered electrified, and the minimum mean logged rade9 and rade9 values that count as electrified. 124 | 125 | ## 9.1_make_set_lit_rasters_wb_erate.R 126 | 127 | Creates 1as country settlement cell GeoTIFFs for each year, where 0 represents an unelectrified settlement cell, and 1 represents an electrified settlement cell, based on World Bank electrification estimates and interpolated population. 128 | 129 | ## 10.0_find_nearby_set_cells.R 130 | 131 | Determines, for each cell, how many settlement cells are adjacent. The purpose is to find isolated non-settlement cells, which are presumed to be dark. 132 | 133 | ## 10.1_prep_nset_dat_for_r9_regs.R 134 | 135 | Prepares nonsettlement cell data for the regression. Data are subsetted based on lunar illumination, and outliers are removed based on their mean and standard deviation. 136 | 137 | ## 10.1_prep_nset_dat_for_r9_regs_bigcountry.R 138 | 139 | This is a different version of the previous script that works with large countries (the previous one will fail for certain large countries). 140 | 141 | ## 10.2_prep_set_dat_for_r9_regs.R 142 | 143 | Prepares settlement data for the regressions. Data are subsetted based on lunar illumination and timestamp. 144 | 145 | ## 10.3_multiyear_reg.R 146 | 147 | Further removes outliers from the nonsettlement data and fits a linear mixed-effects model predicting light output from the timestamp, date, lunar illumination, and land type. 148 | 149 | ## 10.4_reg_resids.R 150 | 151 | Predicts the light output of settlement cells assuming they behaved like nonsettlement cells. The differences (the regression residuals) form the basis for determining whether cells are considered electrified or not. 
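For orientation, the structure of the model described in 10.3 (and in step 4 of the introduction) can be sketched with lme4 as below. Everything here is illustrative: `nset` and its columns (`lr9`, `li`, `stime`, `month`, `lc_type`, `date`) are hypothetical placeholders generated for the example, and the response variable, predictor coding, and exact formula in 10.3_multiyear_reg.R may differ. The final lines mirror the 10.4 step of predicting expected background brightness for settlement observations and standardizing the residuals by the model sigma.

```r
# a minimal sketch of the modeling structure, not the script itself
library(lme4)
library(data.table)
set.seed(1)

# hypothetical non-settlement training data: one row per pixel-night
nset <- data.table(li      = runif(2000, 0, .001),
                   stime   = runif(2000, 0, 24),
                   month   = sample(1:12, 2000, replace = TRUE),
                   lc_type = sample(1:5, 2000, replace = TRUE),
                   date    = sample(format(as.Date("2014-01-01") + 0:199), 2000, replace = TRUE))
nset[, lr9 := 0.1 + 50 * li + rnorm(.N, sd = 0.05)]   # fake logged radiance

# random intercept for date; fixed effects for lunar illumination, local time,
# calendar month, land type, and the land type x lunar illumination interaction
fit <- lmer(lr9 ~ li + stime + factor(month) + factor(lc_type) + factor(lc_type):li + (1 | date),
            data = nset)

# 10.4-style use: predicted background brightness for (stand-in) settlement observations,
# with residuals standardized by the model sigma to give nightly z-scores
setdat   <- nset[1:5]
expected <- predict(fit, newdata = setdat, allow.new.levels = TRUE)
z        <- (setdat$lr9 - expected) / sigma(fit)
```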
152 | 153 | ## 10.5_make_reg_set_lit_rasters.R 154 | 155 | Creates 1as GeoTIFFs of settlement cells with values corresponding to their electrification scores. Multiple metrics are employed. 156 | 157 | ## 10.6_comp_reg_w_wb_erate.R 158 | 159 | Compares the population-weighted electrification status of settlement cells, as calculated from the regressions, with the values reported by the World Bank for each year for a given country. 160 | --------------------------------------------------------------------------------