├── .gitignore
├── LICENSE
├── README.md
├── figures
├── clean
│ ├── figure00_graphical_abstract.eps
│ ├── figure01_schematic.pdf
│ ├── figure02_performance.pdf
│ ├── figure03_CA_fires.pdf
│ ├── figure04_annual_smokePM.png
│ ├── figure05_state_trends.pdf
│ ├── figure06_smokePM_by_covariates.pdf
│ ├── figure07_extreme_days.pdf
│ ├── figureS01_hysplit.png
│ ├── figureS02_smoke_days.png
│ ├── figureS03_plume_size_by_date.png
│ ├── figureS04_hms_missing_dates.pdf
│ ├── figureS05_station_timeseries.png
│ ├── figureS06_improve_csn_speciated.png
│ ├── figureS07_aod_training_locations.png
│ ├── figureS08_AOD_performance.png
│ ├── figureS09_grid_to_station.pdf
│ ├── figureS10_hysplit_aod_comparison.png
│ ├── figureS11_station_modis_landcover.png
│ ├── figureS12_poor_performance_stations.png
│ ├── figureS13_fire_smoke_dynamics.png
│ ├── figureS14_burned_area_trend.png
│ ├── figureS15_annual_fires.png
│ ├── figureS16_state_fires.png
│ ├── figureS17_maps_avgs5yr_labeled.png
│ └── figureS18_PurpleAir_comparison.png
└── raw
│ ├── figure00a.png
│ ├── figure00b.png
│ ├── figure01a.png
│ ├── figure01b.png
│ ├── figure01c.png
│ ├── figure01d.png
│ ├── figure02a.png
│ ├── figure02b.png
│ ├── figure02c_histogram.png
│ ├── figure02c_map.png
│ ├── figure02d.png
│ ├── figure03a.png
│ ├── figure03b-d_time_series.png
│ ├── figure03b_map.png
│ ├── figure03c_map.png
│ ├── figure03d_map.png
│ ├── figure04.png
│ ├── figure05a.png
│ ├── figure05b_map.png
│ ├── figure05b_time_series.png
│ ├── figure06a-d_maps.png
│ ├── figure06e.png
│ ├── figure07a-b.png
│ ├── figure07c.png
│ ├── figureS01a.png
│ ├── figureS01b.png
│ ├── figureS01c.png
│ ├── figureS02a.png
│ ├── figureS02b.png
│ ├── figureS02c.jpg
│ ├── figureS02d.png
│ ├── figureS02e.png
│ ├── figureS02f.png
│ ├── figureS03.png
│ ├── figureS04.pdf
│ ├── figureS05.png
│ ├── figureS06a.png
│ ├── figureS06b.png
│ ├── figureS07.png
│ ├── figureS08a.png
│ ├── figureS08b.png
│ ├── figureS09.pdf
│ ├── figureS10.png
│ ├── figureS11.png
│ ├── figureS12a.png
│ ├── figureS12b.png
│ ├── figureS12c.png
│ ├── figureS13.png
│ ├── figureS14.png
│ ├── figureS15.png
│ ├── figureS16.png
│ ├── figureS17.png
│ └── figureS18.png
├── scripts
├── main
│ ├── 01_01_train_AOD_Sherlock.R
│ ├── 01_02_predict_AOD_Sherlock.R
│ ├── 01_02_predict_AOD_Sherlock.sh
│ ├── 02_01_define_smokePM_training_jobs.R
│ ├── 02_02_smokePM_training.sh
│ ├── 02_02_train_smokePM_Sherlock.R
│ ├── 02_03_predict_smokePM_Sherlock.R
│ ├── 02_03_predict_smokePM_Sherlock.sh
│ ├── 02_04_combine_preds.R
│ ├── 02_05_finalize_10km_grid.R
│ ├── 02_06_gridded_predictions_to_county.R
│ ├── 02_07_gridded_predictions_to_zip.R
│ ├── 02_08_gridded_predictions_to_tract.R
│ ├── 02_09_finalize_preds.R
│ ├── 03_01_figure00.R
│ ├── 03_02_figure01.R
│ ├── 03_03_figure02.R
│ ├── 03_04_figure03.R
│ ├── 03_05_figure04.R
│ ├── 03_06_figure05.R
│ ├── 03_07_figure06.R
│ ├── 03_08_figure07.R
│ ├── 04_01_manuscript_calculations.R
│ ├── 05_01_figureS01.R
│ ├── 05_02_figureS02.R
│ ├── 05_03_figureS03.R
│ ├── 05_04_figureS04.R
│ ├── 05_05_figureS05.R
│ ├── 05_06_figureS06.R
│ ├── 05_07_figureS07.R
│ ├── 05_08_figureS08.R
│ ├── 05_09_figureS09.R
│ ├── 05_10_figureS10.R
│ ├── 05_11_figureS11.R
│ ├── 05_12_figureS12.R
│ ├── 05_13_figureS13.R
│ ├── 05_14_figureS14.R
│ ├── 05_15_figureS15.R
│ ├── 05_16_figureS16.R
│ ├── 05_17_figureS17.R
│ ├── 05_18_figureS18.R
│ ├── 06_01_tableS01.R
│ └── 06_02_tableS04.R
└── setup
│ ├── 00_01_load_packages.R
│ ├── 00_02_load_functions.R
│ └── 00_03_load_settings.R
└── tables
├── clean
├── tableS01_distance_to_plume_effects.tex
├── tableS02_anomAOD_model_inputs.tex
├── tableS03_smokePM_model_inputs.tex
└── tableS04_model_performance.tex
└── raw
└── tableS01.tex
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 |
8 | # User-specific files
9 | .Ruserdata
10 |
11 | # Example code in package build process
12 | *-Ex.R
13 |
14 | # Output files from R CMD build
15 | /*.tar.gz
16 |
17 | # Output files from R CMD check
18 | /*.Rcheck/
19 |
20 | # RStudio files
21 | .Rproj.user/
22 |
23 | # produced vignettes
24 | vignettes/*.html
25 | vignettes/*.pdf
26 |
27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
28 | .httr-oauth
29 |
30 | # knitr and R markdown default cache directories
31 | *_cache/
32 | /cache/
33 |
34 | # Temporary files created by R markdown
35 | *.utf8.md
36 | *.knit.md
37 |
38 | # R Environment Variables
39 | .Renviron
40 |
41 | # Other
42 | .DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 ECHOLab: Environmental Change and Human Outcomes
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # daily-10km-smokePM
2 | Repo supporting [Childs et al 2022 "Daily local-level estimates of ambient wildfire smoke PM2.5 for the contiguous US"](https://doi.org/10.1021/acs.est.2c02934).
3 |
4 | Results from the paper are in the `figures/clean` and `tables/clean` folders. Code to replicate results are in the `scripts` folder. Data, models, and predictions are in [Dropbox](https://www.dropbox.com/sh/e7m3313fb7sqxui/AABGu-jUO3Ps2isGHbB2EGfAa?dl=0).
5 |
6 | ## Final predictions
7 | Daily smoke PM2.5 predictions from Jan 1, 2006 to Dec 31, 2020 for the contiguous US can be [downloaded](https://www.dropbox.com/sh/16bwdnfbakvuf3x/AABlnrek080Qu9YnbkLkUk8ha?st=a2oli9on&dl=0) at the following spatial scales:
8 | * [10 km grid](https://www.dropbox.com/sh/9mbcxy65crd5cex/AAAk2_1QE6A5rAS7M5We-iapa?st=f8gglo0m&dl=0)
9 | * [County](https://www.dropbox.com/sh/tze93uz29lzyr0h/AABulMjC_l-b5YssncaK5u7ha?st=tujqcqjp&dl=0)
10 | * [ZCTA5](https://www.dropbox.com/sh/6ynye9u1vl8idbj/AAAAjn61JpxEoyfbESsbQY9Ua?st=9kfa7mxr&dl=0)
11 | * [Census tract](https://www.dropbox.com/sh/uw81pu5eh6o4q5v/AACZJplCr-EiLruAIFn0nm5Na?st=mz7gp2su&dl=0)
12 |
13 | Data download is also available through [Harvard Dataverse](https://doi.org/10.7910/DVN/DJVMTV).
14 |
15 | ### Descriptions of final prediction files
16 | 10 km grid
17 | * `10km_grid/10km_grid_wgs84/`: this is a folder that contains the shapefile for the 10 km grid.
18 | * `10km_grid/smokePM2pt5_predictions_daily_10km_20060101-20201231.rds`: this is a file that contains a data frame with the final set of daily smoke PM2.5 predictions on smoke days at 10 km resolution from January 1, 2006 to December 31, 2020 for the contiguous US. The `grid_id_10km` column in this file corresponds to the `ID` column in the 10 km grid shapefile. All rows in this file are predictions on smoke days. Predictions on non-smoke days are by construction 0 $\mu g/m^3$ and not included in this file. A smoke PM2.5 prediction of 0 in this file means that the grid cell-day did have a smoke day but did not have elevated PM2.5. The full set of smoke PM2.5 predictions on both smoke days and non-smoke days can be obtained by setting the smoke PM2.5 prediction to 0 on grid cell-days in the 10 km grid and in the January 1, 2006-December 31, 2020 date range that are not in this file. For example, the R code below returns the full set of smoke PM2.5 predictions:
19 |
20 | ```
21 | library(lubridate)
22 | library(sf)
23 | library(dplyr)
24 | library(tidyr)
25 |
26 | # Load smokePM predictions on smoke days
27 | preds = readRDS("./final/10km_grid/smokePM2pt5_predictions_daily_10km_20060101-20201231.rds")
28 |
29 | # Load 10 km grid
30 | grid_10km = read_sf("./final/10km_grid/10km_grid_wgs84/10km_grid_wgs84.shp")
31 |
32 | # Load full set of dates
33 | dates = seq.Date(ymd("20060101"), ymd("20201231"), by = "day")
34 |
35 | # Get full combination of grid cell-days
36 | # Warning: this may require a large amount of memory
37 | out = expand.grid(grid_id_10km = grid_10km$ID, date = dates)
38 |
39 | # Match smokePM predictions on smoke days to grid cell-days
40 | out = left_join(out, preds, by = c("grid_id_10km", "date"))
41 |
42 | # Predict 0 for remaining grid cell-days, which are non-smoke days
43 | out = mutate(out, smokePM_pred = replace_na(smokePM_pred, 0))
44 | ```
45 |
46 | * `10km_grid/smokePM2pt5_predictions_daily_10km_20060101-20201231.csv`: this is the same as `smokePM2pt5_predictions_daily_10km_20060101-20201231.rds`, except it is saved as a CSV file.
47 |
48 | County
49 | * `county/tl_2019_us_county/`: this is a folder that contains the shapefile for CONUS counties in 2019. Files were downloaded from the US Census Bureau TIGER/Line Shapefiles [website](https://www.census.gov/cgi-bin/geo/shapefiles/index.php). R users may also use the `tigris` package. This shapefile includes only counties within the spatial domain over which smoke PM2.5 predictions are made.
50 |
51 | * `county/smokePM2pt5_predictions_daily_county_20060101-20201231.rds`: this is a file that contains a data frame with the final set of daily smoke PM2.5 predictions on smoke days at the county level from January 1, 2006 to December 31, 2020 for the contiguous US. County-level smoke PM2.5 predictions are aggregated from smoke PM2.5 predictions at the 10 km resolution using population and area of intersection-weighted averaging (see `scripts/main/02_06_gridded_predictions_to_county.R`). The `GEOID` column in this file corresponds to the `GEOID` column in the county shapefile. All rows in this file are predictions on smoke days. Predictions on non-smoke days are by construction 0 $\mu g/m^3$ and not included in this file. A smoke PM2.5 prediction of 0 in this file means that the county-day did have a smoke day but did not have elevated PM2.5. The full set of smoke PM2.5 predictions on both smoke days and non-smoke days can be obtained by setting the smoke PM2.5 prediction to 0 on county-days in the counties and in the January 1, 2006-December 31, 2020 date range that are not in this file. For example, the R code below returns the full set of smoke PM2.5 predictions:
52 |
53 | ```
54 | library(lubridate)
55 | library(sf)
56 | library(dplyr)
57 | library(tidyr)
58 |
59 | # Load smokePM predictions on smoke days
60 | preds = readRDS("./final/county/smokePM2pt5_predictions_daily_county_20060101-20201231.rds")
61 |
62 | # Load counties
63 | counties = read_sf("./final/county/tl_2019_us_county")
64 |
65 | # Load full set of dates
66 | dates = seq.Date(ymd("20060101"), ymd("20201231"), by = "day")
67 |
68 | # Get full combination of county-days
69 | # Warning: this may require a large amount of memory
70 | out = expand.grid(GEOID = counties$GEOID, date = dates)
71 |
72 | # Match smokePM predictions on smoke days to county-days
73 | out = left_join(out, preds, by = c("GEOID", "date"))
74 |
75 | # Predict 0 for remaining county-days, which are non-smoke days
76 | out = mutate(out, smokePM_pred = replace_na(smokePM_pred, 0))
77 | ```
78 | * `county/smokePM2pt5_predictions_daily_county_20060101-20201231.csv`: this is the same as `smokePM2pt5_predictions_daily_county_20060101-20201231.rds`, except it is saved as a CSV file.
79 |
80 | ZIP Code Tabulation Area (ZCTA5)
81 | * `zcta/tl_2019_us_zcta510/`: ths is a folder that contains the shapefile for CONUS zip code tabulation areas in 2019. Files were downloaded from the US Census Bureau TIGER/Line Shapefiles [website](https://www.census.gov/cgi-bin/geo/shapefiles/index.php). R users may also use the `tigris` package. This shapefile includes only ZCTAs within the spatial domain over which smoke PM2.5 predictions are made.
82 |
83 | * `zcta/smokePM2pt5_predictions_daily_zcta_20060101-20201231.rds`: this is a file that contains a data frame with the final set of daily smoke PM2.5 predictions on smoke days at the ZCTA5 level from January 1, 2006 to December 31, 2020 for the contiguous US. ZCTA-level smoke PM2.5 predictions are aggregated from smoke PM2.5 predictions at the 10 km resolution using population and area of intersection-weighted averaging (see `scripts/main/02_07_gridded_predictions_to_zip.R`).The `GEOID10` column in this file corresponds to the `GEOID10` column in the ZCTA shapefile. All rows in this file are predictions on smoke days. Predictions on non-smoke days are by construction 0 $\mu g/m^3$ and not included in this file. A smoke PM2.5 prediction of 0 in this file means that the ZCTA-day did have a smoke day but did not have elevated PM2.5. The full set of smoke PM2.5 predictions on both smoke days and non-smoke days can be obtained by setting the smoke PM2.5 prediction to 0 on ZCTA-days in the ZCTAs and in the January 1, 2006-December 31, 2020 date range that are not in this file. For example, the R code below returns the full set of smoke PM2.5 predictions:
84 |
85 | ```
86 | library(lubridate)
87 | library(sf)
88 | library(dplyr)
89 | library(tidyr)
90 |
91 | # Load smokePM predictions on smoke days
92 | preds = readRDS("./final/zcta/smokePM2pt5_predictions_daily_zcta_20060101-20201231.rds")
93 |
94 | # Load ZCTAs
95 | zctas = read_sf("./final/zcta/tl_2019_us_zcta510")
96 |
97 | # Load full set of dates
98 | dates = seq.Date(ymd("20060101"), ymd("20201231"), by = "day")
99 |
100 | # Get full combination of ZCTA-days
101 | # Warning: this may require a large amount of memory
102 | out = expand.grid(GEOID10 = zctas$GEOID10, date = dates)
103 |
104 | # Match smokePM predictions on smoke days to ZCTA-days
105 | out = left_join(out, preds, by = c("GEOID10", "date"))
106 |
107 | # Predict 0 for remaining ZCTA-days, which are non-smoke days
108 | out = mutate(out, smokePM_pred = replace_na(smokePM_pred, 0))
109 | ```
110 |
111 | * `zcta/smokePM2pt5_predictions_daily_zcta_20060101-20201231.csv`: this is the same as `smokePM2pt5_predictions_daily_zcta_20060101-20201231.rds`, except it is saved as a CSV file.
112 |
113 | Census tract
114 | * `tract/tracts/`: this is a folder that contains the shapefiles for CONUS census tracts by state/territory in 2019. Files were downloaded from the US Census Bureau TIGER/Line Shapefiles [website](https://www.census.gov/cgi-bin/geo/shapefiles/index.php). R users may also use the `tigris` package. This shapefile includes only tracts within the spatial domain over which smoke PM2.5 predictions are made.
115 |
116 | * `tract/smokePM2pt5_predictions_daily_tract_20060101-20201231.rds`: this is a file that contains a data frame with the final set of daily smoke PM2.5 predictions on smoke days at the tract level from January 1, 2006 to December 31, 2020 for the contiguous US. Tract-level smoke PM2.5 predictions are aggregated from smoke PM2.5 predictions at the 10 km resolution using population and area of intersection-weighted averaging (see `scripts/main/02_08_gridded_predictions_to_tract.R`). The `GEOID` column in this file corresponds to the `GEOID` column in the tract shapefiles. All rows in this file are predictions on smoke days. Predictions on non-smoke days are by construction 0 $\mu g/m^3$ and not included in this file. A smoke PM2.5 prediction of 0 in this file means that the tract-day did have a smoke day but did not have elevated PM2.5. The full set of smoke PM2.5 predictions on both smoke days and non-smoke days can be obtained by setting the smoke PM2.5 prediction to 0 on tract-days in the tracts and in the January 1, 2006-December 31, 2020 date range that are not in this file. For example, the R code below returns the full set of smoke PM2.5 predictions:
117 |
118 | ```
119 | library(lubridate)
120 | library(sf)
121 | library(dplyr)
122 | library(tidyr)
123 |
124 | # Load smokePM predictions on smoke days
125 | preds = readRDS("./final/tract/smokePM2pt5_predictions_daily_tract_20060101-20201231.rds")
126 |
127 | # Load tracts
128 | tracts = list.files("./final/tract/tracts", full.names = T, pattern = "\\.shp$")
129 | tracts = lapply(tracts, read_sf)
130 | tracts = bind_rows(tracts)
131 |
132 | # Load full set of dates
133 | dates = seq.Date(ymd("20060101"), ymd("20201231"), by = "day")
134 |
135 | # Get full combination of tract-days
136 | # Warning: this may require a large amount of memory
137 | out = expand.grid(GEOID = tracts$GEOID, date = dates)
138 |
139 | # Match smokePM predictions on smoke days to tract-days
140 | out = left_join(out, preds, by = c("GEOID", "date"))
141 |
142 | # Predict 0 for remaining tract-days, which are non-smoke days
143 | out = mutate(out, smokePM_pred = replace_na(smokePM_pred, 0))
144 | ```
145 |
146 | * `tract/smokePM2pt5_predictions_daily_tract_20060101-20201231.csv`: this is the same as `smokePM2pt5_predictions_daily_tract_20060101-20201231.rds`, except it is saved as a CSV file.
147 |
148 | ## How to replicate results
149 | 1. Download this repository.
150 | 2. Download the [Dropbox](https://www.dropbox.com/sh/e7m3313fb7sqxui/AABGu-jUO3Ps2isGHbB2EGfAa?dl=0) folder. Place files downloaded from Dropbox in the same folder as the downloaded GitHub repository.
151 | 3. Change settings in `scripts/setup/00_03_load_settings.R`:
152 | 1. Set `gee_email` to your Google Earth Engine email.
153 | 2. Set `key` to the value of your US Census Bureau API Key (which can be requested [here](https://api.census.gov/data/key_signup.html)).
154 | 3. Set `num_cores` to the number of cores to use in parallel computing.
155 | 4. Set `path_dropbox` to the location of the data downloaded from Dropbox.
156 | 5. Set `path_github` to the location of this downloaded repository's root.
157 | 4. Install packages listed in `scripts/setup/00_01_load_packages.R`.
158 | 5. Set working directory to this downloaded repository's root.
159 | 6. Run scripts in `scripts/main`. Some scripts may require relatively large computer memory.
160 |
--------------------------------------------------------------------------------
/figures/clean/figure01_schematic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figure01_schematic.pdf
--------------------------------------------------------------------------------
/figures/clean/figure02_performance.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figure02_performance.pdf
--------------------------------------------------------------------------------
/figures/clean/figure03_CA_fires.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figure03_CA_fires.pdf
--------------------------------------------------------------------------------
/figures/clean/figure04_annual_smokePM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figure04_annual_smokePM.png
--------------------------------------------------------------------------------
/figures/clean/figure05_state_trends.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figure05_state_trends.pdf
--------------------------------------------------------------------------------
/figures/clean/figure06_smokePM_by_covariates.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figure06_smokePM_by_covariates.pdf
--------------------------------------------------------------------------------
/figures/clean/figure07_extreme_days.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figure07_extreme_days.pdf
--------------------------------------------------------------------------------
/figures/clean/figureS01_hysplit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS01_hysplit.png
--------------------------------------------------------------------------------
/figures/clean/figureS02_smoke_days.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS02_smoke_days.png
--------------------------------------------------------------------------------
/figures/clean/figureS03_plume_size_by_date.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS03_plume_size_by_date.png
--------------------------------------------------------------------------------
/figures/clean/figureS04_hms_missing_dates.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS04_hms_missing_dates.pdf
--------------------------------------------------------------------------------
/figures/clean/figureS05_station_timeseries.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS05_station_timeseries.png
--------------------------------------------------------------------------------
/figures/clean/figureS06_improve_csn_speciated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS06_improve_csn_speciated.png
--------------------------------------------------------------------------------
/figures/clean/figureS07_aod_training_locations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS07_aod_training_locations.png
--------------------------------------------------------------------------------
/figures/clean/figureS08_AOD_performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS08_AOD_performance.png
--------------------------------------------------------------------------------
/figures/clean/figureS09_grid_to_station.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS09_grid_to_station.pdf
--------------------------------------------------------------------------------
/figures/clean/figureS10_hysplit_aod_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS10_hysplit_aod_comparison.png
--------------------------------------------------------------------------------
/figures/clean/figureS11_station_modis_landcover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS11_station_modis_landcover.png
--------------------------------------------------------------------------------
/figures/clean/figureS12_poor_performance_stations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS12_poor_performance_stations.png
--------------------------------------------------------------------------------
/figures/clean/figureS13_fire_smoke_dynamics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS13_fire_smoke_dynamics.png
--------------------------------------------------------------------------------
/figures/clean/figureS14_burned_area_trend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS14_burned_area_trend.png
--------------------------------------------------------------------------------
/figures/clean/figureS15_annual_fires.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS15_annual_fires.png
--------------------------------------------------------------------------------
/figures/clean/figureS16_state_fires.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS16_state_fires.png
--------------------------------------------------------------------------------
/figures/clean/figureS17_maps_avgs5yr_labeled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS17_maps_avgs5yr_labeled.png
--------------------------------------------------------------------------------
/figures/clean/figureS18_PurpleAir_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/clean/figureS18_PurpleAir_comparison.png
--------------------------------------------------------------------------------
/figures/raw/figure00a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure00a.png
--------------------------------------------------------------------------------
/figures/raw/figure00b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure00b.png
--------------------------------------------------------------------------------
/figures/raw/figure01a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure01a.png
--------------------------------------------------------------------------------
/figures/raw/figure01b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure01b.png
--------------------------------------------------------------------------------
/figures/raw/figure01c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure01c.png
--------------------------------------------------------------------------------
/figures/raw/figure01d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure01d.png
--------------------------------------------------------------------------------
/figures/raw/figure02a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure02a.png
--------------------------------------------------------------------------------
/figures/raw/figure02b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure02b.png
--------------------------------------------------------------------------------
/figures/raw/figure02c_histogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure02c_histogram.png
--------------------------------------------------------------------------------
/figures/raw/figure02c_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure02c_map.png
--------------------------------------------------------------------------------
/figures/raw/figure02d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure02d.png
--------------------------------------------------------------------------------
/figures/raw/figure03a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure03a.png
--------------------------------------------------------------------------------
/figures/raw/figure03b-d_time_series.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure03b-d_time_series.png
--------------------------------------------------------------------------------
/figures/raw/figure03b_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure03b_map.png
--------------------------------------------------------------------------------
/figures/raw/figure03c_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure03c_map.png
--------------------------------------------------------------------------------
/figures/raw/figure03d_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure03d_map.png
--------------------------------------------------------------------------------
/figures/raw/figure04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure04.png
--------------------------------------------------------------------------------
/figures/raw/figure05a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure05a.png
--------------------------------------------------------------------------------
/figures/raw/figure05b_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure05b_map.png
--------------------------------------------------------------------------------
/figures/raw/figure05b_time_series.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure05b_time_series.png
--------------------------------------------------------------------------------
/figures/raw/figure06a-d_maps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure06a-d_maps.png
--------------------------------------------------------------------------------
/figures/raw/figure06e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure06e.png
--------------------------------------------------------------------------------
/figures/raw/figure07a-b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure07a-b.png
--------------------------------------------------------------------------------
/figures/raw/figure07c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figure07c.png
--------------------------------------------------------------------------------
/figures/raw/figureS01a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS01a.png
--------------------------------------------------------------------------------
/figures/raw/figureS01b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS01b.png
--------------------------------------------------------------------------------
/figures/raw/figureS01c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS01c.png
--------------------------------------------------------------------------------
/figures/raw/figureS02a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS02a.png
--------------------------------------------------------------------------------
/figures/raw/figureS02b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS02b.png
--------------------------------------------------------------------------------
/figures/raw/figureS02c.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS02c.jpg
--------------------------------------------------------------------------------
/figures/raw/figureS02d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS02d.png
--------------------------------------------------------------------------------
/figures/raw/figureS02e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS02e.png
--------------------------------------------------------------------------------
/figures/raw/figureS02f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS02f.png
--------------------------------------------------------------------------------
/figures/raw/figureS03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS03.png
--------------------------------------------------------------------------------
/figures/raw/figureS04.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS04.pdf
--------------------------------------------------------------------------------
/figures/raw/figureS05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS05.png
--------------------------------------------------------------------------------
/figures/raw/figureS06a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS06a.png
--------------------------------------------------------------------------------
/figures/raw/figureS06b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS06b.png
--------------------------------------------------------------------------------
/figures/raw/figureS07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS07.png
--------------------------------------------------------------------------------
/figures/raw/figureS08a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS08a.png
--------------------------------------------------------------------------------
/figures/raw/figureS08b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS08b.png
--------------------------------------------------------------------------------
/figures/raw/figureS09.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS09.pdf
--------------------------------------------------------------------------------
/figures/raw/figureS10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS10.png
--------------------------------------------------------------------------------
/figures/raw/figureS11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS11.png
--------------------------------------------------------------------------------
/figures/raw/figureS12a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS12a.png
--------------------------------------------------------------------------------
/figures/raw/figureS12b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS12b.png
--------------------------------------------------------------------------------
/figures/raw/figureS12c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS12c.png
--------------------------------------------------------------------------------
/figures/raw/figureS13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS13.png
--------------------------------------------------------------------------------
/figures/raw/figureS14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS14.png
--------------------------------------------------------------------------------
/figures/raw/figureS15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS15.png
--------------------------------------------------------------------------------
/figures/raw/figureS16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS16.png
--------------------------------------------------------------------------------
/figures/raw/figureS17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS17.png
--------------------------------------------------------------------------------
/figures/raw/figureS18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/echolab-stanford/daily-10km-smokePM/26d52f20048353b2dd4c394715b5df7e9a558bcc/figures/raw/figureS18.png
--------------------------------------------------------------------------------
/scripts/main/01_01_train_AOD_Sherlock.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Trains anomalous AOD model.
8 | # ------------------------------------------------------------------------------
9 | txt_progress_file <- file.path(path_output, "anomAOD", "model", "aod_xgb_progress.txt")
10 | max_xgb_rounds <- 10000
11 | bayes_opt_n_init <- 24
12 | bayes_opt_n_iter <- 24
13 |
14 | if (Sys.getenv('SLURM_JOB_ID') != ""){
15 | usable.cores <- Sys.getenv("SLURM_NTASKS_PER_NODE")
16 | } else {
17 | usable.cores <- 2
18 | }
19 |
20 | mod_data <- readRDS(file.path(path_data, "4_clean", "aod_training.rds"))
21 |
22 | xgb_opt_fun <- function(eta, gamma, max_depth, subsample, colsample_bytree,
23 | min_child_weight, xgb_mat, cv_ind = NULL,
24 | rounds_max = 100, nfold = 4, nthread = 2,
25 | progress_file = ""){
26 | set.seed(10001)
27 | mod_xgb_cv <- xgb.cv(
28 | params = list(booster = "gbtree",
29 | eta = eta,
30 | gamma = gamma,
31 | max_depth = max_depth,
32 | subsample = subsample,
33 | colsample_bytree = colsample_bytree,
34 | min_child_weight = min_child_weight,
35 | objective = "reg:squarederror",
36 | eval_metric = "rmse"),
37 | data = xgb_mat,
38 | nrounds = rounds_max,
39 | nthread = nthread,
40 | nfold = nfold,
41 | folds = cv_ind,
42 | early_stopping_rounds = 10,
43 | print_every_n = 20,
44 | verbose = TRUE)
45 |
46 | if(progress_file != ""){
47 | write(paste0("nrounds = ", mod_xgb_cv$best_iteration,
48 | ", eta = ", eta,
49 | ", gamma = ", gamma,
50 | ", max_depth = ", max_depth,
51 | ", colsample_bytree = ", colsample_bytree,
52 | ", subsample = ", subsample,
53 | ", min_child_weight = ", min_child_weight,
54 | ", RMSE = ", mod_xgb_cv$evaluation_log %>%
55 | filter(iter == mod_xgb_cv$best_iteration) %>%
56 | pull(test_rmse_mean),
57 | "\n"),
58 | file = progress_file,
59 | append = T)}
60 | list(Score = mod_xgb_cv$evaluation_log %>%
61 | filter(iter == mod_xgb_cv$best_iteration) %>%
62 | pull(test_rmse_mean) %>%
63 | multiply_by(-1),
64 | Pred = mod_xgb_cv$best_iteration) %>%
65 | return
66 | }
67 |
68 | xgb_train_mat <- xgb.DMatrix(
69 | data = model.matrix.lm(~.-1,
70 | data = mod_data %>%
71 | mutate(month = as.factor(month)) %>%
72 | select(month, lat, lon, aot_anom,
73 | aot_anom_lag1, aot_anom_lag2, aot_anom_lag3,
74 | fire_dist_km, closest_fire_area, closest_fire_num_points,
75 | pbl_min, pbl_max, pbl_mean,
76 | wind_u, wind_v,
77 | dewpoint_temp_2m, temp_2m,
78 | sea_level_pressure, surface_pressure, precip,
79 | elevation_mean, elevation_stdDev,
80 | developed, barren, forest, shrubland, cultivated,
81 | wetlands, herbaceous, water),
82 | na.action = "na.pass"),
83 | label = mod_data %>%
84 | pull(aod_anom))
85 |
86 | # define folds
87 | cv_folds = mod_data %>%
88 | mutate(row_no = row_number()) %>%
89 | group_by(fold) %>%
90 | summarise(row_nos = list(row_no)) %>%
91 | pull(row_nos)
92 |
93 |
94 | # run bayesian optimization to find the best hyperparameters
95 | write("", txt_progress_file)
96 |
97 | tic <- Sys.time()
98 | print(paste0("Starting bayesian optimization at ", tic))
99 | set.seed(10001)
100 | bayes_opt_params <- BayesianOptimization(
101 | function(eta, gamma, max_depth, subsample,
102 | colsample_bytree, min_child_weight){
103 | xgb_opt_fun(eta, gamma, max_depth, subsample,
104 | colsample_bytree, min_child_weight,
105 | xgb_train_mat, cv_ind = cv_folds,
106 | rounds_max = max_xgb_rounds, nfold = length(cv_folds),
107 | nthread = usable.cores,
108 | progress_file = txt_progress_file)},
109 | bounds = list(
110 | eta = c(0.0001, 0.2),
111 | gamma = c(0, 100),
112 | max_depth = c(2L, 50L),
113 | colsample_bytree = c(0.5, 1),
114 | subsample = c(0.25, 1),
115 | min_child_weight = c(1L, 50L)),
116 | init_points = bayes_opt_n_init, # heuristic on how many points?
117 | n_iter = bayes_opt_n_iter)
118 | toc <- Sys.time()
119 | print(paste0("bayesian optimization completed at ", toc))
120 | print(toc - tic)
121 |
122 | mod_gb_final <- xgb.train(
123 | params = c(as.list(bayes_opt_params$Best_Par),
124 | booster = "gbtree",
125 | objective = "reg:squarederror",
126 | eval_metric = "rmse"),
127 | data = xgb_train_mat,
128 | nrounds = bayes_opt_params$History %>%
129 | filter(Value == bayes_opt_params$Best_Value) %>%
130 | pull(Round) %>%
131 | magrittr::extract(unlist(bayes_opt_params$Pred), .),
132 | verbose = 1)
133 |
134 | saveRDS(bayes_opt_params, file.path(path_output, "anomAOD", "model", "aod_bayes_opt_params.rds"))
135 | xgb.save(mod_gb_final, file.path(path_output, "anomAOD", "model", "aod_mod.xgb"))
136 |
137 | # save variable importance information
138 | feat_names <- model.matrix.lm(~.-1,
139 | data = mod_data %>%
140 | mutate(month = as.factor(month)) %>%
141 | select(month, lat, lon, aot_anom,
142 | aot_anom_lag1, aot_anom_lag2, aot_anom_lag3,
143 | fire_dist_km, closest_fire_area, closest_fire_num_points,
144 | pbl_min, pbl_max, pbl_mean,
145 | wind_u, wind_v,
146 | dewpoint_temp_2m, temp_2m,
147 | sea_level_pressure, surface_pressure, precip,
148 | elevation_mean, elevation_stdDev,
149 | developed, barren, forest, shrubland, cultivated,
150 | wetlands, herbaceous, water),
151 | na.action = "na.pass") %>% colnames
152 |
153 | var_import <- xgb.importance(model = mod_gb_final)
154 |
155 | saveRDS(list(variable_importance = var_import,
156 | feature_names = feat_names),
157 | file.path(path_output, "anomAOD", "model", "AOD_var_importance.rds"))
158 |
--------------------------------------------------------------------------------
/scripts/main/01_02_predict_AOD_Sherlock.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #SBATCH --job-name=predict_AOD
4 | #
5 | #SBATCH --partition=serc
6 | #SBATCH --nodes=1
7 | #SBATCH --ntasks-per-node=1
8 | #SBATCH --cpus-per-task=33
9 | #SBATCH --time=1-06:00:00
10 | #SBATCH --mem-per-cpu=20G
11 | #SBATCH --output=predict_AOD.log
12 | #SBATCH --mail-type=ALL
13 |
14 | # load modules
15 | ml physics gdal/2.2.1 udunits proj/4.9.3 geos
16 | ml R/4.0.2
17 |
18 | # execute script
19 | Rscript scripts/main/01_02_predict_AOD_Sherlock.R
20 |
--------------------------------------------------------------------------------
/scripts/main/02_01_define_smokePM_training_jobs.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | #-------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Defines smoke PM2.5 training jobs.
8 | #-------------------------------------------------------------------------------
9 | # csv with jobs to submit
10 | expand.grid(cv_fold_num = c(0:4,99),
11 | drop_vars = c("", "traj_points", "aod_anom_pred")) %>%
12 | write.table(file.path(path_data, "smokePM_training_jobs.csv"),
13 | row.names = FALSE, col.names = FALSE, sep = ",")
14 |
--------------------------------------------------------------------------------
/scripts/main/02_02_smokePM_training.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export IFS=","
3 |
4 | cat data/smokePM_training_jobs.csv | while read a b; do
5 |
6 | job_file="train_smokePM_fold${a}_drop${b}.job"
7 |
8 | echo "#!/bin/bash
9 |
10 | #SBATCH -p serc
11 | #SBATCH --job-name=train_smokePM_fold${a}_drop${b}
12 | #SBATCH --nodes=1
13 | #SBATCH --ntasks-per-node=20
14 | #SBATCH --mem-per-cpu=20GB
15 | #SBATCH --time=5-00:00:00
16 | #SBATCH --mail-type=ALL
17 | #SBATCH --output=logs/train_smokePM_fold${a}_drop${b}.log
18 |
19 |
20 | ml R/4.0.2
21 |
22 | Rscript scripts/main/02_02_train_smokePM_Sherlock.R "$a" "$b" " > $job_file
23 |
24 | sbatch $job_file
25 |
26 | done
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/scripts/main/02_02_train_smokePM_Sherlock.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | #-------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Trains smoke PM2.5 model.
8 | #-------------------------------------------------------------------------------
9 | args <- commandArgs(TRUE)
10 | cv_fold_num <- as.numeric(args[1])
11 | drop_vars <- strsplit(args[2], split = "-")[[1]] # second arg should be any character strings of variables to be dropped separated by dashes, matches are on containment of that string in the column name
12 |
13 | print(paste0("tuning model, excluding fold ", cv_fold_num))
14 | print(paste0("excluding variables with names containing ",
15 | paste0(drop_vars, collapse = " or ")))
16 |
17 | txt_progress_file <- file.path(path_output, "smokePM", "model",
18 | paste0("smokePM_xgb_progress_fold", cv_fold_num,
19 | paste0(c("_drop", drop_vars), collapse = "-"), ".txt"))
20 | max_xgb_rounds <- 10000
21 | bayes_opt_n_init <- 24
22 | bayes_opt_n_iter <- 16
23 |
24 | param_bounds <- list(
25 | eta = c(0.001, 0.1),
26 | gamma = c(0, 50),
27 | max_depth = c(2L, 25L),
28 | colsample_bytree = c(0.5, 1),
29 | subsample = c(0.25, 1),
30 | min_child_weight = c(1L, 50L))
31 |
32 | if (Sys.getenv('SLURM_JOB_ID') != "") {
33 | usable.cores <- Sys.getenv("SLURM_NTASKS_PER_NODE")
34 | } else {
35 | usable.cores <- 2
36 | }
37 |
38 | print(paste0("there are ", usable.cores, " usable cores"))
39 |
40 | mod_data <- readRDS(file.path(path_data, "smokePM_training.rds")) %>%
41 | filter(fold != cv_fold_num)
42 |
43 | xgb_train_mat <- xgb.DMatrix(
44 | data = model.matrix.lm(~.-1,
45 | data = mod_data %>%
46 | select(month, lat, lon,
47 | aot_anom, aot_anom_lag1, aot_anom_lag2, aot_anom_lag3,
48 | aod_anom_pred_0.00, aod_anom_pred_0.25, aod_anom_pred_0.50,
49 | aod_anom_pred_0.75, aod_anom_pred_1.00, aod_anom_pred_mean,
50 | AODmissing,
51 | num_traj_points_height_1, num_traj_points_height_2,
52 | num_traj_points_height_3, num_traj_points_height_4,
53 | num_traj_points_height_5,
54 | fire_dist_km, closest_fire_area, closest_fire_num_points,
55 | pbl_min, pbl_max, pbl_mean,
56 | wind_u, wind_v,
57 | dewpoint_temp_2m, temp_2m,
58 | sea_level_pressure, surface_pressure, precip,
59 | elevation_mean, elevation_stdDev,
60 | developed, barren, forest, shrubland, cultivated,
61 | wetlands, herbaceous, water) %>%
62 | select(-contains(drop_vars)),
63 | na.action = "na.pass"),
64 | label = mod_data %>%
65 | pull(smokePM))
66 |
67 |
68 | xgb_opt_fun <- function(eta, gamma, max_depth, subsample, colsample_bytree,
69 | min_child_weight, xgb_mat, cv_ind = NULL,
70 | rounds_max = 100, nfold = 4, nthread = 2,
71 | progress_file = ""){
72 | if(progress_file != ""){
73 | write(paste0("eta = ", eta,
74 | ", gamma = ", gamma,
75 | ", max_depth = ", max_depth,
76 | ", colsample_bytree = ", colsample_bytree,
77 | ", subsample = ", subsample,
78 | ", min_child_weight = ", min_child_weight),
79 | file = progress_file,
80 | append = T)}
81 |
82 | set.seed(10001)
83 | mod_xgb_cv <- xgb.cv(
84 | params = list(booster = "gbtree",
85 | eta = eta,
86 | gamma = gamma,
87 | max_depth = max_depth,
88 | subsample = subsample,
89 | colsample_bytree = colsample_bytree,
90 | min_child_weight = min_child_weight,
91 | objective = "reg:squarederror",
92 | eval_metric = "rmse"),
93 | data = xgb_mat,
94 | nrounds = rounds_max,
95 | nthread = nthread,
96 | nfold = nfold,
97 | folds = cv_ind,
98 | early_stopping_rounds = 10,
99 | print_every_n = 20,
100 | verbose = TRUE)
101 |
102 | if (progress_file != ""){
103 | write(paste0("nrounds = ", mod_xgb_cv$best_iteration,
104 | ", RMSE = ", mod_xgb_cv$evaluation_log %>%
105 | filter(iter == mod_xgb_cv$best_iteration) %>%
106 | pull(test_rmse_mean),
107 | "\n"),
108 | file = progress_file,
109 | append = T)}
110 |
111 | list(Score = mod_xgb_cv$evaluation_log %>%
112 | filter(iter == mod_xgb_cv$best_iteration) %>%
113 | pull(test_rmse_mean) %>%
114 | multiply_by(-1),
115 | Pred = mod_xgb_cv$best_iteration) %>%
116 | return
117 | }
118 |
119 |
120 | # define folds
121 | cv_folds = mod_data %>%
122 | mutate(row_no = row_number()) %>%
123 | group_by(fold) %>%
124 | summarise(row_nos = list(row_no)) %>%
125 | pull(row_nos)
126 |
127 |
128 | # run bayesian optimization to find the best hyperparameters
129 | write("", txt_progress_file)
130 |
131 | tic <- Sys.time()
132 | set.seed(10001)
133 | bayes_opt_params <- BayesianOptimization(
134 | function(eta, gamma, max_depth, subsample,
135 | colsample_bytree, min_child_weight){
136 | xgb_opt_fun(eta, gamma, max_depth, subsample,
137 | colsample_bytree, min_child_weight,
138 | xgb_train_mat, cv_ind = cv_folds,
139 | rounds_max = max_xgb_rounds, nfold = length(cv_folds),
140 | nthread = min(usable.cores, 8),
141 | progress_file = txt_progress_file)},
142 | bounds = param_bounds,
143 | init_points = bayes_opt_n_init, # heuristic on how many points?
144 | n_iter = bayes_opt_n_iter)
145 | toc <- Sys.time()
146 | toc - tic
147 |
148 | saveRDS(bayes_opt_params,
149 | file.path(path_output, "smokePM", "model",
150 | paste0("smokePM_bayes_opt_params_fold", cv_fold_num,
151 | paste0(c("_drop", drop_vars), collapse = "-"),
152 | ".rds")))
153 |
154 | mod_gb_final <- xgb.train(
155 | params = c(as.list(bayes_opt_params$Best_Par),
156 | booster = "gbtree",
157 | objective = "reg:squarederror",
158 | eval_metric = "rmse"),
159 | data = xgb_train_mat,
160 | nrounds = bayes_opt_params$History %>%
161 | filter(Value == bayes_opt_params$Best_Value) %>%
162 | pull(Round) %>%
163 | magrittr::extract(unlist(bayes_opt_params$Pred), .),
164 | verbose = 1)
165 |
166 | xgb.save(mod_gb_final,
167 | file.path(path_output, "smokePM", "model",
168 | paste0("smokePM_mod_fold", cv_fold_num,
169 | paste0(c("_drop", drop_vars), collapse = "-"),
170 | ".xgb")))
171 |
172 | # predict for full data set and save predictions
173 | pred_data <- readRDS(file.path(path_data, "smokePM_training.rds"))
174 |
175 | xgb_pred_mat <- xgb.DMatrix(
176 | data = model.matrix.lm(~.-1,
177 | data = pred_data %>%
178 | select(month, lat, lon,
179 | aot_anom, aot_anom_lag1, aot_anom_lag2, aot_anom_lag3,
180 | aod_anom_pred_0.00, aod_anom_pred_0.25, aod_anom_pred_0.50,
181 | aod_anom_pred_0.75, aod_anom_pred_1.00, aod_anom_pred_mean,
182 | AODmissing,
183 | num_traj_points_height_1, num_traj_points_height_2,
184 | num_traj_points_height_3, num_traj_points_height_4,
185 | num_traj_points_height_5,
186 | fire_dist_km, closest_fire_area, closest_fire_num_points,
187 | pbl_min, pbl_max, pbl_mean,
188 | wind_u, wind_v,
189 | dewpoint_temp_2m, temp_2m,
190 | sea_level_pressure, surface_pressure, precip,
191 | elevation_mean, elevation_stdDev,
192 | developed, barren, forest, shrubland, cultivated,
193 | wetlands, herbaceous, water) %>%
194 | select(-contains(drop_vars)),
195 | na.action = "na.pass"))
196 |
197 | preds <- pred_data %>%
198 | select(id, date, fold) %>%
199 | cbind(smokePM_pred = predict(mod_gb_final, xgb_pred_mat))
200 |
201 | saveRDS(preds,
202 | file.path(path_output, "smokePM", "model", paste0("smokePM_pred_fold", cv_fold_num,
203 | paste0(c("_drop", drop_vars), collapse = "-"),
204 | ".rds")))
205 |
206 | # time permitting, calculate and save the variable importance
207 | var_import <- xgb.importance(model = mod_gb_final)
208 | feat_names <- model.matrix.lm(~.-1,
209 | data = mod_data %>%
210 | select(month, lat, lon,
211 | aot_anom, aot_anom_lag1, aot_anom_lag2, aot_anom_lag3,
212 | aod_anom_pred_0.00, aod_anom_pred_0.25, aod_anom_pred_0.50,
213 | aod_anom_pred_0.75, aod_anom_pred_1.00, aod_anom_pred_mean,
214 | AODmissing,
215 | num_traj_points_height_1, num_traj_points_height_2,
216 | num_traj_points_height_3, num_traj_points_height_4,
217 | num_traj_points_height_5,
218 | fire_dist_km, closest_fire_area, closest_fire_num_points,
219 | pbl_min, pbl_max, pbl_mean,
220 | wind_u, wind_v,
221 | dewpoint_temp_2m, temp_2m,
222 | sea_level_pressure, surface_pressure, precip,
223 | elevation_mean, elevation_stdDev,
224 | developed, barren, forest, shrubland, cultivated,
225 | wetlands, herbaceous, water) %>%
226 | select(-contains(drop_vars)),
227 | na.action = "na.pass") %>% colnames
228 |
229 | saveRDS(list(variable_importance = var_import,
230 | feature_names = feat_names),
231 | file.path(path_output, "smokePM", "model",
232 | paste0("smokePM_var_importance_fold", cv_fold_num,
233 | paste0(c("_drop", drop_vars), collapse = "-"),
234 | ".rds")))
235 |
--------------------------------------------------------------------------------
/scripts/main/02_03_predict_smokePM_Sherlock.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #SBATCH --job-name=predict_smokePM
4 | #
5 | #SBATCH --partition=serc
6 | #SBATCH --nodes=1
7 | #SBATCH --ntasks-per-node=1
8 | #SBATCH --cpus-per-task=30
9 | #SBATCH --time=1-06:00:00
10 | #SBATCH --mem-per-cpu=16G
11 | #SBATCH --output=logs/predict_smokePM.log
12 | #SBATCH --mail-type=ALL
13 |
14 | # load modules
15 | ml physics gdal/2.2.1 udunits proj/4.9.3 geos
16 | ml R/4.0.2
17 |
18 | # execute script
19 | Rscript scripts/main/02_03_predict_smokePM_Sherlock.R
20 |
--------------------------------------------------------------------------------
/scripts/main/02_04_combine_preds.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | #-------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Combines smoke PM2.5 predictions.
8 | #-------------------------------------------------------------------------------
9 | smokePM_pred <- list.files(
10 | file.path(path_output, "smokePM", "predictions", "10km_smoke_days"),
11 | full.names = TRUE
12 | ) %>%
13 | map_dfr(readRDS)
14 |
15 | smokePM_pred %<>% mutate(smokePM_pred = pmax(0, smokePM_pred))
16 |
17 | saveRDS(smokePM_pred,
18 | file.path(path_output, "smokePM", "predictions", "combined",
19 | paste0("smokePM_predictions_",
20 | format(min(smokePM_pred$date), "%Y%m%d"),
21 | "_",
22 | format(max(smokePM_pred$date), "%Y%m%d"),
23 | ".rds")))
24 |
--------------------------------------------------------------------------------
/scripts/main/02_05_finalize_10km_grid.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Jessica Li
7 | # Drops 10 km grid cells that we do not make predictions for. Saves predictions
8 | # and 10 km grid in root folder.
9 | # ------------------------------------------------------------------------------
10 | # Load 10 km grid
11 | grid_10km = read_sf(file.path(path_data, "1_grids", "grid_10km_wgs84"))
12 |
13 | # Get grid cell IDs for which predictions are not made
14 | no_pred_cells = readRDS(list.files(file.path(
15 | path_data, "ERA5_variables", "Land", "surface_pressure", "USA", "10km_grid",
16 | "UTC-0600", "daily_mean_of_1-hourly"), full.names = T)[1]) %>%
17 | filter(is.na(surface_pressure)) %>%
18 | pull(id_grid) %>%
19 | unique()
20 |
21 | # Subset grid
22 | grid_10km = grid_10km %>% filter(!(ID %in% no_pred_cells))
23 |
24 | # Save
25 | if (!dir.exists(file.path(path_final, "10km_grid", "10km_grid_wgs84"))) dir.create(file.path(path_final, "10km_grid", "10km_grid_wgs84"))
26 | write_sf(grid_10km, file.path(path_final, "10km_grid", "10km_grid_wgs84", "10km_grid_wgs84.shp"))
27 |
--------------------------------------------------------------------------------
/scripts/main/02_06_gridded_predictions_to_county.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Aggregates 10 km grid smokePM predictions to county level.
8 | # ------------------------------------------------------------------------------
9 | unit <- "county" # alternatively, "tract"
10 | # load shapefile, plus 10km grid transformed to match the crs
11 | if(unit == "county"){
12 | unit_sf <- counties() %>%
13 | filter(STATEFP %in% nonContig_stateFIPS == F)
14 | } else if(unit == "tract"){
15 | unit_sf <- states() %>%
16 | filter(STATEFP %in% nonContig_stateFIPS == F) %>%
17 | pull(STATEFP) %>%
18 | map_dfr(function(x){
19 | tracts(x, year = 2019) %>% select(STATEFP, GEOID)
20 | })
21 | } else{
22 | stop("only allowed units are \"tract\" or \"county\"")
23 | }
24 |
25 | # read in the grid
26 | grid_10km <- st_read(file.path(path_final, "10km_grid", "10km_grid_wgs84", "10km_grid_wgs84.shp")) %>%
27 | st_transform(st_crs(unit_sf))
28 |
29 | # make a crosswalk with intersection area with grid cells
30 | unit_cross = st_intersection(unit_sf,
31 | grid_10km) %>%
32 | select(GEOID, grid_id_10km = ID) %>%
33 | {cbind(st_drop_geometry(.),
34 | area = st_area(.))}
35 |
36 | # save the crosswalk since it takes a while to make
37 | saveRDS(unit_cross, file.path(path_data, paste0(unit, "_grid_area_crosswalk.rds")))
38 |
39 | # population by grid cell
40 | pop <- list.files(file.path(path_data, "2_from_EE", "populationDensity_10km_subgrid"),
41 | full.names = T) %>% purrr::map_dfr(read.csv)
42 |
43 | # smoke PM predictions
44 | smokePM <- readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds"))
45 |
46 | # lets only save predictions if there's a smoke day in the unit, start by identifying smoke-days per unit
47 | unit_smoke_days = smokePM %>% # 51434138 rows
48 | # add unit information, this will duplicate any rows that are in multiple counties
49 | left_join(unit_cross %>% select(grid_id_10km, GEOID),
50 | by = "grid_id_10km") %>% # 76009188 rows for county
51 | filter(!is.na(GEOID)) %>% # drop grid cells that don't match to a unit
52 | # full set of unit-days with smoke
53 | select(date, GEOID) %>%
54 | unique # 2308941 rows (should actually be less after dropping NAs)
55 |
56 | unit_smokePM <- unit_smoke_days %>%
57 | # join in all grid-cells for each unit
58 | left_join(unit_cross, by = "GEOID") %>% # 119622779 rows
59 | # join in population and smoke PM predictions
60 | left_join(pop %>% select(grid_id_10km = ID, grid_pop_per_m2 = mean)) %>%
61 | left_join(smokePM) # should still be 119622779 rows
62 |
63 | # fill missings with 0s
64 | # calculate pop-weighted avg (density * area) over grid cells in each unit
65 | avg_unit_smokePM <- unit_smokePM %>%
66 | replace_na(list(smokePM_pred = 0)) %>%
67 | mutate(area = unclass(area),
68 | pop = grid_pop_per_m2*area) %>%
69 | group_by(GEOID, date) %>%
70 | summarise(smokePM_pred = weighted.mean(smokePM_pred, pop)) %>%
71 | ungroup
72 |
73 | saveRDS(avg_unit_smokePM,
74 | file.path(path_output, "smokePM", "predictions", "combined",
75 | paste0(unit, "_smokePM_predictions_20060101_20201231.rds")))
76 |
--------------------------------------------------------------------------------
/scripts/main/02_07_gridded_predictions_to_zip.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Aggregates 10 km grid smokePM predictions to zip level.
8 | # ------------------------------------------------------------------------------
9 | unit <- "zcta" # alternatively, "county"
10 | # load shapefile, plus 10km grid transformed to match the crs
11 | if (unit == "county") {
12 | unit_sf <- counties() %>%
13 | filter(STATEFP %in% nonContig_stateFIPS == F)
14 | } else if (unit == "tract") {
15 | unit_sf <- states() %>%
16 | filter(STATEFP %in% nonContig_stateFIPS == F) %>%
17 | pull(STATEFP) %>%
18 | map_dfr(function(x){
19 | tracts(x, year = 2019) %>% select(STATEFP, GEOID)
20 | })
21 | } else if (unit == "zcta") {
22 | unit_sf <- states() %>%
23 | filter(STATEFP %in% nonContig_stateFIPS == F) %>%
24 | pull(STATEFP) %>%
25 | map_dfr(function(x){
26 | zctas(x, year = 2019) %>% select(ZCTA5CE10, GEOID10)
27 | })
28 | } else {
29 | stop("only allowed units are \"tract\" or \"zcta\" or \"county\"")
30 | }
31 |
32 | # read in the grid
33 | grid_10km <- st_read(file.path(path_final, "10km_grid", "10km_grid_wgs84", "10km_grid_wgs84.shp")) %>%
34 | st_transform(st_crs(unit_sf))
35 |
36 | # make a crosswalk with intersection area with grid cells
37 | if (unit %in% c("county", "tract")) {
38 | unit_cross = st_intersection(unit_sf,
39 | grid_10km) %>%
40 | select(GEOID, grid_id_10km = ID) %>%
41 | {cbind(st_drop_geometry(.),
42 | area = st_area(.))}
43 | } else if (unit == "zcta") {
44 | unit_cross = st_intersection(unit_sf,
45 | grid_10km) %>%
46 | select(GEOID10, grid_id_10km = ID) %>%
47 | {cbind(st_drop_geometry(.),
48 | area = st_area(.))}
49 | }
50 | # save the crosswalk since it takes a while to make
51 | saveRDS(unit_cross, file.path(path_data, paste0(unit, "_grid_area_crosswalk.rds")))
52 | # unit_cross <- readRDS(file.path(path_data, paste0(unit, "_grid_area_crosswalk.rds")))
53 |
54 | # smoke PM predictions
55 | smokePM <- readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds"))
56 |
57 | # population by grid cell
58 | pop <- list.files(file.path(path_data, "2_from_EE", "populationDensity_10km_subgrid"),
59 | full.names = T) %>% purrr::map_dfr(read.csv)
60 |
61 | # loop through work with one date at a time? rbind the data frames from the dates together
62 | if (unit == "zcta") {
63 | unit_cross = unit_cross %>% rename(GEOID = GEOID10)
64 | }
65 | avg_unit_smokePM <- smokePM %>%
66 | pull(date) %>%
67 | unique %>%
68 | purrr::map_dfr(function(i){
69 | print(i)
70 | # filter to the date of interest
71 | smokePM %>%
72 | filter(date == i) %>%
73 | # add on all the tract/county unit IDs that are among the grid cells with smoke
74 | left_join(unit_cross %>% select(grid_id_10km, GEOID),
75 | by = "grid_id_10km") %>%
76 | filter(!is.na(GEOID)) %>% # drop grid cells that don't match to a unit
77 | # full set of unit-days with smoke
78 | select(GEOID, date) %>%
79 | unique %>%
80 | # join in all grid-cells for each unit
81 | left_join(unit_cross, by = "GEOID") %>%
82 | # join in population and smoke PM predictions
83 | left_join(pop %>% select(grid_id_10km = ID, grid_pop_per_m2 = mean),
84 | by = "grid_id_10km") %>%
85 | left_join(smokePM %>% filter(date == i),
86 | by = c("grid_id_10km", "date")) %>%
87 | # fill missing smoke PM values with zero
88 | replace_na(list(smokePM_pred = 0)) %>%
89 | mutate(area = unclass(area),
90 | pop = grid_pop_per_m2*area) %>%
91 | # for each unit-date calculate pop-weighted avg
92 | group_by(GEOID, date) %>%
93 | summarise(smokePM_pred = weighted.mean(smokePM_pred, pop),
94 | .groups = "drop")
95 | })
96 | if (unit == "zcta") {
97 | avg_unit_smokePM = avg_unit_smokePM %>% rename(GEOID10 = GEOID)
98 | }
99 |
100 | saveRDS(avg_unit_smokePM,
101 | file.path(path_output, "smokePM", "predictions", "combined",
102 | paste0(unit, "_smokePM_predictions_20060101_20201231.rds")))
103 |
--------------------------------------------------------------------------------
/scripts/main/02_08_gridded_predictions_to_tract.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Aggregates 10 km grid smokePM predictions to census tract level.
8 | # ------------------------------------------------------------------------------
9 | unit <- "tract" # alternatively, "county"
10 | # load shapefile, plus 10km grid transformed to match the crs
11 | if(unit == "county"){
12 | unit_sf <- counties() %>%
13 | filter(STATEFP %in% nonContig_stateFIPS == F)
14 | } else if(unit == "tract"){
15 | unit_sf <- states() %>%
16 | filter(STATEFP %in% nonContig_stateFIPS == F) %>%
17 | pull(STATEFP) %>%
18 | map_dfr(function(x){
19 | tracts(x, year = 2019) %>% select(STATEFP, GEOID)
20 | })
21 | } else{
22 | stop("only allowed units are \"tract\" or \"county\"")
23 | }
24 |
25 | # read in the grid
26 | grid_10km <- st_read(file.path(path_final, "10km_grid", "10km_grid_wgs84", "10km_grid_wgs84.shp")) %>%
27 | st_transform(st_crs(unit_sf))
28 |
29 | # make a crosswalk with intersection area with grid cells
30 | unit_cross = st_intersection(unit_sf,
31 | grid_10km) %>%
32 | select(GEOID, grid_id_10km = ID) %>%
33 | {cbind(st_drop_geometry(.),
34 | area = st_area(.))}
35 |
36 | # save the crosswalk since it takes a while to make
37 | saveRDS(unit_cross, file.path(path_data, paste0(unit, "_grid_area_crosswalk.rds")))
38 | # unit_cross <- readRDS(file.path(path_data, paste0(unit, "_grid_area_crosswalk.rds")))
39 |
40 | # smoke PM predictions
41 | smokePM <- readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds"))
42 |
43 | # population by grid cell
44 | pop <- list.files(file.path(path_data, "2_from_EE", "populationDensity_10km_subgrid"),
45 | full.names = T) %>% purrr::map_dfr(read.csv)
46 |
47 | # loop through work with one date at a time? rbind the data frames from the dates together
48 | avg_unit_smokePM <- smokePM %>%
49 | pull(date) %>%
50 | unique %>%
51 | purrr::map_dfr(function(i){
52 | print(i)
53 | # filter to the date of interest
54 | smokePM %>%
55 | filter(date == i) %>%
56 | # add on all the tract/county unit IDs that are among the grid cells with smoke
57 | left_join(unit_cross %>% select(grid_id_10km, GEOID),
58 | by = "grid_id_10km") %>%
59 | filter(!is.na(GEOID)) %>% # drop grid cells that don't match to a unit
60 | # full set of unit-days with smoke
61 | select(GEOID, date) %>%
62 | unique %>%
63 | # join in all grid-cells for each unit
64 | left_join(unit_cross, by = "GEOID") %>%
65 | # join in population and smoke PM predictions
66 | left_join(pop %>% select(grid_id_10km = ID, grid_pop_per_m2 = mean),
67 | by = "grid_id_10km") %>%
68 | left_join(smokePM %>% filter(date == i),
69 | by = c("grid_id_10km", "date")) %>%
70 | # fill missing smoke PM values with zero
71 | replace_na(list(smokePM_pred = 0)) %>%
72 | group_by(GEOID, date) %>%
73 | mutate(grid_pop_per_m2 = ifelse(rep(all(grid_pop_per_m2 == 0), n()), 1, grid_pop_per_m2)) %>%
74 | ungroup() %>%
75 | mutate(area = unclass(area),
76 | pop = grid_pop_per_m2*area) %>%
77 | # for each unit-date calculate pop-weighted avg
78 | group_by(GEOID, date) %>%
79 | summarise(smokePM_pred = weighted.mean(smokePM_pred, pop),
80 | .groups = "drop")
81 | })
82 |
83 | saveRDS(avg_unit_smokePM,
84 | file.path(path_output, "smokePM", "predictions", "combined",
85 | paste0(unit, "_smokePM_predictions_20060101_20201231.rds")))
86 |
--------------------------------------------------------------------------------
/scripts/main/02_09_finalize_preds.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Jessica Li
7 | # Saves smokePM aggregates in final folder.
8 | # ------------------------------------------------------------------------------
9 | # Load predictions
10 | preds = readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds"))
11 |
12 | # Save
13 | saveRDS(preds, file.path(path_final, "10km_grid", "smokePM2pt5_predictions_daily_10km_20060101-20201231.rds"))
14 |
15 | # Convert date to character
16 | preds = preds %>% mutate(date = format(date, "%Y%m%d"))
17 |
18 | # Save
19 | write.csv(preds, file.path(path_final, "10km_grid", "smokePM2pt5_predictions_daily_10km_20060101-20201231.csv"), row.names = F)
20 |
21 | # ------------------------------------------------------------------------------
22 | # Load predictions aggregated to county level
23 | preds = readRDS(file.path(path_output, "smokePM", "predictions", "combined", "county_smokePM_predictions_20060101_20201231.rds"))
24 |
25 | # Save
26 | saveRDS(preds, file.path(path_final, "county", "smokePM2pt5_predictions_daily_county_20060101-20201231.rds"))
27 |
28 | # Convert date to character
29 | preds = preds %>% mutate(date = format(date, "%Y%m%d"))
30 |
31 | # Save
32 | write.csv(preds, file.path(path_final, "county", "smokePM2pt5_predictions_daily_county_20060101-20201231.csv"), row.names = F)
33 |
34 | # ------------------------------------------------------------------------------
35 | # Load predictions aggregated to zip level
36 | preds = readRDS(file.path(path_output, "smokePM", "predictions", "combined", "zcta_smokePM_predictions_20060101_20201231.rds"))
37 |
38 | # Save
39 | saveRDS(preds, file.path(path_final, "zcta", "smokePM2pt5_predictions_daily_zcta_20060101-20201231.rds"))
40 |
41 | # Convert date to character
42 | preds = preds %>% mutate(date = format(date, "%Y%m%d"))
43 |
44 | # Save
45 | write.csv(preds, file.path(path_final, "zcta", "smokePM2pt5_predictions_daily_zcta_20060101-20201231.csv"), row.names = F)
46 |
47 | # ------------------------------------------------------------------------------
48 | # Load predictions aggregated to census tract level
49 | preds = readRDS(file.path(path_output, "smokePM", "predictions", "combined", "tract_smokePM_predictions_20060101_20201231.rds"))
50 |
51 | # Save
52 | saveRDS(preds, file.path(path_final, "tract", "smokePM2pt5_predictions_daily_tract_20060101-20201231.rds"))
53 |
54 | # Convert date to character
55 | preds = preds %>% mutate(date = format(date, "%Y%m%d"))
56 |
57 | # Save
58 | write.csv(preds, file.path(path_final, "tract", "smokePM2pt5_predictions_daily_tract_20060101-20201231.csv"), row.names = F)
59 |
--------------------------------------------------------------------------------
/scripts/main/03_01_figure00.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots graphical abstract.
8 | # ------------------------------------------------------------------------------
9 | # Load smokePM prediction
10 | smokePM_preds <- readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds"))
11 |
12 | # Load population density
13 | pop <- list.files(file.path(path_data, "2_from_EE", "populationDensity_10km_subgrid"),
14 | full.names = T) %>% purrr::map_dfr(read.csv)
15 |
16 | # Load 10 km grid
17 | grid_10km <- st_read(file.path(path_data, "1_grids", "grid_10km_wgs84")) %>%
18 | mutate(area = st_area(geometry) %>% unclass) # area in m^2
19 |
20 | #
21 | extremes_by_year <- smokePM_preds %>%
22 | mutate(year = lubridate::year(date)) %>%
23 | {full_join(group_by(., year) %>%
24 | summarise(days_over50 = sum(smokePM_pred > 50),
25 | days_over100 = sum(smokePM_pred > 100),
26 | days_over200 = sum(smokePM_pred > 200),
27 | .groups = "drop"),
28 | group_by(., year, grid_id_10km) %>%
29 | summarise(pop_over50 = any(smokePM_pred >50),
30 | pop_over100 = any(smokePM_pred > 100),
31 | pop_over200 = any(smokePM_pred > 200),
32 | .groups = "drop") %>%
33 | left_join(pop %>% left_join(grid_10km) %>%
34 | transmute(grid_id_10km = ID, pop = mean*area)) %>%
35 | group_by(year) %>%
36 | summarise(across(starts_with("pop_over"), ~sum(.x*pop)),
37 | .groups = "drop"))}
38 | # change in extremes
39 | extremes_by_year %>%
40 | mutate(period = case_when(year >= 2006 & year <=2010 ~ "2006 - 2010",
41 | year >= 2016 & year <=2020 ~ "2016 - 2020",
42 | T ~ as.character(NA))) %>%
43 | filter(!is.na(period)) %>%
44 | group_by(period) %>%
45 | summarise(across(contains("over"), mean)) %>%
46 | pivot_longer(contains("over")) %>%
47 | separate(name, into = c("panel", "cutoff"), sep = "_") %>%
48 | mutate(panel = dplyr::recode_factor(panel,
49 | "days" = "average annual number of grid cell-days",
50 | "pop" = "average annual population exposed\nto at least one day",
51 | .ordered = TRUE)) %>%
52 | {ggplot(data = .,
53 | aes(x = period, y = value, group = cutoff,
54 | color = cutoff)) +
55 | geom_point() +
56 | geom_line() +
57 | geom_text(data = group_by(., panel, cutoff) %>%
58 | summarise(asinh_mid = sinh(mean(asinh(value))),
59 | .groups = "drop") %>%
60 | mutate(cutoff_num = gsub("over", "", cutoff),
61 | label_top = paste0("paste(\'Days > ", cutoff_num, "\', mu, \'g\')")),
62 | aes(y = asinh_mid, label = label_top,
63 | x = 2.1 + ifelse(cutoff == "over100", 0.03, 0) +
64 | ifelse(cutoff == "over50", 0.06, 0)),
65 | color = "black", hjust = 0, vjust = 0, parse = TRUE) +
66 | geom_text(data = group_by(., panel, cutoff) %>%
67 | summarise(asinh_mid = sinh(mean(asinh(value))),
68 | mult = max(value)/min(value),
69 | .groups = "drop") %>%
70 | mutate(mult_round = formatC(round(signif(mult, 2), 0),
71 | format="d", big.mark=","),
72 | label_bottom = paste0(mult_round, "x increase")),
73 | aes(y = asinh_mid, label = label_bottom,
74 | x = 2.1 + ifelse(cutoff == "over100", 0.03, 0) +
75 | ifelse(cutoff == "over50", 0.06, 0)),
76 | color = "black", hjust = 0, vjust = 1) +
77 | geom_linerange(data = group_by(., panel, cutoff) %>%
78 | summarise(min = min(value),
79 | max = max(value),
80 | .groups = "drop"),
81 | mapping = aes(ymin = min, ymax = max,
82 | x = 2.05 + ifelse(cutoff == "over100", 0.03, 0) +
83 | ifelse(cutoff == "over50", 0.06, 0),
84 | color = cutoff),
85 | inherit.aes = FALSE) +
86 | facet_wrap(~panel, scales = "free", nrow = 1, strip.position = "left") +
87 | scale_y_continuous(trans = "pseudo_log",
88 | expand = expansion(mult = c(0.07, 0.05)),
89 | breaks = c(0, 1, 5, 10, 50, 100, 500,
90 | 1000, 5000, 10000, 50000, 100000,
91 | 500000, 1000000, 5000000, 10000000,
92 | 20000000),
93 | labels = c("0", "1", "5", "10", "50", "100", "500",
94 | "1k", "5k", "10k", "50k", "100k",
95 | "500k", "1m", "5m", "10m", "20m")) +
96 | scale_x_discrete(expand = expansion(mult = c(0.1, 0.05))) +
97 | coord_cartesian(clip = "off") +
98 | scale_color_manual(values = c("#e8993b", "#913818", "#4a571e"),
99 | guide = "none") +
100 | xlab("") + ylab("") +
101 | theme_classic() +
102 | theme(plot.margin = margin(0.5,5.5,0,-1, "lines"),
103 | text = element_text(size = 15),
104 | axis.text.x = element_text(vjust = -0.5),
105 | strip.placement = "outside",
106 | strip.background = element_blank(),
107 | strip.text = element_text(size = 12),
108 | panel.spacing.x = unit(6.25, "lines"))} %>%
109 | ggsave(file.path(path_figures, "figure00.png"), .,
110 | width = 8, height = 3.95)
111 |
--------------------------------------------------------------------------------
/scripts/main/03_04_figure03.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots California fires 2018-2020.
8 |
9 | # calfire from https://frap.fire.ca.gov/frap-projects/fire-perimeters/
10 | # ------------------------------------------------------------------------------
11 | # Load smokePM predictions
12 | smokePM_preds <- list.files(
13 | file.path(path_output, "smokePM", "predictions", "10km_smoke_days"),
14 | full.names = TRUE
15 | ) %>%
16 | map_dfr(readRDS) %>%
17 | mutate(smokePM_pred = pmax(smokePM_pred, 0))
18 |
19 | # Load 10 km grid
20 | grid_10km <- st_read(file.path(path_data, "1_grids", "grid_10km_wgs84"))
21 |
22 | # Load population density
23 | pop <- list.files(
24 | file.path(path_data, "2_from_EE", "populationDensity_10km_subgrid"),
25 | full.names = T
26 | ) %>%
27 | purrr::map_dfr(read.csv) %>%
28 | rename(pop_density = mean)
29 |
30 | # Define fire panels
31 | fire_panels <- data.frame(
32 | panel_name = c("November 2018", "October - November 2019", "Fall 2020"),
33 | start_date = as.Date(c("2018-11-01", "2019-10-20", "2020-08-10")),
34 | end_date = as.Date(c("2018-11-30", "2019-11-15", "2020-11-15"))
35 | )
36 |
37 | # Load CAL FIRE data
38 | calfire <- st_read(file.path(path_data, "CAL FIRE FRAP", "fire20_1.gdb"), layer = "firep20_1") %>%
39 | mutate(Fire = recode(FIRE_NAME,
40 | "CAMP" = "Camp Fire",
41 | "KINCADE" = "Kincade Fire",
42 | "AUGUST COMPLEX FIRES" = "August Complex",
43 | "CZU LIGHTNING COMPLEX" = "CZU Lightning Complex"),
44 | panel_name = recode(FIRE_NAME,
45 | "CAMP" = "November 2018",
46 | "KINCADE" = "October - November 2019",
47 | "AUGUST COMPLEX FIRES" = "Fall 2020",
48 | "CZU LIGHTNING COMPLEX" = "Fall 2020"),
49 | panel_fire_no = recode(FIRE_NAME,
50 | "CAMP" = 1,
51 | "KINCADE" = 1,
52 | "AUGUST COMPLEX FIRES" = 2,
53 | "CZU LIGHTNING COMPLEX" = 1,
54 | .default = as.numeric(NA)),
55 | select_fires = ((FIRE_NAME == "CAMP" & YEAR_ == "2018" & ALARM_DATE > as.Date("2018-11-01")) |
56 | (FIRE_NAME == "KINCADE" & YEAR_ == "2019") |
57 | FIRE_NAME %in% c("AUGUST COMPLEX FIRES", "CZU LIGHTNING COMPLEX")))
58 |
59 | # Load counties
60 | counties <- tigris::counties()
61 |
62 | # Load states
63 | states <- tigris::states()
64 | states %<>% st_transform(crs = st_crs(grid_10km))
65 |
66 | # Limit to California
67 | CA_cells <- st_intersects(grid_10km, states %>% filter(NAME == "California"))
68 | CA_cells <- grid_10km$ID[CA_cells %>%
69 | purrr::map_lgl(function(x){length(x) > 0})]
70 |
71 | # Match to counties
72 | county_cells <- counties %>%
73 | filter(NAME %in% c("Sacramento", "Santa Clara", "Fresno", "Sonoma")) %>%
74 | select(NAME, geometry) %>%
75 | purrr::pmap_dfr(function(NAME, geometry){
76 | data.frame(county = NAME,
77 | grid_id_10km = grid_10km$ID[st_intersects(grid_10km, geometry) %>%
78 | purrr::map_lgl(function(x){length(x) > 0})]) %>%
79 | return()
80 | })
81 |
82 | # Sampling of fires from 2018 to 2020 in CA
83 | grid_fire_preds <- fire_panels %>%
84 | purrr::pmap_dfr(function(panel_name, start_date, end_date){
85 | smokePM_preds %>% filter(grid_id_10km %in% county_cells$grid_id_10km) %>%
86 | {right_join(.,
87 | expand.grid(grid_id_10km = pull(., grid_id_10km) %>% unique,
88 | date = seq.Date(from = start_date,
89 | to = end_date,
90 | by = "day")))} %>%
91 | replace_na(list(smokePM_pred = 0)) %>%
92 | left_join(county_cells) %>%
93 | mutate(panel_name = panel_name) %>%
94 | return
95 | }) %>%
96 | {mutate(., panel_name = factor(panel_name, levels = unique(.$panel_name),
97 | ordered = TRUE))}
98 |
99 | grid_fire_avgs <- group_by(grid_fire_preds, grid_id_10km, panel_name, county) %>%
100 | summarise(total_smokePM = mean(smokePM_pred),
101 | .groups = "drop") %>%
102 | group_by(panel_name, county) %>%
103 | mutate(nobs = n()) %>%
104 | ungroup %>%
105 | mutate(type = factor("average", levels = c("time_series","average"), ordered = T),
106 | county_no = as.numeric(as.factor(county)))
107 |
108 | # Plot time series
109 | CA_fires_ts <- ggplot(mapping = aes(x = date, y = smokePM_pred, color = county)) +
110 | # Add lines for each grid cell
111 | geom_line(data = grid_fire_preds %>%
112 | mutate(type = factor("time_series", levels = c("time_series", "average"), ordered = T)) %>%
113 | group_by(county) %>%
114 | mutate(n_cell = length(unique(grid_id_10km))),
115 | aes(group = grid_id_10km,
116 | alpha = I(20/n_cell))) +
117 | # Add mean lines for each county
118 | geom_line(data = left_join(grid_fire_preds, pop, by = c("grid_id_10km" = "ID")) %>%
119 | group_by(date, county, panel_name) %>%
120 | summarise(smokePM_pred = weighted.mean(smokePM_pred, pop_density),
121 | .groups = "drop") %>%
122 | mutate(type = factor("time_series", levels = c("time_series", "average"), ordered = T)),
123 | size = 1.5) +
124 | # Add dashes for average smoke PM for grid cell
125 | geom_linerange(data = grid_fire_avgs,
126 | aes(y = total_smokePM, color = county,
127 | xmin = as.Date(county_no*2, origin = "2000-01-01"),
128 | xmax = as.Date(county_no*2+1.5, origin = "2000-01-01"),
129 | alpha = I(20/nobs)),
130 | inherit.aes = FALSE) +
131 | # Add dashes for average smokePM for county
132 | geom_linerange(data = left_join(grid_fire_avgs, pop, by = c("grid_id_10km" = "ID")) %>%
133 | group_by(panel_name, county, county_no, type) %>%
134 | summarise(total_smokePM = weighted.mean(total_smokePM, pop_density)),
135 | aes(y = total_smokePM, color = county,
136 | xmin = as.Date(county_no*2, origin = "2000-01-01"),
137 | xmax = as.Date(county_no*2+1.5, origin = "2000-01-01")),
138 | size = 1.5,
139 | inherit.aes = FALSE) +
140 | # Add alpha = 0 points to average plots to get the zeros to align
141 | geom_point(data = left_join(calfire %>% filter(select_fires),
142 | grid_fire_avgs %>% group_by(panel_name) %>% summarise(max = max(total_smokePM))),
143 | aes(y = -panel_fire_no*0.075*max),
144 | x = as.Date(4, origin = "2000-01-01"),
145 | inherit.aes = FALSE,
146 | alpha = 0) +
147 | # Add line ranges on the bottom for the fires' start and end
148 | geom_linerange(data = left_join(calfire %>% filter(select_fires),
149 | grid_fire_preds %>% group_by(panel_name) %>% summarise(max = max(smokePM_pred))) %>%
150 | mutate(panel_name = factor(panel_name, levels = levels(grid_fire_preds$panel_name), ordered = T),
151 | type = factor("time_series", levels = c("time_series", "average"), ordered = T)),
152 | aes(xmin = as.Date(ALARM_DATE),
153 | xmax = as.Date(CONT_DATE),
154 | y = -panel_fire_no*0.075*max),
155 | inherit.aes = FALSE) +
156 | facet_wrap(panel_name~type, scales = "free", nrow =3, ncol = 2,
157 | labeller = labeller(.multi_line = FALSE)) +
158 | scale_color_manual(values = MetBrewer::met.brewer("Juarez")[c(1,6,5,3)],
159 | guide = "none") +
160 | scale_y_continuous(sec.axis = sec_axis(~.,
161 | name=expression(paste("Daily average (",mu, "g/", m^3, ")")))) +
162 | theme_classic() + xlab("") + ylab(expression(paste("smoke ", PM[2.5]," (",mu, "g/", m^3, ")"))) +
163 | theme(strip.background = element_blank(),
164 | strip.text.x = element_blank(),
165 | panel.spacing.y = unit(1.5, "lines"),
166 | panel.spacing.x = unit(-2, "lines"))
167 |
168 | # Make it into a gtable so we can edit
169 | CA_fires_ts <- ggplot_gtable(ggplot_build(CA_fires_ts))
170 |
171 | # Make the average column narrower
172 | CA_fires_ts$widths[9] <- CA_fires_ts$widths[9]*0.25
173 |
174 | # Remove right y axis on left panels and left y axis on right panels and x-axis from 2nd column
175 | CA_fires_ts %<>% gtable_remove_grobs(c("axis-l-1-2", "axis-l-2-2", "axis-l-3-2",
176 | "axis-r-1-1", "axis-r-2-1", "axis-r-3-1",
177 | "axis-b-2-1", "axis-b-2-2", "axis-b-2-3"))
178 |
179 | # Repeat outside y axes on all plots
180 | CA_fires_ts %>%
181 | gtable::gtable_add_grob(gtable::gtable_filter(CA_fires_ts, pattern = "ylab-l"),
182 | l = 3, t = 7, b = 8,
183 | name = "top-ylab-l") %>%
184 | gtable::gtable_add_grob(gtable::gtable_filter(CA_fires_ts, pattern = "ylab-l"),
185 | l = 3, t = 17, b = 18,
186 | name = "bottom-ylab-l") %>%
187 | gtable::gtable_add_grob(gtable::gtable_filter(CA_fires_ts, pattern = "ylab-r"),
188 | l = 11, t = 7, b = 8,
189 | name = "top-ylab-r") %>%
190 | gtable::gtable_add_grob(gtable::gtable_filter(CA_fires_ts, pattern = "ylab-r"),
191 | l = 11, t = 17, b = 18,
192 | name = "bottom-ylab-r") %>%
193 | ggsave(file.path(path_figures, "figure03b-d_time_series.png"), ., width = 6, height = 7)
194 |
195 | # Plot map of select CA fires
196 | {ggplot() +
197 | geom_sf(data = states %>% filter(NAME == "California")) +
198 | geom_sf(data = counties %>%
199 | filter(NAME %in% c("Sacramento", "Santa Clara", "Fresno", "Sonoma")),
200 | aes(color = NAME, fill = NAME)) +
201 | scale_color_manual(values = MetBrewer::met.brewer("Juarez")[c(1,6,5,3)],
202 | aesthetics = c("color", "fill")) +
203 | ggnewscale::new_scale_fill() +
204 | ggnewscale::new_scale_color() +
205 | geom_sf(data = calfire %>% filter(select_fires),
206 | color = NA,
207 | fill = "black") +
208 | theme_void()} %>%
209 | ggsave(file.path(path_figures, "figure03a.png"), ., width = 4, height = 5)
210 |
211 | # Plot all calfires during the time series
212 | fire_panels %>%
213 | purrr::pmap(function(panel_name, start_date, end_date){
214 | panel_label = c("b", "c", "d")[which(fire_panels$panel_name == panel_name)]
215 | fig_name <- file.path(path_figures, paste0("figure03", panel_label, "_map.png"))
216 | # print(fig_name)
217 | calfire %>%
218 | mutate(ALARM_DATE = as.Date(ALARM_DATE),
219 | CONT_DATE = as.Date(CONT_DATE)) %>%
220 | rowwise %>%
221 | mutate(max_start = max(c(ALARM_DATE, start_date)),
222 | min_end = min(c(CONT_DATE, end_date))) %>%
223 | ungroup %>%
224 | filter(max_start < min_end) %>%
225 | {ggplot(data = .,) +
226 | geom_sf(data = states %>% filter(NAME == "California")) +
227 | geom_sf(color = NA,
228 | fill = "black") +
229 | theme_void()} %>%
230 | ggsave(fig_name,., width = 4, height = 5)
231 |
232 | })
233 |
--------------------------------------------------------------------------------
/scripts/main/03_05_figure04.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots annual smoke PM.
8 | # ------------------------------------------------------------------------------
9 | # Load smoke PM predictions
10 | smokePM_preds <- readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds")) %>%
11 | mutate(smokePM_pred = pmax(smokePM_pred, 0))
12 |
13 | grid_10km <- st_read(file.path(path_data, "1_grids", "grid_10km_wgs84"))
14 |
15 | annual_smokePM <- smokePM_preds %>%
16 | mutate(year = lubridate::year(date)) %>%
17 | group_by(grid_id_10km, year) %>%
18 | summarise(smokePM = sum(smokePM_pred), .groups = "drop") %>%
19 | # pivot wide, then back to long, to fill any missing years with zeros
20 | pivot_wider(values_from = smokePM, names_from = year, names_prefix = "year_", values_fill = 0) %>%
21 | pivot_longer(starts_with("year"), values_to = "smokePM", names_to = "year", names_prefix = "year_") %>%
22 | # identify leap years for dividing by 365
23 | mutate(year = as.numeric(year)) %>%
24 | mutate(y_days = 365 + lubridate::leap_year(year)*1,
25 | smokePM = smokePM/y_days)
26 |
27 | # small miniatures of annual smoke PM
28 | right_join(grid_10km,
29 | annual_smokePM %>%
30 | mutate(smokePM = pmin(smokePM, 5)),
31 | by = c("ID" = "grid_id_10km")) %>%
32 | {ggplot(data = ., aes(color = smokePM, fill = smokePM)) +
33 | geom_sf() +
34 | facet_wrap(~year, nrow = 3, ncol = 5) +
35 | scale_color_gradientn(colors = viridis::inferno(100, begin = 0, end = 1, direction = 1),
36 | name = expression(paste(mu, "g/", m^3)),
37 | aesthetics = c("fill", "color"),
38 | values = scales::rescale(sinh(seq(0, 3, length.out = 100))),
39 | breaks = seq(0, 5, by = 1),
40 | labels = c(seq(0, 4, by = 1), ">5"),
41 | guide = guide_colorbar(barheight = 10,
42 | title.theme = element_text(size = 16))) +
43 | theme_void() +
44 | theme(text = element_text(size = 20))} %>%
45 | ggsave(file.path(path_figures, "figure04.png"), ., width = 15, height = 7)
46 |
--------------------------------------------------------------------------------
/scripts/main/03_06_figure05.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots state trends.
8 | # ------------------------------------------------------------------------------
9 | # read in grid and load state geometries (a detailed one for calcs and generalized one for plotting)
10 | grid_10km <- st_read(file.path(path_data, "1_grids", "grid_10km_wgs84"))
11 | states <- tigris::states(cb = FALSE) %>%
12 | filter(!(STATEFP %in% nonContig_stateFIPS)) %>%
13 | st_transform(crs = st_crs(grid_10km))
14 | simple_states <- tigris::states(cb = TRUE) %>%
15 | filter(!(STATEFP %in% nonContig_stateFIPS)) %>%
16 | st_transform(crs = st_crs(grid_10km))
17 |
18 | # load predictions and population data
19 | smokePM_preds <- readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds")) %>%
20 | mutate(smokePM_pred = pmax(smokePM_pred, 0))
21 |
22 | pop <- list.files(file.path(path_data, "2_from_EE", "populationDensity_10km_subgrid"),
23 | full.names = T) %>% purrr::map_dfr(read.csv)
24 |
25 | # calculate decadal change in average smoke PM
26 | decadal_change <- smokePM_preds %>%
27 | mutate(year = lubridate::year(date)) %>%
28 | # calculate annual avg contribution of smokePM to daily PM
29 | group_by(year, grid_id_10km) %>%
30 | summarise(annual_total_smokePM = sum(smokePM_pred),
31 | .groups = "drop") %>%
32 | mutate(y_days = 365 + leap_year(year)*1,
33 | annual_daily_smokePM = annual_total_smokePM/y_days) %>%
34 | select(-annual_total_smokePM) %>%
35 | # join with full set of years and grid cells to fill in zeros
36 | {left_join(expand.grid(year = unique(.$year),
37 | grid_id_10km = unique(.$grid_id_10km)),
38 | .)} %>%
39 | replace_na(list(annual_daily_smokePM = 0)) %>%
40 | # add the time periods to summarise over
41 | mutate(period = case_when(year >= 2006 & year <=2010 ~ "years2006_2010",
42 | year >= 2016 & year <=2020 ~ "years2016_2020",
43 | T ~ as.character(NA))) %>%
44 | filter(!is.na(period)) %>%
45 | # average the annual daily smokePM to the period
46 | group_by(period, grid_id_10km) %>%
47 | summarise(annual_daily_smokePM = mean(annual_daily_smokePM),
48 | .groups = "drop") %>%
49 | # make a column for each period, and difference the columns
50 | pivot_wider(names_from = period, values_from = annual_daily_smokePM) %>%
51 | mutate(decade_change = years2016_2020 - years2006_2010)
52 |
53 | # plot a map of the decadal change
54 | grid_10km %>%
55 | left_join(decadal_change,
56 | by = c("ID" = "grid_id_10km")) %>%
57 | mutate(decade_change = pmin(5, decade_change)) %>%
58 | {ggplot(data = .) +
59 | geom_sf(aes(color = decade_change, fill = decade_change)) +
60 | geom_sf(data = simple_states,
61 | color = "grey20", lwd = 0.1, fill = NA) +
62 | scale_color_gradientn(aesthetics = c("color", "fill"),
63 | name = expression(paste(mu, "g/", m^3)),
64 | colors = cmocean::cmocean("balance",
65 | start = 0.05,
66 | end = 0.95)(101),
67 | values = scales::rescale(sinh(seq(-2, 2, length.out = 101))),
68 | rescaler = mid_rescaler(0),
69 | guide = guide_colorbar(barheight = 6),
70 | breaks = seq(0, 5, by = 1),
71 | labels = c(seq(0, 4, by = 1), ">5"),) +
72 | theme_void() +
73 | theme(legend.position = c(.92, 0.32),
74 | legend.justification = "center")} %>%
75 | ggsave(file.path(path_figures, "figure05a.png"), ., width = 5, height = 4)
76 |
77 |
78 | state_annual_cells <- states %>%
79 | # identify the cells intersecting with each state (the result is the index of the grid cell insecting each state)
80 | {mutate(.,
81 | grid_id_10km_ind = st_intersects(., grid_10km))} %>%
82 | st_drop_geometry() %>%
83 | unnest(grid_id_10km_ind) %>%
84 | # join in the grid cell identifiers based on grid cell index
85 | left_join(grid_10km %>% mutate(grid_id_10km_ind = 1:n()) %>%
86 | st_drop_geometry()) %>%
87 | # join in population density for each 10km grid cell
88 | left_join(pop %>%
89 | mutate(pop_density = mean)) %>%
90 | # join in annual smoke averages
91 | left_join(smokePM_preds %>%
92 | mutate(year = lubridate::year(date)) %>%
93 | group_by(year, grid_id_10km) %>%
94 | # calculate annual total smoke PM, then divide by days in the year
95 | summarise(annual_total_smokePM = sum(smokePM_pred),
96 | .groups = "drop") %>%
97 | mutate(y_days = 365 + leap_year(year)*1,
98 | annual_daily_smokePM = annual_total_smokePM/y_days) %>%
99 | # join with full set of years and grid cells to fill in zeros
100 | {left_join(expand.grid(year = unique(.$year),
101 | grid_id_10km = unique(.$grid_id_10km)),
102 | .)} %>%
103 | replace_na(list(annual_daily_smokePM = 0)),
104 | by = c("ID" = "grid_id_10km"))
105 |
106 | state_annual_cells %>%
107 | group_by(NAME, STUSPS, year) %>%
108 | summarise(annual_daily_smokePM = weighted.mean(annual_daily_smokePM, pop_density),
109 | .groups = "drop") %>%
110 | mutate(states = ifelse(NAME %in% c("California", "Florida",
111 | "New York", "Oregon", "Michigan"),
112 | NAME,
113 | "rest")) %>%
114 | {ggplot(mapping = aes(x = year, y = annual_daily_smokePM,
115 | group = NAME)) +
116 | geom_line(data = filter(., states == "rest"),
117 | color = "grey30", lwd = 0.25, alpha = 0.5) +
118 | geom_line(data = filter(., states!= "rest"),
119 | mapping = aes(color = states),
120 | lwd = 1.5, alpha = 1) +
121 | ylab(expression(paste("smoke ", PM[2.5]," (", mu, "g/", m^3, ")"))) +
122 | scale_color_manual(values = MetBrewer::met.brewer("VanGogh2", 15)[c(1,3, 7, 9, 15)],
123 | guide = "none") +
124 | theme_classic() } %>%
125 | ggsave(file.path(path_figures, "figure05b_time_series.png"),
126 | ., width = 5, height = 3.5)
127 |
128 | simple_states %>%
129 | mutate(states = ifelse(NAME %in% c("California", "Florida",
130 | "New York", "Oregon", "Michigan"),
131 | NAME,
132 | "rest")) %>%
133 | {ggplot() +
134 | geom_sf(data = filter(., states == "rest"),
135 | lwd = 0.1) +
136 | geom_sf(data = filter(., states != "rest"),
137 | mapping = aes(color = states, fill = states),
138 | lwd = 0.1) +
139 | scale_color_manual(values = MetBrewer::met.brewer("VanGogh2", 15)[c(1,3, 7, 9, 15)], #MetBrewer::met.brewer("VanGogh2", 15)[c(1, 4, 9, 14)],
140 | guide = "none",
141 | aesthetics = c("fill", "color")) +
142 | theme_void()} %>%
143 | ggsave(file.path(path_figures, "figure05b_map.png"),
144 | ., width = 3, height = 2.5)
145 |
--------------------------------------------------------------------------------
/scripts/main/03_08_figure07.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots extreme days.
8 | # ------------------------------------------------------------------------------
9 | smokePM_preds <- readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds")) %>%
10 | mutate(smokePM_pred = pmax(smokePM_pred, 0))
11 |
12 | pop <- list.files(file.path(path_data, "2_from_EE", "populationDensity_10km_subgrid"),
13 | full.names = T) %>% purrr::map_dfr(read.csv)
14 |
15 | grid_10km <- st_read(file.path(path_data, "1_grids", "grid_10km_wgs84")) %>%
16 | mutate(area = st_area(geometry) %>% unclass) # area in m^2
17 |
18 | # check the population adds up
19 | pop %>%
20 | rename(pop_density = mean) %>% # people per m2
21 | left_join(grid_10km) %>%
22 | mutate(pop = pop_density*area) %>%
23 | summarise(pop = sum(pop))
24 |
25 | extremes_by_year <- smokePM_preds %>%
26 | mutate(year = lubridate::year(date)) %>%
27 | {full_join(group_by(., year) %>%
28 | summarise(days_over50 = sum(smokePM_pred > 50),
29 | days_over100 = sum(smokePM_pred > 100),
30 | days_over200 = sum(smokePM_pred > 200),
31 | .groups = "drop"),
32 | group_by(., year, grid_id_10km) %>%
33 | summarise(pop_over50 = any(smokePM_pred >50),
34 | pop_over100 = any(smokePM_pred > 100),
35 | pop_over200 = any(smokePM_pred > 200),
36 | .groups = "drop") %>%
37 | left_join(pop %>% left_join(grid_10km) %>%
38 | transmute(grid_id_10km = ID, pop = mean*area)) %>%
39 | group_by(year) %>%
40 | summarise(across(starts_with("pop_over"), ~sum(.x*pop)),
41 | .groups = "drop"))}
42 |
43 | # change in extremes
44 | extremes_by_year %>%
45 | mutate(period = case_when(year >= 2006 & year <=2010 ~ "2006 - 2010",
46 | year >= 2016 & year <=2020 ~ "2016 - 2020",
47 | T ~ as.character(NA))) %>%
48 | filter(!is.na(period)) %>%
49 | group_by(period) %>%
50 | summarise(across(contains("over"), mean)) %>%
51 | pivot_longer(contains("over")) %>%
52 | separate(name, into = c("panel", "cutoff"), sep = "_") %>%
53 | mutate(panel = dplyr::recode_factor(panel,
54 | "days" = "avg annual number of grid cell-days",
55 | "pop" = "avg annual population exposed to 1+ days",
56 | .ordered = TRUE)) %>%
57 | {ggplot(data = .,
58 | aes(x = period, y = value, group = cutoff,
59 | color = cutoff)) +
60 | geom_point() +
61 | geom_line() +
62 | geom_text(data = group_by(., panel, cutoff) %>%
63 | summarise(asinh_mid = sinh(mean(asinh(value))),
64 | .groups = "drop") %>%
65 | mutate(cutoff_num = gsub("over", "", cutoff),
66 | label_top = paste0("paste(\'Days > ", cutoff_num, "\', mu, \'g\')")),
67 | aes(y = asinh_mid, label = label_top,
68 | x = 2.1 + ifelse(cutoff == "over100", 0.03, 0) +
69 | ifelse(cutoff == "over50", 0.06, 0)),
70 | color = "black", hjust = 0, vjust = 0, parse = TRUE) +
71 | geom_text(data = group_by(., panel, cutoff) %>%
72 | summarise(asinh_mid = sinh(mean(asinh(value))),
73 | mult = max(value)/min(value),
74 | .groups = "drop") %>%
75 | mutate(mult_round = formatC(round(signif(mult, 2), 0),
76 | format="d", big.mark=","),
77 | label_bottom = paste0(mult_round, "x increase")),
78 | aes(y = asinh_mid, label = label_bottom,
79 | x = 2.1 + ifelse(cutoff == "over100", 0.03, 0) +
80 | ifelse(cutoff == "over50", 0.06, 0)),
81 | color = "black", hjust = 0, vjust = 1) +
82 | geom_linerange(data = group_by(., panel, cutoff) %>%
83 | summarise(min = min(value),
84 | max = max(value),
85 | .groups = "drop"),
86 | mapping = aes(ymin = min, ymax = max,
87 | x = 2.05 + ifelse(cutoff == "over100", 0.03, 0) +
88 | ifelse(cutoff == "over50", 0.06, 0),
89 | color = cutoff),
90 | inherit.aes = FALSE) +
91 | facet_wrap(~panel, scales = "free", nrow = 1, strip.position = "left") +
92 | scale_y_continuous(trans = "pseudo_log",
93 | breaks = c(0, 1, 5, 10, 50, 100, 500,
94 | 1000, 5000, 10000, 50000, 100000,
95 | 500000, 1000000, 5000000, 10000000,
96 | 20000000),
97 | labels = c("0", "1", "5", "10", "50", "100", "500",
98 | "1k", "5k", "10k", "50k", "100k",
99 | "500k", "1m", "5m", "10m", "20m")) +
100 | scale_x_discrete(expand = expansion(mult = c(0.1, 0.05))) +
101 | coord_cartesian(clip = "off") +
102 | scale_color_manual(values = c("#e8993b", "#913818", "#4a571e"),
103 | guide = "none") +
104 | xlab("") + ylab("") +
105 | theme_classic() +
106 | theme(plot.margin = margin(0.5,6,0,0, "lines"),
107 | strip.placement = "outside",
108 | strip.background = element_blank(),
109 | strip.text = element_text(size = 10),
110 | panel.spacing.x = unit(5, "lines"))} %>%
111 | ggsave(file.path(path_figures, "figure07a-b.png"), ., width = 8, height = 3.25)
112 |
113 | # map of changing extremes
114 | simple_states <- tigris::states(cb = TRUE) %>%
115 | filter(!(STATEFP %in% nonContig_stateFIPS)) %>%
116 | st_transform(crs = st_crs(grid_10km))
117 |
118 | decadal_change <- smokePM_preds %>%
119 | mutate(year = lubridate::year(date)) %>%
120 | # calculate annual avg contribution of smokePM to daily PM
121 | group_by(year, grid_id_10km) %>%
122 | summarise(annual_total_smokePM = sum(smokePM_pred),
123 | annual_days_over50 = sum(smokePM_pred > 50),
124 | annual_days_over100 = sum(smokePM_pred > 100),
125 | .groups = "drop") %>%
126 | mutate(y_days = 365 + leap_year(year)*1,
127 | annual_daily_smokePM = annual_total_smokePM/y_days) %>%
128 | select(-annual_total_smokePM) %>%
129 | # join with full set of years and grid cells to fill in zeros
130 | {left_join(expand.grid(year = unique(.$year),
131 | grid_id_10km = unique(.$grid_id_10km)),
132 | .)} %>%
133 | replace_na(list(annual_daily_smokePM = 0,
134 | annual_days_over50 = 0,
135 | annual_days_over100 = 0)) %>%
136 | # add the time periods to summarise over
137 | mutate(period = case_when(year >= 2006 & year <=2010 ~ "years2006_2010",
138 | year >= 2016 & year <=2020 ~ "years2016_2020",
139 | T ~ as.character(NA))) %>%
140 | filter(!is.na(period)) %>%
141 | # average the annual daily smokePM to the period
142 | group_by(period, grid_id_10km) %>%
143 | summarise(across(starts_with("annual"), mean),
144 | .groups = "drop") %>%
145 | # make a row for each metric, then column for each period, and difference the columns
146 | pivot_longer(starts_with("annual")) %>%
147 | pivot_wider(names_from = period, values_from = value) %>%
148 | mutate(decade_change = years2016_2020 - years2006_2010) %>%
149 | select(-starts_with("years")) %>%
150 | pivot_wider(names_from = name, values_from = decade_change)
151 |
152 | decadal_change %>%
153 | left_join(grid_10km %>% select(grid_id_10km = ID)) %>%
154 | st_as_sf %>%
155 | mutate(annual_days_over50 = pmin(10, annual_days_over50)) %>%
156 | {ggplot(data = .,
157 | aes(color = annual_days_over50,
158 | fill = annual_days_over50)) +
159 | geom_sf() +
160 | geom_sf(data = simple_states,
161 | color = "grey20", lwd = 0.1, fill = NA) +
162 | scale_color_gradientn(aesthetics = c("color", "fill"),
163 | name = "",
164 | colors = rev(colorRampPalette(RColorBrewer::brewer.pal(11, "BrBG"))(101)),
165 | values = scales::rescale(sinh(seq(-2, 2, length.out = 101))),
166 | rescaler = mid_rescaler(0),
167 | guide = guide_colorbar(barheight = 5),
168 | breaks = seq(-5, 10, by = 5),
169 | labels = c(seq(-5, 5, by = 5), ">10")) +
170 | theme_void() +
171 | theme(legend.position = c(.87, 0.28),
172 | legend.justification = "left",
173 | legend.title = element_text(size = 10))} %>%
174 | ggsave(file.path(path_figures, "figure07c.png"),. , width = 5, height = 3)
175 |
--------------------------------------------------------------------------------
/scripts/main/04_01_manuscript_calculations.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Calculates statistics in manuscript.
8 | # ------------------------------------------------------------------------------
9 | epa_data <- readRDS(file.path(path_data, "3_intermediate", "station_smokePM.rds"))
10 |
11 | # how many stations do we have data at
12 | epa_data$id %>% unique %>% length
13 |
14 | # how many stations have observations for at least 5 years
15 | epa_data %>%
16 | group_by(id) %>%
17 | summarise(start_date = min(date),
18 | end_date = max(date),
19 | .groups = "drop") %>%
20 | mutate(time_range = difftime(end_date, start_date, units = "days")) %>%
21 | filter(time_range > (365*5)) %>%
22 | nrow
23 |
24 | # how many obs for aod model
25 | test <- readRDS(file.path(path_data, "4_clean", "aod_training.rds"))
26 | nrow(test)
27 |
28 | # stats on extreme smoke days
29 | smokePM_preds <- readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds"))
30 |
31 | pop <- list.files(file.path(path_data, "2_from_EE", "populationDensity_10km_subgrid"),
32 | full.names = T) %>% purrr::map_dfr(read.csv)
33 |
34 | grid_10km <- st_read(file.path(path_data, "1_grids", "grid_10km_wgs84")) %>%
35 | mutate(area = st_area(geometry) %>% unclass) # area in m^2
36 |
37 | # This may require a large amount of memory
38 | extremes_by_year <- smokePM_preds %>%
39 | mutate(year = lubridate::year(date)) %>%
40 | {full_join(group_by(., year) %>%
41 | summarise(days_over50 = sum(smokePM_pred > 50),
42 | days_over100 = sum(smokePM_pred > 100),
43 | days_over200 = sum(smokePM_pred > 200),
44 | .groups = "drop"),
45 | group_by(., year, grid_id_10km) %>%
46 | summarise(pop_over50 = any(smokePM_pred >50),
47 | pop_over100 = any(smokePM_pred > 100),
48 | pop_over200 = any(smokePM_pred > 200),
49 | .groups = "drop") %>%
50 | left_join(pop %>% left_join(grid_10km) %>%
51 | transmute(grid_id_10km = ID, pop = mean*area)) %>%
52 | group_by(year) %>%
53 | summarise(across(starts_with("pop_over"), ~sum(.x*pop)),
54 | .groups = "drop"))}
55 | saveRDS(extremes_by_year, file.path(path_data, "extremes_by_year.rds"))
56 | # extremes_by_year = readRDS(file.path(path_data, "extremes_by_year.rds"))
57 |
58 | # how far off are we on the really low smoke days PM
59 | smokePM_preds <- list.files(file.path(path_output, "smokePM", "model"),
60 | pattern = "smokePM_pred",
61 | full.names = TRUE) %>%
62 | grep("drop\\.", ., value = TRUE) %>%
63 | map_dfr(function(x){
64 | readRDS(x) %>%
65 | mutate(test_fold = as.numeric(gsub("^smokePM_pred_fold|_drop-?.*\\.rds$", "", basename(x))))
66 | }) %>%
67 | mutate(smokePM_pred = pmax(smokePM_pred, 0))
68 |
69 | smokePM_data <- readRDS(file.path(path_data, "4_clean", "smokePM_training.rds"))
70 |
71 | comp <- smokePM_preds %>%
72 | mutate(test = ifelse(fold == test_fold, "test", "train")) %>%
73 | filter(test == "test") %>%
74 | left_join(smokePM_data %>% select(id, date, smokePM))
75 |
76 | comp %>%
77 | filter(smokePM < 1) %>%
78 | mutate(pred_bin = cut(smokePM_pred, c(0, 5, 10, Inf),
79 | right = FALSE,
80 | include.lowest = T)) %>%
81 | group_by(pred_bin) %>%
82 | summarise(n = n()) %>%
83 | mutate(pct = n/sum(n))
84 |
--------------------------------------------------------------------------------
/scripts/main/05_01_figureS01.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 1.
8 | # ------------------------------------------------------------------------------
9 | hysplit_date <- "2020-09-20"
10 |
11 | states <- tigris::states(cb = TRUE) %>%
12 | filter(!(STATEFP %in% nonContig_stateFIPS))
13 |
14 | traj <- list.files(file.path(path_data, "HYSPLIT", "trajectories", hysplit_date), full.names = T)[1] %>%
15 | list.files(full.names = T) %>%
16 | magrittr::extract(4:6) %>%
17 | map_dfr(function(x){readRDS(x) %>% mutate(file = x)})
18 |
19 | height_data <- readRDS(file.path(path_data, "HYSPLIT", "miscellaneous", "heights.rds"))
20 |
21 | # panel A, example hysplit traj from single point and hour
22 | traj %>%
23 | mutate(init_height = gsub("^traj-traj-fwd-[0-2][0-9]-[0-1][0-9]-[0-3][0-9]-[0-2][0-9]-1lat_[0-9]{2}p[0-9]{2}_lon_-[0-9]?[0-9]{2}p[0-9]{2}-hgt_|-144h\\.rds$", "", basename(file))) %>%
24 | {ggplot(data = .) +
25 | geom_sf(data = states, inherit.aes = FALSE) +
26 | geom_point(mapping = aes(x = lon, y = lat,
27 | color = height), size = 1) +
28 | geom_path(aes(x = lon, y = lat, group = init_height), size = 0.2) +
29 | geom_text(data = filter(., hour_along == max(hour_along)),
30 | aes(x = lon + ifelse(init_height == "2500", 0.5, -0.5),
31 | y = lat,
32 | label = paste0(init_height, " m"),
33 | hjust = ifelse(init_height == "2500", 0, 1))) +
34 | scale_color_viridis_c("height above\nground level (m)",
35 | option = "magma", direction = -1) +
36 | theme_void()} %>%
37 | ggsave(filename = file.path(path_figures, "figureS01a.png"), .,
38 | width = 5, height = 3)
39 |
40 | # panel B, distribution of heights for quintiles
41 | if (!file.exists(file.path(path_data, "HYSPLIT", "miscellaneous", "height_quant.rds"))) {
42 | height_quant <- quantile(height_data, probs = seq(0, 1, by = 0.2))
43 | saveRDS(height_quant, file.path(path_data, "HYSPLIT", "miscellaneous", "height_quant.rds"))
44 | } else {
45 | height_quant = readRDS(file.path(path_data, "HYSPLIT", "miscellaneous", "height_quant.rds"))
46 | }
47 |
48 | if (!file.existsfile.path(path_data, "HYSPLIT", "miscellaneous", "height_quant_df.rds")) {
49 | height_quant_df <- data.frame(height = height_quant/1e3,
50 | quant = names(height_quant)) %>%
51 | mutate(label = paste0(quant, "\n", round(height, 1), " km"))
52 | saveRDS(height_quant_df, file.path(path_data, "HYSPLIT", "miscellaneous", "height_quant_df.rds"))
53 | } else {
54 | height_quant_df = readRDS(file.path(path_data, "HYSPLIT", "miscellaneous", "height_quant_df.rds"))
55 | }
56 |
57 | if (!file.exists(file.path(path_data, "HYSPLIT", "miscellaneous", "height_samp.rds"))) {
58 | set.seed(10001)
59 | height_samp <- sample(1:length(height_data), size = 1e6, replace = F)
60 | saveRDS(height_samp, file.path(path_data, "HYSPLIT", "miscellaneous", "height_samp.rds"))
61 | } else {
62 | height_sample = readRDS(file.path(path_data, "HYSPLIT", "miscellaneous", "height_samp.rds"))
63 | }
64 |
65 | height_hist <- data.frame(height = height_data[height_samp]/1e3) %>%
66 | # slice_sample(n = 1e6) %>%
67 | ggplot(aes(x = height, y = ..density..)) +
68 | geom_histogram(boundary = 0) +
69 | geom_vline(data = height_quant_df %>% filter(quant != "100%"),
70 | aes(xintercept = height),
71 | color = "#2b614e") +
72 | geom_label(data = height_quant_df %>% filter(quant != "100%"),
73 | aes(x = height, y = .3, label = label),
74 | position = position_dodge(width = 0.2),
75 | label.size = 0, size = 2, label.padding = unit(0.1, "lines"),
76 | color = "#2b614e") +
77 | xlab("height above ground level (km)") + ylab("density") +
78 | scale_x_continuous(limits = c(NA, 12), expand = c(0.05, 0)) +
79 | theme_classic()
80 |
81 | ggsave(filename = file.path(path_figures, "figureS01b.png"), height_hist,
82 | width = 5, height = 3)
83 |
84 | # panel C, counts in each height quintile in the 50km buffer
85 | hysplit <- list.files(
86 | file.path(path_data, "HYSPLIT", "10km_grid_2006-2020"),
87 | pattern = paste0(format(as.Date(hysplit_date), "%Y"),
88 | "_",
89 | format(as.Date(hysplit_date), "%m")),
90 | full.names = T
91 | ) %>%
92 | readRDS %>%
93 | mutate(date = as.Date(date, format = "%Y%m%d")) %>%
94 | filter(date == as.Date(hysplit_date))
95 |
96 | grid_10km <- st_read(file.path(path_data, "1_grids", "grid_10km_wgs84"))
97 |
98 | hysplit %>%
99 | select(ID = id_grid, contains("num_traj_points_height")) %>%
100 | pivot_longer(starts_with("num")) %>%
101 | separate(name, into = c(NA, NA, NA, NA, "n"), sep = "_", remove = FALSE,
102 | convert = T) %>%
103 | left_join(data.frame(height = unname(height_quant)/1e3) %>%
104 | mutate(n = 1:n(),
105 | label = paste0(round(height, 1),
106 | " - ",
107 | lead(round(height, 1)),
108 | " km above ground level")) %>%
109 | select(n, label)) %>%
110 | left_join(grid_10km %>% select(ID)) %>%
111 | st_as_sf %>%
112 | {ggplot(data = .) +
113 | geom_sf(aes(fill = value, color = value)) +
114 | facet_wrap(~label, nrow = 1) +
115 | scale_color_gradientn(colors = colorspace::sequential_hcl(palette = "BuPu", n = 100, rev = T),
116 | name = "trajectory points\nwithin 50km",
117 | aesthetics = c("fill", "color"),
118 | values = scales::rescale(sinh(seq(0, 5, length.out = 100))),
119 | guide = guide_colorbar(title.theme = element_text(size = 7),
120 | label.theme = element_text(size = 6.5),
121 | barheight = 4, barwidth = 1)) +
122 | theme_void() +
123 | theme(strip.text = element_text(size = 8),
124 | panel.spacing = unit(-0.5, "lines"))} %>%
125 | ggsave(filename = file.path(path_figures, "figureS01c.png"),. ,
126 | height = 3, width = 12)
127 |
--------------------------------------------------------------------------------
/scripts/main/05_02_figureS02.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Jessica Li and Marissa Childs
7 | # Plots supplemental figure 2.
8 | # ------------------------------------------------------------------------------
9 | chosen_date = "20181111"
10 | chosen_year = year(ymd(chosen_date))
11 | chosen_month = month(ymd(chosen_date))
12 |
13 | # Get smoke plume shapes
14 | smoke = readRDS(file.path(path_data, "smoke", "smoke_plumes_sfdf.rds"))
15 | smoke = smoke %>% filter(date == chosen_date)
16 |
17 | # Get CONUS background
18 | usa = states()
19 | conus = usa %>% filter(!(STUSPS %in% c("AK", "AS", "GU", "HI", "MP", "PR", "VI")))
20 |
21 | # Plot plumes
22 | p = ggplot() +
23 | geom_sf(data = conus) +
24 | geom_sf(data = smoke, fill = "orange", alpha = 0.3) +
25 | theme_void()
26 |
27 | # Save
28 | ggsave(file.path(path_figures, "figureS02a.png"),
29 | plot = p, width = 12, height = 8)
30 |
31 | #-------------------------------------------------------------------------------
32 | # Load 10 km grid
33 | project_grid = read_sf(file.path(path_data, "1_grids", "10km_grid"))
34 | project_grid = st_as_sf(project_grid) %>% st_transform(st_crs(smoke)) %>% select(id_grid = ID)
35 |
36 | # Get gridded smoke day
37 | smoke_day = readRDS(file.path(path_data, "smoke_days", paste0("grid_smoke_day_", chosen_year, "_", chosen_month, ".rds")))
38 | smoke_day = smoke_day %>%
39 | filter(date == ymd(chosen_date)) %>%
40 | mutate(smoke_day = factor(smoke_day, levels = 0:1))
41 | smoke_day = project_grid %>% left_join(smoke_day, by = "id_grid")
42 |
43 | # Plot gridded smoke day
44 | p = ggplot(smoke_day, aes(fill = smoke_day)) +
45 | geom_sf(color = NA) +
46 | theme_void() +
47 | scale_fill_manual(values = c("gray50", "orange")) +
48 | labs(fill = "Smoke Day")
49 |
50 | # Save
51 | ggsave(file.path(path_figures, "figureS02b.png"),
52 | plot = p, width = 12, height = 8)
53 |
54 | #-------------------------------------------------------------------------------
55 | # AOD Missingness
56 | aod_na = list.files(file.path(path_data, "2_from_EE", "maiac_AODmissings"),
57 | pattern = sprintf("^aod_pctMissing_10km_subgrid_.*_%s.*\\.csv$", chosen_year),
58 | full.names = T) %>%
59 | map_dfr(read.csv) %>%
60 | rename(id_grid = ID, date = start_date, perc_aod_missing = mean) %>%
61 | filter(date == chosen_date)
62 | aod_na = project_grid %>% left_join(aod_na, by = "id_grid")
63 |
64 | # Plot
65 | p = ggplot(aod_na %>% mutate(perc_aod_missing = perc_aod_missing*100), aes(fill = perc_aod_missing)) +
66 | geom_sf(color = NA) +
67 | theme_void() +
68 | labs(fill = "% AOD\nMissing")
69 |
70 | # Save
71 | ggsave(file.path(path_figures, "figureS02d.png"),
72 | plot = p, width = 12, height = 8)
73 |
74 | #-------------------------------------------------------------------------------
75 | hysplit <- list.files(file.path(path_data, "HYSPLIT", "10km_grid_2006-2020"),
76 | pattern = paste0(format(ymd(chosen_date), "%Y"),
77 | "_",
78 | format(ymd(chosen_date), "%m")),
79 | full.names = T) %>%
80 | readRDS %>%
81 | mutate(date = as.Date(date, format = "%Y%m%d")) %>%
82 | filter(date == ymd(chosen_date))
83 |
84 | grid_10km <- st_read(file.path(path_data, "1_grids", "grid_10km_wgs84"))
85 |
86 | hysplit %>%
87 | select(ID = id_grid, num_traj_points_height_1) %>%
88 | left_join(grid_10km %>% select(ID)) %>%
89 | st_as_sf %>%
90 | {ggplot(data = .) +
91 | geom_sf(aes(fill = num_traj_points_height_1, color = num_traj_points_height_1)) +
92 | scale_color_gradientn(colors = c(cmocean::cmocean(name = "oxy", start = 0.2, end = 0.79, direction = -1)(20),
93 | cmocean::cmocean(name = "oxy", start = 0, end = 0.19, direction = -1)(20)),
94 | name = "trajectory points\nwithin 50km",
95 | aesthetics = c("fill", "color"),
96 | rescaler = mid_rescaler(50),
97 | guide = guide_colorbar(title.theme = element_text(size = 7),
98 | label.theme = element_text(size = 6.5),
99 | barheight = 4, barwidth = 1)) +
100 | theme_void() +
101 | theme(legend.position = c(0.93, 0.35))} %>%
102 | ggsave(filename = file.path(path_figures, "figureS02e.png"),
103 | ., height = 3.5, width = 5)
104 |
105 | #-------------------------------------------------------------------------------
106 | # Final Gridded Smoke Days
107 | # Get filled smoke day grid
108 | filled_smoke_day = readRDS(file.path(path_data, "3_intermediate", "all_smoke_days_incl_cloudy.rds"))
109 | filled_smoke_day = filled_smoke_day %>%
110 | filter(date == ymd(chosen_date)) %>%
111 | select(id_grid = grid_id_10km, date, smoke_day)
112 | filled_smoke_day = project_grid %>%
113 | left_join(filled_smoke_day, by = "id_grid") %>%
114 | replace_na(list(date = ymd(chosen_date), smoke_day = 0)) %>%
115 | mutate(smoke_day = factor(smoke_day, levels = 0:1))
116 |
117 | # Plot gridded smoke day
118 | p = ggplot(filled_smoke_day, aes(fill = smoke_day)) +
119 | geom_sf(color = NA) +
120 | theme_void() +
121 | scale_fill_manual(values = c("gray50", "orange")) +
122 | labs(fill = "Smoke Day")
123 |
124 | # Save
125 | ggsave(file.path(path_figures, "figureS02f.png"),
126 | plot = p, width = 12, height = 8)
127 |
--------------------------------------------------------------------------------
/scripts/main/05_03_figureS03.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 3.
8 | # ------------------------------------------------------------------------------
9 | # trends in plumes
10 | plumes <- readRDS(file.path(path_data, "smoke", "smoke_plumes_sfdf_20050805_20220711.RDS"))
11 | plumes <- readRDS(file.path(path_data, "smoke", "smoke_plumes_sfdf.RDS"))
12 |
13 | plume_area <- st_area(plumes)
14 |
15 | plumes %<>% st_drop_geometry() %>%
16 | cbind(area = unclass(plume_area)/1e6)
17 |
18 | plumes %>%
19 | filter(area > 0) %>%
20 | mutate(year_month = substr(date, 1, 6),
21 | date = as.Date(date, format = "%Y%m%d"),
22 | month = lubridate::month(date),
23 | year = lubridate::year(date)) %>%
24 | filter(year >= 2006 & year <= 2020) %>%
25 | group_by(year_month) %>%
26 | summarise(start_date = min(date),
27 | qminimum = min(area),
28 | q5 = quantile(area, 0.05),
29 | q10 = quantile(area, 0.10),
30 | .groups = "drop") %>%
31 | pivot_longer(starts_with("q"), names_prefix = "q") %>%
32 | mutate(name = ifelse(name %in% c("minimum", "max"), name, paste0(name, "th percentile"))) %>%
33 | {ggplot() +
34 | geom_vline(xintercept = as.Date("2017-12-28"),
35 | color = "grey") +
36 | geom_label(aes(x = as.Date("2017-12-28"),
37 | y = pull(., value) %>% max,
38 | label = "GOES-16"),
39 | nudge_y = -5,
40 | nudge_x = -65,
41 | vjust = 1,
42 | fill = "white",
43 | color = "grey",
44 | label.size = NA) +
45 | geom_vline(xintercept = as.Date("2019-02-28"),
46 | color = "grey") +
47 | geom_label(aes(x = as.Date("2019-02-28"),
48 | y = Inf,
49 | label = "GOES-17"),
50 | vjust = 1,
51 | nudge_y = 20,
52 | nudge_x = 65,
53 | fill = "white",
54 | color = "grey",
55 | label.size = NA) +
56 | geom_line(data = .,
57 | aes(x = start_date,
58 | y = value,
59 | group = name,
60 | color = name)) +
61 | geom_text(data = group_by(., name) %>%
62 | summarise(mean_val = last(value)) %>%
63 | cbind(start_date = as.Date("2021-01-01")),
64 | aes(x = start_date,
65 | y = mean_val,
66 | label = name,
67 | color = name),
68 | hjust = 0) +
69 | theme_classic() +
70 | coord_cartesian(clip = "off") +
71 | scale_color_manual(values = c("black", "darkgreen", "purple")) +
72 | xlab("Date") + ylab(bquote("Plume size "(km^2))) +
73 | theme(legend.position = "none",
74 | plot.margin = unit(c(0.75, 2.5, 0.5, 0.25), "cm"))} %>%
75 | ggsave(file.path(path_figures, "figureS03.png"), .,
76 | width = 6, height = 4)
77 |
--------------------------------------------------------------------------------
/scripts/main/05_04_figureS04.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Jessica Li
7 | # Plots supplemental figure 4.
8 | # ------------------------------------------------------------------------------
9 | max_nchar = 35
10 |
11 | start_date = "20060101"
12 | end_date = "20201231"
13 | all_dates = seq.Date(ymd(start_date), ymd(end_date), by = "day")
14 | duration_days = 6
15 |
16 | # Fire points
17 | fire_dates_not_online = readRDS(file.path(path_data, "fire", "fire_dates_not_online.rds"))
18 | fire_dates_empty_data = readRDS(file.path(path_data, "fire", "fire_dates_empty_data.rds"))
19 | fire_dates_clusters_too_small = readRDS(file.path(path_data, "fire", "fire_dates_clusters_too_small.rds"))
20 |
21 | # HYSPLIT points
22 | hysplit_dates_without_start_duration = seq.Date(ymd(start_date), ymd("20060418"), by = "day")
23 | hysplit_dates_not_online = ymd(readRDS(file.path(path_data, "HYSPLIT", "miscellaneous", "hysplit_dates_not_online.rds")))
24 | hysplit_dates_gis_corrupt = ymd(readRDS(file.path(path_data, "HYSPLIT", "miscellaneous", "hysplit_dates_gis_corrupt.rds")))
25 | hysplit_dates_oddly_empty = ymd(readRDS(file.path(path_data, "HYSPLIT", "miscellaneous", "hysplit_dates_oddly_empty.rds")))
26 | hysplit_na_dates_original = c(hysplit_dates_without_start_duration, hysplit_dates_not_online, hysplit_dates_gis_corrupt, hysplit_dates_oddly_empty)
27 | hysplit_na_dates = unique(unlist(lapply(hysplit_na_dates_original, function(x) seq.Date(x, x + days(duration_days), by = "day"))))
28 | class(hysplit_na_dates) = "Date"
29 | hysplit_na_dates_affected = setdiff(hysplit_na_dates, hysplit_na_dates_original)
30 | class(hysplit_na_dates_affected) = "Date"
31 | hysplit_dates_without_start_duration = setdiff(hysplit_dates_without_start_duration, hysplit_dates_not_online)
32 | class(hysplit_dates_without_start_duration) = "Date"
33 |
34 | # Smoke plumes
35 | smoke_dates_not_online = ymd(readRDS(file.path(path_data, "smoke", "smoke_dates_not_online.rds")))
36 | smoke_dates_empty_data = ymd(readRDS(file.path(path_data, "smoke", "smoke_dates_empty_data.rds")))
37 | smoke_dates_repaired_geometry = ymd(readRDS(file.path(path_data, "smoke", "smoke_dates_repaired_geometry.rds")))
38 |
39 | # Datasets
40 | dataset = create_node_df(
41 | n = 3,
42 | label = c("Fire points", "HYSPLIT points", "Smoke plumes")
43 | )
44 |
45 | # Reasons for missingness
46 | reasons = create_node_df(
47 | n = 9,
48 | label = str_wrap(c("Not online", "Empty data", "Clusters too small",
49 | "No start time nor duration", "Not online",
50 | "Corrupt shapefile", "Oddly empty", "Affected by previous missing date",
51 | "Not online"), max_nchar) %>%
52 | paste0("\n",
53 | sapply(list(fire_dates_not_online, fire_dates_empty_data, fire_dates_clusters_too_small,
54 | hysplit_dates_without_start_duration, hysplit_dates_not_online,
55 | hysplit_dates_gis_corrupt, hysplit_dates_oddly_empty, hysplit_na_dates_affected,
56 | smoke_dates_not_online), length))
57 | )
58 |
59 | # How missingness gets treated
60 | treatments = create_node_df(
61 | n = 4,
62 | label = c("Set distance = NA,\narea = 0, num_points = 0",
63 | "Exclude from training;\nfill using temporal NN for prediction",
64 | "Treat trajectory point counts as MIA",
65 | "Exclude from medians and training;\nfill using temporal NN for prediction") %>%
66 | paste0("\n",
67 | c(length(setdiff(fire_dates_not_online, smoke_dates_not_online)) + length(fire_dates_empty_data) + length(fire_dates_clusters_too_small),
68 | length(intersect(fire_dates_not_online, smoke_dates_not_online)),
69 | sum(sapply(list(hysplit_dates_without_start_duration, hysplit_dates_not_online,
70 | hysplit_dates_gis_corrupt, hysplit_dates_oddly_empty,
71 | hysplit_na_dates_affected),
72 | length)),
73 | length(smoke_dates_not_online)))
74 | )
75 | nodes = combine_ndfs(dataset, reasons, treatments)
76 |
77 | # Connect nodes
78 | edges = create_edge_df(
79 | from = c(1, 1, 1, 2, 2, 2, 2, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12),
80 | to = c(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13, 13, 15, 15, 15, 15, 15, 16)
81 | )
82 |
83 | # Make graph
84 | g = create_graph(
85 | nodes_df = nodes,
86 | edges_df = edges,
87 | attr_theme = "lr"
88 | ) %>%
89 | set_node_attrs("shape", "rectangle") %>%
90 | set_node_attrs("fillcolor", "white") %>%
91 | set_node_attrs("fixedsize", F) %>%
92 | set_node_attrs("fontcolor", "black") %>%
93 | set_node_attrs("color", "black") %>%
94 | set_edge_attrs("color", "black")
95 |
96 | # Preview
97 | render_graph(g)
98 |
99 | # Save
100 | export_graph(g, file_name = file.path(path_figures, "figureS04.pdf"), file_type = "pdf")
101 |
--------------------------------------------------------------------------------
/scripts/main/05_05_figureS05.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 5.
8 | # ------------------------------------------------------------------------------
9 | plumes <- readRDS(file.path(path_data, "3_intermediate", "all_smoke_days.rds"))
10 | smoke <- readRDS(file.path(path_data, "3_intermediate", "all_smoke_days_incl_cloudy.rds"))
11 | epa_data <- readRDS(file.path(path_data, "3_intermediate", "station_smokePM.rds")) %>% ungroup
12 | epa_ll <- st_read(file.path(path_data, "epa_station_locations")) %>%
13 | rename(grid_id_10km = grid_10km)
14 |
15 | counties <- tigris::counties(cb = TRUE)
16 | states <- tigris::states(cb = TRUE)
17 |
18 | # select stations to show timeseries for
19 | set.seed(20202)
20 | station_set <- epa_data %>%
21 | filter(date >= as.Date("2020-01-01")) %>%
22 | group_by(id) %>%
23 | summarise(n = n(),
24 | max_PM = max(pm25)) %>%
25 | filter(n > 100, max_PM > 50) %>%
26 | slice_sample(n = 4) %>%
27 | pull(id)
28 |
29 | # identify the counties that the stations fall in for labeling
30 | station_loc <- cbind(id = epa_ll %>%
31 | filter(id %in% station_set) %>%
32 | pull(id),
33 | counties[st_intersects(epa_ll %>%
34 | filter(id %in% station_set),
35 | counties %>% st_transform(st_crs(epa_ll))) %>%
36 | unlist, ]) %>%
37 | left_join(states %>% select(STATEFP, STUSPS) %>% st_drop_geometry) %>%
38 | select(id, NAME, STUSPS)
39 |
40 | epa_data %>%
41 | filter(id %in% station_set) %>%
42 | filter(date >= as.Date("2020-01-01")) %>%
43 | left_join(plumes %>%
44 | rename(plume = smoke_day) %>%
45 | filter(grid_id_10km %in% unique(epa_ll$grid_id_10km))) %>%
46 | mutate(smoke_def = case_when(smoke_day == 1 & plume == 1 ~ "plume",
47 | smoke_day == 1 & is.na(plume) ~ "hysplit + aod",
48 | smoke_day == 0 ~ as.character(NA))) %>%
49 | left_join(station_loc %>% st_drop_geometry()) %>%
50 | mutate(panel = paste0(NAME, " County, ", STUSPS)) %>%
51 | group_by(id) %>%
52 | mutate(max_pm = max(pm25)) %>%
53 | {ggplot(data = ., aes(x = date, y = pm25)) +
54 | geom_line() +
55 | geom_point(data = filter(., smoke_day == 1),
56 | mapping = aes(x = date, y = -0.05*max_pm,
57 | color = smoke_def),
58 | size = 0.9) +
59 | geom_point(data = filter(., panel == "Douglas County, NE" & date == as.Date("2020-07-04")),
60 | color = "blue", size = 0.9) +
61 | scale_color_manual(name = "", values = c("grey", "#BD3106")) +
62 | facet_wrap(~panel, ncol = 1, scales = "free_y") +
63 | xlab("") + ylab(expression(PM[2.5])) +
64 | theme_classic()} %>%
65 | ggsave(file.path(path_figures, "figureS05.png"), .,
66 | width = 5, height = 6)
67 |
--------------------------------------------------------------------------------
/scripts/main/05_06_figureS06.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 6.
8 | # ------------------------------------------------------------------------------
9 | # restart using the data from Kara and Jen
10 | dt <- read.csv(file.path(path_data, "IMPROVE", "IMPROVE_CSN_smokeday_data.csv")) %>%
11 | mutate(date = as.Date(paste0(year, "-", month, "-", day),
12 | format = "%Y-%m-%d")) %>%
13 | filter(date >= as.Date("2006-01-01"))
14 |
15 | species <- c('PM2.5', 'OC','EC', 'SO4', 'Dust')
16 |
17 | # calculate pm25 anomalies and species anomalies,
18 | # how much of the pm25 anomalies are explained/accounted for by the OC anomalies?
19 | species_anom <- dt %>%
20 | select(Network, Site, date, all_of(species), smoke_day) %>%
21 | pivot_longer(all_of(species)) %>%
22 | filter(!is.na(value)) %>%
23 | filter(!is.na(smoke_day)) %>%
24 | mutate(month = lubridate::month(date),
25 | year = lubridate::year(date),
26 | name = gsub("\\.", "", name)) %>%
27 | unite(net_site_species, Network, Site, name) %>%
28 | {left_join(.,
29 | nonsmoke_medians(., value, smoke_day, net_site_species, month, year),
30 | by = c("net_site_species", "month", "year"))} %>%
31 | mutate(anom = value - value_med_3yr) %>%
32 | separate(net_site_species, into = c("Network", "Site", "Species"), sep = "_")
33 |
34 | site_nonsmokePM <- dt %>%
35 | filter(!is.na(smoke_day)) %>%
36 | group_by(Network, Site, State, Latitude, Longitude, smoke_day) %>%
37 | summarise(baselinePM = median(`PM2.5`, na.rm = T),
38 | n = n(),
39 | .groups = "drop") %>%
40 | pivot_wider(values_from = c(baselinePM, n),
41 | names_from = smoke_day,
42 | names_prefix = "smokeday") %>%
43 | filter(!is.na(n_smokeday1)) %>%
44 | select(-starts_with("n_smokeday"), -baselinePM_smokeday1) %>%
45 | rename(baselinePM = baselinePM_smokeday0) %>%
46 | filter(State %in% c("HI", "AK") == F)
47 |
48 | pct_anom <- species_anom %>%
49 | select(-nobs_3yr) %>%
50 | filter(Site != "240239000") %>% # has duplicate obs
51 | pivot_wider(id_cols = c(Network, Site, date, smoke_day),
52 | values_from = c(anom, value, value_med_3yr),
53 | names_from = Species) %>%
54 | mutate(#pct_anom_NH4 = anom_NH4/anom_PM25,
55 | pct_anom_SO4 = anom_SO4/anom_PM25,
56 | pct_anom_OC = anom_OC/anom_PM25,
57 | pct_anom_EC = anom_EC/anom_PM25,
58 | pct_anom_dust = anom_Dust/anom_PM25) %>%
59 | filter(!is.na(value_med_3yr_PM25)) %>%
60 | select(starts_with("pct_anom"), anom_PM25, smoke_day, date, Site, Network) %>%
61 | left_join(site_nonsmokePM %>% mutate(Site = as.character(Site))) %>%
62 | mutate(baselinePM = ifelse(baselinePM > 8, "High", "Low")) %>%
63 | pivot_longer(starts_with("pct_anom")) %>%
64 | mutate(day = case_when(smoke_day == 0 ~ "non-smoke day",
65 | smoke_day == 1 & anom_PM25 <= 0 ~ "smoke day,\nanom < 0",
66 | smoke_day == 1 & anom_PM25 <= 25 ~ "smoke day,\n0 < anom <= 25 ",
67 | smoke_day == 1 & anom_PM25 > 25 ~ "smoke day,\n25 < anom"),
68 | day = factor(day, levels = c("non-smoke day", "smoke day,\nanom < 0",
69 | "smoke day,\n0 < anom <= 25 ", "smoke day,\n25 < anom"),
70 | ordered = T),
71 | name = gsub("pct_anom_", "", name))
72 |
73 | pct_anom %>%
74 | filter(anom_PM25 != 0) %>%
75 | filter(!is.na(baselinePM)) %>%
76 | filter(day != "smoke day,\nanom < 0") %>%
77 | filter(!is.na(value)) %>%
78 | mutate(name = recode_factor(name,
79 | "dust" = "Dust",
80 | "EC" = "Elemental~carbon",
81 | "SO4" = "SO[4]",
82 | "OC" = "Organic~carbon",
83 | .ordered = T)) %>%
84 | {ggplot(data =.,
85 | aes(x = value,
86 | y = ..ndensity..,
87 | group = day,
88 | color = day,
89 | fill = day)) +
90 | geom_density(alpha = 0.1, alpha = 0.5, adjust = 2) +
91 | theme_classic() +
92 | facet_grid(paste0(baselinePM, "~baseline~PM[2.5]") ~ name,
93 | scales = "free",
94 | labeller = label_parsed) +
95 | geom_vline(xintercept = 0, color = "grey30", linetype = "dotted") +
96 | scale_x_continuous(limits = c(-0.5, 1), # results in 46580/635396 (~7%) obs being dropped
97 | labels = scales::percent) +
98 | scale_color_manual(values = c("black", "blue", "red"),
99 | aesthetics = c("color", "fill")) +
100 | labs(color = "",
101 | fill = "",
102 | y = "density") +
103 | xlab(bquote(Percent~PM[2.5]~anomaly)) +
104 | theme(panel.spacing.x = unit(1, "lines"),
105 | strip.text = element_text(size = 12),
106 | strip.background = element_blank())} %>%
107 | ggsave(file.path(path_figures, "figureS06b.png"), .,
108 | width = 10, height = 6)
109 |
110 | states <- tigris::states(cb = TRUE) %>%
111 | st_transform(crs = 4326)
112 |
113 | site_nonsmokePM %>%
114 | filter(!is.na(baselinePM)) %>%
115 | st_as_sf(coords = c("Longitude", "Latitude")) %>%
116 | st_set_crs(4326) %>%
117 | mutate(baselinePM_class = ifelse(baselinePM > 8, "high", "low"),
118 | baselinePM = pmin(baselinePM, 15)) %>%
119 | {ggplot(data = .,
120 | aes(color = baselinePM,
121 | fill = baselinePM,
122 | shape = Network)) +
123 | geom_sf(data = states %>% filter(STATEFP %in% nonContig_stateFIPS == F),
124 | fill = "white", inherit.aes = FALSE) +
125 | geom_sf() +
126 | scale_color_gradientn(colors = c(cmocean::cmocean(name = "balance", start = 0, end = 0.4)(20),
127 | cmocean::cmocean(name = "balance", start = 0.6, end = 1)(20)),
128 | name = "",
129 | aesthetics = c("fill", "color"),
130 | rescaler = mid_rescaler(8),
131 | breaks = c(5, 10, 15),
132 | labels = c("5", "10", ">15"),
133 | guide = guide_colorbar(label.theme = element_text(size = 7),
134 | barheight = 4, barwidth = 1)) +
135 | theme_void() +
136 | theme(legend.position = c(0.93, 0.25))} %>%
137 | ggsave(file.path(path_figures, "figureS06a.png"),
138 | ., width = 6, height = 4)
139 |
--------------------------------------------------------------------------------
/scripts/main/05_07_figureS07.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 7.
8 | # ------------------------------------------------------------------------------
9 | aod_df <- readRDS(file.path(path_data, "4_clean", "aod_training.rds"))
10 | grid_orig <- st_read(file.path(path_data, "1_grids", "1km_aod_grid_wgs84_training")) %>%
11 | mutate(train_cells = (grid_id %in% unique(aod_df$grid_id_1km)))
12 |
13 | states <- tigris::states(cb = TRUE) %>%
14 | filter(!(STATEFP %in% nonContig_stateFIPS))
15 |
16 | {ggplot() +
17 | geom_sf(data = states,
18 | color = "grey30",
19 | lwd = 0.1) +
20 | geom_sf(data = grid_orig,
21 | aes(color = train_cells,
22 | fill = train_cells)) +
23 | scale_color_manual(name = "",
24 | values = c("black", "blue"),
25 | labels = c("original 5,000 cells",
26 | "training locations"),
27 | aesthetics = c("color", "fill")) +
28 | theme_void() +
29 | theme(legend.position = c(0.75, 0.3),
30 | legend.justification = "left",
31 | legend.key.size = unit(0.4, 'cm'))} %>%
32 | ggsave(file.path(path_figures, "figureS07.png"),
33 | ., width = 5, height = 3.5)
34 |
--------------------------------------------------------------------------------
/scripts/main/05_08_figureS08.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 8.
8 | # ------------------------------------------------------------------------------
9 | crosswalk <- readRDS(file.path(path_data, "1_grids", "grid_crosswalk_1km_10km.rds"))
10 |
11 | aod_train <- readRDS(file.path(path_data, "4_clean", "aod_training.rds"))
12 |
13 | aod_train %<>% select(starts_with("grid"), date, month, aod_anom, lat, lon, fold)
14 |
15 | aod_train_cells <- aod_train$grid_id_1km %>% unique
16 |
17 | # use the extracted aod that we didn't train on to test with
18 | smoke_missing_dates = readRDS(file.path(path_data, "smoke", "smoke_dates_not_online.rds"))
19 | smoke_missing_dates = ymd(smoke_missing_dates)
20 |
21 | smoke_days <- readRDS(file.path(path_data, "3_intermediate", "all_smoke_days_incl_cloudy.rds"))
22 |
23 | aod_null_value <- -999999
24 |
25 | aod_test <- map_dfr(list.files(file.path(path_data, "2_from_EE", "maiac_AOD_training"), full.names = TRUE),
26 | function(x) read_csv(x) %>% filter(!(grid_id %in% aod_train_cells))) %>%
27 | transmute(grid_id_1km = grid_id,
28 | date = as.Date(as.character(start_date), format = "%Y%m%d"),
29 | aod = median) %>%
30 | filter(date >= as.Date("2006-01-01")) %>%
31 | mutate(month = lubridate::month(date),
32 | year = lubridate::year(date)) %>%
33 | left_join(crosswalk, by = "grid_id_1km") %>%
34 | left_join(smoke_days %>% select(-note_smoke_date_not_online),
35 | by = c("date", "grid_id_10km")) %>%
36 | replace_na(list(smoke_day = 0)) %>%
37 | filter(aod != aod_null_value) %>%
38 | {left_join(.,
39 | nonsmoke_medians(filter(., !(date %in% smoke_missing_dates)),
40 | aod, smoke_day, grid_id_1km, month, year),
41 | by = c("grid_id_1km", "month", "year"))} %>%
42 | mutate(aod_anom = aod - aod_med_3yr)
43 |
44 | aod_test_cells <- aod_test$grid_id_1km %>% unique()
45 |
46 | all_aod_cells <- c(aod_test_cells, aod_train_cells)
47 |
48 | aod_preds <- readRDS(file.path(path_output, "smokePM", "model", "test_aod_preds.rds"))
49 | # predicted vs observed, in and out of sample
50 | aod_comp<- rbind(aod_train %>% select(grid_id_1km, date, aod_anom) %>% mutate(test_train = "train"),
51 | aod_test %>% filter(smoke_day == 1) %>% select(grid_id_1km, date, aod_anom) %>% mutate(test_train = "test")) %>%
52 | left_join(aod_preds)
53 |
54 | aod_comp %>%
55 | arrange(test_train) %>%
56 | {ggplot(data = ., aes(x= aod_anom, y = aod_anom_pred, color = test_train)) +
57 | geom_abline(slope = 1, intercept = 0, color = "red") +
58 | geom_point(alpha = 0.3) +
59 | ggpubr::stat_cor(aes(label = ..rr.label..), digits = 4,
60 | show_guide = FALSE) +
61 | scale_color_manual(values = c("black", "blue"), name = "") +
62 | xlab("observed AOD anomalies") + ylab("predicted AOD anomalies") +
63 | theme_classic()} %>%
64 | ggsave(file.path(path_figures, "figureS08a.png"),
65 | ., width = 6, height = 6)
66 |
67 | # variable importance
68 | var_import <- readRDS(file.path(path_output, "smokePM", "model", "AOD_var_importance.rds"))
69 | var_import$variable_importance %>%
70 | left_join(data.frame(feat_name = var_import$feature_names) %>%
71 | mutate(Feature = paste0("f", (1:n()) - 1))) %>%
72 | mutate(group = case_when(grepl("aod_anom|aot_anom", feat_name) ~ "aerosols",
73 | grepl("fire", feat_name) ~ "fire",
74 | grepl("traj", feat_name) ~ "hysplit",
75 | feat_name %in% c("lat", "lon") ~ "cross sectional",
76 | feat_name %in% c("dewpoint_temp_2m", "temp_2m", "wind_u", "wind_v", "precip", "pbl_mean",
77 | "surface_pressure", "sea_level_pressure") ~ "meteorology",
78 | T ~ as.character(NA))) %>%
79 | mutate(feat_name = factor(feat_name, levels = rev(.$feat_name), ordered = T)) %>%
80 | mutate(feat_name = recode(feat_name,
81 | "lon" = "Longitude",
82 | "lat" = "Latitude",
83 | "closest_fire_num_points" = "Points in closest fire cluster",
84 | "dewpoint_temp_2m" = "Dewpoint temperature",
85 | "fire_dist_km" = "Distance to closest fire cluster",
86 | "closest_fire_area" = "Area of closest fire cluster",
87 | "sea_level_pressure" = "Sea level pressure",
88 | "wind_v" = "Wind speed (eastward)",
89 | "wind_u" = "Wind speed (westward)",
90 | "precip" = "Total precipitation",
91 | "pbl_mean" = "Planetary Boundary Layer (mean)",
92 | "temp_2m" = "Temperature",
93 | "aot_anom" = "AOT anomaly (current)",
94 | "aot_anom_lag1" = "AOT anomaly (1-day lag)",
95 | "aot_anom_lag2" = "AOT anomaly (2-day lag)")) %>%
96 | magrittr::extract(1:15,) %>%
97 | {ggplot(data = .,
98 | aes(x = Gain, y = feat_name)) +
99 | geom_segment(aes(yend = feat_name, color = group), xend = 0, lwd = 6) +
100 | xlim(0, NA) +
101 | scale_color_manual(values = MetBrewer::met.brewer("VanGogh2", 15)[c(15, 3, 7, 9)],
102 | name = "") +
103 | theme_classic() +
104 | ylab("") +
105 | theme(legend.position = c(0.7, 0.3))} %>%
106 | ggsave(file.path(path_figures, "figureS08b.png"), .,
107 | width = 4, height = 4)
108 |
--------------------------------------------------------------------------------
/scripts/main/05_09_figureS09.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 9.
8 | # ------------------------------------------------------------------------------
9 | grid <- read_sf(file.path(path_data, "1_grids", "10km_grid"))
10 | pop <- list.files(file.path(path_data, "2_from_EE", "populationDensity_10km_subgrid"),
11 | full.names = T) %>%
12 | purrr:::map_dfr(read.csv)
13 |
14 | grid %<>% left_join(pop)
15 |
16 | # smoke PM data
17 | epa_stations <- readRDS(file.path(path_data, "3_intermediate", "station_smokePM.rds")) %>%
18 | pull(id) %>%
19 | unique
20 |
21 | epa_ll <- read_sf(file.path(path_data, "epa_station_locations")) %>%
22 | st_transform(st_crs(grid)) %>%
23 | filter(id %in% epa_stations)
24 |
25 | grid_station_dists <- st_distance(st_centroid(grid), epa_ll, by_element = FALSE)
26 |
27 | grid$centroid_station_dist <- apply(grid_station_dists, 1, min)
28 |
29 | median(grid$centroid_station_dist)/1e3
30 | weighted.mean(grid$centroid_station_dist, grid$mean)/1e3
31 |
32 | # Plot
33 | pdf(file.path(path_figures, "figureS09.pdf"),
34 | width = 10, height = 5)
35 | par(mar = c(5, 5, 6, 2) + 0.1)
36 | hist(grid$centroid_station_dist/1e3, breaks = seq(0, 300, 10),
37 | xlab = "minimum distance (km)", ylab = "",
38 | main = "", las = 1)
39 | title(ylab = "Frequency", line = 3.5)
40 | title(main = "Distance from prediction grid centroid to EPA station", line = 4)
41 | 50 %>%
42 | {abline(v = ., col = "black", lwd = 3)
43 | text(x = ., y = 15200, label = "merra2\nresolution", col = "black", xpd = T)}
44 |
45 | median(grid$centroid_station_dist/1e3) %>%
46 | {abline(v = ., col = "red", lty = "dashed")
47 | text(x = .-4, y = 13800, label = "median",
48 | col = "red", xpd = T)}
49 |
50 | mean(grid$centroid_station_dist/1e3) %>%
51 | {abline(v = ., col = "blue")
52 | text(x = .+4, y = 14000, label = "mean", col = "blue", xpd = T)}
53 |
54 | weighted.mean(grid$centroid_station_dist/1e3, grid$mean) %>%
55 | {abline(v = ., col = "darkgreen")
56 | text(x = .-8, y = 14000, label = "pop-weighted\nmean",
57 | col = "darkgreen", xpd = T)}
58 |
59 | dev.off()
60 |
--------------------------------------------------------------------------------
/scripts/main/05_10_figureS10.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 10.
8 | # ------------------------------------------------------------------------------
9 | monitors <- st_read(file.path(path_data, "epa_station_locations"))
10 | states <- tigris::states() %>% st_transform(st_crs(monitors))
11 |
12 | monitors$state <- states$STUSPS[st_intersects(monitors, states) %>% unlist]
13 |
14 | smokePM_data <- readRDS(file.path(path_data, "4_clean", "smokePM_training.rds")) %>%
15 | select(id, date, smokePM, fold)
16 |
17 | cv_preds <- list.files(file.path(path_output, "smokePM", "model"),
18 | pattern = "pred_fold",
19 | full.names = TRUE) %>%
20 | purrr::map_dfr(function(x){
21 | readRDS(x) %>%
22 | mutate(test_fold = gsub("^smokePM_pred_fold|_drop|-aod_anom_pred|-traj_points|\\.rds$", "", basename(x)),
23 | drop_var = gsub("^smokePM_pred_fold[0-4]_drop-?|\\.rds$", "", basename(x))) %>%
24 | return()
25 | })
26 |
27 | model_metrics <- cv_preds %>%
28 | mutate(smokePM_pred = pmax(0, smokePM_pred)) %>%
29 | left_join(smokePM_data, by = c("id", "date", "fold")) %>%
30 | mutate(test = fold == test_fold) %>%
31 | eval_metrics(models = drop_var, test_tune = test,
32 | obs = smokePM, pred = smokePM_pred, loc_id = id) %>%
33 | pivot_longer(contains("rank") | contains("value")) %>%
34 | separate(name, into = c("metric", "rank_value"), sep = "_(?=rank|value)") %>%
35 | pivot_wider(values_from = value, names_from = rank_value) %>%
36 | separate(metric, into = c("metric", "subset"), sep = "_", extra = "merge")
37 |
38 | # comparison between full model, without AOD preds, and without HYSPLIT
39 | model_metrics %>%
40 | filter(grepl("day", subset)) %>%
41 | filter(metric %in% c("rmse", "r2", "wr2")) %>%
42 | filter(test == TRUE) %>%
43 | mutate(subset = recode_factor(subset,
44 | "day" = "all days",
45 | "day_sub50" = "days with\n< 50ug",
46 | "day_over50" = "days with\n> 50ug",
47 | .ordered = T),
48 | metric = recode_factor(metric,
49 | "r2" = "R^2",
50 | "wr2" = "within~R^2",
51 | "rmse" = "RMSE",
52 | .ordered = TRUE),
53 | drop_var = case_when(drop_var == "" ~ "full model",
54 | drop_var =="aod_anom_pred" ~ "no AOD predictions",
55 | drop_var =="traj_points" ~ "no HYSPLIT"),
56 | drop_var = factor(drop_var,
57 | levels = c("full model", "no HYSPLIT", "no AOD predictions"),
58 | ordered = TRUE)) %>%
59 | {ggplot(data = .,
60 | aes(x = subset, y = value, color = drop_var,
61 | group = drop_var)) +
62 | geom_point() +
63 | geom_line() +
64 | scale_x_discrete(expand = expansion(mult = 0.04)) +
65 | ylim(0, NA) +
66 | facet_wrap(~metric, scales = "free", strip.position = "left",
67 | labeller = label_parsed) +
68 | theme_classic() +
69 | theme(strip.placement = "outside",
70 | strip.background = element_blank(),
71 | panel.spacing = unit(-0.1, "lines"),
72 | strip.switch.pad.wrap = unit(-0.25, "lines")) +
73 | scale_color_manual(name = "", values = MetBrewer::met.brewer("Homer2", 5)[c(1,2,4)]) +
74 | xlab("") + ylab("")} %>%
75 | ggsave(file.path(path_figures, "figureS10.png"), .,
76 | width = 8, height= 3)
77 |
78 | # change in model r2 from aod and hysplit inclusion
79 | model_metrics %>%
80 | filter(subset == "day" & metric == "r2" & test == TRUE) %>%
81 | mutate(diff = max(value) - value) %>% View
82 |
--------------------------------------------------------------------------------
/scripts/main/05_11_figureS11.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots station MODIS landcover.
8 | # ------------------------------------------------------------------------------
9 | # Load smokePM predictions from held-out folds
10 | smokePM_preds <- list.files(file.path(path_output, "smokePM", "model"),
11 | pattern = "smokePM_pred",
12 | full.names = TRUE) %>%
13 | grep("drop\\.", ., value = TRUE) %>%
14 | map_dfr(function(x){
15 | readRDS(x) %>% mutate(test_fold = as.numeric(
16 | substr(gsub("smokePM_pred_fold", "", basename(x)), 1, 1)))
17 | }) %>%
18 | mutate(smokePM_pred = pmax(smokePM_pred, 0))
19 |
20 | smokePM_data <- readRDS(file.path(path_data, "4_clean", "smokePM_training.rds")) %>%
21 | select(id, date, smokePM, water:wetlands)
22 |
23 | epa_ll <- st_read(file.path(path_data, "epa_station_locations")) %>%
24 | rename(grid_id_10km = grid_10km)
25 |
26 | epa_data <- readRDS(file.path(path_data, "3_intermediate", "station_smokePM.rds"))
27 |
28 | states <- tigris::states(cb = TRUE)
29 |
30 | station_performance <- smokePM_preds %>%
31 | filter(fold == test_fold) %>%
32 | left_join(smokePM_data %>% select(id, date, smokePM)) %>%
33 | select(-test_fold, -date) %>%
34 | nest_by(id) %>%
35 | mutate(n = nrow(data),
36 | fold = unique(data$fold),
37 | n_unique_smokePM = length(unique(data$smokePM))) %>%
38 | filter(n > 1 & n_unique_smokePM > 1) %>%
39 | mutate(r2 = fixest::r2(fixest::feols(smokePM ~ smokePM_pred,
40 | data = data),
41 | "r2") %>% unname,
42 | max_smokePM = max(data$smokePM),
43 | avg_smokePM = mean(data$smokePM),
44 | skew_smokePM = e1071::skewness(data$smokePM),
45 | kurt_smokePM = e1071::kurtosis(data$smokePM),
46 | range_smokePM = range(data$smokePM) %>% diff,
47 | var_smokePM = var(data$smokePM)) %>%
48 | {right_join(epa_ll ,.)} %>%
49 | cbind(.,
50 | stations_50km = st_distance(.) %>%
51 | as.matrix %>%
52 | units::set_units(NULL) %>%
53 | magrittr::is_less_than(50000) %>%
54 | rowSums(),
55 | stations_100km = st_distance(.) %>%
56 | as.matrix %>%
57 | units::set_units(NULL) %>%
58 | magrittr::is_less_than(100000) %>%
59 | rowSums()) %>%
60 | left_join(epa_data %>%
61 | group_by(id, smoke_day) %>%
62 | summarise(avg_totalPM = mean(pm25),
63 | var_totalPM = var(pm25),
64 | skew_totalPM = e1071::skewness(pm25),
65 | kurt_totalPM = e1071::kurtosis(pm25),
66 | .groups = "drop") %>%
67 | mutate(smoke_day = recode(smoke_day,
68 | "1" = "smoke",
69 | "0" = "non_smoke")) %>%
70 | pivot_wider(values_from = ends_with("_totalPM"),
71 | names_sep = "_",
72 | names_from = smoke_day)) %>%
73 | left_join(smokePM_data %>% select(id, water:wetlands) %>% unique) %>%
74 | mutate(class = names(select(st_drop_geometry(.), water:wetlands))[max.col(select(st_drop_geometry(.), water:wetlands))])
75 |
76 | ee_Initialize(email = gee_email)
77 |
78 | lc = ee$ImageCollection("MODIS/006/MCD12Q1") %>%
79 | ee$ImageCollection$filterDate("2013-01-01", "2014-01-01") %>%
80 | ee$ImageCollection$first() %>%
81 | ee$Image$select("LC_Type5")
82 |
83 | if (!file.exists(file.path(path_data, "station_MODIS_landcover.rds"))) {
84 | # Takes ~12 minutes
85 | start_time = get_start_time()
86 | station_lc = map_dfr(1:nrow(station_performance), function(i) {
87 | print(i)
88 | ee_extract(lc, station_performance[i,],
89 | scale = lc$projection()$nominalScale()$getInfo())
90 | }) %>%
91 | select(id, Type5 = LC_Type5)
92 | print_time(start_time)
93 | saveRDS(station_lc, file.path(path_data, "station_MODIS_landcover.rds"))
94 | } else {
95 | station_lc = readRDS(file.path(path_data, "station_MODIS_landcover.rds"))
96 | }
97 |
98 | # plot station land cover spatially
99 | epa_ll %>%
100 | filter(id %in% station_performance$id) %>%
101 | left_join(station_lc) %>%
102 | {ggplot(data = .) +
103 | geom_sf(data = states %>% filter(!(STATEFP %in% nonContig_stateFIPS)),
104 | color = "grey10", fill = "grey95") +
105 | geom_sf(aes(color = case_when(Type5 == 11 ~ "barren",
106 | Type5 == 5 ~ "shrubland",
107 | T ~ "other"),
108 | size = I(ifelse(Type5 %in% c(5, 11), 1.3, 0.9)))) +
109 | scale_color_manual(name = "landcover",
110 | values = c("#6e948c", "black", "#c38f16")) +
111 | theme_void()} %>%
112 | ggsave(file.path(path_figures, "figureS11.png"), ., width = 6, height = 4)
113 |
114 | # 0 1c0dff Water Bodies: at least 60% of area is covered by permanent water bodies.
115 | # 1 05450a Evergreen Needleleaf Trees: dominated by evergreen conifer trees (>2m). Tree cover >10%.
116 | # 2 086a10 Evergreen Broadleaf Trees: dominated by evergreen broadleaf and palmate trees (>2m). Tree cover >10%.
117 | # 3 54a708 Deciduous Needleleaf Trees: dominated by deciduous needleleaf (larch) trees (>2m). Tree cover >10%.
118 | # 4 78d203 Deciduous Broadleaf Trees: dominated by deciduous broadleaf trees (>2m). Tree cover >10%.
119 | # 5 dcd159 Shrub: Shrub (1-2m) cover >10%.
120 | # 6 b6ff05 Grass: dominated by herbaceous annuals (<2m) that are not cultivated.
121 | # 7 dade48 Cereal Croplands: dominated by herbaceous annuals (<2m). At least 60% cultivated cereal crops.
122 | # 8 c24f44 Broadleaf Croplands: dominated by herbaceous annuals (<2m). At least 60% cultivated broadleaf crops.
123 | # 9 a5a5a5 Urban and Built-up Lands: at least 30% impervious surface area including building materials, asphalt, and vehicles.
124 | # 11 f9ffa4 Non-Vegetated Lands: at least 60% of area is non-vegetated barren (sand, rock, soil) with less than 10% vegetation.
125 |
--------------------------------------------------------------------------------
/scripts/main/05_12_figureS12.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 12.
8 | # ------------------------------------------------------------------------------
9 | states <- tigris::states(cb = TRUE)
10 |
11 | epa_ll <- st_read(file.path(path_data, "epa_station_locations")) %>%
12 | rename(grid_id_10km = grid_10km)
13 |
14 | epa_data <- readRDS(file.path(path_data, "3_intermediate", "station_smokePM.rds"))
15 |
16 | smokePM_preds <- list.files(file.path(path_output, "smokePM", "model"),
17 | pattern = "smokePM_pred",
18 | full.names = TRUE) %>%
19 | grep("drop\\.", ., value = TRUE) %>%
20 | map_dfr(function(x){
21 | readRDS(x) %>%
22 | mutate(test_fold = gsub("^smokePM_pred_fold|_drop|-aod_anom_pred|-traj_points|\\.rds$", "", basename(x)))
23 | }) %>%
24 | mutate(smokePM_pred = pmax(smokePM_pred, 0))
25 |
26 | smokePM_data <- readRDS(file.path(path_data, "4_clean", "smokePM_training.rds")) %>%
27 | select(id, date, smokePM)
28 |
29 | station_performance <- smokePM_preds %>%
30 | filter(fold == test_fold) %>%
31 | left_join(smokePM_data %>% select(id, date, smokePM)) %>%
32 | select(-test_fold, -date) %>%
33 | nest_by(id) %>%
34 | mutate(n = nrow(data),
35 | fold = unique(data$fold),
36 | n_unique_smokePM = length(unique(data$smokePM))) %>%
37 | filter(n > 1 & n_unique_smokePM > 1) %>%
38 | mutate(r2 = fixest::r2(fixest::feols(smokePM ~ smokePM_pred,
39 | data = data),
40 | "r2") %>% unname,
41 | max_smokePM = max(data$smokePM),
42 | max_smokePM_pred = max(data$smokePM_pred))
43 |
44 | set.seed(909)
45 | station_set <- station_performance %>%
46 | select(-data) %>%
47 | ungroup %>%
48 | filter(n > 100) %>%
49 | filter(r2 < 0.1)
50 |
51 | smokePM_preds %>%
52 | filter(fold == test_fold) %>%
53 | filter(id %in% station_set$id) %>%
54 | left_join(station_set %>% select(id, r2)) %>%
55 | left_join(smokePM_data %>%
56 | select(id, date, smokePM)) %>%
57 | group_by(id) %>%
58 | mutate(max_error = abs(smokePM - smokePM_pred) == max(abs(smokePM - smokePM_pred))) %>%
59 | mutate(year = lubridate::year(date)) %>%
60 | {ggplot(data = ., aes(x = smokePM, y = smokePM_pred)) +
61 | geom_point(data = filter(., max_error),
62 | size = 3) +
63 | geom_point(aes(color = as.factor(id))) +
64 | geom_abline(intercept = 0, slope = 1, color = "grey") +
65 | geom_text(data = station_set,
66 | aes(x = 0.8*max_smokePM,
67 | y = 0.9*max_smokePM_pred,
68 | label = paste("R^2==", signif(r2, 3), sep = "")),
69 | parse = TRUE,
70 | color = "black") +
71 | geom_text(data = station_set,
72 | aes(x = 0.8*max_smokePM,
73 | y = 0.75*max_smokePM_pred,
74 | label = paste0("n== ", n)),
75 | parse = TRUE,
76 | color = "black") +
77 | facet_wrap(~id, scales = "free",
78 | nrow = 2, ncol = 4) +
79 | theme_classic() +
80 | xlab(expression(observed~smoke~PM[2.5])) +
81 | ylab(expression(predicted~smoke~PM[2.5])) +
82 | theme(legend.position = "none")} %>%
83 | ggsave(file.path(path_figures, "figureS12b.png"), .,
84 | width = 10, height = 4)
85 |
86 | worst_obs <- smokePM_preds %>%
87 | filter(fold == test_fold) %>%
88 | filter(id %in% station_set$id) %>%
89 | left_join(smokePM_data %>%
90 | select(id, date, smokePM)) %>%
91 | group_by(id) %>%
92 | mutate(max_error = abs(smokePM - smokePM_pred) == max(abs(smokePM - smokePM_pred)),
93 | max_error_date = date[max_error == T],
94 | start_date = max_error_date - as.difftime(30, units = "days"),
95 | end_date = max_error_date + as.difftime(30, units = "days"),
96 | .groups = "drop") %>%
97 | filter(max_error)
98 |
99 | epa_data %>%
100 | ungroup %>%
101 | filter(id %in% station_set$id) %>%
102 | left_join(ungroup(worst_obs) %>%
103 | select(id, max_error_date, start_date, end_date)) %>%
104 | filter(date >= start_date & date <= end_date) %>%
105 | group_by(id) %>%
106 | mutate(max_pm = max(pm25)) %>%
107 | ungroup %>%
108 | {ggplot(data = .) +
109 | geom_line(aes(x = date, y = pm25)) +
110 | geom_line(aes(x = date, y = smokePM), color = "red") +
111 | geom_point(data = filter(., date == max_error_date),
112 | aes(x = date, y = pm25), color = "blue") +
113 | geom_point(data = filter(., smoke_day == 1),
114 | mapping = aes(x = date, y = -0.05*max_pm),
115 | color = "grey", inherit.aes = FALSE) +
116 | facet_wrap(~id, scales = "free",
117 | nrow = 2, ncol = 4) +
118 | ylab(expression(PM[2.5])) + xlab("") +
119 | theme_classic() +
120 | theme(legend.position = "none")} %>%
121 | ggsave(file.path(path_figures, "figureS12c.png"), .,
122 | width = 10, height = 4)
123 |
124 | {ggplot() +
125 | geom_sf(data = states %>% filter(!(STATEFP %in% nonContig_stateFIPS))) +
126 | geom_sf(data = epa_ll %>% filter(id %in% unique(smokePM_data$id)),
127 | size = 1) +
128 | geom_sf(data = epa_ll %>% filter(id %in% station_set$id),
129 | mapping = aes(color = as.factor(id)),
130 | size = 3) +
131 | theme_void() +
132 | theme(legend.position = "none")} %>%
133 | ggsave(file.path(path_figures, "figureS12a.png"), .,
134 | width = 5, height = 3.5)
135 |
136 | smokePM_data %>%
137 | group_by(id) %>%
138 | summarise(n = n(), .groups = "drop") %>%
139 | summarise(mean_n = mean(n),
140 | med_n = median(n))
141 |
--------------------------------------------------------------------------------
/scripts/main/05_13_figureS13.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 13.
8 | # ------------------------------------------------------------------------------
9 | counties <- tigris::counties(cb = TRUE)
10 | epa_data <- readRDS(file.path(path_data, "3_intermediate", "station_smokePM.rds")) %>% ungroup
11 | epa_ll <- st_read(file.path(path_data, "epa_station_locations")) %>%
12 | filter(id %in% unique(epa_data$id)) %>%
13 | rename(grid_id_10km = grid_10km) %>%
14 | mutate(county = counties$NAME[st_intersects(geometry,
15 | counties %>% st_transform(st_crs(geometry))) %>%
16 | unlist])
17 |
18 | fire_panels <- data.frame(panel_name = c("November 2018", "October - November 2019", "Fall 2020"),
19 | start_date = as.Date(c("2018-11-01", "2019-10-20", "2020-08-10")),
20 | end_date = as.Date(c("2018-11-30", "2019-11-15", "2020-11-15"))) %>%
21 | mutate(panel_name = factor(panel_name,
22 | levels = panel_name,
23 | ordered = T))
24 |
25 | calfire <- st_read(file.path(path_data, "CAL FIRE FRAP", "fire20_1.gdb"), layer = "firep20_1") %>%
26 | mutate(Fire = recode(FIRE_NAME,
27 | "CAMP" = "Camp Fire",
28 | "KINCADE" = "Kincade Fire",
29 | "AUGUST COMPLEX FIRES" = "August Complex",
30 | "CZU LIGHTNING COMPLEX" = "CZU Lightning Complex"),
31 | panel_name = recode(FIRE_NAME,
32 | "CAMP" = "November 2018",
33 | "KINCADE" = "October - November 2019",
34 | "AUGUST COMPLEX FIRES" = "Fall 2020",
35 | "CZU LIGHTNING COMPLEX" = "Fall 2020"),
36 | panel_fire_no = recode(FIRE_NAME,
37 | "CAMP" = 1,
38 | "KINCADE" = 1,
39 | "AUGUST COMPLEX FIRES" = 2,
40 | "CZU LIGHTNING COMPLEX" = 1,
41 | .default = as.numeric(NA)),
42 | select_fires = ((FIRE_NAME == "CAMP" & YEAR_ == "2018" & ALARM_DATE > as.Date("2018-11-01")) |
43 | (FIRE_NAME == "KINCADE" & YEAR_ == "2019") |
44 | FIRE_NAME %in% c("AUGUST COMPLEX FIRES", "CZU LIGHTNING COMPLEX")))
45 |
46 | obs_data <- epa_data %>%
47 | left_join(epa_ll %>%
48 | st_drop_geometry() %>%
49 | select(id, county)) %>%
50 | filter(county %in% c("Santa Clara", "Fresno", "Sacramento", "Sonoma")) %>%
51 | left_join(fire_panels %>%
52 | rowwise %>%
53 | mutate(all_dates = list(seq.Date(start_date, end_date, by = "day"))) %>%
54 | unnest_longer(all_dates),
55 | by = c("date" = "all_dates")) %>%
56 | filter(!is.na(panel_name))
57 |
58 | obs_data %>%
59 | mutate(id = as.factor(id)) %>%
60 | {ggplot(data = ., aes(x = date, y = smokePM, group = id, color = county)) +
61 | geom_line() +
62 | geom_linerange(data = left_join(calfire %>% filter(select_fires),
63 | obs_data %>% group_by(panel_name) %>% summarise(max = max(smokePM, na.rm = T))) %>%
64 | mutate(panel_name = factor(panel_name, levels = unique(panel_name), ordered = T)),
65 | aes(xmin = as.Date(ALARM_DATE),
66 | xmax = as.Date(CONT_DATE),
67 | y = -panel_fire_no*0.1*max),
68 | inherit.aes = FALSE) +
69 | geom_label(data = left_join(calfire %>% filter(select_fires) %>% st_drop_geometry(),
70 | obs_data %>% group_by(panel_name) %>% summarise(max = max(smokePM, na.rm = T))) %>%
71 | mutate(panel_name = factor(panel_name, levels = unique(panel_name), ordered = T),
72 | mid_date = as.Date(ALARM_DATE) + floor(difftime(as.Date(CONT_DATE), as.Date(ALARM_DATE), units = "days")/2)),
73 | aes(x = mid_date, y = -panel_fire_no*0.1*max, label = Fire),
74 | inherit.aes = FALSE, size = 3,
75 | label.size = NA, label.padding = unit(0.1, "lines")) +
76 | facet_wrap(~panel_name, scales = "free",
77 | nrow =3) +
78 | scale_color_manual(values = MetBrewer::met.brewer("Juarez")[c(1,6,5,3)]) +
79 | theme_classic() +
80 | theme(strip.background = element_blank(),
81 | strip.text = element_text(size = 13, face = 2)) +
82 | ylab(expression(paste("observed smoke ", PM[2.5]," (",mu, "g/", m^3, ")"))) +
83 | xlab("")} %>%
84 | ggsave(file.path(path_figures, "figureS13.png"), .,
85 | width = 6, height = 8)
86 |
--------------------------------------------------------------------------------
/scripts/main/05_14_figureS14.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 14.
8 | # ------------------------------------------------------------------------------
9 | states <- tigris::states(cb = T) %>% filter(STATEFP %in% nonContig_stateFIPS == FALSE)
10 |
11 | mtbs <- read_sf(file.path(path_data, "mtbs_perimeter_data"))
12 | mtbs %<>% st_make_valid()
13 |
14 | state_fires <- st_intersection(mtbs, states)
15 |
16 | test <- state_fires
17 |
18 | test$state_area = st_area(test$geometry)
19 |
20 | state_fire_trend <- test %>% st_drop_geometry() %>%
21 | mutate(year = lubridate::year(Ig_Date)) %>%
22 | group_by(year, STUSPS) %>%
23 | summarise(n_fire = n(),
24 | area_fire = sum(state_area)) %>%
25 | mutate(period = case_when(year >= 2006 & year <= 2010 ~ "2006_2010",
26 | year >= 2016 & year <= 2020 ~ "2016_2020",
27 | T ~ as.character(NA))) %>%
28 | filter(!is.na(period)) %>%
29 | group_by(period, STUSPS) %>%
30 | summarise(n_fire = mean(n_fire),
31 | area_fire = mean(area_fire)) %>%
32 | pivot_wider(names_from = period,
33 | values_from = contains("fire")) %>%
34 | mutate(pct_change_fire_area = (area_fire_2016_2020 - area_fire_2006_2010)/area_fire_2006_2010)
35 |
36 | states %>%
37 | left_join(state_fire_trend) %>%
38 | mutate(pct_change_fire_area = unclass(pct_change_fire_area)) %>%
39 | {ggplot(data =., aes(fill = pct_change_fire_area)) +
40 | geom_sf() +
41 | scale_fill_gradient2(low = muted("blue"), high = muted("red")) +
42 | labs(fill = "Percent change\nin burned area") +
43 | theme_void() +
44 | theme(legend.position = c(0.92, 0.32))} %>%
45 | ggsave(file.path(path_figures, "figureS14.png"), .,
46 | width = 6.75, height = 4)
47 |
--------------------------------------------------------------------------------
/scripts/main/05_15_figureS15.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 15.
8 | # ------------------------------------------------------------------------------
9 | states <- tigris::states(cb = T) %>% filter(STATEFP %in% nonContig_stateFIPS == FALSE)
10 |
11 | mtbs <- read_sf(file.path(path_data, "mtbs_perimeter_data"))
12 | mtbs %<>% st_make_valid()
13 | mtbs$area <- unclass(st_area(mtbs))/1e6
14 |
15 | mtbs %>%
16 | st_simplify(dTolerance = 0.0005) %>%
17 | mutate(year = lubridate::year(Ig_Date),
18 | state = substr(Event_ID, 1, 2)) %>%
19 | filter(state %in% c("AK", "HI") == FALSE) %>%
20 | filter(year >= 2006) %>%
21 | # filter(year <= 2010) %>%
22 | group_by(year) %>%
23 | slice_max(BurnBndAc, n = 8) %>%
24 | # st_drop_geometry() %>% View
25 | {ggplot(data = .) +
26 | geom_sf(data = states, lwd = 0.05) +
27 | geom_sf(color = "red", fill = "red") +
28 | facet_wrap(~year, nrow = 3, ncol = 5) +
29 | theme_void() +
30 | theme(text = element_text(size = 20))} %>%
31 | ggsave(file.path(path_figures, "figureS15.png"),
32 | ., width = 15, height = 7)
33 |
--------------------------------------------------------------------------------
/scripts/main/05_16_figureS16.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Plots supplemental figure 16.
8 | # ------------------------------------------------------------------------------
9 | states <- tigris::states(cb = T) %>% filter(STATEFP %in% nonContig_stateFIPS == FALSE)
10 |
11 | mtbs <- read_sf(file.path(path_data, "mtbs_perimeter_data"))
12 | mtbs %<>% st_make_valid()
13 | mtbs$area <- unclass(st_area(mtbs))/1e6
14 |
15 | mtbs %>%
16 | st_simplify(dTolerance = 0.0005) %>%
17 | mutate(year = lubridate::year(Ig_Date),
18 | state = substr(Event_ID, 1, 2)) %>%
19 | filter(state %in% c("AK", "HI") == FALSE) %>%
20 | filter(year >= 2006) %>%
21 | group_by(state) %>%
22 | slice_max(BurnBndAc, n = 3) %>%
23 | {ggplot(data = .) +
24 | geom_sf(data = states, lwd = 0.05) +
25 | geom_sf(color = "red", fill = "red") +
26 | facet_wrap(~year, nrow = 3, ncol = 5) +
27 | theme_void() +
28 | theme(text = element_text(size = 20))} %>%
29 | ggsave(file.path(path_figures, "figureS16.png"),
30 | ., width = 15, height = 7)
31 |
--------------------------------------------------------------------------------
/scripts/main/05_18_figureS18.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | #-------------------------------------------------------------------------------
6 | # Written by: Jessica Li
7 | # Compares smoke PM2.5 at PurpleAir monitors vs 10 km grid.
8 | #-------------------------------------------------------------------------------
9 | # Choose minimum number of hourly observations
10 | min_hours = 1
11 |
12 | #-------------------------------------------------------------------------------
13 | #### Process PurpleAir PM2.5 to monitor-day ####
14 | # Original hourly data file too large to load, so we run this step on cluster
15 | # and load from intermediate saved daily data file
16 |
17 | # # Load clean PurpleAir outdoor monitor data
18 | # # Have 79837229 observations
19 | # purpleair = readRDS(file.path(path_data, "PurpleAir", "outdoor_monitor_data_clean_part1.rds")) %>%
20 | # mutate(
21 | # # bottom code at 0
22 | # pm25_out = pmax(pm25_out, 0),
23 | # # top code any values 500-1000 at 500
24 | # pm25_out = ifelse(pm25_out >= 500 & pm25_out <= 1000, 500, pm25_out),
25 | # # set any values > 1000 to NA
26 | # pm25_out = ifelse(pm25_out > 1000, NA, pm25_out)
27 | # ) %>%
28 | # # Drop missing hourly observations
29 | # # Have 58312646 observations (73.0%)
30 | # drop_na(pm25_out) %>%
31 | # # Filter to observations over time period for which we have smoke days
32 | # # Have 58279714 observations (99.9%)
33 | # # Have 6618 monitors
34 | # filter(year >= 2006, year <= 2020) %>%
35 | # select(purpleair_id = ID_out, lon = Lon_out, lat = Lat_out, year, month, day, hour, pm25 = pm25_out)
36 | #
37 | # # Aggregate to daily level
38 | # purpleair = purpleair %>%
39 | # # Have 2516222 observations
40 | # group_by(purpleair_id, lon, lat, year, month, day) %>%
41 | # summarize(pm25 = mean(pm25),
42 | # num_hourly_obs = n()) %>%
43 | # ungroup() %>%
44 | # # Filter to days with minimum number of hourly observations
45 | # # Have 2516222 observations (100%)
46 | # # Have 6618 monitors (100%)
47 | # filter(num_hourly_obs >= min_hours) %>%
48 | # group_by(purpleair_id) %>%
49 | # mutate(num_daily_obs = n()) %>%
50 | # ungroup()
51 | #
52 | # saveRDS(purpleair, file.path(path_data, "PurpleAir", "outdoor_monitor_data_clean_part1_daily.rds"))
53 |
54 | purpleair = readRDS(file.path(path_data, "PurpleAir", "outdoor_monitor_data_clean_part1_daily.rds"))
55 |
56 | #-------------------------------------------------------------------------------
57 | #### Match PurpleAir monitors and 10 km grid cells ####
58 | # Load PurpleAir monitor locations
59 | purpleair_loc = readRDS(file.path(path_data, "PurpleAir", "outdoor_monitor_locs.rds")) %>%
60 | select(purpleair_id = ID_out, lon = Lon_out, lat = Lat_out) %>%
61 | st_as_sf(coords = c("lon", "lat"), crs = 4326)
62 |
63 | # Load 10 km grid
64 | grid_10km = read_sf(file.path(path_data, "1_grids", "10km_grid")) %>%
65 | select(grid_id_10km = ID)
66 |
67 | # Match each PurpleAir monitor to the overlapping 10 km grid cell
68 | o = purpleair_loc %>%
69 | st_transform(st_crs(grid_10km)) %>%
70 | st_join(grid_10km) %>%
71 | st_drop_geometry()
72 |
73 | #-------------------------------------------------------------------------------
74 | #### Classify monitor-days as smoke day or non-smoke day ####
75 | # Load smoke days
76 | smoke_days = readRDS(file.path(path_data, "3_intermediate", "all_smoke_days_incl_cloudy.rds")) %>%
77 | select(grid_id_10km, date, smoke_day)
78 |
79 | # Join smoke day classification to PM2.5 observations by grid cell ID and date
80 | purpleair = purpleair %>%
81 | left_join(o, by = "purpleair_id") %>%
82 | mutate(date = ymd(paste(year, month, day))) %>%
83 | left_join(smoke_days, by = c("grid_id_10km", "date")) %>%
84 | replace_na(list(smoke_day = 0))
85 |
86 | #-------------------------------------------------------------------------------
87 | #### Calculate smoke PM observations ####
88 | # For each monitor-month, limit to non-smoke day observations at the monitor in
89 | # that calendar month in the three-year window
90 | background_pm25 = purpleair %>%
91 | filter(smoke_day == 0) %>%
92 | group_by(purpleair_id, month, year) %>%
93 | summarize(pm25 = list(pm25)) %>%
94 | rowwise() %>%
95 | mutate(nobs = length(pm25)) %>%
96 | ungroup() %>%
97 | arrange(purpleair_id, month, year) %>%
98 | group_by(purpleair_id, month) %>%
99 | mutate(pm25_lead = lead(pm25, n = 1, default = list(NA)),
100 | pm25_lag = lag(pm25, n = 1, default = list(NA)),
101 | nobs_lead = lead(nobs, n = 1, default = 0),
102 | nobs_lag = lag(nobs, n = 1, default = 0)) %>%
103 | ungroup() %>%
104 | rowwise() %>%
105 | mutate(pm25_3yr = list(c(pm25, pm25_lag, pm25_lead)),
106 | nobs_3yr = nobs + nobs_lead + nobs_lag) %>%
107 | rowwise() %>%
108 | # Obtain background PM2.5 by aggregating (taking the median)
109 | mutate(pm25_med_3yr = median(unlist(pm25_3yr), na.rm = T)) %>%
110 | select(purpleair_id, year, month, pm25_med_3yr, nobs_3yr, nobs, nobs_lead, nobs_lag) %>%
111 | filter(nobs > 0, (nobs_lead > 0 | nobs_lag > 0))
112 |
113 | # For each monitor-smoke day, match to background PM2.5 for the monitor-month
114 | purpleair = purpleair %>%
115 | # Have 325490 observations (12.9%)
116 | # Have 5644 monitors (85.3%)
117 | filter(smoke_day == 1) %>%
118 | # Drop observations with insufficient data for calculating background PM2.5
119 | # Have 175312 observations (53.9%)
120 | # Have 2550 monitors (45.2%)
121 | inner_join(background_pm25, by = c("purpleair_id", "year", "month")) %>%
122 | # Subtract background PM2.5 from total PM2.5
123 | mutate(smokePM = pmax(ifelse(smoke_day == 1, pm25 - pm25_med_3yr, 0), 0))
124 |
125 | #-------------------------------------------------------------------------------
126 | #### Match to smoke PM predictions ####
127 | # Load 10 km grid smoke PM predictions
128 | preds = readRDS(file.path(path_output, "smokePM", "predictions", "combined", "smokePM_predictions_20060101_20201231.rds")) %>%
129 | mutate(smokePM_pred = pmax(smokePM_pred, 0))
130 |
131 | # Join smoke PM predictions to observations by 10 km grid cell ID and date
132 | df = purpleair %>%
133 | left_join(preds, by = c("grid_id_10km", "date"))
134 |
135 | #-------------------------------------------------------------------------------
136 | #### Compare smoke PM predictions and observations ####
137 | # Plot distribution of PurpleAir monitor-smoke days over time
138 | p_dates = ggplot(df, aes(x = date)) +
139 | # Each bin is approximately 2 weeks large
140 | geom_histogram(bins = ((as.integer(max(df$date) - min(df$date)) + 1) %/% 14) + 1,
141 | closed = "left") +
142 | # Enforce limits on date range
143 | scale_x_date(limits = c(min(df$date) - days(1), max(df$date) + days(1)),
144 | oob = squish) +
145 | theme_classic() +
146 | labs(title = "b) temporal distribution of PurpleAir data",
147 | y = "count of monitor-smoke days") +
148 | theme(plot.title = element_text(face = "bold", hjust = -0.22))
149 |
150 | # Count smoke days at each monitor
151 | df_sf = df %>%
152 | group_by(lon, lat) %>%
153 | summarize(num_smoke_days = n()) %>%
154 | ungroup() %>%
155 | st_as_sf(coords = c("lon", "lat"), crs = 4326)
156 | conus = states() %>%
157 | filter(!(NAME %in% c("Alaska",
158 | "American Samoa",
159 | "Guam",
160 | "Hawaii",
161 | "Commonwealth of the Northern Mariana Islands",
162 | "Puerto Rico",
163 | "United States Virgin Islands")))
164 |
165 | # Plot map showing spatial distribution of PurpleAir monitors with smoke days
166 | p_map = ggplot() +
167 | geom_sf(data = conus) +
168 | geom_sf(data = df_sf, mapping = aes(color = num_smoke_days), alpha = 0.5) +
169 | theme_void() +
170 | labs(title = "c) spatial distribution of PurpleAir data",
171 | color = "count of smoke days") +
172 | theme(legend.position = "bottom",
173 | plot.title = element_text(face = "bold", hjust = 0.023)) +
174 | guides(color = guide_colorbar(title.vjust = 0.8))
175 |
176 | # Plot scatter and simple linear regression of predictions on observations
177 | p_comparison = ggplot(df, aes(x = smokePM, y = smokePM_pred)) +
178 | geom_bin2d(bins = 70) +
179 | geom_abline(intercept = 0, slope = 1, color = "grey30") +
180 | theme_classic() +
181 | scale_fill_continuous(type = "viridis",
182 | trans = "log",
183 | breaks = c(1, 10, 100, 1000, 10000)) +
184 | scale_x_continuous(trans = "pseudo_log",
185 | breaks = c(0, 1, 5, 10, 50, 100, 500),
186 | expand = c(0, 0)) +
187 | scale_y_continuous(trans = "pseudo_log",
188 | breaks = c(0, 1, 5, 10, 50, 100, 500),
189 | expand = c(0, 0)) +
190 | labs(title = "a) predicted and observed smoke pollution",
191 | x = expression(observed~smokePM[2.5]~(PurpleAir)),
192 | y = expression(predicted~smokePM[2.5]),
193 | caption = paste("count of monitors =",
194 | prettyNum(length(unique(df$purpleair_id)),
195 | big.mark = ",", scientific = F))) +
196 | theme(plot.title = element_text(face = "bold", hjust = -0.3))
197 |
198 | # Compose into one figure
199 | p = grid.arrange(p_comparison, p_dates, p_map,
200 | layout_matrix = matrix(c(1, 1, 1, 1, 2, 2, 2, 2,
201 | 1, 1, 1, 1, 3, 3, 3, 3,
202 | 1, 1, 1, 1, 3, 3, 3, 3),
203 | nrow = 3, byrow = T))
204 | ggsave(file.path(path_figures, "figureS18.png"),
205 | plot = p, width = 14, height = 8)
206 |
207 | # Calculate R2
208 | r2(feols(smokePM_pred ~ smokePM, df), "r2")
209 |
--------------------------------------------------------------------------------
/scripts/main/06_01_tableS01.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | #-------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Calculates supplementary table 1.
8 | #-------------------------------------------------------------------------------
9 | epa_data <- readRDS(file.path(path_data, "3_intermediate", "station_smokePM.rds")) %>%
10 | ungroup
11 |
12 | grid_10km <- st_read(file.path(path_data, "1_grids", "grid_10km_wgs84")) %>%
13 | mutate(grid_index = 1:n())
14 | monitors <- st_read(file.path(path_data, "epa_station_locations")) %>%
15 | filter(id %in% unique(epa_data$id)) %>%
16 | mutate(monitor_index = 1:n())
17 | states <- tigris::states() %>% st_transform(st_crs(monitors))
18 |
19 | monitors$state <- states$STUSPS[st_intersects(monitors, states) %>% unlist]
20 |
21 | # Takes 13 minutes to run
22 | Sys.time()
23 | cell_monitor_dist <- data.frame(start = c(seq(0, nrow(grid_10km), by = 2000), nrow(grid_10km)) + 1) %>%
24 | mutate(end = lead(start) - 1) %>%
25 | filter(!is.na(end)) %>%
26 | purrr::pmap(function(start, end){
27 | print(Sys.time())
28 | st_distance(grid_10km[start:end,], monitors) %>%
29 | return
30 | }) %>%
31 | do.call(what = rbind)
32 | Sys.time()
33 |
34 | smoke_days <- readRDS(file.path(path_data, "3_intermediate", "all_smoke_days_incl_cloudy.rds")) %>%
35 | left_join(readRDS(file.path(path_data, "3_intermediate", "all_smoke_days.rds")) %>%
36 | rename(plume_day = smoke_day) %>%
37 | select(-note_smoke_date_not_online)) %>%
38 | replace_na(list(plume_day = 0))
39 |
40 | smoke_missing_dates = readRDS(file.path(path_data, "smoke", "smoke_dates_not_online.rds"))
41 | smoke_missing_dates = ymd(smoke_missing_dates)
42 |
43 | # add distance to smoke-day to epa data
44 | monitor_smoke_dist <- epa_data %>%
45 | left_join(monitors %>% st_drop_geometry() %>% select(id, monitor_index)) %>%
46 | select(id, grid_id_10km, monitor_index, date, pm25) %>%
47 | nest_by(date) %>%
48 | rename(monitor_obs = data) %>%
49 | left_join(smoke_days %>%
50 | left_join(grid_10km %>% st_drop_geometry %>% select(grid_id_10km = ID, grid_index)) %>%
51 | nest_by(date) %>%
52 | rename(smoke_days = data)) %>%
53 | purrr::pmap_dfr(function(date, monitor_obs, smoke_days){
54 | if(is.null(smoke_days)){
55 | plume_dist = Inf
56 | smoke_dist = Inf
57 | } else {
58 | plume_dist <- cell_monitor_dist[smoke_days %>%
59 | filter(plume_day == 1) %>%
60 | pull(grid_index),
61 | monitor_obs %>%
62 | pull(monitor_index)] %>%
63 | matrix(ncol = nrow(monitor_obs)) %>%
64 | matrixStats::colMins()
65 | smoke_dist <- cell_monitor_dist[smoke_days %>%
66 | filter(smoke_day == 1) %>%
67 | pull(grid_index),
68 | monitor_obs %>%
69 | pull(monitor_index)] %>%
70 | matrix(ncol = nrow(monitor_obs)) %>%
71 | matrixStats::colMins()
72 | }
73 | monitor_obs %>%
74 | mutate(date = date) %>%
75 | cbind(plume_dist = plume_dist/1e3) %>%
76 | cbind(smoke_dist = smoke_dist/1e3) %>%
77 | return
78 | })
79 |
80 | saveRDS(monitor_smoke_dist,
81 | file.path(path_data, "monitor_smoke_distance.rds"))
82 |
83 | # monitor_smoke_dist <- readRDS(file.path(path_data, "monitor_smoke_distance.rds"))
84 |
85 | reg_df <- monitor_smoke_dist %>%
86 | left_join(monitors %>% st_drop_geometry() %>%
87 | select(id, state)) %>%
88 | mutate(year = lubridate::year(date),
89 | month = lubridate::month(date)) %>%
90 | mutate(plume_dist_factor = case_when(plume_dist == 0 ~ 0,
91 | plume_dist > 0 & plume_dist <= 100 ~ 1,
92 | plume_dist > 100 & plume_dist <= 250 ~ 2,
93 | plume_dist > 250 & plume_dist <= 500 ~ 3,
94 | plume_dist > 500 & plume_dist <= 750 ~ 4,
95 | plume_dist > 750 & plume_dist <= 1000 ~ 5,
96 | plume_dist > 1000 ~ 6),
97 | smoke_dist_factor = case_when(smoke_dist == 0 ~ 0,
98 | smoke_dist > 0 & smoke_dist <= 100 ~ 1,
99 | smoke_dist > 100 & smoke_dist <= 250 ~ 2,
100 | smoke_dist > 250 & smoke_dist <= 500 ~ 3,
101 | smoke_dist > 500 & smoke_dist <= 750 ~ 4,
102 | smoke_dist > 750 & smoke_dist <= 1000 ~ 5,
103 | smoke_dist > 1000 ~ 6)) %>%
104 | mutate(across(ends_with("dist_factor"), function(x){ recode_factor(x,
105 | `0` = "overhead",
106 | `1` = "0 - 100",
107 | `2` = "100 - 250",
108 | `3` = "250 - 500",
109 | `4` = "500 - 750",
110 | `5` = "750 - 1000",
111 | `6` = "1000+",
112 | .ordered = TRUE)}))
113 |
114 | # add in meteorology (temperature, dewpoint temperature, precipitation)
115 | epa_station_grid_cells <- epa_data$grid_id_10km %>% unique
116 | # This may require large amount of memory
117 | meteor <- c(grep("temperature|precipitation|wind",
118 | list.files(file.path(path_data, "ERA5_variables", "Land"), full.names = T),
119 | value = T),
120 | grep("boundary_layer",
121 | list.files(file.path(path_data, "ERA5_variables", "Global"), full.names = T),
122 | value = T)) %>%
123 | paste0("/USA/10km_grid/") %>%
124 | {paste0(., ifelse(grepl("precipitation", .), "UTC+0000", "UTC-0600"))} %>%
125 | list.files(full.names = T) %>%
126 | map(function(x) {
127 | print(x)
128 | type = basename(x) %>%
129 | gsub("daily_|_of_1-hourly", "", .)
130 | print(type)
131 | list.files(x, full.names = T) %>%
132 | purrr::map_dfr(function(y){
133 | readRDS(y) %>%
134 | filter(id_grid %in% epa_station_grid_cells) %>%
135 | rename_with(.fn = function(z){paste0(gsub("2m_|10m_", "", z), "_", type)},
136 | .cols = !c(date, id_grid))
137 | })
138 | }) %>%
139 | reduce(full_join)
140 |
141 | reg_df %<>% left_join(meteor, by = c("grid_id_10km" = "id_grid", "date" = "date"))
142 |
143 | saveRDS(reg_df,
144 | file.path(path_data, "monitor_smoke_distance_w_met.rds"))
145 |
146 | # reg_df <- readRDS(file.path(path_data, "monitor_smoke_distance_w_met.rds"))
147 |
148 |
149 | etable(feols(pm25 ~ i(plume_dist_factor, ref = "1000+") |
150 | id^month + state^year^month,
151 | data = reg_df),
152 | feols(pm25 ~ i(smoke_dist_factor, ref = "1000+") |
153 | id^month + state^year^month,
154 | data = reg_df),
155 | feols(pm25 ~ i(plume_dist_factor, ref = "1000+") +
156 | ns(temperature_mean, 5) + ns(dewpoint_temperature_mean, 5) +
157 | ns(total_precipitation_maximum, 5) + ns(boundary_layer_height_maximum, 5) +
158 | ns(boundary_layer_height_mean, 5) + ns(boundary_layer_height_minimum, 5) +
159 | temperature_mean:total_precipitation_maximum |
160 | id^month + state^year^month,
161 | data = reg_df),
162 | feols(pm25 ~ i(smoke_dist_factor, ref = "1000+") +
163 | ns(temperature_mean, 5) + ns(dewpoint_temperature_mean, 5) +
164 | ns(total_precipitation_maximum, 5) + ns(boundary_layer_height_maximum, 5) +
165 | ns(boundary_layer_height_mean, 5) + ns(boundary_layer_height_minimum, 5) +
166 | temperature_mean:total_precipitation_maximum |
167 | id^month + state^year^month,
168 | data = reg_df),
169 | keep = "dist_factor",
170 | subtitles = c("plumes", "plumes + hysplit", "plumes w meteorology", "plumes + hysplit w meteorology"),
171 | tex = TRUE,
172 | file = file.path(path_tables, "tableS01.tex"))
173 |
174 | feols(pm25 ~ i(smoke_dist_factor, ref = "1000+") +
175 | ns(temperature_mean, 5) + ns(dewpoint_temperature_mean, 5) +
176 | ns(total_precipitation_maximum, 5) + ns(boundary_layer_height_maximum, 5) +
177 | ns(boundary_layer_height_mean, 5) + ns(boundary_layer_height_minimum, 5) +
178 | temperature_mean:total_precipitation_maximum |
179 | id^month + state^year^month,
180 | data = reg_df %>%
181 | filter(!(date %in% smoke_missing_dates)) %>%
182 | filter(is.finite(plume_dist)))
183 |
--------------------------------------------------------------------------------
/scripts/main/06_02_tableS04.R:
--------------------------------------------------------------------------------
1 | source("scripts/setup/00_01_load_packages.R")
2 | source("scripts/setup/00_02_load_functions.R")
3 | source("scripts/setup/00_03_load_settings.R")
4 |
5 | # ------------------------------------------------------------------------------
6 | # Written by: Marissa Childs
7 | # Calculates model performance metrics.
8 | # ------------------------------------------------------------------------------
9 | monitors <- st_read(file.path(path_data, "epa_station_locations"))
10 | states <- tigris::states() %>% st_transform(st_crs(monitors))
11 |
12 | monitors$state <- states$STUSPS[st_intersects(monitors, states) %>% unlist]
13 |
14 | smokePM_data <- readRDS(file.path(path_data, "4_clean", "smokePM_training.rds")) %>%
15 | select(id, date, smokePM, fold)
16 |
17 | cv_preds <- list.files(file.path(path_output, "smokePM", "model"),
18 | pattern = "pred_fold",
19 | full.names = TRUE) %>%
20 | purrr::map_dfr(function(x){
21 | readRDS(x) %>%
22 | mutate(test_fold = gsub("^smokePM_pred_fold|_drop|-aod_anom_pred|-traj_points|\\.rds$", "", basename(x)),
23 | drop_var = gsub("^smokePM_pred_fold[0-4]_drop-?|\\.rds$", "", basename(x))) %>%
24 | return()
25 | })
26 |
27 | model_metrics <- cv_preds %>%
28 | mutate(smokePM_pred = pmax(0, smokePM_pred)) %>%
29 | left_join(smokePM_data, by = c("id", "date", "fold")) %>%
30 | mutate(test = fold == test_fold) %>%
31 | eval_metrics(models = drop_var, test_tune = test,
32 | obs = smokePM, pred = smokePM_pred, loc_id = id) %>%
33 | pivot_longer(contains("rank") | contains("value")) %>%
34 | separate(name, into = c("metric", "rank_value"), sep = "_(?=rank|value)") %>%
35 | pivot_wider(values_from = value, names_from = rank_value) %>%
36 | separate(metric, into = c("metric", "subset"), sep = "_", extra = "merge")
37 |
38 | # metrics on different sample for main model, table S10
39 | model_metrics %>%
40 | filter(drop_var == "") %>%
41 | filter(test == TRUE & metric != "nobs" & metric != "me" &
42 | !grepl("bin", metric, ignore.case = TRUE)) %>%
43 | select(-drop_var, -test, -rank) %>%
44 | pivot_wider(names_from = metric, values_from = value)
45 |
46 | # confirm two decimal places for RMSE on over 50 days
47 | model_metrics %>%
48 | filter(drop_var=="", test==T, metric=="rmse", subset=="day_over50") %>%
49 | pull(value)
50 |
--------------------------------------------------------------------------------
/scripts/setup/00_01_load_packages.R:
--------------------------------------------------------------------------------
1 | library(RCurl)
2 | library(R.utils)
3 | library(XML)
4 | library(rvest)
5 | library(rlang)
6 | library(ff)
7 |
8 | library(tigris)
9 | library(tidycensus)
10 | library(rgee)
11 |
12 | library(splitr)
13 |
14 | library(tools)
15 | library(readr)
16 | library(readxl)
17 | library(retry)
18 |
19 | library(stringr)
20 | library(lubridate)
21 |
22 | library(parallel)
23 | library(foreach)
24 | library(doParallel)
25 |
26 | library(FNN)
27 | library(sp)
28 | library(sf) # Linking to GEOS 3.9.1, GDAL 3.3.1, PROJ 8.1.0
29 | library(ncdf4)
30 | library(raster)
31 | library(rgdal)
32 | library(rgeos)
33 | library(SpatialPosition)
34 | library(exactextractr)
35 |
36 | library(magrittr)
37 | library(tibble)
38 | library(plyr)
39 | library(dplyr)
40 | library(tidyr)
41 | library(purrr)
42 |
43 | import::from(data.table, .except = c("hour", "month", "wday", "week", "year"))
44 | library(gtable)
45 | library(fixest)
46 | library(splines)
47 | library(matrixStats)
48 |
49 | library(xgboost)
50 | library(rBayesianOptimization)
51 |
52 | library(ggplot2)
53 | library(ggpubr)
54 | library(grid)
55 | library(gridExtra)
56 | library(cowplot)
57 | library(scales)
58 | library(RColorBrewer)
59 | library(layer)
60 | library(facetscales)
61 | library(DiagrammeR)
62 | library(DiagrammeRsvg)
63 | library(rsvg)
64 |
--------------------------------------------------------------------------------
/scripts/setup/00_02_load_functions.R:
--------------------------------------------------------------------------------
1 | # Timers
2 | print_time <- function(start, unit = "auto", message = "Time elapsed:") {
3 | end <- Sys.time()
4 | d <- difftime(time1 = end, time2 = start, units = unit)
5 | t <- round(d, digits = 1)
6 | u <- units(d)
7 |
8 | print(paste("Start time:", start))
9 | print(paste("End time:", end))
10 | message <- paste(message, t, u)
11 | print(message)
12 | return(d)
13 | }
14 |
15 | get_start_time <- function(message = "Time started:") {
16 | t <- Sys.time()
17 | print(paste(message, t))
18 | return(t)
19 | }
20 |
21 | # Calculations
22 | nonsmoke_medians <- function(data,
23 | main_var,
24 | smoke_day,
25 | spatial_unit,
26 | temporal_unit,
27 | temporal_trend){
28 | main_var <- enquo(main_var)
29 | smoke_day <- enquo(smoke_day)
30 | spatial_unit <- enquo(spatial_unit)
31 | temporal_unit <- enquo(temporal_unit)
32 | temporal_trend <- enquo(temporal_trend)
33 |
34 | new_name <- paste0(rlang::as_name(main_var), "_med_3yr")
35 |
36 | full_panel <- expand.grid(id = data %>% pull(!!spatial_unit) %>% unique,
37 | month = data %>% pull(!!temporal_unit) %>% unique,
38 | year = data %>% pull(!!temporal_trend) %>% unique) %>%
39 | rename(!!spatial_unit := id,
40 | !!temporal_unit := month,
41 | !!temporal_trend := year) %>%
42 | ungroup
43 |
44 | data %>%
45 | filter(!is.na(!!main_var) & !!smoke_day == 0) %>%
46 | full_join(full_panel) %>%
47 | group_by(!!spatial_unit, !!temporal_unit, !!temporal_trend) %>%
48 | summarise(main_var = list(!!main_var),
49 | nobs = n(),
50 | .groups = "drop") %>%
51 | arrange(!!spatial_unit, !!temporal_unit, !!temporal_trend) %>%
52 | group_by(!!spatial_unit, !!temporal_unit) %>%
53 | mutate(main_var_lag = lag(main_var, n = 1, default = list(NA)),
54 | main_var_lead = lead(main_var, n = 1, default = list(NA)),
55 | nobs_lag = lag(nobs, n = 1, default = 0),
56 | nobs_lead = lead(nobs, n = 1, default = 0)) %>%
57 | ungroup %>%
58 | rowwise %>%
59 | mutate(main_var_3yr = list(c(main_var, main_var_lag, main_var_lead)),
60 | main_var_med_3yr = median(unlist(main_var_3yr), na.rm = T),
61 | nobs_3yr = nobs + nobs_lead + nobs_lag) %>%
62 | ungroup %>%
63 | transmute(!!spatial_unit, !!temporal_unit, !!temporal_trend,
64 | nobs_3yr,
65 | !!new_name := main_var_med_3yr)
66 | }
67 |
68 | eval_metrics <- function(data, models, test_tune, obs, pred,
69 | loc_id, bins = c(0, 5, 10, 20, 50, 100, 10000)){
70 | library(fixest)
71 | models <- enquo(models)
72 | test_tune <- enquo(test_tune)
73 | obs <- enquo(obs)
74 | pred <- enquo(pred)
75 | loc_id <- enquo(loc_id)
76 |
77 | data %<>%
78 | rename(obs = !!obs,
79 | pred = !!pred,
80 | loc_id = !!loc_id) %>%
81 | mutate(month = lubridate::month(date),
82 | year = lubridate::year(date),
83 | month_year = paste0(month, "_", year))
84 | day_eval <- data %>%
85 | nest_by(!!models, !!test_tune) %>%
86 | # for each model and test vs train folds, calculate metrics
87 | mutate(wr2_day = r2(feols(obs ~ pred | loc_id + year,
88 | data = data), "wr2"),
89 | r2_day = r2(feols(obs ~ pred,
90 | data = data), "r2"),
91 | rmse_day = sqrt(mean((data$obs - data$pred)^2)),
92 | me_day = mean(data$pred - data$obs),
93 | nobs_day = (data %>% filter(!is.na(pred)) %>% nrow)) %>%
94 | select(-data)
95 | day_eval_sub50 <- data %>%
96 | filter(obs < 50) %>%
97 | nest_by(!!models, !!test_tune) %>%
98 | # for each model and test vs train folds, calculate metrics
99 | mutate(wr2_day_sub50 = r2(feols(obs ~ pred | loc_id + year,
100 | data = data), "wr2"),
101 | r2_day_sub50 = r2(feols(obs ~ pred,
102 | data = data), "r2"),
103 | rmse_day_sub50 = sqrt(mean((data$obs - data$pred)^2)),
104 | me_day_sub50 = mean(data$pred - data$obs),
105 | nobs_day_sub50 = (data %>% filter(!is.na(pred)) %>% nrow)) %>%
106 | select(-data)
107 | day_eval_over50 <- data %>%
108 | filter(obs >= 50) %>%
109 | nest_by(!!models, !!test_tune) %>%
110 | # for each model and test vs train folds, calculate metrics
111 | mutate(wr2_day_over50 = r2(feols(obs ~ pred | loc_id + year,
112 | data = data), "wr2"),
113 | r2_day_over50 = r2(feols(obs ~ pred,
114 | data = data), "r2"),
115 | rmse_day_over50 = sqrt(mean((data$obs - data$pred)^2)),
116 | me_day_over50 = mean(data$pred - data$obs),
117 | nobs_day_over50 = (data %>% filter(!is.na(pred)) %>% nrow)) %>%
118 | select(-data)
119 | month_eval <- data %>%
120 | group_by(loc_id, month_year, !!models, !!test_tune) %>%
121 | summarise(pred = mean(pred),
122 | obs = mean(obs),
123 | year = unique(year),
124 | .groups = "drop") %>%
125 | nest_by(!!models, !!test_tune) %>%
126 | # for each model and test vs train folds, calculate metrics
127 | mutate(wr2_month = r2(feols(obs ~ pred | loc_id + year,
128 | data = data), "wr2"),
129 | r2_month = r2(feols(obs ~ pred,
130 | data = data), "r2"),
131 | rmse_month = sqrt(mean((data$obs - data$pred)^2)),
132 | me_month = mean(data$pred - data$obs),
133 | nobs_month = (data %>% filter(!is.na(pred)) %>% nrow)) %>%
134 | select(-data)
135 | year_eval <- data %>%
136 | group_by(loc_id, year, !!models, !!test_tune) %>%
137 | summarise(pred = mean(pred),
138 | obs = mean(obs),
139 | .groups = "drop") %>%
140 | nest_by(!!models, !!test_tune) %>%
141 | # for each model and test vs train folds, calculate metrics
142 | mutate(wr2_year = r2(feols(obs ~ pred | loc_id + year,
143 | data = data), "wr2"),
144 | r2_year = r2(feols(obs ~ pred,
145 | data = data), "r2"),
146 | rmse_year = sqrt(mean((data$obs - data$pred)^2)),
147 | me_year = mean(data$pred - data$obs),
148 | nobs_year = (data %>% filter(!is.na(pred)) %>% nrow)) %>%
149 | select(-data)
150 | day_bin <- data %>%
151 | mutate(obs_bin = cut(obs, breaks = bins,
152 | right = FALSE, include.lowest = TRUE,
153 | ordered_result = TRUE),
154 | pred_bin = cut(pred, breaks = bins,
155 | right = FALSE, include.lowest = TRUE,
156 | ordered_result = TRUE)) %>%
157 | mutate(across(ends_with("_bin"), as.numeric),
158 | bin_dist = abs(obs_bin - pred_bin)) %>%
159 | group_by(!!models, !!test_tune) %>%
160 | summarise(binDist_day = mean(bin_dist),
161 | pctCorrectBin_day = mean(obs_bin == pred_bin),
162 | wrongBinDist_day = weighted.mean(bin_dist, bin_dist > 0),
163 | .groups = "drop")
164 |
165 | full_join(day_eval,
166 | day_eval_sub50) %>%
167 | full_join(day_eval_over50) %>%
168 | full_join(month_eval) %>%
169 | full_join(year_eval) %>%
170 | full_join(day_bin) %>%
171 | group_by(!!test_tune) %>%
172 | # rank the rmse and r2 for each model fold that was left out
173 | mutate(across(contains("r2"), list(rank =~ data.table::frankv(.x, order = -1))),
174 | across(contains("rmse"), list(rank =~ data.table::frankv(.x, order = 1))),
175 | across(contains("me"), list(rank =~ data.table::frankv(abs(.x), order = 1))),
176 | across(contains("Dist"), list(rank =~ data.table::frankv(.x, order = 1))),
177 | across(contains("pctCorrect"), list(rank =~ data.table::frankv(.x, order = -1)))) %>%
178 | ungroup %>%
179 | # name the other columns with value to distinguish them from the rank columns
180 | dplyr::rename_with(function(x){paste0(x, "_value")}, !contains("rank") & !(!!test_tune) & !(!!models)) %>%
181 | return
182 | }
183 |
184 | # Plotting
185 | mid_rescaler <- function(mid = 0) {
186 | function(x, to = c(0, 1), from = range(x, na.rm = TRUE)) {
187 | scales::rescale_mid(x, to, from, mid)
188 | }
189 | }
190 |
--------------------------------------------------------------------------------
/scripts/setup/00_03_load_settings.R:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Provide your Google Earth Engine email
3 | gee_email = "INSERT YOUR GEE EMAIL HERE, e.g. jdoe@stanford.edu"
4 | try(ee_Initialize(email = gee_email))
5 |
6 | # Set the number of cores to use in parallel computing
7 | num_cores = Sys.getenv("SLURM_CPUS_PER_TASK")
8 | if (nchar(num_cores) > 0) {
9 | num_cores = as.integer(num_cores) - 1
10 | } else {
11 | num_cores = 1 # default sequential
12 | }
13 |
14 | # Provide your US Census API Key
15 | key <- "INSERT YOUR US CENSUS BUREAU API KEY HERE"
16 | census_api_key(key)
17 |
18 | #-------------------------------------------------------------------------------
19 | # Set to location of Dropbox and GitHub folders
20 | path_dropbox = "INSERT PATH TO DROPBOX FOLDER HERE"
21 | path_github = "INSERT PATH TO GITHUB REPO HERE"
22 |
23 | # File paths based on root folders above
24 | path_data = file.path(path_dropbox, "data")
25 | path_output = file.path(path_dropbox, "output")
26 | path_final = file.path(path_dropbox, "final")
27 | path_tables = file.path(path_github, "tables", "raw")
28 | path_figures = file.path(path_github, "figures", "raw")
29 | path_setup = file.path(path_github, "scripts", "setup")
30 | path_main = file.path(path_github, "scripts", "main")
31 | path_supplementary = file.path(path_github, "scripts", "supplementary")
32 |
33 | #-------------------------------------------------------------------------------
34 | nonContig_stateFIPS <- c("02","60","66","15","72","78","69")
35 | conus_stusps = setdiff(states()$STUSPS, c("AK", "AS", "GU", "HI", "MP", "PR", "VI"))
36 |
--------------------------------------------------------------------------------
/tables/clean/tableS01_distance_to_plume_effects.tex:
--------------------------------------------------------------------------------
1 | \begin{table}[H]
2 | \caption{Estimated effect on monitor \pmt of being underneath or near a plume or smoke day. The third and fourth columns (labeled w/ met.) control flexibly for meteorological variables including temperature, dewpoint temperature, planetary boundary layer minimum, maximum and mean height. Plume variables are based solely on the presence of HMS plumes, while smoke variables also include days identified as smoke-days based on AOD missingness and HYSPLIT trajectories. All regressions include monitor-month and state-month-year fixed effects.}
3 | \begin{tabular}{lcccc}
4 | \tabularnewline\midrule\midrule
5 | %Dependent Variable:&\multicolumn{4}{c}{pm25}\\
6 | & plumes & smoke days & plumes w/ met. & smoke days w/ met.\\
7 | \midrule
8 | plume dist: overhead&4.504$^{***}$ & & 3.099$^{***}$ & \\
9 | &(0.0510) & & (0.0410) & \\
10 | plume dist: 0-100&1.272$^{***}$ & & 0.3164$^{***}$ & \\
11 | &(0.0203) & & (0.0226) & \\
12 | plume dist: 100-250&1.012$^{***}$ & & 0.2041$^{***}$ & \\
13 | &(0.0150) & & (0.0139) & \\
14 | plume dist: 250-500&0.7020$^{***}$ & & 0.1632$^{***}$ & \\
15 | &(0.0127) & & (0.0104) & \\
16 | plume dist: 500-750&0.3203$^{***}$ & & 0.1121$^{***}$ & \\
17 | &(0.0117) & & (0.0094) & \\
18 | plume dist: 750-1000&0.0608$^{***}$ & & 0.0619$^{***}$ & \\
19 | &(0.0105) & & (0.0091) & \\
20 | smoke dist: overhead& & 4.934$^{***}$ & & 3.399$^{***}$\\
21 | & & (0.0482) & & (0.0385)\\
22 | smoke dist: 0-100& & 1.947$^{***}$ & & 0.8033$^{***}$\\
23 | & & (0.0202) & & (0.0211)\\
24 | smoke dist: 100-250& & 1.449$^{***}$ & & 0.5029$^{***}$\\
25 | & & (0.0160) & & (0.0141)\\
26 | smoke dist: 250-500& & 0.9806$^{***}$ & & 0.3568$^{***}$\\
27 | & & (0.0140) & & (0.0113)\\
28 | smoke dist: 500-750& & 0.4757$^{***}$ & & 0.2037$^{***}$\\
29 | & & (0.0127) & & (0.0103)\\
30 | smoke dist: 750-1000& & 0.1570$^{***}$ & & 0.1093$^{***}$\\
31 | & & (0.0113) & & (0.0100)\\
32 | \midrule \emph{Fixed-effects}& & & & \\
33 | id$\times$month & Yes & Yes & Yes & Yes\\
34 | state$\times$year$\times$month & Yes & Yes & Yes & Yes\\
35 | \midrule
36 | Observations & 4,442,067&4,442,067&4,442,067&4,442,067\\
37 | Within R$^2$ & 0.021599&0.02626&0.125643&0.127239\\
38 | \midrule\midrule\multicolumn{5}{l}{\emph{One-way (id$\times$month) standard-errors in parentheses}}\\
39 | \multicolumn{5}{l}{\emph{Signif. Codes: ***: 0.01, **: 0.05, *: 0.1}}\\
40 | \end{tabular}
41 | \label{tab:plume_accuracy}
42 | \end{table}
43 |
--------------------------------------------------------------------------------
/tables/clean/tableS02_anomAOD_model_inputs.tex:
--------------------------------------------------------------------------------
1 | \begin{table}[H]
2 | \centering
3 | \caption{AOD anomaly model inputs. NED = National Elevation Database, NLCD = National Land Cover Database, PBL = planetary boundary layer.}
4 | \begin{tabular}{l|c|c}
5 | Feature & Source & Native resolution\\
6 | \hline
7 | \hline
8 | \shortstack[l]{Aerosol optical thickness anomalies on smoke-days\\ (current, 1-day, 2-day, and 3-day lagged)}& MERRA-2 & ~50km\\
9 | \hline
10 | \shortstack[l]{Elevation \\ (mean and standard deviation in grid cells)} & USGS NED & ~10m \\
11 | \hline
12 | \shortstack[l]{Percent of area in each Level 1 land cover class \\ (water, developed, barren, shrubland, herbaceous,\\cultivated, forest, wetlands)} & USGS NLCD & 30m \\
13 | \hline
14 | Distance to nearest fire cluster & HMS fire points & - \\
15 | \hline
16 | \shortstack[l]{Size of nearest fire cluster \\ (area and number of constituent fire points)} & HMS fire points & - \\
17 | \hline
18 | \shortstack[l]{Meteorology \\ (daily mean, max, and min PBL,\\average sea level pressure)} & ERA5 global & ~30km \\
19 | \hline
20 | \shortstack[l]{Meteorology \\ (total precipitation, average 2m air temperature,\\ average eastward and northward wind speed,\\ average surface pressure, 2m dewpoint temperature)} & ERA5 land & ~11km \\
21 | \hline
22 | Latitude, Longitude, Month & & - \\
23 | \end{tabular}
24 | \label{tab:aodinputs}
25 | \end{table}
26 |
--------------------------------------------------------------------------------
/tables/clean/tableS03_smokePM_model_inputs.tex:
--------------------------------------------------------------------------------
1 | \begin{table}[H]
2 | \centering
3 | \caption{Smoke \pmt Model inputs. NED = National Elevation Database, NLCD = National Land Cover Database}
4 | \begin{tabular}{l|c|c}
5 | Feature & Source & Native resolution\\
6 | \hline
7 | \hline
8 | \shortstack[l]{Aerosol optical thickness anomalies\\ (current, 1-day, 2-day, and 3-day lagged)}& MERRA-2 & ~50km\\
9 | \hline
10 | Percent of AOD observations missing & MODIS MAIAC & 1km \\
11 | \hline
12 | \shortstack[l]{Predicted aerosol optical depth anomalies \\ (min, max, mean, 25th, 50th, and 75th percentiles)} & \shortstack[l]{predicted, trained on \\ MODIS MAIAC} & - \\
13 | \hline
14 | \shortstack[l]{Elevation \\ (mean and standard deviation in grid cells)} & USGS NED & ~10m \\
15 | \hline
16 | \shortstack[l]{Percent of area in each Level 1 land cover class \\ (water, developed, barren, shrubland, herbaceous,\\cultivated, forest, wetlands)} & USGS NLCD & 30m \\
17 | \hline
18 | Distance to nearest fire cluster & HMS fire points & - \\
19 | \hline
20 | \shortstack[l]{Size of nearest fire cluster \\ (area and number of constituent fire points)} & HMS fire points & -\\
21 | \hline
22 | \shortstack[l]{Meteorology \\ (daily mean, max, and min PBL,\\average sea level pressure)} & ERA5 global & ~30km \\
23 | \hline
24 | \shortstack[l]{Meteorology \\ (total precipitation, average 2m air temperature,\\ average eastward and northward wind speed,\\ average surface pressure, 2m dewpoint temperature)} & ERA5 land & ~11km \\
25 | \hline
26 | \shortstack[l]{HYSPLIT trajectory points in 50 km buffer \\ (by height quintiles: 0 - 1.1, 1.1-1.8, 1.8 - 2.7,\\2.7 - 4.3, 4.3+ km AGL)} & \shortstack[l]{HYSPLIT simulations\\from HMS HYSPLIT points} & - \\
27 | \hline
28 | Latitude, Longitude, Month & & - \\
29 | \end{tabular}
30 | \label{tab:inputs}
31 | \end{table}
32 |
--------------------------------------------------------------------------------
/tables/clean/tableS04_model_performance.tex:
--------------------------------------------------------------------------------
1 | \begin{table}[ht]
2 | \caption{Out-of-sample smoke \pmt model performance on different subset or aggregations of the data (rows) and different performance metrics (columns). Within $R^2$ controls for monitor and year fixed effects. }
3 | \centering
4 | \begin{tabular}{lccc}
5 | \hline
6 | & within $R^2$ & $R^2$ & RMSE \\
7 | \hline
8 | smoke days & 0.65 & 0.67 & 9.57 \\
9 | day $<$50\mg & 0.47 & 0.50 & 5.07 \\
10 | day $\geq$50\mg & 0.36 & 0.44 & 70.99 \\
11 | month avg & 0.67 & 0.67 & 3.75 \\
12 | year avg & 0.68 & 0.72 & 2.89 \\
13 | \hline
14 | \end{tabular}
15 | \label{tab:model_performance}
16 | \end{table}
17 |
--------------------------------------------------------------------------------
/tables/raw/tableS01.tex:
--------------------------------------------------------------------------------
1 |
2 | \begin{tabular}{lcccc}
3 | \tabularnewline\midrule\midrule
4 | Dependent Variable: & \multicolumn{4}{c}{pm25}\\
5 | & plumes & plumes + hysplit & plumes w meteorology & plumes + hysplit w meteorology \\
6 | Model: & (1) & (2) & (3) & (4)\\
7 | \midrule \emph{Variables} & & & & \\
8 | plume\_dist\_factor $=$ overhead & 4.504$^{***}$ & & 3.099$^{***}$ & \\
9 | & (0.0510) & & (0.0410) & \\
10 | plume\_dist\_factor $=$ 0-100 & 1.272$^{***}$ & & 0.3164$^{***}$ & \\
11 | & (0.0203) & & (0.0226) & \\
12 | plume\_dist\_factor $=$ 100-250 & 1.012$^{***}$ & & 0.2041$^{***}$ & \\
13 | & (0.0150) & & (0.0140) & \\
14 | plume\_dist\_factor $=$ 250-500 & 0.7020$^{***}$ & & 0.1632$^{***}$ & \\
15 | & (0.0127) & & (0.0104) & \\
16 | plume\_dist\_factor $=$ 500-750 & 0.3203$^{***}$ & & 0.1121$^{***}$ & \\
17 | & (0.0117) & & (0.0094) & \\
18 | plume\_dist\_factor $=$ 750-1000 & 0.0608$^{***}$ & & 0.0619$^{***}$ & \\
19 | & (0.0105) & & (0.0091) & \\
20 | smoke\_dist\_factor $=$ overhead & & 4.934$^{***}$ & & 3.399$^{***}$\\
21 | & & (0.0482) & & (0.0385)\\
22 | smoke\_dist\_factor $=$ 0-100 & & 1.947$^{***}$ & & 0.8033$^{***}$\\
23 | & & (0.0202) & & (0.0211)\\
24 | smoke\_dist\_factor $=$ 100-250 & & 1.449$^{***}$ & & 0.5029$^{***}$\\
25 | & & (0.0160) & & (0.0141)\\
26 | smoke\_dist\_factor $=$ 250-500 & & 0.9806$^{***}$ & & 0.3568$^{***}$\\
27 | & & (0.0140) & & (0.0113)\\
28 | smoke\_dist\_factor $=$ 500-750 & & 0.4757$^{***}$ & & 0.2037$^{***}$\\
29 | & & (0.0127) & & (0.0103)\\
30 | smoke\_dist\_factor $=$ 750-1000 & & 0.1570$^{***}$ & & 0.1093$^{***}$\\
31 | & & (0.0113) & & (0.0100)\\
32 | \midrule \emph{Fixed-effects} & & & & \\
33 | id-month & Yes & Yes & Yes & Yes\\
34 | state-year-month & Yes & Yes & Yes & Yes\\
35 | \midrule \emph{Fit statistics} & & & & \\
36 | Observations & 4,442,067 & 4,442,067 & 4,442,067 & 4,442,067\\
37 | R$^2$ & 0.32277 & 0.32600 & 0.39479 & 0.39590\\
38 | Within R$^2$ & 0.02160 & 0.02626 & 0.12564 & 0.12724\\
39 | \midrule\midrule\multicolumn{5}{l}{\emph{Clustered (id-month) standard-errors in parentheses}}\\
40 | \multicolumn{5}{l}{\emph{Signif. Codes: ***: 0.01, **: 0.05, *: 0.1}}\\
41 | \end{tabular}
42 |
43 |
44 |
--------------------------------------------------------------------------------