├── test_dataset.npy ├── config_jsons ├── test.json ├── default.json └── example.json ├── dataset.json ├── LICENSE ├── rnn_model.py ├── train.py ├── coherence_timeseries.py ├── README.md └── generate_dpm.py /test_dataset.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olliestephenson/dpm-rnn-public/HEAD/test_dataset.npy -------------------------------------------------------------------------------- /config_jsons/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_dataset": "test_dataset", 3 | "deploy_dataset": "test_dataset" 4 | } 5 | -------------------------------------------------------------------------------- /config_jsons/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_dataset": "test_dataset", 3 | "deploy_dataset": "test_dataset" 4 | } 5 | -------------------------------------------------------------------------------- /dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": { 3 | "path": "", 4 | "shape": "", 5 | "length": "", 6 | "event_index": "" 7 | }, 8 | "test_dataset": { 9 | "path": "./test_dataset.npy", 10 | "shape": [100, 100], 11 | "length": 10, 12 | "event_index": 9 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /config_jsons/example.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_dataset": "test_dataset", 3 | "deploy_dataset": "test_dataset", 4 | "model_hyperparameters": { 5 | "rnn_dim": 256, 6 | "rnn_cell" : "gru", 7 | "h_dim": 128, 8 | "num_layers": 1 9 | }, 10 | "training_hyperparameters": { 11 | "batch_size": 256, 12 | "num_epochs": 20, 13 | "learning_rate": 0.0005, 14 | "seed": 128 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Oliver Stephenson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /rnn_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class RNN(nn.Module): 7 | 8 | def __init__(self, model_config): 9 | super().__init__() 10 | 11 | self.config = model_config 12 | self._construct_model() 13 | 14 | def _construct_model(self): 15 | data_dim = self.config['data_dim'] # dimension of data at each timestep (e.g. coherence is scalar, so data_dim=1) 16 | h_dim = self.config['h_dim'] # size of hidden layers in fully-connected network 17 | rnn_dim = self.config['rnn_dim'] # size of hidden state for RNN 18 | rnn_cell = self.config['rnn_cell'] # type of RNN (currently only GRU is implemented) 19 | num_layers = self.config['num_layers'] # number of RNN layers (default is 1, more may be better but is slower to train) 20 | 21 | # Initialize decoder network that maps hidden state to parameters of Gaussian distribution 22 | # Currently fixed to be 3-layer network, but can be customizable in the future. 23 | self.dec_fc = nn.Sequential( 24 | nn.Linear(rnn_dim, h_dim), 25 | nn.ReLU(), 26 | nn.Linear(h_dim, h_dim), 27 | nn.ReLU(), 28 | nn.Linear(h_dim, h_dim), 29 | nn.ReLU()) 30 | self.dec_mean = nn.Linear(h_dim, data_dim) 31 | self.dec_logvar = nn.Linear(h_dim, data_dim) # we learn the log-variance instead of standard deviation (numerical stability) 32 | 33 | # Initialize RNN cell 34 | if rnn_cell == 'gru': 35 | self.rnn = nn.GRU(data_dim, rnn_dim, num_layers) 36 | elif rnn_cell == 'lstm': 37 | raise NotImplementedError 38 | else: 39 | raise NotImplementedError 40 | 41 | def num_parameters(self): 42 | """Count the number of trainable parameters in the model.""" 43 | if not hasattr(self, '_num_parameters'): 44 | self._num_parameters = 0 45 | for p in self.parameters(): 46 | count = 1 47 | for s in p.size(): 48 | count *= s 49 | self._num_parameters += count 50 | 51 | return self._num_parameters 52 | 53 | def forward(self, batch, generate_dpm=False): 54 | """ 55 | Pass a batch of sequences through RNN and compute means and log-variances. 56 | Assumes batch has shape (batch_size, seq_len, data_dim) 57 | If generate_dpm is true the function returns all forecasts, including coseismic 58 | If tgenerate_dpm is false the function returns just preseismic forecasts 59 | """ 60 | 61 | batch = batch.transpose(0,1) # PyTorch method for swapaxes 62 | seq_len, batch_size, _ = batch.size() 63 | 64 | # Initialize initial hidden state h_0 to be all 0s 65 | h_0 = torch.zeros(self.config['num_layers'], batch_size, self.config['rnn_dim']).to(batch.device) 66 | 67 | # Run batch sequences through RNN to compute all hidden states 68 | # By default, PyTorch assumes first dimension of batch is time, which is why we need to tranpose batch above 69 | # hiddens has shape (seq_len, batch_size, rnn_dim), which corresponds to h_1 to h_T 70 | self.rnn.flatten_parameters() 71 | hiddens, _ = self.rnn(batch, h_0) 72 | 73 | # h_(T-1) is used to predict x_T which is the final preseismic coherence measurement 74 | # We want to use h_(t-1) to predict x_t, so we want hidden states h_0 to h_(T-1) when traning on preseismic data 75 | # Only want h_0 that corresponds to the output hidden state when using a stacked RNN 76 | if generate_dpm==False: 77 | hiddens = torch.cat([h_0[-1,:,:].unsqueeze(dim=0), hiddens], dim=0)[:-1] # move h_0 to front, truncate h_T 78 | elif generate_dpm==True: 79 | hiddens = torch.cat([h_0[-1,:,:].unsqueeze(dim=0), hiddens], dim=0) # move h_0 to front, keep h_T 80 | seq_len = seq_len + 1 # Increase sequence length to take account of additional element 81 | 82 | # We use decoder to map each hidden state to parameters of a Gaussian distribution 83 | # To forgo looping, we will reshape hiddens into one giant batch, then reshape back after 84 | hiddens = hiddens.view(seq_len*batch_size, -1) 85 | dec_h = self.dec_fc(hiddens) # passing through fully connected layers 86 | dec_mean = self.dec_mean(dec_h) # means of Gaussian 87 | dec_logvar = self.dec_logvar(dec_h) # log-variances of Gaussian 88 | 89 | # Reshape the means and log-variances from (seq_len*batch_size, data_dim) back to (seq_len, batch_size, data_dim) 90 | dec_mean = dec_mean.view(seq_len, batch_size, -1) 91 | dec_logvar = dec_logvar.view(seq_len, batch_size, -1) 92 | 93 | # Swap back to (batch_size, seq_len, data_dim) to match original input batch shape 94 | dec_mean = dec_mean.transpose(0,1) 95 | dec_logvar = dec_logvar.transpose(0,1) 96 | 97 | return dec_mean, dec_logvar 98 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import random 4 | import time 5 | import torch 6 | from torch.utils.data import DataLoader 7 | 8 | 9 | def nll_gaussian(mean, logvar, value): 10 | """Compute negative log-likelihood of Gaussian.""" 11 | assert mean.size() == logvar.size() == value.size() 12 | pi = torch.FloatTensor([np.pi]).to(value.device) 13 | nll_element = (value - mean).pow(2) / torch.exp(logvar) + logvar + torch.log(2*pi) 14 | return torch.sum(0.5*nll_element) 15 | 16 | 17 | def run_epoch(dataloader, model, optimizer, device, train=True): 18 | """Perform one epoch of training by looping through the dataset once.""" 19 | 20 | # Setting models and datasets into train/test mode 21 | if train: 22 | model = model.train() 23 | dataloader.dataset.train() 24 | else: 25 | model = model.eval() 26 | dataloader.dataset.test() 27 | 28 | nll_total = 0.0 29 | 30 | for batch_idx, (batch,_) in enumerate(dataloader): 31 | assert isinstance(batch, torch.Tensor) 32 | batch = batch.to(device) # batch is preseismic timeseries 33 | 34 | # Compute loss (negative log-likelihood) 35 | pred_means, pred_logvars = model(batch,generate_dpm=False) # this calls model.forward() 36 | nll_batch = nll_gaussian(pred_means, pred_logvars, batch) # compute negative log-likelihood (NLL) of batch under predicted Gaussians 37 | 38 | if train: 39 | optimizer.zero_grad() 40 | nll_batch.backward() # compute gradients 41 | torch.nn.utils.clip_grad_norm_(model.parameters(), 10) # clips norm of gradients to 10 42 | optimizer.step() # one step of gradient descent 43 | 44 | nll_total += nll_batch.item() # .item() remove gradient information, which is more memory efficient 45 | 46 | nll_average = nll_total / len(dataloader.dataset) # average NLL per sequence 47 | print('{}\t| nll: {:.6f}'.format('TRAIN' if train else 'TEST', nll_average)) 48 | 49 | return nll_average 50 | 51 | 52 | def train_model(train_config, model, dataset, device, save_dir): 53 | assert 'batch_size' in train_config and isinstance(train_config['batch_size'], int) and train_config['batch_size'] > 0 54 | assert 'num_epochs' in train_config and isinstance(train_config['num_epochs'], int) and train_config['num_epochs'] > 0 55 | assert 'learning_rate' in train_config and train_config['learning_rate'] > 0.0 56 | 57 | 58 | seed = train_config['seed'] 59 | torch.manual_seed(seed) 60 | np.random.seed(seed) 61 | random.seed(seed) 62 | torch.cuda.manual_seed_all(seed) 63 | torch.cuda.manual_seed(seed) 64 | 65 | # torch.backends.cudnn.deterministic = True 66 | # torch.backends.cudnn.benchmark = False 67 | # torch.set_deterministic(True) 68 | # torch.backends.cudnn.enabled = False 69 | 70 | # Initialize dataloaders 71 | # See documentation at https://pytorch.org/docs/stable/data.html 72 | dataloader = DataLoader(dataset, batch_size=train_config['batch_size'], shuffle=True,worker_init_fn=np.random.seed(seed),num_workers=0) # set batch_size here 73 | 74 | # Initialize optimizer (default using ADAM optimizer) 75 | optimizer = torch.optim.Adam(model.parameters(), lr=train_config['learning_rate']) # set learning_rate here 76 | 77 | # Initialize bookkeeping variables 78 | log = [] 79 | best_test_epoch = 0 80 | best_test_loss = float('inf') 81 | start_time = time.time() 82 | 83 | for epoch in range(train_config['num_epochs']): 84 | print('--- EPOCH [{}/{}] ---'.format(epoch+1, train_config['num_epochs'])) 85 | 86 | epoch_start_time = time.time() 87 | train_loss = run_epoch(dataloader, model, optimizer, device, train=True) 88 | test_loss = run_epoch(dataloader, model, optimizer, device, train=False) 89 | epoch_time = time.time() - epoch_start_time 90 | print('{:.3f} seconds'.format(epoch_time)) 91 | 92 | log.append({ 93 | 'epoch' : epoch+1, 94 | 'train_loss' : train_loss, 95 | 'test_loss' : test_loss, 96 | 'time' : epoch_time 97 | }) 98 | 99 | # Save model with best test loss 100 | if test_loss < best_test_loss: 101 | best_test_loss = test_loss 102 | best_test_epoch = epoch+1 103 | torch.save(model.state_dict(), os.path.join(save_dir, 'best_model.pth')) 104 | print('BEST test loss') 105 | 106 | # Save final model 107 | torch.save(model.state_dict(), os.path.join(save_dir, 'final_model.pth')) 108 | print('--- DONE training model ---') 109 | 110 | # Compute summary statistics 111 | summary = { 112 | 'total_time': round(time.time()-start_time, 3), 113 | 'average_epoch_time': round((time.time()-start_time)/train_config['num_epochs'], 3), 114 | 'best_test_loss': best_test_loss, 115 | 'best_test_epoch': best_test_epoch, 116 | 'num_trainable_params': model.num_parameters() 117 | } 118 | 119 | return train_config, summary, log 120 | -------------------------------------------------------------------------------- /coherence_timeseries.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from torch.utils.data import Dataset 5 | 6 | 7 | from enum import Enum 8 | 9 | class Mode(Enum): 10 | DEPLOY = 0 11 | TRAIN = 1 12 | TEST = 2 13 | 14 | 15 | class Coherence_Timeseries(Dataset): 16 | 17 | data_dim = 1 # coherence values are scalar (dimension = 1) 18 | mode = Mode.DEPLOY # default mode 19 | 20 | def __init__(self, data_config): 21 | 22 | # Check fields in data_config 23 | assert 'path' in data_config and isinstance(data_config['path'], str) 24 | assert 'shape' in data_config and isinstance(data_config['shape'], list) and len(data_config['shape']) == 2 25 | assert 'length' in data_config and isinstance(data_config['length'], int) 26 | assert 'event_index' in data_config and isinstance(data_config['event_index'], int) 27 | 28 | # Load data 29 | assert data_config['path'][-4:] == '.npy' 30 | self.data = np.load(data_config['path']) 31 | assert isinstance(self.data, np.ndarray) 32 | 33 | # Check dataset.shape 34 | assert len(self.data.shape) == 3 35 | assert self.data.shape == (data_config['shape'][0], data_config['shape'][1], data_config['length']) 36 | self.dataset_shape = (self.data.shape[0], self.data.shape[1]) 37 | self.sequence_length = self.data.shape[2] 38 | 39 | # Check event_index 40 | assert 0 <= data_config['event_index'] < self.sequence_length 41 | self.event_index = data_config['event_index'] 42 | 43 | # Flatten data 44 | self.data = np.reshape(self.data, (-1, self.data.shape[-1])) 45 | self.data = np.expand_dims(self.data, axis=2) # last dimension 1 since coherence values are scalars 46 | 47 | def remove_nans(self): 48 | """Remove sequences with nan values from dataset.""" 49 | nans = np.isnan(self.data) 50 | nan_count = np.sum(np.sum(nans, axis=-1), axis=-1) 51 | self.not_nan_inds = np.where(nan_count == 0)[0] 52 | self.data = self.data[self.not_nan_inds] 53 | 54 | def unbound(self,transform): 55 | """ 56 | Transform coherence values into unbounded range with inverse sigmoid. Can transform coherence or squared coherence 57 | Transform on squared coherence closely matches cramer-rao bound on phase variance (see paper) 58 | Can also add further transforms here 59 | """ 60 | 61 | if transform == 'logit_squared': 62 | # Convert to higher precision to avoid divide by zero error in log 63 | # Don't seem to need this with logit transform 64 | self.data = np.float64(self.data) 65 | 66 | # Make sure all values in [0,1] range first 67 | eps = 1e-6 # small epsilon value 68 | self.data[self.data <= 0.0] = eps 69 | self.data[self.data >= 1.0] = 1.0-eps 70 | 71 | # Apply inverse sigmoid 72 | print('Using transform: {}'.format(transform)) 73 | if transform == 'logit': 74 | self.data = np.log(self.data/(1-self.data)) 75 | elif transform == 'logit_squared': 76 | self.data = np.log(np.square(self.data)/(1.0-np.square(self.data))) 77 | else: 78 | raise Exception('Data transform not defined') 79 | 80 | def create_test_set(self, train_split=0.8, seed=128): 81 | """ 82 | Create test dataset. 83 | 84 | This is memory efficient and doesn't duplicate self.data 85 | The training set is: self.data[self.shuffle_inds[:self.train_set_size]] 86 | The test set is: self.data[self.shuffle_inds[self.train_set_size:]] 87 | 88 | Args: 89 | train_split: proportion of data to use for training, rest for test. 90 | seed: seed to fix randomness. 91 | """ 92 | 93 | np.random.seed(seed) # fix randomness 94 | self.shuffle_inds = np.random.permutation(len(self.data)) # shuffle a random permutation 95 | self.train_set_size = int(train_split*len(self.data)) # set training set size 96 | 97 | def deploy(self): 98 | self.mode = Mode.DEPLOY 99 | 100 | def train(self): 101 | self.mode = Mode.TRAIN 102 | 103 | def test(self): 104 | self.mode = Mode.TEST 105 | 106 | def __len__(self): 107 | """ 108 | Length of dataset. 109 | Must override this method when extending Dataset object. 110 | """ 111 | if self.mode == Mode.DEPLOY: 112 | return len(self.data) 113 | elif self.mode == Mode.TRAIN: 114 | return self.train_set_size 115 | elif self.mode == Mode.TEST: 116 | return len(self.data)-self.train_set_size 117 | else: 118 | raise NotImplementedError 119 | 120 | def __getitem__(self, index): 121 | """ 122 | For getting data with indices. 123 | Must override this method when extending Dataset object. 124 | 125 | Return: 126 | (preseismic timeseries, coseismic coherence) 127 | """ 128 | if self.mode == Mode.DEPLOY: 129 | batch_preseismic = self.data[index,:self.event_index] 130 | batch_coseismic = self.data[index,self.event_index] 131 | elif self.mode == Mode.TRAIN: 132 | train_index = self.shuffle_inds[index] 133 | batch_preseismic = self.data[train_index,:self.event_index] 134 | batch_coseismic = self.data[train_index,self.event_index] 135 | elif self.mode == Mode.TEST: 136 | test_index = self.shuffle_inds[index+self.train_set_size] 137 | batch_preseismic = self.data[test_index,:self.event_index] 138 | batch_coseismic = self.data[test_index,self.event_index] 139 | else: 140 | raise NotImplementedError 141 | 142 | return torch.tensor(batch_preseismic).float(), torch.tensor(batch_coseismic).float() 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dpm-coherence-rnn 2 | 3 | Deep learning code implementing the satellite-based damage mapping method from [Stephenson et al. (2021), IEEE Trasactions on Geoscience and Remote Sensing](https://ieeexplore.ieee.org/document/9467551). 4 | 5 | The artice is also available on [arXiv](https://arxiv.org/abs/2105.11544). 6 | 7 | Contact: oliver.stephenson@caltech.edu 8 | 9 | Written in PyTorch v1.0.1, tested for v1.7.0 10 | 11 | THIS IS RESEARCH CODE PROVIDED TO YOU "AS IS" WITH NO WARRANTIES OF CORRECTNESS. USE AT YOUR OWN RISK. 12 | 13 | ## Notes 14 | 15 | This readme assumes you already have familiarity with [SAR](https://en.wikipedia.org/wiki/Synthetic-aperture_radar), [InSAR](https://en.wikipedia.org/wiki/Interferometric_synthetic-aperture_radar), [Python](https://www.python.org/), and [deep learning](https://en.wikipedia.org/wiki/Deep_learning) using [PyTorch](https://pytorch.org/). All software and data used are open source, but the processing can be involved for people unfamiliar with the details. Please check out our paper for a more detailed presentation of the method. If you're interested in satellite-based damage mapping but any of these terms are unfamiliar to you, please get in touch. 16 | 17 | These scripts are used to create damage proxy maps (DPMs) from sequential InSAR coherence time series using machine learning. The input data are a sequential series of pre-event InSAR coherence images (i.e. if you have SAR acquisitions A,B,C then we want the coherences for A-B and B-C) and one co-event coherence image (i.e. the coherence between the final pre-event SAR acquisition and the first post-event SAR acquisition). Best performance will be obtained when the temporal baseline is constant between acquisitsions. 18 | 19 | This code assumes that you already have a stack of coherence images. These images can be produced using freely available [Sentinel-1 data](https://asf.alaska.edu/data-sets/sar-data-sets/sentinel-1/), which can be processed using the [InSAR Scientific Computing Environment (ISCE)](https://github.com/isce-framework/isce2). The method has not been tested with data from other SAR satellites, but will presumably work similarly assuming there are regular acqusitions before the natural hazard and the satellite spatial baseline is well controlled. 20 | 21 | When creating damage maps you will need to think about your coordinate system. We do all of our processing in 'radar' coordinates, then map the final damage map to geographic coordinates for plotting/analysis. 22 | 23 | This method assumes that your natural hazard occured between two satellite acqusitions, with no anomalous behavior beforehand. Results may be worse for seasonal hazards, or hazards that occured over a longer period of time. We welcome discussions about potential improvements/modifications. Please get in touch! 24 | 25 | 26 | ## Code structure 27 | 28 | ### Python files 29 | 30 | `generate_dpm.py` is the main script that will generate a DPM. You can follow the code starting from here. 31 | 32 | `train.py` contains all the code for training the model (i.e. batching data, computing objective, performing gradient descent, etc.). 33 | 34 | `coherence_timeseries.py` contains the Coherence_Timeseries object for manipulating data. Currently expects `*.npy` files, but other formats can be easily added. 35 | 36 | `rnn_model.py` contains the RNN model, implemented with PyTorch. 37 | 38 | `scripts` directory contains simple scripts for exploring the code output. 39 | 40 | ### JSON files 41 | 42 | `config_jsons/` contains configuration JSONs for `generate_dpm.py`: 43 | 1. `train_dataset` (required, str) - coherence dataset used to train model (usually over a large region, around 100 km by 100 km). 44 | 2. `deploy_dataset` (required, str) - coherence dataset used to generate DPM (usually over a smaller area in the same geographic region, e.g. a town or a city). 45 | 3. `model_hyperparamters` (optional, dict) - see `config_jsons/example.json`, default parameters in code. 46 | 4. `training_hyperparameters` (optional, dict) - see `config_jsons/example.json`, default parameters in code. 47 | 5. `transform` (optional, str) - transform applied to map the coherence from [0,1] to an unbounded space before training. Either `logit_squared` (the logit transform of the coherence squared, used in the paper, default) or `logit` (logit transform without squaring the coherence). Other tranforms can easily be added. 48 | 49 | `dataset.json` contains information about available coherence time series datasets. The dataset names are keys in this file and also used for `train_dataset` and `deploy_dataset` fields in config files. 50 | 1. `path` (required, str) - path to data file. 51 | 2. `shape` (required, list) - shape of data as list with 2 integers, same as returned by numpy.shape(). 52 | 3. `length` (required, int) - length of coherence timeseries (for sequential coherence from N SAR images this will be N-1). 53 | 4. `event_index` (required, int) - index of event in timeseries (using zero indexing). Only data before this will be used in training. Anomaly detection will be performed on the coherence image at the event_index. Data after this image will not be used at all. 54 | 5. `pre_num` (optional, int) - number of pre-event coherence images to use in training. Must be >= 2 and <= event_index. 55 | 56 | ## Usage 57 | 58 | ### General usage 59 | 60 | `python generate_dpm.py -d --config_path --config --save_dir --return_ts ` 61 | 62 | ### Command line variables 63 | 64 | `--config` - Name of the JSON configuration file for this specific run. 65 | 66 | `--config_path` - Path to directory containing the JSON configuration files. 67 | 68 | `--dataset_json` - JSON file containing details on all training and deployment datasets 69 | 70 | `--save_dir` - Directory in which to save outputs. If there is already a trained model in the relevant sub-directory, the code will just deploy the model on the data. 71 | 72 | `--return_ts` - If true, code returns the mean and standard deviation of the forecast for every timestep, rather than just the final damage proxy map. 73 | 74 | `--best_model` - Path to PyTorch `state_dict` saved from previous training (optional). Takes precedence over any previously saved models in `save_dir`. 75 | 76 | `-d` - Device id. Controls the GPU used for training. Set to -1 to train using a CPU. Defaults to CPU if GPU is not available. `print(torch.has_cuda)` should return `True` for GPU training. 77 | 78 | 79 | ### Example 80 | We provide some randomly generated data (test_dataset.npy) on which to test the code. As the data is randomly generated the results will not be physically meaningful. You can test the code by running: 81 | 82 | `python generate_dpm.py -d 0 --config_path config_jsons --config test --save_dir saved --return_ts True` 83 | 84 | This will train a model with configuration in `config_jsons/test.json` on GPU device 0. Results will be saved in `saved/test/` and will include: 85 | 1. `best_model.pth` - the model that achieved the best test loss during training. 86 | 2. `final_model.pth` - the final training model. 87 | 3. `log.json` - log file of training. 88 | 4. `summary.json` - various summary statistics computed during training. 89 | 5. `config.json` - duplicate of the config file (for reproducability). 90 | 6. `coseismic_dpm/` - folder that contains the mean and standard deviation and z-scores of coseismic (or co-event, for non-earthquake nautral hazards) coherence under distribution predicted by the model. All outputs are in the transformed space (i.e. the coherence has been mapped to an unbounded space). 91 | 7. `full_ts/` - optional folder that contains the full time series of the coherence, forecast means, forcast standard deviations and calculated z-scores (controlled by `return_ts` boolean). All outputs are in the transformed space. 92 | 93 | ## Hyperparameter search 94 | 95 | Default hyperparameters included in the code may not always be the best. We have not yet systematically explored the optimal hyperparameters. General guidelines for tuning hyperparameters are below: 96 | 97 | ### Model parameters 98 | 99 | 1. `rnn_dim`: [64, 128, 256, 512] should often be sufficient. 100 | 2. `num_layers`: set to 1 in our experiments, but can increase if large `rnn_dim` are not working well (expect significant increase in training time). 101 | 3. `h_dim`: usually no larger than `rnn_dim`. 102 | 4. `rnn_cell`: currently only gated recurrent unit (GRU) is implemented. 103 | 104 | In general, you want as small a model as possible (which will be less prone to overfitting) without affecting performance. Note that we have not performed a systematic hyperparameter search for our work. 105 | 106 | ### Training parameters 107 | 108 | 1. `learning_rate`: usually the most easily tune-able hyperparameter, but also most dataset-dependent. Recommended range is [0.01, 0.00001]. Smaller is better, but would also require more `num_epochs` to converge. 109 | 2. `batch_size`: smaller is better, but larger can decrease training time. No larger than 512 is recommended. 110 | 3. `num_epochs`: should increase as `batch_size` increases, or as `learning_rate` decreases. 111 | 4. `seed`: seed for the random number generators. 112 | 113 | In general, you want small `learning_rate` and `batch_size` as long as it doesn't take too many `num_epochs` to converge. 114 | 115 | ### Installation 116 | 117 | In order to run this code, you need to install PyTorch and several dependencies. We recommend using a package management system such as [Conda](https://docs.conda.io/en/latest/) to install these packages into a dedicated environment. The code itself doesn't require installation—you can just [clone](https://www.atlassian.com/git/tutorials/setting-up-a-repository/git-clone) the repository and run it on your machine once the dependencies are installed. 118 | 119 | If you have GPUs available and want to make use of them during training (which is substantially faster), you will need to install the relevant version of the cudatoolkit package, or potentially build from source. This will depend on your machine and CUDA version. See [here](https://pytorch.org/get-started/locally/) for more information. 120 | 121 | To check if you have access to GPU training, after installation open a python terminal and do `import torch; torch.cuda.is_available()`. This should return `True`. 122 | 123 | 124 | ## Credit 125 | 126 | Citation: [Stephenson et al. (2021), IEEE TGRS](https://ieeexplore.ieee.org/document/9467551), and see the version on [arXiv](https://arxiv.org/abs/2105.11544). 127 | 128 | Code written by Eric Zhan and Oliver Stephenson. The method was developed by the authors of Stephenson et al. (2021). 129 | 130 | Contact: oliver.stephenson@caltech.edu 131 | 132 | ## Dislaimer 133 | This software is distributed under an MIT License. 134 | 135 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 136 | 137 | 138 | -------------------------------------------------------------------------------- /generate_dpm.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import numpy as np 4 | import os 5 | import torch 6 | from torch.utils.data import DataLoader 7 | import random 8 | 9 | from coherence_timeseries import Coherence_Timeseries 10 | from rnn_model import RNN 11 | from train import train_model 12 | 13 | 14 | DEFAULT_MODEL_PARAMS = { 15 | "rnn_dim": 256, 16 | "rnn_cell" : "gru", 17 | "h_dim": 128, 18 | "num_layers": 1 19 | } 20 | 21 | 22 | DEFAULT_TRAINING_PARAMS = { 23 | "batch_size": 256, 24 | "num_epochs": 20, 25 | "learning_rate": 0.0005, 26 | "seed": 128 27 | } 28 | 29 | 30 | def load_json(json_path): 31 | if json_path[-5:] != '.json': 32 | json_path += '.json' 33 | assert os.path.isfile(json_path) 34 | 35 | with open(json_path, 'r') as f: 36 | json_file = json.load(f) 37 | 38 | return json_file 39 | 40 | def get_device(device_id): 41 | if torch.cuda.is_available() and args.device_id >= 0: 42 | print('Using GPU') 43 | assert device_id < torch.cuda.device_count() 44 | return torch.device('cuda', device_id) 45 | else: 46 | print('Using CPU') 47 | return torch.device('cpu') 48 | 49 | 50 | def compute_scores(model, dataset, device, return_ts=False): 51 | ''' 52 | Compute DPM scores with a given dataset and model 53 | Can chose to return the forecast means and standard deviations for every time step using return_ts bool 54 | ''' 55 | 56 | print('Computing coseismic scores ...') 57 | # Initialize dataloader 58 | # Each batch is a column in space and all steps in time 59 | dataloader = DataLoader(deploy_dataset, batch_size=deploy_dataset.dataset_shape[1], shuffle=False) 60 | dataloader.dataset.deploy() 61 | 62 | dataset_shape = deploy_dataset.dataset_shape 63 | # DPM np.arrays to be saved 64 | dpm_means = np.zeros(dataset_shape, dtype=np.half) 65 | dpm_stds = np.zeros(dataset_shape, dtype=np.half) 66 | dpm_scores = np.zeros(dataset_shape, dtype=np.half) 67 | 68 | if return_ts: 69 | # Full prediction arrays - can be large 70 | print('Outputting a prediction for every time step. This can be a lot of data for large coherence timeseries.') 71 | # Just save pre- and co-event data/predicitions 72 | sequence_length = deploy_dataset.event_index + 1 73 | all_means = np.zeros((*dataset_shape,sequence_length), dtype=np.half) 74 | all_stds = np.zeros((*dataset_shape,sequence_length), dtype=np.half) 75 | all_scores = np.zeros((*dataset_shape,sequence_length), dtype=np.half) 76 | # Duplicating the coherence for convenience in plotting 77 | all_coherence_pred = np.zeros((*dataset_shape,sequence_length), dtype=np.half) 78 | 79 | # Iterating through deploy dataset in order (dataloader has shuffle=False) 80 | for batch_idx, (batch_preseismic, batch_coseismic) in enumerate(dataloader): 81 | assert isinstance(batch_coseismic, torch.Tensor) 82 | assert isinstance(batch_preseismic, torch.Tensor) 83 | assert batch_preseismic.size(1) == deploy_dataset.event_index 84 | 85 | batch_preseismic = batch_preseismic.to(device) # Preseismic coherence values 86 | batch_coseismic = batch_coseismic.to(device) # Coseismic coherence values 87 | 88 | # Compute without keeping track of gradient information (uses less memory) 89 | with torch.no_grad(): 90 | pred_means, pred_logvars = model(batch_preseismic,generate_dpm=True) # this calls model.forward() 91 | 92 | # Extract the full time series of forecasts 93 | if return_ts: 94 | pred_stds = torch.sqrt(torch.exp(pred_logvars)) 95 | # Compute z-score for all timesteps 96 | # Construct the coherence time series that we're actually trying to predict 97 | batch_pred = torch.cat((batch_preseismic,batch_coseismic.unsqueeze(-1)),dim=1) 98 | score_all = (pred_means-batch_pred) / pred_stds 99 | 100 | # Extract predicted coseismic mean and standard deviation (std) 101 | mean_coseismic = pred_means[:,-1] 102 | logvar_coseismic = pred_logvars[:,-1] 103 | std_coseismic = torch.sqrt(torch.exp(logvar_coseismic)) 104 | 105 | # Compute coseismic z-score 106 | score_coseismic = (mean_coseismic-batch_coseismic) / std_coseismic 107 | 108 | # Store the DPM values 109 | dpm_means[batch_idx] = mean_coseismic.squeeze().cpu().numpy() 110 | dpm_stds[batch_idx] = std_coseismic.squeeze().cpu().numpy() 111 | dpm_scores[batch_idx] = score_coseismic.squeeze().cpu().numpy() 112 | 113 | if return_ts: 114 | # Store all values 115 | all_stds[batch_idx] = pred_stds.squeeze().cpu().numpy() 116 | all_means[batch_idx] = pred_means.squeeze().cpu().numpy() 117 | all_scores[batch_idx] = score_all.squeeze().cpu().numpy() 118 | all_coherence_pred[batch_idx] = batch_pred.squeeze().cpu().numpy() 119 | 120 | result_dic = dict() 121 | result_dic['dpm_means'] = dpm_means # mean for co-event forecast 122 | result_dic['dpm_stds'] = dpm_stds # std. dev. for co-event forecast 123 | result_dic['dpm_scores'] = dpm_scores # z-score for co-event damage proxy map 124 | 125 | # Output values for every timestep 126 | if return_ts: 127 | result_dic['all_means'] = all_means # mean of forecast 128 | result_dic['all_stds'] = all_stds # std. dev. of forecast 129 | result_dic['all_scores'] = all_scores # z score 130 | result_dic['all_coherence_pred'] = all_coherence_pred # coherence values that we're trying to predict 131 | # This duplicates the coherence for convenience. Can remove if coherence datasets are large 132 | 133 | return result_dic 134 | 135 | if __name__ == '__main__': 136 | parser = argparse.ArgumentParser() 137 | parser.add_argument('--config', type=str, 138 | required=True, 139 | help='config JSON file') 140 | parser.add_argument('--config_path', type=str, 141 | required=False, default='config_jsons', 142 | help='path to config json') 143 | parser.add_argument('--dataset_json', type=str, 144 | required=False, default='dataset.json', 145 | help='location of dataset json') 146 | parser.add_argument('--save_dir', type=str, 147 | required=False, default='saved', 148 | help='save directory') 149 | parser.add_argument('--return_ts',type=bool, 150 | required=False, default=False, 151 | help='If true, saves mean, std and z-score for every timestep. Can rerun after training to output full ts with best model') 152 | parser.add_argument('--best_model', default=None, type=str, 153 | required=False, 154 | help='path to PyTorch state_dict from previous training to deploy on the data (skips training, optional)') 155 | parser.add_argument('-d', '--device_id', type=int, 156 | required=False, default=-1, 157 | help='device to use (cpu or gpu)') 158 | args = parser.parse_args() 159 | 160 | # Load config JSON and check for required fields 161 | config = load_json(os.path.join(os.getcwd(), args.config_path, args.config)) 162 | assert 'train_dataset' in config 163 | assert 'deploy_dataset' in config 164 | if 'transform' not in config: 165 | print('Defaulting to logit squared transform on coherence') 166 | config['transform'] = 'logit_squared' 167 | assert config['transform'] in ['logit','logit_squared'] 168 | 169 | # Create save directory 170 | trial_name = args.config[:-5] if args.config[-5:] == '.json' else args.config 171 | save_dir = os.path.join(os.getcwd(), args.save_dir, trial_name) 172 | coseismic_dpm_dir = os.path.join(save_dir,'coseismic_dpm') 173 | full_ts_dir = os.path.join(save_dir,'full_ts') 174 | 175 | # Check if we have output directories, create them if not 176 | if not os.path.exists(save_dir): 177 | os.makedirs(os.path.join(save_dir)) 178 | if not os.path.exists(coseismic_dpm_dir): 179 | os.makedirs(coseismic_dpm_dir) 180 | if args.return_ts: 181 | if not os.path.exists(full_ts_dir): 182 | os.makedirs(full_ts_dir) 183 | 184 | print('Save directory:\t {}'.format(save_dir)) 185 | 186 | # Get device (i.e. GPU or CPU) 187 | device = get_device(args.device_id) 188 | 189 | # Load datasets 190 | dataset_json = load_json(os.path.join(os.getcwd(), args.dataset_json)) 191 | train_dataset = Coherence_Timeseries(dataset_json[config['train_dataset']]) # for training RNN model 192 | deploy_dataset = Coherence_Timeseries(dataset_json[config['deploy_dataset']]) # for generating DPM 193 | 194 | training_params = config['training_hyperparameters'] if 'training_hyperparameters' in config else DEFAULT_TRAINING_PARAMS 195 | 196 | # Sample and fix a random seed if not set in train config 197 | # See https://pytorch.org/docs/stable/notes/randomness.html 198 | if 'seed' not in training_params: 199 | print('Seeding randomly') 200 | training_params['seed'] = random.randint(0, 9999) 201 | else: 202 | print('Using supplied seed') 203 | seed = training_params['seed'] 204 | 205 | # Preprocess training dataset 206 | train_dataset.remove_nans() # remove nans from data 207 | train_dataset.unbound(config['transform']) # apply transform to coherence values 208 | train_dataset.create_test_set(seed=seed) # create test set 209 | 210 | # Load model 211 | model_params = config['model_hyperparameters'] if 'model_hyperparameters' in config else DEFAULT_MODEL_PARAMS 212 | model_params['data_dim'] = train_dataset.data_dim 213 | model = RNN(model_params).to(device) # move model onto device 214 | 215 | # Check if best_model.pth already exists from previous training, or if we're passing a model via the command line 216 | 217 | # If we're passing a previous model from the command line, use that 218 | if args.best_model is not None: 219 | best_model_path = args.best_model 220 | print("Loading a pre-existing model from {}".format(args.best_model)) 221 | 222 | # If we can't find a model from the command line, look for one saved in save_dir 223 | elif os.path.isfile(os.path.join(save_dir,'best_model.pth')): 224 | best_model_path = os.path.join(save_dir,'best_model.pth') 225 | print('Found a best model from previous training: {}'.format(best_model_path)) 226 | 227 | # If we don't have a model yet, we'll have to train one 228 | elif not os.path.isfile(os.path.join(save_dir, 'best_model.pth')): 229 | print("No model supplied or found, starting training") 230 | # Train model 231 | # training_params = config['training_hyperparameters'] if 'training_hyperparameters' in config else DEFAULT_TRAINING_PARAMS 232 | training_params, summary, log = train_model(training_params, model, train_dataset, device, save_dir) 233 | 234 | # Save summary file 235 | with open(os.path.join(save_dir, 'summary.json'), 'w') as f: 236 | json.dump(summary, f, indent=4) 237 | 238 | # Save log file 239 | with open(os.path.join(save_dir, 'log.json'), 'w') as f: 240 | json.dump(log, f, indent=4) 241 | 242 | # Save config JSON (for reproducability) 243 | config['model_hyperparameters'] = model_params 244 | config['training_hyperparameters'] = training_params 245 | with open(os.path.join(save_dir, 'config.json'), 'w') as f: 246 | json.dump(config, f, indent=4) 247 | best_model_path = os.path.join(save_dir,'best_model.pth') 248 | 249 | else: 250 | # Shouldn't end up here! 251 | raise Exception('Problem loading or training a model') 252 | 253 | # Load best model for computing dpm 254 | # Lambda function to deal with GPU vs CPU storage 255 | state_dict = torch.load(best_model_path, map_location=lambda storage, loc: storage) 256 | model.load_state_dict(state_dict) 257 | model = model.eval() 258 | 259 | # Prepare deploy dataset 260 | # deploy_dataset.remove_nans() # Removing nans causes problems with the shape of the output data 261 | # Not necessary for forecasting, although should mask them out if deploying in response scenario 262 | deploy_dataset.unbound(config['transform']) 263 | 264 | dpm_output_dic = compute_scores(model, deploy_dataset, device, args.return_ts) 265 | 266 | dpm_means = dpm_output_dic['dpm_means'] # mean for co-event forecast 267 | dpm_stds = dpm_output_dic['dpm_stds'] # std. dev. for co-event forecast 268 | dpm_scores = dpm_output_dic['dpm_scores'] # z-score for co-event damage proxy map 269 | 270 | # Output values for every timestep 271 | if args.return_ts: 272 | all_means = dpm_output_dic['all_means'] # mean of forecast 273 | all_stds = dpm_output_dic['all_stds'] # std. dev. of forecast 274 | all_scores = dpm_output_dic['all_scores'] # z score 275 | all_coherence_pred = dpm_output_dic['all_coherence_pred'] # coherence values that we're trying to predict 276 | 277 | # full_ts.npz duplicates the coherence time series 278 | np.savez(os.path.join(full_ts_dir,'full_ts.npz'),pred_means=all_means,pred_stds=all_stds,z_scores=all_scores,coherence=all_coherence_pred) 279 | 280 | # Save DPMS 281 | np.save(os.path.join(coseismic_dpm_dir, 'means.npy'), dpm_means) 282 | np.save(os.path.join(coseismic_dpm_dir, 'stds.npy'), dpm_stds) 283 | np.save(os.path.join(coseismic_dpm_dir, 'scores.npy'), dpm_scores) 284 | --------------------------------------------------------------------------------