"""
Code to generate autoregressive data.
Replicates plots in blog post linked below.

Blog post: http://www.jessicayung.com/generating-autoregressive-data-for-experiments=

Author: Jessica Yung
Sept 2018
"""

from generate_data import ARData, fixed_ar_coefficients
import matplotlib.pyplot as plt
import numpy as np


def ar_poles(coeffs):
    """Return the poles of the AR process defined by ``coeffs``.

    ``ARData`` simulates ``x_t = coeffs[0]*x_{t-1} + ... + coeffs[p-1]*x_{t-p}``,
    so the poles are the roots of ``z^p - coeffs[0]*z^(p-1) - ... - coeffs[p-1]``.
    This is the same polynomial ``ARData.generate_coefficients`` uses for its
    stability check (``np.append(1, -theta)``).
    """
    return np.roots(np.append(1, -np.asarray(coeffs)))


def show_series(series, title):
    """Plot ``series`` against its index, label the axes and display it."""
    plt.plot(series)
    plt.xlabel('t')
    plt.ylabel('x_t')
    plt.title(title)
    # Call plt.savefig('<name>.jpg') here to keep a copy of the figure.
    plt.show()


##################################
# Generate AR(5) with unstable poles
##################################

# Number of lagged terms used by the unstable example.
num_unstable_lags = 5

# Generate coefficients until we have at least one unstable pole,
# i.e. a pole with magnitude > 1.
while True:
    unstable_coeffs = np.random.random(num_unstable_lags)
    if np.max(np.abs(ar_poles(unstable_coeffs))) > 1:
        break
print("Poles: {}".format(ar_poles(unstable_coeffs)))

# plot unstable AR data
unstable_ar = ARData(num_datapoints=50, coeffs=unstable_coeffs,
                     num_prev=num_unstable_lags, noise_var=0)

show_series(unstable_ar.y[:10], "Unstable AR data (first 10 dp)")
show_series(unstable_ar.y, "Unstable AR data")


##################################
# Generate AR(5) with stable poles
##################################

# Fix coefficients used so can compare plots with and without noise.
c = fixed_ar_coefficients

# Generate AR(5) with stable poles, no noise
stable_ar = ARData(num_datapoints=50, coeffs=c[5], num_prev=5, noise_var=0)
show_series(stable_ar.y, "Stable AR data (no noise)")

# Generate AR(5) with stable poles, Gaussian noise
stable_ar = ARData(num_datapoints=50, coeffs=c[5], num_prev=5, noise_var=1)
show_series(stable_ar.y, "Stable AR data (noise var = 1)")
class TimeSeriesData:
    """Template class for generating windowed time series data.

    Subclasses implement :meth:`generate_data`, which fills ``self.y`` (and
    optionally ``self.bayes_preds``). The constructor then turns the series
    into ``(window of num_prev values, next value)`` pairs and splits them
    chronologically into training and test sets.
    """

    def __init__(self, num_datapoints, test_size=0.2, max_t=20, num_prev=1,
                 noise_var=1):
        """
        :param num_datapoints: number of (window, target) pairs to produce.
        :param test_size: in (0,1), data to be used in test set as a fraction of all data generated.
        :param max_t: stored on the instance; not used by the code in this module.
        :param num_prev: number of previous values in each input window.
        :param noise_var: variance of the additive noise.
        """
        self.num_datapoints = num_datapoints
        self.test_size = test_size
        self.num_prev = num_prev
        self.max_t = max_t
        self.data = None
        self.noise_var = noise_var
        # Buffer layout used by ARData.generate_data: num_prev initial points
        # + 3*num_prev burn-in points + num_datapoints targets.
        self.y = np.zeros(num_datapoints + num_prev * 4)
        self.bayes_preds = np.copy(self.y)

        # Generate data and reshape data
        self.create_data()

        # Split into training and test sets
        self.train_test_split()

    def create_data(self):
        """Generate the raw series, then slice it into windows and targets."""
        self.generate_data()
        self.reshape_data()

    def generate_data(self):
        """Generates data in self.y, may take as implicit input timesteps self.t.
        May also generate Bayes predictions."""
        raise NotImplementedError("Generate data method not implemented.")

    def reshape_data(self):
        """Build ``self.x`` (one window per row) and align ``self.y`` with it.

        Row ``i`` of ``self.x`` holds ``y[i:i + num_prev]``; after dropping the
        first ``num_prev`` values, ``self.y[i]`` is the value that follows it.
        """
        windows = [self.y[i:i + self.num_prev]
                   for i in range(self.num_datapoints)]
        self.x = np.reshape(windows, (-1, self.num_prev))
        self.y = np.copy(self.y[self.num_prev:])
        self.bayes_preds = np.copy(self.bayes_preds[self.num_prev:])

    def train_test_split(self):
        """Split windows, targets and Bayes predictions chronologically.

        The last ``int(len(self.y) * self.test_size)`` samples form the test
        set. A test count of 0 yields an empty test set; the previous
        ``[:-0]`` slicing emptied the training set instead.
        """
        num_test = int(len(self.y) * self.test_size)

        def split(values):
            cut = max(len(values) - num_test, 0)
            return values[:cut], values[cut:]

        self.X_train, self.X_test = split(self.x)
        self.y_train, self.y_test = split(self.y)
        self.data = (self.X_train, self.X_test, self.y_train, self.y_test)

        self.bayes_train_preds, self.bayes_test_preds = split(self.bayes_preds)
        # Kept as a (train, test) pair, as before.
        self.bayes_preds = (self.bayes_train_preds, self.bayes_test_preds)

    def return_data(self):
        """Return ``(X_train, X_test, y_train, y_test)``."""
        return self.data

    def return_train_test(self):
        """Return ``(X_train, y_train, X_test, y_test)``."""
        return self.X_train, self.y_train, self.X_test, self.y_test


class ARData(TimeSeriesData):
    """Class to generate autoregressive data.

    Simulates ``x_t = coeffs[0]*x_{t-1} + ... + coeffs[p-1]*x_{t-p} + noise``
    with ``p = num_prev``.
    """

    def __init__(self, *args, coeffs=None, **kwargs):
        """
        :param coeffs: optional AR coefficients, one per lag (most recent lag
            first). If omitted, random stable coefficients are drawn.
        Remaining arguments are forwarded to :class:`TimeSeriesData`.
        """
        self.given_coeffs = coeffs
        # num_prev is the 4th positional parameter of TimeSeriesData.__init__.
        # When the caller did not supply it, derive it from the coefficients
        # rather than failing later on a shape mismatch.
        num_prev_given = len(args) >= 4 or 'num_prev' in kwargs
        if coeffs is not None and not num_prev_given:
            kwargs['num_prev'] = len(coeffs)
        # self.num_prev is deliberately not modified after construction:
        # X_train/X_test were built with windows of exactly num_prev columns.
        super().__init__(*args, **kwargs)

    def generate_data(self):
        """Fill ``self.y`` and ``self.bayes_preds`` with the simulated series."""
        self.generate_coefficients()
        self.generate_initial_points()

        # Generate 3*num_prev extra datapoints that are cut afterwards so the
        # dist is more stationary (else initial num_prev datapoints will stand
        # out as diff dist).
        burn_in = 3 * self.num_prev
        for i in range(self.num_datapoints + burn_in):
            target = i + self.num_prev
            # Generate y value if there was no noise
            # (equivalent to Bayes predictions: predictions from oracle that
            # knows true parameters (coefficients)).
            # The window is reversed so coeffs[0] multiplies the latest value.
            self.bayes_preds[target] = np.dot(
                self.y[i:target][::-1], self.coeffs)
            # Add noise
            self.y[target] = self.bayes_preds[target] + self.noise()

        # Cut the burn-in points so dist is roughly stationary
        self.bayes_preds = self.bayes_preds[burn_in:]
        self.y = self.y[burn_in:]

    def generate_coefficients(self):
        """Set ``self.coeffs`` from the given coefficients or draw stable ones.

        :raises ValueError: if the given coefficients do not have exactly
            ``num_prev`` entries.
        """
        if self.given_coeffs is not None:
            if len(self.given_coeffs) != self.num_prev:
                raise ValueError(
                    "Expected {} coefficients (num_prev), got {}.".format(
                        self.num_prev, len(self.given_coeffs)))
            self.coeffs = self.given_coeffs
            return

        # Keep generating coefficients until we come across a set of
        # coefficients that correspond to stable poles
        while True:
            true_theta = np.random.random(self.num_prev) - 0.5
            coefficients = np.append(1, -true_theta)
            # check if magnitude of all poles is less than one
            if np.max(np.abs(np.roots(coefficients))) < 1:
                break
        self.coeffs = true_theta

    def generate_initial_points(self):
        """Seed the first ``num_prev`` values of the series."""
        # Initial datapoints distributed as N(0,1)
        self.y[:self.num_prev] = np.random.randn(self.num_prev)

    def noise(self):
        """Draw one noise sample distributed as N(0, self.noise_var)."""
        # Scale by the standard deviation so the variance is noise_var.
        return np.sqrt(self.noise_var) * np.random.randn()


# A set of coefficients that are stable (to produce replicable plots, experiments)
fixed_ar_coefficients = {
    2: [0.46152873, -0.29890739],
    5: [0.02519834, -0.24396899, 0.2785921, 0.14682383, 0.39390468],
    10: [-0.10958935, -0.34564819, 0.3682048, 0.3134046, -0.21553732, 0.34613629,
         0.41916508, 0.0165352, 0.14163503, -0.38844378],
    20: [0.1937815, 0.01201026, 0.00464018, -0.21887467, -0.20113385, -0.02322278,
         0.34285319, -0.21069086, 0.06604683, -0.22377364, 0.11714593, -0.07122126,
         -0.16346554, 0.03174824, 0.308584, 0.06881604, 0.24840789, -0.32735569,
         0.21939492, 0.3996207],
}

"""
Example of using fixed coefficients (consistency across tests of different models)

data = ARData(100, coeffs=fixed_ar_coefficients[5], num_prev=5)
plt.plot(data.y_train)
"""
"""
Code to generate autoregressive data.
Replicates plots in blog post linked below.

Blog post: http://www.jessicayung.com/generating-autoregressive-data-for-experiments=

Author: Jessica Yung
Sept 2018
"""

from generate_data import ARData, fixed_ar_coefficients
import matplotlib.pyplot as plt
import numpy as np


def ar_poles(coeffs):
    """Return the poles of the AR process defined by ``coeffs``.

    ``ARData`` simulates ``x_t = coeffs[0]*x_{t-1} + ... + coeffs[p-1]*x_{t-p}``,
    so the poles are the roots of ``z^p - coeffs[0]*z^(p-1) - ... - coeffs[p-1]``.
    This is the same polynomial ``ARData.generate_coefficients`` uses for its
    stability check (``np.append(1, -theta)``).
    """
    return np.roots(np.append(1, -np.asarray(coeffs)))


def show_series(series, title):
    """Plot ``series`` against its index, label the axes and display it."""
    plt.plot(series)
    plt.xlabel('t')
    plt.ylabel('x_t')
    plt.title(title)
    # Call plt.savefig('<name>.jpg') here to keep a copy of the figure.
    plt.show()


##################################
# Generate AR(5) with unstable poles
##################################

# Number of lagged terms used by the unstable example.
num_unstable_lags = 5

# Generate coefficients until we have at least one unstable pole,
# i.e. a pole with magnitude > 1.
while True:
    unstable_coeffs = np.random.random(num_unstable_lags)
    if np.max(np.abs(ar_poles(unstable_coeffs))) > 1:
        break
print("Poles: {}".format(ar_poles(unstable_coeffs)))

# plot unstable AR data
unstable_ar = ARData(num_datapoints=50, coeffs=unstable_coeffs,
                     num_prev=num_unstable_lags, noise_var=0)

show_series(unstable_ar.y[:10], "Unstable AR data (first 10 dp)")
show_series(unstable_ar.y, "Unstable AR data")


##################################
# Generate AR(5) with stable poles
##################################

# Fix coefficients used so can compare plots with and without noise.
c = fixed_ar_coefficients

# Generate AR(5) with stable poles, no noise
stable_ar = ARData(num_datapoints=50, coeffs=c[5], num_prev=5, noise_var=0)
show_series(stable_ar.y, "Stable AR data (no noise)")

# Generate AR(5) with stable poles, Gaussian noise
stable_ar = ARData(num_datapoints=50, coeffs=c[5], num_prev=5, noise_var=1)
show_series(stable_ar.y, "Stable AR data (noise var = 1)")
class TimeSeriesData:
    """Template class for generating windowed time series data.

    Subclasses implement :meth:`generate_data`, which fills ``self.y`` (and
    optionally ``self.bayes_preds``). The constructor then turns the series
    into ``(window of num_prev values, next value)`` pairs and splits them
    chronologically into training and test sets.
    """

    def __init__(self, num_datapoints, test_size=0.2, max_t=20, num_prev=1,
                 noise_var=1):
        """
        :param num_datapoints: number of (window, target) pairs to produce.
        :param test_size: in (0,1), data to be used in test set as a fraction of all data generated.
        :param max_t: stored on the instance; not used by the code in this module.
        :param num_prev: number of previous values in each input window.
        :param noise_var: variance of the additive noise.
        """
        self.num_datapoints = num_datapoints
        self.test_size = test_size
        self.num_prev = num_prev
        self.max_t = max_t
        self.data = None
        self.noise_var = noise_var
        # Buffer layout used by ARData.generate_data: num_prev initial points
        # + 3*num_prev burn-in points + num_datapoints targets.
        self.y = np.zeros(num_datapoints + num_prev * 4)
        self.bayes_preds = np.copy(self.y)

        # Generate data and reshape data
        self.create_data()

        # Split into training and test sets
        self.train_test_split()

    def create_data(self):
        """Generate the raw series, then slice it into windows and targets."""
        self.generate_data()
        self.reshape_data()

    def generate_data(self):
        """Generates data in self.y, may take as implicit input timesteps self.t.
        May also generate Bayes predictions."""
        raise NotImplementedError("Generate data method not implemented.")

    def reshape_data(self):
        """Build ``self.x`` (one window per row) and align ``self.y`` with it.

        Row ``i`` of ``self.x`` holds ``y[i:i + num_prev]``; after dropping the
        first ``num_prev`` values, ``self.y[i]`` is the value that follows it.
        """
        windows = [self.y[i:i + self.num_prev]
                   for i in range(self.num_datapoints)]
        self.x = np.reshape(windows, (-1, self.num_prev))
        self.y = np.copy(self.y[self.num_prev:])
        self.bayes_preds = np.copy(self.bayes_preds[self.num_prev:])

    def train_test_split(self):
        """Split windows, targets and Bayes predictions chronologically.

        The last ``int(len(self.y) * self.test_size)`` samples form the test
        set. A test count of 0 yields an empty test set; the previous
        ``[:-0]`` slicing emptied the training set instead.
        """
        num_test = int(len(self.y) * self.test_size)

        def split(values):
            cut = max(len(values) - num_test, 0)
            return values[:cut], values[cut:]

        self.X_train, self.X_test = split(self.x)
        self.y_train, self.y_test = split(self.y)
        self.data = (self.X_train, self.X_test, self.y_train, self.y_test)

        self.bayes_train_preds, self.bayes_test_preds = split(self.bayes_preds)
        # Kept as a (train, test) pair, as before.
        self.bayes_preds = (self.bayes_train_preds, self.bayes_test_preds)

    def return_data(self):
        """Return ``(X_train, X_test, y_train, y_test)``."""
        return self.data

    def return_train_test(self):
        """Return ``(X_train, y_train, X_test, y_test)``."""
        return self.X_train, self.y_train, self.X_test, self.y_test


class ARData(TimeSeriesData):
    """Class to generate autoregressive data.

    Simulates ``x_t = coeffs[0]*x_{t-1} + ... + coeffs[p-1]*x_{t-p} + noise``
    with ``p = num_prev``.
    """

    def __init__(self, *args, coeffs=None, **kwargs):
        """
        :param coeffs: optional AR coefficients, one per lag (most recent lag
            first). If omitted, random stable coefficients are drawn.
        Remaining arguments are forwarded to :class:`TimeSeriesData`.
        """
        self.given_coeffs = coeffs
        # num_prev is the 4th positional parameter of TimeSeriesData.__init__.
        # When the caller did not supply it, derive it from the coefficients
        # rather than failing later on a shape mismatch.
        num_prev_given = len(args) >= 4 or 'num_prev' in kwargs
        if coeffs is not None and not num_prev_given:
            kwargs['num_prev'] = len(coeffs)
        # self.num_prev is deliberately not modified after construction:
        # X_train/X_test were built with windows of exactly num_prev columns.
        super().__init__(*args, **kwargs)

    def generate_data(self):
        """Fill ``self.y`` and ``self.bayes_preds`` with the simulated series."""
        self.generate_coefficients()
        self.generate_initial_points()

        # Generate 3*num_prev extra datapoints that are cut afterwards so the
        # dist is more stationary (else initial num_prev datapoints will stand
        # out as diff dist).
        burn_in = 3 * self.num_prev
        for i in range(self.num_datapoints + burn_in):
            target = i + self.num_prev
            # Generate y value if there was no noise
            # (equivalent to Bayes predictions: predictions from oracle that
            # knows true parameters (coefficients)).
            # The window is reversed so coeffs[0] multiplies the latest value.
            self.bayes_preds[target] = np.dot(
                self.y[i:target][::-1], self.coeffs)
            # Add noise
            self.y[target] = self.bayes_preds[target] + self.noise()

        # Cut the burn-in points so dist is roughly stationary
        self.bayes_preds = self.bayes_preds[burn_in:]
        self.y = self.y[burn_in:]

    def generate_coefficients(self):
        """Set ``self.coeffs`` from the given coefficients or draw stable ones.

        :raises ValueError: if the given coefficients do not have exactly
            ``num_prev`` entries.
        """
        if self.given_coeffs is not None:
            if len(self.given_coeffs) != self.num_prev:
                raise ValueError(
                    "Expected {} coefficients (num_prev), got {}.".format(
                        self.num_prev, len(self.given_coeffs)))
            self.coeffs = self.given_coeffs
            return

        # Keep generating coefficients until we come across a set of
        # coefficients that correspond to stable poles
        while True:
            true_theta = np.random.random(self.num_prev) - 0.5
            coefficients = np.append(1, -true_theta)
            # check if magnitude of all poles is less than one
            if np.max(np.abs(np.roots(coefficients))) < 1:
                break
        self.coeffs = true_theta

    def generate_initial_points(self):
        """Seed the first ``num_prev`` values of the series."""
        # Initial datapoints distributed as N(0,1)
        self.y[:self.num_prev] = np.random.randn(self.num_prev)

    def noise(self):
        """Draw one noise sample distributed as N(0, self.noise_var)."""
        # Scale by the standard deviation so the variance is noise_var.
        return np.sqrt(self.noise_var) * np.random.randn()


# A set of coefficients that are stable (to produce replicable plots, experiments)
fixed_ar_coefficients = {
    2: [0.46152873, -0.29890739],
    5: [0.02519834, -0.24396899, 0.2785921, 0.14682383, 0.39390468],
    10: [-0.10958935, -0.34564819, 0.3682048, 0.3134046, -0.21553732, 0.34613629,
         0.41916508, 0.0165352, 0.14163503, -0.38844378],
    20: [0.1937815, 0.01201026, 0.00464018, -0.21887467, -0.20113385, -0.02322278,
         0.34285319, -0.21069086, 0.06604683, -0.22377364, 0.11714593, -0.07122126,
         -0.16346554, 0.03174824, 0.308584, 0.06881604, 0.24840789, -0.32735569,
         0.21939492, 0.3996207],
}

"""
Example of using fixed coefficients (consistency across tests of different models)

data = ARData(100, coeffs=fixed_ar_coefficients[5], num_prev=5)
plt.plot(data.y_train)
"""
import numpy as np
import torch
import torch.nn as nn
from generate_data import ARData, fixed_ar_coefficients
import matplotlib.pyplot as plt

#####################
# Set parameters
#####################

# Data params
noise_var = 0
num_datapoints = 100
test_size = 0.2
num_train = int((1-test_size) * num_datapoints)

# Network params
input_size = 20
# If `per_element` is True, then LSTM reads in one timestep at a time.
per_element = True
lstm_input_size = 1 if per_element else input_size
# size of hidden layers
h1 = 32
output_dim = 1
num_layers = 2
learning_rate = 1e-3
num_epochs = 500
dtype = torch.float

#####################
# Generate data
#####################
data = ARData(num_datapoints, num_prev=input_size, test_size=test_size,
              noise_var=noise_var, coeffs=fixed_ar_coefficients[input_size])


def to_lstm_input(windows):
    """Convert a (num_samples, input_size) array into an nn.LSTM input tensor.

    nn.LSTM (batch_first=False) expects (seq_len, batch, features).
    With ``per_element`` each window is a sequence of ``input_size`` scalar
    steps, so the array is transposed to (input_size, num_samples, 1).
    A plain ``.view`` would only reinterpret the row-major buffer and mix
    values from different samples into one sequence.
    Otherwise the whole window is fed as a single step:
    (1, num_samples, input_size).
    """
    tensor = torch.from_numpy(windows).type(torch.Tensor)
    if per_element:
        return tensor.t().unsqueeze(-1).contiguous()
    return tensor.unsqueeze(0)


# make training and test sets in torch
X_train = to_lstm_input(data.X_train)
X_test = to_lstm_input(data.X_test)
y_train = torch.from_numpy(data.y_train).type(torch.Tensor).view(-1)
y_test = torch.from_numpy(data.y_test).type(torch.Tensor).view(-1)
#####################
# Build model
#####################

# Here we define our model as a class
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last timestep.

    :param input_dim: number of features per timestep.
    :param hidden_dim: size of the LSTM hidden state.
    :param batch_size: default batch size; used by ``init_hidden`` and as a
        fallback in ``forward`` when the input is not 3-D.
    :param output_dim: size of the linear output per sample.
    :param num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, batch_size, output_dim=1,
                 num_layers=2):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers

        # Define the LSTM layer
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)

        # Define the output layer
        self.linear = nn.Linear(self.hidden_dim, output_dim)

    def init_hidden(self):
        """Return zero (h_0, c_0), each (num_layers, batch_size, hidden_dim)."""
        # This is what we'll initialise our hidden state as
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))

    def forward(self, input):
        """Return one prediction per sequence, flattened to 1-D.

        :param input: tensor of shape (seq_len, batch, input_dim). Any other
            rank is reshaped to (seq_len, self.batch_size, -1) as before.
        """
        # Take the batch size from a 3-D input so the same model can be run
        # on batches other than the one it was constructed for.
        batch_size = input.shape[1] if input.dim() == 3 else self.batch_size

        # Forward pass through LSTM layer
        # shape of lstm_out: [seq_len, batch_size, hidden_dim]
        # shape of self.hidden: (a, b), where a and b both
        # have shape (num_layers, batch_size, hidden_dim).
        # reshape (not view) so non-contiguous inputs are accepted too.
        lstm_out, self.hidden = self.lstm(
            input.reshape(len(input), batch_size, -1))

        # Only take the output from the final timestep
        # Can pass on the entirety of lstm_out to the next layer if it is a seq2seq prediction
        y_pred = self.linear(lstm_out[-1].reshape(batch_size, -1))
        return y_pred.view(-1)
model = LSTM(lstm_input_size, h1, batch_size=num_train, output_dim=output_dim, num_layers=num_layers)

# `size_average=False` is deprecated; `reduction='sum'` is the equivalent:
# the loss is the summed (not averaged) squared error over the batch.
loss_fn = torch.nn.MSELoss(reduction='sum')

optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)

#####################
# Train model
#####################

# Training loss recorded once per epoch
hist = np.zeros(num_epochs)

for t in range(num_epochs):
    # Initialise hidden state
    # Don't do this if you want your LSTM to be stateful
    # NOTE: forward() in this file calls self.lstm without passing a hidden
    # state, so this assignment only resets the stored attribute.
    model.hidden = model.init_hidden()

    # Forward pass
    y_pred = model(X_train)

    loss = loss_fn(y_pred, y_train)
    epoch_loss = loss.item()
    if t % 100 == 0:
        print("Epoch ", t, "MSE: ", epoch_loss)
    hist[t] = epoch_loss

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()

#####################
# Plot preds and performance
#####################

# Predictions from the final epoch against the training targets
plt.plot(y_pred.detach().numpy(), label="Preds")
plt.plot(y_train.detach().numpy(), label="Data")
plt.legend()
plt.show()

plt.plot(hist, label="Training loss")
plt.legend()
plt.show()
"""
Appends the current day of the week and time to a file.

Script written for crontab tutorial.

Author: Jessica Yung 2016

"""
import time

filename = "record_time.txt"

# Timestamp formatted like "Sun 10:00:00"
current_time = time.strftime('%a %H:%M:%S')

# 'a' opens the file in append mode, so earlier records are kept.
with open(filename, 'a') as handle:
    # One record per line: the timestamp followed by a newline
    handle.write(str(current_time) + '\n')