├── .gitignore
├── LICENSE
├── Model
│   ├── EncDec.py
│   └── RNN_Vanilla.py
├── Notebooks
│   ├── .gitignore
│   ├── HyperParameterTuning.ipynb
│   ├── SinWaves_EncDec.ipynb
│   └── SinWaves_RNN.ipynb
├── README.md
├── Saved_models
│   └── .gitkeep
├── Utils
│   ├── Create_and_Train.py
│   ├── SeqData.py
│   ├── Trainer.py
│   └── __init__.py
├── environment.yml
├── imports.py
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # C extensions
6 | *.so
7 | 
8 | # Distribution / packaging
9 | bin/
10 | build/
11 | develop-eggs/
12 | dist/
13 | eggs/
14 | lib/
15 | lib64/
16 | parts/
17 | sdist/
18 | var/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | 
23 | # Installer logs
24 | pip-log.txt
25 | pip-delete-this-directory.txt
26 | 
27 | # Unit test / coverage reports
28 | .tox/
29 | .coverage
30 | .cache
31 | nosetests.xml
32 | coverage.xml
33 | 
34 | # Translations
35 | *.mo
36 | 
37 | # Mr Developer
38 | .mr.developer.cfg
39 | .project
40 | .pydevproject
41 | 
42 | # Rope
43 | .ropeproject
44 | 
45 | # Django stuff:
46 | *.log
47 | *.pot
48 | 
49 | # Sphinx documentation
50 | docs/_build/
51 | 
52 | 
53 | Saved_models/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [2023] [Rakesh Kumar Yadav]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/Model/EncDec.py:
--------------------------------------------------------------------------------
1 | # Author: Rakesh K. Yadav, 2023
2 | 
3 | import torch
4 | import torch.nn as nn
5 | 
6 | class Encoder(nn.Module):
7 | 
8 |     """Encoder layer to encode a sequence to a hidden state"""
9 | 
10 |     def __init__(self, input_size, hidden_size, num_layers, flavor):
11 | 
12 |         """
13 |         Parameters
14 |         ----------
15 |         input_size: int
16 |             This is the same as number of features in traditional lingo.
17 |             For univariate time series, this would be 1 and greater than 1
18 |             for multivariate time series.
19 |         hidden_size: int
20 |             Number of hidden units in the RNN model
21 |         num_layers: int
22 |             Number of layers in the RNN model
23 |         flavor: str
24 |             Takes 'rnn', 'lstm', or 'gru' values.
25 |         """
26 | 
27 |         # inherit the nn.Module class via 'super'
28 |         super(Encoder, self).__init__()
29 | 
30 |         # store stuff in the class
31 |         self.input_size = input_size
32 |         self.hidden_size = hidden_size
33 |         self.num_layers = num_layers
34 |         self.flavor = flavor
35 | 
36 |         if flavor=='rnn':
37 |             self.rnn = nn.RNN(input_size, hidden_size, num_layers)
38 |         elif flavor=='lstm':
39 |             self.rnn = nn.LSTM(input_size, hidden_size, num_layers)
40 |         elif flavor=='gru':
41 |             self.rnn = nn.GRU(input_size, hidden_size, num_layers)
42 | 
43 |     def forward(self, x): # x must be: [seq, batch_size]
44 | 
45 |         # gather weights in a contiguous memory location for
46 |         # more efficient processing
47 |         self.rnn.flatten_parameters()
48 | 
49 |         # initialize hidden state with appropriate size
50 |         h0 = torch.zeros(self.num_layers, x.size(1),
51 |                          self.hidden_size, device=x.device)
52 | 
53 |         if self.flavor=='lstm':
54 |             # cell state only for lstm
55 |             c0 = torch.zeros_like(h0)
56 |             _, hidden = self.rnn(x.view(x.shape[0], x.shape[1],
57 |                                         self.input_size), (h0,c0))
58 |         else:
59 |             _, hidden = self.rnn(x.view(x.shape[0], x.shape[1],
60 |                                         self.input_size), h0)
61 | 
62 |         return hidden
63 | 
64 | class Decoder(nn.Module):
65 | 
66 |     """
67 |     Decoder layer which uses a hidden state from the encoder layer
68 |     and makes predictions
69 |     """
70 | 
71 |     def __init__(self, input_size, hidden_size, num_layers, flavor):
72 | 
73 |         """
74 |         Parameters
75 |         ----------
76 |         input_size: int
77 |             This is the same as number of features in traditional lingo.
78 |             For univariate time series, this would be 1 and greater than 1
79 |             for multivariate time series.
80 |         hidden_size: int
81 |             Number of hidden units in the RNN model
82 |         num_layers: int
83 |             Number of layers in the RNN model
84 |         flavor: str
85 |             Takes 'rnn', 'lstm', or 'gru' values.
86 |         """
87 | 
88 |         # inherit the nn.Module class via 'super'
89 |         super(Decoder, self).__init__()
90 | 
91 |         # store stuff in the class
92 |         self.input_size = input_size
93 |         self.hidden_size = hidden_size
94 |         self.num_layers = num_layers
95 |         self.flavor = flavor
96 | 
97 |         if flavor=='rnn':
98 |             self.rnn = nn.RNN(input_size, hidden_size, num_layers)
99 |         elif flavor=='lstm':
100 |             self.rnn = nn.LSTM(input_size, hidden_size, num_layers)
101 |         elif flavor=='gru':
102 |             self.rnn = nn.GRU(input_size, hidden_size, num_layers)
103 | 
104 |         # will be used at the end of the RNN to do many-to-1 operation
105 |         self.linear = nn.Linear(hidden_size, input_size)
106 | 
107 |     def forward(self, x, encoder_hidden):
108 | 
109 |         # gather weights in a contiguous memory location for
110 |         # more efficient processing
111 |         self.rnn.flatten_parameters()
112 | 
113 |         # x: [batch_size, input_size] is the end point of the primary input seq
114 |         # view input as [1, batch_size, input_size] using unsqueeze
115 |         out, hidden = self.rnn(x.unsqueeze(0), encoder_hidden)
116 | 
117 |         # out shape: [seq_len(1), batch, hidden_size]
118 |         out = self.linear(out.squeeze(0))
119 | 
120 |         return out, hidden
121 | 
122 | class EncoderDecoder(nn.Module):
123 | 
124 |     """Combines the encoder and decoder classes to define a global model"""
125 | 
126 |     def __init__(self, encoder, decoder, npred):
127 | 
128 |         """
129 |         Parameters
130 |         ----------
131 |         encoder: class
132 |             RNN class that encodes a sequence to a hidden state
133 |         decoder: class
134 |             RNN class that takes in an encoder hidden state and
135 |             the last point of the input sequence to make predictions
136 |         npred: int
137 |             Number of points to predict
138 |         """
139 | 
140 |         # inherit the nn.Module class via 'super'
141 |         super(EncoderDecoder, self).__init__()
142 | 
143 |         # store stuff in the class
144 |         self.enc = encoder
145 |         self.dec = decoder
146 |         self.npred = npred
147 | 
148 |     def forward(self, x): # x shape: [seq, batch]
149 | 
150 |         local_batch_size = 
x.shape[1]
151 |         target_len = self.npred
152 | 
153 |         # convert to [seq, batch, 1]
154 |         # 1 is for univariate sequence
155 |         input_batch = x.unsqueeze(2)
156 | 
157 |         # initialize output array to be filled with predictions
158 |         outs = torch.zeros(target_len, local_batch_size,
159 |                            input_batch.shape[2], device=x.device)
160 | 
161 |         # STEP 1: obtain the encoder hidden state for the inputs
162 |         enc_hid = self.enc(input_batch)
163 | 
164 |         # STEP 2.1: grab last point of input batch for decoder
165 |         dec_in = input_batch[-1, :, :] # shape: (batch_size, input_size)
166 | 
167 |         # STEP 2.2: assign the encoder hidden state to the decoder hidden
168 |         dec_hid = enc_hid
169 | 
170 |         # STEP 3: make prediction like a traditional RNN point-by-point
171 |         # by using the predicted point as new input
172 |         for t in range(target_len):
173 |             # note that the dec_hid is being continuously rewritten
174 |             dec_out, dec_hid = self.dec(dec_in, dec_hid)
175 |             # store the prediction
176 |             outs[t] = dec_out
177 |             # feed back the prediction as input to the decoder
178 |             dec_in = dec_out
179 | 
180 |         return outs.reshape(target_len, local_batch_size)
181 | 
--------------------------------------------------------------------------------
/Model/RNN_Vanilla.py:
--------------------------------------------------------------------------------
1 | # Author: Rakesh K. Yadav, 2023
2 | 
3 | 
4 | import torch.nn as nn
5 | import torch
6 | 
7 | class RNNs(nn.Module):
8 | 
9 |     """Model class to declare an rnn and define a forward pass of the model."""
10 | 
11 |     def __init__(self, input_size, hidden_size, num_layers, flavor):
12 | 
13 |         """
14 |         Parameters
15 |         ----------
16 |         input_size: int
17 |             This is the same as number of features in traditional lingo.
18 |             For univariate time series, this would be 1 and greater than 1
19 |             for multivariate time series.
20 |         hidden_size: int
21 |             Number of hidden units in the RNN model
22 |         num_layers: int
23 |             Number of layers in the RNN model
24 |         flavor: str
25 |             Takes 'rnn', 'lstm', or 'gru' values.
26 |         """
27 | 
28 |         # inherit the nn.Module class via 'super'
29 |         super(RNNs, self).__init__()
30 | 
31 |         # store stuff in the class
32 |         self.num_layers = num_layers
33 |         self.hidden_size = hidden_size
34 |         self.flavor = flavor
35 | 
36 |         if flavor=='rnn':
37 |             self.rnn = nn.RNN(input_size, hidden_size, num_layers)
38 |         elif flavor=='lstm':
39 |             self.rnn = nn.LSTM(input_size, hidden_size, num_layers)
40 |         elif flavor=='gru':
41 |             self.rnn = nn.GRU(input_size, hidden_size, num_layers)
42 | 
43 |         # will be used at the end of the RNN to do many-to-1 operation
44 |         self.linear = nn.Linear(hidden_size, 1)
45 | 
46 |     def forward(self, x):
47 | 
48 |         # rnn module expects data of shape [seq, batch_size, input_size]
49 |         # since we have a univariate time series data, we need to add
50 |         # a dimension at the end
51 |         x = x.unsqueeze(2) # shape now: [seq, batch_size, 1]
52 | 
53 |         batch_size = x.size(1)
54 | 
55 |         # initialize hidden state with appropriate size
56 |         h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)
57 | 
58 |         if self.flavor=='lstm':
59 |             #cell state for lstm only
60 |             c0 = torch.zeros_like(h0)
61 |             out, _ = self.rnn(x, (h0, c0))
62 |         else:
63 |             out, _ = self.rnn(x, h0)
64 | 
65 |         # use the last index of RNN output
66 |         out = out[-1,:,:] # shape: [batch_size, hidden_size]
67 | 
68 |         # run the data through a fully connected layer and return 1-point prediction
69 |         # for each sequence in the batch
70 |         return self.linear(out).reshape(1, batch_size)
71 | 
--------------------------------------------------------------------------------
/Notebooks/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyTorch tutorial on using RNNs and Encoder-Decoder RNNs for time series forecasting and hyperparameter tuning
2 | 
3 | ## Some blabber
4 | 
5 | This package resulted from my effort to write a simple PyTorch-based ML package that uses recurrent neural networks (RNNs) to forecast a given time series.
6 | 
7 | You must be wondering why you should bother with this package since there is a lot of stuff on the internet on this topic. Well, let me tell you that I have traveled the internet lanes and I was really frustrated by how scattered the information is in this context. It took a lot of effort to collect all the relevant parts from the internet and construct this package.
8 | 
9 | I had only a basic background in ML and zero knowledge of PyTorch (using Keras doesn't prepare you for PyTorch :stuck_out_tongue:) when I started writing this package. But that actually ended up being a blessing in disguise. Since I was starting from scratch, I was able to write the code in a way that is intuitive and easy to understand for people who are new to the subject.
10 | 
11 | So if you're feeling lost and frustrated, give this package a try. It might just help you understand not only RNNs, but PyTorch as well. And who knows, you might even have a little fun along the way.
12 | 
13 | ## Code Functionalities
14 | 1. Many-to-One prediction using PyTorch's vanilla versions of RNN, LSTM, and GRU.
15 | 2. Many-to-Many (or Seq2Seq) prediction using an Encoder-Decoder architecture; the base units can be RNN, LSTM, or GRU.
16 | 3. Hyperparameter tuning! It uses the [Optuna](https://optuna.org/) library for that.
17 | 4. Save PyTorch models, as well as reload and train them further.
18 | 5. Works on any **univariate** and **equispaced** time series data.
19 | 6. Can use GPUs.
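As a quick orientation, here is a minimal, self-contained sketch of how the Many-to-One model consumes data. The `RNNs` class below is a condensed copy of the one in `Model/RNN_Vanilla.py` (same constructor signature and `[seq, batch]` input convention); the sine-wave batch is synthetic and only illustrates the tensor shapes.

```python
import math
import torch
import torch.nn as nn

class RNNs(nn.Module):
    """Condensed version of the RNNs class in Model/RNN_Vanilla.py."""
    def __init__(self, input_size, hidden_size, num_layers, flavor):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.flavor = flavor
        rnn_cls = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}[flavor]
        self.rnn = rnn_cls(input_size, hidden_size, num_layers)
        self.linear = nn.Linear(hidden_size, 1)   # many-to-one head

    def forward(self, x):                         # x: [seq, batch_size]
        x = x.unsqueeze(2)                        # -> [seq, batch_size, 1]
        batch_size = x.size(1)
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        if self.flavor == 'lstm':                 # LSTM also needs a cell state
            out, _ = self.rnn(x, (h0, torch.zeros_like(h0)))
        else:
            out, _ = self.rnn(x, h0)
        # last time step -> linear layer -> one prediction per sequence
        return self.linear(out[-1, :, :]).reshape(1, batch_size)

# a batch of 8 phase-shifted sine snippets, 50 points each, shaped [seq, batch]
t = torch.linspace(0, 2 * math.pi, 51)
x = torch.stack([torch.sin(t[:-1] + 0.1 * i) for i in range(8)], dim=1)

model = RNNs(input_size=1, hidden_size=16, num_layers=2, flavor='gru')
y = model(x)                                      # next-point prediction
print(y.shape)                                    # torch.Size([1, 8])
```

The Encoder-Decoder model in `Model/EncDec.py` follows the same `[seq, batch]` input convention but returns `npred` points per sequence instead of one.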
20 | 
21 | ## Usage
22 | The best way to figure out how to use this package is to check out the example notebooks available in the `Notebooks` folder.
23 | 
24 | I have also made a sample notebook available in Google Colab!
25 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1dsP3FhY-qghqfcmn6TLaUkuyCaWAl8WL?usp=sharing)
26 | 
27 | ## Code Structure
28 | I have structured the code so that different operations are abstracted away in Python classes. Here is a brief summary:
29 | 
30 | * `Model`: Directory - contains the classes which define the RNN models. _RNN_Vanilla.py_ defines the Many-to-One RNN (the traditional kind). The _EncDec.py_ file defines the Encoder-Decoder class, which uses the traditional RNN units as Encoder and Decoder modules and combines them to provide a one-shot Many-to-Many prediction.
31 | 
32 | * `Notebooks`: Directory - example notebooks which demonstrate how to use the code on sample time series data consisting of multi-frequency sine waves. It also contains a notebook which demonstrates how to perform hyperparameter tuning using Optuna.
33 | 
34 | * `Saved_models`: Directory, empty - used to store the output from the _Create_and_Train.py_ file.
35 | 
36 | * `Utils`: Directory - contains all the class files which do the data prep, training, testing, validation, and predicting.
37 |     * _Trainer.py_ contains the training loop, a test function to run the model on test data, as well as functions to make predictions.
38 |     * _SeqData.py_ is used to create a sequenced dataset, in torch tensor format, from a 1D NumPy time series.
39 |     * _Create_and_Train.py_ is THE main file: it creates a model (using the classes in the `Model` directory), runs the epoch loop, and saves PyTorch models and train-test loss curves.
40 | 
41 | * The `imports.py` file is used by the notebooks present in the `Notebooks` folder.
42 | 
43 | * The `requirements.txt` file can be used in conjunction with pip to install the required packages.
44 | 
45 | ## Limitations
46 | I haven't generalized the code to use multivariate time series data for the sake of simplicity. But it is relatively easy to do. If interested, open an issue in the repo's _Issues_ section and we can collaborate!
47 | 
48 | ### Note
49 | I also recommend checking out my colleague's [implementation](https://github.com/lkulowski/LSTM_encoder_decoder) of RNNs in PyTorch.
50 | 
--------------------------------------------------------------------------------
/Saved_models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rakesh-yadav/PyTorch-RNN-Tutorial/92b588a7341f4eafee0f7c08c2a6943cbb5ccfae/Saved_models/.gitkeep
--------------------------------------------------------------------------------
/Utils/Create_and_Train.py:
--------------------------------------------------------------------------------
1 | # Author: Rakesh K. Yadav, 2023
2 | 
3 | 
4 | import os
5 | import glob
6 | import numpy as np
7 | import matplotlib.pyplot as plt
8 | import seaborn as sns
9 | sns.set_theme(style="darkgrid")
10 | import sys
11 | import torch
12 | import torch.nn as nn
13 | import torch.optim as optim
14 | 
15 | from .Trainer import *
16 | 
17 | class Create_and_Train():
18 | 
19 |     """
20 |     This class performs model declaration, training, testing, validation,
21 |     and saving of the model and loss curves.
22 |     """
23 | 
24 |     def __init__(self, params, data, device):
25 | 
26 |         """
27 |         Parameters
28 |         ----------
29 |         params: dict
30 |             Contains all the model specifications and a bunch of other stuff
31 |             'train': bool, To train or not to train
32 |             'EPOCHS': int, Number of epochs
33 |             'num_layers': int, Number of layers in the model
34 |             'hidden_size': int, Number of hidden units in the model
35 |             'batch_size': int, Batch size during the epoch training
36 |             'input_size': int, Number of features; 1 for univariate time series
37 |             'learning_rate': float, Learning rate for training; typical value 0.001
38 |             'flavor': str, Which RNN model to use: 'rnn', 'lstm', 'gru'
39 |             'left_seq_size': int, Size of the input sequence
40 |             'right_seq_size': int, Size of prediction; 1 for RNN and >1 for Encoder-Decoder
41 |             'noise': float, Noise factor to use; set it to 0.0 if no noise is needed
42 |             'norm_fac': float, The time series is normalized by the data max before it is given
43 |                 to the model. I am storing it to keep track of it.
44 |             'save_path': str, Path of the dir where the model and loss curves will be saved.
45 |             'load_previous': bool, If True, it loads a previously run model and trains it further
46 |             'encdec': bool, If True, it constructs an Encoder-Decoder model using
47 |                 the 'flavor' chosen above
48 |             'test': bool, If True, it performs testing on test data and plots on
49 |                 the loss curve
50 |             'val': bool, If True, it predicts for validation data and saves a .npz file
51 |                 which contains the error between targets and predictions
52 |             'save_model': bool, If True, saves models whenever the test loss decreases
53 |             'iplot': bool, If True, saves the loss curves as PNG files
54 | 
55 |         data: dict
56 |             Contains Xtrain, Ytrain, Xtest, Ytest, Xval, Yval
57 | 
58 |         device: str
59 |             'cuda' or 'cpu'
60 |         """
61 | 
62 |         self.params = params
63 |         self.device = device
64 | 
65 |         # define model and trainer
66 |         self.model = self.__define_model(params)
67 |         self.model.to(device)
68 | 
69 |         # define the optimizer
70 |         self.optimizer = optim.AdamW(self.model.parameters(),
71 |                                      amsgrad=True,
72 |                                      lr=params['learning_rate'],
73 |                                      weight_decay=0)  # L2 regularization coefficient; 0 disables it
74 | 
75 |         # feed the model and optimizer to the Trainer class; see Trainer.py
76 |         self.trainer = Trainer(self.model, self.optimizer)
77 | 
78 |         self.first_epoch = 1  # will be updated if load_previous is True
79 | 
80 |         # construct a descriptive file name; defined unconditionally because it is
81 |         # also used by __load_previous, the loss-curve plots, and the val-error file
82 |         self.model_filename = (f'L{params["num_layers"]}_'\
83 |                                f'H{params["hidden_size"]}_'\
84 |                                f'lr{params["learning_rate"]}_'\
85 |                                f'input{params["left_seq_size"]}_'\
86 |                                f'target{params["right_seq_size"]}_'\
87 |                                f'noise{params["noise"]}')
88 | 
89 |         if params['load_previous']:
90 |             self.__load_previous(params)
91 | 
92 |         # loss below which the model should be saved
93 |         self.best_loss = 0.1
94 | 
95 |         # load the appropriate tqdm depending on the environment
96 |         if self.__is_notebook(): import tqdm.notebook as tqbar
97 |         else: import tqdm as tqbar
98 | 
99 |         # declare the tqdm progress bar
100 |         epoch_bar = 
tqbar.tqdm(range(params['EPOCHS']),
101 |                                desc="Epochs progress [Loss: NA]",
102 |                                unit='Epoch')
103 | 
104 |         #--------------------Main Train loop---------------------------------
105 |         if params['train']:
106 | 
107 |             # ---------Send data to device----------
108 |             Xtrain = data['Xtrain'].to(device) # shape: [seq, samples]
109 |             Ytrain = data['Ytrain'].to(device) # shape: [seq, samples]
110 | 
111 |             if params['test']:
112 |                 Xtest = data['Xtest'].to(device)
113 |                 Ytest = data['Ytest'].to(device)
114 |             if params['val']:
115 |                 Xval = data['Xval'].to(device)
116 |                 Yval = data['Yval'].to(device)
117 | 
118 |             # allocate loss arrays to be filled later
119 |             self.train_losses = np.full(params['EPOCHS'], np.nan)
120 |             if params['test']:
121 |                 self.test_losses = np.full(params['EPOCHS'], np.nan)
122 |             if params['val']:
123 |                 self.val_err = torch.zeros( params['EPOCHS'],
124 |                                             Yval.shape[0],
125 |                                             Yval.shape[1],
126 |                                             device=Yval.device)
127 | 
128 |             # -------Epoch loop------
129 |             for epoch in epoch_bar:
130 |                 # get train loss for an epoch
131 |                 self.train_loss = self.trainer.train(Xtrain,Ytrain,params['batch_size'])
132 | 
133 |                 # store the training loss
134 |                 self.train_losses[epoch] = self.train_loss
135 | 
136 |                 # Get test and val loss and errors, if asked
137 |                 if params['test']:
138 |                     self.test_loss = self.trainer.test(Xtest,Ytest)
139 |                     self.test_losses[epoch] = self.test_loss
140 |                 if params['val']:
141 |                     if params['encdec']:
142 |                         # get mean square error for validation data
143 |                         self.val_err[epoch] = (Yval - self.trainer.pred_encdec(Xval).detach())**2
144 |                     else:
145 |                         # get mean square error for validation data
146 |                         self.val_err[epoch] = (Yval - self.trainer.RNN_npoint_pred(Xval, Yval.shape[0]))**2
147 | 
148 |                 # update loss curve on every 10th epoch
149 |                 if params['iplot'] and ((epoch+1) % 10)==0:
150 |                     self.__plot_loss_curve(params)
151 | 
152 |                 # update saved good model
153 |                 if params['save_model'] and params['test']:
154 |                     if self.test_loss < self.best_loss:
155 |                         
self.__save_model(epoch, params)
156 |                         #update loss to compare later
157 |                         self.best_loss = self.test_loss
158 |                 elif params['save_model'] and self.train_loss < self.best_loss:
159 |                     self.__save_model(epoch, params)
160 |                     self.best_loss = self.train_loss
161 | 
162 |                 # update tqdm epoch progress bar
163 |                 epoch_bar.set_description('Epochs progress [Loss: {:.3e}]'.format(self.train_loss))
164 |         else:
165 |             print(f'Nothing to train since Training is set to {params["train"]}....')
166 |         #---------------------------------------------------------------
167 | 
168 |         #plot final loss curve
169 |         if params['train'] and params['iplot']:
170 |             self.__plot_loss_curve(params)
171 | 
172 |         #-----Save nPoint prediction error mean and std
173 |         if params['train'] and params['val']:
174 |             os.chdir(params['save_path'])
175 |             # val_err shape: [epochs, seqs, batch]
176 |             val_err_mean = torch.mean(self.val_err, dim=2) # along batch
177 |             val_err_std = torch.std(self.val_err, dim=2) # along batch
178 |             self.err_filename = (self.model_filename+
179 |                 f'_nPoint_err_epochs{self.first_epoch}to{self.first_epoch+params["EPOCHS"]-1}')
180 |             np.savez_compressed(self.err_filename,
181 |                                 mean=val_err_mean.detach().cpu(),
182 |                                 std=val_err_std.detach().cpu())
183 | 
184 | 
185 | 
186 |     #---------------Helper functions-----------------
187 |     def __define_model(self, params):
188 |         # getting the name of the directory
189 |         # where this file is present.
190 | current = os.path.dirname(os.path.realpath(__file__)) 191 | parent = os.path.dirname(current) 192 | sys.path.append(parent+'/Model') 193 | 194 | if params['encdec']: 195 | from EncDec import Encoder, Decoder, EncoderDecoder 196 | enc = Encoder(params['input_size'], params['hidden_size'], params['num_layers'], params['flavor']) 197 | dec = Decoder(params['input_size'], params['hidden_size'], params['num_layers'], params['flavor']) 198 | model = EncoderDecoder(enc, dec, params["right_seq_size"]) 199 | print('**************************************************************************') 200 | print(f'EncDec {params["flavor"]} Regression model initialized with '\ 201 | f'{params["num_layers"]} layers and {params["hidden_size"]} hidden size.') 202 | print(f'I will take in {params["left_seq_size"]} points and predict {params["right_seq_size"]} points.') 203 | print('**************************************************************************') 204 | else: 205 | from RNN_Vanilla import RNNs 206 | model = RNNs(params['input_size'], params['hidden_size'], params['num_layers'], params['flavor']) 207 | print('**************************************************************************') 208 | print(f'RNN {params["flavor"]} Regression model initialized with '\ 209 | f'{params["num_layers"]} layers and {params["hidden_size"]} hidden size.') 210 | print(f'I will take in {params["left_seq_size"]} points and predict {params["right_seq_size"]} points.') 211 | print('**************************************************************************') 212 | 213 | if params["right_seq_size"]>1: 214 | sys.exit('!!! ERROR: Traditional RNNs cannot predict more than 1 point. 
Adjust target size...') 215 | 216 | return model 217 | 218 | def __load_previous(self, params): 219 | os.chdir(params['save_path']) 220 | files = glob.glob(self.model_filename+'_epoch*.pth') 221 | # look for the most recent file 222 | files.sort(key=os.path.getmtime) 223 | if len(files)>0: 224 | print('Found older file:', files[-1]) 225 | print('Loading.....') 226 | checkpoint = torch.load(files[-1], map_location=self.device) 227 | self.model.load_state_dict(checkpoint['model_state_dict']) 228 | self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 229 | # get the epoch of the saved model 230 | last_epoch = checkpoint['epoch'] 231 | # now update the 'first' epoch which can be used in 232 | # a new filename 233 | self.first_epoch = last_epoch+1 234 | 235 | def __save_model(self, epoch, params): 236 | os.chdir(params['save_path']) 237 | filename =self.model_filename+f'_epochs{self.first_epoch}to{self.first_epoch+self.params["EPOCHS"]-1}'+'.pth' 238 | torch.save({'params': self.params, # save model definition dict 239 | 'epoch': self.first_epoch+epoch,# save the epoch of the model 240 | 'model_state_dict': self.model.state_dict(), 241 | 'optimizer_state_dict': self.optimizer.state_dict()}, # crucial for restarting training 242 | filename) 243 | 244 | def __plot_loss_curve(self, params): 245 | os.chdir(params['save_path']) 246 | fig = plt.figure(figsize=(8, 4), num=1, clear=True) 247 | ax = fig.add_axes([0.15, 0.15, 0.8, 0.8]) 248 | xdata = self.first_epoch+np.arange(self.params['EPOCHS']) 249 | ax.semilogy(xdata, self.train_losses, 'r', label='Train') 250 | if self.params['test']: ax.plot(xdata, self.test_losses, 'g', label='Test') 251 | ax.set_xlabel('Epochs') 252 | ax.set_ylabel('MSE Loss') 253 | plt.legend() 254 | fig_name = self.model_filename+f'_LossCurve_epochs{self.first_epoch}to{self.first_epoch+self.params["EPOCHS"]-1}.png' 255 | plt.savefig(fig_name, dpi=150) 256 | 257 | def __is_notebook(self): 258 | #credit -> 
https://stackoverflow.com/a/39662359 259 | try: 260 | shell = get_ipython().__class__.__name__ 261 | if shell == 'ZMQInteractiveShell': 262 | return True # Jupyter notebook or qtconsole 263 | elif shell == 'TerminalInteractiveShell': 264 | return False # Terminal running IPython 265 | else: 266 | return False # Other type (?) 267 | except NameError: 268 | return False # Probably standard Python interpreter 269 | -------------------------------------------------------------------------------- /Utils/SeqData.py: -------------------------------------------------------------------------------- 1 | # Author: Rakesh K. Yadav, 2023 2 | 3 | 4 | import numpy as np 5 | import torch 6 | 7 | class SeqData(): 8 | """ 9 | This class takes in a 1D equispaced time series and generates 10 | samples of input and output sequences which 11 | can be used for training an RNN model. 12 | """ 13 | 14 | def __init__(self, data, num_samples, left_seq_size, right_seq_size, 15 | skip_points, noise=0, quiet=False): 16 | 17 | """ 18 | input data shape must be -> [n equispaced points of time series] 19 | 20 | Parameters 21 | ---------- 22 | data: 1D float numpy array 23 | Original time series from which to draw sequences 24 | num_samples: int 25 | Total number of sequence samples requested 26 | left_seq_size: int 27 | Size of the input (or left) sequences; will be fed to the model 28 | right_seq_size: int 29 | Size of the output (or right) sequence; should be 1 for typical RNNs 30 | and greater than 1 for encoder-decoder type RNNs 31 | skip_points: int 32 | Number of points to skip; useful when the main data is very high resolution 33 | noise: float 34 | Scaling factor for Gaussian noise added to the data: 35 | noise * np.random.normal(0, 1, ...) 
36 | quiet: Boolean 37 | Can be used to suppress print statements from this class 38 | 39 | Attributes 40 | ---------- 41 | x: 2D float tensor array 42 | [left_seq_size, num_samples] # inputs for RNN 43 | y: 2D float tensor array 44 | [right_seq_size, num_samples] # targets for RNN 45 | num_samples: int 46 | Total number of samples generated 47 | """ 48 | 49 | main_data_shape = data.shape[0] 50 | 51 | # stride of skip_points+1: a stride of 1 keeps every point, so skip_points=0 means no skipping 52 | data = data[::skip_points+1] 53 | 54 | if not quiet and skip_points>0: 55 | print(f'Main data skip of {skip_points} has been applied!') 56 | print(f'Data shape changed from {main_data_shape} to {data.shape[0]} after skipping...') 57 | print('\n') 58 | 59 | if noise != 0: 60 | data = data + noise * np.random.normal(0, 1, data.shape[0]) # not +=, which would mutate the caller's array through the strided view 61 | 62 | # convert to tensor data 63 | self.data = torch.from_numpy(data) 64 | 65 | self.x, self.y = self.__sequencyfy(self.data, num_samples, 66 | left_seq_size, right_seq_size) 67 | 68 | self.num_samples = self.x.shape[1] 69 | 70 | if not quiet: 71 | print(f'Dataset created with {self.x.shape[1]} samples. 
\n') 72 | print(f'Each sample has an input length of {left_seq_size}') 73 | print(f'and a target length of {right_seq_size}.\n') 74 | 75 | def __sequencyfy(self, data, num_samples=100, 76 | left_seq_size=50, right_seq_size=10): 77 | 78 | len_TS = data.shape[0] 79 | 80 | # rightmost valid start index such that an input-seq + output-seq 81 | # chunk still fits inside the series 82 | rightmost_ind = len_TS - (left_seq_size + right_seq_size) 83 | 84 | # draw random start indices to pick input+target chunks 85 | rand_inds = torch.randint(0, rightmost_ind+1, (num_samples,)) # high bound is exclusive 86 | 87 | # initialize arrays to fill 88 | X = torch.zeros([left_seq_size, num_samples], dtype=torch.float) 89 | Y = torch.zeros([right_seq_size, num_samples], dtype=torch.float) 90 | 91 | for i in range(num_samples): 92 | ls_start = rand_inds[i] 93 | X[:,i] = data[ls_start:ls_start + left_seq_size] 94 | 95 | rs_start = ls_start + left_seq_size 96 | Y[:,i] = data[rs_start:rs_start + right_seq_size] 97 | 98 | return X, Y 99 | -------------------------------------------------------------------------------- /Utils/Trainer.py: -------------------------------------------------------------------------------- 1 | # Author: Rakesh K. Yadav, 2023 2 | 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | class Trainer: 8 | 9 | """Contains the training loop definition, and a few other functions to perform 10 | testing on data and predict using a trained model. 11 | """ 12 | 13 | def __init__(self, model, optimizer): 14 | self.model = model 15 | self.optimizer = optimizer 16 | 17 | def loss(self, outputs, targets): 18 | # use reduction='sum' so the loss is summed over the batch 19 | # (and over prediction points when the EncoderDecoder predicts >1 point). 20 | # 
When reporting, I divide by the batch size and the 21 | # number of prediction points manually 22 | return nn.MSELoss(reduction='sum')(outputs, targets) 23 | 24 | def train(self, Xtrain, Ytrain, batch_size): 25 | # Xtrain: [seq, total_train_data] 26 | # Ytrain: [1 or seq, total_train_data] 27 | self.model.train() 28 | 29 | # draw a random permutation each time the function is called 30 | # in the epoch loop; used to shuffle the training data so each 31 | # sample is visited exactly once per epoch 32 | shuff_inds = torch.randperm(Xtrain.shape[1]) 33 | 34 | total_loss = 0 35 | 36 | # Note: if the training size is not an integer multiple of 37 | # batch_size, the final iteration gets the remaining chunk, 38 | # which is smaller than batch_size; Python slicing simply 39 | # truncates an out-of-range slice 40 | for batch in range(0, Xtrain.shape[1], batch_size): 41 | self.optimizer.zero_grad() 42 | indices = shuff_inds[batch:batch+batch_size] 43 | outputs = self.model(Xtrain[:,indices]) 44 | loss = self.loss(outputs, Ytrain[:,indices]) 45 | loss.backward() 46 | self.optimizer.step() 47 | total_loss += loss.item() 48 | 49 | # return the per-sample, per-prediction-point loss 50 | return total_loss/Ytrain.shape[0]/Ytrain.shape[1] 51 | 52 | 53 | def test(self, Xtest, Ytest): 54 | self.model.eval() 55 | loss = self.loss(self.model(Xtest), Ytest) 56 | return loss.item()/Ytest.shape[0]/Ytest.shape[1] 57 | 58 | def pred_encdec(self, x): 59 | # only valid for the encoder-decoder model 60 | self.model.eval() 61 | outs = self.model(x) 62 | return outs 63 | 64 | def RNN_npoint_pred(self, input_seqs, n_pred): 65 | # input_seqs : [seqs, batch] 66 | # n_pred: number of points to predict using the simple RNN 67 | self.model.eval() 68 | 69 | input_seq_size = input_seqs.shape[0] 70 | input_batch = input_seqs 71 | 72 | # array used for updating the input_batch 73 | # in the loop below 74 | preds = input_seqs 75 | 76 | for i in range(n_pred): 77 | # get 1 point prediction 78 | pred = self.model(input_batch) 79 | 80 | # stop gradient tracking 
by "detach" and then 81 | # attach the point to the end of input seq 82 | preds = torch.concat((preds, pred.detach()), dim=0) 83 | 84 | # update the input to RNN to include the new prediction point 85 | input_batch = preds[-input_seq_size:, :] 86 | 87 | # now return the last n_pred points 88 | return preds[-n_pred:,:] #shape: [n_pred, batch] 89 | 90 | -------------------------------------------------------------------------------- /Utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Author: Rakesh K. Yadav, 2023 2 | 3 | from .SeqData import * 4 | from .Create_and_Train import * 5 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: RNN-ML-env 2 | 3 | channels: 4 | - conda-forge 5 | - defaults 6 | 7 | dependencies: 8 | - python = 3.9.13 9 | - numpy = 1.23.3 10 | - matplotlib = 3.5.3 11 | - pytorch = 1.12.1 12 | - seaborn = 0.11.2 13 | - pip 14 | - jupyter 15 | - tqdm 16 | -------------------------------------------------------------------------------- /imports.py: -------------------------------------------------------------------------------- 1 | # Author: Rakesh K. Yadav, 2023 2 | 3 | 4 | import numpy as np 5 | import torch 6 | import matplotlib.pyplot as plt 7 | import seaborn as sns 8 | sns.set_theme(style="darkgrid") 9 | 10 | from Utils import * 11 | 12 | # Device configuration 13 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 14 | if torch.cuda.is_available(): 15 | print(f'Using: {device}. 
Device: {torch.cuda.get_device_name()}') 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.23.3 2 | torch==1.12.1 3 | matplotlib 4 | seaborn 5 | tqdm 6 | optuna 7 | optuna-dashboard 8 | --------------------------------------------------------------------------------
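A standalone sketch of the windowing idea behind `SeqData.__sequencyfy` (Utils/SeqData.py): from a 1D series, random start indices are drawn, and each sample is a contiguous input window of `left_seq_size` points followed by a target window of `right_seq_size` points. The function name `make_sequences` and the sine-wave series below are illustrative only, not part of the repo's API; numpy is used in place of torch to keep the sketch dependency-light.

```python
import numpy as np

def make_sequences(series, num_samples, left_seq_size, right_seq_size, seed=0):
    rng = np.random.default_rng(seed)
    # rightmost valid start so the input + target windows still fit in the series
    rightmost = len(series) - (left_seq_size + right_seq_size)
    starts = rng.integers(0, rightmost + 1, size=num_samples)
    # same [seq, samples] layout as SeqData.x / SeqData.y
    X = np.zeros((left_seq_size, num_samples))
    Y = np.zeros((right_seq_size, num_samples))
    for i, s in enumerate(starts):
        X[:, i] = series[s:s + left_seq_size]
        Y[:, i] = series[s + left_seq_size:s + left_seq_size + right_seq_size]
    return X, Y

series = np.sin(np.linspace(0, 8 * np.pi, 500))
X, Y = make_sequences(series, num_samples=32, left_seq_size=50, right_seq_size=10)
print(X.shape, Y.shape)  # (50, 32) (10, 32)
```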
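The autoregressive loop in `Trainer.RNN_npoint_pred` (Utils/Trainer.py) can be sketched with a stand-in one-step model: a single-point predictor is applied repeatedly, each prediction appended to the window that feeds the next step. The names `npoint_pred` and `linear_step` are hypothetical, and the toy model (exact linear extrapolation) stands in for the trained RNN.

```python
def npoint_pred(one_step_model, input_seq, n_pred):
    window = list(input_seq)   # growing history of inputs + predictions
    preds = []
    for _ in range(n_pred):
        # feed only the last len(input_seq) points, mirroring
        # preds[-input_seq_size:, :] in RNN_npoint_pred
        nxt = one_step_model(window[-len(input_seq):])
        preds.append(nxt)
        window.append(nxt)     # the prediction becomes part of the next input
    return preds

# toy one-step "model": continues a linear trend exactly
linear_step = lambda seq: 2 * seq[-1] - seq[-2]
out = npoint_pred(linear_step, [1.0, 2.0, 3.0], 4)
print(out)  # [4.0, 5.0, 6.0, 7.0]
```

With the real RNN, each `nxt` would additionally be detached from the autograd graph, as the repo's loop does with `pred.detach()`.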