├── .gitignore
├── LICENSE
├── README.md
├── datasets
    ├── __init__.py
    ├── adding.py
    ├── music.py
    ├── timeseries.py
    └── turbofan.py
├── requirements.txt
├── topologies
    ├── __init__.py
    ├── convolutional_model.py
    ├── custom_neon_classes.py
    ├── recurrent_model.py
    └── temporal_convolutional_network.py
├── training
    ├── adding_problem
    │   └── adding_with_tcn.py
    ├── music_forecasting
    │   └── music_forecasting_with_tcn.py
    ├── predictive_maintenance
    │   ├── images
    │   │   ├── eval_loss_plot.png
    │   │   └── preds_validation_output.png
    │   ├── turbofan_autoencoder.py
    │   ├── turbofan_baselines.py
    │   └── turbofan_with_tcn.py
    └── timeseries_trainer.py
├── tutorials
    ├── img
    │   ├── dilated_conv.png
    │   └── residual_block.png
    └── tcn_tutorial.ipynb
└── utils
    ├── __init__.py
    ├── arguments.py
    └── inference.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # DISCONTINUATION OF PROJECT #
 2 | This project will no longer be maintained by Intel.
 3 | Intel has ceased development and contributions including, but not limited to, maintenance, bug fixes, new releases, or updates, to this project.
 4 | Intel no longer accepts patches to this project.
 5 | ## Predictive maintenance of aircraft engines using deep learning
 6 | 
 7 | ## Link to notebook:
 8 | https://gist.github.com/Ajay191191/323b15b47cfa67cec97e18559ed5adc8
 9 | 
10 | ### Introduction
11 | Predictive maintenance techniques closely monitor the condition of active machinery and predict their remaining-useful-life. This is then used to assess and schedule maintenance to prolong the life of the machinery. Typically, sensors are placed on the machine to monitor acoustics, vibrations or other observations either online (while the equipment is running) or periodically while the equipment is offline. The resulting time-series data can be used in predictive modeling to determine the status of the machine. For details, see [1, 2].
12 | 
13 | ### Data
 14 | In order to demonstrate the use of deep learning for predictive maintenance, we used the NASA TurboFan dataset [3, 4]. This dataset consists of time-series data collected using simulations of sensors placed on aircraft engines. Each simulation consists of the engine being operated under different conditions and fault modes until failure. For every time-series sample, the end of the time-series denotes the time-point at which the engine failed. Given any snippet of time-series, the objective of this modeling exercise is to predict the remaining time until the end of the time-series. This is known as the Remaining-Useful-Life, or RUL of the engine.
15 | 
16 | Data provided consists of two sets - for training and validation. For the training data, the entire time-series until the point of failure was provided. For validation data, snippets of sensor data and their associated RULs were given. Training sensor data was split into snippets of length `seq_len` and the corresponding RULs were calculated. For validation data, all time-series were left-truncated to have the same length.
17 | 
18 | ### Deep Learning Model
 19 | We compared two different architectures, one based on Recurrent Neural Networks (RNNs) and another based on Convolutional Neural Networks (CNNs). For the RNN, we implemented a sequence2sequence architecture, similar to the topology used for Neural Machine Translation (NMT) tasks [5]. The sequence2sequence architecture consists of an encoder, which compresses the signal into a smaller "embedding" vector, and a decoder, which reconstructs the signal from the embedding. When a supervised layer, such as a Multi-Layer Perceptron (MLP), is trained on top of the embedding, the embedding vector tends to capture aspects of the data that are also relevant to predicting the value of interest.
20 | 
 21 | For the CNN architecture, we implemented a Temporal Convolutional Network (TCN) [6], which has shown a lot of promise in modeling sequence and temporal data. Its defining features include use of a dilated convolution layer as well as residual or "skip" connections. Increasing the dilation factor at every layer in the CNN increases the receptive field of the neurons deeper in the network, allowing them to capture information from the entire length of the sequence. Causal padding during convolution ensures that information from future time-points does not leak into the current prediction of the RUL. Adding skip connections allows modeling of deeper networks.
22 | 
23 | Mean squared error (MSE) between the predicted and ground-truth RUL was used to train and validate the model.
24 | 
25 | ### Results
 26 | Both models were lightly tuned to obtain good performance on the validation set. For a fair comparison, the hyper-parameters of both models were set such that the number of variables between the two models is comparable.
27 | 
 28 | The following plot shows a snapshot of how the validation loss progresses during training, for both the sequence2sequence model and TCN. TCN provides better performance on the test set, with an MSE value of ~412 (~20 time-points), while sequence2sequence is around ~513 (~22 time-points). TCN is also significantly faster, with time-to-train of ~4 minutes per epoch (one pass through the training set for training, plus one pass through the validation set to calculate `eval_loss`), while sequence2sequence takes ~30 minutes per epoch. Timing numbers were obtained by training the model on an `Intel(R) Xeon(R) CPU E5-2699A v4` CPU.
29 | 
30 | ![validation loss](./training/predictive_maintenance/images/eval_loss_plot.png)
31 | 
32 | The following plot shows estimated RUL values vs. ground-truth for the TCN model:
33 | 
34 | ![predictions](./training/predictive_maintenance/images/preds_validation_output.png)
35 | 
36 | As expected, predictions get better closer to the time of failure.
37 | 
38 | ### Installation
39 | To run the code, first download the repo locally as follows:
40 | ```bash
41 | git clone https://github.com/NervanaSystems/aidc-2018-timeseries.git
42 | ```
43 | ```bash
44 | cd aidc-2018-timeseries
45 | ```
46 | Create and activate a python 3 virtualenv:
47 | ```bash
48 | python3 -m venv .venv
49 | . .venv/bin/activate
50 | ```
51 | Install the dependencies:
52 | ```bash
53 | pip install -r requirements.txt
54 | ```
55 | Clone the nGraph repo in a different folder:
56 | ```bash
57 | git clone https://github.com/NervanaSystems/ngraph-python.git
58 | ```
 59 | Install the nGraph library and add GPU support:
60 | ```bash
61 | cd ngraph-python
62 | make install
63 | ```
64 | 
65 | 
66 | ### Training the model
67 | To train TCN, use the command
68 | ```bash
69 | python ./turbofan_with_tcn.py --batch_size 128 --dropout 0.1 --ksize 4 --levels 4 --seq_len 50 --log_interval 100 --nhid 70 --lr 0.002 --grad_clip_value 0.4 --results_dir ${RESULTS_DIR} --tensorboard_dir ${TB_DIR} -b cpu --epochs 200 --save_plots
70 | ```
71 | To train sequence to sequence model, use the command
72 | ```bash
 73 | python ./turbofan_autoencoder.py --batch_size 128 --seq_len 75 --log_interval 100 --n_hidden 70,70 --lr 0.002 --grad_clip_value 0.4 --results_dir ${RESULTS_DIR} --tensorboard_dir ${TB_DIR} -b cpu --epochs 200 --save_plots
74 | ```
75 | where `${RESULTS_DIR}` and `${TB_DIR}` are directory locations to save the results and tensorboard event logs respectively.
76 | 
77 | 
78 | 
79 | [1] [Predictive Maintenance For Enhanced Asset Operation](https://www.intel.com/content/www/us/en/manufacturing/solutions/predictive-maintenance-and-asset-optimization.html)
80 | 
81 | [2] [Predictive Maintenance Drives Smarter Fleet Management](https://www.intel.com/content/www/us/en/internet-of-things/solution-briefs/predictive-maintenance-fleet-management-brief.html)
82 | 
83 | [3] [NASA TurboFan dataset](https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/#turbofan)
84 | 
85 | [4] Ramasso, Emmanuel, and Abhinav Saxena. "Performance Benchmarking and Analysis of Prognostic Methods for CMAPSS Datasets." International Journal of Prognostics and Health Management 5.2 (2014): 1-15.
86 | 
87 | [5] Sutskever, Ilya, Oriol Vinyals, and Quoc V. Le. "Sequence to sequence learning with neural networks." Advances in neural information processing systems. 2014.
88 | 
89 | [6] Bai, Shaojie, J. Zico Kolter, and Vladlen Koltun. "An empirical evaluation of generic convolutional and recurrent networks for sequence modeling." arXiv preprint arXiv:1803.01271 (2018).
90 | 


--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NervanaSystems/aidc-2018-timeseries/df0d25a258ced93b23755f7e62a3f96d73963fe3/datasets/__init__.py


--------------------------------------------------------------------------------
/datasets/adding.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | class Adding:
 5 |     def __init__(self, T=200, n_train=50000, n_test=1000):
 6 |         self.T = T
 7 |         self.n_train = n_train
 8 |         self.n_test = n_test
 9 | 
10 |         X_train, y_train = self.load_data(n_train)
11 |         X_val, y_val = self.load_data(n_test)
12 | 
13 |         self.train = {'X': {'data': X_train, 'axes': ('N', 'F', 'REC')}, 'y': {'data': y_train, 'axes': ('N', 'Fo')}}
14 | 
15 |         self.test = {'X': {'data': X_val, 'axes': ('N', 'F', 'REC')}, 'y': {'data': y_val, 'axes': ('N', 'Fo')}}
16 | 
17 |     def load_data(self, N):
18 |         """
19 |         Args:
20 |             N: # of data in the set
21 |         """
22 |         X_num = np.random.rand(N, 1, self.T)
23 |         X_mask = np.zeros((N, 1, self.T))
24 |         y = np.zeros((N, 1))
25 |         for i in range(N):
26 |             positions = np.random.choice(self.T, size=2, replace=False)
27 |             X_mask[i, 0, positions[0]] = 1
28 |             X_mask[i, 0, positions[1]] = 1
29 |             y[i, 0] = X_num[i, 0, positions[0]] + X_num[i, 0, positions[1]]
30 |         X = np.concatenate((X_num, X_mask), axis=1)
31 |         return X, y


--------------------------------------------------------------------------------
/datasets/music.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import os
 3 | import urllib.request
 4 | from scipy.io import loadmat
 5 | from datasets.timeseries import TimeSeries
 6 | 
# Base URL of the remote directory holding the .mat files; one of the file
# names below is appended to it to form the download URL.
SOURCE_URL = "https://github.com/locuslab/TCN/raw/master/TCN/poly_music/mdata/"
# File names of the supported datasets, used for both the remote file and the
# local copy stored in the data directory.
JSB_FILENAME = "JSB_Chorales.mat"
Nott_FILENAME = "Nottingham.mat"
10 | 
class Music():
    """
    Polyphonic-music dataset ("JSB" or "Nott") prepared for next-step prediction.

    The .mat file is downloaded into ``data_dir`` if it is not already there.
    Every piece is cut or left-padded to ``seq_len + 1`` time steps; the first
    ``seq_len`` steps become the input and the last ``seq_len`` steps (the
    same window shifted by one) become the target.

    Attributes:
        seq_len: number of time steps in every input/target sequence
        filepath: location of the .mat file on disk
        train: dict with 'X' (axes N, REC, F) and 'y' (axes N, REC, Fo) entries
        test: same layout as ``train``; built from the test and validation
            pieces of the file, in that order
    """

    def __init__(self, seq_len=100, data_dir="./data", dataset="JSB"):
        """
        Arguments:
            seq_len (int, optional): length of each input/target sequence
            data_dir (str, optional): directory where the .mat file is stored
            dataset (str, optional): "JSB" or "Nott"

        Raises:
            ValueError: if ``dataset`` is not one of the supported names
        """
        self.seq_len = seq_len
        # Resolving the file name first rejects unknown dataset names with a
        # clear error before any file or network access happens.
        self.filepath = os.path.join(data_dir, self._filename_for(dataset))
        self._maybe_download(data_dir, dataset)

        X_train, X_valid, X_test = self.load_series()

        # One extra time step so that inputs and one-step-shifted targets can
        # both be sliced out of the same window.
        X_train_t = self._change_to_seq_len(X_train, self.seq_len + 1)
        X_test_t = self._change_to_seq_len(X_test + X_valid, self.seq_len + 1)

        self.train = {'X': {'data': X_train_t[:, :self.seq_len, ...], 'axes': ('N', 'REC', 'F')},
                      'y': {'data': X_train_t[:, 1:, ...], 'axes': ('N', 'REC', 'Fo')}}

        self.test = {'X': {'data': X_test_t[:, :self.seq_len, ...], 'axes': ('N', 'REC', 'F')},
                     'y': {'data': X_test_t[:, 1:, ...], 'axes': ('N', 'REC', 'Fo')}}

    @staticmethod
    def _filename_for(dataset):
        """Map a dataset name to its .mat file name; raise ValueError if unknown."""
        if dataset == "JSB":
            return JSB_FILENAME
        if dataset == "Nott":
            return Nott_FILENAME
        raise ValueError(
            "Unknown dataset {!r}; expected 'JSB' or 'Nott'".format(dataset))

    def load_series(self):
        """
        Read the .mat file at ``self.filepath``.

        Returns:
            Three lists (train, validation, test) holding the entries of the
            'traindata', 'validdata' and 'testdata' variables of the file.
        """
        data = loadmat(self.filepath)
        X_train = list(data['traindata'][0])
        X_valid = list(data['validdata'][0])
        X_test = list(data['testdata'][0])
        return X_train, X_valid, X_test

    def _change_to_seq_len(self, X, seq_len):
        """
        Bring every series in X to exactly ``seq_len`` time steps.

        Longer series keep their last ``seq_len`` steps; shorter series are
        left-padded with zeros so that their data sits at the end.

        Arguments:
            X: non-empty list of 2-D arrays (time_steps, features) that all
               share the feature count of the first entry
            seq_len (int): number of time steps in the output

        Returns:
            Array of shape (len(X), seq_len, features)
        """
        X_padded = np.zeros((len(X), seq_len, X[0].shape[1]))

        for e, x in enumerate(X):
            n_steps = min(x.shape[0], seq_len)
            # A zero-length series stays all zeros; without this guard the
            # "-0:" slice would select the whole row and fail to broadcast.
            if n_steps:
                X_padded[e, seq_len - n_steps:, :] = x[x.shape[0] - n_steps:, :]
        return X_padded

    def _maybe_download(self, work_directory, dataset):
        """
        Download the dataset file if it is not already present.

        Returns:
            Location of saved data
        """
        if not os.path.exists(self.filepath):
            print("data does not exist, downloading...")
            self._download_data(work_directory, dataset)
        return self.filepath

    def _download_data(self, work_directory, dataset):
        """
        Fetch the .mat file for ``dataset`` into ``work_directory``.

        Returns:
            Absolute path of the downloaded file

        Raises:
            ValueError: if ``dataset`` is not one of the supported names
        """
        filename = self._filename_for(dataset)

        work_directory = os.path.abspath(work_directory)
        # makedirs also handles nested, not-yet-existing directories
        os.makedirs(work_directory, exist_ok=True)

        headers = {'User-Agent': 'Mozilla/5.0'}

        filepath = os.path.join(work_directory, filename)
        req = urllib.request.Request(SOURCE_URL + filename, headers=headers)
        # Read the complete payload before creating the file so that a failed
        # download cannot leave behind a truncated file that a later run
        # would mistake for valid data; the response is closed either way.
        with urllib.request.urlopen(req) as data_handle:
            payload = data_handle.read()
        with open(filepath, "wb") as fp:
            fp.write(payload)

        print('Successfully downloaded data to {}'.format(filepath))

        return filepath
80 | 


--------------------------------------------------------------------------------
/datasets/timeseries.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # ----------------------------------------------------------------------------
  3 | # Copyright 2017 Nervana Systems Inc.
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #      http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ----------------------------------------------------------------------------
 16 | from __future__ import division
 17 | import numpy as np
 18 | 
 19 | 
 20 | class TimeSeries(object):
 21 |     """
 22 |     Class that generates training and testing data from a time-series, for forecasting objectives.
 23 |     """
 24 | 
 25 |     def __init__(self, train_ratio=0.8, seq_len=30,
 26 |                  predict_seq=True, look_ahead=1):
 27 |         """
 28 |         Arguments:
 29 |             train_ratio (float, optional): percentage of the function to be used for training
 30 |             seq_len (int, optional): length of the sequence for each sample
 31 |             predict_seq (boolean, optional):
 32 |                 False : Inputs - X[no_samples, seq_len, no_input_features]
 33 |                         Labels - y[no_samples, no_output_features]
 34 |                 True : Inputs - X[no_samples, seq_len, no_input_features]
 35 |                        Labels - y[no_samples, seq_len, no_output_features]
 36 |             look_ahead (int, optional): How far ahead the predicted sequence starts from the input seq
 37 |                         Set to 1 to start predicting from next time point onwards
 38 |                         Only used when predict_seq is False
 39 |         """
 40 | 
 41 |         self.data, self.names = self.load_series() # data could be 2-D (single series) or 3-D (multiple series), in case of multiple series, each series could have different sequence lengths
 42 | 
 43 |         self.series = self.data
 44 | 
 45 |         if (predict_seq is False):
 46 |             # X will be (no_samples, time_steps, feature_dim)
 47 |             X, self.seq_names = self.rolling_window(a=self.data, seq_len=seq_len+1, seq_names=self.names) # add one to the sequence length to get target in the same call
 48 | 
 49 |             X, self.seq_names = self._remove_zero_batches(X, self.seq_names)
 50 | 
 51 |             # Get test samples
 52 |             test_samples = int(round((1 - train_ratio) * X.shape[0]))
 53 |             train_samples = X.shape[0] - test_samples
 54 | 
 55 |             self.train = {'X': {'data': X[:train_samples, :seq_len, ...], 'axes': ('N', 'REC', 'F')},
 56 |                           'y': {'data': X[:train_samples, seq_len, ...],
 57 |                                 'axes': ('N', 'Fo')}}
 58 |             self.train_seq_names = self.seq_names[:train_samples]
 59 | 
 60 |             self.test = {'X': {'data': X[train_samples:, :seq_len, ...], 'axes': ('N', 'REC', 'F')},
 61 |                          'y': {'data': X[train_samples:, seq_len, ...],
 62 |                                'axes': ('N', 'Fo')}}
 63 |             self.test_seq_names = self.seq_names[train_samples:]
 64 | 
 65 |         else:
 66 |             X, y, self.seq_names = self.non_overlapping_window(self.data, seq_len=seq_len, look_ahead=look_ahead, seq_names=self.names)
 67 | 
 68 |             X, self.seq_names = self._remove_zero_batches(X, self.seq_names)
 69 | 
 70 | 
 71 |             test_samples = int(round((1 - train_ratio) * X.shape[0])) #TODO split strategies other than time
 72 |             train_samples = X.shape[0] - test_samples
 73 | 
 74 |             self.train = {'X': {'data': X[:train_samples], 'axes': ('N', 'REC', 'F')},
 75 |                           'y': {'data': y[:train_samples],
 76 |                                 'axes': ('N', 'REC', 'Fo')}}
 77 |             self.train_seq_names = self.seq_names[:train_samples]
 78 | 
 79 |             self.test = {'X': {'data': X[train_samples:], 'axes': ('N', 'REC', 'F')},
 80 |                          'y': {'data': y[train_samples:], 'axes': ('N', 'REC', 'Fo')}}
 81 |             self.test_seq_names = self.seq_names[train_samples:]
 82 | 
    def load_series(self):
        """
        Load the raw series for this dataset.

        Not implemented here: always raises NotImplementedError.
        The code in this class that calls this method unpacks the result as
        ``(data, names)``, so an implementation has to return those two values.
        """
        raise NotImplementedError
 85 | 
    def convert_to_trend(self):
        """
        Placeholder for converting targets into trend labels.

        Not implemented: always raises NotImplementedError.
        """
        # TODO work on trend targets: higher/lower/nochange from prev day's value
        raise NotImplementedError
 89 | 
 90 |     @staticmethod
 91 |     def rolling_window(a=None, seq_len=None, seq_names=None):
 92 |         """
 93 |         Convert sequence a into time-lagged vectors
 94 |         a           : (time_steps, feature_dim) or list of variable length series with same number of features
 95 |         seq_len     : length of sequence used for prediction
 96 |         returns  (n*(time_steps - seq_len + 1), seq_len, feature_dim)  array
 97 |         """
 98 |         if not isinstance(a, list):
 99 |             a = [a]
100 | 
101 |         all_windows = []
102 |         all_seq_names = []
103 |         for i, a_i in enumerate(a):
104 |             if a_i.shape[0] < seq_len:
105 |                 continue
106 |             shape = [a_i.shape[0] - seq_len + 1, seq_len, a_i.shape[-1]]
107 |             strides = [a_i.strides[0], a_i.strides[0], a_i.strides[-1]]
108 |             strided_a =  np.lib.stride_tricks.as_strided(a_i, shape=shape, strides=strides, writeable=False)
109 |             all_windows.append(strided_a)
110 |             if seq_names is None:
111 |                 all_seq_names += (a_i.shape[0] - seq_len + 1) * [i]
112 |             else:
113 |                 all_seq_names += (a_i.shape[0] - seq_len + 1) * [seq_names[i]]
114 | 
115 |         return np.vstack(all_windows), all_seq_names
116 | 
117 |     def non_overlapping_window(self, a=None, seq_len=None, look_ahead=None, seq_names=None):
118 |         """
119 |         Convert sequence into (data, target) pairs with same sequence length in data and target and non-overlapping windows in data
120 |         Args:
121 |             a: input series
122 |             seq_len: sequence length of data and target
123 |             look_ahead: number of time points by which target is ahead of the data
124 |             seq_labels: list containing name of each sequence
125 |         Returns:
126 | 
127 |         """
128 |         if not isinstance(a, list):
129 |             a = [a]
130 | 
131 |         X = []
132 |         y = []
133 |         all_seq_names = []
134 |         for i, a_i in enumerate(a):
135 |             ntimepoints = ((a_i.shape[0] - look_ahead) // seq_len) * seq_len
136 |             X_i = a_i[:ntimepoints, :]
137 |             y_i = a_i[look_ahead:look_ahead + ntimepoints, :]
138 | 
139 |             # Reshape X and y
140 |             nseq = ntimepoints // seq_len
141 |             X_i = np.reshape(X_i, (nseq, seq_len, -1))
142 |             y_i = np.reshape(y_i, (nseq, seq_len, -1))
143 | 
144 |             X.append(X_i)
145 |             y.append(y_i)
146 | 
147 |             if seq_names is None:
148 |                 all_seq_names += nseq * [i]
149 |             else:
150 |                 all_seq_names += nseq * [seq_names[i]]
151 | 
152 |         return np.vstack(X), np.vstack(y), all_seq_names
153 | 
154 |     def _remove_zero_batches(self, X, seq_labels):
155 |         """
156 |         This function removes batches which have all zero sequences, otherwise batch norm returns nan values
157 |         Args:
158 |             X: batch of data (batches, time, features)
159 | 
160 |         Returns:
161 |             X with all zero batches removed
162 | 
163 |         """
164 |         batch_sum = np.sum(np.sum(np.abs(X), axis=-1), axis=-1)
165 |         non_zero_ind = np.where(batch_sum > np.finfo(np.float64).eps)[0]
166 | 
167 |         return X[non_zero_ind, :, :], [seq_labels[i] for i in non_zero_ind]
168 | 
    def plot_sample(self, *args, **kwargs):
        """
        Placeholder for plotting a sample of the data.

        Not implemented here: always raises NotImplementedError, whatever arguments are passed.
        """
        raise NotImplementedError
171 | 
172 | 
    def normalize_data(self):
        """
        Placeholder for normalizing the data.

        Not implemented here: always raises NotImplementedError.
        """
        raise NotImplementedError
175 | 
176 | 
    def _maybe_download(self, work_directory):
        """
        Placeholder for fetching the data when it is not available locally.

        Not implemented here: always raises NotImplementedError.

        Args:
            work_directory: unused by this stub; presumably the directory the data should be stored in
        """
        raise NotImplementedError
179 | 


--------------------------------------------------------------------------------
/datasets/turbofan.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This class can be used to create a dataloader object for the turbofan engine degradation simulated data.
  3 | Data is a combination of two sources, and can be downloaded using the following links:
  4 | [1] https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/#turbofan
  5 | [2] https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/#phm08_challenge
  6 | 
  7 | For details regarding data generation, see:
  8 | [3] A. Saxena, K. Goebel, D. Simon, and N. Eklund, Damage Propagation Modeling for Aircraft Engine Run-to-Failure Simulation, in the Proceedings of the 1st International Conference on Prognostics and Health Management (PHM08), Denver CO, Oct 2008.
  9 | 
 10 | Papers that use the dataset are listed below:
 11 | [4] Heimes, F.O., Recurrent neural networks for remaining useful life estimation, in the Proceedings of the 1st International Conference on Prognostics and Health Management (PHM08), Denver CO, Oct 2008.
 12 | [5] Tianyi Wang, Jianbo Yu,  Siegel, D.,  Lee, J., A similarity-based prognostics approach for Remaining Useful Life estimation of engineered systems, in the Proceedings of the 1st International Conference on Prognostics and Health Management (PHM08), Denver CO, Oct 2008.
 13 | [6] Peel, L., Data driven prognostics using a Kalman filter ensemble of neural network models, in the Proceedings of the 1st International Conference on Prognostics and Health Management (PHM08), Denver CO, Oct 2008.
 14 | [7] Ramasso, Emmanuel, and Abhinav Saxena. "Performance Benchmarking and Analysis of Prognostic Methods for CMAPSS Datasets." International Journal of Prognostics and Health Management 5.2 (2014): 1-15.
 15 | """
 16 | import os
 17 | import pandas as pd
 18 | import numpy as np
 19 | import urllib.request
 20 | import zipfile
 21 | 
# URLs the two zip archives are downloaded from (one per data source)
CMAPSS_SOURCE_URL = 'https://ti.arc.nasa.gov/c/6/'
CHALLENGE_SOURCE_URL = 'https://ti.arc.nasa.gov/c/13/'
# Base names used for the downloaded "<name>.zip" file and for the directory it is unzipped into
# NOTE: "CMAPPS" (sic) is the spelling the downloader refers to, so the name must not be changed here alone
CMAPPS_FILENAME = 'CMAPSSData'
CHALLENGE_FILENAME = 'Challenge_Data'
 26 | 
 27 | class TurboFan:
 28 |     def __init__(self, data_dir="./data", T=100, skip=1, max_rul_predictable=200, scale=True, normalize=True, shuffle=True,
 29 |                  recurrent_axis_name='REC', feature_axis_name='F', label_axis_name='Fo'):
 30 |         """
 31 | 
 32 |         Args:
 33 |             data_dir: location of data dir
 34 |             T: int, sequence length
 35 |             skip: int, number of timepoints to skip before moving to the next window (similar to "stride" in convolutional filter)
 36 |             max_rul_predictable: int, value at which RUL value is capped
 37 |             scale: boolean, Scale each variable-length trajectory down to [0, 1] if true
 38 |             normalize: boolean, Scale each sample (after windowing) to zero mean unit variance if true
 39 |             shuffle: boolean, shuffle training data if true
 40 |         """
 41 |         self.n_sensors = 21
 42 |         self.n_operating_modes = 3
 43 |         self.data_dir = data_dir
 44 |         self.T = T
 45 |         self.skip = skip
 46 |         self.max_rul_predictable = max_rul_predictable
 47 |         self.CMAPSSDir = os.path.join(data_dir, "CMAPSSData")
 48 |         self.Challenge_Data = os.path.join(data_dir, "Challenge_Data")
 49 |         self.filepath = self._maybe_download(data_dir) # list of filepaths
 50 | 
 51 |         print("Loading data")
 52 |         self.train_trajectories, self.val_trajectories, self.val_rul, self.test_trajectories = self.load_series()
 53 |         if scale:
 54 |             self.train_trajectories = self.scale_data(self.train_trajectories)
 55 |             self.val_trajectories = self.scale_data(self.val_trajectories)
 56 |             self.test_trajectories = self.scale_data(self.test_trajectories)
 57 | 
 58 |         self.n_features = self.train_trajectories[0].shape[1]
 59 | 
 60 |         print("Creating sliding window data")
 61 |         X_train, y_train = self.sliding_window_rul(self.train_trajectories, skip=skip)
 62 | 
 63 |         if normalize:
 64 |             X_train = self.normalize_data(X_train)
 65 |         if shuffle:
 66 |             X_train, y_train = self.shuffle_data(X_train, y_train)
 67 | 
 68 |         X_train_prev = np.roll(X_train, shift=1, axis=1)
 69 | 
 70 |         X_val, y_val = self.sliding_window_rul(self.val_trajectories, rul=self.val_rul, augment_test_data=False)
 71 |         if normalize:
 72 |             X_val = self.normalize_data(X_val)
 73 |         X_val_prev = np.roll(X_val, shift=1, axis=1)
 74 | 
 75 |         self.train = {'X': {'data': X_train, 'axes': ('N', recurrent_axis_name, feature_axis_name)},
 76 |                       'X_prev': {'data': X_train_prev, 'axes': ('N', recurrent_axis_name, feature_axis_name)},
 77 |                       'y': {'data': y_train, 'axes': ('N', label_axis_name)}}
 78 | 
 79 |         self.test = {'X': {'data': X_val, 'axes': ('N', recurrent_axis_name, feature_axis_name)},
 80 |                      'X_prev': {'data': X_val_prev, 'axes': ('N', recurrent_axis_name, feature_axis_name)},
 81 |                      'y': {'data': y_val, 'axes': ('N', label_axis_name)}}
 82 | 
 83 |         print("Done. Number of samples in train: {}, number of samples in test: {}".format(len(X_train), len(X_val)))
 84 | 
 85 | 
 86 |     def load_series(self):
 87 |         train_trajectories = []
 88 |         val_trajectories = []
 89 |         val_rul = []
 90 |         test_trajectories = []
 91 | 
 92 |         # CMAPSS data
 93 |         for f in ["FD00" + str(i+1) for i in range(4)]:
 94 |             full_path = os.path.join(self.CMAPSSDir, "train_" + f + ".txt")
 95 |             train_trajectories += self.load_data_from_file(full_path)
 96 | 
 97 |             full_path = os.path.join(self.CMAPSSDir, "test_" + f + ".txt")
 98 |             val_trajectories += self.load_data_from_file(full_path)
 99 | 
100 |             full_path = os.path.join(self.CMAPSSDir, "RUL_" + f + ".txt") # this RUL corresponds to the val trajectory
101 |             fp = open(full_path, "r")
102 |             val_rul += [int(line.strip("\n")) for line in fp]
103 | 
104 |         assert len(val_trajectories) == len(val_rul)
105 | 
106 |         # Challenge data
107 |         full_path = os.path.join(self.Challenge_Data, "train.txt")
108 |         train_trajectories += self.load_data_from_file(full_path)
109 | 
110 |         full_path = os.path.join(self.Challenge_Data, "test.txt") # this data does not have RUL
111 |         test_trajectories += self.load_data_from_file(full_path)
112 | 
113 |         full_path = os.path.join(self.Challenge_Data, "final_test.txt") # this data does not have RUL
114 |         test_trajectories += self.load_data_from_file(full_path)
115 | 
116 |         return train_trajectories, val_trajectories, val_rul, test_trajectories
117 | 
118 |     def sliding_window_rul(self, trajectories, skip=1, rul=None, augment_test_data=False):
119 |         """
120 |         Given a set of trajectories, split into equal sized windows and corresponding rul values
121 |         If rul is not provided, the end of the trajectory is assumed to be the time of failure
122 |         Args:
123 |             trajectories: List of numpy arrays, elements have variable dim-0 (time) and same number of attributes = (self.n_features)
124 |             rul: remaining useful life
125 | 
126 |         Returns:
127 |             X, y: X is numpy array of shape (N, self.T, self.n_features), y is numpy array of size (N,)
128 | 
129 |         """
130 |         X = []
131 |         y = []
132 |         for ii, traj in enumerate(trajectories):
133 |             # backfill all trajectories that are smaller than desired trajectory
134 |             if traj.shape[0] < self.T * skip:
135 |                 padded_traj = np.zeros((self.T * skip, traj.shape[1]))
136 |                 padded_traj[-1*traj.shape[0]:, :] = traj
137 |                 padded_traj[0:self.T * skip - traj.shape[0], :] = traj[0, :]
138 |                 traj = padded_traj
139 | 
140 |             assert np.any(np.isnan(traj)) == False
141 | 
142 |             if rul is None or (rul is not None and augment_test_data):
143 |                 shape = [int(np.ceil((traj.shape[0] - self.T + 1) * 1.0/ skip)), self.T, traj.shape[-1]]
144 |                 strides = [traj.strides[0] * skip, traj.strides[0], traj.strides[-1]]
145 |                 strided_a = np.lib.stride_tricks.as_strided(traj, shape=shape, strides=strides, writeable=False)
146 |                 X.append(strided_a)
147 |                 if rul is None:
148 |                     y.append(traj.shape[0] - self.T - np.arange(0, traj.shape[0] - self.T + 1, skip)[:, np.newaxis])
149 |                 else:
150 |                     y.append(rul[ii] + traj.shape[0] - self.T - np.arange(0, traj.shape[0] - self.T + 1, skip)[:, np.newaxis])
151 |             else:
152 |                 X.append(traj[-1*self.T:, :][np.newaxis, :, :])
153 |                 y.append(np.array([rul[ii]])[:, np.newaxis])
154 | 
155 |         X = np.vstack(X)
156 |         y = np.vstack(y)
157 |         y[y > self.max_rul_predictable] = self.max_rul_predictable
158 | 
159 |         assert X.shape[0] == y.shape[0]
160 | 
161 |         assert np.all(y >= 0) == True
162 | 
163 |         return X, y
164 | 
165 | 
166 |     def load_data_from_file(self, f):
167 |         df = pd.read_csv(f, sep=' ', header=None, index_col=False).fillna(method='bfill')
168 |         df = df.dropna(axis='columns', how='all')
169 |         assert df.shape[1] == self.n_sensors + self.n_operating_modes + 2
170 |         df.columns = ["trajectory_id", "t"] + ["setting_" + str(i + 1) for i in range(self.n_operating_modes)] + ["sensor_" + str(i + 1) for i in range(self.n_sensors)]
171 |         grouped = df.groupby("trajectory_id")
172 |         trajectories = []
173 |         for traj_id, traj in grouped:
174 |             trajectories.append(traj[["setting_" + str(i + 1) for i in range(self.n_operating_modes)] + ["sensor_" + str(i + 1) for i in range(self.n_sensors)]].as_matrix())
175 |         return trajectories
176 | 
177 |     def _maybe_download(self, work_directory):
178 |         """
179 |         This function downloads the stock data if its not already present
180 | 
181 |         Returns:
182 |             Location of saved data
183 | 
184 |         """
185 |         if (not os.path.exists(self.CMAPSSDir)) or len(os.listdir(self.CMAPSSDir)) == 0:
186 |             print("CMAPSS data does not exist, downloading...")
187 |             self._download_data(work_directory, "CMAPSS")
188 | 
189 |         if (not os.path.exists(self.Challenge_Data)) or len(os.listdir(self.Challenge_Data)) == 0:
190 |             print("Challenge data does not exist, downloading...")
191 |             self._download_data(work_directory, "Challenge")
192 | 
193 |     def _download_data(self, work_directory, dataset):
194 |         work_directory = os.path.abspath(work_directory)
195 |         if not os.path.exists(work_directory):
196 |             os.mkdir(work_directory)
197 | 
198 |         headers = {'User-Agent': 'Mozilla/5.0'}
199 | 
200 |         if dataset == "CMAPSS":
201 |             SOURCE_URL = CMAPSS_SOURCE_URL
202 |             filename = CMAPPS_FILENAME
203 |         if dataset == "Challenge":
204 |             SOURCE_URL = CHALLENGE_SOURCE_URL
205 |             filename = CHALLENGE_FILENAME
206 | 
207 |         filepath = os.path.join(work_directory, filename + ".zip")
208 |         req = urllib.request.Request(SOURCE_URL, headers=headers)
209 |         data_handle = urllib.request.urlopen(req)
210 |         with open(filepath, "wb") as fp:
211 |             fp.write(data_handle.read())
212 | 
213 |         print('Successfully downloaded data to {}'.format(filepath))
214 | 
215 |         unzip_dir = os.path.join(work_directory, filename)
216 |         if not os.path.exists(unzip_dir):
217 |             os.mkdir(unzip_dir)
218 |         fp = zipfile.ZipFile(filepath, 'r')
219 |         fp.extractall(unzip_dir)
220 |         fp.close()
221 |         print('Successfully unzipped data to {}'.format(unzip_dir))
222 | 
223 |         return filepath
224 | 
225 |     def normalize_data(self, X):
226 |         """
227 |         Normalize each sample - for sensors, zero mean and std normalization, for op points, scale only
228 |         Args:
229 |             X: samples
230 | 
231 |         Returns:
232 |             Normalized sample matrix
233 | 
234 |         """
235 |         # cols to normalize
236 |         cols_to_normalize = list(set(range(0, self.n_sensors + self.n_operating_modes)) - set([0, 1, 2]))
237 |         X_mean = np.mean(X[:, :, cols_to_normalize], axis=1)
238 |         X[:, :, cols_to_normalize] = X[:, :, cols_to_normalize] - X_mean[:, np.newaxis, :]
239 |         X_std = np.std(X[:, :, cols_to_normalize], axis=1)
240 |         X_std[X_std <= np.finfo(np.float64).eps] = 1
241 |         X[:, :, cols_to_normalize] = X[:, :, cols_to_normalize]/X_std[:, np.newaxis, :]
242 | 
243 |         # normalize operating cond separately
244 |         X[:, :, 0] = X[:, :, 0] / 100
245 |         X[:, :, 1] = X[:, :, 1] / 100
246 |         X[:, :, 2] = X[:, :, 2] / 100
247 |         return X
248 | 
249 |     def scale_data(self, trajectories):
250 |         """
251 |         Scale each trajectory by its max value
252 |         Args:
253 |             trajectories: list of trajectories
254 | 
255 |         Returns:
256 |             list of scaled trajectories
257 | 
258 |         """
259 |         traj_scaled = []
260 |         for traj in trajectories:
261 |             traj = traj/np.max(traj, axis=0)
262 |             traj_scaled.append(traj)
263 | 
264 |         return traj_scaled
265 | 
266 |     def shuffle_data(self, X, y):
267 |         assert len(X) == len(y)
268 |         shuffle_ind = np.random.permutation(len(X))
269 |         X = X[shuffle_ind, ...]
270 |         y = y[shuffle_ind, ...]
271 |         return X, y
272 | 
273 |     def plot_sample(self, results_dir, trajectory_id=1):
274 |         """
275 |         Plots the trajectory of all sensors and operating modes from a chosen sample
276 |         Args:
277 |             results_dir: Directory to write plots to
278 |             trajectory_id: index of trajectory within dataset
279 | 
280 |         Returns:
281 |             None
282 |         """
283 |         try:
284 |             import matplotlib
285 |             #matplotlib.use('Agg')
286 |             import matplotlib.pyplot as plt
287 |         except ImportError:
288 |             raise ImportError("matplotlib not found")
289 | 
290 |         all_traj_ids = [trajectory_id]
291 | 
292 |         ncols = int(np.ceil((self.n_sensors + self.n_operating_modes)*1.0//3))
293 | 
294 |         fig, ax = plt.subplots(ncols, 3)
295 |         fig.set_figheight(20)
296 |         fig.set_figwidth(10)
297 |         for ii in all_traj_ids:
298 |             for jj in range(self.n_sensors):
299 |                 plt.subplot(ncols, 3, jj + 1)
300 |                 plt.plot(self.train_trajectories[ii][:, jj + self.n_operating_modes])
301 |                 plt.title('Sensor %d' % (jj + 1))
302 | 
303 |             for jj in range(self.n_operating_modes):
304 |                 plt.subplot(ncols, 3, jj + self.n_sensors + 1)
305 |                 plt.plot(self.train_trajectories[ii][:, jj])
306 |                 plt.title('Operating mode %d' % (jj + 1))
307 | 
308 |         plt.tight_layout()
309 |         plt.show()
310 |         plt.savefig('%s' % os.path.join(results_dir, "trajectories.png"))
311 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas==0.22.0
2 | pandas-datareader>=0.6.0
3 | matplotlib>=2.1.1
4 | https://github.com/matplotlib/mpl_finance/archive/master.zip
5 | tensorboard
6 | 


--------------------------------------------------------------------------------
/topologies/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NervanaSystems/aidc-2018-timeseries/df0d25a258ced93b23755f7e62a3f96d73963fe3/topologies/__init__.py


--------------------------------------------------------------------------------
/topologies/convolutional_model.py:
--------------------------------------------------------------------------------
 1 | from ngraph.frontends.neon import Sequential, Convolution, Affine, BatchNorm
 2 | from ngraph.frontends.neon import KaimingInit, Rectlin, Identity
 3 | 
 4 | 
 5 | def define_model(out_axis, filter_shapes=[5], n_filters=[32], init=KaimingInit()):
 6 |     assert len(filter_shapes) == len(n_filters)
 7 | 
 8 |     layers = []
 9 |     for e, (f, n) in enumerate(zip(filter_shapes, n_filters)):
10 |         layers.append(Convolution(filter_shape=(f, n), filter_init=init, strides=1, padding="valid", dilation=1, activation=Rectlin(), batch_norm=True))
11 | 
12 |     affine_layer = Affine(weight_init=init, bias_init=init,
13 |                           activation=Identity(), axes=out_axis)
14 | 
15 |     model = Sequential(layers + [affine_layer])
16 | 
17 |     return model


--------------------------------------------------------------------------------
/topologies/custom_neon_classes.py:
--------------------------------------------------------------------------------
  1 | import ngraph as ng
  2 | from ngraph.frontends.neon.layer import Layer
  3 | from ngraph.frontends.neon.graph import SubGraph
  4 | from ngraph.frontends.neon.axis import shadow_axes_map, reorder_spatial_axes
  5 | from ngraph.frontends.neon.layer import ConvBase, Convolution, LABELS
  6 | from ngraph.frontends.neon import GaussianInit
  7 | import six
  8 | 
  9 | 
 10 | class Dropout2D(Layer):
 11 |     """
 12 |     Layer for stochastically zero-out entire channels to prevent overfitting
 13 |     Arguments:
 14 |         keep (float):  Number between 0 and 1 that indicates probability of any particular
 15 |                        activation being kept.  Defaults to 0.5.
 16 |     Example:
 17 |         .. code-block:: python
 18 |         # Place a Dropout layer between two conv layers
 19 |         layers = [
 20 |             Convolution(nout=2048, activation=Rectlin()),
 21 |             Dropout2D(keep=0.6), # zeroes about 820 channels
 22 |             Convolution(nout=2048, activation=Rectlin())
 23 |         ]
 24 |     """
 25 |     def __init__(self, keep=0.5, **kwargs):
 26 |         super(Dropout2D, self).__init__(**kwargs)
 27 |         self.keep = keep
 28 |         self.mask = None
 29 | 
 30 |     @SubGraph.scope_op_creation
 31 |     def __call__(self, in_obj, **kwargs):
 32 |         if Layer.inference_mode:
 33 |             return self.keep * in_obj
 34 |         else:
 35 |             if self.mask is None:
 36 |                 in_axes = in_obj.axes.sample_axes()
 37 |                 channel_axes = ng.make_axes([in_axes.channel_axis()])
 38 |                 self.mask = ng.persistent_tensor(axes=channel_axes).named('channel_mask')
 39 |             self.mask = ng.uniform(self.mask, low=0.0, high=1.0) <= self.keep
 40 |             return self.mask * in_obj
 41 | 
 42 | 
class DilatedCausalConvBase(ConvBase):
    """
    ConvBase variant whose filter W is, by default, built with weight normalization:
    W = g * v / sqrt(mean(v ** 2) + 1e-3), with v and g as trainable variables.

    NOTE(review): nothing in this class itself enforces causality; presumably it comes from the
    padding/dilation arguments passed on to ConvBase - confirm against the callers.
    """
    def __init__(self, *args, **kwargs):
        super(DilatedCausalConvBase, self).__init__(*args, **kwargs)

        # Weight normalization is switched on unconditionally; setting this attribute to False
        # before the first call makes __call__ create a plain filter variable instead
        self.weight_norm = True
        self.W = None

        # g and v are only created (on the first call) when weight_norm is True
        self.g = None
        self.v = None

    @SubGraph.scope_op_creation
    def __call__(self, in_obj, channel_axes="C", spatial_axes=("D", "H", "W"), **kwargs):
        """
        Apply the convolution to in_obj, creating the filter variables when the layer is
        not yet initialized.

        Arguments:
            in_obj (Op): Input op
            channel_axes (str): name of the expected channel axis type - defaults to "C"
            spatial_axes (tuple): names of expected depth, height and width axis types - defaults
                                  to "D", "H", and "W"
        Returns:
            The convolution output, with axes ordered like the input's axes; newly introduced
            axes of length 1 are sliced away and any remaining new axes are placed at the end
        Raises:
            ValueError: if spatial_axes is a tuple with fewer than 3 entries, or if the layer is
                        already initialized and this input implies different filter axes
        """
        # A dict maps the default names to custom ones; missing entries keep the default name
        if isinstance(spatial_axes, dict):
            spatial_axes = tuple(spatial_axes.get(name, name) for name in ("D", "H", "W"))
        elif isinstance(spatial_axes, tuple):
            if len(spatial_axes) < 3:
                raise ValueError("spatial_axes must have length 3 (e.g. ('D', 'H', 'W'))")
            # Falsy entries (e.g. None or "") fall back to the default name at that position
            spatial_axes = tuple(name if name else default for name, default in zip(spatial_axes, ("D", "H", "W")))

        # Remember the input's axis order so the output can be put back into it at the end
        orig_axes = in_obj.axes
        in_obj = reorder_spatial_axes(in_obj, channel_axes, spatial_axes)
        # From here on channel_axes / spatial_axes are the axis objects found on the reordered input
        channel_axes = in_obj.axes.get_by_names(channel_axes)
        spatial_axes = in_obj.axes.get_by_names(*spatial_axes)

        filter_axes = self._filter_axes(channel_axes, spatial_axes)

        # mark 'K' as a shadow axis for the initializers.
        axes_map = shadow_axes_map(filter_axes.find_by_name('K'))
        filter_axes = ng.make_axes([axis if axis.name != 'K' else list(axes_map.keys())[0] for axis in filter_axes])

        # NOTE(review): self.initialized is not defined in this class; presumably the parent
        # layer derives it from whether variables have been created - confirm
        if not self.initialized:
            if not self.weight_norm:
                self.W = ng.variable(axes=filter_axes, initial_value=self.init, metadata={"label": LABELS["weight"]}).named("W")
            else:
                # Weight-normalized filter: v has the full filter shape, g lives on the
                # shadowed 'K' axis only
                self.v = ng.variable(axes=filter_axes, initial_value=self.init, metadata={"label": LABELS["weight"]}).named("v")
                out_axes = ng.make_axes([filter_axes.get_by_names("K__NG_SHADOW")])
                # Mean (not sum) of the squared entries of v, kept on the shadowed 'K' axis
                v_norm = ng.mean(ng.square(self.v), out_axes=out_axes)
                self.g = ng.variable(axes=out_axes, initial_value=self.init, metadata={"label": LABELS["weight"]}).named("g")
                # 1e-3 keeps the square root away from zero
                self.W = self.g * self.v * ng.reciprocal(ng.sqrt(v_norm + 1e-3))
        else:
            # Already initialized: the existing filter can only be reused if its axes match
            if filter_axes != self.W.axes:
                raise ValueError(("{layer_name} layer has already been initialized with an "
                                  "input object which has resulted in filter axes: "
                                  "{existing_filter_axes}. This new input object has axes: "
                                  "{input_axes}, which implies the need for filter axes: "
                                  "{new_filter_axes} which are different than the existing "
                                  "filter axes.").format(layer_name=self.name, existing_filter_axes=self.W.axes, input_axes=in_obj.axes, new_filter_axes=filter_axes, ))

        # Run the convolution and apply the shadow-axis role mapping to its result
        output = ng.map_roles(self._conv_op(in_obj, channel_axes, spatial_axes), axes_map)
        # Reorder the output to match the input order
        output_axis_order = ng.make_axes([output.axes.find_by_name(ax.name)[0] for ax in orig_axes])
        # Remove introduced axes. If their length is > 1, then perhaps they should be kept
        slices = [0 if (ax not in orig_axes) and ax.length == 1 else slice(None) for ax in output.axes]
        output = ng.tensor_slice(output, slices)
        # New axes with length > 1 may have been introduced. Add them to the end.
        output_axis_order = output_axis_order | output.axes
        return ng.axes_with_order(output, output_axis_order)
107 | 
108 | 
def make_dilated_causal_conv(filter_shape, init, strides, padding, dilation, **kwargs):
    """
    Normalize the convolution arguments and create a DilatedCausalConvBase from them.

    Arguments:
        filter_shape: list/tuple of length 2 to 4 (filter size for 1 to 3 spatial dimensions,
                      followed by the number of filters), or a dict keyed by axis name
        init: filter initializer, passed on unchanged
        strides: int (expanded to one entry per given non-"K" axis), otherwise passed on as is
        padding: int, string or tuple (expanded per given non-"K" axis), otherwise passed on as is
        dilation: int (expanded per given non-"K" axis), otherwise passed on as is
        **kwargs: passed on to DilatedCausalConvBase
    Raises:
        ValueError: if filter_shape is a list/tuple whose length is not between 2 and 4
    """
    if isinstance(filter_shape, (list, tuple)):
        n_entries = len(filter_shape)
        if not 2 <= n_entries <= 4:
            raise ValueError("If filter_shape is a list, its length should be between 2 and 4, "
                             "specifying the filter size for 1 to 3 spatial dimensions and the "
                             "number of filters. Provided: {}".format(filter_shape))
        # The given entries fill the trailing axes; axes that are not given keep size 1
        axis_names = ("WK", "HWK", "DHWK")[n_entries - 2]
        full_shape = dict.fromkeys("DHWK", 1)
        full_shape.update(zip(axis_names, filter_shape))
        filter_shape = full_shape
    else:
        axis_names = filter_shape.keys()

    # "K" counts filters and is not a spatial axis, so it gets no stride/padding/dilation
    given_spatial = [name for name in axis_names if name != "K"]

    def per_axis(value):
        return {name: value for name in given_spatial}

    if isinstance(strides, int):
        strides = per_axis(strides)
    if isinstance(padding, (int, six.string_types, tuple)):
        padding = per_axis(padding)
    if isinstance(dilation, int):
        dilation = per_axis(dilation)

    return DilatedCausalConvBase(filter_shape, init, strides, padding, dilation, **kwargs)
129 | 
130 | 
class DilatedCausalConv(Convolution):
    """
    Convolution layer whose ``conv`` attribute is a DilatedCausalConvBase.

    The constructor takes the same arguments as the Convolution call it forwards them to.
    """
    def __init__(self, filter_shape, filter_init, strides=1, padding=0, dilation=1, bias_init=None, activation=None, batch_norm=False, **kwargs):
        super(DilatedCausalConv, self).__init__(filter_shape,
                                                filter_init,
                                                strides=strides,
                                                padding=padding,
                                                dilation=dilation,
                                                bias_init=bias_init,
                                                activation=activation,
                                                batch_norm=batch_norm,
                                                **kwargs)
        # Assign the dilated causal convolution to self.conv after the parent constructor has run
        self._make_dilated_causal_conv_layer(filter_shape, filter_init, strides, padding, dilation, **kwargs)

        self.weight_norm = True

    def _make_dilated_causal_conv_layer(self, filter_shape, filter_init, strides, padding, dilation, **kwargs):
        """Create the DilatedCausalConvBase for the given arguments and store it as self.conv."""
        dilated_conv = make_dilated_causal_conv(filter_shape, filter_init, strides, padding, dilation, **kwargs)
        self.conv = dilated_conv


--------------------------------------------------------------------------------
/topologies/recurrent_model.py:
--------------------------------------------------------------------------------
  1 | import ngraph as ng
  2 | from ngraph.frontends.neon import Sequential, Recurrent, LSTM, Affine, SubGraph
  3 | from ngraph.frontends.neon import GlorotInit, Tanh, Logistic, Identity
  4 | from ngraph.frontends.neon.layer import get_steps
  5 | 
  6 | def define_recurrent_layers(out_axes=None, celltype='RNN', recurrent_units=[32], init=GlorotInit(), return_sequence=True):
  7 |     layers = []
  8 |     for e, i in enumerate(recurrent_units):
  9 |         layer_return_sequence = e < len(recurrent_units) - 1 or return_sequence
 10 |         if celltype == 'RNN':
 11 |             layers.append(Recurrent(nout=i, init=init, backward=False, activation=Tanh(),
 12 |                                     return_sequence=layer_return_sequence))
 13 |         elif celltype == 'LSTM':
 14 |             layers.append(LSTM(nout=i, init=init, backward=False, activation=Tanh(), gate_activation=Logistic(),
 15 |                                return_sequence=layer_return_sequence))
 16 |     if out_axes is not None:
 17 |         affine_layer = Affine(weight_init=init, bias_init=init,
 18 |                               activation=Identity(), axes=out_axes)
 19 |         layers.append(affine_layer)
 20 |     return layers
 21 | 
 22 | class RecurrentEncoder(Sequential):
 23 |     """
 24 |     This wrapper returns the final hidden states of all layers, allowing us to build multilayer seq2seq models.
 25 |     """
 26 |     def __init__(self, celltype='RNN', recurrent_units=[32], init=GlorotInit(), bottleneck=False, *args, **kwargs):
 27 |         layers = define_recurrent_layers(celltype=celltype,
 28 |                                          recurrent_units=recurrent_units,
 29 |                                          init=init,
 30 |                                          return_sequence=True)
 31 |         super(RecurrentEncoder, self).__init__(layers, *args, **kwargs)
 32 |         self.bottleneck = bottleneck
 33 | 
 34 |     @SubGraph.scope_op_creation
 35 |     def __call__(self, in_obj, combine=False, **kwargs):
 36 |         final_states = []
 37 |         for l in self.layers:
 38 |             in_obj = l(in_obj, **kwargs)
 39 |             recurrent_axis = in_obj.axes.recurrent_axis()
 40 |             final_state = get_steps(in_obj, recurrent_axis, backward=False)[-1]
 41 |             final_states.append(final_state)
 42 | 
 43 |         if self.bottleneck:
 44 |             final_states = final_states[::-1]
 45 | 
 46 |         if combine:
 47 |             if len(final_states) == 1:
 48 |                 return final_states[0]
 49 |             else:
 50 |                 batch_axis = final_states[0].axes.batch_axis()
 51 |                 axes_list = [(state.axes - [batch_axis])[0] for state in final_states]
 52 |                 combined = ng.ConcatOp(final_states, axes_list)
 53 |                 return combined
 54 |         else:
 55 |             return final_states
 56 | 
 57 | 
 58 | class RecurrentDecoder(Sequential):
 59 |     """
 60 |     This wrapper allows us to pass initial states into all layers of a multilayer decoder.
 61 |     It also allows an affine readout layer to be placed at the end.
 62 |     """
 63 |     def __init__(self, out_axes=None, celltype='RNN', recurrent_units=[32], init=GlorotInit(), *args, **kwargs):
 64 |         layers = define_recurrent_layers(out_axes=out_axes,
 65 |                                          celltype=celltype,
 66 |                                          recurrent_units=recurrent_units,
 67 |                                          init=init,
 68 |                                          return_sequence=True)
 69 |         super(RecurrentDecoder, self).__init__(layers, *args, **kwargs)
 70 |         self.celltype = celltype
 71 |         self.recurrent_units = recurrent_units
 72 | 
 73 |     @SubGraph.scope_op_creation
 74 |     def __call__(self, inference, *args, **kwargs):
 75 |         if inference:
 76 |             return self.run_inference(*args, **kwargs)
 77 |         else:
 78 |             return self.run_training(*args, **kwargs)
 79 | 
 80 |     def run_training(self, in_obj, init_states, **kwargs):
 81 |         if self.celltype == 'LSTM':
 82 |             init_states = [(state, ng.constant(0., state.axes)) for state in init_states]
 83 | 
 84 |         for i, l in enumerate(self.layers):
 85 |             if i < len(init_states):
 86 |                 in_obj = l(in_obj, init_state=init_states[i], **kwargs)
 87 |             else:
 88 |                 in_obj = l(in_obj, **kwargs)
 89 |         return in_obj
 90 | 
 91 |     def run_inference(self, out_axes, init_states, **kwargs):
 92 |         if self.celltype == 'LSTM':
 93 |             init_states = [(state, ng.constant(0., state.axes)) for state in init_states]
 94 | 
 95 |         one_time_axis = ng.make_axis(1, name="REC")
 96 |         time_axis = out_axes.recurrent_axis()
 97 |         batch_axis = out_axes.batch_axis()
 98 |         feature_axis = (out_axes - [time_axis, batch_axis])[0]
 99 | 
100 |         outputs = [ng.constant(0., [batch_axis, one_time_axis, feature_axis])]
101 |         hidden_states = init_states
102 | 
103 |         for timestep in range(time_axis.length):
104 |             in_obj = outputs[-1]
105 | 
106 |             # Compute the next hidden/cell states for the recurrent layers
107 |             next_hidden_states = []
108 |             for i, l in enumerate(self.layers[:-1]):
109 |                 if i < len(hidden_states):
110 |                     init_state = hidden_states[i]
111 |                 else:
112 |                     init_state = None
113 | 
114 |                 if self.celltype == 'LSTM':
115 |                     h, c = l(in_obj, init_state=init_state, return_cell_state=True)
116 |                     in_obj = h
117 | 
118 |                     h = ng.slice_along_axis(h, one_time_axis, 0)
119 |                     c = ng.slice_along_axis(c, one_time_axis, 0)
120 |                     next_hidden_states.append((h, c))
121 |                 else:
122 |                     h = l(in_obj, init_state=init_state)
123 |                     in_obj = h
124 | 
125 |                     h = ng.slice_along_axis(h, one_time_axis, 0)
126 |                     next_hidden_states.append((h, c))
127 |             hidden_states = next_hidden_states
128 | 
129 |             # Compute the output of the affine layer
130 |             in_obj = self.layers[-1](in_obj)
131 |             outputs.append(in_obj)
132 | 
133 |         # Get rid of the initial 0 input
134 |         outputs = outputs[1:]
135 |         outputs = [ng.slice_along_axis(output, one_time_axis, 0) for output in outputs]
136 |         outputs = ng.stack(outputs, time_axis)
137 |         outputs = ng.axes_with_order(outputs, out_axes)
138 |         return outputs
139 | 
140 | 
def define_model(out_axes=None, celltype='RNN', recurrent_units=[32], init=GlorotInit(), return_sequence=True):
    """
    Wrap the layers produced by ``define_recurrent_layers`` in a ``Sequential`` model.

    All arguments are forwarded unchanged to ``define_recurrent_layers``.
    """
    return Sequential(
        define_recurrent_layers(out_axes=out_axes,
                                celltype=celltype,
                                recurrent_units=recurrent_units,
                                init=init,
                                return_sequence=return_sequence)
    )
148 | 
149 | 
def encode_and_decode(encoder, decoder, encoder_inputs, decoder_inputs):
    """
    Encode ``encoder_inputs`` and run the decoder in training mode.

    The encoder is called with ``combine=False`` and its result is handed to the
    decoder as ``init_states``; ``decoder_inputs`` is the decoder's ``in_obj``.
    """
    return decoder(inference=False,
                   in_obj=decoder_inputs,
                   init_states=encoder(encoder_inputs, combine=False))
154 | 
155 | 
def encode_and_generate(encoder, decoder, encoder_inputs, out_axes):
    """
    Encode ``encoder_inputs`` and run the decoder in inference mode.

    The encoder is called with ``combine=False`` and its result is handed to the
    decoder as ``init_states`` together with the requested ``out_axes``.
    """
    per_layer_states = encoder(encoder_inputs, combine=False)
    generated = decoder(inference=True, out_axes=out_axes, init_states=per_layer_states)
    return generated
159 | 


--------------------------------------------------------------------------------
/topologies/temporal_convolutional_network.py:
--------------------------------------------------------------------------------
 1 | from ngraph.frontends.neon.layer import Convolution
 2 | from ngraph.frontends.neon import GaussianInit, Rectlin, Sequential
 3 | from ngraph.frontends.neon.model import ResidualModule
 4 | from topologies.custom_neon_classes import DilatedCausalConv, Dropout2D
 5 | 
 6 | 
 7 | def dilated_causal_conv_layer(kernel_size, n_filters, stride, dilation, init=GaussianInit(0, 0.01)):
 8 |     # define dilated causal convolution layer
 9 |     conv_layer = DilatedCausalConv(filter_shape=(kernel_size, n_filters),
10 |                              filter_init=init,
11 |                              strides=stride,
12 |                              dilation=dilation,
13 |                              padding='causal',
14 |                              batch_norm=False)
15 | 
16 |     return [conv_layer]
17 | 
## temporal block: dilated causal conv -> relu -> dropout
def temporal_block(out_channels, kernel_size, stride, dilation, dropout=0.2):
    """
    Return the list of layers making up one temporal block.

    ``dropout`` is passed straight to ``Dropout2D``.
    NOTE(review): the training scripts pass the fraction to keep here -- confirm
    against Dropout2D that this is what it expects.
    """
    block_layers = dilated_causal_conv_layer(kernel_size, out_channels, stride, dilation)
    block_layers.append(Rectlin())
    block_layers.append(Dropout2D(dropout))
    return block_layers
30 | 
31 | 
32 | 
## residual block: two temporal blocks on the main path plus an optional 1-wide conv side path
def residual_block(in_channels, out_channels, kernel_size, dilation, dropout=0.2, stride=1):
    """
    Build a ``ResidualModule`` around two consecutive temporal blocks.

    When the channel count changes (``in_channels != out_channels``) the side path
    is a single convolution with filter shape ``(1, out_channels)``; otherwise no
    side path is given to the ``ResidualModule``.
    """
    main_layers = []
    for _ in range(2):
        main_layers.extend(
            temporal_block(out_channels, kernel_size, stride, dilation, dropout=dropout)
        )
    main_path = Sequential(main_layers)

    side_path = None
    if in_channels != out_channels:
        projection = Convolution(filter_shape=(1, out_channels),
                                 filter_init=GaussianInit(0, 0.01),
                                 strides=1,
                                 dilation=1,
                                 padding='same',
                                 batch_norm=False)
        side_path = Sequential([projection])

    return ResidualModule(main_path, side_path)
49 | 
## tcn: one residual block per entry of hidden_sizes, each followed by a relu
def tcn(n_features_in, hidden_sizes, kernel_size=7, dropout=0.2):
    """
    Assemble the temporal convolutional network as a ``Sequential`` model.

    Level ``i`` uses dilation ``2 ** i``, takes ``hidden_sizes[i - 1]`` input
    channels (``n_features_in`` for the first level) and produces
    ``hidden_sizes[i]`` output channels.
    """
    layers = []
    for level in range(len(hidden_sizes)):
        if level == 0:
            in_channels = n_features_in
        else:
            in_channels = hidden_sizes[level - 1]
        block = residual_block(in_channels, hidden_sizes[level], kernel_size,
                               dilation=2 ** level, dropout=dropout)
        layers.extend([block, Rectlin()])

    return Sequential(layers)
66 | 
67 | 


--------------------------------------------------------------------------------
/training/adding_problem/adding_with_tcn.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This script replicates some of the experiments run in the paper:
  3 | Bai, Shaojie, J. Zico Kolter, and Vladlen Koltun. "An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling." arXiv preprint arXiv:1803.01271 (2018).
  4 | for the synthetic "adding" data
  5 | To compare with the original implementation, run
  6 | python ./adding_with_tcn.py --batch_size 32 --dropout 0.0 --epochs 20 --ksize 6 --levels 7 --seq_len 200 --log_interval 100 --nhid 27 --lr 0.002 --save_plots --results_dir ./ -b gpu
  7 | python ./adding_with_tcn.py --batch_size 32 --dropout 0.0 --epochs 20 --ksize 7 --levels 7 --seq_len 400 --log_interval 100 --nhid 27 --lr 0.002 --save_plots --results_dir ./ -b gpu
  8 | python ./adding_with_tcn.py --batch_size 32 --dropout 0.0 --epochs 20 --ksize 8 --levels 8 --seq_len 600 --log_interval 100 --nhid 24 --lr 0.002 --save_plots --results_dir ./ -b gpu
  9 | python ./adding_with_tcn.py --batch_size 32 --dropout 0.0 --epochs 20 --levels 2 --seq_len 200 --nhid 77 --modeltype LSTM --grad_clip_value 50 --save_plots --lr 0.002 --results_dir ./ --log_interval 1000 -b gpu
 10 | python ./adding_with_tcn.py --batch_size 32 --dropout 0.0 --epochs 20 --levels 2 --seq_len 400 --nhid 77 --modeltype LSTM --grad_clip_value 50 --save_plots --lr 0.002 --results_dir ./ --log_interval 1000 -b gpu
 11 | python ./adding_with_tcn.py --batch_size 32 --dropout 0.0 --epochs 20 --levels 1 --seq_len 600 --nhid 130 --modeltype LSTM --grad_clip_value 5 --save_plots --lr 0.002 --results_dir ./ --log_interval 1000 -b gpu
 12 | """
 13 | from topologies.temporal_convolutional_network import tcn
 14 | from ngraph.frontends.neon.layer import Affine
 15 | from ngraph.frontends.neon import Identity, GaussianInit
 16 | from ngraph.frontends.neon import ArrayIterator, Sequential
 17 | import ngraph as ng
 18 | from ngraph.frontends.neon import Adam, GradientDescentMomentum, Layer
 19 | from training.timeseries_trainer import TimeseriesTrainer
 20 | from topologies import recurrent_model
 21 | import argparse
 22 | from datasets.adding import Adding
 23 | import os
 24 | from utils.arguments import default_argparser
 25 | 
 26 | parser = default_argparser()
 27 | parser.add_argument('--dropout', type=float, default=0.0,
 28 |                     help='dropout applied to layers (default: 0.0)')
 29 | parser.add_argument('--ksize', type=int, default=7,
 30 |                     help='kernel size (default: 7)')
 31 | parser.add_argument('--levels', type=int, default=8,
 32 |                     help='# of levels (default: 8)')
 33 | parser.add_argument('--lr', type=float, default=4e-3,
 34 |                     help='initial learning rate (default: 4e-3)')
 35 | parser.add_argument('--nhid', type=int, default=30,
 36 |                     help='number of hidden units per layer (default: 30)')
 37 | parser.add_argument('--grad_clip_value', type=float, default=None,
 38 |                     help='value to clip each element of gradient')
 39 | parser.add_argument('--modeltype', default='TCN', choices=['TCN', 'LSTM'],
 40 |                         help='type of model to use (TCN, LSTM)')
 41 | args = parser.parse_args()
 42 | 
 43 | 
 44 | n_features = 2
 45 | hidden_sizes = [args.nhid]*args.levels
 46 | kernel_size = args.ksize
 47 | n_classes = 1
 48 | dropout = 1.0 - args.dropout  # fraction to keep
 49 | seq_len = args.seq_len
 50 | n_train = 50000
 51 | n_val = 1000
 52 | batch_size = args.batch_size
 53 | n_epochs = args.epochs
 54 | num_iterations = int(n_train * n_epochs * 1.0 / batch_size)
 55 | 
 56 | 
 57 | adding_dataset = Adding(T=seq_len, n_train=n_train, n_test=n_val)
 58 | train_iterator = ArrayIterator(adding_dataset.train, batch_size, total_iterations=num_iterations, shuffle=True)
 59 | test_iterator = ArrayIterator(adding_dataset.test, batch_size)
 60 | 
 61 | # Name and create axes
 62 | batch_axis = ng.make_axis(length=batch_size, name="N")
 63 | time_axis = ng.make_axis(length=seq_len, name="REC")
 64 | feature_axis = ng.make_axis(length=n_features, name="F")
 65 | out_axis = ng.make_axis(length=n_classes, name="Fo")
 66 | 
 67 | in_axes = ng.make_axes([batch_axis, feature_axis, time_axis])
 68 | out_axes = ng.make_axes([batch_axis, out_axis])
 69 | 
 70 | # Build placeholders for the created axes
 71 | inputs = dict(X=ng.placeholder(in_axes), y=ng.placeholder(out_axes),
 72 |               iteration=ng.placeholder(axes=()))
 73 | 
 74 | # define model
 75 | if args.modeltype == "TCN":
 76 |      # take only the last timepoint of output sequence to predict sum
 77 |     last_timepoint = [lambda op: ng.tensor_slice(op, [slice(seq_len-1, seq_len, 1) if ax.name == "W" else slice(None) for ax in op.axes])]
 78 |     affine_layer = Affine(axes=out_axis, weight_init=GaussianInit(0, 0.01), activation=Identity())
 79 | 
 80 |     model = Sequential([lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'})] + tcn(n_features, hidden_sizes, kernel_size=kernel_size, dropout=dropout).layers + last_timepoint + [affine_layer])
 81 | elif args.modeltype == "LSTM":
 82 |     model = recurrent_model.define_model(out_axis, celltype=args.modeltype, recurrent_units=hidden_sizes, return_sequence=False)
 83 | 
 84 | # Optimizer
 85 | if args.modeltype == "TCN":
 86 |     optimizer = Adam(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)
 87 | else:
 88 |     optimizer = GradientDescentMomentum(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)
 89 | 
 90 | # Define the loss function (squared L2 loss)
 91 | fwd_prop = model(inputs['X'])
 92 | train_loss = ng.squared_L2(fwd_prop - inputs['y'])
 93 | with Layer.inference_mode_on():
 94 |     preds = model(inputs['X'])
 95 |     preds = ng.axes_with_order(preds, out_axes)
 96 | eval_loss = ng.mean(ng.squared_L2(preds - inputs['y']), out_axes=())
 97 | eval_computation = ng.computation([eval_loss], "all")
 98 | predict_computation = ng.computation([preds], "all")
 99 | 
100 | 
101 | # Cost calculation
102 | batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
103 | train_computation = ng.computation(batch_cost, "all")
104 | 
105 | trainer = TimeseriesTrainer(optimizer, train_computation, eval_computation, predict_computation, inputs, model_graph=[model], tensorboard_dir="./tfboard")
106 | trainer.summary()
107 | 
# Each configuration writes its results into its own sub-folder of --results_dir.
# The folder name lists every setting as "<value>-<name>".
out_folder = os.path.join(args.results_dir, "results-adding-{}-modeltype-{}-batch_size-{}-dropout-{}-ksize-{}-levels-{}-seq_len-{}-nhid".format(args.modeltype, batch_size, args.dropout, kernel_size, args.levels, seq_len, args.nhid))
if not os.path.exists(out_folder):
    # makedirs rather than mkdir: also creates --results_dir itself when it does not exist yet
    os.makedirs(out_folder)
trainer.train(train_iterator, test_iterator, n_epochs=args.epochs, log_interval=args.log_interval, save_plots=args.save_plots, results_dir=out_folder)
112 | 


--------------------------------------------------------------------------------
/training/music_forecasting/music_forecasting_with_tcn.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This script replicates some of the experiments run in the paper:
  3 | Bai, Shaojie, J. Zico Kolter, and Vladlen Koltun. "An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling." arXiv preprint arXiv:1803.01271 (2018).
  4 | for music data
  5 | To compare with the original implementation, run
  6 | python ./music_forecasting_with_tcn.py --batch_size 32 --dropout 0.5 --epochs 2000 --ksize 3 --levels 2 --seq_len 100 --log_interval 2 --nhid 150 --lr 0.002 --grad_clip_value 0.4 --save_plots --results_dir ./ -b gpu
  7 | python ./music_forecasting_with_tcn.py --batch_size 32 --dropout 0.2 --epochs 2000 --levels 2 --seq_len 100 --log_interval 2 --nhid 200 --lr 0.002 --grad_clip_value 1 --save_plots --results_dir ./ -b gpu --modeltype LSTM
  8 | 
  9 | python ./music_forecasting_with_tcn.py --dataset Nott --batch_size 32 --dropout 0.2 --epochs 2000 --ksize 6 --levels 4 --seq_len 250 --log_interval 2 --nhid 150 --lr 0.002 --grad_clip_value 0.4 --save_plots --results_dir ./ -b gpu
 10 | python ./music_forecasting_with_tcn.py --dataset Nott --batch_size 32 --dropout 0.1 --epochs 2000 --levels 3 --seq_len 250 --log_interval 2 --nhid 280 --lr 0.004 --grad_clip_value 0.5 --save_plots --results_dir ./ -b gpu --modeltype LSTM
 11 | """
 12 | from topologies.temporal_convolutional_network import tcn
 13 | from ngraph.frontends.neon import ArrayIterator
 14 | import ngraph as ng
 15 | from ngraph.frontends.neon import Adam, GradientDescentMomentum, Layer, Affine, Logistic, GaussianInit, Sequential
 16 | from topologies import recurrent_model
 17 | from training.timeseries_trainer import TimeseriesTrainer
 18 | from datasets.music import Music
 19 | import os
 20 | from utils.arguments import default_argparser
 21 | 
 22 | parser = default_argparser()
 23 | parser.add_argument('--datadir', type=str, default="../data/",
 24 |                     help='dir to download data if not already present')
 25 | parser.add_argument('--dropout', type=float, default=0.0,
 26 |                     help='dropout applied to layers (default: 0.0)')
 27 | parser.add_argument('--ksize', type=int, default=7,
 28 |                     help='kernel size (default: 7)')
 29 | parser.add_argument('--levels', type=int, default=8,
 30 |                     help='# of levels (default: 8)')
 31 | parser.add_argument('--lr', type=float, default=4e-3,
 32 |                     help='initial learning rate (default: 4e-3)')
 33 | parser.add_argument('--nhid', type=int, default=30,
 34 |                     help='number of hidden units per layer (default: 30)')
 35 | parser.add_argument('--grad_clip_value', type=float, default=None,
 36 |                     help='value to clip each element of gradient')
 37 | parser.add_argument('--modeltype', default='TCN', choices=['TCN', 'LSTM'],
 38 |                         help='type of model to use (TCN, LSTM)')
 39 | parser.add_argument('--dataset', default='JSB', choices=['JSB', 'Nott'],
 40 |                         help='type of data to use (JSB, Nott)')
 41 | args = parser.parse_args()
 42 | 
 43 | 
 44 | hidden_sizes = [args.nhid]*args.levels
 45 | kernel_size = args.ksize
 46 | dropout = 1 - args.dropout # amount to keep
 47 | seq_len = args.seq_len
 48 | batch_size = args.batch_size
 49 | n_epochs = args.epochs
 50 | 
 51 | music_dataset = Music(data_dir=args.datadir, seq_len=seq_len, dataset=args.dataset)
 52 | seq_len = music_dataset.seq_len
 53 | n_train = music_dataset.train['X']['data'].shape[0]
 54 | num_iterations = int(n_train * n_epochs * 1.0 / batch_size)
 55 | n_features = music_dataset.train['X']['data'].shape[2]
 56 | 
 57 | train_iterator = ArrayIterator(music_dataset.train, batch_size, total_iterations=num_iterations, shuffle=True)
 58 | test_iterator = ArrayIterator(music_dataset.test, batch_size)
 59 | 
 60 | 
 61 | # Name and create axes
 62 | batch_axis = ng.make_axis(length=batch_size, name="N")
 63 | time_axis = ng.make_axis(length=seq_len, name="REC")
 64 | feature_axis = ng.make_axis(length=n_features, name="F")
 65 | out_axis = ng.make_axis(length=n_features, name="Fo")
 66 | 
 67 | in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
 68 | out_axes = ng.make_axes([batch_axis, time_axis, out_axis])
 69 | 
 70 | # Build placeholders for the created axes
 71 | inputs = dict(X=ng.placeholder(in_axes), y=ng.placeholder(out_axes),
 72 |               iteration=ng.placeholder(axes=()))
 73 | 
 74 | # define model
 75 | if args.modeltype == "TCN":
 76 |     affine_layer = Affine(axes=out_axis, weight_init=GaussianInit(0, 0.01), activation=Logistic())
 77 |     model = Sequential([lambda op: ng.map_roles(op, {'F': 'C', 'REC': 'W'})] + tcn(n_features, hidden_sizes, kernel_size=kernel_size, dropout=dropout).layers + [lambda op: ng.map_roles(op, {'C': 'F', 'W': 'REC'})] + [affine_layer])
 78 | elif args.modeltype == "LSTM":
 79 |     model = Sequential(recurrent_model.define_model(out_axis, celltype=args.modeltype, recurrent_units=hidden_sizes, return_sequence=True).layers + [Logistic()])
 80 | 
 81 | # Optimizer
 82 | if args.modeltype == "TCN":
 83 |     optimizer = Adam(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)
 84 | else:
 85 |     optimizer = GradientDescentMomentum(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)
 86 | 
 87 | # Define the loss function (categorical cross entropy, since each musical key on the piano is encoded as a binary value)
 88 | fwd_prop = model(inputs['X'])
 89 | fwd_prop = ng.axes_with_order(fwd_prop, out_axes)
 90 | train_loss = ng.cross_entropy_binary(fwd_prop, inputs['y'])
 91 | 
 92 | with Layer.inference_mode_on():
 93 |     preds = model(inputs['X'])
 94 |     preds = ng.axes_with_order(preds, out_axes)
 95 | eval_loss = ng.mean(ng.cross_entropy_binary(preds, inputs['y']), out_axes=())
 96 | eval_computation = ng.computation([eval_loss], "all")
 97 | predict_computation = ng.computation([preds], "all")
 98 | 
 99 | 
100 | # Cost calculation
101 | batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
102 | train_computation = ng.computation(batch_cost, "all")
103 | 
104 | trainer = TimeseriesTrainer(optimizer, train_computation, eval_computation, predict_computation, inputs, model_graph=[model], tensorboard_dir="./tfboard")
105 | trainer.summary()
106 | 
# Each configuration writes its results into its own sub-folder of --results_dir.
# The folder name lists every setting as "<value>-<name>".
out_folder = os.path.join(args.results_dir, "results-music-{}-dataset-{}-modeltype-{}-batch_size-{}-dropout-{}-ksize-{}-levels-{}-seq_len-{}-nhid".format(args.dataset, args.modeltype, batch_size, args.dropout, kernel_size, args.levels, seq_len, args.nhid))
if not os.path.exists(out_folder):
    # makedirs rather than mkdir: also creates --results_dir itself when it does not exist yet
    os.makedirs(out_folder)

trainer.train(train_iterator, test_iterator, n_epochs=args.epochs, log_interval=args.log_interval, save_plots=args.save_plots, results_dir=out_folder)
112 | 


--------------------------------------------------------------------------------
/training/predictive_maintenance/images/eval_loss_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NervanaSystems/aidc-2018-timeseries/df0d25a258ced93b23755f7e62a3f96d73963fe3/training/predictive_maintenance/images/eval_loss_plot.png


--------------------------------------------------------------------------------
/training/predictive_maintenance/images/preds_validation_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NervanaSystems/aidc-2018-timeseries/df0d25a258ced93b23755f7e62a3f96d73963fe3/training/predictive_maintenance/images/preds_validation_output.png


--------------------------------------------------------------------------------
/training/predictive_maintenance/turbofan_autoencoder.py:
--------------------------------------------------------------------------------
  1 | from datasets.turbofan import TurboFan
  2 | import ngraph as ng
  3 | from ngraph.frontends.neon import Layer, Affine, Identity
  4 | from ngraph.frontends.neon import GlorotInit, RMSProp
  5 | from ngraph.frontends.neon.layer import get_steps
  6 | from ngraph.frontends.neon import ArrayIterator
  7 | from utils.arguments import default_argparser
  8 | import os
  9 | from topologies import recurrent_model
 10 | from training.timeseries_trainer import TimeseriesTrainer
 11 | import numpy as np
 12 | 
 13 | 
 14 | parser = default_argparser()
 15 | parser.add_argument('--modeltype', default='LSTM', choices=['RNN', 'LSTM'],
 16 |                     help='type of model to use (RNN, LSTM)')
 17 | parser.add_argument('--skip', default=1, type=int, help='skip length for sliding window')
 18 | parser.add_argument('--n_hidden', default="128,256", type=str, help='hidden layers sizes in the encoder')
 19 | parser.add_argument('--bottleneck', default=False, action='store_true',
 20 |                     help='whether to use a bottleneck in the encoder-decoder model.')
 21 | parser.add_argument('--backward', default=False, action='store_true',
 22 |                     help='whether to reverse the target sequence in the autoencoder')
 23 | parser.add_argument('--lr', type=float, default=4e-3,
 24 |                     help='initial learning rate (default: 4e-3)')
 25 | parser.add_argument('--grad_clip_value', type=float, default=None,
 26 |                     help='value to clip each element of gradient')
 27 | parser.add_argument('--tensorboard_dir', type=str, default='./tensorboard',
 28 |                     help='directory to save tensorboard summary to')
 29 | args = parser.parse_args()
 30 | if args.predict_seq:
 31 |     raise ValueError("predict sequence is not available for turbofan use case")
 32 | 
 33 | if not os.path.exists(args.results_dir):
 34 |     os.mkdir(args.results_dir)
 35 | 
 36 | # Plot the inference / generation results
 37 | if args.save_plots:
 38 |     try:
 39 |         import matplotlib
 40 |         matplotlib.use('Agg')
 41 |         import matplotlib.pyplot as plt
 42 |     except ImportError:
 43 |         args.save_plots = False
 44 | 
 45 | # Define initialization
 46 | init_uni = GlorotInit()
 47 | 
 48 | batch_size = args.batch_size
 49 | seq_len = args.seq_len
 50 | no_epochs = args.epochs
 51 | output_dim = 1
 52 | 
 53 | dataset = TurboFan(data_dir="../../data/", T=args.seq_len, skip=args.skip, max_rul_predictable=130)
 54 | feature_dim = dataset.n_features
 55 | 
 56 | 
 57 | if args.save_plots:
 58 |     dataset.plot_sample(args.results_dir, trajectory_id=10)
 59 | 
 60 | # Build input data iterables
 61 | # Yields an input array of Shape (batch_size, seq_len, input_feature_dim)
 62 | train_samples = len(dataset.train['X']['data'])
 63 | num_iterations_per_epoch = train_samples // batch_size
 64 | num_iterations = (no_epochs * train_samples) // batch_size
 65 | 
 66 | # Name and create axes
 67 | batch_axis = ng.make_axis(length=batch_size, name="N")
 68 | time_axis = ng.make_axis(length=seq_len, name="REC")
 69 | feature_axis = ng.make_axis(length=feature_dim, name="F")
 70 | out_axis = ng.make_axis(length=1, name="Fo")
 71 | 
 72 | in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
 73 | rul_axes = ng.make_axes([batch_axis, out_axis])
 74 | 
 75 | # Build placeholders for the created axes
 76 | inputs = dict(X=ng.placeholder(in_axes),
 77 |               y=ng.placeholder(rul_axes))
 78 | 
 79 | Xs = get_steps(inputs['X'], time_axis)
 80 | if args.backward:
 81 |     target_steps = Xs[::-1]
 82 |     target = ng.stack(target_steps, time_axis)
 83 | else:
 84 |     target_steps = Xs
 85 |     target = inputs['X']
 86 | 
 87 | previous_steps = [ng.constant(0., [batch_axis, feature_axis])] + [target_steps[i] for i in range(seq_len - 1)]
 88 | previous = ng.stack(previous_steps, time_axis)
 89 | 
 90 | # define model
 91 | encoder_recurrent_units = list(map(int, args.n_hidden.split(",")))
 92 | if args.bottleneck:
 93 |     decoder_recurrent_units = encoder_recurrent_units[::-1]
 94 | else:
 95 |     decoder_recurrent_units = encoder_recurrent_units
 96 | encoder = recurrent_model.RecurrentEncoder(celltype=args.modeltype,
 97 |                                            recurrent_units=encoder_recurrent_units,
 98 |                                            bottleneck=args.bottleneck)
 99 | decoder = recurrent_model.RecurrentDecoder(out_axes=(feature_axis,), celltype=args.modeltype,
100 |                                            recurrent_units=decoder_recurrent_units)
101 | 
102 | affine_layer = Affine(weight_init=init_uni, bias_init=init_uni, activation=Identity(),
103 |                       axes=[out_axis])
104 | 
105 | # Optimizer
106 | optimizer = RMSProp(gradient_clip_value=args.grad_clip_value, learning_rate=args.lr)
107 | 
108 | 
def predictions(encoder, affine_layer, inputs):
    """
    Encode ``inputs`` into one combined state and map it through ``affine_layer``.

    The result is reordered to the module-level ``rul_axes``.
    """
    combined_state = encoder(inputs, combine=True)
    return ng.axes_with_order(affine_layer(combined_state), rul_axes)
114 | 
115 | 
def build_seq2seq_computations():
    """
    Build the training and evaluation computations of the sequence autoencoder.

    Training decodes from the shifted target (``previous``); evaluation generates
    the sequence step by step in inference mode. Both use a squared L2 loss
    against ``target``.

    Returns:
        (train_computation, loss_computation)
    """
    # Training: decode with the shifted target as input, then take an optimizer step
    decoded_for_training = recurrent_model.encode_and_decode(encoder, decoder,
                                                             inputs['X'], previous)
    train_loss = ng.squared_L2(target - decoded_for_training)
    mean_train_loss = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(mean_train_loss, "all")

    # Evaluation: free-running generation with the layers in inference mode
    with Layer.inference_mode_on():
        generated = recurrent_model.encode_and_generate(encoder, decoder, inputs['X'], in_axes)
        eval_loss = ng.mean(ng.squared_L2(target - generated), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
130 | 
131 | 
def build_regressor_computations():
    """
    Build the training and evaluation computations of the regressor.

    The regressor applies ``affine_layer`` to the combined encoder state and is
    trained with a squared L2 loss against ``inputs['y']``.

    Returns:
        (train_computation, loss_computation)
    """
    # Training: squared L2 loss followed by an optimizer step
    predicted = predictions(encoder, affine_layer, inputs['X'])
    train_loss = ng.squared_L2(predicted - inputs['y'])
    mean_train_loss = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(mean_train_loss, "all")

    # Evaluation: same loss with the layers in inference mode
    with Layer.inference_mode_on():
        predicted_eval = predictions(encoder, affine_layer, inputs['X'])
        eval_loss = ng.mean(ng.squared_L2(predicted_eval - inputs['y']), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
146 | 
147 | 
def build_generator_computation():
    """Return a computation that generates a sequence from ``inputs['X']`` in inference mode."""
    with Layer.inference_mode_on():
        generated_sequence = recurrent_model.encode_and_generate(encoder, decoder,
                                                                 inputs['X'], in_axes)
    return ng.computation([generated_sequence], "all")
152 | 
def build_regressor_prediction():
    """Return a computation that yields the regressor's predictions in inference mode."""
    with Layer.inference_mode_on():
        predicted = predictions(encoder, affine_layer, inputs['X'])
    return ng.computation([predicted], "all")
157 | 
def plot_generated(trainer):
    """Plot one generated training sequence against the true one and save it.

    The first (unshuffled) training batch is run through ``trainer.predict``
    and a single example of it is drawn, one panel per operating mode and per
    sensor, into ``<args.results_dir>/generated_series.png``.

    Arguments:
        trainer: object exposing ``predict(iterator, num_batches=...)``,
            e.g. the autoencoder's TimeseriesTrainer.
    """
    # Predict on the first batch of the train set only
    train_set_one_epoch = ArrayIterator(dataset.train, batch_size, shuffle=False)
    gen_batch = trainer.predict(train_set_one_epoch, num_batches=1)

    # Rewind so that the true batch fetched below is the one that was predicted
    train_set_one_epoch.reset()
    true_batch = next(train_set_one_epoch)['X']

    # Example 4 of the batch is shown; clamp the index so that batches with
    # fewer than five examples do not raise an IndexError
    example_idx = min(4, len(gen_batch) - 1)
    gen_series = gen_batch[example_idx]
    true_series = true_batch[example_idx]

    if args.backward:
        # If args.backward is set, the autoencoder would have produced the input sequence in reverse.
        # We flip it again to match the true series
        gen_series = gen_series[::-1, :]

    # One panel per series, laid out on a grid with three columns
    n_modes = dataset.n_operating_modes
    n_panels = n_modes + dataset.n_sensors
    n_grid_cols = 3
    # True division before the ceil: flooring first would drop the last,
    # partially filled row whenever n_panels is not a multiple of three
    n_grid_rows = int(np.ceil(n_panels / float(n_grid_cols)))
    # squeeze=False keeps `axes` two-dimensional even for a single row
    fig, axes = plt.subplots(n_grid_rows, n_grid_cols, squeeze=False)
    fig.set_figheight(20)
    fig.set_figwidth(10)

    for panel, axis in zip(range(n_panels), axes.ravel()):
        # Only the first panel carries labels, so the figure legend has one entry per curve
        true_label, gen_label = ("true", "gen") if panel == 0 else (None, None)
        axis.plot(true_series[:, panel], label=true_label, color="blue")
        axis.plot(gen_series[:, panel], label=gen_label, color="red")
        if panel < n_modes:
            axis.set_title("Operating mode {}".format(panel + 1))
        else:
            axis.set_title("Sensor {}".format(panel - n_modes + 1))
    fig.legend()

    fig.tight_layout()
    fig.savefig(os.path.join(args.results_dir, "generated_series.png"))
    # Release the figure; it is not needed once written to disk
    plt.close(fig)
201 | 
202 | 
# Data iterators: a shuffled training stream bounded by num_iterations, the
# test set, and a single unshuffled pass over the training data
train_set = ArrayIterator(dataset.train, batch_size,
                          total_iterations=num_iterations, shuffle=True)
test_set = ArrayIterator(dataset.test, batch_size)
train_set_one_epoch = ArrayIterator(dataset.train, batch_size, shuffle=False)

# Computations of the seq2seq autoencoder
seq2seq_train_computation, seq2seq_loss_computation = build_seq2seq_computations()
seq2seq_gen_sequence_computation = build_generator_computation()

# Computations of the regressor
regressor_train_computation, regressor_loss_computation = build_regressor_computations()
regressor_predictions = build_regressor_prediction()

# Keyword arguments shared by both training runs
shared_train_options = dict(log_interval=args.log_interval,
                            save_plots=True,
                            results_dir=args.results_dir)

# Stage 1: train the autoencoder for a fiftieth of the requested epochs
ae_trainer = TimeseriesTrainer(
    optimizer,
    seq2seq_train_computation,
    seq2seq_loss_computation,
    seq2seq_gen_sequence_computation,
    inputs,
    model_graph=[encoder, decoder],
)
ae_trainer.summary()
ae_trainer.train(train_set, test_set, n_epochs=args.epochs/50, **shared_train_options)

if args.save_plots:
    plot_generated(ae_trainer)

# Stage 2: train the regression model for the full number of epochs
print('Start training the regression model')
reg_trainer = TimeseriesTrainer(
    optimizer,
    regressor_train_computation,
    regressor_loss_computation,
    regressor_predictions,
    inputs,
    model_graph=[encoder, decoder],
    tensorboard_dir=args.tensorboard_dir,
)
reg_trainer.train(train_set, test_set, n_epochs=args.epochs, **shared_train_options)
224 | 


--------------------------------------------------------------------------------
/training/predictive_maintenance/turbofan_baselines.py:
--------------------------------------------------------------------------------
  1 | """
  2 | To run this script, use the command
  3 | python ./turbofan_baselines.py --batch_size 512 --seq_len 100 --modeltype LSTM --n_hidden 75,75 --epochs 200 --log_interval 100 --lr 0.002 --grad_clip_value 0.4 --save_plots --results_dir ./ -b gpu
  4 | """
  5 | from datasets.turbofan import TurboFan
  6 | import ngraph as ng
  7 | from ngraph.frontends.neon import Layer
  8 | from ngraph.frontends.neon import GlorotInit, RMSProp, Sequential, Rectlin
  9 | from ngraph.frontends.neon import ArrayIterator
 10 | from utils.arguments import default_argparser
 11 | import os
 12 | from topologies import recurrent_model, convolutional_model
 13 | from training.timeseries_trainer import TimeseriesTrainer
 14 | 
 15 | parser = default_argparser()
 16 | parser.add_argument('--modeltype', default='LSTM', choices=['RNN', 'CNN', 'LSTM'],
 17 |                         help='type of model to use (RNN, CNN, LSTM)')
 18 | parser.add_argument('--skip', default=1, type=int, help='skip length for sliding window')
 19 | parser.add_argument('--n_hidden', default="128,256", type=str, help='hidden layers sizes')
 20 | parser.add_argument('--filter_shape', default="3,3", type=str, help='filter shape for cnn')
 21 | parser.add_argument('--lr', type=float, default=4e-3,
 22 |                     help='initial learning rate (default: 4e-3)')
 23 | parser.add_argument('--grad_clip_value', type=float, default=None,
 24 |                     help='value to clip each element of gradient')
 25 | parser.add_argument('--tensorboard_dir', type=str, default='./tensorboard',
 26 |                     help='directory to save tensorboard summary to')
 27 | args = parser.parse_args()
 28 | if args.predict_seq:
 29 |     raise ValueError("predict sequence is not available for turbofan use case")
 30 | 
 31 | if not os.path.exists(args.results_dir):
 32 |     os.mkdir(args.results_dir)
 33 | 
 34 | from datetime import datetime
 35 | out_folder = os.path.join(args.results_dir, "results-turbofan-LSTM-{}".format(datetime.strftime(datetime.now(), "%Y-%m-%d_%H%M%S")))
 36 | if not os.path.exists(out_folder):
 37 |     os.mkdir(out_folder)
 38 | 
 39 | # Plot the inference / generation results
 40 | if args.save_plots:
 41 |     try:
 42 |         import matplotlib
 43 |         matplotlib.use('Agg')
 44 |         import matplotlib.pyplot as plt
 45 |     except ImportError:
 46 |         args.save_plots = False
 47 | 
 48 | # Define initialization
 49 | init_uni = GlorotInit()
 50 | 
 51 | batch_size = args.batch_size
 52 | seq_len = args.seq_len
 53 | no_epochs = args.epochs
 54 | output_dim = 1
 55 | 
 56 | dataset = TurboFan(data_dir="../../data/", T=args.seq_len, skip=args.skip, max_rul_predictable=130)
 57 | feature_dim = dataset.n_features
 58 | 
 59 | if args.save_plots:
 60 |     dataset.plot_sample(out_folder, trajectory_id=10)
 61 | 
 62 | # Build input data iterables
 63 | # Yields an input array of Shape (batch_size, seq_len, input_feature_dim)
 64 | train_samples = len(dataset.train['X']['data'])
 65 | num_iterations = (no_epochs * train_samples) // batch_size
 66 | 
 67 | train_set = ArrayIterator(dataset.train, batch_size, total_iterations=num_iterations, shuffle=True)
 68 | train_set_one_epoch = ArrayIterator(dataset.train, batch_size, shuffle=False)
 69 | test_set = ArrayIterator(dataset.test, batch_size)
 70 | 
 71 | # Name and create axes
 72 | batch_axis = ng.make_axis(length=batch_size, name="N")
 73 | time_axis = ng.make_axis(length=seq_len, name="REC")
 74 | feature_axis = ng.make_axis(length=feature_dim, name="F")
 75 | out_axis = ng.make_axis(length=output_dim, name="Fo")
 76 | 
 77 | in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
 78 | out_axes = ng.make_axes([batch_axis, out_axis])
 79 | 
 80 | # Build placeholders for the created axes
 81 | inputs = dict(X=ng.placeholder(in_axes), y=ng.placeholder(out_axes),
 82 |               iteration=ng.placeholder(axes=()))
 83 | preds_inputs = dict(X=inputs['X'])
 84 | 
 85 | # define model
 86 | n_hidden = list(map(int, args.n_hidden.split(",")))
 87 | filter_shape = list(map(int, args.filter_shape.split(",")))
 88 | if args.modeltype in ["RNN", "LSTM"]:
 89 |     seq1 = Sequential(recurrent_model.define_model(out_axis, celltype=args.modeltype, recurrent_units=n_hidden, return_sequence=args.predict_seq).layers + [Rectlin()])
 90 | elif args.modeltype == "CNN":
 91 |     seq1 = convolutional_model.define_model(out_axis, filter_shapes=filter_shape, n_filters=n_hidden)
 92 |     layers_modified = [lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'})] + seq1.layers + [Rectlin()]
 93 |     seq1 = Sequential(layers_modified)
 94 | 
 95 | # Optimizer
 96 | optimizer = RMSProp(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)
 97 | 
 98 | # Define the loss function (squared L2 loss)
 99 | fwd_prop = seq1(inputs['X'])
100 | train_loss = ng.squared_L2(fwd_prop - inputs['y'])
101 | 
102 | # Cost calculation
103 | batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
104 | train_computation = ng.computation(batch_cost, "all")
105 | 
106 | # Forward prop of test set
107 | # Required for correct functioning of batch norm and dropout layers during inference mode
108 | with Layer.inference_mode_on():
109 |     preds = seq1(inputs['X'])
110 |     preds = ng.axes_with_order(preds, out_axes)
111 | eval_loss = ng.mean(ng.squared_L2(preds - inputs['y']), out_axes=())
112 | eval_computation = ng.computation([eval_loss], "all")
113 | predict_computation = ng.computation([preds], "all")
114 | 
115 | trainer = TimeseriesTrainer(optimizer, train_computation, eval_computation, predict_computation, inputs, model_graph=[seq1],
116 |                             tensorboard_dir=args.tensorboard_dir)
117 | trainer.summary()
118 | 
119 | 
120 | print("Starting training")
121 | trainer.train(train_set, test_set, n_epochs=args.epochs, log_interval=args.log_interval, save_plots=args.save_plots, results_dir=out_folder)
122 | 
123 | 
124 | if args.save_plots:
125 |     # Compute the predictions on the training and test sets for visualization
126 |     train_preds = trainer.predict(train_set_one_epoch)
127 |     train_target = dataset.train['y']['data']
128 | 
129 |     test_preds = trainer.predict(test_set)
130 |     test_target = dataset.test['y']['data']
131 | 
132 |     # Visualize the model's predictions on the training and test sets
133 |     plt.figure()
134 |     plt.scatter(train_preds[:, 0], train_target[:, 0])
135 |     plt.xlabel('Training Predictions')
136 |     plt.ylabel('Training Targets')
137 |     plt.title('Predictions on training set')
138 |     plt.savefig(os.path.join(out_folder, 'preds_training_output.png'))
139 | 
140 |     plt.figure()
141 |     plt.scatter(test_preds[:, 0], test_target[:, 0])
142 |     plt.xlabel('Validation Predictions')
143 |     plt.ylabel('Validation Targets')
144 |     plt.title('Predictions on validation set')
145 |     plt.savefig(os.path.join(out_folder, 'preds_validation_output.png'))
146 | 


--------------------------------------------------------------------------------
/training/predictive_maintenance/turbofan_with_tcn.py:
--------------------------------------------------------------------------------
"""
  2 | To run this script, use the command:
  3 | python ./turbofan_with_tcn.py --batch_size 128 --dropout 0.1 --ksize 4 --levels 4 --seq_len 50 --log_interval 100 --nhid 50 --lr 0.002 --grad_clip_value 0.4 --save_plots --epochs 200 --results_dir ./ -b gpu
  4 | """
  5 | from topologies.temporal_convolutional_network import tcn
  6 | from ngraph.frontends.neon import ArrayIterator
  7 | import ngraph as ng
  8 | from ngraph.frontends.neon import Adam, Layer, Affine, Identity, GaussianInit, Sequential, Rectlin
  9 | from training.timeseries_trainer import TimeseriesTrainer
 10 | from datasets.turbofan import TurboFan
 11 | import os
 12 | from utils.arguments import default_argparser
 13 | 
 14 | parser = default_argparser()
 15 | parser.add_argument('--skip', default=1, type=int, help='skip length for sliding window')
 16 | parser.add_argument('--datadir', type=str, default="../data/",
 17 |                     help='dir to download data if not already present')
 18 | parser.add_argument('--dropout', type=float, default=0.0,
 19 |                     help='dropout applied to layers (default: 0.0)')
 20 | parser.add_argument('--ksize', type=int, default=7,
 21 |                     help='kernel size (default: 7)')
 22 | parser.add_argument('--levels', type=int, default=8,
 23 |                     help='# of levels (default: 8)')
 24 | parser.add_argument('--lr', type=float, default=4e-3,
 25 |                     help='initial learning rate (default: 4e-3)')
 26 | parser.add_argument('--nhid', type=int, default=30,
 27 |                     help='number of hidden units per layer (default: 30)')
 28 | parser.add_argument('--grad_clip_value', type=float, default=None,
 29 |                     help='value to clip each element of gradient')
 30 | parser.add_argument('--tensorboard_dir', type=str, default='./tensorboard',
 31 |                     help='directory to save tensorboard summary to')
 32 | args = parser.parse_args()
 33 | 
 34 | if args.save_plots:
 35 |     try:
 36 |         import matplotlib
 37 |         matplotlib.use('Agg')
 38 |         import matplotlib.pyplot as plt
 39 |     except ImportError:
 40 |         args.save_plots = False
 41 | 
 42 | hidden_sizes = [args.nhid]*args.levels
 43 | kernel_size = args.ksize
 44 | dropout = 1 - args.dropout # amount to keep
 45 | seq_len = args.seq_len
 46 | batch_size = args.batch_size
 47 | n_epochs = args.epochs
 48 | 
 49 | receptive_field_last_t = (kernel_size - 1) * (2 ** args.levels - 1)  # receptive field of last time-point
 50 | 
 51 | if seq_len - receptive_field_last_t > 5:
 52 |     print("WARNING: Given these parameters, the last time-point's receptive field does not cover the entire sequence length")
 53 |     print("Difference in coverage = %d time-points" % (seq_len - receptive_field_last_t))
 54 | 
 55 | turbofan_dataset = TurboFan(data_dir=args.datadir, T=seq_len, skip=args.skip, max_rul_predictable=130)
 56 | train_samples = len(turbofan_dataset.train['X']['data'])
 57 | num_iterations = (n_epochs * train_samples) // batch_size
 58 | n_features = turbofan_dataset.train['X']['data'].shape[2]
 59 | n_output_features = 1
 60 | 
 61 | train_iterator = ArrayIterator(turbofan_dataset.train, batch_size, total_iterations=num_iterations, shuffle=True)
 62 | test_iterator = ArrayIterator(turbofan_dataset.test, batch_size)
 63 | train_set_one_epoch = ArrayIterator(turbofan_dataset.train, batch_size, shuffle=False)
 64 | 
 65 | # Name and create axes
 66 | batch_axis = ng.make_axis(length=batch_size, name="N")
 67 | time_axis = ng.make_axis(length=seq_len, name="REC")
 68 | feature_axis = ng.make_axis(length=n_features, name="F")
 69 | out_axis = ng.make_axis(length=n_output_features, name="Fo")
 70 | 
 71 | in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
 72 | out_axes = ng.make_axes([batch_axis, out_axis])
 73 | 
 74 | # Build placeholders for the created axes
 75 | inputs = dict(X=ng.placeholder(in_axes), y=ng.placeholder(out_axes),
 76 |               iteration=ng.placeholder(axes=()))
 77 | 
 78 | # take only the last timepoint of output sequence to predict RUL
 79 | last_timepoint = [lambda op: ng.tensor_slice(op, [slice(seq_len-1, seq_len, 1) if ax.name == "W" else slice(None) for ax in op.axes])]
 80 | affine_layer = Affine(axes=out_axis, weight_init=GaussianInit(0, 0.01), activation=Rectlin())
 81 | model = Sequential([lambda op: ng.map_roles(op, {'F': 'C', 'REC': 'W'})] + tcn(n_features, hidden_sizes, kernel_size=kernel_size, dropout=dropout).layers + last_timepoint + [affine_layer])
 82 | 
 83 | 
 84 | # Optimizer
 85 | optimizer = Adam(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)
 86 | 
 87 | # Define the loss function (categorical cross entropy, since each musical key on the piano is encoded as a binary value)
 88 | fwd_prop = model(inputs['X'])
 89 | fwd_prop = ng.axes_with_order(fwd_prop, out_axes)
 90 | train_loss = ng.squared_L2(fwd_prop - inputs['y'])
 91 | with Layer.inference_mode_on():
 92 |     preds = model(inputs['X'])
 93 |     preds = ng.axes_with_order(preds, out_axes)
 94 | eval_loss = ng.mean(ng.squared_L2(preds - inputs['y']), out_axes=())
 95 | eval_computation = ng.computation([eval_loss], "all")
 96 | predict_computation = ng.computation([preds], "all")
 97 | 
 98 | 
 99 | # Cost calculation
100 | batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
101 | train_computation = ng.computation(batch_cost, "all")
102 | 
103 | trainer = TimeseriesTrainer(optimizer, train_computation, eval_computation, predict_computation, inputs,
104 |                             model_graph=[model], tensorboard_dir=args.tensorboard_dir)
105 | trainer.summary()
106 | 
107 | out_folder = os.path.join(args.results_dir, "results-turbofan-{}-batch_size-{}-dropout-{}-ksize-{}-levels-{}-seq_len-{}-nhid".format(batch_size, args.dropout, kernel_size, args.levels, seq_len, args.nhid))
108 | if not os.path.exists(out_folder):
109 |     os.mkdir(out_folder)
110 | trainer.train(train_iterator, test_iterator, n_epochs=args.epochs, log_interval=args.log_interval, save_plots=args.save_plots, results_dir=out_folder)
111 | 
112 | 
113 | if args.save_plots:
114 |     # Compute the predictions on the training and test sets for visualization
115 |     train_preds = trainer.predict(train_set_one_epoch)
116 |     train_target = turbofan_dataset.train['y']['data']
117 | 
118 |     test_preds = trainer.predict(test_iterator)
119 |     test_target = turbofan_dataset.test['y']['data']
120 | 
121 |     # Visualize the model's predictions on the training and test sets
122 |     plt.figure()
123 |     plt.scatter(train_preds[:, 0], train_target[:, 0])
124 |     plt.xlabel('Training Predictions')
125 |     plt.ylabel('Training Targets')
126 |     plt.title('Predictions on training set')
127 |     plt.savefig(os.path.join(out_folder, 'preds_training_output.png'))
128 | 
129 |     plt.figure()
130 |     plt.scatter(test_preds[:, 0], test_target[:, 0])
131 |     plt.xlabel('Validation Predictions')
132 |     plt.ylabel('Validation Targets')
133 |     plt.title('Predictions on validation set')
134 |     plt.savefig(os.path.join(out_folder, 'preds_validation_output.png'))
135 | 


--------------------------------------------------------------------------------
/training/timeseries_trainer.py:
--------------------------------------------------------------------------------
  1 | from ngraph.frontends.neon import Sequential
  2 | from contextlib import closing
  3 | from ngraph.frontends.neon import Saver
  4 | from ngraph.frontends.neon.callbacks import tqdm
  5 | import ngraph.transformers as ngt
  6 | import numpy as np
  7 | import json
  8 | import os
  9 | try:
 10 |     import matplotlib
 11 |     matplotlib.use('Agg')
 12 |     import matplotlib.pyplot as plt
 13 | except ImportError:
 14 |     raise ImportError("matplotlib not found")
 15 | from ngraph.frontends.neon.model import ResidualModule
 16 | 
 17 | 
 18 | class TimeseriesTrainer:
 19 |     """
 20 |     Class that adds methods for training, inference, model summary and tensorboard
 21 | 
 22 |     Arguments: TODO
 23 |         train_computation
 24 |         eval_computation
 25 |         pred_computation
 26 |         input_placeholders
 27 |         model_graph (list of graphs): each list element must have layers attribute, e.g. a Sequential
 28 |         tensorboard_dir (optional, path): if given, save tensorboard to this directory
 29 |     """
 30 |     def __init__(self, opt, train_computation, eval_computation, pred_computation, input_placeholders, model_graph,
 31 |                  tensorboard_dir=None):
 32 |         self.opt = opt
 33 |         self.train_computation = train_computation
 34 |         self.eval_computation = eval_computation
 35 |         self.pred_computation = pred_computation
 36 |         self.input_placeholders = input_placeholders
 37 |         self.layers = [layer for graph in model_graph for layer in graph.layers]
 38 |         self.transformer = ngt.make_transformer()  # initialize transformer
 39 |         self._init_tensorboard(tensorboard_dir, model_graph)
 40 | 
 41 |     def train(self, train_iterator, val_iterator, n_epochs=100, log_interval=100, save_plots=True, results_dir="./"):
 42 |         train_iterator.reset()
 43 |         val_iterator.reset()
 44 | 
 45 |         batch_size = train_iterator.batch_size
 46 |         num_iterations = np.floor((train_iterator.ndata * n_epochs * 1.)/batch_size).astype('int')
 47 |         n_train = train_iterator.ndata
 48 | 
 49 |         assert val_iterator.batch_size == batch_size
 50 | 
 51 |         # save model
 52 |         weight_saver = Saver()
 53 |         # train model
 54 | 
 55 |         self.train_function = self.transformer.add_computation(self.train_computation)
 56 |         self.eval_function = self.transformer.add_computation(self.eval_computation)
 57 |         self.pred_function = self.transformer.add_computation(self.pred_computation)
 58 | 
 59 |         # set up weight saver
 60 |         weight_saver.setup_save(transformer=self.transformer, computation=self.train_computation)
 61 | 
 62 |         # Progress bar
 63 |         tpbar = tqdm(unit="batches", ncols=100, total=num_iterations)
 64 |         tpbar_string = "Train Epoch:  {epoch} [ {num_examples_seen}/{n_train} ({percent_complete}%)] Train Loss {cost}"
 65 | 
 66 |         train_losses = []
 67 |         eval_losses = []
 68 | 
 69 |         # Iterating over the training set
 70 |         num_examples_seen = 0
 71 |         n_epoch = 1
 72 |         for step in range(num_iterations):
 73 |             data = next(train_iterator)
 74 |             feed_dict = {self.input_placeholders["X"]: data["X"], self.input_placeholders["y"]: data["y"]}
 75 | 
 76 |             # Mean batch cost
 77 |             output = self.train_function(feed_dict=feed_dict)
 78 |             train_loss = output[()].item()
 79 | 
 80 |             train_losses.append(train_loss)
 81 |             if self.tb is not None:
 82 |                 self.tb.add_scalar("train_loss", train_loss, step=step)
 83 | 
 84 |             # Update progress bar
 85 |             tpbar.update(1)
 86 |             tpbar.set_description("Training {}".format(str(output[()])))
 87 | 
 88 |             num_examples_seen += batch_size
 89 |             # Every epoch print test set metrics
 90 |             if (step + 1) % log_interval == 0 and step > 0:
 91 | 
 92 |                 # calculate metrics over test set
 93 |                 avg_eval_loss = 0.0
 94 |                 val_iterator.reset()
 95 |                 for e, data_test in enumerate(val_iterator):
 96 |                     feed_dict_test = {self.input_placeholders["X"]: data_test["X"], self.input_placeholders["y"]: data_test["y"]}
 97 |                     eval_loss = self.eval_function(feed_dict=feed_dict_test)[0]
 98 |                     avg_eval_loss += eval_loss
 99 | 
100 |                 avg_eval_loss /= (e + 1)
101 | 
102 |                 # save loss
103 |                 eval_losses.append(avg_eval_loss.item())
104 |                 if self.tb is not None:
105 |                     self.tb.add_scalar("eval_loss", avg_eval_loss, step=step)
106 | 
107 |                 # write to progress bar
108 |                 avg_train_cost = train_losses[-1 * log_interval:]
109 |                 avg_train_cost = np.mean(avg_train_cost)
110 |                 tqdm.write(tpbar_string.format(epoch=n_epoch, num_examples_seen=num_examples_seen, n_train=n_train, percent_complete=100.0 * num_examples_seen / n_train, cost=avg_train_cost))
111 | 
112 |                 weight_saver.save(filename=results_dir + "/" + "model")
113 | 
114 |                 # Writing to CSV
115 |                 logfile = os.path.join(results_dir, "logs")
116 | 
117 |                 with open(logfile, 'w') as fp:
118 |                     json.dump({'train_loss': train_losses, 'eval_loss': eval_losses}, fp)
119 | 
120 |                 if save_plots:
121 |                     # plot all entries in logfile
122 |                     self.plot_scalars(logfile, results_dir)
123 | 
124 |             if num_examples_seen > n_train:
125 |                 num_examples_seen = num_examples_seen - n_train
126 |                 n_epoch += 1
127 |                 print("Test set: Average loss: {}".format(avg_eval_loss))
128 | 
129 |         print("\nTraining Completed")
130 | 
131 |     def predict(self, dataset, num_batches=None):
132 |         """
133 |             Runs a function over the dataset and accumulated the results.
134 |             Instead of reducing the results, as ngraph's loops do, we stack them together.
135 |             This allows us, for instance, to retain the predictions made on each test
136 |             example.
137 |             """
138 |         dataset.reset()
139 |         all_results = []
140 | 
141 |         for ee, data in enumerate(dataset):
142 |             if num_batches is not None:
143 |                 if ee >= num_batches:
144 |                     break
145 |             results = self.pred_function(feed_dict={self.input_placeholders['X']: data['X']})[0]
146 |             all_results.extend(list(results))
147 |         all_results = np.stack(all_results, axis=0)
148 |         all_results = all_results[:dataset.ndata]
149 |         return all_results
150 | 
151 |     def predict_sequence(self):
152 |         pass
153 | 
154 |     def callbacks(self):
155 |         pass
156 | 
157 |     def plot_scalars(self, logfile, results_dir):
158 |         with open(logfile, 'r') as fp:
159 |             data = json.load(fp)
160 | 
161 |         for k in data:
162 |             if isinstance(data[k], list):
163 |                 fig, ax = plt.subplots()
164 |                 plt.plot(data[k])
165 |                 plt.xlabel('Iteration')
166 |                 plt.ylabel('%s' % k)
167 |                 plt.title('%s ' % k)
168 |                 plt.savefig('%s' % os.path.join(results_dir, k + ".png"))
169 |                 plt.close()
170 | 
171 |     def summary(self):
172 |         if self.layers is None:
173 |             raise ValueError("Model layers not provided")
174 |         total_num_vars = 0
175 |         total_num_not_trainable = 0
176 |         print("".join(100 * ["-"]))
177 |         print("{: >20} {: >20} {: >20} {: >20} {: >20}".format("index", "name", "# trainable vars", "# not trainable vars", "output_shape"))
178 |         print("".join(100*["-"]))
179 |         for e, layer in enumerate(self.layers):
180 |             temp_model = Sequential(self.layers[0:e+1])
181 |             l_output = temp_model(self.input_placeholders['X'])
182 |             num_vars, num_not_trainable = self._get_number_of_vars_in_layer(layer)
183 |             if num_vars is not None:
184 |                 total_num_vars += num_vars
185 |             if num_not_trainable is not None:
186 |                 total_num_not_trainable += num_not_trainable
187 |             if 'name' in layer.__dict__:
188 |                 l_name = layer.name
189 |             elif isinstance(layer, ResidualModule):
190 |                 l_name = 'ResidualModule'
191 |             else:
192 |                 l_name = type(layer).__name__
193 |             if 'axes' in dir(l_output):
194 |                 print("{: >20} {: >20} {: >20} {: >20} {: >20}".format(str(e), l_name, str(num_vars), str(num_not_trainable), str(l_output.axes)))
195 |             else:
196 |                 print("{: >20} {: >20} {: >20} {: >20} {: >20}".format(str(e), l_name, str(num_vars), str(num_not_trainable), "Unknown"))
197 | 
198 |         print("".join(100 * ["-"]))
199 |         print("Total number of trainable parameters: %d" % total_num_vars)
200 |         print("Total number of non trainable parameters: %d" % total_num_not_trainable)
201 |         print("".join(100 * ["-"]))
202 |         print("Optimizer type {}".format(self.opt.name))
203 |         print("Optimizer learning rate {}".format(self.opt.lrate.initial_value.item()))
204 |         print("".join(100 * ["-"]))
205 | 
206 |     def _get_number_of_vars_in_layer(self, layer):
207 |         num_vars = []
208 |         num_not_trainable = []
209 |         if 'variables' in dir(layer):
210 |             for i in layer.variables.keys():
211 |                 if layer.variables[i].is_trainable:
212 |                     num_vars.append(self._get_number_of_vars_in_tensor(layer.variables[i]))
213 |                 else:
214 |                     num_not_trainable.append(self._get_number_of_vars_in_tensor(layer.variables[i]))
215 |             return np.sum(num_vars).astype('int'), np.sum(num_not_trainable).astype('int')
216 |         else:
217 |             if isinstance(layer, ResidualModule):
218 |                 side_path_layers = [] if layer.side_path is None else layer.side_path.layers
219 |                 for l in layer.main_path.layers + side_path_layers:
220 |                     num_vars_l, num_not_t_l = self._get_number_of_vars_in_layer(l)
221 |                     num_vars.append(num_vars_l)
222 |                     num_not_trainable.append(num_not_t_l)
223 |                 return np.sum(num_vars).astype('int'), np.sum(num_not_trainable).astype('int')
224 |             else:
225 |                 return 0, 0
226 | 
227 |     def _get_number_of_vars_in_tensor(self, var):
228 |         return np.prod(var.shape.full_lengths)
229 | 
230 |     def _init_tensorboard(self, tensorboard_dir, model_graph):
231 |         self.tb = None
232 |         if tensorboard_dir is not None:
233 |             try:
234 |                 from ngraph.op_graph.tensorboard.tensorboard import TensorBoard
235 |                 self.tb = TensorBoard(tensorboard_dir)
236 |                 for graph in model_graph:
237 |                     self.tb.add_graph(graph)
238 |                 # if not specifying run kwarg to TensorBoard or using add_run,
239 |                 # run attribute is autogenerated when add_graph is called
240 |                 print("Saving Tensorboard to {}/{}".format(tensorboard_dir, self.tb.run))
241 |             except:
242 |                 print("Tensorboard not installed")
243 |         else:
244 |              print("no Tensorboard directory given, not using Tensorboard")
245 | 


--------------------------------------------------------------------------------
/tutorials/img/dilated_conv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NervanaSystems/aidc-2018-timeseries/df0d25a258ced93b23755f7e62a3f96d73963fe3/tutorials/img/dilated_conv.png


--------------------------------------------------------------------------------
/tutorials/img/residual_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NervanaSystems/aidc-2018-timeseries/df0d25a258ced93b23755f7e62a3f96d73963fe3/tutorials/img/residual_block.png


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # ----------------------------------------------------------------------------
 3 | # Copyright 2017 Nervana Systems Inc.
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ----------------------------------------------------------------------------
16 | 


--------------------------------------------------------------------------------
/utils/arguments.py:
--------------------------------------------------------------------------------
 1 | from ngraph.frontends.neon import NgraphArgparser
 2 | import os
 3 | 
 4 | def default_argparser():
 5 |     # parse the command line arguments
 6 |     parser = NgraphArgparser(__doc__)
 7 |     parser.add_argument('--predict_seq', default=False, dest='predict_seq', action='store_true',
 8 |                         help='If given, seq_len future timepoints are predicted')
 9 |     parser.add_argument('--look_ahead', type=int,
10 |                         help="Number of time steps to start predicting from",
11 |                         default=1)
12 |     parser.add_argument('--seq_len', type=int,
13 |                         help="Number of time points in each input sequence",
14 |                         default=32)
15 |     parser.add_argument('--log_interval', type=int, default=100, help="frequency, in number of iterations, after which loss is evaluated")
16 |     parser.add_argument('--save_plots', action="store_true", help="save plots to disk")
17 |     parser.add_argument('--results_dir', type=str, help="Directory to write results to", default='./')
18 |     parser.add_argument('--resume', type=str, default=None, help="weights of the model to resume training with")
19 |     parser.set_defaults()
20 | 
21 |     return parser


--------------------------------------------------------------------------------
/utils/inference.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # ----------------------------------------------------------------------------
  3 | # Copyright 2017 Nervana Systems Inc.
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #      http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ----------------------------------------------------------------------------
 16 | import numpy as np
 17 | # TODO figure out a way to move this to topologies.timeseries_model
 18 | def generate_sequence(data, time_points, eval_function, predict_seq,
 19 |                       batch_size, seq_len, feature_dim, seq_name=0):
 20 |     """
 21 |     Generates a sequence of length time_points, given ground truth data (gt_data)
 22 |     First seq_len points of gt_data is used as the seed
 23 |     Returns the generated sequence
 24 | 
 25 |     data: ground truth data
 26 |     time_points: number of steps to generate the data
 27 |     eval_function: forward prop function of the network
 28 |     predict_seq: True if network predicts sequences
 29 | 
 30 |     Start with first seq_len points in training data, take it as input (call S0)
 31 |     S0 = [x0, x1, ..., x(seq_len-1)]
 32 |     Given S0, generate next time point x_hat(seq_len), build S1
 33 |     S1 = [x1, x2, ..., x(seq_len-1), x_hat(seq_len)]
 34 |     Given S1, generate x_hat(seq_len+1)
 35 |     Continue generating for a total of time_points
 36 |     """
 37 |     # check if seq_name is in train or test
 38 |     if seq_name in data.test_seq_names:
 39 |         data_fold = data.test
 40 |         seq_indices = [e for e, n in enumerate(data.test_seq_names) if n == seq_name]
 41 |     elif seq_name in data.train_seq_names:
 42 |         data_fold = data.train
 43 |         seq_indices = [e for e, n in enumerate(data.train_seq_names) if n == seq_name]
 44 |     else:
 45 |         raise ValueError("Input sequence name {} not found".format(seq_name))
 46 | 
 47 |     # keep the last few indices in order to get most recent data
 48 |     seq_indices = seq_indices[-1 * (time_points + 1):]
 49 | 
 50 |     no_gen_time_points = time_points
 51 |     input_batch = np.zeros((batch_size, seq_len, feature_dim))
 52 | 
 53 |     input_batch[0] = data_fold['X']['data'][seq_indices[0]]
 54 |     gen_series = data_fold['X']['data'][seq_indices[0]]  # This will hold the generated series
 55 |     gt_series = data_fold['X']['data'][seq_indices[0]]  # This will hold the ground truth series
 56 | 
 57 |     output_dim = data_fold['y']['data'].shape[-1]
 58 |     for tp in range(no_gen_time_points):
 59 |         axx = dict(X=input_batch)
 60 |         # Get the prediction using seq_len past samples
 61 |         result = eval_function(axx)
 62 | 
 63 |         if(predict_seq is False):
 64 |             # result is of size (batch_size, output_dim)
 65 |             # We want the output of the first sample, so get it
 66 |             result = result[0, :]
 67 |         else:
 68 |             # result is of size (batch_size, seq_len, output_dim)
 69 |             # We want the last output of the first sample, so get it
 70 |             result = result[0, -1, :]
 71 |         # Now result is (output_dim,)
 72 |         # Reshape result to (1,output_dim)
 73 |         result = np.reshape(result, (1, output_dim))
 74 | 
 75 |         # Get the last (seq_len-1) samples in the past
 76 |         # cx is of shape (seq_len-1, output_dim)
 77 |         cx = input_batch[0][1:, :]
 78 | 
 79 |         # Append the new prediction to the past (seq_len-1) samples
 80 |         # Put the result into the first sample in the input batch
 81 |         input_batch[0] = np.concatenate((cx, result))
 82 | 
 83 |         # Append the current prediction to gen_series
 84 |         # This is to keep track of predictions, for plotting purposes only
 85 |         gen_series = np.concatenate((gen_series, result))
 86 | 
 87 |         # Find the ground truth for this prediction
 88 |         if(predict_seq is False):
 89 |             gt_outcome = np.copy(data_fold['X']['data'][seq_indices[tp + 1]][-1, :])
 90 |             # Reshape to (1, output_dim)
 91 |             gt_outcome = np.reshape(gt_outcome, (1, output_dim))
 92 |         else:
 93 |             # When predict_seq is given, input 'X' has non overlapping windows
 94 |             # X is of shape (no_samples, seq_len, 2)
 95 |             # Thus, find the right index for the ground truth output
 96 |             gt_outcome = data_fold['X']['data'][seq_indices[(tp + seq_len) // seq_len],
 97 |                                                  (tp + seq_len) % seq_len, :]
 98 |             # Reshape to (1, output_dim)
 99 |             gt_outcome = np.reshape(gt_outcome, (1, output_dim))
100 | 
101 |         # Append ground truth outcome to gt_series
102 |         # This is to keep track of ground truth, for plotting purposes only
103 |         gt_series = np.concatenate((gt_series, gt_outcome))
104 | 
105 |     return gen_series, gt_series
106 | 


--------------------------------------------------------------------------------