├── .gitignore ├── Docs ├── 1308.0850v5.pdf ├── 1412.3555v1.pdf ├── 1412.6980v8.pdf ├── 1502.01852v1.pdf ├── 1502.03167v3.pdf ├── 1502.04390v1.pdf ├── 1502.04623v2.pdf ├── 1606.04130v1.pdf ├── best_data.txt └── srivastava14a.pdf ├── LICENSE ├── README.md ├── TSL.ows ├── TimeSeriesLearning.ows ├── data ├── testData.zip └── training.zip └── src ├── .spyderworkspace ├── deep ├── __init__.py └── deep_learning_nn.py ├── deep_learning_runner.py ├── results_plotter.py ├── rnn ├── .spyderworkspace ├── __init__.py └── simple_rnn.py ├── score_validator.py ├── utils ├── __init__.py ├── data_slicer.py ├── offline_preprocessor.py ├── train_validate_splitter.py └── utils.py ├── validation_baseline.py └── vanila_rnn.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /Docs/1308.0850v5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/Docs/1308.0850v5.pdf -------------------------------------------------------------------------------- /Docs/1412.3555v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/Docs/1412.3555v1.pdf -------------------------------------------------------------------------------- /Docs/1412.6980v8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/Docs/1412.6980v8.pdf -------------------------------------------------------------------------------- /Docs/1502.01852v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/Docs/1502.01852v1.pdf 
-------------------------------------------------------------------------------- /Docs/1502.03167v3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/Docs/1502.03167v3.pdf -------------------------------------------------------------------------------- /Docs/1502.04390v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/Docs/1502.04390v1.pdf -------------------------------------------------------------------------------- /Docs/1502.04623v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/Docs/1502.04623v2.pdf -------------------------------------------------------------------------------- /Docs/1606.04130v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/Docs/1606.04130v1.pdf -------------------------------------------------------------------------------- /Docs/best_data.txt: -------------------------------------------------------------------------------- 1 | 0.026730638156987854 0.007701583490203154 0.03510046831242789 2 | count 238897.000000 238897.000000 238897.000000 3 | mean 0.072598 0.282652 0.195517 4 | std 0.067135 0.157973 0.249721 5 | min 0.000000 0.000000 0.000000 6 | 25% 0.030325 0.171160 0.001698 7 | 50% 0.053854 0.246471 0.005034 8 | 75% 0.086798 0.346608 0.400094 9 | max 0.465094 0.891380 0.894128 10 | 11 | 60, 100, 0.05 -> 1e-8, Adagrad, RNN 12 | 13 | epoch 59, train loss: [ 0.04069507], score: [ 9.84009604] 14 | Validate score: 9.8366469 15 | TC score: 9.82 (vp_30_07_19_52.csv) 16 | ________________________________________________________________________ 17 | 18 | 0.0378213827224494 0.0 0.0.1 19 | count 238897.000000 238897.000000 238897.000000 20 | mean 0.071949 0.279996 0.193371 21 | std 0.067764 0.156745 0.249946 22 | min 0.000000 0.000000 0.000000 23 | 25% 0.030075 0.169914 0.000000 24 | 50% 0.053832 0.243377 0.002575 25 | 75% 0.086068 0.345924 0.397201 26 | max 0.503966 0.925782 0.830723 27 | 28 | 29 | 180, 100, 0.05 -> 1e-8, Adagrad, RNN 30 | 31 | epoch 179, train loss: [ 0.04046797], score: [ 9.84038537] 32 | Validate score: 9.8366469 33 | TC score: 9.82 (vp_31_07_00_21.csv) 34 | 35 | ------------------------ 36 | Predictions: 37 | yvl1_est yvl2_est yvl3_est 38 | count 238898.000000 238898.000000 238898.000000 39 | mean 0.071048 0.278478 0.190451 40 | std 0.068349 0.157351 0.247715 41 | min 0.000000 0.000000 0.000000 42 | 25% 0.028766 0.166988 0.000000 43 | 50% 0.053941 0.245374 0.000350 44 | 75% 0.084177 0.340235 0.389137 45 | max 0.501959 0.881149 0.793660 46 | 47 | 101, 100, 5e-4 -> adam, tanh, shuffle, RNN 48 | 49 | epoch 100, train loss: [ 0.03829205], score: [ 9.88033067] 50 | Validate score: 51 | TC score: 9.83 (vp_02_08_23_13.csv) 52 | 53 | ------------------------ 54 | Predictions: 55 | yvl1_est yvl2_est yvl3_est 56 | count 238898.000000 238898.000000 238898.000000 57 | mean 0.070911 0.278394 0.188772 58 | std 0.066999 0.158090 0.247499 59 | min 0.000000 0.000000 0.000000 60 | 25% 0.029590 0.166484 0.000000 61 | 50% 0.053094 0.246164 0.001593 62 | 75% 0.082912 0.340488 0.407425 63 | max 0.507457 1.000000 0.828317 
64 | 65 | 81, 100, 1e-4 -> adam 0.9/0.99, shuffle, reg1e-3, preprocessing, DeepNN[50, 20] 66 | 67 | epoch 80, train loss: [ 0.03659411], score: [ 9.88233213] 68 | 69 | TC score: 9.85 (vp_03_08_16_27.csv) 70 | 71 | ------------------------ 72 | Predictions: 73 | yvl1_est yvl2_est yvl3_est 74 | count 238898.000000 238898.000000 238898.000000 75 | mean 0.071074 0.278243 0.188506 76 | std 0.068041 0.157917 0.247308 77 | min 0.000000 0.000000 0.000000 78 | 25% 0.029369 0.166125 0.000000 79 | 50% 0.053656 0.246783 0.001329 80 | 75% 0.082850 0.337197 0.406893 81 | max 0.515826 0.976951 0.892692 82 | 83 | 80, 100, 5e-5 -> adam bias 0.9/0.99, shuffle, reg1e-3, preprocessing, DeepNN[60, 30] 84 | 85 | epoch: 79, train loss: [ 0.03626305], score: [ 9.88334681], learning rate: 5e-07 86 | 87 | TC score: 9.85 (vp_04_08_16_40.csv) 88 | 89 | ------------------------ 90 | Predictions: 91 | yvl1_est yvl2_est yvl3_est 92 | count 238898.000000 238898.000000 238898.000000 93 | mean 0.070833 0.278369 0.188905 94 | std 0.068485 0.157788 0.246909 95 | min 0.000000 0.000000 0.000000 96 | 25% 0.028804 0.166068 0.000000 97 | 50% 0.053427 0.246008 0.003569 98 | 75% 0.083206 0.340325 0.406023 99 | max 0.525686 0.954895 0.810934 100 | 101 | 60, 100, 5e-5, adam, reg1e-4, preprocessing DeepNN[256, 128] 102 | 103 | epoch: 59, train loss: [ 0.03710156], score: [ 9.88219018], learning rate: 5e-07 104 | 105 | TC score: 98.53 (vp_06_08_22_49.csv) 106 | 107 | ------------------------ 108 | Predictions: 109 | yvl1_est yvl2_est yvl3_est 110 | count 238898.000000 238898.000000 238898.000000 111 | mean 0.070915 0.278305 0.189096 112 | std 0.068387 0.157991 0.247285 113 | min 0.000000 0.000000 0.000000 114 | 25% 0.029102 0.166380 0.000000 115 | 50% 0.053139 0.245979 0.003406 116 | 75% 0.083086 0.339003 0.406160 117 | max 0.534707 0.998148 0.841650 118 | 119 | 180, 100, 5e-5, adam, reg1e-4, preprocessing DeepNN[256, 128] 120 | 121 | epoch: 179, train loss: [ 0.03601578], score: [ 9.88568121], learning rate: 5e-07 122 | 123 | TC score: 98.56 (vp_07_08_21_22.csv) 124 | 125 | ------------------------ 126 | Predictions: 127 | 128 | count 238897.000000 238897.000000 238897.000000 129 | mean 0.071045 0.277557 0.188097 130 | std 0.068731 0.158530 0.247959 131 | min 0.000000 0.000000 0.000000 132 | 25% 0.029443 0.165544 0.000000 133 | 50% 0.053455 0.246995 0.000000 134 | 75% 0.082807 0.338127 0.407639 135 | max 0.658165 0.965393 0.885541 136 | 137 | 138 | 60, 100, 5e-2, Adagrad, reg1e-4, features selected, DeepNN[128, 32] 139 | 140 | TC score: 98.61 (vp_10_08_11_45.csv) 141 | 142 | ------------------------ 143 | Predictions: 144 | yvl1_est yvl2_est yvl3_est 145 | count 238898.000000 238898.000000 238898.000000 146 | mean 0.072625 0.284743 0.499098 147 | std 0.071510 0.152186 0.184594 148 | min 0.001185 0.014566 0.001994 149 | 25% 0.029601 0.170947 0.352773 150 | 50% 0.053907 0.249249 0.526542 151 | 75% 0.084633 0.343988 0.653956 152 | max 0.830919 0.943561 0.883551 153 | 154 | validation baseline 155 | 156 | TC score: 98.80 (vp_tree_10_08_2016.csv) 157 | -------------------------------------------------------------------------------- /Docs/srivastava14a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/Docs/srivastava14a.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT 
License 2 | 3 | Copyright (c) 2016 Iaroslav Omelianenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Time Series Learning 2 | This project implements Deep NN / RNN based solutions in order to develop flexible methods that are able to adaptively fill in, backfill, and predict time series using a large number of heterogeneous training datasets. 3 | 4 | A successful solution must at least exceed the performance of a plain vanilla Random Forest Regressor, which is used as the scoring baseline. 5 | 6 | ## Overview 7 | The goal of this project is to develop flexible methods that are able to adaptively fill in, backfill, and predict time series using a large number of heterogeneous training datasets. The data is a set of thousands of aggressively obfuscated, multivariate time-series measurements. There are multiple output variables and multiple input variables. 8 | 9 | For each time series, parts are missing: either individual measurements or entire sections. Each time series has a different number of known and missing measurements; the goal is to fill in the missing output variables with the best accuracy possible. How the missing input variables are treated is an open question, and is one of the key challenges to solve. 10 | 11 | This problem, unlike many data science contest problems, is not easy to fit into the standard machine learning framework. Some reasons why this is the case: 12 | * There are multiple time-series outputs. 13 | * There are multiple time-series inputs. 14 | * The time-series are sampled irregularly, and at 15 | different time points for each subject. 16 | * There is a huge amount of missing data, and it 17 | is not missing at random. 18 | * Many of the variables are nominal/categorical, 19 | and some of these are very high cardinality. The most important variable, subject id, is the primary example. A good solution should not ignore the subject id. 20 | 21 | ## Scoring 22 | The score for each individual prediction p, compared against the actual ground-truth value t, will be |p - t|. The score for each row, r, will then be the mean of the scores for the individual predictions on that row (possibly 1, 2, or 3 values). 23 | Over the full n rows, your final score will be calculated as 10 * (1 - Sum(r) / n). Thus a score of 10.00 represents perfect predictions with no error at all.
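For concreteness, below is a minimal sketch of this scoring rule in NumPy. It is illustrative only and is not part of the code base (the repository computes the same quantity in `src/score_validator.py` and in the `__score_mean` / `__make_score` helpers of the models); the function name and the layout of the boolean `y_missing` mask are assumptions.

```python
import numpy as np

def contest_score(y_pred, y_true, y_missing):
    """10 * (1 - mean per-row error), scoring only values present in the ground truth."""
    abs_err = np.abs(y_pred - y_true)   # |p - t| for every prediction
    present = ~y_missing                # True where the ground-truth value exists
    # mean over the 1..3 scored values in each row that has at least one present value
    row_means = [abs_err[i, present[i]].mean()
                 for i in range(abs_err.shape[0]) if present[i].any()]
    return 10.0 * (1.0 - np.mean(row_means))
```

Predicting every present value exactly yields 10.0, while an average absolute error of 0.1 per row yields 9.0.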
24 | 25 | ## Implementation 26 | To fulfill the requested task, two solutions were implemented, based on the Recurrent Neural Network and Deep Learning Neural Network architectures. 27 | The performance of both was compared against a plain vanilla implementation based on a Random Forest Regressor. 28 | 29 | The Deep NN was found to be superior to the RNN for this task, although not by a large margin. Unfortunately, both still lag behind the Random Forest Regressor. 30 | 31 | Scores per method: 32 | * Deep NN: 9.861 33 | * RNN: 9.830 34 | * Random Forest Regressor: 9.880 (baseline) 35 | 36 | ## Best results 37 | 38 | ``` 39 | 0.026730638156987854 0.007701583490203154 0.03510046831242789 40 | count 238897.000000 238897.000000 238897.000000 41 | mean 0.072598 0.282652 0.195517 42 | std 0.067135 0.157973 0.249721 43 | min 0.000000 0.000000 0.000000 44 | 25% 0.030325 0.171160 0.001698 45 | 50% 0.053854 0.246471 0.005034 46 | 75% 0.086798 0.346608 0.400094 47 | max 0.465094 0.891380 0.894128 48 | 49 | 60, 100, 0.05 -> 1e-8, Adagrad, RNN 50 | 51 | epoch 59, train loss: [ 0.04069507], score: [ 9.84009604] 52 | Validate score: 9.8366469 53 | Test score: 9.82 (vp_30_07_19_52.csv) 54 | ________________________________________________________________________ 55 | 56 | 0.0378213827224494 0.0 0.0.1 57 | count 238897.000000 238897.000000 238897.000000 58 | mean 0.071949 0.279996 0.193371 59 | std 0.067764 0.156745 0.249946 60 | min 0.000000 0.000000 0.000000 61 | 25% 0.030075 0.169914 0.000000 62 | 50% 0.053832 0.243377 0.002575 63 | 75% 0.086068 0.345924 0.397201 64 | max 0.503966 0.925782 0.830723 65 | 66 | 67 | 180, 100, 0.05 -> 1e-8, Adagrad, RNN 68 | 69 | epoch 179, train loss: [ 0.04046797], score: [ 9.84038537] 70 | Validate score: 9.8366469 71 | Test score: 9.82 (vp_31_07_00_21.csv) 72 | 73 | ------------------------ 74 | Predictions: 75 | yvl1_est yvl2_est yvl3_est 76 | count 238898.000000 238898.000000 238898.000000 77 | mean 0.071048 0.278478 0.190451 78 | std 0.068349 0.157351 0.247715 79 | min 0.000000 0.000000 0.000000 80 | 25% 0.028766 0.166988 0.000000 81 | 50% 0.053941 0.245374 0.000350 82 | 75% 0.084177 0.340235 0.389137 83 | max 0.501959 0.881149 0.793660 84 | 85 | 101, 100, 5e-4 -> adam, tanh, shuffle, RNN 86 | 87 | epoch 100, train loss: [ 0.03829205], score: [ 9.88033067] 88 | Validate score: 89 | Test score: 9.83 (vp_02_08_23_13.csv) 90 | 91 | ------------------------ 92 | Predictions: 93 | yvl1_est yvl2_est yvl3_est 94 | count 238898.000000 238898.000000 238898.000000 95 | mean 0.070911 0.278394 0.188772 96 | std 0.066999 0.158090 0.247499 97 | min 0.000000 0.000000 0.000000 98 | 25% 0.029590 0.166484 0.000000 99 | 50% 0.053094 0.246164 0.001593 100 | 75% 0.082912 0.340488 0.407425 101 | max 0.507457 1.000000 0.828317 102 | 103 | 81, 100, 1e-4 -> adam 0.9/0.99, shuffle, reg1e-3, preprocessing, DeepNN[50, 20] 104 | 105 | epoch 80, train loss: [ 0.03659411], score: [ 9.88233213] 106 | 107 | Test score: 9.85 (vp_03_08_16_27.csv) 108 | 109 | ------------------------ 110 | Predictions: 111 | yvl1_est yvl2_est yvl3_est 112 | count 238898.000000 238898.000000 238898.000000 113 | mean 0.071074 0.278243 0.188506 114 | std 0.068041 0.157917 0.247308 115 | min 0.000000 0.000000 0.000000 116 | 25% 0.029369 0.166125 0.000000 117 | 50% 0.053656 0.246783 0.001329 118 | 75% 0.082850 0.337197 0.406893 119 | max 0.515826 0.976951 0.892692 120 | 121 | 80, 100, 5e-5 -> adam bias 0.9/0.99, shuffle, reg1e-3, preprocessing, DeepNN[60, 30] 122 | 123
| epoch: 79, train loss: [ 0.03626305], score: [ 9.88334681], learning rate: 5e-07 124 | 125 | Test score: 9.85 (vp_04_08_16_40.csv) 126 | 127 | ------------------------ 128 | Predictions: 129 | yvl1_est yvl2_est yvl3_est 130 | count 238898.000000 238898.000000 238898.000000 131 | mean 0.070833 0.278369 0.188905 132 | std 0.068485 0.157788 0.246909 133 | min 0.000000 0.000000 0.000000 134 | 25% 0.028804 0.166068 0.000000 135 | 50% 0.053427 0.246008 0.003569 136 | 75% 0.083206 0.340325 0.406023 137 | max 0.525686 0.954895 0.810934 138 | 139 | 60, 100, 5e-5, adam, reg1e-4, preprocessing DeepNN[256, 128] 140 | 141 | epoch: 59, train loss: [ 0.03710156], score: [ 9.88219018], learning rate: 5e-07 142 | 143 | Test score: 98.53 (vp_06_08_22_49.csv) 144 | 145 | ------------------------ 146 | Predictions: 147 | yvl1_est yvl2_est yvl3_est 148 | count 238898.000000 238898.000000 238898.000000 149 | mean 0.070915 0.278305 0.189096 150 | std 0.068387 0.157991 0.247285 151 | min 0.000000 0.000000 0.000000 152 | 25% 0.029102 0.166380 0.000000 153 | 50% 0.053139 0.245979 0.003406 154 | 75% 0.083086 0.339003 0.406160 155 | max 0.534707 0.998148 0.841650 156 | 157 | 180, 100, 5e-5, adam, reg1e-4, preprocessing DeepNN[256, 128] 158 | 159 | epoch: 179, train loss: [ 0.03601578], score: [ 9.88568121], learning rate: 5e-07 160 | 161 | Test score: 98.56 (vp_07_08_21_22.csv) 162 | 163 | ------------------------ 164 | Predictions: 165 | 166 | count 238897.000000 238897.000000 238897.000000 167 | mean 0.071045 0.277557 0.188097 168 | std 0.068731 0.158530 0.247959 169 | min 0.000000 0.000000 0.000000 170 | 25% 0.029443 0.165544 0.000000 171 | 50% 0.053455 0.246995 0.000000 172 | 75% 0.082807 0.338127 0.407639 173 | max 0.658165 0.965393 0.885541 174 | 175 | 176 | 60, 100, 5e-2, Adagrad, reg1e-4, features selected, DeepNN[128, 32] 177 | 178 | Test score: 98.61 (vp_10_08_11_45.csv) 179 | 180 | ------------------------ 181 | Predictions: 182 | yvl1_est yvl2_est yvl3_est 183 | count 238898.000000 238898.000000 238898.000000 184 | mean 0.072625 0.284743 0.499098 185 | std 0.071510 0.152186 0.184594 186 | min 0.001185 0.014566 0.001994 187 | 25% 0.029601 0.170947 0.352773 188 | 50% 0.053907 0.249249 0.526542 189 | 75% 0.084633 0.343988 0.653956 190 | max 0.830919 0.943561 0.883551 191 | 192 | validation baseline - Random Forest Regressor 193 | 194 | Test score: 98.80 (vp_tree_10_08_2016.csv) 195 | ``` 196 | ## Directory structure and running 197 | ### The directories: 198 | * 'data' directory contains training / testing data samples 199 | * 'src' directory has source files 200 | 201 | ### The source files: 202 | The main runners are 'src/deep_learning_runner.py' and 'src/vanila_rnn.py' for starting 'Deep NN' and 'RNN' correspondingly. 203 | The 'src/score_validator.py' may be used to calculate score over test data saples run results. 
204 | 205 | The 'src/utils/train_validate_splitter.py' can be used to generate train/validate data samples for training from the 'data/training.csv' file 206 | 207 | ### The data files 208 | The training and test data contain the following columns: 209 | ``` 210 | ----------+--------------------+------------+------------------------------------------------------- 211 | Column#s | Column Name(s) | Data Type | Description 212 | ----------+--------------------+------------+------------------------------------------------------- 213 | 1-3 | y1, y2, y3 | Float | The three dependent variables to be predicted in test 214 | ----------+--------------------+------------+------------------------------------------------------- 215 | 4 | STUDYID | Integer | 216 | ----------+--------------------+------------+------------------------------------------------------- 217 | 5 | SITEID | Integer | 218 | ----------+--------------------+------------+------------------------------------------------------- 219 | 6 | COUNTRY | Integer | 220 | ----------+--------------------+------------+------------------------------------------------------- 221 | 7 | SUBJID | Integer | 222 | ----------+--------------------+------------+------------------------------------------------------- 223 | 8 | TIMEVAR1 | Float | 224 | ----------+--------------------+------------+------------------------------------------------------- 225 | 9 | TIMEVAR2 | Float | 226 | ----------+--------------------+------------+------------------------------------------------------- 227 | 10-39 | COVAR_CONTINUOUS_n | Float | (30 fields) 228 | ----------+--------------------+------------+------------------------------------------------------- 229 | 40-47 | COVAR_ORDINAL_n | Integer | (8 fields) 230 | ----------+--------------------+------------+------------------------------------------------------- 231 | 48-55 | COVAR_NOMINAL_n | Char | (8 fields) 232 | ----------+--------------------+------------+------------------------------------------------------- 233 | 56-58 | y1, y2, y3 missing | True/False | (3 fields) whether the value exists in the ground truth 234 | ----------+--------------------+------------+------------------------------------------------------- 235 | ``` 236 | The combination of STUDYID and SUBJID is sufficient to uniquely identify a specific individual. Adding TIMEVAR1 is sufficient to uniquely identify each row. 237 | The last three columns contain the values “True” or “False”, indicating whether y1, y2, or y3 is missing from the ground truth data.
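As a quick illustration, the snippet below shows one way to read the unpacked training data and pull out the target columns together with their missing-value flags, mirroring what `src/deep_learning_runner.py` does with the `COVAR_y1_MISSING`..`COVAR_y3_MISSING` columns. It assumes `data/training.zip` has already been extracted to `data/training.csv` and that the first three columns are named `y1`, `y2`, `y3` as in the table above; adjust the names to the actual header if they differ.

```python
import pandas as pd

# assumes data/training.zip has been unpacked to data/training.csv
data = pd.read_csv('data/training.csv')

# target columns (1-3); the names are assumed to match the table above
y = data[['y1', 'y2', 'y3']].values

# boolean flags marking targets absent from the ground truth (columns 56-58)
y_missing = data.loc[:, 'COVAR_y1_MISSING':'COVAR_y3_MISSING'].values.astype(bool)

print('rows: %d, present targets per column: %s' % (len(data), (~y_missing).sum(axis=0)))
```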
239 | 240 | ## Dependencies: 241 | * [Numpy](http://www.numpy.org) 242 | * [Pandas](http://pandas.pydata.org) 243 | * [scikit-learn](http://scikit-learn.org/stable/) 244 | 245 | ## References 246 | * [Stanford CS class CS231n](http://cs231n.github.io) 247 | * [UFLDL Deep Learning Tutorial](http://ufldl.stanford.edu/tutorial/) 248 | * [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) 249 | * [Recurrent Neural Networks](http://christianherta.de/lehre/dataScience/machineLearning/neuralNetworks/recurrentNeuralNetworks.php) 250 | * [Generating Sequences With Recurrent Neural Networks arXiv:1308.0850](http://arxiv.org/abs/1308.0850v5) 251 | * [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling arXiv:1412.3555](http://arxiv.org/abs/1412.3555v1) 252 | * [Adam: A Method for Stochastic Optimization arXiv:1412.6980](http://arxiv.org/abs/1412.6980v8) 253 | * [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification arXiv:1502.01852](http://arxiv.org/abs/1502.01852v1) 254 | * [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift arXiv:1502.03167](http://arxiv.org/abs/1502.03167v3) 255 | * [RMSProp and equilibrated adaptive learning rates for non-convex optimization arXiv:1502.04390](http://arxiv.org/abs/1502.04390v1) 256 | * [DRAW: A Recurrent Neural Network For Image Generation arXiv:1502.04623](http://arxiv.org/abs/1502.04623v2) 257 | * [Directly Modeling Missing Data in Sequences with RNNs: Improved Classification of Clinical Time Series arXiv:1606.04130](http://arxiv.org/abs/1606.04130v1) 258 | -------------------------------------------------------------------------------- /data/testData.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/data/testData.zip -------------------------------------------------------------------------------- /data/training.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/data/training.zip -------------------------------------------------------------------------------- /src/.spyderworkspace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/src/.spyderworkspace -------------------------------------------------------------------------------- /src/deep/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Aug 1 12:33:19 2016 4 | 5 | @author: yaric 6 | """ 7 | 8 | from deep.deep_learning_nn import DeepLearningNN -------------------------------------------------------------------------------- /src/deep/deep_learning_nn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Aug 1 10:30:44 2016 4 | 5 | The cascade of connected NN forming deep learning 3-layered NN. 
6 | 7 | @author: yaric 8 | """ 9 | import time 10 | import datetime 11 | from math import sqrt 12 | 13 | import numpy as np 14 | import scipy.io as sio 15 | 16 | DEBUG = True 17 | 18 | class DeepLearningNN(object): 19 | 20 | def __init__(self, n_features, n_outputs, n_neurons=[50, 20], param_update_scheme='Adam', 21 | learning_rate=1e-1, activation_rule='ReLU', relu_neg_slope=0.01, 22 | use_dropout_regularization=True, input_dropout_threshold=0.75, 23 | hiden_dropout_threshold=0.5, reg_strenght=1e-3, use_regularization=True, 24 | use_batch_step=False, batch_step_size=25, 25 | sgd_shuffle=True): 26 | """ 27 | Initializes RNN 28 | n_features the number of features per data sample 29 | n_outputs the number of output values to find 30 | n_neurons the number of neurons per hidden layer (Default: [50, 20]) 31 | param_update_scheme the algorithm used to update parameters after gradients update (Default: 'Adam') 32 | learning_rate - the start learning rate (Default: 1e-1) 33 | activation_rule - the single neuron non-linearity activation rule (Default: 'ReLU') 34 | relu_neg_slope the ReLU negative slope (Default: 0.01) 35 | use_dropout_regularization whether to use dropout regularization threshold (Default: True) 36 | input_dropout_threshold the input units dropout threshold (Default: 0.75) 37 | hiden_dropout_threshold the hidden units dropout threshold (Default: 0.5) 38 | reg_strenght the L2 regularization strength for training parameters (Default:1e-3) 39 | use_regularization the flag to turn on/off regularization (Default: True) 40 | use_batch_step the flag to indicate whether to use batch training (True), default - False 41 | batch_step_size the number of samples per batch (Default: 25) 42 | sgd_shuffle whether to shuffle data samples randomly after each epoch (Default: True) 43 | """ 44 | self.hidden_size = n_neurons 45 | self.n_features = n_features 46 | self.n_outputs = n_outputs 47 | self.use_batch_step = use_batch_step 48 | self.batch_step_size = batch_step_size 49 | self.param_update_scheme = param_update_scheme 50 | self.learning_rate = learning_rate 51 | self.activation_rule = activation_rule 52 | self.relu_neg_slope = relu_neg_slope 53 | self.use_dropout_regularization = use_dropout_regularization 54 | self.input_dropout_threshold = input_dropout_threshold 55 | self.hiden_dropout_threshold = hiden_dropout_threshold 56 | self.reg_strenght = reg_strenght 57 | self.use_regularization = use_regularization 58 | self.sgd_shuffle = sgd_shuffle 59 | 60 | def train(self, Xtr, ytr, ytr_missing, n_epochs, Xvl=None, yvl=None, yvl_missing=None, check_gradient=False): 61 | """ 62 | Trains neural network over specified epochs with optional validation if validation data provided 63 | Xtr - the train features tenzor with shape (num_samples, num_features) 64 | ytr - the train ground truth tenzor with shape (num_samples, num_outputs) 65 | ytr_missing - the boolean flags denoting missing train outputs with shape (num_samples, num_outputs) 66 | n_epochs - the number of epochs to use for training 67 | Xvl - the validation features tenzor with shape (num_samples, num_features) (Default: None) 68 | yvl - the validation ground truth tenzor with shape (num_samples, num_outputs) (Default: None) 69 | yvl_missing - the boolean flags denoting missing validation outputs with shape (num_samples, num_outputs) (Default: None) 70 | check_gradient - the boolean to indicate if gradient check should be done (Default: False) 71 | return trained model parameters as well as train/validation errors and scores per epoch 72 | 
""" 73 | # parameters check 74 | assert len(Xtr[0]) == self.n_features 75 | assert len(ytr[0]) == self.n_outputs 76 | assert len(ytr_missing[0]) == self.n_outputs 77 | 78 | do_validation = (Xvl is not None) 79 | if do_validation and (yvl is None or yvl_missing is None): 80 | raise 'Validation outputs or missing falgs not specified when validation requested' 81 | elif do_validation: 82 | # check that validation parameters of correct size 83 | assert len(Xtr[0]) == len(Xvl[0]) 84 | assert len(ytr[0]) == len(yvl[0]) 85 | assert len(yvl[0]) == len(yvl_missing[0]) 86 | 87 | # model parameters 88 | self.__initNNParameters() 89 | 90 | start_time = datetime.datetime.fromtimestamp(time.time()) 91 | 92 | # do train 93 | mWxh, mWhh, mWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why) 94 | mbxh, mbhh, mbhy = np.zeros_like(self.bxh), np.zeros_like(self.bhh), np.zeros_like(self.bhy) # memory variables for Adagrad, RMSProp 95 | vWxh, vWhh, vWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why) 96 | vbxh, vbhh, vbhy = np.zeros_like(self.bxh), np.zeros_like(self.bhh), np.zeros_like(self.bhy) # memory variables for Adam 97 | train_errors = np.zeros((n_epochs, 1)) 98 | train_scores = np.zeros_like(train_errors) 99 | if do_validation: 100 | validation_errors = np.zeros_like(train_errors) 101 | validation_scores = np.zeros_like(train_errors) 102 | 103 | n = 0 104 | step_f = self.__activationFunction() 105 | for epoch in range(n_epochs): 106 | # prepare for new epoch 107 | if self.use_batch_step: 108 | steps = len(Xtr) / self.batch_step_size 109 | else: 110 | steps = len(Xtr) 111 | epoch_error = np.zeros((steps, 1)) 112 | epoch_score = np.zeros((steps, 1)) 113 | 114 | # shuffle data for stohastic gradient descent before new epoch start 115 | if self.use_batch_step and self.sgd_shuffle: 116 | perm = np.arange(Xtr.shape[0]) 117 | np.random.shuffle(perm) 118 | Xtr = Xtr[perm] 119 | ytr = ytr[perm] 120 | 121 | # proceed with mini-batches 122 | for j in range(steps): 123 | if self.use_batch_step: 124 | index = j * self.batch_step_size 125 | inputs = Xtr[index : index + self.batch_step_size, :] # the slice of rows with batch_size length 126 | targets = ytr[index : index + self.batch_step_size, :] 127 | y_missing = ytr_missing[index : index + self.batch_step_size, :] 128 | loss, score, dWxh, dWhh, dWhy, dbx, dbh, dby = step_f(inputs, targets, y_missing) 129 | else: 130 | inputs = Xtr[j : j + 1, :] # just one row 131 | targets = ytr[j : j + 1, :] 132 | loss, score, dWxh, dWhh, dWhy, dbx, dbh, dby = step_f(inputs, targets, ytr_missing[j]) 133 | 134 | epoch_error[j] = loss 135 | epoch_score[j] = score 136 | 137 | if j % 100 == 0: print '---iter %d, epoch: %d, step: %d from: %d, loss: %.5f' % (n, epoch, j, steps, loss) # print progress 138 | 139 | n += 1 # total iteration counter 140 | 141 | if check_gradient: 142 | self.__gradCheck(inputs, targets, ytr_missing[j]) 143 | 144 | # perform parameter update 145 | if self.param_update_scheme == 'Adagrad': 146 | # with Adagrad 147 | eps = 1e-8#1e-4# 148 | for param, dparam, mem in zip([self.Wxh, self.Whh, self.Why, self.bxh, self.bhh, self.bhy], [dWxh, dWhh, dWhy, dbx, dbh, dby], [mWxh, mWhh, mWhy, mbxh, mbhh, mbhy]): 149 | mem += dparam * dparam 150 | param += -self.learning_rate * dparam / (np.sqrt(mem) + eps) # adagrad update 151 | elif self.param_update_scheme == 'RMSProp': 152 | # with RMSProp 153 | eps = 1e-8 # {1e−4, 1e−5, 1e−6} 154 | decay_rate = 0.99# {0.9, 0.95} 155 | for param, dparam, mem in zip([self.Wxh, 
self.Whh, self.Why, self.bxh, self.bhh, self.bhy], [dWxh, dWhh, dWhy, dbx, dbh, dby], [mWxh, mWhh, mWhy, mbxh, mbhh, mbhy]): 156 | mem[:] = decay_rate * mem + (1 - decay_rate) * (dparam * dparam) # cache = decay_rate * cache + (1 - decay_rate) * dx**2, updated in place so the cache persists across iterations 157 | param += -self.learning_rate * dparam / (np.sqrt(mem) + eps) # RMSProp update 158 | elif self.param_update_scheme == 'Adam': 159 | # with Adam 160 | eps = 1e-8 161 | beta1 = 0.9 162 | beta2 = 0.99 #0.95 #0.999# 163 | for param, dparam, m, v in zip([self.Wxh, self.Whh, self.Why, self.bxh, self.bhh, self.bhy], [dWxh, dWhh, dWhy, dbx, dbh, dby], [mWxh, mWhh, mWhy, mbxh, mbhh, mbhy], [vWxh, vWhh, vWhy, vbxh, vbhh, vbhy]): 164 | m[:] = beta1 * m + (1 - beta1) * dparam # Update biased first moment estimate (in place, so the running moment is kept between iterations) 165 | v[:] = beta2 * v + (1 - beta2) * (dparam * dparam) # Update biased second raw moment estimate (in place) 166 | #param += -self.learning_rate * m / (np.sqrt(v) + eps) # Adam update 167 | # bias corrected estimates 168 | mt = m / (1 - pow(beta1, j + 1)) # N.B. j starts from 0 169 | vt = v / (1 - pow(beta2, j + 1)) 170 | param += -self.learning_rate * mt / (np.sqrt(vt) + eps) # Adam update 171 | elif self.param_update_scheme == 'AdaMax': 172 | # with AdaMax - a variant of Adam based on the infinity norm. 173 | eps = 1e-8 174 | beta1 = 0.9 175 | beta2 = 0.99 #0.95 #0.999# 176 | step_size = self.learning_rate / (1 - pow(beta1, j + 1)) #bias correction 177 | for param, dparam, m, v in zip([self.Wxh, self.Whh, self.Why, self.bxh, self.bhh, self.bhy], [dWxh, dWhh, dWhy, dbx, dbh, dby], [mWxh, mWhh, mWhy, mbxh, mbhh, mbhy], [vWxh, vWhh, vWhy, vbxh, vbhh, vbhy]): 178 | m[:] = beta1 * m + (1 - beta1) * dparam # Update biased first moment estimate (in place) 179 | v[:] = np.maximum(beta2 * v, np.abs(dparam) + eps) # Update the exponentially weighted infinity norm (in place) 180 | param += - step_size * m / v 181 | else: 182 | raise ValueError("Unknown parameters update scheme: {}".format(self.param_update_scheme)) 183 | 184 | 185 | # Annealing the learning rate but avoid dropping it too low 186 | if self.learning_rate >= 1e-6 and epoch != 0 and epoch % 20 == 0: self.learning_rate *= 0.1 187 | 188 | train_scores[epoch] = self.__make_score(epoch_score) # the score per epoch 189 | train_errors[epoch] = np.average(epoch_error, axis=0) # the mean train error per epoch 190 | 191 | # calculate validation if appropriate 192 | if do_validation: 193 | y_predicted = self.predict(Xvl) 194 | validation_errors[epoch], validation_scores[epoch] = self.__validate(y_predicted, yvl, yvl_missing) 195 | 196 | print 'epoch: %d, train loss: %s, score: %s, learning rate: %s\nvalidation loss: %s, score: %s' % (epoch, train_errors[epoch], train_scores[epoch], self.learning_rate, validation_errors[epoch], validation_scores[epoch]) # print progress 197 | else: 198 | print 'epoch: %d, train loss: %s, score: %s, learning rate: %s' % (epoch, train_errors[epoch], train_scores[epoch], self.learning_rate) # print progress 199 | 200 | # The time spent 201 | finish_date = datetime.datetime.fromtimestamp(time.time()) 202 | delta = finish_date - start_time 203 | print '\n------------------------\nTrain time: \n%s\nTrain error: \n%s\nscores:\n%s\n' % (delta, train_errors, train_scores) 204 | 205 | if do_validation: 206 | print '\n------------------------\nValidation error: \n%s\nscores:\n%s\n' % (validation_errors, validation_scores) 207 | return train_errors, train_scores, validation_errors, validation_scores 208 | else: 209 | return train_errors, train_scores 210 | 211 | 212 | def predict(self, Xvl, use_prev_state = False): 213 | """ 214 |
The method to predict outputs based on provided data samples 215 | Xvl the data samples with shape (num_samples, n_features) 216 | use_prev_state whether to use saved previous state of RNN or just reset its memory 217 | return predicitions per data sample with shape (num_samples, n_outputs) 218 | """ 219 | # ensembled forward pass 220 | H1 = np.maximum(0, np.dot(Xvl, self.Wxh) + self.bxh) 221 | H2 = np.maximum(0, np.dot(H1, self.Whh) + self.bhh) 222 | out = np.dot(H2, self.Why) + self.bhy 223 | 224 | return out 225 | 226 | def saveModel(self, name): 227 | """ 228 | Saves trained model using provided file name 229 | """ 230 | vault = {'Wxh' : self.Wxh, 231 | 'Whh' : self.Whh, 232 | 'Why' : self.Why, 233 | 'bxh' : self.bxh, 234 | 'bhh' : self.bhh, 235 | 'byh' : self.bhy, 236 | 'hidden_size' : self.hidden_size, 237 | 'n_features' : self.n_features, 238 | 'n_outputs' : self.n_outputs, 239 | 'use_batch_step' : self.use_batch_step, 240 | 'batch_step_size' : self.batch_step_size, 241 | 'param_update_scheme' : self.param_update_scheme, 242 | 'learning_rate' : self.learning_rate, 243 | 'use_dropout_regularization' : self.use_dropout_regularization, 244 | 'input_dropout_threshold' : self.input_dropout_threshold, 245 | 'hiden_dropout_threshold' : self.hiden_dropout_threshold, 246 | 'reg_strenght' : self.reg_strenght, 247 | 'use_regularization' : self.use_regularization, 248 | 'sgd_shuffle' : self.sgd_shuffle, 249 | 'activation_rule' : self.activation_rule} 250 | 251 | sio.savemat(name, vault) 252 | 253 | def loadModel(self, name): 254 | """ 255 | Loads model from spefied file 256 | name the path to the model file 257 | """ 258 | mat_contents = sio.loadmat(name) 259 | self.Wxh = mat_contents['Wxh'] 260 | self.Whh = mat_contents['Whh'] 261 | self.Why = mat_contents['Why'] 262 | self.bxh = mat_contents['bxh'] 263 | self.bhh = mat_contents['bhh'] 264 | self.bhy = mat_contents['byh'] 265 | self.hidden_size = mat_contents['hidden_size'] 266 | self.n_features = mat_contents['n_features'] 267 | self.n_outputs = mat_contents['n_outputs'] 268 | self.use_batch_step = mat_contents['use_batch_step'] 269 | self.batch_step_size = mat_contents['batch_step_size'] 270 | self.param_update_scheme = mat_contents['param_update_scheme'] 271 | self.learning_rate = mat_contents['learning_rate'] 272 | self.use_dropout_regularization = mat_contents['use_dropout_regularization'] 273 | self.input_dropout_threshold = mat_contents['input_dropout_threshold'] 274 | self.hiden_dropout_threshold = mat_contents['hiden_dropout_threshold'] 275 | self.reg_strenght = mat_contents['reg_strenght'] 276 | self.use_regularization = mat_contents['use_regularization'] 277 | self.sgd_shuffle = mat_contents['sgd_shuffle'] 278 | self.activation_rule = mat_contents['activation_rule'] 279 | 280 | def __step_relu(self, inputs, targets, ytr_missing): 281 | """ 282 | The one step in NN computations using ReLU function as non-linear activation function 283 | inputs, targets are both arrays of real numbers with shapes (input_size, 1) and (target_size, 1) respectively. 
284 | hprev is array of initial hidden state with shape (hidden_size, 1) 285 | Wxh, Whh, Why - the neurons input/output weights 286 | bh, by - the hidden/output layer bias 287 | returns the loss, score_mean, gradients on model parameters, and last hidden state 288 | """ 289 | # 290 | # forward pass 291 | # 292 | xs = inputs 293 | hidden_1 = np.maximum(0, np.dot(xs, self.Wxh) + self.bxh) # input-to-hidden, ReLU activation 294 | if self.use_regularization and self.use_dropout_regularization: 295 | U1 = (np.random.rand(*hidden_1.shape) < self.input_dropout_threshold ) / self.input_dropout_threshold # first dropout mask 296 | hidden_1 *= U1 # drop! and scale the activations by p at test time. (see: http://cs231n.github.io/neural-networks-2/#reg - Inverted Dropout) 297 | 298 | hidden_2 = np.maximum(0, np.dot(hidden_1, self.Whh) + self.bhh) # hidden-to-hidden, ReLU activation 299 | if self.use_regularization and self.use_dropout_regularization: 300 | U2 = (np.random.rand(*hidden_2.shape) < self.hiden_dropout_threshold) / self.hiden_dropout_threshold # second dropout mask 301 | hidden_2 *= U2 # drop! and scale the activations by p at test time. 302 | 303 | ys = np.dot(hidden_2, self.Why) + self.bhy # hidden-to-output, ReLU activation 304 | ps = ys - targets # error 305 | loss = np.sum(np.abs(ps), axis=1) # L1 norm 306 | 307 | # 308 | # backward pass: compute gradients going backwards 309 | # 310 | dy = np.sign(ps) # the gradient for y only inherits the sign of the difference for L1 norm (http://cs231n.github.io/neural-networks-2/#reg) 311 | # first backprop into parameters Why and bhy 312 | dWhy = np.dot(hidden_2.T, dy) 313 | dby = np.sum(dy, axis=0, keepdims=True) 314 | # next backprop into hidden layer 315 | dhidden_2 = np.dot(dy, self.Why.T) 316 | 317 | # backprop the ReLU non-linearity 318 | dhidden_2[hidden_2 <= 0] = 0 319 | # backprop into Whh, bhh 320 | dWhh = np.dot(hidden_1.T, dhidden_2) 321 | dbh = np.sum(dhidden_2, axis=0, keepdims=True) 322 | # next backprop into hidden layer 323 | dhidden_1 = np.dot(dhidden_2, self.Whh.T) 324 | 325 | # backprop the ReLU non-linearity 326 | dhidden_1[hidden_1 <= 0] = 0 327 | # backprop into Wxh, bxh 328 | dWxh = np.dot(xs.T, dhidden_1) 329 | dbx = np.sum(dhidden_1, axis=0, keepdims=True) 330 | 331 | # add L2 regularization gradient contribution if not dropout 332 | if self.use_regularization and not self.use_dropout_regularization: 333 | dWhy += self.reg_strenght * self.Why 334 | dWhh += self.reg_strenght * self.Whh 335 | dWxh += self.reg_strenght * self.Wxh 336 | 337 | # calculate score 338 | score = np.zeros((inputs.shape[0], 1)) 339 | for t in range(inputs.shape[0]): 340 | score[t] = self.__score_mean(np.abs(ps[t, :]), ytr_missing[t, :]) # IMPORTANT: use COVAR_y_MISSING flags for mean calculation without missed Y 341 | return np.average(loss), np.average(score), dWxh, dWhh, dWhy, dbx, dbh, dby 342 | 343 | 344 | def __score_mean(self, abs_diff, y_missing): 345 | """ 346 | Calculates score mean on based absolute differences between Y predicted and target 347 | abs_diff = |Ypred - Yeval| 348 | y_missing the array with COVAR_y_MISSING flags with shape (target_size, 1) 349 | """ 350 | scores = abs_diff.flat[~y_missing] 351 | return np.mean(scores) 352 | 353 | def __make_score(self, mean_scores): 354 | """ 355 | Calculates final score from provided array of mean scores 356 | mean_scores the array of mean scores 357 | return score value 358 | """ 359 | n = len(mean_scores) 360 | sum_r = np.sum(mean_scores) 361 | score = 10 * (1 - sum_r/n) 362 | return 
score 363 | 364 | def __validate(self, y, y_target, y_missing): 365 | """ 366 | The method to validate calculated validation outputs against ground truth 367 | y the calculated predictions with shape (num_samples, output_size) 368 | y_target the ground trouth with shape (num_samples, output_size) 369 | y_missing the array of flags denoting missed ground trouth value for predicition with shape (num_samples, output_size) 370 | return calculated score and error values over provided data set 371 | """ 372 | ps = np.abs(y - y_target) 373 | errors = np.sum(ps, axis=1) # L1 norm 374 | 375 | scores = np.zeros((y.shape[0], 1)) 376 | for t in range(y.shape[0]): 377 | # find score per sample 378 | scores[t] = self.__score_mean(ps[t], y_missing[t]) 379 | 380 | # find total score and error 381 | score = self.__make_score(scores) 382 | error = np.average(errors, axis=0) 383 | return error, score 384 | 385 | def __activationFunction(self): 386 | """ 387 | Finds appropriate activation function depending on configuration 388 | """ 389 | step_f = None 390 | if self.activation_rule == 'ReLU': 391 | step_f = self.__step_relu 392 | 393 | if step_f == None: 394 | raise 'Unsupported activation function specified: {}'.format(self.activation_rule) 395 | 396 | return step_f 397 | 398 | def __initNNParameters(self): 399 | """ 400 | Do NN parameters initialization according to provided data samples 401 | input_size the input layer size 402 | output_size the output layer size 403 | """ 404 | if self.activation_rule == 'ReLU': 405 | self.Wxh = np.random.randn(self.n_features, self.hidden_size[0]) * sqrt(2.0/self.n_features) # input to hidden 406 | self.Whh = np.random.randn(self.hidden_size[0], self.hidden_size[1]) * sqrt(2.0/self.hidden_size[0]) # hidden to hidden 407 | self.Why = np.random.randn(self.hidden_size[1], self.n_outputs) * sqrt(2.0/self.hidden_size[1]) # hidden to output 408 | else: 409 | self.Wxh = np.random.randn(self.n_features, self.hidden_size[0]) * 0.01 # input to hidden 410 | self.Whh = np.random.randn(self.hidden_size[0], self.hidden_size[1]) * 0.01 # hidden to hidden 411 | self.Why = np.random.randn(self.hidden_size[1], self.n_outputs) * 0.01 # hidden to output 412 | 413 | self.bxh = np.zeros((1, self.hidden_size[0])) # input-to-hidden bias 414 | self.bhh = np.zeros((1, self.hidden_size[1])) # hidden-to-hidden bias 415 | self.bhy = np.zeros((1, self.n_outputs)) # hidden-to-output bias 416 | 417 | if DEBUG: 418 | print 'Wxh: %s, Whh: %s, Why: %s\nbxh: %s, bhh: %s, bhy: %s' % (np.shape(self.Wxh), np.shape(self.Whh), np.shape(self.Why), np.shape(self.bxh), np.shape(self.bhh), np.shape(self.bhy)) -------------------------------------------------------------------------------- /src/deep_learning_runner.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Aug 1 12:32:23 2016 4 | 5 | The Deep Learning NN runner 6 | 7 | @author: yaric 8 | """ 9 | import time 10 | import datetime 11 | 12 | import pandas as pd 13 | import numpy as np 14 | 15 | from deep.deep_learning_nn import DeepLearningNN 16 | 17 | from utils import utils 18 | 19 | # hyperparameters 20 | n_neurons = [128, 32] # [64, 32]# [256, 128] # size of hidden layers of neurons 21 | n_epochs = 60 # the number of learning epochs 22 | 23 | # for RMSProp it is good to have [1e-3, 1e-4], 24 | # for Adagrad [0.05], 25 | # for Adam [1e-4, 5e-5] 26 | # for AdaMax [5e-4] 27 | learning_rate = 5e-2 #2e-3 # 5e-4 # 28 | batch_step_size = 100#200 29 | param_update_scheme = 
'Adagrad' #'AdaMax' # 'RMSProp' #'Adam' # 30 | activation_rule = 'ReLU' 31 | relu_neg_slope = 0.001 # 0.01 32 | sgd_shuffle = True 33 | 34 | # The regularization parameters 35 | use_dropout_regularization = False # True # 36 | # The L2 regularization strength 37 | reg_strenght = 1e-4 38 | use_regularization = True 39 | 40 | # Whether to preprocess input features (normalization, standardization, PCA, etc) 41 | USE_PREPROCESSING = False #True# 42 | # Whether to use single step (False) or batch step training (True) 43 | USE_BATCH_TRAINING = True #False # 44 | # Whether to check gradient 45 | CHECK_GRADIENT = False #True 46 | 47 | # debug mode switch 48 | DEBUG = False # True # 49 | # Whether to save model when in debug mode (in production mode model will be saved anyway) 50 | SAVE_MODEL_DEBUG = False # 51 | 52 | # Whether to use existing trained model for predicition only 53 | PREDICT_ONLY = False #True # 54 | 55 | # the file prefix of debug data sets 56 | debug_file_prefix = '../data/training-' # '../data/training-small-' # '../data/training-preprocessed-' 57 | 58 | # whether data set in RAW form or already preprocessed 59 | data_set_raw = True # False 60 | 61 | def main(): 62 | # import data 63 | if DEBUG: 64 | data_train = pd.read_csv(debug_file_prefix + 'train.csv') 65 | data_validation = pd.read_csv(debug_file_prefix + 'validate.csv') 66 | else: 67 | data_train = pd.read_csv('../data/training.csv') 68 | data_validation = pd.read_csv('../data/testData.csv') 69 | 70 | data_train['train_flag'] = True 71 | data_validation['train_flag'] = False 72 | data = pd.concat((data_train, data_validation)) 73 | 74 | # keep missing flags for both training and validation 75 | ytr_missing = np.array(data_train.loc[ :,'COVAR_y1_MISSING':'COVAR_y3_MISSING']) 76 | yvl_missing = np.array(data_validation.loc[ :,'COVAR_y1_MISSING':'COVAR_y3_MISSING']) 77 | 78 | # remove temporary data 79 | del data_train 80 | del data_validation 81 | 82 | # basic formatting 83 | if data_set_raw: 84 | Xtr, ytr, Xvl, yvl = utils.format_data_features_selected(data)# utils.format_data(data, preprocessing=USE_PREPROCESSING) 85 | else: 86 | Xtr, ytr, Xvl, yvl = utils.format_data_preprocessed(data) 87 | del data 88 | 89 | # preprocess data 90 | if USE_PREPROCESSING: 91 | use_pca = False # apply PCA (True) or standard normalization (False) 92 | Xtr, Xvl = utils.preprocess(Xtr, Xvl, use_pca) 93 | 94 | # create RNN instance 95 | n_features = len(Xtr[0]) 96 | n_outputs = len(ytr[0]) 97 | nn_solver = DeepLearningNN(n_features=n_features, n_outputs=n_outputs, 98 | n_neurons=n_neurons, param_update_scheme=param_update_scheme, 99 | learning_rate = learning_rate, activation_rule = activation_rule, 100 | use_dropout_regularization=use_dropout_regularization, 101 | reg_strenght=reg_strenght, use_regularization=use_regularization, 102 | relu_neg_slope=relu_neg_slope, 103 | use_batch_step=USE_BATCH_TRAINING, batch_step_size=batch_step_size, 104 | sgd_shuffle=sgd_shuffle) 105 | 106 | if not PREDICT_ONLY: 107 | trainAndTest(nn_solver, Xtr, ytr, ytr_missing, Xvl, yvl, yvl_missing) 108 | else: 109 | predictByModel(nn_solver, Xvl, '../models/DeepNN/model_2016-08-03T15_39_15.mat') 110 | 111 | 112 | def trainAndTest(nn_solver, Xtr, ytr, ytr_missing, Xvl, yvl, yvl_missing): 113 | """ 114 | The train and test runner 115 | """ 116 | if DEBUG: 117 | # train with validation 118 | train_errors, train_scores, validation_errors, validation_scores = nn_solver.train( 119 | Xtr = Xtr, ytr = ytr, ytr_missing = ytr_missing, 120 | n_epochs = n_epochs, Xvl = Xvl, yvl = 
yvl, yvl_missing = yvl_missing) 121 | # plot results 122 | utils.plotResultsValidate(train_errors, train_scores, validation_errors, validation_scores) 123 | else: 124 | # train without validation 125 | train_errors, train_scores = nn_solver.train( 126 | Xtr = Xtr, ytr = ytr, ytr_missing = ytr_missing, 127 | n_epochs = n_epochs) 128 | # plot results 129 | utils.plotResultsTest(train_errors, train_scores) 130 | 131 | # and save model 132 | if DEBUG == False or (DEBUG and SAVE_MODEL_DEBUG): 133 | st = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%dT%H_%M_%S') 134 | m_name = '../models/DeepNN/model_{}.mat'.format(st) 135 | nn_solver.saveModel(m_name) 136 | 137 | # test data predict 138 | predict(nn_solver, Xvl) 139 | 140 | 141 | def predictByModel(nn_solver, Xvl, model_name): 142 | """ 143 | Method to make prediction on saved model 144 | """ 145 | nn_solver.loadModel(model_name) 146 | 147 | predict(nn_solver, Xvl) 148 | 149 | 150 | def predict(nn_solver, Xvl): 151 | """ 152 | Do actual predicition 153 | """ 154 | yvl_est = nn_solver.predict(Xvl) 155 | 156 | # substitute negative with zeros (negative values mark absent Y) 157 | yvl_est = yvl_est.clip(min=0, max=1) 158 | 159 | assert len(yvl_est) == len(Xvl) 160 | 161 | # save predictions as csv 162 | if DEBUG: 163 | res_name = '../validation_predictions' 164 | else: 165 | st = datetime.datetime.fromtimestamp(time.time()).strftime('%d_%m_%H_%M') 166 | res_name = '../vp_{}'.format(st) 167 | yvl = pd.DataFrame({'yvl1_est':yvl_est[:,0],'yvl2_est':yvl_est[:,1],'yvl3_est':yvl_est[:,2]}) 168 | yvl.to_csv('{}.{}'.format(res_name, 'csv'),header=False,index=False) 169 | 170 | # describe predictions 171 | print '\n------------------------\nPredictions:\n%s' % yvl.describe() 172 | 173 | # plot outputs 174 | utils.plotOutputs(yvl_est, res_name) 175 | 176 | 177 | if __name__ == '__main__': 178 | main() -------------------------------------------------------------------------------- /src/results_plotter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Aug 4 21:17:05 2016 4 | 5 | Renders output 6 | 7 | @author: yaric 8 | """ 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | 14 | file_name = 'vp_10_08_11_45' # 'vp_31_07_00_21' # 'vp_04_08_16_40' # 15 | # the path to look for files 16 | results_path = '../results/best/{}.{}' 17 | # the number of bins 18 | bins_count=100 19 | 20 | # read predictions 21 | y_pred_df = pd.read_csv(results_path.format(file_name, 'csv')) 22 | 23 | print 'Results:\n%s\n' % y_pred_df.describe() 24 | 25 | y_pred = np.array(y_pred_df) 26 | 27 | # make histograms 28 | y1_hist, _ = np.histogram(y_pred[:,0], bins=bins_count) 29 | y2_hist, _ = np.histogram(y_pred[:,1], bins=bins_count) 30 | y3_hist, _ = np.histogram(y_pred[:,2], bins=bins_count) 31 | 32 | # draw scatter 33 | x = np.arange(bins_count) 34 | 35 | y1_plot = plt.scatter(x, np.log10(y1_hist), marker='o', color='b') 36 | y2_plot = plt.scatter(x, np.log10(y2_hist), marker='o', color='r') 37 | y3_plot = plt.scatter(x, np.log10(y3_hist), marker='o', color='g') 38 | 39 | plt.grid(color='black', linestyle='-') 40 | plt.title(file_name) 41 | plt.legend((y1_plot, y2_plot, y3_plot), ('y1','y2','y3'), 42 | scatterpoints=1, loc='upper right') 43 | # save figure 44 | plt.savefig(results_path.format(file_name, 'png'), dpi=72) 45 | # show figure 46 | plt.show() -------------------------------------------------------------------------------- 
/src/rnn/.spyderworkspace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yaricom/TimeSeriesLearning/6c6c5dc253b47bd6a22a2a97030adba5c5e7512a/src/rnn/.spyderworkspace -------------------------------------------------------------------------------- /src/rnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Jul 27 15:00:00 2016 4 | 5 | @author: yaric 6 | """ 7 | from rnn.simple_rnn import RNN 8 | -------------------------------------------------------------------------------- /src/rnn/simple_rnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Jul 27 10:55:34 2016 4 | 5 | The plain vanila implementation of Recurrent Neural Network 6 | 7 | @author: yaric 8 | """ 9 | import time 10 | import datetime 11 | from random import uniform 12 | 13 | import numpy as np 14 | import scipy.io as sio 15 | 16 | class RNN(object): 17 | 18 | def __init__(self, n_features, n_outputs, n_neurons=100, param_update_scheme='Adagrad', 19 | learning_rate=1e-1, activation_rule='Tanh', 20 | use_batch_step=False, batch_step_size=25, relu_neg_slope=0.01, 21 | use_dropout_regularization=True, dropout_threshold=0.8, 22 | reg_strenght=0.5, use_regularization=True, 23 | sgd_shuffle=True): 24 | """ 25 | Initializes RNN 26 | n_features the number of features per data sample 27 | n_outputs the number of output values to find 28 | n_neurons the number of neurons in hidden layer (Default: 100) 29 | param_update_scheme the algorithm used to update parameters after gradients update (Default: 'Adagrad') 30 | learning_rate - the start learning rate (Default: 1e-1) 31 | activation_rule - the single neuron non-linearity activation rule (Default: 'Tanh') 32 | use_batch_step the flag to indicate whether to use batch training (True), default - False 33 | batch_step_size the number of samples per batch (Default: 25) 34 | relu_neg_slope the ReLU negative slope (Default: 0.01) 35 | use_dropout_regularization whether to use dropout regularization threshold (Default: True) 36 | dropout_threshold the dropout threshold (Default: 0.8) 37 | reg_strenght the L2 regularization strength for training parameters (Default:0.001) 38 | use_regularization the flag to turn on/off regularization (Default: True) 39 | sgd_shuffle whether to shuffle data samples randomly after each epoch (Default: True) 40 | """ 41 | self.hidden_size = n_neurons 42 | self.n_features = n_features 43 | self.n_outputs = n_outputs 44 | self.use_batch_step = use_batch_step 45 | self.batch_step_size = batch_step_size 46 | self.param_update_scheme = param_update_scheme 47 | self.learning_rate = learning_rate 48 | self.activation_rule = activation_rule 49 | self.relu_neg_slope = relu_neg_slope 50 | self.use_dropout_regularization = use_dropout_regularization 51 | self.dropout_threshold = dropout_threshold 52 | self.reg_strenght = reg_strenght 53 | self.use_regularization = use_regularization 54 | 55 | self.sgd_shuffle = sgd_shuffle 56 | 57 | def train(self, Xtr, ytr, ytr_missing, n_epochs, Xvl=None, yvl=None, yvl_missing=None, check_gradient=False): 58 | """ 59 | Trains neural network over specified epochs with optional validation if validation data provided 60 | Xtr - the train features tenzor with shape (num_samples, num_features) 61 | ytr - the train ground truth tenzor with shape (num_samples, num_outputs) 62 | ytr_missing - the 
boolean flags denoting missing train outputs with shape (num_samples, num_outputs) 63 | n_epochs - the number of epochs to use for training 64 | Xvl - the validation features tensor with shape (num_samples, num_features) (Default: None) 65 | yvl - the validation ground truth tensor with shape (num_samples, num_outputs) (Default: None) 66 | yvl_missing - the boolean flags denoting missing validation outputs with shape (num_samples, num_outputs) (Default: None) 67 | check_gradient - the boolean to indicate if gradient check should be done (Default: False) 68 | return trained model parameters as well as train/validation errors and scores per epoch 69 | """ 70 | # parameters check 71 | assert len(Xtr[0]) == self.n_features 72 | assert len(ytr[0]) == self.n_outputs 73 | assert len(ytr_missing[0]) == self.n_outputs 74 | 75 | do_validation = (Xvl is not None) 76 | if do_validation and (yvl is None or yvl_missing is None): 77 | raise ValueError('Validation outputs or missing flags not specified when validation requested') 78 | elif do_validation: 79 | # check that validation parameters are of correct size 80 | assert len(Xtr[0]) == len(Xvl[0]) 81 | assert len(ytr[0]) == len(yvl[0]) 82 | assert len(yvl[0]) == len(yvl_missing[0]) 83 | 84 | # model parameters 85 | self.__initNNParameters() 86 | 87 | start_time = datetime.datetime.fromtimestamp(time.time()) 88 | 89 | # do train 90 | mWxh, mWhh, mWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why) 91 | mbh, mby = np.zeros_like(self.bh), np.zeros_like(self.by) # memory variables for Adagrad, RMSProp 92 | vWxh, vWhh, vWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why) 93 | vbh, vby = np.zeros_like(self.bh), np.zeros_like(self.by) # memory variables for Adam 94 | train_errors = np.zeros((n_epochs, 1)) 95 | train_scores = np.zeros_like(train_errors) 96 | if do_validation: 97 | validation_errors = np.zeros_like(train_errors) 98 | validation_scores = np.zeros_like(train_errors) 99 | 100 | n = 0 101 | step_f = self.__activationFunction() 102 | for epoch in range(n_epochs): 103 | # prepare for new epoch 104 | if self.use_batch_step: 105 | steps = len(Xtr) // self.batch_step_size # integer number of full batches 106 | else: 107 | steps = len(Xtr) 108 | epoch_error = np.zeros((steps, 1)) 109 | epoch_score = np.zeros((steps, 1)) 110 | self.hprev = np.zeros((self.hidden_size, 1)) # reset RNN memory at start of new epoch 111 | 112 | # shuffle data for stochastic gradient descent before new epoch start 113 | if self.use_batch_step and self.sgd_shuffle: 114 | perm = np.arange(Xtr.shape[0]) 115 | np.random.shuffle(perm) 116 | Xtr = Xtr[perm] 117 | ytr = ytr[perm]; ytr_missing = ytr_missing[perm] # keep missing-value flags aligned with the shuffled targets 118 | 119 | # proceed with mini-batches 120 | for j in range(steps): 121 | if self.use_batch_step: 122 | index = j * self.batch_step_size 123 | inputs = Xtr[index : index + self.batch_step_size, :] # the slice of rows with batch_size length 124 | targets = ytr[index : index + self.batch_step_size, :] 125 | y_missing = ytr_missing[index : index + self.batch_step_size, :] 126 | loss, score, dWxh, dWhh, dWhy, dbh, dby, self.hprev = step_f(inputs, targets, y_missing) 127 | else: 128 | inputs = Xtr[j : j + 1, :] # just one row 129 | targets = ytr[j : j + 1, :] 130 | loss, score, dWxh, dWhh, dWhy, dbh, dby, self.hprev = step_f(inputs, targets, ytr_missing[j]) 131 | 132 | epoch_error[j] = loss 133 | epoch_score[j] = score 134 | 135 | if j % 100 == 0: print '---iter %d, epoch: %d, step: %d from: %d, loss: %.5f' % (n, epoch, j, steps, loss) # print progress 136 | 137 | n += 1 # total iteration counter 138 | 139 
| if check_gradient: 140 | self.__gradCheck(inputs, targets, ytr_missing[j]) 141 | 142 | # perform parameter update 143 | if self.param_update_scheme == 'Adagrad': 144 | # with Adagrad 145 | eps = 1e-8#1e-4# 146 | for param, dparam, mem in zip([self.Wxh, self.Whh, self.Why, self.bh, self.by], [dWxh, dWhh, dWhy, dbh, dby], [mWxh, mWhh, mWhy, mbh, mby]): 147 | mem += dparam * dparam 148 | param += -self.learning_rate * dparam / (np.sqrt(mem) + eps) # adagrad update 149 | elif self.param_update_scheme == 'RMSProp': 150 | # with RMSProp 151 | eps = 1e-8 # {1e−4, 1e−5, 1e−6} 152 | decay_rate = 0.95# {0.9, 0.95} 153 | for param, dparam, mem in zip([self.Wxh, self.Whh, self.Why, self.bh, self.by], [dWxh, dWhh, dWhy, dbh, dby], [mWxh, mWhh, mWhy, mbh, mby]): 154 | mem *= decay_rate; mem += (1 - decay_rate) * dparam * dparam # update cache in place (cache = decay_rate * cache + (1 - decay_rate) * dx**2) so it persists across iterations 155 | param += -self.learning_rate * dparam / (np.sqrt(mem) + eps) # RMSProp update 156 | elif self.param_update_scheme == 'Adam': 157 | # with Adam 158 | eps = 1e-8 159 | beta1 = 0.9 160 | beta2 = 0.999#0.99 161 | for param, dparam, m, v in zip([self.Wxh, self.Whh, self.Why, self.bh, self.by], [dWxh, dWhh, dWhy, dbh, dby], [mWxh, mWhh, mWhy, mbh, mby], [vWxh, vWhh, vWhy, vbh, vby]): 162 | m *= beta1; m += (1 - beta1) * dparam # update first moment estimate in place so it persists across iterations 163 | v *= beta2; v += (1 - beta2) * (dparam * dparam) # update second moment estimate in place 164 | #param += -self.learning_rate * m / (np.sqrt(v) + eps) # Adam update 165 | # bias corrected 166 | mt = m / (1 - pow(beta1, j + 1)) # N.B. j starts from 0 167 | vt = v / (1 - pow(beta2, j + 1)) 168 | param += -self.learning_rate * mt / (np.sqrt(vt) + eps) # Adam update 169 | elif self.param_update_scheme == 'AdaMax': 170 | # with AdaMax - a variant of Adam based on the infinity norm. 171 | eps = 1e-8 172 | beta1 = 0.9 173 | beta2 = 0.99 #0.999# 0.95 # 174 | step_size = self.learning_rate / (1 - pow(beta1, j + 1)) #bias correction 175 | for param, dparam, m, v in zip([self.Wxh, self.Whh, self.Why, self.bh, self.by], [dWxh, dWhh, dWhy, dbh, dby], [mWxh, mWhh, mWhy, mbh, mby], [vWxh, vWhh, vWhy, vbh, vby]): 176 | m *= beta1; m += (1 - beta1) * dparam # update biased first moment estimate in place 177 | np.maximum(beta2 * v, np.abs(dparam) + eps, out=v) # update the exponentially weighted infinity norm in place 178 | param += - step_size * m / v 179 | else: 180 | raise ValueError("Unknown parameters update scheme: {}".format(self.param_update_scheme)) 181 | 182 | 183 | # Annealing the learning rate but avoid dropping it too low 184 | if self.learning_rate > 1e-6 and epoch != 0 and epoch % 20 == 0: self.learning_rate *= 0.1 185 | 186 | train_scores[epoch] = self.__make_score(epoch_score) # the score per epoch 187 | train_errors[epoch] = np.average(epoch_error, axis=0) # the mean train error per epoch 188 | 189 | # calculate validation if appropriate 190 | if do_validation: 191 | y_predicted = self.__predict(Xvl, np.zeros_like(self.hprev)) 192 | validation_errors[epoch], validation_scores[epoch] = self.__validate(y_predicted, yvl, yvl_missing) 193 | 194 | print 'epoch: %d, learning rate: %s, train loss: %s, score: %s\nvalidation loss: %s, score: %s' % (epoch, self.learning_rate, train_errors[epoch], train_scores[epoch], validation_errors[epoch], validation_scores[epoch]) # print progress 195 | else: 196 | print 'epoch: %d, learning rate: %s, train loss: %s, score: %s' % (epoch, self.learning_rate, train_errors[epoch], train_scores[epoch]) # print progress 197 | 198 | # The time spent 199 | finish_date = datetime.datetime.fromtimestamp(time.time()) 200 | delta = finish_date - 
start_time 201 | print '\n------------------------\nTrain time: \n%s\nTrain error: \n%s\nscores:\n%s\n' % (delta, train_errors, train_scores) 202 | 203 | if do_validation: 204 | print '\n------------------------\nValidation error: \n%s\nscores:\n%s\n' % (validation_errors, validation_scores) 205 | return train_errors, train_scores, validation_errors, validation_scores 206 | else: 207 | return train_errors, train_scores 208 | 209 | def predict(self, Xvl, use_prev_state = False): 210 | """ 211 | The method to predict outputs based on provided data samples 212 | Xvl the data samples with shape (num_samples, n_features) 213 | use_prev_state whether to use saved previous state of RNN or just reset its memory 214 | return predicitions per data sample with shape (num_samples, n_outputs) 215 | """ 216 | hprev = self.hprev if use_prev_state else np.zeros_like(self.hprev) 217 | return self.__predict(Xvl, hprev) 218 | 219 | def saveModel(self, name): 220 | """ 221 | Saves trained model using provided file name 222 | """ 223 | vault = {'Wxh' : self.Wxh, 224 | 'Whh' : self.Whh, 225 | 'Why': self.Why, 226 | 'bh' : self.bh, 227 | 'by' : self.by, 228 | 'hprev' : self.hprev, 229 | 'hidden_size' : self.hidden_size, 230 | 'n_features' : self.n_features, 231 | 'n_outputs' : self.n_outputs, 232 | 'use_batch_step' : self.use_batch_step, 233 | 'batch_step_size' : self.batch_step_size, 234 | 'param_update_scheme' : self.param_update_scheme, 235 | 'learning_rate' : self.learning_rate, 236 | 'activation_rule' : self.activation_rule, 237 | 'relu_neg_slope' : self.relu_neg_slope, 238 | 'use_dropout_regularization' : self.use_dropout_regularization, 239 | 'dropout_threshold' : self.dropout_threshold, 240 | 'reg_strenght' : self.reg_strenght, 241 | 'use_regularization' : self.use_regularization } 242 | sio.savemat(name, vault) 243 | 244 | def loadModel(self, name): 245 | """ 246 | Loads model from spefied file 247 | name the path to the model file 248 | """ 249 | mat_contents = sio.loadmat(name) 250 | self.Wxh = mat_contents['Wxh'] 251 | self.Whh = mat_contents['Whh'] 252 | self.Why = mat_contents['Why'] 253 | self.bh = mat_contents['bh'] 254 | self.by = mat_contents['by'] 255 | self.hprev = mat_contents['hprev'] 256 | self.hidden_size = mat_contents['hidden_size'] 257 | self.n_features = mat_contents['n_features'] 258 | self.n_outputs = mat_contents['n_outputs'] 259 | self.use_batch_step = mat_contents['use_batch_step'] 260 | self.batch_step_size = mat_contents['batch_step_size'] 261 | self.param_update_scheme = mat_contents['param_update_scheme'] 262 | self.learning_rate = mat_contents['learning_rate'] 263 | self.activation_rule = mat_contents['activation_rule'] 264 | self.relu_neg_slope = mat_contents['relu_neg_slope'] 265 | self.use_dropout_regularization = mat_contents['use_dropout_regularization'] 266 | self.dropout_threshold = mat_contents['dropout_threshold'] 267 | self.reg_strenght = mat_contents['reg_strenght'] 268 | self.use_regularization = mat_contents['use_regularization'] 269 | 270 | def __step_tanh(self, inputs, targets, ytr_missing): 271 | """ 272 | The one step in RNN computations using Tanhents function as non-linear activation function 273 | inputs, targets are both arrays of real numbers with shapes (input_size, 1) and (target_size, 1) respectively. 
274 | hprev is array of initial hidden state with shape (hidden_size, 1) 275 | Wxh, Whh, Why - the neurons input/output weights 276 | bh, by - the hidden/output layer bias 277 | returns the loss, score_mean, gradients on model parameters, and last hidden state 278 | """ 279 | # 280 | # forward pass 281 | # 282 | xs = inputs.T 283 | hs = np.tanh(np.dot(self.Wxh, xs) + np.dot(self.Whh, self.hprev) + self.bh) # hidden state 284 | if self.use_regularization and self.use_dropout_regularization: 285 | U1 = (np.random.rand(*hs.shape) < self.dropout_threshold) / self.dropout_threshold # dropout mask 286 | hs *= U1 # drop! 287 | ys = np.dot(self.Why, hs) + self.by # unnormalized next outputs 288 | ps = ys - targets.T 289 | loss = np.sum(np.abs(ps)) # L1 norm 290 | 291 | # 292 | # backward pass: compute gradients going backwards 293 | # 294 | dy = np.sign(ps) # the gradient for y only inherits the sign of the difference for L1 norm (http://cs231n.github.io/neural-networks-2/#reg) 295 | dWhy = np.dot(dy, hs.T) 296 | dby = dy 297 | dh = np.dot(self.Why.T, dy) # backprop into h 298 | dhraw = (1 - hs * hs) * dh # backprop through tanh nonlinearity 299 | dbh = dhraw 300 | dWxh = np.dot(dhraw, inputs) 301 | dWhh = np.dot(dhraw, self.hprev.T) 302 | 303 | # add L2 regularization gradient contribution if not dropout 304 | if self.use_regularization and not self.use_dropout_regularization: 305 | dWhy += self.reg_strenght * self.Why 306 | dWhh += self.reg_strenght * self.Whh 307 | dWxh += self.reg_strenght * self.Wxh 308 | 309 | for dparam in [dWxh, dWhh, dWhy, dbh, dby]: 310 | np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients 311 | 312 | score = self.__score_mean(np.abs(ps), ytr_missing) # IMPORTANT: use COVAR_y_MISSING flags for mean calculation without missed Y 313 | return loss, score, dWxh, dWhh, dWhy, dbh, dby, hs 314 | 315 | def __batch_step_tanh(self, inputs, targets, ytr_missing): 316 | """ 317 | The one step in RNN computations over min batch of input features using Tanhents function as non-linear activation function 318 | inputs,targets are both list of real numbers. 319 | hprev is Hx1 array of initial hidden state 320 | returns the loss, gradients on model parameters, and last hidden state 321 | """ 322 | input_size = len(inputs[0]) 323 | target_size = len(targets[0]) 324 | xs, hs, ys, ps = {}, {}, {}, {} 325 | hs[-1] = np.copy(self.hprev) 326 | loss = np.zeros((len(inputs), 1)) 327 | score = np.zeros((len(inputs), 1)) 328 | # forward pass 329 | for t in range(len(inputs)): 330 | xs[t] = np.reshape(inputs[t], (input_size, 1)) 331 | hs[t] = np.tanh(np.dot(self.Wxh, xs[t]) + np.dot(self.Whh, hs[t-1]) + self.bh) # hidden state 332 | if self.use_regularization and self.use_dropout_regularization: 333 | U1 = (np.random.rand(*hs[t].shape) < self.dropout_threshold) / self.dropout_threshold # dropout mask 334 | hs[t] *= U1 # drop! 
335 | ys[t] = np.dot(self.Why, hs[t]) + self.by 336 | ps[t] = ys[t] - np.reshape(targets[t], (target_size, 1)) 337 | loss[t] = np.sum(np.abs(ps[t])) # L1 norm 338 | score[t] = self.__score_mean(np.abs(ps[t]), ytr_missing[t]) 339 | 340 | # backward pass: compute gradients going backwards 341 | dWxh, dWhh, dWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why) 342 | dbh, dby = np.zeros_like(self.bh), np.zeros_like(self.by) 343 | dhnext = np.zeros_like(hs[0]) 344 | for t in reversed(range(len(inputs))): 345 | dy = np.sign(ps[t]) # the gradient for y only inherits the sign of the difference for L1 norm (http://cs231n.github.io/neural-networks-2/#losses) 346 | dWhy += np.dot(dy, hs[t].T) 347 | dby += dy 348 | dh = np.dot(self.Why.T, dy) + dhnext # backprop into h 349 | dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity 350 | dbh += dhraw 351 | dWxh += np.dot(dhraw, xs[t].T) 352 | dWhh += np.dot(dhraw, hs[t-1].T) 353 | dhnext = np.dot(self.Whh.T, dhraw) 354 | 355 | # add L2 regularization gradient contribution if not dropout 356 | if self.use_regularization and not self.use_dropout_regularization: 357 | dWhy += self.reg_strenght * self.Why 358 | dWhh += self.reg_strenght * self.Whh 359 | dWxh += self.reg_strenght * self.Wxh 360 | 361 | for dparam in [dWxh, dWhh, dWhy, dbh, dby]: 362 | np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients 363 | 364 | return np.average(loss), np.average(score), dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1] 365 | 366 | def __step_relu(self, inputs, targets, ytr_missing): 367 | """ 368 | The one step in RNN computations using ReLU function as non-linear activation function 369 | inputs, targets are both arrays of real numbers with shapes (input_size, 1) and (target_size, 1) respectively. 370 | hprev is array of initial hidden state with shape (hidden_size, 1) 371 | Wxh, Whh, Why - the neurons input/output weights 372 | bh, by - the hidden/output layer bias 373 | returns the loss, score_mean, gradients on model parameters, and last hidden state 374 | """ 375 | # 376 | # forward pass 377 | # 378 | xs = inputs.T 379 | #hs = np.maximum(0, np.dot(self.Wxh, xs) + np.dot(self.Whh, self.hprev) + self.bh) # hidden state, ReLU activation 380 | hs = np.dot(self.Wxh, xs) + np.dot(self.Whh, self.hprev) + self.bh 381 | hs[hs<0] *= self.relu_neg_slope 382 | if self.use_regularization and self.use_dropout_regularization: 383 | U1 = (np.random.rand(*hs.shape) < self.reg_strenght) / self.reg_strenght # dropout mask 384 | hs *= U1 # drop! 
385 | ys = np.dot(self.Why, hs) + self.by # unnormalized next outputs 386 | ps = ys - targets.T 387 | loss = np.sum(np.abs(ps)) # L1 norm 388 | 389 | # 390 | # backward pass: compute gradients going backwards 391 | # 392 | dy = np.sign(ps) # the gradient for y only inherits the sign of the difference for L1 norm (http://cs231n.github.io/neural-networks-2/#reg) 393 | dWhy = np.dot(dy, hs.T) 394 | dby = dy 395 | dh = np.dot(self.Why.T, dy) # backprop into h 396 | dh[hs < 0] = 0 # backprop through ReLU non-linearity (N.B. this zeroes the gradient in the negative region rather than scaling it by relu_neg_slope) 397 | dbh = dh 398 | dWxh = np.dot(dh, inputs) 399 | dWhh = np.dot(dh, self.hprev.T) 400 | 401 | # add L2 regularization gradient contribution if not dropout 402 | if self.use_regularization and not self.use_dropout_regularization: 403 | dWhy += self.reg_strenght * self.Why 404 | dWhh += self.reg_strenght * self.Whh 405 | dWxh += self.reg_strenght * self.Wxh 406 | 407 | #for dparam in [dWxh, dWhh, dWhy, dbh, dby]: 408 | # np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients 409 | 410 | score = self.__score_mean(np.abs(ps), ytr_missing) # IMPORTANT: use COVAR_y_MISSING flags for mean calculation without missed Y 411 | return loss, score, dWxh, dWhh, dWhy, dbh, dby, hs 412 | 413 | def __batch_step_relu(self, inputs, targets, ytr_missing): 414 | """ 415 | The one step in RNN computations over a mini-batch of input features using ReLU function as non-linear activation function 416 | inputs,targets are both list of real numbers. 417 | hprev is Hx1 array of initial hidden state 418 | returns the loss, gradients on model parameters, and last hidden state 419 | """ 420 | input_size = len(inputs[0]) 421 | target_size = len(targets[0]) 422 | xs, hs, ys, ps = {}, {}, {}, {} 423 | hs[-1] = np.copy(self.hprev) 424 | loss = np.zeros((len(inputs), 1)) 425 | score = np.zeros((len(inputs), 1)) 426 | # forward pass 427 | for t in range(len(inputs)): 428 | xs[t] = np.reshape(inputs[t], (input_size, 1)) 429 | #hs[t] = np.maximum(0, np.dot(self.Wxh, xs[t]) + np.dot(self.Whh, hs[t-1]) + self.bh) # hidden state, ReLU Activation 430 | hs[t] = np.dot(self.Wxh, xs[t]) + np.dot(self.Whh, hs[t-1]) + self.bh 431 | hs[t][hs[t] < 0] *= self.relu_neg_slope # leaky ReLU on the current hidden state (index with hs[t], not the dict hs) 432 | if self.use_regularization and self.use_dropout_regularization: 433 | U1 = (np.random.rand(*hs[t].shape) < self.reg_strenght) / self.reg_strenght # dropout mask 434 | hs[t] *= U1 # drop! 
435 | ys[t] = np.dot(self.Why, hs[t]) + self.by 436 | ps[t] = ys[t] - np.reshape(targets[t], (target_size, 1)) 437 | loss[t] = np.sum(np.abs(ps[t])) # L1 norm 438 | score[t] = self.__score_mean(np.abs(ps[t]), ytr_missing[t]) 439 | 440 | # backward pass: compute gradients going backwards 441 | dWxh, dWhh, dWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why) 442 | dbh, dby = np.zeros_like(self.bh), np.zeros_like(self.by) 443 | dhnext = np.zeros_like(hs[0]) 444 | for t in reversed(range(len(inputs))): 445 | dy = np.sign(ps[t]) # the gradient for y only inherits the sign of the difference for L1 norm (http://cs231n.github.io/neural-networks-2/#losses) 446 | dWhy += np.dot(dy, hs[t].T) 447 | dby += dy 448 | dh = np.dot(self.Why.T, dy) + dhnext # backprop into h 449 | dh[hs[t] < 0] = 0 # backprop through ReLU non-linearity 450 | dbh += dh 451 | dWxh += np.dot(dh, xs[t].T) 452 | dWhh += np.dot(dh, hs[t-1].T) 453 | dhnext = np.dot(self.Whh.T, dh) 454 | 455 | # add L2 regularization gradient contribution if not dropout 456 | if self.use_regularization and not self.use_dropout_regularization: 457 | dWhy += self.reg_strenght * self.Why 458 | dWhh += self.reg_strenght * self.Whh 459 | dWxh += self.reg_strenght * self.Wxh 460 | 461 | #for dparam in [dWxh, dWhh, dWhy, dbh, dby]: 462 | # np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients 463 | 464 | return np.average(loss), np.average(score), dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1] 465 | 466 | def __score_mean(self, abs_diff, y_missing): 467 | """ 468 | Calculates score mean on based absolute differences between Y predicted and target 469 | abs_diff = |Ypred - Yeval| 470 | y_missing the array with COVAR_y_MISSING flags with shape (target_size, 1) 471 | """ 472 | scores = abs_diff.flat[~y_missing] 473 | return np.mean(scores) 474 | 475 | def __make_score(self, mean_scores): 476 | """ 477 | Calculates final score from provided array of mean scores 478 | mean_scores the array of mean scores 479 | return score value 480 | """ 481 | n = len(mean_scores) 482 | sum_r = np.sum(mean_scores) 483 | score = 10 * (1 - sum_r/n) 484 | return score 485 | 486 | def __validate(self, y, y_target, y_missing): 487 | """ 488 | The method to validate calculated validation outputs against ground truth 489 | y the calculated predictions with shape (num_samples, output_size) 490 | y_target the ground trouth with shape (num_samples, output_size) 491 | y_missing the array of flags denoting missed ground trouth value for predicition with shape (num_samples, output_size) 492 | return calculated score and error values over provided data set 493 | """ 494 | num_samples = len(y) 495 | scores = np.zeros((num_samples, 1)) 496 | errors = np.zeros_like(scores) 497 | for t in range(num_samples): 498 | # find error per sample 499 | ps = y[t] - y_target[t] 500 | errors[t] = np.sum(np.abs(ps)) # L1 norm 501 | # find score per sample 502 | scores[t] = self.__score_mean(np.abs(ps), y_missing[t]) 503 | 504 | # find total score and error 505 | score = self.__make_score(scores) 506 | error = np.average(errors, axis=0) 507 | return error, score 508 | 509 | def __predict(self, Xvl, hprev): 510 | """ 511 | The RNN predict method 512 | Xvl - the test data features 513 | """ 514 | n = len(Xvl) 515 | input_size = len(Xvl[0]) 516 | y_est = np.zeros((n, self.n_outputs)) 517 | for t in range(n): 518 | x = np.reshape(Xvl[t], (input_size, 1)) 519 | hprev = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, hprev) + self.bh) 520 | y = np.dot(self.Why, hprev) 
+ self.by 521 | y_est[t] = y.T 522 | 523 | return y_est 524 | 525 | def __initNNParameters(self): 526 | """ 527 | Do NN parameters initialization according to provided data samples 528 | input_size the input layer size 529 | output_size the output layer size 530 | """ 531 | self.Wxh = np.random.randn(self.hidden_size, self.n_features) * 0.01 # input to hidden 532 | self.Whh = np.random.randn(self.hidden_size, self.hidden_size) * 0.01 # hidden to hidden 533 | self.Why = np.random.randn(self.n_outputs, self.hidden_size) * 0.01 # hidden to output 534 | self.bh = np.zeros((self.hidden_size, 1)) # hidden bias 535 | self.by = np.zeros((self.n_outputs, 1)) # output bias 536 | self.hprev = np.zeros((self.hidden_size,1)) 537 | 538 | def __activationFunction(self): 539 | """ 540 | Finds appropriate activation function depending on configuration 541 | """ 542 | step_f = None 543 | if self.use_batch_step: 544 | if self.activation_rule == 'Tanh': 545 | step_f = self.__batch_step_tanh 546 | elif self.activation_rule == 'ReLU': 547 | step_f = self.__batch_step_relu 548 | else: 549 | if self.activation_rule == 'Tanh': 550 | step_f = self.__step_tanh 551 | elif self.activation_rule == 'ReLU': 552 | step_f = self.__step_relu 553 | 554 | if step_f is None: 555 | raise ValueError('Unsupported activation function specified: {}'.format(self.activation_rule)) 556 | 557 | return step_f 558 | 559 | # gradient checking 560 | def __gradCheck(self, inputs, targets, ytr_missing): 561 | """ 562 | The gradient check to test if analytic and numerical gradients converge 563 | returns found gradient errors per parameter as map 564 | """ 565 | num_checks, delta = 10, 1e-5 566 | step_f = self.__activationFunction() 567 | 568 | _, _, dWxh, dWhh, dWhy, dbh, dby, _ = step_f(inputs, targets, ytr_missing) # step functions return (loss, score, gradients..., last hidden state) 569 | 570 | gradient_rel_errors = {} 571 | for param,dparam,name in zip([self.Wxh, self.Whh, self.Why, self.bh, self.by], [dWxh, dWhh, dWhy, dbh, dby], ['Wxh', 'Whh', 'Why', 'bh', 'by']): 572 | s0 = dparam.shape 573 | s1 = param.shape 574 | assert s0 == s1, 'Error dims dont match: %s and %s.' 
% (`s0`, `s1`) 575 | print name 576 | errors = np.zeros((num_checks, 1)) 577 | for i in xrange(num_checks): 578 | ri = int(uniform(0, param.size)) 579 | # evaluate cost at [x + delta] and [x - delta] 580 | old_val = param.flat[ri] 581 | param.flat[ri] = old_val + delta 582 | cg0, _, _, _, _, _, _, _ = step_f(inputs, targets, ytr_missing) # only the loss is needed here 583 | param.flat[ri] = old_val - delta 584 | cg1, _, _, _, _, _, _, _ = step_f(inputs, targets, ytr_missing) 585 | param.flat[ri] = old_val # reset old value for this parameter 586 | # fetch both numerical and analytic gradient 587 | grad_analytic = dparam.flat[ri] 588 | grad_numerical = (cg0 - cg1) / ( 2 * delta ) 589 | if grad_numerical + grad_analytic != 0: 590 | rel_error = abs(grad_analytic - grad_numerical) / abs(grad_numerical + grad_analytic) 591 | print '%f, %f => %e ' % (grad_numerical, grad_analytic, rel_error) 592 | # rel_error should be on order of 1e-7 or less 593 | errors[i] = rel_error 594 | else: 595 | errors[i] = 0 596 | 597 | # store relative gradient error average per parameter 598 | gradient_rel_errors[name] = np.average(errors) 599 | 600 | return gradient_rel_errors -------------------------------------------------------------------------------- /src/score_validator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Jul 24 17:15:12 2016 4 | 5 | @author: yaric 6 | """ 7 | import pandas as pd 8 | import numpy as np 9 | 10 | # read predictions 11 | y_pred_df = pd.read_csv('validation_predictions.csv', header=None) 12 | 13 | # read validation 14 | data_validation = pd.read_csv('data/training-validate.csv')#'data/training-small-validate.csv' 15 | y_val_df = data_validation.loc[ :,'y1':'y3'] 16 | # replace nans with 0 17 | y_val_df.fillna(0, inplace=True) 18 | # get flags indicating if Y present in data 19 | y_val_missing = np.array(data_validation.loc[:,'COVAR_y1_MISSING' : 'COVAR_y3_MISSING']) 20 | 21 | # do scoring 22 | y_pred = np.array(y_pred_df) 23 | y_val = np.array(y_val_df) 24 | 25 | assert len(y_pred) == len(y_val) 26 | 27 | scores = np.abs(y_pred - y_val) 28 | 29 | # the loops 30 | n = len(scores) 31 | means = np.zeros((n, 1)) 32 | for i in range(n): # simple loop 33 | means[i] = np.mean(scores[i][~y_val_missing[i]]) 34 | 35 | sum_r = np.sum(means) 36 | 37 | score = 10 * (1 - sum_r/n) 38 | 39 | print 'Score: %f, for %d rows' % (score, n) -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Jul 27 15:00:00 2016 4 | 5 | @author: yaric 6 | """ 7 | -------------------------------------------------------------------------------- /src/utils/data_slicer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Jul 21 21:28:07 2016 4 | 5 | @author: yaric 6 | """ 7 | 8 | import pandas as pd 9 | 10 | path_prefix = '../data/testData'#'data/training' 11 | 12 | # import data 13 | data = pd.read_csv(path_prefix + '.csv') 14 | 15 | # slice data 16 | small_data = data.loc[0 : 10000] 17 | small_data.to_csv(path_prefix + '-small.csv',header=True,index=False) 18 | -------------------------------------------------------------------------------- /src/utils/offline_preprocessor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 
Created on Tue Aug 9 10:54:14 2016 4 | 5 | @author: yaric 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from sklearn import decomposition 12 | 13 | import utils 14 | 15 | # the input file prefix of data sets 16 | input_file_prefix = '../../data/training-' # '../../data/training-small-' 17 | output_file_prefix = '../../data/training-preprocessed-' 18 | 19 | max_pca_components = 19 20 | 21 | def createDataFrame(X, y, y_missing): 22 | """ 23 | Creates pandas data frame from provided numpy arrays 24 | """ 25 | data = np.concatenate((y, X), axis=1) 26 | columns = ['y1', 'y2', 'y3'] 27 | for k in range(X.shape[1]): 28 | columns.append('X{}'.format(k)) 29 | 30 | data_df = pd.DataFrame(data, columns=columns) 31 | ymiss_df = pd.DataFrame(y_missing, columns=['COVAR_y1_MISSING', 'COVAR_y2_MISSING', 'COVAR_y3_MISSING']) 32 | df = data_df.join(ymiss_df) 33 | return df 34 | 35 | 36 | # import data 37 | train_df = pd.read_csv(input_file_prefix + 'train.csv') 38 | validate_df = pd.read_csv(input_file_prefix + 'validate.csv') 39 | 40 | # keep missing flags for both training and validation 41 | ytr_missing = np.array(train_df.loc[ :,'COVAR_y1_MISSING':'COVAR_y3_MISSING'], dtype=bool) 42 | yvl_missing = np.array(validate_df.loc[ :,'COVAR_y1_MISSING':'COVAR_y3_MISSING'], dtype=bool) 43 | 44 | # read data 45 | train_df['train_flag'] = True 46 | validate_df['train_flag'] = False 47 | data = pd.concat((train_df, validate_df)) 48 | 49 | # remove temporary data 50 | del train_df 51 | del validate_df 52 | 53 | # basic formatting 54 | Xtr, ytr, Xvl, yvl = utils.format_data(data, preprocessing=False) 55 | del data 56 | 57 | # 58 | # do preprocessing 59 | # 60 | scaler = decomposition.RandomizedPCA() 61 | #scaler = decomposition.SparsePCA(n_components=max_pca_components) 62 | #scaler = decomposition.PCA(n_components='mle') 63 | print 'PCA max features to keep: %d' % (max_pca_components) 64 | Xtr = scaler.fit_transform(Xtr) # fit only for train data (http://cs231n.github.io/neural-networks-2/#datapre) 65 | Xvl = scaler.transform(Xvl) 66 | 67 | 68 | # 69 | # write result 70 | # 71 | train_df = createDataFrame(Xtr, ytr, ytr_missing) 72 | validate_df = createDataFrame(Xvl, yvl, yvl_missing) 73 | 74 | train_df.to_csv(output_file_prefix + 'train.csv', header=True, index=False) 75 | validate_df.to_csv(output_file_prefix + 'validate.csv', header=True, index=False) 76 | 77 | print '\n---------------------\nResult train:\n%s\n' % (train_df.describe()) 78 | print '\n---------------------\nResult validate:\n%s\n' % (validate_df.describe()) 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /src/utils/train_validate_splitter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Jul 24 15:59:19 2016 4 | 5 | @author: yaric 6 | """ 7 | 8 | from sklearn.cross_validation import train_test_split 9 | import pandas as pd 10 | 11 | path_prefix = '../../data/training-pca'#'../../data/training'#'../../data/training-small-pca'# 12 | 13 | # import data 14 | df = pd.read_csv(path_prefix + '.csv') 15 | 16 | print 'Input:\n%s\n\n' % (df.describe()) 17 | 18 | # read X, Y 19 | y = df.loc[:, 'y1':'y3'] 20 | X = df.loc[:, 'STUDYID' : 'COVAR_y3_MISSING'] 21 | #X = df.loc[:, 'COVAR_y1_MISSING':'PC19'] 22 | 23 | # split 24 | X_train, X_test, y_train, y_test = train_test_split(X, y) 25 | X_train_df = pd.DataFrame(X_train, columns=X.columns) 26 | print 'X train:\n%s\n' % (X_train_df.describe()) 27 | 
y_train_df = pd.DataFrame(y_train, columns=y.columns) 28 | print '\n\nY train:\n%s\n' % (y_train_df.describe()) 29 | 30 | X_test_df = pd.DataFrame(X_test, columns=X.columns) 31 | print '\n---------------------\nX test:\n%s\n' % (X_test_df.describe()) 32 | y_test_df = pd.DataFrame(y_test, columns=y.columns) 33 | print '\n\nY test:\n%s\n---------------------\n' % (y_test_df.describe()) 34 | 35 | # combine and save 36 | data_train_df = pd.concat([y_train_df, X_train_df], axis=1, join_axes=[y_train_df.index]) 37 | print '\n---------------------\nResult train:\n%s\n' % (data_train_df.describe()) 38 | 39 | data_train_df.to_csv(path_prefix + '-train.csv',header=True,index=False) 40 | 41 | data_test_df = pd.concat([y_test_df, X_test_df], axis=1, join_axes=[y_test_df.index]) 42 | print '\n---------------------\nResult test:\n%s\n' % (data_test_df.describe()) 43 | 44 | data_test_df.to_csv(path_prefix + '-validate.csv',header=True,index=False) 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/utils/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Aug 4 22:42:08 2016 4 | 5 | Utilities 6 | 7 | @author: yaric 8 | """ 9 | 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from sklearn.preprocessing import StandardScaler 15 | from sklearn import decomposition 16 | 17 | def plotResultsValidate(train_errors, train_scores, validation_errors, validation_scores): 18 | """ 19 | Plots training results 20 | """ 21 | nb_epochs = len(train_errors) 22 | epochs_range = np.arange(nb_epochs) 23 | 24 | plt.figure() 25 | plt.subplot(2, 1, 1) 26 | plt.title("Train/Eval loss per epoch") 27 | plt.plot(epochs_range, train_errors * 100, 'b-', label='Train') 28 | plt.plot(epochs_range, validation_errors * 100, 'r-', label='Validate') 29 | plt.xlabel('epochs') 30 | plt.ylabel('error') 31 | plt.ylim(0., np.max(train_errors) * 100 + 5) 32 | plt.legend(loc="upper right") 33 | 34 | plt.subplot(2, 1, 2) 35 | plt.title("Train/Eval scores per epoch") 36 | plt.plot(epochs_range, train_scores, 'g-', label='Train') 37 | plt.plot(epochs_range, validation_scores, 'r-', label='Validate') 38 | plt.xlabel('epochs') 39 | plt.ylabel('score') 40 | plt.ylim(9., 10) 41 | plt.legend(loc="lower right") 42 | 43 | plt.subplots_adjust(0.1, 0.10, 0.98, 0.94, 0.2, 0.6) 44 | plt.show() 45 | 46 | def plotResultsTest(train_errors, train_scores): 47 | """ 48 | Plots test results 49 | """ 50 | nb_epochs = len(train_errors) 51 | epochs_range = np.arange(nb_epochs) 52 | 53 | plt.figure() 54 | plt.subplot(2, 1, 1) 55 | plt.title("Test loss per epoch") 56 | plt.plot(epochs_range, train_errors * 100, 'b-', label='Train') 57 | plt.xlabel('epochs') 58 | plt.ylabel('error') 59 | plt.ylim(0., np.maximum(0.5, np.max(train_errors)) * 100 + 5) 60 | 61 | plt.subplot(2, 1, 2) 62 | plt.title("Test scores per epoch") 63 | plt.plot(epochs_range, train_scores, 'g-', label='Train') 64 | plt.xlabel('epochs') 65 | plt.ylabel('score') 66 | plt.ylim(9., 10) 67 | 68 | plt.subplots_adjust(0.1, 0.10, 0.98, 0.94, 0.2, 0.6) 69 | plt.show() 70 | 71 | def plotOutputs(y_pred, res_name): 72 | """ 73 | Plot outputs 74 | """ 75 | bins_count = 100 76 | # make histograms 77 | y1_hist, _ = np.histogram(y_pred[:,0], bins=bins_count) 78 | y2_hist, _ = np.histogram(y_pred[:,1], bins=bins_count) 79 | y3_hist, _ = np.histogram(y_pred[:,2], bins=bins_count) 80 | 81 | # draw scatter 82 | x = np.arange(bins_count) 
83 | 84 | y1_plot = plt.scatter(x, np.log10(y1_hist), marker='o', color='b') 85 | y2_plot = plt.scatter(x, np.log10(y2_hist), marker='o', color='r') 86 | y3_plot = plt.scatter(x, np.log10(y3_hist), marker='o', color='g') 87 | 88 | plt.grid(color='black', linestyle='-') 89 | plt.title(res_name) 90 | plt.legend((y1_plot, y2_plot, y3_plot), ('y1','y2','y3'), 91 | scatterpoints=1, loc='upper right') 92 | # save figure 93 | plt.savefig('{}.{}'.format(res_name, 'png'), dpi=72) 94 | # show figure 95 | plt.show() 96 | 97 | def format_data_preprocessed(data, dtype = np.float): 98 | """ 99 | The input data preprocessing 100 | data the input data frame 101 | preprocessing whether to use features preprocessing (Default: False) 102 | dtype the data type for ndarray (Default: np.float) 103 | """ 104 | train_flag = np.array(data['train_flag']) 105 | 106 | print 'Formatting input data, size: %d' % (len(train_flag)) 107 | 108 | # outputs, nans excluded 109 | y = data.loc[ :,'y1':'y3'] 110 | # replace nans with 0 111 | y.fillna(0, inplace=True) 112 | 113 | # collect only train data 114 | ytr = np.array(y)[train_flag] 115 | # collect only validation data 116 | yvl = np.array(y)[~train_flag] 117 | 118 | print 'Train data outputs collected, size: %d' % (len(ytr)) 119 | print '\n\nData before encoding\n\n%s' % data.describe() 120 | 121 | 122 | # dropping target and synthetic columns 123 | data.drop(['y1','y2','y3','train_flag', 'COVAR_y1_MISSING', 'COVAR_y2_MISSING', 'COVAR_y3_MISSING'], axis=1, inplace=True) 124 | 125 | print '\n\nData after encoding\n\n%s' % data.describe() 126 | 127 | # split into training and test 128 | X = np.array(data).astype(dtype) 129 | 130 | Xtr = X[train_flag] 131 | Xvl = X[~train_flag] 132 | 133 | #print 'Train data first: %s' % (Xtr[0]) 134 | #print 'Evaluate data first: %s' % (Xvl[0]) 135 | 136 | return Xtr, ytr, Xvl, yvl 137 | 138 | def format_data_features_selected(data, dtype = np.float): 139 | """ 140 | The input data processign based on preselected relevant features 141 | """ 142 | columns_to_keep = ['COVAR_CONTINUOUS_1', 'COVAR_CONTINUOUS_10', 'COVAR_CONTINUOUS_11', 143 | 'COVAR_CONTINUOUS_12', 'COVAR_CONTINUOUS_13', 'COVAR_CONTINUOUS_14', 144 | 'COVAR_CONTINUOUS_15', 'COVAR_CONTINUOUS_16', 'COVAR_CONTINUOUS_17', 145 | 'COVAR_CONTINUOUS_18', 'COVAR_CONTINUOUS_2', 'COVAR_CONTINUOUS_20', 146 | 'COVAR_CONTINUOUS_21', 'COVAR_CONTINUOUS_22', 'COVAR_CONTINUOUS_23', 147 | 'COVAR_CONTINUOUS_23', 'COVAR_CONTINUOUS_24', 'COVAR_CONTINUOUS_25', 148 | 'COVAR_CONTINUOUS_26', 'COVAR_CONTINUOUS_27', 'COVAR_CONTINUOUS_28', 149 | 'COVAR_CONTINUOUS_29', 'COVAR_CONTINUOUS_3', 'COVAR_CONTINUOUS_30', 150 | 'COVAR_CONTINUOUS_4', 'COVAR_CONTINUOUS_5', 'COVAR_CONTINUOUS_6', 151 | 'COVAR_CONTINUOUS_7', 'COVAR_CONTINUOUS_8', 'COVAR_CONTINUOUS_9', 152 | 'COVAR_ORDINAL_1', 'COVAR_ORDINAL_2', 'COVAR_ORDINAL_3', 153 | 'COVAR_ORDINAL_4', 'COVAR_ORDINAL_5', 'COVAR_ORDINAL_6', 154 | 'COVAR_ORDINAL_7', 'COVAR_ORDINAL_8', 155 | 'TIMEVAR1', 'TIMEVAR2', 156 | 'COVAR_y1_MISSING', 'COVAR_y2_MISSING', 'COVAR_y3_MISSING'] 157 | train_flag = np.array(data['train_flag']) 158 | 159 | print 'Formatting input data, size: %d' % (len(train_flag)) 160 | 161 | # outputs, nans excluded 162 | y = data.loc[ :,'y1':'y3'] 163 | # replace nans with 0 164 | y.fillna(0, inplace=True) 165 | 166 | # collect only train data 167 | ytr = np.array(y)[train_flag] 168 | # collect only validation data 169 | yvl = np.array(y)[~train_flag] 170 | 171 | print 'Train data outputs collected, size: %d' % (len(ytr)) 172 | print '\n\nData before 
encoding\n\n%s' % data.describe() 173 | 174 | 175 | # dropping columns 176 | features = data.loc[:, columns_to_keep] 177 | 178 | 179 | # do features construction 180 | drop_columns = ['COVAR_CONTINUOUS_24', 'COVAR_CONTINUOUS_18', 'COVAR_ORDINAL_4', 181 | 'COVAR_CONTINUOUS_1', 'COVAR_ORDINAL_1', 'COVAR_CONTINUOUS_13'] 182 | data.drop(drop_columns, axis=1, inplace=True) 183 | """ 184 | studyid = np.array(data.loc[:, 'STUDYID']).astype(dtype) 185 | subjid = np.array(data.loc[:, 'SUBJID']).astype(dtype) 186 | del data 187 | 188 | userid = np.multiply(studyid, subjid) 189 | #userid = (userid - userid.mean()) / userid.std() # zero mean and standard deviation 1 190 | userid = np.log(userid) / np.sum(np.log(userid)) # 0 to 1 191 | 192 | userid_df = pd.DataFrame(userid, columns=['USERID']) 193 | features = features.join(userid_df) 194 | """ 195 | 196 | # replace nans with 0 197 | # the least sophisticated approach possible 198 | features.fillna(0, inplace=True) 199 | 200 | print '\n\nData after encoding\n\n%s' % features.describe() 201 | 202 | # split into training and test 203 | X = np.array(features).astype(dtype) 204 | 205 | Xtr = X[train_flag] 206 | Xvl = X[~train_flag] 207 | 208 | #print 'Train data first: %s' % (Xtr[0]) 209 | #print 'Evaluate data first: %s' % (Xvl[0]) 210 | 211 | return Xtr, ytr, Xvl, yvl 212 | 213 | def format_data(data, preprocessing=False, dtype = np.float): 214 | """ 215 | The input data preprocessing 216 | data the input data frame 217 | preprocessing whether to use features preprocessing (Default: False) 218 | dtype the data type for ndarray (Default: np.float) 219 | """ 220 | train_flag = np.array(data['train_flag']) 221 | 222 | print 'Formatting input data, size: %d' % (len(train_flag)) 223 | 224 | # outputs, nans excluded 225 | y = data.loc[ :,'y1':'y3'] 226 | # replace nans with 0 227 | y.fillna(0, inplace=True) 228 | 229 | # collect only train data 230 | ytr = np.array(y)[train_flag] 231 | # collect only validation data 232 | yvl = np.array(y)[~train_flag] 233 | 234 | print 'Train data outputs collected, size: %d' % (len(ytr)) 235 | print '\n\nData before encoding\n\n%s' % data.describe() 236 | 237 | 238 | # dropping columns 239 | if preprocessing: 240 | data.drop(['y1','y2','y3','train_flag'], axis=1, inplace=True) # keep SUBJID 241 | else: 242 | data.drop(['y1','y2','y3','SUBJID','train_flag'], axis=1, inplace=True) 243 | 244 | # categorical encoding 245 | data = pd.get_dummies(data,columns=['STUDYID', u'SITEID', u'COUNTRY', 246 | 'COVAR_NOMINAL_1','COVAR_NOMINAL_2', 247 | 'COVAR_NOMINAL_3','COVAR_NOMINAL_4', 248 | 'COVAR_NOMINAL_5','COVAR_NOMINAL_6', 249 | 'COVAR_NOMINAL_7','COVAR_NOMINAL_8', 250 | 'COVAR_y1_MISSING', 'COVAR_y2_MISSING', 251 | 'COVAR_y3_MISSING']) 252 | 253 | # replace nans with 0 254 | # the least sophisticated approach possible 255 | data.fillna(0, inplace=True) 256 | 257 | print '\n\nData after encoding\n\n%s' % data.describe() 258 | 259 | # split into training and test 260 | X = np.array(data).astype(dtype) 261 | 262 | Xtr = X[train_flag] 263 | Xvl = X[~train_flag] 264 | 265 | #print 'Train data first: %s' % (Xtr[0]) 266 | #print 'Evaluate data first: %s' % (Xvl[0]) 267 | 268 | return Xtr, ytr, Xvl, yvl 269 | 270 | # The data preprocessing 271 | def preprocess(Xtr, Xvl, use_pca, max_pca_components=None): 272 | """ 273 | The data preprocessing 274 | Xtr - the training data features 275 | Xvl - the test data features 276 | use_pca - whether to use PCA for feature space reduction 277 | max_pca_components - the maximal number of PCA components 
to extract 278 | return preprocessed features 279 | """ 280 | if use_pca: 281 | if max_pca_components == None: 282 | raise "Please specify maximal number of PCA components to extract" 283 | #scaler = decomposition.RandomizedPCA(n_components=max_features) 284 | scaler = decomposition.SparsePCA(n_components=max_pca_components) 285 | print 'PCA max features to keep: %d' % (max_pca_components) 286 | Xtr = scaler.fit_transform(Xtr) # fit only for train data (http://cs231n.github.io/neural-networks-2/#datapre) 287 | Xvl = scaler.transform(Xvl) 288 | else: 289 | scaler = StandardScaler(copy=False) 290 | # scale only first column 'SUBJID' 291 | xtr_subj = Xtr[:,:1] 292 | xvl_subj = Xvl[:,:1] 293 | xtr_subj = scaler.fit_transform(xtr_subj) # fit only for train data (http://cs231n.github.io/neural-networks-2/#datapre) 294 | xvl_subj = scaler.transform(xvl_subj) 295 | 296 | print 'Train data mean: %f, variance: %f' % (Xtr.mean(), Xtr.std()) 297 | print 'Test data mean: %f, variance: %f' % (Xvl.mean(), Xvl.std()) 298 | 299 | return Xtr, Xvl 300 | 301 | def rescale(values, factor=1., dtype = np.float): 302 | 303 | factor = np.cast[dtype](factor) 304 | _,svs,_ = np.linalg.svd(values) 305 | #svs[0] is the largest singular value 306 | values = values / svs[0] 307 | return values -------------------------------------------------------------------------------- /src/validation_baseline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | This is a baseline for the validation set. 4 | 5 | We're making the simplest choices at each step, 6 | which can and should be improved on: 7 | 8 | (1) The subject id variable is being ignored. 9 | (2) The missing values are all being set to 0. 10 | (3) There are three outputs and we are training three separate models. 11 | (4) No feature selection or dimensionality reduction is being performed. 
12 | ''' 13 | import time 14 | import datetime 15 | 16 | import pandas as pd 17 | import numpy as np 18 | from sklearn.ensemble import RandomForestRegressor 19 | from utils import utils 20 | 21 | def format_data(data): 22 | train_flag = np.array(data['train_flag']) 23 | 24 | # outputs, nans included 25 | ytr1 = np.array(data['y1'])[train_flag] 26 | ytr2 = np.array(data['y2'])[train_flag] 27 | ytr3 = np.array(data['y3'])[train_flag] 28 | 29 | # dropping columns 30 | # subject id is not good for tree-based models 31 | data.drop(['y1','y2','y3','SUBJID','train_flag'], axis=1, inplace=True) 32 | 33 | # categorical encoding 34 | data = pd.get_dummies(data,columns=['STUDYID', u'SITEID', u'COUNTRY', 35 | 'COVAR_NOMINAL_1','COVAR_NOMINAL_2', 36 | 'COVAR_NOMINAL_3','COVAR_NOMINAL_4', 37 | 'COVAR_NOMINAL_5','COVAR_NOMINAL_6', 38 | 'COVAR_NOMINAL_7','COVAR_NOMINAL_8']) 39 | 40 | # replace nans with 0 41 | # the least sophisticated approach possible 42 | data.fillna(0,inplace=True) 43 | 44 | # split into training and test 45 | X = np.array(data).astype(np.float) 46 | Xtr = X[train_flag] 47 | Xvl = X[~train_flag] 48 | 49 | return Xtr, ytr1, ytr2, ytr3, Xvl 50 | 51 | 52 | def format_data_preprocessed(data, dtype = np.float): 53 | columns_to_keep = ['COVAR_CONTINUOUS_1', 'COVAR_CONTINUOUS_10', 'COVAR_CONTINUOUS_11', 54 | 'COVAR_CONTINUOUS_12', 'COVAR_CONTINUOUS_13', 'COVAR_CONTINUOUS_14', 55 | 'COVAR_CONTINUOUS_15', 'COVAR_CONTINUOUS_16', 'COVAR_CONTINUOUS_17', 56 | 'COVAR_CONTINUOUS_18', 'COVAR_CONTINUOUS_2', 'COVAR_CONTINUOUS_20', 57 | 'COVAR_CONTINUOUS_21', 'COVAR_CONTINUOUS_22', 'COVAR_CONTINUOUS_23', 58 | 'COVAR_CONTINUOUS_23', 'COVAR_CONTINUOUS_24', 'COVAR_CONTINUOUS_25', 59 | 'COVAR_CONTINUOUS_26', 'COVAR_CONTINUOUS_27', 'COVAR_CONTINUOUS_28', 60 | 'COVAR_CONTINUOUS_29', 'COVAR_CONTINUOUS_3', 'COVAR_CONTINUOUS_30', 61 | 'COVAR_CONTINUOUS_4', 'COVAR_CONTINUOUS_5', 'COVAR_CONTINUOUS_6', 62 | 'COVAR_CONTINUOUS_7', 'COVAR_CONTINUOUS_8', 'COVAR_CONTINUOUS_9', 63 | 'COVAR_ORDINAL_1', 'COVAR_ORDINAL_2', 'COVAR_ORDINAL_3', 64 | 'COVAR_ORDINAL_4', 'TIMEVAR1', 'TIMEVAR2', 65 | 'COVAR_y1_MISSING', 'COVAR_y2_MISSING', 'COVAR_y3_MISSING'] 66 | train_flag = np.array(data['train_flag']) 67 | 68 | print 'Formatting input data, size: %d' % (len(train_flag)) 69 | 70 | # outputs, nans included 71 | ytr1 = np.array(data['y1'])[train_flag] 72 | ytr2 = np.array(data['y2'])[train_flag] 73 | ytr3 = np.array(data['y3'])[train_flag] 74 | 75 | print 'Train data outputs collected, size: %d' % (len(ytr1)) 76 | 77 | # dropping columns 78 | features = data.loc[:, columns_to_keep] 79 | 80 | # do features construction 81 | """ 82 | drop_columns = ['COVAR_CONTINUOUS_24', 'COVAR_CONTINUOUS_18', 'COVAR_ORDINAL_4', 83 | 'COVAR_CONTINUOUS_1', 'COVAR_ORDINAL_1', 'COVAR_CONTINUOUS_13'] 84 | data.drop(drop_columns, axis=1, inplace=True) 85 | """ 86 | 87 | # replace nans with 0 88 | # the least sophisticated approach possible 89 | features.fillna(0, inplace=True) 90 | 91 | print '\n\nData after encoding\n\n%s' % features.describe() 92 | 93 | # split into training and test 94 | X = np.array(features).astype(dtype) 95 | 96 | Xtr = X[train_flag] 97 | Xvl = X[~train_flag] 98 | 99 | #print 'Train data first: %s' % (Xtr[0]) 100 | #print 'Evaluate data first: %s' % (Xvl[0]) 101 | 102 | return Xtr, ytr1, ytr2, ytr3, Xvl 103 | 104 | # the file prefix of debug data sets 105 | debug_file_prefix = '../data/training-small-' # '../data/training-' # 106 | # debug mode switch 107 | DEBUG = False # True # 108 | 109 | # import data 110 | if DEBUG: 111 | 
data_train = pd.read_csv(debug_file_prefix + 'train.csv') 112 | data_validation = pd.read_csv(debug_file_prefix + 'validate.csv') 113 | else: 114 | data_train = pd.read_csv('../data/training.csv') 115 | data_validation = pd.read_csv('../data/testData.csv') 116 | 117 | data_train['train_flag'] = True 118 | data_validation['train_flag'] = False 119 | data_validation['y1'] = np.nan 120 | data_validation['y2'] = np.nan 121 | data_validation['y3'] = np.nan 122 | data = pd.concat((data_train,data_validation)) 123 | del data_train 124 | del data_validation 125 | 126 | # basic formatting 127 | Xtr, ytr1, ytr2, ytr3, Xvl = format_data_preprocessed(data) # format_data(data) 128 | del data 129 | 130 | print 'Start regressor' 131 | 132 | start_time = datetime.datetime.fromtimestamp(time.time()) 133 | 134 | # random forest regressor 135 | rfr = RandomForestRegressor(n_estimators=100) 136 | 137 | # naive strategy: for each ytr, train where the output isn't missing 138 | present_flag_1 = ~np.isnan(ytr1) 139 | rfr.fit(Xtr[present_flag_1],ytr1[present_flag_1]) 140 | yvl1_est = rfr.predict(Xvl) 141 | 142 | print 'yvl1_est estimated' 143 | 144 | present_flag_2 = ~np.isnan(ytr2) 145 | rfr.fit(Xtr[present_flag_2],ytr2[present_flag_2]) 146 | yvl2_est = rfr.predict(Xvl) 147 | 148 | print 'yvl2_est estimated' 149 | 150 | present_flag_3 = ~np.isnan(ytr3) 151 | rfr.fit(Xtr[present_flag_3],ytr3[present_flag_3]) 152 | yvl3_est = rfr.predict(Xvl) 153 | 154 | print 'yvl3_est estimated' 155 | 156 | # The time spent 157 | finish_date = datetime.datetime.fromtimestamp(time.time()) 158 | delta = finish_date - start_time 159 | print '\n------------------------\nTrain/Test time: \n%s\n' % (delta) 160 | 161 | # save results as csv 162 | st = datetime.datetime.fromtimestamp(time.time()).strftime('%d_%m_%H_%M') 163 | res_name = '../vp_tree_{}'.format(st) 164 | yvl = pd.DataFrame({'yvl1_est':yvl1_est,'yvl2_est':yvl2_est,'yvl3_est':yvl3_est}) 165 | yvl.to_csv('{}.{}'.format(res_name, 'csv'), header=False, index=False) 166 | 167 | # describe predictions 168 | print '\n------------------------\nPredictions:\n%s' % yvl.describe() 169 | 170 | # plot outputs 171 | n = len(yvl1_est) 172 | yvl_est = np.concatenate((np.reshape(yvl1_est, (n, 1)), np.reshape(yvl2_est, (n, 1)), np.reshape(yvl3_est, (n, 1))), axis=1) 173 | utils.plotOutputs(yvl_est, res_name) -------------------------------------------------------------------------------- /src/vanila_rnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Jul 21 18:13:32 2016 4 | 5 | The plain vanila Recurrent NN with Tanhents/ReLU activation rules and 6 | Adagrad/RMSProp parameters update schemes 7 | 8 | @author: yaric 9 | """ 10 | import time 11 | import datetime 12 | 13 | import pandas as pd 14 | import numpy as np 15 | 16 | from rnn.simple_rnn import RNN 17 | 18 | from utils import utils 19 | 20 | # hyperparameters 21 | hidden_size = 50 # size of hidden layer of neurons 22 | n_epochs = 10 # 60 # 81#the number of learning epochs 23 | 24 | # for RMSProp [0.0001](without regularization); 25 | # for Adagrad [0.05](without regularization);[1e-4](with dropout 0.8) 26 | # for Adam[1e-3,1e-4] (with L2 regularization); 27 | # for AdaMax [5e-4] 28 | learning_rate = 5e-4#0.05#1e-4# 29 | batch_step_size=100#200 30 | param_update_scheme='Adam' #'Adagrad' #'RMSProp' # 'AdaMax' # 31 | activation_rule='Tanh' #'ReLU' # 32 | relu_neg_slope=0.001 # 0.01 33 | # whether to shuffle data samles in order to use Stochastic 
Gradient Descent like mechanics when batch processing 34 | sgd_shuffle= True # False # 35 | 36 | # The dropout regularization parameters 37 | use_dropout_regularization=False#True# 38 | dropout_threshold=0.75 39 | # The L2 regularization strength 40 | reg_strenght=1e-3# 41 | use_regularization=False # True # 42 | 43 | # Whether to preprocess input features (normalization, standardization, PCA, etc) 44 | USE_PREPROCESSING = False #True# 45 | # Whether to use single step (False) or batch step training (True) 46 | USE_BATCH_TRAINING = True #False # 47 | # Whether to check gradient 48 | CHECK_GRADIENT = False #True 49 | 50 | # debug mode switch 51 | DEBUG = False #True # 52 | # Whether to save model when in debug mode (in production mode model will be saved anyway) 53 | SAVE_MODEL_DEBUG = False #True # 54 | 55 | # Whether to use existing trained model for predicition only 56 | PREDICT_ONLY = False #True # 57 | 58 | 59 | def main(): 60 | # import data 61 | if DEBUG: 62 | data_train = pd.read_csv('../data/training-train.csv')#pd.read_csv('../data/training-small-train.csv')# 63 | data_validation = pd.read_csv('../data/training-validate.csv')#pd.read_csv('../data/training-small-validate.csv')# 64 | else: 65 | data_train = pd.read_csv('../data/training.csv') 66 | data_validation = pd.read_csv('../data/testData.csv') 67 | 68 | data_train['train_flag'] = True 69 | data_validation['train_flag'] = False 70 | data = pd.concat((data_train, data_validation)) 71 | 72 | # keep missing flags for both training and validation 73 | ytr_missing = np.array(data_train.loc[ :,'COVAR_y1_MISSING':'COVAR_y3_MISSING']) 74 | yvl_missing = np.array(data_validation.loc[ :,'COVAR_y1_MISSING':'COVAR_y3_MISSING']) 75 | 76 | # remove temporary data 77 | del data_train 78 | del data_validation 79 | 80 | # basic formatting 81 | Xtr, ytr, Xvl, yvl = utils.format_data(data, preprocessing=USE_PREPROCESSING) 82 | del data 83 | 84 | # preprocess data 85 | if USE_PREPROCESSING: 86 | use_pca = False # apply PCA (True) or standard normalization (False) 87 | Xtr, Xvl = utils.preprocess(Xtr, Xvl, use_pca) 88 | 89 | # create RNN instance 90 | n_features = len(Xtr[0]) 91 | n_outputs = len(ytr[0]) 92 | nn_solver = RNN(n_features=n_features, n_outputs=n_outputs, 93 | n_neurons=hidden_size, param_update_scheme=param_update_scheme, 94 | learning_rate = learning_rate, activation_rule = activation_rule, 95 | use_batch_step=USE_BATCH_TRAINING, batch_step_size=batch_step_size, 96 | relu_neg_slope=relu_neg_slope, 97 | use_dropout_regularization=use_dropout_regularization, dropout_threshold=dropout_threshold, 98 | reg_strenght=reg_strenght, use_regularization=use_regularization, 99 | sgd_shuffle=sgd_shuffle) 100 | 101 | if not PREDICT_ONLY: 102 | trainAndTest(nn_solver, Xtr, ytr, ytr_missing, Xvl, yvl, yvl_missing) 103 | else: 104 | predictByModel(nn_solver, Xvl, '../models/DeepNN/model_2016-08-03T15_39_15.mat') 105 | 106 | 107 | def trainAndTest(nn_solver, Xtr, ytr, ytr_missing, Xvl, yvl, yvl_missing): 108 | """ 109 | The train and test runner 110 | """ 111 | if DEBUG: 112 | # train with validation 113 | train_errors, train_scores, validation_errors, validation_scores = nn_solver.train( 114 | Xtr = Xtr, ytr = ytr, ytr_missing = ytr_missing, 115 | n_epochs = n_epochs, Xvl = Xvl, yvl = yvl, yvl_missing = yvl_missing) 116 | # plot results 117 | utils.plotResultsValidate(train_errors, train_scores, validation_errors, validation_scores) 118 | else: 119 | # train without validation 120 | train_errors, train_scores = nn_solver.train( 121 | Xtr = Xtr, ytr 
= ytr, ytr_missing = ytr_missing, 122 | n_epochs = n_epochs) 123 | # plot results 124 | utils.plotResultsTest(train_errors, train_scores) 125 | 126 | # and save model 127 | if DEBUG == False or (DEBUG and SAVE_MODEL_DEBUG): 128 | st = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%dT%H_%M_%S') 129 | m_name = '../models/DeepNN/model_{}.mat'.format(st) 130 | nn_solver.saveModel(m_name) 131 | 132 | # test data predict 133 | predict(nn_solver, Xvl) 134 | 135 | 136 | def predictByModel(nn_solver, Xvl, model_name): 137 | """ 138 | Method to make prediction on saved model 139 | """ 140 | nn_solver.loadModel(model_name) 141 | 142 | predict(nn_solver, Xvl) 143 | 144 | 145 | def predict(nn_solver, Xvl): 146 | """ 147 | Do actual predicition 148 | """ 149 | yvl_est = nn_solver.predict(Xvl) 150 | 151 | # substitute negative with zeros (negative values mark absent Y) 152 | yvl_est = yvl_est.clip(min=0, max=1) 153 | 154 | assert len(yvl_est) == len(Xvl) 155 | 156 | # save predictions as csv 157 | if DEBUG: 158 | res_name = '../validation_predictions' 159 | else: 160 | st = datetime.datetime.fromtimestamp(time.time()).strftime('%d_%m_%H_%M') 161 | res_name = '../vp_{}'.format(st) 162 | yvl = pd.DataFrame({'yvl1_est':yvl_est[:,0],'yvl2_est':yvl_est[:,1],'yvl3_est':yvl_est[:,2]}) 163 | yvl.to_csv('{}.{}'.format(res_name, 'csv'),header=False,index=False) 164 | 165 | # describe predictions 166 | print '\n------------------------\nPredictions:\n%s' % yvl.describe() 167 | 168 | # plot outputs 169 | utils.plotOutputs(yvl_est, res_name) 170 | 171 | 172 | 173 | if __name__ == '__main__': 174 | main() 175 | 176 | --------------------------------------------------------------------------------
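A minimal usage sketch (editor's addition, not part of the original sources) for the RNN class defined in src/rnn/simple_rnn.py. It assumes the same Python 2 environment as the rest of src/ and that it is run from the src/ directory so the rnn package is importable; the toy array sizes, hyperparameters, and model file name are illustrative only.

import numpy as np
from rnn.simple_rnn import RNN

# toy data: 200 samples, 5 features, 3 outputs, no missing targets
Xtr = np.random.rand(200, 5)
ytr = np.random.rand(200, 3)
ytr_missing = np.zeros((200, 3), dtype=bool)

# single-step training with Adagrad updates and tanh activations
nn = RNN(n_features=5, n_outputs=3, n_neurons=20,
         param_update_scheme='Adagrad', learning_rate=0.05,
         activation_rule='Tanh')
train_errors, train_scores = nn.train(Xtr=Xtr, ytr=ytr, ytr_missing=ytr_missing, n_epochs=2)

# predict on new samples (hidden state is reset unless use_prev_state=True)
y_est = nn.predict(Xtr)
print(y_est.shape)  # (200, 3)

# persist the trained parameters for later reuse via loadModel
nn.saveModel('example_rnn.mat')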