├── .gitignore ├── LICENSE ├── README.md ├── data └── TaxiBJ │ └── ReadMe.md ├── deepst ├── __init__.py ├── config.py ├── datasets │ ├── BikeNYC.py │ ├── ReadMe.md │ ├── STDATA.py │ ├── STMatrix.py │ ├── TaxiBJ.py │ └── __init__.py ├── metrics.py ├── models │ ├── STConvolution.py │ ├── STResNet.py │ ├── __init__.py │ └── iLayer.py ├── preprocessing │ ├── __init__.py │ └── minmax_normalization.py └── utils │ ├── __init__.py │ ├── eval.py │ ├── evalMultiStepAhead.py │ ├── evalMultiStepAhead4SeqModel.py │ ├── evalMultiStepAheadNew.py │ ├── fill_missing_vals.py │ ├── runMe.bat │ ├── txt2hdf5_InOut.py │ └── viewRetFromPkl.py ├── scripts └── papers │ └── AAAI17 │ ├── BikeNYC │ └── README.md │ ├── README.md │ ├── TaxiBJ │ ├── README.md │ ├── exptTaxiBJ-L12.py │ └── exptTaxiBJ.py │ └── doc │ └── ST-ResNet-AAAI17-Zhang.pdf └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # data/model/results/scripts/backup 2 | *.sh 3 | *.bat 4 | *.h5 5 | *.hdf5 6 | *.pkl 7 | *.gz 8 | bak/ 9 | backup/ 10 | *.txt 11 | 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *,cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # IPython Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | venv/ 95 | ENV/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DeepST 2 | ====== 3 | [DeepST](https://github.com/lucktroy/DeepST): A **Deep Learning** Toolbox for Spatio-Temporal Data 4 | 5 | *Tested on `Windows Server 2012 R2`.* 6 | 7 | ## Installation 8 | 9 | DeepST uses the following dependencies: 10 | 11 | * [Keras](https://keras.io/#installation) and its dependencies are required to use DeepST. 12 | * [Theano](http://deeplearning.net/software/theano/install.html#install) or [TensorFlow](https://github.com/tensorflow/tensorflow#download-and-setup); **Theano** is recommended. 13 | * numpy and scipy 14 | * HDF5 and [h5py](http://www.h5py.org/) 15 | * [pandas](http://pandas.pydata.org/) 16 | * CUDA 7.5 or a later version; **cuDNN** is highly recommended. 17 | 18 | To install DeepST, `cd` to the **DeepST** folder and run the install command: 19 | 20 | ``` 21 | python setup.py install 22 | ``` 23 | 24 | To install the development version: 25 | 26 | ``` 27 | python setup.py develop 28 | ``` 29 | 30 | ## Data path 31 | 32 | The default `DATAPATH` is `[path_to_DeepST]/data`. You may point `DATAPATH` at your own data directory using: 33 | 34 | ``` 35 | # Windows 36 | set DATAPATH=[path_to_your_data] 37 | 38 | # Linux 39 | export DATAPATH=[path_to_your_data] 40 | ``` 41 | 42 | ## License 43 | 44 | DeepST is released under the MIT License (refer to the LICENSE file for details). -------------------------------------------------------------------------------- /data/TaxiBJ/ReadMe.md: -------------------------------------------------------------------------------- 1 | TaxiBJ: InFlow/OutFlow, Meteorology and Holidays at Beijing 2 | =========================================================== 3 | 4 | **If you use the data, please cite the following paper.** 5 | 6 | `Junbo Zhang, Yu Zheng, Dekang Qi. Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction. In AAAI 2017. ` 7 | 8 | Download the data from [OneDrive](https://1drv.ms/f/s!Akh6N7xv3uVmhOhDKwx3bm5zpHkDOQ) or [BaiduPan](http://pan.baidu.com/s/1qYq7ja8). 9 | 10 | Please verify the downloaded files with the `md5sum` command: 11 | ``` 12 | md5sum -c md5sum.txt 13 | ``` 14 | 15 | **TaxiBJ** consists of the following **SIX** datasets: 16 | 17 | * BJ16_M32x32_T30_InOut.h5 18 | * BJ15_M32x32_T30_InOut.h5 19 | * BJ14_M32x32_T30_InOut.h5 20 | * BJ13_M32x32_T30_InOut.h5 21 | * BJ_Meteorology.h5 22 | * BJ_Holiday.txt 23 | 24 | where the first four files contain the *crowd flows* in Beijing from 2013 to 2016, `BJ_Meteorology.h5` contains the meteorological data, and `BJ_Holiday.txt` lists the holidays (and adjacent weekends) of Beijing. 25 | 26 | ## Flows of Crowds 27 | 28 | File names: `BJ[YEAR]_M32x32_T30_InOut.h5`, where 29 | 30 | * YEAR: one of {13, 14, 15, 16} 31 | * M32x32: Beijing is divided into a 32 x 32 grid map 32 | * T30: timeslot (a.k.a.
time interval) is equal to 30 minutes, meaning there are 48 timeslots in a day 33 | * InOut: Inflow/Outflow are defined in the following paper [1]. 34 | 35 | [1] Junbo Zhang, Yu Zheng, Dekang Qi. Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction. In AAAI 2017. 36 | 37 | Each `h5` file has the following two subsets: 38 | 39 | * `date`: a list of timeslots, which is associated with the **data**. 40 | * `data`: a 4D tensor of shape (number_of_timeslots, 2, 32, 32), of which `data[i]` is a 3D tensor of shape (2, 32, 32) at the timeslot `date[i]`; `data[i][0]` is a `32x32` inflow matrix and `data[i][1]` is a `32x32` outflow matrix. 41 | 42 | ### Example 43 | 44 | You can get the data info with the following command: 45 | ``` 46 | python -c "from deepst.datasets import stat; stat('BJ16_M32x32_T30_InOut.h5')" 47 | ``` 48 | 49 | The output looks like: 50 | ``` 51 | =====stat===== 52 | data shape: (7220, 2, 32, 32) 53 | # of days: 162, from 2015-11-01 to 2016-04-10 54 | # of timeslots: 7776 55 | # of timeslots (available): 7220 56 | missing ratio of timeslots: 7.2% 57 | max: 1250.000, min: 0.000 58 | =====stat===== 59 | ``` 60 | 61 | ## Meteorology 62 | 63 | File name: `BJ_Meteorology.h5`, which has the following four subsets: 64 | 65 | * `date`: a list of timeslots, which is associated with the following kinds of data. 66 | * `Temperature`: a list of continuous values, of which the `i^{th}` value is the `temperature` at the timeslot `date[i]`. 67 | * `WindSpeed`: a list of continuous values, of which the `i^{th}` value is the `wind speed` at the timeslot `date[i]`. 68 | * `Weather`: a 2D matrix, each row of which is a one-hot vector (`dim=17`), showing one of the following weather types: 69 | ``` 70 | Sunny = 0, 71 | Cloudy = 1, 72 | Overcast = 2, 73 | Rainy = 3, 74 | Sprinkle = 4, 75 | ModerateRain = 5, 76 | HeavyRain = 6, 77 | Rainstorm = 7, 78 | Thunderstorm = 8, 79 | FreezingRain = 9, 80 | Snowy = 10, 81 | LightSnow = 11, 82 | ModerateSnow = 12, 83 | HeavySnow = 13, 84 | Foggy = 14, 85 | Sandstorm = 15, 86 | Dusty = 16, 87 | ``` 88 | 89 | ## Holiday 90 | 91 | File name: `BJ_Holiday.txt`, which includes a list of the holidays (and adjacent weekends) of Beijing. 92 | 93 | Each line is a holiday in the format [yyyy][mm][dd]. For example, `20150601` is `June 1st, 2015`.
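## Reading the data (example)

A minimal sketch for inspecting one flow file directly with [h5py](http://www.h5py.org/), outside of DeepST's own loaders (assuming the file has been downloaded into the working directory):

```
import h5py

with h5py.File('BJ16_M32x32_T30_InOut.h5', 'r') as f:
    data = f['data'][:]  # 4D tensor: (number_of_timeslots, 2, 32, 32)
    date = f['date'][:]  # timeslot strings in [yyyy][mm][dd][slot] format
# first timeslot: total inflow and total outflow over the 32x32 grid
print(date[0], data[0, 0].sum(), data[0, 1].sum())
```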
-------------------------------------------------------------------------------- /deepst/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirkhango/DeepST/7ba669013bbafd5f413ef50d5d76094c3a68efd6/deepst/__init__.py -------------------------------------------------------------------------------- /deepst/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import platform 4 | 5 | 6 | class Config(object): 7 | """Resolve DATAPATH from the environment, falling back to <repo>/data.""" 8 | 9 | def __init__(self): 10 | super(Config, self).__init__() 11 | 12 | DATAPATH = os.environ.get('DATAPATH') 13 | if DATAPATH is None: 14 | if platform.system() == "Windows" or platform.system() == "Linux": 15 | # DATAPATH = "D:/data/traffic_flow" 16 | # elif platform.system() == "Linux": 17 | DATAPATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data') 18 | else: 19 | print("Unsupported/Unknown OS: ", platform.system(), "please set DATAPATH") 20 | self.DATAPATH = DATAPATH 21 | -------------------------------------------------------------------------------- /deepst/datasets/BikeNYC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | import os 4 | import cPickle as pickle 5 | import numpy as np 6 | 7 | from . import load_stdata 8 | from ..preprocessing import MinMaxNormalization 9 | from ..preprocessing import remove_incomplete_days 10 | from ..config import Config 11 | from ..datasets.STMatrix import STMatrix 12 | from ..preprocessing import timestamp2vec 13 | np.random.seed(1337) # for reproducibility 14 | 15 | # parameters 16 | DATAPATH = Config().DATAPATH 17 | 18 | 19 | def load_data(T=24, nb_flow=2, len_closeness=None, len_period=None, len_trend=None, len_test=None, preprocess_name='preprocessing.pkl', meta_data=True): 20 | assert(len_closeness + len_period + len_trend > 0) 21 | # load data 22 | data, timestamps = load_stdata(os.path.join(DATAPATH, 'BikeNYC', 'NYC14_M16x8_T60_NewEnd.h5')) 23 | # print(timestamps) 24 | # remove any day that does not have T timestamps 25 | data, timestamps = remove_incomplete_days(data, timestamps, T) 26 | data = data[:, :nb_flow] 27 | data[data < 0] = 0. 28 | data_all = [data] 29 | timestamps_all = [timestamps] 30 | # minmax_scale 31 | data_train = data[:-len_test] 32 | print('train_data shape: ', data_train.shape) 33 | mmn = MinMaxNormalization() 34 | mmn.fit(data_train) 35 | data_all_mmn = [] 36 | for d in data_all: 37 | data_all_mmn.append(mmn.transform(d)) 38 | 39 | fpkl = open(preprocess_name, 'wb') 40 | for obj in [mmn]: 41 | pickle.dump(obj, fpkl) 42 | fpkl.close() 43 | 44 | XC, XP, XT = [], [], [] 45 | Y = [] 46 | timestamps_Y = [] 47 | for data, timestamps in zip(data_all_mmn, timestamps_all): 48 | # instance-based dataset --> sequences with format as (X, Y) where X is a sequence of images and Y is an image.
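# Each sample pairs the closeness (recent frames), period (daily, PeriodInterval=1) and
# trend (weekly, TrendInterval=7) inputs built by STMatrix.create_dataset with the
# target flow image Y at the predicted timeslot.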
49 | st = STMatrix(data, timestamps, T, CheckComplete=False) 50 | _XC, _XP, _XT, _Y, _timestamps_Y = st.create_dataset(len_closeness=len_closeness, len_period=len_period, len_trend=len_trend) 51 | XC.append(_XC) 52 | XP.append(_XP) 53 | XT.append(_XT) 54 | Y.append(_Y) 55 | timestamps_Y += _timestamps_Y 56 | 57 | XC = np.vstack(XC) 58 | XP = np.vstack(XP) 59 | XT = np.vstack(XT) 60 | Y = np.vstack(Y) 61 | print("XC shape: ", XC.shape, "XP shape: ", XP.shape, "XT shape: ", XT.shape, "Y shape:", Y.shape) 62 | XC_train, XP_train, XT_train, Y_train = XC[:-len_test], XP[:-len_test], XT[:-len_test], Y[:-len_test] 63 | XC_test, XP_test, XT_test, Y_test = XC[-len_test:], XP[-len_test:], XT[-len_test:], Y[-len_test:] 64 | 65 | timestamp_train, timestamp_test = timestamps_Y[:-len_test], timestamps_Y[-len_test:] 66 | X_train = [] 67 | X_test = [] 68 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_train, XP_train, XT_train]): 69 | if l > 0: 70 | X_train.append(X_) 71 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_test, XP_test, XT_test]): 72 | if l > 0: 73 | X_test.append(X_) 74 | print('train shape:', XC_train.shape, Y_train.shape, 'test shape: ', XC_test.shape, Y_test.shape) 75 | # load meta feature 76 | if meta_data: 77 | meta_feature = timestamp2vec(timestamps_Y) 78 | metadata_dim = meta_feature.shape[1] 79 | meta_feature_train, meta_feature_test = meta_feature[:-len_test], meta_feature[-len_test:] 80 | X_train.append(meta_feature_train) 81 | X_test.append(meta_feature_test) 82 | else: 83 | metadata_dim = None 84 | for _X in X_train: 85 | print(_X.shape, ) 86 | print() 87 | for _X in X_test: 88 | print(_X.shape, ) 89 | print() 90 | return X_train, Y_train, X_test, Y_test, mmn, metadata_dim, timestamp_train, timestamp_test 91 | -------------------------------------------------------------------------------- /deepst/datasets/ReadMe.md: -------------------------------------------------------------------------------- 1 | ## Beijing 2 | 3 | Beijing.py is a unified interface to load data. -------------------------------------------------------------------------------- /deepst/datasets/STDATA.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | 4 | import cPickle as pickle 5 | import numpy as np 6 | 7 | from ..preprocessing import MinMaxNormalization 8 | from ..preprocessing import remove_incomplete_days 9 | from ..datasets.STMatrix import STMatrix 10 | from ..preprocessing import timestamp2vec 11 | from . import load_stdata 12 | # np.random.seed(1337) # for reproducibility 13 | 14 | # parameters 15 | # DATAPATH = Config().DATAPATH 16 | 17 | def load_data(fname=None, T=48, nb_flow=2, len_closeness=None, len_period=None, len_trend=None, len_test=None, preprocess_name='preprocessing.pkl'): 18 | assert(len_closeness + len_period + len_trend > 0) 19 | data, timestamps = load_stdata(fname) 20 | print(timestamps) 21 | # remove any day that does not have T timestamps 22 | data, timestamps = remove_incomplete_days(data, timestamps, T) 23 | data = data[:, :nb_flow] 24 | data[data < 0] = 0.
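# NOTE: the MinMax scaler below is fit on the training slice only, then applied to the full series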
25 | data_all = [data] 26 | timestamps_all = [timestamps] 27 | # minmax_scale 28 | data_train = data[:-len_test] 29 | print('train_data shape: ', data_train.shape) 30 | mmn = MinMaxNormalization() 31 | mmn.fit(data_train) 32 | data_all_mmn = [] 33 | for d in data_all: 34 | data_all_mmn.append(mmn.transform(d)) 35 | 36 | fpkl = open(preprocess_name, 'wb') 37 | for obj in [mmn]: 38 | pickle.dump(obj, fpkl) 39 | fpkl.close() 40 | 41 | XC, XP, XT = [], [], [] 42 | Y = [] 43 | timestamps_Y = [] 44 | for data, timestamps in zip(data_all_mmn, timestamps_all): 45 | # instance-based dataset --> sequences with format as (X, Y) where X is 46 | # a sequence of images and Y is an image. 47 | st = STMatrix(data, timestamps, T, CheckComplete=False) 48 | _XC, _XP, _XT, _Y, _timestamps_Y = st.create_dataset( 49 | len_closeness=len_closeness, len_period=len_period, len_trend=len_trend) 50 | XC.append(_XC) 51 | XP.append(_XP) 52 | XT.append(_XT) 53 | Y.append(_Y) 54 | timestamps_Y += _timestamps_Y 55 | 56 | # load meta feature 57 | meta_feature = timestamp2vec(timestamps_Y) 58 | metadata_dim = meta_feature.shape[1] 59 | 60 | XC = np.vstack(XC) 61 | XP = np.vstack(XP) 62 | XT = np.vstack(XT) 63 | Y = np.vstack(Y) 64 | print("XC shape: ", XC.shape, "XP shape: ", XP.shape, 65 | "XT shape: ", XT.shape, "Y shape:", Y.shape) 66 | 67 | XC_train, XP_train, XT_train, Y_train = XC[ 68 | :-len_test], XP[:-len_test], XT[:-len_test], Y[:-len_test] 69 | XC_test, XP_test, XT_test, Y_test = XC[ 70 | -len_test:], XP[-len_test:], XT[-len_test:], Y[-len_test:] 71 | meta_feature_train, meta_feature_test = meta_feature[ 72 | :-len_test], meta_feature[-len_test:] 73 | timestamp_train, timestamp_test = timestamps_Y[ 74 | :-len_test], timestamps_Y[-len_test:] 75 | X_train = [] 76 | X_test = [] 77 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_train, XP_train, XT_train]): 78 | if l > 0: 79 | X_train.append(X_) 80 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_test, XP_test, XT_test]): 81 | if l > 0: 82 | X_test.append(X_) 83 | print('train shape:', XC_train.shape, Y_train.shape, 84 | 'test shape: ', XC_test.shape, Y_test.shape) 85 | 86 | X_train.append(meta_feature_train) 87 | X_test.append(meta_feature_test) 88 | for _X in X_train: 89 | print(_X.shape, ) 90 | print() 91 | for _X in X_test: 92 | print(_X.shape, ) 93 | print() 94 | return X_train, Y_train, X_test, Y_test, mmn, metadata_dim, timestamp_train, timestamp_test 95 | -------------------------------------------------------------------------------- /deepst/datasets/STMatrix.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import pandas as pd 4 | import numpy as np 5 | 6 | from .
import load_stdata 7 | from ..config import Config 8 | from ..utils import string2timestamp 9 | 10 | 11 | class STMatrix(object): 12 | """docstring for STMatrix""" 13 | 14 | def __init__(self, data, timestamps, T=48, CheckComplete=True): 15 | super(STMatrix, self).__init__() 16 | assert len(data) == len(timestamps) 17 | self.data = data 18 | self.timestamps = timestamps 19 | self.T = T 20 | self.pd_timestamps = string2timestamp(timestamps, T=self.T) 21 | if CheckComplete: 22 | self.check_complete() 23 | # index 24 | self.make_index() 25 | 26 | def make_index(self): 27 | self.get_index = dict() 28 | for i, ts in enumerate(self.pd_timestamps): 29 | self.get_index[ts] = i 30 | 31 | def check_complete(self): 32 | missing_timestamps = [] 33 | offset = pd.DateOffset(minutes=24 * 60 // self.T) 34 | pd_timestamps = self.pd_timestamps 35 | i = 1 36 | while i < len(pd_timestamps): 37 | if pd_timestamps[i-1] + offset != pd_timestamps[i]: 38 | missing_timestamps.append("(%s -- %s)" % (pd_timestamps[i-1], pd_timestamps[i])) 39 | i += 1 40 | for v in missing_timestamps: 41 | print(v) 42 | assert len(missing_timestamps) == 0 43 | 44 | def get_matrix(self, timestamp): 45 | return self.data[self.get_index[timestamp]] 46 | 47 | def save(self, fname): 48 | pass 49 | 50 | def check_it(self, depends): 51 | for d in depends: 52 | if d not in self.get_index.keys(): 53 | return False 54 | return True 55 | 56 | def create_dataset(self, len_closeness=3, len_trend=3, TrendInterval=7, len_period=3, PeriodInterval=1): 57 | """current version 58 | """ 59 | # offset_week = pd.DateOffset(days=7) 60 | offset_frame = pd.DateOffset(minutes=24 * 60 // self.T) 61 | XC = [] 62 | XP = [] 63 | XT = [] 64 | Y = [] 65 | timestamps_Y = [] 66 | depends = [range(1, len_closeness+1), 67 | [PeriodInterval * self.T * j for j in range(1, len_period+1)], 68 | [TrendInterval * self.T * j for j in range(1, len_trend+1)]] 69 | 70 | i = max(self.T * TrendInterval * len_trend, self.T * PeriodInterval * len_period, len_closeness) 71 | while i < len(self.pd_timestamps): 72 | Flag = True 73 | for depend in depends: 74 | if Flag is False: 75 | break 76 | Flag = self.check_it([self.pd_timestamps[i] - j * offset_frame for j in depend]) 77 | 78 | if Flag is False: 79 | i += 1 80 | continue 81 | x_c = [self.get_matrix(self.pd_timestamps[i] - j * offset_frame) for j in depends[0]] 82 | x_p = [self.get_matrix(self.pd_timestamps[i] - j * offset_frame) for j in depends[1]] 83 | x_t = [self.get_matrix(self.pd_timestamps[i] - j * offset_frame) for j in depends[2]] 84 | y = self.get_matrix(self.pd_timestamps[i]) 85 | if len_closeness > 0: 86 | XC.append(np.vstack(x_c)) 87 | if len_period > 0: 88 | XP.append(np.vstack(x_p)) 89 | if len_trend > 0: 90 | XT.append(np.vstack(x_t)) 91 | Y.append(y) 92 | timestamps_Y.append(self.timestamps[i]) 93 | i += 1 94 | XC = np.asarray(XC) 95 | XP = np.asarray(XP) 96 | XT = np.asarray(XT) 97 | Y = np.asarray(Y) 98 | print("XC shape: ", XC.shape, "XP shape: ", XP.shape, "XT shape: ", XT.shape, "Y shape:", Y.shape) 99 | return XC, XP, XT, Y, timestamps_Y 100 | 101 | 102 | if __name__ == '__main__': 103 | pass 104 | -------------------------------------------------------------------------------- /deepst/datasets/TaxiBJ.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | load BJ Data from multiple sources as follows: 4 | meteorologic data 5 | """ 6 | from __future__ import print_function 7 | 8 | import os 9 | #import cPickle as pickle 10 | import pickle 11 
| from copy import copy 12 | import numpy as np 13 | import h5py 14 | from . import load_stdata, stat 15 | from ..preprocessing import MinMaxNormalization, remove_incomplete_days, timestamp2vec 16 | from ..config import Config 17 | from .STMatrix import STMatrix 18 | # np.random.seed(1337) # for reproducibility 19 | 20 | # parameters 21 | DATAPATH = Config().DATAPATH 22 | 23 | 24 | def load_holiday(timeslots, fname=os.path.join(DATAPATH, 'TaxiBJ', 'BJ_Holiday.txt')): 25 | f = open(fname, 'r') 26 | holidays = f.readlines() 27 | holidays = set([h.strip() for h in holidays]) 28 | H = np.zeros(len(timeslots)) 29 | for i, slot in enumerate(timeslots): 30 | if slot[:8] in holidays: 31 | H[i] = 1 32 | print(H.sum()) 33 | # print(timeslots[H==1]) 34 | return H[:, None] 35 | 36 | 37 | def load_meteorol(timeslots, fname=os.path.join(DATAPATH, 'TaxiBJ', 'BJ_Meteorology.h5')): 38 | ''' 39 | timeslots: the timeslots to be predicted. 40 | In the real world, we do not have meteorological data at the predicted timeslot; instead, we use the observation at the previous timeslot, i.e., cur_slot = predicted_slot - 1 (predicted meteorological data could be used as well). 41 | ''' 42 | f = h5py.File(fname, 'r') 43 | Timeslot = f['date'].value 44 | WindSpeed = f['WindSpeed'].value 45 | Weather = f['Weather'].value 46 | Temperature = f['Temperature'].value 47 | f.close() 48 | 49 | M = dict() # map timeslot to index 50 | for i, slot in enumerate(Timeslot): 51 | M[slot] = i 52 | 53 | WS = [] # WindSpeed 54 | WR = [] # Weather 55 | TE = [] # Temperature 56 | for slot in timeslots: 57 | predicted_id = M[slot] 58 | cur_id = predicted_id - 1 59 | WS.append(WindSpeed[cur_id]) 60 | WR.append(Weather[cur_id]) 61 | TE.append(Temperature[cur_id]) 62 | 63 | WS = np.asarray(WS) 64 | WR = np.asarray(WR) 65 | TE = np.asarray(TE) 66 | 67 | # 0-1 scale 68 | WS = 1. * (WS - WS.min()) / (WS.max() - WS.min()) 69 | TE = 1. * (TE - TE.min()) / (TE.max() - TE.min()) 70 | 71 | print("shape: ", WS.shape, WR.shape, TE.shape) 72 | 73 | # concatenate all these attributes 74 | merge_data = np.hstack([WR, WS[:, None], TE[:, None]]) 75 | 76 | # print('merge shape:', merge_data.shape) 77 | return merge_data 78 | 79 | 80 | def load_data(T=48, nb_flow=2, len_closeness=None, len_period=None, len_trend=None, 81 | len_test=None, preprocess_name='preprocessing.pkl', 82 | meta_data=True, meteorol_data=True, holiday_data=True): 83 | """Load the TaxiBJ flows (2013-2016) and build closeness/period/trend inputs plus optional meta features. 84 | """ 85 | assert(len_closeness + len_period + len_trend > 0) 86 | # load data 87 | # 13 - 16 88 | data_all = [] 89 | timestamps_all = list() 90 | for year in range(13, 17): 91 | fname = os.path.join( 92 | DATAPATH, 'TaxiBJ', 'BJ{}_M32x32_T30_InOut.h5'.format(year)) 93 | print("file name: ", fname) 94 | stat(fname) 95 | data, timestamps = load_stdata(fname) 96 | # print(timestamps) 97 | # remove any day that does not have T timestamps 98 | data, timestamps = remove_incomplete_days(data, timestamps, T) 99 | data = data[:, :nb_flow] 100 | data[data < 0] = 0.
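# (negative flow values, likely markers for missing data, are clipped to zero before scaling)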
101 | data_all.append(data) 102 | timestamps_all.append(timestamps) 103 | print("\n") 104 | 105 | # minmax_scale 106 | data_train = np.vstack(copy(data_all))[:-len_test] 107 | print('train_data shape: ', data_train.shape) 108 | mmn = MinMaxNormalization() 109 | mmn.fit(data_train) 110 | data_all_mmn = [mmn.transform(d) for d in data_all] 111 | 112 | fpkl = open(preprocess_name, 'wb') 113 | for obj in [mmn]: 114 | pickle.dump(obj, fpkl) 115 | fpkl.close() 116 | 117 | XC, XP, XT = [], [], [] 118 | Y = [] 119 | timestamps_Y = [] 120 | for data, timestamps in zip(data_all_mmn, timestamps_all): 121 | # instance-based dataset --> sequences with format as (X, Y) where X is 122 | # a sequence of images and Y is an image. 123 | st = STMatrix(data, timestamps, T, CheckComplete=False) 124 | _XC, _XP, _XT, _Y, _timestamps_Y = st.create_dataset( 125 | len_closeness=len_closeness, len_period=len_period, len_trend=len_trend) 126 | XC.append(_XC) 127 | XP.append(_XP) 128 | XT.append(_XT) 129 | Y.append(_Y) 130 | timestamps_Y += _timestamps_Y 131 | 132 | meta_feature = [] 133 | if meta_data: 134 | # load time feature 135 | time_feature = timestamp2vec(timestamps_Y) 136 | meta_feature.append(time_feature) 137 | if holiday_data: 138 | # load holiday 139 | holiday_feature = load_holiday(timestamps_Y) 140 | meta_feature.append(holiday_feature) 141 | if meteorol_data: 142 | # load meteorol data 143 | meteorol_feature = load_meteorol(timestamps_Y) 144 | meta_feature.append(meteorol_feature) 145 | 146 | meta_feature = np.hstack(meta_feature) if len( 147 | meta_feature) > 0 else np.asarray(meta_feature) 148 | metadata_dim = meta_feature.shape[1] if len( 149 | meta_feature.shape) > 1 else None 150 | if metadata_dim < 1: 151 | metadata_dim = None 152 | if meta_data and holiday_data and meteorol_data: 153 | print('time feature:', time_feature.shape, 'holiday feature:', holiday_feature.shape, 154 | 'meteorol feature: ', meteorol_feature.shape, 'mete feature: ', meta_feature.shape) 155 | 156 | XC = np.vstack(XC) 157 | XP = np.vstack(XP) 158 | XT = np.vstack(XT) 159 | Y = np.vstack(Y) 160 | print("XC shape: ", XC.shape, "XP shape: ", XP.shape, 161 | "XT shape: ", XT.shape, "Y shape:", Y.shape) 162 | 163 | XC_train, XP_train, XT_train, Y_train = XC[ 164 | :-len_test], XP[:-len_test], XT[:-len_test], Y[:-len_test] 165 | XC_test, XP_test, XT_test, Y_test = XC[ 166 | -len_test:], XP[-len_test:], XT[-len_test:], Y[-len_test:] 167 | timestamp_train, timestamp_test = timestamps_Y[ 168 | :-len_test], timestamps_Y[-len_test:] 169 | 170 | X_train = [] 171 | X_test = [] 172 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_train, XP_train, XT_train]): 173 | if l > 0: 174 | X_train.append(X_) 175 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_test, XP_test, XT_test]): 176 | if l > 0: 177 | X_test.append(X_) 178 | print('train shape:', XC_train.shape, Y_train.shape, 179 | 'test shape: ', XC_test.shape, Y_test.shape) 180 | 181 | if metadata_dim is not None: 182 | meta_feature_train, meta_feature_test = meta_feature[ 183 | :-len_test], meta_feature[-len_test:] 184 | X_train.append(meta_feature_train) 185 | X_test.append(meta_feature_test) 186 | for _X in X_train: 187 | print(_X.shape, ) 188 | print() 189 | for _X in X_test: 190 | print(_X.shape, ) 191 | print() 192 | return X_train, Y_train, X_test, Y_test, mmn, metadata_dim, timestamp_train, timestamp_test 193 | -------------------------------------------------------------------------------- /deepst/datasets/__init__.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import h5py 3 | import time 4 | 5 | def load_stdata(fname): 6 | f = h5py.File(fname, 'r') 7 | data = f['data'].value 8 | timestamps = f['date'].value 9 | f.close() 10 | return data, timestamps 11 | 12 | 13 | def stat(fname): 14 | def get_nb_timeslot(f): 15 | s = f['date'][0] 16 | e = f['date'][-1] 17 | year, month, day = map(int, [s[:4], s[4:6], s[6:8]]) 18 | ts = time.strptime("%04i-%02i-%02i" % (year, month, day), "%Y-%m-%d") 19 | year, month, day = map(int, [e[:4], e[4:6], e[6:8]]) 20 | te = time.strptime("%04i-%02i-%02i" % (year, month, day), "%Y-%m-%d") 21 | nb_timeslot = (time.mktime(te) - time.mktime(ts)) / (0.5 * 3600) + 48 22 | ts_str, te_str = time.strftime("%Y-%m-%d", ts), time.strftime("%Y-%m-%d", te) 23 | return nb_timeslot, ts_str, te_str 24 | 25 | with h5py.File(fname) as f: 26 | nb_timeslot, ts_str, te_str = get_nb_timeslot(f) 27 | nb_day = int(nb_timeslot / 48) 28 | mmax = f['data'].value.max() 29 | mmin = f['data'].value.min() 30 | stat = '=' * 5 + 'stat' + '=' * 5 + '\n' + \ 31 | 'data shape: %s\n' % str(f['data'].shape) + \ 32 | '# of days: %i, from %s to %s\n' % (nb_day, ts_str, te_str) + \ 33 | '# of timeslots: %i\n' % int(nb_timeslot) + \ 34 | '# of timeslots (available): %i\n' % f['date'].shape[0] + \ 35 | 'missing ratio of timeslots: %.1f%%\n' % ((1. - float(f['date'].shape[0] / nb_timeslot)) * 100) + \ 36 | 'max: %.3f, min: %.3f\n' % (mmax, mmin) + \ 37 | '=' * 5 + 'stat' + '=' * 5 38 | print(stat) 39 | 40 | """ 41 | def stat(fname): 42 | def get_nb_timeslot(f): 43 | s = f['date'][0] 44 | e = f['date'][-1] 45 | year, month, day = map(int, [s[:4], s[4:6], s[6:8]]) 46 | ts = time.strptime("%04i-%02i-%02i" % (year, month, day), "%Y-%m-%d") 47 | year, month, day = map(int, [e[:4], e[4:6], e[6:8]]) 48 | te = time.strptime("%04i-%02i-%02i" % (year, month, day), "%Y-%m-%d") 49 | nb_timeslot = (time.mktime(te) - time.mktime(ts)) / (0.5 * 3600) + 48 50 | ts_str, te_str = time.strftime("%Y-%m-%d", ts), time.strftime("%Y-%m-%d", te) 51 | return nb_timeslot, ts_str, te_str 52 | 53 | with h5py.File(fname) as f: 54 | nb_timeslot, ts_str, te_str = get_nb_timeslot(f) 55 | nb_day = int(nb_timeslot / 48) 56 | mmax = f['data'].value.max() 57 | mmin = f['data'].value.min() 58 | stat = '=' * 5 + 'stat' + '=' * 5 + '\n' + \ 59 | 'data shape: %s\n' % str(f['data'].shape) + \ 60 | 'date shape: %s\n' % str(f['date'].shape) + \ 61 | 'date range: [%s, %s]\n' % (str(f['date'][0][:8]), str(f['date'][-1][:8])) + \ 62 | '# of days: %i, from %s to %s\n' % (nb_day, ts_str, te_str) + \ 63 | '# of timeslots: %i\n' % int(nb_timeslot) + \ 64 | '# of timeslots (available): %i\n' % f['date'].shape[0] + \ 65 | 'missing ratio of timeslots: %.1f%%\n' % ((1. 
- float(f['date'].shape[0] / nb_timeslot)) * 100) + \ 66 | 'max: %.3f, min: %.3f\n' % (mmax, mmin) + \ 67 | '=' * 5 + 'stat' + '=' * 5 68 | print(stat) 69 | """ -------------------------------------------------------------------------------- /deepst/metrics.py: -------------------------------------------------------------------------------- 1 | # import numpy as np 2 | from keras import backend as K 3 | 4 | 5 | def mean_squared_error(y_true, y_pred): 6 | return K.mean(K.square(y_pred - y_true)) 7 | 8 | 9 | def root_mean_square_error(y_true, y_pred): 10 | return mean_squared_error(y_true, y_pred) ** 0.5 11 | 12 | 13 | def rmse(y_true, y_pred): 14 | return mean_squared_error(y_true, y_pred) ** 0.5 15 | 16 | # aliases 17 | mse = MSE = mean_squared_error 18 | # rmse = RMSE = root_mean_square_error 19 | 20 | 21 | def masked_mean_squared_error(y_true, y_pred): 22 | idx = (y_true > 1e-6).nonzero() 23 | return K.mean(K.square(y_pred[idx] - y_true[idx])) 24 | 25 | 26 | def masked_rmse(y_true, y_pred): 27 | return masked_mean_squared_error(y_true, y_pred) ** 0.5 28 | -------------------------------------------------------------------------------- /deepst/models/STConvolution.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from keras.models import Sequential 3 | from keras.layers.core import Dense 4 | from keras.layers.core import Reshape, Merge 5 | from keras.layers.core import Activation 6 | from keras.layers.advanced_activations import LeakyReLU 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, Convolution3D 10 | 11 | 12 | def seqCNN(n_flow=4, seq_len=3, map_height=32, map_width=32): 13 | model = Sequential() 14 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow*seq_len, map_height, map_width), border_mode='same')) 15 | model.add(Activation('relu')) 16 | 17 | model.add(Convolution2D(128, 3, 3, border_mode='same')) 18 | model.add(Activation('relu')) 19 | 20 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 21 | model.add(Activation('relu')) 22 | 23 | model.add(Convolution2D(n_flow, 3, 3, border_mode='same')) 24 | model.add(Activation('tanh')) 25 | return model 26 | 27 | 28 | def seqCNNBase(conf=(4, 3, 32, 32)): 29 | n_flow, seq_len, map_height, map_width = conf 30 | model = Sequential() 31 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow*seq_len, map_height, map_width), border_mode='same')) 32 | model.add(Activation('relu')) 33 | 34 | model.add(Convolution2D(128, 3, 3, border_mode='same')) 35 | model.add(Activation('relu')) 36 | 37 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 38 | model.add(Activation('relu')) 39 | 40 | model.add(Convolution2D(n_flow, 3, 3, border_mode='same')) 41 | # model.add(Activation('tanh')) 42 | return model 43 | 44 | 45 | def seqCNNBaseLayer1(conf=(4, 3, 32, 32)): 46 | # 1 layer CNN for early fusion 47 | n_flow, seq_len, map_height, map_width = conf 48 | model = Sequential() 49 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow * seq_len, map_height, map_width), border_mode='same')) 50 | model.add(Activation('relu')) 51 | return model 52 | 53 | 54 | def seqCNN_CPT(c_conf=(4, 3, 32, 32), p_conf=(4, 3, 32, 32), t_conf=(4, 3, 32, 32)): 55 | ''' 56 | C - Temporal Closeness 57 | P - Period 58 | T - Trend 59 | conf = (nb_flow, seq_len, map_height, map_width) 60 | ''' 61 | model = Sequential() 62 | components = [] 63 | 64 | for conf in 
[c_conf, p_conf, t_conf]: 65 | if conf is not None: 66 | components.append(seqCNNBaseLayer1(conf)) 67 | nb_flow = conf[0] 68 | model.add(Merge(components, mode='concat', concat_axis=1)) # concat 69 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 70 | model.add(Activation('relu')) 71 | 72 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 73 | model.add(Activation('relu')) 74 | 75 | model.add(Convolution2D(nb_flow, 3, 3, border_mode='same')) 76 | model.add(Activation('tanh')) 77 | return model 78 | 79 | 80 | def seqCNNBaseLayer1_2(conf=(4, 3, 32, 32)): 81 | # 1 layer CNN for early fusion 82 | n_flow, seq_len, map_height, map_width = conf 83 | model = Sequential() 84 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow * seq_len, map_height, map_width), border_mode='same')) 85 | # model.add(Activation('relu')) 86 | return model 87 | 88 | 89 | def seqCNN_CPT2(c_conf=(4, 3, 32, 32), p_conf=(4, 3, 32, 32), t_conf=(4, 3, 32, 32)): 90 | ''' 91 | C - Temporal Closeness 92 | P - Period 93 | T - Trend 94 | conf = (nb_flow, seq_len, map_height, map_width) 95 | ''' 96 | model = Sequential() 97 | components = [] 98 | 99 | for conf in [c_conf, p_conf, t_conf]: 100 | if conf is not None: 101 | components.append(seqCNNBaseLayer1_2(conf)) 102 | nb_flow = conf[0] 103 | # model.add(Merge(components, mode='concat', concat_axis=1)) # concat 104 | if len(components) > 1: 105 | model.add(Merge(components, mode='sum')) 106 | else: 107 | model = components[0] 108 | model.add(Activation('relu')) 109 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 110 | model.add(Activation('relu')) 111 | 112 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 113 | model.add(Activation('relu')) 114 | 115 | model.add(Convolution2D(nb_flow, 3, 3, border_mode='same')) 116 | model.add(Activation('tanh')) 117 | return model 118 | 119 | 120 | def seqCNN_CPTM(c_conf=(4, 3, 32, 32), p_conf=(4, 3, 32, 32), t_conf=(4, 3, 32, 32), metadata_dim=None): 121 | ''' 122 | C - Temporal Closeness 123 | P - Period 124 | T - Trend 125 | conf = (nb_flow, seq_len, map_height, map_width) 126 | metadata_dim 127 | ''' 128 | model = Sequential() 129 | components = [] 130 | for conf in [c_conf, p_conf, t_conf]: 131 | if conf is not None: 132 | components.append(seqCNNBaseLayer1_2(conf)) 133 | # nb_flow = conf[0] 134 | nb_flow, _, map_height, map_width = conf 135 | # model.add(Merge(components, mode='concat', concat_axis=1)) # concat 136 | if len(components) > 1: 137 | model.add(Merge(components, mode='sum')) 138 | else: 139 | model = components[0] 140 | model.add(Activation('relu')) 141 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 142 | model.add(Activation('relu')) 143 | 144 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 145 | model.add(Activation('relu')) 146 | 147 | model.add(Convolution2D(nb_flow, 3, 3, border_mode='same')) 148 | 149 | metadata_processor = Sequential() 150 | # metadata_processor.add(Dense(output_dim=nb_flow * map_height * map_width, input_dim=metadata_dim)) 151 | metadata_processor.add(Dense(output_dim=10, input_dim=metadata_dim)) 152 | metadata_processor.add(Activation('relu')) 153 | metadata_processor.add(Dense(output_dim=nb_flow * map_height * map_width)) 154 | metadata_processor.add(Activation('relu')) 155 | metadata_processor.add(Reshape((nb_flow, map_height, map_width))) 156 | 157 | model_final=Sequential() 158 | model_final.add(Merge([model, metadata_processor], mode='sum')) 159 | model_final.add(Activation('tanh')) 160 | return model_final 161 | 162 | 163 | def 
lateFusion(metadata_dim, n_flow=2, seq_len=3, map_height=32, map_width=32): 164 | 165 | mat_model = seqCNNBase((n_flow, seq_len, map_height, map_width)) 166 | metadata_processor = Sequential() 167 | metadata_processor.add(Dense(output_dim=n_flow * map_height * map_width, input_dim=metadata_dim)) 168 | metadata_processor.add(Reshape((n_flow, map_height, map_width))) 169 | # metadata_processor.add(Activation('relu')) 170 | 171 | model = Sequential() 172 | model.add(Merge([mat_model, metadata_processor], mode='sum')) 173 | model.add(Activation('tanh')) 174 | return model 175 | 176 | 177 | def seqCNN_BN(n_flow=4, seq_len=3, map_height=32, map_width=32): 178 | model = Sequential() 179 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow*seq_len, map_height, map_width), border_mode='same')) 180 | model.add(LeakyReLU(0.2)) 181 | model.add(BatchNormalization()) 182 | 183 | model.add(Convolution2D(128, 3, 3, border_mode='same')) 184 | model.add(LeakyReLU(0.2)) 185 | model.add(BatchNormalization()) 186 | 187 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 188 | model.add(LeakyReLU(0.2)) 189 | model.add(BatchNormalization()) 190 | 191 | model.add(Convolution2D(n_flow, 3, 3, border_mode='same')) 192 | model.add(Activation('tanh')) 193 | return model 194 | 195 | 196 | def seqCNN_LReLU(n_flow=4, seq_len=3, map_height=32, map_width=32): 197 | model = Sequential() 198 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow*seq_len, map_height, map_width), border_mode='same')) 199 | model.add(LeakyReLU(0.2)) 200 | # model.add(BatchNormalization()) 201 | 202 | model.add(Convolution2D(128, 3, 3, border_mode='same')) 203 | model.add(LeakyReLU(0.2)) 204 | # model.add(BatchNormalization()) 205 | 206 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 207 | model.add(LeakyReLU(0.2)) 208 | # model.add(BatchNormalization()) 209 | 210 | model.add(Convolution2D(n_flow, 3, 3, border_mode='same')) 211 | model.add(Activation('tanh')) 212 | return model 213 | 214 | 215 | def seq3DCNN(n_flow=4, seq_len=3, map_height=32, map_width=32): 216 | model = Sequential() 217 | # model.add(ZeroPadding3D(padding=(0, 1, 1), input_shape=(n_flow, seq_len, map_height, map_width))) 218 | # model.add(Convolution3D(64, 2, 3, 3, border_mode='valid')) 219 | model.add(Convolution3D(64, 2, 3, 3, border_mode='same', input_shape=(n_flow, seq_len, map_height, map_width))) 220 | model.add(Activation('relu')) 221 | 222 | model.add(Convolution3D(128, 2, 3, 3, border_mode='same')) 223 | model.add(Activation('relu')) 224 | 225 | model.add(Convolution3D(64, 2, 3, 3, border_mode='same')) 226 | model.add(Activation('relu')) 227 | 228 | model.add(ZeroPadding3D(padding=(0, 1, 1))) 229 | model.add(Convolution3D(n_flow, seq_len, 3, 3, border_mode='valid')) 230 | # model.add(Convolution3D(n_flow, seq_len-2, 3, 3, border_mode='same')) 231 | model.add(Activation('tanh')) 232 | return model -------------------------------------------------------------------------------- /deepst/models/STResNet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ST-ResNet: Deep Spatio-temporal Residual Networks 3 | ''' 4 | 5 | from __future__ import print_function 6 | from keras.layers import ( 7 | Input, 8 | Activation, 9 | merge, 10 | Dense, 11 | Reshape 12 | ) 13 | from keras.layers.convolutional import Convolution2D 14 | from keras.layers.normalization import BatchNormalization 15 | from keras.models import Model 16 | #from keras.utils.visualize_util import plot 17 | 18 | 19 | def _shortcut(input, residual):
20 | return merge([input, residual], mode='sum') 21 | 22 | 23 | def _bn_relu_conv(nb_filter, nb_row, nb_col, subsample=(1, 1), bn=False): 24 | def f(input): 25 | if bn: 26 | input = BatchNormalization(mode=0, axis=1)(input) 27 | activation = Activation('relu')(input) 28 | return Convolution2D(nb_filter=nb_filter, nb_row=nb_row, nb_col=nb_col, subsample=subsample, border_mode="same")(activation) 29 | return f 30 | 31 | 32 | def _residual_unit(nb_filter, init_subsample=(1, 1)): 33 | def f(input): 34 | residual = _bn_relu_conv(nb_filter, 3, 3)(input) 35 | residual = _bn_relu_conv(nb_filter, 3, 3)(residual) 36 | return _shortcut(input, residual) 37 | return f 38 | 39 | 40 | def ResUnits(residual_unit, nb_filter, repetations=1): 41 | def f(input): 42 | for i in range(repetations): 43 | init_subsample = (1, 1) 44 | input = residual_unit(nb_filter=nb_filter, 45 | init_subsample=init_subsample)(input) 46 | return input 47 | return f 48 | 49 | 50 | def stresnet(c_conf=(3, 2, 32, 32), p_conf=(3, 2, 32, 32), t_conf=(3, 2, 32, 32), external_dim=8, nb_residual_unit=3): 51 | ''' 52 | C - Temporal Closeness 53 | P - Period 54 | T - Trend 55 | conf = (len_seq, nb_flow, map_height, map_width) 56 | external_dim 57 | ''' 58 | 59 | # main input 60 | main_inputs = [] 61 | outputs = [] 62 | for conf in [c_conf, p_conf, t_conf]: 63 | if conf is not None: 64 | len_seq, nb_flow, map_height, map_width = conf 65 | input = Input(shape=(nb_flow * len_seq, map_height, map_width)) 66 | main_inputs.append(input) 67 | # Conv1 68 | conv1 = Convolution2D( 69 | nb_filter=64, nb_row=3, nb_col=3, border_mode="same")(input) 70 | # [nb_residual_unit] Residual Units 71 | residual_output = ResUnits(_residual_unit, nb_filter=64, 72 | repetations=nb_residual_unit)(conv1) 73 | # Conv2 74 | activation = Activation('relu')(residual_output) 75 | conv2 = Convolution2D( 76 | nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")(activation) 77 | outputs.append(conv2) 78 | 79 | # parameter-matrix-based fusion 80 | if len(outputs) == 1: 81 | main_output = outputs[0] 82 | else: 83 | from .iLayer import iLayer 84 | new_outputs = [] 85 | for output in outputs: 86 | new_outputs.append(iLayer()(output)) 87 | main_output = merge(new_outputs, mode='sum') 88 | 89 | # fusing with external component 90 | if external_dim != None and external_dim > 0: 91 | # external input 92 | external_input = Input(shape=(external_dim,)) 93 | main_inputs.append(external_input) 94 | embedding = Dense(output_dim=10)(external_input) 95 | embedding = Activation('relu')(embedding) 96 | h1 = Dense(output_dim=nb_flow * map_height * map_width)(embedding) 97 | activation = Activation('relu')(h1) 98 | external_output = Reshape((nb_flow, map_height, map_width))(activation) 99 | main_output = merge([main_output, external_output], mode='sum') 100 | else: 101 | print('external_dim:', external_dim) 102 | 103 | main_output = Activation('tanh')(main_output) 104 | model = Model(input=main_inputs, output=main_output) 105 | 106 | return model 107 | 108 | if __name__ == '__main__': 109 | model = stresnet(external_dim=28, nb_residual_unit=12) 110 | #plot(model, to_file='ST-ResNet.png', show_shapes=True) 111 | model.summary() 112 | -------------------------------------------------------------------------------- /deepst/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirkhango/DeepST/7ba669013bbafd5f413ef50d5d76094c3a68efd6/deepst/models/__init__.py 
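For orientation, here is a minimal, hypothetical usage sketch for `stresnet` (it assumes the Keras 1.x API this module targets and inputs shaped like those returned by `deepst.datasets.TaxiBJ.load_data`; the hyper-parameters below are illustrative only, not the paper's settings):

```
from deepst.models.STResNet import stresnet
from deepst.metrics import rmse

# conf tuples are (len_seq, nb_flow, map_height, map_width)
model = stresnet(c_conf=(3, 2, 32, 32), p_conf=(1, 2, 32, 32),
                 t_conf=(1, 2, 32, 32), external_dim=28, nb_residual_unit=4)
model.compile(loss='mse', optimizer='adam', metrics=[rmse])
# X_train = [XC, XP, XT, meta_feature] and Y_train as produced by TaxiBJ.load_data
# model.fit(X_train, Y_train, nb_epoch=10, batch_size=32, verbose=1)
```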
-------------------------------------------------------------------------------- /deepst/models/iLayer.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from keras.engine.topology import Layer 3 | # from keras.layers import Dense 4 | import numpy as np 5 | 6 | 7 | class iLayer(Layer): 8 | def __init__(self, **kwargs): 9 | # self.output_dim = output_dim 10 | super(iLayer, self).__init__(**kwargs) 11 | 12 | def build(self, input_shape): 13 | initial_weight_value = np.random.random(input_shape[1:]) 14 | self.W = K.variable(initial_weight_value) 15 | self.trainable_weights = [self.W] 16 | 17 | def call(self, x, mask=None): 18 | return x * self.W 19 | 20 | def get_output_shape_for(self, input_shape): 21 | return input_shape 22 | -------------------------------------------------------------------------------- /deepst/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from copy import copy 4 | import time 5 | # from temporal_contrast_normalization import TemporalConstrastNormalization 6 | # from personal_temporal_contrast_normalization import PersonalTemporalConstrastNormalization 7 | from .minmax_normalization import MinMaxNormalization 8 | from ..utils import string2timestamp 9 | 10 | 11 | def timestamp2vec(timestamps): 12 | # tm_wday range [0, 6], Monday is 0 13 | # vec = [time.strptime(str(t[:8], encoding='utf-8'), '%Y%m%d').tm_wday for t in timestamps] # python3 14 | vec = [time.strptime(t[:8], '%Y%m%d').tm_wday for t in timestamps] # python2 15 | ret = [] 16 | for i in vec: 17 | v = [0 for _ in range(7)] 18 | v[i] = 1 19 | if i >= 5: 20 | v.append(0) # weekend 21 | else: 22 | v.append(1) # weekday 23 | ret.append(v) 24 | return np.asarray(ret) 25 | 26 | 27 | def remove_incomplete_days(data, timestamps, T=48): 28 | # remove a certain day which has not 48 timestamps 29 | days = [] # available days: some day only contain some seqs 30 | days_incomplete = [] 31 | i = 0 32 | while i < len(timestamps): 33 | if int(timestamps[i][8:]) != 1: 34 | i += 1 35 | elif i+T-1 < len(timestamps) and int(timestamps[i+T-1][8:]) == T: 36 | days.append(timestamps[i][:8]) 37 | i += T 38 | else: 39 | days_incomplete.append(timestamps[i][:8]) 40 | i += 1 41 | print("incomplete days: ", days_incomplete) 42 | days = set(days) 43 | idx = [] 44 | for i, t in enumerate(timestamps): 45 | if t[:8] in days: 46 | idx.append(i) 47 | 48 | data = data[idx] 49 | timestamps = [timestamps[i] for i in idx] 50 | return data, timestamps 51 | 52 | 53 | def split_by_time(data, timestamps, split_timestamp): 54 | # divide data into two subsets: 55 | # e.g., Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28 56 | assert(len(data) == len(timestamps)) 57 | assert(split_timestamp in set(timestamps)) 58 | 59 | data_1 = [] 60 | timestamps_1 = [] 61 | data_2 = [] 62 | timestamps_2 = [] 63 | switch = False 64 | for t, d in zip(timestamps, data): 65 | if split_timestamp == t: 66 | switch = True 67 | if switch is False: 68 | data_1.append(d) 69 | timestamps_1.append(t) 70 | else: 71 | data_2.append(d) 72 | timestamps_2.append(t) 73 | return (np.asarray(data_1), timestamps_1), (np.asarray(data_2), timestamps_2) 74 | 75 | 76 | def timeseries2seqs(data, timestamps, length=3, T=48): 77 | raw_ts = copy(timestamps) 78 | if type(timestamps[0]) != pd.Timestamp: 79 | timestamps = string2timestamp(timestamps, T=T) 80 | 81 | offset = pd.DateOffset(minutes=24 * 60 // T) 82 | 83 | 
breakpoints = [0] 84 | for i in range(1, len(timestamps)): 85 | if timestamps[i-1] + offset != timestamps[i]: 86 | print(timestamps[i-1], timestamps[i], raw_ts[i-1], raw_ts[i]) 87 | breakpoints.append(i) 88 | breakpoints.append(len(timestamps)) 89 | X = [] 90 | Y = [] 91 | for b in range(1, len(breakpoints)): 92 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 93 | idx = range(breakpoints[b-1], breakpoints[b]) 94 | for i in range(len(idx) - length): 95 | x = np.vstack(data[idx[i:i+length]]) 96 | y = data[idx[i+length]] 97 | X.append(x) 98 | Y.append(y) 99 | X = np.asarray(X) 100 | Y = np.asarray(Y) 101 | print("X shape: ", X.shape, "Y shape:", Y.shape) 102 | return X, Y 103 | 104 | def timeseries2seqs_meta(data, timestamps, length=3, T=48): 105 | raw_ts = copy(timestamps) 106 | if type(timestamps[0]) != pd.Timestamp: 107 | timestamps = string2timestamp(timestamps, T=T) 108 | 109 | offset = pd.DateOffset(minutes=24 * 60 // T) 110 | 111 | breakpoints = [0] 112 | for i in range(1, len(timestamps)): 113 | if timestamps[i-1] + offset != timestamps[i]: 114 | print(timestamps[i-1], timestamps[i], raw_ts[i-1], raw_ts[i]) 115 | breakpoints.append(i) 116 | breakpoints.append(len(timestamps)) 117 | X = [] 118 | Y = [] 119 | avail_timestamps = [] 120 | for b in range(1, len(breakpoints)): 121 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 122 | idx = range(breakpoints[b-1], breakpoints[b]) 123 | for i in range(len(idx) - length): 124 | avail_timestamps.append(raw_ts[idx[i+length]]) 125 | x = np.vstack(data[idx[i:i+length]]) 126 | y = data[idx[i+length]] 127 | X.append(x) 128 | Y.append(y) 129 | X = np.asarray(X) 130 | Y = np.asarray(Y) 131 | print("X shape: ", X.shape, "Y shape:", Y.shape) 132 | return X, Y, avail_timestamps 133 | 134 | 135 | def timeseries2seqs_peroid_trend(data, timestamps, length=3, T=48, peroid=pd.DateOffset(days=7), peroid_len=2): 136 | raw_ts = copy(timestamps) 137 | if type(timestamps[0]) != pd.Timestamp: 138 | timestamps = string2timestamp(timestamps, T=T) 139 | 140 | # timestamps index 141 | timestamp_idx = dict() 142 | for i, t in enumerate(timestamps): 143 | timestamp_idx[t] = i 144 | 145 | offset = pd.DateOffset(minutes=24 * 60 // T) 146 | 147 | breakpoints = [0] 148 | for i in range(1, len(timestamps)): 149 | if timestamps[i-1] + offset != timestamps[i]: 150 | print(timestamps[i-1], timestamps[i], raw_ts[i-1], raw_ts[i]) 151 | breakpoints.append(i) 152 | breakpoints.append(len(timestamps)) 153 | X = [] 154 | Y = [] 155 | for b in range(1, len(breakpoints)): 156 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 157 | idx = range(breakpoints[b-1], breakpoints[b]) 158 | for i in range(len(idx) - length): 159 | # period 160 | target_timestamp = timestamps[i+length] 161 | 162 | legal_idx = [] 163 | for pi in range(1, 1+peroid_len): 164 | if target_timestamp - peroid * pi not in timestamp_idx: 165 | break 166 | legal_idx.append(timestamp_idx[target_timestamp - peroid * pi]) 167 | # print("len: ", len(legal_idx), peroid_len) 168 | if len(legal_idx) != peroid_len: 169 | continue 170 | 171 | legal_idx += idx[i:i+length] 172 | 173 | # trend 174 | x = np.vstack(data[legal_idx]) 175 | y = data[idx[i+length]] 176 | X.append(x) 177 | Y.append(y) 178 | X = np.asarray(X) 179 | Y = np.asarray(Y) 180 | print("X shape: ", X.shape, "Y shape:", Y.shape) 181 | return X, Y 182 | 183 | 184 | def timeseries2seqs_3D(data, timestamps, length=3, T=48): 185 | raw_ts = copy(timestamps) 186 | if type(timestamps[0]) != pd.Timestamp: 187 | timestamps = 
string2timestamp(timestamps, T=T) 188 | 189 | offset = pd.DateOffset(minutes=24 * 60 // T) 190 | 191 | breakpoints = [0] 192 | for i in range(1, len(timestamps)): 193 | if timestamps[i-1] + offset != timestamps[i]: 194 | print(timestamps[i-1], timestamps[i], raw_ts[i-1], raw_ts[i]) 195 | breakpoints.append(i) 196 | breakpoints.append(len(timestamps)) 197 | X = [] 198 | Y = [] 199 | for b in range(1, len(breakpoints)): 200 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 201 | idx = range(breakpoints[b-1], breakpoints[b]) 202 | for i in range(len(idx) - length): 203 | x = data[idx[i:i+length]].reshape(-1, length, 32, 32) 204 | y = np.asarray([data[idx[i+length]]]).reshape(-1, 1, 32, 32) 205 | X.append(x) 206 | Y.append(y) 207 | X = np.asarray(X) 208 | Y = np.asarray(Y) 209 | print("X shape: ", X.shape, "Y shape:", Y.shape) 210 | return X, Y 211 | 212 | 213 | def bug_timeseries2seqs(data, timestamps, length=3, T=48): 214 | # have a bug 215 | if type(timestamps[0]) != pd.Timestamp: 216 | timestamps = string2timestamp(timestamps, T=T) 217 | 218 | offset = pd.DateOffset(minutes=24 * 60 // T) 219 | 220 | breakpoints = [0] 221 | for i in range(1, len(timestamps)): 222 | if timestamps[i-1] + offset != timestamps[i]: 223 | breakpoints.append(i) 224 | X = [] 225 | Y = [] 226 | for b in range(1, len(breakpoints)): 227 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 228 | idx = range(breakpoints[b-1], breakpoints[b]) 229 | for i in range(len(idx) - 3): 230 | x = np.vstack(data[idx[i:i+3]]) 231 | y = data[idx[i+3]] 232 | X.append(x) 233 | Y.append(y) 234 | X = np.asarray(X) 235 | Y = np.asarray(Y) 236 | print("X shape: ", X.shape, "Y shape:", Y.shape) 237 | return X, Y 238 | -------------------------------------------------------------------------------- /deepst/preprocessing/minmax_normalization.py: -------------------------------------------------------------------------------- 1 | """ 2 | MinMaxNormalization 3 | """ 4 | from __future__ import print_function 5 | import numpy as np 6 | np.random.seed(1337) # for reproducibility 7 | 8 | 9 | class MinMaxNormalization(object): 10 | '''MinMax Normalization --> [-1, 1] 11 | x = (x - min) / (max - min). 12 | x = x * 2 - 1 13 | ''' 14 | 15 | def __init__(self): 16 | pass 17 | 18 | def fit(self, X): 19 | self._min = X.min() 20 | self._max = X.max() 21 | print("min:", self._min, "max:", self._max) 22 | 23 | def transform(self, X): 24 | X = 1. * (X - self._min) / (self._max - self._min) 25 | X = X * 2. - 1. 26 | return X 27 | 28 | def fit_transform(self, X): 29 | self.fit(X) 30 | return self.transform(X) 31 | 32 | def inverse_transform(self, X): 33 | X = (X + 1.) / 2. 34 | X = 1. * X * (self._max - self._min) + self._min 35 | return X 36 | 37 | 38 | class MinMaxNormalization_01(object): 39 | '''MinMax Normalization --> [0, 1] 40 | x = (x - min) / (max - min). 41 | ''' 42 | 43 | def __init__(self): 44 | pass 45 | 46 | def fit(self, X): 47 | self._min = X.min() 48 | self._max = X.max() 49 | print("min:", self._min, "max:", self._max) 50 | 51 | def transform(self, X): 52 | X = 1. * (X - self._min) / (self._max - self._min) 53 | return X 54 | 55 | def fit_transform(self, X): 56 | self.fit(X) 57 | return self.transform(X) 58 | 59 | def inverse_transform(self, X): 60 | X = 1. 
* X * (self._max - self._min) + self._min 61 | return X 62 | -------------------------------------------------------------------------------- /deepst/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pandas as pd 3 | from datetime import datetime, timedelta 4 | import time 5 | import os 6 | 7 | 8 | def timestamp_str_new(cur_timestampes, T=48): 9 | os.environ['TZ'] = 'Asia/Shanghai' 10 | # print cur_timestampes 11 | if '-' in cur_timestampes[0]: 12 | return cur_timestampes 13 | ret = [] 14 | for v in cur_timestampes: 15 | '''TODO 16 | Bug here 17 | ''' 18 | cur_sec = time.mktime(time.strptime("%04i-%02i-%02i" % (int(v[:4]), int(v[4:6]), int(v[6:8])), "%Y-%m-%d")) + (int(v[8:]) * 24. * 60 * 60 // T) 19 | curr = time.localtime(cur_sec) 20 | if v == "20151101288" or v == "2015110124": 21 | print(v, time.strftime("%Y-%m-%d-%H-%M", curr), time.localtime(cur_sec), time.localtime(cur_sec - (int(v[8:]) * 24. * 60 * 60 // T)), time.localtime(cur_sec - (int(v[8:]) * 24. * 60 * 60 // T) + 3600 * 25)) 22 | ret.append(time.strftime("%Y-%m-%d-%H-%M", curr)) 23 | return ret 24 | 25 | 26 | def string2timestamp_future(strings, T=48): 27 | strings = timestamp_str_new(strings, T) 28 | timestamps = [] 29 | for v in strings: 30 | year, month, day, hour, tm_min = [int(z) for z in v.split('-')] 31 | timestamps.append(pd.Timestamp(datetime(year, month, day, hour, tm_min))) 32 | 33 | return timestamps 34 | 35 | 36 | def string2timestamp(strings, T=48): 37 | timestamps = [] 38 | 39 | time_per_slot = 24.0 / T 40 | num_per_T = T // 24 41 | for t in strings: 42 | year, month, day, slot = int(t[:4]), int(t[4:6]), int(t[6:8]), int(t[8:])-1 43 | timestamps.append(pd.Timestamp(datetime(year, month, day, hour=int(slot * time_per_slot), minute=(slot % num_per_T) * int(60.0 * time_per_slot)))) 44 | 45 | return timestamps 46 | 47 | 48 | def timestamp2string(timestamps, T=48): 49 | # timestamps = timestamp_str_new(timestamps) 50 | num_per_T = T // 24 51 | return ["%s%02i" % (ts.strftime('%Y%m%d'), 52 | int(1+ts.to_datetime().hour*num_per_T+ts.to_datetime().minute/(60 // num_per_T))) for ts in timestamps] 53 | # int(1+ts.to_datetime().hour*2+ts.to_datetime().minute/30)) for ts in timestamps] 54 | -------------------------------------------------------------------------------- /deepst/utils/eval.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | import numpy as np 4 | # np.random.seed(1337) # for reproducibility 5 | 6 | 7 | def rmse(Y_true, Y_pred): 8 | # https://www.kaggle.com/wiki/RootMeanSquaredError 9 | from sklearn.metrics import mean_squared_error 10 | print('shape:', Y_true.shape, Y_pred.shape) 11 | print("===RMSE===") 12 | # in 13 | RMSE = mean_squared_error(Y_true[:, 0].flatten(), Y_pred[:, 0].flatten())**0.5 14 | print('inflow: ', RMSE) 15 | # out 16 | if Y_true.shape[1] > 1: 17 | RMSE = mean_squared_error(Y_true[:, 1].flatten(), Y_pred[:, 1].flatten())**0.5 18 | print('outflow: ', RMSE) 19 | # new 20 | if Y_true.shape[1] > 2: 21 | RMSE = mean_squared_error(Y_true[:, 2].flatten(), Y_pred[:, 2].flatten())**0.5 22 | print('newflow: ', RMSE) 23 | # end 24 | if Y_true.shape[1] > 3: 25 | RMSE = mean_squared_error(Y_true[:, 3].flatten(), Y_pred[:, 3].flatten())**0.5 26 | print('endflow: ', RMSE) 27 | 28 | RMSE = mean_squared_error(Y_true.flatten(), Y_pred.flatten())**0.5 29 | print("total rmse: ", RMSE) 30 | print("===RMSE===") 31 | return 
32 | 
33 | 
34 | def mean_absolute_percentage_error(y_true, y_pred):
35 |     idx = np.nonzero(y_true)
36 |     return np.mean(np.abs((y_true[idx] - y_pred[idx]) / y_true[idx])) * 100
37 | 
38 | 
39 | def mape(Y_true, Y_pred):
40 |     print("===MAPE===")
41 |     # in
42 |     MAPE = mean_absolute_percentage_error(Y_true[:, 0].flatten(), Y_pred[:, 0].flatten())
43 |     print("inflow: ", MAPE)
44 |     # out (guarded like `rmse` above so 2-flow data does not raise an IndexError)
45 |     if Y_true.shape[1] > 1:
46 |         MAPE = mean_absolute_percentage_error(Y_true[:, 1].flatten(), Y_pred[:, 1].flatten())
47 |         print("outflow: ", MAPE)
48 |     # new
49 |     if Y_true.shape[1] > 2:
50 |         MAPE = mean_absolute_percentage_error(Y_true[:, 2].flatten(), Y_pred[:, 2].flatten())
51 |         print("newflow: ", MAPE)
52 |     # end
53 |     if Y_true.shape[1] > 3:
54 |         MAPE = mean_absolute_percentage_error(Y_true[:, 3].flatten(), Y_pred[:, 3].flatten())
55 |         print("endflow: ", MAPE)
56 |     MAPE = mean_absolute_percentage_error(Y_true.flatten(), Y_pred.flatten())
57 |     print("total mape: ", MAPE)
58 |     print("===MAPE===")
59 |     return MAPE
60 | 
--------------------------------------------------------------------------------
/deepst/utils/evalMultiStepAhead.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import sys
3 | from deepst_flow.models.gan import generator_model
4 | from deepst_flow.datasets import load_stdata
5 | from deepst_flow.preprocessing import MinMaxNormalization
6 | from deepst_flow.preprocessing import remove_incomplete_days
7 | # import h5py
8 | import numpy as np
9 | from keras.optimizers import Adam
10 | import os
11 | # from keras.callbacks import EarlyStopping
12 | import cPickle as pickle
13 | import time
14 | import pandas as pd
15 | from copy import copy
16 | from deepst_flow.config import Config
17 | from deepst_flow.datasets.STMatrix import STMatrix
18 | from deepst_flow.utils.eval import rmse
19 | np.random.seed(1337)  # for reproducibility
20 | DATAPATH = Config().DATAPATH
21 | print(DATAPATH)
22 | 
23 | 
24 | def period_trend(period=1, trend=1):
25 |     model_name = sys.argv[1]
26 |     steps = 24
27 |     Period = 7
28 | 
29 |     T = 48  # number of timeslots per day
30 |     len_seq = 3
31 |     nb_flow = 4
32 |     nb_days = 120
33 |     # divide data into two subsets:
34 |     # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
35 |     len_train = T * (nb_days - 7)
36 |     len_test = T * 7
37 | 
38 |     data, timestamps = load_stdata(os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
39 |     print(timestamps)
40 |     # remove days that do not have 48 timeslots
41 |     data, timestamps = remove_incomplete_days(data, timestamps, T)
42 | 
43 |     # minmax_scale
44 |     data_train = data[:len_train]
45 |     mmn = MinMaxNormalization()
46 |     mmn.fit(data_train)
47 |     data = mmn.transform(data)
48 | 
49 |     st = STMatrix(data, timestamps, T)
50 | 
51 |     # save TCN and MMS
52 |     fpkl = open('preprocessing.pkl', 'wb')
53 |     for obj in [mmn]:  # [tcn, mmn]:
54 |         pickle.dump(obj, fpkl)
55 |     fpkl.close()
56 | 
57 |     if period == 1 and trend == 1:
58 |         depends = [1, 2, 3, Period*T, Period*T+1, Period*T+2, Period*T+3]
59 |         len_close = 3
60 |     elif period == 1:
61 |         depends = [1] + [Period * T * j for j in xrange(1, len_seq+1)]
62 |         len_close = 1
63 |     elif trend == 1:
64 |         depends = range(1, 1+len_seq)
65 |         len_close = 3
66 |     else:
67 |         depends = [1]
68 |         len_close = 1
69 |     # else:
70 |     #     print("unknown args")
71 |     #     sys.exit(-1)
72 | 
73 |     generator = generator_model(nb_flow, len(depends), 32, 32)
74 |     adam = Adam()
75 |     generator.compile(loss='mean_absolute_error', optimizer=adam)
76 |     generator.load_weights(model_name)
77 | 
78 |     # instance-based dataset --> sequences in the format (X, Y), where X is a sequence of images and Y is an image.
79 |     offset_frame = pd.DateOffset(minutes=24 * 60 // T)
80 |     Y_test = st.data[-(len_test+steps-1):]
81 |     Y_pd_timestamps = st.pd_timestamps[-(len_test+steps-1):]
82 | 
83 |     X_test = []
84 |     for pd_timestamp in Y_pd_timestamps:
85 |         x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
86 |         X_test.append(np.vstack(x))
87 |     X_test = np.asarray(X_test)
88 | 
89 |     Y_true = mmn.inverse_transform(Y_test[-len_test:])
90 | 
91 |     Y_hats = []
92 |     for k in xrange(1, steps+1):
93 |         print("\n\n==%d-step rmse==" % k)
94 |         ts = time.time()
95 |         Y_hat = generator.predict(X_test)
96 |         Y_hats.append(copy(Y_hat))
97 |         print('Y_hat shape', Y_hat.shape, 'X_test shape:', X_test.shape)
98 |         # eval
99 |         Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
100 |         rmse(Y_true, Y_pred)
101 |         X_test_hat = copy(X_test[1:])
102 |         for j in xrange(1, min(k, len_close) + 1):
103 |             # Y^\hat _t replace
104 |             X_test_hat[:, nb_flow*(j-1):nb_flow*j] = Y_hats[-j][:-j]
105 | 
106 |         X_test = copy(X_test_hat)
107 |         print("\nelapsed time (eval): ", time.time() - ts)
108 | 
109 | 
110 | def period_trend_closeness(len_closeness=3, len_trend=3, TrendInterval=7, len_period=3, PeriodInterval=1):
111 |     print("start: period_trend_closeness")
112 |     model_name = sys.argv[1]
113 |     steps = 24
114 |     # Period = 7
115 | 
116 |     T = 48  # number of timeslots per day
117 |     # len_seq = 3
118 |     nb_flow = 4
119 |     nb_days = 120
120 |     # divide data into two subsets:
121 |     # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
122 |     len_train = T * (nb_days - 7)
123 |     len_test = T * 7
124 | 
125 |     data, timestamps = load_stdata(os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
126 |     print(timestamps)
127 |     # remove days that do not have 48 timeslots
128 |     data, timestamps = remove_incomplete_days(data, timestamps, T)
129 | 
130 |     # minmax_scale
131 |     data_train = data[:len_train]
132 |     mmn = MinMaxNormalization()
133 |     mmn.fit(data_train)
134 |     data = mmn.transform(data)
135 | 
136 |     st = STMatrix(data, timestamps, T)
137 | 
138 |     # save TCN and MMS
139 |     fpkl = open('preprocessing.pkl', 'wb')
140 |     for obj in [mmn]:  # [tcn, mmn]:
141 |         pickle.dump(obj, fpkl)
142 |     fpkl.close()
143 | 
144 |     depends = range(1, len_closeness+1) + \
145 |         [PeriodInterval * T * j for j in xrange(1, len_period+1)] + \
146 |         [TrendInterval * T * j for j in xrange(1, len_trend+1)]
147 | 
148 |     generator = generator_model(nb_flow, len(depends), 32, 32)
149 |     adam = Adam()
150 |     generator.compile(loss='mean_absolute_error', optimizer=adam)
151 |     generator.load_weights(model_name)
152 | 
153 |     # instance-based dataset --> sequences in the format (X, Y), where X is a sequence of images and Y is an image.
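# (how the multi-step evaluation below works: each test instance at time t stacks
#  the frames at t - j*offset_frame for every offset j in `depends`; after the k-th
#  prediction pass, the first min(k, len_closeness) closeness channel blocks are
#  overwritten with the model's own earlier outputs, so the k-step-ahead RMSE is
#  measured under true rollout conditions rather than with ground-truth inputs.)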
154 |     offset_frame = pd.DateOffset(minutes=24 * 60 // T)
155 |     Y_test = st.data[-(len_test+steps-1):]
156 |     Y_pd_timestamps = st.pd_timestamps[-(len_test+steps-1):]
157 | 
158 |     X_test = []
159 |     for pd_timestamp in Y_pd_timestamps:
160 |         x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
161 |         X_test.append(np.vstack(x))
162 |     X_test = np.asarray(X_test)
163 | 
164 |     Y_true = mmn.inverse_transform(Y_test[-len_test:])
165 | 
166 |     Y_hats = []
167 |     for k in xrange(1, steps+1):
168 |         print("\n\n==%d-step rmse==" % k)
169 |         ts = time.time()
170 |         Y_hat = generator.predict(X_test)
171 |         Y_hats.append(copy(Y_hat))
172 |         print('Y_hat shape', Y_hat.shape, 'X_test shape:', X_test.shape)
173 |         # eval
174 |         Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
175 |         rmse(Y_true, Y_pred)
176 |         X_test_hat = copy(X_test[1:])
177 |         for j in xrange(1, min(k, len_closeness) + 1):
178 |             # Y^\hat _t replace
179 |             X_test_hat[:, nb_flow*(j-1):nb_flow*j] = Y_hats[-j][:-j]
180 | 
181 |         X_test = copy(X_test_hat)
182 |         print("\nelapsed time (eval): ", time.time() - ts)
183 | 
184 | if __name__ == '__main__':
185 |     if int(sys.argv[2]) == 0:  # period & trend
186 |         period_trend(1, 1)
187 |     elif int(sys.argv[2]) == 1:  # period
188 |         period_trend(1, 0)
189 |     elif int(sys.argv[2]) == 2:  # trend
190 |         period_trend(0, 1)
191 |     elif int(sys.argv[2]) == 3:
192 |         period_trend(0, 0)
193 |     else:
194 |         period_trend_closeness()
195 |         # print("unknown args")
196 |         # sys.exit(-1)
--------------------------------------------------------------------------------
/deepst/utils/evalMultiStepAhead4SeqModel.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import sys
3 | from deepst_flow.models.gan import generator_model
4 | from deepst_flow.datasets import load_stdata
5 | from deepst_flow.preprocessing import MinMaxNormalization
6 | from deepst_flow.preprocessing import remove_incomplete_days
7 | # import h5py
8 | import numpy as np
9 | from keras.optimizers import Adam
10 | import os
11 | # from keras.callbacks import EarlyStopping
12 | import cPickle as pickle
13 | import time
14 | import pandas as pd
15 | from copy import copy
16 | from deepst_flow.config import Config
17 | from deepst_flow.datasets.STMatrix import STMatrix
18 | from deepst_flow.utils.eval import rmse
19 | from deepst_flow.models.rnn import rnn_model
20 | np.random.seed(1337)  # for reproducibility
21 | DATAPATH = Config().DATAPATH
22 | print(DATAPATH)
23 | 
24 | 
25 | def seq_model(len_seq=3):
26 |     model_name = sys.argv[1]
27 |     steps = 24
28 |     # Period = 7
29 | 
30 |     T = 48  # number of timeslots per day
31 |     nb_flow = 4
32 |     nb_days = 120
33 |     # divide data into two subsets:
34 |     # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
35 |     len_train = T * (nb_days - 7)
36 |     len_test = T * 7
37 | 
38 |     data, timestamps = load_stdata(os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
39 |     print(timestamps)
40 |     # remove days that do not have 48 timeslots
41 |     data, timestamps = remove_incomplete_days(data, timestamps, T)
42 |     data = data.reshape(data.shape[0], -1)
43 | 
44 |     # minmax_scale
45 |     data_train = data[:len_train]
46 |     mmn = MinMaxNormalization()
47 |     mmn.fit(data_train)
48 |     data = mmn.transform(data)
49 | 
50 |     st = STMatrix(data, timestamps, T)
51 | 
52 |     # save TCN and MMS
53 |     fpkl = open('preprocessing.pkl', 'wb')
54 |     for obj in [mmn]:  # [tcn, mmn]:
55 |         pickle.dump(obj, fpkl)
56 |     fpkl.close()
57 | 
58 |     depends = range(1, 1+len_seq)
59 |     generator = rnn_model(nb_flow, len(depends), 32, 32)
60 |     adam = Adam()
61 |     generator.compile(loss='mean_absolute_error', optimizer=adam)
62 |     generator.load_weights(model_name)
63 | 
64 |     # instance-based dataset --> sequences in the format (X, Y), where X is a sequence of images and Y is an image.
65 |     offset_frame = pd.DateOffset(minutes=24 * 60 // T)
66 |     Y_test = st.data[-(len_test+steps-1):]
67 |     Y_pd_timestamps = st.pd_timestamps[-(len_test+steps-1):]
68 | 
69 |     X_test = []
70 |     for pd_timestamp in Y_pd_timestamps:
71 |         x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
72 |         X_test.append(np.vstack(x))
73 |     X_test = np.asarray(X_test)
74 | 
75 |     Y_true = mmn.inverse_transform(Y_test[-len_test:])
76 |     Y_true = Y_true.reshape(len(Y_true), nb_flow, -1)
77 | 
78 |     Y_hats = []
79 |     for k in xrange(1, steps+1):
80 |         print("\n\n==%d-step rmse==" % k)
81 |         ts = time.time()
82 |         Y_hat = generator.predict(X_test)
83 |         Y_hats.append(copy(Y_hat))
84 |         print('Y_hat shape', Y_hat.shape, 'X_test shape:', X_test.shape)
85 |         # eval
86 |         Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
87 |         Y_pred = Y_pred.reshape(len(Y_pred), nb_flow, -1)
88 |         rmse(Y_true, Y_pred)
89 |         X_test_hat = copy(X_test[1:])
90 |         for j in xrange(1, min(k, len_seq) + 1):
91 |             # Y^\hat _t replace
92 |             X_test_hat[:, j-1] = Y_hats[-j][:-j]
93 | 
94 |         X_test = copy(X_test_hat)
95 |         print("\nelapsed time (eval): ", time.time() - ts)
96 | 
97 | if __name__ == '__main__':
98 |     if int(sys.argv[2]) > 0:
99 |         seq_model(len_seq=int(sys.argv[2]))
100 |     else:
101 |         print("unknown args")
102 |         sys.exit(-1)
103 | 
--------------------------------------------------------------------------------
/deepst/utils/evalMultiStepAheadNew.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import sys
3 | from deepst_flow.models.STConvolution import seqCNN_CPT2
4 | from deepst_flow.datasets import load_stdata
5 | from deepst_flow.preprocessing import MinMaxNormalization
6 | from deepst_flow.preprocessing import remove_incomplete_days
7 | # import h5py
8 | import numpy as np
9 | from keras.optimizers import Adam
10 | import os
11 | # from keras.callbacks import EarlyStopping
12 | import cPickle as pickle
13 | import time
14 | import pandas as pd
15 | from copy import copy
16 | from deepst_flow.config import Config
17 | from deepst_flow.datasets.STMatrix import STMatrix
18 | from deepst_flow.utils.eval import rmse
19 | np.random.seed(1337)  # for reproducibility
20 | DATAPATH = Config().DATAPATH
21 | print(DATAPATH)
22 | 
23 | 
24 | def period_trend_closeness(len_closeness=3, len_trend=3, TrendInterval=7, len_period=3, PeriodInterval=1):
25 |     print("start: period_trend_closeness")
26 |     model_name = sys.argv[1]
27 |     steps = 24
28 |     # Period = 7
29 | 
30 |     T = 48  # number of timeslots per day
31 |     # len_seq = 3
32 |     nb_flow = 2
33 |     # nb_days = 120
34 |     # divide data into two subsets:
35 |     # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
36 |     # len_train = T * (nb_days - 7)
37 |     len_test = T * 7
38 | 
39 |     data, timestamps = load_stdata(os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
40 |     print(timestamps)
41 |     # remove days that do not have 48 timeslots
42 |     data, timestamps = remove_incomplete_days(data, timestamps, T)
43 |     data = data[:, :nb_flow]
44 |     # fit the MinMax scaler on the training portion only (everything before the final test week)
45 |     data_train = data[:-len_test]
46 |     mmn = MinMaxNormalization()
47 |     mmn.fit(data_train)
48 |     data = mmn.transform(data)
49 | 
50 |     st = STMatrix(data, timestamps, T)
51 | 
52 |     # save TCN and MMS
53 |     fpkl = open('preprocessing.pkl', 'wb')
54 |     for obj in [mmn]:  # [tcn, mmn]:
55 |         pickle.dump(obj, fpkl)
56 |     fpkl.close()
57 | 
58 |     depends = [range(1, len_closeness+1),
59 |                [PeriodInterval * T * j for j in xrange(1, len_period+1)],
60 |                [TrendInterval * T * j for j in xrange(1, len_trend+1)]]
61 |     if len_closeness > 0:
62 |         c_conf = (nb_flow, len_closeness, 32, 32)
63 |     else:
64 |         c_conf = None
65 |     if len_period > 0:
66 |         p_conf = (nb_flow, len_period, 32, 32)
67 |     else:
68 |         p_conf = None
69 |     if len_trend > 0:
70 |         t_conf = (nb_flow, len_trend, 32, 32)
71 |     else:
72 |         t_conf = None
73 |     generator = seqCNN_CPT2(c_conf=c_conf, p_conf=p_conf, t_conf=t_conf)
74 | 
75 |     adam = Adam()
76 |     generator.compile(loss='mean_absolute_error', optimizer=adam)
77 |     generator.load_weights(model_name)
78 | 
79 |     # instance-based dataset --> sequences in the format (X, Y), where X is a sequence of images and Y is an image.
80 |     offset_frame = pd.DateOffset(minutes=24 * 60 // T)
81 |     Y_test = st.data[-(len_test+steps-1):]
82 |     Y_pd_timestamps = st.pd_timestamps[-(len_test+steps-1):]
83 | 
84 |     XC = []
85 |     XP = []
86 |     XT = []
87 |     for pd_timestamp in Y_pd_timestamps:
88 |         # x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
89 |         # X_test.append(np.vstack(x))
90 |         x_c = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends[0]]
91 |         x_p = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends[1]]
92 |         x_t = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends[2]]
93 |         if len_closeness > 0:
94 |             XC.append(np.vstack(x_c))
95 |         if len_period > 0:
96 |             XP.append(np.vstack(x_p))
97 |         if len_trend > 0:
98 |             XT.append(np.vstack(x_t))
99 |     if len_closeness > 0:
100 |         XC = np.asarray(XC)
101 |     if len_period > 0:
102 |         XP = np.asarray(XP)
103 |     if len_trend > 0:
104 |         XT = np.asarray(XT)
105 |     print('XC/XP/XT shapes:', np.shape(XC), np.shape(XP), np.shape(XT))  # np.shape also works when a component is unused (still an empty list)
106 | 
107 |     # X_test = np.asarray(X_test)
108 |     XAll = []
109 |     for l, X_ in zip([len_closeness, len_period, len_trend], [XC, XP, XT]):
110 |         if l > 0:
111 |             XAll.append(X_)
112 | 
113 |     Y_true = mmn.inverse_transform(Y_test[-len_test:])
114 |     Y_hats = []
115 | 
116 |     # for i in xrange(len(XAll[0])):
117 |     #     x = []
118 |     #     for _X in XAll:
119 |     #         x.append([_X[i]])
120 | 
121 |     for k in xrange(1, steps+1):
122 |         print("\n\n==%d-step rmse==" % k)
123 |         ts = time.time()
124 |         # k^th predicted sequence
125 |         Y_hat = generator.predict(XAll)
126 |         Y_hats.append(copy(Y_hat))
127 |         print('Y_hat shape', Y_hat.shape)
128 |         # eval
129 |         Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
130 |         rmse(Y_true, Y_pred)
131 |         X_hat = []
132 |         for _X in XAll:
133 |             X_hat.append(copy(_X[1:]))
134 |         # X_hat = [XC[1:], XP[1:], XT[1:]]  # copy(X_test[1:])
135 | 
136 |         '''
137 |         # for j in xrange(len_closeness-1, 0):
138 |         for j in xrange(1, min(k, len_closeness) + 1):
139 |             # last sequence -j
140 |             if j == 1:
141 |                 X_hat[0][:, -1 * nb_flow:] = Y_hats[-j][:-j]
142 |             else:
143 |                 X_hat[0][:, nb_flow*(-j):nb_flow*(-j+1)] = Y_hats[-j][:-j]
144 |         '''
145 | 
146 |         XC_hat = X_hat[0]
147 |         len_replace = min(k, len_closeness)
148 | 
149 |         for j in xrange(len_replace):
150 |             # XC_hat[:, nb_flow*(j):nb_flow*(j+1)] = Y_hats[-(j+1)][:-(len_replace-j)]
151 |             XC_hat[:, nb_flow*(j):nb_flow*(j+1)] = Y_hats[-(j+1)][:-(j+1)]
152 |             # XC_hat[:, nb_flow*(j):nb_flow*(j+1)] = Y_hats[j][:-(j+1)]
153 | 
154 |         # for j in xrange(1, min(k, len_closeness) + 1):
155 |         #     # Y^\hat _t replace
156 |         #     X_hat[0][:, nb_flow*(j-1):nb_flow*j] = Y_hats[-j][:-j]
157 | 
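# (index note for the replacement loop above: closeness channel block j holds the
#  frame j+1 slots back, so it is refilled with Y_hats[-(j+1)], the prediction
#  produced j passes earlier, trimmed of its last j+1 rows to stay aligned with the
#  instances that survive each one-step `_X[1:]` shift.)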
158 |         XAll = X_hat
159 |         print("\nelapsed time (eval): ", time.time() - ts)
160 | 
161 | if __name__ == '__main__':
162 |     period_trend_closeness(len_closeness=int(sys.argv[2]), len_period=int(sys.argv[3]), len_trend=int(sys.argv[4]))
163 | 
--------------------------------------------------------------------------------
/deepst/utils/fill_missing_vals.py:
--------------------------------------------------------------------------------
1 | """
2 | Usage: python fill_missing_vals.py [fname] [preprocessing_name] [model_name]
3 | """
4 | from __future__ import print_function
5 | import sys
6 | # sys.path.append("/home/azureuser/workspace/deepst_flow")
7 | 
8 | from deepst_flow.models.gan import generator_model
9 | # from deepst_flow.datasets import load_bj15
10 | from deepst_flow.datasets import load_stdata
11 | from deepst_flow.preprocessing import TemporalConstrastNormalization, MinMaxNormalization
12 | from deepst_flow.preprocessing import remove_incomplete_days, split_by_time, timeseries2seqs
13 | import h5py
14 | import numpy as np
15 | 
16 | from keras.optimizers import Adam
17 | import os
18 | from keras.callbacks import EarlyStopping
19 | import cPickle as pickle
20 | import time
21 | import pandas as pd
22 | 
23 | np.random.seed(1337)  # for reproducibility
24 | from deepst_flow.config import Config
25 | DATAPATH = Config().DATAPATH
26 | 
27 | if len(sys.argv) != 4:
28 |     print(__doc__)
29 |     sys.exit(-1)
30 | 
31 | fname = sys.argv[1]
32 | data, timestamps = load_stdata(os.path.join(DATAPATH, '{}.h5'.format(fname)))
33 | 
34 | T = 48
35 | slot_time = 24. * 60 / 48
36 | # setting
37 | nb_flow = 2
38 | seq_len = 3
39 | 
40 | data = data[:, :nb_flow]
41 | 
42 | preprocessing_name = sys.argv[2]
43 | model_name = sys.argv[3]
44 | 
45 | # load TCN and MMS
46 | fpkl = open(preprocessing_name, 'rb')
47 | mmn = pickle.load(fpkl)
48 | print("Load Normalization Successfully: ", mmn)
49 | 
50 | # load model
51 | generator = generator_model(nb_flow, seq_len, 32, 32)
52 | adam = Adam(lr=0.0001)
53 | generator.compile(loss='mean_absolute_error', optimizer=adam)
54 | generator.load_weights(model_name)
55 | print("Load Model Successfully")
56 | 
57 | # working
58 | data = mmn.transform(data)
59 | offset = pd.DateOffset(minutes=24 * 60 // T)
60 | 
61 | from deepst_flow.utils import string2timestamp, timestamp2string
62 | timestamps = string2timestamp(timestamps, T=T)
63 | 
64 | new_timestamps = timestamps[:seq_len]
65 | new_data = list(data[:seq_len])
66 | 
67 | i = seq_len
68 | 
69 | while i < len(timestamps):
70 |     if new_timestamps[-1] + offset == timestamps[i]:
71 |         new_timestamps.append(timestamps[i])
72 |         new_data.append(data[i])
73 |         i += 1
74 |     else:
75 |         print('insert: ', new_timestamps[-1] + offset)
76 |         new_timestamps.append(new_timestamps[-1] + offset)
77 |         x = np.vstack(new_data[-seq_len:])
78 |         x = x[np.newaxis]
79 |         Y_pred = generator.predict(x)[0]
80 |         new_data.append(Y_pred)
81 | 
82 | new_data = np.asarray(new_data)
83 | print("shape: ", new_data.shape, "len:", len(new_timestamps))
84 | new_data = mmn.inverse_transform(new_data)
85 | 
86 | h5 = h5py.File(os.path.join(DATAPATH, '{}_nomissing.h5'.format(fname)), 'w')
87 | h5.create_dataset('data', data=new_data)
88 | h5.create_dataset('date', data=timestamp2string(new_timestamps, T=48))
89 | h5.close()
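The while-loop above is the whole idea of `fill_missing_vals.py`: walk the timeline, and whenever the next expected timeslot is absent, synthesize its frame from the last `seq_len` frames using the model itself. A stripped-down sketch of the same pattern follows; the `fill_gaps` helper, the `predict_next` stub, and the 30-minute offset are illustrative stand-ins, not part of the repo:

```python
import numpy as np
import pandas as pd


def fill_gaps(frames, stamps, predict_next, seq_len=3,
              offset=pd.DateOffset(minutes=30)):
    """Insert predicted frames wherever consecutive stamps are not `offset` apart."""
    out_frames, out_stamps = list(frames[:seq_len]), list(stamps[:seq_len])
    i = seq_len
    while i < len(stamps):
        if out_stamps[-1] + offset == stamps[i]:  # no gap: keep the observed frame
            out_frames.append(frames[i])
            out_stamps.append(stamps[i])
            i += 1
        else:  # gap: predict the missing slot from the last seq_len (possibly synthetic) frames
            out_stamps.append(out_stamps[-1] + offset)
            out_frames.append(predict_next(np.stack(out_frames[-seq_len:])))
    return np.asarray(out_frames), out_stamps


# stub predictor: simple persistence (repeat the most recent frame)
predict_next = lambda history: history[-1]
```

Note that filled slots feed later predictions, so long gaps degrade gracefully rather than failing outright.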
--------------------------------------------------------------------------------
/deepst/utils/runMe.bat:
--------------------------------------------------------------------------------
1 | 
2 | set PYTHON="D:\Users\junbzha\softwares\python"
3 | 
4 | call %PYTHON% preprocessing.py
5 | 
6 | pause
--------------------------------------------------------------------------------
/deepst/utils/txt2hdf5_InOut.py:
--------------------------------------------------------------------------------
1 | """
2 | Convert per-timeslot COO-format flow text files into one HDF5 dataset per period.
3 | """
4 | from __future__ import print_function
5 | import h5py
6 | import itertools
7 | import sys
8 | import os
9 | import numpy as np
10 | np.random.seed(1337)  # for reproducibility
11 | import time
12 | from datetime import datetime, timedelta
13 | import pandas as pd
14 | import scipy.sparse as sps
15 | from deepst_flow.config import Config
16 | # DATAPATH = Config().DATAPATH
17 | 
18 | DATAPATH = "D:/Users/junbzha/data/traffic_flow"
19 | print(DATAPATH)
20 | 
21 | rootdir = "D:/Users/xiuwen/Project/TrajectoryMap/Data/32_30"
22 | grid_size = 32
23 | 
24 | def load_data_from_COO_format(input_path):
25 |     """timeslot,x,y,inCount,outCount,newCount,endCount"""
26 |     data = np.loadtxt(input_path, delimiter=',')
27 |     I = data[:, 1].astype(int) - 1  # x-axis (1-based in the files)
28 |     J = data[:, 2].astype(int) - 1  # y-axis (1-based in the files)
29 |     inflow, outflow = data[:, 3], data[:, 4]
30 |     inflow = sps.coo_matrix((inflow, (I, J)), shape=(grid_size, grid_size)).toarray()
31 |     outflow = sps.coo_matrix((outflow, (I, J)), shape=(grid_size, grid_size)).toarray()
32 |     # newflow = sps.coo_matrix((newflow, (I, J)), shape=(grid_size, grid_size)).toarray()
33 |     # endflow = sps.coo_matrix((endflow, (I, J)), shape=(grid_size, grid_size)).toarray()
34 |     return np.asarray([inflow, outflow])
35 | 
36 | 
37 | def get_file_lines(input_path):
38 |     with open(input_path) as f:
39 |         return len(f.readlines())
40 | 
41 | def timestamp2string(timestamps):
42 |     return ["%s%02i" % (ts.strftime('%Y%m%d'),
43 |                         int(1+ts.to_datetime().hour*2+ts.to_datetime().minute/30)) for ts in timestamps]
44 | 
45 | def load_data(rootdir=rootdir, start='3/1/2015', end='7/1/2015', freq='30Min', year=13):
46 |     rng = pd.date_range(start=start, end=end, periods=None, freq=freq)
47 |     data = dict()
48 |     data_mat = []
49 |     avail_timestamp = []
50 |     for timestamp in rng:
51 |         hour, minute = timestamp.to_datetime().hour, timestamp.to_datetime().minute
52 |         # print(timestamp, "%s%02i" % (timestamp.strftime('%Y%m%d'), int(1+hour*2+minute/30)))
53 |         # subdir = "%04i%02i" % (timestamp.to_datetime().year, timestamp.to_datetime().month)
54 |         fname = "%s%02i.txt" % (timestamp.strftime('%Y%m%d'), int(1+hour*2+minute/30))
55 |         input_path = os.path.join(rootdir, fname)
56 |         if os.path.exists(input_path) is False:
57 |             print('file cannot be found:', input_path)
58 |             continue
59 |         if get_file_lines(input_path) < grid_size * grid_size * 0.25:
60 |             continue
61 |         avail_timestamp.append(timestamp)
62 |         print("processing", input_path)
63 |         data_tensor = load_data_from_COO_format(input_path)
64 |         data_mat.append(data_tensor)
65 |         data[timestamp] = data_tensor
66 | 
67 |     print("len:", len(avail_timestamp))
68 |     h5 = h5py.File(os.path.join(DATAPATH, 'BJ', 'BJ{}_M{}_T30_Flow.h5'.format(year, grid_size)), 'w')
69 |     h5.create_dataset("date", data=timestamp2string(avail_timestamp))
70 |     h5.create_dataset("data", data=np.asarray(data_mat))
71 |     h5.close()
72 | 
73 | load_data(start='7/1/2013', end='11/1/2013', year=13)
74 | load_data(start='3/1/2014', end='7/1/2014', year=14)
75 | load_data(start='3/1/2015', end='7/1/2015', year=15)
76 | load_data(start='11/1/2015', end='4/11/2016', year=16)
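For orientation, each input text file read above holds one timeslot of the grid in COO form, one non-empty cell per line, with the column order given in the function's docstring (timeslot, x, y, inCount, outCount, newCount, endCount); the coordinates are 1-based, hence the `- 1` when building the sparse matrices. The rows below are invented purely for illustration:

```
5,12,20,31,27,4,6
5,12,21,18,22,1,3
```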
--------------------------------------------------------------------------------
/deepst/utils/viewRetFromPkl.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | from __future__ import print_function
4 | import sys
5 | import cPickle as pickle
6 | 
7 | 
8 | def view(fname):
9 |     pkl = pickle.load(open(fname, 'rb'))
10 |     for ke in pkl.keys():
11 |         print('=' * 10)
12 |         print(ke)
13 |         print(pkl[ke])
14 | 
15 | view(sys.argv[1])
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/BikeNYC/README.md:
--------------------------------------------------------------------------------
1 | This directory will soon contain the code for our AAAI 2017 paper:
2 | ```
3 | Junbo Zhang, Yu Zheng, Dekang Qi. Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction. In AAAI 2017.
4 | ```
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/README.md:
--------------------------------------------------------------------------------
1 | Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction
2 | ==========================================================================
3 | 
4 | The files in this directory recreate some of the experiments reported in the paper:
5 | 
6 | `Junbo Zhang, Yu Zheng, Dekang Qi. Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction. In AAAI 2017. `
7 | 
8 | **If you use the code provided here, please cite that paper.**
9 | 
10 | 1. Reproduce **TaxiBJ** results: [ReadMe](TaxiBJ/)
11 | 
12 | 2. Reproduce **BikeNYC** results: [ReadMe](BikeNYC/)
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/TaxiBJ/README.md:
--------------------------------------------------------------------------------
1 | 1. Install [**DeepST**](https://github.com/lucktroy/DeepST).
2 | 
3 | 2. Download the [**TaxiBJ**](https://github.com/lucktroy/DeepST/tree/master/data/TaxiBJ) data.
4 | 
5 | 3. Reproduce the results of ST-ResNet and its variants.
6 | 
7 | * Result of Model **L2-E**
8 | 
9 | ```
10 | THEANO_FLAGS="device=gpu,floatX=float32" python exptTaxiBJ.py 2
11 | ```
12 | 
13 | * Result of Model **L4-E**
14 | 
15 | ```
16 | THEANO_FLAGS="device=gpu,floatX=float32" python exptTaxiBJ.py 4
17 | ```
18 | 
19 | * Result of Model **L12-E**
20 | 
21 | ```
22 | THEANO_FLAGS="device=gpu,floatX=float32" python exptTaxiBJ.py 12
23 | ```
24 | 
25 | * Result of Model **L12**
26 | ```
27 | THEANO_FLAGS="device=gpu,floatX=float32" python exptTaxiBJ-L12.py
28 | ```
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/TaxiBJ/exptTaxiBJ-L12.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | THEANO_FLAGS="device=gpu0" python exptTaxiBJ-L12.py
4 | """
5 | from __future__ import print_function
6 | import os
7 | import cPickle as pickle
8 | import time
9 | import numpy as np
10 | import h5py
11 | 
12 | from keras.optimizers import Adam
13 | from keras.callbacks import EarlyStopping, ModelCheckpoint
14 | 
15 | from deepst.models.STResNet import stresnet
16 | from deepst.config import Config
17 | import deepst.metrics as metrics
18 | from deepst.datasets import TaxiBJ
19 | np.random.seed(1337)  # for reproducibility
20 | 
21 | # parameters
22 | DATAPATH = Config().DATAPATH  # data path; you may set your own with the environment variable DATAPATH
23 | CACHEDATA = True  # cache data or NOT
24 | path_cache = os.path.join(DATAPATH, 'CACHE')  # cache path
25 | nb_epoch = 500  # number of epochs at the training stage
26 | nb_epoch_cont = 100  # number of epochs at the training (cont) stage
27 | batch_size = 32  # batch size
28 | T = 48  # number of time intervals in one day
29 | lr = 0.0002  # learning rate
30 | len_closeness = 3  # length of closeness dependent sequence
31 | len_period = 1  # length of period dependent sequence
32 | len_trend = 1  # length of trend dependent sequence
33 | nb_residual_unit = 12  # number of residual units
34 | 
35 | 
36 | nb_flow = 2  # there are two types of flows: inflow and outflow
37 | # split data into two subsets: Train & Test, of which the test set is the last 4 weeks
38 | days_test = 7 * 4
39 | len_test = T * days_test
40 | map_height, map_width = 32, 32  # grid size
41 | path_result = 'RET'
42 | path_model = 'MODEL'
43 | 
44 | 
45 | if os.path.isdir(path_result) is False:
46 |     os.mkdir(path_result)
47 | if os.path.isdir(path_model) is False:
48 |     os.mkdir(path_model)
49 | if CACHEDATA and os.path.isdir(path_cache) is False:
50 |     os.mkdir(path_cache)
51 | 
52 | 
53 | def build_model(external_dim):
54 |     c_conf = (len_closeness, nb_flow, map_height,
55 |               map_width) if len_closeness > 0 else None
56 |     p_conf = (len_period, nb_flow, map_height,
57 |               map_width) if len_period > 0 else None
58 |     t_conf = (len_trend, nb_flow, map_height,
59 |               map_width) if len_trend > 0 else None
60 | 
61 |     model = stresnet(c_conf=c_conf, p_conf=p_conf, t_conf=t_conf,
62 |                      external_dim=external_dim, nb_residual_unit=nb_residual_unit)
63 |     adam = Adam(lr=lr)
64 |     model.compile(loss='mse', optimizer=adam, metrics=[metrics.rmse])
65 |     model.summary()
66 |     # from keras.utils.visualize_util import plot
67 |     # plot(model, to_file='model.png', show_shapes=True)
68 |     return model
69 | 
70 | 
71 | def read_cache(fname):
72 |     mmn = pickle.load(open('preprocessing.pkl', 'rb'))
73 | 
74 |     f = h5py.File(fname, 'r')
75 |     num = int(f['num'].value)
76 |     X_train, Y_train, X_test, Y_test = [], [], [], []
77 |     for i in xrange(num):
78 |         X_train.append(f['X_train_%i' % i].value)
79 |         X_test.append(f['X_test_%i' % i].value)
80 |     Y_train = f['Y_train'].value
81 |     Y_test = f['Y_test'].value
82 |     external_dim = f['external_dim'].value
83 |     timestamp_train = f['T_train'].value
84 |     timestamp_test = f['T_test'].value
85 |     f.close()
86 | 
87 |     return X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test
88 | 
89 | 
90 | def cache(fname, X_train, Y_train, X_test, Y_test, external_dim, timestamp_train, timestamp_test):
91 |     h5 = h5py.File(fname, 'w')
92 |     h5.create_dataset('num', data=len(X_train))
93 | 
94 |     for i, data in enumerate(X_train):
95 |         h5.create_dataset('X_train_%i' % i, data=data)
96 |     # for i, data in enumerate(Y_train):
97 |     for i, data in enumerate(X_test):
98 |         h5.create_dataset('X_test_%i' % i, data=data)
99 |     h5.create_dataset('Y_train', data=Y_train)
100 |     h5.create_dataset('Y_test', data=Y_test)
101 |     external_dim = -1 if external_dim is None else int(external_dim)
102 |     h5.create_dataset('external_dim', data=external_dim)
103 |     h5.create_dataset('T_train', data=timestamp_train)
104 |     h5.create_dataset('T_test', data=timestamp_test)
105 |     h5.close()
106 | 
107 | 
108 | def main():
109 |     # load data
110 |     print("loading data...")
111 |     ts = time.time()
112 |     fname = os.path.join(DATAPATH, 'CACHE', 'TaxiBJ_C{}_P{}_T{}_noExternal.h5'.format(
113 |         len_closeness, len_period, len_trend))
114 |     if os.path.exists(fname) and CACHEDATA:
115 |         X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
116 |             fname)
117 |         print("load %s successfully" % fname)
118 |     else:
119 |         X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
120 |             T=T, nb_flow=nb_flow, len_closeness=len_closeness, len_period=len_period, len_trend=len_trend, len_test=len_test,
121 |             preprocess_name='preprocessing.pkl', meta_data=False, meteorol_data=False, holiday_data=False)
122 |         if CACHEDATA:
123 |             cache(fname, X_train, Y_train, X_test, Y_test,
124 |                   external_dim, timestamp_train, timestamp_test)
125 | 
126 |     print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
127 |     print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))
128 | 
129 |     print('=' * 10)
130 |     print("compiling model...")
131 |     print(
132 |         "**the first time it may take a few minutes to compile if you use [Theano] as the backend**")
133 | 
134 |     ts = time.time()
135 |     model = build_model(external_dim)
136 |     hyperparams_name = 'c{}.p{}.t{}.resunit{}.lr{}.noExternal'.format(
137 |         len_closeness, len_period, len_trend, nb_residual_unit, lr)
138 |     fname_param = os.path.join('MODEL', '{}.best.h5'.format(hyperparams_name))
139 | 
140 |     early_stopping = EarlyStopping(monitor='val_rmse', patience=2, mode='min')
141 |     model_checkpoint = ModelCheckpoint(
142 |         fname_param, monitor='val_rmse', verbose=0, save_best_only=True, mode='min')
143 | 
144 |     print("\nelapsed time (compiling model): %.3f seconds\n" %
145 |           (time.time() - ts))
146 | 
147 |     print('=' * 10)
148 |     print("training model...")
149 |     ts = time.time()
150 |     history = model.fit(X_train, Y_train,
151 |                         nb_epoch=nb_epoch,
152 |                         batch_size=batch_size,
153 |                         validation_split=0.1,
154 |                         callbacks=[early_stopping, model_checkpoint],
155 |                         verbose=1)
156 |     model.save_weights(os.path.join(
157 |         'MODEL', '{}.h5'.format(hyperparams_name)), overwrite=True)
158 |     pickle.dump((history.history), open(os.path.join(
159 |         path_result, '{}.history.pkl'.format(hyperparams_name)), 'wb'))
160 |     print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))
161 | 
162 |     print('=' * 10)
163 |     print('evaluating using the model that has the best loss on the valid set')
164 |     ts = time.time()
165 |     model.load_weights(fname_param)
166 |     score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[
167 |                            0] // 48, verbose=0)
168 |     print('Train score: %.6f Train rmse: %.6f %.6f' %
169 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
170 |     score = model.evaluate(
171 |         X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
172 |     print('Test score: %.6f Test rmse: %.6f %.6f' %
173 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
174 |     print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))
175 | 
176 |     print('=' * 10)
177 |     print("training model (cont)...")
178 |     ts = time.time()
179 |     fname_param = os.path.join(
180 |         'MODEL', '{}.cont.best.h5'.format(hyperparams_name))
181 |     model_checkpoint = ModelCheckpoint(
182 |         fname_param, monitor='rmse', verbose=0, save_best_only=True, mode='min')
183 |     history = model.fit(X_train, Y_train, nb_epoch=nb_epoch_cont, verbose=2, batch_size=batch_size, callbacks=[
184 |                         model_checkpoint])
185 |     pickle.dump((history.history), open(os.path.join(
186 |         path_result, '{}.cont.history.pkl'.format(hyperparams_name)), 'wb'))
187 |     model.save_weights(os.path.join(
188 |         'MODEL', '{}_cont.h5'.format(hyperparams_name)), overwrite=True)
189 |     print("\nelapsed time (training cont): %.3f seconds\n" % (time.time() - ts))
190 | 
191 |     print('=' * 10)
192 |     print('evaluating using the final model')
193 |     score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[
194 |                            0] // 48, verbose=0)
195 |     print('Train score: %.6f Train rmse: %.6f %.6f' %
196 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
197 |     ts = time.time()
198 |     score = model.evaluate(
199 |         X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
200 |     print('Test score: %.6f Test rmse: %.6f %.6f' %
201 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
202 |     print("\nelapsed time (eval cont): %.3f seconds\n" % (time.time() - ts))
203 | 
204 | if __name__ == '__main__':
205 |     main()
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/TaxiBJ/exptTaxiBJ.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | THEANO_FLAGS="device=gpu0" python exptTaxiBJ.py [nb_residual_unit]
4 | """
5 | from __future__ import print_function
6 | import os
7 | import sys
8 | import cPickle as pickle
9 | import time
10 | import numpy as np
11 | import h5py
12 | 
13 | from keras.optimizers import Adam
14 | from keras.callbacks import EarlyStopping, ModelCheckpoint
15 | 
16 | from deepst.models.STResNet import stresnet
17 | from deepst.config import Config
18 | import deepst.metrics as metrics
19 | from deepst.datasets import TaxiBJ
20 | np.random.seed(1337)  # for reproducibility
21 | 
22 | # parameters
23 | DATAPATH = Config().DATAPATH  # data path; you may set your own with the environment variable DATAPATH
24 | CACHEDATA = True  # cache data or NOT
25 | path_cache = os.path.join(DATAPATH, 'CACHE')  # cache path
26 | nb_epoch = 500  # number of epochs at the training stage
27 | nb_epoch_cont = 100  # number of epochs at the training (cont) stage
28 | batch_size = 32  # batch size
29 | T = 48  # number of time intervals in one day
30 | lr = 0.0002  # learning rate
31 | len_closeness = 3  # length of closeness dependent sequence
32 | len_period = 1  # length of period dependent sequence
33 | len_trend = 1  # length of trend dependent sequence
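# (closeness/period/trend select which historical frames feed each ST-ResNet input
#  branch: with T = 48 half-hour slots, closeness takes the len_closeness most recent
#  slots, period takes slots whole days back, and trend takes slots whole weeks back --
#  the 1-day and 7-day intervals are the values used elsewhere in this repo, e.g.
#  PeriodInterval=1 and TrendInterval=7 in deepst/utils/evalMultiStepAheadNew.py.)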
34 | if len(sys.argv) == 1:
35 |     nb_residual_unit = 2  # number of residual units
36 | else:
37 |     nb_residual_unit = int(sys.argv[1])  # number of residual units
38 | 
39 | nb_flow = 2  # there are two types of flows: inflow and outflow
40 | # divide data into two subsets: Train & Test, of which the test set is the
41 | # last 4 weeks
42 | days_test = 7 * 4
43 | len_test = T * days_test
44 | map_height, map_width = 32, 32  # grid size
45 | path_result = 'RET'
46 | path_model = 'MODEL'
47 | 
48 | 
49 | if os.path.isdir(path_result) is False:
50 |     os.mkdir(path_result)
51 | if os.path.isdir(path_model) is False:
52 |     os.mkdir(path_model)
53 | if CACHEDATA and os.path.isdir(path_cache) is False:
54 |     os.mkdir(path_cache)
55 | 
56 | 
57 | def build_model(external_dim):
58 |     c_conf = (len_closeness, nb_flow, map_height,
59 |               map_width) if len_closeness > 0 else None
60 |     p_conf = (len_period, nb_flow, map_height,
61 |               map_width) if len_period > 0 else None
62 |     t_conf = (len_trend, nb_flow, map_height,
63 |               map_width) if len_trend > 0 else None
64 | 
65 |     model = stresnet(c_conf=c_conf, p_conf=p_conf, t_conf=t_conf,
66 |                      external_dim=external_dim, nb_residual_unit=nb_residual_unit)
67 |     adam = Adam(lr=lr)
68 |     model.compile(loss='mse', optimizer=adam, metrics=[metrics.rmse])
69 |     model.summary()
70 |     # from keras.utils.visualize_util import plot
71 |     # plot(model, to_file='model.png', show_shapes=True)
72 |     return model
73 | 
74 | 
75 | def read_cache(fname):
76 |     mmn = pickle.load(open('preprocessing.pkl', 'rb'))
77 | 
78 |     f = h5py.File(fname, 'r')
79 |     num = int(f['num'].value)
80 |     X_train, Y_train, X_test, Y_test = [], [], [], []
81 |     for i in xrange(num):
82 |         X_train.append(f['X_train_%i' % i].value)
83 |         X_test.append(f['X_test_%i' % i].value)
84 |     Y_train = f['Y_train'].value
85 |     Y_test = f['Y_test'].value
86 |     external_dim = f['external_dim'].value
87 |     timestamp_train = f['T_train'].value
88 |     timestamp_test = f['T_test'].value
89 |     f.close()
90 | 
91 |     return X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test
92 | 
93 | 
94 | def cache(fname, X_train, Y_train, X_test, Y_test, external_dim, timestamp_train, timestamp_test):
95 |     h5 = h5py.File(fname, 'w')
96 |     h5.create_dataset('num', data=len(X_train))
97 | 
98 |     for i, data in enumerate(X_train):
99 |         h5.create_dataset('X_train_%i' % i, data=data)
100 |     # for i, data in enumerate(Y_train):
101 |     for i, data in enumerate(X_test):
102 |         h5.create_dataset('X_test_%i' % i, data=data)
103 |     h5.create_dataset('Y_train', data=Y_train)
104 |     h5.create_dataset('Y_test', data=Y_test)
105 |     external_dim = -1 if external_dim is None else int(external_dim)
106 |     h5.create_dataset('external_dim', data=external_dim)
107 |     h5.create_dataset('T_train', data=timestamp_train)
108 |     h5.create_dataset('T_test', data=timestamp_test)
109 |     h5.close()
110 | 
111 | 
112 | def main():
113 |     # load data
114 |     print("loading data...")
115 |     ts = time.time()
116 |     fname = os.path.join(DATAPATH, 'CACHE', 'TaxiBJ_C{}_P{}_T{}.h5'.format(
117 |         len_closeness, len_period, len_trend))
118 |     if os.path.exists(fname) and CACHEDATA:
119 |         X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
120 |             fname)
121 |         print("load %s successfully" % fname)
122 |     else:
123 |         X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
124 |             T=T, nb_flow=nb_flow, len_closeness=len_closeness, len_period=len_period, len_trend=len_trend, len_test=len_test,
125 |             preprocess_name='preprocessing.pkl', meta_data=True, meteorol_data=True, holiday_data=True)
126 |         if CACHEDATA:
127 |             cache(fname, X_train, Y_train, X_test, Y_test,
128 |                   external_dim, timestamp_train, timestamp_test)
129 | 
130 |     print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
131 |     print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))
132 | 
133 |     print('=' * 10)
134 |     print("compiling model...")
135 |     print(
136 |         "**the first time it may take a few minutes to compile if you use [Theano] as the backend**")
137 | 
138 |     ts = time.time()
139 |     model = build_model(external_dim)
140 |     hyperparams_name = 'c{}.p{}.t{}.resunit{}.lr{}'.format(
141 |         len_closeness, len_period, len_trend, nb_residual_unit, lr)
142 |     fname_param = os.path.join('MODEL', '{}.best.h5'.format(hyperparams_name))
143 | 
144 |     early_stopping = EarlyStopping(monitor='val_rmse', patience=2, mode='min')
145 |     model_checkpoint = ModelCheckpoint(
146 |         fname_param, monitor='val_rmse', verbose=0, save_best_only=True, mode='min')
147 | 
148 |     print("\nelapsed time (compiling model): %.3f seconds\n" %
149 |           (time.time() - ts))
150 | 
151 |     print('=' * 10)
152 |     print("training model...")
153 |     ts = time.time()
154 |     history = model.fit(X_train, Y_train,
155 |                         nb_epoch=nb_epoch,
156 |                         batch_size=batch_size,
157 |                         validation_split=0.1,
158 |                         callbacks=[early_stopping, model_checkpoint],
159 |                         verbose=2)
160 |     model.save_weights(os.path.join(
161 |         'MODEL', '{}.h5'.format(hyperparams_name)), overwrite=True)
162 |     pickle.dump((history.history), open(os.path.join(
163 |         path_result, '{}.history.pkl'.format(hyperparams_name)), 'wb'))
164 |     print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))
165 | 
166 |     print('=' * 10)
167 |     print('evaluating using the model that has the best loss on the valid set')
168 |     ts = time.time()
169 |     model.load_weights(fname_param)
170 |     score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[
171 |                            0] // 48, verbose=0)
172 |     print('Train score: %.6f Train rmse: %.6f %.6f' %
173 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
174 |     score = model.evaluate(
175 |         X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
176 |     print('Test score: %.6f Test rmse: %.6f %.6f' %
177 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
178 |     print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))
179 | 
180 |     print('=' * 10)
181 |     print("training model (cont)...")
182 |     ts = time.time()
183 |     fname_param = os.path.join(
184 |         'MODEL', '{}.cont.best.h5'.format(hyperparams_name))
185 |     model_checkpoint = ModelCheckpoint(
186 |         fname_param, monitor='rmse', verbose=0, save_best_only=True, mode='min')
187 |     history = model.fit(X_train, Y_train, nb_epoch=nb_epoch_cont, verbose=2, batch_size=batch_size, callbacks=[
188 |                         model_checkpoint])
189 |     pickle.dump((history.history), open(os.path.join(
190 |         path_result, '{}.cont.history.pkl'.format(hyperparams_name)), 'wb'))
191 |     model.save_weights(os.path.join(
192 |         'MODEL', '{}_cont.h5'.format(hyperparams_name)), overwrite=True)
193 |     print("\nelapsed time (training cont): %.3f seconds\n" % (time.time() - ts))
194 | 
195 |     print('=' * 10)
196 |     print('evaluating using the final model')
197 |     score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[
198 |                            0] // 48, verbose=0)
199 |     print('Train score: %.6f Train rmse: %.6f %.6f' %
200 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
201 |     ts = time.time()
202 |     score = model.evaluate(
203 | X_test, Y_test, batch_size=Y_test.shape[0], verbose=0) 204 | print('Test score: %.6f Test rmse: %.6f %.6f' % 205 | (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.)) 206 | print("\nelapsed time (eval cont): %.3f seconds\n" % (time.time() - ts)) 207 | 208 | if __name__ == '__main__': 209 | main() 210 | -------------------------------------------------------------------------------- /scripts/papers/AAAI17/doc/ST-ResNet-AAAI17-Zhang.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirkhango/DeepST/7ba669013bbafd5f413ef50d5d76094c3a68efd6/scripts/papers/AAAI17/doc/ST-ResNet-AAAI17-Zhang.pdf -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | 5 | setup(name='DeepST', 6 | version='0.0.1', 7 | description='Deep Learning for Spatio-Temporal Data', 8 | author='Junbo Zhang', 9 | author_email='zjb2046@gmail.com', 10 | url='https://github.com/lucktroy/DeepST', 11 | download_url='https://github.com/lucktroy/DeepST/', 12 | license='MIT', 13 | install_requires=['keras', 'theano'], 14 | extras_require={ 15 | 'h5py': ['h5py'], 16 | 'visualize': ['pydot-ng'], 17 | }, 18 | packages=find_packages()) --------------------------------------------------------------------------------