├── .gitignore ├── LICENSE ├── README.md ├── data └── TaxiBJ │ └── ReadMe.md ├── deepst ├── __init__.py ├── config.py ├── datasets │ ├── BikeNYC.py │ ├── ReadMe.md │ ├── STDATA.py │ ├── STMatrix.py │ ├── TaxiBJ.py │ └── __init__.py ├── metrics.py ├── models │ ├── STConvolution.py │ ├── STResNet.py │ ├── __init__.py │ └── iLayer.py ├── preprocessing │ ├── __init__.py │ └── minmax_normalization.py └── utils │ ├── __init__.py │ ├── eval.py │ ├── evalMultiStepAhead.py │ ├── evalMultiStepAhead4SeqModel.py │ ├── evalMultiStepAheadNew.py │ ├── fill_missing_vals.py │ ├── runMe.bat │ ├── txt2hdf5_InOut.py │ └── viewRetFromPkl.py ├── scripts └── papers │ └── AAAI17 │ ├── BikeNYC │ └── README.md │ ├── README.md │ ├── TaxiBJ │ ├── README.md │ ├── exptTaxiBJ-L12.py │ └── exptTaxiBJ.py │ └── doc │ └── ST-ResNet-AAAI17-Zhang.pdf └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # data/model/results/scripts/backup 2 | *.sh 3 | *.bat 4 | *.h5 5 | *.hdf5 6 | *.pkl 7 | *.gz 8 | bak/ 9 | backup/ 10 | *.txt 11 | 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *,cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # IPython Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | venv/ 95 | ENV/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DeepST 2 | ====== 3 | [DeepST](https://github.com/lucktroy/DeepST): A **Deep Learning** Toolbox for Spatio-Temporal Data 4 | 5 | *Tested on `Windows Server 2012 R2`.* 6 | 7 | ## Installation 8 | 9 | DeepST uses the following dependencies: 10 | 11 | * [Keras](https://keras.io/#installation) and its dependencies are required to use DeepST. 12 | * [Theano](http://deeplearning.net/software/theano/install.html#install) or [TensorFlow](https://github.com/tensorflow/tensorflow#download-and-setup); **Theano** is recommended. 13 | * numpy and scipy 14 | * HDF5 and [h5py](http://www.h5py.org/) 15 | * [pandas](http://pandas.pydata.org/) 16 | * CUDA 7.5 or a later version; **cuDNN** is highly recommended. 17 | 18 | To install DeepST, `cd` to the **DeepST** folder and run the install command: 19 | 20 | ``` 21 | python setup.py install 22 | ``` 23 | 24 | To install the development version: 25 | 26 | ``` 27 | python setup.py develop 28 | ``` 29 | 30 | ## Data path 31 | 32 | The default `DATAPATH` is `[path_to_DeepST]/data`. You may point `DATAPATH` at your own data directory using: 33 | 34 | ``` 35 | # Windows 36 | set DATAPATH=[path_to_your_data] 37 | 38 | # Linux 39 | export DATAPATH=[path_to_your_data] 40 | ``` 41 | 42 | ## License 43 | 44 | DeepST is released under the MIT License (refer to the LICENSE file for details). -------------------------------------------------------------------------------- /data/TaxiBJ/ReadMe.md: -------------------------------------------------------------------------------- 1 | TaxiBJ: InFlow/OutFlow, Meteorology and Holidays at Beijing 2 | =========================================================== 3 | 4 | **If you use the data, please cite the following paper.** 5 | 6 | `Junbo Zhang, Yu Zheng, Dekang Qi. Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction. In AAAI 2017. ` 7 | 8 | Download the data from [OneDrive](https://1drv.ms/f/s!Akh6N7xv3uVmhOhDKwx3bm5zpHkDOQ) or [BaiduPan](http://pan.baidu.com/s/1qYq7ja8). 9 | 10 | Please verify the downloaded files with the `md5sum` command: 11 | ``` 12 | md5sum -c md5sum.txt 13 | ``` 14 | 15 | **TaxiBJ** consists of the following **SIX** datasets: 16 | 17 | * BJ16_M32x32_T30_InOut.h5 18 | * BJ15_M32x32_T30_InOut.h5 19 | * BJ14_M32x32_T30_InOut.h5 20 | * BJ13_M32x32_T30_InOut.h5 21 | * BJ_Meteorology.h5 22 | * BJ_Holiday.txt 23 | 24 | where the first four files contain the *crowd flows* in Beijing from 2013 to 2016, `BJ_Meteorology.h5` contains the meteorological data, and `BJ_Holiday.txt` lists the holidays (and adjacent weekends) of Beijing. 25 | 26 | ## Flows of Crowds 27 | 28 | File names: `BJ[YEAR]_M32x32_T30_InOut.h5`, where 29 | 30 | * YEAR: one of {13, 14, 15, 16} 31 | * M32x32: Beijing is divided into a 32 x 32 grid map 32 | * T30: timeslot (a.k.a.
time interval) is equal to 30 minutes, meaning there are 48 timeslots in a day 33 | * InOut: Inflow/Outflow are defined in the following paper [1]. 34 | 35 | [1] Junbo Zhang, Yu Zheng, Dekang Qi. Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction. In AAAI 2017. 36 | 37 | Each `h5` file has the following two subsets: 38 | 39 | * `date`: a list of timeslots, which is associated with the **data**. 40 | * `data`: a 4D tensor of shape (number_of_timeslots, 2, 32, 32), of which `data[i]` is a 3D tensor of shape (2, 32, 32) at the timeslot `date[i]`; `data[i][0]` is a `32x32` inflow matrix and `data[i][1]` is a `32x32` outflow matrix. 41 | 42 | ### Example 43 | 44 | You can get the data info with the following command: 45 | ``` 46 | python -c "from deepst.datasets import stat; stat('BJ16_M32x32_T30_InOut.h5')" 47 | ``` 48 | 49 | The output looks like: 50 | ``` 51 | =====stat===== 52 | data shape: (7220, 2, 32, 32) 53 | # of days: 162, from 2015-11-01 to 2016-04-10 54 | # of timeslots: 7776 55 | # of timeslots (available): 7220 56 | missing ratio of timeslots: 7.2% 57 | max: 1250.000, min: 0.000 58 | =====stat===== 59 | ``` 60 | 61 | ## Meteorology 62 | 63 | File name: `BJ_Meteorology.h5`, which has the following four subsets: 64 | 65 | * `date`: a list of timeslots, which is associated with the following kinds of data. 66 | * `Temperature`: a list of continuous values, of which the `i^{th}` value is the `temperature` at the timeslot `date[i]`. 67 | * `WindSpeed`: a list of continuous values, of which the `i^{th}` value is the `wind speed` at the timeslot `date[i]`. 68 | * `Weather`: a 2D matrix, each row of which is a one-hot vector (`dim=17`), showing one of the following weather types: 69 | ``` 70 | Sunny = 0, 71 | Cloudy = 1, 72 | Overcast = 2, 73 | Rainy = 3, 74 | Sprinkle = 4, 75 | ModerateRain = 5, 76 | HeavyRain = 6, 77 | Rainstorm = 7, 78 | Thunderstorm = 8, 79 | FreezingRain = 9, 80 | Snowy = 10, 81 | LightSnow = 11, 82 | ModerateSnow = 12, 83 | HeavySnow = 13, 84 | Foggy = 14, 85 | Sandstorm = 15, 86 | Dusty = 16, 87 | ``` 88 | 89 | ## Holiday 90 | 91 | File name: `BJ_Holiday.txt`, which includes a list of the holidays (and adjacent weekends) of Beijing. 92 | 93 | Each line is a holiday in the format [yyyy][mm][dd]. For example, `20150601` is `June 1st, 2015`.
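## Reading the data (example)

A minimal sketch for inspecting one flow file directly with [h5py](http://www.h5py.org/), outside of DeepST's own loaders (assuming the file has been downloaded into the working directory):

```
import h5py

with h5py.File('BJ16_M32x32_T30_InOut.h5', 'r') as f:
    data = f['data'][:]  # 4D tensor: (number_of_timeslots, 2, 32, 32)
    date = f['date'][:]  # timeslot strings in [yyyy][mm][dd][slot] format
# first timeslot: total inflow and total outflow over the 32x32 grid
print(date[0], data[0, 0].sum(), data[0, 1].sum())
```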
-------------------------------------------------------------------------------- /deepst/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirkhango/DeepST/7ba669013bbafd5f413ef50d5d76094c3a68efd6/deepst/__init__.py -------------------------------------------------------------------------------- /deepst/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import platform 4 | 5 | 6 | class Config(object): 7 | """Resolve DATAPATH from the environment, falling back to <repo>/data.""" 8 | 9 | def __init__(self): 10 | super(Config, self).__init__() 11 | 12 | DATAPATH = os.environ.get('DATAPATH') 13 | if DATAPATH is None: 14 | if platform.system() == "Windows" or platform.system() == "Linux": 15 | # DATAPATH = "D:/data/traffic_flow" 16 | # elif platform.system() == "Linux": 17 | DATAPATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data') 18 | else: 19 | print("Unsupported/Unknown OS: ", platform.system(), "please set DATAPATH") 20 | self.DATAPATH = DATAPATH 21 | -------------------------------------------------------------------------------- /deepst/datasets/BikeNYC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | import os 4 | import cPickle as pickle 5 | import numpy as np 6 | 7 | from . import load_stdata 8 | from ..preprocessing import MinMaxNormalization 9 | from ..preprocessing import remove_incomplete_days 10 | from ..config import Config 11 | from ..datasets.STMatrix import STMatrix 12 | from ..preprocessing import timestamp2vec 13 | np.random.seed(1337) # for reproducibility 14 | 15 | # parameters 16 | DATAPATH = Config().DATAPATH 17 | 18 | 19 | def load_data(T=24, nb_flow=2, len_closeness=None, len_period=None, len_trend=None, len_test=None, preprocess_name='preprocessing.pkl', meta_data=True): 20 | assert(len_closeness + len_period + len_trend > 0) 21 | # load data 22 | data, timestamps = load_stdata(os.path.join(DATAPATH, 'BikeNYC', 'NYC14_M16x8_T60_NewEnd.h5')) 23 | # print(timestamps) 24 | # remove any day that does not have T timestamps 25 | data, timestamps = remove_incomplete_days(data, timestamps, T) 26 | data = data[:, :nb_flow] 27 | data[data < 0] = 0. 28 | data_all = [data] 29 | timestamps_all = [timestamps] 30 | # minmax_scale 31 | data_train = data[:-len_test] 32 | print('train_data shape: ', data_train.shape) 33 | mmn = MinMaxNormalization() 34 | mmn.fit(data_train) 35 | data_all_mmn = [] 36 | for d in data_all: 37 | data_all_mmn.append(mmn.transform(d)) 38 | 39 | fpkl = open(preprocess_name, 'wb') 40 | for obj in [mmn]: 41 | pickle.dump(obj, fpkl) 42 | fpkl.close() 43 | 44 | XC, XP, XT = [], [], [] 45 | Y = [] 46 | timestamps_Y = [] 47 | for data, timestamps in zip(data_all_mmn, timestamps_all): 48 | # instance-based dataset --> sequences with format as (X, Y) where X is a sequence of images and Y is an image.
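# Each sample pairs the closeness (recent frames), period (daily, PeriodInterval=1) and
# trend (weekly, TrendInterval=7) inputs built by STMatrix.create_dataset with the
# target flow image Y at the predicted timeslot.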
49 | st = STMatrix(data, timestamps, T, CheckComplete=False) 50 | _XC, _XP, _XT, _Y, _timestamps_Y = st.create_dataset(len_closeness=len_closeness, len_period=len_period, len_trend=len_trend) 51 | XC.append(_XC) 52 | XP.append(_XP) 53 | XT.append(_XT) 54 | Y.append(_Y) 55 | timestamps_Y += _timestamps_Y 56 | 57 | XC = np.vstack(XC) 58 | XP = np.vstack(XP) 59 | XT = np.vstack(XT) 60 | Y = np.vstack(Y) 61 | print("XC shape: ", XC.shape, "XP shape: ", XP.shape, "XT shape: ", XT.shape, "Y shape:", Y.shape) 62 | XC_train, XP_train, XT_train, Y_train = XC[:-len_test], XP[:-len_test], XT[:-len_test], Y[:-len_test] 63 | XC_test, XP_test, XT_test, Y_test = XC[-len_test:], XP[-len_test:], XT[-len_test:], Y[-len_test:] 64 | 65 | timestamp_train, timestamp_test = timestamps_Y[:-len_test], timestamps_Y[-len_test:] 66 | X_train = [] 67 | X_test = [] 68 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_train, XP_train, XT_train]): 69 | if l > 0: 70 | X_train.append(X_) 71 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_test, XP_test, XT_test]): 72 | if l > 0: 73 | X_test.append(X_) 74 | print('train shape:', XC_train.shape, Y_train.shape, 'test shape: ', XC_test.shape, Y_test.shape) 75 | # load meta feature 76 | if meta_data: 77 | meta_feature = timestamp2vec(timestamps_Y) 78 | metadata_dim = meta_feature.shape[1] 79 | meta_feature_train, meta_feature_test = meta_feature[:-len_test], meta_feature[-len_test:] 80 | X_train.append(meta_feature_train) 81 | X_test.append(meta_feature_test) 82 | else: 83 | metadata_dim = None 84 | for _X in X_train: 85 | print(_X.shape, ) 86 | print() 87 | for _X in X_test: 88 | print(_X.shape, ) 89 | print() 90 | return X_train, Y_train, X_test, Y_test, mmn, metadata_dim, timestamp_train, timestamp_test 91 | -------------------------------------------------------------------------------- /deepst/datasets/ReadMe.md: -------------------------------------------------------------------------------- 1 | ## Beijing 2 | 3 | Beijing.py is a unified interface to load data. -------------------------------------------------------------------------------- /deepst/datasets/STDATA.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | 4 | import cPickle as pickle 5 | import numpy as np 6 | 7 | from ..preprocessing import MinMaxNormalization 8 | from ..preprocessing import remove_incomplete_days 9 | from ..datasets.STMatrix import STMatrix 10 | from ..preprocessing import timestamp2vec 11 | from . import load_stdata 12 | # np.random.seed(1337) # for reproducibility 13 | 14 | # parameters 15 | # DATAPATH = Config().DATAPATH 16 | 17 | def load_data(fname=None, T=48, nb_flow=2, len_closeness=None, len_period=None, len_trend=None, len_test=None, preprocess_name='preprocessing.pkl'): 18 | assert(len_closeness + len_period + len_trend > 0) 19 | data, timestamps = load_stdata(fname) 20 | print(timestamps) 21 | # remove any day that does not have T timestamps 22 | data, timestamps = remove_incomplete_days(data, timestamps, T) 23 | data = data[:, :nb_flow] 24 | data[data < 0] = 0.
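# NOTE: the MinMax scaler below is fit on the training slice only, then applied to the full series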
25 | data_all = [data] 26 | timestamps_all = [timestamps] 27 | # minmax_scale 28 | data_train = data[:-len_test] 29 | print('train_data shape: ', data_train.shape) 30 | mmn = MinMaxNormalization() 31 | mmn.fit(data_train) 32 | data_all_mmn = [] 33 | for d in data_all: 34 | data_all_mmn.append(mmn.transform(d)) 35 | 36 | fpkl = open(preprocess_name, 'wb') 37 | for obj in [mmn]: 38 | pickle.dump(obj, fpkl) 39 | fpkl.close() 40 | 41 | XC, XP, XT = [], [], [] 42 | Y = [] 43 | timestamps_Y = [] 44 | for data, timestamps in zip(data_all_mmn, timestamps_all): 45 | # instance-based dataset --> sequences with format as (X, Y) where X is 46 | # a sequence of images and Y is an image. 47 | st = STMatrix(data, timestamps, T, CheckComplete=False) 48 | _XC, _XP, _XT, _Y, _timestamps_Y = st.create_dataset( 49 | len_closeness=len_closeness, len_period=len_period, len_trend=len_trend) 50 | XC.append(_XC) 51 | XP.append(_XP) 52 | XT.append(_XT) 53 | Y.append(_Y) 54 | timestamps_Y += _timestamps_Y 55 | 56 | # load meta feature 57 | meta_feature = timestamp2vec(timestamps_Y) 58 | metadata_dim = meta_feature.shape[1] 59 | 60 | XC = np.vstack(XC) 61 | XP = np.vstack(XP) 62 | XT = np.vstack(XT) 63 | Y = np.vstack(Y) 64 | print("XC shape: ", XC.shape, "XP shape: ", XP.shape, 65 | "XT shape: ", XT.shape, "Y shape:", Y.shape) 66 | 67 | XC_train, XP_train, XT_train, Y_train = XC[ 68 | :-len_test], XP[:-len_test], XT[:-len_test], Y[:-len_test] 69 | XC_test, XP_test, XT_test, Y_test = XC[ 70 | -len_test:], XP[-len_test:], XT[-len_test:], Y[-len_test:] 71 | meta_feature_train, meta_feature_test = meta_feature[ 72 | :-len_test], meta_feature[-len_test:] 73 | timestamp_train, timestamp_test = timestamps_Y[ 74 | :-len_test], timestamps_Y[-len_test:] 75 | X_train = [] 76 | X_test = [] 77 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_train, XP_train, XT_train]): 78 | if l > 0: 79 | X_train.append(X_) 80 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_test, XP_test, XT_test]): 81 | if l > 0: 82 | X_test.append(X_) 83 | print('train shape:', XC_train.shape, Y_train.shape, 84 | 'test shape: ', XC_test.shape, Y_test.shape) 85 | 86 | X_train.append(meta_feature_train) 87 | X_test.append(meta_feature_test) 88 | for _X in X_train: 89 | print(_X.shape, ) 90 | print() 91 | for _X in X_test: 92 | print(_X.shape, ) 93 | print() 94 | return X_train, Y_train, X_test, Y_test, mmn, metadata_dim, timestamp_train, timestamp_test 95 | -------------------------------------------------------------------------------- /deepst/datasets/STMatrix.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import pandas as pd 4 | import numpy as np 5 | 6 | from .
import load_stdata 7 | from ..config import Config 8 | from ..utils import string2timestamp 9 | 10 | 11 | class STMatrix(object): 12 | """docstring for STMatrix""" 13 | 14 | def __init__(self, data, timestamps, T=48, CheckComplete=True): 15 | super(STMatrix, self).__init__() 16 | assert len(data) == len(timestamps) 17 | self.data = data 18 | self.timestamps = timestamps 19 | self.T = T 20 | self.pd_timestamps = string2timestamp(timestamps, T=self.T) 21 | if CheckComplete: 22 | self.check_complete() 23 | # index 24 | self.make_index() 25 | 26 | def make_index(self): 27 | self.get_index = dict() 28 | for i, ts in enumerate(self.pd_timestamps): 29 | self.get_index[ts] = i 30 | 31 | def check_complete(self): 32 | missing_timestamps = [] 33 | offset = pd.DateOffset(minutes=24 * 60 // self.T) 34 | pd_timestamps = self.pd_timestamps 35 | i = 1 36 | while i < len(pd_timestamps): 37 | if pd_timestamps[i-1] + offset != pd_timestamps[i]: 38 | missing_timestamps.append("(%s -- %s)" % (pd_timestamps[i-1], pd_timestamps[i])) 39 | i += 1 40 | for v in missing_timestamps: 41 | print(v) 42 | assert len(missing_timestamps) == 0 43 | 44 | def get_matrix(self, timestamp): 45 | return self.data[self.get_index[timestamp]] 46 | 47 | def save(self, fname): 48 | pass 49 | 50 | def check_it(self, depends): 51 | for d in depends: 52 | if d not in self.get_index.keys(): 53 | return False 54 | return True 55 | 56 | def create_dataset(self, len_closeness=3, len_trend=3, TrendInterval=7, len_period=3, PeriodInterval=1): 57 | """current version 58 | """ 59 | # offset_week = pd.DateOffset(days=7) 60 | offset_frame = pd.DateOffset(minutes=24 * 60 // self.T) 61 | XC = [] 62 | XP = [] 63 | XT = [] 64 | Y = [] 65 | timestamps_Y = [] 66 | depends = [range(1, len_closeness+1), 67 | [PeriodInterval * self.T * j for j in range(1, len_period+1)], 68 | [TrendInterval * self.T * j for j in range(1, len_trend+1)]] 69 | 70 | i = max(self.T * TrendInterval * len_trend, self.T * PeriodInterval * len_period, len_closeness) 71 | while i < len(self.pd_timestamps): 72 | Flag = True 73 | for depend in depends: 74 | if Flag is False: 75 | break 76 | Flag = self.check_it([self.pd_timestamps[i] - j * offset_frame for j in depend]) 77 | 78 | if Flag is False: 79 | i += 1 80 | continue 81 | x_c = [self.get_matrix(self.pd_timestamps[i] - j * offset_frame) for j in depends[0]] 82 | x_p = [self.get_matrix(self.pd_timestamps[i] - j * offset_frame) for j in depends[1]] 83 | x_t = [self.get_matrix(self.pd_timestamps[i] - j * offset_frame) for j in depends[2]] 84 | y = self.get_matrix(self.pd_timestamps[i]) 85 | if len_closeness > 0: 86 | XC.append(np.vstack(x_c)) 87 | if len_period > 0: 88 | XP.append(np.vstack(x_p)) 89 | if len_trend > 0: 90 | XT.append(np.vstack(x_t)) 91 | Y.append(y) 92 | timestamps_Y.append(self.timestamps[i]) 93 | i += 1 94 | XC = np.asarray(XC) 95 | XP = np.asarray(XP) 96 | XT = np.asarray(XT) 97 | Y = np.asarray(Y) 98 | print("XC shape: ", XC.shape, "XP shape: ", XP.shape, "XT shape: ", XT.shape, "Y shape:", Y.shape) 99 | return XC, XP, XT, Y, timestamps_Y 100 | 101 | 102 | if __name__ == '__main__': 103 | pass 104 | -------------------------------------------------------------------------------- /deepst/datasets/TaxiBJ.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | load BJ Data from multiple sources as follows: 4 | meteorologic data 5 | """ 6 | from __future__ import print_function 7 | 8 | import os 9 | #import cPickle as pickle 10 | import pickle 11 
| from copy import copy 12 | import numpy as np 13 | import h5py 14 | from . import load_stdata, stat 15 | from ..preprocessing import MinMaxNormalization, remove_incomplete_days, timestamp2vec 16 | from ..config import Config 17 | from .STMatrix import STMatrix 18 | # np.random.seed(1337) # for reproducibility 19 | 20 | # parameters 21 | DATAPATH = Config().DATAPATH 22 | 23 | 24 | def load_holiday(timeslots, fname=os.path.join(DATAPATH, 'TaxiBJ', 'BJ_Holiday.txt')): 25 | f = open(fname, 'r') 26 | holidays = f.readlines() 27 | holidays = set([h.strip() for h in holidays]) 28 | H = np.zeros(len(timeslots)) 29 | for i, slot in enumerate(timeslots): 30 | if slot[:8] in holidays: 31 | H[i] = 1 32 | print(H.sum()) 33 | # print(timeslots[H==1]) 34 | return H[:, None] 35 | 36 | 37 | def load_meteorol(timeslots, fname=os.path.join(DATAPATH, 'TaxiBJ', 'BJ_Meteorology.h5')): 38 | ''' 39 | timeslots: the timeslots to be predicted. 40 | In the real world, we do not have meteorological data at the predicted timeslot; instead, we use the observation at the previous timeslot, i.e., cur_slot = predicted_slot - 1 (predicted meteorological data could be used as well). 41 | ''' 42 | f = h5py.File(fname, 'r') 43 | Timeslot = f['date'].value 44 | WindSpeed = f['WindSpeed'].value 45 | Weather = f['Weather'].value 46 | Temperature = f['Temperature'].value 47 | f.close() 48 | 49 | M = dict() # map timeslot to index 50 | for i, slot in enumerate(Timeslot): 51 | M[slot] = i 52 | 53 | WS = [] # WindSpeed 54 | WR = [] # Weather 55 | TE = [] # Temperature 56 | for slot in timeslots: 57 | predicted_id = M[slot] 58 | cur_id = predicted_id - 1 59 | WS.append(WindSpeed[cur_id]) 60 | WR.append(Weather[cur_id]) 61 | TE.append(Temperature[cur_id]) 62 | 63 | WS = np.asarray(WS) 64 | WR = np.asarray(WR) 65 | TE = np.asarray(TE) 66 | 67 | # 0-1 scale 68 | WS = 1. * (WS - WS.min()) / (WS.max() - WS.min()) 69 | TE = 1. * (TE - TE.min()) / (TE.max() - TE.min()) 70 | 71 | print("shape: ", WS.shape, WR.shape, TE.shape) 72 | 73 | # concatenate all these attributes 74 | merge_data = np.hstack([WR, WS[:, None], TE[:, None]]) 75 | 76 | # print('merge shape:', merge_data.shape) 77 | return merge_data 78 | 79 | 80 | def load_data(T=48, nb_flow=2, len_closeness=None, len_period=None, len_trend=None, 81 | len_test=None, preprocess_name='preprocessing.pkl', 82 | meta_data=True, meteorol_data=True, holiday_data=True): 83 | """Load the TaxiBJ flows (2013-2016) and build closeness/period/trend inputs plus optional meta features. 84 | """ 85 | assert(len_closeness + len_period + len_trend > 0) 86 | # load data 87 | # 13 - 16 88 | data_all = [] 89 | timestamps_all = list() 90 | for year in range(13, 17): 91 | fname = os.path.join( 92 | DATAPATH, 'TaxiBJ', 'BJ{}_M32x32_T30_InOut.h5'.format(year)) 93 | print("file name: ", fname) 94 | stat(fname) 95 | data, timestamps = load_stdata(fname) 96 | # print(timestamps) 97 | # remove any day that does not have T timestamps 98 | data, timestamps = remove_incomplete_days(data, timestamps, T) 99 | data = data[:, :nb_flow] 100 | data[data < 0] = 0.
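# (negative flow values, likely markers for missing data, are clipped to zero before scaling)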
101 | data_all.append(data) 102 | timestamps_all.append(timestamps) 103 | print("\n") 104 | 105 | # minmax_scale 106 | data_train = np.vstack(copy(data_all))[:-len_test] 107 | print('train_data shape: ', data_train.shape) 108 | mmn = MinMaxNormalization() 109 | mmn.fit(data_train) 110 | data_all_mmn = [mmn.transform(d) for d in data_all] 111 | 112 | fpkl = open(preprocess_name, 'wb') 113 | for obj in [mmn]: 114 | pickle.dump(obj, fpkl) 115 | fpkl.close() 116 | 117 | XC, XP, XT = [], [], [] 118 | Y = [] 119 | timestamps_Y = [] 120 | for data, timestamps in zip(data_all_mmn, timestamps_all): 121 | # instance-based dataset --> sequences with format as (X, Y) where X is 122 | # a sequence of images and Y is an image. 123 | st = STMatrix(data, timestamps, T, CheckComplete=False) 124 | _XC, _XP, _XT, _Y, _timestamps_Y = st.create_dataset( 125 | len_closeness=len_closeness, len_period=len_period, len_trend=len_trend) 126 | XC.append(_XC) 127 | XP.append(_XP) 128 | XT.append(_XT) 129 | Y.append(_Y) 130 | timestamps_Y += _timestamps_Y 131 | 132 | meta_feature = [] 133 | if meta_data: 134 | # load time feature 135 | time_feature = timestamp2vec(timestamps_Y) 136 | meta_feature.append(time_feature) 137 | if holiday_data: 138 | # load holiday 139 | holiday_feature = load_holiday(timestamps_Y) 140 | meta_feature.append(holiday_feature) 141 | if meteorol_data: 142 | # load meteorol data 143 | meteorol_feature = load_meteorol(timestamps_Y) 144 | meta_feature.append(meteorol_feature) 145 | 146 | meta_feature = np.hstack(meta_feature) if len( 147 | meta_feature) > 0 else np.asarray(meta_feature) 148 | metadata_dim = meta_feature.shape[1] if len( 149 | meta_feature.shape) > 1 else None 150 | if metadata_dim < 1: 151 | metadata_dim = None 152 | if meta_data and holiday_data and meteorol_data: 153 | print('time feature:', time_feature.shape, 'holiday feature:', holiday_feature.shape, 154 | 'meteorol feature: ', meteorol_feature.shape, 'mete feature: ', meta_feature.shape) 155 | 156 | XC = np.vstack(XC) 157 | XP = np.vstack(XP) 158 | XT = np.vstack(XT) 159 | Y = np.vstack(Y) 160 | print("XC shape: ", XC.shape, "XP shape: ", XP.shape, 161 | "XT shape: ", XT.shape, "Y shape:", Y.shape) 162 | 163 | XC_train, XP_train, XT_train, Y_train = XC[ 164 | :-len_test], XP[:-len_test], XT[:-len_test], Y[:-len_test] 165 | XC_test, XP_test, XT_test, Y_test = XC[ 166 | -len_test:], XP[-len_test:], XT[-len_test:], Y[-len_test:] 167 | timestamp_train, timestamp_test = timestamps_Y[ 168 | :-len_test], timestamps_Y[-len_test:] 169 | 170 | X_train = [] 171 | X_test = [] 172 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_train, XP_train, XT_train]): 173 | if l > 0: 174 | X_train.append(X_) 175 | for l, X_ in zip([len_closeness, len_period, len_trend], [XC_test, XP_test, XT_test]): 176 | if l > 0: 177 | X_test.append(X_) 178 | print('train shape:', XC_train.shape, Y_train.shape, 179 | 'test shape: ', XC_test.shape, Y_test.shape) 180 | 181 | if metadata_dim is not None: 182 | meta_feature_train, meta_feature_test = meta_feature[ 183 | :-len_test], meta_feature[-len_test:] 184 | X_train.append(meta_feature_train) 185 | X_test.append(meta_feature_test) 186 | for _X in X_train: 187 | print(_X.shape, ) 188 | print() 189 | for _X in X_test: 190 | print(_X.shape, ) 191 | print() 192 | return X_train, Y_train, X_test, Y_test, mmn, metadata_dim, timestamp_train, timestamp_test 193 | -------------------------------------------------------------------------------- /deepst/datasets/__init__.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import h5py 3 | import time 4 | 5 | def load_stdata(fname): 6 | f = h5py.File(fname, 'r') 7 | data = f['data'].value 8 | timestamps = f['date'].value 9 | f.close() 10 | return data, timestamps 11 | 12 | 13 | def stat(fname): 14 | def get_nb_timeslot(f): 15 | s = f['date'][0] 16 | e = f['date'][-1] 17 | year, month, day = map(int, [s[:4], s[4:6], s[6:8]]) 18 | ts = time.strptime("%04i-%02i-%02i" % (year, month, day), "%Y-%m-%d") 19 | year, month, day = map(int, [e[:4], e[4:6], e[6:8]]) 20 | te = time.strptime("%04i-%02i-%02i" % (year, month, day), "%Y-%m-%d") 21 | nb_timeslot = (time.mktime(te) - time.mktime(ts)) / (0.5 * 3600) + 48 22 | ts_str, te_str = time.strftime("%Y-%m-%d", ts), time.strftime("%Y-%m-%d", te) 23 | return nb_timeslot, ts_str, te_str 24 | 25 | with h5py.File(fname) as f: 26 | nb_timeslot, ts_str, te_str = get_nb_timeslot(f) 27 | nb_day = int(nb_timeslot / 48) 28 | mmax = f['data'].value.max() 29 | mmin = f['data'].value.min() 30 | stat = '=' * 5 + 'stat' + '=' * 5 + '\n' + \ 31 | 'data shape: %s\n' % str(f['data'].shape) + \ 32 | '# of days: %i, from %s to %s\n' % (nb_day, ts_str, te_str) + \ 33 | '# of timeslots: %i\n' % int(nb_timeslot) + \ 34 | '# of timeslots (available): %i\n' % f['date'].shape[0] + \ 35 | 'missing ratio of timeslots: %.1f%%\n' % ((1. - float(f['date'].shape[0] / nb_timeslot)) * 100) + \ 36 | 'max: %.3f, min: %.3f\n' % (mmax, mmin) + \ 37 | '=' * 5 + 'stat' + '=' * 5 38 | print(stat) 39 | 40 | """ 41 | def stat(fname): 42 | def get_nb_timeslot(f): 43 | s = f['date'][0] 44 | e = f['date'][-1] 45 | year, month, day = map(int, [s[:4], s[4:6], s[6:8]]) 46 | ts = time.strptime("%04i-%02i-%02i" % (year, month, day), "%Y-%m-%d") 47 | year, month, day = map(int, [e[:4], e[4:6], e[6:8]]) 48 | te = time.strptime("%04i-%02i-%02i" % (year, month, day), "%Y-%m-%d") 49 | nb_timeslot = (time.mktime(te) - time.mktime(ts)) / (0.5 * 3600) + 48 50 | ts_str, te_str = time.strftime("%Y-%m-%d", ts), time.strftime("%Y-%m-%d", te) 51 | return nb_timeslot, ts_str, te_str 52 | 53 | with h5py.File(fname) as f: 54 | nb_timeslot, ts_str, te_str = get_nb_timeslot(f) 55 | nb_day = int(nb_timeslot / 48) 56 | mmax = f['data'].value.max() 57 | mmin = f['data'].value.min() 58 | stat = '=' * 5 + 'stat' + '=' * 5 + '\n' + \ 59 | 'data shape: %s\n' % str(f['data'].shape) + \ 60 | 'date shape: %s\n' % str(f['date'].shape) + \ 61 | 'date range: [%s, %s]\n' % (str(f['date'][0][:8]), str(f['date'][-1][:8])) + \ 62 | '# of days: %i, from %s to %s\n' % (nb_day, ts_str, te_str) + \ 63 | '# of timeslots: %i\n' % int(nb_timeslot) + \ 64 | '# of timeslots (available): %i\n' % f['date'].shape[0] + \ 65 | 'missing ratio of timeslots: %.1f%%\n' % ((1. 
- float(f['date'].shape[0] / nb_timeslot)) * 100) + \ 66 | 'max: %.3f, min: %.3f\n' % (mmax, mmin) + \ 67 | '=' * 5 + 'stat' + '=' * 5 68 | print(stat) 69 | """ -------------------------------------------------------------------------------- /deepst/metrics.py: -------------------------------------------------------------------------------- 1 | # import numpy as np 2 | from keras import backend as K 3 | 4 | 5 | def mean_squared_error(y_true, y_pred): 6 | return K.mean(K.square(y_pred - y_true)) 7 | 8 | 9 | def root_mean_square_error(y_true, y_pred): 10 | return mean_squared_error(y_true, y_pred) ** 0.5 11 | 12 | 13 | def rmse(y_true, y_pred): 14 | return mean_squared_error(y_true, y_pred) ** 0.5 15 | 16 | # aliases 17 | mse = MSE = mean_squared_error 18 | # rmse = RMSE = root_mean_square_error 19 | 20 | 21 | def masked_mean_squared_error(y_true, y_pred): 22 | idx = (y_true > 1e-6).nonzero() 23 | return K.mean(K.square(y_pred[idx] - y_true[idx])) 24 | 25 | 26 | def masked_rmse(y_true, y_pred): 27 | return masked_mean_squared_error(y_true, y_pred) ** 0.5 28 | -------------------------------------------------------------------------------- /deepst/models/STConvolution.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from keras.models import Sequential 3 | from keras.layers.core import Dense 4 | from keras.layers.core import Reshape, Merge 5 | from keras.layers.core import Activation 6 | from keras.layers.advanced_activations import LeakyReLU 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, Convolution3D 10 | 11 | 12 | def seqCNN(n_flow=4, seq_len=3, map_height=32, map_width=32): 13 | model = Sequential() 14 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow*seq_len, map_height, map_width), border_mode='same')) 15 | model.add(Activation('relu')) 16 | 17 | model.add(Convolution2D(128, 3, 3, border_mode='same')) 18 | model.add(Activation('relu')) 19 | 20 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 21 | model.add(Activation('relu')) 22 | 23 | model.add(Convolution2D(n_flow, 3, 3, border_mode='same')) 24 | model.add(Activation('tanh')) 25 | return model 26 | 27 | 28 | def seqCNNBase(conf=(4, 3, 32, 32)): 29 | n_flow, seq_len, map_height, map_width = conf 30 | model = Sequential() 31 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow*seq_len, map_height, map_width), border_mode='same')) 32 | model.add(Activation('relu')) 33 | 34 | model.add(Convolution2D(128, 3, 3, border_mode='same')) 35 | model.add(Activation('relu')) 36 | 37 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 38 | model.add(Activation('relu')) 39 | 40 | model.add(Convolution2D(n_flow, 3, 3, border_mode='same')) 41 | # model.add(Activation('tanh')) 42 | return model 43 | 44 | 45 | def seqCNNBaseLayer1(conf=(4, 3, 32, 32)): 46 | # 1 layer CNN for early fusion 47 | n_flow, seq_len, map_height, map_width = conf 48 | model = Sequential() 49 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow * seq_len, map_height, map_width), border_mode='same')) 50 | model.add(Activation('relu')) 51 | return model 52 | 53 | 54 | def seqCNN_CPT(c_conf=(4, 3, 32, 32), p_conf=(4, 3, 32, 32), t_conf=(4, 3, 32, 32)): 55 | ''' 56 | C - Temporal Closeness 57 | P - Period 58 | T - Trend 59 | conf = (nb_flow, seq_len, map_height, map_width) 60 | ''' 61 | model = Sequential() 62 | components = [] 63 | 64 | for conf in 
[c_conf, p_conf, t_conf]: 65 | if conf is not None: 66 | components.append(seqCNNBaseLayer1(conf)) 67 | nb_flow = conf[0] 68 | model.add(Merge(components, mode='concat', concat_axis=1)) # concat 69 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 70 | model.add(Activation('relu')) 71 | 72 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 73 | model.add(Activation('relu')) 74 | 75 | model.add(Convolution2D(nb_flow, 3, 3, border_mode='same')) 76 | model.add(Activation('tanh')) 77 | return model 78 | 79 | 80 | def seqCNNBaseLayer1_2(conf=(4, 3, 32, 32)): 81 | # 1 layer CNN for early fusion 82 | n_flow, seq_len, map_height, map_width = conf 83 | model = Sequential() 84 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow * seq_len, map_height, map_width), border_mode='same')) 85 | # model.add(Activation('relu')) 86 | return model 87 | 88 | 89 | def seqCNN_CPT2(c_conf=(4, 3, 32, 32), p_conf=(4, 3, 32, 32), t_conf=(4, 3, 32, 32)): 90 | ''' 91 | C - Temporal Closeness 92 | P - Period 93 | T - Trend 94 | conf = (nb_flow, seq_len, map_height, map_width) 95 | ''' 96 | model = Sequential() 97 | components = [] 98 | 99 | for conf in [c_conf, p_conf, t_conf]: 100 | if conf is not None: 101 | components.append(seqCNNBaseLayer1_2(conf)) 102 | nb_flow = conf[0] 103 | # model.add(Merge(components, mode='concat', concat_axis=1)) # concat 104 | if len(components) > 1: 105 | model.add(Merge(components, mode='sum')) 106 | else: 107 | model = components[0] 108 | model.add(Activation('relu')) 109 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 110 | model.add(Activation('relu')) 111 | 112 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 113 | model.add(Activation('relu')) 114 | 115 | model.add(Convolution2D(nb_flow, 3, 3, border_mode='same')) 116 | model.add(Activation('tanh')) 117 | return model 118 | 119 | 120 | def seqCNN_CPTM(c_conf=(4, 3, 32, 32), p_conf=(4, 3, 32, 32), t_conf=(4, 3, 32, 32), metadata_dim=None): 121 | ''' 122 | C - Temporal Closeness 123 | P - Period 124 | T - Trend 125 | conf = (nb_flow, seq_len, map_height, map_width) 126 | metadata_dim 127 | ''' 128 | model = Sequential() 129 | components = [] 130 | for conf in [c_conf, p_conf, t_conf]: 131 | if conf is not None: 132 | components.append(seqCNNBaseLayer1_2(conf)) 133 | # nb_flow = conf[0] 134 | nb_flow, _, map_height, map_width = conf 135 | # model.add(Merge(components, mode='concat', concat_axis=1)) # concat 136 | if len(components) > 1: 137 | model.add(Merge(components, mode='sum')) 138 | else: 139 | model = components[0] 140 | model.add(Activation('relu')) 141 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 142 | model.add(Activation('relu')) 143 | 144 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 145 | model.add(Activation('relu')) 146 | 147 | model.add(Convolution2D(nb_flow, 3, 3, border_mode='same')) 148 | 149 | metadata_processor = Sequential() 150 | # metadata_processor.add(Dense(output_dim=nb_flow * map_height * map_width, input_dim=metadata_dim)) 151 | metadata_processor.add(Dense(output_dim=10, input_dim=metadata_dim)) 152 | metadata_processor.add(Activation('relu')) 153 | metadata_processor.add(Dense(output_dim=nb_flow * map_height * map_width)) 154 | metadata_processor.add(Activation('relu')) 155 | metadata_processor.add(Reshape((nb_flow, map_height, map_width))) 156 | 157 | model_final=Sequential() 158 | model_final.add(Merge([model, metadata_processor], mode='sum')) 159 | model_final.add(Activation('tanh')) 160 | return model_final 161 | 162 | 163 | def 
lateFusion(metadata_dim, n_flow=2, seq_len=3, map_height=32, map_width=32): 164 | 165 | mat_model = seqCNNBase((n_flow, seq_len, map_height, map_width)) 166 | metadata_processor = Sequential() 167 | metadata_processor.add(Dense(output_dim=n_flow * map_height * map_width, input_dim=metadata_dim)) 168 | metadata_processor.add(Reshape((n_flow, map_height, map_width))) 169 | # metadata_processor.add(Activation('relu')) 170 | 171 | model = Sequential() 172 | model.add(Merge([mat_model, metadata_processor], mode='sum')) 173 | model.add(Activation('tanh')) 174 | return model 175 | 176 | 177 | def seqCNN_BN(n_flow=4, seq_len=3, map_height=32, map_width=32): 178 | model = Sequential() 179 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow*seq_len, map_height, map_width), border_mode='same')) 180 | model.add(LeakyReLU(0.2)) 181 | model.add(BatchNormalization()) 182 | 183 | model.add(Convolution2D(128, 3, 3, border_mode='same')) 184 | model.add(LeakyReLU(0.2)) 185 | model.add(BatchNormalization()) 186 | 187 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 188 | model.add(LeakyReLU(0.2)) 189 | model.add(BatchNormalization()) 190 | 191 | model.add(Convolution2D(n_flow, 3, 3, border_mode='same')) 192 | model.add(Activation('tanh')) 193 | return model 194 | 195 | 196 | def seqCNN_LReLU(n_flow=4, seq_len=3, map_height=32, map_width=32): 197 | model = Sequential() 198 | model.add(Convolution2D(64, 3, 3, input_shape=(n_flow*seq_len, map_height, map_width), border_mode='same')) 199 | model.add(LeakyReLU(0.2)) 200 | # model.add(BatchNormalization()) 201 | 202 | model.add(Convolution2D(128, 3, 3, border_mode='same')) 203 | model.add(LeakyReLU(0.2)) 204 | # model.add(BatchNormalization()) 205 | 206 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 207 | model.add(LeakyReLU(0.2)) 208 | # model.add(BatchNormalization()) 209 | 210 | model.add(Convolution2D(n_flow, 3, 3, border_mode='same')) 211 | model.add(Activation('tanh')) 212 | return model 213 | 214 | 215 | def seq3DCNN(n_flow=4, seq_len=3, map_height=32, map_width=32): 216 | model = Sequential() 217 | # model.add(ZeroPadding3D(padding=(0, 1, 1), input_shape=(n_flow, seq_len, map_height, map_width))) 218 | # model.add(Convolution3D(64, 2, 3, 3, border_mode='valid')) 219 | model.add(Convolution3D(64, 2, 3, 3, border_mode='same', input_shape=(n_flow, seq_len, map_height, map_width))) 220 | model.add(Activation('relu')) 221 | 222 | model.add(Convolution3D(128, 2, 3, 3, border_mode='same')) 223 | model.add(Activation('relu')) 224 | 225 | model.add(Convolution3D(64, 2, 3, 3, border_mode='same')) 226 | model.add(Activation('relu')) 227 | 228 | model.add(ZeroPadding3D(padding=(0, 1, 1))) 229 | model.add(Convolution3D(n_flow, seq_len, 3, 3, border_mode='valid')) 230 | # model.add(Convolution3D(n_flow, seq_len-2, 3, 3, border_mode='same')) 231 | model.add(Activation('tanh')) 232 | return model -------------------------------------------------------------------------------- /deepst/models/STResNet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ST-ResNet: Deep Spatio-temporal Residual Networks 3 | ''' 4 | 5 | from __future__ import print_function 6 | from keras.layers import ( 7 | Input, 8 | Activation, 9 | merge, 10 | Dense, 11 | Reshape 12 | ) 13 | from keras.layers.convolutional import Convolution2D 14 | from keras.layers.normalization import BatchNormalization 15 | from keras.models import Model 16 | #from keras.utils.visualize_util import plot 17 | 18 | 19 | def _shortcut(input, residual):
20 | return merge([input, residual], mode='sum') 21 | 22 | 23 | def _bn_relu_conv(nb_filter, nb_row, nb_col, subsample=(1, 1), bn=False): 24 | def f(input): 25 | if bn: 26 | input = BatchNormalization(mode=0, axis=1)(input) 27 | activation = Activation('relu')(input) 28 | return Convolution2D(nb_filter=nb_filter, nb_row=nb_row, nb_col=nb_col, subsample=subsample, border_mode="same")(activation) 29 | return f 30 | 31 | 32 | def _residual_unit(nb_filter, init_subsample=(1, 1)): 33 | def f(input): 34 | residual = _bn_relu_conv(nb_filter, 3, 3)(input) 35 | residual = _bn_relu_conv(nb_filter, 3, 3)(residual) 36 | return _shortcut(input, residual) 37 | return f 38 | 39 | 40 | def ResUnits(residual_unit, nb_filter, repetations=1): 41 | def f(input): 42 | for i in range(repetations): 43 | init_subsample = (1, 1) 44 | input = residual_unit(nb_filter=nb_filter, 45 | init_subsample=init_subsample)(input) 46 | return input 47 | return f 48 | 49 | 50 | def stresnet(c_conf=(3, 2, 32, 32), p_conf=(3, 2, 32, 32), t_conf=(3, 2, 32, 32), external_dim=8, nb_residual_unit=3): 51 | ''' 52 | C - Temporal Closeness 53 | P - Period 54 | T - Trend 55 | conf = (len_seq, nb_flow, map_height, map_width) 56 | external_dim 57 | ''' 58 | 59 | # main input 60 | main_inputs = [] 61 | outputs = [] 62 | for conf in [c_conf, p_conf, t_conf]: 63 | if conf is not None: 64 | len_seq, nb_flow, map_height, map_width = conf 65 | input = Input(shape=(nb_flow * len_seq, map_height, map_width)) 66 | main_inputs.append(input) 67 | # Conv1 68 | conv1 = Convolution2D( 69 | nb_filter=64, nb_row=3, nb_col=3, border_mode="same")(input) 70 | # [nb_residual_unit] Residual Units 71 | residual_output = ResUnits(_residual_unit, nb_filter=64, 72 | repetations=nb_residual_unit)(conv1) 73 | # Conv2 74 | activation = Activation('relu')(residual_output) 75 | conv2 = Convolution2D( 76 | nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")(activation) 77 | outputs.append(conv2) 78 | 79 | # parameter-matrix-based fusion 80 | if len(outputs) == 1: 81 | main_output = outputs[0] 82 | else: 83 | from .iLayer import iLayer 84 | new_outputs = [] 85 | for output in outputs: 86 | new_outputs.append(iLayer()(output)) 87 | main_output = merge(new_outputs, mode='sum') 88 | 89 | # fusing with external component 90 | if external_dim != None and external_dim > 0: 91 | # external input 92 | external_input = Input(shape=(external_dim,)) 93 | main_inputs.append(external_input) 94 | embedding = Dense(output_dim=10)(external_input) 95 | embedding = Activation('relu')(embedding) 96 | h1 = Dense(output_dim=nb_flow * map_height * map_width)(embedding) 97 | activation = Activation('relu')(h1) 98 | external_output = Reshape((nb_flow, map_height, map_width))(activation) 99 | main_output = merge([main_output, external_output], mode='sum') 100 | else: 101 | print('external_dim:', external_dim) 102 | 103 | main_output = Activation('tanh')(main_output) 104 | model = Model(input=main_inputs, output=main_output) 105 | 106 | return model 107 | 108 | if __name__ == '__main__': 109 | model = stresnet(external_dim=28, nb_residual_unit=12) 110 | #plot(model, to_file='ST-ResNet.png', show_shapes=True) 111 | model.summary() 112 | -------------------------------------------------------------------------------- /deepst/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirkhango/DeepST/7ba669013bbafd5f413ef50d5d76094c3a68efd6/deepst/models/__init__.py 
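For orientation, here is a minimal, hypothetical usage sketch for `stresnet` (it assumes the Keras 1.x API this module targets and inputs shaped like those returned by `deepst.datasets.TaxiBJ.load_data`; the hyper-parameters below are illustrative only, not the paper's settings):

```
from deepst.models.STResNet import stresnet
from deepst.metrics import rmse

# conf tuples are (len_seq, nb_flow, map_height, map_width)
model = stresnet(c_conf=(3, 2, 32, 32), p_conf=(1, 2, 32, 32),
                 t_conf=(1, 2, 32, 32), external_dim=28, nb_residual_unit=4)
model.compile(loss='mse', optimizer='adam', metrics=[rmse])
# X_train = [XC, XP, XT, meta_feature] and Y_train as produced by TaxiBJ.load_data
# model.fit(X_train, Y_train, nb_epoch=10, batch_size=32, verbose=1)
```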
-------------------------------------------------------------------------------- /deepst/models/iLayer.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from keras.engine.topology import Layer 3 | # from keras.layers import Dense 4 | import numpy as np 5 | 6 | 7 | class iLayer(Layer): 8 | def __init__(self, **kwargs): 9 | # self.output_dim = output_dim 10 | super(iLayer, self).__init__(**kwargs) 11 | 12 | def build(self, input_shape): 13 | initial_weight_value = np.random.random(input_shape[1:]) 14 | self.W = K.variable(initial_weight_value) 15 | self.trainable_weights = [self.W] 16 | 17 | def call(self, x, mask=None): 18 | return x * self.W 19 | 20 | def get_output_shape_for(self, input_shape): 21 | return input_shape 22 | -------------------------------------------------------------------------------- /deepst/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from copy import copy 4 | import time 5 | # from temporal_contrast_normalization import TemporalConstrastNormalization 6 | # from personal_temporal_contrast_normalization import PersonalTemporalConstrastNormalization 7 | from .minmax_normalization import MinMaxNormalization 8 | from ..utils import string2timestamp 9 | 10 | 11 | def timestamp2vec(timestamps): 12 | # tm_wday range [0, 6], Monday is 0 13 | # vec = [time.strptime(str(t[:8], encoding='utf-8'), '%Y%m%d').tm_wday for t in timestamps] # python3 14 | vec = [time.strptime(t[:8], '%Y%m%d').tm_wday for t in timestamps] # python2 15 | ret = [] 16 | for i in vec: 17 | v = [0 for _ in range(7)] 18 | v[i] = 1 19 | if i >= 5: 20 | v.append(0) # weekend 21 | else: 22 | v.append(1) # weekday 23 | ret.append(v) 24 | return np.asarray(ret) 25 | 26 | 27 | def remove_incomplete_days(data, timestamps, T=48): 28 | # remove a certain day which has not 48 timestamps 29 | days = [] # available days: some day only contain some seqs 30 | days_incomplete = [] 31 | i = 0 32 | while i < len(timestamps): 33 | if int(timestamps[i][8:]) != 1: 34 | i += 1 35 | elif i+T-1 < len(timestamps) and int(timestamps[i+T-1][8:]) == T: 36 | days.append(timestamps[i][:8]) 37 | i += T 38 | else: 39 | days_incomplete.append(timestamps[i][:8]) 40 | i += 1 41 | print("incomplete days: ", days_incomplete) 42 | days = set(days) 43 | idx = [] 44 | for i, t in enumerate(timestamps): 45 | if t[:8] in days: 46 | idx.append(i) 47 | 48 | data = data[idx] 49 | timestamps = [timestamps[i] for i in idx] 50 | return data, timestamps 51 | 52 | 53 | def split_by_time(data, timestamps, split_timestamp): 54 | # divide data into two subsets: 55 | # e.g., Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28 56 | assert(len(data) == len(timestamps)) 57 | assert(split_timestamp in set(timestamps)) 58 | 59 | data_1 = [] 60 | timestamps_1 = [] 61 | data_2 = [] 62 | timestamps_2 = [] 63 | switch = False 64 | for t, d in zip(timestamps, data): 65 | if split_timestamp == t: 66 | switch = True 67 | if switch is False: 68 | data_1.append(d) 69 | timestamps_1.append(t) 70 | else: 71 | data_2.append(d) 72 | timestamps_2.append(t) 73 | return (np.asarray(data_1), timestamps_1), (np.asarray(data_2), timestamps_2) 74 | 75 | 76 | def timeseries2seqs(data, timestamps, length=3, T=48): 77 | raw_ts = copy(timestamps) 78 | if type(timestamps[0]) != pd.Timestamp: 79 | timestamps = string2timestamp(timestamps, T=T) 80 | 81 | offset = pd.DateOffset(minutes=24 * 60 // T) 82 | 83 | 
breakpoints = [0] 84 | for i in range(1, len(timestamps)): 85 | if timestamps[i-1] + offset != timestamps[i]: 86 | print(timestamps[i-1], timestamps[i], raw_ts[i-1], raw_ts[i]) 87 | breakpoints.append(i) 88 | breakpoints.append(len(timestamps)) 89 | X = [] 90 | Y = [] 91 | for b in range(1, len(breakpoints)): 92 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 93 | idx = range(breakpoints[b-1], breakpoints[b]) 94 | for i in range(len(idx) - length): 95 | x = np.vstack(data[idx[i:i+length]]) 96 | y = data[idx[i+length]] 97 | X.append(x) 98 | Y.append(y) 99 | X = np.asarray(X) 100 | Y = np.asarray(Y) 101 | print("X shape: ", X.shape, "Y shape:", Y.shape) 102 | return X, Y 103 | 104 | def timeseries2seqs_meta(data, timestamps, length=3, T=48): 105 | raw_ts = copy(timestamps) 106 | if type(timestamps[0]) != pd.Timestamp: 107 | timestamps = string2timestamp(timestamps, T=T) 108 | 109 | offset = pd.DateOffset(minutes=24 * 60 // T) 110 | 111 | breakpoints = [0] 112 | for i in range(1, len(timestamps)): 113 | if timestamps[i-1] + offset != timestamps[i]: 114 | print(timestamps[i-1], timestamps[i], raw_ts[i-1], raw_ts[i]) 115 | breakpoints.append(i) 116 | breakpoints.append(len(timestamps)) 117 | X = [] 118 | Y = [] 119 | avail_timestamps = [] 120 | for b in range(1, len(breakpoints)): 121 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 122 | idx = range(breakpoints[b-1], breakpoints[b]) 123 | for i in range(len(idx) - length): 124 | avail_timestamps.append(raw_ts[idx[i+length]]) 125 | x = np.vstack(data[idx[i:i+length]]) 126 | y = data[idx[i+length]] 127 | X.append(x) 128 | Y.append(y) 129 | X = np.asarray(X) 130 | Y = np.asarray(Y) 131 | print("X shape: ", X.shape, "Y shape:", Y.shape) 132 | return X, Y, avail_timestamps 133 | 134 | 135 | def timeseries2seqs_peroid_trend(data, timestamps, length=3, T=48, peroid=pd.DateOffset(days=7), peroid_len=2): 136 | raw_ts = copy(timestamps) 137 | if type(timestamps[0]) != pd.Timestamp: 138 | timestamps = string2timestamp(timestamps, T=T) 139 | 140 | # timestamps index 141 | timestamp_idx = dict() 142 | for i, t in enumerate(timestamps): 143 | timestamp_idx[t] = i 144 | 145 | offset = pd.DateOffset(minutes=24 * 60 // T) 146 | 147 | breakpoints = [0] 148 | for i in range(1, len(timestamps)): 149 | if timestamps[i-1] + offset != timestamps[i]: 150 | print(timestamps[i-1], timestamps[i], raw_ts[i-1], raw_ts[i]) 151 | breakpoints.append(i) 152 | breakpoints.append(len(timestamps)) 153 | X = [] 154 | Y = [] 155 | for b in range(1, len(breakpoints)): 156 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 157 | idx = range(breakpoints[b-1], breakpoints[b]) 158 | for i in range(len(idx) - length): 159 | # period 160 | target_timestamp = timestamps[i+length] 161 | 162 | legal_idx = [] 163 | for pi in range(1, 1+peroid_len): 164 | if target_timestamp - peroid * pi not in timestamp_idx: 165 | break 166 | legal_idx.append(timestamp_idx[target_timestamp - peroid * pi]) 167 | # print("len: ", len(legal_idx), peroid_len) 168 | if len(legal_idx) != peroid_len: 169 | continue 170 | 171 | legal_idx += idx[i:i+length] 172 | 173 | # trend 174 | x = np.vstack(data[legal_idx]) 175 | y = data[idx[i+length]] 176 | X.append(x) 177 | Y.append(y) 178 | X = np.asarray(X) 179 | Y = np.asarray(Y) 180 | print("X shape: ", X.shape, "Y shape:", Y.shape) 181 | return X, Y 182 | 183 | 184 | def timeseries2seqs_3D(data, timestamps, length=3, T=48): 185 | raw_ts = copy(timestamps) 186 | if type(timestamps[0]) != pd.Timestamp: 187 | timestamps = 
string2timestamp(timestamps, T=T) 188 | 189 | offset = pd.DateOffset(minutes=24 * 60 // T) 190 | 191 | breakpoints = [0] 192 | for i in range(1, len(timestamps)): 193 | if timestamps[i-1] + offset != timestamps[i]: 194 | print(timestamps[i-1], timestamps[i], raw_ts[i-1], raw_ts[i]) 195 | breakpoints.append(i) 196 | breakpoints.append(len(timestamps)) 197 | X = [] 198 | Y = [] 199 | for b in range(1, len(breakpoints)): 200 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 201 | idx = range(breakpoints[b-1], breakpoints[b]) 202 | for i in range(len(idx) - length): 203 | x = data[idx[i:i+length]].reshape(-1, length, 32, 32) 204 | y = np.asarray([data[idx[i+length]]]).reshape(-1, 1, 32, 32) 205 | X.append(x) 206 | Y.append(y) 207 | X = np.asarray(X) 208 | Y = np.asarray(Y) 209 | print("X shape: ", X.shape, "Y shape:", Y.shape) 210 | return X, Y 211 | 212 | 213 | def bug_timeseries2seqs(data, timestamps, length=3, T=48): 214 | # have a bug 215 | if type(timestamps[0]) != pd.Timestamp: 216 | timestamps = string2timestamp(timestamps, T=T) 217 | 218 | offset = pd.DateOffset(minutes=24 * 60 // T) 219 | 220 | breakpoints = [0] 221 | for i in range(1, len(timestamps)): 222 | if timestamps[i-1] + offset != timestamps[i]: 223 | breakpoints.append(i) 224 | X = [] 225 | Y = [] 226 | for b in range(1, len(breakpoints)): 227 | print('breakpoints: ', breakpoints[b-1], breakpoints[b]) 228 | idx = range(breakpoints[b-1], breakpoints[b]) 229 | for i in range(len(idx) - 3): 230 | x = np.vstack(data[idx[i:i+3]]) 231 | y = data[idx[i+3]] 232 | X.append(x) 233 | Y.append(y) 234 | X = np.asarray(X) 235 | Y = np.asarray(Y) 236 | print("X shape: ", X.shape, "Y shape:", Y.shape) 237 | return X, Y 238 | -------------------------------------------------------------------------------- /deepst/preprocessing/minmax_normalization.py: -------------------------------------------------------------------------------- 1 | """ 2 | MinMaxNormalization 3 | """ 4 | from __future__ import print_function 5 | import numpy as np 6 | np.random.seed(1337) # for reproducibility 7 | 8 | 9 | class MinMaxNormalization(object): 10 | '''MinMax Normalization --> [-1, 1] 11 | x = (x - min) / (max - min). 12 | x = x * 2 - 1 13 | ''' 14 | 15 | def __init__(self): 16 | pass 17 | 18 | def fit(self, X): 19 | self._min = X.min() 20 | self._max = X.max() 21 | print("min:", self._min, "max:", self._max) 22 | 23 | def transform(self, X): 24 | X = 1. * (X - self._min) / (self._max - self._min) 25 | X = X * 2. - 1. 26 | return X 27 | 28 | def fit_transform(self, X): 29 | self.fit(X) 30 | return self.transform(X) 31 | 32 | def inverse_transform(self, X): 33 | X = (X + 1.) / 2. 34 | X = 1. * X * (self._max - self._min) + self._min 35 | return X 36 | 37 | 38 | class MinMaxNormalization_01(object): 39 | '''MinMax Normalization --> [0, 1] 40 | x = (x - min) / (max - min). 41 | ''' 42 | 43 | def __init__(self): 44 | pass 45 | 46 | def fit(self, X): 47 | self._min = X.min() 48 | self._max = X.max() 49 | print("min:", self._min, "max:", self._max) 50 | 51 | def transform(self, X): 52 | X = 1. * (X - self._min) / (self._max - self._min) 53 | return X 54 | 55 | def fit_transform(self, X): 56 | self.fit(X) 57 | return self.transform(X) 58 | 59 | def inverse_transform(self, X): 60 | X = 1. 
* X * (self._max - self._min) + self._min 61 | return X 62 | -------------------------------------------------------------------------------- /deepst/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pandas as pd 3 | from datetime import datetime, timedelta 4 | import time 5 | import os 6 | 7 | 8 | def timestamp_str_new(cur_timestampes, T=48): 9 | os.environ['TZ'] = 'Asia/Shanghai' 10 | # print cur_timestampes 11 | if '-' in cur_timestampes[0]: 12 | return cur_timestampes 13 | ret = [] 14 | for v in cur_timestampes: 15 | '''TODO 16 | Bug here 17 | ''' 18 | cur_sec = time.mktime(time.strptime("%04i-%02i-%02i" % (int(v[:4]), int(v[4:6]), int(v[6:8])), "%Y-%m-%d")) + (int(v[8:]) * 24. * 60 * 60 // T) 19 | curr = time.localtime(cur_sec) 20 | if v == "20151101288" or v == "2015110124": 21 | print(v, time.strftime("%Y-%m-%d-%H-%M", curr), time.localtime(cur_sec), time.localtime(cur_sec - (int(v[8:]) * 24. * 60 * 60 // T)), time.localtime(cur_sec - (int(v[8:]) * 24. * 60 * 60 // T) + 3600 * 25)) 22 | ret.append(time.strftime("%Y-%m-%d-%H-%M", curr)) 23 | return ret 24 | 25 | 26 | def string2timestamp_future(strings, T=48): 27 | strings = timestamp_str_new(strings, T) 28 | timestamps = [] 29 | for v in strings: 30 | year, month, day, hour, tm_min = [int(z) for z in v.split('-')] 31 | timestamps.append(pd.Timestamp(datetime(year, month, day, hour, tm_min))) 32 | 33 | return timestamps 34 | 35 | 36 | def string2timestamp(strings, T=48): 37 | timestamps = [] 38 | 39 | time_per_slot = 24.0 / T 40 | num_per_T = T // 24 41 | for t in strings: 42 | year, month, day, slot = int(t[:4]), int(t[4:6]), int(t[6:8]), int(t[8:])-1 43 | timestamps.append(pd.Timestamp(datetime(year, month, day, hour=int(slot * time_per_slot), minute=(slot % num_per_T) * int(60.0 * time_per_slot)))) 44 | 45 | return timestamps 46 | 47 | 48 | def timestamp2string(timestamps, T=48): 49 | # timestamps = timestamp_str_new(timestamps) 50 | num_per_T = T // 24 51 | return ["%s%02i" % (ts.strftime('%Y%m%d'), 52 | int(1+ts.to_datetime().hour*num_per_T+ts.to_datetime().minute/(60 // num_per_T))) for ts in timestamps] 53 | # int(1+ts.to_datetime().hour*2+ts.to_datetime().minute/30)) for ts in timestamps] 54 | -------------------------------------------------------------------------------- /deepst/utils/eval.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | import numpy as np 4 | # np.random.seed(1337) # for reproducibility 5 | 6 | 7 | def rmse(Y_true, Y_pred): 8 | # https://www.kaggle.com/wiki/RootMeanSquaredError 9 | from sklearn.metrics import mean_squared_error 10 | print('shape:', Y_true.shape, Y_pred.shape) 11 | print("===RMSE===") 12 | # in 13 | RMSE = mean_squared_error(Y_true[:, 0].flatten(), Y_pred[:, 0].flatten())**0.5 14 | print('inflow: ', RMSE) 15 | # out 16 | if Y_true.shape[1] > 1: 17 | RMSE = mean_squared_error(Y_true[:, 1].flatten(), Y_pred[:, 1].flatten())**0.5 18 | print('outflow: ', RMSE) 19 | # new 20 | if Y_true.shape[1] > 2: 21 | RMSE = mean_squared_error(Y_true[:, 2].flatten(), Y_pred[:, 2].flatten())**0.5 22 | print('newflow: ', RMSE) 23 | # end 24 | if Y_true.shape[1] > 3: 25 | RMSE = mean_squared_error(Y_true[:, 3].flatten(), Y_pred[:, 3].flatten())**0.5 26 | print('endflow: ', RMSE) 27 | 28 | RMSE = mean_squared_error(Y_true.flatten(), Y_pred.flatten())**0.5 29 | print("total rmse: ", RMSE) 30 | print("===RMSE===") 31 | return 
32 | 
33 | 
34 | def mean_absolute_percentage_error(y_true, y_pred):
35 |     idx = np.nonzero(y_true)
36 |     return np.mean(np.abs((y_true[idx] - y_pred[idx]) / y_true[idx])) * 100
37 | 
38 | 
39 | def mape(Y_true, Y_pred):
40 |     print("===MAPE===")
41 |     # in
42 |     MAPE = mean_absolute_percentage_error(Y_true[:, 0].flatten(), Y_pred[:, 0].flatten())
43 |     print("inflow: ", MAPE)
44 |     # out (guarded like `rmse` above so 2-flow data does not raise an IndexError)
45 |     if Y_true.shape[1] > 1:
46 |         MAPE = mean_absolute_percentage_error(Y_true[:, 1].flatten(), Y_pred[:, 1].flatten())
47 |         print("outflow: ", MAPE)
48 |     # new
49 |     if Y_true.shape[1] > 2:
50 |         MAPE = mean_absolute_percentage_error(Y_true[:, 2].flatten(), Y_pred[:, 2].flatten())
51 |         print("newflow: ", MAPE)
52 |     # end
53 |     if Y_true.shape[1] > 3:
54 |         MAPE = mean_absolute_percentage_error(Y_true[:, 3].flatten(), Y_pred[:, 3].flatten())
55 |         print("endflow: ", MAPE)
56 |     MAPE = mean_absolute_percentage_error(Y_true.flatten(), Y_pred.flatten())
57 |     print("total mape: ", MAPE)
58 |     print("===MAPE===")
59 |     return MAPE
60 | 
--------------------------------------------------------------------------------
/deepst/utils/evalMultiStepAhead.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import sys
3 | from deepst_flow.models.gan import generator_model
4 | from deepst_flow.datasets import load_stdata
5 | from deepst_flow.preprocessing import MinMaxNormalization
6 | from deepst_flow.preprocessing import remove_incomplete_days
7 | # import h5py
8 | import numpy as np
9 | from keras.optimizers import Adam
10 | import os
11 | # from keras.callbacks import EarlyStopping
12 | import cPickle as pickle
13 | import time
14 | import pandas as pd
15 | from copy import copy
16 | from deepst_flow.config import Config
17 | from deepst_flow.datasets.STMatrix import STMatrix
18 | from deepst_flow.utils.eval import rmse
19 | np.random.seed(1337)  # for reproducibility
20 | DATAPATH = Config().DATAPATH
21 | print(DATAPATH)
22 | 
23 | 
24 | def period_trend(period=1, trend=1):
25 |     model_name = sys.argv[1]
26 |     steps = 24
27 |     Period = 7
28 | 
29 |     T = 48  # number of timeslots per day
30 |     len_seq = 3
31 |     nb_flow = 4
32 |     nb_days = 120
33 |     # divide data into two subsets:
34 |     # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
35 |     len_train = T * (nb_days - 7)
36 |     len_test = T * 7
37 | 
38 |     data, timestamps = load_stdata(os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
39 |     print(timestamps)
40 |     # remove days that do not have 48 timeslots
41 |     data, timestamps = remove_incomplete_days(data, timestamps, T)
42 | 
43 |     # minmax_scale
44 |     data_train = data[:len_train]
45 |     mmn = MinMaxNormalization()
46 |     mmn.fit(data_train)
47 |     data = mmn.transform(data)
48 | 
49 |     st = STMatrix(data, timestamps, T)
50 | 
51 |     # save TCN and MMS
52 |     fpkl = open('preprocessing.pkl', 'wb')
53 |     for obj in [mmn]:  # [tcn, mmn]:
54 |         pickle.dump(obj, fpkl)
55 |     fpkl.close()
56 | 
57 |     if period == 1 and trend == 1:
58 |         depends = [1, 2, 3, Period*T, Period*T+1, Period*T+2, Period*T+3]
59 |         len_close = 3
60 |     elif period == 1:
61 |         depends = [1] + [Period * T * j for j in xrange(1, len_seq+1)]
62 |         len_close = 1
63 |     elif trend == 1:
64 |         depends = range(1, 1+len_seq)
65 |         len_close = 3
66 |     else:
67 |         depends = [1]
68 |         len_close = 1
69 |     # else:
70 |     #     print("unknown args")
71 |     #     sys.exit(-1)
72 | 
73 |     generator = generator_model(nb_flow, len(depends), 32, 32)
74 |     adam = Adam()
75 |     generator.compile(loss='mean_absolute_error', optimizer=adam)
76 |     generator.load_weights(model_name)
77 | 
78 |     # instance-based dataset --> sequences in the format (X, Y), where X is a sequence of images and Y is an image.
79 |     offset_frame = pd.DateOffset(minutes=24 * 60 // T)
80 |     Y_test = st.data[-(len_test+steps-1):]
81 |     Y_pd_timestamps = st.pd_timestamps[-(len_test+steps-1):]
82 | 
83 |     X_test = []
84 |     for pd_timestamp in Y_pd_timestamps:
85 |         x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
86 |         X_test.append(np.vstack(x))
87 |     X_test = np.asarray(X_test)
88 | 
89 |     Y_true = mmn.inverse_transform(Y_test[-len_test:])
90 | 
91 |     Y_hats = []
92 |     for k in xrange(1, steps+1):
93 |         print("\n\n==%d-step rmse==" % k)
94 |         ts = time.time()
95 |         Y_hat = generator.predict(X_test)
96 |         Y_hats.append(copy(Y_hat))
97 |         print('Y_hat shape', Y_hat.shape, 'X_test shape:', X_test.shape)
98 |         # eval
99 |         Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
100 |         rmse(Y_true, Y_pred)
101 |         X_test_hat = copy(X_test[1:])
102 |         for j in xrange(1, min(k, len_close) + 1):
103 |             # Y^\hat _t replace
104 |             X_test_hat[:, nb_flow*(j-1):nb_flow*j] = Y_hats[-j][:-j]
105 | 
106 |         X_test = copy(X_test_hat)
107 |         print("\nelapsed time (eval): ", time.time() - ts)
108 | 
109 | 
110 | def period_trend_closeness(len_closeness=3, len_trend=3, TrendInterval=7, len_period=3, PeriodInterval=1):
111 |     print("start: period_trend_closeness")
112 |     model_name = sys.argv[1]
113 |     steps = 24
114 |     # Period = 7
115 | 
116 |     T = 48  # number of timeslots per day
117 |     # len_seq = 3
118 |     nb_flow = 4
119 |     nb_days = 120
120 |     # divide data into two subsets:
121 |     # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
122 |     len_train = T * (nb_days - 7)
123 |     len_test = T * 7
124 | 
125 |     data, timestamps = load_stdata(os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
126 |     print(timestamps)
127 |     # remove days that do not have 48 timeslots
128 |     data, timestamps = remove_incomplete_days(data, timestamps, T)
129 | 
130 |     # minmax_scale
131 |     data_train = data[:len_train]
132 |     mmn = MinMaxNormalization()
133 |     mmn.fit(data_train)
134 |     data = mmn.transform(data)
135 | 
136 |     st = STMatrix(data, timestamps, T)
137 | 
138 |     # save TCN and MMS
139 |     fpkl = open('preprocessing.pkl', 'wb')
140 |     for obj in [mmn]:  # [tcn, mmn]:
141 |         pickle.dump(obj, fpkl)
142 |     fpkl.close()
143 | 
144 |     depends = range(1, len_closeness+1) + \
145 |         [PeriodInterval * T * j for j in xrange(1, len_period+1)] + \
146 |         [TrendInterval * T * j for j in xrange(1, len_trend+1)]
147 | 
148 |     generator = generator_model(nb_flow, len(depends), 32, 32)
149 |     adam = Adam()
150 |     generator.compile(loss='mean_absolute_error', optimizer=adam)
151 |     generator.load_weights(model_name)
152 | 
153 |     # instance-based dataset --> sequences in the format (X, Y), where X is a sequence of images and Y is an image.
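# (how the multi-step evaluation below works: each test instance at time t stacks
#  the frames at t - j*offset_frame for every offset j in `depends`; after the k-th
#  prediction pass, the first min(k, len_closeness) closeness channel blocks are
#  overwritten with the model's own earlier outputs, so the k-step-ahead RMSE is
#  measured under true rollout conditions rather than with ground-truth inputs.)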
154 |     offset_frame = pd.DateOffset(minutes=24 * 60 // T)
155 |     Y_test = st.data[-(len_test+steps-1):]
156 |     Y_pd_timestamps = st.pd_timestamps[-(len_test+steps-1):]
157 | 
158 |     X_test = []
159 |     for pd_timestamp in Y_pd_timestamps:
160 |         x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
161 |         X_test.append(np.vstack(x))
162 |     X_test = np.asarray(X_test)
163 | 
164 |     Y_true = mmn.inverse_transform(Y_test[-len_test:])
165 | 
166 |     Y_hats = []
167 |     for k in xrange(1, steps+1):
168 |         print("\n\n==%d-step rmse==" % k)
169 |         ts = time.time()
170 |         Y_hat = generator.predict(X_test)
171 |         Y_hats.append(copy(Y_hat))
172 |         print('Y_hat shape', Y_hat.shape, 'X_test shape:', X_test.shape)
173 |         # eval
174 |         Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
175 |         rmse(Y_true, Y_pred)
176 |         X_test_hat = copy(X_test[1:])
177 |         for j in xrange(1, min(k, len_closeness) + 1):
178 |             # Y^\hat _t replace
179 |             X_test_hat[:, nb_flow*(j-1):nb_flow*j] = Y_hats[-j][:-j]
180 | 
181 |         X_test = copy(X_test_hat)
182 |         print("\nelapsed time (eval): ", time.time() - ts)
183 | 
184 | if __name__ == '__main__':
185 |     if int(sys.argv[2]) == 0:  # period & trend
186 |         period_trend(1, 1)
187 |     elif int(sys.argv[2]) == 1:  # period
188 |         period_trend(1, 0)
189 |     elif int(sys.argv[2]) == 2:  # trend
190 |         period_trend(0, 1)
191 |     elif int(sys.argv[2]) == 3:
192 |         period_trend(0, 0)
193 |     else:
194 |         period_trend_closeness()
195 |         # print("unknown args")
196 |         # sys.exit(-1)
--------------------------------------------------------------------------------
/deepst/utils/evalMultiStepAhead4SeqModel.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import sys
3 | from deepst_flow.models.gan import generator_model
4 | from deepst_flow.datasets import load_stdata
5 | from deepst_flow.preprocessing import MinMaxNormalization
6 | from deepst_flow.preprocessing import remove_incomplete_days
7 | # import h5py
8 | import numpy as np
9 | from keras.optimizers import Adam
10 | import os
11 | # from keras.callbacks import EarlyStopping
12 | import cPickle as pickle
13 | import time
14 | import pandas as pd
15 | from copy import copy
16 | from deepst_flow.config import Config
17 | from deepst_flow.datasets.STMatrix import STMatrix
18 | from deepst_flow.utils.eval import rmse
19 | from deepst_flow.models.rnn import rnn_model
20 | np.random.seed(1337)  # for reproducibility
21 | DATAPATH = Config().DATAPATH
22 | print(DATAPATH)
23 | 
24 | 
25 | def seq_model(len_seq=3):
26 |     model_name = sys.argv[1]
27 |     steps = 24
28 |     # Period = 7
29 | 
30 |     T = 48  # number of timeslots per day
31 |     nb_flow = 4
32 |     nb_days = 120
33 |     # divide data into two subsets:
34 |     # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
35 |     len_train = T * (nb_days - 7)
36 |     len_test = T * 7
37 | 
38 |     data, timestamps = load_stdata(os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
39 |     print(timestamps)
40 |     # remove days that do not have 48 timeslots
41 |     data, timestamps = remove_incomplete_days(data, timestamps, T)
42 |     data = data.reshape(data.shape[0], -1)
43 | 
44 |     # minmax_scale
45 |     data_train = data[:len_train]
46 |     mmn = MinMaxNormalization()
47 |     mmn.fit(data_train)
48 |     data = mmn.transform(data)
49 | 
50 |     st = STMatrix(data, timestamps, T)
51 | 
52 |     # save TCN and MMS
53 |     fpkl = open('preprocessing.pkl', 'wb')
54 |     for obj in [mmn]:  # [tcn, mmn]:
55 |         pickle.dump(obj, fpkl)
56 |     fpkl.close()
57 | 
58 |     depends = range(1, 1+len_seq)
59 |     generator = rnn_model(nb_flow, len(depends), 32, 32)
60 |     adam = Adam()
61 |     generator.compile(loss='mean_absolute_error', optimizer=adam)
62 |     generator.load_weights(model_name)
63 | 
64 |     # instance-based dataset --> sequences in the format (X, Y), where X is a sequence of images and Y is an image.
65 |     offset_frame = pd.DateOffset(minutes=24 * 60 // T)
66 |     Y_test = st.data[-(len_test+steps-1):]
67 |     Y_pd_timestamps = st.pd_timestamps[-(len_test+steps-1):]
68 | 
69 |     X_test = []
70 |     for pd_timestamp in Y_pd_timestamps:
71 |         x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
72 |         X_test.append(np.vstack(x))
73 |     X_test = np.asarray(X_test)
74 | 
75 |     Y_true = mmn.inverse_transform(Y_test[-len_test:])
76 |     Y_true = Y_true.reshape(len(Y_true), nb_flow, -1)
77 | 
78 |     Y_hats = []
79 |     for k in xrange(1, steps+1):
80 |         print("\n\n==%d-step rmse==" % k)
81 |         ts = time.time()
82 |         Y_hat = generator.predict(X_test)
83 |         Y_hats.append(copy(Y_hat))
84 |         print('Y_hat shape', Y_hat.shape, 'X_test shape:', X_test.shape)
85 |         # eval
86 |         Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
87 |         Y_pred = Y_pred.reshape(len(Y_pred), nb_flow, -1)
88 |         rmse(Y_true, Y_pred)
89 |         X_test_hat = copy(X_test[1:])
90 |         for j in xrange(1, min(k, len_seq) + 1):
91 |             # Y^\hat _t replace
92 |             X_test_hat[:, j-1] = Y_hats[-j][:-j]
93 | 
94 |         X_test = copy(X_test_hat)
95 |         print("\nelapsed time (eval): ", time.time() - ts)
96 | 
97 | if __name__ == '__main__':
98 |     if int(sys.argv[2]) > 0:
99 |         seq_model(len_seq=int(sys.argv[2]))
100 |     else:
101 |         print("unknown args")
102 |         sys.exit(-1)
103 | 
--------------------------------------------------------------------------------
/deepst/utils/evalMultiStepAheadNew.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import sys
3 | from deepst_flow.models.STConvolution import seqCNN_CPT2
4 | from deepst_flow.datasets import load_stdata
5 | from deepst_flow.preprocessing import MinMaxNormalization
6 | from deepst_flow.preprocessing import remove_incomplete_days
7 | # import h5py
8 | import numpy as np
9 | from keras.optimizers import Adam
10 | import os
11 | # from keras.callbacks import EarlyStopping
12 | import cPickle as pickle
13 | import time
14 | import pandas as pd
15 | from copy import copy
16 | from deepst_flow.config import Config
17 | from deepst_flow.datasets.STMatrix import STMatrix
18 | from deepst_flow.utils.eval import rmse
19 | np.random.seed(1337)  # for reproducibility
20 | DATAPATH = Config().DATAPATH
21 | print(DATAPATH)
22 | 
23 | 
24 | def period_trend_closeness(len_closeness=3, len_trend=3, TrendInterval=7, len_period=3, PeriodInterval=1):
25 |     print("start: period_trend_closeness")
26 |     model_name = sys.argv[1]
27 |     steps = 24
28 |     # Period = 7
29 | 
30 |     T = 48  # number of timeslots per day
31 |     # len_seq = 3
32 |     nb_flow = 2
33 |     # nb_days = 120
34 |     # divide data into two subsets:
35 |     # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
36 |     # len_train = T * (nb_days - 7)
37 |     len_test = T * 7
38 | 
39 |     data, timestamps = load_stdata(os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
40 |     print(timestamps)
41 |     # remove days that do not have 48 timeslots
42 |     data, timestamps = remove_incomplete_days(data, timestamps, T)
43 |     data = data[:, :nb_flow]
44 |     # fit the MinMax scaler on the training portion only (everything before the final test week)
45 |     data_train = data[:-len_test]
46 |     mmn = MinMaxNormalization()
47 |     mmn.fit(data_train)
48 |     data = mmn.transform(data)
49 | 
50 |     st = STMatrix(data, timestamps, T)
51 | 
52 |     # save TCN and MMS
53 |     fpkl = open('preprocessing.pkl', 'wb')
54 |     for obj in [mmn]:  # [tcn, mmn]:
55 |         pickle.dump(obj, fpkl)
56 |     fpkl.close()
57 | 
58 |     depends = [range(1, len_closeness+1),
59 |                [PeriodInterval * T * j for j in xrange(1, len_period+1)],
60 |                [TrendInterval * T * j for j in xrange(1, len_trend+1)]]
61 |     if len_closeness > 0:
62 |         c_conf = (nb_flow, len_closeness, 32, 32)
63 |     else:
64 |         c_conf = None
65 |     if len_period > 0:
66 |         p_conf = (nb_flow, len_period, 32, 32)
67 |     else:
68 |         p_conf = None
69 |     if len_trend > 0:
70 |         t_conf = (nb_flow, len_trend, 32, 32)
71 |     else:
72 |         t_conf = None
73 |     generator = seqCNN_CPT2(c_conf=c_conf, p_conf=p_conf, t_conf=t_conf)
74 | 
75 |     adam = Adam()
76 |     generator.compile(loss='mean_absolute_error', optimizer=adam)
77 |     generator.load_weights(model_name)
78 | 
79 |     # instance-based dataset --> sequences in the format (X, Y), where X is a sequence of images and Y is an image.
80 |     offset_frame = pd.DateOffset(minutes=24 * 60 // T)
81 |     Y_test = st.data[-(len_test+steps-1):]
82 |     Y_pd_timestamps = st.pd_timestamps[-(len_test+steps-1):]
83 | 
84 |     XC = []
85 |     XP = []
86 |     XT = []
87 |     for pd_timestamp in Y_pd_timestamps:
88 |         # x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
89 |         # X_test.append(np.vstack(x))
90 |         x_c = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends[0]]
91 |         x_p = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends[1]]
92 |         x_t = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends[2]]
93 |         if len_closeness > 0:
94 |             XC.append(np.vstack(x_c))
95 |         if len_period > 0:
96 |             XP.append(np.vstack(x_p))
97 |         if len_trend > 0:
98 |             XT.append(np.vstack(x_t))
99 |     if len_closeness > 0:
100 |         XC = np.asarray(XC)
101 |     if len_period > 0:
102 |         XP = np.asarray(XP)
103 |     if len_trend > 0:
104 |         XT = np.asarray(XT)
105 |     print('XC/XP/XT shapes:', np.shape(XC), np.shape(XP), np.shape(XT))  # np.shape also works when a component is unused (still an empty list)
106 | 
107 |     # X_test = np.asarray(X_test)
108 |     XAll = []
109 |     for l, X_ in zip([len_closeness, len_period, len_trend], [XC, XP, XT]):
110 |         if l > 0:
111 |             XAll.append(X_)
112 | 
113 |     Y_true = mmn.inverse_transform(Y_test[-len_test:])
114 |     Y_hats = []
115 | 
116 |     # for i in xrange(len(XAll[0])):
117 |     #     x = []
118 |     #     for _X in XAll:
119 |     #         x.append([_X[i]])
120 | 
121 |     for k in xrange(1, steps+1):
122 |         print("\n\n==%d-step rmse==" % k)
123 |         ts = time.time()
124 |         # k^th predicted sequence
125 |         Y_hat = generator.predict(XAll)
126 |         Y_hats.append(copy(Y_hat))
127 |         print('Y_hat shape', Y_hat.shape)
128 |         # eval
129 |         Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
130 |         rmse(Y_true, Y_pred)
131 |         X_hat = []
132 |         for _X in XAll:
133 |             X_hat.append(copy(_X[1:]))
134 |         # X_hat = [XC[1:], XP[1:], XT[1:]]  # copy(X_test[1:])
135 | 
136 |         '''
137 |         # for j in xrange(len_closeness-1, 0):
138 |         for j in xrange(1, min(k, len_closeness) + 1):
139 |             # last sequence -j
140 |             if j == 1:
141 |                 X_hat[0][:, -1 * nb_flow:] = Y_hats[-j][:-j]
142 |             else:
143 |                 X_hat[0][:, nb_flow*(-j):nb_flow*(-j+1)] = Y_hats[-j][:-j]
144 |         '''
145 | 
146 |         XC_hat = X_hat[0]
147 |         len_replace = min(k, len_closeness)
148 | 
149 |         for j in xrange(len_replace):
150 |             # XC_hat[:, nb_flow*(j):nb_flow*(j+1)] = Y_hats[-(j+1)][:-(len_replace-j)]
151 |             XC_hat[:, nb_flow*(j):nb_flow*(j+1)] = Y_hats[-(j+1)][:-(j+1)]
152 |             # XC_hat[:, nb_flow*(j):nb_flow*(j+1)] = Y_hats[j][:-(j+1)]
153 | 
154 |         # for j in xrange(1, min(k, len_closeness) + 1):
155 |         #     # Y^\hat _t replace
156 |         #     X_hat[0][:, nb_flow*(j-1):nb_flow*j] = Y_hats[-j][:-j]
157 | 
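# (index note for the replacement loop above: closeness channel block j holds the
#  frame j+1 slots back, so it is refilled with Y_hats[-(j+1)], the prediction
#  produced j passes earlier, trimmed of its last j+1 rows to stay aligned with the
#  instances that survive each one-step `_X[1:]` shift.)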
158 |         XAll = X_hat
159 |         print("\nelapsed time (eval): ", time.time() - ts)
160 | 
161 | if __name__ == '__main__':
162 |     period_trend_closeness(len_closeness=int(sys.argv[2]), len_period=int(sys.argv[3]), len_trend=int(sys.argv[4]))
163 | 
--------------------------------------------------------------------------------
/deepst/utils/fill_missing_vals.py:
--------------------------------------------------------------------------------
1 | """
2 | Usage: python fill_missing_vals.py [fname] [preprocessing_name] [model_name]
3 | """
4 | from __future__ import print_function
5 | import sys
6 | # sys.path.append("/home/azureuser/workspace/deepst_flow")
7 | 
8 | from deepst_flow.models.gan import generator_model
9 | # from deepst_flow.datasets import load_bj15
10 | from deepst_flow.datasets import load_stdata
11 | from deepst_flow.preprocessing import TemporalConstrastNormalization, MinMaxNormalization
12 | from deepst_flow.preprocessing import remove_incomplete_days, split_by_time, timeseries2seqs
13 | import h5py
14 | import numpy as np
15 | 
16 | from keras.optimizers import Adam
17 | import os
18 | from keras.callbacks import EarlyStopping
19 | import cPickle as pickle
20 | import time
21 | import pandas as pd
22 | 
23 | np.random.seed(1337)  # for reproducibility
24 | from deepst_flow.config import Config
25 | DATAPATH = Config().DATAPATH
26 | 
27 | if len(sys.argv) != 4:
28 |     print(__doc__)
29 |     sys.exit(-1)
30 | 
31 | fname = sys.argv[1]
32 | data, timestamps = load_stdata(os.path.join(DATAPATH, '{}.h5'.format(fname)))
33 | 
34 | T = 48
35 | slot_time = 24. * 60 / 48
36 | # setting
37 | nb_flow = 2
38 | seq_len = 3
39 | 
40 | data = data[:, :nb_flow]
41 | 
42 | preprocessing_name = sys.argv[2]
43 | model_name = sys.argv[3]
44 | 
45 | # load TCN and MMS
46 | fpkl = open(preprocessing_name, 'rb')
47 | mmn = pickle.load(fpkl)
48 | print("Load Normalization Successfully: ", mmn)
49 | 
50 | # load model
51 | generator = generator_model(nb_flow, seq_len, 32, 32)
52 | adam = Adam(lr=0.0001)
53 | generator.compile(loss='mean_absolute_error', optimizer=adam)
54 | generator.load_weights(model_name)
55 | print("Load Model Successfully")
56 | 
57 | # working
58 | data = mmn.transform(data)
59 | offset = pd.DateOffset(minutes=24 * 60 // T)
60 | 
61 | from deepst_flow.utils import string2timestamp, timestamp2string
62 | timestamps = string2timestamp(timestamps, T=T)
63 | 
64 | new_timestamps = timestamps[:seq_len]
65 | new_data = list(data[:seq_len])
66 | 
67 | i = seq_len
68 | 
69 | while i < len(timestamps):
70 |     if new_timestamps[-1] + offset == timestamps[i]:
71 |         new_timestamps.append(timestamps[i])
72 |         new_data.append(data[i])
73 |         i += 1
74 |     else:
75 |         print('insert: ', new_timestamps[-1] + offset)
76 |         new_timestamps.append(new_timestamps[-1] + offset)
77 |         x = np.vstack(new_data[-seq_len:])
78 |         x = x[np.newaxis]
79 |         Y_pred = generator.predict(x)[0]
80 |         new_data.append(Y_pred)
81 | 
82 | new_data = np.asarray(new_data)
83 | print("shape: ", new_data.shape, "len:", len(new_timestamps))
84 | new_data = mmn.inverse_transform(new_data)
85 | 
86 | h5 = h5py.File(os.path.join(DATAPATH, '{}_nomissing.h5'.format(fname)), 'w')
87 | h5.create_dataset('data', data=new_data)
88 | h5.create_dataset('date', data=timestamp2string(new_timestamps, T=48))
89 | h5.close()
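The while-loop above is the whole idea of `fill_missing_vals.py`: walk the timeline, and whenever the next expected timeslot is absent, synthesize its frame from the last `seq_len` frames using the model itself. A stripped-down sketch of the same pattern follows; the `fill_gaps` helper, the `predict_next` stub, and the 30-minute offset are illustrative stand-ins, not part of the repo:

```python
import numpy as np
import pandas as pd


def fill_gaps(frames, stamps, predict_next, seq_len=3,
              offset=pd.DateOffset(minutes=30)):
    """Insert predicted frames wherever consecutive stamps are not `offset` apart."""
    out_frames, out_stamps = list(frames[:seq_len]), list(stamps[:seq_len])
    i = seq_len
    while i < len(stamps):
        if out_stamps[-1] + offset == stamps[i]:  # no gap: keep the observed frame
            out_frames.append(frames[i])
            out_stamps.append(stamps[i])
            i += 1
        else:  # gap: predict the missing slot from the last seq_len (possibly synthetic) frames
            out_stamps.append(out_stamps[-1] + offset)
            out_frames.append(predict_next(np.stack(out_frames[-seq_len:])))
    return np.asarray(out_frames), out_stamps


# stub predictor: simple persistence (repeat the most recent frame)
predict_next = lambda history: history[-1]
```

Note that filled slots feed later predictions, so long gaps degrade gracefully rather than failing outright.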
--------------------------------------------------------------------------------
/deepst/utils/runMe.bat:
--------------------------------------------------------------------------------
1 | 
2 | set PYTHON="D:\Users\junbzha\softwares\python"
3 | 
4 | call %PYTHON% preprocessing.py
5 | 
6 | pause
--------------------------------------------------------------------------------
/deepst/utils/txt2hdf5_InOut.py:
--------------------------------------------------------------------------------
1 | """
2 | Convert per-timeslot COO-format flow text files into one HDF5 dataset per period.
3 | """
4 | from __future__ import print_function
5 | import h5py
6 | import itertools
7 | import sys
8 | import os
9 | import numpy as np
10 | np.random.seed(1337)  # for reproducibility
11 | import time
12 | from datetime import datetime, timedelta
13 | import pandas as pd
14 | import scipy.sparse as sps
15 | from deepst_flow.config import Config
16 | # DATAPATH = Config().DATAPATH
17 | 
18 | DATAPATH = "D:/Users/junbzha/data/traffic_flow"
19 | print(DATAPATH)
20 | 
21 | rootdir = "D:/Users/xiuwen/Project/TrajectoryMap/Data/32_30"
22 | grid_size = 32
23 | 
24 | def load_data_from_COO_format(input_path):
25 |     """timeslot,x,y,inCount,outCount,newCount,endCount"""
26 |     data = np.loadtxt(input_path, delimiter=',')
27 |     I = data[:, 1].astype(int) - 1  # x-axis (1-based in the files)
28 |     J = data[:, 2].astype(int) - 1  # y-axis (1-based in the files)
29 |     inflow, outflow = data[:, 3], data[:, 4]
30 |     inflow = sps.coo_matrix((inflow, (I, J)), shape=(grid_size, grid_size)).toarray()
31 |     outflow = sps.coo_matrix((outflow, (I, J)), shape=(grid_size, grid_size)).toarray()
32 |     # newflow = sps.coo_matrix((newflow, (I, J)), shape=(grid_size, grid_size)).toarray()
33 |     # endflow = sps.coo_matrix((endflow, (I, J)), shape=(grid_size, grid_size)).toarray()
34 |     return np.asarray([inflow, outflow])
35 | 
36 | 
37 | def get_file_lines(input_path):
38 |     with open(input_path) as f:
39 |         return len(f.readlines())
40 | 
41 | def timestamp2string(timestamps):
42 |     return ["%s%02i" % (ts.strftime('%Y%m%d'),
43 |                         int(1+ts.to_datetime().hour*2+ts.to_datetime().minute/30)) for ts in timestamps]
44 | 
45 | def load_data(rootdir=rootdir, start='3/1/2015', end='7/1/2015', freq='30Min', year=13):
46 |     rng = pd.date_range(start=start, end=end, periods=None, freq=freq)
47 |     data = dict()
48 |     data_mat = []
49 |     avail_timestamp = []
50 |     for timestamp in rng:
51 |         hour, minute = timestamp.to_datetime().hour, timestamp.to_datetime().minute
52 |         # print(timestamp, "%s%02i" % (timestamp.strftime('%Y%m%d'), int(1+hour*2+minute/30)))
53 |         # subdir = "%04i%02i" % (timestamp.to_datetime().year, timestamp.to_datetime().month)
54 |         fname = "%s%02i.txt" % (timestamp.strftime('%Y%m%d'), int(1+hour*2+minute/30))
55 |         input_path = os.path.join(rootdir, fname)
56 |         if os.path.exists(input_path) is False:
57 |             print('file cannot be found:', input_path)
58 |             continue
59 |         if get_file_lines(input_path) < grid_size * grid_size * 0.25:
60 |             continue
61 |         avail_timestamp.append(timestamp)
62 |         print("processing", input_path)
63 |         data_tensor = load_data_from_COO_format(input_path)
64 |         data_mat.append(data_tensor)
65 |         data[timestamp] = data_tensor
66 | 
67 |     print("len:", len(avail_timestamp))
68 |     h5 = h5py.File(os.path.join(DATAPATH, 'BJ', 'BJ{}_M{}_T30_Flow.h5'.format(year, grid_size)), 'w')
69 |     h5.create_dataset("date", data=timestamp2string(avail_timestamp))
70 |     h5.create_dataset("data", data=np.asarray(data_mat))
71 |     h5.close()
72 | 
73 | load_data(start='7/1/2013', end='11/1/2013', year=13)
74 | load_data(start='3/1/2014', end='7/1/2014', year=14)
75 | load_data(start='3/1/2015', end='7/1/2015', year=15)
76 | load_data(start='11/1/2015', end='4/11/2016', year=16)
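For orientation, each input text file read above holds one timeslot of the grid in COO form, one non-empty cell per line, with the column order given in the function's docstring (timeslot, x, y, inCount, outCount, newCount, endCount); the coordinates are 1-based, hence the `- 1` when building the sparse matrices. The rows below are invented purely for illustration:

```
5,12,20,31,27,4,6
5,12,21,18,22,1,3
```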
--------------------------------------------------------------------------------
/deepst/utils/viewRetFromPkl.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | from __future__ import print_function
4 | import sys
5 | import cPickle as pickle
6 | 
7 | 
8 | def view(fname):
9 |     pkl = pickle.load(open(fname, 'rb'))
10 |     for ke in pkl.keys():
11 |         print('=' * 10)
12 |         print(ke)
13 |         print(pkl[ke])
14 | 
15 | view(sys.argv[1])
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/BikeNYC/README.md:
--------------------------------------------------------------------------------
1 | This directory will soon contain the code for our AAAI 2017 paper:
2 | ```
3 | Junbo Zhang, Yu Zheng, Dekang Qi. Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction. In AAAI 2017.
4 | ```
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/README.md:
--------------------------------------------------------------------------------
1 | Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction
2 | ==========================================================================
3 | 
4 | The files in this directory recreate some of the experiments reported in the paper:
5 | 
6 | `Junbo Zhang, Yu Zheng, Dekang Qi. Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction. In AAAI 2017. `
7 | 
8 | **If you use the code provided here, please cite that paper.**
9 | 
10 | 1. Reproduce **TaxiBJ** results: [ReadMe](TaxiBJ/)
11 | 
12 | 2. Reproduce **BikeNYC** results: [ReadMe](BikeNYC/)
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/TaxiBJ/README.md:
--------------------------------------------------------------------------------
1 | 1. Install [**DeepST**](https://github.com/lucktroy/DeepST).
2 | 
3 | 2. Download the [**TaxiBJ**](https://github.com/lucktroy/DeepST/tree/master/data/TaxiBJ) data.
4 | 
5 | 3. Reproduce the results of ST-ResNet and its variants.
6 | 
7 | * Result of Model **L2-E**
8 | 
9 | ```
10 | THEANO_FLAGS="device=gpu,floatX=float32" python exptTaxiBJ.py 2
11 | ```
12 | 
13 | * Result of Model **L4-E**
14 | 
15 | ```
16 | THEANO_FLAGS="device=gpu,floatX=float32" python exptTaxiBJ.py 4
17 | ```
18 | 
19 | * Result of Model **L12-E**
20 | 
21 | ```
22 | THEANO_FLAGS="device=gpu,floatX=float32" python exptTaxiBJ.py 12
23 | ```
24 | 
25 | * Result of Model **L12**
26 | ```
27 | THEANO_FLAGS="device=gpu,floatX=float32" python exptTaxiBJ-L12.py
28 | ```
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/TaxiBJ/exptTaxiBJ-L12.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | THEANO_FLAGS="device=gpu0" python exptTaxiBJ-L12.py
4 | """
5 | from __future__ import print_function
6 | import os
7 | import cPickle as pickle
8 | import time
9 | import numpy as np
10 | import h5py
11 | 
12 | from keras.optimizers import Adam
13 | from keras.callbacks import EarlyStopping, ModelCheckpoint
14 | 
15 | from deepst.models.STResNet import stresnet
16 | from deepst.config import Config
17 | import deepst.metrics as metrics
18 | from deepst.datasets import TaxiBJ
19 | np.random.seed(1337)  # for reproducibility
20 | 
21 | # parameters
22 | DATAPATH = Config().DATAPATH  # data path; you may set your own with the environment variable DATAPATH
23 | CACHEDATA = True  # cache data or NOT
24 | path_cache = os.path.join(DATAPATH, 'CACHE')  # cache path
25 | nb_epoch = 500  # number of epochs at the training stage
26 | nb_epoch_cont = 100  # number of epochs at the training (cont) stage
27 | batch_size = 32  # batch size
28 | T = 48  # number of time intervals in one day
29 | lr = 0.0002  # learning rate
30 | len_closeness = 3  # length of closeness dependent sequence
31 | len_period = 1  # length of period dependent sequence
32 | len_trend = 1  # length of trend dependent sequence
33 | nb_residual_unit = 12  # number of residual units
34 | 
35 | 
36 | nb_flow = 2  # there are two types of flows: inflow and outflow
37 | # split data into two subsets: Train & Test, of which the test set is the last 4 weeks
38 | days_test = 7 * 4
39 | len_test = T * days_test
40 | map_height, map_width = 32, 32  # grid size
41 | path_result = 'RET'
42 | path_model = 'MODEL'
43 | 
44 | 
45 | if os.path.isdir(path_result) is False:
46 |     os.mkdir(path_result)
47 | if os.path.isdir(path_model) is False:
48 |     os.mkdir(path_model)
49 | if CACHEDATA and os.path.isdir(path_cache) is False:
50 |     os.mkdir(path_cache)
51 | 
52 | 
53 | def build_model(external_dim):
54 |     c_conf = (len_closeness, nb_flow, map_height,
55 |               map_width) if len_closeness > 0 else None
56 |     p_conf = (len_period, nb_flow, map_height,
57 |               map_width) if len_period > 0 else None
58 |     t_conf = (len_trend, nb_flow, map_height,
59 |               map_width) if len_trend > 0 else None
60 | 
61 |     model = stresnet(c_conf=c_conf, p_conf=p_conf, t_conf=t_conf,
62 |                      external_dim=external_dim, nb_residual_unit=nb_residual_unit)
63 |     adam = Adam(lr=lr)
64 |     model.compile(loss='mse', optimizer=adam, metrics=[metrics.rmse])
65 |     model.summary()
66 |     # from keras.utils.visualize_util import plot
67 |     # plot(model, to_file='model.png', show_shapes=True)
68 |     return model
69 | 
70 | 
71 | def read_cache(fname):
72 |     mmn = pickle.load(open('preprocessing.pkl', 'rb'))
73 | 
74 |     f = h5py.File(fname, 'r')
75 |     num = int(f['num'].value)
76 |     X_train, Y_train, X_test, Y_test = [], [], [], []
77 |     for i in xrange(num):
78 |         X_train.append(f['X_train_%i' % i].value)
79 |         X_test.append(f['X_test_%i' % i].value)
80 |     Y_train = f['Y_train'].value
81 |     Y_test = f['Y_test'].value
82 |     external_dim = f['external_dim'].value
83 |     timestamp_train = f['T_train'].value
84 |     timestamp_test = f['T_test'].value
85 |     f.close()
86 | 
87 |     return X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test
88 | 
89 | 
90 | def cache(fname, X_train, Y_train, X_test, Y_test, external_dim, timestamp_train, timestamp_test):
91 |     h5 = h5py.File(fname, 'w')
92 |     h5.create_dataset('num', data=len(X_train))
93 | 
94 |     for i, data in enumerate(X_train):
95 |         h5.create_dataset('X_train_%i' % i, data=data)
96 |     # for i, data in enumerate(Y_train):
97 |     for i, data in enumerate(X_test):
98 |         h5.create_dataset('X_test_%i' % i, data=data)
99 |     h5.create_dataset('Y_train', data=Y_train)
100 |     h5.create_dataset('Y_test', data=Y_test)
101 |     external_dim = -1 if external_dim is None else int(external_dim)
102 |     h5.create_dataset('external_dim', data=external_dim)
103 |     h5.create_dataset('T_train', data=timestamp_train)
104 |     h5.create_dataset('T_test', data=timestamp_test)
105 |     h5.close()
106 | 
107 | 
108 | def main():
109 |     # load data
110 |     print("loading data...")
111 |     ts = time.time()
112 |     fname = os.path.join(DATAPATH, 'CACHE', 'TaxiBJ_C{}_P{}_T{}_noExternal.h5'.format(
113 |         len_closeness, len_period, len_trend))
114 |     if os.path.exists(fname) and CACHEDATA:
115 |         X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
116 |             fname)
117 |         print("load %s successfully" % fname)
118 |     else:
119 |         X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
120 |             T=T, nb_flow=nb_flow, len_closeness=len_closeness, len_period=len_period, len_trend=len_trend, len_test=len_test,
121 |             preprocess_name='preprocessing.pkl', meta_data=False, meteorol_data=False, holiday_data=False)
122 |         if CACHEDATA:
123 |             cache(fname, X_train, Y_train, X_test, Y_test,
124 |                   external_dim, timestamp_train, timestamp_test)
125 | 
126 |     print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
127 |     print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))
128 | 
129 |     print('=' * 10)
130 |     print("compiling model...")
131 |     print(
132 |         "**the first time it may take a few minutes to compile if you use [Theano] as the backend**")
133 | 
134 |     ts = time.time()
135 |     model = build_model(external_dim)
136 |     hyperparams_name = 'c{}.p{}.t{}.resunit{}.lr{}.noExternal'.format(
137 |         len_closeness, len_period, len_trend, nb_residual_unit, lr)
138 |     fname_param = os.path.join('MODEL', '{}.best.h5'.format(hyperparams_name))
139 | 
140 |     early_stopping = EarlyStopping(monitor='val_rmse', patience=2, mode='min')
141 |     model_checkpoint = ModelCheckpoint(
142 |         fname_param, monitor='val_rmse', verbose=0, save_best_only=True, mode='min')
143 | 
144 |     print("\nelapsed time (compiling model): %.3f seconds\n" %
145 |           (time.time() - ts))
146 | 
147 |     print('=' * 10)
148 |     print("training model...")
149 |     ts = time.time()
150 |     history = model.fit(X_train, Y_train,
151 |                         nb_epoch=nb_epoch,
152 |                         batch_size=batch_size,
153 |                         validation_split=0.1,
154 |                         callbacks=[early_stopping, model_checkpoint],
155 |                         verbose=1)
156 |     model.save_weights(os.path.join(
157 |         'MODEL', '{}.h5'.format(hyperparams_name)), overwrite=True)
158 |     pickle.dump((history.history), open(os.path.join(
159 |         path_result, '{}.history.pkl'.format(hyperparams_name)), 'wb'))
160 |     print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))
161 | 
162 |     print('=' * 10)
163 |     print('evaluating using the model that has the best loss on the valid set')
164 |     ts = time.time()
165 |     model.load_weights(fname_param)
166 |     score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[
167 |                            0] // 48, verbose=0)
168 |     print('Train score: %.6f Train rmse: %.6f %.6f' %
169 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
170 |     score = model.evaluate(
171 |         X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
172 |     print('Test score: %.6f Test rmse: %.6f %.6f' %
173 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
174 |     print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))
175 | 
176 |     print('=' * 10)
177 |     print("training model (cont)...")
178 |     ts = time.time()
179 |     fname_param = os.path.join(
180 |         'MODEL', '{}.cont.best.h5'.format(hyperparams_name))
181 |     model_checkpoint = ModelCheckpoint(
182 |         fname_param, monitor='rmse', verbose=0, save_best_only=True, mode='min')
183 |     history = model.fit(X_train, Y_train, nb_epoch=nb_epoch_cont, verbose=2, batch_size=batch_size, callbacks=[
184 |                         model_checkpoint])
185 |     pickle.dump((history.history), open(os.path.join(
186 |         path_result, '{}.cont.history.pkl'.format(hyperparams_name)), 'wb'))
187 |     model.save_weights(os.path.join(
188 |         'MODEL', '{}_cont.h5'.format(hyperparams_name)), overwrite=True)
189 |     print("\nelapsed time (training cont): %.3f seconds\n" % (time.time() - ts))
190 | 
191 |     print('=' * 10)
192 |     print('evaluating using the final model')
193 |     score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[
194 |                            0] // 48, verbose=0)
195 |     print('Train score: %.6f Train rmse: %.6f %.6f' %
196 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
197 |     ts = time.time()
198 |     score = model.evaluate(
199 |         X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
200 |     print('Test score: %.6f Test rmse: %.6f %.6f' %
201 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
202 |     print("\nelapsed time (eval cont): %.3f seconds\n" % (time.time() - ts))
203 | 
204 | if __name__ == '__main__':
205 |     main()
--------------------------------------------------------------------------------
/scripts/papers/AAAI17/TaxiBJ/exptTaxiBJ.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | THEANO_FLAGS="device=gpu0" python exptTaxiBJ.py [nb_residual_unit]
4 | """
5 | from __future__ import print_function
6 | import os
7 | import sys
8 | import cPickle as pickle
9 | import time
10 | import numpy as np
11 | import h5py
12 | 
13 | from keras.optimizers import Adam
14 | from keras.callbacks import EarlyStopping, ModelCheckpoint
15 | 
16 | from deepst.models.STResNet import stresnet
17 | from deepst.config import Config
18 | import deepst.metrics as metrics
19 | from deepst.datasets import TaxiBJ
20 | np.random.seed(1337)  # for reproducibility
21 | 
22 | # parameters
23 | DATAPATH = Config().DATAPATH  # data path; you may set your own with the environment variable DATAPATH
24 | CACHEDATA = True  # cache data or NOT
25 | path_cache = os.path.join(DATAPATH, 'CACHE')  # cache path
26 | nb_epoch = 500  # number of epochs at the training stage
27 | nb_epoch_cont = 100  # number of epochs at the training (cont) stage
28 | batch_size = 32  # batch size
29 | T = 48  # number of time intervals in one day
30 | lr = 0.0002  # learning rate
31 | len_closeness = 3  # length of closeness dependent sequence
32 | len_period = 1  # length of period dependent sequence
33 | len_trend = 1  # length of trend dependent sequence
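# (closeness/period/trend select which historical frames feed each ST-ResNet input
#  branch: with T = 48 half-hour slots, closeness takes the len_closeness most recent
#  slots, period takes slots whole days back, and trend takes slots whole weeks back --
#  the 1-day and 7-day intervals are the values used elsewhere in this repo, e.g.
#  PeriodInterval=1 and TrendInterval=7 in deepst/utils/evalMultiStepAheadNew.py.)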
34 | if len(sys.argv) == 1:
35 |     nb_residual_unit = 2  # number of residual units
36 | else:
37 |     nb_residual_unit = int(sys.argv[1])  # number of residual units
38 | 
39 | nb_flow = 2  # there are two types of flows: inflow and outflow
40 | # divide data into two subsets: Train & Test, of which the test set is the
41 | # last 4 weeks
42 | days_test = 7 * 4
43 | len_test = T * days_test
44 | map_height, map_width = 32, 32  # grid size
45 | path_result = 'RET'
46 | path_model = 'MODEL'
47 | 
48 | 
49 | if os.path.isdir(path_result) is False:
50 |     os.mkdir(path_result)
51 | if os.path.isdir(path_model) is False:
52 |     os.mkdir(path_model)
53 | if CACHEDATA and os.path.isdir(path_cache) is False:
54 |     os.mkdir(path_cache)
55 | 
56 | 
57 | def build_model(external_dim):
58 |     c_conf = (len_closeness, nb_flow, map_height,
59 |               map_width) if len_closeness > 0 else None
60 |     p_conf = (len_period, nb_flow, map_height,
61 |               map_width) if len_period > 0 else None
62 |     t_conf = (len_trend, nb_flow, map_height,
63 |               map_width) if len_trend > 0 else None
64 | 
65 |     model = stresnet(c_conf=c_conf, p_conf=p_conf, t_conf=t_conf,
66 |                      external_dim=external_dim, nb_residual_unit=nb_residual_unit)
67 |     adam = Adam(lr=lr)
68 |     model.compile(loss='mse', optimizer=adam, metrics=[metrics.rmse])
69 |     model.summary()
70 |     # from keras.utils.visualize_util import plot
71 |     # plot(model, to_file='model.png', show_shapes=True)
72 |     return model
73 | 
74 | 
75 | def read_cache(fname):
76 |     mmn = pickle.load(open('preprocessing.pkl', 'rb'))
77 | 
78 |     f = h5py.File(fname, 'r')
79 |     num = int(f['num'].value)
80 |     X_train, Y_train, X_test, Y_test = [], [], [], []
81 |     for i in xrange(num):
82 |         X_train.append(f['X_train_%i' % i].value)
83 |         X_test.append(f['X_test_%i' % i].value)
84 |     Y_train = f['Y_train'].value
85 |     Y_test = f['Y_test'].value
86 |     external_dim = f['external_dim'].value
87 |     timestamp_train = f['T_train'].value
88 |     timestamp_test = f['T_test'].value
89 |     f.close()
90 | 
91 |     return X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test
92 | 
93 | 
94 | def cache(fname, X_train, Y_train, X_test, Y_test, external_dim, timestamp_train, timestamp_test):
95 |     h5 = h5py.File(fname, 'w')
96 |     h5.create_dataset('num', data=len(X_train))
97 | 
98 |     for i, data in enumerate(X_train):
99 |         h5.create_dataset('X_train_%i' % i, data=data)
100 |     # for i, data in enumerate(Y_train):
101 |     for i, data in enumerate(X_test):
102 |         h5.create_dataset('X_test_%i' % i, data=data)
103 |     h5.create_dataset('Y_train', data=Y_train)
104 |     h5.create_dataset('Y_test', data=Y_test)
105 |     external_dim = -1 if external_dim is None else int(external_dim)
106 |     h5.create_dataset('external_dim', data=external_dim)
107 |     h5.create_dataset('T_train', data=timestamp_train)
108 |     h5.create_dataset('T_test', data=timestamp_test)
109 |     h5.close()
110 | 
111 | 
112 | def main():
113 |     # load data
114 |     print("loading data...")
115 |     ts = time.time()
116 |     fname = os.path.join(DATAPATH, 'CACHE', 'TaxiBJ_C{}_P{}_T{}.h5'.format(
117 |         len_closeness, len_period, len_trend))
118 |     if os.path.exists(fname) and CACHEDATA:
119 |         X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
120 |             fname)
121 |         print("load %s successfully" % fname)
122 |     else:
123 |         X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
124 |             T=T, nb_flow=nb_flow, len_closeness=len_closeness, len_period=len_period, len_trend=len_trend, len_test=len_test,
125 |             preprocess_name='preprocessing.pkl', meta_data=True, meteorol_data=True, holiday_data=True)
126 |         if CACHEDATA:
127 |             cache(fname, X_train, Y_train, X_test, Y_test,
128 |                   external_dim, timestamp_train, timestamp_test)
129 | 
130 |     print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
131 |     print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))
132 | 
133 |     print('=' * 10)
134 |     print("compiling model...")
135 |     print(
136 |         "**the first time it may take a few minutes to compile if you use [Theano] as the backend**")
137 | 
138 |     ts = time.time()
139 |     model = build_model(external_dim)
140 |     hyperparams_name = 'c{}.p{}.t{}.resunit{}.lr{}'.format(
141 |         len_closeness, len_period, len_trend, nb_residual_unit, lr)
142 |     fname_param = os.path.join('MODEL', '{}.best.h5'.format(hyperparams_name))
143 | 
144 |     early_stopping = EarlyStopping(monitor='val_rmse', patience=2, mode='min')
145 |     model_checkpoint = ModelCheckpoint(
146 |         fname_param, monitor='val_rmse', verbose=0, save_best_only=True, mode='min')
147 | 
148 |     print("\nelapsed time (compiling model): %.3f seconds\n" %
149 |           (time.time() - ts))
150 | 
151 |     print('=' * 10)
152 |     print("training model...")
153 |     ts = time.time()
154 |     history = model.fit(X_train, Y_train,
155 |                         nb_epoch=nb_epoch,
156 |                         batch_size=batch_size,
157 |                         validation_split=0.1,
158 |                         callbacks=[early_stopping, model_checkpoint],
159 |                         verbose=2)
160 |     model.save_weights(os.path.join(
161 |         'MODEL', '{}.h5'.format(hyperparams_name)), overwrite=True)
162 |     pickle.dump((history.history), open(os.path.join(
163 |         path_result, '{}.history.pkl'.format(hyperparams_name)), 'wb'))
164 |     print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))
165 | 
166 |     print('=' * 10)
167 |     print('evaluating using the model that has the best loss on the valid set')
168 |     ts = time.time()
169 |     model.load_weights(fname_param)
170 |     score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[
171 |                            0] // 48, verbose=0)
172 |     print('Train score: %.6f Train rmse: %.6f %.6f' %
173 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
174 |     score = model.evaluate(
175 |         X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
176 |     print('Test score: %.6f Test rmse: %.6f %.6f' %
177 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
178 |     print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))
179 | 
180 |     print('=' * 10)
181 |     print("training model (cont)...")
182 |     ts = time.time()
183 |     fname_param = os.path.join(
184 |         'MODEL', '{}.cont.best.h5'.format(hyperparams_name))
185 |     model_checkpoint = ModelCheckpoint(
186 |         fname_param, monitor='rmse', verbose=0, save_best_only=True, mode='min')
187 |     history = model.fit(X_train, Y_train, nb_epoch=nb_epoch_cont, verbose=2, batch_size=batch_size, callbacks=[
188 |                         model_checkpoint])
189 |     pickle.dump((history.history), open(os.path.join(
190 |         path_result, '{}.cont.history.pkl'.format(hyperparams_name)), 'wb'))
191 |     model.save_weights(os.path.join(
192 |         'MODEL', '{}_cont.h5'.format(hyperparams_name)), overwrite=True)
193 |     print("\nelapsed time (training cont): %.3f seconds\n" % (time.time() - ts))
194 | 
195 |     print('=' * 10)
196 |     print('evaluating using the final model')
197 |     score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[
198 |                            0] // 48, verbose=0)
199 |     print('Train score: %.6f Train rmse: %.6f %.6f' %
200 |           (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
201 |     ts = time.time()
202 |     score = model.evaluate(
203 | X_test, Y_test, batch_size=Y_test.shape[0], verbose=0) 204 | print('Test score: %.6f Test rmse: %.6f %.6f' % 205 | (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.)) 206 | print("\nelapsed time (eval cont): %.3f seconds\n" % (time.time() - ts)) 207 | 208 | if __name__ == '__main__': 209 | main() 210 | -------------------------------------------------------------------------------- /scripts/papers/AAAI17/doc/ST-ResNet-AAAI17-Zhang.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirkhango/DeepST/7ba669013bbafd5f413ef50d5d76094c3a68efd6/scripts/papers/AAAI17/doc/ST-ResNet-AAAI17-Zhang.pdf -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | 5 | setup(name='DeepST', 6 | version='0.0.1', 7 | description='Deep Learning for Spatio-Temporal Data', 8 | author='Junbo Zhang', 9 | author_email='zjb2046@gmail.com', 10 | url='https://github.com/lucktroy/DeepST', 11 | download_url='https://github.com/lucktroy/DeepST/', 12 | license='MIT', 13 | install_requires=['keras', 'theano'], 14 | extras_require={ 15 | 'h5py': ['h5py'], 16 | 'visualize': ['pydot-ng'], 17 | }, 18 | packages=find_packages()) --------------------------------------------------------------------------------