├── .gitignore
├── LICENSE
├── README.md
├── filternet.py
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## FilterNet
FilterNet is an ensemble neural network model used for time series analysis.
It combines a 1D convolutional neural network with fast.ai's MixedInputModel. A PyTorch implementation is provided in
filternet.py, which contains the model class along with its corresponding dataset class. Links to the abstract, slides,
and video of our presentation at PyData Los Angeles 2018 are provided below.

This 1D convolutional neural network (CNN) was inspired by the traditional use of filters in discrete time signal
processing. While developed independently, it closely resembles the WaveNet-style architecture described by
[Borovykh et al.](https://arxiv.org/pdf/1703.04691.pdf). The 1D CNN performed well on its own, but many datasets also
carry contextual features (hour of day, day of week, etc.) that the 1D CNN alone cannot use. We therefore added
[fastai's MixedInputModel](https://github.com/fastai/fastai), which has been used successfully on tabular data, to
learn from the contextual portion of our datasets. The two networks are combined through a final regression layer and
were found to complement each other. In testing, the resulting ensemble model outperformed one of our current best
production time series models ([TBATS](https://robjhyndman.com/hyndsight/forecasting-weekly-data/)).

Our hope is that open sourcing our approach will help generate further ideas for improving time series modeling with
neural networks.

PyData Los Angeles 2018 presentation:
- [Abstract](https://pydata.org/la2018/schedule/presentation/14)
- [Slides](https://docs.google.com/presentation/d/e/2PACX-1vR6eea4L_Z_hyz24kgch3Lt5eEQ9PmmI2gUys_DcQrWY0EbG5CfOy4suqeLejXEql3x-nYT2NshrQRc/pub?start=false&loop=false&delayms=3000)
- [Video](https://www.youtube.com/watch?v=nMkqWxMjWzg)

Contributing authors:
- Jeff Roach (Data Scientist at System1)
- Nathan Janos (Chief Data Officer at System1)

For more information about System1, please visit: www.system1.com
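
### Example usage
The snippet below is a minimal, illustrative sketch of how the model and dataset classes in filternet.py fit together.
It is not part of the original code: the feature sizes, synthetic data, and training loop are assumptions chosen only
to show the expected tensor shapes (a 24-step window, two categorical features, and three continuous features).

```python
import torch
from torch.utils.data import DataLoader

from filternet import FilterNet24H2, FilterNetDataset

# Hypothetical feature layout: (cardinality, embedding size) for hour of day and day of week.
emb_szs = [(24, 12), (7, 4)]
model = FilterNet24H2(emb_szs=emb_szs, n_cont=3, out_sz=1, layers=[100, 50], window=24)

n = 256                                           # synthetic examples
x_window = torch.randn(n, 1, 24)                  # last 24 observations of the series, one channel
x_cat = torch.stack([torch.randint(0, 24, (n,)),
                     torch.randint(0, 7, (n,))], dim=1)
x_cont = torch.randn(n, 3)                        # continuous context features
y = torch.randn(n, 1)                             # regression target

ds = FilterNetDataset(x_window, x_cat, x_cont, y)
dl = DataLoader(ds, batch_size=32, shuffle=True)

opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()
for (xw, xc, xn), yb in dl:                       # each batch: [window, categorical, continuous], target
    opt.zero_grad()
    loss = loss_fn(model(xw, xc, xn), yb)
    loss.backward()
    opt.step()
```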
--------------------------------------------------------------------------------
/filternet.py:
--------------------------------------------------------------------------------
from fastai.core import ifnone, listify
from fastai.layers import bn_drop_lin, embedding, Flatten
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset


def conv_layer(window, ks=3, dilation=1):
    # Single-channel 1D convolution, pooled back to a fixed `window` length.
    return nn.Sequential(
        nn.Conv1d(1, 1, kernel_size=ks, bias=False, dilation=dilation),
        nn.AdaptiveAvgPool1d(window),
        nn.LeakyReLU(negative_slope=0.1, inplace=True))


class FilterNet24H2(nn.Module):
    def __init__(self, emb_szs, n_cont, out_sz, layers, emb_drop=0., window=24, filters=[1, 2, 3, 4, 5, 6],
                 y_range=None, use_bn=False, ps=None, bn_final=False):
        super().__init__()

        # TODO: Use the filters arg to generate the conv_layers dynamically
        # Wavenet model layers: one dilation-1 branch and one dilation-2 branch per kernel size
        self.c1a = conv_layer(window=window // 2, ks=1, dilation=1)
        self.c1b = conv_layer(window=window // 4, ks=1, dilation=2)
        self.c2a = conv_layer(window=window // 2, ks=2, dilation=1)
        self.c2b = conv_layer(window=window // 4, ks=2, dilation=2)
        self.c3a = conv_layer(window=window // 2, ks=3, dilation=1)
        self.c3b = conv_layer(window=window // 4, ks=3, dilation=2)
        self.c4a = conv_layer(window=window // 2, ks=4, dilation=1)
        self.c4b = conv_layer(window=window // 4, ks=4, dilation=2)
        self.c5a = conv_layer(window=window // 2, ks=5, dilation=1)
        self.c5b = conv_layer(window=window // 4, ks=5, dilation=2)
        self.c6a = conv_layer(window=window // 2, ks=6, dilation=1)
        self.c6b = conv_layer(window=window // 4, ks=6, dilation=2)

        num_wave_outputs = (len(filters) * (window // 2)) + (len(filters) * (window // 4))

        # Fastai's Mixed Input model
        ps = ifnone(ps, [0] * len(layers))
        ps = listify(ps, layers)
        self.embeds = nn.ModuleList([embedding(ni, nf) for ni, nf in emb_szs])
        self.emb_drop = nn.Dropout(emb_drop)
        self.bn_cont = nn.BatchNorm1d(n_cont)
        n_emb = sum(e.embedding_dim for e in self.embeds)
        self.n_emb, self.n_cont, self.y_range = n_emb, n_cont, y_range
        sizes = self.get_sizes(layers, out_sz)
        actns = [nn.ReLU(inplace=True)] * (len(sizes) - 2) + [None]
        layers = []
        for i, (n_in, n_out, dp, act) in enumerate(zip(sizes[:-2], sizes[1:-1], [0.] + ps, actns)):
            layers += bn_drop_lin(n_in, n_out, bn=use_bn and i != 0, p=dp, actn=act)
        if bn_final: layers.append(nn.BatchNorm1d(sizes[-1]))
        self.layers = nn.Sequential(*layers)

        # Final layer combining both networks
        self.f = Flatten()
        self.lin = nn.Linear(sizes[-2] + num_wave_outputs, out_sz, bias=False)

        self.sizes = sizes
        self.num_wave_outputs = num_wave_outputs

    def get_sizes(self, layers, out_sz):
        return [self.n_emb + self.n_cont] + layers + [out_sz]

    def forward(self, x_window, x_cat, x_cont):
        # TODO: Use the filters arg to generate the conv_layers dynamically
        # Wavenet model: each dilation-2 branch consumes the output of its dilation-1 branch
        self.f1a = self.c1a(x_window)
        self.f1b = self.c1b(self.f1a)
        self.f2a = self.c2a(x_window)
        self.f2b = self.c2b(self.f2a)
        self.f3a = self.c3a(x_window)
        self.f3b = self.c3b(self.f3a)
        self.f4a = self.c4a(x_window)
        self.f4b = self.c4b(self.f4a)
        self.f5a = self.c5a(x_window)
        self.f5b = self.c5b(self.f5a)
        self.f6a = self.c6a(x_window)
        self.f6b = self.c6b(self.f6a)
        self.ffc = torch.cat([self.f1a, self.f1b, self.f2a, self.f2b,
                              self.f3a, self.f3b, self.f4a, self.f4b,
                              self.f5a, self.f5b, self.f6a, self.f6b, ], 2)  # (batch, 1, num_wave_outputs)

        # Fastai's Mixed Input Model
        if self.n_emb != 0:
            x = [e(x_cat[:, i]) for i, e in enumerate(self.embeds)]
            x = torch.cat(x, 1)
            x = self.emb_drop(x)
        if self.n_cont != 0:
            x_cont = self.bn_cont(x_cont)
            x = torch.cat([x, x_cont], 1) if self.n_emb != 0 else x_cont
        x = self.layers(x)
        if self.y_range is not None:
            x = (self.y_range[1] - self.y_range[0]) * torch.sigmoid(x) + self.y_range[0]

        # Combine results from both nets with a final regression layer
        x = x.unsqueeze(1)
        self.fc = torch.cat([self.ffc, x], 2)
        self.flin = self.lin(self.f(self.fc))
        return self.flin


class FilterNetDataset(Dataset):
    # Wraps pre-built window, categorical, and continuous tensors that share a common index.
    def __init__(self, x_window, x_cat, x_cont, y):
        self.x_window = x_window
        self.x_cat = x_cat
        self.x_cont = x_cont
        self.y = y

    def __getitem__(self, idx): return [self.x_window[idx], self.x_cat[idx], self.x_cont[idx]], self.y[idx]
    def __len__(self): return max(len(self.x_window), len(self.x_cat), len(self.x_cont))
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
fastai==0.7.0
numpy==1.15.2
torch==0.3.1
--------------------------------------------------------------------------------
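
A note on the `# TODO` comments in filternet.py: the twelve convolutional branches (c1a/c1b through c6a/c6b) could be
generated from the `filters` argument instead of being written out by hand. The sketch below shows one possible shape
for that refactor; it is not code from the repository, and it assumes the existing `conv_layer` helper while preserving
the concatenation order that `FilterNet24H2.forward` uses to build `ffc`.

```python
import torch
from torch import nn

from filternet import conv_layer


def build_wave_layers(window, filters):
    # One (dilation=1, dilation=2) pair per kernel size, mirroring c1a/c1b ... c6a/c6b.
    layers_a = nn.ModuleList([conv_layer(window=window // 2, ks=f, dilation=1) for f in filters])
    layers_b = nn.ModuleList([conv_layer(window=window // 4, ks=f, dilation=2) for f in filters])
    return layers_a, layers_b


def wave_forward(layers_a, layers_b, x_window):
    outputs = []
    for ca, cb in zip(layers_a, layers_b):
        fa = ca(x_window)         # (batch, 1, window // 2)
        outputs += [fa, cb(fa)]   # the dilated branch consumes the first branch's output
    return torch.cat(outputs, 2)  # same ordering as the hard-coded ffc concatenation
```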