├── FANLayer-tf.py
├── FANLayer.py
├── Image_Recognition
│   ├── run_image_recognition.sh
│   └── test_image_recognition.py
├── LICENSE
├── Periodicity_Modeling
│   ├── architecture.py
│   ├── generate_periodic_data.py
│   ├── run.sh
│   └── test.py
├── README.md
├── Sentiment_Analysis
│   ├── get_dataloader.py
│   ├── model
│   │   ├── CustomBERT.py
│   │   ├── Mamba.py
│   │   ├── __init__.py
│   │   └── build_model.py
│   ├── scripts
│   │   ├── Trans
│   │   │   ├── test_baseline_trans.sh
│   │   │   └── train_baseline_trans.sh
│   │   ├── Trans_with_FAN
│   │   │   ├── test_ours.sh
│   │   │   ├── test_ours_withgate.sh
│   │   │   ├── train_ours.sh
│   │   │   └── train_ours_withgate.sh
│   │   └── mamba
│   │       ├── test_mamba.sh
│   │       └── train_mamba.sh
│   ├── test.py
│   ├── train.py
│   └── utils.py
├── Symbolic_Formula_Representation
│   ├── gen_dataset.py
│   ├── requirements.txt
│   ├── run_train_fan.sh
│   ├── run_train_kan.sh
│   ├── run_train_mlp.sh
│   ├── run_train_transformer.sh
│   ├── train_fan.py
│   ├── train_kan.py
│   ├── train_mlp.py
│   └── train_transformer.py
├── Timeseries_Forecasting
│   ├── data_provider
│   │   ├── __init__.py
│   │   ├── data_factory.py
│   │   └── data_loader.py
│   ├── exp
│   │   ├── __init__.py
│   │   ├── exp_basic.py
│   │   └── exp_main.py
│   ├── layers
│   │   ├── AutoCorrelation.py
│   │   ├── Autoformer_EncDec.py
│   │   ├── Embed.py
│   │   ├── FANLayer.py
│   │   ├── SelfAttention_Family.py
│   │   ├── Transformer_EncDec.py
│   │   └── __init__.py
│   ├── models
│   │   ├── Modified_Transformer.py
│   │   └── __init__.py
│   ├── requirements.txt
│   ├── run.py
│   ├── scripts
│   │   ├── ETT_script
│   │   │   ├── Transformer.sh
│   │   │   ├── Transformer_setting_2.sh
│   │   │   └── Transformer_setting_4.sh
│   │   ├── Exchange_script
│   │   │   ├── Modified_Transformer_setting_2.sh
│   │   │   ├── Modified_Transformer_setting_4.sh
│   │   │   └── Transformer.sh
│   │   ├── Traffic_script
│   │   │   ├── Modified_Transformer_setting_2.sh
│   │   │   ├── Modified_Transformer_setting_4.sh
│   │   │   └── Transformer.sh
│   │   └── Weather_script
│   │       └── Modified_Transformer.sh
│   └── utils
│       ├── __init__.py
│       ├── download_data.py
│       ├── masking.py
│       ├── metrics.py
│       ├── timefeatures.py
│       └── tools.py
└── img
    ├── FANLayer.jpg
    ├── IR.jpg
    ├── mod.jpg
    └── sin.jpg
/FANLayer-tf.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras import layers
3 | from tensorflow.keras import constraints, activations, initializers, regularizers
4 | from tensorflow.keras.constraints import NonNeg
5 | from tensorflow.keras.constraints import Constraint
6 |
class FANLayer(tf.keras.layers.Layer):
    """
    FANLayer: The layer used in FAN (https://arxiv.org/abs/2410.02675).

    Keras variant: unlike the PyTorch implementation, the input dimension is
    not a constructor argument — the inner Dense sublayers infer it on the
    first call.

    Args:
        output_dim (int): The number of output features.
        p_ratio (float): The ratio of output dimensions used for cosine and sine parts (default: 0.25).
        activation (str or callable): The activation function to apply to the g component (default: 'gelu').
        use_p_bias (bool): If True, include bias in the linear transformations of the p component (default: True).
        gated (bool): If True, applies gating to the output.
        kernel_regularizer: Regularizer for kernel weights.
        bias_regularizer: Regularizer for bias weights.
    """

    def __init__(self,
                 output_dim,
                 p_ratio=0.25,
                 activation='gelu',
                 use_p_bias=True,
                 gated=False,
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 **kwargs):
        super(FANLayer, self).__init__(**kwargs)

        # The periodic part must stay a strict minority of the output width.
        assert 0 < p_ratio < 0.5, "p_ratio must be between 0 and 0.5"

        self.p_ratio = p_ratio
        self.output_dim = output_dim
        self.activation = activations.get(activation)
        self.use_p_bias = use_p_bias
        self.gated = gated
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)

        # Compute output dimensions for p and g components
        self.p_output_dim = int(output_dim * self.p_ratio)
        self.g_output_dim = output_dim - 2 * self.p_output_dim  # Account for cosine and sine

        # Layers for linear transformations
        self.input_linear_p = layers.Dense(self.p_output_dim,
                                           use_bias=self.use_p_bias,
                                           kernel_regularizer=self.kernel_regularizer,
                                           bias_regularizer=self.bias_regularizer)
        self.input_linear_g = layers.Dense(self.g_output_dim,
                                           kernel_regularizer=self.kernel_regularizer,
                                           bias_regularizer=self.bias_regularizer)

        if self.gated:
            # Scalar gate, kept non-negative by constraint; squashed by sigmoid in call().
            self.gate = self.add_weight(name='gate',
                                        shape=(1,),
                                        initializer=initializers.RandomNormal(),
                                        trainable=True,
                                        regularizer=None,
                                        constraint=NonNeg())

    def call(self, inputs):
        # Apply the linear transformation followed by the activation for the g component
        g = self.activation(self.input_linear_g(inputs))

        # Apply the linear transformation for the p component
        p = self.input_linear_p(inputs)

        if self.gated:
            # Blend periodic (cos/sin) and non-periodic parts with a learned gate.
            gate = tf.sigmoid(self.gate)
            output = tf.concat([gate * tf.cos(p), gate * tf.sin(p), (1 - gate) * g], axis=-1)
        else:
            output = tf.concat([tf.cos(p), tf.sin(p), g], axis=-1)

        return output

    def get_config(self):
        # Serialize constructor arguments so the layer survives model save/load.
        config = super(FANLayer, self).get_config()
        config.update({
            "output_dim": self.output_dim,
            "p_ratio": self.p_ratio,
            "activation": activations.serialize(self.activation),
            "use_p_bias": self.use_p_bias,
            "gated": self.gated,
            "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
            "bias_regularizer": regularizers.serialize(self.bias_regularizer)
        })
        return config
91 |
--------------------------------------------------------------------------------
/FANLayer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
class FANLayer(nn.Module):
    """
    FANLayer: The layer used in FAN (https://arxiv.org/abs/2410.02675).

    The output is the concatenation of a periodic part — cos(p) and sin(p),
    each p_ratio of the width — and a conventional activated part g filling
    the remainder.

    Args:
        input_dim (int): The number of input features.
        output_dim (int): The number of output features.
        p_ratio (float): The ratio of output dimensions used for cosine and sine parts (default: 0.25).
        activation (str or callable): The activation function to apply to the g component. If a string is passed,
                                      the corresponding activation from torch.nn.functional is used (default: 'gelu').
        use_p_bias (bool): If True, include bias in the linear transformations of p component (default: True).
                           There is almost no difference between bias and non-bias in our experiments.
    """

    def __init__(self, input_dim, output_dim, p_ratio=0.25, activation='gelu', use_p_bias=True):
        super(FANLayer, self).__init__()

        # p must use a strict minority of the output, since it is emitted twice.
        assert 0 < p_ratio < 0.5, "p_ratio must be between 0 and 0.5"

        self.p_ratio = p_ratio
        dim_p = int(output_dim * self.p_ratio)
        dim_g = output_dim - 2 * dim_p  # cos and sin each consume dim_p slots

        # Projection feeding the periodic (cos/sin) branch.
        self.input_linear_p = nn.Linear(input_dim, dim_p, bias=use_p_bias)
        # Projection feeding the non-periodic branch.
        self.input_linear_g = nn.Linear(input_dim, dim_g)

        # Resolve the activation: a name is looked up in torch.nn.functional;
        # a falsy value (e.g. None) means identity.
        if isinstance(activation, str):
            self.activation = getattr(F, activation)
        elif activation:
            self.activation = activation
        else:
            self.activation = lambda x: x

    def forward(self, src):
        """
        Args:
            src (Tensor): Input tensor of shape (batch_size, input_dim).

        Returns:
            Tensor: Output tensor of shape (batch_size, output_dim), after applying the FAN layer.
        """
        p = self.input_linear_p(src)
        g = self.activation(self.input_linear_g(src))
        # Layout: [cos(p) | sin(p) | g] along the feature axis.
        return torch.cat((torch.cos(p), torch.sin(p), g), dim=-1)
60 |
61 |
--------------------------------------------------------------------------------
/Image_Recognition/run_image_recognition.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Train/evaluate each model version on the chosen dataset, one background
# python job per version; logs go to ./${logdirpath}/<Dataset>_<version>.log.

GPU=1
LR=0.01
Epoch=100
Versions=(fan mlp)
Dataset=MNIST #(MNIST MNIST-M Fashion-MNIST Fashion-MNIST-corrupted)
logdirpath=result

# -p replaces the manual existence test and is race-free.
mkdir -p "./${logdirpath}"

for Version in "${Versions[@]}"; do
    path=${Version}

    echo "running ${path}..."
    python3 -u ./test_image_recognition.py \
        --gpu_id "${GPU}" \
        --lr "${LR}" \
        --epoch "${Epoch}" \
        --version "${Version}" \
        --dataset "${Dataset}" \
        > "./${logdirpath}/${Dataset}_${path}.log" 2>&1 &
done

# Block until every background run has finished.
wait
echo "End."
27 |
--------------------------------------------------------------------------------
/Image_Recognition/test_image_recognition.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import torchvision.transforms.functional as TF
5 | from torch.utils.data import DataLoader
6 | from torchvision import datasets, transforms
7 | import numpy as np
8 | import random
9 | from datasets import load_dataset
10 |
11 | import argparse
12 |
def _str2bool(value):
    """Parse a command-line boolean.

    argparse's ``type=bool`` is a well-known trap: ``bool('False')`` is True,
    so every non-empty string enabled the flag.  This parser maps the usual
    false-y spellings to False and everything else to True, keeping the
    original CLI shape (``--similarparameter VALUE``) backward-compatible.
    """
    if isinstance(value, bool):
        return value
    return value.strip().lower() not in ('false', 'f', 'no', 'n', '0', '')


parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, help='dataset', default='MNIST')
parser.add_argument('--gpu_id', type=int, help='gpu_id', default=1)
parser.add_argument('--lr', type=float, help='lr', default=0.01)
parser.add_argument('--epoch', type=int, help='epoch', default=100)
parser.add_argument('--version', type=str, help='version', default='fan')
parser.add_argument('--similarparameter', type=_str2bool, help='similarparameter', default=True)

args = parser.parse_args()
22 |
def set_seed(seed):
    """Seed every RNG this script touches (Python, NumPy, torch CPU/CUDA)
    and pin cuDNN to deterministic kernels for reproducible runs."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN autotuning speed for run-to-run determinism.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(2023)
33 |
34 |
35 | from FANLayer import FANLayer
36 |
class CNNModel(nn.Module):
    """Small VGG-style CNN for 28x28 inputs: two conv stages (28x28 -> 7x7
    after two 2x2 poolings) followed by a classifier head that is either a
    plain MLP or a FANLayer-based head, selected by the global ``args.version``."""

    def __init__(self, input_dim=1, output_dim=10):
        super(CNNModel, self).__init__()

        def conv_stage(c_in, c_out):
            # Two 3x3 convs with BN/ReLU, then 2x2 max-pool and dropout.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.Conv2d(c_out, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
                nn.Dropout(0.25),
            ]

        self.conv_layer = nn.Sequential(*conv_stage(input_dim, 64), *conv_stage(64, 128))

        # Widen the FAN head so both heads carry a similar parameter count.
        self.scalar = lambda x: x * 4 // 3 if args.similarparameter else x

        if args.version == 'mlp':
            # NOTE(review): this branch hardcodes 10 output classes instead of
            # using output_dim — preserved as-is, confirm intent.
            self.fc_layer = nn.Sequential(
                nn.Flatten(),
                nn.Linear(128 * 7 * 7, 256),
                nn.BatchNorm1d(256),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Linear(256, 10),
            )
        else:
            width = self.scalar(256)
            self.fc_layer = nn.Sequential(
                nn.Flatten(),
                FANLayer(128 * 7 * 7, width),  # replaces nn.Linear(128 * 7 * 7, 256)
                nn.BatchNorm1d(width),
                nn.Dropout(0.5),
                nn.Linear(width, output_dim),
            )

    def forward(self, x):
        """Extract conv features, then classify; returns raw class logits."""
        features = self.conv_layer(x)
        return self.fc_layer(features)
85 |
86 |
# Pick the CUDA device chosen on the CLI when available, else fall back to CPU.
device = torch.device(f"cuda:{args.gpu_id}" if torch.cuda.is_available() else "cpu")

model = CNNModel()
model.to(device)

# Total training epochs, taken from the CLI.
num_epochs = args.epoch
93 |
def _evaluate(model, loader, criterion=None):
    """Evaluate ``model`` over ``loader``; returns (accuracy %, mean loss).

    The loss term is 0.0 when ``criterion`` is None (OOD evaluation only
    needs accuracy)."""
    model.eval()
    correct = 0
    total = 0
    loss_sum = 0.0
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            if criterion is not None:
                loss_sum += criterion(outputs, labels).item() * images.size(0)
            _, preds = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
    return 100 * correct / total, loss_sum / len(loader.dataset)


def run(model, train_loader, OOD_test_loader, test_loader, num_epochs, name):
    """Train ``model`` with SGD + StepLR and evaluate after every epoch.

    Args:
        model: network already moved to ``device``.
        train_loader / test_loader: in-distribution loaders.
        OOD_test_loader: optional out-of-distribution loader; may be None.
        num_epochs: number of training epochs.
        name: run label (currently unused; kept for interface compatibility).

    Returns:
        dict with best and final in-distribution / OOD accuracies.  OOD
        values are 0.0 when no OOD loader is supplied.

    Fix: the original referenced ``OOD_accuracy`` unconditionally in the
    per-epoch print, the best-accuracy update, and the return statement,
    raising NameError whenever ``OOD_test_loader`` was None.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

    best_accuracy = [0.0, 0.0]  # [best in-distribution acc, best OOD acc]
    OOD_accuracy = 0.0          # defined up-front so it is always printable/returnable
    epoch_accuracy = 0.0

    for epoch in range(1, num_epochs + 1):
        model.train()
        running_loss = 0.0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)

        # Mean training loss; tracked for debugging even though only the
        # test loss is printed below (matches the original behavior).
        epoch_loss = running_loss / len(train_loader.dataset)

        epoch_accuracy, epoch_test_loss = _evaluate(model, test_loader, criterion)

        if OOD_test_loader is not None:
            OOD_accuracy, _ = _evaluate(model, OOD_test_loader)

        print(f'Epoch [{epoch}/{num_epochs}], Loss: {epoch_test_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%, OOD Accuracy: {OOD_accuracy:.2f}%')

        scheduler.step()

        best_accuracy[0] = max(best_accuracy[0], epoch_accuracy)
        best_accuracy[1] = max(best_accuracy[1], OOD_accuracy)

    return {'best_accuracy': best_accuracy[0], 'best_OOD_accuracy': best_accuracy[1],
            'accuracy': epoch_accuracy, 'OOD_accuracy': OOD_accuracy}
169 |
170 |
def get_dataloader(dataset, batch_size=256, shuffle=True, Train=True):
    """Build DataLoader(s) from a HuggingFace dataset dict.

    Args:
        dataset: a datasets dict with 'train'/'test' splits whose items carry
            'image' and 'label' fields (images presumably PIL — confirm).
        batch_size (int): batch size for the train loader.
        shuffle (bool): whether to shuffle the train loader.
        Train (bool): if True return (train_loader, test_loader); otherwise
            return only the test loader.
    """
    def transform_m(example):
        # Resize to 28x28, force grayscale, tensor-ize, then normalize to [-1, 1].
        example['image'] = TF.resize(example['image'], (28, 28))
        example['image'] = example['image'].convert('L')
        example['image'] = TF.to_tensor(example['image'])
        example['image'] = TF.normalize(example['image'], mean=(.5,), std=(.5,))
        return example


    def collate_fn(batch):
        # NOTE(review): these filters silently DROP items whose 'image'/'label'
        # is already a torch.Tensor; if a batch ever consisted only of tensors,
        # torch.stack would fail on an empty list.  Presumably .map(transform_m)
        # stores list-backed fields so the filter never fires — confirm upstream.
        images = [torch.tensor(item['image']) for item in batch if not isinstance(item['image'], torch.Tensor)]
        labels = [torch.tensor(item['label']) for item in batch if not isinstance(item['label'], torch.Tensor)]

        images = torch.stack(images, dim=0)
        labels = torch.stack(labels, dim=0)

        return images, labels

    if Train:
        # Apply the preprocessing eagerly over both splits.
        trainset = dataset['train'].map(transform_m)
        testset = dataset['test'].map(transform_m)

        train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)
        test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
        return train_loader, test_loader
    else:
        testset = dataset['test'].map(transform_m)

        test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
        return test_loader
201 |
202 |
# Light augmentation + MNIST normalization, shared by the torchvision datasets.
transform = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

if args.dataset == 'MNIST':
    # Train/test on MNIST; OOD evaluation on MNIST-M.
    train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
    test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

    train_loader = DataLoader(dataset=train_dataset, batch_size=256, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=256, shuffle=False)

    OOD_test_loader = get_dataloader(load_dataset("Mike0307/MNIST-M"), Train=False)

    accuracy_checkpoints = run(model, train_loader, OOD_test_loader, test_loader, num_epochs, name='mnist')

elif args.dataset == 'MNIST-M':
    # Train/test on MNIST-M; OOD evaluation on plain MNIST.
    dataset = load_dataset("Mike0307/MNIST-M")

    train_loader, test_loader = get_dataloader(dataset)

    OOD_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
    OOD_test_loader = DataLoader(dataset=OOD_dataset, batch_size=256, shuffle=False)

    accuracy_checkpoints = run(model, train_loader, OOD_test_loader, test_loader, num_epochs, name='m_mnist')

elif args.dataset == 'Fashion-MNIST':
    # Train/test on Fashion-MNIST; OOD evaluation on its corrupted variant.
    dataset = load_dataset("zalando-datasets/fashion_mnist")
    train_loader, test_loader = get_dataloader(dataset)

    OOD_test_loader = get_dataloader(load_dataset("mweiss/fashion_mnist_corrupted"), Train=False)

    accuracy_checkpoints = run(model, train_loader, OOD_test_loader, test_loader, num_epochs, name='f_mnist')

elif args.dataset == 'Fashion-MNIST-corrupted':
    # Train/test on corrupted Fashion-MNIST; OOD evaluation on the clean set.
    dataset = load_dataset("mweiss/fashion_mnist_corrupted")
    train_loader, test_loader = get_dataloader(dataset)

    OOD_test_loader = get_dataloader(load_dataset("zalando-datasets/fashion_mnist"), Train=False)

    accuracy_checkpoints = run(model, train_loader, OOD_test_loader, test_loader, num_epochs, name='fc_mnist')

else:
    # Fail fast: previously an unknown dataset fell through and crashed with
    # NameError on `accuracy_checkpoints` at the print below.
    raise ValueError(f"Unknown dataset: {args.dataset!r}. Expected one of: "
                     "MNIST, MNIST-M, Fashion-MNIST, Fashion-MNIST-corrupted.")


print(f'{args.dataset}:', accuracy_checkpoints)
250 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 YihongDong
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Periodicity_Modeling/architecture.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torch.nn.functional as F
4 | import math
5 |
# Registry mapping model_name -> model class.
model_registry = {}

# Create a decorator to register models
def register_model(model_name):
    """Class decorator: register ``cls`` under ``model_name`` and return it unchanged."""
    def decorator(cls):
        model_registry[model_name] = cls
        return cls
    return decorator

# Define a function to retrieve and instantiate the model class by model_name
def get_model_by_name(model_name, *args, **kwargs):
    """Instantiate the registered model class for ``model_name``.

    Raises:
        ValueError: if no class was registered under ``model_name``.
    """
    model_cls = model_registry.get(model_name)
    if model_cls is None:
        # Fixed message: the original rendered as "model_nameFoo" (missing space).
        raise ValueError(f"No model found with model_name '{model_name}'.")
    return model_cls(*args, **kwargs)
22 |
23 | # Use the decorator to register the model class
24 |
@register_model('FANLayer')
class FANLayer(nn.Module):
    """Single FAN layer: output = [cos(p) | sin(p) | GELU(g)].

    The periodic branch p gets output_dim//4 features (each emitted twice,
    as cos and sin); g takes whatever remains so the concatenation is
    exactly output_dim wide.
    """

    def __init__(self, input_dim, output_dim, bias=True):
        super(FANLayer, self).__init__()
        p_dim = output_dim // 4
        self.input_linear_p = nn.Linear(input_dim, p_dim, bias=bias) # There is almost no difference between bias and non-bias in our experiments.
        # Fixed: the g width was output_dim - output_dim//2, which makes the
        # concatenated output narrower than output_dim whenever output_dim is
        # not a multiple of 4 (e.g. 6 -> 5).  Subtracting 2*p_dim keeps the
        # total at exactly output_dim for every width (identical result when
        # output_dim % 4 == 0, which all current callers use).
        self.input_linear_g = nn.Linear(input_dim, output_dim - 2 * p_dim)
        self.activation = nn.GELU()

    def forward(self, src):
        g = self.activation(self.input_linear_g(src))
        p = self.input_linear_p(src)

        output = torch.cat((torch.cos(p), torch.sin(p), g), dim=-1)
        return output
39 |
@register_model('FANLayerGated')
class FANLayerGated(nn.Module):
    """FAN layer with an optional learned scalar gate that blends the
    periodic part (cos/sin) against the non-periodic GELU part."""

    def __init__(self, input_dim, output_dim, bias=True, gated=True):
        super(FANLayerGated, self).__init__()
        p_dim = output_dim // 4
        self.input_linear_p = nn.Linear(input_dim, p_dim, bias=bias)
        # Fixed: the g width was output_dim - output_dim//2, which makes the
        # concatenated output narrower than output_dim whenever output_dim is
        # not a multiple of 4; output_dim - 2*p_dim keeps the total exact
        # (identical when output_dim % 4 == 0).
        self.input_linear_g = nn.Linear(input_dim, output_dim - 2 * p_dim)
        self.activation = nn.GELU()
        if gated:
            # Scalar gate; squashed through sigmoid in forward().
            self.gate = nn.Parameter(torch.randn(1, dtype=torch.float32))

    def forward(self, src):
        g = self.activation(self.input_linear_g(src))
        p = self.input_linear_p(src)

        # Ungated instances never created the 'gate' attribute.
        if not hasattr(self, 'gate'):
            output = torch.cat((torch.cos(p), torch.sin(p), g), dim=-1)
        else:
            gate = torch.sigmoid(self.gate)
            output = torch.cat((gate*torch.cos(p), gate*torch.sin(p), (1-gate)*g), dim=-1)
        return output
60 |
@register_model('FAN')
class FAN(nn.Module):
    """FAN network: linear embedding, (num_layers - 1) FANLayers, and a
    final linear projection to output_dim."""

    def __init__(self, input_dim=1, output_dim=1, hidden_dim=2048, num_layers=3):
        super(FAN, self).__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        blocks = [FANLayer(hidden_dim, hidden_dim) for _ in range(num_layers - 1)]
        blocks.append(nn.Linear(hidden_dim, output_dim))
        self.layers = nn.ModuleList(blocks)

    def forward(self, src):
        out = self.embedding(src)
        for block in self.layers:
            out = block(out)
        return out
76 |
@register_model('FANGated')
class FANGated(nn.Module):
    """Gated variant of FAN: same stack shape, but with FANLayerGated
    blocks (gating toggled by ``gated``)."""

    def __init__(self, input_dim=1, output_dim=1, hidden_dim=2048, num_layers=3, gated = True):
        super(FANGated, self).__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        blocks = [FANLayerGated(hidden_dim, hidden_dim, gated = gated) for _ in range(num_layers - 1)]
        blocks.append(nn.Linear(hidden_dim, output_dim))
        self.layers = nn.ModuleList(blocks)

    def forward(self, src):
        out = self.embedding(src)
        for block in self.layers:
            out = block(out)
        return out
92 |
@register_model('MLP')
class MLPModel(nn.Module):
    """GELU MLP baseline.  With ``use_embedding`` a separate input linear
    feeds the hidden stack; otherwise the first hidden linear reads the raw
    input.  The same GELU module instance is reused between layers."""

    def __init__(self, input_dim=1, output_dim=1, hidden_dim=2048, num_layers=3, use_embedding=True):
        super(MLPModel, self).__init__()
        self.activation = nn.GELU()
        self.layers = nn.ModuleList()
        if use_embedding:
            self.embedding = nn.Linear(input_dim, hidden_dim)
        first_in = hidden_dim if use_embedding else input_dim
        self.layers.extend([nn.Linear(first_in, hidden_dim), self.activation])

        for _ in range(num_layers - 2):
            self.layers.extend([nn.Linear(hidden_dim, hidden_dim), self.activation])
        self.layers.append(nn.Linear(hidden_dim, output_dim))

    def forward(self, src):
        out = self.embedding(src) if hasattr(self, 'embedding') else src
        for block in self.layers:
            out = block(out)
        return out
114 |
115 |
class RoPEPositionalEncoding(torch.nn.Module):
    """Additive sinusoidal positional encoding.

    NOTE(review): despite the name, this is the classic absolute sin/cos
    table added to the input, not rotary position embedding.
    """

    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()

        positions = torch.arange(max_len).unsqueeze(1)
        inv_freq = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        table = torch.zeros(max_len, 1, d_model)
        table[:, 0, 0::2] = torch.sin(positions * inv_freq)
        table[:, 0, 1::2] = torch.cos(positions * inv_freq)
        # Buffer, not Parameter: follows .to()/device moves but is not trained.
        self.register_buffer('pe', table)

    def forward(self, x):
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
        """
        # Broadcast the (seq_len, 1, d_model) table across the batch axis.
        return x + self.pe[:x.size(0)]
135 |
136 |
@register_model('Transformer')
class TransformerModel(nn.Module):
    """Transformer baseline: linear embedding + sinusoidal positions, then an
    encoder-only, decoder-only, or split encoder-decoder stack (both flags
    set is rejected), and a linear readout."""

    def __init__(self, input_dim=1, output_dim=1, hidden_dim=768, num_layers=12, num_heads=12, norm_first = True, encoder_only=True, decoder_only=False):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        self.pos_encoder = RoPEPositionalEncoding(hidden_dim)
        self.encoder_only = encoder_only
        self.decoder_only = decoder_only
        assert not (self.encoder_only and self.decoder_only)

        def build_encoder(depth):
            # dim_feedforward deliberately equals hidden_dim here.
            layer = nn.TransformerEncoderLayer(hidden_dim, num_heads, hidden_dim, norm_first = norm_first)
            return nn.TransformerEncoder(layer, depth)

        def build_decoder(depth):
            layer = nn.TransformerDecoderLayer(hidden_dim, num_heads, hidden_dim, norm_first = norm_first)
            return nn.TransformerDecoder(layer, depth)

        if self.encoder_only:
            self.transformer_encoder = build_encoder(num_layers)
        elif self.decoder_only:
            self.transformer_decoder = build_decoder(num_layers)
        else:
            # Split the layer budget evenly between the two halves.
            self.transformer_encoder = build_encoder(num_layers//2)
            self.transformer_decoder = build_decoder(num_layers//2)
        self.out = nn.Linear(hidden_dim, output_dim)

    def forward(self, src):
        # Treat the whole batch as one sequence position axis of length 1.
        hidden = self.pos_encoder(self.embedding(src).unsqueeze(0))
        if self.encoder_only:
            hidden = self.transformer_encoder(hidden)
        elif self.decoder_only:
            # Self-conditioning decoder: the input serves as both tgt and memory.
            hidden = self.transformer_decoder(hidden, hidden)
        else:
            hidden = self.transformer_encoder(hidden)
            hidden = self.transformer_decoder(hidden, hidden)
        return self.out(hidden)
171 |
172 |
173 | class KANLinear(torch.nn.Module):
174 | def __init__(
175 | self,
176 | in_features,
177 | out_features,
178 | grid_size=5,
179 | spline_order=3,
180 | scale_noise=0.1,
181 | scale_base=1.0,
182 | scale_spline=1.0,
183 | enable_standalone_scale_spline=True,
184 | base_activation=torch.nn.SiLU,
185 | grid_eps=0.02,
186 | grid_range=[-1, 1],
187 | ):
188 | super(KANLinear, self).__init__()
189 | self.in_features = in_features
190 | self.out_features = out_features
191 | self.grid_size = grid_size
192 | self.spline_order = spline_order
193 |
194 | h = (grid_range[1] - grid_range[0]) / grid_size
195 | grid = (
196 | (
197 | torch.arange(-spline_order, grid_size + spline_order + 1) * h
198 | + grid_range[0]
199 | )
200 | .expand(in_features, -1)
201 | .contiguous()
202 | )
203 |
204 | self.register_buffer("grid", grid)
205 |
206 | self.base_weight = torch.nn.Parameter(torch.Tensor(out_features, in_features))
207 | self.spline_weight = torch.nn.Parameter(
208 | torch.Tensor(out_features, in_features, grid_size + spline_order)
209 | )
210 | if enable_standalone_scale_spline:
211 | self.spline_scaler = torch.nn.Parameter(
212 | torch.Tensor(out_features, in_features)
213 | )
214 |
215 | self.scale_noise = scale_noise
216 | self.scale_base = scale_base
217 | self.scale_spline = scale_spline
218 | self.enable_standalone_scale_spline = enable_standalone_scale_spline
219 | self.base_activation = base_activation()
220 | self.grid_eps = grid_eps
221 |
222 | self.reset_parameters()
223 |
224 | def reset_parameters(self):
225 | torch.nn.init.kaiming_uniform_(self.base_weight, a=math.sqrt(5) * self.scale_base)
226 | with torch.no_grad():
227 | noise = (
228 | (
229 | torch.rand(self.grid_size + 1, self.in_features, self.out_features)
230 | - 1 / 2
231 | )
232 | * self.scale_noise
233 | / self.grid_size
234 | )
235 | self.spline_weight.data.copy_(
236 | (self.scale_spline if not self.enable_standalone_scale_spline else 1.0)
237 | * self.curve2coeff(
238 | self.grid.T[self.spline_order : -self.spline_order],
239 | noise,
240 | )
241 | )
242 | if self.enable_standalone_scale_spline:
243 | # torch.nn.init.constant_(self.spline_scaler, self.scale_spline)
244 | torch.nn.init.kaiming_uniform_(self.spline_scaler, a=math.sqrt(5) * self.scale_spline)
245 |
246 | def b_splines(self, x: torch.Tensor):
247 | """
248 | Compute the B-spline bases for the given input tensor.
249 |
250 | Args:
251 | x (torch.Tensor): Input tensor of shape (batch_size, in_features).
252 |
253 | Returns:
254 | torch.Tensor: B-spline bases tensor of shape (batch_size, in_features, grid_size + spline_order).
255 | """
256 | assert x.dim() == 2 and x.size(1) == self.in_features
257 |
258 | grid: torch.Tensor = (
259 | self.grid
260 | ) # (in_features, grid_size + 2 * spline_order + 1)
261 | x = x.unsqueeze(-1)
262 | bases = ((x >= grid[:, :-1]) & (x < grid[:, 1:])).to(x.dtype)
263 | for k in range(1, self.spline_order + 1):
264 | bases = (
265 | (x - grid[:, : -(k + 1)])
266 | / (grid[:, k:-1] - grid[:, : -(k + 1)])
267 | * bases[:, :, :-1]
268 | ) + (
269 | (grid[:, k + 1 :] - x)
270 | / (grid[:, k + 1 :] - grid[:, 1:(-k)])
271 | * bases[:, :, 1:]
272 | )
273 |
274 | assert bases.size() == (
275 | x.size(0),
276 | self.in_features,
277 | self.grid_size + self.spline_order,
278 | )
279 | return bases.contiguous()
280 |
281 | def curve2coeff(self, x: torch.Tensor, y: torch.Tensor):
282 | """
283 | Compute the coefficients of the curve that interpolates the given points.
284 |
285 | Args:
286 | x (torch.Tensor): Input tensor of shape (batch_size, in_features).
287 | y (torch.Tensor): Output tensor of shape (batch_size, in_features, out_features).
288 |
289 | Returns:
290 | torch.Tensor: Coefficients tensor of shape (out_features, in_features, grid_size + spline_order).
291 | """
292 | assert x.dim() == 2 and x.size(1) == self.in_features
293 | assert y.size() == (x.size(0), self.in_features, self.out_features)
294 |
295 | A = self.b_splines(x).transpose(
296 | 0, 1
297 | ) # (in_features, batch_size, grid_size + spline_order)
298 | B = y.transpose(0, 1) # (in_features, batch_size, out_features)
299 | solution = torch.linalg.lstsq(
300 | A, B
301 | ).solution # (in_features, grid_size + spline_order, out_features)
302 | result = solution.permute(
303 | 2, 0, 1
304 | ) # (out_features, in_features, grid_size + spline_order)
305 |
306 | assert result.size() == (
307 | self.out_features,
308 | self.in_features,
309 | self.grid_size + self.spline_order,
310 | )
311 | return result.contiguous()
312 |
313 | @property
314 | def scaled_spline_weight(self):
315 | return self.spline_weight * (
316 | self.spline_scaler.unsqueeze(-1)
317 | if self.enable_standalone_scale_spline
318 | else 1.0
319 | )
320 |
321 | def forward(self, x: torch.Tensor):
322 | assert x.size(-1) == self.in_features
323 | original_shape = x.shape
324 | x = x.reshape(-1, self.in_features)
325 |
326 | base_output = F.linear(self.base_activation(x), self.base_weight)
327 | spline_output = F.linear(
328 | self.b_splines(x).view(x.size(0), -1),
329 | self.scaled_spline_weight.view(self.out_features, -1),
330 | )
331 | output = base_output + spline_output
332 |
333 | output = output.reshape(*original_shape[:-1], self.out_features)
334 | return output
335 |
    @torch.no_grad()
    def update_grid(self, x: torch.Tensor, margin=0.01):
        """Re-fit the spline grid to the empirical distribution of the inputs.

        The layer's current spline outputs are evaluated first, then the grid
        knots are moved (a blend of data quantiles and uniform spacing) and the
        spline weights are re-solved so the layer's function is preserved.

        Args:
            x: 2-D batch of inputs, shape (batch, in_features).
            margin: padding added on both ends of the observed input range.
        """
        assert x.dim() == 2 and x.size(1) == self.in_features
        batch = x.size(0)

        # Evaluate the current spline output on the old grid so the weights can
        # be re-fitted against it after the grid moves.
        splines = self.b_splines(x)  # (batch, in, coeff)
        splines = splines.permute(1, 0, 2)  # (in, batch, coeff)
        orig_coeff = self.scaled_spline_weight  # (out, in, coeff)
        orig_coeff = orig_coeff.permute(1, 2, 0)  # (in, coeff, out)
        unreduced_spline_output = torch.bmm(splines, orig_coeff)  # (in, batch, out)
        unreduced_spline_output = unreduced_spline_output.permute(
            1, 0, 2
        )  # (batch, in, out)

        # sort each channel individually to collect data distribution
        x_sorted = torch.sort(x, dim=0)[0]
        # Adaptive knots: evenly spaced order statistics of the sorted inputs.
        grid_adaptive = x_sorted[
            torch.linspace(
                0, batch - 1, self.grid_size + 1, dtype=torch.int64, device=x.device
            )
        ]

        # Uniform knots spanning the (margin-padded) observed input range.
        uniform_step = (x_sorted[-1] - x_sorted[0] + 2 * margin) / self.grid_size
        grid_uniform = (
            torch.arange(
                self.grid_size + 1, dtype=torch.float32, device=x.device
            ).unsqueeze(1)
            * uniform_step
            + x_sorted[0]
            - margin
        )

        # Blend uniform and adaptive knots (grid_eps = 1 -> fully uniform),
        # then extend by spline_order knots on each side as the B-spline basis requires.
        grid = self.grid_eps * grid_uniform + (1 - self.grid_eps) * grid_adaptive
        grid = torch.concatenate(
            [
                grid[:1]
                - uniform_step
                * torch.arange(self.spline_order, 0, -1, device=x.device).unsqueeze(1),
                grid,
                grid[-1:]
                + uniform_step
                * torch.arange(1, self.spline_order + 1, device=x.device).unsqueeze(1),
            ],
            dim=0,
        )

        self.grid.copy_(grid.T)
        # Re-solve the spline weights so the layer still matches its old outputs.
        self.spline_weight.data.copy_(self.curve2coeff(x, unreduced_spline_output))
384 |
385 | def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0):
386 | """
387 | Compute the regularization loss.
388 |
389 | This is a dumb simulation of the original L1 regularization as stated in the
390 | paper, since the original one requires computing absolutes and entropy from the
391 | expanded (batch, in_features, out_features) intermediate tensor, which is hidden
392 | behind the F.linear function if we want an memory efficient implementation.
393 |
394 | The L1 regularization is now computed as mean absolute value of the spline
395 | weights. The authors implementation also includes this term in addition to the
396 | sample-based regularization.
397 | """
398 | l1_fake = self.spline_weight.abs().mean(-1)
399 | regularization_loss_activation = l1_fake.sum()
400 | p = l1_fake / regularization_loss_activation
401 | regularization_loss_entropy = -torch.sum(p * p.log())
402 | return (
403 | regularize_activation * regularization_loss_activation
404 | + regularize_entropy * regularization_loss_entropy
405 | )
406 |
407 |
@register_model('KAN')
class KAN(nn.Module):
    """Kolmogorov-Arnold Network: a stack of KANLinear layers.

    Layer widths are [input_dim] + [hidden_dim] * num_layers + [output_dim];
    all spline-related arguments are forwarded unchanged to every KANLinear.
    """

    def __init__(
        self,
        input_dim=1,
        output_dim=1,
        hidden_dim=128,
        num_layers=3,
        grid_size=50,
        spline_order=3,
        scale_noise=0.1,
        scale_base=1.0,
        scale_spline=1.0,
        base_activation=torch.nn.SiLU,
        grid_eps=0.02,
        grid_range=None,
    ):
        super(KAN, self).__init__()
        # Bug fix: the original used a mutable default argument
        # (grid_range=[-1, 1]); None now stands in for that default.
        if grid_range is None:
            grid_range = [-1, 1]
        self.grid_size = grid_size
        self.spline_order = spline_order
        layers_hidden = [input_dim] + [hidden_dim] * num_layers + [output_dim]

        self.layers = torch.nn.ModuleList()
        for in_features, out_features in zip(layers_hidden, layers_hidden[1:]):
            self.layers.append(
                KANLinear(
                    in_features,
                    out_features,
                    grid_size=grid_size,
                    spline_order=spline_order,
                    scale_noise=scale_noise,
                    scale_base=scale_base,
                    scale_spline=scale_spline,
                    base_activation=base_activation,
                    grid_eps=grid_eps,
                    grid_range=grid_range,
                )
            )

    def forward(self, x: torch.Tensor, update_grid=False):
        """Run `x` through every layer, optionally re-fitting each layer's grid first."""
        for layer in self.layers:
            if update_grid:
                layer.update_grid(x)
            x = layer(x)
        return x

    def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0):
        """Sum of the per-layer regularization losses (L1 + entropy terms)."""
        return sum(
            layer.regularization_loss(regularize_activation, regularize_entropy)
            for layer in self.layers
        )
459 |
--------------------------------------------------------------------------------
/Periodicity_Modeling/generate_periodic_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
def sawtooth_wave(t, n):
    """Sawtooth wave with period pi, ranging over [-0.5, 0.5).

    NOTE(review): `n` is accepted for harmonic-series call sites but unused
    here; callers apply the harmonic scaling themselves via `n * t`.
    """
    phase = t / np.pi
    return phase - np.floor(phase + 0.5)
6 |
def gen_periodic_data(periodic_type):
    """Build train/test samples of a periodic function plus its training setup.

    Args:
        periodic_type: one of 'sin', 'mod', 'complex_1' .. 'complex_6'.

    Returns:
        (t, data, t_test, data_test, PERIOD, BATCHSIZE, NUMEPOCH, PRINTEPOCH,
         lr, wd, y_uper, y_lower) — training inputs/targets, a wider test set,
        hyper-parameters, and y-axis plot limits.

    Raises:
        ValueError: for an unknown periodic_type (the original fell through to
            an UnboundLocalError at the return statement).
    """
    # Hyper-parameters shared by every periodic type.
    BATCHSIZE = 32
    NUMEPOCH = 10000
    PRINTEPOCH = 50
    lr = 1e-5
    wd = 0.01

    if periodic_type == 'sin':
        # Special case: the domain is measured in numbers of periods of sin.
        PERIOD = 6
        y_uper, y_lower = 1.5, -1.5

        def generate_periodic_data(num_samples, num_periods=100, is_train=True):
            # The test domain is 3x wider than the training domain.
            half_span = num_periods * np.pi if is_train else num_periods * 3 * np.pi
            t = np.linspace(-half_span, half_span, num_samples)
            return t, np.sin(t)

        print(f'generate data from the {periodic_type} function')
        t, data = generate_periodic_data(int(10000 * PERIOD), PERIOD)
        t_test, data_test = generate_periodic_data(4000, PERIOD, is_train=False)
    else:
        # Per-type settings: (formula, PERIOD, y_uper, y_lower).
        # PERIOD only scales the number of training samples here; the input
        # domain is always [-100, 100] for training (matching the original
        # code, which never passed PERIOD into its generators and so used
        # their default of 100), and twice as wide for testing.
        settings = {
            'mod': (lambda t: [i % 5 for i in t], 20, 10, -5),
            'complex_1': (lambda t: np.exp(np.sin(np.pi * t) ** 2 + np.cos(t) + t % 3 - 1), 4, 20, -20),
            'complex_2': (lambda t: (1 + np.sin(t)) * np.sin(2 * t), 4, 4, -4),
            'complex_3': (lambda t: np.sin(t + np.sin(2 * t)), 4, 2, -2),
            'complex_4': (lambda t: np.sin(t) * np.cos(2 * t) ** 2 + np.cos(t) * np.sin(3 * t) ** 2, 4, 2, -2),
            'complex_5': (lambda t: sum((1 / n) * sawtooth_wave(n * t, n) for n in range(1, 6)), 4, 1, -1),
            'complex_6': (lambda t: np.exp(np.sin(t)) / (1 + np.cos(2 * t) ** 2), 4, 3, 0),
        }
        if periodic_type not in settings:
            raise ValueError(f'unknown periodic_type: {periodic_type}')
        formula, PERIOD, y_uper, y_lower = settings[periodic_type]

        def generate_periodic_data(num_samples, domain=100, is_train=True):
            half_span = domain if is_train else 2 * domain
            t = np.linspace(-half_span, half_span, num_samples)
            return t, formula(t)

        print(f'generate data from the {periodic_type} function')
        t, data = generate_periodic_data(int(10000 * PERIOD))
        t_test, data_test = generate_periodic_data(4000, is_train=False)

    return t, data, t_test, data_test, PERIOD, BATCHSIZE, NUMEPOCH, PRINTEPOCH, lr, wd, y_uper, y_lower
228 |
229 |
def plot_periodic_data(t, data, t_test, data_test, result, args, epoch, path, y_uper, y_lower):
    """Plot the model's predictions against train/test data and save the figure.

    The figure is written to '{path}/epoch{epoch}.png'. (`args` is unused but
    kept for call-site compatibility.)
    """
    import matplotlib.pyplot as plt
    import numpy as np

    fig, ax = plt.subplots(figsize=(35, 5))
    # Draw order fixes the z-order: test data, train data, then predictions on top.
    ax.plot(t_test, data_test, label='Domain of Test Data', color='blue')
    ax.plot(t, data, label='Domain of Training Data', color='green')
    ax.plot(t_test, result, label='Model Predictions', color='red', linestyle='--')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_xlim(min(t_test), max(t_test))
    ax.set_ylim(y_lower, y_uper)
    # ax.legend()
    fig.savefig(f'{path}/epoch{epoch}.png')
244 |
def read_log_file(file_path):
    """Parse a training log, returning (train_losses, test_losses) as floats.

    Lines matched by the substrings 'Train Loss' / 'Test Loss' contribute their
    last whitespace-separated token; all other lines are ignored.
    """
    train_loss, test_loss = [], []
    with open(file_path, 'r') as handle:
        for line in handle:
            last_token = line.split(' ')[-1].strip()
            if 'Train Loss' in line:
                train_loss.append(float(last_token))
            elif 'Test Loss' in line:
                test_loss.append(float(last_token))
    return train_loss, test_loss
256 |
def plot_periodic_loss(log_file_path):
    """Plot train/test loss curves parsed from a log file.

    Saves the figure as '<log stem>.pdf' next to the log file.
    """
    import os
    import matplotlib.pyplot as plt
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

    train_log_loss, test_log_loss = read_log_file(log_file_path)

    # Bug fix: split('.')[0] truncated any path containing a dot (e.g.
    # './logs/run.log' became ''); splitext strips only the final extension.
    log_file_name = os.path.splitext(log_file_path)[0]
    # Losses are logged every 50 epochs (PRINTEPOCH), hence the x-axis step of 50.
    ax1.plot(np.arange(0, len(train_log_loss) * 50, 50), train_log_loss, label=log_file_name)
    ax2.plot(np.arange(0, len(test_log_loss) * 50, 50), test_log_loss, label=log_file_name)

    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Training Loss')
    ax1.legend(loc='upper right')

    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Test Loss')
    ax2.legend(loc='upper right')
    plt.savefig(f'{log_file_name}.pdf')
275 |
--------------------------------------------------------------------------------
/Periodicity_Modeling/run.sh:
--------------------------------------------------------------------------------
1 | GPU=0
2 | export CUDA_VISIBLE_DEVICES=${GPU}
3 |
4 | periodicType=sin
5 | modelName=FAN
6 | path=./${periodicType}_${modelName}
7 | python3 -u ./test.py \
8 | --model_name ${modelName} \
9 | --periodic_type ${periodicType} \
10 | --path ${path}
11 |
12 | wait $!
--------------------------------------------------------------------------------
/Periodicity_Modeling/test.py:
--------------------------------------------------------------------------------
"""Train a model on one periodic function; periodically test and plot its fit."""
import torch
import math

import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')

import os
import sys
from generate_periodic_data import gen_periodic_data, plot_periodic_data

import argparse

model_names = ['FAN', 'FANGated', 'MLP', 'KAN', 'Transformer']
periodic_types = ['sin', 'mod', 'complex_1', 'complex_2', 'complex_3', 'complex_4', 'complex_5', 'complex_6']

parser = argparse.ArgumentParser()
parser.add_argument('--periodic_type', type=str, choices=periodic_types, help='periodic type', default='sin')
parser.add_argument('--path', type=str, help='path')
parser.add_argument('--model_name', type=str, choices=model_names, help='model name', default='FAN')

args = parser.parse_args()

# The data preset also fixes every training hyper-parameter.
t, data, t_test, data_test, PERIOD, BATCHSIZE, NUMEPOCH, PRINTEPOCH, lr, wd, y_uper, y_lower = gen_periodic_data(args.periodic_type)

# Output directory for plots and the final checkpoint.
path = args.path
os.makedirs(path, exist_ok=True)


from torch.utils.data import TensorDataset, DataLoader

t_tensor = torch.tensor(t).float().unsqueeze(1)
data_tensor = torch.tensor(data).float().unsqueeze(1)
dataset = TensorDataset(t_tensor, data_tensor)

dataloader_train = DataLoader(dataset, batch_size=BATCHSIZE, shuffle=True)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load model
from architecture import get_model_by_name

print(f'model name: {args.model_name}')
model = get_model_by_name(args.model_name, input_dim=1, output_dim=1, num_layers = 3).to(device)

criterion = torch.nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)

t_test_tensor = torch.tensor(t_test).float().unsqueeze(1)
data_test_tensor = torch.tensor(data_test).float().unsqueeze(1)
dataset_test = TensorDataset(t_test_tensor, data_test_tensor)

dataloader_test = DataLoader(dataset_test, batch_size=BATCHSIZE)


# train
num_epochs = NUMEPOCH
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for x, y in dataloader_train:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        output = model(x)
        # NOTE(review): squeeze(0) is a no-op for batches larger than 1 —
        # confirm whether squeezing the feature dim was intended instead.
        loss = criterion(output.squeeze(0), y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Evaluate and plot every PRINTEPOCH epochs.
    if epoch % PRINTEPOCH == 0:
        print(f'Epoch {epoch}, Train Loss {total_loss / len(dataloader_train)}')
        model.eval()

        result = []
        # test
        total_test_loss = 0
        with torch.no_grad():
            for x, y in dataloader_test:
                x = x.to(device)
                y = y.to(device)
                predictions = model(x)
                result.extend(predictions.cpu().squeeze())
                test_loss = criterion(predictions.squeeze(0), y)
                total_test_loss += test_loss.item()
            print(f'Epoch {epoch}, Test Loss {total_test_loss / len(dataloader_test)}')

        # plot
        plot_periodic_data(t, data, t_test, data_test, result, args, epoch, path, y_uper, y_lower)

# Bug fix: the original saved to f'{args.model_name}/.pth' — a hidden file
# inside a directory that is never created. Save into the output directory.
torch.save(model.state_dict(), f'{path}/{args.model_name}.pth')

model.eval()

# Final evaluation pass over the test set.
total_test_loss = 0
with torch.no_grad():
    for x, y in dataloader_test:
        x = x.to(device)
        y = y.to(device)
        predictions = model(x)
        test_loss = criterion(predictions.squeeze(0), y)
        total_test_loss += test_loss.item()
print(f'Final Epoch, Test Loss {total_test_loss / len(dataloader_test)}')
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # FAN: Fourier Analysis Networks
2 | [](https://arxiv.org/abs/2410.02675)
3 |
4 |
5 |
6 | | | MLP Layer | FAN layer |
7 | |---------------------------|:----------------------------------------------------------:|:------------------------------------------------------------------------:|
8 | | **Formula** | $\Phi(x) = \sigma(B_{m} + W_{m}x)$ | $\phi(x) = [\cos(W_px)\|\| \sin(W_px)\|\| \sigma(B_{\bar{p}} + W_{\bar{p}}x)]$ |
9 | | **Num of Params** | $(d_\text{input} \times d_\text{output}) + d_\text{output}$ | $(1-\frac{d_p}{d_\text{output}})\times((d_\text{input} \times d_\text{output}) + d_\text{output})$ |
| **FLOPs** | $2\times(d_\text{input} \times d_\text{output}) + d_\text{output} \times \text{FLOPs}_\text{non-linear}$ | $(1-\frac{d_p}{d_\text{output}})\times(2\times(d_\text{input} \times d_\text{output})) + d_\text{output} \times \text{FLOPs}_\text{non-linear}$ |
11 |
12 |
13 | ## Periodicity Modeling
14 | ```shell
15 | cd Periodicity_Modeling
16 | bash ./run.sh
17 | ```
18 | 
19 | 
20 |
21 | ## Scaling Law
Detailed implementations are available in [FANformer](https://github.com/YihongDong/FANformer).
23 |
24 | ## Sentiment Analysis
25 | The data can be automatically downloaded using the Huggingface Datasets `load_dataset` function in the `./Sentiment_Analysis/get_dataloader.py`.
26 |
27 | ```shell
28 | cd Sentiment_Analysis
29 | bash scripts/Trans_with_FAN/train_ours.sh
30 | bash scripts/Trans_with_FAN/test_ours.sh
31 | ```
32 |
33 | ## Timeseries Forecasting
34 | You can obtain data from [Google Drive](https://drive.google.com/drive/folders/1v1uLx5zhGaNAOTIqHLHYMXtA-XFrKTxS?usp=sharing). All the datasets are well pre-processed and can be used easily.
35 |
36 | ```shell
37 | cd Timeseries_Forecasting
38 | bash scripts/Weather_script/Modified_Transformer.sh
39 | ```
40 |
41 | ## Symbolic Formula Representation
42 | ```shell
43 | cd Symbolic_Formula_Representation
44 | python gen_dataset.py
45 | bash run_train_fan.sh
46 | ```
47 |
48 | ## Image Recognition
49 | ```shell
50 | cd Image_Recognition
51 | bash run_image_recognition.sh
52 | ```
53 |
54 | ## Citation
55 | ```
56 | @article{dong2024fan,
57 | title={FAN: Fourier Analysis Networks},
58 | author={Yihong Dong and Ge Li and Yongding Tao and Xue Jiang and Kechi Zhang and Jia Li and Jing Su and Jun Zhang and Jingjing Xu},
59 | journal={arXiv preprint arXiv:2410.02675},
60 | year={2024}
61 | }
62 | ```
63 |
--------------------------------------------------------------------------------
/Sentiment_Analysis/get_dataloader.py:
--------------------------------------------------------------------------------
1 | from datasets import load_dataset
2 | from transformers import BertTokenizer, AutoTokenizer
3 | from torch.utils.data import DataLoader, ConcatDataset
4 |
class tokenized_dataloader:
    """Builds tokenized PyTorch DataLoaders for the supported sentiment datasets.

    The tokenizer follows the target model: the Mamba tokenizer when
    `args.model == "mamba"`, otherwise bert-base-uncased. All sequences are
    padded/truncated to 128 tokens.
    """

    def __init__(self, args):
        # `args` must provide: model, dataset, batch_size.
        if args.model == "mamba":
            self.tokenizer = AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf")
        else:
            self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

        self.dataset = args.dataset
        self.batch_size = args.batch_size

    def get_data_loaders(self, part_list=None):
        """Return a DataLoader over the requested split(s).

        Args:
            part_list: list of split names among train/validation/test;
                defaults to ["train"]. (Fix: the original used a mutable
                default argument.)

        A lone validation/test split is returned unshuffled; any other
        combination is concatenated and shuffled for training.

        Raises:
            ValueError: on an unknown split name.
        """
        if part_list is None:
            part_list = ["train"]
        dataset_list = []
        for part in part_list:
            if part in ["train", "validation", "test"]:
                dataset = self.get_tokenized_dataset(dataset_name=self.dataset, part=part)
                dataset_list.append(dataset)
            else:
                raise ValueError("part must be one of 'train', 'validation', 'test'")
        if len(part_list) == 1 and part_list[0] in ["validation", "test"]:
            return DataLoader(dataset_list[0], batch_size=self.batch_size, shuffle=False, num_workers=4)
        else:
            dataset = ConcatDataset(dataset_list)
            dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True, num_workers=4)
            return dataloader

    def get_tokenized_dataset(self, dataset_name, part="train"):
        """Load `part` of the named dataset, normalized to (text, labels) and tokenized.

        Raises:
            ValueError: for an unknown dataset name (the original fell through
                to an UnboundLocalError).
        """
        if dataset_name == "sst2":
            dataset = load_dataset("glue", "sst2")
            dataset[part] = dataset[part].remove_columns(["idx"])
            dataset[part] = dataset[part].rename_column("sentence", "text")
        elif dataset_name == "imdb":
            dataset = load_dataset("imdb")
        elif dataset_name == "sentiment140":
            dataset = load_dataset("adilbekovich/Sentiment140Twitter", encoding='ISO-8859-1')
        elif dataset_name == "amazon_polarity":
            dataset = load_dataset("amazon_polarity")
            # Merge title and body into the single 'text' field the pipeline expects.
            def combine_title_content(batch):
                batch['text'] = [title + '. ' + content for title, content in zip(batch['title'], batch['content'])]
                return batch
            dataset = dataset.map(combine_title_content, batched=True)
            dataset = dataset.remove_columns(['title', 'content'])
        else:
            raise ValueError(f"unknown dataset: {dataset_name}")

        dataset = dataset[part].map(self.tokenize_function, batched=True)
        dataset = dataset.remove_columns(['text'])
        dataset = dataset.rename_column("label", "labels")
        dataset.set_format("torch")
        return dataset

    def tokenize_function(self, examples):
        """Tokenize a batch of texts to fixed-length (128-token) encodings."""
        return self.tokenizer(
            examples['text'],
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt"
        )
--------------------------------------------------------------------------------
/Sentiment_Analysis/model/CustomBERT.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from transformers import BertModel, BertForSequenceClassification, BertConfig
5 |
class FANLayer(nn.Module):
    """Fourier Analysis Network layer: output = [cos(p) || sin(p) || act(g)].

    The periodic branch p and the non-periodic branch g are separate linear
    projections of the input; with `with_gate` a learned scalar gate blends
    the two branches.

    Args:
        input_dim: size of the input features.
        output_dim: size of the concatenated output.
        bias: whether the periodic projection has a bias.
        with_gate: learn a scalar gate mixing periodic vs. non-periodic parts.
        p_ratio: fraction of output_dim given to EACH of cos/sin (default 0.25,
            matching the original hard-coded output_dim // 4). New parameter —
            backward compatible; it also lets callers such as
            BertIntermediate_withFAN pass p_ratio without a TypeError.
    """

    def __init__(self, input_dim, output_dim, bias=True, with_gate=True, p_ratio=0.25):
        super(FANLayer, self).__init__()
        p_dim = int(output_dim * p_ratio)
        self.input_linear_p = nn.Linear(input_dim, p_dim, bias=bias)
        # Fix: size the g branch so the concatenated width is exactly output_dim.
        # (The original used output_dim - output_dim//2, which over/under-shot
        # whenever output_dim was not divisible by 4.)
        self.input_linear_g = nn.Linear(input_dim, output_dim - 2 * p_dim)
        self.activation = nn.GELU()
        if with_gate:
            self.gate = nn.Parameter(torch.randn(1, dtype=torch.float32))

    def forward(self, src):
        """Return [cos(p) || sin(p) || act(g)], gate-blended when the gate exists."""
        g = self.activation(self.input_linear_g(src))
        p = self.input_linear_p(src)

        if not hasattr(self, 'gate'):
            output = torch.cat((torch.cos(p), torch.sin(p), g), dim=-1)
        else:
            gate = torch.sigmoid(self.gate)
            output = torch.cat((gate*torch.cos(p), gate*torch.sin(p), (1-gate)*g), dim=-1)
        return output
25 |
class CustomBertClassifier(BertForSequenceClassification):
    """BERT sequence classifier that can swap each encoder FFN for FAN layers.

    When `replace_ffn` is True, every encoder layer's intermediate block and
    output projection are replaced with FANLayer-based modules.
    """

    def __init__(self, num_labels=2, num_hidden_layers=12, replace_ffn=False, with_gate=False):
        # Start from the stock bert-base-uncased configuration.
        config = BertConfig.from_pretrained("bert-base-uncased", num_labels=num_labels)
        config.num_hidden_layers = num_hidden_layers
        super(CustomBertClassifier, self).__init__(config)
        if replace_ffn: # replace the two linear layers in FFN for each layer
            for layer in self.bert.encoder.layer:
                # NOTE(review): BertIntermediate_withFAN reads extra attributes
                # (e.g. with_gate) off `config`, which are not set here —
                # confirm the config carries them before enabling replace_ffn.
                layer.intermediate = BertIntermediate_withFAN(config) # replace the intermediate layer because we don't need the activation function within the bert intermediate layer, which is already implemented in the FANLayer
                layer.output.dense = FANLayer(config.intermediate_size, config.hidden_size, with_gate=with_gate)
35 |
class BertIntermediate_withFAN(nn.Module):
    """Drop-in replacement for BertIntermediate whose projection is a FANLayer.

    The FANLayer applies its own activation, so no separate
    intermediate_act_fn is needed here.
    """

    def __init__(self, config):
        super().__init__()
        # Bug fix: the original passed p_ratio=config.p_ratio, but FANLayer (as
        # defined in this file) accepts no such argument and BertConfig defines
        # neither `p_ratio` nor `with_gate` — read `with_gate` defensively.
        self.dense = FANLayer(
            config.hidden_size,
            config.intermediate_size,
            with_gate=getattr(config, "with_gate", False),
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        return hidden_states
44 |
--------------------------------------------------------------------------------
/Sentiment_Analysis/model/Mamba.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('..')
3 | import torch
4 | import torch.nn as nn
5 | from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
6 | from typing import Any, Dict, Optional, Tuple, Union
7 | from transformers.cache_utils import MambaCache
8 | from transformers.modeling_outputs import SequenceClassifierOutput
9 | from transformers.models.mamba.modeling_mamba import MambaPreTrainedModel, MambaModel
10 | # from transformers.models.bert.modeling_bert import BertPooler
11 |
12 | from model.Linear import Pooling
13 |
class Pooler(nn.Module):
    """BERT-style pooler: project the first token's hidden state through tanh."""

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # Pool the sequence by keeping only the first token's hidden state.
        cls_state = hidden_states[:, 0, :]
        return self.activation(self.dense(cls_state))
27 |
class MambaForSequenceClassification(MambaPreTrainedModel):
    """Mamba backbone with a pooler and linear head for sequence classification.

    Pooling is either max-pooling over time (`max_pooler=True`) or the
    BERT-style first-token Pooler defined in this module.
    """

    def __init__(self, config, num_labels=2, max_pooler=False):
        super(MambaForSequenceClassification, self).__init__(config)
        self.num_labels = num_labels
        self.backbone = MambaModel(config)
        if max_pooler:
            self.pooler = Pooling(pool_type='max')
        else:
            self.pooler = Pooler(config)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(config.hidden_size, num_labels)

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        cache_params: Optional[MambaCache] = None,
        labels: Optional[torch.LongTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        use_cache: Optional[bool] = None,
        cache_position: Optional[torch.Tensor] = None,
        **kwargs,
    ):
        """Run backbone -> pool -> dropout -> classify; add a loss when labels are given."""

        mamba_outputs = self.backbone(
            input_ids,
            cache_params=cache_params,
            inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            use_cache=use_cache,
            cache_position=cache_position,
        )
        last_hidden_state = mamba_outputs.last_hidden_state
        pooled_output = self.pooler(last_hidden_state)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            # Infer the problem type once from num_labels and label dtype, then
            # cache it on the config (same convention as HF's BERT classifier).
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"


            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels.float())

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits
        )
--------------------------------------------------------------------------------
/Sentiment_Analysis/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YihongDong/FAN/7f1b16a1fdf2e36e8b123109d7f6b6987361a1fe/Sentiment_Analysis/model/__init__.py
--------------------------------------------------------------------------------
/Sentiment_Analysis/model/build_model.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from model.CustomBERT import CustomBertClassifier
3 | from model.Mamba import MambaForSequenceClassification
4 | from transformers import MambaConfig
5 |
6 |
def build_model(args):
    """Construct the sentiment-analysis model selected by `args.model`.

    Args:
        args: namespace providing at least `model` ('trans' or 'mamba'),
            `device`, `num_classes`, `num_hidden_layers`, plus the
            model-specific flags read below.

    Returns:
        The model, already moved to `args.device`.

    Raises:
        ValueError: if `args.model` is not a known model name (previously this
            fell through to an UnboundLocalError at the print statement).
    """
    if args.model == "trans":
        model = CustomBertClassifier(num_labels=args.num_classes, num_hidden_layers=args.num_hidden_layers, replace_ffn=args.replace_ffn, with_gate=args.with_gate).to(args.device)
        logging.info("with_gate: {}".format(args.with_gate))
    elif args.model == "mamba":
        config = MambaConfig.from_pretrained("state-spaces/mamba-130m-hf")
        config.num_hidden_layers = args.num_hidden_layers
        config.hidden_size = args.hidden_size
        logging.info("hidden_size: {}".format(config.hidden_size))
        logging.info("num_hidden_layers: {}".format(config.num_hidden_layers))
        model = MambaForSequenceClassification(config=config, num_labels=args.num_classes, max_pooler=args.max_pooler).to(args.device)
    else:
        raise ValueError(f"unknown model: {args.model!r} (expected 'trans' or 'mamba')")
    print('model:', model)
    return model
--------------------------------------------------------------------------------
/Sentiment_Analysis/scripts/Trans/test_baseline_trans.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Evaluate the baseline Transformer sentiment classifier on one dataset.

export CUDA_VISIBLE_DEVICES=0

prefix=baseline_trans
dataset=amazon_polarity # (imdb, sentiment140, amazon_polarity)

# create log dir
log_dir=./logs/$prefix
mkdir -p $log_dir

python test.py \
    --batch_size 128 \
    --prefix $prefix \
    --dataset $dataset \
    --log_file $log_dir/test_on_$dataset.log # > logs/screen_$prefix.log 2>&1
    # --save_path checkpoints/$prefix # > logs/screen_$prefix.log 2>&1
--------------------------------------------------------------------------------
/Sentiment_Analysis/scripts/Trans/train_baseline_trans.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Train the baseline Transformer sentiment classifier (no FAN layers).

export CUDA_VISIBLE_DEVICES=0

prefix=baseline_trans

python train.py \
    --batch_size 128 \
    --epochs 10 \
    --learning_rate 5e-5 \
    --prefix $prefix \
    --log_file logs/tmp.log
    # logs/$prefix.log # > logs/screen_$prefix.log 2>&1
    # --save_path checkpoints/$prefix # > logs/screen_$prefix.log 2>&1
--------------------------------------------------------------------------------
/Sentiment_Analysis/scripts/Trans_with_FAN/test_ours.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Evaluate the Transformer whose FFNs were replaced with FAN layers (no gate).

export CUDA_VISIBLE_DEVICES=0

num_hidden_layers=12
prefix=ours_trans

dataset=amazon_polarity # (imdb, sentiment140, amazon_polarity)

# create log dir
log_dir=./logs/$prefix
mkdir -p $log_dir

python test.py \
    --batch_size 128 \
    --replace_ffn \
    --prefix $prefix \
    --dataset $dataset \
    --num_hidden_layers $num_hidden_layers \
    --log_file $log_dir/test_on_$dataset.log # > logs/screen_$prefix.log 2>&1
    # --save_path checkpoints/$prefix # > logs/screen_$prefix.log 2>&1
--------------------------------------------------------------------------------
/Sentiment_Analysis/scripts/Trans_with_FAN/test_ours_withgate.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Evaluate the FAN-FFN Transformer with the learned gate over a list of datasets.

export CUDA_VISIBLE_DEVICES=0

num_hidden_layers=12
prefix=ours_trans_withgate

datasets=("imdb")
# ( "imdb" "sentiment140" "amazon_polarity")

log_dir=./logs/$prefix
mkdir -p $log_dir

# One evaluation run (and log file) per dataset.
for dataset in "${datasets[@]}"
do
    python test.py \
        --batch_size 128 \
        --replace_ffn \
        --with_gate \
        --prefix $prefix \
        --dataset $dataset \
        --num_hidden_layers $num_hidden_layers \
        --log_file $log_dir/test_on_$dataset.log
done
--------------------------------------------------------------------------------
/Sentiment_Analysis/scripts/Trans_with_FAN/train_ours.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Train the Transformer with FAN feed-forward layers (no gate).

export CUDA_VISIBLE_DEVICES=0

num_hidden_layers=12
prefix=ours_trans

# logging.basicConfig in train.py raises FileNotFoundError if logs/ is missing.
mkdir -p logs

python train.py \
    --batch_size 128 \
    --epochs 50 \
    --learning_rate 5e-5 \
    --prefix $prefix \
    --replace_ffn \
    --num_hidden_layers $num_hidden_layers \
    --log_file logs/$prefix.log # > logs/screen_$prefix.log 2>&1
    # --save_path checkpoints/$prefix # > logs/screen_$prefix.log 2>&1
--------------------------------------------------------------------------------
/Sentiment_Analysis/scripts/Trans_with_FAN/train_ours_withgate.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Train the Transformer with gated FAN feed-forward layers.

export CUDA_VISIBLE_DEVICES=0

prefix=ours_trans_withgate

num_hidden_layers=12

# logging.basicConfig in train.py raises FileNotFoundError if logs/ is missing.
mkdir -p logs

python train.py \
    --batch_size 128 \
    --epochs 50 \
    --learning_rate 5e-5 \
    --prefix $prefix \
    --replace_ffn \
    --with_gate \
    --num_hidden_layers $num_hidden_layers \
    --log_file "logs/${prefix}_hlayers_${num_hidden_layers}.log" # > logs/screen_$prefix.log 2>&1
    # --save_path checkpoints/$prefix # > logs/screen_$prefix.log 2>&1
--------------------------------------------------------------------------------
/Sentiment_Analysis/scripts/mamba/test_mamba.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Evaluate the trained Mamba checkpoint on every sentiment dataset.

export CUDA_VISIBLE_DEVICES=0

model=mamba

hidden_layers=24
hidden_size=768
learning_rate=5e-5
prefix="${model}_hlayers_${hidden_layers}_hsize_${hidden_size}_lr_${learning_rate}_maxpooler"

# dataset list
datasets=("imdb" "sentiment140" "amazon_polarity")

log_dir="./logs/${model}"
mkdir -p "$log_dir"

for dataset in "${datasets[@]}"; do
    python test.py \
        --batch_size 128 \
        --prefix "$prefix" \
        --dataset "$dataset" \
        --num_hidden_layers "$hidden_layers" \
        --hidden_size "$hidden_size" \
        --max_pooler \
        --model "$model" \
        --log_file "$log_dir/${prefix}_test_on_${dataset}.log"
done
--------------------------------------------------------------------------------
/Sentiment_Analysis/scripts/mamba/train_mamba.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Train the Mamba sentiment classifier with max pooling over the sequence.

export CUDA_VISIBLE_DEVICES=0

model=mamba

hidden_layers=24
hidden_size=768
learning_rate=5e-5
prefix="${model}_hlayers_${hidden_layers}_hsize_${hidden_size}_lr_${learning_rate}_maxpooler"

# logging.basicConfig in train.py raises FileNotFoundError if logs/ is missing.
mkdir -p logs

python train.py \
    --batch_size 128 \
    --epochs 50 \
    --learning_rate $learning_rate \
    --prefix $prefix \
    --num_hidden_layers $hidden_layers \
    --hidden_size $hidden_size \
    --max_pooler \
    --model $model \
    --log_file logs/$prefix.log # > logs/screen_$prefix.log 2>&1
    # --save_path checkpoints/$prefix # > logs/screen_$prefix.log 2>&1
--------------------------------------------------------------------------------
/Sentiment_Analysis/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import torch
4 | from model.build_model import build_model
5 | import logging
6 | from tqdm.auto import tqdm
7 | import evaluate
8 | from get_dataloader import tokenized_dataloader
9 |
10 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
11 |
def setup_logging(log_file):
    """Route all logging output for this evaluation run into *log_file*."""
    fmt = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(filename=log_file, level=logging.INFO, format=fmt)
15 |
def test(args):
    """Evaluate a saved checkpoint on the test split(s) of args.dataset.

    Loads ``checkpoints/<prefix>_best.pth``, runs one forward pass over the
    evaluation loader, and logs average loss and accuracy to args.log_file.
    """
    setup_logging(args.log_file)
    logging.info("--------------- Start testing ---------------")

    dataloader = tokenized_dataloader(args=args)
    # imdb has its own held-out test split; for the other datasets both
    # splits are evaluated (the model was trained on a different corpus).
    if args.dataset == "imdb":
        part_list = ["test"]
    else:
        part_list = ["train", "test"]
    test_loader = dataloader.get_data_loaders(part_list=part_list)
    logging.info("test data on {} of {}".format(part_list, args.dataset))

    model = build_model(args)
    print(model)
    model_path = os.path.join('checkpoints', args.prefix + "_best.pth")
    # map_location lets a CUDA-trained checkpoint load on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=device))
    # Ensure model weights live on the same device as the batches below
    # (idempotent if build_model already moved it).
    model.to(device)

    progress_bar = tqdm(range(len(test_loader)))

    metric = evaluate.load("accuracy")
    # testing
    model.eval()
    total_loss = 0.0
    for batch in test_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        metric.add_batch(predictions=predictions, references=batch["labels"])
        progress_bar.update(1)

    avg_loss = total_loss / len(test_loader)
    val_accuracy = metric.compute()['accuracy']

    logging.info(f"Test {args.prefix} Model on {args.dataset} part_list={part_list}")
    logging.info(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {val_accuracy:.4f}")
55 |
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_classes", type=int, default=2)
    parser.add_argument("--replace_ffn", action='store_true')
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--learning_rate", type=float, default=5e-5)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--log_file", type=str, default="training.log")
    parser.add_argument("--prefix", type=str, default="baseline")
    parser.add_argument("--model", type=str, default="trans")
    # FAN layer option
    parser.add_argument("--with_gate", action='store_true')
    # transformer / mamba capacity
    parser.add_argument("--num_hidden_layers", type=int, default=12)
    parser.add_argument("--hidden_size", type=int, default=768)
    # mamba-only pooling option
    parser.add_argument("--max_pooler", action='store_true')
    # dataset selection
    parser.add_argument("--dataset", type=str, default="sst2")

    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device
    test(args)
82 |
--------------------------------------------------------------------------------
/Sentiment_Analysis/train.py:
--------------------------------------------------------------------------------
1 | import time
2 | import argparse
3 | import torch
4 | import torch.optim as optim
5 | from torch.utils.data import DataLoader
6 | from datasets import load_dataset
7 | from model.build_model import build_model
8 | import logging
9 | from transformers import BertTokenizer, AutoTokenizer, DataCollatorWithPadding, BertConfig, MambaConfig
10 | from transformers import get_scheduler
11 | from tqdm.auto import tqdm
12 | from functools import partial
13 | import evaluate
14 | from utils import view_params
15 | from get_dataloader import tokenized_dataloader
16 |
17 |
def setup_logging(log_file):
    """Configure the root logger to write INFO-level records to *log_file*."""
    logging.basicConfig(
        filename=log_file,
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
21 |
def tokenize_function(tokenizer, examples):
    """Tokenize the 'sentence' field of a batch into fixed-length pt tensors."""
    encode_kwargs = dict(
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )
    return tokenizer(examples['sentence'], **encode_kwargs)
30 |
def train(args):
    """Train the configured model on args.dataset, validate every epoch, and
    checkpoint the weights achieving the best validation accuracy.

    Expects ``args`` to carry model/dataset selection plus batch_size, epochs,
    learning_rate, prefix, log_file and device (set in ``__main__``). Writes
    the best checkpoint to ``checkpoints/<prefix>_best.pth``.
    """
    torch.manual_seed(42)  # fixed seed so runs are reproducible
    setup_logging(args.log_file)
    logging.info("--------------- Start training ---------------")

    dataloader = tokenized_dataloader(args=args)
    train_loader = dataloader.get_data_loaders(part_list=['train'])
    val_loader = dataloader.get_data_loaders(part_list=['validation'])

    logging.info("the model is: {}".format(args.model))

    model = build_model(args)

    # log total parameter count for the record
    params = view_params(model)
    logging.info(params)

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    logging.info("Optimizer: {}".format(optimizer))
    logging.info(f"Learning rate: {args.learning_rate}")

    # cosine-with-restarts schedule over the whole run, no warmup
    num_training_steps = args.epochs * len(train_loader)
    lr_scheduler = get_scheduler(
        name="cosine_with_restarts",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
    )

    best_val_accuracy = 0.0
    progress_bar = tqdm(range(num_training_steps))

    for epoch in range(args.epochs):
        model.train()
        metric = evaluate.load("accuracy")

        total_loss = 0.0
        for batch in train_loader:
            batch = {k: v.to(args.device) for k, v in batch.items()}
            outputs = model(**batch)
            logits = outputs.logits
            # the 'linear' model emits a single score; others emit per-class logits
            if args.model == 'linear':
                predictions = (logits > 0.5).long()
            else:
                predictions = torch.argmax(logits, dim=-1)
            metric.add_batch(predictions=predictions, references=batch["labels"])
            loss = outputs.loss
            total_loss += loss.item()
            loss.backward()

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)

        avg_loss = total_loss / len(train_loader)
        # NOTE(review): reusing `metric` for validation below assumes
        # metric.compute() resets the accumulated state — confirm against the
        # evaluate library version in use.
        train_accuracy = metric.compute()['accuracy']
        logging.info(f"Epoch {epoch + 1}, Average Loss: {avg_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")

        # validation
        model.eval()
        total_loss = 0.0
        for batch in val_loader:
            batch = {k: v.to(args.device) for k, v in batch.items()}
            with torch.no_grad():
                outputs = model(**batch)
            loss = outputs.loss
            total_loss += loss.item()
            logits = outputs.logits
            if args.model == 'linear':
                predictions = (logits > 0.5).long()
            else:
                predictions = torch.argmax(logits, dim=-1)
            metric.add_batch(predictions=predictions, references=batch["labels"])

        avg_loss = total_loss / len(val_loader)
        val_accuracy = metric.compute()['accuracy']
        logging.info(f"Epoch {epoch + 1}, Validation Loss: {avg_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

        # save model with best validation accuracy
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            save_directory = f"checkpoints/{args.prefix}_best.pth"
            torch.save(model.state_dict(), save_directory)
            logging.info("Save model with best validation accuracy")
115 |
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_classes", type=int, default=2)
    parser.add_argument("--replace_ffn", action='store_true')
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--learning_rate", type=float, default=5e-5)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--log_file", type=str, default="training.log")
    parser.add_argument("--prefix", type=str, default="baseline")
    parser.add_argument("--model", type=str, default="trans")
    # FAN layer option
    parser.add_argument("--with_gate", action='store_true')
    # transformer / mamba capacity
    parser.add_argument("--num_hidden_layers", type=int, default=12)
    parser.add_argument("--hidden_size", type=int, default=768)
    # mamba-only pooling option
    parser.add_argument("--max_pooler", action='store_true')
    # dataset selection
    parser.add_argument("--dataset", type=str, default="sst2")

    args = parser.parse_args()
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train(args)
--------------------------------------------------------------------------------
/Sentiment_Analysis/utils.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
def view_params(model) -> str:
    """Return a human-readable total parameter count for *model*, in millions."""
    n_params = sum(p.numel() for p in model.parameters())
    return "Total parameters: " + str(n_params / 1e6) + ' M'
6 |
7 |
class Pooling(nn.Module):
    """Collapse the sequence axis of a (batch, seq, feature) tensor.

    ``pool_type`` selects mean or max pooling over the sequence dimension.
    """

    def __init__(self, pool_type='mean'):
        super(Pooling, self).__init__()
        self.pool_type = pool_type
        pool_factories = {
            'mean': nn.AdaptiveAvgPool1d,
            'max': nn.AdaptiveMaxPool1d,
        }
        if pool_type not in pool_factories:
            raise ValueError("pool_type must be either 'mean' or 'max'.")
        self.pool = pool_factories[pool_type](1)

    def forward(self, x):
        # (batch, seq, feat) -> (batch, feat, seq) so the 1d pool acts over seq
        transposed = x.transpose(1, 2)
        pooled = self.pool(transposed)  # (batch, feat, 1)
        return pooled.squeeze(2)        # (batch, feat)
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/gen_dataset.py:
--------------------------------------------------------------------------------
1 | # The Code of this part is based on KAN (https://github.com/KindXiaoming/pykan).
2 |
3 | import torch
4 | import scipy.special
5 | import numpy as np
6 | import json
7 | import os
8 | from tqdm import tqdm
9 | from kan import *
10 |
11 |
12 | device = torch.device('cpu')
13 |
def produce_dataset(dataset_idx):
    """Build one of the four symbolic-regression benchmark datasets.

    Args:
        dataset_idx: integer in {0, 1, 2, 3} selecting the target function.

    Returns:
        The dict produced by kan's ``create_dataset`` (train/test inputs and labels).

    Raises:
        ValueError: for an unknown index (previously fell through and raised
        UnboundLocalError on the return).
    """
    if dataset_idx == 0:
        # bessel_j0 already returns a tensor; the old torch.tensor(...) wrap
        # only triggered a copy-construct warning.
        f = lambda x: torch.special.bessel_j0(20 * x[:, [0]])
        dataset = create_dataset(f, n_var=1, train_num=3000, device=device)
    elif dataset_idx == 1:
        def f(x):
            return torch.exp(torch.sin(torch.pi * x[:, [0]]) + x[:, [0]]**2)
        # NOTE(review): the formula only reads x[:, 0] although n_var=2 —
        # confirm this matches the intended benchmark definition.
        dataset = create_dataset(f, n_var=2, train_num=3000, device=device)
    elif dataset_idx == 2:
        f = lambda x: x[:, [0]] * x[:, [1]]
        dataset = create_dataset(f, n_var=2, train_num=3000, device=device)
    elif dataset_idx == 3:
        f = lambda x: torch.exp((torch.sin(torch.pi*(x[:,[0]]**2+x[:,[1]]**2))+torch.sin(torch.pi*(x[:,[2]]**2+x[:,[3]]**2)))/2)
        dataset = create_dataset(f, n_var=4, train_num=3000, device=device)
    else:
        raise ValueError(f'Invalid dataset index: {dataset_idx}')
    return dataset
30 |
31 |
if __name__ == '__main__':
    save_dir = 'dataset'
    os.makedirs(save_dir, exist_ok=True)

    for idx in range(4):
        # dataset 1 is intentionally skipped here
        if idx == 1:
            continue
        torch.save(produce_dataset(idx), f'{save_dir}/dataset_{idx}.pt')
        print(f'dataset_{idx} saved into {save_dir}/dataset_{idx}.pt')
43 |
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/requirements.txt:
--------------------------------------------------------------------------------
1 | kan
2 | tqdm
3 | torch
4 | numpy
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/run_train_fan.sh:
--------------------------------------------------------------------------------
# Train FAN on all four symbolic-regression datasets.
dataset_ids=(0 1 2 3)

for dataset_id in "${dataset_ids[@]}"
do
    # Use the full flag name --dataset_idx: the old --dataset_id only worked
    # via argparse prefix abbreviation and breaks if another --dataset_* flag
    # is ever added.
    CUDA_VISIBLE_DEVICES=0 python train_fan.py \
        --dataset_idx $dataset_id \
        --dataset_dir dataset \
        --save_dir fan_checkpoint
done
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/run_train_kan.sh:
--------------------------------------------------------------------------------
# Train KAN on all four symbolic-regression datasets.
dataset_ids=(0 1 2 3)

for dataset_id in "${dataset_ids[@]}"
do
    # --dataset_idx spelled out (was --dataset_id, which relied on argparse
    # prefix abbreviation).
    CUDA_VISIBLE_DEVICES=0 python train_kan.py \
        --dataset_idx $dataset_id \
        --dataset_dir dataset \
        --save_dir kan_checkpoint
done
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/run_train_mlp.sh:
--------------------------------------------------------------------------------
# Train the MLP baseline on all four symbolic-regression datasets.
dataset_ids=(0 1 2 3)

for dataset_id in "${dataset_ids[@]}"
do
    # Script name is train_mlp.py — the old train_MLP.py fails on
    # case-sensitive filesystems. --dataset_idx spelled out (was --dataset_id,
    # which relied on argparse prefix abbreviation).
    CUDA_VISIBLE_DEVICES=0 python train_mlp.py \
        --dataset_idx $dataset_id \
        --dataset_dir dataset \
        --save_dir mlp_checkpoint
done
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/run_train_transformer.sh:
--------------------------------------------------------------------------------
# Train the Transformer baseline on all four symbolic-regression datasets.
# Only ids 0-3 exist; the previous id 4 hit no branch in train_transformer.py
# and crashed with an undefined `dataset`.
dataset_ids=(0 1 2 3)

for dataset_id in "${dataset_ids[@]}"
do
    # --dataset_idx spelled out (was --dataset_id, which relied on argparse
    # prefix abbreviation).
    CUDA_VISIBLE_DEVICES=0 python train_transformer.py \
        --dataset_idx $dataset_id \
        --dataset_dir dataset \
        --save_dir transformer_checkpoint
done
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/train_fan.py:
--------------------------------------------------------------------------------
1 | import torch, os, argparse, json
2 | import torch.nn as nn
3 | from tqdm import tqdm
4 | from kan import LBFGS
5 | import torch.optim as optim
6 | from torch.utils.data import DataLoader, TensorDataset
7 | import numpy as np
8 | import torch
9 | import torch.nn.functional as F
10 |
11 |
12 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13 |
def parse_args():
    """Parse command-line options for FAN training.

    The description previously read 'Train MLP' — a copy-paste from
    train_mlp.py.
    """
    parser = argparse.ArgumentParser(description='Train FAN')
    parser.add_argument('--dataset_idx', type=int, default=0, help='Dataset index')
    parser.add_argument('--dataset_dir', type=str, default='dataset')
    parser.add_argument('--save_dir', type=str, default='kan_checkpoint')
    return parser.parse_args()
20 |
21 |
def load_dataset(args, dataset_idx):
    """Load dataset_<idx>.pt from args.dataset_dir and move its four split
    tensors onto the module-level `device`."""
    path = f'{args.dataset_dir}/dataset_{dataset_idx}.pt'
    print(f'Loading dataset_{dataset_idx} from {path}')

    dataset = torch.load(path)
    for key in ('train_input', 'test_input', 'train_label', 'test_label'):
        dataset[key] = dataset[key].to(device)
    return dataset
31 |
32 |
class FANLayer(nn.Module):
    """Fourier Analysis Network layer.

    Projects the input into a periodic part ``p`` — encoded as
    ``[cos(p), sin(p)]`` — and a GELU-activated non-periodic part ``g``;
    the concatenation has exactly ``output_dim`` features.
    """

    def __init__(self, input_dim, output_dim, bias=True):
        super(FANLayer, self).__init__()
        p_dim = output_dim // 4
        # g covers whatever cos/sin do not, so the concatenated width is
        # exactly output_dim even when output_dim is not a multiple of 4
        # (the old `output_dim - output_dim//2` only matched for multiples of 4).
        g_dim = output_dim - 2 * p_dim
        self.input_linear_p = nn.Linear(input_dim, p_dim, bias=bias)
        # propagate `bias` here too; it was previously ignored for the g branch
        self.input_linear_g = nn.Linear(input_dim, g_dim, bias=bias)
        self.activation = nn.GELU()

    def forward(self, src):
        g = self.activation(self.input_linear_g(src))
        p = self.input_linear_p(src)

        output = torch.cat((torch.cos(p), torch.sin(p), g), dim=-1)
        return output
46 |
class FAN(nn.Module):
    """Stack of FANLayers behind a linear embedding, with a linear head."""

    def __init__(self, input_dim=1, output_dim=1, hidden_dim=2048, num_layers=3):
        super(FAN, self).__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        hidden_blocks = [FANLayer(hidden_dim, hidden_dim) for _ in range(num_layers - 1)]
        # final projection back to the target dimension
        self.layers = nn.ModuleList(hidden_blocks + [nn.Linear(hidden_dim, output_dim)])

    def forward(self, src):
        hidden = self.embedding(src)
        for block in self.layers:
            hidden = block(hidden)
        return hidden
61 |
62 |
def train_with_test(model, dataset, ckpt_dir):
    """Fit *model* full-batch with kan's LBFGS, checkpoint it, and return the
    test-set MSE.

    `dataset` is the dict produced by load_dataset (whole-split tensors).
    Saves weights to <ckpt_dir>/model.pth.
    """

    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    criterion = nn.MSELoss()
    # Tiny tolerances effectively disable LBFGS early stopping, so the loop
    # below always runs all 1800 outer steps. Note lr/history differ from the
    # MLP script (1e-4 / 40 vs 1e-2 / 10).
    optimizer = LBFGS(filter(lambda p: p.requires_grad, model.parameters()),
                      lr=0.0001,
                      history_size=40,
                      line_search_fn="strong_wolfe",
                      tolerance_grad=1e-32,
                      tolerance_change=1e-32,
                      tolerance_ys=1e-32)

    model.train()
    for _ in tqdm(range(1800)):
        def closure():
            # LBFGS re-evaluates loss and gradients through this closure
            optimizer.zero_grad()
            output = model(dataset['train_input'])
            loss = criterion(output, dataset['train_label'])
            loss.backward()
            return loss
        optimizer.step(closure)

    torch.save(model.state_dict(), f'{ckpt_dir}/model.pth')

    model.eval()
    with torch.no_grad():
        output = model(dataset['test_input'])
        test_loss = criterion(output, dataset['test_label']).item()
    return test_loss
93 |
94 |
if __name__ == '__main__':
    args = parse_args()
    # select dataset and its input/output dimensionality
    if args.dataset_idx == 0:
        dataset = load_dataset(args, 0)
        input_size, output_size = 1, 1
    elif args.dataset_idx == 1:
        dataset = load_dataset(args, 1)
        input_size, output_size = 2, 1
    elif args.dataset_idx == 2:
        dataset = load_dataset(args, 2)
        input_size, output_size = 2, 1
    elif args.dataset_idx == 3:
        dataset = load_dataset(args, 3)
        input_size, output_size = 4, 1
    else:
        raise ValueError(f'Invalid dataset index: {args.dataset_idx}')

    save_dir = f'{args.save_dir}/dataset_{args.dataset_idx}'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    log_file = open(f'{save_dir}/results.jsonl', 'w')

    # sweep architecture sizes and record one JSON line per configuration
    for depth in [2, 3, 4, 5]:
        for hidden_size in [4, 8, 16, 32, 64, 128]:
            print(f'Depth: {depth}, Hidden size: {hidden_size}')
            model = FAN(input_dim=input_size, hidden_dim=hidden_size, output_dim=output_size, num_layers=depth).to(device)
            # FAN defines no get_param_size(); the old model.get_param_size()
            # call raised AttributeError after training. Count directly.
            param_size = sum(p.numel() for p in model.parameters())
            ckpt_dir = f'{save_dir}/depth_{depth}_hidden_{hidden_size}'
            test_loss = train_with_test(model, dataset, ckpt_dir)

            output_js = {
                'depth': depth,
                'hidden_size': hidden_size,
                'param_size': param_size,
                'test_loss': test_loss,
            }
            log_file.write(json.dumps(output_js) + '\n')
            log_file.flush()
    log_file.close()
132 |
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/train_kan.py:
--------------------------------------------------------------------------------
1 | from kan import *
2 | import torch
3 | import argparse, json
4 | import numpy as np
5 | import pdb
6 | import os
7 |
8 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
9 |
def parse_args():
    """Command-line options for KAN training."""
    cli = argparse.ArgumentParser(description='Train KAN')
    cli.add_argument('--dataset_idx', type=int, default=0, help='Dataset index')
    cli.add_argument('--dataset_dir', type=str, default='dataset')
    cli.add_argument('--save_dir', type=str, default='kan_checkpoint')
    return cli.parse_args()
16 |
17 |
def load_dataset(args, dataset_idx):
    """Read a pre-generated dataset dict and place all four split tensors on
    the module-level `device`."""
    src = f'{args.dataset_dir}/dataset_{dataset_idx}.pt'
    print(f'Loading dataset_{dataset_idx} from {src}')

    data = torch.load(src)
    data['train_input'] = data['train_input'].to(device)
    data['test_input'] = data['test_input'].to(device)
    data['train_label'] = data['train_label'].to(device)
    data['test_label'] = data['test_label'].to(device)
    return data
27 |
28 |
def compute_kan_size(width, grid, k):
    """Analytic parameter count for a KAN.

    Each entry of *width* is a pair whose first element is the node count of
    that layer (pykan's normalized width format).
    """
    total = 0
    for fan_in, fan_out in zip(width, width[1:]):
        total += fan_in[0] * fan_out[0] * (grid + k + 3) + fan_out[0]
    return total
34 |
35 |
if __name__ == '__main__':
    args = parse_args()
    # Pick the dataset and the KAN architecture (width = nodes per layer).
    if args.dataset_idx == 0:
        dataset = load_dataset(args, 0)
        width = [1, 1]
    elif args.dataset_idx == 1:
        dataset = load_dataset(args, 1)
        width = [2, 1, 1]
    elif args.dataset_idx == 2:
        dataset = load_dataset(args, 2)
        width = [2, 2, 1]
    elif args.dataset_idx == 3:
        dataset = load_dataset(args, 3)
        width = [4, 4, 2, 1]
    else:
        raise ValueError('Invalid dataset index')

    save_dir = f'{args.save_dir}/dataset_{args.dataset_idx}'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # append mode: repeated runs accumulate results in the same jsonl file
    log_file = open(f'{save_dir}/results.jsonl', 'a')

    # Progressively refine the spline grid, warm-starting each fit from the
    # previous model via model.refine(grid).
    grids = [3, 5, 10, 20, 50, 100, 200, 500, 1000]
    for i, grid in enumerate(grids):
        if i == 0:
            ckpt_dir = f'{save_dir}/ckpt'
            model = KAN(width=width, grid=grid, k=3, device=device, ckpt_path=ckpt_dir)
        else:
            model = model.refine(grid)
        results = model.fit(dataset, opt="LBFGS", steps=200, lr=0.01)

        output_js = {}
        output_js['grid'] = grid
        # NOTE(review): compute_kan_size indexes width[i][0], but `width` is
        # built from plain ints above — this relies on KAN(...) normalizing
        # the list in place (int -> [n, 0]); confirm for the pykan version in use.
        param_size = compute_kan_size(width, grid, 3)
        output_js['param_size'] = param_size
        output_js['train_loss'] = results['train_loss'][-1].item()
        output_js['test_loss'] = results['test_loss'][-1].item()
        log_file.write(json.dumps(output_js) + '\n')
        log_file.flush()
    log_file.close()
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/train_mlp.py:
--------------------------------------------------------------------------------
1 | import torch, os, argparse, json
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import numpy as np
5 | from torch.utils.data import DataLoader, TensorDataset
6 | from tqdm import tqdm
7 | from kan import LBFGS
8 |
9 |
10 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
11 |
def parse_args():
    """CLI options for the MLP symbolic-regression trainer."""
    cli = argparse.ArgumentParser(description='Train MLP')
    cli.add_argument('--dataset_idx', type=int, default=0, help='Dataset index')
    cli.add_argument('--dataset_dir', type=str, default='dataset')
    cli.add_argument('--save_dir', type=str, default='kan_checkpoint')
    return cli.parse_args()
18 |
19 |
def load_dataset(args, dataset_idx):
    """Load a saved dataset dict and move each split tensor onto the
    module-level `device`."""
    pt_path = f'{args.dataset_dir}/dataset_{dataset_idx}.pt'
    print(f'Loading dataset_{dataset_idx} from {pt_path}')

    dataset = torch.load(pt_path)
    for split in ('train_input', 'test_input', 'train_label', 'test_label'):
        dataset[split] = dataset[split].to(device)
    return dataset
29 |
30 |
class MLP(nn.Module):
    """Plain ReLU multi-layer perceptron with `depth` hidden Linear layers."""

    def __init__(self, input_size, hidden_size, output_size, depth):
        super(MLP, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.depth = depth

        # input -> hidden, then (depth - 1) hidden -> hidden blocks,
        # each followed by ReLU, then a linear output head
        dims = [input_size] + [hidden_size] * depth
        blocks = []
        for d_in, d_out in zip(dims, dims[1:]):
            blocks += [nn.Linear(d_in, d_out), nn.ReLU()]
        blocks.append(nn.Linear(hidden_size, output_size))
        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        return self.network(x)

    def get_param_size(self):
        """Analytic parameter count (weights + biases) of the network above."""
        counted = (self.input_size + 1) * self.hidden_size
        counted += (self.depth - 1) * (self.hidden_size + 1) * self.hidden_size
        counted += (self.hidden_size + 1) * self.output_size
        return counted
63 |
64 |
def train_with_test(model, dataset, ckpt_dir):
    """Fit *model* full-batch with kan's LBFGS, save the weights to
    <ckpt_dir>/model.pth, and return the test-set MSE."""

    os.makedirs(ckpt_dir, exist_ok=True)
    loss_fn = nn.MSELoss()
    trainable = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = LBFGS(trainable,
                      lr=0.01,
                      history_size=10,
                      line_search_fn="strong_wolfe",
                      tolerance_grad=1e-32,
                      tolerance_change=1e-32,
                      tolerance_ys=1e-32)

    def closure():
        # LBFGS re-evaluates loss and gradients through this closure
        optimizer.zero_grad()
        pred = model(dataset['train_input'])
        train_loss = loss_fn(pred, dataset['train_label'])
        train_loss.backward()
        return train_loss

    model.train()
    for _ in tqdm(range(1800)):
        optimizer.step(closure)

    torch.save(model.state_dict(), f'{ckpt_dir}/model.pth')

    model.eval()
    with torch.no_grad():
        pred = model(dataset['test_input'])
        test_loss = loss_fn(pred, dataset['test_label']).item()
    return test_loss
95 |
96 |
if __name__ == '__main__':
    args = parse_args()
    # select dataset and its input/output dimensionality
    if args.dataset_idx == 0:
        dataset = load_dataset(args, 0)
        input_size, output_size = 1, 1
    elif args.dataset_idx == 1:
        dataset = load_dataset(args, 1)
        input_size, output_size = 2, 1
    elif args.dataset_idx == 2:
        dataset = load_dataset(args, 2)
        input_size, output_size = 2, 1
    elif args.dataset_idx == 3:
        dataset = load_dataset(args, 3)
        input_size, output_size = 4, 1
    else:
        raise ValueError(f'Invalid dataset index: {args.dataset_idx}')

    save_dir = f'{args.save_dir}/dataset_{args.dataset_idx}'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    log_file = open(f'{save_dir}/results.jsonl', 'w')

    # sweep architecture sizes, one JSON line of results per configuration
    for depth in [2, 3, 4, 5]:
        for hidden_size in [2, 4, 8, 16, 32, 64, 128]:
            print(f'Depth: {depth}, Hidden size: {hidden_size}')
            model = MLP(input_size=input_size, hidden_size=hidden_size, output_size=output_size, depth=depth).to(device)
            ckpt_dir = f'{save_dir}/depth_{depth}_hidden_{hidden_size}'
            test_loss = train_with_test(model, dataset, ckpt_dir)

            output_js = {
                'depth': depth,
                'hidden_size': hidden_size,
                # single source of truth for the count (the earlier
                # sum(p.numel()) in the old code was a dead store)
                'param_size': model.get_param_size(),
                'test_loss': test_loss,
            }
            log_file.write(json.dumps(output_js) + '\n')
            log_file.flush()
    log_file.close()
134 |
--------------------------------------------------------------------------------
/Symbolic_Formula_Representation/train_transformer.py:
--------------------------------------------------------------------------------
1 | import torch, os, argparse, json
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import numpy as np
5 | from torch.utils.data import DataLoader, TensorDataset
6 | from tqdm import tqdm
7 | from kan import LBFGS
8 | from torch.nn import TransformerEncoder, TransformerEncoderLayer
9 | import pdb
10 |
11 |
12 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13 |
def parse_args():
    """Parse CLI options for Transformer training.

    The description previously read 'Train MLP' — a copy-paste from
    train_mlp.py.
    """
    parser = argparse.ArgumentParser(description='Train Transformer')
    parser.add_argument('--dataset_idx', type=int, default=0, help='Dataset index')
    parser.add_argument('--dataset_dir', type=str, default='dataset')
    parser.add_argument('--save_dir', type=str, default='kan_checkpoint')
    return parser.parse_args()
20 |
21 |
def load_dataset(args, dataset_idx):
    """Load dataset_<idx>.pt and move the train/test tensors onto the
    module-level `device`."""
    location = f'{args.dataset_dir}/dataset_{dataset_idx}.pt'
    print(f'Loading dataset_{dataset_idx} from {location}')

    dataset = torch.load(location)
    for name in ('train_input', 'test_input', 'train_label', 'test_label'):
        dataset[name] = dataset[name].to(device)
    return dataset
31 |
32 |
class TransformerRegressor(nn.Module):
    """Regress a scalar from a vector input by treating each input variable
    as one sequence token fed through a Transformer encoder."""

    def __init__(self, model_dim=64, num_layers=2):
        super(TransformerRegressor, self).__init__()
        self.model_dim = model_dim
        self.embedding = nn.Linear(1, model_dim)
        layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=4, dim_feedforward=4 * model_dim)
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers=num_layers)
        self.fc_out = nn.Linear(model_dim, 1)

    def forward(self, x):
        # (batch, n_var) -> (batch, n_var, 1): each input variable is a token
        tokens = self.embedding(x.unsqueeze(2))
        # the encoder expects (seq, batch, dim)
        encoded = self.transformer_encoder(tokens.permute(1, 0, 2))
        # average over tokens, then project down to a scalar
        return self.fc_out(encoded.mean(dim=0))

    def get_param_size(self):
        """Total number of parameters in the model."""
        return sum(p.numel() for p in self.parameters())
53 |
def train_with_test(model, dataset, ckpt_dir):
    """Fit *model* full-batch with kan's LBFGS, save the weights to
    <ckpt_dir>/model.pth, and return the test-set MSE.

    `dataset` is the dict produced by load_dataset (whole-split tensors).
    """

    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    criterion = nn.MSELoss()
    # Tiny tolerances effectively disable LBFGS early stopping, so the loop
    # below always runs all 1800 outer steps.
    optimizer = LBFGS(filter(lambda p: p.requires_grad, model.parameters()),
                      lr=0.01,
                      history_size=10,
                      line_search_fn="strong_wolfe",
                      tolerance_grad=1e-32,
                      tolerance_change=1e-32,
                      tolerance_ys=1e-32)

    model.train()
    for _ in tqdm(range(1800)):
        def closure():
            # LBFGS re-evaluates loss and gradients through this closure
            optimizer.zero_grad()
            output = model(dataset['train_input'])
            loss = criterion(output, dataset['train_label'])
            loss.backward()
            return loss
        optimizer.step(closure)

    torch.save(model.state_dict(), f'{ckpt_dir}/model.pth')

    model.eval()
    with torch.no_grad():
        output = model(dataset['test_input'])
        test_loss = criterion(output, dataset['test_label']).item()
    return test_loss
84 |
85 |
86 |
if __name__ == '__main__':
    args = parse_args()
    # Input/output sizes per dataset index (kept for parity with the sibling
    # training scripts; TransformerRegressor itself does not consume them).
    io_sizes = {0: (1, 1), 1: (2, 1), 2: (2, 1), 3: (4, 1)}
    if args.dataset_idx not in io_sizes:
        # Previously an unknown index fell through the if/elif chain and
        # left `dataset` undefined; fail loudly instead.
        raise ValueError(f'Unknown dataset_idx: {args.dataset_idx}')
    dataset = load_dataset(args, args.dataset_idx)
    input_size, output_size = io_sizes[args.dataset_idx]

    save_dir = f'{args.save_dir}/dataset_{args.dataset_idx}'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Sweep depth x width, logging one JSON line per configuration; the
    # context manager guarantees the log file is closed on any exit path.
    with open(f'{save_dir}/results.jsonl', 'w') as log_file:
        for layer_num in [2, 3, 4, 5]:
            for dim in [4, 8, 12, 16]:
                print(f'Layer Num: {layer_num}, Model Dim: {dim}, FFN Dim: {4*dim}')
                model = TransformerRegressor(dim, layer_num).to(device)
                ckpt_dir = f'{save_dir}/depth_{layer_num}_hidden_{dim}'
                test_loss = train_with_test(model, dataset, ckpt_dir)

                output_js = {
                    'depth': layer_num,
                    'hidden_size': dim,
                    'param_size': model.get_param_size(),
                    'test_loss': test_loss,
                }
                log_file.write(json.dumps(output_js) + '\n')
                log_file.flush()
125 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/data_provider/data_factory.py:
--------------------------------------------------------------------------------
1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_Custom, Dataset_Pred
2 | from torch.utils.data import DataLoader
3 |
# Registry mapping the --data argument value to the Dataset class that loads it.
data_dict = {
    'ETTh': Dataset_ETT_hour,
    'custom': Dataset_Custom,
}
8 |
9 |
def data_provider(args, flag):
    """Build the Dataset and DataLoader for a given split.

    flag is 'train', 'val', 'test', or 'pred'.  'pred' swaps in Dataset_Pred
    and forces batch_size=1; only training-style splits shuffle and drop the
    last incomplete batch.
    """
    Data = data_dict[args.data]
    timeenc = 0 if args.embed != 'timeF' else 1
    freq = args.freq

    if flag == 'pred':
        Data = Dataset_Pred
        shuffle_flag, drop_last, batch_size = False, False, 1
    elif flag == 'test':
        shuffle_flag, drop_last, batch_size = False, False, args.batch_size
    else:
        shuffle_flag, drop_last, batch_size = True, True, args.batch_size

    data_set = Data(
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=[args.seq_len, args.label_len, args.pred_len],
        features=args.features,
        target=args.target,
        timeenc=timeenc,
        freq=freq
    )
    print(flag, len(data_set))
    data_loader = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=shuffle_flag,
        num_workers=args.num_workers,
        drop_last=drop_last)
    return data_set, data_loader
49 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/data_provider/data_loader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | from torch.utils.data import Dataset
4 | from sklearn.preprocessing import StandardScaler
5 | from utils.timefeatures import time_features
6 | import warnings
7 |
8 | warnings.filterwarnings('ignore')
9 |
10 |
class Dataset_ETT_hour(Dataset):
    """ETT hourly dataset with fixed 12/4/4-month train/val/test splits.

    Yields (seq_x, seq_y, seq_x_mark, seq_y_mark) windows where seq_y
    overlaps seq_x by `label_len` steps and extends `pred_len` steps beyond.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh.csv',
                 target='OT', scale=True, timeenc=0, freq='h'):
        # size [seq_len, label_len, pred_len]
        if size is None:
            # Default: 16-day input, 4-day label overlap, 4-day horizon.
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))

        # Split borders in hours: 12 months train, 4 val, 4 test; val/test
        # start seq_len earlier so their first windows have full history.
        border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
        border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            # Multivariate: every column except the date.
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            # Univariate: target column only.
            df_data = df_raw[[self.target]]

        if self.scale:
            # Fit the scaler on the training slice only to avoid leakage.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[['date']][border1:border2]
        df_stamp['date'] = pd.to_datetime(df_stamp.date)
        if self.timeenc == 0:
            # Calendar features via the .dt accessor; the previous
            # Series.apply(..., 1) form passed a positional `convert_dtype`
            # argument that was removed in pandas 2.x.
            df_stamp['month'] = df_stamp['date'].dt.month
            df_stamp['day'] = df_stamp['date'].dt.day
            df_stamp['weekday'] = df_stamp['date'].dt.weekday
            df_stamp['hour'] = df_stamp['date'].dt.hour
            # drop(columns=...) replaces the positional-axis form removed in pandas 2.0.
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        # Input window [s_begin, s_end); target window overlaps it by label_len
        # and extends pred_len steps past the input.
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        # Number of full windows available in this split.
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Map scaled values back to the original data scale."""
        return self.scaler.inverse_transform(data)
97 |
98 |
class Dataset_Custom(Dataset):
    """Generic CSV dataset with a 70/10/20 train/val/test split.

    Columns are reordered to ['date', ...other features, target] before
    feature selection; windows are produced exactly as in Dataset_ETT_hour.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh.csv',
                 target='OT', scale=True, timeenc=0, freq='h'):
        # size [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))

        '''
        df_raw.columns: ['date', ...(other features), target feature]
        '''
        # Move the target to the last column.
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]
        # 70% train / 20% test / remainder val; val and test start seq_len
        # early so their first windows have full history.
        num_train = int(len(df_raw) * 0.7)
        num_test = int(len(df_raw) * 0.2)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            # Fit the scaler on the training slice only to avoid leakage.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[['date']][border1:border2]
        df_stamp['date'] = pd.to_datetime(df_stamp.date)
        if self.timeenc == 0:
            # .dt accessor replaces Series.apply(..., 1), whose positional
            # `convert_dtype` argument was removed in pandas 2.x.
            df_stamp['month'] = df_stamp['date'].dt.month
            df_stamp['day'] = df_stamp['date'].dt.day
            df_stamp['weekday'] = df_stamp['date'].dt.weekday
            df_stamp['hour'] = df_stamp['date'].dt.hour
            # drop(columns=...) replaces the positional-axis form removed in pandas 2.0.
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        # Input window and overlapping target window, as in Dataset_ETT_hour.
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        # Number of full windows available in this split.
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Map scaled values back to the original data scale."""
        return self.scaler.inverse_transform(data)
195 |
196 |
class Dataset_Pred(Dataset):
    """Inference dataset: the last `seq_len` rows of the CSV plus generated
    future timestamps covering the `pred_len` horizon.

    With `inverse=True`, targets are returned in the original (unscaled) data
    scale while inputs stay scaled.
    """

    def __init__(self, root_path, flag='pred', size=None,
                 features='S', data_path='ETTh.csv',
                 target='OT', scale=True, inverse=False, timeenc=0, freq='15min', cols=None):
        # size [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['pred']

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq
        self.cols = cols
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))
        '''
        df_raw.columns: ['date', ...(other features), target feature]
        '''
        # Use the caller-supplied column order when given, otherwise the
        # CSV's own order; either way the target becomes the last column.
        if self.cols:
            cols = self.cols.copy()
            cols.remove(self.target)
        else:
            cols = list(df_raw.columns)
            cols.remove(self.target)
            cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]
        # Only the trailing seq_len rows are needed for prediction.
        border1 = len(df_raw) - self.seq_len
        border2 = len(df_raw)

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            # No held-out split here: the scaler is fit on all available data.
            self.scaler.fit(df_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        tmp_stamp = df_raw[['date']][border1:border2]
        tmp_stamp['date'] = pd.to_datetime(tmp_stamp.date)
        # Extend the timestamp axis pred_len steps into the future; index 0 of
        # date_range duplicates the last known date, so it is skipped below.
        pred_dates = pd.date_range(tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq)

        df_stamp = pd.DataFrame(columns=['date'])
        df_stamp.date = list(tmp_stamp.date.values) + list(pred_dates[1:])
        if self.timeenc == 0:
            # .dt accessor replaces Series.apply(..., 1), whose positional
            # `convert_dtype` argument was removed in pandas 2.x.
            df_stamp['month'] = df_stamp['date'].dt.month
            df_stamp['day'] = df_stamp['date'].dt.day
            df_stamp['weekday'] = df_stamp['date'].dt.weekday
            df_stamp['hour'] = df_stamp['date'].dt.hour
            # Bucket minutes into 15-minute slots.
            df_stamp['minute'] = df_stamp['date'].dt.minute // 15
            # drop(columns=...) replaces the positional-axis form removed in pandas 2.0.
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        if self.inverse:
            # Keep targets in the original scale.
            self.data_y = df_data.values[border1:border2]
        else:
            self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        # Only the label_len known steps are returned as the decoder seed;
        # the pred_len future values do not exist at inference time.
        if self.inverse:
            seq_y = self.data_x[r_begin:r_begin + self.label_len]
        else:
            seq_y = self.data_y[r_begin:r_begin + self.label_len]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return len(self.data_x) - self.seq_len + 1

    def inverse_transform(self, data):
        """Map scaled values back to the original data scale."""
        return self.scaler.inverse_transform(data)
301 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/exp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YihongDong/FAN/7f1b16a1fdf2e36e8b123109d7f6b6987361a1fe/Timeseries_Forecasting/exp/__init__.py
--------------------------------------------------------------------------------
/Timeseries_Forecasting/exp/exp_basic.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 |
4 |
class Exp_Basic(object):
    """Base experiment class: resolves the compute device and builds the model.

    Subclasses must implement `_build_model`; the remaining hooks
    (`_get_data`, `vali`, `train`, `test`) are no-op placeholders.
    """

    def __init__(self, args):
        self.args = args
        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)

    def _build_model(self):
        # Subclasses must override this to return an nn.Module.
        # (The original had an unreachable `return None` after the raise.)
        raise NotImplementedError

    def _acquire_device(self):
        """Select the cuda/cpu device per args; pins CUDA_VISIBLE_DEVICES."""
        if self.args.use_gpu:
            # Restrict visible GPUs before creating the device handle.
            os.environ["CUDA_VISIBLE_DEVICES"] = str(
                self.args.gpu) if not self.args.use_multi_gpu else self.args.devices
            device = torch.device('cuda:{}'.format(self.args.gpu))
            print('Use GPU: cuda:{}'.format(self.args.gpu))
        else:
            device = torch.device('cpu')
            print('Use CPU')
        return device

    def _get_data(self):
        pass

    def vali(self):
        pass

    def train(self):
        pass

    def test(self):
        pass
37 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/exp/exp_main.py:
--------------------------------------------------------------------------------
1 | import logging
2 | logging.basicConfig(format='%(asctime)s,%(msecs)03d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
3 | datefmt='%Y-%m-%d:%H:%M:%S',
4 | level=logging.INFO)
5 |
6 | from data_provider.data_factory import data_provider
7 | from exp.exp_basic import Exp_Basic
8 | from models import Modified_Transformer
9 | from utils.tools import EarlyStopping, adjust_learning_rate, visual
10 | from utils.metrics import metric
11 |
12 | import numpy as np
13 | import torch
14 | import torch.nn as nn
15 | from torch import optim
16 |
17 | import os
18 | import time
19 |
20 | import warnings
21 | import numpy as np
22 |
23 | warnings.filterwarnings('ignore')
24 |
25 |
class Exp_Main(Exp_Basic):
    """Main experiment: trains, validates, tests, and predicts with the
    Modified_Transformer forecasting model."""

    def __init__(self, args):
        super(Exp_Main, self).__init__(args)

    def _build_model(self):
        """Instantiate the configured model and optionally wrap it for multi-GPU."""
        # Registry of supported model modules; each must expose a Model class.
        model_dict = {
            'Modified_Transformer': Modified_Transformer,
        }
        model = model_dict[self.args.model].Model(self.args).float()
        from utils.tools import count_parameters
        parameters = count_parameters(model)
        print('the parameters of the model: {} M'.format(parameters/1e6))
        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)

        return model

    def _get_data(self, flag):
        # Thin wrapper over the data_provider factory.
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        criterion = nn.MSELoss()
        return criterion

    def _predict(self, batch_x, batch_y, batch_x_mark, batch_y_mark):
        """Run one forward pass; returns (outputs, batch_y), both trimmed to
        the prediction horizon and the selected feature dimension."""
        # decoder input: label_len known steps followed by pred_len zeros.
        dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
        dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
        # encoder - decoder

        def _run_model():
            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            if self.args.output_attention:
                # Model returns (outputs, attention) in this mode.
                outputs = outputs[0]
            return outputs

        if self.args.use_amp:
            with torch.cuda.amp.autocast():
                outputs = _run_model()
        else:
            outputs = _run_model()

        # 'MS' keeps only the last (target) feature; otherwise keep all.
        f_dim = -1 if self.args.features == 'MS' else 0
        outputs = outputs[:, -self.args.pred_len:, f_dim:]
        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)

        return outputs, batch_y

    def vali(self, vali_data, vali_loader, criterion):
        """Average the loss over a loader with gradients disabled."""
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float()

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                outputs, batch_y = self._predict(batch_x, batch_y, batch_x_mark, batch_y_mark)

                pred = outputs.detach().cpu()
                true = batch_y.detach().cpu()

                loss = criterion(pred, true)

                total_loss.append(loss)
        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss

    def train(self, setting):
        """Full training loop with early stopping on validation loss;
        reloads the best checkpoint into self.model before returning."""
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        if self.args.use_amp:
            # Mixed precision: scale the loss to avoid fp16 gradient underflow.
            scaler = torch.cuda.amp.GradScaler()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()
                batch_x = batch_x.float().to(self.device)

                batch_y = batch_y.float().to(self.device)
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                outputs, batch_y = self._predict(batch_x, batch_y, batch_x_mark, batch_y_mark)


                loss = criterion(outputs, batch_y)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    # Periodic progress report with a rough ETA.
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                if self.args.use_amp:
                    scaler.scale(loss).backward()
                    scaler.step(model_optim)
                    scaler.update()
                else:
                    loss.backward()
                    model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            # EarlyStopping also checkpoints the model when vali_loss improves.
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            adjust_learning_rate(model_optim, epoch + 1, self.args)

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return

    def test(self, setting, test=0):
        """Evaluate on the test split; saves sample plots and metric arrays.

        If `test` is truthy, first reloads the checkpoint for `setting`.
        """
        test_data, test_loader = self._get_data(flag='test')
        if test:
            print('loading model')
            self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))

        preds = []
        trues = []
        folder_path = './exp_figure/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                outputs, batch_y = self._predict(batch_x, batch_y, batch_x_mark, batch_y_mark)

                outputs = outputs.detach().cpu().numpy()
                batch_y = batch_y.detach().cpu().numpy()

                pred = outputs # outputs.detach().cpu().numpy() # .squeeze()
                true = batch_y # batch_y.detach().cpu().numpy() # .squeeze()

                preds.append(pred)
                trues.append(true)
                if i % 20 == 0:
                    # Every 20th batch, plot truth vs prediction for the
                    # first sample's last feature (input context included).
                    input = batch_x.detach().cpu().numpy()
                    gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)
                    pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))

        preds = np.concatenate(preds, axis=0)
        trues = np.concatenate(trues, axis=0)
        print('test shape:', preds.shape, trues.shape)
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
        print('test shape:', preds.shape, trues.shape)

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metric(preds, trues)
        print('mse:{}, mae:{}'.format(mse, mae))
        f = open("result.txt", 'a')
        f.write(setting + " \n")
        f.write('mse:{}, mae:{}'.format(mse, mae))
        f.write('\n')
        f.write('\n')
        f.close()

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        # np.save(folder_path + 'pred.npy', preds)
        # np.save(folder_path + 'true.npy', trues)

        return

    def predict(self, setting, load=False):
        """Run inference on the 'pred' split and save the raw predictions."""
        pred_data, pred_loader = self._get_data(flag='pred')

        if load:
            # Reload the best checkpoint saved during training.
            path = os.path.join(self.args.checkpoints, setting)
            best_model_path = path + '/' + 'checkpoint.pth'
            logging.info(best_model_path)
            self.model.load_state_dict(torch.load(best_model_path))

        preds = []

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(pred_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float()
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                outputs, batch_y = self._predict(batch_x, batch_y, batch_x_mark, batch_y_mark)

                pred = outputs.detach().cpu().numpy()  # .squeeze()
                preds.append(pred)

        preds = np.array(preds)
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        np.save(folder_path + 'real_prediction.npy', preds)

        return
277 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/layers/AutoCorrelation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import math
4 |
5 |
class AutoCorrelation(nn.Module):
    """
    AutoCorrelation Mechanism with the following two phases:
    (1) period-based dependencies discovery
    (2) time delay aggregation
    This block can replace the self-attention family mechanism seamlessly.
    """
    def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
        # `factor` scales how many top delays are kept: k = factor * log(L).
        super(AutoCorrelation, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def time_delay_agg_training(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the training phase.

        values/corr layout here is (batch, head, channel, length).
        """
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # find top k delays: correlation averaged over heads, channels, and
        # the whole batch (a single shared set of delays during training).
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
        weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
        # update corr: normalise the selected correlation scores.
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation: weighted sum of the series rolled by each delay.
        tmp_values = values
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            pattern = torch.roll(tmp_values, -int(index[i]), -1)
            delays_agg = delays_agg + pattern * \
                         (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def time_delay_agg_inference(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the inference phase.

        Unlike the training variant, delays are selected per batch element.
        """
        batch = values.shape[0]
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # index init
        init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0)\
            .repeat(batch, head, channel, 1).to(values.device)
        # find top k
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        weights, delay = torch.topk(mean_value, top_k, dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation: duplicating the series along time lets gather emulate
        # a circular shift without torch.roll.
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * \
                         (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def time_delay_agg_full(self, values, corr):
        """
        Standard version of Autocorrelation

        Selects top-k delays independently per (batch, head, channel).
        """
        batch = values.shape[0]
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # index init
        init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0)\
            .repeat(batch, head, channel, 1).to(values.device)
        # find top k
        top_k = int(self.factor * math.log(length))
        weights, delay = torch.topk(corr, top_k, dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[..., i].unsqueeze(-1)
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
        return delays_agg

    def forward(self, queries, keys, values, attn_mask):
        # queries: (B, L, H, E); values/keys: (B, S, H, D).
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        # Pad (with zeros) or truncate keys/values so their length matches L.
        if L > S:
            zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
            values = torch.cat([values, zeros], dim=1)
            keys = torch.cat([keys, zeros], dim=1)
        else:
            values = values[:, :L, :, :]
            keys = keys[:, :L, :, :]

        # period-based dependencies: cross-correlation computed in the
        # frequency domain (rfft -> multiply by conjugate -> irfft).
        q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
        k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
        res = q_fft * torch.conj(k_fft)
        corr = torch.fft.irfft(res, n=L, dim=-1)

        # time delay agg: cheaper shared-delay variant during training,
        # per-sample variant at inference.
        if self.training:
            V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
        else:
            V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)

        if self.output_attention:
            return (V.contiguous(), corr.permute(0, 3, 1, 2))
        else:
            return (V.contiguous(), None)
125 |
126 |
class AutoCorrelationLayer(nn.Module):
    """Multi-head wrapper around an auto-correlation mechanism.

    Projects queries/keys/values into per-head subspaces, delegates to the
    wrapped correlation module, and projects the result back to d_model.
    """

    def __init__(self, correlation, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AutoCorrelationLayer, self).__init__()

        # Default the per-head sizes to an even split of d_model.
        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_correlation = correlation
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        batch, q_len, _ = queries.shape
        _, k_len, _ = keys.shape
        heads = self.n_heads

        # Split the projected features into (batch, length, heads, depth).
        q = self.query_projection(queries).view(batch, q_len, heads, -1)
        k = self.key_projection(keys).view(batch, k_len, heads, -1)
        v = self.value_projection(values).view(batch, k_len, heads, -1)

        out, attn = self.inner_correlation(q, k, v, attn_mask)
        # Merge the heads back together and project to the model dimension.
        merged = out.view(batch, q_len, -1)

        return self.out_projection(merged), attn
160 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/layers/Autoformer_EncDec.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
class my_Layernorm(nn.Module):
    """LayerNorm variant for the seasonal component: after normalising,
    the per-sequence mean is subtracted so the output is zero-centred
    over the time axis."""

    def __init__(self, channels):
        super(my_Layernorm, self).__init__()
        self.layernorm = nn.LayerNorm(channels)

    def forward(self, x):
        normed = self.layernorm(x)
        # Remove the temporal mean, broadcast back over the sequence axis.
        seq_mean = normed.mean(dim=1, keepdim=True).repeat(1, x.shape[1], 1)
        return normed - seq_mean
18 |
19 |
class moving_avg(nn.Module):
    """Moving average over the time axis with edge-replication padding,
    used to extract the trend component of a series."""

    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        # Replicate the first/last time step so the pooled output keeps the
        # original sequence length (for odd kernel_size with stride 1).
        pad = (self.kernel_size - 1) // 2
        head = x[:, 0:1, :].repeat(1, pad, 1)
        tail = x[:, -1:, :].repeat(1, pad, 1)
        padded = torch.cat([head, x, tail], dim=1)
        # AvgPool1d pools over the last axis, so swap time and channels.
        smoothed = self.avg(padded.permute(0, 2, 1))
        return smoothed.permute(0, 2, 1)
37 |
38 |
class series_decomp(nn.Module):
    """Split a series into a residual (seasonal) part and a trend part,
    using a moving average as the trend estimator."""

    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        trend = self.moving_avg(x)
        seasonal = x - trend
        return seasonal, trend
51 |
52 |
class EncoderLayer(nn.Module):
    """Autoformer encoder layer with progressive decomposition.

    Self-attention and a kernel-1-conv feed-forward block are each followed
    by a series decomposition; only the seasonal component is propagated.
    """

    def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff if d_ff else 4 * d_model
        self.attention = attention
        # Position-wise feed-forward implemented as kernel-1 convolutions.
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        attn_out, attn = self.attention(x, x, x, attn_mask=attn_mask)
        x = x + self.dropout(attn_out)
        # Drop the trend after the attention residual.
        x, _ = self.decomp1(x)
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))
        res, _ = self.decomp2(x + hidden)
        return res, attn
80 |
81 |
class Encoder(nn.Module):
    """Autoformer encoder: a stack of attention layers, optionally
    interleaved with conv layers, finished by an optional norm."""

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = None if conv_layers is None else nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        attns = []
        if self.conv_layers is None:
            for layer in self.attn_layers:
                x, attn = layer(x, attn_mask=attn_mask)
                attns.append(attn)
        else:
            # Each conv layer follows one attention layer; the final
            # attention layer runs on its own afterwards.
            for layer, conv in zip(self.attn_layers, self.conv_layers):
                x, attn = layer(x, attn_mask=attn_mask)
                x = conv(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
110 |
111 |
class DecoderLayer(nn.Module):
    """Autoformer decoder layer with progressive decomposition.

    Each sub-block (self-attention, cross-attention, feed-forward) is
    followed by a series decomposition; the three extracted trends are
    summed and projected to ``c_out`` channels, while the seasonal part
    flows on to the next layer.
    """

    def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
                 moving_avg=25, dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        d_ff = d_ff if d_ff else 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.decomp3 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        # Maps the accumulated trend from d_model to the output channels.
        self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
                                    padding_mode='circular', bias=False)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        self_out = self.self_attention(x, x, x, attn_mask=x_mask)[0]
        x, trend1 = self.decomp1(x + self.dropout(self_out))
        cross_out = self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
        x, trend2 = self.decomp2(x + self.dropout(cross_out))
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))
        x, trend3 = self.decomp3(x + hidden)

        residual_trend = trend1 + trend2 + trend3
        residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2)
        return x, residual_trend
151 |
152 |
class Decoder(nn.Module):
    """Autoformer decoder: accumulates the trend emitted by each layer on
    top of an initial trend estimate while refining the seasonal part."""

    def __init__(self, layers, norm_layer=None, projection=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
        for layer in self.layers:
            x, layer_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
            trend = trend + layer_trend

        if self.norm is not None:
            x = self.norm(x)

        if self.projection is not None:
            x = self.projection(x)
        return x, trend
174 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/layers/Embed.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import math
4 |
class DataEmbedding_inverted(nn.Module):
    """Inverted data embedding: each variate's whole time series is treated
    as one token and linearly mapped to d_model."""

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding_inverted, self).__init__()
        # c_in is the sequence length here, since tokens are variates.
        self.value_embedding = nn.Linear(c_in, d_model)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        # [Batch, Time, Variate] -> [Batch, Variate, Time]
        x = x.permute(0, 2, 1)
        if x_mark is None:
            tokens = self.value_embedding(x)
        else:
            # Time-mark covariates are appended as extra variate tokens.
            tokens = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1))
        return self.dropout(tokens)
22 |
def compared_version(ver1, ver2):
    """Compare two dotted version strings component by component.

    Returns -1 if ver1 < ver2 and 1 if ver1 > ver2 within the shared
    components; when those are all equal, returns True if ver1 has at
    least as many components as ver2, otherwise False.

    NOTE(review): callers that use the result as a boolean should beware
    that -1 is truthy in Python, so "older" and "newer" both read as True
    in an `if` test — confirm that is intended at each call site.
    """
    parts1 = str(ver1).split(".")
    parts2 = str(ver2).split(".")

    for a, b in zip(parts1, parts2):
        a, b = int(a), int(b)
        if a < b:
            return -1
        if a > b:
            return 1

    # All shared components equal: decide by component count.
    return len(parts1) >= len(parts2)
46 |
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional encoding.

    The table is precomputed once for ``max_len`` positions and registered
    as a buffer, so it follows the module's device/dtype but is never
    trained.
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        # Fix: the original wrote `pe.require_grad = False`, a typo that only
        # created a bogus attribute; the intent is a non-trainable table.
        pe.requires_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        # Even feature indices get sine, odd indices cosine.
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)  # [1, max_len, d_model] for broadcasting over batch
        self.register_buffer('pe', pe)

    def forward(self, x):
        # Return encodings for the first x.size(1) positions.
        return self.pe[:, :x.size(1)]
65 |
66 |
class TokenEmbedding(nn.Module):
    """Embed raw series values with a circular 1D convolution over time."""

    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        # Version-dependent padding — presumably accounts for a Conv1d
        # circular-padding behavior change around torch 1.5; verify against
        # the torch release notes before changing.
        padding = 1 if compared_version(torch.__version__, '1.5.0') else 2
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular', bias=False)
        for module in self.modules():
            if isinstance(module, nn.Conv1d):
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        # [B, L, C] -> conv over time -> [B, L, d_model]
        return self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
80 |
81 |
class FixedEmbedding(nn.Module):
    """Embedding lookup backed by a frozen sinusoidal table.

    Behaves like ``nn.Embedding(c_in, d_model)`` but the weights are the
    standard sin/cos table and are never updated.
    """

    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        w = torch.zeros(c_in, d_model).float()
        # Fix: the original wrote `w.require_grad = False`, a typo that only
        # created a bogus attribute; the table is frozen via the Parameter's
        # requires_grad=False below anyway.
        w.requires_grad = False

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        # Even feature indices get sine, odd indices cosine.
        w[:, 0::2] = torch.sin(position * div_term)
        w[:, 1::2] = torch.cos(position * div_term)

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        # detach() keeps the lookup out of the autograd graph.
        return self.emb(x).detach()
100 |
101 |
class TemporalEmbedding(nn.Module):
    """Sum of calendar-feature embeddings (month, day, weekday, hour, plus
    minute when freq == 't').

    With ``embed_type == 'fixed'`` the tables are frozen sinusoids;
    otherwise they are learnable ``nn.Embedding`` tables.
    """

    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super(TemporalEmbedding, self).__init__()

        # Cardinality of each calendar field. minute_size=4 suggests minutes
        # are bucketed into 15-minute bins — confirm with the data pipeline.
        minute_size = 4
        hour_size = 24
        weekday_size = 7
        day_size = 32
        month_size = 13

        if embed_type == 'fixed':
            Embed = FixedEmbedding
        else:
            Embed = nn.Embedding
        # The minute table only exists for minutely data.
        if freq == 't':
            self.minute_embed = Embed(minute_size, d_model)
        self.hour_embed = Embed(hour_size, d_model)
        self.weekday_embed = Embed(weekday_size, d_model)
        self.day_embed = Embed(day_size, d_model)
        self.month_embed = Embed(month_size, d_model)

    def forward(self, x):
        # Column layout assumed: [month, day, weekday, hour, (minute)].
        codes = x.long()

        total = self.hour_embed(codes[:, :, 3]) \
            + self.weekday_embed(codes[:, :, 2]) \
            + self.day_embed(codes[:, :, 1]) \
            + self.month_embed(codes[:, :, 0])
        if hasattr(self, 'minute_embed'):
            total = total + self.minute_embed(codes[:, :, 4])
        return total
130 |
131 |
class TimeFeatureEmbedding(nn.Module):
    """Linear projection of continuous time features to d_model.

    The number of input features is determined by the frequency code.
    """

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super(TimeFeatureEmbedding, self).__init__()

        # Feature count per frequency code (e.g. hourly data carries 4).
        freq_map = {'h': 4, 't': 5, 's': 6, 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
        self.embed = nn.Linear(freq_map[freq], d_model, bias=False)

    def forward(self, x):
        return self.embed(x)
142 |
143 |
class DataEmbedding(nn.Module):
    """Standard input embedding: value (conv) + temporal + positional
    terms, followed by dropout."""

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        # 'timeF' uses continuous time features; any other embed_type uses
        # discrete calendar embeddings.
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        out = self.value_embedding(x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
        return self.dropout(out)
158 |
159 |
class DataEmbedding_wo_pos(nn.Module):
    """Input embedding without the positional term: value (conv) + temporal,
    followed by dropout."""

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding_wo_pos, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        # NOTE(review): constructed but never used in forward(); kept so the
        # module/buffer structure stays identical to the original.
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        out = self.value_embedding(x) + self.temporal_embedding(x_mark)
        return self.dropout(out)
174 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/layers/FANLayer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
class FANLayer(nn.Module):
    """Fourier Analysis Network (FAN) layer.

    The output concatenates a periodic part (cos/sin of a linear projection
    ``p``) with a non-periodic part (GELU of a linear projection ``g``),
    optionally mixed by a learnable scalar gate.

    Layout is [cos(p), sin(p), g]: p has output_dim // 4 features (so
    cos+sin give 2 * (output_dim // 4)) and g has
    output_dim - output_dim // 2 features.
    NOTE(review): these only sum to output_dim when output_dim is a
    multiple of 4 — confirm callers respect that.
    """

    def __init__(self, input_dim, output_dim, bias=True, with_gate=True):
        super(FANLayer, self).__init__()
        self.input_linear_p = nn.Linear(input_dim, output_dim // 4, bias=bias)
        # Fix: propagate `bias` here too — it was silently ignored before,
        # so bias=False still produced a biased projection for g.
        self.input_linear_g = nn.Linear(input_dim, (output_dim - output_dim // 2), bias=bias)
        self.activation = nn.GELU()
        if with_gate:
            # Scalar mixing gate, squashed through sigmoid in forward().
            self.gate = nn.Parameter(torch.randn(1, dtype=torch.float32))

    def forward(self, src):
        g = self.activation(self.input_linear_g(src))
        p = self.input_linear_p(src)

        if not hasattr(self, 'gate'):
            return torch.cat((torch.cos(p), torch.sin(p), g), dim=-1)
        gate = torch.sigmoid(self.gate)
        return torch.cat((gate * torch.cos(p), gate * torch.sin(p), (1 - gate) * g), dim=-1)
24 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/layers/SelfAttention_Family.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import torch
4 | import torch.nn as nn
5 |
6 | import numpy as np
7 | from math import sqrt
8 | from utils.masking import TriangularCausalMask, ProbMask
9 | from reformer_pytorch import LSHSelfAttention
10 | sys.path.append(os.path.dirname(os.path.abspath(__file__)))
11 | from FANLayer import FANLayer
12 |
13 |
14 |
class FullAttention(nn.Module):
    """Standard scaled dot-product attention over [B, L, H, E] inputs."""

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FullAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        # Default scale is 1/sqrt(head_dim).
        scale = self.scale if self.scale else 1. / sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)

        if self.mask_flag:
            # Fall back to a causal mask when none is supplied.
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        weights = self.dropout(torch.softmax(scale * scores, dim=-1))
        context = torch.einsum("bhls,bshd->blhd", weights, values)

        if self.output_attention:
            return (context.contiguous(), weights)
        else:
            return (context.contiguous(), None)
43 |
44 |
class ProbAttention(nn.Module):
    """ProbSparse attention.

    Instead of scoring every query against every key, only the top-u
    "active" queries (ranked by a sparsity measurement computed on a random
    sample of keys) attend exactly; all other query positions receive a
    cheap context estimate — the mean of the values, or their cumulative
    sum in the causal (masked) case.
    """

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor  # c in u = c * ln(L): controls sample/top-k sizes
        self.scale = scale  # optional override of the 1/sqrt(D) score scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
        """Score a random key sample per query, rank queries by sparsity,
        and return full QK scores for the n_top queries plus their indices."""
        # Q [B, H, L, D]
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # calculate the sampled Q_K
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        # NOTE(review): torch.randint makes this stochastic unless the
        # global seed is fixed; the sampled indices live on CPU.
        index_sample = torch.randint(L_K, (L_Q, sample_k))  # real U = U_part(factor*ln(L_k))*L_q
        K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
        Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()

        # find the Top_k queries with the sparsity measurement:
        # max score minus mean score — peaked distributions rank higher.
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # use the reduced Q to calculate Q_K
        Q_reduce = Q[torch.arange(B)[:, None, None],
                     torch.arange(H)[None, :, None],
                     M_top, :]  # factor*ln(L_q)
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        """Fallback context for queries outside the top-u set: mean of V
        (non-causal) or cumulative sum of V (causal)."""
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            # V_sum = V.sum(dim=-2)
            V_sum = V.mean(dim=-2)
            contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
        else:  # use mask
            assert (L_Q == L_V)  # requires that L_Q == L_V, i.e. for self-attention only
            contex = V.cumsum(dim=-2)
        return contex

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        """Overwrite the fallback context at the selected query positions
        with the exact attention output (mutates context_in in place)."""
        B, H, L_V, D = V.shape

        if self.mask_flag:
            # ProbMask restricts each selected query to its causal prefix.
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)

        context_in[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            # Non-selected queries are reported with uniform weights 1/L_V.
            attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
            return (context_in, attns)
        else:
            return (context_in, None)

    def forward(self, queries, keys, values, attn_mask):
        """queries/keys/values: [B, L, H, D]; returns ([B, L, H, D], attn)."""
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        # Work internally in [B, H, L, D].
        queries = queries.transpose(2, 1)
        keys = keys.transpose(2, 1)
        values = values.transpose(2, 1)

        U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
        u = self.factor * np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)

        # Never sample/select more positions than actually exist.
        U_part = U_part if U_part < L_K else L_K
        u = u if u < L_Q else L_Q

        scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1. / sqrt(D)
        if scale is not None:
            scores_top = scores_top * scale
        # get the context
        context = self._get_initial_context(values, L_Q)
        # update the context with selected top_k queries
        context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)

        return context.contiguous(), attn
133 |
134 |
class AttentionLayer(nn.Module):
    """Multi-head wrapper: projects q/k/v into per-head subspaces, runs the
    wrapped attention mechanism, and projects back to d_model."""

    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AttentionLayer, self).__init__()

        # Fall back to an even split of d_model across heads.
        dim_k = d_keys or (d_model // n_heads)
        dim_v = d_values or (d_model // n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, dim_k * n_heads)
        self.key_projection = nn.Linear(d_model, dim_k * n_heads)
        self.value_projection = nn.Linear(d_model, dim_v * n_heads)
        self.out_projection = nn.Linear(dim_v * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        batch, q_len, _ = queries.shape
        _, k_len, _ = keys.shape
        heads = self.n_heads

        # [B, L, H, d_head] views for the inner attention.
        q = self.query_projection(queries).view(batch, q_len, heads, -1)
        k = self.key_projection(keys).view(batch, k_len, heads, -1)
        v = self.value_projection(values).view(batch, k_len, heads, -1)

        out, attn = self.inner_attention(q, k, v, attn_mask)
        out = out.view(batch, q_len, -1)  # merge heads

        return self.out_projection(out), attn
168 |
class ReformerLayer(nn.Module):
    """Adapter exposing LSHSelfAttention through the common
    (queries, keys, values, attn_mask) layer interface.

    Keys/values/mask are ignored: LSH attention is self-attention computed
    from the queries alone.
    """

    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None, causal=False, bucket_size=4, n_hashes=4):
        super().__init__()
        self.bucket_size = bucket_size
        self.attn = LSHSelfAttention(
            dim=d_model,
            heads=n_heads,
            bucket_size=bucket_size,
            n_hashes=n_hashes,
            causal=causal
        )

    def fit_length(self, queries):
        """Zero-pad the sequence so its length is a multiple of
        2 * bucket_size, as LSH attention requires."""
        B, N, C = queries.shape
        multiple = self.bucket_size * 2
        if N % multiple == 0:
            return queries
        pad_len = multiple - (N % multiple)
        return torch.cat([queries, torch.zeros([B, pad_len, C]).to(queries.device)], dim=1)

    def forward(self, queries, keys, values, attn_mask):
        # Pad, attend, then crop back to the original length.
        seq_len = queries.shape[1]
        out = self.attn(self.fit_length(queries))[:, :seq_len, :]
        return out, None
197 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/layers/Transformer_EncDec.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | sys.path.append(os.path.dirname(os.path.abspath(__file__)))
7 | from FANLayer import FANLayer
8 |
class ConvLayer(nn.Module):
    """Distilling block: circular conv + batch norm + ELU + stride-2 max
    pooling, which roughly halves the sequence length."""

    def __init__(self, c_in):
        super(ConvLayer, self).__init__()
        self.downConv = nn.Conv1d(in_channels=c_in,
                                  out_channels=c_in,
                                  kernel_size=3,
                                  padding=2,
                                  padding_mode='circular')
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        # Work in [B, C, L] for the conv/pool stack, then restore [B, L, C].
        h = self.downConv(x.permute(0, 2, 1))
        h = self.maxPool(self.activation(self.norm(h)))
        return h.transpose(1, 2)
28 |
29 |
class EncoderLayer(nn.Module):
    """Transformer encoder layer whose feed-forward block is selected by
    `exp_setting`:

      0 -> plain Linear/Linear MLP with activation,
      2 -> FANLayer pair with gate,
      4 -> FANLayer pair without gate.
    """

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu", exp_setting=0):
        super(EncoderLayer, self).__init__()
        self.exp_setting = exp_setting
        d_ff = d_ff if d_ff else 4 * d_model
        self.attention = attention
        assert exp_setting in [0, 2, 4]
        if exp_setting == 0:
            self.mlp1 = nn.Linear(d_model, d_ff)
            self.mlp2 = nn.Linear(d_ff, d_model)
        else:
            gated = exp_setting == 2
            self.mlp1 = FANLayer(input_dim=d_model, output_dim=d_ff, with_gate=gated)
            self.mlp2 = FANLayer(input_dim=d_ff, output_dim=d_model, with_gate=gated)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        attn_out, attn = self.attention(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout(attn_out))

        # FANLayer applies its own nonlinearity, so the explicit activation
        # is only used for the plain-MLP setting.
        if self.exp_setting == 0:
            hidden = self.dropout(self.activation(self.mlp1(x)))
        else:
            hidden = self.dropout(self.mlp1(x))
        hidden = self.dropout(self.mlp2(hidden))

        return self.norm2(x + hidden), attn
67 |
68 |
class Encoder(nn.Module):
    """Encoder stack: attention layers optionally interleaved with
    distilling conv layers, plus an optional final norm."""

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = None if conv_layers is None else nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        attns = []
        if self.conv_layers is None:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)
        else:
            # zip stops at the shorter conv list, so the last attention
            # layer runs afterwards without a conv.
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
95 |
96 |
class DecoderLayer(nn.Module):
    """Transformer decoder layer: self-attention, cross-attention, then a
    feed-forward block selected by `exp_setting` (0: plain MLP, 2: gated
    FANLayer pair, 4: ungated FANLayer pair)."""

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu", exp_setting=0):
        super(DecoderLayer, self).__init__()
        self.exp_setting = exp_setting
        d_ff = d_ff if d_ff else 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        assert exp_setting in [0, 2, 4]
        if exp_setting == 0:
            self.mlp1 = nn.Linear(d_model, d_ff)
            self.mlp2 = nn.Linear(d_ff, d_model)
        else:
            gated = exp_setting == 2
            self.mlp1 = FANLayer(input_dim=d_model, output_dim=d_ff, with_gate=gated)
            self.mlp2 = FANLayer(input_dim=d_ff, output_dim=d_model, with_gate=gated)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        x = self.norm1(x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0]))

        x = x + self.dropout(self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0])
        x = self.norm2(x)

        # FANLayer applies its own nonlinearity, so the explicit activation
        # is only used for the plain-MLP setting.
        if self.exp_setting == 0:
            hidden = self.dropout(self.activation(self.mlp1(x)))
        else:
            hidden = self.dropout(self.mlp1(x))
        hidden = self.dropout(self.mlp2(hidden))

        return self.norm3(x + hidden)
141 |
142 |
class Decoder(nn.Module):
    """Stack of decoder layers with an optional final norm and an optional
    output projection."""

    def __init__(self, layers, norm_layer=None, projection=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        for layer in self.layers:
            x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)

        if self.norm is not None:
            x = self.norm(x)

        if self.projection is not None:
            x = self.projection(x)
        return x
160 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YihongDong/FAN/7f1b16a1fdf2e36e8b123109d7f6b6987361a1fe/Timeseries_Forecasting/layers/__init__.py
--------------------------------------------------------------------------------
/Timeseries_Forecasting/models/Modified_Transformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer
4 | from layers.SelfAttention_Family import FullAttention, AttentionLayer
5 | from layers.Embed import DataEmbedding
6 |
7 |
class Model(nn.Module):
    """
    Vanilla Transformer with O(L^2) complexity.

    Encoder-decoder forecaster; the feed-forward blocks inside both stacks
    are switched between plain MLPs and FAN layers by configs.exp_setting
    (the switch itself lives in EncoderLayer/DecoderLayer).
    """
    def __init__(self, configs):
        super(Model, self).__init__()
        self.pred_len = configs.pred_len  # number of future steps returned
        self.output_attention = configs.output_attention

        # Embedding
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        # Encoder: non-causal (mask_flag=False) full self-attention layers.
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                                      output_attention=configs.output_attention), configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                    exp_setting=configs.exp_setting,
                ) for l in range(configs.e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(configs.d_model)
        )
        # Decoder: causal self-attention (mask_flag=True) followed by
        # non-causal cross-attention over the encoder output.
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AttentionLayer(
                        FullAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False),
                        configs.d_model, configs.n_heads),
                    AttentionLayer(
                        FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False),
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                    exp_setting=configs.exp_setting,
                )
                for l in range(configs.d_layers)
            ],
            norm_layer=torch.nn.LayerNorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        """Run the encoder-decoder and slice off the last pred_len steps.

        Returns [B, pred_len, c_out]; when configs.output_attention is set,
        also returns the encoder attention maps.
        """
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)

        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)

        if self.output_attention:
            return dec_out[:, -self.pred_len:, :], attns
        else:
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
--------------------------------------------------------------------------------
/Timeseries_Forecasting/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YihongDong/FAN/7f1b16a1fdf2e36e8b123109d7f6b6987361a1fe/Timeseries_Forecasting/models/__init__.py
--------------------------------------------------------------------------------
/Timeseries_Forecasting/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | scikit-learn
3 | torchvision
4 | numpy
5 | matplotlib
6 | reformer_pytorch
7 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/run.py:
--------------------------------------------------------------------------------
1 | # The Code of this part is based on Autoformer (https://github.com/thuml/Autoformer).
2 |
3 | import argparse
4 | import os
5 | import torch
6 | from exp.exp_main import Exp_Main
7 | import random
8 | import numpy as np
9 |
def get_setting_str(args, exp_iter):
    """Build the experiment-identifier string used for checkpoint/result
    names, encoding the main hyperparameters plus the run index."""
    fields = [
        f'expset{args.exp_setting}',
        f'{args.model_id}',
        f'{args.model}',
        f'{args.data}',
        f'bs{args.batch_size}',
        f'drop{args.dropout}',
        f'lr{args.learning_rate}',
        f'ep{args.train_epochs}',
        f'pat{args.patience}',
        f'ft{args.features}',
        f'sl{args.seq_len}',
        f'll{args.label_len}',
        f'pl{args.pred_len}',
        f'dm{args.d_model}',
        f'nh{args.n_heads}',
        f'el{args.e_layers}',
        f'dl{args.d_layers}',
        f'df{args.d_ff}',
        f'fc{args.factor}',
        f'eb{args.embed}',
        f'dt{args.distil}',
        f'{args.des}',
        f'{exp_iter}',
    ]
    return '_'.join(fields)
39 |
40 |
def main():
    """CLI entry point: parse arguments, then run training and/or testing.

    Seeds all RNGs for reproducibility, builds the argparse CLI shared by the
    Transformer-family forecasting experiments, and dispatches to Exp_Main.
    """
    # Fix every RNG the experiments touch so runs are reproducible.
    fix_seed = 2021
    random.seed(fix_seed)
    torch.manual_seed(fix_seed)
    np.random.seed(fix_seed)

    parser = argparse.ArgumentParser(description='Autoformer & Transformer family for Time Series Forecasting')

    # basic config
    parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
    parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
    parser.add_argument('--model', type=str, required=True, default='Transformer')

    # data loader
    parser.add_argument('--data', type=str, required=True, default='ETTh', help='dataset type')
    parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file')
    parser.add_argument('--data_path', type=str, default='ETTh.csv', help='data file')
    parser.add_argument('--features', type=str, default='M',
                        help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
    parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
    parser.add_argument('--freq', type=str, default='h',
                        help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
    parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

    # forecasting task
    parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
    parser.add_argument('--label_len', type=int, default=48, help='start token length')
    parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')

    # model define
    parser.add_argument('--bucket_size', type=int, default=4, help='for Reformer')
    parser.add_argument('--n_hashes', type=int, default=4, help='for Reformer')
    parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
    parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
    parser.add_argument('--c_out', type=int, default=7, help='output size')
    parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
    parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
    parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
    parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
    parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
    parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
    parser.add_argument('--factor', type=int, default=1, help='attn factor')
    parser.add_argument('--distil', action='store_false',
                        help='whether to use distilling in encoder, using this argument means not using distilling',
                        default=True)
    parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
    parser.add_argument('--embed', type=str, default='timeF',
                        help='time features encoding, options:[timeF, fixed, learned]')
    parser.add_argument('--activation', type=str, default='gelu', help='activation')
    parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
    parser.add_argument('--do_predict', action='store_true', help='whether to predict unseen future data')
    parser.add_argument('--exp_setting', type=int, default=0, help='experiment setting')
    # NOTE(review): declared type=int with a bool default; True == 1, so
    # downstream truthiness checks behave the same either way.
    parser.add_argument('--use_norm', type=int, default=True, help='use norm and denorm')

    # optimization
    parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
    parser.add_argument('--itr', type=int, default=2, help='experiments times')
    parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
    parser.add_argument('--patience', type=int, default=10, help='early stopping patience')
    parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
    parser.add_argument('--des', type=str, default='test', help='exp description')
    parser.add_argument('--loss', type=str, default='mse', help='loss function')
    parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
    parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)

    # GPU
    # NOTE(review): type=bool is an argparse pitfall — any non-empty string,
    # including "False", parses as True. Kept for CLI compatibility; omit the
    # flag (or pass an empty string) to disable the GPU.
    parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu')
    parser.add_argument('--gpu', type=int, default=0, help='gpu')
    parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
    parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')

    args = parser.parse_args()

    # Only use the GPU when CUDA is actually available on this machine.
    args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

    if args.use_gpu and args.use_multi_gpu:
        # "0, 1,2" -> [0, 1, 2]; the first listed device becomes the primary.
        args.devices = args.devices.replace(' ', '')
        device_ids = args.devices.split(',')
        args.device_ids = [int(id_) for id_ in device_ids]
        args.gpu = args.device_ids[0]

    print('Args in experiment:')
    print(args)

    Exp = Exp_Main

    if args.is_training:
        # Repeat the experiment args.itr times; each run trains, tests and
        # (optionally) predicts under its own setting string.
        for ii in range(args.itr):
            setting = get_setting_str(args, ii)

            exp = Exp(args)  # set experiments
            print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
            exp.train(setting)

            print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
            exp.test(setting)

            if args.do_predict:
                print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
                exp.predict(setting, True)

            torch.cuda.empty_cache()
    else:
        # Evaluation-only path: load the checkpoint and run the test split once.
        ii = 0
        setting = get_setting_str(args, ii)

        exp = Exp(args)  # set experiments
        print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting, test=1)
        torch.cuda.empty_cache()
152 |
153 |
154 | if __name__ == "__main__":
155 | main()
156 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/ETT_script/Transformer.sh:
--------------------------------------------------------------------------------
export CUDA_VISIBLE_DEVICES=0

datasets=("ETTh")
pred_lens=(96 192 336 720)
model="Modified_Transformer"
exp_setting=0

for dataset in "${datasets[@]}"; do
    # mkdir -p creates missing parent directories too (the original guarded
    # plain mkdir failed when ./logs/LongForecasting did not exist yet) and
    # is a no-op when the directory is already there.
    mkdir -p "./logs/LongForecasting/$dataset"
    for pred_len in "${pred_lens[@]}"; do
        model_id="${dataset}_96_${pred_len}"
        echo "Running model: $model_id"
        python -u run.py \
            --is_training 1 \
            --root_path "./dataset/ETT-small/" \
            --data_path "${dataset}.csv" \
            --model_id "$model_id" \
            --model "$model" \
            --data "$dataset" \
            --features M \
            --seq_len 96 \
            --label_len 48 \
            --pred_len "$pred_len" \
            --e_layers 2 \
            --d_layers 1 \
            --enc_in 7 \
            --dec_in 7 \
            --c_out 7 \
            --des 'Exp' \
            --freq 't' \
            --exp_setting $exp_setting \
            --itr 1 >logs/LongForecasting/$dataset/$model'_'$model_id'_'exp_setting_$exp_setting.log 2>&1
    done
done
37 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/ETT_script/Transformer_setting_2.sh:
--------------------------------------------------------------------------------
export CUDA_VISIBLE_DEVICES=0

datasets=("ETTh")
pred_lens=(96 192 336 720)
model="Modified_Transformer"
exp_setting=2
learning_rate=1e-5

for dataset in "${datasets[@]}"; do
    # mkdir -p also creates missing parents (the original guarded plain mkdir
    # failed when ./logs/LongForecasting did not exist yet).
    mkdir -p "./logs/LongForecasting/$dataset"
    for pred_len in "${pred_lens[@]}"; do
        model_id="${dataset}_96_${pred_len}"
        echo "Running model: $model_id"
        python -u run.py \
            --is_training 1 \
            --root_path "./dataset/ETT-small/" \
            --data_path "${dataset}.csv" \
            --model_id "$model_id" \
            --model "$model" \
            --data "$dataset" \
            --features M \
            --seq_len 96 \
            --label_len 48 \
            --pred_len "$pred_len" \
            --e_layers 2 \
            --d_layers 1 \
            --enc_in 7 \
            --dec_in 7 \
            --c_out 7 \
            --des 'Exp' \
            --freq 't' \
            --learning_rate $learning_rate \
            --exp_setting $exp_setting \
            --itr 1 >logs/LongForecasting/$dataset/$model'_'$model_id'_'exp_setting_$exp_setting'_'lr$learning_rate.log 2>&1
    done
done
39 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/ETT_script/Transformer_setting_4.sh:
--------------------------------------------------------------------------------
export CUDA_VISIBLE_DEVICES=0

datasets=("ETTh")
pred_lens=(96 192 336 720)
model="Modified_Transformer"
exp_setting=4
learning_rate=1e-5

for dataset in "${datasets[@]}"; do
    # mkdir -p also creates missing parents (the original guarded plain mkdir
    # failed when ./logs/LongForecasting did not exist yet).
    mkdir -p "./logs/LongForecasting/$dataset"
    for pred_len in "${pred_lens[@]}"; do
        model_id="${dataset}_96_${pred_len}"
        echo "Running model: $model_id"
        python -u run.py \
            --is_training 1 \
            --root_path "./dataset/ETT-small/" \
            --data_path "${dataset}.csv" \
            --model_id "$model_id" \
            --model "$model" \
            --data "$dataset" \
            --features M \
            --seq_len 96 \
            --label_len 48 \
            --pred_len "$pred_len" \
            --e_layers 2 \
            --d_layers 1 \
            --enc_in 7 \
            --dec_in 7 \
            --c_out 7 \
            --des 'Exp' \
            --freq 't' \
            --learning_rate $learning_rate \
            --exp_setting $exp_setting \
            --itr 1 >logs/LongForecasting/$dataset/$model'_'$model_id'_'exp_setting_$exp_setting'_'lr$learning_rate.log 2>&1
    done
done
39 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/Exchange_script/Modified_Transformer_setting_2.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d "./logs/LongForecasting/exchange_rate" ]; then
2 | mkdir ./logs/LongForecasting/exchange_rate
3 | fi
4 |
5 | export CUDA_VISIBLE_DEVICES=0
6 |
7 | ROOT_PATH="./dataset/exchange_rate/"
8 | DATA_PATH="exchange_rate.csv"
9 | MODEL="Modified_Transformer"
10 | DATA="custom"
11 | FEATURES=M
12 | SEQ_LEN=96
13 | LABEL_LEN=48
14 | E_LAYERS=2
15 | D_LAYERS=1
16 | FACTOR=3
17 | ENC_IN=8
18 | DEC_IN=8
19 | C_OUT=8
20 | DES="Exp"
21 | ITR=1
22 | EXP_SETTING=2
23 |
24 | # 96 192 336 720
25 | for PRED_LEN in 96 192 336 720
26 | do
27 | MODEL_ID="ECL_${SEQ_LEN}_${PRED_LEN}"
28 | python -u run.py \
29 | --is_training 1 \
30 | --root_path $ROOT_PATH \
31 | --data_path $DATA_PATH \
32 | --model_id $MODEL_ID \
33 | --model $MODEL \
34 | --data $DATA \
35 | --features $FEATURES \
36 | --seq_len $SEQ_LEN \
37 | --label_len $LABEL_LEN \
38 | --pred_len $PRED_LEN \
39 | --e_layers $E_LAYERS \
40 | --d_layers $D_LAYERS \
41 | --factor $FACTOR \
42 | --enc_in $ENC_IN \
43 | --dec_in $DEC_IN \
44 | --c_out $C_OUT \
45 | --des $DES \
46 | --itr $ITR \
47 | --exp_setting $EXP_SETTING # >logs/LongForecasting/exchange_rate/$MODEL'_'$MODEL_ID'_'exp_setting_$EXP_SETTING.log 2>&1
48 | done
49 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/Exchange_script/Modified_Transformer_setting_4.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d "./logs/LongForecasting/exchange_rate" ]; then
2 | mkdir ./logs/LongForecasting/exchange_rate
3 | fi
4 |
5 | export CUDA_VISIBLE_DEVICES=0
6 |
7 | ROOT_PATH="./dataset/exchange_rate/"
8 | DATA_PATH="exchange_rate.csv"
9 | MODEL="Modified_Transformer"
10 | DATA="custom"
11 | FEATURES=M
12 | SEQ_LEN=96
13 | LABEL_LEN=48
14 | E_LAYERS=2
15 | D_LAYERS=1
16 | FACTOR=3
17 | ENC_IN=8
18 | DEC_IN=8
19 | C_OUT=8
20 | DES="Exp"
21 | ITR=1
22 | EXP_SETTING=4
23 |
24 | # 96 192 336 720
25 | for PRED_LEN in 96 192 336 720
26 | do
27 | MODEL_ID="ECL_${SEQ_LEN}_${PRED_LEN}"
28 | python -u run.py \
29 | --is_training 1 \
30 | --root_path $ROOT_PATH \
31 | --data_path $DATA_PATH \
32 | --model_id $MODEL_ID \
33 | --model $MODEL \
34 | --data $DATA \
35 | --features $FEATURES \
36 | --seq_len $SEQ_LEN \
37 | --label_len $LABEL_LEN \
38 | --pred_len $PRED_LEN \
39 | --e_layers $E_LAYERS \
40 | --d_layers $D_LAYERS \
41 | --factor $FACTOR \
42 | --enc_in $ENC_IN \
43 | --dec_in $DEC_IN \
44 | --c_out $C_OUT \
45 | --des $DES \
46 | --itr $ITR \
47 | --exp_setting $EXP_SETTING >logs/LongForecasting/exchange_rate/$MODEL'_'$MODEL_ID'_'exp_setting_$EXP_SETTING.log 2>&1
48 | done
49 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/Exchange_script/Transformer.sh:
--------------------------------------------------------------------------------
export CUDA_VISIBLE_DEVICES=0

# The log redirection below targets this directory, but the original script
# never created it, so every run died with "No such file or directory".
mkdir -p ./logs/LongForecasting/exchange_rate

model_id_prefix="Exchange_96"

for pred_len in 96 192 336 720; do
    model_id="${model_id_prefix}_${pred_len}"

    python -u run.py \
        --is_training 1 \
        --root_path ./dataset/exchange_rate/ \
        --data_path exchange_rate.csv \
        --model_id "$model_id" \
        --model Modified_Transformer \
        --data custom \
        --features M \
        --seq_len 96 \
        --label_len 48 \
        --pred_len "$pred_len" \
        --e_layers 2 \
        --d_layers 1 \
        --factor 3 \
        --enc_in 8 \
        --dec_in 8 \
        --c_out 8 \
        --des 'Exp' \
        --exp_setting 0 \
        --itr 1 >logs/LongForecasting/exchange_rate/baseline_$model_id.log 2>&1
done
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/Traffic_script/Modified_Transformer_setting_2.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# The log redirection below writes into logs/LongForecasting/traffic, which the
# original script never created; mkdir -p builds the whole path in one call.
mkdir -p ./logs/LongForecasting/traffic

export CUDA_VISIBLE_DEVICES=0

ROOT_PATH="./dataset/traffic/"
DATA_PATH="traffic.csv"
MODEL="Modified_Transformer"
DATA="custom"
FEATURES="M"
SEQ_LEN=96
LABEL_LEN=48
E_LAYERS=2
D_LAYERS=1
FACTOR=3
ENC_IN=862
DEC_IN=862
C_OUT=862
DES="Exp"
ITR=1
EXP_SETTING=2
TRAIN_EPOCHS=50
learn_rates=(1e-3 5e-5 1e-5 1e-6)

for LEARN_RATE in "${learn_rates[@]}"
do
    for PRED_LEN in 336 720
    do
        MODEL_ID="traffic_${SEQ_LEN}_${PRED_LEN}"

        # Log name previously interpolated the undefined lowercase
        # $exp_setting; use the uppercase variable that is actually set.
        python -u run.py \
            --is_training 1 \
            --root_path $ROOT_PATH \
            --data_path $DATA_PATH \
            --model_id $MODEL_ID \
            --model $MODEL \
            --data $DATA \
            --features $FEATURES \
            --seq_len $SEQ_LEN \
            --label_len $LABEL_LEN \
            --pred_len $PRED_LEN \
            --e_layers $E_LAYERS \
            --d_layers $D_LAYERS \
            --factor $FACTOR \
            --enc_in $ENC_IN \
            --dec_in $DEC_IN \
            --c_out $C_OUT \
            --des $DES \
            --itr $ITR \
            --train_epochs $TRAIN_EPOCHS \
            --exp_setting $EXP_SETTING \
            --learning_rate $LEARN_RATE >logs/LongForecasting/traffic/$MODEL'_'$MODEL_ID'_'expsetting$EXP_SETTING'_'learn_rate_$LEARN_RATE.log 2>&1
    done
done
62 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/Traffic_script/Modified_Transformer_setting_4.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# The log redirection below writes into logs/LongForecasting/traffic, which the
# original script never created; mkdir -p builds the whole path in one call.
mkdir -p ./logs/LongForecasting/traffic

export CUDA_VISIBLE_DEVICES=0

# Experiment variables
ROOT_PATH="./dataset/traffic/"
DATA_PATH="traffic.csv"
MODEL="Modified_Transformer"
DATA="custom"
FEATURES="M"
SEQ_LEN=96
LABEL_LEN=48
E_LAYERS=2
D_LAYERS=1
FACTOR=3
ENC_IN=862
DEC_IN=862
C_OUT=862
DES="Exp"
ITR=1
EXP_SETTING=4
TRAIN_EPOCHS=50
learn_rates=(1e-3 5e-5 1e-5 1e-6)

# Sweep learning rates and horizons
for LEARN_RATE in "${learn_rates[@]}"
do
    for PRED_LEN in 336 720
    do
        MODEL_ID="traffic_${SEQ_LEN}_${PRED_LEN}"

        # Log name previously interpolated the undefined lowercase
        # $exp_setting; use the uppercase variable that is actually set.
        python -u run.py \
            --is_training 1 \
            --root_path $ROOT_PATH \
            --data_path $DATA_PATH \
            --model_id $MODEL_ID \
            --model $MODEL \
            --data $DATA \
            --features $FEATURES \
            --seq_len $SEQ_LEN \
            --label_len $LABEL_LEN \
            --pred_len $PRED_LEN \
            --e_layers $E_LAYERS \
            --d_layers $D_LAYERS \
            --factor $FACTOR \
            --enc_in $ENC_IN \
            --dec_in $DEC_IN \
            --c_out $C_OUT \
            --des $DES \
            --itr $ITR \
            --train_epochs $TRAIN_EPOCHS \
            --exp_setting $EXP_SETTING \
            --learning_rate $LEARN_RATE >logs/LongForecasting/traffic/$MODEL'_'$MODEL_ID'_'expsetting$EXP_SETTING'_'learn_rate_$LEARN_RATE.log 2>&1
    done
done
64 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/Traffic_script/Transformer.sh:
--------------------------------------------------------------------------------
export CUDA_VISIBLE_DEVICES=0

# Create the log directory the redirection below writes into.
mkdir -p ./logs/LongForecasting/traffic

train_epochs=50

for pred_len in 96 192 336 720
do
    model_id="traffic_96_${pred_len}"

    # The original passed "----exp_setting" (four dashes), which argparse
    # rejects as an unknown argument and aborts every run.
    python -u run.py \
        --is_training 1 \
        --root_path ./dataset/traffic/ \
        --data_path traffic.csv \
        --model_id $model_id \
        --model Modified_Transformer \
        --data custom \
        --features M \
        --seq_len 96 \
        --label_len 48 \
        --pred_len $pred_len \
        --e_layers 2 \
        --d_layers 1 \
        --factor 3 \
        --enc_in 862 \
        --dec_in 862 \
        --c_out 862 \
        --des 'Exp' \
        --itr 1 \
        --exp_setting 0 \
        --train_epochs $train_epochs >logs/LongForecasting/traffic/baseline_$model_id.log 2>&1
done
31 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/scripts/Weather_script/Modified_Transformer.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d "./logs" ]; then
2 | mkdir ./logs
3 | fi
4 |
5 | if [ ! -d "./logs/LongForecasting" ]; then
6 | mkdir ./logs/LongForecasting
7 | fi
8 |
9 | export CUDA_VISIBLE_DEVICES=0
10 |
11 | ROOT_PATH="./dataset/weather/"
12 | DATA_PATH="weather.csv"
13 | MODEL="Modified_Transformer"
14 | DATA="custom"
15 | FEATURES="M"
16 | SEQ_LEN=96
17 | LABEL_LEN=48
18 | E_LAYERS=2
19 | D_LAYERS=1
20 | FACTOR=3
21 | ENC_IN=21
22 | DEC_IN=21
23 | C_OUT=21
24 | DES="Exp"
25 | ITR=1
26 |
27 | BATCH_SIZE=256
28 | DROP_OUT=0.05
29 | lr=1e-5
30 | EPOCHS=10
31 | PATIENCE=3
32 | # Transformer(baseline): 0, FANGated: 2, FAN: 4
33 | EXP_SETTING=4
34 |
35 | for PRED_LEN in 96 192 336 720
36 | do
37 | MODEL_ID="weather_${SEQ_LEN}_${PRED_LEN}"
38 | python -u run.py \
39 | --is_training 1 \
40 | --root_path $ROOT_PATH \
41 | --data_path $DATA_PATH \
42 | --model_id $MODEL_ID \
43 | --model $MODEL \
44 | --data $DATA \
45 | --features $FEATURES \
46 | --seq_len $SEQ_LEN \
47 | --label_len $LABEL_LEN \
48 | --pred_len $PRED_LEN \
49 | --e_layers $E_LAYERS \
50 | --d_layers $D_LAYERS \
51 | --factor $FACTOR \
52 | --enc_in $ENC_IN \
53 | --dec_in $DEC_IN \
54 | --c_out $C_OUT \
55 | --des $DES \
56 | --itr $ITR \
57 | --batch_size $BATCH_SIZE \
58 | --dropout $DROP_OUT \
59 | --learning_rate $lr \
60 | --train_epochs $EPOCHS \
61 | --patience $PATIENCE \
62 | --exp_setting $EXP_SETTING # > logs/LongForecasting/$MODEL'_'$MODEL_ID'_'exp_setting_$EXP_SETTING.log 2>&1
63 | done
64 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YihongDong/FAN/7f1b16a1fdf2e36e8b123109d7f6b6987361a1fe/Timeseries_Forecasting/utils/__init__.py
--------------------------------------------------------------------------------
/Timeseries_Forecasting/utils/download_data.py:
--------------------------------------------------------------------------------
import requests

if __name__=="__main__":
    # Tsinghua cloud mirror of the six benchmark forecasting datasets.
    source_url = 'https://cloud.tsinghua.edu.cn/d/e1ccfff39ad541908bae/files/?p=%2Fall_six_datasets.zip&dl=1'
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Stream the download so the whole archive is never held in memory, and
    # fail loudly on HTTP errors instead of silently writing an error page
    # to disk (the original wrote res.content unconditionally).
    with requests.get(source_url, headers=headers, stream=True, timeout=60) as res:
        res.raise_for_status()
        with open('dataset/datasets.zip', 'wb') as f:
            for chunk in res.iter_content(chunk_size=1 << 20):
                f.write(chunk)
10 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/utils/masking.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
class TriangularCausalMask():
    """Boolean causal attention mask of shape [B, 1, L, L].

    True marks the strictly-upper-triangular positions (future time steps)
    that attention must not attend to.
    """

    def __init__(self, B, L, device="cpu"):
        mask_shape = [B, 1, L, L]
        with torch.no_grad():
            # Allocate directly on the target device instead of building the
            # mask on CPU and paying a host-to-device copy via .to(device).
            self._mask = torch.triu(
                torch.ones(mask_shape, dtype=torch.bool, device=device),
                diagonal=1,
            )

    @property
    def mask(self):
        # Read-only view of the cached mask tensor.
        return self._mask
13 |
14 |
class ProbMask():
    """Causal mask for ProbSparse-style attention over selected queries.

    For each (batch, head), gathers the causal-mask rows of the L query
    positions chosen by `index`, reshaped to align with `scores`.
    """
    def __init__(self, B, H, L, index, scores, device="cpu"):
        # Full strictly-upper-triangular mask over (L, L_k); True = masked.
        _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
        # Broadcast (view, no copy) to (B, H, L, L_k).
        _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
        # Advanced indexing picks, per batch/head, the mask rows of the
        # selected queries; assumes `index` holds (B, H, L) integer query
        # positions — TODO confirm against the attention caller.
        indicator = _mask_ex[torch.arange(B)[:, None, None],
                             torch.arange(H)[None, :, None],
                             index, :].to(device)
        self._mask = indicator.view(scores.shape).to(device)

    @property
    def mask(self):
        # Read-only view of the cached mask tensor.
        return self._mask
27 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
def RSE(pred, true):
    """Root Relative Squared Error vs. the mean-predictor baseline."""
    residual_energy = np.sum((true - pred) ** 2)
    baseline_energy = np.sum((true - true.mean()) ** 2)
    return np.sqrt(residual_energy) / np.sqrt(baseline_energy)
6 |
7 |
def CORR(pred, true):
    """Mean per-series Pearson correlation, computed along axis 0."""
    true_centered = true - true.mean(0)
    pred_centered = pred - pred.mean(0)
    u = (true_centered * pred_centered).sum(0)
    d = np.sqrt((true_centered ** 2).sum(0) * (pred_centered ** 2).sum(0))
    return (u / d).mean(-1)
12 |
13 |
def MAE(pred, true):
    """Mean Absolute Error."""
    return np.abs(pred - true).mean()
16 |
17 |
def MSE(pred, true):
    """Mean Squared Error."""
    err = pred - true
    return np.mean(err * err)
20 |
21 |
def RMSE(pred, true):
    """Root Mean Squared Error (sqrt of the mean squared residual)."""
    return np.sqrt(np.mean((pred - true) ** 2))
24 |
25 |
def MAPE(pred, true):
    """Mean Absolute Percentage Error as a fraction (assumes true != 0)."""
    relative_err = (pred - true) / true
    return np.abs(relative_err).mean()
28 |
29 |
def MSPE(pred, true):
    """Mean Squared Percentage Error as a fraction (assumes true != 0)."""
    relative_err = (pred - true) / true
    return np.square(relative_err).mean()
32 |
33 |
def metric(pred, true):
    """Return the standard forecasting metrics (mae, mse, rmse, mape, mspe)."""
    return (
        MAE(pred, true),
        MSE(pred, true),
        RMSE(pred, true),
        MAPE(pred, true),
        MSPE(pred, true),
    )
42 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/utils/timefeatures.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from pandas.tseries import offsets
6 | from pandas.tseries.frequencies import to_offset
7 |
8 |
class TimeFeature:
    """Base class for vectorized calendar features.

    Subclasses map a ``pd.DatetimeIndex`` to per-timestamp values scaled
    into roughly [-0.5, 0.5]; they override ``__call__``.
    """

    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Stub returning None; subclasses implement the actual encoding.
        pass

    def __repr__(self):
        # e.g. "HourOfDay()" — used when printing feature lists.
        return self.__class__.__name__ + "()"
18 |
19 |
class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""
    # (Docstring previously said "Minute of hour" — copy-paste error.)

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5
25 |
26 |
class MinuteOfHour(TimeFeature):
    """Minute of hour, linearly rescaled from {0..59} into [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.minute / 59.0
        return fraction - 0.5
32 |
33 |
class HourOfDay(TimeFeature):
    """Hour of day, linearly rescaled from {0..23} into [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.hour / 23.0
        return fraction - 0.5
39 |
40 |
class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""
    # (Docstring previously said "Hour of day" — copy-paste error.)

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5
46 |
47 |
class DayOfMonth(TimeFeature):
    """Day of month, linearly rescaled from {1..31} into [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based = index.day - 1
        return zero_based / 30.0 - 0.5
53 |
54 |
class DayOfYear(TimeFeature):
    """Day of year, linearly rescaled from {1..366} into [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based = index.dayofyear - 1
        return zero_based / 365.0 - 0.5
60 |
61 |
class MonthOfYear(TimeFeature):
    """Month of year, linearly rescaled from {1..12} into [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based = index.month - 1
        return zero_based / 11.0 - 0.5
67 |
68 |
class WeekOfYear(TimeFeature):
    """ISO week of year, linearly rescaled from {1..53} into [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based = index.isocalendar().week - 1
        return zero_based / 52.0 - 0.5
74 |
75 |
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given frequency string.

    Raises RuntimeError for frequencies not listed in the table below.
    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
    """

    # Map each pandas offset class to the calendar features that carry
    # information at that sampling granularity (coarser freq -> fewer feats).
    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }

    # to_offset normalizes strings like "15min" or "3h" to an offset object.
    offset = to_offset(freq_str)

    # isinstance (not dict lookup) so multiples like "3h" match offsets.Hour.
    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [cls() for cls in feature_classes]

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
    """
    raise RuntimeError(supported_freq_msg)
131 |
132 |
def time_features(dates, freq='h'):
    """Stack all features for `freq` into a (n_features, len(dates)) array."""
    feature_fns = time_features_from_frequency_str(freq)
    return np.vstack([fn(dates) for fn in feature_fns])
135 |
--------------------------------------------------------------------------------
/Timeseries_Forecasting/utils/tools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import matplotlib.pyplot as plt
4 |
5 | plt.switch_backend('agg')
6 |
7 | # calc the number of parameters
def count_parameters(model):
    """Number of trainable (requires_grad) parameters in `model`."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
10 |
def adjust_learning_rate(optimizer, epoch, args):
    """Set the optimizer's learning rate per the schedule named by args.lradj.

    type1: halve args.learning_rate every epoch (epoch is 1-based).
    type2: fixed milestones at selected epochs.
    Any other value leaves the learning rate untouched.
    """
    # lr = args.learning_rate * (0.2 ** (epoch // 2))
    if args.lradj == 'type1':
        lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
    elif args.lradj == 'type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    else:
        # Previously an unrecognized lradj crashed with NameError on
        # lr_adjust below; treat it as "no adjustment".
        lr_adjust = {}
    if epoch in lr_adjust.keys():
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))
25 |
26 |
class EarlyStopping:
    """Stop training when validation loss has not improved for `patience` calls.

    Also checkpoints the model (as `checkpoint.pth` under the given path)
    every time the validation loss improves by more than `delta`.
    """
    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience      # allowed consecutive non-improving calls
        self.verbose = verbose        # print a message on each checkpoint save
        self.counter = 0              # non-improving calls since last best
        self.best_score = None        # best (negated) val loss seen so far
        self.early_stop = False       # flag polled by the training loop
        self.val_loss_min = np.inf    # best raw val loss, for logging only
        self.delta = delta            # minimum improvement to reset counter

    def __call__(self, val_loss, model, path):
        # Negate so that "higher score is better" throughout.
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            # No (sufficient) improvement: count toward early stop.
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Improvement: checkpoint and reset the patience counter.
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Persist model weights to `path`/checkpoint.pth and record the loss."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
        self.val_loss_min = val_loss
57 |
58 |
class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    # dict.get (not __getitem__) means a missing key yields None instead of
    # raising AttributeError — callers relying on hasattr() should beware.
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
64 |
65 |
class StandardScaler():
    """Affine normalizer: maps data to (data - mean) / std and back."""

    def __init__(self, mean, std):
        # Statistics are supplied by the caller (typically fit on train data).
        self.mean = mean
        self.std = std

    def transform(self, data):
        """Standardize `data` with the stored statistics."""
        centered = data - self.mean
        return centered / self.std

    def inverse_transform(self, data):
        """Undo `transform`, mapping standardized values back to raw scale."""
        rescaled = data * self.std
        return rescaled + self.mean
76 |
77 |
def visual(true, preds=None, name='./pic/test.pdf'):
    """
    Results visualization

    Plots the ground-truth series (and the prediction, when given) and saves
    the figure to `name`. Nothing is displayed on screen — the module forces
    the non-interactive 'agg' backend at import time.
    """
    plt.figure()
    plt.plot(true, label='GroundTruth', linewidth=2)
    if preds is not None:
        plt.plot(preds, label='Prediction', linewidth=2)
    plt.legend()
    plt.savefig(name, bbox_inches='tight')
88 |
--------------------------------------------------------------------------------
/img/FANLayer.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YihongDong/FAN/7f1b16a1fdf2e36e8b123109d7f6b6987361a1fe/img/FANLayer.jpg
--------------------------------------------------------------------------------
/img/IR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YihongDong/FAN/7f1b16a1fdf2e36e8b123109d7f6b6987361a1fe/img/IR.jpg
--------------------------------------------------------------------------------
/img/mod.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YihongDong/FAN/7f1b16a1fdf2e36e8b123109d7f6b6987361a1fe/img/mod.jpg
--------------------------------------------------------------------------------
/img/sin.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YihongDong/FAN/7f1b16a1fdf2e36e8b123109d7f6b6987361a1fe/img/sin.jpg
--------------------------------------------------------------------------------