├── requirements.txt ├── graph_algorithms ├── models │ ├── __init__.py │ ├── util.py │ ├── MLPs.py │ ├── base.py │ ├── gnn.py │ └── gnn_edges.py ├── util.py ├── sample_scripts │ ├── sample_maxdeg_identical.py │ ├── sample_maxdeg_uniform.py │ └── sample_shortest_uniform.py ├── reproduce.sh ├── README.md ├── maxdeg_generation.py ├── shortest_generation.py └── main.py ├── n_body ├── reproduce.sh ├── util.py ├── README.md ├── in_network.py ├── MLPs.py ├── physics.py └── main.py ├── feedforward ├── sample_scripts │ ├── sample_l1.py │ ├── sample_cos.py │ ├── sample_sqrt.py │ ├── sample_direction.py │ ├── sample_linear.py │ └── sample_quadratic.py ├── MLP_with_relu_on_linear_data.sh ├── NTK.py ├── MLP_with_relu_on_nonlinear_data.sh ├── NTK_main.py ├── MLPs.py ├── sphere_rsquare.py ├── README.md ├── data_generation.py └── main.py └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.4.0 2 | 3 | -------------------------------------------------------------------------------- /graph_algorithms/models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /n_body/reproduce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # using feature engineering 4 | 5 | # extrapolate distance 6 | python main.py --data=n_body_extrapolate_distance --fe 7 | 8 | # extrapolate mass 9 | python main.py --data=n_body_extrapolate_mass --fe 10 | 11 | # interpolation 12 | python main.py --data=n_body_interpolate --fe 13 | -------------------------------------------------------------------------------- /graph_algorithms/models/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.autograd import Variable 7 | 8 | 9 | def calc_output_size(args): 10 | ans = 1 #regression type of problem 11 | return 1 12 | 13 | def median_absolute_percentage_error_compute_fn(y_pred, y): 14 | e = torch.abs(y.view_as(y_pred) - y_pred) / torch.abs(y.view_as(y_pred)) 15 | return 100.0 * torch.mean(e) 16 | 17 | -------------------------------------------------------------------------------- /graph_algorithms/util.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import random 4 | import torch 5 | 6 | class S2VGraph(object): 7 | def __init__(self, label, node_features, neighbors, g=None): 8 | ''' 9 | label: graph label 10 | neighbors: list of neighbors (without self-loop) 11 | node_features: a torch float tensor 12 | g: original networkX graph 13 | ''' 14 | self.label = label 15 | self.neighbors = neighbors 16 | self.node_features = node_features 17 | self.g = g -------------------------------------------------------------------------------- /n_body/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.autograd import Variable 7 | import networkx as nx 8 | import random 9 | 10 | class S2VGraph(object): 11 | def __init__(self, node_features, neighbors, g=None): 12 | ''' 13 | neighbors: list of neighbors (without self-loop) 14 | node_features: a 
torch float tensor, one-hot representation of the tag that is used as input to neural nets 15 | g: original networkX graph 16 | ''' 17 | self.neighbors = neighbors 18 | self.node_features = node_features 19 | self.g = g 20 | 21 | def mape(pred, label): 22 | diff = torch.abs(pred - label) 23 | e = diff.norm(dim=2) / label.norm(dim=2) 24 | return 100.0 * e.mean() 25 | 26 | -------------------------------------------------------------------------------- /graph_algorithms/sample_scripts/sample_maxdeg_identical.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | folder = 'data' 4 | node_dim = 1 5 | sampling = 'identical' 6 | train_graphs = ['path', 'cycle', 'ladder', '4regular', 'complete', 'tree', 'expander', 'general'] 7 | test_graph = 'general' 8 | train_min_n = 20 9 | train_max_n = 30 10 | test_min_ns = [50] 11 | train_color = 1 12 | test_colors = [1] 13 | 14 | file_id = 0 15 | for train_graph in train_graphs: 16 | for test_min_n in test_min_ns: 17 | if test_min_n == 20: 18 | test_max_n = 30 19 | else: 20 | test_max_n = 100 21 | for test_color in test_colors: 22 | data_name = f"maxdeg_identical_{train_graph}_Ndim{node_dim}_Train_V{train_min_n}_{train_max_n}_C{train_color}_Test_V{test_min_n}_{test_max_n}_C{test_color}" 23 | os.system('python maxdeg_generation.py --folder=%s --node_dim=%s --sampling=%s --train_graph=%s --test_graph=%s --train_min_n=%s --train_max_n=%s --test_min_n=%s --test_max_n=%s --train_color=%s --test_color=%s --data=%s'%(folder,node_dim,sampling, train_graph, test_graph, train_min_n, train_max_n, test_min_n, test_max_n, train_color, test_color, data_name)) 24 | file_id +=1 25 | 26 | print("%d data files are successfully generated." %file_id) 27 | -------------------------------------------------------------------------------- /graph_algorithms/sample_scripts/sample_maxdeg_uniform.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | folder = 'data' 4 | node_dim = 3 5 | sampling = 'uniform' 6 | train_graphs = ['path', 'cycle', 'ladder', '4regular', 'complete', 'tree', 'expander', 'general'] 7 | test_graph = 'general' 8 | train_min_n = 20 9 | train_max_n = 30 10 | test_min_ns = [50] 11 | train_color = 5 12 | test_colors = [5, 10] 13 | 14 | file_id = 0 15 | for train_graph in train_graphs: 16 | for test_min_n in test_min_ns: 17 | if test_min_n == 20: 18 | test_max_n = 30 19 | else: 20 | test_max_n = 100 21 | for test_color in test_colors: 22 | data_name = f"maxdeg_uniform_{train_graph}_Ndim{node_dim}_Train_V{train_min_n}_{train_max_n}_C{train_color}_Test_V{test_min_n}_{test_max_n}_C{test_color}" 23 | os.system('python maxdeg_generation.py --folder=%s --node_dim=%s --sampling=%s --train_graph=%s --test_graph=%s --train_min_n=%s --train_max_n=%s --test_min_n=%s --test_max_n=%s --train_color=%s --test_color=%s --data=%s'%(folder,node_dim,sampling, train_graph, test_graph, train_min_n, train_max_n, test_min_n, test_max_n, train_color, test_color, data_name)) 24 | file_id +=1 25 | 26 | print("%d data files are successfully generated." 
%file_id) 27 | -------------------------------------------------------------------------------- /feedforward/sample_scripts/sample_l1.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # set manually 4 | x_dims = [1,2,8] 5 | train_shapes = ['cube','sphere'] 6 | signs = ['no'] 7 | fixes = [1] 8 | test_rs = [5.0, 10.0] 9 | train_rs = [0.5, 1.0] 10 | n_trains = [20000] 11 | n_val = 1000 12 | n_test = 20000 13 | A_rs = [1.0] 14 | b_rs = [0.0] 15 | 16 | data = 'l1' 17 | folder = 'data/non-linear/l1' 18 | for x_dim in x_dims: 19 | for train_shape in train_shapes: 20 | for sign in signs: 21 | for fix in fixes: 22 | if sign == 'no' and not fix == 1: 23 | continue 24 | if sign == 'z' and not fix == 1: 25 | continue 26 | for test_r in test_rs: 27 | for train_r in train_rs: 28 | for n_train in n_trains: 29 | for A_r in A_rs: 30 | for b_r in b_rs: 31 | val_r = train_r 32 | if train_shape == 'cube': 33 | test_shape = 'cube' 34 | else: 35 | test_shape = 'ball' 36 | 37 | os.system("python data_generation.py --folder=%s --data=%s --x_dim=%s --train_shape=%s --test_shape=%s --sign=%s --fix=%s --test_r=%s --train_r=%s --val_r=%s --n_train=%s --n_val=%s --n_test=%s --A_r=%s --b_r=%s"%(folder, data, x_dim, train_shape, test_shape, sign, fix, test_r, train_r, val_r, n_train, n_val, n_test, A_r, b_r)) 38 | -------------------------------------------------------------------------------- /feedforward/sample_scripts/sample_cos.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # set manually 4 | x_dims = [1,2,8] 5 | train_shapes = ['cube','sphere'] 6 | signs = ['no'] 7 | fixes = [1] 8 | test_rs = [4.0, 2.0] 9 | train_rs = [0.5, 1.0] 10 | n_trains = [20000] 11 | n_val = 1000 12 | n_test = 80000 13 | A_rs = [1.0] 14 | b_rs = [0.0] 15 | 16 | data = 'cos' 17 | folder = 'data/non-linear/cos' 18 | for x_dim in x_dims: 19 | for train_shape in train_shapes: 20 | for sign in signs: 21 | for fix in fixes: 22 | if sign == 'no' and not fix == 1: 23 | continue 24 | if sign == 'z' and not fix == 1: 25 | continue 26 | for test_r in test_rs: 27 | for train_r in train_rs: 28 | for n_train in n_trains: 29 | for A_r in A_rs: 30 | for b_r in b_rs: 31 | val_r = train_r 32 | if train_shape == 'cube': 33 | test_shape = 'cube' 34 | else: 35 | test_shape = 'ball' 36 | 37 | os.system("python data_generation.py --folder=%s --data=%s --x_dim=%s --train_shape=%s --test_shape=%s --sign=%s --fix=%s --test_r=%s --train_r=%s --val_r=%s --n_train=%s --n_val=%s --n_test=%s --A_r=%s --b_r=%s"%(folder, data, x_dim, train_shape, test_shape, sign, fix, test_r, train_r, val_r, n_train, n_val, n_test, A_r, b_r)) 38 | -------------------------------------------------------------------------------- /feedforward/sample_scripts/sample_sqrt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # set manually 4 | x_dims = [1, 2, 8] 5 | train_shapes = ['cube'] 6 | signs = ['no'] 7 | fixes = [1] 8 | test_rs = [20.0, 10.0] 9 | train_rs = [1.0, 2.0] 10 | n_trains = [2000] 11 | n_val = 1000 12 | n_test = 20000 13 | A_rs = [1.0] 14 | b_rs = [0.0] 15 | 16 | data = 'sqrt' 17 | folder = 'data/non-linear/sqrt' 18 | for x_dim in x_dims: 19 | for train_shape in train_shapes: 20 | for sign in signs: 21 | for fix in fixes: 22 | if sign == 'no' and not fix == 1: 23 | continue 24 | if sign == 'z' and not fix == 1: 25 | continue 26 | for test_r in test_rs: 27 | for train_r in train_rs: 28 | for n_train in n_trains: 
29 | for A_r in A_rs: 30 | for b_r in b_rs: 31 | val_r = train_r 32 | if train_shape == 'cube': 33 | test_shape = 'cube' 34 | else: 35 | test_shape = 'ball' 36 | 37 | os.system("python data_generation.py --folder=%s --data=%s --x_dim=%s --train_shape=%s --test_shape=%s --sign=%s --fix=%s --test_r=%s --train_r=%s --val_r=%s --n_train=%s --n_val=%s --n_test=%s --A_r=%s --b_r=%s"%(folder, data, x_dim, train_shape, test_shape, sign, fix, test_r, train_r, val_r, n_train, n_val, n_test, A_r, b_r)) 38 | 39 | -------------------------------------------------------------------------------- /feedforward/sample_scripts/sample_direction.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # set manually 4 | x_dims = [32] 5 | train_shapes = ['cube'] 6 | signs = ['p', 'n', 'z'] 7 | fixes = [1, 16, 32] 8 | test_rs = [20.0, 50.0] 9 | train_rs = [5.0, 10.0] 10 | n_trains = [10000] 11 | n_val = 1000 12 | n_test = 2000 13 | A_rs = [10.0] 14 | b_rs = [0.0] 15 | 16 | data = 'linear' 17 | folder = 'data/linear_miss_direction' 18 | for x_dim in x_dims: 19 | for train_shape in train_shapes: 20 | for sign in signs: 21 | for fix in fixes: 22 | if sign == 'no' and not fix == 1: 23 | continue 24 | if sign == 'z' and not fix == 1: 25 | continue 26 | for test_r in test_rs: 27 | for train_r in train_rs: 28 | for n_train in n_trains: 29 | for A_r in A_rs: 30 | for b_r in b_rs: 31 | val_r = train_r 32 | if train_shape == 'cube': 33 | test_shape = 'cube' 34 | else: 35 | test_shape = 'ball' 36 | 37 | os.system("python data_generation.py --folder=%s --data=%s --x_dim=%s --train_shape=%s --test_shape=%s --sign=%s --fix=%s --test_r=%s --train_r=%s --val_r=%s --n_train=%s --n_val=%s --n_test=%s --A_r=%s --b_r=%s"%(folder, data, x_dim, train_shape, test_shape, sign, fix, test_r, train_r, val_r, n_train, n_val, n_test, A_r, b_r)) 38 | -------------------------------------------------------------------------------- /feedforward/sample_scripts/sample_linear.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # set manually 4 | x_dims = [1, 16, 32] 5 | train_shapes = ['sphere', 'cube'] 6 | signs = ['no'] 7 | fixes = [1] 8 | test_rs = [100.0, 150.0, 200.0] 9 | train_rs = [5.0] 10 | n_trains = [10000] 11 | n_val = 1000 12 | n_test = 2000 13 | A_rs = [10.0, 20.0] 14 | b_rs = [0.0] 15 | 16 | data = 'linear' 17 | folder = 'data/systematic_linear' 18 | for x_dim in x_dims: 19 | for train_shape in train_shapes: 20 | for sign in signs: 21 | for fix in fixes: 22 | if sign == 'no' and not fix == 1: 23 | continue 24 | if sign == 'z' and not fix == 1: 25 | continue 26 | for test_r in test_rs: 27 | for train_r in train_rs: 28 | for n_train in n_trains: 29 | for A_r in A_rs: 30 | for b_r in b_rs: 31 | val_r = train_r 32 | if train_shape == 'cube': 33 | test_shape = 'cube' 34 | else: 35 | test_shape = 'ball' 36 | 37 | os.system("python data_generation.py --folder=%s --data=%s --x_dim=%s --train_shape=%s --test_shape=%s --sign=%s --fix=%s --test_r=%s --train_r=%s --val_r=%s --n_train=%s --n_val=%s --n_test=%s --A_r=%s --b_r=%s"%(folder, data, x_dim, train_shape, test_shape, sign, fix, test_r, train_r, val_r, n_train, n_val, n_test, A_r, b_r)) -------------------------------------------------------------------------------- /feedforward/sample_scripts/sample_quadratic.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # set manually 4 | x_dims = [1,2,8] 5 | train_shapes = ['cube', 
'sphere'] 6 | signs = ['no'] 7 | fixes = [1] 8 | test_rs = [5.0, 10.0] 9 | train_rs = [0.5, 1.0] 10 | n_trains = [20000] 11 | n_val = 1000 12 | n_test = 20000 13 | A_rs = [1.0] 14 | b_rs = [0.0] 15 | 16 | data = 'quadratic' 17 | folder = 'data/non-linear/quadratic' 18 | for x_dim in x_dims: 19 | for train_shape in train_shapes: 20 | for sign in signs: 21 | for fix in fixes: 22 | if sign == 'no' and not fix == 1: 23 | continue 24 | if sign == 'z' and not fix == 1: 25 | continue 26 | for test_r in test_rs: 27 | for train_r in train_rs: 28 | for n_train in n_trains: 29 | for A_r in A_rs: 30 | for b_r in b_rs: 31 | val_r = train_r 32 | if train_shape == 'cube': 33 | test_shape = 'cube' 34 | else: 35 | test_shape = 'ball' 36 | 37 | os.system("python data_generation.py --folder=%s --data=%s --x_dim=%s --train_shape=%s --test_shape=%s --sign=%s --fix=%s --test_r=%s --train_r=%s --val_r=%s --n_train=%s --n_val=%s --n_test=%s --A_r=%s --b_r=%s"%(folder, data, x_dim, train_shape, test_shape, sign, fix, test_r, train_r, val_r, n_train, n_val, n_test, A_r, b_r)) 38 | -------------------------------------------------------------------------------- /graph_algorithms/sample_scripts/sample_shortest_uniform.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | folder = 'data' 4 | sampling = 'uniform' 5 | node_dim = 1 6 | max_hop = 3 7 | max_weight = 5 8 | max_weight_tests = [5, 10] 9 | train_graphs = ['path', 'cycle', 'ladder', '4regular', 'complete', 'tree', 'expander', 'general'] 10 | test_graph = 'general' 11 | train_min_n = 20 12 | train_max_n = 40 13 | test_min_n = 50 14 | test_max_n = 70 15 | train_color = 5 16 | test_color = 5 17 | 18 | file_id = 0 19 | for train_graph in train_graphs: 20 | for max_weight_test in max_weight_tests: 21 | data_name = f"shortestpath_uniform_{train_graph}_Ndim{node_dim}_maxhop{max_hop}_Train_V{train_min_n}_{train_max_n}_C{train_color}_E{max_weight}_Test_V{test_min_n}_{test_max_n}_C{test_color}_E{max_weight_test}" 22 | os.system('python shortest_generation.py --folder=%s --max_weight=%s --max_weight_test=%s --max_hop=%s --node_dim=%s --sampling=%s --train_graph=%s --test_graph=%s --train_min_n=%s --train_max_n=%s --test_min_n=%s --test_max_n=%s --train_color=%s --test_color=%s --data=%s'%(folder,max_weight,max_weight_test,max_hop,node_dim,sampling, train_graph, test_graph, train_min_n, train_max_n, test_min_n, test_max_n, train_color, test_color, data_name)) 23 | file_id +=1 24 | 25 | print("%d data files are successfully generated." 
%file_id) 26 | -------------------------------------------------------------------------------- /graph_algorithms/reproduce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # max degree interpolate, training on general graph 4 | python main.py --model=GNN --n_iter=2 --weight=None --lr=0.01 --fc_output_layer=1 --mlp_layer=2 --hidden_dim=256 --batch_size=64 --graph_pooling_type=max --neighbor_pooling_type=sum --epochs=300 --data=maxdeg_uniform_general_Ndim3_Train_V20_30_C5_Test_V50_100_C5.pickle 5 | 6 | # max degree extrapolate, training on general graph 7 | python main.py --model=GNN --n_iter=2 --weight=None --lr=0.01 --fc_output_layer=1 --mlp_layer=2 --hidden_dim=256 --batch_size=64 --graph_pooling_type=max --neighbor_pooling_type=sum --epochs=300 --data=maxdeg_uniform_general_Ndim3_Train_V20_30_C5_Test_V50_100_C10.pickle 8 | 9 | # shortest path interpolate, training on general graph 10 | python main.py --model=GNN_E --n_iter=3 --weight=weight --lr=0.005 --fc_output_layer=1 --mlp_layer=2 --hidden_dim=256 --batch_size=32 --graph_pooling_type=min --neighbor_pooling_type=min --epochs=300 --data=shortestpath_uniform_general_Ndim1_maxhop3_Train_V20_40_C5_E5_Test_V50_70_C5_E5.pickle 11 | 12 | # shortest path extrapolate, training on general graph 13 | python main.py --model=GNN_E --n_iter=3 --weight=weight --lr=0.005 --fc_output_layer=1 --mlp_layer=2 --hidden_dim=256 --batch_size=32 --graph_pooling_type=min --neighbor_pooling_type=min --epochs=300 --data=shortestpath_uniform_general_Ndim1_maxhop3_Train_V20_40_C5_E5_Test_V50_70_C5_E10.pickle -------------------------------------------------------------------------------- /feedforward/MLP_with_relu_on_linear_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # linear function in dimension 32 with training, valdiation, and test data uniformly sampled from a hyper-cube 4 | python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.001 --activation=relu --mlp_layer=2 --hidden_dim=256 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/systematic_linear/linear_xdim32_item1_trainscube_testscube_signno_fix1_testr100.0_trainr5.0_valr5.0_ntrain10000_nval1000_ntest2000_Ar20.0_br0.0.pickle 5 | 6 | # linear function in dimension 16 with training and valdiation uniformly sampled from a sphere and test data uniformly sampled from a hyper-ball 7 | python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.001 --activation=relu --mlp_layer=2 --hidden_dim=256 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/systematic_linear/linear_xdim16_item1_trainssphere_testsball_signno_fix1_testr200.0_trainr5.0_valr5.0_ntrain10000_nval1000_ntest2000_Ar20.0_br0.0.pickle 8 | 9 | # linear function in dimension 32 where the training data is only restricted to positive in all dimensions 10 | python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.001 --activation=relu --mlp_layer=2 --hidden_dim=256 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/linear_miss_direction/linear_xdim32_item1_trainscube_testscube_signp_fix32_testr20.0_trainr5.0_valr5.0_ntrain10000_nval1000_ntest2000_Ar10.0_br0.0.pickle 11 | 12 | # linear function in dimension 32 where the training data is only restricted to positive in the first 16 dimensions 13 | python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.001 --activation=relu --mlp_layer=2 --hidden_dim=256 --batch_size=64 --epochs=500 --optimizer=Adam 
--data=./data/linear_miss_direction/linear_xdim32_item1_trainscube_testscube_signp_fix16_testr20.0_trainr5.0_valr5.0_ntrain10000_nval1000_ntest2000_Ar10.0_br0.0.pickle 14 | -------------------------------------------------------------------------------- /feedforward/NTK.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | # return an array K of size (d_max, d_max, N, N), K[i][j] is kernel value of depth i + 1 with first j layers fixed 5 | def kernel_value_batch(X, d_max): 6 | K = np.zeros((d_max, d_max, X.shape[0], X.shape[0])) 7 | for fix_dep in range(d_max): 8 | S = np.matmul(X, X.T) 9 | H = np.zeros_like(S) 10 | for dep in range(d_max): 11 | if fix_dep <= dep: 12 | H += S 13 | K[dep][fix_dep] = H 14 | L = np.diag(S) 15 | P = np.clip(np.sqrt(np.outer(L, L)), a_min = 1e-9, a_max = None) 16 | Sn = np.clip(S / P, a_min = -1, a_max = 1) 17 | S = (Sn * (math.pi - np.arccos(Sn)) + np.sqrt(1.0 - Sn * Sn)) * P / 2.0 / math.pi 18 | H = H * (math.pi - np.arccos(Sn)) / 2.0 / math.pi 19 | return K 20 | 21 | # return an array K of size (N, N), depth d_max, first fix_dep layers fixed 22 | def kernel_value(X, d_max, fix_dep): 23 | K = np.zeros((d_max, X.shape[0], X.shape[0])) 24 | S = np.matmul(X, X.T) 25 | H = np.zeros_like(S) 26 | for dep in range(d_max): 27 | if fix_dep <= dep: 28 | H += S 29 | K[dep] = H 30 | L = np.diag(S) 31 | P = np.clip(np.sqrt(np.outer(L, L)), a_min = 1e-9, a_max = None) 32 | Sn = np.clip(S / P, a_min = -1, a_max = 1) 33 | S = (Sn * (math.pi - np.arccos(Sn)) + np.sqrt(1.0 - Sn * Sn)) * P / 2.0 / math.pi 34 | H = H * (math.pi - np.arccos(Sn)) / 2.0 / math.pi 35 | return K[d_max - 1] 36 | 37 | def kernel_paper(X): 38 | X_normalized = X / np.linalg.norm(X, axis=1)[:, np.newaxis] 39 | S = np.matmul(X, X.T) 40 | S_normalized = np.matmul(X_normalized, X_normalized.T) 41 | 42 | S_clip = np.clip(S_normalized, a_min = -1, a_max = 1) 43 | SS = (math.pi - np.arccos(S_clip))/(2*math.pi) 44 | K = np.multiply(S, SS) 45 | return K -------------------------------------------------------------------------------- /feedforward/MLP_with_relu_on_nonlinear_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # quadratic function 4 | python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.0001 --activation=relu --mlp_layer=2 --hidden_dim=128 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/non-linear/quadratic/quadratic_xdim8_item1_trainssphere_testsball_signno_fix1_testr5.0_trainr1.0_valr1.0_ntrain20000_nval1000_ntest20000_Ar1.0_br0.0.pickle 5 | 6 | 7 | # cosine function 8 | python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.0001 --activation=relu --mlp_layer=2 --hidden_dim=128 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/non-linear/cos/cos_xdim2_item1_trainscube_testscube_signno_fix1_testr2.0_trainr1.0_valr1.0_ntrain20000_nval1000_ntest80000_Ar1.0_br0.0.pickle 9 | 10 | # square root function 11 | python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.0001 --activation=relu --mlp_layer=2 --hidden_dim=128 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/non-linear/sqrt/sqrt_xdim8_item1_trainscube_testscube_signno_fix1_testr10.0_trainr2.0_valr2.0_ntrain2000_nval1000_ntest20000_Ar1.0_br0.0.pickle 12 | 13 | # l1 norm function, sensitive to hyper-parameters 14 | ## The following hyper-parameters lead to a small MAPE on test data 15 | python main.py --activation=relu --loss_fn=reg 
--decay=1e-5 --lr=0.001 --activation=relu --mlp_layer=2 --hidden_dim=256 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/non-linear/l1/l1_xdim8_item1_trainssphere_testsball_signno_fix1_testr10.0_trainr0.5_valr0.5_ntrain20000_nval1000_ntest20000_Ar1.0_br0.0.pickle 16 | 17 | ## The following hyper-parameters lead to a large MAPE on test data 18 | python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.0001 --activation=relu --mlp_layer=2 --hidden_dim=128 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/non-linear/l1/l1_xdim8_item1_trainssphere_testsball_signno_fix1_testr10.0_trainr0.5_valr0.5_ntrain20000_nval1000_ntest20000_Ar1.0_br0.0.pickle -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # How Neural Networks Extrapolate: From Feedforward to Graph Neural Networks 2 | 3 | This repository is the PyTorch implementation of the experiments in the following paper: 4 | 5 | Keyulu Xu, Mozhi Zhang, Jingling Li, Simon S. Du, Ken-ichi Kawarabayashi, Stefanie Jegelka. How Neural Networks Extrapolate: From Feedforward to Graph Neural Networks. ICLR 2021. 6 | 7 | [arXiv](https://arxiv.org/abs/2009.11848) [OpenReview](https://openreview.net/forum?id=UH-cmocLJC) 8 | 9 | If you make use of the relevant code/experiments/ideas in your work, please cite our paper (BibTeX below). 10 | ``` 11 | @inproceedings{ 12 | xu2021how, 13 | title={How Neural Networks Extrapolate: From Feedforward to Graph Neural Networks}, 14 | author={Keyulu Xu and Mozhi Zhang and Jingling Li and Simon Shaolei Du and Ken-Ichi Kawarabayashi and Stefanie Jegelka}, 15 | booktitle={International Conference on Learning Representations}, 16 | year={2021}, 17 | url={https://openreview.net/forum?id=UH-cmocLJC} 18 | } 19 | ``` 20 | 21 | 22 | ## Requirements 23 | - This codebase has been tested with `python3.7` and `pytorch 1.4.0` (with `CUDA VERSION 10.0`). 24 | - To install the necessary python packages, run `pip install -r requirements.txt` (this installs PyTorch). 25 | - The packages [networkx](https://networkx.org/documentation/stable/install.html) and [pytorch-geometric](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html) need to be installed separately; choose networkx and pytorch-geometric versions that match your PyTorch and CUDA versions. 26 | 27 | ## Instructions 28 | Refer to each folder for instructions to reproduce the experiments. All experiments can be reproduced by running the commands provided. 29 | - Experiments related to feedforward networks may be found in the [`feedforward`](./feedforward) folder. 30 | - Experiments on **architectures** that help extrapolation may be found in the [`graph_algorithms`](./graph_algorithms) folder. 31 | - Experiments on **representations** that help extrapolation may be found in the [`n_body`](./n_body) folder. 32 | -------------------------------------------------------------------------------- /n_body/README.md: -------------------------------------------------------------------------------- 1 | # Representations that help extrapolation 2 | 3 | We show that **improved representations** help extrapolation on the n-body physical reasoning problem. 4 | 5 | The n-body problem asks a neural network to predict how n stars in a physical system evolve according to the laws of physics. That is, we train neural networks to predict properties of each star's state in the next frame, e.g., 0.001 seconds into the future. 
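For intuition, the quantity that governs each transition is the pairwise gravitational interaction between stars. Below is a minimal sketch of one simulation step, assuming Newtonian point masses and a simple Euler integration step; it is only illustrative and is not the implementation in [`physics.py`](./physics.py).

```python
import numpy as np

G = 6.674e-11  # gravitational constant

def euler_step(pos, vel, mass, dt=1e-3):
    """Advance n point masses by one Euler step under Newtonian gravity.

    pos, vel: arrays of shape (n, d); mass: array of shape (n,).
    """
    n = len(mass)
    acc = np.zeros_like(pos)
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            r = pos[j] - pos[i]                    # vector from star i to star j
            dist = np.linalg.norm(r) + 1e-12       # small epsilon avoids division by zero
            acc[i] += G * mass[j] * r / dist**3    # a_i = sum_j G * m_j * r_ij / |r_ij|^3
    vel_next = vel + dt * acc
    pos_next = pos + dt * vel_next
    return pos_next, vel_next
```

A GNN is then trained to map the current frame (one node per star, one edge per ordered pair of stars) to properties of each star in the next frame.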
6 | 7 | 8 | ## Reproducing results with one command 9 | We include a script file [`reproduce.sh`](./reproduce.sh) to reproduce the results in the extrapolation settings for the n-body experiments. To be more specific, 10 | ``` 11 | bash ./reproduce.sh 12 | ``` 13 | reproduces the results with feature-engineered input representations (pink bars) in Figure 6(b) of the paper. Continue reading below for more details. 14 | 15 | 16 | ## Data Generation 17 | We sample 100 videos of 3-body system evolution, each with a rollout of 500 time steps. We consider the orbit setting: there is one heavy center star and several other stars. See Appendix C.7 of the paper for more dataset details. 18 | 19 | We consider two extrapolation scenarios (mass or distance): 20 | - The test set contains objects with out-of-distribution masses, which is generated by the command 21 | ``` 22 | python physics.py --ext=mass --center_ratio=2 --obj_ratio=2 --data=n_body_extrapolate_mass 23 | ``` 24 | 25 | - The test set contains objects with out-of-distribution distances, which is generated by the command 26 | ``` 27 | python physics.py --ext=dist --data=n_body_extrapolate_distance 28 | ``` 29 | 30 | The interpolation scenario is generated by the command 31 | ``` 32 | python physics.py --ext=None --data=n_body_interpolate 33 | ``` 34 | 35 | The argument of the `--data` flag sets the name of the generated pickle files, which are placed under the `data` folder by default. Refer to [`physics.py`](./physics.py) for more data generation options. 36 | 37 | ## Training 38 | To train on a generated dataset, feed the dataset name to the `--data` flag. The `--fe` flag controls whether feature engineering is applied to the edge features of the input graphs. For example, 39 | ``` 40 | CUDA_VISIBLE_DEVICES=0 python main.py --data=n_body_extrapolate_mass --fe 41 | ``` 42 | trains a GNN on `n_body_extrapolate_mass.pickle` with feature-engineered input representations. 43 | 44 | And 45 | 46 | ``` 47 | CUDA_VISIBLE_DEVICES=0 python main.py --data=n_body_extrapolate_mass 48 | ``` 49 | trains a GNN on `n_body_extrapolate_mass.pickle` with the original (non-engineered) input representations. 50 | 51 | 52 | The training logs will be under the `results` folder. 
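For intuition on what `--fe` does: ReLU MLPs tend to extrapolate along linear directions, so the idea is to hand the network edge quantities in which the physics is (closer to) linear, rather than raw masses and coordinates. The sketch below is only a hypothetical illustration of such an engineered edge feature (the Newtonian force magnitude plus a unit direction per ordered pair of stars); the actual transformation applied under the `--fe` flag is the one in the training code.

```python
import numpy as np

def engineered_edge_features(pos, mass):
    """Hypothetical edge features for each ordered pair of stars (i, j):
    the Newtonian force magnitude m_i * m_j / |r_ij|^2 and the unit
    direction from star i to star j, instead of raw masses and coordinates."""
    n = len(mass)
    feats = []
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            r = pos[j] - pos[i]
            dist = np.linalg.norm(r) + 1e-12  # avoid division by zero
            feats.append(np.concatenate(([mass[i] * mass[j] / dist**2], r / dist)))
    return np.stack(feats)  # shape: (n * (n - 1), 1 + pos.shape[1])
```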
53 | -------------------------------------------------------------------------------- /graph_algorithms/models/MLPs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.autograd import Variable 7 | 8 | 9 | class MLP(nn.Module): 10 | def __init__(self, n_layer, input_dim, hidden_dim, output_dim): 11 | super(MLP, self).__init__() 12 | 13 | self.n_layer = n_layer 14 | self.linears = nn.ModuleList([nn.Linear(input_dim, hidden_dim)]) 15 | for layer in range(n_layer-2): 16 | self.linears.append(nn.Linear(hidden_dim, hidden_dim)) 17 | self.linears.append(nn.Linear(hidden_dim, output_dim)) 18 | 19 | def forward(self, x): 20 | for layer in range(self.n_layer): 21 | x = self.linears[layer](x) 22 | x = F.relu(x) 23 | 24 | return x 25 | 26 | class FCOutputModel(nn.Module): 27 | def __init__(self, n_layer, input_dim, hidden_dim, output_dim): 28 | super(FCOutputModel, self).__init__() 29 | self.n_layer = n_layer 30 | if self.n_layer == 1: 31 | self.linears = nn.ModuleList([nn.Linear(input_dim, output_dim)]) 32 | else: 33 | self.linears = nn.ModuleList([nn.Linear(input_dim, hidden_dim)]) 34 | for layer in range(n_layer-2): 35 | self.linears.append(nn.Linear(hidden_dim, hidden_dim)) 36 | self.linears.append(nn.Linear(hidden_dim, int(output_dim))) 37 | 38 | def forward(self, x): 39 | if self.n_layer == 1: 40 | x = self.linears[self.n_layer-1](x) 41 | return x 42 | 43 | for layer in range(self.n_layer-1): 44 | x = self.linears[layer](x) 45 | x = F.relu(x) 46 | 47 | x = self.linears[self.n_layer-1](x) 48 | 49 | return F.log_softmax(x) 50 | 51 | class RegFCOutputModel(nn.Module): 52 | def __init__(self, n_layer, input_dim, hidden_dim, output_dim): 53 | super(RegFCOutputModel, self).__init__() 54 | self.n_layer = n_layer 55 | if self.n_layer == 1: 56 | self.linears = nn.ModuleList([nn.Linear(input_dim, output_dim)]) 57 | else: 58 | self.linears = nn.ModuleList([nn.Linear(input_dim, hidden_dim)]) 59 | for layer in range(n_layer-2): 60 | self.linears.append(nn.Linear(hidden_dim, hidden_dim)) 61 | self.linears.append(nn.Linear(hidden_dim, int(output_dim))) 62 | 63 | def forward(self, x): 64 | if self.n_layer == 1: 65 | x = self.linears[self.n_layer-1](x) 66 | return x 67 | 68 | for layer in range(self.n_layer-1): 69 | x = self.linears[layer](x) 70 | x = F.relu(x) 71 | 72 | x = self.linears[self.n_layer-1](x) 73 | 74 | return x 75 | -------------------------------------------------------------------------------- /graph_algorithms/models/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.nn import LSTM 7 | from torch.autograd import Variable 8 | import itertools 9 | from .util import median_absolute_percentage_error_compute_fn as mape 10 | from .MLPs import * 11 | from .util import * 12 | 13 | DEFAULT_MODE = -1 14 | DEFAULT_PAIR = (-1,-1) 15 | DEFAULT_IND = -1 16 | 17 | cls_criterion = torch.nn.CrossEntropyLoss() 18 | mse_criterion = torch.nn.MSELoss() 19 | lossfun = {'cls': cls_criterion, 'reg': mse_criterion, 'mape': mape} 20 | actfun = {'relu': F.relu, 'tanh': F.tanh, 'sigmoid': F.sigmoid} 21 | 22 | class BasicModel(nn.Module): 23 | def __init__(self, args, name): 24 | super(BasicModel, self).__init__() 25 | self.name=name 26 | self.loss_fn = 
args.loss_fn 27 | self.activation = args.activation 28 | self.actfunc = actfun[self.activation] 29 | 30 | def train_(self, input_nodes, label): 31 | self.optimizer.zero_grad() 32 | 33 | #print(input_nodes[0].node_features) 34 | 35 | output = self(input_nodes) 36 | pred = output.data.max(1)[1] 37 | loss = lossfun[self.loss_fn](output.view(label.shape), label) 38 | mape_loss = lossfun['mape'](output.view(label.shape), label) 39 | loss.backward() 40 | self.optimizer.step() 41 | 42 | if self.loss_fn != 'cls': 43 | return 0, loss.cpu(), mape_loss 44 | 45 | correct = pred.eq(label.data).cpu().sum() 46 | accuracy = correct.to(dtype=torch.float) * 100. / len(label) 47 | return accuracy, loss 48 | 49 | def test_(self, input_nodes, label, print_info=False): 50 | with torch.no_grad(): 51 | output = self(input_nodes) 52 | loss = lossfun[self.loss_fn](output.view(label.shape), label) 53 | mape_loss = lossfun['mape'](output.view(label.shape), label) 54 | if print_info: 55 | print(output.view(-1), label) 56 | if self.loss_fn != 'cls': 57 | return 0, loss.cpu(), mape_loss 58 | 59 | pred = output.data.max(1)[1] 60 | correct_ind = pred.eq(label.data).cpu() 61 | correct = pred.eq(label.data).cpu().sum() 62 | accuracy = correct.to(dtype=torch.float) * 100. / len(label) 63 | 64 | return accuracy, loss 65 | 66 | def pred_(self, input_nodes): 67 | with torch.no_grad(): 68 | output = self(input_nodes) 69 | pred = output.data.max(1)[1] 70 | return pred 71 | 72 | def save_model(self, epoch): 73 | torch.save(self.state_dict(), 'model/epoch_{}_{:02d}.pth'.format(self.name, epoch)) 74 | 75 | -------------------------------------------------------------------------------- /feedforward/NTK_main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import math 4 | import numpy as np 5 | import NTK 6 | from sklearn.kernel_ridge import KernelRidge 7 | import torch 8 | import pickle 9 | from mpl_toolkits import mplot3d 10 | import matplotlib.pyplot as plt 11 | 12 | np.set_printoptions(threshold=20) 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--data', default = "data", type = str, help = "data file") 16 | parser.add_argument('--files_dir', type=str, default='results', help='the directory to store trained models logs') 17 | parser.add_argument('--filename', type=str, default=None, help='the file which store trained model logs') 18 | parser.add_argument('--loss_fn', type=str, choices=['mse', 'mape'], default='mse', help='various regression loss fucntions') 19 | parser.add_argument('--alpha', type=float, default=0, help='alpha') 20 | 21 | args = parser.parse_args() 22 | 23 | def load_data(data_file): 24 | with open("%s" %data_file, 'rb') as f: 25 | train, val, test = pickle.load(f) 26 | 27 | X, y = [], [] 28 | for data in [train, val, test]: 29 | X.extend(data[0]) 30 | y.extend(data[1]) 31 | 32 | train_fold = list(range(0, len(train[0]))) 33 | val_fold = list(range(len(train[0]), len(train[0]) + len(val[0]))) 34 | test_fold = list(range(len(val[0]) + len(train[0]), len(train[0]) + len(val[0]) + len(test[0]))) 35 | X = np.asarray(X) 36 | y = np.asarray(y) 37 | 38 | return X, y, train_fold, val_fold, test_fold 39 | 40 | def ridge_regression(K1, K2, y1, y2, alpha): 41 | n_val, n_train = K2.shape 42 | clf = KernelRidge(kernel = "precomputed", alpha = 0.0) 43 | clf.fit(K1, y1) 44 | z = clf.predict(K2) 45 | loss = (np.square(z - y2)).mean(axis=ax) 46 | return loss 47 | 48 | # kernel ridge regression 49 | def process(args, K, X, y, 
train_fold, val_fold): 50 | K1 = K[train_fold][:, train_fold] 51 | K2 = K[val_fold][:, train_fold] 52 | y1 = y[train_fold] 53 | y2 = y[val_fold] 54 | 55 | n_val, n_train = K2.shape 56 | clf = KernelRidge(kernel = "precomputed", alpha = args.alpha) 57 | clf.fit(K1, y1) 58 | z = clf.predict(K2) 59 | loss = (np.square(z - y2)).mean() 60 | 61 | return loss, z 62 | 63 | def mape(y_pred, y): 64 | e = torch.abs(y.view_as(y_pred) - y_pred) / torch.abs(y.view_as(y_pred)) 65 | return 100.0 * torch.median(e) 66 | 67 | mse_criterion = torch.nn.MSELoss() 68 | lossfun = {'mse': mse_criterion, 'mape': mape} 69 | 70 | if not args.filename: 71 | args.filename = args.data.split('/')[-1] 72 | args.filename = args.filename + "_alpha" + str(args.alpha) 73 | print(args.filename) 74 | 75 | if not os.path.exists(args.files_dir): 76 | os.makedirs(args.files_dir) 77 | outf = open('%s/%s.log' %(args.files_dir, args.filename), "w") 78 | print(args, file = outf) 79 | 80 | X, y, train_fold, val_fold, test_fold = load_data(args.data) 81 | print('calculating kernel...') 82 | Ks = NTK.kernel_value_batch(X, d_max=4) 83 | K = NTK.kernel_paper(X) 84 | 85 | print('done calculating kernel') 86 | 87 | train_loss, y_pred_train = process(args, K, X, y, train_fold, train_fold) 88 | val_loss, y_pred_val = process(args, K, X, y, train_fold, val_fold) 89 | test_loss, y_pred_test = process(args, K, X, y, train_fold, test_fold) 90 | 91 | print('train loss: %f, val loss: %f, test_loss: %f' %(train_loss, val_loss, test_loss)) 92 | print('train loss: %f, val loss: %f, test_loss: %f' %(train_loss, val_loss, test_loss), file = outf) 93 | outf.close() 94 | -------------------------------------------------------------------------------- /graph_algorithms/README.md: -------------------------------------------------------------------------------- 1 | # Experiments on architectures that help extrapolation 2 | We validate that **linear algorithmic alignment** helps extrapolation on two Dynamic Programming (DP) tasks: 3 | - Max Degree 4 | - Shortest path 5 | 6 | 7 | ## Reproducing results with one command 8 | We include a script file [`reproduce.sh`](./reproduce.sh) to repoduce the results on extrapolation settings for the graph algorithm experiments in the paper. To be more specific, 9 | ``` 10 | bash ./reproduce.sh 11 | ``` 12 | reproduces the results (pink bars) in Figure 6(a). Continue reading below for more details. 
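Both tasks are dynamic programs whose update step is a min/max over neighbors plus a simple, roughly linear term; this is the sense in which a GNN with matching aggregation is linearly algorithmically aligned with the task and extrapolates better. Below is a small illustrative sketch of the underlying DP updates, assuming a graph given as a dict from node to neighbor list and edge weights keyed by ordered node pairs (this is not code from this repository):

```python
def max_degree(neighbors):
    """Max degree: a max over per-node neighbor counts (a one-step DP)."""
    return max(len(nbrs) for nbrs in neighbors.values())

def shortest_path(neighbors, weight, source, n_hops):
    """Bellman-Ford style relaxation: d[u] <- min(d[u], min_v d[v] + w(v, u))."""
    INF = float('inf')
    d = {u: (0.0 if u == source else INF) for u in neighbors}
    for _ in range(n_hops):
        # build the new table from the old one, relaxing every node once per hop
        d = {u: min([d[u]] + [d[v] + weight[(v, u)] for v in neighbors[u]])
             for u in neighbors}
    return d
```

A GNN with `min` neighbor pooling mirrors the `min` in the shortest-path relaxation, and `max` graph pooling mirrors the final `max` in the max-degree task; the MLP modules then only need to learn the remaining near-linear pieces.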
13 | 14 | ## Maximum Degree 15 | 16 | ### Data Generation 17 | 18 | Run the following script to generate datasets with identical node features: 19 | 20 | ``` 21 | python sample_scripts/sample_maxdeg_identical.py 22 | ``` 23 | 24 | Run the following script to generate datasets with randomly sampled node features: 25 | 26 | ``` 27 | python sample_scripts/sample_maxdeg_uniform.py 28 | ``` 29 | 30 | The names of the data files are of the form: 31 | ``` 32 | maxdeg_{node_feature_type}_{graph_type}_Ndim{node_dimension}_Train_V{min_num_of_vertices_in_train}_{max_num_of_vertices_in_train}_C{train_node_feature_range}_Test_V{min_num_of_vertices_in_test}_{max_num_of_vertices_in_test}_C{test_node_feature_range}.pickle 33 | ``` 34 | 35 | For example, the data file 36 | ``` 37 | maxdeg_uniform_general_Ndim3_Train_V20_30_C5_Test_V50_100_C10.pickle 38 | ``` 39 | denotes the dataset with general graphs (Erdős–Rényi random graphs with varying edge probabilities) as the training data, where the number of vertices is sampled uniformly from [20, 30], and each node in a training graph has a three-dimensional real vector as its node feature, with each dimension sampled uniformly from [−5.0, 5.0]. Accordingly, the test data consists of random graphs where the number of vertices is sampled uniformly from [50, 100], and each node in a test graph has a three-dimensional real vector as its node feature, with each dimension sampled uniformly from [−10.0, 10.0] (a larger range than in the training data). 40 | 41 | 42 | ### Training 43 | A sample command to train a 2-layer GNN with `max` graph pooling and `sum` neighbor pooling: 44 | 45 | ``` 46 | CUDA_VISIBLE_DEVICES=0 python main.py --model=GNN --n_iter=2 --weight=None --lr=0.01 --fc_output_layer=1 --mlp_layer=2 --hidden_dim=256 --batch_size=64 --graph_pooling_type=max --neighbor_pooling_type=sum --epochs=250 --data=maxdeg_uniform_general_Ndim3_Train_V20_30_C5_Test_V50_100_C10.pickle 47 | ``` 48 | 49 | Note that: 50 | - In max degree, there are no edge features, so the model should be set to `GNN`, and the `--weight` flag should be turned off (i.e., `--weight=None`). 51 | - Run 300 epochs for graphs with uniform node features, and 100 epochs for graphs with identical node features. 52 | 53 | ## Shortest Path 54 | 55 | ### Data Generation 56 | Run the following script to generate datasets. 57 | 58 | ``` 59 | python sample_scripts/sample_shortest_uniform.py 60 | ``` 61 | 62 | The names of the data files are of the form: 63 | ``` 64 | shortestpath_uniform_{graph_type}_Ndim{node_dimension}_maxhop{max_length_of_the_shortest_path}_Train_V{min_num_of_vertices_in_train}_{max_num_of_vertices_in_train}_C{train_node_feature_range}_E{max_edge_weight_in_train}_Test_V{min_num_of_vertices_in_test}_{max_num_of_vertices_in_test}_C{test_node_feature_range}_E{max_edge_weight_in_test} 65 | ``` 66 | 67 | For example, the file 68 | ``` 69 | shortestpath_uniform_general_Ndim1_maxhop3_Train_V20_40_C5_E5_Test_V50_70_C5_E10.pickle 70 | ``` 71 | denotes the dataset with general graphs (Erdős–Rényi random graphs with varying edge probabilities) as the training data, where the number of vertices is sampled uniformly from [20, 40], and the edge weights are drawn uniformly from [1.0, 5.0]. Accordingly, the test data consists of random graphs where the number of vertices is sampled uniformly from [50, 70], and the edge weights are drawn uniformly from [1.0, 10.0]. 
72 | For all graphs in the dataset, each node feature contains a scalar sampled uniformaly from [−5.0, 5.0] along with two binary indicators, which correspondingly denotes whether the node is a starting node or not, and whether the node is a terminal node or not. 73 | 74 | ### Training 75 | A sample command to train a 3-layer GNN with `min` graph pooling and `min` neighbor pooling: 76 | ``` 77 | CUDA_VISBILE_DEVICES=0 python main.py --model=GNN_E --n_iter=3 --weight=weight --lr=0.005 --fc_output_layer=1 --mlp_layer=2 --hidden_dim=256 --batch_size=32 --graph_pooling_type=min --neighbor_pooling_type=min --epochs=250 --data=shortestpath_uniform_general_Ndim1_maxhop3_Train_V20_40_C5_E5_Test_V50_70_C5_E10.pickle 78 | ``` 79 | 80 | Note that: 81 | - There are edge features, so the model should be set to `GNN_E`, and the flag `--weight` should be turned on (i.e., `--weight=weight`). 82 | 83 | -------------------------------------------------------------------------------- /graph_algorithms/models/gnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pickle 3 | import torch.nn as nn 4 | from .MLPs import * 5 | from .util import * 6 | from .base import BasicModel 7 | from torch_scatter import scatter_max, scatter_min, scatter_mean, scatter_add 8 | 9 | pool_ops = {'sum': scatter_add, 'max': scatter_max, 'mean': scatter_mean, 'min': scatter_min} 10 | 11 | ''' General GNN for any graph 12 | ''' 13 | class GNN(BasicModel): 14 | def __init__(self, args): 15 | super(GNN, self).__init__(args, 'GGNN') 16 | 17 | self.device = args.device 18 | 19 | self.n_iter = args.n_iter 20 | self.mlp_layer = args.mlp_layer 21 | self.hidden_dim = args.hidden_dim 22 | self.fc_output_layer = args.fc_output_layer 23 | self.graph_pooling_type = args.graph_pooling_type 24 | self.neighbor_pooling_type = args.neighbor_pooling_type 25 | 26 | self.node_feature_size = args.node_feature_size 27 | self.answer_size = calc_output_size(args) 28 | self.add_self_loop = args.add_self_loop 29 | 30 | self.MLP0 = torch.nn.ModuleList() 31 | 32 | for layer in range(self.n_iter): 33 | if layer == 0: 34 | self.MLP0.append(MLP(self.mlp_layer, self.node_feature_size*2, self.hidden_dim, self.hidden_dim)) 35 | else: 36 | self.MLP0.append(MLP(self.mlp_layer, self.hidden_dim*2, self.hidden_dim, self.hidden_dim)) 37 | 38 | self.MLP0.append(MLP(self.mlp_layer, self.hidden_dim, self.hidden_dim, self.hidden_dim)) 39 | 40 | if args.loss_fn == 'cls': 41 | self.fcout = FCOutputModel(self.fc_output_layer, self.hidden_dim, self.hidden_dim, self.answer_size) 42 | elif args.loss_fn == 'reg': 43 | self.fcout = RegFCOutputModel(self.fc_output_layer, self.hidden_dim, self.hidden_dim, self.answer_size) 44 | 45 | if args.optimizer == 'Adam': 46 | self.optimizer = optim.Adam(self.parameters(), lr=args.lr, weight_decay=args.decay) 47 | else: 48 | self.optimizer = optim.SGD(self.parameters(), lr=args.lr, weight_decay=args.decay) 49 | 50 | def preprocess_neighbors_list(self, batch_graph): 51 | padded_neighbor_list = [] 52 | padded_self_list = [] 53 | graph_level_list = [] 54 | start_idx = [0] 55 | 56 | for i, graph in enumerate(batch_graph): 57 | start_idx.append(start_idx[i] + len(graph.neighbors)) 58 | graph_level_list.extend([i]*(start_idx[i+1]-start_idx[i])) 59 | 60 | padded_neighbors = [] 61 | padded_self = [] 62 | for j in range(len(graph.neighbors)): 63 | #add off-set values to the neighbor indices 64 | pad = [n + start_idx[i] for n in graph.neighbors[j]] 65 | #add self-loop 66 | if self.add_self_loop: 
67 | pad.append(j + start_idx[i]) 68 | padded_self.extend([j + start_idx[i]]*(len(graph.neighbors[j])+1)) 69 | else: 70 | padded_self.extend([j + start_idx[i]]*len(graph.neighbors[j])) 71 | 72 | padded_neighbors.extend(pad) 73 | padded_neighbor_list.extend(padded_neighbors) 74 | padded_self_list.extend(padded_self) 75 | 76 | return torch.LongTensor(padded_neighbor_list).to(self.device), torch.LongTensor(padded_self_list).to(self.device), torch.LongTensor(graph_level_list).to(self.device) 77 | 78 | ''' One iteration/layer of GNNs, call n_layer times in forward 79 | ''' 80 | def reason_step(self, h, layer, padded_neighbor_list, padded_self_list): 81 | x_i = h[padded_neighbor_list] 82 | x_j = h[padded_self_list] 83 | 84 | # x_pair denotes the edges from v_j to v_i 85 | 86 | x_pair = torch.cat((x_i, x_j), dim=-1) 87 | relations = self.MLP0[layer](x_pair) 88 | 89 | # when we aggregate on vertex u, we aggregate all edges whose ending point is u (incoming edges to u) 90 | if self.neighbor_pooling_type in ['max', 'min']: 91 | x, _ = pool_ops[self.neighbor_pooling_type](relations, padded_neighbor_list, dim=0) 92 | else: 93 | x = pool_ops[self.neighbor_pooling_type](relations, padded_neighbor_list, dim=0) 94 | return x 95 | 96 | def forward(self, batch_graph): 97 | x = torch.cat([graph.node_features for graph in batch_graph], 0).to(self.device) 98 | padded_neighbor_list, padded_self_list, graph_level_list = self.preprocess_neighbors_list(batch_graph) 99 | 100 | for layer in range(self.n_iter): 101 | x = self.reason_step(x, layer, padded_neighbor_list, padded_self_list) 102 | #print(x) 103 | x = self.MLP0[self.n_iter](x) 104 | if self.graph_pooling_type in ['max', 'min']: 105 | x, _ = pool_ops[self.graph_pooling_type](x, graph_level_list, dim=0) 106 | else: 107 | x = pool_ops[self.graph_pooling_type](x, graph_level_list, dim=0) 108 | 109 | x = self.fcout(x) 110 | #print(x) 111 | return x 112 | -------------------------------------------------------------------------------- /feedforward/MLPs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.nn import LSTM 7 | from torch.autograd import Variable 8 | import itertools 9 | from math import * 10 | 11 | DEFAULT_MODE = -1 12 | DEFAULT_PAIR = (-1,-1) 13 | DEFAULT_IND = -1 14 | 15 | def square(x): 16 | return x ** 2 17 | 18 | def mape(y_pred, y): 19 | e = torch.abs(y.view_as(y_pred) - y_pred) / torch.abs(y.view_as(y_pred)) 20 | return 100.0 * torch.median(e) 21 | 22 | cls_criterion = torch.nn.CrossEntropyLoss() 23 | mse_criterion = torch.nn.MSELoss() 24 | lossfun = {'cls': cls_criterion, 'reg': mse_criterion, 'mape': mape} 25 | actfun = {'sin': torch.sin, 'square': square, 'tanh': F.tanh, 'exp': torch.exp, 'log':torch.log, 'relu': F.relu, 'gelu': F.gelu, 'sigmoid': F.sigmoid} 26 | 27 | class BasicModel(nn.Module): 28 | def __init__(self, args, name): 29 | super(BasicModel, self).__init__() 30 | self.name=name 31 | self.loss_fn = args.loss_fn 32 | self.activation = args.activation 33 | self.actfunc = actfun[self.activation] 34 | 35 | def train_(self, input_nodes, label): 36 | self.optimizer.zero_grad() 37 | output = self(input_nodes) 38 | 39 | if self.loss_fn != 'cls': 40 | loss = lossfun[self.loss_fn](output.view(label.shape), label) 41 | 42 | loss.backward() 43 | self.optimizer.step() 44 | 45 | mape_loss = lossfun['mape'](output.view(label.shape), label) 46 | return 0, 
loss.cpu(), mape_loss.cpu() 47 | else: 48 | loss = lossfun[self.loss_fn](output, label) 49 | 50 | loss.backward() 51 | self.optimizer.step() 52 | 53 | pred = output.data.max(1)[1] 54 | correct = pred.eq(label.data).cpu().sum() 55 | accuracy = correct.to(dtype=torch.float) * 100. / len(label) 56 | 57 | return accuracy, loss.cpu(), 0 58 | 59 | def test_(self, input_nodes, label, print_info=False): 60 | with torch.no_grad(): 61 | output = self(input_nodes) 62 | if print_info: 63 | print(output.view(-1), label) 64 | 65 | if self.loss_fn != 'cls': 66 | loss = lossfun[self.loss_fn](output.view(label.shape), label) 67 | mape_loss = lossfun['mape'](output.view(label.shape), label) 68 | return 0, loss.cpu(), mape_loss.cpu() 69 | else: 70 | loss = lossfun[self.loss_fn](output, label) 71 | pred = output.data.max(1)[1] 72 | correct_ind = pred.eq(label.data).cpu() 73 | correct = pred.eq(label.data).cpu().sum() 74 | accuracy = correct.to(dtype=torch.float) * 100. / len(label) 75 | 76 | return accuracy, loss.cpu(), 0 77 | 78 | def pred_(self, input_nodes): 79 | with torch.no_grad(): 80 | output = self(input_nodes) 81 | pred = output.data.max(1)[1] 82 | return pred 83 | 84 | def save_model(self, epoch): 85 | torch.save(self.state_dict(), 'model/epoch_{}_{:02d}.pth'.format(self.name, epoch)) 86 | 87 | class FeedForward(BasicModel): 88 | def __init__(self, args): 89 | super(FeedForward, self).__init__(args, 'FeedForward') 90 | self.n_layer = args.mlp_layer 91 | self.input_dim, self.hidden_dim, self.output_dim = args.input_dim, args.hidden_dim, args.output_dim 92 | self.option = args.option 93 | 94 | if self.n_layer == 1: 95 | self.linears = nn.ModuleList([nn.Linear(self.input_dim, self.output_dim)]) 96 | else: 97 | self.linears = nn.ModuleList([nn.Linear(self.input_dim, self.hidden_dim)]) 98 | for layer in range(self.n_layer-2): 99 | self.linears.append(nn.Linear(self.hidden_dim, self.hidden_dim)) 100 | self.linears.append(nn.Linear(self.hidden_dim, int(self.output_dim))) 101 | 102 | if self.option == 'A': 103 | for m in self.modules(): 104 | if isinstance(m, torch.nn.Linear): 105 | torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 106 | elif self.option == 'B': 107 | for m in self.modules(): 108 | if isinstance(m, torch.nn.Linear): 109 | torch.nn.init.normal_(m.weight, mean=0.0, std=sqrt(2)) 110 | 111 | if args.optimizer == 'Adam': 112 | self.optimizer = optim.Adam(self.parameters(), lr=args.lr, weight_decay=args.decay) 113 | else: 114 | self.optimizer = optim.SGD(self.parameters(), lr=args.lr, weight_decay=args.decay) 115 | 116 | def forward(self, x): 117 | if self.n_layer == 1: 118 | layer = self.linears[self.n_layer-1] 119 | x = layer(x) 120 | if self.option == 'B': 121 | x = x / sqrt(layer.out_features * 1.0) 122 | return x 123 | 124 | for i in range(self.n_layer-1): 125 | layer = self.linears[i] 126 | x = layer(x) 127 | if not self.activation == 'linear': 128 | x = self.actfunc(x) 129 | if self.option == 'B': 130 | x = x / sqrt(layer.out_features * 1.0) 131 | 132 | x = self.linears[self.n_layer-1](x) 133 | 134 | return x -------------------------------------------------------------------------------- /n_body/in_network.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code modified based on ToruOwO's interactive network. 
3 | Original implementation: 4 | https://github.com/ToruOwO/InteractionNetwork-pytorch/blob/master/model.py 5 | """ 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | from torch.autograd import Variable 12 | from MLPs import * 13 | from util import * 14 | 15 | class RelationModel(nn.Module): 16 | def __init__(self, input_size, hidden_size, output_size): 17 | super(RelationModel, self).__init__() 18 | 19 | self.model = nn.Sequential( 20 | nn.Linear(input_size, hidden_size), 21 | nn.ReLU(), 22 | nn.Linear(hidden_size, hidden_size), 23 | nn.ReLU(), 24 | nn.Linear(hidden_size, hidden_size), 25 | nn.ReLU(), 26 | nn.Linear(hidden_size, output_size), 27 | nn.ReLU() 28 | ) 29 | 30 | def forward(self, x): 31 | ''' 32 | Args: 33 | x: [n_relations, input_size] 34 | Returns: 35 | [n_relations, output_size] 36 | ''' 37 | return self.model(x) 38 | 39 | 40 | class ObjectModel(nn.Module): 41 | def __init__(self, input_size, hidden_size, output_size): 42 | super(ObjectModel, self).__init__() 43 | 44 | self.model = nn.Sequential( 45 | nn.Linear(input_size, hidden_size), 46 | nn.ReLU(), 47 | nn.Linear(hidden_size, output_size) 48 | ) 49 | 50 | def forward(self, x): 51 | ''' 52 | Args: 53 | x: [n_objects, input_size] 54 | Returns: 55 | [n_objects, output_size] 56 | 57 | Note: output_size = number of states we want to predict 58 | ''' 59 | return self.model(x) 60 | 61 | class InteractionNetwork(BasicModel): 62 | def __init__(self, args, x_dim=0): 63 | super(InteractionNetwork, self).__init__(args, 'InteractionNetwork') 64 | 65 | self.device = args.device 66 | self.bs = args.batch_size 67 | self.n_objects = args.n_objects 68 | self.n_relations = self.n_objects * (self.n_objects - 1) 69 | self.obj_dim = args.node_feature_size 70 | 71 | self.rel_dim = args.edge_feature_size 72 | self.fe = args.fe 73 | answer_size = args.answer_size 74 | self.eff_dim, hidden_rel_dim, hidden_obj_dim = args.hidden_dim, args.hidden_dim, args.hidden_dim 75 | self.rm = RelationModel(self.obj_dim * 2 + self.rel_dim, hidden_rel_dim, self.eff_dim) 76 | self.om = ObjectModel(self.obj_dim + self.eff_dim + x_dim, hidden_obj_dim, answer_size) # x, y 77 | 78 | receiver_r = np.zeros((self.n_objects, self.n_relations), dtype=float) 79 | sender_r = np.zeros((self.n_objects, self.n_relations), dtype=float) 80 | 81 | count = 0 # used as idx of relations 82 | for i in range(self.n_objects): 83 | for j in range(self.n_objects): 84 | if i != j: 85 | receiver_r[i, count] = 1.0 86 | sender_r[j, count] = 1.0 87 | count += 1 88 | 89 | self.rs = Variable(torch.FloatTensor(sender_r)).to(self.device) 90 | self.rr = Variable(torch.FloatTensor(receiver_r)).to(self.device) 91 | 92 | if args.optimizer == 'Adam': 93 | self.optimizer = optim.Adam(self.parameters(), lr=args.lr, weight_decay=args.decay) 94 | else: 95 | self.optimizer = optim.SGD(self.parameters(), lr=args.lr, weight_decay=args.decay) 96 | 97 | def m(self, obj, ra, bs): 98 | """ 99 | The marshalling function; 100 | computes the matrix products ORr and ORs and concatenates them with Ra 101 | 102 | :param obj: object states 103 | :param rr: receiver relations 104 | :param rs: sender relations 105 | :param ra: relation info 106 | :return: 107 | """ 108 | obj_t = torch.transpose(obj, 1, 2).reshape(-1, self.n_objects) # (bs * obj_dim, n_objects) 109 | orr = obj_t.mm(self.rr).reshape((bs, self.obj_dim, -1)) # (bs, obj_dim, n_relations) 110 | ors = obj_t.mm(self.rs).reshape((bs, self.obj_dim, -1)) # (bs, obj_dim, n_relations) 111 | 112 | return 
torch.cat([orr, ors, ra], dim = 1) # (bs, obj_dim*2+rel_dim, n_relations) 113 | 114 | def forward(self, input_nodes, bs, x=None): 115 | """ 116 | objects, sender_relations, receiver_relations, relation_info 117 | :param obj: (bs, n_objects, obj_dim) 118 | :param rr: (n_objects, n_relations) 119 | :param rs: (n_objects, n_relations) 120 | :param x: external forces, default to None 121 | :return: 122 | """ 123 | # marshalling function 124 | obj = input_nodes[0] 125 | ra = input_nodes[1] 126 | b = self.m(obj, ra, bs) # shape of b = (bs, obj_dim*2+rel_dim, n_relations) 127 | # relation module 128 | e = self.rm(torch.transpose(b, 1, 2)) # shape of e = (bs, n_relations, eff_dim) 129 | e = torch.transpose(e, 1, 2).reshape(-1, self.n_relations) # shape of e = (bs * eff_dim, n_relations) 130 | # effect aggregator 131 | if x is None: 132 | # shape of a = (bs, obj_dim+eff_dim, n_objects) 133 | a = torch.cat([torch.transpose(obj, 1, 2), e.mm(self.rr.t()).reshape((bs, self.eff_dim, -1))], dim = 1) 134 | 135 | # object module 136 | p = self.om(torch.transpose(a, 1, 2)) # shape of p = (bs, n_objects, answer_size) 137 | 138 | return p -------------------------------------------------------------------------------- /feedforward/sphere_rsquare.py: -------------------------------------------------------------------------------- 1 | """Plot model's prediction for a 1-d model""" 2 | 3 | from argparse import ArgumentParser 4 | 5 | from mpl_toolkits import mplot3d 6 | import matplotlib.pyplot as plt 7 | import torch 8 | import random 9 | 10 | from MLPs import RegFCOutputModel 11 | from main import load_data, cvt_data_axis, tensor_data 12 | import numpy as np 13 | from sklearn.linear_model import LinearRegression 14 | 15 | check = {'val': 1, 'test': 2} 16 | random.seed(1) 17 | n_sample = 100 18 | n_trials = 5000 19 | extend_ratio, adjust = 10, 1.1 20 | 21 | random.seed(1) 22 | def get_range(dataset_name): 23 | line = dataset_name.split('/')[-1] 24 | ks = {} 25 | for item in line.split('_'): 26 | if 'trainr' in item: 27 | r = float(item.replace('trainr', '')) 28 | return r 29 | 30 | def get_data_shape(dataset_name): 31 | line = dataset_name.split('/')[-1] 32 | 33 | for item in line.split('_'): 34 | if 'cube' in item: 35 | return 'cube' 36 | return 'sphere' 37 | 38 | def get_data_settings(dataset_name): 39 | line = dataset_name.split("/")[-1] 40 | ks = {} 41 | for item in line.split('_'): 42 | if '.pickle' in item: 43 | item = item.strip('.pickle') 44 | if 'xdim' in item: 45 | ks['xdim'] = int(item.replace('xdim', '')) 46 | elif 'trainr' in item: 47 | ks['train_range'] = float(item.replace('trainr', '')) 48 | elif 'testr' in item: 49 | ks['test_range'] = float(item.replace('testr', '')) 50 | elif 'valr' in item: 51 | ks['val_range'] = float(item.replace('valr', '')) 52 | elif 'ntrain' in item: 53 | ks['ntrain'] = int(item.replace('ntrain', '')) 54 | elif 'nval' in item: 55 | ks['nval'] = int(item.replace('nval', '')) 56 | elif 'fix' in item: 57 | ks['fix'] = int(item.replace('fix', '')) 58 | elif 'ntest' in item: 59 | ks['ntest'] = int(item.replace('ntest', '')) 60 | elif 'Ar' in item: 61 | ks['Ar'] = float(item.replace('Ar', '')) 62 | elif 'br' in item: 63 | ks['br'] = float(item.replace('br', '')) 64 | elif 'sign' in item: 65 | ks['sign'] = item.replace('sign', '') 66 | return ks 67 | 68 | 69 | def find_k(w, r, shape, adjust=adjust): 70 | if shape == 'cube': 71 | k = adjust * (r / w).abs().min() 72 | elif shape == 'sphere': 73 | k = adjust * r 74 | return k 75 | 76 | def main(): 77 | parser = ArgumentParser() 78 
| parser.add_argument('--path', help='path to model') 79 | parser.add_argument('--log', help='path to the training log') 80 | parser.add_argument('--device', type=int, default=0, help='which gpu to use if any (default: 0)') 81 | args = parser.parse_args() 82 | 83 | with open(args.log, 'r') as f: 84 | args_dict = f.readlines()[0] 85 | params = eval(args_dict) 86 | 87 | # Parse path to get model hyper-parameters 88 | args.mlp_layer = params['mlp_layer'] 89 | args.hidden_dim = params['hidden_dim'] 90 | args.input_dim = params['input_dim'] 91 | args.output_dim = params['output_dim'] 92 | args.activation = 'relu' 93 | args.option = params['option'] 94 | 95 | args.loss_fn = params['loss_fn'] 96 | args.optimizer = params['optimizer'] 97 | args.lr = 0 98 | args.decay = 0 99 | args.device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 100 | 101 | # finding datasets 102 | args.path.replace("data", "models_dir") 103 | dataset_name = params['data'] 104 | 105 | # Load data 106 | datasets = { 107 | 'train': load_data(dataset_name, 0), 108 | 'dev': load_data(dataset_name, 1), 109 | 'test': load_data(dataset_name, 2) 110 | } 111 | dim = datasets['train'][0][0].shape[0] 112 | 113 | # Load model 114 | model = RegFCOutputModel(args).to(args.device) 115 | checkpoint = torch.load(args.path, map_location=args.device) 116 | model.load_state_dict(checkpoint['state_dict']) 117 | model.eval() 118 | 119 | dataset = datasets['test'] 120 | args.batch_size = len(dataset) 121 | dataset = cvt_data_axis(dataset) 122 | input_nodes, label = tensor_data(dataset, 0, args) 123 | data = input_nodes.data.view((-1, dim)) 124 | 125 | r = get_range(dataset_name) 126 | shape = get_data_shape(dataset_name) 127 | data_settings = get_data_settings(dataset_name) 128 | data_settings['shape'] = shape 129 | mean = np.zeros(dim) 130 | cov = np.identity(dim) 131 | 132 | scores, xps = [], np.zeros((n_trials, dim)) 133 | bad_count = 0 134 | for i in range(n_trials): 135 | w = np.random.multivariate_normal(mean, cov) # sample w from a hypersphere 136 | w = torch.Tensor(w) / torch.Tensor(w).norm() # normalize w 137 | k = find_k(w, r, shape) 138 | 139 | sign_x = w.sign() 140 | w_abs = w.abs() 141 | 142 | ks = torch.Tensor(np.linspace(0, extend_ratio*data.max().item(), num=n_sample)).reshape(n_sample) + k 143 | ks = ks.T 144 | ws = w_abs.repeat(n_sample, 1) 145 | ws = ws * ks[:, None] 146 | wks = sign_x * ws 147 | 148 | wks = wks.to(args.device) 149 | with torch.no_grad(): 150 | y0 = model(wks[0]).item() 151 | ys = model(wks).data.cpu().numpy() 152 | 153 | 154 | X = (wks - wks[0]).cpu() 155 | y = ys - y0 156 | reg = LinearRegression().fit(X, y) 157 | score = reg.score(X, y) 158 | if score < 0.9: 159 | #print(wks[0], score) 160 | bad_count += 1 161 | scores.append(score) 162 | 163 | X = scores 164 | 165 | print(f"The mean and median R-square for the input model is: {np.mean(X):.2f} and {np.median(X):.2f}") 166 | 167 | if __name__ == '__main__': 168 | main() 169 | -------------------------------------------------------------------------------- /graph_algorithms/models/gnn_edges.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .MLPs import * 4 | from .util import * 5 | from .base import BasicModel 6 | from torch_scatter import scatter_max, scatter_min, scatter_mean, scatter_add 7 | 8 | pool_ops = {'sum': scatter_add, 'max': scatter_max, 'mean': scatter_mean, 'min': scatter_min} 9 | 10 | ''' General GNN for any 
graph 11 | ''' 12 | class GNN_E(BasicModel): 13 | def __init__(self, args): 14 | super(GNN_E, self).__init__(args, 'GNN_E') 15 | 16 | self.device = args.device 17 | 18 | self.n_iter = args.n_iter 19 | self.mlp_layer = args.mlp_layer 20 | self.hidden_dim = args.hidden_dim 21 | self.fc_output_layer = args.fc_output_layer 22 | self.graph_pooling_type = args.graph_pooling_type 23 | self.neighbor_pooling_type = args.neighbor_pooling_type 24 | 25 | self.node_feature_size = args.node_feature_size 26 | self.answer_size = calc_output_size(args) 27 | self.edge_feature_size = args.edge_feature_size 28 | self.add_self_loop = args.add_self_loop 29 | self.weight = args.weight 30 | 31 | self.MLP0 = torch.nn.ModuleList() 32 | 33 | for layer in range(self.n_iter): 34 | if layer == 0: 35 | self.MLP0.append(MLP(self.mlp_layer, self.node_feature_size*2 + self.edge_feature_size, self.hidden_dim, self.hidden_dim)) 36 | else: 37 | self.MLP0.append(MLP(self.mlp_layer, self.hidden_dim*2 + self.edge_feature_size, self.hidden_dim, self.hidden_dim)) 38 | 39 | self.MLP0.append(MLP(self.mlp_layer, self.hidden_dim, self.hidden_dim, self.hidden_dim)) 40 | 41 | if args.loss_fn == 'cls': 42 | self.fcout = FCOutputModel(self.fc_output_layer, self.hidden_dim, self.hidden_dim, self.answer_size) 43 | else: 44 | self.fcout = RegFCOutputModel(self.fc_output_layer, self.hidden_dim, self.hidden_dim, self.answer_size) 45 | 46 | if args.optimizer == 'Adam': 47 | self.optimizer = optim.Adam(self.parameters(), lr=args.lr, weight_decay=args.decay) 48 | else: 49 | self.optimizer = optim.SGD(self.parameters(), lr=args.lr, weight_decay=args.decay) 50 | 51 | def preprocess_neighbors_list(self, batch_graph): 52 | padded_neighbor_list = [] 53 | padded_self_list = [] 54 | graph_level_list = [] 55 | edges_weights = [] 56 | start_idx = [0] 57 | 58 | for i, graph in enumerate(batch_graph): 59 | start_idx.append(start_idx[i] + len(graph.neighbors)) 60 | graph_level_list.extend([i]*(start_idx[i+1]-start_idx[i])) 61 | 62 | padded_neighbors = [] 63 | padded_self = [] 64 | for j in range(len(graph.neighbors)): 65 | #add off-set values to the neighbor indices 66 | pad = [n + start_idx[i] for n in graph.neighbors[j]] 67 | 68 | #add self-loop 69 | if self.add_self_loop: 70 | pad.append(j + start_idx[i]) 71 | padded_self.extend([j + start_idx[i]]*(len(graph.neighbors[j])+1)) 72 | else: 73 | padded_self.extend([j + start_idx[i]]*len(graph.neighbors[j])) 74 | 75 | padded_neighbors.extend(pad) 76 | 77 | #add edge weights for edges from j to j's neighbors 78 | if self.weight == 'weight': 79 | edges = [graph.g[j][n][self.weight] for n in graph.neighbors[j]] 80 | else: 81 | edges = [0 for n in graph.neighbors[j]] 82 | 83 | if self.add_self_loop: 84 | edges.append(0) 85 | 86 | edges_weights.extend(edges) 87 | padded_neighbor_list.extend(padded_neighbors) 88 | padded_self_list.extend(padded_self) 89 | 90 | return torch.LongTensor(padded_neighbor_list).to(self.device), torch.LongTensor(padded_self_list).to(self.device), torch.LongTensor(graph_level_list).to(self.device), torch.FloatTensor(edges_weights).to(self.device) 91 | 92 | ''' One iteration/layer of GNNs, call n_layer times in forward 93 | ''' 94 | def reason_step(self, h, layer, padded_neighbor_list, padded_self_list, edges_weights): 95 | #print("reasoning layer", layer, padded_neighbor_list.shape) 96 | x_i = h[padded_neighbor_list] 97 | x_j = h[padded_self_list] 98 | 99 | # x_pair denotes the edges from v_j to v_i 100 | x_pair = torch.cat((x_i, x_j, edges_weights), dim=-1) 101 | 102 | relations = 
self.MLP0[layer](x_pair) 103 | 104 | # when we aggregate on vertex u, we aggregate all edges whose ending point is u (incoming edges to u) 105 | if self.neighbor_pooling_type in ['max', 'min']: 106 | x, _ = pool_ops[self.neighbor_pooling_type](relations, padded_neighbor_list, dim=0) 107 | else: 108 | x = pool_ops[self.neighbor_pooling_type](relations, padded_neighbor_list, dim=0) 109 | return x 110 | 111 | def forward(self, batch_graph): 112 | x = torch.cat([graph.node_features for graph in batch_graph], 0).to(self.device) 113 | padded_neighbor_list, padded_self_list, graph_level_list, edges_weights = self.preprocess_neighbors_list(batch_graph) 114 | edges_weights = edges_weights.unsqueeze(1) 115 | for layer in range(self.n_iter): 116 | x = self.reason_step(x, layer, padded_neighbor_list, padded_self_list, edges_weights) 117 | 118 | if self.graph_pooling_type in ['max', 'min']: 119 | x, _ = pool_ops[self.graph_pooling_type](x, graph_level_list, dim=0) 120 | else: 121 | x = pool_ops[self.graph_pooling_type](x, graph_level_list, dim=0) 122 | x = self.fcout(x) 123 | return x 124 | -------------------------------------------------------------------------------- /feedforward/README.md: -------------------------------------------------------------------------------- 1 | # How Feedforward Neural Networks Extrapolate 2 | 3 | We provide instructions to reproduce the following categories of experiments on feedforward networks: 4 | - Learning simple nonlinear tasks. 5 | - Computation of R^2 of the learned functions (Theorem 1). 6 | - Learning linear tasks with different data geometry (Theorem 2). 7 | - Exact computation in the neural tangent kernel regime (Lemma 1); the three experiments above are in the regular (finite-width) regime. 8 | 9 | 10 | ## Data generation 11 | This section shows the general rules for generating the data files via [`data_generation.py`](./data_generation.py). For each task, we also provide specific instructions below to generate the required datasets. 12 | 13 | The folder [`sample_scripts`](./sample_scripts) contains scripts to generate the datasets. Generated data files will be under the `data` folder by default. Data files are named with the following rule: 14 | 15 | ``` 16 | {target function type}_xdim{input dimension}_item1_trains{shape of training data}_tests{shape of test data}_signno_fix1_testr{test data range}_trainr{training data range}_valr{validation data range}_ntrain{# of training data}_nval{# of validation data}_ntest{# of test data}_Ar1.0_br0.0.pickle 17 | ``` 18 | For example, the data file 19 | 20 | ``` 21 | quadratic_xdim8_item1_trainssphere_testsball_signno_fix1_testr5.0_trainr1.0_valr1.0_ntrain20000_nval1000_ntest20000_Ar1.0_br0.0.pickle 22 | ``` 23 | denotes the dataset with a quadratic target function and input dimension = 8. There are 20000 samples in the training set, each uniformly sampled from an eight-dimensional hyper-sphere with radius 1.0. The validation and test sets are interpreted similarly.
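The snippet below is a minimal sketch, not part of the repository's scripts, of how such a file name can be parsed back into its generation settings; it mirrors the parsing in [`sphere_rsquare.py`](./sphere_rsquare.py), which relies on the same naming convention.

```
# Minimal sketch (illustrative, not part of the repository): recover the
# generation settings encoded in a data file name.
def parse_settings(path):
    name = path.split('/')[-1].replace('.pickle', '')
    keys = ['xdim', 'trainr', 'valr', 'testr', 'ntrain', 'nval', 'ntest', 'Ar', 'br']
    settings = {}
    for item in name.split('_'):
        for key in keys:
            if item.startswith(key) and len(item) > len(key):
                value = item[len(key):]
                settings[key] = float(value) if '.' in value else int(value)
    return settings

# -> {'xdim': 8, 'testr': 5.0, 'trainr': 1.0, 'valr': 1.0, 'ntrain': 20000, ...}
print(parse_settings('quadratic_xdim8_item1_trainssphere_testsball_signno_fix1_'
                     'testr5.0_trainr1.0_valr1.0_ntrain20000_nval1000_ntest20000_Ar1.0_br0.0.pickle'))
```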
24 | 25 | 26 | ## Learning simple nonlinear tasks 27 | This section shows training feedforward networks on simple tasks and computing the extrapolation error in MAPE. 28 | 29 | First type the following commands to generate datasets. All generated data files will be under the `data/non-linear` folder by default. 30 | 31 | ``` 32 | python sample_scripts/sample_quadratic.py 33 | python sample_scripts/sample_cos.py 34 | python sample_scripts/sample_sqrt.py 35 | python sample_scripts/sample_l1.py 36 | ``` 37 | 38 | The following command provides an example to train a ReLU MLP on a generated task: 39 | 40 | ``` 41 | CUDA_VISIBLE_DEVICES=0 python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.0001 --mlp_layer=2 --hidden_dim=128 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/non-linear/quadratic/quadratic_xdim8_item1_trainssphere_testsball_signno_fix1_testr5.0_trainr1.0_valr1.0_ntrain20000_nval1000_ntest20000_Ar1.0_br0.0.pickle 42 | ``` 43 | 44 | The training logs will be under the `results` folder by default. Script [`MLP_with_relu_on_nonlinear_data.sh`](./MLP_with_relu_on_nonlinear_data.sh) shows more examples. 45 | 46 | 47 | ## Computation of R^2 48 | This section shows how to reproduce the 0.99 R^2 of the learned functions in the OOD domain, which supports the non-asymptotic linear extrapolation result (Theorem 1). 49 | 50 | 51 | First type the following commands to generate datasets. 52 | 53 | ``` 54 | python sample_scripts/sample_quadratic.py 55 | python sample_scripts/sample_cos.py 56 | python sample_scripts/sample_sqrt.py 57 | python sample_scripts/sample_l1.py 58 | ``` 59 | 60 | To compute the R^2 of the network's learned function along randomly sampled directions in the out-of-distribution domain, we first need a saved model, whose path is supplied together with its training log to [`sphere_rsquare.py`](./sphere_rsquare.py). The following command provides an example of first training an MLP and saving the best model (based on validation error) with the `--save_model` flag: 61 | 62 | ``` 63 | CUDA_VISIBLE_DEVICES=0 python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.0001 --mlp_layer=2 --hidden_dim=128 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/non-linear/quadratic/quadratic_xdim8_item1_trainssphere_testsball_signno_fix1_testr5.0_trainr1.0_valr1.0_ntrain20000_nval1000_ntest20000_Ar1.0_br0.0.pickle --save_model 64 | ``` 65 | 66 | The following command computes the R^2 of the saved model in the OOD domain: 67 | 68 | ``` 69 | python sphere_rsquare.py --path=./models_dir/linear_xdim16_item1_trainssphere_testsball_signno_fix1_testr200.0_trainr5.0_valr5.0_ntrain10000_nval1000_ntest2000_Ar20.0_br0.0.pickle/FeedForward_lr0.001_actrelu_mlp2_hdim256_idim16_odim1_bs64_optionNone_epoch500_seed2.log/model_best.pth.tar --log=./results/linear_xdim16_item1_trainssphere_testsball_signno_fix1_testr200.0_trainr5.0_valr5.0_ntrain10000_nval1000_ntest2000_Ar20.0_br0.0.pickle/FeedForward_lr0.001_actrelu_mlp2_hdim256_idim16_odim1_bs64_optionNone_epoch500_seed2.log 70 | ``` 71 | 72 | 73 | ## Learning linear tasks with different data geometry 74 | This section shows how the learned function interacts with the geometry of the training data (Theorem 2). 75 | 76 | First type the following commands to generate datasets. All generated data files will be under the `data/systematic_linear` (training data covers all directions) or `data/linear_miss_direction` (training data is restricted in some direction) folder by default. 77 | 78 | ``` 79 | python sample_scripts/sample_linear.py 80 | python sample_scripts/sample_direction.py 81 | ``` 82 | The following command provides an example for training: 83 | 84 | ``` 85 | CUDA_VISIBLE_DEVICES=0 python main.py --activation=relu --loss_fn=reg --decay=1e-5 --lr=0.001 --mlp_layer=2 --hidden_dim=256 --batch_size=64 --epochs=500 --optimizer=Adam --data=./data/systematic_linear/linear_xdim16_item1_trainssphere_testsball_signno_fix1_testr200.0_trainr5.0_valr5.0_ntrain10000_nval1000_ntest2000_Ar20.0_br0.0.pickle 86 | ``` 87 | 88 | The training logs will be under the `results` folder. Refer to script [`MLP_with_relu_on_linear_data.sh`](./MLP_with_relu_on_linear_data.sh) for more options.
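The sample scripts invoke [`data_generation.py`](./data_generation.py) with task-specific flags. The command below is an illustrative sketch of generating a "missing direction" dataset; the specific values are placeholders (see [`sample_scripts/sample_direction.py`](./sample_scripts/sample_direction.py) for the settings actually used). Here `--sign=p` together with `--fix=1` keeps the first input coordinate non-negative, so the training data covers only a restricted set of directions:

```
python data_generation.py --folder='data/linear_miss_direction' --data='linear' --x_dim=16 --train_shape='cube' --test_shape='ball' --sign=p --fix=1 --test_r=200.0 --train_r=5.0 --val_r=5.0 --n_train=10000 --n_val=1000 --n_test=2000 --A_r=20.0 --b_r=0.0
```

The resulting pickle file can then be passed to `main.py --data=...` exactly as in the training command above.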
89 | 90 | 91 | 92 | ## Exact computation with neural tangent kernel 93 | This section validates Lemma 1: provable extrapolation of linear functions from 2d training points. We provide an implementation of the exact computation for infinitely wide neural networks, i.e., the neural tangent kernel. In this set of experiments, the training data contains an orthogonal basis and their opposite vectors. A two-layer neural tangent kernel (NTK) achieves zero test error up to machine precision. 94 | 95 | First type the following command to generate the training data. 96 | 97 | ``` 98 | python data_generation.py --folder='data/ntk_linear' --data='linear' --x_dim=32 --train_shape='basis' --test_shape='ball' --sign=no --fix=1 --test_r=20.0 --train_r=5.0 --val_r=5.0 --n_train=1000 --n_val=1000 --n_test=2000 --A_r=20.0 --b_r=0.0 99 | ``` 100 | 101 | Compute the NTK's performance on the generated dataset with the following command. The code for the NTK is adapted from Arora et al. (2020). 102 | 103 | ``` 104 | python NTK_main.py --data=data/ntk_linear/linear_xdim32_item1_trainsbasis_testsball_signno_fix1_testr20.0_trainr5.0_valr5.0_ntrain1000_nval1000_ntest2000_Ar20.0_br0.0.pickle 105 | ``` 106 | -------------------------------------------------------------------------------- /n_body/MLPs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.autograd import Variable 7 | from util import * 8 | 9 | cls_criterion = torch.nn.CrossEntropyLoss() 10 | mse_criterion = torch.nn.MSELoss() 11 | lossfun = {'cls': cls_criterion, 'reg': mse_criterion, 'mape': mape} 12 | actfun = {'relu': F.relu, 'tanh': F.tanh, 'sigmoid': F.sigmoid} 13 | 14 | class BasicModel(nn.Module): 15 | def __init__(self, args, name): 16 | super(BasicModel, self).__init__() 17 | self.name=name 18 | self.loss_fn = args.loss_fn 19 | self.activation = args.activation 20 | self.actfunc = actfun[self.activation] 21 | 22 | def train_(self, input_nodes, label): 23 | self.optimizer.zero_grad() 24 | if self.name == 'InteractionNetwork': 25 | bs = label.shape[0] 26 | output = self(input_nodes, bs) 27 | else: 28 | output = self(input_nodes) 29 | 30 | if self.loss_fn != 'cls': 31 | loss = lossfun[self.loss_fn](output.view(label.shape), label) 32 | 33 | loss.backward() 34 | self.optimizer.step() 35 | 36 | mape_loss = lossfun['mape'](output.view(label.shape), label) 37 | return 0, loss.cpu(), mape_loss.cpu() 38 | else: 39 | loss = lossfun[self.loss_fn](output, label) 40 | 41 | loss.backward() 42 | self.optimizer.step() 43 | 44 | pred = output.data.max(1)[1] 45 | correct = pred.eq(label.data).cpu().sum() 46 | accuracy = correct.to(dtype=torch.float) * 100. 
/ len(label) 47 | 48 | return accuracy, loss.cpu(), 0 49 | 50 | def test_(self, input_nodes, label, print_info=False): 51 | with torch.no_grad(): 52 | if self.name == 'InteractionNetwork': 53 | bs = label.shape[0] 54 | output = self(input_nodes, bs) 55 | else: 56 | output = self(input_nodes) 57 | if print_info: 58 | print(output.view(-1), label) 59 | 60 | if self.loss_fn != 'cls': 61 | loss = lossfun[self.loss_fn](output.view(label.shape), label) 62 | mape_loss = lossfun['mape'](output.view(label.shape), label) 63 | 64 | return 0, loss.cpu(), mape_loss.cpu() 65 | else: 66 | loss = lossfun[self.loss_fn](output, label) 67 | pred = output.data.max(1)[1] 68 | correct_ind = pred.eq(label.data).cpu() 69 | correct = pred.eq(label.data).cpu().sum() 70 | accuracy = correct.to(dtype=torch.float) * 100. / len(label) 71 | 72 | return accuracy, loss.cpu(), 0 73 | 74 | def pred_(self, input_nodes): 75 | with torch.no_grad(): 76 | output = self(input_nodes) 77 | pred = output.data.max(1)[1] 78 | return pred 79 | 80 | def save_model(self, epoch): 81 | torch.save(self.state_dict(), 'model/epoch_{}_{:02d}.pth'.format(self.name, epoch)) 82 | 83 | class MLP(nn.Module): 84 | def __init__(self, n_layer, input_dim, hidden_dim, output_dim): 85 | super(MLP, self).__init__() 86 | 87 | self.n_layer = n_layer 88 | self.linears = nn.ModuleList([nn.Linear(input_dim, hidden_dim)]) 89 | for layer in range(n_layer-2): 90 | self.linears.append(nn.Linear(hidden_dim, hidden_dim)) 91 | self.linears.append(nn.Linear(hidden_dim, output_dim)) 92 | 93 | def forward(self, x): 94 | for layer in range(self.n_layer): 95 | x = self.linears[layer](x) 96 | x = F.relu(x) 97 | 98 | return x 99 | 100 | class FCOutputModel(nn.Module): 101 | def __init__(self, n_layer, input_dim, hidden_dim, output_dim): 102 | super(FCOutputModel, self).__init__() 103 | self.n_layer = n_layer 104 | if self.n_layer == 1: 105 | self.linears = nn.ModuleList([nn.Linear(input_dim, output_dim)]) 106 | else: 107 | self.linears = nn.ModuleList([nn.Linear(input_dim, hidden_dim)]) 108 | for layer in range(n_layer-2): 109 | self.linears.append(nn.Linear(hidden_dim, hidden_dim)) 110 | self.linears.append(nn.Linear(hidden_dim, int(output_dim))) 111 | 112 | def forward(self, x): 113 | if self.n_layer == 1: 114 | x = self.linears[self.n_layer-1](x) 115 | return x 116 | 117 | for layer in range(self.n_layer-1): 118 | #if layer == self.n_layer - 2: 119 | #x = F.dropout(x) 120 | x = self.linears[layer](x) 121 | x = F.relu(x) 122 | 123 | #x = F.dropout(x) 124 | x = self.linears[self.n_layer-1](x) 125 | 126 | return F.log_softmax(x) 127 | 128 | class RegFCOutputModel(nn.Module): 129 | def __init__(self, n_layer, input_dim, hidden_dim, output_dim): 130 | super(RegFCOutputModel, self).__init__() 131 | self.n_layer = n_layer 132 | if self.n_layer == 1: 133 | self.linears = nn.ModuleList([nn.Linear(input_dim, output_dim)]) 134 | else: 135 | self.linears = nn.ModuleList([nn.Linear(input_dim, hidden_dim)]) 136 | for layer in range(n_layer-2): 137 | self.linears.append(nn.Linear(hidden_dim, hidden_dim)) 138 | self.linears.append(nn.Linear(hidden_dim, int(output_dim))) 139 | 140 | def forward(self, x): 141 | if self.n_layer == 1: 142 | x = self.linears[self.n_layer-1](x) 143 | return x 144 | 145 | for layer in range(self.n_layer-1): 146 | #if layer == self.n_layer - 2: 147 | #x = F.dropout(x) 148 | x = self.linears[layer](x) 149 | x = F.relu(x) 150 | 151 | #x = F.dropout(x) 152 | x = self.linears[self.n_layer-1](x) 153 | 154 | return x 155 | 156 | class Bottleneck(nn.Module): 157 | def 
__init__(self, input_dim, hidden_dim, output_dim): 158 | super(Bottleneck, self).__init__() 159 | self.linears = nn.ModuleList([nn.Linear(input_dim, hidden_dim), nn.Linear(hidden_dim, output_dim)]) 160 | 161 | def forward(self, x): 162 | x_input = x 163 | x = self.linears[0](x) 164 | x = self.linears[1](x) 165 | x = x_input + x 166 | 167 | return x 168 | 169 | class FCOutputModel_SkipConnection(nn.Module): 170 | def __init__(self, n_layer, input_dim, hidden_dim, output_dim, block=Bottleneck): 171 | super(FCOutputModel_SkipConnection, self).__init__() 172 | self.n_layer = n_layer 173 | self.blocks = torch.nn.ModuleList() 174 | self.blocks.append(nn.Linear(input_dim, hidden_dim)) 175 | for layer in range(0, self.n_layer-2, 2): 176 | self.blocks.append(block(hidden_dim, hidden_dim, hidden_dim)) 177 | self.blocks.append(nn.Linear(hidden_dim, output_dim)) 178 | 179 | def forward(self, x): 180 | index = 0 181 | x = self.blocks[index](x) 182 | index = index + 1 183 | 184 | for layer in range(0, self.n_layer-2, 2): 185 | #if layer == (self.n_layer - 4): 186 | #x = F.dropout(x) 187 | x = self.blocks[index](x) 188 | x = F.relu(x) 189 | index = index + 1 190 | 191 | #x = F.dropout(x) 192 | x = self.blocks[index](x) 193 | 194 | return F.log_softmax(x) 195 | -------------------------------------------------------------------------------- /graph_algorithms/maxdeg_generation.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import numpy.random as nprnd 4 | from random import sample 5 | import os 6 | import math 7 | import pickle 8 | import argparse 9 | import torch 10 | from util import * 11 | import torch.nn.functional as F 12 | from pathlib import Path 13 | 14 | 15 | VAL_RATIO, TEST_RATIO = 0.2, 0.5 16 | 17 | def add_self_loops(graph): 18 | for n in graph.nodes: 19 | graph.add_edge(n, n, weight=0) 20 | 21 | def max_node_degree(graph): 22 | deg_list = [deg for i, deg in graph.degree] 23 | return max(deg_list) 24 | 25 | def add_edge_weights(graph, low, high): 26 | edge_weights = nprnd.randint(low, high+1, len(graph.edges)) 27 | edge_dict = {} 28 | count = 0 29 | for edge in graph.edges: 30 | edge_dict[edge] = edge_weights[count] 31 | count += 1 32 | nx.set_edge_attributes(graph, edge_dict, 'weight') 33 | 34 | def generate_random_trees(n): 35 | return nx.random_tree(n) 36 | 37 | def generate_random_graphs(n, p): 38 | while True: 39 | graph = nx.random_graphs.erdos_renyi_graph(n, p) 40 | if nx.is_connected(graph): 41 | break 42 | return graph 43 | 44 | def generate_complete_graphs(n): 45 | return nx.complete_graph(n) 46 | 47 | def sparse_connected_graph(args, n, p, num_components): 48 | graphs = [] 49 | for i in range(num_components): 50 | graphs.append(generate_random_graphs(n, p)) 51 | 52 | current_graph = graphs[0] 53 | for i in range(1, num_components): 54 | current_graph = nx.disjoint_union(current_graph, graphs[i]) 55 | node1 = nprnd.randint((i-1)*n, i*n) 56 | node2 = nprnd.randint(i*n, (i+1)*n) 57 | current_graph.add_edge(node1, node2) 58 | 59 | return current_graph 60 | 61 | 62 | def load_data(data): 63 | s2vs = [] 64 | for g, ans in data: 65 | neighbors = [] 66 | node_features = [] 67 | for i in sorted(list(g.nodes())): 68 | neighbors.append(list(g.neighbors(i))) 69 | node_features.append(g.nodes[i]['node_features']) 70 | node_features = np.array(node_features) 71 | node_features = torch.from_numpy(node_features).float() 72 | s2vg = S2VGraph(ans, node_features, neighbors, g) 73 | s2vs.append((s2vg, ans)) 74 | 
return s2vs 75 | 76 | 77 | def generate_graphs_various_nodes(args): 78 | if args.min_n == args.max_n: 79 | n = args.min_n 80 | else: 81 | n = nprnd.randint(args.min_n, args.max_n) 82 | if args.graph_type == 'random_graph': 83 | graph = generate_random_graphs(n, args.p) 84 | elif args.graph_type == 'tree': 85 | graph = generate_random_trees(n) 86 | elif args.graph_type == 'complete': 87 | graph = generate_complete_graphs(n) 88 | elif args.graph_type == 'path': 89 | graph = nx.path_graph(n) 90 | elif args.graph_type == 'ladder': 91 | graph = nx.ladder_graph(n) 92 | elif args.graph_type == 'tree': 93 | graph = nx.random_tree(n) 94 | elif args.graph_type == 'cycle': 95 | graph = nx.cycle_graph(n) 96 | elif args.graph_type == 'star': 97 | graph = nx.star_graph(n) 98 | elif args.graph_type == '4regular': 99 | graph = nx.random_regular_graph(4, n) 100 | else: 101 | print("Invalid graph type.") 102 | 103 | return graph 104 | 105 | ''' 106 | Generate graphs based on parameters. 107 | ''' 108 | def make_graph(args, num_graphs, min_n, max_n, graph_type): 109 | graphs = [] 110 | 111 | if graph_type == 'general': 112 | num_each = int(num_graphs/9) 113 | args.min_n, args.max_n, args.graph_type = min_n, max_n, 'random_graph' 114 | for p in np.linspace(0.1, 0.9, 9): 115 | args.p = p 116 | for i in range(num_each): 117 | graph = generate_graphs_various_nodes(args) 118 | ans = max_node_degree(graph) 119 | graphs.append((graph, ans)) 120 | elif graph_type == 'expander': 121 | args.min_n, args.max_n, args.graph_type = min_n, max_n, 'random_graph' 122 | args.p = 0.8 123 | for i in range(num_graphs): 124 | graph = generate_graphs_various_nodes(args) 125 | ans = max_node_degree(graph) 126 | graphs.append((graph, ans)) 127 | elif graph_type == 'complete' or graph_type == 'path' or graph_type == 'ladder' or graph_type == 'tree': 128 | args.min_n, args.max_n, args.graph_type = min_n, max_n, graph_type 129 | for i in range(num_graphs): 130 | graph = generate_graphs_various_nodes(args) 131 | ans = max_node_degree(graph) 132 | graphs.append((graph, ans)) 133 | elif graph_type == 'cycle' or graph_type == 'star' or graph_type == '4regular': 134 | args.min_n, args.max_n, args.graph_type = min_n, max_n, graph_type 135 | for i in range(num_graphs): 136 | graph = generate_graphs_various_nodes(args) 137 | ans = max_node_degree(graph) 138 | graphs.append((graph, ans)) 139 | 140 | else: 141 | print("Invalid graph type!") 142 | exit() 143 | 144 | return graphs 145 | 146 | ''' 147 | Generate task (G, y) 148 | ''' 149 | def generate_max_degree(args, num_graphs, min_n, max_n, num_colors, graph_type): 150 | graphs = make_graph(args, num_graphs, min_n, max_n, graph_type) 151 | 152 | for graph, _ in graphs: 153 | graph_nodes = len(graph.nodes) 154 | # node * feature 155 | if args.sampling == 'identical': 156 | colors_ind = nprnd.randint(1, num_colors+1, (graph_nodes, args.node_dim)) 157 | else: 158 | colors_ind = nprnd.uniform(-num_colors, num_colors, (graph_nodes, args.node_dim)) 159 | node_dict = {} 160 | ind = 0 161 | for node in graph.nodes: 162 | node_dict[node] = colors_ind[ind] 163 | ind += 1 164 | nx.set_node_attributes(graph, node_dict, 'node_features') 165 | return graphs 166 | 167 | 168 | def main(): 169 | # parameters for graph_generation 170 | parser = argparse.ArgumentParser(description='Graph generation') 171 | parser.add_argument('--graph_type', type=str, default='random_graph', help='select which graph type to generate') 172 | parser.add_argument('--train_min_n', default=20, type=int, help='min number of nodes in the 
graph') 173 | parser.add_argument('--train_max_n', default=30, type=int, help='max number of nodes in the graph') 174 | parser.add_argument('--test_min_n', default=50, type=int, help='min number of nodes in the graph') 175 | parser.add_argument('--test_max_n', default=100, type=int, help='min number of nodes in the graph') 176 | parser.add_argument('--train_p', default=0.9, type=float, help='probability of edges in erdos_renyi_graph') 177 | parser.add_argument('--test_p', default=0.1, type=float, help='probability of edges in erdos_renyi_graph') 178 | parser.add_argument('--train_color', default=5, type=int, help='number of colors') 179 | parser.add_argument('--test_color', default=10, type=int, help='number of colors') 180 | parser.add_argument('--node_dim', default=3, type=int, help='number of node features') 181 | parser.add_argument('--train_graph', default='4regular', type=str, help='train graph type') 182 | parser.add_argument('--test_graph', default='general', type=str, help='test graph type') 183 | parser.add_argument('--folder', default='data', type=str, help='folder to store the generated data') 184 | parser.add_argument('--sampling', default='uniform', choices=['uniform', 'identical'], type=str, help='sampling rules for the node features') 185 | 186 | parser.add_argument('--min_edge_weight', default=1, type=int, help='min edge weight in the graph') 187 | parser.add_argument('--max_edge_weight', default=30, type=int, help='max edge weight in the graph') 188 | parser.add_argument('--max_hop', default=14, type=int, help='max number of hops expected in the shortest path') 189 | parser.add_argument('--num_levels', default=10, type=int, help='num of possible levels for each node') 190 | parser.add_argument('--num_graphs', default=5000, type=int, help='num of graphs we want in the train dataset') 191 | parser.add_argument('--data', type=str, help='data filename') 192 | parser.add_argument('--random', default=0, type=int, help='random seed') 193 | args = parser.parse_args() 194 | 195 | random_seed = args.random 196 | np.random.seed(random_seed) 197 | random.seed(random_seed) 198 | 199 | Path(args.folder).mkdir(parents=True, exist_ok=True) 200 | output = './%s/%s.pickle' %(args.folder, args.data) 201 | 202 | if not os.path.exists(output): 203 | train = generate_max_degree(args, args.num_graphs, args.train_min_n, args.train_max_n, args.train_color, args.train_graph) 204 | train = load_data(train) 205 | val = generate_max_degree(args, max(int(args.num_graphs*VAL_RATIO),1), args.train_min_n, args.train_max_n, args.train_color, args.train_graph) 206 | val = load_data(val) 207 | test = generate_max_degree(args, max(int(args.num_graphs*TEST_RATIO),1), args.test_min_n, args.test_max_n, args.test_color, args.test_graph) 208 | test = load_data(test) 209 | 210 | with open(output, 'wb') as f: 211 | pickle.dump((train, val, test), f) 212 | 213 | print("data file saved to %s" % output) 214 | 215 | if __name__ == '__main__': 216 | main() 217 | -------------------------------------------------------------------------------- /n_body/physics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code modified based on jaesik817's physics engine. 
3 | Original implementation: 4 | https://github.com/jaesik817/Interaction-networks_tensorflow/blob/master/physics_engine.py 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os 11 | import pickle 12 | import torch 13 | import random 14 | import math 15 | from pathlib import Path 16 | from math import cos, pi, radians, sin, ceil 17 | import argparse 18 | import matplotlib 19 | import numpy as np 20 | matplotlib.use("Agg") 21 | import matplotlib.pyplot as plt 22 | from torch.autograd import Variable 23 | import matplotlib.animation as manimation 24 | from sklearn.metrics import pairwise_distances 25 | 26 | VAL_RATIO, TEST_RATIO = 0.25, 0.5 27 | 28 | # number of features, [mass, x, y, vx, vy] 29 | num_features = 5 30 | CENTER_MASS = 100.0 31 | 32 | G = 10 ** 5 33 | 34 | def init(args, train, n_body, orbit, ts): 35 | """ 36 | Initialization on just the first time step; fill all other time steps with 0's 37 | :param n_body: number of objects 38 | :param num_features: number of features 39 | :param orbit: whether simulate planet orbit 40 | :return: a numpy vector of shape (ts, n_body, num_features) 41 | """ 42 | data = np.zeros((ts, n_body, num_features), dtype=float) 43 | if args.ext == 'mass' and not train: 44 | center_ratio = args.center_ratio 45 | obj_ratio = args.obj_ratio 46 | else: 47 | center_ratio, obj_ratio = 1.0, 1.0 48 | 49 | if orbit: 50 | data[0][0][0] = CENTER_MASS * center_ratio 51 | data[0][0][1:5] = 0.0 52 | for i in range(1, n_body): 53 | data[0][i][0] = (np.random.rand() * 8.98 + 0.02) * obj_ratio 54 | distance = np.random.rand() * 90.0 + 10.0 55 | theta = np.random.rand() * 360 56 | theta_rad = pi / 2 - radians(theta) 57 | data[0][i][1] = distance * cos(theta_rad) 58 | data[0][i][2] = distance * sin(theta_rad) 59 | data[0][i][3] = -1 * data[0][i][2] / norm(data[0][i][1:3]) * ( 60 | G * data[0][0][0] / norm(data[0][i][1:3]) ** 2) * distance / 1000 61 | data[0][i][4] = data[0][i][1] / norm(data[0][i][1:3]) * ( 62 | G * data[0][0][0] / norm(data[0][i][1:3]) ** 2) * distance / 1000 63 | else: 64 | for i in range(n_body): 65 | data[0][i][0] = np.random.rand() * 8.98 + 0.02 # mass 66 | distance = np.random.rand() * 90.0 + 10.0 67 | theta = np.random.rand() * 360 68 | theta_rad = pi / 2 - radians(theta) 69 | data[0][i][1] = distance * cos(theta_rad) # x pos 70 | data[0][i][2] = distance * sin(theta_rad) # y pos 71 | data[0][i][3] = np.random.rand() * 6.0 - 3.0 # x vel 72 | data[0][i][4] = np.random.rand() * 6.0 - 3.0 # y vel 73 | return data 74 | 75 | def norm(x): 76 | return np.sqrt(np.sum(x ** 2)) 77 | 78 | def get_f(receiver, sender): 79 | """ 80 | Return gravitational force between two bodies (in vector form). 81 | F = G*m1*m2 / r**2 82 | """ 83 | diff = sender[1:3] - receiver[1:3] # difference in (x, y) 84 | distance = norm(diff) 85 | if distance < 1: 86 | distance = 1 87 | return G * receiver[0] * sender[0] / (distance ** 3) * diff 88 | 89 | def calc(cur_state, n_body, dt): 90 | """ 91 | Given current states of n objects, calculate their next states. 
92 | :return: a numpy vector of shape (n_body, num_features) 93 | """ 94 | next_state = np.zeros((n_body, num_features), dtype=float) 95 | f_mat = np.zeros((n_body, n_body, 2), dtype=float) 96 | f_sum = np.zeros((n_body, 2), dtype=float) 97 | acc = np.zeros((n_body, 2), dtype=float) 98 | for i in range(n_body): 99 | for j in range(i + 1, n_body): 100 | if j != i: 101 | # i is receiver, j is sender 102 | f = get_f(cur_state[i][:3], cur_state[j][:3]) 103 | f_mat[i, j] += f 104 | f_mat[j, i] -= f 105 | f_sum[i] = np.sum(f_mat[i], axis=0) 106 | acc[i] = f_sum[i] / cur_state[i][0] # F = ma 107 | next_state[i][0] = cur_state[i][0] 108 | next_state[i][3:5] = cur_state[i][3:5] + acc[i] * dt 109 | next_state[i][1:3] = cur_state[i][1:3] + next_state[i][3:5] * dt 110 | return next_state 111 | 112 | def gen(args, train): 113 | """ 114 | Return time-series data for object motions. 115 | :return: a numpy vector of shape (ts, n_body, num_features) 116 | """ 117 | # initialize the first time step 118 | data = init(args, train, args.n_body, args.orbit, args.ts) 119 | 120 | # calculate data for remaining time steps 121 | for i in range(1, args.ts): 122 | data[i] = calc(data[i - 1], args.n_body, args.dt) 123 | return data 124 | 125 | def make_video(xy, filename): 126 | os.system("rm -rf pics/*") 127 | FFMpegWriter = manimation.writers['ffmpeg'] 128 | metadata = dict(title='Movie Test', artist='Matplotlib', comment='Movie support!') 129 | writer = FFMpegWriter(fps=45, metadata=metadata) 130 | fig = plt.figure() 131 | plt.xlim(-200, 200) 132 | plt.ylim(-200, 200) 133 | fig_num = len(xy) 134 | color = ['ro', 'bo', 'go', 'ko', 'yo', 'mo', 'co'] 135 | with writer.saving(fig, filename, fig_num): 136 | for i in range(fig_num): 137 | for j in range(len(xy[0])): 138 | plt.plot(xy[i, j, 1], xy[i, j, 0], color[j % len(color)]) 139 | writer.grab_frame() 140 | 141 | def format_data_in(data): 142 | dataset = [] 143 | for idx in range(len(data) - 1): 144 | obj = data[idx, :, :] 145 | target = data[idx + 1, :, 3:] 146 | position = data[idx + 1, :, :3] 147 | dataset.append((obj, target, position)) 148 | return dataset 149 | 150 | def sample_data(dataset, ratio, flag=False): 151 | indices = list(range(len(dataset))) 152 | random.shuffle(indices) 153 | data_num = math.ceil(ratio*len(dataset)) 154 | if flag: 155 | data_idx = indices[data_num:] 156 | else: 157 | data_idx = indices[:data_num] 158 | data = [dataset[i] for i in data_idx] 159 | return data 160 | 161 | def get_min_dist(objs): 162 | pair_dist = pairwise_distances(objs[:, 1:3], metric='l2') 163 | min_dist = min([pair_dist[i][j] for i in range(len(pair_dist)) for j in range(len(pair_dist[i])) if i != j]) 164 | return min_dist 165 | 166 | def get_max_dist(objs): 167 | pair_dist = pairwise_distances(objs[:, 1:3], metric='l2') 168 | max_dist = max([pair_dist[i][j] for i in range(len(pair_dist)) for j in range(len(pair_dist[i])) if i != j]) 169 | return max_dist 170 | 171 | def generate_videos(args, train): 172 | videos = [] 173 | for i in range(args.scenes): 174 | data = gen(args, train) 175 | dataset = format_data_in(data) 176 | videos.extend(dataset) 177 | return videos 178 | 179 | def filter_videos(args, videos, train): 180 | filtered_videos = [] 181 | for v in videos: 182 | objs = v[0] 183 | if train: 184 | min_dist = get_min_dist(objs) 185 | if min_dist > args.tr_dist: 186 | filtered_videos.append(v) 187 | else: 188 | max_dist = get_max_dist(objs) 189 | if max_dist < args.tt_dist and max_dist > 1: 190 | filtered_videos.append(v) 191 | return filtered_videos 192 | 193 
| def generate_data(args, n_samples, train=True): 194 | filtered_videos = [] 195 | while(len(filtered_videos) < n_samples): 196 | videos = generate_videos(args, train) 197 | if args.ext == 'dist': 198 | filtered_videos.extend(filter_videos(args, videos, train)) 199 | elif args.ext == 'mass': 200 | filtered_videos.extend(filter_videos(args, videos, train=True)) 201 | else: 202 | filtered_videos.extend(filter_videos(args, videos, train=True)) 203 | random.shuffle(filtered_videos) 204 | filtered_videos = filtered_videos[:n_samples] 205 | return filtered_videos 206 | 207 | def main(): 208 | parser = argparse.ArgumentParser(description='n-body simulation') 209 | 210 | #Model specifications 211 | parser.add_argument('--n_body', type=int, default=3, help='number of objects') 212 | parser.add_argument('--ts', type=int, default=500, help='number of time steps per video') 213 | parser.add_argument('--dt', type=float, default=0.001, help='time elapse unit') 214 | parser.add_argument('--sample_ratio', type=float, default=0.8, help='time elapse unit') 215 | parser.add_argument('--scenes', type=int, default=100, help='number of videos per dataset') 216 | parser.add_argument('--seed', type=int, default=0, help='random seed for generating the data') 217 | parser.add_argument('--orbit', type=bool, default=True, help='initial condition for the objects') 218 | parser.add_argument('--reverse', type=bool, default=False, help='initial condition for the objects') 219 | parser.add_argument('--output', type=str, default='./data', help='output directory') 220 | parser.add_argument('--n_samples', type=int, default=10000, help='number of samples for training data') 221 | parser.add_argument('--tr_dist', type=float, default=30, help='the min dist of each training data must be > tr_dist') 222 | parser.add_argument('--tt_dist', type=float, default=20, help='the max dist of each test data must be < tt_dist') 223 | parser.add_argument('--center_ratio', type=float, default=1.0, help='ratio of the mass of center object in test data') 224 | parser.add_argument('--obj_ratio', type=float, default=1.0, help='ratio of the mass of surrounding objects in test data') 225 | parser.add_argument('--data', type=str, help='filename of the stored data link file') 226 | parser.add_argument('--ext', type=str, default='dist', choices=['mass', 'dist', 'None'], help='generate data on exrapolating distance') 227 | 228 | args = parser.parse_args() 229 | np.random.seed(args.seed) 230 | random.seed(args.seed) 231 | 232 | train_data = generate_data(args, args.n_samples, train=True) 233 | val_data = generate_data(args, int(args.n_samples*VAL_RATIO), train=True) 234 | test_data = generate_data(args, int(args.n_samples*TEST_RATIO), train=False) 235 | 236 | Path("./%s" %(args.output)).mkdir(parents=True, exist_ok=True) 237 | output = args.data 238 | 239 | with open("./data/%s.pickle" %output, 'wb') as f: 240 | pickle.dump((train_data, val_data, test_data), f) 241 | 242 | print("data saved to %s" %output) 243 | 244 | if __name__ == '__main__': 245 | main() 246 | -------------------------------------------------------------------------------- /graph_algorithms/shortest_generation.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import numpy.random as nprnd 4 | from random import sample 5 | import os 6 | import math 7 | import pickle 8 | import argparse 9 | import torch 10 | from util import * 11 | import torch.nn.functional as F 12 | from pathlib import Path 13 | 14 
| 15 | VAL_RATIO, TEST_RATIO = 0.1, 0.25 16 | 17 | def add_self_loops(graph): 18 | for n in graph.nodes: 19 | graph.add_edge(n, n, weight=0) 20 | 21 | def max_node_degree(graph): 22 | deg_list = [deg for i, deg in graph.degree] 23 | return max(deg_list) 24 | 25 | def add_edge_weights(graph, low, high): 26 | edge_weights = nprnd.randint(low, high+1, len(graph.edges)) 27 | edge_dict = {} 28 | count = 0 29 | for edge in graph.edges: 30 | edge_dict[edge] = edge_weights[count] 31 | count += 1 32 | nx.set_edge_attributes(graph, edge_dict, 'weight') 33 | 34 | def generate_random_trees(n): 35 | return nx.random_tree(n) 36 | 37 | def generate_random_graphs(n, p): 38 | while True: 39 | graph = nx.random_graphs.erdos_renyi_graph(n, p) 40 | if nx.is_connected(graph): 41 | break 42 | return graph 43 | 44 | def generate_complete_graphs(n): 45 | return nx.complete_graph(n) 46 | 47 | def sparse_connected_graph(args, n, p, num_components): 48 | graphs = [] 49 | for i in range(num_components): 50 | graphs.append(generate_random_graphs(n, p)) 51 | 52 | current_graph = graphs[0] 53 | for i in range(1, num_components): 54 | current_graph = nx.disjoint_union(current_graph, graphs[i]) 55 | node1 = nprnd.randint((i-1)*n, i*n) 56 | node2 = nprnd.randint(i*n, (i+1)*n) 57 | current_graph.add_edge(node1, node2) 58 | 59 | return current_graph 60 | 61 | 62 | def load_data(data): 63 | s2vs = [] 64 | for g, ans in data: 65 | neighbors = [] 66 | node_features = [] 67 | for i in sorted(list(g.nodes())): 68 | neighbors.append(list(g.neighbors(i))) 69 | node_features.append(g.nodes[i]['node_features']) 70 | node_features = np.array(node_features) 71 | node_features = torch.from_numpy(node_features).float() 72 | s2vg = S2VGraph(ans, node_features, neighbors, g) 73 | s2vs.append((s2vg, ans)) 74 | return s2vs 75 | 76 | 77 | def generate_graphs_various_nodes(args): 78 | if args.min_n == args.max_n: 79 | n = args.min_n 80 | else: 81 | n = nprnd.randint(args.min_n, args.max_n) 82 | if args.graph_type == 'random_graph': 83 | graph = generate_random_graphs(n, args.p) 84 | elif args.graph_type == 'tree': 85 | graph = generate_random_trees(n) 86 | elif args.graph_type == 'complete': 87 | graph = generate_complete_graphs(n) 88 | elif args.graph_type == 'path': 89 | graph = nx.path_graph(n) 90 | elif args.graph_type == 'ladder': 91 | graph = nx.ladder_graph(n) 92 | elif args.graph_type == 'tree': 93 | graph = nx.random_tree(n) 94 | elif args.graph_type == 'cycle': 95 | graph = nx.cycle_graph(n) 96 | elif args.graph_type == 'star': 97 | graph = nx.star_graph(n) 98 | elif args.graph_type == '4regular': 99 | graph = nx.random_regular_graph(4, n) 100 | else: 101 | print("Invalid graph type.") 102 | 103 | return graph 104 | 105 | def process_graph(graph, args, num_colors, max_weight): 106 | process_edges(graph, args, max_weight) 107 | source, target, ans = create_st(graph, args) 108 | process_nodes(graph, args, source, target, num_colors) 109 | return graph, ans 110 | 111 | ''' 112 | Generate graphs based on parameters. 
113 | ''' 114 | def make_graph(args, num_graphs, min_n, max_n, num_colors, graph_type, max_weight): 115 | graphs = [] 116 | if graph_type == 'general': 117 | num_each = int(num_graphs/9) 118 | args.min_n, args.max_n, args.graph_type = min_n, max_n, 'random_graph' 119 | for p in np.linspace(0.1, 0.9, 9): 120 | args.p = p 121 | for i in range(num_each): 122 | graph = generate_graphs_various_nodes(args) 123 | graph, ans = process_graph(graph, args, num_colors, max_weight) 124 | graphs.append((graph, ans)) 125 | elif graph_type == 'expander': 126 | args.min_n, args.max_n, args.graph_type = min_n, max_n, 'random_graph' 127 | args.p = args.rp 128 | for i in range(num_graphs): 129 | graph = generate_graphs_various_nodes(args) 130 | graph, ans = process_graph(graph, args, num_colors, max_weight) 131 | graphs.append((graph, ans)) 132 | elif graph_type == 'complete' or graph_type == 'path' or graph_type == 'ladder' or graph_type == 'tree': 133 | args.min_n, args.max_n, args.graph_type = min_n, max_n, graph_type 134 | for i in range(num_graphs): 135 | graph = generate_graphs_various_nodes(args) 136 | graph, ans = process_graph(graph, args, num_colors, max_weight) 137 | graphs.append((graph, ans)) 138 | elif graph_type == 'cycle' or graph_type == 'star' or graph_type == '4regular': 139 | args.min_n, args.max_n, args.graph_type = min_n, max_n, graph_type 140 | for i in range(num_graphs): 141 | graph = generate_graphs_various_nodes(args) 142 | graph, ans = process_graph(graph, args, num_colors, max_weight) 143 | graphs.append((graph, ans)) 144 | 145 | else: 146 | print("Invalid graph type!") 147 | exit() 148 | 149 | return graphs 150 | 151 | def process_edges(graph, args, max_weight): 152 | if max_weight > 0: 153 | if args.sampling == 'int': 154 | edge_weights = nprnd.randint(1, max_weight+1, len(graph.edges)) 155 | else: 156 | edge_weights = nprnd.uniform(1, max_weight, len(graph.edges)) 157 | edge_dict = {} 158 | idx = 0 159 | for edge in graph.edges: 160 | edge_dict[edge] = edge_weights[idx] 161 | idx += 1 162 | nx.set_edge_attributes(graph, edge_dict, 'weight') 163 | 164 | def process_nodes(graph, args, source, target, num_colors): 165 | graph_nodes = len(graph.nodes) 166 | if args.sampling == 'int': 167 | colors_ind = nprnd.randint(1, num_colors+1, (graph_nodes, args.node_dim)) 168 | else: 169 | colors_ind = nprnd.uniform(-num_colors, num_colors, (graph_nodes, args.node_dim)) 170 | node_dict = {} 171 | ind = 0 172 | for node in graph.nodes: 173 | node_dict[node] = colors_ind[ind].tolist() + [node==source] + [node==target] 174 | ind += 1 175 | nx.set_node_attributes(graph, node_dict, 'node_features') 176 | 177 | 178 | def create_st(graph, args): 179 | while True: 180 | source = nprnd.randint(0, len(graph.nodes)) 181 | distance, path = nx.single_source_dijkstra(graph, source=source, cutoff=args.max_hop, weight=None) 182 | target = sample(distance.keys(), 1)[0] 183 | 184 | count = 0 185 | for p in nx.all_shortest_paths(graph, source, target, weight='weight'): 186 | count += 1 187 | 188 | if count == 1 and len(p) <= (args.max_hop+1) and source != target: 189 | length = nx.dijkstra_path_length(graph, source, target, weight='weight') #distance of the shortest path 190 | break 191 | return source, target, length 192 | 193 | ''' 194 | Generate task (G, y) 195 | ''' 196 | def generate_shortest_paths(args, num_graphs, min_n, max_n, num_colors, graph_type, max_weight): 197 | graphs = make_graph(args, num_graphs, min_n, max_n, num_colors, graph_type, max_weight) 198 | return graphs 199 | 200 | 201 | def 
main(): 202 | # parameters for graph_generation 203 | parser = argparse.ArgumentParser(description='Graph generation for shortest paths task') 204 | parser.add_argument('--graph_type', type=str, default='random_graph', help='select which graph type to generate') 205 | parser.add_argument('--train_min_n', default=20, type=int, help='min number of nodes in the graph') 206 | parser.add_argument('--train_max_n', default=40, type=int, help='max number of nodes in the graph') 207 | parser.add_argument('--test_min_n', default=50, type=int, help='min number of nodes in the graph') 208 | parser.add_argument('--test_max_n', default=70, type=int, help='min number of nodes in the graph') 209 | parser.add_argument('--train_color', default=5, type=int, help='number of colors') 210 | parser.add_argument('--test_color', default=5, type=int, help='number of colors') 211 | parser.add_argument('--node_dim', default=1, type=int, help='number of node features') 212 | parser.add_argument('--train_graph', default='tree', type=str, help='train graph type') 213 | parser.add_argument('--test_graph', default='general', type=str, help='test graph type') 214 | parser.add_argument('--folder', default='data/shortest', type=str, help='run file') 215 | parser.add_argument('--sampling', default='uniform', type=str, help='uniform or int') 216 | parser.add_argument('--rp', default=0.6, type=float, help='random graph (expander) probability') 217 | 218 | parser.add_argument('--max_weight', default=5, type=int, help='max edge weight in the graph, 0 for no weight') 219 | parser.add_argument('--max_weight_test', default=10, type=int, help='max edge weight in the graph, 0 for no weight') 220 | 221 | parser.add_argument('--max_hop', default=3, type=int, help='max number of hops expected in the shortest path') 222 | parser.add_argument('--num_graphs', default=10000, type=int, help='num of graphs we want in the train dataset') 223 | parser.add_argument('--random', default=0, type=int, help='random seed') 224 | parser.add_argument('--data', type=str, help='data filename') 225 | args = parser.parse_args() 226 | 227 | random_seed = args.random 228 | np.random.seed(random_seed) 229 | random.seed(random_seed) 230 | 231 | Path(args.folder).mkdir(parents=True, exist_ok=True) 232 | output = './%s/%s.pickle' %(args.folder, args.data) 233 | 234 | if not os.path.exists(output): 235 | train = generate_shortest_paths(args, args.num_graphs, args.train_min_n, args.train_max_n, args.train_color, args.train_graph, args.max_weight) 236 | train = load_data(train) 237 | val = generate_shortest_paths(args, max(int(args.num_graphs*VAL_RATIO),1), args.train_min_n, args.train_max_n, args.train_color, args.train_graph, args.max_weight) 238 | val = load_data(val) 239 | test = generate_shortest_paths(args, max(int(args.num_graphs*TEST_RATIO),1), args.test_min_n, args.test_max_n, args.test_color, args.test_graph, args.max_weight_test) 240 | test = load_data(test) 241 | 242 | with open(output, 'wb') as f: 243 | pickle.dump((train, val, test), f) 244 | 245 | print("data file saved to %s" % output) 246 | 247 | if __name__ == '__main__': 248 | main() 249 | -------------------------------------------------------------------------------- /feedforward/data_generation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import torch.nn.functional as F 4 | import torch.utils.data as Data 5 | import matplotlib.pyplot as plt 6 | from pathlib import Path 7 | import numpy as np 8 | 
import numpy.random as nprnd 9 | from random import sample 10 | import math 11 | import pickle 12 | import argparse 13 | import torch 14 | import torch.nn.functional as F 15 | import numpy.linalg as LA 16 | import random 17 | 18 | def square(x): 19 | return x ** 2 20 | 21 | actfun = {'sin': np.sin, 'square': square, 'tanh': np.tanh, 'exp': torch.exp, 'log':torch.log, 'relu': F.relu, 'gelu': F.gelu, 'sigmoid': F.sigmoid} 22 | 23 | def sample_cube(lower, upper, x_dim): 24 | return np.random.uniform(lower, upper, x_dim) 25 | 26 | def sample_ball(radius, x_dim): 27 | u = np.random.normal(0, 1, x_dim) 28 | u = u / LA.norm(u) 29 | r = np.random.uniform(0.0, radius) 30 | x = u * r 31 | return x 32 | 33 | def sample_sphere(radius, x_dim): 34 | u = np.random.normal(0, 1, x_dim) 35 | u = u / LA.norm(u) 36 | x = u * radius 37 | return x 38 | 39 | def sample_A(args): 40 | if args.data=='linear' or args.data == 'mix': 41 | A = np.random.uniform(-args.A_r, args.A_r, (args.y_dim, args.x_dim)) 42 | else: 43 | A = np.random.uniform(-args.A_r, args.A_r, (args.x_dim, args.x_dim)) 44 | return A 45 | 46 | def sample_WA(args): 47 | a = np.random.uniform(-args.A_r, args.A_r, (args.y_dim, args.x_dim)) 48 | W = np.random.uniform(-args.A_r, args.A_r, (args.x_dim, args.x_dim)) 49 | return W, a 50 | 51 | def sample_AB(args): 52 | AB = np.random.uniform(-args.A_r, args.A_r, (4, args.n_item)) 53 | return AB 54 | 55 | def process_data(args, A, AB, b, W, a, data_range, n_data, noise, shape, sign, fix): 56 | data = [] 57 | target = [] 58 | data_cnt = 0 59 | idx = 0 60 | turn = 1 61 | prev_x = 0 62 | if args.data in ['sin', 'cos', 'zigzag'] or 'periodic' in args.data: 63 | data_range = data_range * np.pi 64 | if args.data == 'sqrt': 65 | sign = 'p' 66 | fix = args.x_dim 67 | 68 | while data_cnt < n_data: 69 | if shape == 'cube': 70 | if sign == 'z': 71 | x0 = [0.1] 72 | x1 = sample_cube(-data_range, data_range, args.x_dim-1) 73 | x = np.concatenate((x0, x1)) 74 | elif sign == 'p': 75 | x0 = sample_cube(0.0, data_range, fix) 76 | x1 = sample_cube(-data_range, data_range, args.x_dim-fix) 77 | x = np.concatenate((x0, x1)) 78 | elif sign == 'n': 79 | x0 = sample_cube(-data_range, 0.0, fix) 80 | x1 = sample_cube(-data_range, data_range, args.x_dim-fix) 81 | x = np.concatenate((x0, x1)) 82 | else: 83 | x = sample_cube(-data_range, data_range, args.x_dim) 84 | elif shape == 'sphere': 85 | if sign == 'z': 86 | x0 = [0.1] 87 | x1 = sample_sphere(data_range, args.x_dim-1) 88 | x = np.concatenate((x0, x1)) 89 | elif sign == 'p': 90 | x0 = sample_sphere(0.0, data_range, fix) 91 | x1 = sample_sphere(-data_range, data_range, args.x_dim-fix) 92 | x = np.concatenate((x0, x1)) 93 | elif sign == 'n': 94 | x0 = sample_sphere(-data_range, 0.0, fix) 95 | x1 = sample_sphere(-data_range, data_range, args.x_dim-fix) 96 | x = np.concatenate((x0, x1)) 97 | else: 98 | x = sample_sphere(data_range, args.x_dim) 99 | elif shape == 'basis': 100 | if turn == 1: 101 | x = np.zeros(args.x_dim) 102 | x[idx] = data_range 103 | prev_x = x 104 | x = np.matmul(args.Q, x) 105 | else: 106 | x = prev_x 107 | x = -x 108 | x = np.matmul(args.Q, x) 109 | elif shape == 'rd': 110 | if data_cnt > 2 * args.x_dim: 111 | x = sample_sphere(data_range, args.x_dim) 112 | elif turn == 1: 113 | x = np.zeros(args.x_dim) 114 | x[idx] = data_range 115 | prev_x = x 116 | x = np.matmul(args.Q, x) 117 | else: 118 | x = prev_x 119 | x = -x 120 | x = np.matmul(args.Q, x) 121 | else: 122 | if sign == 'z': 123 | x0 = [0.1] 124 | x1 = sample_ball(data_range, args.x_dim-1) 125 | x = 
np.concatenate((x0, x1)) 126 | else: 127 | x = sample_ball(data_range, args.x_dim) 128 | 129 | data_cnt += 1 130 | turn = -turn 131 | if turn == 1: 132 | idx += 1 133 | if idx == args.x_dim: 134 | idx = 0 135 | 136 | if 'act' in args.data: 137 | x = np.matmul(W, x) 138 | 139 | if args.data=='linear': 140 | y = np.matmul(A, x) 141 | y = y+ b 142 | elif args.data in ['linear_plain']: 143 | y = [x.sum()] 144 | elif args.data=='quadratic': 145 | a = np.matmul(x.T, A) 146 | y = np.matmul(a, x) 147 | y = [y] 148 | elif args.data=='mix': 149 | mid = int(args.x_dim / 2) 150 | x_linear = x[:mid] 151 | x_nonlinear = x[mid:] 152 | x_nonlinear = x_nonlinear ** 2 153 | x_feature = np.concatenate((x_linear, x_nonlinear)) 154 | y = np.matmul(A, x_feature) 155 | elif args.data=='cos': 156 | y = [np.cos(x * np.pi).sum()] 157 | elif args.data=='sin': 158 | y = [np.sin(x * np.pi).sum()] 159 | elif args.data=='l1': 160 | y = [LA.norm(x, ord=1)] 161 | elif args.data=='sqrt': 162 | y = [np.sqrt(x).sum()] 163 | elif args.data=='square': 164 | y = [np.sum(np.square(x))] 165 | elif args.data=='constant': 166 | y = [1] 167 | elif args.data=='tanh': 168 | y = [np.tanh(x).sum()] 169 | elif args.data=='sum': 170 | y = [x.sum()] 171 | elif args.data=='exp': 172 | y = [np.exp(x).sum()] 173 | elif args.data=='log': 174 | y = [np.log(x).sum()] 175 | elif args.data=='gelu': 176 | y = [F.gelu(torch.from_numpy(x)).sum().item()] 177 | elif args.data=='sigmoid': 178 | y = [torch.sigmoid(torch.from_numpy(x)).sum().item()] 179 | elif args.data=='zigzag': 180 | y = [np.arccos(np.cos(x)).sum()] 181 | elif 'periodic' in args.data: 182 | a1, a2, b1, b2 = AB[0,:], AB[1,:], AB[2,:], AB[3,:] 183 | total = 0 184 | for i in range(len(a1)): 185 | if 'sin' in args.data: 186 | total = total + a1[i] * np.sin(a2[i] * x * np.pi) 187 | elif 'cos' in args.data: 188 | total = total + b1[i] * np.cos(b2[i] * x * np.pi) 189 | else: 190 | total = total + a1[i] * np.sin(a2[i] * x * np.pi) + b1[i] * np.cos(b2[i] * x * np.pi) 191 | y = [total.sum()] 192 | elif 'act' in args.data: 193 | active = args.data[3:] 194 | y = [np.matmul(a, actfun[active](x))] 195 | if noise > 0: 196 | y += np.random.normal(0, args.noise, args.y_dim) 197 | data.append(x) 198 | target.append(y) 199 | return (data, target) 200 | 201 | def sample_data(args): 202 | A = sample_A(args) 203 | AB = sample_AB(args) 204 | W, a = sample_WA(args) 205 | b = np.random.uniform(-args.b_r, args.b_r, args.y_dim) 206 | P = np.random.uniform(-1.0, 1.0, (args.x_dim, args.x_dim)) 207 | Q, R = LA.qr(P) 208 | args.Q = Q 209 | 210 | train = process_data(args, A, AB, b, W, a, args.train_r, args.n_train, args.noise, args.train_shape, args.sign, args.fix) 211 | val = process_data(args, A, AB, b, W, a, args.val_r, args.n_val, args.noise, args.train_shape, args.sign, args.fix) 212 | test = process_data(args, A, AB, b, W, a, args.test_r, args.n_test, args.noise, args.test_shape, 'no', 0) 213 | output = '%s/%s_xdim%s_item%s_trains%s_tests%s_sign%s_fix%s_testr%s_trainr%s_valr%s_ntrain%s_nval%s_ntest%s_Ar%s_br%s.pickle' %(args.folder, args.data, args.x_dim, args.n_item, args.train_shape, args.test_shape, args.sign, args.fix, args.test_r, args.train_r, args.val_r, args.n_train, args.n_val, args.n_test, args.A_r, args.b_r) 214 | return (train, val, test), output 215 | 216 | def main(): 217 | parser = argparse.ArgumentParser(description='Data Generation') 218 | parser.add_argument('--data', type=str, default="linear", help='data function') 219 | parser.add_argument('--x_dim', type=int, default=10, help='input x 
dim') 220 | parser.add_argument('--y_dim', type=int, default=1, help='output y dim') 221 | parser.add_argument('--n_item', type=int, default=1, help='number of terms in the periodic function') 222 | parser.add_argument('--train_shape', type=str, default='cube', help='cube, sphere, ball') 223 | parser.add_argument('--test_shape', type=str, default='cube', help='cube, sphere, ball') 224 | 225 | parser.add_argument('--A_r', type=float, default=0.1, help='A linear func entry uniform in -A_r..A_r') 226 | parser.add_argument('--b_r', type=float, default=0.5, help='bias uniform in -b_r..b_r') 227 | parser.add_argument('--train_r', type=float, default=0.1, help='train sample range') 228 | parser.add_argument('--val_r', type=float, default=0.1, help='validation sample range') 229 | parser.add_argument('--test_r', type=float, default=2.0, help='test sample range') 230 | parser.add_argument('--n_train', type=int, default=5000, help='# training data samples') 231 | parser.add_argument('--n_val', type=int, default=1000, help='# validation data samples') 232 | parser.add_argument('--n_test', type=int, default=3000, help='# test data samples') 233 | parser.add_argument('--sign', type=str, default='no', help='[no, p, n, z] missing direction (sign)') 234 | parser.add_argument('--fix', type=int, default=0, help='fix the sign of the first `fix` dimensions') 235 | 236 | parser.add_argument('--folder', type=str, default='data', help='data/linear/...') 237 | 238 | parser.add_argument('--ex_dim', type=int, default=0, help='extrapolate dimension') 239 | parser.add_argument('--noise', type=float, default=0, help='Gaussian noise sigma') 240 | parser.add_argument('--random_seed', type=int, default=2, help='random seed') 241 | args = parser.parse_args() 242 | 243 | np.random.seed(args.random_seed) 244 | random.seed(args.random_seed) 245 | 246 | # create the output folder if it does not exist 247 | Path(args.folder).mkdir(parents=True, exist_ok=True) 248 | 249 | # extrapolate all dimensions by default 250 | if args.ex_dim == 0: 251 | args.ex_dim = args.x_dim 252 | 253 | # select a function to generate data 254 | data, output = sample_data(args) 255 | 256 | with open(output, 'wb') as f: 257 | pickle.dump(data, f) 258 | 259 | print("data saved to %s" % output) 260 | 261 | if __name__ == '__main__': 262 | main() 263 | -------------------------------------------------------------------------------- /feedforward/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pickle 4 | import random 5 | import numpy as np 6 | import shutil 7 | import torch 8 | from torch.autograd import Variable 9 | 10 | from MLPs import * 11 | import logging 12 | import math 13 | 14 | random_seed = 2 15 | torch.manual_seed(random_seed) 16 | torch.cuda.manual_seed(random_seed) 17 | torch.backends.cudnn.deterministic = True 18 | torch.backends.cudnn.benchmark = False 19 | np.random.seed(random_seed) 20 | random.seed(random_seed) 21 | 22 | best_prec, best_loss, best_model_test_acc, best_model_test_loss, best_model_mape_loss = 0.0, 1e+8*1.0, 0.0, 1e+8*1.0, 1e+8*1.0 23 | is_best = False 24 | best_epoch = 0 25 | 26 | def save_checkpoint(state, is_best, epoch, args): 27 | if not is_best: 28 | return 29 | """Saves checkpoint to disk""" 30 | data_path = args.data.split("/")[-1] 31 | directory = "models_dir/%s/%s/"%(data_path, args.filename) 32 | if not os.path.exists(directory): 33 | os.makedirs(directory) 34 | filename = directory + 'model_best.pth.tar' 35 | torch.save(state, filename) 36 | 37 | def cvt_data_axis(dataset): 38 | data = [] 
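    # inputs and labels are collected into two parallel lists so tensor_data() can slice both by batch index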
39 | label = [] 40 | for d, ans in dataset: 41 | data.append(d) 42 | label.append(ans) 43 | return (data, label) 44 | 45 | def tensor_data(data, i, args): 46 | nodes = torch.FloatTensor(data[0][args.batch_size*i:args.batch_size*(i+1)]).to(args.device) 47 | if args.loss_fn == 'cls': 48 | ans = torch.LongTensor(data[1][args.batch_size*i:args.batch_size*(i+1)]).to(args.device) 49 | return nodes, ans 50 | else: 51 | ans = torch.FloatTensor(data[1][args.batch_size*i:args.batch_size*(i+1)]).to(args.device) 52 | return nodes.unsqueeze(dim=1), ans.unsqueeze(dim=1) 53 | 54 | def load_data(datafile, mode): 55 | data = [] 56 | label = [] 57 | with open(datafile, 'rb') as f2: 58 | dataset = pickle.load(f2)[mode] 59 | data.extend(dataset[0]) 60 | label.extend(dataset[1]) 61 | dataset = [] 62 | for i in range(len(data)): 63 | dataset.append( (data[i], label[i])) 64 | return dataset 65 | 66 | def train(epoch, dataset, args, model): 67 | model.train() 68 | train_size = len(dataset) 69 | bs = args.batch_size 70 | random.shuffle(dataset) 71 | 72 | dataset = cvt_data_axis(dataset) 73 | 74 | running_loss, running_loss_mape = 0.0, 0.0 75 | accuracys = [] 76 | losses, losses_mape = [], [] 77 | 78 | batch_runs = max(1, train_size // bs) 79 | for batch_idx in range(batch_runs): 80 | input_nodes, label = tensor_data(dataset, batch_idx, args) 81 | accuracy, loss, mape_loss = model.train_(input_nodes, label) 82 | running_loss += loss 83 | running_loss_mape += mape_loss 84 | 85 | accuracys.append(accuracy) 86 | losses.append(loss) 87 | losses_mape.append(mape_loss) 88 | 89 | if (batch_idx + 1) % args.log_interval == 0: 90 | print('Train Epoch: {} [{}/{} ({:.2f}%)] loss: {:.7f}\t'.format(epoch, batch_idx * bs, train_size, 100 * batch_idx * bs / train_size, running_loss/(1 * args.log_interval))) 91 | logging.info('Train Epoch: {} [{}/{} ({:.2f}%)] loss: {:.7f} \t'.format(epoch, batch_idx * bs, train_size, 100 * batch_idx * bs / train_size, running_loss/(1 * args.log_interval))) 92 | running_loss = 0.0 93 | 94 | avg_accuracy = sum(accuracys) *1.0 / len(accuracys) 95 | avg_losses = sum(losses) *1.0 / len(losses) 96 | avg_losses_mape = sum(losses_mape) *1.0 / len(losses_mape) 97 | print('\nTrain set: accuracy: {:.2f}% \t | loss: {:.7f} \t | \t mape: {:.7f}'.format(avg_accuracy, avg_losses, avg_losses_mape)) 98 | logging.info('\nTrain set: accuracy: {:.2f}% \t | loss: {:.7f} \t\t | \t mape: {:.7f}'.format(avg_accuracy, avg_losses, avg_losses_mape)) 99 | 100 | def validate(epoch, dataset, args, model): 101 | global is_best, best_prec, best_loss 102 | 103 | model.eval() 104 | test_size = len(dataset) 105 | bs = args.batch_size 106 | dataset = cvt_data_axis(dataset) 107 | 108 | accuracys = [] 109 | losses, mape_losses = [], [] 110 | batch_runs = max(1, test_size // bs) 111 | for batch_idx in range(batch_runs): 112 | input_nodes, label = tensor_data(dataset, batch_idx, args) 113 | accuracy, loss, mape_loss = model.test_(input_nodes, label) 114 | accuracys.append(accuracy) 115 | losses.append(loss) 116 | mape_losses.append(mape_loss) 117 | 118 | avg_accuracy = sum(accuracys) *1.0 / len(accuracys) 119 | avg_losses = sum(losses) *1.0 / len(losses) 120 | avg_losses_mape = sum(mape_losses) *1.0 / len(mape_losses) 121 | print('Validation set: accuracy: {:.2f}% | loss: {:.7f} \t | \t mape: {:.7f}'.format(avg_accuracy, avg_losses, avg_losses_mape)) 122 | logging.info('Validation set: accuracy: {:.2f}% | loss: {:.7f} \t | \t mape: {:.7f}'.format(avg_accuracy, avg_losses, avg_losses_mape)) 123 | 124 | if args.loss_fn == 'cls': 125 | 
is_best = avg_accuracy > best_prec 126 | else: 127 | is_best = avg_losses < best_loss 128 | best_prec = max(avg_accuracy, best_prec) 129 | best_loss = min(avg_losses, best_loss) 130 | 131 | def test(epoch, dataset, args, model): 132 | global is_best, best_model_test_acc, best_model_test_loss, best_epoch, best_model_mape_loss 133 | 134 | model.eval() 135 | test_size = len(dataset) 136 | bs = args.batch_size 137 | dataset = cvt_data_axis(dataset) 138 | 139 | accuracys = [] 140 | losses, mape_losses = [], [] 141 | batch_runs = max(1, test_size // bs) 142 | for batch_idx in range(batch_runs): 143 | input_nodes, label = tensor_data(dataset, batch_idx, args) 144 | accuracy, loss, mape_loss = model.test_(input_nodes, label) 145 | accuracys.append(accuracy) 146 | losses.append(loss) 147 | mape_losses.append(mape_loss) 148 | 149 | avg_accuracy = sum(accuracys) *1.0 / len(accuracys) 150 | avg_losses = sum(losses) *1.0 / len(losses) 151 | avg_losses_mape = sum(mape_losses) *1.0 / len(mape_losses) 152 | 153 | print('Test set: accuracy: {:.2f}% \t | loss: {:.7f} \t | \t mape: {:.7f} \n'.format(avg_accuracy, avg_losses, avg_losses_mape)) 154 | logging.info('Test set: accuracy: {:.2f}% \t | loss: {:.7f} \t | \t mape: {:.7f} \n'.format(avg_accuracy, avg_losses, avg_losses_mape)) 155 | 156 | if is_best: 157 | best_model_test_acc = avg_accuracy 158 | best_model_test_loss = avg_losses 159 | best_model_mape_loss = avg_losses_mape 160 | best_epoch = epoch 161 | 162 | if epoch%10 == 0: 163 | print('************ Best model\'s test acc: {:.2f}%, test loss: {:.7f}, mape: {:.7f} (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_model_mape_loss, best_epoch)) 164 | logging.info('************ Best model\'s test acc: {:.2f}%, test loss: {:.7f}, mape: {:.7f} (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_model_mape_loss, best_epoch)) 165 | 166 | def setup_logs(args): 167 | file_dir = "results" 168 | if not args.no_log: 169 | data_path = args.data.split("/")[-1] 170 | files_dir = '%s/%s' %(file_dir, data_path) 171 | args.files_dir = files_dir 172 | args.filename = '%s_lr%s_act%s_mlp%s_hdim%s_idim%s_odim%s_bs%s_option%s_epoch%d_seed%d.log' %(args.model, args.lr, args.activation, args.mlp_layer, args.hidden_dim, args.input_dim, args.output_dim, args.batch_size, args.option, args.epochs, random_seed) 173 | 174 | if not os.path.exists(files_dir): 175 | os.makedirs(files_dir) 176 | mode = 'w+' 177 | if args.resume: 178 | mode = 'a+' 179 | logging.basicConfig(format='%(message)s', 180 | level=logging.INFO, 181 | datefmt='%m-%d %H:%M', 182 | filename="%s/%s" %(args.files_dir, args.filename), 183 | filemode='w+') 184 | 185 | print(vars(args)) 186 | logging.info(vars(args)) 187 | 188 | def resume(args, model): 189 | if os.path.isfile(args.resume): 190 | print("=> loading checkpoint '{}'".format(args.resume)) 191 | logging.info("=> loading checkpoint '{}'".format(args.resume)) 192 | 193 | checkpoint = torch.load(args.resume) 194 | 195 | args.start_epoch = checkpoint['epoch'] 196 | best_prec = checkpoint['best_prec'] 197 | best_model_test_acc = checkpoint['best_model_test_acc'] 198 | best_model_test_loss = checkpoint['best_model_test_loss'] 199 | best_model_mape_loss = checkpoint['best_model_mape_loss'] 200 | model.load_state_dict(checkpoint['state_dict']) 201 | model.optimizer.load_state_dict(checkpoint['optimizer']) 202 | print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 203 | 
logging.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 204 | else: 205 | print("=> no checkpoint found at '{}'".format(args.resume)) 206 | logging.info("=> no checkpoint found at '{}'".format(args.resume)) 207 | return model 208 | 209 | 210 | def main(): 211 | parser = argparse.ArgumentParser() 212 | 213 | #Model specifications 214 | parser.add_argument('--model', type=str, default='FeedForward', help='choose which model') 215 | parser.add_argument('--activation', type=str, choices=['sin', 'square', 'tanh', 'exp', 'log', 'relu', 'gelu', 'linear','sigmoid'], default='relu', help='activation function') 216 | parser.add_argument('--option', type=str, choices=['A', 'B', 'None'], default='None', help='initialization options') 217 | parser.add_argument('--mlp_layer', type=int, default=3, help='number of layers for MLPs') 218 | parser.add_argument('--hidden_dim', type=int, default=256, help='feature hidden dimension of MLPs') 219 | parser.add_argument('--input_dim', type=int, default=0, help='input dim') 220 | parser.add_argument('--output_dim', type=int, default=1, help='output dim') 221 | 222 | # Training settings 223 | parser.add_argument('--device', type=int, default=0, help='which gpu to use if any') 224 | parser.add_argument('--seed', type=int, default=1, help='random seed') 225 | parser.add_argument('--resume', type=str, help='resume from model stored') 226 | parser.add_argument('--lr', type=float, default=0.01, help='learning rate') 227 | parser.add_argument('--decay', type=float, default=1e-5, help='weight decay') 228 | parser.add_argument('--batch_size', type=int, default=32, help='input batch size for training') 229 | parser.add_argument('--epochs', type=int, default=250, help='number of epochs to train') 230 | parser.add_argument('--loss_fn', type=str, choices=['cls', 'reg', 'mape'], default='reg', help='classification or regression loss') 231 | parser.add_argument('--optimizer', type=str, choices=['Adam', 'SGD'], default='Adam', help='Adam or SGD') 232 | 233 | # Logging and storage settings 234 | parser.add_argument('--save_model', action='store_true', default=False, help='flag to store the training models') 235 | parser.add_argument('--no_log', action='store_true', default=False, help='flag to disable logging of results') 236 | parser.add_argument('--log_interval', type=int, default=100, help='how many batches to wait before logging training status') 237 | parser.add_argument('--filename', type=str, default='', help='file to store the training log') 238 | parser.add_argument('--files_dir', type=str, default='', help='the directory to store trained models logs') 239 | 240 | # Data settings 241 | parser.add_argument('--data', type=str, default='', help='path to the data file') 242 | 243 | args = parser.parse_args() 244 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 245 | 246 | torch.manual_seed(args.seed) 247 | if torch.cuda.is_available(): 248 | torch.cuda.manual_seed_all(0) 249 | 250 | train_datasets = load_data(args.data, 0) 251 | validation_datasets = load_data(args.data, 1) 252 | test_datasets = load_data(args.data, 2) 253 | 254 | args.input_dim = len(train_datasets[0][0]) 255 | if args.loss_fn == 'cls': 256 | args.output_dim = 2 # binary classification 257 | else: 258 | args.output_dim = len(train_datasets[0][1]) 259 | 260 | setup_logs(args) 261 | 262 | model = FeedForward(args).to(device) 263 | 264 | scheduler = torch.optim.lr_scheduler.StepLR(model.optimizer, step_size=50, 
gamma=0.5) 265 | 266 | bs = args.batch_size 267 | 268 | model_dirs = './models_dir' 269 | try: 270 | os.makedirs(model_dirs) 271 | except: 272 | print('directory {} already exists'.format(model_dirs)) 273 | 274 | if args.epochs == 0: 275 | epoch = 0 276 | validate(epoch, validation_datasets, args, model) 277 | test(epoch, test_datasets, args, model) 278 | args.epochs = -1 279 | 280 | for epoch in range(1, args.epochs + 1): 281 | if epoch == 1: 282 | is_best = True 283 | train(epoch, train_datasets, args, model) 284 | validate(epoch, validation_datasets, args, model) 285 | test(epoch, test_datasets, args, model) 286 | scheduler.step() 287 | if is_best and args.save_model: 288 | save_checkpoint({ 289 | 'epoch': epoch + 1, 290 | 'arch': args.model, 291 | 'args': args, 292 | 'state_dict': model.state_dict(), 293 | 'best_prec': best_prec, 294 | 'best_model_test_acc': best_model_test_acc, 295 | 'best_model_test_loss': best_model_test_loss, 296 | 'best_model_mape_loss': best_model_mape_loss, 297 | 'optimizer' : model.optimizer.state_dict(), 298 | }, is_best, epoch, args) 299 | 300 | print('************ Best model\'s test acc: {:.2f}%, test loss: {:.7f} throughout training (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_epoch)) 301 | logging.info('************ Best model\'s test acc: {:.2f}%, test loss: {:.7f} throughout training (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_epoch)) 302 | 303 | if __name__ == '__main__': 304 | main() 305 | -------------------------------------------------------------------------------- /graph_algorithms/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pickle 4 | import random 5 | import numpy as np 6 | import shutil 7 | import torch 8 | import networkx as nx 9 | from torch.autograd import Variable 10 | 11 | from models.gnn import GNN 12 | from models.gnn_edges import GNN_E 13 | from util import * 14 | import logging 15 | import math 16 | 17 | random_seed = 2 18 | torch.manual_seed(random_seed) 19 | torch.cuda.manual_seed(random_seed) 20 | torch.backends.cudnn.deterministic = True 21 | torch.backends.cudnn.benchmark = False 22 | np.random.seed(random_seed) 23 | random.seed(random_seed) 24 | 25 | best_prec, best_loss, best_model_test_acc, best_model_test_loss, best_model_mape_loss = 0.0, 1e+8*1.0, 0.0, 1e+8*1.0, 1e+8*1.0 26 | is_best = False 27 | best_epoch = 0 28 | 29 | model_types = {'GNN': GNN, 'GNN_E': GNN_E} 30 | 31 | def save_checkpoint(state, is_best, epoch, args): 32 | if not is_best: 33 | return 34 | """Saves checkpoint to disk""" 35 | 36 | directory = "models_dir/%s/%s/"%(args.data, args.filename) 37 | if not os.path.exists(directory): 38 | os.makedirs(directory) 39 | filename = directory + 'model_best.pth.tar' 40 | torch.save(state, filename) 41 | 42 | def cvt_data_axis(dataset): 43 | data = [] 44 | label = [] 45 | for d, ans in dataset: 46 | data.append(d) 47 | label.append(ans) 48 | return (data, label) 49 | 50 | def tensor_data(data, i, args): 51 | nodes = data[0][args.batch_size*i:args.batch_size*(i+1)] 52 | if args.loss_fn == 'cls': 53 | ans = torch.LongTensor(data[1][args.batch_size*i:args.batch_size*(i+1)]).to(args.device) 54 | else: 55 | ans = torch.FloatTensor(data[1][args.batch_size*i:args.batch_size*(i+1)]).to(args.device) 56 | return nodes, ans 57 | 58 | def train(epoch, dataset, args, model): 59 | model.train() 60 | train_size = len(dataset) 61 | bs = 
args.batch_size 62 | random.shuffle(dataset) 63 | 64 | data = cvt_data_axis(dataset) 65 | running_loss, running_loss_mape = 0.0, 0.0 66 | accuracys = [] 67 | losses, losses_mape = [], [] 68 | batch_runs = max(1, train_size // bs) 69 | for batch_idx in range(batch_runs): 70 | input_nodes, label = tensor_data(data, batch_idx, args) 71 | accuracy, loss, mape_loss = model.train_(input_nodes, label) 72 | running_loss += loss.item() 73 | running_loss_mape += mape_loss.item() 74 | 75 | accuracys.append(accuracy) 76 | losses.append(loss) 77 | losses_mape.append(mape_loss) 78 | 79 | if (batch_idx + 1) % args.log_interval == 0: 80 | print('Train Epoch: {} [{}/{} ({:.2f}%)] accuracy: {:.2f}%, loss: {:.7f}, mape: {:.7f}'.format(epoch, batch_idx * bs, train_size, 100 * batch_idx * bs / train_size, accuracy, running_loss/(1 * args.log_interval), running_loss_mape/(1 * args.log_interval))) 81 | logging.info('Train Epoch: {} [{}/{} ({:.2f}%)] accuracy: {:.2f}%, loss: {:.7f}, mape: {:.7f}'.format(epoch, batch_idx * bs, train_size, 100 * batch_idx * bs / train_size, accuracy, running_loss/(1 * args.log_interval), running_loss_mape/(1 * args.log_interval))) 82 | running_loss, running_loss_mape = 0.0, 0.0 83 | 84 | avg_accuracy = sum(accuracys) *1.0 / len(accuracys) 85 | avg_losses = sum(losses) *1.0 / len(losses) 86 | avg_losses_mape = sum(losses_mape) *1.0 / len(losses_mape) 87 | 88 | print('\nTrain set: accuracy: {:.2f}%, loss: {:.7f}, mape: {:.7f}'.format(avg_accuracy, avg_losses, avg_losses_mape)) 89 | logging.info('\nTrain set: accuracy: {:.2f}%, loss: {:.7f}, mape: {:.7f}'.format(avg_accuracy, avg_losses, avg_losses_mape)) 90 | 91 | def validate(epoch, dataset, args, model): 92 | global is_best, best_prec, best_loss 93 | 94 | model.eval() 95 | test_size = len(dataset) 96 | bs = args.batch_size 97 | data = cvt_data_axis(dataset) 98 | 99 | accuracys = [] 100 | losses, losses_mape = [], [] 101 | for batch_idx in range(test_size // bs): 102 | input_nodes, label = tensor_data(data, batch_idx, args) 103 | accuracy, loss, mape_loss = model.test_(input_nodes, label) 104 | accuracys.append(accuracy) 105 | losses.append(loss) 106 | losses_mape.append(mape_loss) 107 | 108 | avg_accuracy = sum(accuracys) *1.0 / len(accuracys) 109 | avg_losses = sum(losses) *1.0 / len(losses) 110 | avg_losses_mape = sum(losses_mape) *1.0 / len(losses_mape) 111 | print('Validation set: accuracy: {:.2f}%, loss: {:.7f}, mape: {:.7f}'.format(avg_accuracy, avg_losses, avg_losses_mape)) 112 | logging.info('Validation set: accuracy: {:.2f}%, loss: {:.7f}, mape: {:.7f}'.format(avg_accuracy, avg_losses, avg_losses_mape)) 113 | 114 | if args.loss_fn == 'cls': 115 | is_best = avg_accuracy > best_prec 116 | else: 117 | is_best = avg_losses < best_loss 118 | best_prec = max(avg_accuracy, best_prec) 119 | best_loss = min(avg_losses, best_loss) 120 | 121 | def test(epoch, dataset, args, model): 122 | global is_best, best_model_test_acc, best_model_test_loss, best_epoch, best_model_mape_loss 123 | 124 | model.eval() 125 | test_size = len(dataset) 126 | bs = args.batch_size 127 | data = cvt_data_axis(dataset) 128 | 129 | accuracys = [] 130 | losses, losses_mape = [], [] 131 | for batch_idx in range(test_size // bs): 132 | input_nodes, label = tensor_data(data, batch_idx, args) 133 | accuracy, loss, mape_loss = model.test_(input_nodes, label) 134 | 135 | accuracys.append(accuracy) 136 | losses.append(loss) 137 | losses_mape.append(mape_loss) 138 | 139 | avg_accuracy = sum(accuracys) *1.0 / len(accuracys) 140 | avg_losses = sum(losses) *1.0 / 
len(losses) 141 | avg_losses_mape = sum(losses_mape) *1.0 / len(losses_mape) 142 | 143 | print('Test set: accuracy: {:.2f}%, loss: {:.7f}, mape: {:.7f} \n'.format(avg_accuracy, avg_losses, avg_losses_mape)) 144 | logging.info('Test set: accuracy: {:.2f}%, loss: {:.7f}, mape: {:.7f} \n'.format(avg_accuracy, avg_losses, avg_losses_mape)) 145 | 146 | if is_best: 147 | best_model_test_acc = avg_accuracy 148 | best_model_test_loss = avg_losses 149 | best_model_mape_loss = avg_losses_mape 150 | best_epoch = epoch 151 | 152 | if epoch%10 == 0: 153 | print('************ Best model\'s test accuracy: {:.2f}%, test loss: {:.7f}, mape: {:.7f} (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_model_mape_loss, best_epoch)) 154 | logging.info('************ Best model\'s test accuracy: {:.2f}%, test loss: {:.7f}, mape: {:.7f} (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_model_mape_loss, best_epoch)) 155 | 156 | def data_portion(args, train_datasets, test_datasets, validation_datasets): 157 | if args.portion < 1.0: 158 | train_size = int(math.floor(len(train_datasets)*args.portion)) 159 | np.random.seed(random_seed) 160 | indices = np.random.choice(len(train_datasets), train_size) 161 | print(indices[0:10]) 162 | logging.info(indices[0:10]) 163 | train_datasets = [train_datasets[i] for i in indices] 164 | return train_datasets, test_datasets, validation_datasets 165 | 166 | def setup_logs(args): 167 | file_dir = "results" 168 | if not args.no_log: 169 | files_dir = '%s/%s' %(file_dir, args.data) 170 | args.files_dir = files_dir 171 | if args.model in ['GNN', 'GNN_E']: 172 | args.filename = '%s_%s_lr%s_hdim%s_fc%s_mlp%s_%s_%s_bs%s_epoch%d_seed%d.log' \ 173 | %(args.model, args.n_iter, args.lr, args.hidden_dim, args.fc_output_layer, args.mlp_layer, 174 | args.graph_pooling_type, args.neighbor_pooling_type, args.batch_size, args.epochs, args.seed) 175 | 176 | if not os.path.exists(files_dir): 177 | os.makedirs(files_dir) 178 | mode = 'w+' 179 | if args.resume: 180 | mode = 'a+' 181 | logging.basicConfig(format='%(message)s', 182 | level=logging.INFO, 183 | datefmt='%m-%d %H:%M', 184 | filename="%s/%s" %(args.files_dir, args.filename), 185 | filemode='w+') 186 | 187 | print(vars(args)) 188 | logging.info(vars(args)) 189 | 190 | def resume(args, model): 191 | if os.path.isfile(args.resume): 192 | print("=> loading checkpoint '{}'".format(args.resume)) 193 | logging.info("=> loading checkpoint '{}'".format(args.resume)) 194 | 195 | checkpoint = torch.load(args.resume) 196 | 197 | args.start_epoch = checkpoint['epoch'] 198 | best_prec = checkpoint['best_prec'] 199 | best_model_test_acc = checkpoint['best_model_test_acc'] 200 | best_model_test_loss = checkpoint['best_model_test_loss'] 201 | best_model_mape_loss = checkpoint['best_model_mape_loss'] 202 | model.load_state_dict(checkpoint['state_dict']) 203 | model.optimizer.load_state_dict(checkpoint['optimizer']) 204 | print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 205 | logging.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 206 | else: 207 | print("=> no checkpoint found at '{}'".format(args.resume)) 208 | logging.info("=> no checkpoint found at '{}'".format(args.resume)) 209 | return model 210 | 211 | def main(): 212 | parser = argparse.ArgumentParser() 213 | 214 | #Model specifications 215 | parser.add_argument('--model', type=str, choices=['GNN_E', 'GNN'], default='GNN_E', 
help='GNN does not use edge information whereas GNN_E uses edge information') 216 | parser.add_argument('--activation', type=str, choices=['relu', 'tanh', 'linear', 'sigmoid'], default='relu', help='activation function') 217 | parser.add_argument('--n_iter', type=int, help='number of GNN iterations/layers') 218 | parser.add_argument('--hidden_dim', type=int, default=256, help='width of MLPs') 219 | parser.add_argument('--mlp_layer', type=int, default=2, help='number of layers for MLPs in GNN before the last output MLP') 220 | parser.add_argument('--fc_output_layer', type=int, default=1, help='number of layers for the output (softmax) MLP in GNN') 221 | parser.add_argument('--graph_pooling_type', type=str, choices=["sum", "mean", "max", "min"], 222 | help='Graph-level pooling over all nodes in the graph') 223 | parser.add_argument('--neighbor_pooling_type', type=str, choices=["sum", "mean", "max", "min"], 224 | help='Pooling over neighboring nodes') 225 | parser.add_argument('--add_self_loop', action='store_true', 226 | default=False, help='add self-loops in case the graph does not contain them') 227 | 228 | # Training settings 229 | parser.add_argument('--device', type=int, default=0, help='which gpu to use if any') 230 | parser.add_argument('--seed', type=int, default=1, help='random seed') 231 | parser.add_argument('--resume', type=str, help='resume from a stored model') 232 | parser.add_argument('--lr', type=float, default=0.01, help='learning rate') 233 | parser.add_argument('--decay', type=float, default=1e-5, help='weight decay') 234 | parser.add_argument('--batch_size', type=int, default=64, help='input batch size for training') 235 | parser.add_argument('--epochs', type=int, default=300, help='number of epochs to train') 236 | parser.add_argument('--loss_fn', type=str, choices=['cls', 'reg', 'mape'], default='reg', help='classification or regression loss') 237 | parser.add_argument('--optimizer', type=str, choices=['Adam', 'SGD'], default='Adam', help='Adam or SGD') 238 | 239 | 240 | # Logging and storage settings 241 | parser.add_argument('--save_model', action='store_true', default=False, help='flag to store the training models') 242 | parser.add_argument('--no_log', action='store_true', default=False, help='flag to disable logging of results') 243 | parser.add_argument('--log_interval', type=int, default=10, help='how many batches to wait before logging training status') 244 | parser.add_argument('--filename', type=str, default='', help='file to store the training log') 245 | parser.add_argument('--files_dir', type=str, default='', help='directory to store training log files') 246 | 247 | # Data settings 248 | parser.add_argument('--data', type=str, default='', help='path to the data') 249 | parser.add_argument('--weight', type=str, default=None, help='whether the input data has edge weights') 250 | parser.add_argument('--edge_feature_size', type=int, default=1, help='size of edge features') 251 | parser.add_argument('--node_feature_size', type=int, default=3, help='size of node features') 252 | 253 | args = parser.parse_args() 254 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 255 | 256 | torch.manual_seed(args.seed) 257 | if torch.cuda.is_available(): 258 | torch.cuda.manual_seed_all(0) 259 | 260 | print('Begin loading data...') 261 | with open("./data/%s" %args.data, 'rb') as f: 262 | train_datasets, validation_datasets, test_datasets = pickle.load(f) 263 | print('Data loading completed!') 264 | 265 | 
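    # node feature size is inferred from the first training graph, overriding the --node_feature_size default so the model matches the generated data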
args.node_feature_size = len(train_datasets[0][0].node_features[0]) 266 | setup_logs(args) 267 | 268 | model = model_types[args.model](args).to(device) 269 | scheduler = torch.optim.lr_scheduler.StepLR(model.optimizer, step_size=50, gamma=0.5) 270 | 271 | bs = args.batch_size 272 | 273 | model_dirs = './model_dirs' 274 | try: 275 | os.makedirs(model_dirs) 276 | except: 277 | print('directory {} already exists'.format(model_dirs)) 278 | 279 | if args.epochs == 0: 280 | epoch = 0 281 | validate(epoch, validation_datasets, args, model) 282 | test(epoch, test_datasets, args, model) 283 | args.epochs = -1 284 | 285 | for epoch in range(1, args.epochs + 1): 286 | train(epoch, train_datasets, args, model) 287 | validate(epoch, validation_datasets, args, model) 288 | test(epoch, test_datasets, args, model) 289 | scheduler.step() 290 | if is_best and args.save_model: 291 | save_checkpoint({ 292 | 'epoch': epoch + 1, 293 | 'arch': args.model, 294 | 'args': args, 295 | 'state_dict': model.state_dict(), 296 | 'best_prec': best_prec, 297 | 'best_model_test_acc': best_model_test_acc, 298 | 'best_model_test_loss': best_model_test_loss, 299 | 'best_model_mape_loss': best_model_mape_loss, 300 | 'optimizer' : model.optimizer.state_dict(), 301 | }, is_best, epoch, args) 302 | 303 | print('************ Best model\'s test accuracy: {:.2f}%, test loss: {:.7f} throughout training (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_epoch)) 304 | logging.info('************ Best model\'s test accuracy: {:.2f}%, test loss: {:.7f} throughout training (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_epoch)) 305 | 306 | if __name__ == '__main__': 307 | main() 308 | -------------------------------------------------------------------------------- /n_body/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pickle 4 | import random 5 | import numpy as np 6 | import shutil 7 | import torch 8 | import networkx as nx 9 | from torch.autograd import Variable 10 | 11 | from util import * 12 | from MLPs import * 13 | import logging 14 | import math 15 | from in_network import InteractionNetwork as IN 16 | from physics import G 17 | 18 | random_seed = 1 19 | torch.manual_seed(random_seed) 20 | torch.cuda.manual_seed(random_seed) 21 | torch.backends.cudnn.deterministic = True 22 | torch.backends.cudnn.benchmark = False 23 | np.random.seed(random_seed) 24 | random.seed(random_seed) 25 | 26 | best_prec, best_loss, best_model_test_acc, best_model_test_loss, best_model_mape_loss = 0.0, 1e+8*1.0, 0.0, 1e+8*1.0, 1e+8*1.0 27 | is_best = False 28 | best_epoch = 0 29 | 30 | model_types = {'IN': IN} 31 | 32 | def calc_feature(receiver, sender): 33 | """ 34 | Return m_sender / (distance ** 3) * (X_other - X_self ) 35 | """ 36 | diff = sender[1:3] - receiver[1:3] # difference in (x, y) 37 | distance = torch.norm(diff) 38 | if distance < 1: 39 | distance = 1 40 | return G * sender[0] / (distance ** 3) * diff 41 | 42 | def feature_engineering(args, model, dataset): 43 | bs = len(dataset) 44 | if not args.fe: 45 | ra = torch.FloatTensor(np.zeros((bs, model.rel_dim, model.n_relations))).to(args.device) 46 | return ra 47 | 48 | dd = [] 49 | label = [] 50 | for d, ans, _ in dataset: 51 | dd.append(d) 52 | label.append(ans) 53 | data = (dd, label) 54 | 55 | obj = torch.FloatTensor(data[0][0:bs]).to(args.device) 56 | 57 | # receiver: orr; sender: ors 58 | r_info = 
np.zeros((bs, model.rel_dim, model.n_relations)) 59 | ra = torch.FloatTensor(r_info).to(args.device) 60 | obj_t = torch.transpose(obj, 1, 2).reshape(-1, model.n_objects) # (bs * obj_dim, n_objects) 61 | orr = obj_t.mm(model.rr).reshape((bs, model.obj_dim, -1)) # (bs, obj_dim, n_relations) 62 | ors = obj_t.mm(model.rs).reshape((bs, model.obj_dim, -1)) # (bs, obj_dim, n_relations) 63 | 64 | for b in range(bs): 65 | for i in range(model.n_relations): 66 | receiver = orr[b][:,i] 67 | sender = ors[b][:,i] 68 | fe = calc_feature(receiver, sender) 69 | ra[b, :, i] = fe 70 | return ra 71 | 72 | def add_fe2data(args, model, dataset): 73 | ra = feature_engineering(args, model, dataset) 74 | #dataset_fe = (dataset, ra) 75 | dataset_fe = [] 76 | for i in range(len(ra)): 77 | dataset_fe.append((dataset[i], ra[i])) 78 | return dataset_fe 79 | 80 | def save_checkpoint(state, is_best, epoch, args): 81 | if not is_best: 82 | return 83 | """Saves checkpoint to disk""" 84 | 85 | directory = "models_dir/%s/%s/"%(args.data, args.filename) 86 | if not os.path.exists(directory): 87 | os.makedirs(directory) 88 | filename = directory + 'model_best.pth.tar' 89 | torch.save(state, filename) 90 | 91 | def cvt_data_axis(dataset): 92 | data, label, fe = [], [], [] 93 | for d, f in dataset: 94 | obj, ans = d[0], d[1] 95 | data.append(obj) 96 | label.append(ans) 97 | fe.append(f) 98 | fe = torch.stack(fe) 99 | return (data, label, fe) 100 | 101 | def tensor_data(data, i, args): 102 | nodes = torch.FloatTensor(data[0][args.batch_size*i:args.batch_size*(i+1)]).to(args.device) 103 | if args.loss_fn == 'cls': 104 | ans = torch.LongTensor(data[1][args.batch_size*i:args.batch_size*(i+1)]).to(args.device) 105 | else: 106 | ans = torch.FloatTensor(data[1][args.batch_size*i:args.batch_size*(i+1)]).to(args.device) 107 | 108 | fe = data[-1][args.batch_size*i:args.batch_size*(i+1)].to(args.device) 109 | 110 | #ans = ans.view(-1, args.answer_size) 111 | return (nodes, fe), ans, 112 | 113 | def train(epoch, dataset, args, model): 114 | model.train() 115 | train_size = len(dataset) 116 | bs = args.batch_size 117 | 118 | random.shuffle(dataset) 119 | dataset = cvt_data_axis(dataset) 120 | 121 | running_loss, running_loss_mape = 0.0, 0.0 122 | accuracys = [] 123 | losses, losses_mape = [], [] 124 | 125 | batch_runs = max(1, train_size // bs) 126 | for batch_idx in range(batch_runs): 127 | input_nodes, label = tensor_data(dataset, batch_idx, args) 128 | accuracy, loss, mape_loss = model.train_(input_nodes, label) 129 | running_loss += loss 130 | running_loss_mape += mape_loss 131 | 132 | accuracys.append(accuracy) 133 | losses.append(loss) 134 | losses_mape.append(mape_loss) 135 | 136 | if (batch_idx + 1) % args.log_interval == 0: 137 | print('Train Epoch: {} [{}/{} ({:.2f}%)] loss: {:.7f}\t'.format(epoch, batch_idx * bs, train_size, 100 * batch_idx * bs / train_size, running_loss/(1 * args.log_interval))) 138 | logging.info('Train Epoch: {} [{}/{} ({:.2f}%)] loss: {:.7f} \t'.format(epoch, batch_idx * bs, train_size, 100 * batch_idx * bs / train_size, running_loss/(1 * args.log_interval))) 139 | running_loss = 0.0 140 | 141 | avg_accuracy = sum(accuracys) *1.0 / len(accuracys) 142 | avg_losses = sum(losses) *1.0 / len(losses) 143 | avg_losses_mape = sum(losses_mape) *1.0 / len(losses_mape) 144 | print('\nEpoch {}: Train set: accuracy: {:.2f}% \t | loss: {:.7f} \t | \t mape: {:.7f}'.format(epoch, avg_accuracy, avg_losses, avg_losses_mape)) 145 | logging.info('\nEpoch {}: Train set: accuracy: {:.2f}% \t | loss: {:.7f} \t\t | \t mape: 
{:.7f}'.format(epoch, avg_accuracy, avg_losses, avg_losses_mape)) 146 | 147 | def validate(epoch, dataset, args, model): 148 | global is_best, best_prec, best_loss 149 | 150 | model.eval() 151 | test_size = len(dataset) 152 | bs = args.batch_size 153 | dataset = cvt_data_axis(dataset) 154 | 155 | accuracys = [] 156 | losses, mape_losses = [], [] 157 | batch_runs = max(1, test_size // bs) 158 | for batch_idx in range(batch_runs): 159 | input_nodes, label = tensor_data(dataset, batch_idx, args) 160 | accuracy, loss, mape_loss = model.test_(input_nodes, label) 161 | accuracys.append(accuracy) 162 | losses.append(loss) 163 | mape_losses.append(mape_loss) 164 | 165 | avg_accuracy = sum(accuracys) *1.0 / len(accuracys) 166 | avg_losses = sum(losses) *1.0 / len(losses) 167 | avg_losses_mape = sum(mape_losses) *1.0 / len(mape_losses) 168 | print('Epoch {}: Validation set: accuracy: {:.2f}% | loss: {:.7f} \t | \t mape: {:.7f}'.format(epoch, avg_accuracy, avg_losses, avg_losses_mape)) 169 | logging.info('Epoch {}: Validation set: accuracy: {:.2f}% | loss: {:.7f} \t | \t mape: {:.7f}'.format(epoch, avg_accuracy, avg_losses, avg_losses_mape)) 170 | 171 | if args.loss_fn == 'cls': 172 | is_best = avg_accuracy > best_prec 173 | else: 174 | is_best = avg_losses < best_loss 175 | best_prec = max(avg_accuracy, best_prec) 176 | best_loss = min(avg_losses, best_loss) 177 | 178 | def test(epoch, dataset, args, model): 179 | global is_best, best_model_test_acc, best_model_test_loss, best_epoch, best_model_mape_loss 180 | 181 | model.eval() 182 | test_size = len(dataset) 183 | bs = args.batch_size 184 | dataset = cvt_data_axis(dataset) 185 | 186 | accuracys = [] 187 | losses, mape_losses = [], [] 188 | batch_runs = max(1, test_size // bs) 189 | for batch_idx in range(batch_runs): 190 | input_nodes, label = tensor_data(dataset, batch_idx, args) 191 | accuracy, loss, mape_loss = model.test_(input_nodes, label) 192 | accuracys.append(accuracy) 193 | losses.append(loss) 194 | mape_losses.append(mape_loss) 195 | 196 | avg_accuracy = sum(accuracys) *1.0 / len(accuracys) 197 | avg_losses = sum(losses) *1.0 / len(losses) 198 | avg_losses_mape = sum(mape_losses) *1.0 / len(mape_losses) 199 | 200 | print('Epoch {}: Test set: accuracy: {:.2f}% \t | loss: {:.7f} \t | \t mape: {:.7f} \n'.format(epoch, avg_accuracy, avg_losses, avg_losses_mape)) 201 | logging.info('Epoch {}: Test set: accuracy: {:.2f}% \t | loss: {:.7f} \t | \t mape: {:.7f} \n'.format(epoch, avg_accuracy, avg_losses, avg_losses_mape)) 202 | 203 | 204 | if is_best: 205 | best_model_test_acc = avg_accuracy 206 | best_model_test_loss = avg_losses 207 | best_model_mape_loss = avg_losses_mape 208 | best_epoch = epoch 209 | 210 | if epoch%10 == 0: 211 | print('************ Best model\'s test acc: {:.2f}%, test loss: {:.7f}, mape: {:.7f} (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_model_mape_loss, best_epoch)) 212 | logging.info('************ Best model\'s test acc: {:.2f}%, test loss: {:.7f}, mape: {:.7f} (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_model_mape_loss, best_epoch)) 213 | 214 | def load_data(index_filename, mode): 215 | with open("./run/%s.txt" %index_filename, 'r') as f: 216 | dataset = [] 217 | for line in f: 218 | with open("./data/%s" %line.strip(), 'rb') as f2: 219 | dataset.extend(pickle.load(f2)[mode]) 220 | return dataset 221 | 222 | def setup_logs(args): 223 | file_dir = "results" 224 | if not args.no_log: 225 | files_dir = '%s/%s' 
%(file_dir, args.data) 226 | args.files_dir = files_dir 227 | 228 | args.filename = '%s_fe%s_lr%s_hdim%s_bs%s_epoch%d_seed%d.log' \ 229 | %(args.model, args.fe, args.lr, args.hidden_dim, args.batch_size, args.epochs, random_seed) 230 | 231 | if not os.path.exists(files_dir): 232 | os.makedirs(files_dir, exist_ok=True) 233 | mode = 'w+' 234 | if args.resume: 235 | mode = 'a+' 236 | logging.basicConfig(format='%(message)s', 237 | level=logging.INFO, 238 | datefmt='%m-%d %H:%M', 239 | filename="%s/%s" %(args.files_dir, args.filename), 240 | filemode=mode) 241 | 242 | print(vars(args)) 243 | logging.info(vars(args)) 244 | 245 | def resume(args, model): 246 | if os.path.isfile(args.resume): 247 | print("=> loading checkpoint '{}'".format(args.resume)) 248 | logging.info("=> loading checkpoint '{}'".format(args.resume)) 249 | 250 | checkpoint = torch.load(args.resume) 251 | 252 | args.start_epoch = checkpoint['epoch'] 253 | best_prec = checkpoint['best_prec'] 254 | best_model_test_acc = checkpoint['best_model_test_acc'] 255 | best_model_test_loss = checkpoint['best_model_test_loss'] 256 | best_model_mape_loss = checkpoint['best_model_mape_loss'] 257 | model.load_state_dict(checkpoint['state_dict']) 258 | model.optimizer.load_state_dict(checkpoint['optimizer']) 259 | print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 260 | logging.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 261 | else: 262 | print("=> no checkpoint found at '{}'".format(args.resume)) 263 | logging.info("=> no checkpoint found at '{}'".format(args.resume)) 264 | return model 265 | 266 | 267 | def main(): 268 | parser = argparse.ArgumentParser() 269 | 270 | #Model specifications 271 | parser.add_argument('--model', type=str, choices=['IN'], default='IN', help='choose which model') 272 | parser.add_argument('--activation', type=str, choices=['relu', 'tanh','linear','sigmoid'], default='relu', help='activation function') 273 | parser.add_argument('--hidden_dim', type=int, default=128, help='width of MLPs') 274 | parser.add_argument('--fe', action='store_true', default=False, help='add feature engineering to the model') 275 | 276 | # Training settings 277 | parser.add_argument('--device', type=int, default=0, help='which gpu to use if any (default: 0)') 278 | parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)') 279 | parser.add_argument('--resume', type=str, help='resume from a stored model') 280 | parser.add_argument('--lr', type=float, default=0.005, help='learning rate (default: 0.005)') 281 | parser.add_argument('--decay', type=float, default=1e-5, help='weight decay (default: 1e-5)') 282 | parser.add_argument('--batch_size', type=int, default=32, help='input batch size for training (default: 32)') 283 | parser.add_argument('--epochs', type=int, default=2000, help='number of epochs to train') 284 | parser.add_argument('--loss_fn', type=str, choices=['cls', 'reg', 'mape'], default='reg', help='classification or regression loss') 285 | parser.add_argument('--optimizer', type=str, choices=['Adam', 'SGD'], default='Adam', help='Adam or SGD') 286 | 287 | 288 | # Logging and storage settings 289 | parser.add_argument('--log_file', type=str, default='accuracy.log', help='log filename') 290 | parser.add_argument('--save_model', action='store_true', default=False, help='flag to store the training models') 291 | parser.add_argument('--no_log', action='store_true', default=False, help='flag to disable logging of results') 292 | 
parser.add_argument('--log_interval', type=int, default=50, help='how many batches to wait before logging training status') 293 | parser.add_argument('--filename', type=str, default='', help='the file which store trained model logs') 294 | parser.add_argument('--files_dir', type=str, default='', help='the directory to store trained models logs') 295 | 296 | # Data settings 297 | parser.add_argument('--data', type=str, help='path to datafile') 298 | parser.add_argument('--edge_feature_size', type=int, default=2, help='size of edge features') 299 | parser.add_argument('--node_feature_size', type=int, default=5, help='size of node features') 300 | 301 | args = parser.parse_args() 302 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 303 | 304 | torch.manual_seed(args.seed) 305 | if torch.cuda.is_available(): 306 | torch.cuda.manual_seed_all(0) 307 | 308 | with open("./data/%s.pickle" %args.data, 'rb') as f: 309 | train_datasets, validation_datasets, test_datasets = pickle.load(f) 310 | 311 | args.node_feature_size = 5 312 | 313 | if args.model == 'IN': 314 | args.n_objects = train_datasets[0][0].shape[0] 315 | 316 | args.answer_size = 2 # predicts the location (x,y) for each object 317 | 318 | setup_logs(args) 319 | 320 | model = model_types[args.model](args).to(args.device) 321 | 322 | scheduler = torch.optim.lr_scheduler.StepLR(model.optimizer, step_size=50, gamma=0.5) 323 | 324 | bs = args.batch_size 325 | 326 | model_dirs = './models_dir' 327 | try: 328 | os.makedirs(model_dirs) 329 | except: 330 | print('directory {} already exists'.format(model_dirs)) 331 | 332 | train_datasets = add_fe2data(args, model, train_datasets) 333 | validation_datasets = add_fe2data(args, model, validation_datasets) 334 | test_datasets = add_fe2data(args, model, test_datasets) 335 | 336 | if args.epochs == 0: 337 | epoch = 0 338 | validate(epoch, validation_datasets, args, model) 339 | test(epoch, test_datasets, args, model) 340 | args.epochs = -1 341 | 342 | for epoch in range(1, args.epochs + 1): 343 | train(epoch, train_datasets, args, model) 344 | validate(epoch, validation_datasets, args, model) 345 | test(epoch, test_datasets, args, model) 346 | scheduler.step() 347 | if is_best and args.save_model: 348 | save_checkpoint({ 349 | 'epoch': epoch + 1, 350 | 'arch': args.model, 351 | 'args': args, 352 | 'state_dict': model.state_dict(), 353 | 'best_prec': best_prec, 354 | 'best_model_test_acc': best_model_test_acc, 355 | 'best_model_test_loss': best_model_test_loss, 356 | 'best_model_mape_loss': best_model_mape_loss, 357 | 'optimizer' : model.optimizer.state_dict(), 358 | }, is_best, epoch, args) 359 | 360 | print('************ Best model\'s test acc: {:.2f}%, test loss: {:.7f} throughout training (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_epoch)) 361 | logging.info('************ Best model\'s test acc: {:.2f}%, test loss: {:.7f} throughout training (best model is from epoch {}) ************\n'.format(best_model_test_acc, best_model_test_loss, best_epoch)) 362 | 363 | if __name__ == '__main__': 364 | main() 365 | --------------------------------------------------------------------------------