├── README.md
├── back_test.py
├── back_test_mlp.py
├── first_process.py
├── process_rolling.py
├── torch_model.py
├── torch_model_mlp.py
├── train.py
└── train_mlp.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

## ListFold

Files needed to run the code: `factor_data.npy`, `Y_cl.npy`.

`factor_data.npy` contains the features for A-share stocks and can be downloaded from https://drive.google.com/file/d/1Jc5o6LdMBkwMMiWSCz8w4MbWjymBM6AE/view?usp=sharing

`Y_cl.npy` contains the corresponding weekly prices and can be downloaded from https://drive.google.com/file/d/11Gsqm_dbCVm5Npehz83mylKkmxJ_2xR8/view?usp=sharing

For a test run, put these two files in the main directory.

### Flowchart

1. First run `first_process.py`, which turns `factor_data.npy` into `features_processed.npz`. It converts all `True, False` entries to `1, 0` and compresses the factor data.

   Estimated running time: 5 minutes.

2. Then run `process_rolling.py`. The default training and testing lengths are 300 and 16, respectively. This script splits the data (features and prices) on a rolling basis and writes the pieces into a folder.

   Estimated running time: 5 minutes, depending on the training and testing lengths.

3. Then train the model with `train.py`, which saves all models to a folder. An example run is `python train.py --pp 21`. Here `21` denotes the total number of train-test pairs.

   Estimated running time: 1 hour, depending on the training and testing lengths.

4. For the back test, an example run is `python back_test.py`, which generates a csv file recording all positive and negative positions and the combined return for each week.

### Arguments

In `train.py`, we can control the training and testing lengths, the batch size, and the number of training epochs. Use `python train.py -h` to see details.

In `back_test.py`, we can pick different saved models to load and test different strategies. See `python back_test.py -h` for details.

### MSE

For the MSE loss we drop the final `relu` (log-returns can be negative), so we use `train_mlp.py` and `back_test_mlp.py`, which have slightly different network structures.
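### Quick start

With the two data files in place, an end-to-end run with the default 300/16 rolling lengths looks like this (`--pp 21` and `--N 21` assume the data yields 21 train-test pairs; adjust to your split):

```
python first_process.py
python process_rolling.py --trnln 300 --tstln 16
python train.py --pp 21
python back_test.py --N 21
```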

--------------------------------------------------------------------------------
/back_test.py:
--------------------------------------------------------------------------------

import numpy as np
import os
import argparse
import pandas as pd
import torch
from torch.autograd import Variable
from torch_model import CMLE, LMLE, Closs, Closs_explained, Closs_sigmoid

Models = {'CMLE': CMLE, 'LMLE': LMLE}
Losses = {'plain': Closs, 'explained': Closs_explained, 'sigmoid': Closs_sigmoid}

parser = argparse.ArgumentParser(description='Rolling Back Test')
parser.add_argument(
    '--train-rolling-length',
    type=int,
    default=300,
    help='rolling length of training')
parser.add_argument(
    '--test-rolling-length',
    type=int,
    default=16,
    help='rolling length of test')
parser.add_argument(
    '--epochs',
    type=int,
    default=1000,
    help='train epochs')
parser.add_argument(
    '--model-type',
    type=str,
    default='CMLE',
    help='model type, either CMLE or LMLE')
parser.add_argument(
    '--loss-type',
    type=str,
    default='explained',
    help='loss type, either explained, sigmoid or LMLE_loss')
parser.add_argument(
    '--N',
    type=int,
    default=20,
    help='number of rolling pairs')
parser.add_argument(
    '--nfeatures',
    type=int,
    default=68,
    help='number of features')
parser.add_argument(
    '--batch-size',
    type=int,
    default=32,
    help='batch size used in training')
parser.add_argument(
    '--short',
    type=str,
    default='bottom',
    help='which stocks to short, either bottom or average')

args = parser.parse_args()
suffix = str(args.train_rolling_length) + '_' + str(args.test_rolling_length)

# Test-time batching reuses the training batch size, which is also part of the
# checkpoint file name. The universe holds 80 stocks; ranks run 0 (best) to 79.
batch_size = args.batch_size

def return_rank(a):
    # Rank 0 corresponds to the largest entry of a.
    a = a * -1
    order = a.argsort()
    return order.argsort()


def test(model, test_features):
    # Score every test week, padding the last batch with copies of the first
    # sample so that every batch has exactly batch_size rows.
    test_features = np.load(test_features, allow_pickle = True)
    print(test_features.shape)
    L = len(test_features)
    N = len(test_features) // batch_size + 1
    v = np.zeros((N*batch_size, test_features.shape[1], test_features.shape[2]))
    v[:L, :, :] = test_features
    for i in range(N * batch_size - L):
        v[i+L,:,:] = test_features[0,:,:]
    res = []
    with torch.no_grad():
        for i in range(N):
            batch_x = Variable(torch.from_numpy(v[i * batch_size:(i+1) * batch_size,:,:]).double())
            scores = model(batch_x)
            res.append(np.array(scores.data.cpu()))
    res = np.concatenate(res, axis = 0)
    res = res[:L]  # drop the padding rows
    return res

def back_test(k, score, returns):
    # Long the k top-ranked stocks and short either the k bottom-ranked stocks
    # (--short bottom) or the equal-weighted universe (--short average).
    res = []
    weight_list_pos, weight_list_neg = [], []
    return_list_pos, return_list_neg = [], []
    for i in range(len(score)):
        rank = return_rank(score[i])
        rank2ind = np.zeros(len(rank), dtype = int)
        for j in range(len(rank)):
            rank2ind[rank[j]] = j  # rank2ind[r] = index of the stock holding rank r
        # Equal weights over the k long positions. (Score-proportional and
        # linearly decaying (k - j) / s weights were tried and overwritten;
        # only the equal weighting is kept.)
        weights = np.full(k, 1.0 / k)
        total_return = 0
        for j in range(k):
            total_return += weights[j] * returns[i][rank2ind[j]]
            if args.short == 'bottom':
                total_return -= weights[j] * returns[i][rank2ind[79 - j]]
        if args.short == 'average':
            # Short the whole universe once, equal-weighted.
            for j in range(80):
                total_return -= 1.0/80.0 * returns[i][rank2ind[j]]
        res.append(total_return)
        pos, neg = [], []
        r_pos, r_neg = [], []
        for j in range(k):
            pos.append(rank2ind[j])
            neg.append(rank2ind[79 - j])
            r_pos.append(returns[i][rank2ind[j]])
            r_neg.append(returns[i][rank2ind[79 - j]])
        weight_list_pos.append(pos)
        weight_list_neg.append(neg)
        return_list_pos.append(r_pos)
        return_list_neg.append(r_neg)
    return np.array(res), np.array(weight_list_pos), np.array(weight_list_neg), np.array(return_list_pos), np.array(return_list_neg)

def back_test_all_rank(k, score, returns):
    # Predicted and realised rank matrices for every week (k is unused).
    rp = np.zeros((len(score), 80))
    rt = np.zeros((len(score), 80))
    for i in range(len(score)):
        rp[i] = return_rank(score[i])
        rt[i] = return_rank(returns[i])
    return rp, rt


def back_test_rank(k, score, returns):
    ranks = []
    for i in range(len(score)):
        rank = return_rank(score[i])
        rank2ind = np.zeros(len(rank), dtype = int)
        for j in range(len(rank)):
            rank2ind[rank[j]] = j
        ranks.append(rank2ind)
    return ranks

def total_return(a):
    # Compound a sequence of simple returns.
    ans = 1
    for item in a:
        ans *= (1 + item)
    return ans

def load_model_test(model, model_name, test_features, test_ranks):
    # Mean weekly long-short return for every portfolio size k = 1..80.
    tmp = []
    saved_state = torch.load(model_name)
    model.load_state_dict(saved_state)
    y = np.array(test(model, test_features))
    r = np.load(test_ranks)  # despite the name, this file holds weekly log-returns
    r = r[:len(y), :]

    for j in range(80):
        res = back_test(j+1, y, r)[0]
        tmp.append(np.mean(res))

    return tmp

def load_model_test_rank(model, model_name, test_features, test_ranks):
    saved_state = torch.load(model_name)
    model.load_state_dict(saved_state)
    y = np.array(test(model, test_features))
    r = np.load(test_ranks)
    r = r[:len(y), :]
    return back_test(8, y, r)

def load_model_test_all_rank(model, model_name, test_features, test_ranks, ranks):
    saved_state = torch.load(model_name)
    model.load_state_dict(saved_state)
    y = np.array(test(model, test_features))
    r = np.load(test_ranks)
    r = r[:len(y), :]
    rp, rt = back_test_all_rank(8, y, r)
    return rp, rt


Model = Models[args.model_type]
model_t = Model(n_features = args.nfeatures)
model_t = model_t.double()

def load_model_test_ranks(model, model_name, test_features, test_ranks, ranks):
    # Here `ranks` is the number of long/short positions k.
    saved_state = torch.load(model_name)
    model.load_state_dict(saved_state)
    y = np.array(test(model, test_features))
    r = np.load(test_ranks)
    r = r[:len(y), :]
    return back_test(ranks, y, r)


d = pd.DataFrame()
tt = []
wp, wn, rp, rn = [], [], [], []
epoch_to_use = 999  # train.py saves only the final epoch (epochs - 1)
rank_to_use = 8     # number of long and short positions

for ind in range(0, args.N):
    for itr in range(epoch_to_use, epoch_to_use+1):
        print(ind, itr)
        tmp, weight_list_pos, weight_list_neg, return_list_pos, return_list_neg = load_model_test_ranks(model_t, './models_' + suffix + '_' + args.loss_type +'/rolling_model_' + str(ind) + f'_{args.batch_size}_' + str(itr) + '.dat', './rolling_' + suffix + '/features_test_' + str(ind) + '.npy', './rolling_' + suffix +'/ranks_test_' + str(ind) + '.npy', rank_to_use)
        tt.append(tmp)
        wp.append(weight_list_pos)
        wn.append(weight_list_neg)
        rp.append(return_list_pos)
        rn.append(return_list_neg)

if not os.path.exists('./results_' + suffix):
    os.makedirs('./results_' + suffix)
tt = np.concatenate(tt)
wp = np.concatenate(wp, axis=0)
wn = np.concatenate(wn, axis=0)
rp = np.concatenate(rp, axis=0)
rn = np.concatenate(rn, axis=0)
for i in range(8):
    d['pos_ticker_'+str(i+1)] = wp[:,i]
for i in range(8):
    d['neg_ticker_'+str(i+1)] = wn[:,i]
d['return'] = tt
d.to_csv('./results_' + suffix +f'/results_{args.short}_' + args.loss_type + '_' + str(epoch_to_use) + '_' + str(rank_to_use)+f'_{args.batch_size}.csv', index = False)
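A note on the ranking convention used throughout `back_test.py`: `return_rank` gives rank 0 to the highest score, and `rank2ind` inverts it so that `rank2ind[r]` is the index of the stock holding rank `r`. A minimal sketch with fabricated scores:

```python
import numpy as np

def return_rank(a):  # copied from back_test.py
    a = a * -1
    order = a.argsort()
    return order.argsort()

scores = np.array([0.3, 0.9, 0.1])
rank = return_rank(scores)
print(rank)                       # [1 0 2]: the 0.9 stock holds rank 0
rank2ind = np.zeros(len(rank), dtype=int)
for j in range(len(rank)):
    rank2ind[rank[j]] = j
print(rank2ind)                   # [1 0 2]: rank 0 -> stock 1, rank 2 -> stock 2
```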

--------------------------------------------------------------------------------
/back_test_mlp.py:
--------------------------------------------------------------------------------

import numpy as np
import os
import argparse
import pandas as pd
import torch
from torch.autograd import Variable
from torch_model_mlp import CMLE, LMLE, Closs, Closs_explained, Closs_sigmoid

Models = {'CMLE': CMLE, 'LMLE': LMLE}
Losses = {'plain': Closs, 'explained': Closs_explained, 'sigmoid': Closs_sigmoid}

parser = argparse.ArgumentParser(description='Rolling Back Test (MSE)')
parser.add_argument(
    '--train-rolling-length',
    type=int,
    default=300,
    help='rolling length of training')
parser.add_argument(
    '--test-rolling-length',
    type=int,
    default=16,
    help='rolling length of test')
parser.add_argument(
    '--epochs',
    type=int,
    default=1000,
    help='train epochs')
parser.add_argument(
    '--model-type',
    type=str,
    default='CMLE',
    help='model type, either CMLE or LMLE')
parser.add_argument(
    '--loss-type',
    type=str,
    default='MSE',
    help='loss type, should be MSE')
parser.add_argument(
    '--N',
    type=int,
    default=20,
    help='number of rolling pairs')
parser.add_argument(
    '--nfeatures',
    type=int,
    default=68,
    help='number of features')
parser.add_argument(
    '--batch-size',
    type=int,
    default=32,
    help='batch size used in training')
parser.add_argument(
    '--short',
    type=str,
    default='bottom',
    help='which stocks to short, either bottom or average')

args = parser.parse_args()
suffix = str(args.train_rolling_length) + '_' + str(args.test_rolling_length)

# Test-time batching reuses the training batch size, which is also part of the
# checkpoint file name. The universe holds 80 stocks; ranks run 0 (best) to 79.
batch_size = args.batch_size

def return_rank(a):
    # Rank 0 corresponds to the largest entry of a.
    a = a * -1
    order = a.argsort()
    return order.argsort()


def test(model, test_features):
    # Score every test week, padding the last batch with copies of the first
    # sample so that every batch has exactly batch_size rows.
    test_features = np.load(test_features, allow_pickle = True)
    print(test_features.shape)
    L = len(test_features)
    N = len(test_features) // batch_size + 1
    v = np.zeros((N*batch_size, test_features.shape[1], test_features.shape[2]))
    v[:L, :, :] = test_features
    for i in range(N * batch_size - L):
        v[i+L,:,:] = test_features[0,:,:]
    res = []
    with torch.no_grad():
        for i in range(N):
            batch_x = Variable(torch.from_numpy(v[i * batch_size:(i+1) * batch_size,:,:]).double())
            scores = model(batch_x)
            res.append(np.array(scores.data.cpu()))
    res = np.concatenate(res, axis = 0)
    res = res[:L]  # drop the padding rows
    return res

def back_test(k, score, returns):
    # Long the k top-ranked stocks and short either the k bottom-ranked stocks
    # (--short bottom) or the equal-weighted universe (--short average).
    res = []
    weight_list_pos, weight_list_neg = [], []
    return_list_pos, return_list_neg = [], []
    for i in range(len(score)):
        rank = return_rank(score[i])
        rank2ind = np.zeros(len(rank), dtype = int)
        for j in range(len(rank)):
            rank2ind[rank[j]] = j  # rank2ind[r] = index of the stock holding rank r
        # Equal weights over the k long positions (other weightings were tried
        # and overwritten; only the equal weighting is kept).
        weights = np.full(k, 1.0 / k)
        total_return = 0
        for j in range(k):
            total_return += weights[j] * returns[i][rank2ind[j]]
            if args.short == 'bottom':
                total_return -= weights[j] * returns[i][rank2ind[79 - j]]
        if args.short == 'average':
            # Short the whole universe once, equal-weighted.
            for j in range(80):
                total_return -= 1.0/80.0 * returns[i][rank2ind[j]]
        res.append(total_return)
        pos, neg = [], []
        r_pos, r_neg = [], []
        for j in range(k):
            pos.append(rank2ind[j])
            neg.append(rank2ind[79 - j])
            r_pos.append(returns[i][rank2ind[j]])
            r_neg.append(returns[i][rank2ind[79 - j]])
        weight_list_pos.append(pos)
        weight_list_neg.append(neg)
        return_list_pos.append(r_pos)
        return_list_neg.append(r_neg)
    return np.array(res), np.array(weight_list_pos), np.array(weight_list_neg), np.array(return_list_pos), np.array(return_list_neg)

def back_test_all_rank(k, score, returns):
    # Predicted and realised rank matrices for every week (k is unused).
    rp = np.zeros((len(score), 80))
    rt = np.zeros((len(score), 80))
    for i in range(len(score)):
        rp[i] = return_rank(score[i])
        rt[i] = return_rank(returns[i])
    return rp, rt


def back_test_rank(k, score, returns):
    ranks = []
    for i in range(len(score)):
        rank = return_rank(score[i])
        rank2ind = np.zeros(len(rank), dtype = int)
        for j in range(len(rank)):
            rank2ind[rank[j]] = j
        ranks.append(rank2ind)
    return ranks

def total_return(a):
    # Compound a sequence of simple returns.
    ans = 1
    for item in a:
        ans *= (1 + item)
    return ans

def load_model_test(model, model_name, test_features, test_ranks):
    # Mean weekly long-short return for every portfolio size k = 1..80.
    tmp = []
    saved_state = torch.load(model_name)
    model.load_state_dict(saved_state)
    y = np.array(test(model, test_features))
    r = np.load(test_ranks)  # despite the name, this file holds weekly log-returns
    r = r[:len(y), :]

    for j in range(80):
        res = back_test(j+1, y, r)[0]
        tmp.append(np.mean(res))

    return tmp

def load_model_test_rank(model, model_name, test_features, test_ranks):
    saved_state = torch.load(model_name)
    model.load_state_dict(saved_state)
    y = np.array(test(model, test_features))
    r = np.load(test_ranks)
    r = r[:len(y), :]
    return back_test(8, y, r)

def load_model_test_all_rank(model, model_name, test_features, test_ranks, ranks):
    saved_state = torch.load(model_name)
    model.load_state_dict(saved_state)
    y = np.array(test(model, test_features))
    r = np.load(test_ranks)
    r = r[:len(y), :]
    rp, rt = back_test_all_rank(8, y, r)
    return rp, rt


Model = Models[args.model_type]
model_t = Model(n_features = args.nfeatures)
model_t = model_t.double()

def load_model_test_ranks(model, model_name, test_features, test_ranks, ranks):
    # Here `ranks` is the number of long/short positions k.
    saved_state = torch.load(model_name)
    model.load_state_dict(saved_state)
    y = np.array(test(model, test_features))
    r = np.load(test_ranks)
    r = r[:len(y), :]
    return back_test(ranks, y, r)


d = pd.DataFrame()
tt = []
wp, wn, rp, rn = [], [], [], []
epoch_to_use = 999  # must equal (train epochs - 1) of the saved checkpoints;
                    # train_mlp.py defaults to 4000 epochs, which saves 3999
rank_to_use = 8     # number of long and short positions

for ind in range(0, args.N):
    for itr in range(epoch_to_use, epoch_to_use+1):
        print(ind, itr)
        tmp, weight_list_pos, weight_list_neg, return_list_pos, return_list_neg = load_model_test_ranks(model_t, './models_' + suffix + '_' + args.loss_type +'/rolling_model_' + str(ind) + f'_{args.batch_size}_' + str(itr) + '.dat', './rolling_' + suffix + '/features_test_' + str(ind) + '.npy', './rolling_' + suffix +'/ranks_test_' + str(ind) + '.npy', rank_to_use)
        tt.append(tmp)
        wp.append(weight_list_pos)
        wn.append(weight_list_neg)
        rp.append(return_list_pos)
        rn.append(return_list_neg)

if not os.path.exists('./results_' + suffix):
    os.makedirs('./results_' + suffix)
tt = np.concatenate(tt)
wp = np.concatenate(wp, axis=0)
wn = np.concatenate(wn, axis=0)
rp = np.concatenate(rp, axis=0)
rn = np.concatenate(rn, axis=0)
for i in range(8):
    d['pos_ticker_'+str(i+1)] = wp[:,i]
for i in range(8):
    d['neg_ticker_'+str(i+1)] = wn[:,i]
d['return'] = tt
d.to_csv('./results_' + suffix +f'/results_{args.short}_' + args.loss_type + '_' + str(epoch_to_use) + '_' + str(rank_to_use)+f'_{args.batch_size}.csv', index = False)

--------------------------------------------------------------------------------
/first_process.py:
--------------------------------------------------------------------------------

import numpy as np

m = np.load('factor_data.npy', allow_pickle = True)

# Column 51 (trade_status in process_rolling.py's feature list) holds
# True/False; convert it to 1/0 so the whole array can be treated as numeric.
for i in range(len(m)):
    for j in range(len(m[i])):
        if m[i][j][51] == True:
            m[i][j][51] = 1
        elif m[i][j][51] == False:
            m[i][j][51] = 0

np.savez_compressed('features_processed.npz', m)
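Since `process_rolling.py` later slices `m` as a single 3-D array, the conversion loop above can also be vectorized. A sketch under that assumption (values other than booleans pass through unchanged):

```python
import numpy as np

m = np.load('factor_data.npy', allow_pickle=True)
col = m[:, :, 51]
# Replace boolean entries elementwise; everything else is kept as-is.
m[:, :, 51] = np.where(col == True, 1, np.where(col == False, 0, col))
np.savez_compressed('features_processed.npz', m)
```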

--------------------------------------------------------------------------------
/process_rolling.py:
--------------------------------------------------------------------------------

import numpy as np
import copy
import os
import argparse

parser = argparse.ArgumentParser(description='Rolling Split')
parser.add_argument(
    '--trnln',
    type=int,
    default=300,
    help='rolling length of training')
parser.add_argument(
    '--tstln',
    type=int,
    default=16,
    help='rolling length of test')


# 85 candidate stock columns; the five at positions 50-54 are dropped,
# leaving the 80-stock universe used everywhere else.
to_use = [81, 95, 101, 155, 157, 166, 177, 198, 200, 231, 336, 373, 405, 417, 442, 481, 496, 516, 576, 642, 653, 750, 766, 2206, 2208, 2211, 2223, 2224, 2228, 2244, 2252, 2267, 2286, 2296, 2303, 2309, 2352, 2354, 2359, 2368, 2389, 2396, 2419, 2436, 2443, 2466, 2468, 2484, 2486, 2488, 2507, 2523, 2582, 2585, 2624, 2626, 2627, 2651, 2669, 2672, 2675, 2684, 2695, 2710, 2726, 2740, 2772, 2778, 2802, 2827, 2830, 2852, 2886, 2889, 2922, 2933, 2941, 2947, 2951, 2983, 3004, 3036, 3051, 3063, 3199]
good_stocks = []
for i, ind in enumerate(to_use):
    if i >= 50 and i < 55:
        continue
    good_stocks.append(ind)
print(good_stocks)

feature_cols = ['alpha_100w', 'amount_21', 'amount_5',
    'amount_63', 'amount_div', 'avg_volume_21', 'avg_volume_5',
    'avg_volume_63', 'beta_100w', 'close_low_high', 'close_s_vwap5',
    'close_vwap5', 'c_l2_ibm', 'dlt_miclo', 'highlow_1', 'highlow_12',
    'highlow_3', 'highlow_6', 'ibm_close', 'ibm_svlo', 'IR_netasset_252',
    'IR_roe_252', 'l2_ibm_ewma', 'l2_lbm_ewma', 'magm_yop',
    'ma_crossover_15_36', 'net_assets', 'n_buy_value_small_order', 'pb',
    'pcf_gm', 'period_return', 'q_s_fa_yoyocf', 'rank_amount_div',
    'rank_close_low_high', 'rt_10', 'rt_126', 'rt_12_1', 'rt_15', 'rt_21',
    'rt_252', 'rt_5', 'rt_5_Skewness_10', 'rt_5_Skewness_15',
    'rt_5_Skewness_20', 'rt_5_Skewness_5', 'rt_63', 'std_deviation_100w',
    'sw_first_industry', 's_dq_mv', 's_price_div_dps', 's_val_mv',
    'trade_status', 'trk_rk_pe_re', 'ttm_pcf', 'ttm_pe', 'ttm_ps',
    'ttm_roa', 'ttm_roe', 'turnover_21', 'turnover_5', 'turnover_63',
    'vol_1', 'vol_12', 'vol_3', 'vol_6', 'yeildvol_1m', 'yeildvol_3m',
    'yeildvol_6m', 'yop_pcf', 'yop_pe', 'z_rank_pe', 'z_sde_pe']

# Drop the columns that would leak future information, leaving 68 features.
no_cheating_feature = []
for i, col in enumerate(feature_cols):
    if col == 'period_return' or col == 'sw_first_industry' or col == 'trade_status' or col == 's_price_div_dps':
        continue
    no_cheating_feature.append(i)

print('Reading data')
m = np.load('features_processed.npz', allow_pickle = True)
m = m['arr_0']
print(m.shape)
m = m[:,good_stocks,:]
m = m[:,:,no_cheating_feature]
print(m.shape)

price = np.load('Y_cl.npy')
print(price.shape)
price_next_point = price[1:]
price_today = price[:-1]
price_ratio = np.divide(price_next_point, price_today)

# Weekly log-returns; NaNs (missing prices) are replaced by zero.
returns = np.log(price_ratio)
print(returns.shape)
cnt = 0
for i in range(len(returns)):
    for j in range(len(returns[i])):
        if np.isnan(returns[i][j]):
            cnt += 1
            returns[i][j] = 0.0

D = {}  # counts of constant feature columns that could not be scaled

def get_preprocess_stock(data):
    "data is M * F"
    data = np.array(data, dtype = np.float32)
    a = np.zeros((3, data.shape[-1]))
    # Min over finite values: -inf is mapped to a large number first so it
    # cannot win; NaNs are ignored by nanmin. Symmetrically for the max.
    t = np.nan_to_num(data, nan = np.nan, neginf = 1e9)
    a[0, :] = np.nanmin(t, axis = 0)
    t = np.nan_to_num(data, nan = np.nan, posinf = -1e9)
    a[2, :] = np.nanmax(t, axis = 0)
    for i in range(data.shape[-1]):
        data[:,i] = np.nan_to_num(data[:,i], nan = np.nan, posinf = a[2,i], neginf = a[0,i])
        rng = a[2,i] - a[0,i]
        if rng != 0:
            data[:,i] = (data[:,i] - a[0,i]) / rng
        else:
            # Constant column: nothing to scale; keep a count for debugging.
            D[i] = D.get(i, 0) + 1
            print(i)
            print(data[:,i])
    for i in range(data.shape[-1]):
        # Fill remaining NaNs with the column mean (0.0 if the column is all NaN).
        col_mean = np.nanmean(data[:,i])
        nan_value = 0.0 if np.isnan(col_mean) else col_mean
        data[:,i] = np.nan_to_num(data[:,i], nan = nan_value)
        a[1, i] = nan_value
    return data, a

def get_preprocess(data):
    A = []
    for i in range(data.shape[1]):
        data[:,i,:], a = get_preprocess_stock(data[:,i,:])
        A.append(a)
    return data, A

def preprocess_stock(data, a):
    # Scale test data with the training min/max stored in a, extending the
    # range whenever a test value falls outside it so outputs stay in [0, 1].
    for i in range(data.shape[-1]):
        data[:,i] = np.nan_to_num(data[:,i], nan = a[1,i], posinf = a[2,i], neginf = a[0,i])
    for i in range(data.shape[0]):
        a[0,:] = np.minimum(a[0,:], data[i,:])
        a[2,:] = np.maximum(a[2,:], data[i,:])
        for j in range(data.shape[-1]):
            rng = a[2,j] - a[0,j]
            if rng != 0:
                data[i,j] = (data[i,j] - a[0,j]) / rng
    return data

def preprocess(data, A):
    for i in range(data.shape[1]):
        data[:,i,:] = preprocess_stock(data[:,i,:], A[i])
    return data

args = parser.parse_args()
rolling_train_length = args.trnln
rolling_test_length = args.tstln
folder = './rolling_' + str(rolling_train_length) + '_' + str(rolling_test_length)
for ind, i in enumerate(range(rolling_train_length, len(m), rolling_test_length)):
    train = copy.deepcopy(m[i-rolling_train_length:i,:,:])
    test = copy.deepcopy(m[i:i+rolling_test_length,:,:])
    train, a = get_preprocess(train)
    test = preprocess(test, a)
    if not os.path.exists(folder):
        os.makedirs(folder)
    np.save(folder + '/features_train_' + str(ind) + '.npy', train)
    np.save(folder + '/features_test_' + str(ind) + '.npy', test)
    # Despite the file names, these hold weekly log-returns, not ranks.
    np.save(folder + '/ranks_train_' + str(ind) + '.npy', returns[i-rolling_train_length:i, good_stocks])
    np.save(folder + '/ranks_test_' + str(ind) + '.npy', returns[i:i + rolling_test_length, good_stocks])
    print(np.max(train, axis = (0,1)))
    print(np.max(test, axis = (0,1)))
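To make the train/test scaling concrete, here is a toy run of the two functions above (fabricated values; paste into a session where `get_preprocess_stock` and `preprocess_stock` are defined, since importing the script would execute the whole pipeline):

```python
import numpy as np

# One "stock": 4 weeks, 2 features, with an inf and a NaN.
train = np.array([[1.0, 10.0],
                  [2.0, np.nan],
                  [3.0, 30.0],
                  [np.inf, 20.0]], dtype=np.float32)
train, a = get_preprocess_stock(train)
print(train)  # columns min-max scaled to [0, 1]; the inf is clipped to the
              # column max, the NaN becomes the scaled column mean (0.5 here)
print(a)      # rows: per-column min, NaN fill value, max of the training window

test = np.array([[4.0, 40.0]], dtype=np.float32)
print(preprocess_stock(test, a))  # [[1. 1.]]: the running min/max is extended
                                  # by the test point, so outputs stay in [0, 1]
```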

--------------------------------------------------------------------------------
/torch_model.py:
--------------------------------------------------------------------------------

import torch
from torch import nn
import torch.nn.functional as F

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        m.weight.data.normal_(mean = 0.0, std = 0.05)
        m.bias.data.fill_(0.05)

class Closs(nn.Module):
    # Plain ListFold loss: f is a (batch, num_stocks) score matrix whose
    # columns are already sorted by descending realised return.
    def __init__(self):
        super(Closs, self).__init__()
    def forward(self, f, num_stocks):
        l = torch.sum(f[:,num_stocks // 2:], dim = 1) - torch.sum(f[:, :num_stocks // 2], dim = 1)
        for i in range(num_stocks // 2):
            l += torch.logsumexp(f[:,i:num_stocks-i], dim = 1)
            l += torch.logsumexp(torch.neg(f[:,i:num_stocks-i]), dim = 1)
        l = torch.mean(l)
        return l

class Closs_explained(nn.Module):
    # Same loss with the two logsumexp terms merged; the num_stocks - 2*i
    # diagonal terms exp(f_j - f_j) = 1 are subtracted before taking the log.
    def __init__(self):
        super(Closs_explained, self).__init__()
    def forward(self, f, num_stocks):
        l = torch.sum(f[:,num_stocks // 2:], dim = 1) - torch.sum(f[:, :num_stocks // 2], dim = 1)
        for i in range(num_stocks // 2):
            subtract = torch.tensor(num_stocks - 2*i, requires_grad = False)
            l += torch.log(torch.sum(torch.exp(f[:,i:num_stocks-i]), dim = 1)*torch.sum(torch.exp(torch.neg(f[:,i:num_stocks-i])), dim = 1)-subtract)
        l = torch.mean(l)
        return l

class Closs_sigmoid(nn.Module):
    # Pairwise logistic loss pairing stock i in the top half with
    # stock i + num_stocks/2 in the bottom half.
    def __init__(self):
        super(Closs_sigmoid, self).__init__()
    def forward(self, f, num_stocks):
        l = torch.tensor(1.0, requires_grad = False) + torch.exp(f[:,num_stocks//2:] - f[:,:num_stocks//2])
        return torch.mean(torch.log(l))

class Lloss(nn.Module):
    # Listwise MLE (Plackett-Luce) negative log-likelihood.
    def __init__(self):
        super(Lloss, self).__init__()
    def forward(self, f, num_stocks):
        l = torch.neg(torch.sum(f, dim = 1))
        for i in range(num_stocks):
            l += torch.logsumexp(f[:,i:], dim = 1)
        l = torch.mean(l)
        return l

class CMLE(nn.Module):
    def __init__(self, n_features):
        super(CMLE, self).__init__()
        self.n_features = n_features
        self.linear1 = nn.Linear(self.n_features, self.n_features * 4)
        self.linear2 = nn.Linear(self.n_features * 4, self.n_features * 2)
        self.linear3 = nn.Linear(self.n_features * 2, self.n_features // 2)
        self.linear4 = nn.Linear(self.n_features // 2, 1)
        self.apply(weights_init)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.linear3(x))
        result = F.relu(self.linear4(x))
        # (batch, num_stocks, 1) -> (batch, num_stocks)
        result = result.view(result.shape[0], result.shape[1])
        return result

class LMLE(nn.Module):
    def __init__(self, n_features, num_stocks=80):  # default matches the 80-stock universe
        super(LMLE, self).__init__()
        self.n_features = n_features
        self.num_stocks = num_stocks
        self.linear1 = nn.Linear(self.n_features, self.n_features * 4)
        self.linear2 = nn.Linear(self.n_features * 4, self.n_features * 2)
        self.linear3 = nn.Linear(self.n_features * 2, self.n_features // 2)
        self.linear4 = nn.Linear(self.n_features // 2, 1)
        self.apply(weights_init)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.linear3(x))
        result = F.relu(self.linear4(x))
        result = result.view(result.shape[0], result.shape[1])
        return result

# Per-loss learning rates; note there is no entry for 'plain' (Closs).
learning_rate = {'explained': 5e-5, 'sigmoid': 1e-4, 'LMLE_loss': 1e-4}
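For reference, the objective implemented by `Closs` can be written out from the code above. With scores $f_1,\dots,f_n$ already ordered by descending realised return (position 1 is the best stock; $n = 80$ in this project), each batch element contributes

$$
\ell(f) = \sum_{j > n/2} f_j - \sum_{j \le n/2} f_j + \sum_{i=0}^{n/2-1}\left[\log\sum_{j=i+1}^{n-i} e^{f_j} + \log\sum_{j=i+1}^{n-i} e^{-f_j}\right],
$$

and the batch loss is the mean. `Closs_explained` merges each pair of log terms via $\log\sum_j e^{f_j} + \log\sum_k e^{-f_k} = \log\sum_{j,k} e^{f_j - f_k}$ and subtracts the $n - 2i$ diagonal terms $e^{f_j - f_j} = 1$, so it sums only over $j \neq k$. `Lloss` is the standard Plackett-Luce listwise negative log-likelihood.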

--------------------------------------------------------------------------------
/torch_model_mlp.py:
--------------------------------------------------------------------------------

import torch
from torch import nn
import torch.nn.functional as F

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        m.weight.data.normal_(mean = 0.0, std = 0.05)
        m.bias.data.fill_(0.05)

class Closs(nn.Module):
    # Plain ListFold loss (see torch_model.py); kept here so the two model
    # files expose the same interface.
    def __init__(self):
        super(Closs, self).__init__()
    def forward(self, f, num_stocks):
        l = torch.sum(f[:,num_stocks // 2:], dim = 1) - torch.sum(f[:, :num_stocks // 2], dim = 1)
        for i in range(num_stocks // 2):
            l += torch.logsumexp(f[:,i:num_stocks-i], dim = 1)
            l += torch.logsumexp(torch.neg(f[:,i:num_stocks-i]), dim = 1)
        l = torch.mean(l)
        return l

class Closs_explained(nn.Module):
    def __init__(self):
        super(Closs_explained, self).__init__()
    def forward(self, f, num_stocks):
        l = torch.sum(f[:,num_stocks // 2:], dim = 1) - torch.sum(f[:, :num_stocks // 2], dim = 1)
        for i in range(num_stocks // 2):
            subtract = torch.tensor(num_stocks - 2*i, requires_grad = False)
            l += torch.log(torch.sum(torch.exp(f[:,i:num_stocks-i]), dim = 1)*torch.sum(torch.exp(torch.neg(f[:,i:num_stocks-i])), dim = 1)-subtract)
        l = torch.mean(l)
        return l

class Closs_sigmoid(nn.Module):
    def __init__(self):
        super(Closs_sigmoid, self).__init__()
    def forward(self, f, num_stocks):
        l = torch.tensor(1.0, requires_grad = False) + torch.exp(f[:,num_stocks//2:] - f[:,:num_stocks//2])
        return torch.mean(torch.log(l))

class Lloss(nn.Module):
    # Listwise MLE (Plackett-Luce) negative log-likelihood.
    def __init__(self):
        super(Lloss, self).__init__()
    def forward(self, f, num_stocks):
        l = torch.neg(torch.sum(f, dim = 1))
        for i in range(num_stocks):
            l += torch.logsumexp(f[:,i:], dim = 1)
        l = torch.mean(l)
        return l

class CMLE(nn.Module):
    def __init__(self, n_features):
        super(CMLE, self).__init__()
        self.n_features = n_features
        self.linear1 = nn.Linear(self.n_features, self.n_features * 4)
        self.linear2 = nn.Linear(self.n_features * 4, self.n_features * 2)
        self.linear3 = nn.Linear(self.n_features * 2, self.n_features // 2)
        self.linear4 = nn.Linear(self.n_features // 2, 1)
        self.apply(weights_init)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.linear3(x))
        result = self.linear4(x)  # no final relu: MSE targets can be negative
        # (batch, num_stocks, 1) -> (batch, num_stocks)
        result = result.view(result.shape[0], result.shape[1])
        return result

class LMLE(nn.Module):
    def __init__(self, n_features, num_stocks=80):  # default matches the 80-stock universe
        super(LMLE, self).__init__()
        self.n_features = n_features
        self.num_stocks = num_stocks
        self.linear1 = nn.Linear(self.n_features, self.n_features * 4)
        self.linear2 = nn.Linear(self.n_features * 4, self.n_features * 2)
        self.linear3 = nn.Linear(self.n_features * 2, self.n_features // 2)
        self.linear4 = nn.Linear(self.n_features // 2, 1)
        self.apply(weights_init)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.linear3(x))
        result = self.linear4(x)  # no final relu: MSE targets can be negative
        result = result.view(result.shape[0], result.shape[1])
        return result

# Per-loss learning rates; note there is no entry for 'plain' (Closs).
learning_rate = {'explained': 5e-5, 'sigmoid': 1e-4, 'LMLE_loss': 1e-4}
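The only functional difference from `torch_model.py` is the network head: for MSE training the final `relu` is dropped, since the regression targets (weekly log-returns) can be negative, exactly as noted in the README. Side by side:

```python
# torch_model.py (ranking losses): scores are kept non-negative
result = F.relu(self.linear4(x))

# torch_model_mlp.py (MSE): raw linear output, may be negative
result = self.linear4(x)
```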

--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------

import numpy as np
import os
import argparse
import torch
from torch import optim
from torch.autograd import Variable
from torch_model import CMLE, LMLE, Closs, Closs_explained, Closs_sigmoid, Lloss, learning_rate

Models = {'CMLE': CMLE, 'LMLE': LMLE}
Losses = {'plain': Closs, 'explained': Closs_explained, 'sigmoid': Closs_sigmoid, 'LMLE_loss': Lloss}


parser = argparse.ArgumentParser(description='Rolling Training')
parser.add_argument(
    '--train-rolling-length',
    type=int,
    default=300,
    help='rolling length of training')
parser.add_argument(
    '--test-rolling-length',
    type=int,
    default=16,
    help='rolling length of test')
parser.add_argument(
    '--epochs',
    type=int,
    default=1000,
    help='train epochs')
parser.add_argument(
    '--model-type',
    type=str,
    default='CMLE',
    help='model type, either CMLE or LMLE')
parser.add_argument(
    '--loss-type',
    type=str,
    default='explained',
    help='loss type, either explained, sigmoid or LMLE_loss')
parser.add_argument(
    '--pp',
    type=int,
    nargs='+',
    default=[21],
    help='parameters for parallel training: one value N trains all N pairs; '
         'two values N m train only the pairs with index %% 3 == m')
parser.add_argument(
    '--nfeatures',
    type=int,
    default=68,
    help='number of features')
parser.add_argument(
    '--batch-size',
    type=int,
    default=32)
args = parser.parse_args()
batch_size = args.batch_size

def return_rank(a):
    # Rank 0 corresponds to the largest entry of a.
    a = a * -1
    order = a.argsort()
    return order.argsort()

def random_batch(x, y):
    # Sample a random batch and, within each sample, reorder the 80 stocks so
    # that position 0 holds the highest-return stock. (Equivalent to the
    # original rank2ind bookkeeping, written as one fancy-index assignment.)
    ind = np.random.randint(0, len(x), batch_size)
    batch_x, batch_y = x[ind], y[ind]
    x_sorted = np.zeros(batch_x.shape)
    for i in range(len(batch_x)):
        rank_temp = return_rank(batch_y[i])
        x_sorted[i, rank_temp, :] = batch_x[i]
    return x_sorted


def train(features, ranks, epochs, model_name, args):
    features = np.load(features, allow_pickle = True)
    ranks = np.load(ranks, allow_pickle = True)  # weekly log-returns
    print('Done reading data\n')
    Model = Models[args.model_type]
    model = Model(n_features = args.nfeatures)
    model = model.double()
    loss = Losses[args.loss_type]()
    opt = optim.Adam(model.parameters(), lr=learning_rate[args.loss_type])
    print('Done building model\n')
    running_loss = []
    torch.set_grad_enabled(True)
    for itr in range(epochs):
        batch_x = Variable(torch.from_numpy(random_batch(features, ranks)).double())
        model.train()
        scores = model(batch_x)
        l = loss(scores, 80)  # 80 stocks per cross-section; a plain int works as the slice bound
        opt.zero_grad()
        l.backward()
        opt.step()
        running_loss.append(float(l))
        if (itr+1) % epochs == 0:
            # Only the final epoch's model is saved; back_test.py therefore
            # loads checkpoint epochs - 1 (999 by default).
            print("step", (itr+1), np.mean(running_loss))
            running_loss = []
            torch.save(model.state_dict(), model_name + str(itr) + '.dat')

P = args.pp
suffix = str(args.train_rolling_length) + '_' + str(args.test_rolling_length)
if not os.path.exists('./models_' + suffix + '_' + args.loss_type):
    os.makedirs('./models_' + suffix + '_' + args.loss_type)
if len(P) == 1:
    N = P[0]
    for ind in range(0, N):
        print(ind)
        train('./rolling_' + suffix + '/features_train_' + str(ind) + '.npy', './rolling_' + suffix + '/ranks_train_' + str(ind) + '.npy', args.epochs, './models_' + suffix + '_' + args.loss_type + '/rolling_model_' + str(ind) + '_'+str(batch_size) + '_', args)
if len(P) == 2:
    # Crude parallelism: process m of 3 handles the pairs with ind % 3 == m.
    N, m = P[0], P[1]
    for ind in range(0, N):
        if ind % 3 != m:
            continue
        print(ind)
        train('./rolling_' + suffix + '/features_train_' + str(ind) + '.npy', './rolling_' + suffix + '/ranks_train_' + str(ind) + '.npy', args.epochs, './models_' + suffix + '_' + args.loss_type + '/rolling_model_' + str(ind) + '_'+str(batch_size) + '_', args)
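To see what `random_batch` feeds the ranking losses, here is a toy example with four "stocks" and one feature (fabricated numbers): the feature rows are permuted so that row 0 belongs to the highest-return stock, which is exactly the ordering `Closs` assumes.

```python
import numpy as np

def return_rank(a):  # copied from train.py
    a = a * -1
    order = a.argsort()
    return order.argsort()

batch_y = np.array([0.02, -0.01, 0.05, 0.00])     # one week of returns
batch_x = np.array([[10.], [11.], [12.], [13.]])  # one feature per stock

rank_temp = return_rank(batch_y)   # [1 3 0 2]
x_sorted = np.zeros(batch_x.shape)
x_sorted[rank_temp, :] = batch_x   # the per-sample assignment train.py performs
print(x_sorted.ravel())            # [12. 10. 13. 11.], descending-return order
```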

--------------------------------------------------------------------------------
/train_mlp.py:
--------------------------------------------------------------------------------

import numpy as np
import os
import argparse
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch_model_mlp import CMLE, LMLE, Closs, Closs_explained, Closs_sigmoid, Lloss

Models = {'CMLE': CMLE, 'LMLE': LMLE}
Losses = {'plain': Closs, 'explained': Closs_explained, 'sigmoid': Closs_sigmoid, 'LMLE_loss': Lloss}


parser = argparse.ArgumentParser(description='Rolling Training (MSE)')
parser.add_argument(
    '--train-rolling-length',
    type=int,
    default=300,
    help='rolling length of training')
parser.add_argument(
    '--test-rolling-length',
    type=int,
    default=16,
    help='rolling length of test')
parser.add_argument(
    '--epochs',
    type=int,
    default=4000,
    help='train epochs')
parser.add_argument(
    '--model-type',
    type=str,
    default='CMLE',
    help='model type, either CMLE or LMLE')
parser.add_argument(
    '--loss-type',
    type=str,
    default='MSE',
    help='loss type, should be MSE')
parser.add_argument(
    '--pp',
    type=int,
    nargs='+',
    default=[1],
    help='parameters for parallel training: one value N trains all N pairs; '
         'two values N m train only the pairs with index %% 3 == m')
parser.add_argument(
    '--nfeatures',
    type=int,
    default=68,
    help='number of features')


batch_size = 32

def return_rank(a):
    # Rank 0 corresponds to the largest entry of a.
    a = a * -1
    order = a.argsort()
    return order.argsort()

def random_batch(x, y):
    # For MSE training no reordering is needed: the targets are the raw
    # weekly log-returns themselves.
    ind = np.random.randint(0, len(x), batch_size)
    batch_x, batch_y = x[ind].astype(np.float64), y[ind].astype(np.float64)
    return batch_x, batch_y


def train(features, ranks, epochs, model_name, args):
    features = np.load(features, allow_pickle = True)
    ranks = np.load(ranks, allow_pickle = True)  # weekly log-returns
    print('Done reading data\n')
    Model = Models[args.model_type]
    model = Model(n_features = args.nfeatures)
    model = model.double()
    loss = nn.MSELoss()
    opt = optim.Adam(model.parameters(), lr=1e-4)
    print('Done building model\n')
    running_loss = []
    torch.set_grad_enabled(True)
    for itr in range(epochs):
        batch_x, batch_y = random_batch(features, ranks)
        batch_x, batch_y = Variable(torch.from_numpy(batch_x)), Variable(torch.from_numpy(batch_y))
        model.train()
        scores = model(batch_x)
        l = loss(scores, batch_y)
        opt.zero_grad()
        l.backward()
        opt.step()
        running_loss.append(float(l))
        if (itr+1) % epochs == 0:
            # Only the final epoch's model is saved (checkpoint epochs - 1).
            print("step", (itr+1), np.mean(running_loss))
            running_loss = []
            torch.save(model.state_dict(), model_name + str(itr) + '.dat')

args = parser.parse_args()
P = args.pp
suffix = str(args.train_rolling_length) + '_' + str(args.test_rolling_length)
if not os.path.exists('./models_' + suffix + '_' + args.loss_type):
    os.makedirs('./models_' + suffix + '_' + args.loss_type)
if len(P) == 1:
    N = P[0]
    for ind in range(0, N):
        print(ind)
        train('./rolling_' + suffix + '/features_train_' + str(ind) + '.npy', './rolling_' + suffix + '/ranks_train_' + str(ind) + '.npy', args.epochs, './models_' + suffix + '_' + args.loss_type + '/rolling_model_' + str(ind) + '_' + str(batch_size)+'_', args)
if len(P) == 2:
    # Crude parallelism: process m of 3 handles the pairs with ind % 3 == m.
    N, m = P[0], P[1]
    for ind in range(0, N):
        if ind % 3 != m:
            continue
        print(ind)
        train('./rolling_' + suffix + '/features_train_' + str(ind) + '.npy', './rolling_' + suffix + '/ranks_train_' + str(ind) + '.npy', args.epochs, './models_' + suffix + '_' + args.loss_type + '/rolling_model_' + str(ind) + '_'+ str(batch_size)+'_', args)

--------------------------------------------------------------------------------
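Finally, a minimal sketch of what `train_mlp.py` optimizes (shapes only, fabricated data): the 80 scores are regressed directly onto the weekly log-returns with a plain MSE, with no ranking structure in the loss.

```python
import torch
from torch import nn

scores = torch.randn(32, 80)   # model output: one score per stock
targets = torch.randn(32, 80)  # stands in for a batch from ranks_train_*.npy
print(nn.MSELoss()(scores, targets))
```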