├── .gitignore ├── Classification ├── Readme.md ├── data │ ├── data.py │ ├── sparse_data.py │ ├── sparseloader.py │ └── untitled.py ├── main_cls_cv.py ├── misc │ ├── auc.py │ └── metrics.py ├── models │ ├── dynamic_net.py │ ├── mlp.py │ └── splinear.py ├── results │ └── results_readme.txt └── train.sh ├── L2R ├── DataLoader │ └── DataLoader.py ├── Misc │ ├── Calculations.py │ └── metrics.py ├── Readme.md ├── Utils │ └── utils.py ├── main_l2r_idiv_cv.py ├── main_l2r_mse_cv.py ├── main_l2r_pairwise_cv.py ├── models │ ├── dynamic_net.py │ ├── mlp.py │ └── splinear.py ├── results │ └── results_readme.txt └── train.sh ├── Model.png ├── README.md ├── Regression ├── Readme.md ├── data │ ├── data.py │ ├── sparse_data.py │ ├── sparseloader.py │ └── untitled.py ├── main_reg_cv.py ├── models │ ├── dynamic_net.py │ ├── mlp.py │ └── splinear.py ├── results │ └── results_readme.txt └── train.sh ├── baselines ├── reproduce_higgs.py ├── reproduce_slice.py └── reproduce_year.py ├── higgs2libsvm.py ├── reg_train_test_split.ipynb ├── requirements.txt └── yahoo2mslr.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Classification/Readme.md: -------------------------------------------------------------------------------- 1 | - Data loading and creating dataloader are handled in GrowNet/Classification/data/data.py. 2 | If you have a sparse data then try using sparse_data.py and opt sparse to True. If you want to try a new data please put it into one of the formats listed in data.py script. 3 | 4 | - Individual model class and ensemble architecture are in GrowNet/Classification/models: mlp.py and dynamic_net.py. 5 | You can increase number of hidden layers or change activation functions from mlp.py 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /Classification/data/data.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from torch.utils.data import Dataset 8 | from scipy.sparse import csr_matrix 9 | from sklearn.datasets import load_svmlight_file 10 | from sklearn import datasets 11 | from sklearn.model_selection import train_test_split 12 | 13 | 14 | class LibSVMData(Dataset): 15 | def __init__(self, root, dim, normalization, pos=1, neg=-1, out_pos=1, out_neg=-1): 16 | self.feat, self.label = load_svmlight_file(root) 17 | 18 | self.feat = csr_matrix((self.feat.data, self.feat.indices, self.feat.indptr), shape=(len(self.label), dim)) 19 | self.feat = self.feat.toarray().astype(np.float32) 20 | 21 | self.label = self.label.astype(np.float32) 22 | idx_pos = self.label == pos 23 | idx_neg = self.label == neg 24 | self.label[idx_pos] = out_pos 25 | self.label[idx_neg] = out_neg 26 | 27 | def __getitem__(self, index): 28 | arr = self.feat[index, :] 29 | return arr, self.label[index] 30 | def __len__(self): 31 | return len(self.label) 32 | 33 | class LibSVMRankData(Dataset): 34 | def __init__(self, root2data, root2qid, dim): 35 | self.feat, self.label = load_svmlight_file(root2data) 36 | self.qid = np.loadtxt(root2qid, dtype='int32') 37 | self.feat = self.feat.toarray().astype(np.float32) 38 | self.label = self.label.astype(np.float32) 39 | self.feat = self.feat[:, ~(self.feat == 0).all(0)] 40 | print(self.feat.shape[1]) 41 | 42 | def __getitem__(self, index): 43 | return self.feat[index, :], self.label[index], self.qid[index] 44 | 45 | def __len__(self): 46 | return len(self.label) 47 | 48 | class LibSVMRegData(Dataset): 49 | def __init__(self, root, dim, normalization): 50 | data = np.load(root) 51 | self.feat, self.label = data['features'], data['labels'] 52 | del data 53 | self.feat = self.feat.astype(np.float32) 54 | self.label = self.label.astype(np.float32) 55 | #self.feat = self.feat[:, ~(self.feat == 0).all(0)] 56 | #import ipdb; ipdb.set_trace() 57 | 58 | print(self.feat.shape[1]) 59 | 60 | def __getitem__(self, index): 61 | 
return self.feat[index, :], self.label[index] 62 | 63 | def __len__(self): 64 | return len(self.label) 65 | 66 | class LibCSVData(Dataset): 67 | def __init__(self, root, dim, pos=1, neg=-1): 68 | self.data = np.loadtxt(root, delimiter=',').astype(np.float32) 69 | self.feat = self.data[:, 1:] 70 | self.label = self.data[:, 0] 71 | self.label[self.label == pos] = 1 72 | self.label[self.label == neg] = -1 73 | 74 | def __getitem__(self, index): 75 | #arr = np.log(self.feat[index, :] + 1.0e-5) 76 | #arr = np.log10(self.feat[index, :] + 1.0e-5) 77 | arr = self.feat[index, :] 78 | return arr, self.label[index] 79 | 80 | def __len__(self): 81 | return len(self.label) 82 | class CriteoCSVData(Dataset): 83 | def __init__(self, root, dim, normalization, pos=1, neg=-1): 84 | # Reading the data into panda data frame 85 | self.data = pd.read_csv(root, header=None, dtype='float32') 86 | # extracting labels (0, 1) and weights 87 | self.label = self.data.iloc[:, -2] 88 | self.weights = self.data.iloc[:, -1] 89 | self.data = self.data.iloc[:, :-2] 90 | # transferring labels from {0, 1} to {-1, 1} 91 | self.label[self.label == pos] = 1 92 | self.label[self.label == neg] = -1 93 | 94 | # Applying log transformation 95 | mm = self.data.min().min() # to prevent 0 division 96 | if normalization: 97 | # Filling Nan values: Simple approach, mean of the that column or interpolation 98 | self.data = self.data.transform(lambda x: np.log(x - mm + 1)) 99 | #self.data = self.data.interpolate(method='polynomial', order=2) 100 | self.data = self.data.fillna(self.data.mean()) # To fill the rest of Nan values left untouched on the corners 101 | #self.data = (self.data - self.data.mean())/self.data.std() 102 | #self.feat = self.data.to_numpy('float32') 103 | self.data = self.data.to_numpy('float32') 104 | def __getitem__(self, index): 105 | #arr = np.log(self.feat[index, :] + 1.0e-5) 106 | #arr = np.log10(self.feat[index, :] + 1.0e-5) 107 | #arr = self.feat[index, :] 108 | arr = self.data[index, :] 109 | return arr, self.label[index], self.weights[index] 110 | 111 | def __len__(self): 112 | return len(self.label) 113 | -------------------------------------------------------------------------------- /Classification/data/sparse_data.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import os 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import Dataset 7 | from scipy.sparse import csr_matrix 8 | from sklearn.datasets import load_svmlight_file 9 | 10 | class LibSVMDataSp(Dataset): 11 | def __init__(self, root, dim_in, pos=1, neg=-1): 12 | self.feat, self.label = load_svmlight_file(root) 13 | self.feat = csr_matrix((self.feat.data, self.feat.indices, self.feat.indptr), shape=(len(self.label), dim_in)) 14 | self.feat = self.feat.astype(np.float32) 15 | self.label = self.label.astype(np.float32) 16 | self.label[self.label == pos] = 1 17 | self.label[self.label == neg] = -1 18 | 19 | def __getitem__(self, index): 20 | arr = self.feat[index, :] 21 | return arr, self.label[index] 22 | def __len__(self): 23 | return len(self.label) 24 | 25 | 26 | -------------------------------------------------------------------------------- /Classification/data/sparseloader.py: -------------------------------------------------------------------------------- 1 | import scipy 2 | import random 3 | import torch 4 | import torch.multiprocessing as multiprocessing 5 | # Changed _update_worker_pids into _set_worker_pids, due to new version of pytorch 6 | from 
torch._C import _set_worker_signal_handlers, _set_worker_pids, \ 7 | _remove_worker_pids, _error_if_any_worker_fails 8 | from torch.utils.data.sampler import SequentialSampler, RandomSampler, BatchSampler 9 | import signal 10 | import functools 11 | import collections 12 | import re 13 | import sys 14 | import threading 15 | import traceback 16 | import os 17 | import time 18 | from torch._six import * #string_classes, int_classes, FileNotFoundError 19 | 20 | IS_WINDOWS = sys.platform == "win32" 21 | if IS_WINDOWS: 22 | import ctypes 23 | from ctypes.wintypes import DWORD, BOOL, HANDLE 24 | 25 | if sys.version_info[0] == 2: 26 | import Queue as queue 27 | else: 28 | import queue 29 | 30 | 31 | class ExceptionWrapper(object): 32 | r"""Wraps an exception plus traceback to communicate across threads""" 33 | 34 | def __init__(self, exc_info): 35 | self.exc_type = exc_info[0] 36 | self.exc_msg = "".join(traceback.format_exception(*exc_info)) 37 | 38 | 39 | _use_shared_memory = False 40 | r"""Whether to use shared memory in default_collate""" 41 | 42 | MANAGER_STATUS_CHECK_INTERVAL = 5.0 43 | 44 | if IS_WINDOWS: 45 | # On Windows, the parent ID of the worker process remains unchanged when the manager process 46 | # is gone, and the only way to check it through OS is to let the worker have a process handle 47 | # of the manager and ask if the process status has changed. 48 | class ManagerWatchdog(object): 49 | def __init__(self): 50 | self.manager_pid = os.getppid() 51 | 52 | self.kernel32 = ctypes.WinDLL('kernel32', use_last_error=True) 53 | self.kernel32.OpenProcess.argtypes = (DWORD, BOOL, DWORD) 54 | self.kernel32.OpenProcess.restype = HANDLE 55 | self.kernel32.WaitForSingleObject.argtypes = (HANDLE, DWORD) 56 | self.kernel32.WaitForSingleObject.restype = DWORD 57 | 58 | # Value obtained from https://msdn.microsoft.com/en-us/library/ms684880.aspx 59 | SYNCHRONIZE = 0x00100000 60 | self.manager_handle = self.kernel32.OpenProcess(SYNCHRONIZE, 0, self.manager_pid) 61 | 62 | if not self.manager_handle: 63 | raise ctypes.WinError(ctypes.get_last_error()) 64 | 65 | def is_alive(self): 66 | return self.kernel32.WaitForSingleObject(self.manager_handle, 0) != 0 67 | else: 68 | class ManagerWatchdog(object): 69 | def __init__(self): 70 | self.manager_pid = os.getppid() 71 | 72 | def is_alive(self): 73 | return os.getppid() == self.manager_pid 74 | 75 | 76 | def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, init_fn, worker_id): 77 | global _use_shared_memory 78 | _use_shared_memory = True 79 | 80 | _set_worker_signal_handlers() 81 | 82 | torch.set_num_threads(1) 83 | random.seed(seed) 84 | torch.manual_seed(seed) 85 | 86 | if init_fn is not None: 87 | init_fn(worker_id) 88 | 89 | watchdog = ManagerWatchdog() 90 | 91 | while True: 92 | try: 93 | r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL) 94 | except queue.Empty: 95 | if watchdog.is_alive(): 96 | continue 97 | else: 98 | break 99 | if r is None: 100 | break 101 | idx, batch_indices = r 102 | try: 103 | samples = collate_fn([dataset[i] for i in batch_indices]) 104 | except Exception: 105 | data_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 106 | else: 107 | data_queue.put((idx, samples)) 108 | del samples 109 | 110 | 111 | def _worker_manager_loop(in_queue, out_queue, done_event, pin_memory, device_id): 112 | if pin_memory: 113 | torch.cuda.set_device(device_id) 114 | 115 | while True: 116 | try: 117 | r = in_queue.get() 118 | except Exception: 119 | if done_event.is_set(): 120 | return 121 | raise 122 | if r is 
None: 123 | break 124 | if isinstance(r[1], ExceptionWrapper): 125 | out_queue.put(r) 126 | continue 127 | idx, batch = r 128 | try: 129 | if pin_memory: 130 | batch = pin_memory_batch(batch) 131 | except Exception: 132 | out_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 133 | else: 134 | out_queue.put((idx, batch)) 135 | 136 | numpy_type_map = { 137 | 'float64': torch.DoubleTensor, 138 | 'float32': torch.FloatTensor, 139 | 'float16': torch.HalfTensor, 140 | 'int64': torch.LongTensor, 141 | 'int32': torch.IntTensor, 142 | 'int16': torch.ShortTensor, 143 | 'int8': torch.CharTensor, 144 | 'uint8': torch.ByteTensor, 145 | } 146 | 147 | 148 | def default_collate(batch): 149 | r"""Puts each data field into a tensor with outer dimension batch size""" 150 | 151 | error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" 152 | elem_type = type(batch[0]) 153 | if isinstance(batch[0], torch.Tensor): 154 | out = None 155 | if _use_shared_memory: 156 | # If we're in a background process, concatenate directly into a 157 | # shared memory tensor to avoid an extra copy 158 | numel = sum([x.numel() for x in batch]) 159 | storage = batch[0].storage()._new_shared(numel) 160 | out = batch[0].new(storage) 161 | return torch.stack(batch, 0, out=out) 162 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 163 | and elem_type.__name__ != 'string_': 164 | elem = batch[0] 165 | if elem_type.__name__ == 'ndarray': 166 | # array of string classes and object 167 | if re.search('[SaUO]', elem.dtype.str) is not None: 168 | raise TypeError(error_msg.format(elem.dtype)) 169 | 170 | return torch.stack([torch.from_numpy(b) for b in batch], 0) 171 | if elem.shape == (): # scalars 172 | py_type = float if elem.dtype.name.startswith('float') else int 173 | return numpy_type_map[elem.dtype.name](list(map(py_type, batch))) 174 | elif isinstance(batch[0], int_classes): 175 | return torch.LongTensor(batch) 176 | elif isinstance(batch[0], float): 177 | return torch.DoubleTensor(batch) 178 | elif isinstance(batch[0], string_classes): 179 | return batch 180 | elif isinstance(batch[0], collections.Mapping): 181 | return {key: default_collate([d[key] for d in batch]) for key in batch[0]} 182 | elif isinstance(batch[0], collections.Sequence): 183 | transposed = zip(*batch) 184 | return [default_collate(samples) for samples in transposed] 185 | elif isinstance(batch[0], scipy.sparse.csr.csr_matrix): 186 | row_idx = [] 187 | col_idx = [] 188 | val = [] 189 | for i, b in enumerate(batch): 190 | col = b.indices 191 | row_idx.extend([i] * len(col)) 192 | col_idx.extend(col) 193 | val.extend(b.data) 194 | i = torch.LongTensor([row_idx, col_idx]) 195 | v = torch.FloatTensor(val) 196 | return torch.sparse.FloatTensor(i, v, torch.Size([len(batch), batch[0].shape[1]])) 197 | raise TypeError((error_msg.format(type(batch[0])))) 198 | 199 | 200 | def pin_memory_batch(batch): 201 | if isinstance(batch, torch.Tensor): 202 | return batch.pin_memory() 203 | elif isinstance(batch, string_classes): 204 | return batch 205 | elif isinstance(batch, collections.Mapping): 206 | return {k: pin_memory_batch(sample) for k, sample in batch.items()} 207 | elif isinstance(batch, collections.Sequence): 208 | return [pin_memory_batch(sample) for sample in batch] 209 | else: 210 | return batch 211 | 212 | 213 | _SIGCHLD_handler_set = False 214 | r"""Whether SIGCHLD handler is set for DataLoader worker failures. 
Only one 215 | handler needs to be set for all DataLoaders in a process.""" 216 | 217 | 218 | def _set_SIGCHLD_handler(): 219 | # Windows doesn't support SIGCHLD handler 220 | if sys.platform == 'win32': 221 | return 222 | # can't set signal in child threads 223 | if not isinstance(threading.current_thread(), threading._MainThread): 224 | return 225 | global _SIGCHLD_handler_set 226 | if _SIGCHLD_handler_set: 227 | return 228 | previous_handler = signal.getsignal(signal.SIGCHLD) 229 | if not callable(previous_handler): 230 | previous_handler = None 231 | 232 | def handler(signum, frame): 233 | _error_if_any_worker_fails() 234 | if previous_handler is not None: 235 | previous_handler(signum, frame) 236 | 237 | signal.signal(signal.SIGCHLD, handler) 238 | _SIGCHLD_handler_set = True 239 | 240 | 241 | class _DataLoaderIter(object): 242 | r"""Iterates once over the DataLoader's dataset, as specified by the sampler""" 243 | 244 | def __init__(self, loader): 245 | self.dataset = loader.dataset 246 | self.collate_fn = loader.collate_fn 247 | self.batch_sampler = loader.batch_sampler 248 | self.num_workers = loader.num_workers 249 | self.pin_memory = loader.pin_memory and torch.cuda.is_available() 250 | self.timeout = loader.timeout 251 | self.done_event = threading.Event() 252 | 253 | self.sample_iter = iter(self.batch_sampler) 254 | 255 | base_seed = torch.LongTensor(1).random_().item() 256 | 257 | if self.num_workers > 0: 258 | self.worker_init_fn = loader.worker_init_fn 259 | self.index_queues = [multiprocessing.Queue() for _ in range(self.num_workers)] 260 | self.worker_queue_idx = 0 261 | self.worker_result_queue = multiprocessing.SimpleQueue() 262 | self.batches_outstanding = 0 263 | self.worker_pids_set = False 264 | self.shutdown = False 265 | self.send_idx = 0 266 | self.rcvd_idx = 0 267 | self.reorder_dict = {} 268 | 269 | self.workers = [ 270 | multiprocessing.Process( 271 | target=_worker_loop, 272 | args=(self.dataset, self.index_queues[i], 273 | self.worker_result_queue, self.collate_fn, base_seed + i, 274 | self.worker_init_fn, i)) 275 | for i in range(self.num_workers)] 276 | 277 | if self.pin_memory or self.timeout > 0: 278 | self.data_queue = queue.Queue() 279 | if self.pin_memory: 280 | maybe_device_id = torch.cuda.current_device() 281 | else: 282 | # do not initialize cuda context if not necessary 283 | maybe_device_id = None 284 | self.worker_manager_thread = threading.Thread( 285 | target=_worker_manager_loop, 286 | args=(self.worker_result_queue, self.data_queue, self.done_event, self.pin_memory, 287 | maybe_device_id)) 288 | self.worker_manager_thread.daemon = True 289 | self.worker_manager_thread.start() 290 | else: 291 | self.data_queue = self.worker_result_queue 292 | 293 | for w in self.workers: 294 | w.daemon = True # ensure that the worker exits on process exit 295 | w.start() 296 | 297 | _set_worker_pids(id(self), tuple(w.pid for w in self.workers)) 298 | _set_SIGCHLD_handler() 299 | self.worker_pids_set = True 300 | 301 | # prime the prefetch loop 302 | for _ in range(2 * self.num_workers): 303 | self._put_indices() 304 | 305 | def __len__(self): 306 | return len(self.batch_sampler) 307 | 308 | def _get_batch(self): 309 | if self.timeout > 0: 310 | try: 311 | return self.data_queue.get(timeout=self.timeout) 312 | except queue.Empty: 313 | raise RuntimeError('DataLoader timed out after {} seconds'.format(self.timeout)) 314 | else: 315 | return self.data_queue.get() 316 | 317 | def __next__(self): 318 | if self.num_workers == 0: # same-process loading 319 | indices = 
next(self.sample_iter) # may raise StopIteration 320 | batch = self.collate_fn([self.dataset[i] for i in indices]) 321 | if self.pin_memory: 322 | batch = pin_memory_batch(batch) 323 | return batch 324 | 325 | # check if the next sample has already been generated 326 | if self.rcvd_idx in self.reorder_dict: 327 | batch = self.reorder_dict.pop(self.rcvd_idx) 328 | return self._process_next_batch(batch) 329 | 330 | if self.batches_outstanding == 0: 331 | self._shutdown_workers() 332 | raise StopIteration 333 | 334 | while True: 335 | assert (not self.shutdown and self.batches_outstanding > 0) 336 | idx, batch = self._get_batch() 337 | self.batches_outstanding -= 1 338 | if idx != self.rcvd_idx: 339 | # store out-of-order samples 340 | self.reorder_dict[idx] = batch 341 | continue 342 | return self._process_next_batch(batch) 343 | 344 | next = __next__ # Python 2 compatibility 345 | 346 | def __iter__(self): 347 | return self 348 | 349 | def _put_indices(self): 350 | assert self.batches_outstanding < 2 * self.num_workers 351 | indices = next(self.sample_iter, None) 352 | if indices is None: 353 | return 354 | self.index_queues[self.worker_queue_idx].put((self.send_idx, indices)) 355 | self.worker_queue_idx = (self.worker_queue_idx + 1) % self.num_workers 356 | self.batches_outstanding += 1 357 | self.send_idx += 1 358 | 359 | def _process_next_batch(self, batch): 360 | self.rcvd_idx += 1 361 | self._put_indices() 362 | if isinstance(batch, ExceptionWrapper): 363 | raise batch.exc_type(batch.exc_msg) 364 | return batch 365 | 366 | def __getstate__(self): 367 | 368 | raise NotImplementedError("_DataLoaderIter cannot be pickled") 369 | 370 | def _shutdown_workers(self): 371 | try: 372 | if not self.shutdown: 373 | self.shutdown = True 374 | self.done_event.set() 375 | for q in self.index_queues: 376 | q.put(None) 377 | # if some workers are waiting to put, make place for them 378 | try: 379 | while not self.worker_result_queue.empty(): 380 | self.worker_result_queue.get() 381 | except (FileNotFoundError, ImportError): 382 | pass 383 | self.worker_result_queue.put(None) 384 | finally: 385 | # removes pids no matter what 386 | if self.worker_pids_set: 387 | _remove_worker_pids(id(self)) 388 | self.worker_pids_set = False 389 | 390 | def __del__(self): 391 | if self.num_workers > 0: 392 | self._shutdown_workers() 393 | 394 | 395 | class DataLoader(object): 396 | r""" 397 | Data loader. Combines a dataset and a sampler, and provides 398 | single- or multi-process iterators over the dataset. 399 | 400 | Arguments: 401 | dataset (Dataset): dataset from which to load the data. 402 | batch_size (int, optional): how many samples per batch to load 403 | (default: 1). 404 | shuffle (bool, optional): set to ``True`` to have the data reshuffled 405 | at every epoch (default: False). 406 | sampler (Sampler, optional): defines the strategy to draw samples from 407 | the dataset. If specified, ``shuffle`` must be False. 408 | batch_sampler (Sampler, optional): like sampler, but returns a batch of 409 | indices at a time. Mutually exclusive with batch_size, shuffle, 410 | sampler, and drop_last. 411 | num_workers (int, optional): how many subprocesses to use for data 412 | loading. 0 means that the data will be loaded in the main process. 413 | (default: 0) 414 | collate_fn (callable, optional): merges a list of samples to form a mini-batch. 415 | pin_memory (bool, optional): If ``True``, the data loader will copy tensors 416 | into CUDA pinned memory before returning them. 
417 | drop_last (bool, optional): set to ``True`` to drop the last incomplete batch, 418 | if the dataset size is not divisible by the batch size. If ``False`` and 419 | the size of dataset is not divisible by the batch size, then the last batch 420 | will be smaller. (default: False) 421 | timeout (numeric, optional): if positive, the timeout value for collecting a batch 422 | from workers. Should always be non-negative. (default: 0) 423 | worker_init_fn (callable, optional): If not None, this will be called on each 424 | worker subprocess with the worker id (an int in ``[0, num_workers - 1]``) as 425 | input, after seeding and before data loading. (default: None) 426 | 427 | .. note:: By default, each worker will have its PyTorch seed set to 428 | ``base_seed + worker_id``, where ``base_seed`` is a long generated 429 | by main process using its RNG. However, seeds for other libraies 430 | may be duplicated upon initializing workers (w.g., NumPy), causing 431 | each worker to return identical random numbers. (See 432 | :ref:`dataloader-workers-random-seed` section in FAQ.) You may 433 | use ``torch.initial_seed()`` to access the PyTorch seed for each 434 | worker in :attr:`worker_init_fn`, and use it to set other seeds 435 | before data loading. 436 | 437 | .. warning:: If ``spawn`` start method is used, :attr:`worker_init_fn` cannot be an 438 | unpicklable object, e.g., a lambda function. 439 | """ 440 | 441 | __initialized = False 442 | 443 | def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, 444 | num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False, 445 | timeout=0, worker_init_fn=None): 446 | self.dataset = dataset 447 | self.batch_size = batch_size 448 | self.num_workers = num_workers 449 | self.collate_fn = collate_fn 450 | self.pin_memory = pin_memory 451 | self.drop_last = drop_last 452 | self.timeout = timeout 453 | self.worker_init_fn = worker_init_fn 454 | 455 | if timeout < 0: 456 | raise ValueError('timeout option should be non-negative') 457 | 458 | if batch_sampler is not None: 459 | if batch_size > 1 or shuffle or sampler is not None or drop_last: 460 | raise ValueError('batch_sampler option is mutually exclusive ' 461 | 'with batch_size, shuffle, sampler, and ' 462 | 'drop_last') 463 | self.batch_size = None 464 | self.drop_last = None 465 | 466 | if sampler is not None and shuffle: 467 | raise ValueError('sampler option is mutually exclusive with ' 468 | 'shuffle') 469 | 470 | if self.num_workers < 0: 471 | raise ValueError('num_workers option cannot be negative; ' 472 | 'use num_workers=0 to disable multiprocessing.') 473 | 474 | if batch_sampler is None: 475 | if sampler is None: 476 | if shuffle: 477 | sampler = RandomSampler(dataset) 478 | else: 479 | sampler = SequentialSampler(dataset) 480 | batch_sampler = BatchSampler(sampler, batch_size, drop_last) 481 | 482 | self.sampler = sampler 483 | self.batch_sampler = batch_sampler 484 | self.__initialized = True 485 | 486 | def __setattr__(self, attr, val): 487 | if self.__initialized and attr in ('batch_size', 'sampler', 'drop_last'): 488 | raise ValueError('{} attribute should not be set after {} is ' 489 | 'initialized'.format(attr, self.__class__.__name__)) 490 | 491 | super(DataLoader, self).__setattr__(attr, val) 492 | 493 | def __iter__(self): 494 | return _DataLoaderIter(self) 495 | 496 | def __len__(self): 497 | return len(self.batch_sampler) 498 | 499 | -------------------------------------------------------------------------------- 
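The csr_matrix branch of default_collate above is what lets the sparse datasets (e.g. real-sim loaded through LibSVMDataSp) be batched into a single torch sparse tensor. Below is a minimal, self-contained sketch of that collate step for reference; the function name collate_csr_rows and the toy two-row batch are illustrative only (not part of the repo), and it uses the current torch.sparse_coo_tensor constructor where sparseloader.py uses the older torch.sparse.FloatTensor call:

import numpy as np
import torch
from scipy.sparse import csr_matrix

def collate_csr_rows(batch):
    # batch: list of 1 x dim scipy CSR rows (the feature part of what LibSVMDataSp yields)
    row_idx, col_idx, vals = [], [], []
    for i, b in enumerate(batch):
        row_idx.extend([i] * len(b.indices))   # which sample in the batch each non-zero belongs to
        col_idx.extend(b.indices.tolist())     # feature (column) indices of the non-zeros
        vals.extend(b.data.tolist())           # the non-zero feature values
    indices = torch.LongTensor([row_idx, col_idx])
    values = torch.FloatTensor(vals)
    # sparseloader.py builds the same tensor with the older torch.sparse.FloatTensor API
    return torch.sparse_coo_tensor(indices, values, (len(batch), batch[0].shape[1]))

rows = [csr_matrix(np.array([[0.0, 1.5, 0.0, 2.0]], dtype=np.float32)),
        csr_matrix(np.array([[3.0, 0.0, 0.0, 0.0]], dtype=np.float32))]
print(collate_csr_rows(rows).to_dense())       # dense 2 x 4 batch, for inspection only

In the training scripts this path is reached implicitly: main_cls_cv.py imports DataLoader from data/sparseloader.py with its default collate_fn, and when --sparse is True the dataset items carry CSR feature rows, so default_collate falls through to the csr_matrix branch shown above.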
/Classification/data/untitled.py: -------------------------------------------------------------------------------- 1 | def convert2npz(input_filename, out_data_filename): 2 | input = open(input_filename,"r") 3 | output_feature = open(out_data_filename,"w") 4 | #output_query = open(out_query_filename,"w") 5 | #output_label = open(out_query_filename2,"w") 6 | 7 | while True: 8 | line = input.readline() 9 | if not line: 10 | break 11 | tokens = line.split(' ') 12 | tokens[-1] = tokens[-1].strip() 13 | label = tokens[0] 14 | qid = int(tokens[1].split(':')[1]) 15 | 16 | #output_label.write(label + '\n') 17 | #output_query.write(qid + '\n') 18 | output_feature.write(label+' ') 19 | output_feature.write(str(qid) + ' ') 20 | output_feature.write(' '.join(tokens[2:]) + '\n') 21 | 22 | input.close() 23 | #output_query.close() 24 | output_feature.close() 25 | #output_query2.close() 26 | 27 | convert2npz("set1.train.txt","yahoo.train") 28 | convert2npz("set1.test.txt","yahoo.test") -------------------------------------------------------------------------------- /Classification/main_cls_cv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import sklearn 4 | import argparse 5 | import copy 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from data.sparseloader import DataLoader 10 | from data.data import LibSVMData, LibCSVData, CriteoCSVData 11 | from data.sparse_data import LibSVMDataSp 12 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 13 | from models.dynamic_net import DynamicNet, ForwardType 14 | from sklearn.preprocessing import StandardScaler, MinMaxScaler 15 | from torch.utils.data.sampler import SubsetRandomSampler 16 | from torch.optim import SGD, Adam 17 | from misc.auc import auc 18 | 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--feat_d', type=int, required=True) 22 | parser.add_argument('--hidden_d', type=int, required=True) 23 | parser.add_argument('--boost_rate', type=float, required=True) 24 | parser.add_argument('--lr', type=float, required=True) 25 | parser.add_argument('--num_nets', type=int, required=True) 26 | parser.add_argument('--data', type=str, required=True) 27 | parser.add_argument('--tr', type=str, required=True) 28 | parser.add_argument('--te', type=str, required=True) 29 | parser.add_argument('--batch_size', type=int, required=True) 30 | parser.add_argument('--epochs_per_stage', type=int, required=True) 31 | parser.add_argument('--correct_epoch', type=int ,required=True) 32 | parser.add_argument('--L2', type=float, required=True) 33 | parser.add_argument('--sparse', default=False, type=lambda x: (str(x).lower() == 'true')) 34 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 35 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 36 | parser.add_argument('--model_order',default='second', type=str) 37 | parser.add_argument('--out_f', type=str, required=True) 38 | parser.add_argument('--cuda', action='store_true') 39 | 40 | opt = parser.parse_args() 41 | 42 | if not opt.cuda: 43 | torch.set_num_threads(16) 44 | 45 | # prepare the dataset 46 | def get_data(): 47 | if opt.data in ['a9a', 'ijcnn1']: 48 | train = LibSVMData(opt.tr, opt.feat_d, opt.normalization) 49 | test = LibSVMData(opt.te, opt.feat_d, opt.normalization) 50 | elif opt.data == 'covtype': 51 | train = LibSVMData(opt.tr, opt.feat_d,opt.normalization, 1, 2) 52 | test = LibSVMData(opt.te, opt.feat_d,
opt.normalization, 1, 2) 53 | elif opt.data == 'mnist28': 54 | train = LibSVMData(opt.tr, opt.feat_d, opt.normalization, 2, 8) 55 | test = LibSVMData(opt.te, opt.feat_d, opt.normalization, 2, 8) 56 | elif opt.data == 'higgs': 57 | train = LibSVMData(opt.tr, opt.feat_d,opt.normalization, 0, 1) 58 | test = LibSVMData(opt.te, opt.feat_d,opt.normalization, 0, 1) 59 | elif opt.data == 'real-sim': 60 | train = LibSVMDataSp(opt.tr, opt.feat_d) 61 | test = LibSVMDataSp(opt.te, opt.feat_d) 62 | elif opt.data in ['criteo', 'criteo2', 'Allstate']: 63 | train = LibCSVData(opt.tr, opt.feat_d, 1, 0) 64 | test = LibCSVData(opt.te, opt.feat_d, 1, 0) 65 | elif opt.data == 'yahoo.pair': 66 | train = LibCSVData(opt.tr, opt.feat_d) 67 | test = LibCSVData(opt.te, opt.feat_d) 68 | else: 69 | pass 70 | 71 | val = [] 72 | if opt.cv: 73 | val = copy.deepcopy(train) 74 | 75 | # Split the data from cut point 76 | print('Creating Validation set! \n') 77 | indices = list(range(len(train))) 78 | cut = int(len(train)*0.95) 79 | np.random.shuffle(indices) 80 | train_idx = indices[:cut] 81 | val_idx = indices[cut:] 82 | 83 | train.feat = train.feat[train_idx] 84 | train.label = train.label[train_idx] 85 | val.feat = val.feat[val_idx] 86 | val.label = val.label[val_idx] 87 | 88 | if opt.normalization: 89 | scaler = MinMaxScaler() #StandardScaler() 90 | scaler.fit(train.feat) 91 | train.feat = scaler.transform(train.feat) 92 | test.feat = scaler.transform(test.feat) 93 | if opt.cv: 94 | val.feat = scaler.transform(val.feat) 95 | 96 | print(f'#Train: {len(train)}, #Val: {len(val)} #Test: {len(test)}') 97 | return train, test, val 98 | 99 | 100 | def get_optim(params, lr, weight_decay): 101 | optimizer = Adam(params, lr, weight_decay=weight_decay) 102 | return optimizer 103 | 104 | def accuracy(net_ensemble, test_loader): 105 | correct = 0 106 | total = 0 107 | loss = 0 108 | for x, y in test_loader: 109 | if opt.cuda: 110 | x, y = x.cuda(), y.cuda() 111 | with torch.no_grad(): 112 | middle_feat, out = net_ensemble.forward(x) 113 | correct += (torch.sum(y[out > 0.] 
> 0) + torch.sum(y[out < .0] < 0)).item() 114 | total += y.numel() 115 | return correct / total 116 | 117 | def logloss(net_ensemble, test_loader): 118 | loss = 0 119 | total = 0 120 | loss_f = nn.BCEWithLogitsLoss() # Binary cross entopy loss with logits, reduction=mean by default 121 | for x, y in test_loader: 122 | if opt.cuda: 123 | x, y= x.cuda(), y.cuda().view(-1, 1) 124 | y = (y + 1) / 2 125 | with torch.no_grad(): 126 | _, out = net_ensemble.forward(x) 127 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 128 | loss += loss_f(out, y) 129 | total += 1 130 | 131 | return loss / total 132 | 133 | def auc_score(net_ensemble, test_loader): 134 | actual = [] 135 | posterior = [] 136 | for x, y in test_loader: 137 | if opt.cuda: 138 | x = x.cuda() 139 | with torch.no_grad(): 140 | _, out = net_ensemble.forward(x) 141 | prob = 1.0 - 1.0 / torch.exp(out) # Why not using the scores themselve than converting to prob 142 | prob = prob.cpu().numpy().tolist() 143 | posterior.extend(prob) 144 | actual.extend(y.numpy().tolist()) 145 | score = auc(actual, posterior) 146 | return score 147 | 148 | def init_gbnn(train): 149 | positive = negative = 0 150 | for i in range(len(train)): 151 | if train[i][1] > 0: 152 | positive += 1 153 | else: 154 | negative += 1 155 | blind_acc = max(positive, negative) / (positive + negative) 156 | print(f'Blind accuracy: {blind_acc}') 157 | #print(f'Blind Logloss: {blind_acc}') 158 | return float(np.log(positive / negative)) 159 | 160 | if __name__ == "__main__": 161 | 162 | train, test, val = get_data() 163 | print(opt.data + ' training and test datasets are loaded!') 164 | train_loader = DataLoader(train, opt.batch_size, shuffle = True, drop_last=False, num_workers=2) 165 | test_loader = DataLoader(test, opt.batch_size, shuffle=False, drop_last=False, num_workers=2) 166 | if opt.cv: 167 | val_loader = DataLoader(val, opt.batch_size, shuffle=True, drop_last=False, num_workers=2) 168 | # For CV use 169 | best_score = 0 170 | val_score = best_score 171 | best_stage = opt.num_nets-1 172 | 173 | c0 = init_gbnn(train) 174 | net_ensemble = DynamicNet(c0, opt.boost_rate) 175 | loss_f1 = nn.MSELoss(reduction='none') 176 | loss_f2 = nn.BCEWithLogitsLoss(reduction='none') 177 | loss_models = torch.zeros((opt.num_nets, 3)) 178 | 179 | all_ensm_losses = [] 180 | all_ensm_losses_te = [] 181 | all_mdl_losses = [] 182 | dynamic_br = [] 183 | 184 | for stage in range(opt.num_nets): 185 | t0 = time.time() 186 | #### Higgs 100K, 1M , 10M experiment: Subsampling the data each model training time ############ 187 | indices = list(range(len(train))) 188 | split = 1000000 189 | indices = sklearn.utils.shuffle(indices, random_state=41) 190 | train_idx = indices[:split] 191 | train_sampler = SubsetRandomSampler(train_idx) 192 | train_loader = DataLoader(train, opt.batch_size, sampler = train_sampler, drop_last=True, num_workers=2) 193 | ################################################################################################ 194 | 195 | model = MLP_2HL.get_model(stage, opt) # Initialize the model_k: f_k(x), multilayer perception v2 196 | if opt.cuda: 197 | model.cuda() 198 | 199 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 200 | net_ensemble.to_train() # Set the models in ensemble net to train mode 201 | 202 | stage_mdlloss = [] 203 | for epoch in range(opt.epochs_per_stage): 204 | for i, (x, y) in enumerate(train_loader): 205 | if opt.cuda: 206 | x, y= x.cuda(), y.cuda().view(-1, 1) 207 | middle_feat, out = net_ensemble.forward(x) 208 | out = 
torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 209 | if opt.model_order=='first': 210 | grad_direction = y / (1.0 + torch.exp(y * out)) 211 | else: 212 | h = 1/((1+torch.exp(y*out))*(1+torch.exp(-y*out))) 213 | grad_direction = y * (1.0 + torch.exp(-y * out)) 214 | out = torch.as_tensor(out) 215 | nwtn_weights = (torch.exp(out) + torch.exp(-out)).abs() 216 | _, out = model(x, middle_feat) 217 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 218 | loss = loss_f1(net_ensemble.boost_rate*out, grad_direction) # T 219 | loss = loss*h 220 | loss = loss.mean() 221 | model.zero_grad() 222 | loss.backward() 223 | optimizer.step() 224 | stage_mdlloss.append(loss.item()) 225 | 226 | net_ensemble.add(model) 227 | sml = np.mean(stage_mdlloss) 228 | 229 | 230 | stage_loss = [] 231 | lr_scaler = 2 232 | # fully-corrective step 233 | if stage != 0: 234 | # Adjusting corrective step learning rate 235 | if stage % 15 == 0: 236 | #lr_scaler *= 2 237 | opt.lr /= 2 238 | opt.L2 /= 2 239 | optimizer = get_optim(net_ensemble.parameters(), opt.lr / lr_scaler, opt.L2) 240 | for _ in range(opt.correct_epoch): 241 | for i, (x, y) in enumerate(train_loader): 242 | if opt.cuda: 243 | x, y = x.cuda(), y.cuda().view(-1, 1) 244 | _, out = net_ensemble.forward_grad(x) 245 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 246 | y = (y + 1.0) / 2.0 247 | loss = loss_f2(out, y).mean() 248 | optimizer.zero_grad() 249 | loss.backward() 250 | optimizer.step() 251 | stage_loss.append(loss.item()) 252 | 253 | 254 | sl_te = logloss(net_ensemble, test_loader) 255 | # Store dynamic boost rate 256 | dynamic_br.append(net_ensemble.boost_rate.item()) 257 | # store model 258 | net_ensemble.to_file(opt.out_f) 259 | net_ensemble = DynamicNet.from_file(opt.out_f, lambda stage: MLP_2HL.get_model(stage, opt)) 260 | 261 | elapsed_tr = time.time()-t0 262 | sl = 0 263 | if stage_loss != []: 264 | sl = np.mean(stage_loss) 265 | 266 | 267 | 268 | all_ensm_losses.append(sl) 269 | all_ensm_losses_te.append(sl_te) 270 | all_mdl_losses.append(sml) 271 | print(f'Stage - {stage}, training time: {elapsed_tr: .1f} sec, boost rate: {net_ensemble.boost_rate: .4f}, Training Loss: {sl: .4f}, Test Loss: {sl_te: .4f}') 272 | 273 | 274 | if opt.cuda: 275 | net_ensemble.to_cuda() 276 | net_ensemble.to_eval() # Set the models in ensemble net to eval mode 277 | 278 | # Train 279 | print('Acc results from stage := ' + str(stage) + '\n') 280 | # AUC 281 | if opt.cv: 282 | val_score = auc_score(net_ensemble, val_loader) 283 | if val_score > best_score: 284 | best_score = val_score 285 | best_stage = stage 286 | 287 | test_score = auc_score(net_ensemble, test_loader) 288 | print(f'Stage: {stage}, AUC@Val: {val_score:.4f}, AUC@Test: {test_score:.4f}') 289 | 290 | loss_models[stage, 1], loss_models[stage, 2] = val_score, test_score 291 | 292 | val_auc, te_auc = loss_models[best_stage, 1], loss_models[best_stage, 2] 293 | print(f'Best validation stage: {best_stage}, AUC@Val: {val_auc:.4f}, final AUC@Test: {te_auc:.4f}') 294 | 295 | loss_models = loss_models.detach().cpu().numpy() 296 | fname = 'tr_ts_' + opt.data +'_auc' 297 | np.save(fname, loss_models) 298 | 299 | fname = './results/' + opt.data + '_cls' 300 | np.savez(fname, training_loss=all_ensm_losses, test_loss=all_ensm_losses_te, model_losses=all_mdl_losses, dynamic_boostrate=dynamic_br, params=opt) 301 | 302 | -------------------------------------------------------------------------------- /Classification/misc/auc.py: 
-------------------------------------------------------------------------------- 1 | 2 | def tied_rank(x): 3 | """ 4 | Computes the tied rank of elements in x. 5 | 6 | This function computes the tied rank of elements in x. 7 | 8 | Parameters 9 | ---------- 10 | x : list of numbers, numpy array 11 | 12 | Returns 13 | ------- 14 | score : list of numbers 15 | The tied rank f each element in x 16 | 17 | """ 18 | sorted_x = sorted(zip(x,range(len(x)))) 19 | r = [0 for k in x] 20 | cur_val = sorted_x[0][0] 21 | last_rank = 0 22 | for i in range(len(sorted_x)): 23 | if cur_val != sorted_x[i][0]: 24 | cur_val = sorted_x[i][0] 25 | for j in range(last_rank, i): 26 | r[sorted_x[j][1]] = float(last_rank+1+i)/2.0 27 | last_rank = i 28 | if i==len(sorted_x)-1: 29 | for j in range(last_rank, i+1): 30 | r[sorted_x[j][1]] = float(last_rank+i+2)/2.0 31 | return r 32 | 33 | def auc(actual, posterior): 34 | """ 35 | Computes the area under the receiver-operater characteristic (AUC) 36 | 37 | This function computes the AUC error metric for binary classification. 38 | 39 | Parameters 40 | ---------- 41 | actual : list of binary numbers, numpy array 42 | The ground truth value 43 | posterior : same type as actual 44 | Defines a ranking on the binary numbers, from most likely to 45 | be positive to least likely to be positive. 46 | 47 | Returns 48 | ------- 49 | score : double 50 | The mean squared error between actual and posterior 51 | 52 | """ 53 | r = tied_rank(posterior) 54 | num_positive = len([0 for x in actual if x==1]) 55 | num_negative = len(actual)-num_positive 56 | sum_positive = sum([r[i] for i in range(len(r)) if actual[i]==1]) 57 | auc = ((sum_positive - num_positive*(num_positive+1)/2.0) / 58 | (num_negative*num_positive)) 59 | return auc -------------------------------------------------------------------------------- /Classification/misc/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def ranking_precision_score(y_true, y_score, k=10): 5 | """Precision at rank k 6 | 7 | Parameters 8 | ---------- 9 | y_true : array-like, shape = [n_samples] 10 | Ground truth (true relevance labels). 11 | 12 | y_score : array-like, shape = [n_samples] 13 | Predicted scores. 14 | 15 | k : int 16 | Rank. 17 | 18 | Returns 19 | ------- 20 | precision @k : float 21 | """ 22 | unique_y = np.unique(y_true) 23 | 24 | if len(unique_y) > 2: 25 | raise ValueError("Only supported for two relevance levels.") 26 | 27 | pos_label = unique_y[1] 28 | n_pos = np.sum(y_true == pos_label) 29 | 30 | order = np.argsort(y_score)[::-1] 31 | y_true = np.take(y_true, order[:k]) 32 | n_relevant = np.sum(y_true == pos_label) 33 | 34 | # Divide by min(n_pos, k) such that the best achievable score is always 1.0. 35 | return float(n_relevant) / min(n_pos, k) 36 | 37 | 38 | def average_precision_score(y_true, y_score, k=10): 39 | """Average precision at rank k 40 | 41 | Parameters 42 | ---------- 43 | y_true : array-like, shape = [n_samples] 44 | Ground truth (true relevance labels). 45 | 46 | y_score : array-like, shape = [n_samples] 47 | Predicted scores. 48 | 49 | k : int 50 | Rank. 
51 | 52 | Returns 53 | ------- 54 | average precision @k : float 55 | """ 56 | unique_y = np.unique(y_true) 57 | 58 | if len(unique_y) > 2: 59 | raise ValueError("Only supported for two relevance levels.") 60 | 61 | pos_label = unique_y[1] 62 | n_pos = np.sum(y_true == pos_label) 63 | 64 | order = np.argsort(y_score)[::-1][:min(n_pos, k)] 65 | y_true = np.asarray(y_true)[order] 66 | 67 | score = 0 68 | for i in xrange(len(y_true)): 69 | if y_true[i] == pos_label: 70 | # Compute precision up to document i 71 | # i.e, percentage of relevant documents up to document i. 72 | prec = 0 73 | for j in xrange(0, i + 1): 74 | if y_true[j] == pos_label: 75 | prec += 1.0 76 | prec /= (i + 1.0) 77 | score += prec 78 | 79 | if n_pos == 0: 80 | return 0 81 | 82 | return score / n_pos 83 | 84 | 85 | def dcg_score(y_true, y_score, k=10, gains="exponential"): 86 | """Discounted cumulative gain (DCG) at rank k 87 | 88 | Parameters 89 | ---------- 90 | y_true : array-like, shape = [n_samples] 91 | Ground truth (true relevance labels). 92 | 93 | y_score : array-like, shape = [n_samples] 94 | Predicted scores. 95 | 96 | k : int 97 | Rank. 98 | 99 | gains : str 100 | Whether gains should be "exponential" (default) or "linear". 101 | 102 | Returns 103 | ------- 104 | DCG @k : float 105 | """ 106 | order = np.argsort(y_score)[::-1] 107 | y_true = y_true[order[:k]] 108 | if y_true.any(): 109 | if gains == "exponential": 110 | gains = 2 ** y_true - 1 111 | elif gains == "linear": 112 | gains = y_true 113 | else: 114 | raise ValueError("Invalid gains option.") 115 | 116 | # highest rank is 1 so +2 instead of +1 117 | discounts = np.log2(np.arange(len(y_true)) + 2) 118 | return np.sum(gains / discounts) 119 | return 0 120 | 121 | def ndcg_score(y_true, y_score, k=10, gains="exponential"): 122 | """Normalized discounted cumulative gain (NDCG) at rank k 123 | 124 | Parameters 125 | ---------- 126 | y_true : array-like, shape = [n_samples] 127 | Ground truth (true relevance labels). 128 | 129 | y_score : array-like, shape = [n_samples] 130 | Predicted scores. 131 | 132 | k : int 133 | Rank. 134 | 135 | gains : str 136 | Whether gains should be "exponential" (default) or "linear". 137 | 138 | Returns 139 | ------- 140 | NDCG @k : float 141 | """ 142 | best = dcg_score(y_true, y_true, k, gains) 143 | if best == 0: 144 | return 0 145 | actual = dcg_score(y_true, y_score, k, gains) 146 | return actual / best 147 | 148 | 149 | # Alternative API. 150 | 151 | def dcg_from_ranking(y_true, ranking): 152 | """Discounted cumulative gain (DCG) at rank k 153 | 154 | Parameters 155 | ---------- 156 | y_true : array-like, shape = [n_samples] 157 | Ground truth (true relevance labels). 158 | 159 | ranking : array-like, shape = [k] 160 | Document indices, i.e., 161 | ranking[0] is the index of top-ranked document, 162 | ranking[1] is the index of second-ranked document, 163 | ... 164 | 165 | k : int 166 | Rank. 167 | 168 | Returns 169 | ------- 170 | DCG @k : float 171 | """ 172 | y_true = np.asarray(y_true) 173 | ranking = np.asarray(ranking) 174 | rel = y_true[ranking] 175 | gains = 2 ** rel - 1 176 | discounts = np.log2(np.arange(len(ranking)) + 2) 177 | return np.sum(gains / discounts) 178 | 179 | 180 | def ndcg_from_ranking(y_true, ranking): 181 | """Normalized discounted cumulative gain (NDCG) at rank k 182 | 183 | Parameters 184 | ---------- 185 | y_true : array-like, shape = [n_samples] 186 | Ground truth (true relevance labels). 
187 | 188 | ranking : array-like, shape = [k] 189 | Document indices, i.e., 190 | ranking[0] is the index of top-ranked document, 191 | ranking[1] is the index of second-ranked document, 192 | ... 193 | 194 | k : int 195 | Rank. 196 | 197 | Returns 198 | ------- 199 | NDCG @k : float 200 | """ 201 | k = len(ranking) 202 | best_ranking = np.argsort(y_true)[::-1] 203 | best = dcg_from_ranking(y_true, best_ranking[:k]) 204 | return dcg_from_ranking(y_true, ranking) / best 205 | 206 | 207 | 208 | 209 | -------------------------------------------------------------------------------- /Classification/models/dynamic_net.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import torch 3 | #import pickle 4 | import torch.nn as nn 5 | 6 | class ForwardType(Enum): 7 | SIMPLE = 0 8 | STACKED = 1 9 | CASCADE = 2 10 | GRADIENT = 3 11 | 12 | class DynamicNet(object): 13 | def __init__(self, c0, lr): 14 | self.models = [] 15 | self.c0 = c0 16 | self.lr = lr 17 | self.boost_rate = nn.Parameter(torch.tensor(lr, requires_grad=True, device="cuda")) 18 | 19 | def add(self, model): 20 | self.models.append(model) 21 | 22 | def parameters(self): 23 | params = [] 24 | for m in self.models: 25 | params.extend(m.parameters()) 26 | 27 | params.append(self.boost_rate) 28 | return params 29 | 30 | def zero_grad(self): 31 | for m in self.models: 32 | m.zero_grad() 33 | 34 | def to_cuda(self): 35 | for m in self.models: 36 | m.cuda() 37 | 38 | def to_eval(self): 39 | for m in self.models: 40 | m.eval() 41 | 42 | def to_train(self): 43 | for m in self.models: 44 | m.train(True) 45 | 46 | def forward(self, x): 47 | if len(self.models) == 0: 48 | return None, self.c0 49 | middle_feat_cum = None 50 | prediction = None 51 | with torch.no_grad(): 52 | for m in self.models: 53 | if middle_feat_cum is None: 54 | middle_feat_cum, prediction = m(x, middle_feat_cum) 55 | else: 56 | middle_feat_cum, pred = m(x, middle_feat_cum) 57 | prediction += pred 58 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 59 | 60 | def forward_grad(self, x): 61 | if len(self.models) == 0: 62 | return None, self.c0 63 | # at least one model 64 | middle_feat_cum = None 65 | prediction = None 66 | for m in self.models: 67 | if middle_feat_cum is None: 68 | middle_feat_cum, prediction = m(x, middle_feat_cum) 69 | else: 70 | middle_feat_cum, pred = m(x, middle_feat_cum) 71 | prediction += pred 72 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 73 | 74 | @classmethod 75 | def from_file(cls, path, builder): 76 | d = torch.load(path) 77 | net = DynamicNet(d['c0'], d['lr']) 78 | net.boost_rate = d['boost_rate'] 79 | for stage, m in enumerate(d['models']): 80 | submod = builder(stage) 81 | submod.load_state_dict(m) 82 | net.add(submod) 83 | return net 84 | 85 | def to_file(self, path): 86 | models = [m.state_dict() for m in self.models] 87 | d = {'models': models, 'c0': self.c0, 'lr': self.lr, 'boost_rate': self.boost_rate} 88 | torch.save(d, path) 89 | -------------------------------------------------------------------------------- /Classification/models/mlp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .splinear import SpLinear 6 | 7 | 8 | class MLP_1HL(nn.Module): 9 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 10 | super(MLP_1HL, self).__init__() 11 | self.in_layer = SpLinear(dim_in, 
dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 12 | self.out_layer = nn.Linear(dim_hidden1, 1) 13 | self.lrelu = nn.LeakyReLU(0.1) 14 | self.relu = nn.ReLU() 15 | if bn: 16 | self.bn = nn.BatchNorm1d(dim_hidden1) 17 | self.bn2 = nn.BatchNorm1d(dim_in) 18 | 19 | def forward(self, x, lower_f): 20 | if lower_f is not None: 21 | x = torch.cat([x, lower_f], dim=1) 22 | x = self.bn2(x) 23 | out = self.in_layer(x) 24 | return out, self.out_layer(self.relu(out)).squeeze() 25 | 26 | @classmethod 27 | def get_model(cls, stage, opt): 28 | if stage == 0: 29 | dim_in = opt.feat_d 30 | else: 31 | dim_in = opt.feat_d + opt.hidden_d 32 | model = MLP_1HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 33 | return model 34 | 35 | 36 | class MLP_2HL(nn.Module): 37 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 38 | super(MLP_2HL, self).__init__() 39 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 40 | self.dropout_layer = nn.Dropout(0.0) 41 | self.lrelu = nn.LeakyReLU(0.1) 42 | self.relu = nn.ReLU() 43 | self.hidden_layer = nn.Linear(dim_hidden1, dim_hidden2) 44 | self.out_layer = nn.Linear(dim_hidden2, 1) 45 | self.bn = nn.BatchNorm1d(dim_hidden1) 46 | self.bn2 = nn.BatchNorm1d(dim_in) 47 | 48 | def forward(self, x, lower_f): 49 | if lower_f is not None: 50 | x = torch.cat([x, lower_f], dim=1) 51 | x = self.bn2(x) 52 | out = self.lrelu(self.in_layer(x)) 53 | out = self.bn(out) 54 | out = self.hidden_layer(out) 55 | return out, self.out_layer(self.relu(out)).squeeze() 56 | 57 | @classmethod 58 | def get_model(cls, stage, opt): 59 | if stage == 0: 60 | dim_in = opt.feat_d 61 | else: 62 | dim_in = opt.feat_d + opt.hidden_d 63 | model = MLP_2HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 64 | return model 65 | 66 | class MLP_3HL(nn.Module): 67 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 68 | super(MLP_3HL, self).__init__() 69 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 70 | self.dropout_layer = nn.Dropout(0.0) 71 | self.lrelu = nn.LeakyReLU(0.1) 72 | self.relu = nn.ReLU() 73 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 74 | self.out_layer = nn.Linear(dim_hidden1, 1) 75 | self.bn = nn.BatchNorm1d(dim_hidden1) 76 | self.bn2 = nn.BatchNorm1d(dim_in) 77 | # print('Batch normalization is processed!') 78 | 79 | def forward(self, x, lower_f): 80 | if lower_f is not None: 81 | x = torch.cat([x, lower_f], dim=1) 82 | x = self.bn2(x) 83 | out = self.lrelu(self.in_layer(x)) 84 | out = self.bn(out) 85 | out = self.lrelu(self.hidden_layer(out)) 86 | out = self.bn(out) 87 | out = self.hidden_layer(out) 88 | return out, self.out_layer(self.relu(out)).squeeze() 89 | 90 | @classmethod 91 | def get_model(cls, stage, opt): 92 | if stage == 0: 93 | dim_in = opt.feat_d 94 | else: 95 | dim_in = opt.feat_d + opt.hidden_d 96 | model = MLP_3HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 97 | return model 98 | 99 | class MLP_4HL(nn.Module): 100 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 101 | super(MLP_3HL, self).__init__() 102 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 103 | self.dropout_layer = nn.Dropout(0.0) 104 | self.lrelu = nn.LeakyReLU(0.1) 105 | self.relu = nn.ReLU() 106 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 107 | self.out_layer = nn.Linear(dim_hidden1, 1) 108 | self.bn = nn.BatchNorm1d(dim_hidden1) 109 | self.bn2 = 
nn.BatchNorm1d(dim_in) 110 | # print('Batch normalization is processed!') 111 | 112 | def forward(self, x, lower_f): 113 | if lower_f is not None: 114 | x = torch.cat([x, lower_f], dim=1) 115 | x = self.bn2(x) 116 | out = self.lrelu(self.in_layer(x)) #HL-1 117 | out = self.bn(out) 118 | out = self.lrelu(self.hidden_layer(out)) #HL-2 119 | out = self.bn(out) 120 | out = self.lrelu(self.hidden_layer(out)) #HL-3 121 | out = self.bn(out) 122 | out = self.hidden_layer(out) #HL-4 123 | return out, self.out_layer(self.relu(out)).squeeze() 124 | 125 | @classmethod 126 | def get_model(cls, stage, opt): 127 | if stage == 0: 128 | dim_in = opt.feat_d 129 | else: 130 | dim_in = opt.feat_d + opt.hidden_d 131 | model = MLP_3HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 132 | return model 133 | 134 | 135 | class DNN(nn.Module): 136 | def __init__(self, dim_in, dim_hidden, n_hidden=20, sparse=False, bn=True, drop_out=0.3): 137 | super(DNN, self).__init__() 138 | if sparse: 139 | self.in_layer = SpLinear(dim_in, dim_hidden) 140 | else: 141 | self.in_layer = nn.Linear(dim_in, dim_hidden) 142 | self.in_act = nn.SELU() 143 | hidden_layers = [] 144 | for _ in range(n_hidden): 145 | hidden_layers.append(nn.Linear(dim_hidden, dim_hidden)) 146 | if bn: 147 | hidden_layers.append(nn.BatchNorm1d(dim_hidden)) 148 | hidden_layers.append(nn.SELU()) 149 | if drop_out > 0: 150 | hidden_layers.append(nn.Dropout(drop_out)) 151 | self.hidden_layers = nn.Sequential(*hidden_layers) 152 | self.out_layer = nn.Linear(dim_hidden, 1) 153 | 154 | def forward(self, x): 155 | out = self.in_act(self.in_layer(x)) 156 | out = self.hidden_layers(out) 157 | out = self.out_layer(out) 158 | return out.squeeze() 159 | -------------------------------------------------------------------------------- /Classification/models/splinear.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class SpLinearFunc(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, input, weight, bias=None): 10 | ctx.save_for_backward(input, weight, bias) 11 | output = input.mm(weight.t()) 12 | if bias is not None: 13 | output += bias.unsqueeze(0).expand_as(output) 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx, grad_output): 18 | input, weight, bias = ctx.saved_tensors 19 | grad_input = grad_weight = grad_bias = None 20 | 21 | if ctx.needs_input_grad[0]: 22 | grad_input = grad_output.mm(weight) 23 | if ctx.needs_input_grad[1]: 24 | grad_weight = (input.t().mm(grad_output)).t() 25 | if bias is not None and ctx.needs_input_grad[2]: 26 | grad_bias = grad_output.sum(0).squeeze(0) 27 | 28 | return grad_input, grad_weight, grad_bias 29 | 30 | splinear = SpLinearFunc.apply 31 | 32 | class SpLinear(nn.Module): 33 | def __init__(self, input_features, output_features, bias=True): 34 | super(SpLinear, self).__init__() 35 | self.input_features = input_features 36 | self.output_features = output_features 37 | self.weight = nn.Parameter(torch.Tensor(output_features, input_features)) 38 | if bias: 39 | self.bias = nn.Parameter(torch.Tensor(output_features)) 40 | else: 41 | self.register_parameter('bias', None) 42 | #TODO write a default initialization 43 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 44 | self.weight.data.uniform_(-stdv, stdv) 45 | 46 | def forward(self, input): 47 | return splinear(input, self.weight, self.bias) 48 | -------------------------------------------------------------------------------- /Classification/results/results_readme.txt: -------------------------------------------------------------------------------- 1 | Your results will be saved here. -------------------------------------------------------------------------------- /Classification/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### Feature Table ### 4 | # a9a 123 5 | # ijcnn1 22 6 | # covtype 54 7 | # mnist28 752 8 | # real-sim 20958 9 | # higgs 28 10 | dataset=higgs 11 | 12 | BASEDIR=$(dirname "$0") 13 | OUTDIR="${BASEDIR}/ckpt/" 14 | 15 | if [ ! -d "${OUTDIR}" ] 16 | then 17 | echo "Output dir ${OUTDIR} does not exist, creating..." 18 | mkdir -p ${OUTDIR} 19 | fi 20 | 21 | CUDA_VISIBLE_DEVICES=0 python -u main_cls_cv.py \ 22 | --feat_d 28 \ 23 | --hidden_d 16 \ 24 | --boost_rate 1 \ 25 | --lr 0.005 \ 26 | --L2 .0e-3 \ 27 | --num_nets 40 \ 28 | --data ${dataset} \ 29 | --tr ${BASEDIR}/../data/${dataset}.train \ 30 | --te ${BASEDIR}/../data/${dataset}.test \ 31 | --batch_size 2048 \ 32 | --epochs_per_stage 1 \ 33 | --correct_epoch 1 \ 34 | --model_order second \ 35 | --normalization True \ 36 | --cv True \ 37 | --sparse False \ 38 | --out_f ${OUTDIR}/${dataset}_cls.pth \ 39 | --cuda 40 | -------------------------------------------------------------------------------- /L2R/DataLoader/DataLoader.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import pandas as pd 5 | import numpy as np 6 | from sklearn import preprocessing 7 | 8 | 9 | def get_time(): 10 | return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') 11 | 12 | 13 | class L2R_DataLoader: 14 | 15 | def __init__(self, path): 16 | """ 17 | :param path: str 18 | """ 19 | self.path = path 20 | self.pickle_path = path[:-3] + 'pkl' 21 | self.df = None 22 | self.num_pairs = None 23 | self.num_sessions = None 24 | 25 | def get_num_pairs(self): 26 | if self.num_pairs is not None: 27 | return self.num_pairs 28 | self.num_pairs = 0 29 | for _, Y in self.generate_batch_per_query(self.df): 30 | Y = Y.reshape(-1, 1) 31 | pairs = Y - Y.T 32 | pos_pairs = np.sum(pairs > 0, (0, 1)) 33 | neg_pairs = np.sum(pairs < 0, (0, 1)) 34 | assert pos_pairs == neg_pairs 35 | self.num_pairs += pos_pairs + neg_pairs 36 | return self.num_pairs 37 | 38 | def get_num_sessions(self): 39 | return self.num_sessions 40 | 41 | def _load_mslr(self): 42 | print(get_time(), "load file from {}".format(self.path)) 43 | df = pd.read_csv(self.path, sep=" ", header=None) 44 | df.drop(columns=df.columns[-1], inplace=True) 45 | self.num_features = len(df.columns) - 2 46 | self.num_paris = None 47 | print(get_time(), "finish loading from {}".format(self.path)) 48 | print("dataframe shape: {}, features: {}".format(df.shape, self.num_features)) 49 | return df 50 | 51 | def _parse_feature_and_label(self, df): 52 | """ 53 | :param df: pandas.DataFrame 54 | :return: pandas.DataFrame 55 | """ 56 | print(get_time(), "parse dataframe ...", df.shape) 57 | for col in range(1, len(df.columns)): 58 | if ':' in str(df.iloc[:, col][0]): 59 | df.iloc[:, col] = df.iloc[:, col].apply(lambda x: x.split(":")[1]) 60 | df.columns = ['rel', 'qid'] + [str(f) for f in range(1, len(df.columns) - 1)] 61 | 62 | for col in [str(f) for f in 
range(1, len(df.columns) - 1)]: 63 | df[col] = df[col].astype(np.float32) 64 | 65 | print(get_time(), "finish parsing dataframe") 66 | self.df = df 67 | self.num_sessions = len(df.qid.unique()) 68 | return df 69 | 70 | def generate_query_pairs(self, df, qid): 71 | """ 72 | :param df: pandas.DataFrame, contains column qid, rel, fid from 1 to self.num_features 73 | :param qid: query id 74 | :returns: numpy.ndarray of x_i, y_i, x_j, y_j 75 | """ 76 | df_qid = df[df.qid == qid] 77 | rels = df_qid.rel.unique() 78 | x_i, x_j, y_i, y_j = [], [], [], [] 79 | for r in rels: 80 | df1 = df_qid[df_qid.rel == r] 81 | df2 = df_qid[df_qid.rel != r] 82 | df_merged = pd.merge(df1, df2, on='qid') 83 | df_merged.reindex(np.random.permutation(df_merged.index)) 84 | y_i.append(df_merged.rel_x.values.reshape(-1, 1)) 85 | y_j.append(df_merged.rel_y.values.reshape(-1, 1)) 86 | x_i.append(df_merged[['{}_x'.format(i) for i in range(1, self.num_features + 1)]].values) 87 | x_j.append(df_merged[['{}_y'.format(i) for i in range(1, self.num_features + 1)]].values) 88 | return np.vstack(x_i), np.vstack(y_i), np.vstack(x_j), np.vstack(y_j) 89 | 90 | def generate_query_pair_batch(self, df=None, batchsize=2000): 91 | """ 92 | :param df: pandas.DataFrame, contains column qid 93 | :returns: numpy.ndarray of x_i, y_i, x_j, y_j 94 | """ 95 | if df is None: 96 | df = self.df 97 | x_i_buf, y_i_buf, x_j_buf, y_j_buf = None, None, None, None 98 | qids = df.qid.unique() 99 | np.random.shuffle(qids) 100 | for qid in qids: 101 | x_i, y_i, x_j, y_j = self.generate_query_pairs(df, qid) 102 | if x_i_buf is None: 103 | x_i_buf, y_i_buf, x_j_buf, y_j_buf = x_i, y_i, x_j, y_j 104 | else: 105 | x_i_buf = np.vstack((x_i_buf, x_i)) 106 | y_i_buf = np.vstack((y_i_buf, y_i)) 107 | x_j_buf = np.vstack((x_j_buf, x_j)) 108 | y_j_buf = np.vstack((y_j_buf, y_j)) 109 | idx = 0 110 | while (idx + 1) * batchsize <= x_i_buf.shape[0]: 111 | start = idx * batchsize 112 | end = (idx + 1) * batchsize 113 | yield x_i_buf[start: end, :], y_i_buf[start: end, :], x_j_buf[start: end, :], y_j_buf[start: end, :] 114 | idx += 1 115 | 116 | x_i_buf = x_i_buf[idx * batchsize:, :] 117 | y_i_buf = y_i_buf[idx * batchsize:, :] 118 | x_j_buf = x_j_buf[idx * batchsize:, :] 119 | y_j_buf = y_j_buf[idx * batchsize:, :] 120 | 121 | yield x_i_buf, y_i_buf, x_j_buf, y_j_buf 122 | 123 | def generate_query_batch(self, df, batchsize): 124 | """ 125 | :param df: pandas.DataFrame, contains column qid 126 | :returns: numpy.ndarray qid, rel, x_i 127 | """ 128 | idx = 0 129 | while idx * batchsize < df.shape[0]: 130 | r = df.iloc[idx * batchsize: (idx + 1) * batchsize, :] 131 | yield r.qid.values, r.rel.values, r[['{}'.format(i) for i in range(1, self.num_features + 1)]].values 132 | idx += 1 133 | 134 | 135 | def generate_batch_per_query(self, df=None): 136 | """ 137 | :param df: pandas.DataFrame 138 | :return: X for features, y for relavance 139 | :rtype: numpy.ndarray, numpy.ndarray 140 | """ 141 | if df is None: 142 | df = self.df 143 | qids = df.qid.unique() 144 | np.random.shuffle(qids) 145 | for qid in qids: 146 | df_qid = df[df.qid == qid] 147 | yield df_qid[['{}'.format(i) for i in range(1, self.num_features + 1)]].values, df_qid.rel.values 148 | 149 | def load(self): 150 | """ 151 | :return: pandas.DataFrame 152 | """ 153 | if os.path.isfile(self.pickle_path): 154 | print(get_time(), "load from pickle file {}".format(self.pickle_path)) 155 | self.df = pd.read_pickle(self.pickle_path) 156 | self.num_features = len(self.df.columns) - 2 157 | self.num_paris = None 158 | 
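# A cached .pkl of the parsed dataframe is reused here when present; otherwise the raw
# LETOR-format text file is parsed and pickled in the else branch below. (num_paris above
# is a typo of num_pairs; get_num_pairs() recomputes the pair count lazily, so the
# attribute is effectively unused.)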
self.num_sessions = len(self.df.qid.unique()) 159 | else: 160 | self.df = self._parse_feature_and_label(self._load_mslr()) 161 | self.df.to_pickle(self.pickle_path) 162 | return self.df 163 | 164 | def train_scaler_and_transform(self): 165 | """Learn a scalar and apply transform.""" 166 | feature_columns = [str(i) for i in range(1, self.num_features + 1)] 167 | X_train = self.df[feature_columns] 168 | #scaler = preprocessing.StandardScaler().fit(X_train) 169 | scaler = preprocessing.MinMaxScaler().fit(X_train) 170 | self.df[feature_columns] = scaler.transform(X_train) 171 | return self.df, scaler 172 | 173 | def apply_scaler(self, scaler): 174 | print(get_time(), "apply scaler to transform feature for {}".format(self.path)) 175 | feature_columns = [str(i) for i in range(1, self.num_features + 1)] 176 | X_train = self.df[feature_columns] 177 | self.df[feature_columns] = scaler.transform(X_train) 178 | return self.df -------------------------------------------------------------------------------- /L2R/Misc/Calculations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import torch 4 | 5 | def loss_calc_(y_true, y_pred, gain_type, sigma, N, device): 6 | 7 | # compute the rank order of each document 8 | rank_df = pd.DataFrame({"y": y_true, "doc": np.arange(y_true.shape[0])}) 9 | rank_df = rank_df.sort_values("y").reset_index(drop=True) 10 | rank_order = rank_df.sort_values("doc").index.values + 1 11 | 12 | pos_pairs_score_diff = 1.0 + torch.exp(-sigma * (y_pred - y_pred.t())) 13 | 14 | y_tensor = torch.tensor(y_true, dtype=torch.float32, device=device).view(-1, 1) 15 | rel_diff = y_tensor - y_tensor.t() 16 | pos_pairs = (rel_diff > 0).type(torch.float32) 17 | neg_pairs = (rel_diff < 0).type(torch.float32) 18 | Sij = pos_pairs - neg_pairs 19 | if gain_type == "exp2": 20 | gain_diff = torch.pow(2.0, y_tensor) - torch.pow(2.0, y_tensor.t()) 21 | elif gain_type == "identity": 22 | gain_diff = y_tensor - y_tensor.t() 23 | else: 24 | raise ValueError("NDCG_gain method not supported yet {}".format(ndcg_gain_in_train)) 25 | 26 | rank_order_tensor = torch.tensor(rank_order, dtype=torch.float32, device=device).view(-1, 1) 27 | decay_diff = 1.0 / torch.log2(rank_order_tensor + 1.0) - 1.0 / torch.log2(rank_order_tensor.t() + 1.0) 28 | 29 | loss = (0.5*sigma*(1 - Sij)*(y_pred - y_pred.t()) + torch.log(pos_pairs_score_diff)) 30 | loss = torch.sum(loss, 1, keepdim=True) 31 | #import ipdb; ipdb.set_trace() 32 | return loss 33 | 34 | 35 | def grad_calc_(y_true, y_pred, gain_type, sigma, N, device): 36 | 37 | # compute the rank order of each document 38 | rank_df = pd.DataFrame({"y": y_true, "doc": np.arange(y_true.shape[0])}) 39 | rank_df = rank_df.sort_values("y").reset_index(drop=True) 40 | rank_order = rank_df.sort_values("doc").index.values + 1 41 | 42 | pos_pairs_score_diff = 1.0/(1.0 + torch.exp(sigma * (y_pred - y_pred.t()))) 43 | 44 | y_tensor = torch.tensor(y_true, dtype=torch.float32, device=device).view(-1, 1) 45 | rel_diff = y_tensor - y_tensor.t() 46 | pos_pairs = (rel_diff > 0).type(torch.float32) 47 | neg_pairs = (rel_diff < 0).type(torch.float32) 48 | Sij = pos_pairs - neg_pairs 49 | if gain_type == "exp2": 50 | gain_diff = torch.pow(2.0, y_tensor) - torch.pow(2.0, y_tensor.t()) 51 | elif gain_type == "identity": 52 | gain_diff = y_tensor - y_tensor.t() 53 | else: 54 | raise ValueError("NDCG_gain method not supported yet {}".format(ndcg_gain_in_train)) 55 | 56 | rank_order_tensor = torch.tensor(rank_order, 
dtype=torch.float32, device=device).view(-1, 1) 57 | decay_diff = 1.0 / torch.log2(rank_order_tensor + 1.0) - 1.0 / torch.log2(rank_order_tensor.t() + 1.0) 58 | 59 | grad_ord1 = sigma * (0.5 * (1 - Sij) - pos_pairs_score_diff) 60 | grad_ord2 = sigma*sigma*pos_pairs_score_diff*(1-pos_pairs_score_diff) 61 | 62 | #import ipdb; ipdb.set_trace() 63 | 64 | grad_ord1 = torch.sum(grad_ord1, 1, keepdim=True) 65 | grad_ord2 = torch.sum(grad_ord2, 1, keepdim=True) 66 | 67 | 68 | #print(grad_ord1.shape, y_pred.shape) 69 | assert grad_ord1.shape == y_pred.shape 70 | check_grad = torch.sum(grad_ord1, (0, 1)).item() 71 | check_grad2 = torch.sum(grad_ord2, (0, 1)).item() 72 | 73 | if check_grad == float('inf') or np.isnan(check_grad) or check_grad2 == float('inf') or np.isnan(check_grad2): 74 | import ipdb; ipdb.set_trace() 75 | 76 | return grad_ord1, grad_ord2 77 | 78 | 79 | def grad_calc_v2(y_true, y_pred, gain_type, sigma, N, device): 80 | # Normalize the gradients with NDCG delta adopted from Microsoft paper 81 | 82 | 83 | # Only pairs with positive rel values 84 | # compute the rank order of each document 85 | rank_df = pd.DataFrame({"y": y_true, "doc": np.arange(y_true.shape[0])}) 86 | rank_df = rank_df.sort_values("y").reset_index(drop=True) 87 | rank_order = rank_df.sort_values("doc").index.values + 1 88 | 89 | pos_pairs_score_diff = 1.0/(1.0 + torch.exp(sigma * (y_pred - y_pred.t()))) 90 | y_tensor = torch.tensor(y_true, dtype=torch.float32, device=device).view(-1, 1) 91 | 92 | if gain_type == "exp2": 93 | gain_diff = torch.pow(2.0, y_tensor) - torch.pow(2.0, y_tensor.t()) 94 | elif gain_type == "identity": 95 | gain_diff = y_tensor - y_tensor.t() 96 | else: 97 | raise ValueError("NDCG_gain method not supported yet {}".format(ndcg_gain_in_train)) 98 | 99 | rank_order_tensor = torch.tensor(rank_order, dtype=torch.float32, device=device).view(-1, 1) 100 | decay_diff = 1.0 / torch.log2(rank_order_tensor + 1.0) - 1.0 / torch.log2(rank_order_tensor.t() + 1.0) 101 | 102 | delta_ndcg = torch.abs(N * gain_diff * decay_diff) 103 | 104 | grad_ord1 = sigma * (-pos_pairs_score_diff * delta_ndcg) 105 | grad_ord1 = torch.sum(grad_ord1, 1, keepdim=True) 106 | 107 | grad_ord2 = (sigma*sigma)*pos_pairs_score_diff*(1-pos_pairs_score_diff)*delta_ndcg 108 | grad_ord2 = torch.sum(grad_ord2, 1, keepdim=True) 109 | 110 | 111 | #print(grad_ord1.shape, y_pred.shape) 112 | assert grad_ord1.shape == y_pred.shape 113 | check_grad = torch.sum(grad_ord1, (0, 1)).item() 114 | if check_grad == float('inf') or np.isnan(check_grad): 115 | import ipdb; ipdb.set_trace() 116 | 117 | return grad_ord1, grad_ord2 118 | 119 | def loss_calc_v2(y_true, y_pred, gain_type, sigma, N, device): 120 | # Normalize the loss with NDCG delta adopted from Microsoft paper 121 | 122 | rank_df = pd.DataFrame({"y": y_true, "doc": np.arange(y_true.shape[0])}) 123 | rank_df = rank_df.sort_values("y").reset_index(drop=True) 124 | rank_order = rank_df.sort_values("doc").index.values + 1 125 | 126 | 127 | pos_pairs_score_diff = 1.0 + torch.exp(-sigma * (y_pred - y_pred.t())) 128 | y_tensor = torch.tensor(y_true, dtype=torch.float32, device=device).view(-1, 1) 129 | 130 | if gain_type == "exp2": 131 | gain_diff = torch.pow(2.0, y_tensor) - torch.pow(2.0, y_tensor.t()) 132 | elif gain_type == "identity": 133 | gain_diff = y_tensor - y_tensor.t() 134 | else: 135 | raise ValueError("NDCG_gain method not supported yet {}".format(ndcg_gain_in_train)) 136 | 137 | rank_order_tensor = torch.tensor(rank_order, dtype=torch.float32, device=device).view(-1, 1) 138 | 
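# decay_diff (next line) is the change in rank-position discount from swapping documents
# i and j; together with gain_diff and the normalizer N = 1/maxDCG it forms |delta NDCG|,
# the LambdaRank-style weight applied to each pair's logistic loss below.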
decay_diff = 1.0 / torch.log2(rank_order_tensor + 1.0) - 1.0 / torch.log2(rank_order_tensor.t() + 1.0) 139 | 140 | delta_ndcg = torch.abs(N * gain_diff * decay_diff) 141 | 142 | loss = torch.log(pos_pairs_score_diff) * delta_ndcg 143 | loss = torch.sum(loss, 1, keepdim=True) 144 | 145 | return loss -------------------------------------------------------------------------------- /L2R/Misc/metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Metrics: 3 | 4 | NDCG: 5 | https://en.wikipedia.org/wiki/Discounted_cumulative_gain 6 | https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf 7 | """ 8 | 9 | import numpy as np 10 | 11 | 12 | class DCG(object): 13 | 14 | def __init__(self, k=10, gain_type='exp2'): 15 | """ 16 | :param k: int DCG@k 17 | :param gain_type: 'exp2' or 'identity' 18 | """ 19 | self.k = k 20 | self.discount = self._make_discount(256) 21 | if gain_type in ['exp2', 'identity']: 22 | self.gain_type = gain_type 23 | else: 24 | raise ValueError('gain type not equal to exp2 or identity') 25 | 26 | def evaluate(self, targets): 27 | """ 28 | :param targets: ranked list with relevance 29 | :return: float 30 | """ 31 | gain = self._get_gain(targets) 32 | discount = self._get_discount(min(self.k, len(gain))) 33 | return np.sum(np.divide(gain, discount)) 34 | 35 | def _get_gain(self, targets): 36 | t = targets[:self.k] 37 | if self.gain_type == 'exp2': 38 | return np.power(2.0, t) - 1.0 39 | else: 40 | return t 41 | 42 | def _get_discount(self, k): 43 | if k > len(self.discount): 44 | self.discount = self._make_discount(2 * len(self.discount)) 45 | return self.discount[:k] 46 | 47 | @staticmethod 48 | def _make_discount(n): 49 | x = np.arange(1, n+1, 1) 50 | discount = np.log2(x + 1) 51 | return discount 52 | 53 | 54 | class NDCG(DCG): 55 | 56 | def __init__(self, k=10, gain_type='exp2'): 57 | """ 58 | :param k: int NDCG@k 59 | :param gain_type: 'exp2' or 'identity' 60 | """ 61 | super(NDCG, self).__init__(k, gain_type) 62 | 63 | def evaluate(self, targets): 64 | """ 65 | :param targets: ranked list with relevance 66 | :return: float 67 | """ 68 | dcg = super(NDCG, self).evaluate(targets) 69 | ideal = np.sort(targets)[::-1] 70 | idcg = super(NDCG, self).evaluate(ideal) 71 | return dcg / idcg 72 | 73 | def maxDCG(self, targets): 74 | """ 75 | :param targets: ranked list with relevance 76 | :return: 77 | """ 78 | ideal = np.sort(targets)[::-1] 79 | return super(NDCG, self).evaluate(ideal) 80 | 81 | 82 | if __name__ == "__main__": 83 | targets = [3, 2, 3, 0, 1, 2, 3, 2] 84 | dcg6 = DCG(6, 'identity') 85 | ndcg6 = NDCG(6, 'identity') 86 | assert 6.861 < dcg6.evaluate(targets) < 6.862 87 | assert 0.785 < ndcg6.evaluate(targets) < 0.786 88 | ndcg10 = NDCG(10) 89 | assert 0 < ndcg10.evaluate(targets) < 1.0 90 | assert 0 < ndcg10.evaluate([1, 2, 3]) < 1.0 -------------------------------------------------------------------------------- /L2R/Readme.md: -------------------------------------------------------------------------------- 1 | - Data loading and creating dataloader are handled in GrowNet/L2R/data/data.py. If you want to try new data please put it into Microsoft data format. 2 | 3 | - Individual model class and ensemble architecture are in GrowNet/L2R/models: mlp.py and dynamic_net.py. 4 | You can increase number of hidden layers or change activation function from here: mlp.py 5 | 6 | - train.sh contains pairwise-loss implementation. 
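Concretely, the line to edit is the training command of the form `CUDA_VISIBLE_DEVICES=0 python -u main_l2r_pairwise_cv.py --data microsoft --feat_d 136 --hidden_d 64 ...` (flags abridged here; the released script may point at one of the other loss variants).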
If you want to try I-divergence or MSE loss implementations just change the python -u main_l2r_pairwise_cv.py to python -u main_l2r_idiv_cv.py (or main_l2r_mse_cv.py). You can also change the dtaset to yahoo, but when you do, change the feature dimension as well (from 136 to 518). You may want to alter the hidden layer dimension as well, say 128 or 256. -------------------------------------------------------------------------------- /L2R/Utils/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common function used in training Learn to Rank 3 | """ 4 | from argparse import ArgumentParser, ArgumentTypeError 5 | from collections import defaultdict 6 | import os 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from DataLoader.DataLoader import get_time, L2R_DataLoader 15 | from Misc.metrics import NDCG 16 | 17 | 18 | def get_device(): 19 | if torch.cuda.is_available(): 20 | device = "cuda:0" 21 | else: 22 | device = "cpu" 23 | print("Using device ", device) 24 | return device 25 | 26 | 27 | def get_ckptdir(net_name, net_structure, sigma=None): 28 | net_name = '{}-{}'.format(net_name, '-'.join([str(x) for x in net_structure])) 29 | if sigma: 30 | net_name += '-scale-{}'.format(sigma) 31 | ckptdir = os.path.join(os.path.dirname(__file__), 'ckptdir') 32 | if not os.path.exists(ckptdir): 33 | os.makedirs(ckptdir) 34 | ckptfile = os.path.join(ckptdir, net_name) 35 | print("checkpoint dir:", ckptfile) 36 | return ckptfile 37 | 38 | 39 | def save_to_ckpt(ckpt_file, epoch, model, optimizer, lr_scheduler): 40 | ckpt_file = ckpt_file + '_{}'.format(epoch) 41 | print(get_time(), 'save to ckpt {}'.format(ckpt_file)) 42 | torch.save({ 43 | 'epoch': epoch, 44 | 'model_state_dict': model.state_dict(), 45 | 'optimizer_state_dict': optimizer.state_dict(), 46 | 'lr_scheduler': lr_scheduler.state_dict(), 47 | }, ckpt_file) 48 | print(get_time(), 'finish save to ckpt {}'.format(ckpt_file)) 49 | 50 | 51 | def load_train_test_data(d_dir, data_fold, dataname, cv): 52 | """ 53 | :param data_fold: str, which fold's data was going to use to train 54 | :return: 55 | """ 56 | df_val = [] 57 | val_loader = None 58 | 59 | train_file, test_file = dataname + ".train.txt", dataname + ".test.txt" 60 | 61 | if data_fold: 62 | data_dir = os.path.join(d_dir, 'MSLR-WEB10K/') 63 | train_data = os.path.join(data_dir, data_fold, train_file) 64 | train_loader = L2R_DataLoader(train_data) 65 | df_train = train_loader.load() 66 | 67 | test_data = os.path.join(data_dir, data_fold, test_file) 68 | test_loader = L2R_DataLoader(test_data) 69 | df_test = test_loader.load() 70 | 71 | if cv: 72 | val_file = dataname + ".vali.txt" 73 | val_data = os.path.join(data_dir, data_fold, val_file) 74 | val_loader = L2R_DataLoader(val_data) 75 | df_val = val_loader.load() 76 | 77 | else: 78 | data_dir = d_dir + 'Yahoo/' 79 | train_data = os.path.join(os.path.dirname(__file__), data_dir, train_file) 80 | train_loader = L2R_DataLoader(train_data) 81 | df_train = train_loader.load() 82 | 83 | test_data = os.path.join(os.path.dirname(__file__), data_dir, test_file) 84 | test_loader = L2R_DataLoader(test_data) 85 | df_test = test_loader.load() 86 | 87 | if cv: 88 | val_file = dataname + ".vali.txt" 89 | val_data = os.path.join(os.path.dirname(__file__), data_dir, val_file) 90 | val_loader = L2R_DataLoader(val_data) 91 | df_val = val_loader.load() 92 | 93 | return train_loader, df_train, test_loader, df_test, 
val_loader, df_val 94 | 95 | 96 | def init_weights(m): 97 | if type(m) == nn.Linear: 98 | nn.init.xavier_uniform_(m.weight) 99 | m.bias.data.fill_(0.01) 100 | 101 | 102 | def eval_cross_entropy_loss(model, device, loader, phase="Eval", sigma=1.0): 103 | """ 104 | formula in https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf 105 | 106 | C = 0.5 * (1 - S_ij) * sigma * (si - sj) + log(1 + exp(-sigma * (si - sj))) 107 | when S_ij = 1: C = log(1 + exp(-sigma(si - sj))) 108 | when S_ij = -1: C = log(1 + exp(-sigma(sj - si))) 109 | sigma can change the shape of the curve 110 | """ 111 | print(get_time(), "{} Phase evaluate pairwise cross entropy loss".format(phase)) 112 | model.eval() 113 | with torch.set_grad_enabled(False): 114 | total_cost = 0 115 | total_pairs = loader.get_num_pairs() 116 | pairs_in_compute = 0 117 | for X, Y in loader.generate_batch_per_query(loader.df): 118 | Y = Y.reshape(-1, 1) 119 | rel_diff = Y - Y.T 120 | pos_pairs = (rel_diff > 0).astype(np.float32) 121 | num_pos_pairs = np.sum(pos_pairs, (0, 1)) 122 | # skip negative sessions, no relevant info: 123 | if num_pos_pairs == 0: 124 | continue 125 | neg_pairs = (rel_diff < 0).astype(np.float32) 126 | num_pairs = 2 * num_pos_pairs # num pos pairs and neg pairs are always the same 127 | pos_pairs = torch.tensor(pos_pairs, device=device) 128 | neg_pairs = torch.tensor(neg_pairs, device=device) 129 | Sij = pos_pairs - neg_pairs 130 | # only calculate the different pairs 131 | diff_pairs = pos_pairs + neg_pairs 132 | pairs_in_compute += num_pairs 133 | 134 | X_tensor = torch.Tensor(X).to(device) 135 | y_pred = model(X_tensor) 136 | y_pred_diff = y_pred - y_pred.t() 137 | 138 | # logsigmoid(x) = log(1 / (1 + exp(-x))) equivalent to log(1 + exp(-x)) 139 | C = 0.5 * (1 - Sij) * sigma * y_pred_diff - F.logsigmoid(-sigma * y_pred_diff) 140 | C = C * diff_pairs 141 | cost = torch.sum(C, (0, 1)) 142 | if cost.item() == float('inf') or np.isnan(cost.item()): 143 | import ipdb; ipdb.set_trace() 144 | total_cost += cost 145 | 146 | assert total_pairs == pairs_in_compute 147 | avg_cost = total_cost / total_pairs 148 | print( 149 | get_time(), 150 | "{} Phase pairwise corss entropy loss {:.6f}, total_paris {}".format( 151 | phase, avg_cost.item(), total_pairs 152 | )) 153 | 154 | 155 | def eval_ndcg_at_k(inference_model, device, df_valid, valid_loader, batch_size, k_list, gain_type, phase="Eval"): 156 | # print("Eval Phase evaluate NDCG @ {}".format(k_list)) 157 | ndcg_metrics = {k: NDCG(k, gain_type) for k in k_list} 158 | qids, rels, scores = [], [], [] 159 | inference_model.to_eval() # Set the models in ensemble net to eval mode 160 | with torch.no_grad(): 161 | for qid, rel, x in valid_loader.generate_query_batch(df_valid, batch_size): 162 | if x is None or x.shape[0] == 0: 163 | continue 164 | _, y_tensor = inference_model.forward(torch.Tensor(x).to(device)) 165 | scores.append(y_tensor.cpu().numpy().squeeze()) 166 | qids.append(qid) 167 | rels.append(rel) 168 | 169 | qids = np.hstack(qids) 170 | rels = np.hstack(rels) 171 | scores = np.hstack(scores) 172 | result_df = pd.DataFrame({'qid': qids, 'rel': rels, 'score': scores}) 173 | session_ndcgs = defaultdict(list) 174 | for qid in result_df.qid.unique(): 175 | result_qid = result_df[result_df.qid == qid].sort_values('score', ascending=False) 176 | rel_rank = result_qid.rel.values 177 | for k, ndcg in ndcg_metrics.items(): 178 | if ndcg.maxDCG(rel_rank) == 0: 179 | continue 180 | ndcg_k = ndcg.evaluate(rel_rank) 181 | if not np.isnan(ndcg_k): 182 | 
session_ndcgs[k].append(ndcg_k) 183 | 184 | ndcg_result = {k: np.mean(session_ndcgs[k]) for k in k_list} 185 | ndcg_result_print = ", ".join(["NDCG@{}: {:.5f}".format(k, ndcg_result[k]) for k in k_list]) 186 | print(get_time(), "{} Phase evaluate {}".format(phase, ndcg_result_print)) 187 | return ndcg_result 188 | 189 | def eval_spearman_kendall(inference_model, device, df_test, test_loader, test_group): 190 | 191 | # Switch the model into eval mode 192 | inference_model.to_eval() 193 | batch_size = df_test.shape[0] 194 | with torch.no_grad(): 195 | for qid, rel, x in test_loader.generate_query_batch(df_test, batch_size): 196 | _, y_tensor = inference_model.forward(torch.Tensor(x).to(device)) 197 | preds = y_tensor.cpu().numpy().tolist() 198 | labels = rel.tolist() 199 | 200 | mean_kt, weighted_mean_kt = mean_kendall_tau(labels, preds, test_group) 201 | mean_sr, weighted_mean_sr = mean_spearman_rho(labels, preds, test_group) 202 | 203 | return mean_sr, weighted_mean_sr, mean_kt, weighted_mean_kt 204 | 205 | 206 | def str2bool(v): 207 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 208 | return True 209 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 210 | return False 211 | else: 212 | raise ArgumentTypeError('Boolean value expected.') 213 | 214 | def check_for_single_queries(q, y): 215 | idx = [] 216 | uq = np.unique(q) 217 | for i in range(len(uq)): 218 | idd = np.where(q==uq[i])[0] 219 | if len(idd) > 1 and len(np.unique(y[idd]))>1: 220 | idx.append(idd) 221 | idx = np.concatenate(idx).ravel().tolist() 222 | return idx 223 | 224 | 225 | def get_args_parser(): 226 | """Common Args needed for different Learn to Rank training method. 227 | :rtype: ArgumentParser 228 | """ 229 | parser = ArgumentParser(description="additional training specification") 230 | parser.add_argument("--start_epoch", dest="start_epoch", type=int, default=0) 231 | parser.add_argument("--additional_epoch", dest="additional_epoch", type=int, default=100) 232 | parser.add_argument("--lr", dest="lr", type=float, default=0.0001) 233 | parser.add_argument("--optim", dest="optim", type=str, default="adam", choices=["adam", "sgd"]) 234 | parser.add_argument("--leaky_relu", dest="leaky_relu", type=str2bool, nargs="?", const=True, default=False) 235 | parser.add_argument( 236 | "--ndcg_gain_in_train", dest="ndcg_gain_in_train", 237 | type=str, default="exp2", choices=["exp2","identity"] 238 | ) 239 | parser.add_argument("--small_dataset", type=str2bool, nargs='?', const=True, default=False) 240 | parser.add_argument("--debug", type=str2bool, nargs='?', const=True, default=False) 241 | parser.add_argument("--double_precision", type=str2bool, nargs="?", const=True, default=False) 242 | parser.add_argument("--standardize", type=str2bool, nargs="?", const=True, default=False) 243 | return parser 244 | -------------------------------------------------------------------------------- /L2R/main_l2r_idiv_cv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import pandas as pd 4 | import argparse 5 | import torch 6 | import torch.nn as nn 7 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 8 | from models.dynamic_net import DynamicNet, ForwardType 9 | from torch.optim import SGD, Adam 10 | from DataLoader.DataLoader import L2R_DataLoader 11 | from Utils.utils import load_train_test_data, init_weights, get_device, eval_ndcg_at_k, check_for_single_queries 12 | import time 13 | 14 | parser = argparse.ArgumentParser() 15 | 
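# The flags registered below configure a GrowNet run: --feat_d / --hidden_d set the input and
# hidden dimensions of each weak learner, --num_nets the number of boosting stages,
# --epochs_per_stage and --correct_epoch the per-stage and fully-corrective training epochs,
# --boost_rate the initial (learnable) shrinkage, and --model_order first- vs second-order
# (Newton) residuals. L2R/train.sh shows typical values for the Microsoft dataset.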
parser.add_argument('--model_version', type=str, required=True) 16 | parser.add_argument('--model_order',default='second', type=str) 17 | parser.add_argument('--feat_d', type=int, required=True) 18 | parser.add_argument('--hidden_d', type=int, required=True) 19 | parser.add_argument('--boost_rate', type=float, required=True) 20 | parser.add_argument('--lr', type=float, required=True) 21 | parser.add_argument('--num_nets', type=int, required=True) 22 | parser.add_argument('--data', type=str, required=True) 23 | parser.add_argument('--data_dir', type=str, required=True) 24 | parser.add_argument('--batch_size', type=int, required=True) 25 | parser.add_argument('--epochs_per_stage', type=int, required=True) 26 | parser.add_argument('--correct_epoch', type=int ,required=True) 27 | parser.add_argument('--L2', type=float, required=True) 28 | parser.add_argument('--sigma', type=float, required=True) 29 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 30 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 31 | parser.add_argument('--sparse', action='store_true') 32 | parser.add_argument('--cuda', action='store_true') 33 | 34 | opt = parser.parse_args() 35 | 36 | if not opt.cuda: 37 | torch.set_num_threads(16) 38 | 39 | # prepare the dataset 40 | def get_data(): 41 | if opt.data == 'yahoo': 42 | data_fold = None 43 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 44 | elif opt.data == 'microsoft': 45 | data_fold = 'Fold1' 46 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 47 | else: 48 | pass 49 | 50 | if opt.normalization: 51 | print(opt.normalization) 52 | df_train, scaler = train_loader.train_scaler_and_transform() 53 | df_test = test_loader.apply_scaler(scaler) 54 | if opt.cv: 55 | df_val = val_loader.apply_scaler(scaler) 56 | 57 | print(f'#Train: {len(df_train)}, #Val: {len(df_val)} #Test: {len(df_test)}') 58 | 59 | return train_loader, df_train, test_loader, df_test, val_loader, df_val 60 | 61 | 62 | def get_optim(params, lr, weight_decay): 63 | #optimizer = SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 64 | optimizer = Adam(params, lr, weight_decay=weight_decay) 65 | return optimizer 66 | 67 | 68 | def init_gbnn(df_train): 69 | avg = (2**df_train['rel'] - 1)/16 70 | 71 | return avg.mean() 72 | 73 | 74 | if __name__ == "__main__": 75 | # prepare datasets 76 | device = get_device() 77 | #device_id = 1 78 | #device = 'cuda:' + str(device_id) 79 | print('Loading data...') 80 | train_loader, df_train, test_loader, df_test, val_loader, df_val = get_data() 81 | 82 | print(f'Start training with model version {opt.model_version} on {opt.data} dataset...') 83 | c0 = init_gbnn(df_train) 84 | net_ensemble = DynamicNet(c0, opt.boost_rate) 85 | loss_f = nn.MSELoss(reduction='none') 86 | all_scores = [] 87 | all_ensm_losses = [] 88 | all_mdl_losses = [] 89 | # NDCG parameters 90 | K = 10 91 | gain_type = 'identity' 92 | 93 | ### Validation parameters ### 94 | best_ndcg = 0 95 | val_ndcg = best_ndcg 96 | best_stage = opt.num_nets-1 97 | 98 | for stage in range(opt.num_nets): 99 | t0 = time.time() 100 | model = MLP_2HL.get_model(stage, opt) 101 | model.apply(init_weights) # Applying uniform xavier initialization for Linear layers 102 | if opt.cuda: 103 | model.cuda() 104 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 105 | 
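# Each boosting stage fits one fresh weak learner to the functional gradient of the
# I-divergence loss. With the exponential link p = exp(F(x)) and shifted targets y + 1,
# the derivatives are g = -(y - p) and h = p, so the second-order (Newton) target computed
# below is resid = -g/h = (y - p)/p, and the squared error on it is weighted by h.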
net_ensemble.to_train() # Set the models in ensemble net to train mode 106 | stage_resid = [] 107 | stage_mdlloss = [] 108 | for epoch in range(opt.epochs_per_stage): 109 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 110 | 111 | if opt.cuda: 112 | x = torch.tensor(x, dtype=torch.float32, device=device) 113 | y = torch.tensor(y+1, dtype=torch.float32, device=device).view(-1, 1) 114 | # Feeding input into ensemble Net 115 | middle_feat, out = net_ensemble.forward(x) 116 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 117 | 118 | # First proccess output through custom activation 119 | out = torch.exp(out) # Exponential 120 | grad_ord1 = -(y-out) 121 | grad_ord2 = out 122 | if opt.model_order=='second': 123 | resid = -grad_ord1/grad_ord2 124 | else: 125 | resid = -grad_ord1 126 | 127 | stage_resid.append(resid.sum().item()) 128 | _, out = model(x, middle_feat) 129 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 130 | 131 | loss = loss_f(net_ensemble.boost_rate*out, resid) 132 | loss = grad_ord2*loss 133 | loss = loss.mean() 134 | stage_mdlloss.append(loss.item()) 135 | model.zero_grad() 136 | loss.backward() 137 | optimizer.step() 138 | 139 | net_ensemble.add(model) 140 | sr = np.mean(stage_resid) 141 | sml = np.mean(stage_mdlloss) 142 | #print(f'Stage - {stage} resid: {sr}, and model loss: {sml}') 143 | 144 | # fully-corrective step 145 | stage_loss = [] 146 | lr_scaler = 2 147 | if stage > 0: 148 | # Adjusting corrective step learning rate 149 | if stage % 15 == 0: 150 | lr_scaler *= 2 151 | opt.L2 /= 2 152 | 153 | optimizer = get_optim(net_ensemble.parameters(), opt.lr / lr_scaler, opt.L2) 154 | #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.75) 155 | for _ in range(opt.correct_epoch): 156 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 157 | 158 | if opt.cuda: 159 | x = torch.tensor(x, dtype=torch.float32, device=device) 160 | y = torch.tensor(y+1, dtype=torch.float32, device=device).view(-1, 1) 161 | 162 | _, out = net_ensemble.forward_grad(x) 163 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 164 | out = torch.exp(out) # exponential 165 | #import ipdb; ipdb.set_trace() 166 | 167 | loss = torch.mean(y*torch.log(y/out) - (y-out)) 168 | optimizer.zero_grad() 169 | loss.backward() 170 | #scheduler.step() 171 | optimizer.step() 172 | stage_loss.append(loss.item()) 173 | sl = 0 174 | if stage_loss != []: 175 | sl = np.mean(stage_loss) 176 | 177 | all_ensm_losses.append(sl) 178 | all_mdl_losses.append(sml) 179 | print(f'Stage - {stage}, Boost rate: {net_ensemble.boost_rate} Loss: {sl}') 180 | elapsed_tr = time.time()-t0 181 | 182 | ndcg_result = eval_ndcg_at_k(net_ensemble, device, df_test, test_loader, 100000, [5, 10], gain_type) 183 | 184 | if opt.cv: 185 | val_result = eval_ndcg_at_k(net_ensemble, device, df_val, val_loader, 100000, [5, 10], gain_type, "Validation") 186 | if val_result[5] > best_ndcg: 187 | best_ndcg = val_result[5] 188 | best_stage = stage 189 | 190 | all_scores.append([ndcg_result[5], ndcg_result[10]]) 191 | elapsed_te = time.time()-t0 - elapsed_tr 192 | print(f'Stage: {stage} Training time: {elapsed_tr: .1f} sec and Test time: {elapsed_te: .1f} sec \n') 193 | 194 | te_ndcg_5, te_ndcg_10 = all_scores[best_stage][0], all_scores[best_stage][1] 195 | print(f'Best validation stage: {best_stage} final Test NDCG@5: {te_ndcg_5:.5f}, NDCG@10: {te_ndcg_10:.5f}') 196 | 197 | fname = './results/' + 
opt.data + '_NDCG_Idivergenceloss' 198 | np.savez(fname, all_scores, all_ensm_losses, all_mdl_losses) 199 | np.savez('./results/' + opt.data + '_GID_parameters', opt) 200 | 201 | -------------------------------------------------------------------------------- /L2R/main_l2r_mse_cv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import pandas as pd 4 | import argparse 5 | import torch 6 | import torch.nn as nn 7 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 8 | from models.dynamic_net import DynamicNet, ForwardType 9 | from torch.optim import SGD, Adam 10 | from DataLoader.DataLoader import L2R_DataLoader 11 | from Utils.utils import load_train_test_data, init_weights, get_device, eval_ndcg_at_k 12 | from Misc.Calculations import grad_calc_, loss_calc_, grad_calc_v2, loss_calc_v2 13 | import time 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_version', type=str, required=True) 17 | parser.add_argument('--model_order',default='second', type=str) 18 | parser.add_argument('--feat_d', type=int, required=True) 19 | parser.add_argument('--hidden_d', type=int, required=True) 20 | parser.add_argument('--boost_rate', type=float, required=True) 21 | parser.add_argument('--lr', type=float, required=True) 22 | parser.add_argument('--num_nets', type=int, required=True) 23 | parser.add_argument('--data', type=str, required=True) 24 | parser.add_argument('--data_dir', type=str, required=True) 25 | parser.add_argument('--batch_size', type=int, required=True) 26 | parser.add_argument('--epochs_per_stage', type=int, required=True) 27 | parser.add_argument('--correct_epoch', type=int ,required=True) 28 | parser.add_argument('--L2', type=float, required=True) 29 | parser.add_argument('--sigma', type=float, required=True) 30 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 31 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 32 | parser.add_argument('--sparse', action='store_true') 33 | parser.add_argument('--cuda', action='store_true') 34 | 35 | opt = parser.parse_args() 36 | 37 | if not opt.cuda: 38 | torch.set_num_threads(16) 39 | 40 | # prepare the dataset 41 | def get_data(): 42 | if opt.data == 'yahoo': 43 | data_fold = None 44 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 45 | elif opt.data == 'microsoft': 46 | data_fold = 'Fold1' 47 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 48 | else: 49 | pass 50 | 51 | if opt.normalization: 52 | print(opt.normalization) 53 | df_train, scaler = train_loader.train_scaler_and_transform() 54 | df_test = test_loader.apply_scaler(scaler) 55 | if opt.cv: 56 | df_val = val_loader.apply_scaler(scaler) 57 | 58 | print(f'#Train: {len(df_train.index)}, #Val: {len(df_val)} #Test: {len(df_test.index)}') 59 | 60 | return train_loader, df_train, test_loader, df_test, val_loader, df_val 61 | 62 | 63 | def get_optim(params, lr, weight_decay): 64 | #optimizer = SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 65 | optimizer = Adam(params, lr, weight_decay=weight_decay) 66 | return optimizer 67 | 68 | def init_gbnn(df_train): 69 | avg = (2**df_train['rel'] - 1)/16 70 | 71 | return avg.mean() 72 | if __name__ == "__main__": 73 | # prepare datasets 74 | device = get_device() 75 | print('Loading 
data...') 76 | train_loader, df_train, test_loader, df_test, val_loader, df_val = get_data() 77 | print(f'Start training with {opt.data} dataset...') 78 | c0 = init_gbnn(df_train) 79 | net_ensemble = DynamicNet(c0, opt.boost_rate) 80 | loss_f = nn.MSELoss() 81 | all_scores = [] 82 | all_ensm_losses = [] 83 | all_mdl_losses = [] 84 | # NDCG parameters 85 | K = 10 86 | gain_type = 'identity' 87 | 88 | ### Validation parameters ### 89 | best_ndcg = 0 90 | val_ndcg = best_ndcg 91 | best_stage = opt.num_nets-1 92 | 93 | 94 | for stage in range(opt.num_nets): 95 | t0 = time.time() 96 | model = MLP_2HL.get_model(stage, opt) 97 | model.apply(init_weights) # Applying uniform xavier initialization for Linear layers 98 | if opt.cuda: 99 | model.cuda() 100 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 101 | net_ensemble.to_train() # Set the models in ensemble net to train mode 102 | stage_resid = [] 103 | stage_mdlloss = [] 104 | for epoch in range(opt.epochs_per_stage): 105 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 106 | 107 | if opt.cuda: 108 | x = torch.tensor(x, dtype=torch.float32, device=device) 109 | y = torch.tensor(y, dtype=torch.float32, device=device).view(-1, 1) 110 | # Feeding input into ensemble Net 111 | middle_feat, out = net_ensemble.forward(x) 112 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 113 | resid = y - out # Negative of gradient direction: -grad/grad2 114 | stage_resid.append(resid.sum().detach().cpu().numpy()) 115 | _, out = model(x, middle_feat) 116 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 117 | loss = loss_f(net_ensemble.boost_rate*out, resid) 118 | 119 | stage_mdlloss.append(loss.item()) 120 | model.zero_grad() 121 | loss.backward() 122 | optimizer.step() 123 | 124 | net_ensemble.add(model) 125 | sr = np.mean(stage_resid) 126 | sml = np.mean(stage_mdlloss) 127 | #print(f'Stage - {stage} resid: {sr}, and model loss: {sml}') 128 | 129 | # fully-corrective step 130 | stage_loss = [] 131 | lr_scaler = 3 132 | if stage > 2: 133 | # Adjusting corrective step learning rate 134 | if stage % 15 == 0: 135 | #lr_scaler *= 2 136 | opt.lr /= 2 137 | opt.L2 /= 2 138 | 139 | optimizer = get_optim(net_ensemble.parameters(), opt.lr / lr_scaler, opt.L2) 140 | #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.75) 141 | for _ in range(opt.correct_epoch): 142 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 143 | 144 | if opt.cuda: 145 | x = torch.tensor(x, dtype=torch.float32, device=device) 146 | y = torch.tensor(y, dtype=torch.float32, device=device).view(-1, 1) 147 | 148 | _, out = net_ensemble.forward_grad(x) 149 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 150 | loss = loss_f(out, y) 151 | #net_ensemble.zero_grad() 152 | optimizer.zero_grad() 153 | loss.backward() 154 | #scheduler.step() 155 | optimizer.step() 156 | stage_loss.append(loss.item()) 157 | sl = 0 158 | if stage_loss != []: 159 | sl = np.mean(stage_loss) 160 | 161 | all_ensm_losses.append(sl) 162 | all_mdl_losses.append(sml) 163 | print(f'Stage - {stage}, Boost rate: {net_ensemble.boost_rate} Loss: {sl}') 164 | 165 | elapsed_tr = time.time()-t0 166 | 167 | ndcg_result = eval_ndcg_at_k(net_ensemble, device, df_test, test_loader, 100000, [5, 10], gain_type) 168 | if opt.cv: 169 | val_result = eval_ndcg_at_k(net_ensemble, device, df_val, val_loader, 100000, [5, 10], gain_type, "Validation") 170 | if val_result[5] > best_ndcg: 
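# Model selection: NDCG@5 on the validation fold picks the best boosting stage; the test
# scores reported at the end are the ones recorded at that stage.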
171 | best_ndcg = val_result[5] 172 | best_stage = stage 173 | 174 | all_scores.append([ndcg_result[5], ndcg_result[10]]) 175 | elapsed_te = time.time()-t0 - elapsed_tr 176 | print(f'Stage: {stage} Training time: {elapsed_tr: .1f} sec and Test time: {elapsed_te: .1f} sec \n') 177 | 178 | ### Test results from CV ### 179 | te_ndcg_5, te_ndcg_10 = all_scores[best_stage][0], all_scores[best_stage][1] 180 | print(f'Best validation stage: {best_stage} final Test NDCG@5: {te_ndcg_5:.5f}, NDCG@10: {te_ndcg_10:.5f}') 181 | fname = opt.data + '_NDCG_MSEloss' 182 | np.savez(fname, all_scores, all_ensm_losses, all_mdl_losses) 183 | np.savez('./results/' + opt.data + '_MSE_parameters', opt) 184 | -------------------------------------------------------------------------------- /L2R/main_l2r_pairwise_cv.py: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env python 2 | import numpy as np 3 | import pandas as pd 4 | import argparse 5 | import torch 6 | import torch.nn as nn 7 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 8 | from models.dynamic_net import DynamicNet, ForwardType 9 | from torch.optim import SGD, Adam 10 | from DataLoader.DataLoader import L2R_DataLoader 11 | from Utils.utils import load_train_test_data, init_weights, get_device, eval_ndcg_at_k, check_for_single_queries 12 | from Misc.Calculations import grad_calc_, loss_calc_, grad_calc_v2, loss_calc_v2 13 | from Misc.metrics import NDCG, DCG 14 | import time 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--model_version', type=str, required=True) 18 | parser.add_argument('--model_order',default='second', type=str) 19 | parser.add_argument('--feat_d', type=int, required=True) 20 | parser.add_argument('--hidden_d', type=int, required=True) 21 | parser.add_argument('--boost_rate', type=float, required=True) 22 | parser.add_argument('--lr', type=float, required=True) 23 | parser.add_argument('--num_nets', type=int, required=True) 24 | parser.add_argument('--data', type=str, required=True) 25 | parser.add_argument('--data_dir', type=str, required=True) 26 | parser.add_argument('--batch_size', type=int, required=True) 27 | parser.add_argument('--epochs_per_stage', type=int, required=True) 28 | parser.add_argument('--correct_epoch', type=int ,required=True) 29 | parser.add_argument('--L2', type=float, required=True) 30 | parser.add_argument('--sigma', type=float, required=True) 31 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 32 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 33 | parser.add_argument('--sparse', action='store_true') 34 | parser.add_argument('--cuda', action='store_true') 35 | 36 | opt = parser.parse_args() 37 | 38 | if not opt.cuda: 39 | torch.set_num_threads(16) 40 | 41 | # prepare the dataset 42 | def get_data(): 43 | if opt.data == 'yahoo': 44 | data_fold = None 45 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 46 | elif opt.data == 'microsoft': 47 | data_fold = 'Fold1' 48 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 49 | else: 50 | pass 51 | 52 | if opt.normalization: 53 | print(opt.normalization) 54 | df_train, scaler = train_loader.train_scaler_and_transform() 55 | df_test = test_loader.apply_scaler(scaler) 56 | if opt.cv: 57 | df_val = val_loader.apply_scaler(scaler) 58 | 59 | 
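# Note: the MinMax scaler is fit on the training split only (train_scaler_and_transform)
# and then re-applied to the test and validation splits, so no test statistics leak into training.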
print(f'#Train: {len(df_train.index)}, #Val: {len(df_val)} #Test: {len(df_test.index)}') 60 | 61 | return train_loader, df_train, test_loader, df_test, val_loader, df_val 62 | 63 | 64 | def get_optim(params, lr, weight_decay): 65 | #optimizer = SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 66 | optimizer = Adam(params, lr, weight_decay=weight_decay) 67 | return optimizer 68 | 69 | 70 | def init_gbnn(df_train): 71 | avg = (2**df_train['rel'] - 1)/16 72 | 73 | return avg.mean() 74 | 75 | if __name__ == "__main__": 76 | # prepare datasets 77 | device = get_device() 78 | print('Loading data...') 79 | train_loader, df_train, test_loader, df_test, val_loader, df_val = get_data() 80 | 81 | print(f'Start training with model version {opt.model_version} on {opt.data} dataset...') 82 | c0 = init_gbnn(df_train) 83 | net_ensemble = DynamicNet(c0, opt.boost_rate) 84 | loss_f = nn.MSELoss(reduction='none') 85 | all_scores = [] 86 | all_ensm_losses = [] 87 | all_mdl_losses = [] 88 | dynamic_br = [] 89 | execution_time = [] 90 | 91 | ### Validation parameters ### 92 | best_ndcg = 0 93 | val_ndcg = best_ndcg 94 | best_stage = opt.num_nets-1 95 | 96 | # NDCG parameters 97 | K = 10 98 | gain_type = 'identity' 99 | ideal_dcg = NDCG(2**(K-1), gain_type) 100 | 101 | for stage in range(opt.num_nets): 102 | t0 = time.time() 103 | model = MLP_2HL.get_model(stage, opt) 104 | model.apply(init_weights) # Applying uniform xavier initialization for Linear layers 105 | if opt.cuda: 106 | model.cuda() 107 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 108 | net_ensemble.to_train() # Set the models in ensemble net to train mode 109 | stage_resid = [] 110 | stage_mdlloss = [] 111 | for epoch in range(opt.epochs_per_stage): 112 | count = 0 113 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 114 | 115 | # Removing queries with a single doc. 
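# check_for_single_queries keeps only queries with more than one document and more than one
# distinct relevance label; other queries contribute no preference pairs and would produce
# zero (or undefined) LambdaRank gradients.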
116 | idx1 = check_for_single_queries(q, y) 117 | q, y, x = q[idx1], y[idx1], x[idx1] 118 | 119 | if opt.cuda: 120 | x = torch.tensor(x, dtype=torch.float32, device=device) 121 | #y = torch.tensor(y, dtype=torch.float32, device=device).view(-1, 1) 122 | 123 | # Feeding input into ensemble Net 124 | middle_feat, out = net_ensemble.forward(x) 125 | out = torch.as_tensor(out.view(-1, 1), dtype=torch.float32, device=device) 126 | # Indexing data by qid 127 | uq = np.unique(q) 128 | grad_batch = None 129 | for i in range(len(uq)): 130 | idx = np.where(q==uq[i])[0] 131 | y_i = y[idx] 132 | idx = torch.tensor(idx, device=device) 133 | out_i = torch.index_select(out, 0, idx) 134 | 135 | #if np.sum(y_i)==0 or len(y_i)<=1: 136 | # continue # All irrelevant docs, no useful info 137 | N = 1.0 / ideal_dcg.maxDCG(y_i) 138 | 139 | grad_ord1, grad_ord2 = grad_calc_(y_i, out_i, gain_type, opt.sigma, N, device) 140 | if opt.model_order=='second': 141 | resid = -grad_ord1/grad_ord2 142 | else: 143 | resid = -grad_ord1 144 | 145 | if grad_batch is None: 146 | grad_batch = resid 147 | grad_ord2_batch = grad_ord2 148 | else: 149 | grad_ord2_batch = torch.cat((grad_ord2_batch, grad_ord2), dim=0) 150 | grad_batch = torch.cat((grad_batch, resid), dim=0) 151 | 152 | _, out = model(x, middle_feat) 153 | out = torch.as_tensor(out.view(-1, 1), dtype=torch.float32, device=device) 154 | 155 | loss = loss_f(net_ensemble.boost_rate*out, grad_batch) 156 | loss = grad_ord2_batch*loss 157 | loss = loss.mean() 158 | model.zero_grad() 159 | loss.backward() 160 | optimizer.step() 161 | stage_resid.append(grad_batch.sum().item()) 162 | stage_mdlloss.append(loss.item()) 163 | #print('Model parameters after grad update \n') 164 | for name, param in model.named_parameters(): 165 | if param.requires_grad: 166 | if np.isnan(param.data.sum().detach().cpu().numpy()): 167 | import ipdb; ipdb.set_trace() 168 | 169 | 170 | net_ensemble.add(model) 171 | sr = -np.mean(stage_resid) 172 | sml = np.mean(stage_mdlloss) 173 | #print(f'Stage - {stage} resid: {sr}, and model loss: {sml}') 174 | 175 | # fully-corrective step 176 | stage_loss = [] 177 | lr_scaler = 2 178 | if stage >3: 179 | 180 | # Adjusting corrective step learning rate 181 | if stage % 15 == 0: 182 | #lr_scaler *= 2 183 | opt.lr /= 2 184 | opt.L2 /= 2 185 | 186 | optimizer = get_optim(net_ensemble.parameters(), opt.lr/lr_scaler, opt.L2) 187 | for _ in range(opt.correct_epoch): 188 | count = 0 189 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 190 | 191 | # Removing queries with a single doc 192 | idx1 = check_for_single_queries(q, y) 193 | q, y, x = q[idx1], y[idx1], x[idx1] 194 | 195 | if opt.cuda: 196 | x = torch.tensor(x, dtype=torch.float32, device=device) 197 | #y = torch.tensor(y, dtype=torch.float32, device=device).view(-1, 1) 198 | 199 | _, out = net_ensemble.forward_grad(x) 200 | out = torch.as_tensor(out.view(-1, 1), dtype=torch.float32, device=device) 201 | uq = np.unique(q) 202 | loss_batch = 0 203 | for i in range(len(uq)): 204 | idx = np.where(q==uq[i])[0] 205 | y_i = y[idx] 206 | idx = torch.tensor(idx, device=device) 207 | out_i = torch.index_select(out, 0, idx) 208 | 209 | #if np.sum(y_i)==0 or len(y_i)<=1: 210 | # continue # All irrelevant docs, no useful info 211 | N = 1.0 / ideal_dcg.maxDCG(y_i) 212 | loss_batch += loss_calc_(y_i, out_i, gain_type, opt.sigma, N, device).mean() 213 | 214 | loss_batch = loss_batch/len(uq) #opt.batch_size 215 | #import ipdb; ipdb.set_trace() 216 | optimizer.zero_grad() 217 | loss_batch.backward() 
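# The optimizer here was built over net_ensemble.parameters(), so this corrective update
# adjusts every previously added weak learner together with the learnable boost_rate,
# not just the model added in the current stage.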
218 | optimizer.step() 219 | stage_loss.append(loss_batch.item()) 220 | #net_ensemble.zero_grad() 221 | sl = 0 222 | if stage_loss != []: 223 | sl = np.mean(stage_loss) 224 | # Storing losses and dynamic boost rate 225 | dynamic_br.append(net_ensemble.boost_rate.item()) 226 | all_ensm_losses.append(sl) 227 | all_mdl_losses.append(sml) 228 | print(f'Stage - {stage}, Boost rate: {net_ensemble.boost_rate: .4f} Loss: {sl: .4f}') 229 | 230 | elapsed_tr = time.time()-t0 231 | 232 | net_ensemble.to_eval() # Set the models in ensemble net to eval mode 233 | 234 | ndcg_result = eval_ndcg_at_k(net_ensemble, device, df_test, test_loader, 100000, [5, 10], gain_type) 235 | if opt.cv: 236 | val_result = eval_ndcg_at_k(net_ensemble, device, df_val, val_loader, 100000, [5, 10], gain_type, "Validation") 237 | if val_result[5] > best_ndcg: 238 | best_ndcg = val_result[5] 239 | best_stage = stage 240 | 241 | 242 | all_scores.append([ndcg_result[5], ndcg_result[10]]) 243 | elapsed_te = time.time()-t0 - elapsed_tr 244 | # Storing training and test time 245 | execution_time.append([elapsed_tr, elapsed_te]) 246 | print(f'Stage: {stage} Training time: {elapsed_tr: .1f} sec and Test time: {elapsed_te: .1f} sec \n') 247 | 248 | ### Test results from CV ### 249 | te_ndcg_5, te_ndcg_10 = all_scores[best_stage][0], all_scores[best_stage][1] 250 | print(f'Best validation stage: {best_stage} final Test NDCG@5: {te_ndcg_5:.5f}, NDCG@10: {te_ndcg_10:.5f}') 251 | 252 | fname = './results/' + opt.data +'_'+ str(opt.hidden_d) + 'u_2hl_pairwiseloss' 253 | np.savez(fname, all_scores=all_scores, all_ensm_losses=all_ensm_losses, all_mdl_losses=all_mdl_losses, dynamic_br=dynamic_br, execution_time=execution_time, options=opt) 254 | -------------------------------------------------------------------------------- /L2R/models/dynamic_net.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import torch 3 | #import pickle 4 | import torch.nn as nn 5 | 6 | class ForwardType(Enum): 7 | SIMPLE = 0 8 | STACKED = 1 9 | CASCADE = 2 10 | GRADIENT = 3 11 | 12 | class DynamicNet(object): 13 | def __init__(self, c0, lr): 14 | self.models = [] 15 | self.c0 = c0 16 | self.lr = lr 17 | self.boost_rate = nn.Parameter(torch.tensor(lr, requires_grad=True, device="cuda:0")) 18 | 19 | def add(self, model): 20 | self.models.append(model) 21 | 22 | def parameters(self): 23 | params = [] 24 | for m in self.models: 25 | params.extend(m.parameters()) 26 | 27 | params.append(self.boost_rate) 28 | return params 29 | 30 | def zero_grad(self): 31 | for m in self.models: 32 | m.zero_grad() 33 | 34 | def to_cuda(self): 35 | for m in self.models: 36 | m.cuda() 37 | 38 | def to_eval(self): 39 | for m in self.models: 40 | m.eval() 41 | 42 | def to_train(self): 43 | for m in self.models: 44 | m.train(True) 45 | 46 | def forward(self, x): 47 | if len(self.models) == 0: 48 | return None, self.c0*torch.ones((len(x), 1)) 49 | middle_feat_cum = None 50 | prediction = None 51 | with torch.no_grad(): 52 | for m in self.models: 53 | if middle_feat_cum is None: 54 | middle_feat_cum, prediction = m(x, middle_feat_cum) 55 | else: 56 | middle_feat_cum, pred = m(x, middle_feat_cum) 57 | prediction += pred 58 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 59 | 60 | def forward_grad(self, x): 61 | if len(self.models) == 0: 62 | return None, self.c0 63 | # at least one model 64 | middle_feat_cum = None 65 | prediction = None 66 | for m in self.models: 67 | if middle_feat_cum is None: 68 | 
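# Unlike forward(), forward_grad() omits the torch.no_grad() context, so the fully-corrective
# step can backpropagate through every weak learner in the ensemble.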
middle_feat_cum, prediction = m(x, middle_feat_cum) 69 | else: 70 | middle_feat_cum, pred = m(x, middle_feat_cum) 71 | prediction += pred 72 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 73 | 74 | @classmethod 75 | def from_file(cls, path, builder): 76 | d = torch.load(path) 77 | net = DynamicNet(d['c0'], d['lr']) 78 | net.boost_rate = d['boost_rate'] 79 | for stage, m in enumerate(d['models']): 80 | submod = builder(stage) 81 | submod.load_state_dict(m) 82 | net.add(submod) 83 | return net 84 | 85 | def to_file(self, path): 86 | models = [m.state_dict() for m in self.models] 87 | d = {'models': models, 'c0': self.c0, 'lr': self.lr, 'boost_rate': self.boost_rate} 88 | torch.save(d, path) 89 | -------------------------------------------------------------------------------- /L2R/models/mlp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .splinear import SpLinear 6 | 7 | 8 | class MLP_1HL(nn.Module): 9 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 10 | super(MLP_1HL, self).__init__() 11 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 12 | self.out_layer = nn.Linear(dim_hidden1, 1) 13 | self.lrelu = nn.LeakyReLU(0.1) 14 | self.relu = nn.ReLU() 15 | if bn: 16 | self.bn = nn.BatchNorm1d(dim_hidden1) 17 | self.bn2 = nn.BatchNorm1d(dim_in) 18 | 19 | def forward(self, x, lower_f): 20 | if lower_f is not None: 21 | x = torch.cat([x, lower_f], dim=1) 22 | x = self.bn2(x) 23 | out = self.in_layer(x) 24 | return out, self.out_layer(self.relu(out)).squeeze() 25 | 26 | @classmethod 27 | def get_model(cls, stage, opt): 28 | if stage == 0: 29 | dim_in = opt.feat_d 30 | else: 31 | dim_in = opt.feat_d + opt.hidden_d 32 | model = MLP_1HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 33 | return model 34 | 35 | 36 | class MLP_2HL(nn.Module): 37 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 38 | super(MLP_2HL, self).__init__() 39 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 40 | self.dropout_layer = nn.Dropout(0.0) 41 | self.lrelu = nn.LeakyReLU(0.1) 42 | self.relu = nn.ReLU() 43 | self.hidden_layer = nn.Linear(dim_hidden1, dim_hidden2) 44 | self.out_layer = nn.Linear(int(dim_hidden2), 1) 45 | self.bn = nn.BatchNorm1d(dim_hidden1) 46 | self.bn2 = nn.BatchNorm1d(dim_in) 47 | 48 | def forward(self, x, lower_f): 49 | if lower_f is not None: 50 | x = torch.cat([x, lower_f], dim=1) 51 | x = self.bn2(x) 52 | out = self.lrelu(self.in_layer(x)) 53 | out = self.bn(out) 54 | out = self.hidden_layer(out) 55 | return out, self.out_layer(self.relu(out)).squeeze() 56 | 57 | @classmethod 58 | def get_model(cls, stage, opt): 59 | if stage == 0: 60 | dim_in = opt.feat_d 61 | else: 62 | dim_in = opt.feat_d + opt.hidden_d 63 | model = MLP_2HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 64 | return model 65 | 66 | class MLP_3HL(nn.Module): 67 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 68 | super(MLP_3HL, self).__init__() 69 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 70 | self.dropout_layer = nn.Dropout(0.0) 71 | self.lrelu = nn.LeakyReLU(0.1) 72 | self.relu = nn.ReLU() 73 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 74 | self.out_layer = nn.Linear(dim_hidden1, 1) 75 | self.bn = nn.BatchNorm1d(dim_hidden1) 76 | 
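# bn normalizes the hidden activations; bn2 (next line) normalizes the concatenated input
# [x, lower_f], i.e. the raw features plus the penultimate features propagated from the
# previous weak learner.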
self.bn2 = nn.BatchNorm1d(dim_in) 77 | # print('Batch normalization is processed!') 78 | 79 | def forward(self, x, lower_f): 80 | if lower_f is not None: 81 | x = torch.cat([x, lower_f], dim=1) 82 | x = self.bn2(x) 83 | out = self.lrelu(self.in_layer(x)) 84 | out = self.bn(out) 85 | out = self.lrelu(self.hidden_layer(out)) 86 | out = self.bn(out) 87 | out = self.hidden_layer(out) 88 | return out, self.out_layer(self.relu(out)).squeeze() 89 | 90 | @classmethod 91 | def get_model(cls, stage, opt): 92 | if stage == 0: 93 | dim_in = opt.feat_d 94 | else: 95 | dim_in = opt.feat_d + opt.hidden_d 96 | model = MLP_3HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 97 | return model 98 | 99 | class MLP_4HL(nn.Module): 100 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 101 | super(MLP_4HL, self).__init__() 102 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 103 | self.dropout_layer = nn.Dropout(0.0) 104 | self.lrelu = nn.LeakyReLU(0.1) 105 | self.relu = nn.ReLU() 106 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 107 | self.out_layer = nn.Linear(dim_hidden1, 1) 108 | self.bn = nn.BatchNorm1d(dim_hidden1) 109 | self.bn2 = nn.BatchNorm1d(dim_in) 110 | # print('Batch normalization is processed!') 111 | 112 | def forward(self, x, lower_f): 113 | if lower_f is not None: 114 | x = torch.cat([x, lower_f], dim=1) 115 | x = self.bn2(x) 116 | out = self.lrelu(self.in_layer(x)) #HL-1 117 | out = self.bn(out) 118 | out = self.lrelu(self.hidden_layer(out)) #HL-2 119 | out = self.bn(out) 120 | out = self.lrelu(self.hidden_layer(out)) #HL-3 121 | out = self.bn(out) 122 | out = self.hidden_layer(out) #HL-4 123 | return out, self.out_layer(self.relu(out)).squeeze() 124 | 125 | @classmethod 126 | def get_model(cls, stage, opt): 127 | if stage == 0: 128 | dim_in = opt.feat_d 129 | else: 130 | dim_in = opt.feat_d + opt.hidden_d 131 | model = MLP_4HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 132 | return model 133 | 134 | 135 | class DNN(nn.Module): 136 | def __init__(self, dim_in, dim_hidden, n_hidden=20, sparse=False, bn=True, drop_out=0.3): 137 | super(DNN, self).__init__() 138 | if sparse: 139 | self.in_layer = SpLinear(dim_in, dim_hidden) 140 | else: 141 | self.in_layer = nn.Linear(dim_in, dim_hidden) 142 | self.in_act = nn.SELU() 143 | hidden_layers = [] 144 | for _ in range(n_hidden): 145 | hidden_layers.append(nn.Linear(dim_hidden, dim_hidden)) 146 | if bn: 147 | hidden_layers.append(nn.BatchNorm1d(dim_hidden)) 148 | hidden_layers.append(nn.SELU()) 149 | if drop_out > 0: 150 | hidden_layers.append(nn.Dropout(drop_out)) 151 | self.hidden_layers = nn.Sequential(*hidden_layers) 152 | self.out_layer = nn.Linear(dim_hidden, 1) 153 | 154 | def forward(self, x): 155 | out = self.in_act(self.in_layer(x)) 156 | out = self.hidden_layers(out) 157 | out = self.out_layer(out) 158 | return out.squeeze() 159 | -------------------------------------------------------------------------------- /L2R/models/splinear.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class SpLinearFunc(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, input, weight, bias=None): 10 | ctx.save_for_backward(input, weight, bias) 11 | output = input.mm(weight.t()) 12 | if bias is not None: 13 | output += bias.unsqueeze(0).expand_as(output) 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx, 
grad_output): 18 | input, weight, bias = ctx.saved_tensors 19 | grad_input = grad_weight = grad_bias = None 20 | 21 | if ctx.needs_input_grad[0]: 22 | grad_input = grad_output.mm(weight) 23 | if ctx.needs_input_grad[1]: 24 | grad_weight = (input.t().mm(grad_output)).t() 25 | if bias is not None and ctx.needs_input_grad[2]: 26 | grad_bias = grad_output.sum(0).squeeze(0) 27 | 28 | return grad_input, grad_weight, grad_bias 29 | 30 | splinear = SpLinearFunc.apply 31 | 32 | class SpLinear(nn.Module): 33 | def __init__(self, input_features, output_features, bias=True): 34 | super(SpLinear, self).__init__() 35 | self.input_features = input_features 36 | self.output_features = output_features 37 | self.weight = nn.Parameter(torch.Tensor(output_features, input_features)) 38 | if bias: 39 | self.bias = nn.Parameter(torch.Tensor(output_features)) 40 | else: 41 | self.register_parameter('bias', None) 42 | #TODO write a default initialization 43 | stdv = 1. / math.sqrt(self.weight.size(1)) 44 | self.weight.data.uniform_(-stdv, stdv) 45 | 46 | def forward(self, input): 47 | return splinear(input, self.weight, self.bias) 48 | -------------------------------------------------------------------------------- /L2R/results/results_readme.txt: -------------------------------------------------------------------------------- 1 | Your results will be saved here. -------------------------------------------------------------------------------- /L2R/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | ### Feature Table ### 5 | # yahoo 519 - 1 6 | # microsoft 136 7 | dataset=microsoft 8 | 9 | BASEDIR=$(dirname "$0") 10 | OUTDIR="${BASEDIR}/ckpt/" 11 | 12 | if [ ! -d "${OUTDIR}" ] 13 | then 14 | echo "Output dir ${OUTDIR} does not exist, creating..." 15 | mkdir -p ${OUTDIR} 16 | fi 17 | 18 | CUDA_VISIBLE_DEVICES=0 python -u main_l2r_idiv_cv.py \ 19 | --data_dir ${BASEDIR}/../data \ 20 | --model_version main_l2r_idiv_cv.py \ 21 | --model_order second \ 22 | --feat_d 136 \ 23 | --hidden_d 64 \ 24 | --boost_rate 1 \ 25 | --lr 0.005 \ 26 | --L2 1.0e-3 \ 27 | --num_nets 40 \ 28 | --data ${dataset} \ 29 | --batch_size 10000 \ 30 | --epochs_per_stage 2 \ 31 | --correct_epoch 2 \ 32 | --normalization True \ 33 | --sigma 1. \ 34 | --cv True \ 35 | --cuda 36 | -------------------------------------------------------------------------------- /Model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sbadirli/GrowNet/6b045243477766bef1990218504f7de6645d24a3/Model.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GrowNet 2 | 3 | Original PyTorch implementation of "Gradient Boosting Neural Networks: GrowNet" 4 | 5 | Paper at: https://arxiv.org/pdf/2002.07971.pdf 6 | 7 |
8 | ![GrowNet model architecture](Model.png) 9 | 
11 | 12 | ## Getting Started 13 | 14 | In this paper, we combine the power of gradient boosting with the flexibility and 15 | versatility of neural networks and introduce a new modelling paradigm called GrowNet that can 16 | build up a DNN layer by layer. Instead of decision trees, we use shallow neural networks as our 17 | weak learners in a general gradient boosting framework that can be applied to a wide variety of tasks 18 | spanning classification, regression and ranking. We introduce further innovations like adding second 19 | order statistics to the training process, and also including a global corrective step that has been shown, 20 | both in theory and in empirical evaluation, to provide performance lift and precise fine-tuning to the specific task at hand. 21 | 22 | 23 | ## Prerequisites 24 | 25 | The code was implemented in Python 3.6.10 and utilizes the packages listed in the requirements.txt file (see it for the full list). The platform I used was linux-64. The most important packages you need are the following: 26 | ``` 27 | cudatoolkit=10.1.243 28 | numpy=1.18.1 29 | pandas=1.0.0 30 | python=3.6.10 31 | pytorch=1.4.0 32 | ``` 33 | 34 | ## Installing 35 | 36 | To run the code, you may create a conda environment (assuming you already have miniconda3 installed) by running the following command in a terminal: 37 | 38 | ``` 39 | conda create --name grownet --file requirements.txt 40 | ``` 41 | 42 | ## Data 43 | 44 | You can download the datasets used in the paper from [Google Drive](https://drive.google.com/open?id=1NnBpwvfSdqs-lRb5UFIC-q8P455o3vO3). Create a `data` folder under `GrowNet` and put the data under this folder. 45 | 46 | 47 | #### 1. Classification: 48 | The original HIGGS data is split into train and test sets (same as done in the XGBoost paper) using the higgs2libsvm.py script. 49 | 50 | #### 2. Learning to Rank (L2R): 51 | The data loading pipeline for the L2R task is implemented by taking the Microsoft (MSLR-WEB10K) dataset as a baseline. We also converted Yahoo into this format (the jupyter notebook "yahoo2mslr" does this conversion). Thus, if you want to use other L2R datasets with GrowNet, please convert them into the MSLR format. Below you can find a simple sample with just 10 features: 52 | ``` 53 | 0 qid:10 1:2 2:0 3:0 4:0 5:2 6:0.666667 7:0 8:0 9:0 10:0.666667 54 | ``` 55 | The first field is the label, the second is the query id, and the rest are the data features. 56 | 57 | #### 3. Regression: 58 | Training and test splits of the regression datasets are done in the jupyter notebook "reg_train_test_split". The data link above already contains the split data. 59 | 60 | ## Experiments 61 | 62 | To reproduce the results from the paper, first activate the conda virtual environment: 63 | 64 | ``` 65 | conda activate grownet 66 | ``` 67 | Then simply navigate to the task folder (Classification, L2R or Regression) and execute the following command in a terminal: 68 | 69 | ``` 70 | ./train.sh 71 | ``` 72 | 73 | You may change the dataset, the number of hidden layers, the number of hidden units per hidden layer, the batch size, the learning rate, etc. in train.sh. 74 | 75 | The results may vary by 1% or less between identical runs due to random initialization. 76 | 77 | ### Contact 78 | 79 | Feel free to drop me an email if you have any questions: s.badirli@gmail.com 80 | 81 | ### Acknowledgments 82 | 83 | * To his immense credit, my colleague, Xuanqing Liu (https://xuanqing94.github.io/), did an awesome job on the code development. 
84 | -------------------------------------------------------------------------------- /Regression/Readme.md: -------------------------------------------------------------------------------- 1 | - Data loading and dataloader creation are handled in GrowNet/Regression/data/data.py. If you want to try new data, please check the LibSVMRegData class in data.py for the expected format. 2 | 3 | - The individual model class and the ensemble architecture are in GrowNet/Regression/models: mlp.py and dynamic_net.py. 4 | You can increase the number of hidden layers or change the activation functions in mlp.py. 5 | 6 | - train.sh will reproduce the results for the YearPredictionMSD (music year prediction) data. You can change the dataset to slice_localization and adjust the feature dimension accordingly. You may also want to increase the hidden layer dimension to 128 or more for the slice localization data. -------------------------------------------------------------------------------- /Regression/data/data.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from torch.utils.data import Dataset 8 | from scipy.sparse import csr_matrix 9 | from sklearn.datasets import load_svmlight_file 10 | from sklearn import datasets 11 | from sklearn.model_selection import train_test_split 12 | 13 | 14 | class LibSVMData(Dataset): 15 | def __init__(self, root, dim, normalization, pos=1, neg=-1, out_pos=1, out_neg=-1): 16 | self.feat, self.label = load_svmlight_file(root) 17 | 18 | self.feat = csr_matrix((self.feat.data, self.feat.indices, self.feat.indptr), shape=(len(self.label), dim)) 19 | self.feat = self.feat.toarray().astype(np.float32) 20 | 21 | self.label = self.label.astype(np.float32) 22 | idx_pos = self.label == pos 23 | idx_neg = self.label == neg 24 | self.label[idx_pos] = out_pos 25 | self.label[idx_neg] = out_neg 26 | 27 | def __getitem__(self, index): 28 | arr = self.feat[index, :] 29 | return arr, self.label[index] 30 | def __len__(self): 31 | return len(self.label) 32 | 33 | class LibSVMRankData(Dataset): 34 | def __init__(self, root2data, root2qid, dim): 35 | self.feat, self.label = load_svmlight_file(root2data) 36 | self.qid = np.loadtxt(root2qid, dtype='int32') 37 | self.feat = self.feat.toarray().astype(np.float32) 38 | self.label = self.label.astype(np.float32) 39 | self.feat = self.feat[:, ~(self.feat == 0).all(0)] 40 | print(self.feat.shape[1]) 41 | 42 | def __getitem__(self, index): 43 | return self.feat[index, :], self.label[index], self.qid[index] 44 | 45 | def __len__(self): 46 | return len(self.label) 47 | 48 | class LibSVMRegData(Dataset): 49 | def __init__(self, root, dim, normalization): 50 | data = np.load(root) 51 | self.feat, self.label = data['features'], data['labels'] 52 | del data 53 | self.feat = self.feat.astype(np.float32) 54 | self.label = self.label.astype(np.float32) 55 | #self.feat = self.feat[:, ~(self.feat == 0).all(0)] 56 | #import ipdb; ipdb.set_trace() 57 | 58 | print(self.feat.shape[1]) 59 | 60 | def __getitem__(self, index): 61 | return self.feat[index, :], self.label[index] 62 | 63 | def __len__(self): 64 | return len(self.label) 65 | 66 | class LibCSVData(Dataset): 67 | def __init__(self, root, dim, pos=1, neg=-1): 68 | self.data = np.loadtxt(root, delimiter=',').astype(np.float32) 69 | self.feat = self.data[:, 1:] 70 | self.label = self.data[:, 0] 71 | self.label[self.label == pos] = 1 72 | self.label[self.label == neg] = -1 73 | 74 | def __getitem__(self, index): 75 | #arr = 
np.log(self.feat[index, :] + 1.0e-5) 76 | #arr = np.log10(self.feat[index, :] + 1.0e-5) 77 | arr = self.feat[index, :] 78 | return arr, self.label[index] 79 | 80 | def __len__(self): 81 | return len(self.label) 82 | class CriteoCSVData(Dataset): 83 | def __init__(self, root, dim, normalization, pos=1, neg=-1): 84 | # Reading the data into panda data frame 85 | self.data = pd.read_csv(root, header=None, dtype='float32') 86 | # extracting labels (0, 1) and weights 87 | self.label = self.data.iloc[:, -2] 88 | self.weights = self.data.iloc[:, -1] 89 | self.data = self.data.iloc[:, :-2] 90 | # transferring labels from {0, 1} to {-1, 1} 91 | self.label[self.label == pos] = 1 92 | self.label[self.label == neg] = -1 93 | 94 | # Applying log transformation 95 | mm = self.data.min().min() # to prevent 0 division 96 | if normalization: 97 | # Filling Nan values: Simple approach, mean of the that column or interpolation 98 | self.data = self.data.transform(lambda x: np.log(x - mm + 1)) 99 | #self.data = self.data.interpolate(method='polynomial', order=2) 100 | self.data = self.data.fillna(self.data.mean()) # To fill the rest of Nan values left untouched on the corners 101 | #self.data = (self.data - self.data.mean())/self.data.std() 102 | #self.feat = self.data.to_numpy('float32') 103 | self.data = self.data.to_numpy('float32') 104 | def __getitem__(self, index): 105 | #arr = np.log(self.feat[index, :] + 1.0e-5) 106 | #arr = np.log10(self.feat[index, :] + 1.0e-5) 107 | #arr = self.feat[index, :] 108 | arr = self.data[index, :] 109 | return arr, self.label[index], self.weights[index] 110 | 111 | def __len__(self): 112 | return len(self.label) 113 | -------------------------------------------------------------------------------- /Regression/data/sparse_data.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import os 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import Dataset 7 | from scipy.sparse import csr_matrix 8 | from sklearn.datasets import load_svmlight_file 9 | 10 | class LibSVMDataSp(Dataset): 11 | def __init__(self, root, dim_in, pos=1, neg=-1): 12 | self.feat, self.label = load_svmlight_file(root) 13 | self.feat = csr_matrix((self.feat.data, self.feat.indices, self.feat.indptr), shape=(len(self.label), dim_in)) 14 | self.feat = self.feat.astype(np.float32) 15 | self.label = self.label.astype(np.float32) 16 | self.label[self.label == pos] = 1 17 | self.label[self.label == neg] = -1 18 | 19 | def __getitem__(self, index): 20 | arr = self.feat[index, :] 21 | return arr, self.label[index] 22 | def __len__(self): 23 | return len(self.label) 24 | 25 | 26 | -------------------------------------------------------------------------------- /Regression/data/sparseloader.py: -------------------------------------------------------------------------------- 1 | import scipy 2 | import random 3 | import torch 4 | import torch.multiprocessing as multiprocessing 5 | # Changed _update_worker_pids into _set_worker_pids, due to new version of pytorch 6 | from torch._C import _set_worker_signal_handlers, _set_worker_pids, \ 7 | _remove_worker_pids, _error_if_any_worker_fails 8 | from torch.utils.data.sampler import SequentialSampler, RandomSampler, BatchSampler 9 | import signal 10 | import functools 11 | import collections 12 | import re 13 | import sys 14 | import threading 15 | import traceback 16 | import os 17 | import time 18 | from torch._six import * #string_classes, int_classes, FileNotFoundError 19 | 20 | 
IS_WINDOWS = sys.platform == "win32" 21 | if IS_WINDOWS: 22 | import ctypes 23 | from ctypes.wintypes import DWORD, BOOL, HANDLE 24 | 25 | if sys.version_info[0] == 2: 26 | import Queue as queue 27 | else: 28 | import queue 29 | 30 | 31 | class ExceptionWrapper(object): 32 | r"""Wraps an exception plus traceback to communicate across threads""" 33 | 34 | def __init__(self, exc_info): 35 | self.exc_type = exc_info[0] 36 | self.exc_msg = "".join(traceback.format_exception(*exc_info)) 37 | 38 | 39 | _use_shared_memory = False 40 | r"""Whether to use shared memory in default_collate""" 41 | 42 | MANAGER_STATUS_CHECK_INTERVAL = 5.0 43 | 44 | if IS_WINDOWS: 45 | # On Windows, the parent ID of the worker process remains unchanged when the manager process 46 | # is gone, and the only way to check it through OS is to let the worker have a process handle 47 | # of the manager and ask if the process status has changed. 48 | class ManagerWatchdog(object): 49 | def __init__(self): 50 | self.manager_pid = os.getppid() 51 | 52 | self.kernel32 = ctypes.WinDLL('kernel32', use_last_error=True) 53 | self.kernel32.OpenProcess.argtypes = (DWORD, BOOL, DWORD) 54 | self.kernel32.OpenProcess.restype = HANDLE 55 | self.kernel32.WaitForSingleObject.argtypes = (HANDLE, DWORD) 56 | self.kernel32.WaitForSingleObject.restype = DWORD 57 | 58 | # Value obtained from https://msdn.microsoft.com/en-us/library/ms684880.aspx 59 | SYNCHRONIZE = 0x00100000 60 | self.manager_handle = self.kernel32.OpenProcess(SYNCHRONIZE, 0, self.manager_pid) 61 | 62 | if not self.manager_handle: 63 | raise ctypes.WinError(ctypes.get_last_error()) 64 | 65 | def is_alive(self): 66 | return self.kernel32.WaitForSingleObject(self.manager_handle, 0) != 0 67 | else: 68 | class ManagerWatchdog(object): 69 | def __init__(self): 70 | self.manager_pid = os.getppid() 71 | 72 | def is_alive(self): 73 | return os.getppid() == self.manager_pid 74 | 75 | 76 | def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, init_fn, worker_id): 77 | global _use_shared_memory 78 | _use_shared_memory = True 79 | 80 | _set_worker_signal_handlers() 81 | 82 | torch.set_num_threads(1) 83 | random.seed(seed) 84 | torch.manual_seed(seed) 85 | 86 | if init_fn is not None: 87 | init_fn(worker_id) 88 | 89 | watchdog = ManagerWatchdog() 90 | 91 | while True: 92 | try: 93 | r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL) 94 | except queue.Empty: 95 | if watchdog.is_alive(): 96 | continue 97 | else: 98 | break 99 | if r is None: 100 | break 101 | idx, batch_indices = r 102 | try: 103 | samples = collate_fn([dataset[i] for i in batch_indices]) 104 | except Exception: 105 | data_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 106 | else: 107 | data_queue.put((idx, samples)) 108 | del samples 109 | 110 | 111 | def _worker_manager_loop(in_queue, out_queue, done_event, pin_memory, device_id): 112 | if pin_memory: 113 | torch.cuda.set_device(device_id) 114 | 115 | while True: 116 | try: 117 | r = in_queue.get() 118 | except Exception: 119 | if done_event.is_set(): 120 | return 121 | raise 122 | if r is None: 123 | break 124 | if isinstance(r[1], ExceptionWrapper): 125 | out_queue.put(r) 126 | continue 127 | idx, batch = r 128 | try: 129 | if pin_memory: 130 | batch = pin_memory_batch(batch) 131 | except Exception: 132 | out_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 133 | else: 134 | out_queue.put((idx, batch)) 135 | 136 | numpy_type_map = { 137 | 'float64': torch.DoubleTensor, 138 | 'float32': torch.FloatTensor, 139 | 'float16': torch.HalfTensor, 140 | 
'int64': torch.LongTensor, 141 | 'int32': torch.IntTensor, 142 | 'int16': torch.ShortTensor, 143 | 'int8': torch.CharTensor, 144 | 'uint8': torch.ByteTensor, 145 | } 146 | 147 | 148 | def default_collate(batch): 149 | r"""Puts each data field into a tensor with outer dimension batch size""" 150 | 151 | error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" 152 | elem_type = type(batch[0]) 153 | if isinstance(batch[0], torch.Tensor): 154 | out = None 155 | if _use_shared_memory: 156 | # If we're in a background process, concatenate directly into a 157 | # shared memory tensor to avoid an extra copy 158 | numel = sum([x.numel() for x in batch]) 159 | storage = batch[0].storage()._new_shared(numel) 160 | out = batch[0].new(storage) 161 | return torch.stack(batch, 0, out=out) 162 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 163 | and elem_type.__name__ != 'string_': 164 | elem = batch[0] 165 | if elem_type.__name__ == 'ndarray': 166 | # array of string classes and object 167 | if re.search('[SaUO]', elem.dtype.str) is not None: 168 | raise TypeError(error_msg.format(elem.dtype)) 169 | 170 | return torch.stack([torch.from_numpy(b) for b in batch], 0) 171 | if elem.shape == (): # scalars 172 | py_type = float if elem.dtype.name.startswith('float') else int 173 | return numpy_type_map[elem.dtype.name](list(map(py_type, batch))) 174 | elif isinstance(batch[0], int_classes): 175 | return torch.LongTensor(batch) 176 | elif isinstance(batch[0], float): 177 | return torch.DoubleTensor(batch) 178 | elif isinstance(batch[0], string_classes): 179 | return batch 180 | elif isinstance(batch[0], collections.Mapping): 181 | return {key: default_collate([d[key] for d in batch]) for key in batch[0]} 182 | elif isinstance(batch[0], collections.Sequence): 183 | transposed = zip(*batch) 184 | return [default_collate(samples) for samples in transposed] 185 | elif isinstance(batch[0], scipy.sparse.csr.csr_matrix): 186 | row_idx = [] 187 | col_idx = [] 188 | val = [] 189 | for i, b in enumerate(batch): 190 | col = b.indices 191 | row_idx.extend([i] * len(col)) 192 | col_idx.extend(col) 193 | val.extend(b.data) 194 | i = torch.LongTensor([row_idx, col_idx]) 195 | v = torch.FloatTensor(val) 196 | return torch.sparse.FloatTensor(i, v, torch.Size([len(batch), batch[0].shape[1]])) 197 | raise TypeError((error_msg.format(type(batch[0])))) 198 | 199 | 200 | def pin_memory_batch(batch): 201 | if isinstance(batch, torch.Tensor): 202 | return batch.pin_memory() 203 | elif isinstance(batch, string_classes): 204 | return batch 205 | elif isinstance(batch, collections.Mapping): 206 | return {k: pin_memory_batch(sample) for k, sample in batch.items()} 207 | elif isinstance(batch, collections.Sequence): 208 | return [pin_memory_batch(sample) for sample in batch] 209 | else: 210 | return batch 211 | 212 | 213 | _SIGCHLD_handler_set = False 214 | r"""Whether SIGCHLD handler is set for DataLoader worker failures. 
Only one 215 | handler needs to be set for all DataLoaders in a process.""" 216 | 217 | 218 | def _set_SIGCHLD_handler(): 219 | # Windows doesn't support SIGCHLD handler 220 | if sys.platform == 'win32': 221 | return 222 | # can't set signal in child threads 223 | if not isinstance(threading.current_thread(), threading._MainThread): 224 | return 225 | global _SIGCHLD_handler_set 226 | if _SIGCHLD_handler_set: 227 | return 228 | previous_handler = signal.getsignal(signal.SIGCHLD) 229 | if not callable(previous_handler): 230 | previous_handler = None 231 | 232 | def handler(signum, frame): 233 | _error_if_any_worker_fails() 234 | if previous_handler is not None: 235 | previous_handler(signum, frame) 236 | 237 | signal.signal(signal.SIGCHLD, handler) 238 | _SIGCHLD_handler_set = True 239 | 240 | 241 | class _DataLoaderIter(object): 242 | r"""Iterates once over the DataLoader's dataset, as specified by the sampler""" 243 | 244 | def __init__(self, loader): 245 | self.dataset = loader.dataset 246 | self.collate_fn = loader.collate_fn 247 | self.batch_sampler = loader.batch_sampler 248 | self.num_workers = loader.num_workers 249 | self.pin_memory = loader.pin_memory and torch.cuda.is_available() 250 | self.timeout = loader.timeout 251 | self.done_event = threading.Event() 252 | 253 | self.sample_iter = iter(self.batch_sampler) 254 | 255 | base_seed = torch.LongTensor(1).random_().item() 256 | 257 | if self.num_workers > 0: 258 | self.worker_init_fn = loader.worker_init_fn 259 | self.index_queues = [multiprocessing.Queue() for _ in range(self.num_workers)] 260 | self.worker_queue_idx = 0 261 | self.worker_result_queue = multiprocessing.SimpleQueue() 262 | self.batches_outstanding = 0 263 | self.worker_pids_set = False 264 | self.shutdown = False 265 | self.send_idx = 0 266 | self.rcvd_idx = 0 267 | self.reorder_dict = {} 268 | 269 | self.workers = [ 270 | multiprocessing.Process( 271 | target=_worker_loop, 272 | args=(self.dataset, self.index_queues[i], 273 | self.worker_result_queue, self.collate_fn, base_seed + i, 274 | self.worker_init_fn, i)) 275 | for i in range(self.num_workers)] 276 | 277 | if self.pin_memory or self.timeout > 0: 278 | self.data_queue = queue.Queue() 279 | if self.pin_memory: 280 | maybe_device_id = torch.cuda.current_device() 281 | else: 282 | # do not initialize cuda context if not necessary 283 | maybe_device_id = None 284 | self.worker_manager_thread = threading.Thread( 285 | target=_worker_manager_loop, 286 | args=(self.worker_result_queue, self.data_queue, self.done_event, self.pin_memory, 287 | maybe_device_id)) 288 | self.worker_manager_thread.daemon = True 289 | self.worker_manager_thread.start() 290 | else: 291 | self.data_queue = self.worker_result_queue 292 | 293 | for w in self.workers: 294 | w.daemon = True # ensure that the worker exits on process exit 295 | w.start() 296 | 297 | _set_worker_pids(id(self), tuple(w.pid for w in self.workers)) 298 | _set_SIGCHLD_handler() 299 | self.worker_pids_set = True 300 | 301 | # prime the prefetch loop 302 | for _ in range(2 * self.num_workers): 303 | self._put_indices() 304 | 305 | def __len__(self): 306 | return len(self.batch_sampler) 307 | 308 | def _get_batch(self): 309 | if self.timeout > 0: 310 | try: 311 | return self.data_queue.get(timeout=self.timeout) 312 | except queue.Empty: 313 | raise RuntimeError('DataLoader timed out after {} seconds'.format(self.timeout)) 314 | else: 315 | return self.data_queue.get() 316 | 317 | def __next__(self): 318 | if self.num_workers == 0: # same-process loading 319 | indices = 
next(self.sample_iter) # may raise StopIteration 320 | batch = self.collate_fn([self.dataset[i] for i in indices]) 321 | if self.pin_memory: 322 | batch = pin_memory_batch(batch) 323 | return batch 324 | 325 | # check if the next sample has already been generated 326 | if self.rcvd_idx in self.reorder_dict: 327 | batch = self.reorder_dict.pop(self.rcvd_idx) 328 | return self._process_next_batch(batch) 329 | 330 | if self.batches_outstanding == 0: 331 | self._shutdown_workers() 332 | raise StopIteration 333 | 334 | while True: 335 | assert (not self.shutdown and self.batches_outstanding > 0) 336 | idx, batch = self._get_batch() 337 | self.batches_outstanding -= 1 338 | if idx != self.rcvd_idx: 339 | # store out-of-order samples 340 | self.reorder_dict[idx] = batch 341 | continue 342 | return self._process_next_batch(batch) 343 | 344 | next = __next__ # Python 2 compatibility 345 | 346 | def __iter__(self): 347 | return self 348 | 349 | def _put_indices(self): 350 | assert self.batches_outstanding < 2 * self.num_workers 351 | indices = next(self.sample_iter, None) 352 | if indices is None: 353 | return 354 | self.index_queues[self.worker_queue_idx].put((self.send_idx, indices)) 355 | self.worker_queue_idx = (self.worker_queue_idx + 1) % self.num_workers 356 | self.batches_outstanding += 1 357 | self.send_idx += 1 358 | 359 | def _process_next_batch(self, batch): 360 | self.rcvd_idx += 1 361 | self._put_indices() 362 | if isinstance(batch, ExceptionWrapper): 363 | raise batch.exc_type(batch.exc_msg) 364 | return batch 365 | 366 | def __getstate__(self): 367 | 368 | raise NotImplementedError("_DataLoaderIter cannot be pickled") 369 | 370 | def _shutdown_workers(self): 371 | try: 372 | if not self.shutdown: 373 | self.shutdown = True 374 | self.done_event.set() 375 | for q in self.index_queues: 376 | q.put(None) 377 | # if some workers are waiting to put, make place for them 378 | try: 379 | while not self.worker_result_queue.empty(): 380 | self.worker_result_queue.get() 381 | except (FileNotFoundError, ImportError): 382 | pass 383 | self.worker_result_queue.put(None) 384 | finally: 385 | # removes pids no matter what 386 | if self.worker_pids_set: 387 | _remove_worker_pids(id(self)) 388 | self.worker_pids_set = False 389 | 390 | def __del__(self): 391 | if self.num_workers > 0: 392 | self._shutdown_workers() 393 | 394 | 395 | class DataLoader(object): 396 | r""" 397 | Data loader. Combines a dataset and a sampler, and provides 398 | single- or multi-process iterators over the dataset. 399 | 400 | Arguments: 401 | dataset (Dataset): dataset from which to load the data. 402 | batch_size (int, optional): how many samples per batch to load 403 | (default: 1). 404 | shuffle (bool, optional): set to ``True`` to have the data reshuffled 405 | at every epoch (default: False). 406 | sampler (Sampler, optional): defines the strategy to draw samples from 407 | the dataset. If specified, ``shuffle`` must be False. 408 | batch_sampler (Sampler, optional): like sampler, but returns a batch of 409 | indices at a time. Mutually exclusive with batch_size, shuffle, 410 | sampler, and drop_last. 411 | num_workers (int, optional): how many subprocesses to use for data 412 | loading. 0 means that the data will be loaded in the main process. 413 | (default: 0) 414 | collate_fn (callable, optional): merges a list of samples to form a mini-batch. 415 | pin_memory (bool, optional): If ``True``, the data loader will copy tensors 416 | into CUDA pinned memory before returning them. 
417 | drop_last (bool, optional): set to ``True`` to drop the last incomplete batch, 418 | if the dataset size is not divisible by the batch size. If ``False`` and 419 | the size of dataset is not divisible by the batch size, then the last batch 420 | will be smaller. (default: False) 421 | timeout (numeric, optional): if positive, the timeout value for collecting a batch 422 | from workers. Should always be non-negative. (default: 0) 423 | worker_init_fn (callable, optional): If not None, this will be called on each 424 | worker subprocess with the worker id (an int in ``[0, num_workers - 1]``) as 425 | input, after seeding and before data loading. (default: None) 426 | 427 | .. note:: By default, each worker will have its PyTorch seed set to 428 | ``base_seed + worker_id``, where ``base_seed`` is a long generated 429 | by main process using its RNG. However, seeds for other libraies 430 | may be duplicated upon initializing workers (w.g., NumPy), causing 431 | each worker to return identical random numbers. (See 432 | :ref:`dataloader-workers-random-seed` section in FAQ.) You may 433 | use ``torch.initial_seed()`` to access the PyTorch seed for each 434 | worker in :attr:`worker_init_fn`, and use it to set other seeds 435 | before data loading. 436 | 437 | .. warning:: If ``spawn`` start method is used, :attr:`worker_init_fn` cannot be an 438 | unpicklable object, e.g., a lambda function. 439 | """ 440 | 441 | __initialized = False 442 | 443 | def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, 444 | num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False, 445 | timeout=0, worker_init_fn=None): 446 | self.dataset = dataset 447 | self.batch_size = batch_size 448 | self.num_workers = num_workers 449 | self.collate_fn = collate_fn 450 | self.pin_memory = pin_memory 451 | self.drop_last = drop_last 452 | self.timeout = timeout 453 | self.worker_init_fn = worker_init_fn 454 | 455 | if timeout < 0: 456 | raise ValueError('timeout option should be non-negative') 457 | 458 | if batch_sampler is not None: 459 | if batch_size > 1 or shuffle or sampler is not None or drop_last: 460 | raise ValueError('batch_sampler option is mutually exclusive ' 461 | 'with batch_size, shuffle, sampler, and ' 462 | 'drop_last') 463 | self.batch_size = None 464 | self.drop_last = None 465 | 466 | if sampler is not None and shuffle: 467 | raise ValueError('sampler option is mutually exclusive with ' 468 | 'shuffle') 469 | 470 | if self.num_workers < 0: 471 | raise ValueError('num_workers option cannot be negative; ' 472 | 'use num_workers=0 to disable multiprocessing.') 473 | 474 | if batch_sampler is None: 475 | if sampler is None: 476 | if shuffle: 477 | sampler = RandomSampler(dataset) 478 | else: 479 | sampler = SequentialSampler(dataset) 480 | batch_sampler = BatchSampler(sampler, batch_size, drop_last) 481 | 482 | self.sampler = sampler 483 | self.batch_sampler = batch_sampler 484 | self.__initialized = True 485 | 486 | def __setattr__(self, attr, val): 487 | if self.__initialized and attr in ('batch_size', 'sampler', 'drop_last'): 488 | raise ValueError('{} attribute should not be set after {} is ' 489 | 'initialized'.format(attr, self.__class__.__name__)) 490 | 491 | super(DataLoader, self).__setattr__(attr, val) 492 | 493 | def __iter__(self): 494 | return _DataLoaderIter(self) 495 | 496 | def __len__(self): 497 | return len(self.batch_sampler) 498 | 499 | -------------------------------------------------------------------------------- 
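The vendored DataLoader above differs from the stock PyTorch loader mainly in its default_collate, which stacks scipy csr_matrix samples into a single torch.sparse.FloatTensor per mini-batch. Below is a minimal usage sketch (not a file shipped with the repository); it assumes the pinned Python 3.6 / PyTorch 1.4 environment from requirements.txt, that it is run from a task folder such as Regression/, and that the file path and feature dimension are illustrative placeholders.

```
# Sketch only: './data/example.libsvm' and dim_in=100 are hypothetical values.
from data.sparse_data import LibSVMDataSp   # dataset whose __getitem__ returns scipy CSR rows
from data.sparseloader import DataLoader    # the vendored loader defined above

train_set = LibSVMDataSp('./data/example.libsvm', dim_in=100)
# With num_workers=0 the batch is collated in the main process; default_collate
# detects csr_matrix samples and builds one sparse tensor per mini-batch.
train_loader = DataLoader(train_set, batch_size=256, shuffle=True, num_workers=0)

for x, y in train_loader:
    # x: torch.sparse.FloatTensor of size (batch, dim_in); y: dense tensor of +/-1 labels
    print(x.size(), y.size())
    break
```

The task scripts (e.g. main_reg_cv.py) build their train, validation and test loaders with this same DataLoader class.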
/Regression/data/untitled.py: -------------------------------------------------------------------------------- 1 | def convert2npz(input_filename, out_data_filename): # writes one 'label qid feature:value ...' text line per sample 2 | input = open(input_filename,"r") 3 | output_feature = open(out_data_filename,"w") 4 | #output_query = open(out_query_filename,"w") 5 | #output_label = open(out_query_filename2,"w") 6 | 7 | while True: 8 | line = input.readline() 9 | if not line: 10 | break 11 | tokens = line.split(' ') 12 | tokens[-1] = tokens[-1].strip() 13 | label = tokens[0] 14 | qid = int(tokens[1].split(':')[1]) 15 | 16 | #output_label.write(label + '\n') 17 | #output_query.write(str(qid) + '\n') 18 | output_feature.write(label+' ') 19 | output_feature.write(str(qid) + ' ') 20 | output_feature.write(' '.join(tokens[2:]) + '\n') 21 | 22 | input.close() 23 | #output_query.close() 24 | output_feature.close() 25 | #output_label.close() 26 | 27 | convert2npz("set1.train.txt","yahoo.train") 28 | convert2npz("set1.test.txt","yahoo.test") 29 | -------------------------------------------------------------------------------- /Regression/main_reg_cv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import argparse 4 | import copy 5 | import torch 6 | import torch.nn as nn 7 | import time 8 | from data.sparseloader import DataLoader 9 | from data.data import LibSVMData, LibCSVData, LibSVMRegData 10 | from data.sparse_data import LibSVMDataSp 11 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 12 | from models.dynamic_net import DynamicNet, ForwardType 13 | from sklearn.metrics import mean_squared_error 14 | from sklearn.preprocessing import StandardScaler, MinMaxScaler 15 | from torch.optim import SGD, Adam 16 | 17 | 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('--feat_d', type=int, required=True) 21 | parser.add_argument('--hidden_d', type=int, required=True) 22 | parser.add_argument('--boost_rate', type=float, required=True) 23 | parser.add_argument('--lr', type=float, required=True) 24 | parser.add_argument('--num_nets', type=int, required=True) 25 | parser.add_argument('--data', type=str, required=True) 26 | parser.add_argument('--tr', type=str, required=True) 27 | parser.add_argument('--te', type=str, required=True) 28 | parser.add_argument('--batch_size', type=int, required=True) 29 | parser.add_argument('--epochs_per_stage', type=int, required=True) 30 | parser.add_argument('--correct_epoch', type=int, required=True) 31 | parser.add_argument('--L2', type=float, required=True) 32 | parser.add_argument('--sparse', action='store_true') 33 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 34 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 35 | parser.add_argument('--out_f', type=str, required=True) 36 | parser.add_argument('--cuda', action='store_true') 37 | 38 | opt = parser.parse_args() 39 | 40 | if not opt.cuda: 41 | torch.set_num_threads(16) 42 | 43 | # prepare the dataset 44 | def get_data(): 45 | if opt.data in ['ca_housing', 'ailerons', 'YearPredictionMSD', 'slice_localization']: 46 | train = LibSVMRegData(opt.tr, opt.feat_d, opt.normalization) 47 | test = LibSVMRegData(opt.te, opt.feat_d, opt.normalization) 48 | val = [] 49 | if opt.cv: 50 | val = copy.deepcopy(train) 51 | print('Creating Validation set! 
\n') 52 | indices = list(range(len(train))) 53 | cut = int(len(train)*0.95) 54 | np.random.shuffle(indices) 55 | train_idx = indices[:cut] 56 | val_idx = indices[cut:] 57 | 58 | train.feat = train.feat[train_idx] 59 | train.label = train.label[train_idx] 60 | val.feat = val.feat[val_idx] 61 | val.label = val.label[val_idx] 62 | else: 63 | pass 64 | 65 | if opt.normalization: 66 | scaler = StandardScaler() 67 | scaler.fit(train.feat) 68 | train.feat = scaler.transform(train.feat) 69 | test.feat = scaler.transform(test.feat) 70 | if opt.cv: 71 | val.feat = scaler.transform(val.feat) 72 | print(f'#Train: {len(train)}, #Val: {len(val)} #Test: {len(test)}') 73 | return train, test, val 74 | 75 | 76 | def get_optim(params, lr, weight_decay): 77 | optimizer = Adam(params, lr, weight_decay=weight_decay) 78 | #optimizer = SGD(params, lr, weight_decay=weight_decay) 79 | return optimizer 80 | 81 | 82 | def root_mse(net_ensemble, loader): 83 | loss = 0 84 | total = 0 85 | 86 | for x, y in loader: 87 | if opt.cuda: 88 | x = x.cuda() 89 | 90 | with torch.no_grad(): 91 | _, out = net_ensemble.forward(x) 92 | y = y.cpu().numpy().reshape(len(y), 1) 93 | out = out.cpu().numpy().reshape(len(y), 1) 94 | loss += mean_squared_error(y, out)* len(y) 95 | total += len(y) 96 | return np.sqrt(loss / total) 97 | 98 | 99 | def init_gbnn(train): 100 | positive = negative = 0 101 | for i in range(len(train)): 102 | if train[i][1] > 0: 103 | positive += 1 104 | else: 105 | negative += 1 106 | blind_acc = max(positive, negative) / (positive + negative) 107 | print(f'Blind accuracy: {blind_acc}') 108 | #print(f'Blind Logloss: {blind_acc}') 109 | return float(np.log(positive / negative)) 110 | 111 | if __name__ == "__main__": 112 | 113 | train, test, val = get_data() 114 | N = len(train) 115 | print(opt.data + ' training and test datasets are loaded!') 116 | train_loader = DataLoader(train, opt.batch_size, shuffle=True, drop_last=False, num_workers=2) 117 | test_loader = DataLoader(test, opt.batch_size, shuffle=False, drop_last=False, num_workers=2) 118 | if opt.cv: 119 | val_loader = DataLoader(val, opt.batch_size, shuffle=True, drop_last=False, num_workers=2) 120 | best_rmse = pow(10, 6) 121 | val_rmse = best_rmse 122 | best_stage = opt.num_nets-1 123 | c0 = np.mean(train.label) #init_gbnn(train) 124 | net_ensemble = DynamicNet(c0, opt.boost_rate) 125 | loss_f1 = nn.MSELoss() 126 | loss_models = torch.zeros((opt.num_nets, 3)) 127 | for stage in range(opt.num_nets): 128 | t0 = time.time() 129 | model = MLP_2HL.get_model(stage, opt) # Initialize the model_k: f_k(x), multilayer perception v2 130 | if opt.cuda: 131 | model.cuda() 132 | 133 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 134 | net_ensemble.to_train() # Set the models in ensemble net to train mode 135 | stage_mdlloss = [] 136 | for epoch in range(opt.epochs_per_stage): 137 | for i, (x, y) in enumerate(train_loader): 138 | 139 | if opt.cuda: 140 | x= x.cuda() 141 | y = torch.as_tensor(y, dtype=torch.float32).cuda().view(-1, 1) 142 | middle_feat, out = net_ensemble.forward(x) 143 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 144 | grad_direction = -(out-y) 145 | 146 | _, out = model(x, middle_feat) 147 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 148 | loss = loss_f1(net_ensemble.boost_rate*out, grad_direction) # T 149 | 150 | model.zero_grad() 151 | loss.backward() 152 | optimizer.step() 153 | stage_mdlloss.append(loss.item()*len(y)) 154 | 155 | net_ensemble.add(model) 156 | sml = 
np.sqrt(np.sum(stage_mdlloss)/N) 157 | 158 | 159 | 160 | lr_scaler = 3 161 | # fully-corrective step 162 | stage_loss = [] 163 | if stage > 0: 164 | # Adjusting corrective step learning rate 165 | if stage % 15 == 0: 166 | #lr_scaler *= 2 167 | opt.lr /= 2 168 | opt.L2 /= 2 169 | optimizer = get_optim(net_ensemble.parameters(), opt.lr / lr_scaler, opt.L2) 170 | for _ in range(opt.correct_epoch): 171 | stage_loss = [] 172 | for i, (x, y) in enumerate(train_loader): 173 | if opt.cuda: 174 | x, y = x.cuda(), y.cuda().view(-1, 1) 175 | _, out = net_ensemble.forward_grad(x) 176 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 177 | 178 | loss = loss_f1(out, y) 179 | optimizer.zero_grad() 180 | loss.backward() 181 | optimizer.step() 182 | stage_loss.append(loss.item()*len(y)) 183 | #print(net_ensemble.boost_rate) 184 | # store model 185 | elapsed_tr = time.time()-t0 186 | sl = 0 187 | if stage_loss != []: 188 | sl = np.sqrt(np.sum(stage_loss)/N) 189 | 190 | print(f'Stage - {stage}, training time: {elapsed_tr: .1f} sec, model MSE loss: {sml: .5f}, Ensemble Net MSE Loss: {sl: .5f}') 191 | 192 | net_ensemble.to_file(opt.out_f) 193 | net_ensemble = DynamicNet.from_file(opt.out_f, lambda stage: MLP_2HL.get_model(stage, opt)) 194 | 195 | if opt.cuda: 196 | net_ensemble.to_cuda() 197 | net_ensemble.to_eval() # Set the models in ensemble net to eval mode 198 | 199 | # Train 200 | tr_rmse = root_mse(net_ensemble, train_loader) 201 | if opt.cv: 202 | val_rmse = root_mse(net_ensemble, val_loader) 203 | if val_rmse < best_rmse: 204 | best_rmse = val_rmse 205 | best_stage = stage 206 | 207 | te_rmse = root_mse(net_ensemble, test_loader) 208 | 209 | print(f'Stage: {stage} RMSE@Tr: {tr_rmse:.5f}, RMSE@Val: {val_rmse:.5f}, RMSE@Te: {te_rmse:.5f}') 210 | 211 | loss_models[stage, 0], loss_models[stage, 1] = tr_rmse, te_rmse 212 | 213 | tr_rmse, te_rmse = loss_models[best_stage, 0], loss_models[best_stage, 1] 214 | print(f'Best validation stage: {best_stage} RMSE@Tr: {tr_rmse:.5f}, final RMSE@Te: {te_rmse:.5f}') 215 | loss_models = loss_models.detach().cpu().numpy() 216 | fname = './results/' + opt.data +'_rmse' 217 | np.savez(fname, rmse=loss_models, params=opt) 218 | 219 | -------------------------------------------------------------------------------- /Regression/models/dynamic_net.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import torch 3 | #import pickle 4 | import torch.nn as nn 5 | 6 | class ForwardType(Enum): 7 | SIMPLE = 0 8 | STACKED = 1 9 | CASCADE = 2 10 | GRADIENT = 3 11 | 12 | class DynamicNet(object): 13 | def __init__(self, c0, lr): 14 | self.models = [] 15 | self.c0 = c0 16 | self.lr = lr 17 | self.boost_rate = nn.Parameter(torch.tensor(lr, requires_grad=True, device="cuda")) 18 | 19 | def add(self, model): 20 | self.models.append(model) 21 | 22 | def parameters(self): 23 | params = [] 24 | for m in self.models: 25 | params.extend(m.parameters()) 26 | 27 | params.append(self.boost_rate) 28 | return params 29 | 30 | def zero_grad(self): 31 | for m in self.models: 32 | m.zero_grad() 33 | 34 | def to_cuda(self): 35 | for m in self.models: 36 | m.cuda() 37 | 38 | def to_eval(self): 39 | for m in self.models: 40 | m.eval() 41 | 42 | def to_train(self): 43 | for m in self.models: 44 | m.train(True) 45 | 46 | def forward(self, x): 47 | if len(self.models) == 0: 48 | return None, self.c0 49 | middle_feat_cum = None 50 | prediction = None 51 | with torch.no_grad(): 52 | for m in self.models: 53 | if middle_feat_cum 
is None: 54 | middle_feat_cum, prediction = m(x, middle_feat_cum) 55 | else: 56 | middle_feat_cum, pred = m(x, middle_feat_cum) 57 | prediction += pred 58 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 59 | 60 | def forward_grad(self, x): 61 | if len(self.models) == 0: 62 | return None, self.c0 63 | # at least one model 64 | middle_feat_cum = None 65 | prediction = None 66 | for m in self.models: 67 | if middle_feat_cum is None: 68 | middle_feat_cum, prediction = m(x, middle_feat_cum) 69 | else: 70 | middle_feat_cum, pred = m(x, middle_feat_cum) 71 | prediction += pred 72 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 73 | 74 | @classmethod 75 | def from_file(cls, path, builder): 76 | d = torch.load(path) 77 | net = DynamicNet(d['c0'], d['lr']) 78 | net.boost_rate = d['boost_rate'] 79 | for stage, m in enumerate(d['models']): 80 | submod = builder(stage) 81 | submod.load_state_dict(m) 82 | net.add(submod) 83 | return net 84 | 85 | def to_file(self, path): 86 | models = [m.state_dict() for m in self.models] 87 | d = {'models': models, 'c0': self.c0, 'lr': self.lr, 'boost_rate': self.boost_rate} 88 | torch.save(d, path) 89 | -------------------------------------------------------------------------------- /Regression/models/mlp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .splinear import SpLinear 6 | 7 | 8 | class MLP_1HL(nn.Module): 9 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 10 | super(MLP_1HL, self).__init__() 11 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 12 | self.out_layer = nn.Linear(dim_hidden1, 1) 13 | self.lrelu = nn.LeakyReLU(0.1) 14 | self.relu = nn.ReLU() 15 | if bn: 16 | self.bn = nn.BatchNorm1d(dim_hidden1) 17 | self.bn2 = nn.BatchNorm1d(dim_in) 18 | 19 | def forward(self, x, lower_f): 20 | if lower_f is not None: 21 | x = torch.cat([x, lower_f], dim=1) 22 | x = self.bn2(x) 23 | out = self.in_layer(x) 24 | return out, self.out_layer(self.relu(out)).squeeze() 25 | 26 | @classmethod 27 | def get_model(cls, stage, opt): 28 | if stage == 0: 29 | dim_in = opt.feat_d 30 | else: 31 | dim_in = opt.feat_d + opt.hidden_d 32 | model = MLP_1HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 33 | return model 34 | 35 | 36 | class MLP_2HL(nn.Module): 37 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 38 | super(MLP_2HL, self).__init__() 39 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 40 | self.dropout_layer = nn.Dropout(0.0) 41 | self.lrelu = nn.LeakyReLU(0.1) 42 | self.relu = nn.ReLU() 43 | self.hidden_layer = nn.Linear(dim_hidden1, dim_hidden2) 44 | self.out_layer = nn.Linear(dim_hidden2, 1) 45 | self.bn = nn.BatchNorm1d(dim_hidden1) 46 | self.bn2 = nn.BatchNorm1d(dim_in) 47 | 48 | def forward(self, x, lower_f): 49 | if lower_f is not None: 50 | x = torch.cat([x, lower_f], dim=1) 51 | x = self.bn2(x) 52 | out = self.lrelu(self.in_layer(x)) 53 | out = self.bn(out) 54 | out = self.hidden_layer(out) 55 | return out, self.out_layer(self.relu(out)).squeeze() 56 | 57 | @classmethod 58 | def get_model(cls, stage, opt): 59 | if stage == 0: 60 | dim_in = opt.feat_d 61 | else: 62 | dim_in = opt.feat_d + opt.hidden_d 63 | model = MLP_2HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 64 | return model 65 | 66 | class MLP_3HL(nn.Module): 67 | def 
__init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 68 | super(MLP_3HL, self).__init__() 69 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 70 | self.dropout_layer = nn.Dropout(0.0) 71 | self.lrelu = nn.LeakyReLU(0.1) 72 | self.relu = nn.ReLU() 73 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 74 | self.out_layer = nn.Linear(dim_hidden1, 1) 75 | self.bn = nn.BatchNorm1d(dim_hidden1) 76 | self.bn2 = nn.BatchNorm1d(dim_in) 77 | # print('Batch normalization is processed!') 78 | 79 | def forward(self, x, lower_f): 80 | if lower_f is not None: 81 | x = torch.cat([x, lower_f], dim=1) 82 | x = self.bn2(x) 83 | out = self.lrelu(self.in_layer(x)) 84 | out = self.bn(out) 85 | out = self.lrelu(self.hidden_layer(out)) 86 | out = self.bn(out) 87 | out = self.hidden_layer(out) 88 | return out, self.out_layer(self.relu(out)).squeeze() 89 | 90 | @classmethod 91 | def get_model(cls, stage, opt): 92 | if stage == 0: 93 | dim_in = opt.feat_d 94 | else: 95 | dim_in = opt.feat_d + opt.hidden_d 96 | model = MLP_3HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 97 | return model 98 | 99 | class MLP_4HL(nn.Module): 100 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 101 | super(MLP_4HL, self).__init__() 102 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 103 | self.dropout_layer = nn.Dropout(0.0) 104 | self.lrelu = nn.LeakyReLU(0.1) 105 | self.relu = nn.ReLU() 106 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 107 | self.out_layer = nn.Linear(dim_hidden1, 1) 108 | self.bn = nn.BatchNorm1d(dim_hidden1) 109 | self.bn2 = nn.BatchNorm1d(dim_in) 110 | # print('Batch normalization is processed!') 111 | 112 | def forward(self, x, lower_f): 113 | if lower_f is not None: 114 | x = torch.cat([x, lower_f], dim=1) 115 | x = self.bn2(x) 116 | out = self.lrelu(self.in_layer(x)) #HL-1 117 | out = self.bn(out) 118 | out = self.lrelu(self.hidden_layer(out)) #HL-2 119 | out = self.bn(out) 120 | out = self.lrelu(self.hidden_layer(out)) #HL-3 121 | out = self.bn(out) 122 | out = self.hidden_layer(out) #HL-4 123 | return out, self.out_layer(self.relu(out)).squeeze() 124 | 125 | @classmethod 126 | def get_model(cls, stage, opt): 127 | if stage == 0: 128 | dim_in = opt.feat_d 129 | else: 130 | dim_in = opt.feat_d + opt.hidden_d 131 | model = MLP_4HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 132 | return model 133 | 134 | 135 | class DNN(nn.Module): 136 | def __init__(self, dim_in, dim_hidden, n_hidden=20, sparse=False, bn=True, drop_out=0.3): 137 | super(DNN, self).__init__() 138 | if sparse: 139 | self.in_layer = SpLinear(dim_in, dim_hidden) 140 | else: 141 | self.in_layer = nn.Linear(dim_in, dim_hidden) 142 | self.in_act = nn.SELU() 143 | hidden_layers = [] 144 | for _ in range(n_hidden): 145 | hidden_layers.append(nn.Linear(dim_hidden, dim_hidden)) 146 | if bn: 147 | hidden_layers.append(nn.BatchNorm1d(dim_hidden)) 148 | hidden_layers.append(nn.SELU()) 149 | if drop_out > 0: 150 | hidden_layers.append(nn.Dropout(drop_out)) 151 | self.hidden_layers = nn.Sequential(*hidden_layers) 152 | self.out_layer = nn.Linear(dim_hidden, 1) 153 | 154 | def forward(self, x): 155 | out = self.in_act(self.in_layer(x)) 156 | out = self.hidden_layers(out) 157 | out = self.out_layer(out) 158 | return out.squeeze() 159 | -------------------------------------------------------------------------------- /Regression/models/splinear.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class SpLinearFunc(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, input, weight, bias=None): 10 | ctx.save_for_backward(input, weight, bias) 11 | output = input.mm(weight.t()) 12 | if bias is not None: 13 | output += bias.unsqueeze(0).expand_as(output) 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx, grad_output): 18 | input, weight, bias = ctx.saved_tensors 19 | grad_input = grad_weight = grad_bias = None 20 | 21 | if ctx.needs_input_grad[0]: 22 | grad_input = grad_output.mm(weight) 23 | if ctx.needs_input_grad[1]: 24 | grad_weight = (input.t().mm(grad_output)).t() 25 | if bias is not None and ctx.needs_input_grad[2]: 26 | grad_bias = grad_output.sum(0).squeeze(0) 27 | 28 | return grad_input, grad_weight, grad_bias 29 | 30 | splinear = SpLinearFunc.apply 31 | 32 | class SpLinear(nn.Module): 33 | def __init__(self, input_features, output_features, bias=True): 34 | super(SpLinear, self).__init__() 35 | self.input_features = input_features 36 | self.output_features = output_features 37 | self.weight = nn.Parameter(torch.Tensor(output_features, input_features)) 38 | if bias: 39 | self.bias = nn.Parameter(torch.Tensor(output_features)) 40 | else: 41 | self.register_parameter('bias', None) 42 | #TODO write a default initialization 43 | stdv = 1. / math.sqrt(self.weight.size(1)) 44 | self.weight.data.uniform_(-stdv, stdv) 45 | 46 | def forward(self, input): 47 | return splinear(input, self.weight, self.bias) 48 | -------------------------------------------------------------------------------- /Regression/results/results_readme.txt: -------------------------------------------------------------------------------- 1 | Your results will be saved here. -------------------------------------------------------------------------------- /Regression/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### Feature Table ### 4 | # ca_housing 8 5 | # YearPredictionMSD 90 6 | # slice_localization 384 7 | dataset=YearPredictionMSD 8 | 9 | BASEDIR=$(dirname "$0") 10 | OUTDIR="${BASEDIR}/ckpt/" 11 | 12 | if [ ! -d "${OUTDIR}" ] 13 | then 14 | echo "Output dir ${OUTDIR} does not exist, creating..." 
15 | mkdir -p ${OUTDIR} 16 | fi 17 | 18 | CUDA_VISIBLE_DEVICES=0 python -u main_reg_cv.py \ 19 | --feat_d 90 \ 20 | --hidden_d 32 \ 21 | --boost_rate 1 \ 22 | --lr 0.005 \ 23 | --L2 .0e-3 \ 24 | --num_nets 40 \ 25 | --data ${dataset} \ 26 | --tr ${BASEDIR}/../data/${dataset}_tr.npz \ 27 | --te ${BASEDIR}/../data/${dataset}_te.npz \ 28 | --batch_size 2048 \ 29 | --epochs_per_stage 1 \ 30 | --correct_epoch 1 \ 31 | --normalization True \ 32 | --cv True \ 33 | --out_f ${OUTDIR}/${dataset}_cls.pth \ 34 | --cuda 35 | -------------------------------------------------------------------------------- /baselines/reproduce_higgs.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | from sklearn.metrics import roc_auc_score 3 | from sklearn.datasets import load_svmlight_file 4 | 5 | # load data 6 | tr_x, tr_y = load_svmlight_file('./higgs.train') 7 | te_x, te_y = load_svmlight_file('./higgs.test') 8 | 9 | # grid search 10 | param = { 11 | 'objective': 'binary:logistic', 12 | 'tree_method': 'hist', 13 | 'learning_rate': 0.05, 14 | 'n_estimators': 800, 15 | 'max_depth': 7, 16 | 'reg_lambda': 0.02, 17 | } 18 | 19 | # regressor 20 | model = xgb.XGBRegressor(verbosity=2, seed=0, **param) 21 | model.fit(tr_x, tr_y) 22 | 23 | # predict on test data 24 | auc = roc_auc_score(te_y, model.predict(te_x)) 25 | print(auc) 26 | -------------------------------------------------------------------------------- /baselines/reproduce_slice.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xgboost as xgb 3 | from sklearn.metrics import mean_squared_error 4 | 5 | # load data 6 | tr_npz = np.load('./slice_localization_tr.npz') 7 | te_npz = np.load('./slice_localization_te.npz') 8 | 9 | # grid search 10 | param = { 11 | 'learning_rate': 0.1, 12 | 'n_estimators': 1024, 13 | 'max_depth': 7, 14 | 'reg_lambda': 0.0, 15 | } 16 | 17 | # regressor 18 | model = xgb.XGBRegressor(objective='reg:squarederror', 19 | verbosity=2, 20 | seed=0, 21 | **param) 22 | model.fit(tr_npz['features'], tr_npz['labels']) 23 | 24 | # predict on test data 25 | mse = mean_squared_error(te_npz['labels'], model.predict(te_npz['features'])) 26 | print(np.sqrt(mse)) 27 | -------------------------------------------------------------------------------- /baselines/reproduce_year.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xgboost as xgb 3 | from sklearn.metrics import mean_squared_error 4 | 5 | # load data 6 | tr_npz = np.load('./YearPredictionMSD_tr.npz') 7 | te_npz = np.load('./YearPredictionMSD_te.npz') 8 | 9 | # grid search 10 | param = { 11 | 'learning_rate': 0.05, 12 | 'n_estimators': 800, 13 | 'max_depth': 7, 14 | 'reg_lambda': 0.02, 15 | } 16 | 17 | # regressor 18 | model = xgb.XGBRegressor(objective='reg:squarederror', 19 | verbosity=2, 20 | seed=0, 21 | **param) 22 | model.fit(tr_npz['features'], tr_npz['labels']) 23 | 24 | # predict on test data 25 | mse = mean_squared_error(te_npz['labels'], model.predict(te_npz['features'])) 26 | print(np.sqrt(mse)) 27 | -------------------------------------------------------------------------------- /higgs2libsvm.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | input_filename = "HIGGS.csv" 4 | output_train = "higgs.train" 5 | output_test = "higgs.test" 6 | 7 | num_train = 10500000 8 | 9 | read_num = 0 10 | 11 | input = open(input_filename, "r") 12 | train 
= open(output_train, "w") 13 | test = open(output_test,"w") 14 | 15 | def WriteOneLine(tokens, output): 16 | label = int(float(tokens[0])) 17 | output.write(str(label)) 18 | for i in range(1,len(tokens)): 19 | feature_value = float(tokens[i]) 20 | output.write(' ' + str(i-1) + ':' + str(feature_value)) 21 | output.write('\n') 22 | 23 | line = input.readline() 24 | 25 | while line: 26 | tokens = line.split(',') 27 | if read_num < num_train: 28 | WriteOneLine(tokens, train) 29 | else: 30 | WriteOneLine(tokens, test) 31 | read_num += 1 32 | line = input.readline() 33 | 34 | input.close() 35 | train.close() 36 | test.close() 37 | -------------------------------------------------------------------------------- /reg_train_test_split.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn import datasets\n", 10 | "from sklearn.model_selection import train_test_split\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import xgboost as xgb\n", 14 | "\n", 15 | "datapath = r'.\\data'" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Slice Localization data split" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# Load the data: slice_localization\n", 32 | "df = pd.read_csv(datapath+'\\slice_localization_data.csv')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 4, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/html": [ 43 | "
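A note on the HIGGS pipeline above before the regression splits: higgs2libsvm.py writes 0/1 labels in LibSVM format, sending the first 10,500,000 rows of HIGGS.csv to higgs.train and the remaining 500,000 to higgs.test, and reproduce_higgs.py then fits an XGBRegressor with a binary:logistic objective, so its predictions are probability-like scores, which is all roc_auc_score needs. For readers who prefer the classifier interface, the sketch below is a hypothetical equivalent, not part of the repo; the hyper-parameters and file names mirror the scripts above, and random_state=0 stands in for the script's seed=0.

# Hypothetical classifier-style equivalent of baselines/reproduce_higgs.py.
# Assumes higgs.train / higgs.test were produced by higgs2libsvm.py above.
import xgboost as xgb
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import roc_auc_score

tr_x, tr_y = load_svmlight_file('./higgs.train')
te_x, te_y = load_svmlight_file('./higgs.test')

clf = xgb.XGBClassifier(
    objective='binary:logistic',
    tree_method='hist',
    learning_rate=0.05,
    n_estimators=800,
    max_depth=7,
    reg_lambda=0.02,
    random_state=0,
)
clf.fit(tr_x, tr_y)

# predict_proba returns columns [P(y=0), P(y=1)]; AUC is computed on the positive class.
print(roc_auc_score(te_y, clf.predict_proba(te_x)[:, 1]))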
\n", 61 | " | patientId | \n", 62 | "value0 | \n", 63 | "value1 | \n", 64 | "value2 | \n", 65 | "value3 | \n", 66 | "value4 | \n", 67 | "value5 | \n", 68 | "value6 | \n", 69 | "value7 | \n", 70 | "value8 | \n", 71 | "... | \n", 72 | "value375 | \n", 73 | "value376 | \n", 74 | "value377 | \n", 75 | "value378 | \n", 76 | "value379 | \n", 77 | "value380 | \n", 78 | "value381 | \n", 79 | "value382 | \n", 80 | "value383 | \n", 81 | "reference | \n", 82 | "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 87 | "0 | \n", 88 | "0.0 | \n", 89 | "0.0 | \n", 90 | "0.0 | \n", 91 | "0.0 | \n", 92 | "0.0 | \n", 93 | "0.0 | \n", 94 | "-0.25 | \n", 95 | "-0.25 | \n", 96 | "-0.25 | \n", 97 | "... | \n", 98 | "-0.25 | \n", 99 | "0.980381 | \n", 100 | "0.0 | \n", 101 | "0.0 | \n", 102 | "0.0 | \n", 103 | "0.0 | \n", 104 | "0.0 | \n", 105 | "-0.25 | \n", 106 | "-0.25 | \n", 107 | "21.803851 | \n", 108 | "
1 | \n", 111 | "0 | \n", 112 | "0.0 | \n", 113 | "0.0 | \n", 114 | "0.0 | \n", 115 | "0.0 | \n", 116 | "0.0 | \n", 117 | "0.0 | \n", 118 | "-0.25 | \n", 119 | "-0.25 | \n", 120 | "-0.25 | \n", 121 | "... | \n", 122 | "-0.25 | \n", 123 | "0.977008 | \n", 124 | "0.0 | \n", 125 | "0.0 | \n", 126 | "0.0 | \n", 127 | "0.0 | \n", 128 | "0.0 | \n", 129 | "-0.25 | \n", 130 | "-0.25 | \n", 131 | "21.745726 | \n", 132 | "
2 | \n", 135 | "0 | \n", 136 | "0.0 | \n", 137 | "0.0 | \n", 138 | "0.0 | \n", 139 | "0.0 | \n", 140 | "0.0 | \n", 141 | "0.0 | \n", 142 | "-0.25 | \n", 143 | "-0.25 | \n", 144 | "-0.25 | \n", 145 | "... | \n", 146 | "-0.25 | \n", 147 | "0.977008 | \n", 148 | "0.0 | \n", 149 | "0.0 | \n", 150 | "0.0 | \n", 151 | "0.0 | \n", 152 | "0.0 | \n", 153 | "-0.25 | \n", 154 | "-0.25 | \n", 155 | "21.687600 | \n", 156 | "
3 | \n", 159 | "0 | \n", 160 | "0.0 | \n", 161 | "0.0 | \n", 162 | "0.0 | \n", 163 | "0.0 | \n", 164 | "0.0 | \n", 165 | "0.0 | \n", 166 | "-0.25 | \n", 167 | "-0.25 | \n", 168 | "-0.25 | \n", 169 | "... | \n", 170 | "-0.25 | \n", 171 | "0.977008 | \n", 172 | "0.0 | \n", 173 | "0.0 | \n", 174 | "0.0 | \n", 175 | "0.0 | \n", 176 | "0.0 | \n", 177 | "-0.25 | \n", 178 | "-0.25 | \n", 179 | "21.629474 | \n", 180 | "
4 | \n", 183 | "0 | \n", 184 | "0.0 | \n", 185 | "0.0 | \n", 186 | "0.0 | \n", 187 | "0.0 | \n", 188 | "0.0 | \n", 189 | "0.0 | \n", 190 | "-0.25 | \n", 191 | "-0.25 | \n", 192 | "-0.25 | \n", 193 | "... | \n", 194 | "-0.25 | \n", 195 | "0.976833 | \n", 196 | "0.0 | \n", 197 | "0.0 | \n", 198 | "0.0 | \n", 199 | "0.0 | \n", 200 | "0.0 | \n", 201 | "-0.25 | \n", 202 | "-0.25 | \n", 203 | "21.571348 | \n", 204 | "
5 rows × 386 columns
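From here the notebook's job, per its name and the files consumed downstream, is to turn the raw CSV into the slice_localization_tr.npz / slice_localization_te.npz pair that baselines/reproduce_slice.py and the Regression data loader (LibSVMRegData) read through the 'features' and 'labels' keys. The sketch below illustrates that step and reuses df, datapath, np, and train_test_split from the notebook's earlier cells; the 80/20 ratio, the fixed random_state, and dropping patientId from the feature matrix are assumptions, not values read from the notebook.

# Minimal sketch of the slice-localization split (assumed 80/20 ratio).
# The 'features'/'labels' keys and *_tr.npz / *_te.npz names match what
# reproduce_slice.py and LibSVMRegData expect; everything else is assumed.
X = df.drop(columns=['patientId', 'reference']).values.astype(np.float32)
y = df['reference'].values.astype(np.float32)

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)

np.savez(datapath + '\\slice_localization_tr.npz', features=X_tr, labels=y_tr)
np.savez(datapath + '\\slice_localization_te.npz', features=X_te, labels=y_te)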
\n", 208 | "\n", 351 | " | 1 | \n", 352 | "2 | \n", 353 | "3 | \n", 354 | "4 | \n", 355 | "5 | \n", 356 | "6 | \n", 357 | "7 | \n", 358 | "8 | \n", 359 | "9 | \n", 360 | "10 | \n", 361 | "... | \n", 362 | "81 | \n", 363 | "82 | \n", 364 | "83 | \n", 365 | "84 | \n", 366 | "85 | \n", 367 | "86 | \n", 368 | "87 | \n", 369 | "88 | \n", 370 | "89 | \n", 371 | "90 | \n", 372 | "
[remaining rows of the df.head() preview condensed: 5 rows × 90 columns of YearPredictionMSD features (columns 1-90)]