├── .gitignore ├── Classification ├── Readme.md ├── data │ ├── data.py │ ├── sparse_data.py │ ├── sparseloader.py │ └── untitled.py ├── main_cls_cv.py ├── misc │ ├── auc.py │ └── metrics.py ├── models │ ├── dynamic_net.py │ ├── mlp.py │ └── splinear.py ├── results │ └── results_readme.txt └── train.sh ├── L2R ├── DataLoader │ └── DataLoader.py ├── Misc │ ├── Calculations.py │ └── metrics.py ├── Readme.md ├── Utils │ └── utils.py ├── main_l2r_idiv_cv.py ├── main_l2r_mse_cv.py ├── main_l2r_pairwise_cv.py ├── models │ ├── dynamic_net.py │ ├── mlp.py │ └── splinear.py ├── results │ └── results_readme.txt └── train.sh ├── Model.png ├── README.md ├── Regression ├── Readme.md ├── data │ ├── data.py │ ├── sparse_data.py │ ├── sparseloader.py │ └── untitled.py ├── main_reg_cv.py ├── models │ ├── dynamic_net.py │ ├── mlp.py │ └── splinear.py ├── results │ └── results_readme.txt └── train.sh ├── baselines ├── reproduce_higgs.py ├── reproduce_slice.py └── reproduce_year.py ├── higgs2libsvm.py ├── reg_train_test_split.ipynb ├── requirements.txt └── yahoo2mslr.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Classification/Readme.md: -------------------------------------------------------------------------------- 1 | - Data loading and creating dataloader are handled in GrowNet/Classification/data/data.py. 2 | If you have a sparse data then try using sparse_data.py and opt sparse to True. If you want to try a new data please put it into one of the formats listed in data.py script. 3 | 4 | - Individual model class and ensemble architecture are in GrowNet/Classification/models: mlp.py and dynamic_net.py. 5 | You can increase number of hidden layers or change activation functions from mlp.py 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /Classification/data/data.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from torch.utils.data import Dataset 8 | from scipy.sparse import csr_matrix 9 | from sklearn.datasets import load_svmlight_file 10 | from sklearn import datasets 11 | from sklearn.model_selection import train_test_split 12 | 13 | 14 | class LibSVMData(Dataset): 15 | def __init__(self, root, dim, normalization, pos=1, neg=-1, out_pos=1, out_neg=-1): 16 | self.feat, self.label = load_svmlight_file(root) 17 | 18 | self.feat = csr_matrix((self.feat.data, self.feat.indices, self.feat.indptr), shape=(len(self.label), dim)) 19 | self.feat = self.feat.toarray().astype(np.float32) 20 | 21 | self.label = self.label.astype(np.float32) 22 | idx_pos = self.label == pos 23 | idx_neg = self.label == neg 24 | self.label[idx_pos] = out_pos 25 | self.label[idx_neg] = out_neg 26 | 27 | def __getitem__(self, index): 28 | arr = self.feat[index, :] 29 | return arr, self.label[index] 30 | def __len__(self): 31 | return len(self.label) 32 | 33 | class LibSVMRankData(Dataset): 34 | def __init__(self, root2data, root2qid, dim): 35 | self.feat, self.label = load_svmlight_file(root2data) 36 | self.qid = np.loadtxt(root2qid, dtype='int32') 37 | self.feat = self.feat.toarray().astype(np.float32) 38 | self.label = self.label.astype(np.float32) 39 | self.feat = self.feat[:, ~(self.feat == 0).all(0)] 40 | print(self.feat.shape[1]) 41 | 42 | def __getitem__(self, index): 43 | return self.feat[index, :], self.label[index], self.qid[index] 44 | 45 | def __len__(self): 46 | return len(self.label) 47 | 48 | class LibSVMRegData(Dataset): 49 | def __init__(self, root, dim, normalization): 50 | data = np.load(root) 51 | self.feat, self.label = data['features'], data['labels'] 52 | del data 53 | self.feat = self.feat.astype(np.float32) 54 | self.label = self.label.astype(np.float32) 55 | #self.feat = self.feat[:, ~(self.feat == 0).all(0)] 56 | #import ipdb; ipdb.set_trace() 57 | 58 | print(self.feat.shape[1]) 59 | 60 | def __getitem__(self, index): 61 | 
return self.feat[index, :], self.label[index] 62 | 63 | def __len__(self): 64 | return len(self.label) 65 | 66 | class LibCSVData(Dataset): 67 | def __init__(self, root, dim, pos=1, neg=-1): 68 | self.data = np.loadtxt(root, delimiter=',').astype(np.float32) 69 | self.feat = self.data[:, 1:] 70 | self.label = self.data[:, 0] 71 | self.label[self.label == pos] = 1 72 | self.label[self.label == neg] = -1 73 | 74 | def __getitem__(self, index): 75 | #arr = np.log(self.feat[index, :] + 1.0e-5) 76 | #arr = np.log10(self.feat[index, :] + 1.0e-5) 77 | arr = self.feat[index, :] 78 | return arr, self.label[index] 79 | 80 | def __len__(self): 81 | return len(self.label) 82 | class CriteoCSVData(Dataset): 83 | def __init__(self, root, dim, normalization, pos=1, neg=-1): 84 | # Reading the data into panda data frame 85 | self.data = pd.read_csv(root, header=None, dtype='float32') 86 | # extracting labels (0, 1) and weights 87 | self.label = self.data.iloc[:, -2] 88 | self.weights = self.data.iloc[:, -1] 89 | self.data = self.data.iloc[:, :-2] 90 | # transferring labels from {0, 1} to {-1, 1} 91 | self.label[self.label == pos] = 1 92 | self.label[self.label == neg] = -1 93 | 94 | # Applying log transformation 95 | mm = self.data.min().min() # to prevent 0 division 96 | if normalization: 97 | # Filling Nan values: Simple approach, mean of the that column or interpolation 98 | self.data = self.data.transform(lambda x: np.log(x - mm + 1)) 99 | #self.data = self.data.interpolate(method='polynomial', order=2) 100 | self.data = self.data.fillna(self.data.mean()) # To fill the rest of Nan values left untouched on the corners 101 | #self.data = (self.data - self.data.mean())/self.data.std() 102 | #self.feat = self.data.to_numpy('float32') 103 | self.data = self.data.to_numpy('float32') 104 | def __getitem__(self, index): 105 | #arr = np.log(self.feat[index, :] + 1.0e-5) 106 | #arr = np.log10(self.feat[index, :] + 1.0e-5) 107 | #arr = self.feat[index, :] 108 | arr = self.data[index, :] 109 | return arr, self.label[index], self.weights[index] 110 | 111 | def __len__(self): 112 | return len(self.label) 113 | -------------------------------------------------------------------------------- /Classification/data/sparse_data.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import os 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import Dataset 7 | from scipy.sparse import csr_matrix 8 | from sklearn.datasets import load_svmlight_file 9 | 10 | class LibSVMDataSp(Dataset): 11 | def __init__(self, root, dim_in, pos=1, neg=-1): 12 | self.feat, self.label = load_svmlight_file(root) 13 | self.feat = csr_matrix((self.feat.data, self.feat.indices, self.feat.indptr), shape=(len(self.label), dim_in)) 14 | self.feat = self.feat.astype(np.float32) 15 | self.label = self.label.astype(np.float32) 16 | self.label[self.label == pos] = 1 17 | self.label[self.label == neg] = -1 18 | 19 | def __getitem__(self, index): 20 | arr = self.feat[index, :] 21 | return arr, self.label[index] 22 | def __len__(self): 23 | return len(self.label) 24 | 25 | 26 | -------------------------------------------------------------------------------- /Classification/data/sparseloader.py: -------------------------------------------------------------------------------- 1 | import scipy 2 | import random 3 | import torch 4 | import torch.multiprocessing as multiprocessing 5 | # Changed _update_worker_pids into _set_worker_pids, due to new version of pytorch 6 | from 
torch._C import _set_worker_signal_handlers, _set_worker_pids, \ 7 | _remove_worker_pids, _error_if_any_worker_fails 8 | from torch.utils.data.sampler import SequentialSampler, RandomSampler, BatchSampler 9 | import signal 10 | import functools 11 | import collections 12 | import re 13 | import sys 14 | import threading 15 | import traceback 16 | import os 17 | import time 18 | from torch._six import * #string_classes, int_classes, FileNotFoundError 19 | 20 | IS_WINDOWS = sys.platform == "win32" 21 | if IS_WINDOWS: 22 | import ctypes 23 | from ctypes.wintypes import DWORD, BOOL, HANDLE 24 | 25 | if sys.version_info[0] == 2: 26 | import Queue as queue 27 | else: 28 | import queue 29 | 30 | 31 | class ExceptionWrapper(object): 32 | r"""Wraps an exception plus traceback to communicate across threads""" 33 | 34 | def __init__(self, exc_info): 35 | self.exc_type = exc_info[0] 36 | self.exc_msg = "".join(traceback.format_exception(*exc_info)) 37 | 38 | 39 | _use_shared_memory = False 40 | r"""Whether to use shared memory in default_collate""" 41 | 42 | MANAGER_STATUS_CHECK_INTERVAL = 5.0 43 | 44 | if IS_WINDOWS: 45 | # On Windows, the parent ID of the worker process remains unchanged when the manager process 46 | # is gone, and the only way to check it through OS is to let the worker have a process handle 47 | # of the manager and ask if the process status has changed. 48 | class ManagerWatchdog(object): 49 | def __init__(self): 50 | self.manager_pid = os.getppid() 51 | 52 | self.kernel32 = ctypes.WinDLL('kernel32', use_last_error=True) 53 | self.kernel32.OpenProcess.argtypes = (DWORD, BOOL, DWORD) 54 | self.kernel32.OpenProcess.restype = HANDLE 55 | self.kernel32.WaitForSingleObject.argtypes = (HANDLE, DWORD) 56 | self.kernel32.WaitForSingleObject.restype = DWORD 57 | 58 | # Value obtained from https://msdn.microsoft.com/en-us/library/ms684880.aspx 59 | SYNCHRONIZE = 0x00100000 60 | self.manager_handle = self.kernel32.OpenProcess(SYNCHRONIZE, 0, self.manager_pid) 61 | 62 | if not self.manager_handle: 63 | raise ctypes.WinError(ctypes.get_last_error()) 64 | 65 | def is_alive(self): 66 | return self.kernel32.WaitForSingleObject(self.manager_handle, 0) != 0 67 | else: 68 | class ManagerWatchdog(object): 69 | def __init__(self): 70 | self.manager_pid = os.getppid() 71 | 72 | def is_alive(self): 73 | return os.getppid() == self.manager_pid 74 | 75 | 76 | def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, init_fn, worker_id): 77 | global _use_shared_memory 78 | _use_shared_memory = True 79 | 80 | _set_worker_signal_handlers() 81 | 82 | torch.set_num_threads(1) 83 | random.seed(seed) 84 | torch.manual_seed(seed) 85 | 86 | if init_fn is not None: 87 | init_fn(worker_id) 88 | 89 | watchdog = ManagerWatchdog() 90 | 91 | while True: 92 | try: 93 | r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL) 94 | except queue.Empty: 95 | if watchdog.is_alive(): 96 | continue 97 | else: 98 | break 99 | if r is None: 100 | break 101 | idx, batch_indices = r 102 | try: 103 | samples = collate_fn([dataset[i] for i in batch_indices]) 104 | except Exception: 105 | data_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 106 | else: 107 | data_queue.put((idx, samples)) 108 | del samples 109 | 110 | 111 | def _worker_manager_loop(in_queue, out_queue, done_event, pin_memory, device_id): 112 | if pin_memory: 113 | torch.cuda.set_device(device_id) 114 | 115 | while True: 116 | try: 117 | r = in_queue.get() 118 | except Exception: 119 | if done_event.is_set(): 120 | return 121 | raise 122 | if r is 
None: 123 | break 124 | if isinstance(r[1], ExceptionWrapper): 125 | out_queue.put(r) 126 | continue 127 | idx, batch = r 128 | try: 129 | if pin_memory: 130 | batch = pin_memory_batch(batch) 131 | except Exception: 132 | out_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 133 | else: 134 | out_queue.put((idx, batch)) 135 | 136 | numpy_type_map = { 137 | 'float64': torch.DoubleTensor, 138 | 'float32': torch.FloatTensor, 139 | 'float16': torch.HalfTensor, 140 | 'int64': torch.LongTensor, 141 | 'int32': torch.IntTensor, 142 | 'int16': torch.ShortTensor, 143 | 'int8': torch.CharTensor, 144 | 'uint8': torch.ByteTensor, 145 | } 146 | 147 | 148 | def default_collate(batch): 149 | r"""Puts each data field into a tensor with outer dimension batch size""" 150 | 151 | error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" 152 | elem_type = type(batch[0]) 153 | if isinstance(batch[0], torch.Tensor): 154 | out = None 155 | if _use_shared_memory: 156 | # If we're in a background process, concatenate directly into a 157 | # shared memory tensor to avoid an extra copy 158 | numel = sum([x.numel() for x in batch]) 159 | storage = batch[0].storage()._new_shared(numel) 160 | out = batch[0].new(storage) 161 | return torch.stack(batch, 0, out=out) 162 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 163 | and elem_type.__name__ != 'string_': 164 | elem = batch[0] 165 | if elem_type.__name__ == 'ndarray': 166 | # array of string classes and object 167 | if re.search('[SaUO]', elem.dtype.str) is not None: 168 | raise TypeError(error_msg.format(elem.dtype)) 169 | 170 | return torch.stack([torch.from_numpy(b) for b in batch], 0) 171 | if elem.shape == (): # scalars 172 | py_type = float if elem.dtype.name.startswith('float') else int 173 | return numpy_type_map[elem.dtype.name](list(map(py_type, batch))) 174 | elif isinstance(batch[0], int_classes): 175 | return torch.LongTensor(batch) 176 | elif isinstance(batch[0], float): 177 | return torch.DoubleTensor(batch) 178 | elif isinstance(batch[0], string_classes): 179 | return batch 180 | elif isinstance(batch[0], collections.Mapping): 181 | return {key: default_collate([d[key] for d in batch]) for key in batch[0]} 182 | elif isinstance(batch[0], collections.Sequence): 183 | transposed = zip(*batch) 184 | return [default_collate(samples) for samples in transposed] 185 | elif isinstance(batch[0], scipy.sparse.csr.csr_matrix): 186 | row_idx = [] 187 | col_idx = [] 188 | val = [] 189 | for i, b in enumerate(batch): 190 | col = b.indices 191 | row_idx.extend([i] * len(col)) 192 | col_idx.extend(col) 193 | val.extend(b.data) 194 | i = torch.LongTensor([row_idx, col_idx]) 195 | v = torch.FloatTensor(val) 196 | return torch.sparse.FloatTensor(i, v, torch.Size([len(batch), batch[0].shape[1]])) 197 | raise TypeError((error_msg.format(type(batch[0])))) 198 | 199 | 200 | def pin_memory_batch(batch): 201 | if isinstance(batch, torch.Tensor): 202 | return batch.pin_memory() 203 | elif isinstance(batch, string_classes): 204 | return batch 205 | elif isinstance(batch, collections.Mapping): 206 | return {k: pin_memory_batch(sample) for k, sample in batch.items()} 207 | elif isinstance(batch, collections.Sequence): 208 | return [pin_memory_batch(sample) for sample in batch] 209 | else: 210 | return batch 211 | 212 | 213 | _SIGCHLD_handler_set = False 214 | r"""Whether SIGCHLD handler is set for DataLoader worker failures. 
Only one 215 | handler needs to be set for all DataLoaders in a process.""" 216 | 217 | 218 | def _set_SIGCHLD_handler(): 219 | # Windows doesn't support SIGCHLD handler 220 | if sys.platform == 'win32': 221 | return 222 | # can't set signal in child threads 223 | if not isinstance(threading.current_thread(), threading._MainThread): 224 | return 225 | global _SIGCHLD_handler_set 226 | if _SIGCHLD_handler_set: 227 | return 228 | previous_handler = signal.getsignal(signal.SIGCHLD) 229 | if not callable(previous_handler): 230 | previous_handler = None 231 | 232 | def handler(signum, frame): 233 | _error_if_any_worker_fails() 234 | if previous_handler is not None: 235 | previous_handler(signum, frame) 236 | 237 | signal.signal(signal.SIGCHLD, handler) 238 | _SIGCHLD_handler_set = True 239 | 240 | 241 | class _DataLoaderIter(object): 242 | r"""Iterates once over the DataLoader's dataset, as specified by the sampler""" 243 | 244 | def __init__(self, loader): 245 | self.dataset = loader.dataset 246 | self.collate_fn = loader.collate_fn 247 | self.batch_sampler = loader.batch_sampler 248 | self.num_workers = loader.num_workers 249 | self.pin_memory = loader.pin_memory and torch.cuda.is_available() 250 | self.timeout = loader.timeout 251 | self.done_event = threading.Event() 252 | 253 | self.sample_iter = iter(self.batch_sampler) 254 | 255 | base_seed = torch.LongTensor(1).random_().item() 256 | 257 | if self.num_workers > 0: 258 | self.worker_init_fn = loader.worker_init_fn 259 | self.index_queues = [multiprocessing.Queue() for _ in range(self.num_workers)] 260 | self.worker_queue_idx = 0 261 | self.worker_result_queue = multiprocessing.SimpleQueue() 262 | self.batches_outstanding = 0 263 | self.worker_pids_set = False 264 | self.shutdown = False 265 | self.send_idx = 0 266 | self.rcvd_idx = 0 267 | self.reorder_dict = {} 268 | 269 | self.workers = [ 270 | multiprocessing.Process( 271 | target=_worker_loop, 272 | args=(self.dataset, self.index_queues[i], 273 | self.worker_result_queue, self.collate_fn, base_seed + i, 274 | self.worker_init_fn, i)) 275 | for i in range(self.num_workers)] 276 | 277 | if self.pin_memory or self.timeout > 0: 278 | self.data_queue = queue.Queue() 279 | if self.pin_memory: 280 | maybe_device_id = torch.cuda.current_device() 281 | else: 282 | # do not initialize cuda context if not necessary 283 | maybe_device_id = None 284 | self.worker_manager_thread = threading.Thread( 285 | target=_worker_manager_loop, 286 | args=(self.worker_result_queue, self.data_queue, self.done_event, self.pin_memory, 287 | maybe_device_id)) 288 | self.worker_manager_thread.daemon = True 289 | self.worker_manager_thread.start() 290 | else: 291 | self.data_queue = self.worker_result_queue 292 | 293 | for w in self.workers: 294 | w.daemon = True # ensure that the worker exits on process exit 295 | w.start() 296 | 297 | _set_worker_pids(id(self), tuple(w.pid for w in self.workers)) 298 | _set_SIGCHLD_handler() 299 | self.worker_pids_set = True 300 | 301 | # prime the prefetch loop 302 | for _ in range(2 * self.num_workers): 303 | self._put_indices() 304 | 305 | def __len__(self): 306 | return len(self.batch_sampler) 307 | 308 | def _get_batch(self): 309 | if self.timeout > 0: 310 | try: 311 | return self.data_queue.get(timeout=self.timeout) 312 | except queue.Empty: 313 | raise RuntimeError('DataLoader timed out after {} seconds'.format(self.timeout)) 314 | else: 315 | return self.data_queue.get() 316 | 317 | def __next__(self): 318 | if self.num_workers == 0: # same-process loading 319 | indices = 
next(self.sample_iter) # may raise StopIteration 320 | batch = self.collate_fn([self.dataset[i] for i in indices]) 321 | if self.pin_memory: 322 | batch = pin_memory_batch(batch) 323 | return batch 324 | 325 | # check if the next sample has already been generated 326 | if self.rcvd_idx in self.reorder_dict: 327 | batch = self.reorder_dict.pop(self.rcvd_idx) 328 | return self._process_next_batch(batch) 329 | 330 | if self.batches_outstanding == 0: 331 | self._shutdown_workers() 332 | raise StopIteration 333 | 334 | while True: 335 | assert (not self.shutdown and self.batches_outstanding > 0) 336 | idx, batch = self._get_batch() 337 | self.batches_outstanding -= 1 338 | if idx != self.rcvd_idx: 339 | # store out-of-order samples 340 | self.reorder_dict[idx] = batch 341 | continue 342 | return self._process_next_batch(batch) 343 | 344 | next = __next__ # Python 2 compatibility 345 | 346 | def __iter__(self): 347 | return self 348 | 349 | def _put_indices(self): 350 | assert self.batches_outstanding < 2 * self.num_workers 351 | indices = next(self.sample_iter, None) 352 | if indices is None: 353 | return 354 | self.index_queues[self.worker_queue_idx].put((self.send_idx, indices)) 355 | self.worker_queue_idx = (self.worker_queue_idx + 1) % self.num_workers 356 | self.batches_outstanding += 1 357 | self.send_idx += 1 358 | 359 | def _process_next_batch(self, batch): 360 | self.rcvd_idx += 1 361 | self._put_indices() 362 | if isinstance(batch, ExceptionWrapper): 363 | raise batch.exc_type(batch.exc_msg) 364 | return batch 365 | 366 | def __getstate__(self): 367 | 368 | raise NotImplementedError("_DataLoaderIter cannot be pickled") 369 | 370 | def _shutdown_workers(self): 371 | try: 372 | if not self.shutdown: 373 | self.shutdown = True 374 | self.done_event.set() 375 | for q in self.index_queues: 376 | q.put(None) 377 | # if some workers are waiting to put, make place for them 378 | try: 379 | while not self.worker_result_queue.empty(): 380 | self.worker_result_queue.get() 381 | except (FileNotFoundError, ImportError): 382 | pass 383 | self.worker_result_queue.put(None) 384 | finally: 385 | # removes pids no matter what 386 | if self.worker_pids_set: 387 | _remove_worker_pids(id(self)) 388 | self.worker_pids_set = False 389 | 390 | def __del__(self): 391 | if self.num_workers > 0: 392 | self._shutdown_workers() 393 | 394 | 395 | class DataLoader(object): 396 | r""" 397 | Data loader. Combines a dataset and a sampler, and provides 398 | single- or multi-process iterators over the dataset. 399 | 400 | Arguments: 401 | dataset (Dataset): dataset from which to load the data. 402 | batch_size (int, optional): how many samples per batch to load 403 | (default: 1). 404 | shuffle (bool, optional): set to ``True`` to have the data reshuffled 405 | at every epoch (default: False). 406 | sampler (Sampler, optional): defines the strategy to draw samples from 407 | the dataset. If specified, ``shuffle`` must be False. 408 | batch_sampler (Sampler, optional): like sampler, but returns a batch of 409 | indices at a time. Mutually exclusive with batch_size, shuffle, 410 | sampler, and drop_last. 411 | num_workers (int, optional): how many subprocesses to use for data 412 | loading. 0 means that the data will be loaded in the main process. 413 | (default: 0) 414 | collate_fn (callable, optional): merges a list of samples to form a mini-batch. 415 | pin_memory (bool, optional): If ``True``, the data loader will copy tensors 416 | into CUDA pinned memory before returning them. 
417 | drop_last (bool, optional): set to ``True`` to drop the last incomplete batch, 418 | if the dataset size is not divisible by the batch size. If ``False`` and 419 | the size of dataset is not divisible by the batch size, then the last batch 420 | will be smaller. (default: False) 421 | timeout (numeric, optional): if positive, the timeout value for collecting a batch 422 | from workers. Should always be non-negative. (default: 0) 423 | worker_init_fn (callable, optional): If not None, this will be called on each 424 | worker subprocess with the worker id (an int in ``[0, num_workers - 1]``) as 425 | input, after seeding and before data loading. (default: None) 426 | 427 | .. note:: By default, each worker will have its PyTorch seed set to 428 | ``base_seed + worker_id``, where ``base_seed`` is a long generated 429 | by main process using its RNG. However, seeds for other libraies 430 | may be duplicated upon initializing workers (w.g., NumPy), causing 431 | each worker to return identical random numbers. (See 432 | :ref:`dataloader-workers-random-seed` section in FAQ.) You may 433 | use ``torch.initial_seed()`` to access the PyTorch seed for each 434 | worker in :attr:`worker_init_fn`, and use it to set other seeds 435 | before data loading. 436 | 437 | .. warning:: If ``spawn`` start method is used, :attr:`worker_init_fn` cannot be an 438 | unpicklable object, e.g., a lambda function. 439 | """ 440 | 441 | __initialized = False 442 | 443 | def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, 444 | num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False, 445 | timeout=0, worker_init_fn=None): 446 | self.dataset = dataset 447 | self.batch_size = batch_size 448 | self.num_workers = num_workers 449 | self.collate_fn = collate_fn 450 | self.pin_memory = pin_memory 451 | self.drop_last = drop_last 452 | self.timeout = timeout 453 | self.worker_init_fn = worker_init_fn 454 | 455 | if timeout < 0: 456 | raise ValueError('timeout option should be non-negative') 457 | 458 | if batch_sampler is not None: 459 | if batch_size > 1 or shuffle or sampler is not None or drop_last: 460 | raise ValueError('batch_sampler option is mutually exclusive ' 461 | 'with batch_size, shuffle, sampler, and ' 462 | 'drop_last') 463 | self.batch_size = None 464 | self.drop_last = None 465 | 466 | if sampler is not None and shuffle: 467 | raise ValueError('sampler option is mutually exclusive with ' 468 | 'shuffle') 469 | 470 | if self.num_workers < 0: 471 | raise ValueError('num_workers option cannot be negative; ' 472 | 'use num_workers=0 to disable multiprocessing.') 473 | 474 | if batch_sampler is None: 475 | if sampler is None: 476 | if shuffle: 477 | sampler = RandomSampler(dataset) 478 | else: 479 | sampler = SequentialSampler(dataset) 480 | batch_sampler = BatchSampler(sampler, batch_size, drop_last) 481 | 482 | self.sampler = sampler 483 | self.batch_sampler = batch_sampler 484 | self.__initialized = True 485 | 486 | def __setattr__(self, attr, val): 487 | if self.__initialized and attr in ('batch_size', 'sampler', 'drop_last'): 488 | raise ValueError('{} attribute should not be set after {} is ' 489 | 'initialized'.format(attr, self.__class__.__name__)) 490 | 491 | super(DataLoader, self).__setattr__(attr, val) 492 | 493 | def __iter__(self): 494 | return _DataLoaderIter(self) 495 | 496 | def __len__(self): 497 | return len(self.batch_sampler) 498 | 499 | -------------------------------------------------------------------------------- 
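The csr_matrix branch of default_collate above is what lets the sparse datasets (e.g. real-sim loaded through LibSVMDataSp) be batched into a single torch sparse tensor. Below is a minimal, self-contained sketch of that collate step for reference; the function name collate_csr_rows and the toy two-row batch are illustrative only (not part of the repo), and it uses the current torch.sparse_coo_tensor constructor where sparseloader.py uses the older torch.sparse.FloatTensor call:

import numpy as np
import torch
from scipy.sparse import csr_matrix

def collate_csr_rows(batch):
    # batch: list of 1 x dim scipy CSR rows (the feature part of what LibSVMDataSp yields)
    row_idx, col_idx, vals = [], [], []
    for i, b in enumerate(batch):
        row_idx.extend([i] * len(b.indices))   # which sample in the batch each non-zero belongs to
        col_idx.extend(b.indices.tolist())     # feature (column) indices of the non-zeros
        vals.extend(b.data.tolist())           # the non-zero feature values
    indices = torch.LongTensor([row_idx, col_idx])
    values = torch.FloatTensor(vals)
    # sparseloader.py builds the same tensor with the older torch.sparse.FloatTensor API
    return torch.sparse_coo_tensor(indices, values, (len(batch), batch[0].shape[1]))

rows = [csr_matrix(np.array([[0.0, 1.5, 0.0, 2.0]], dtype=np.float32)),
        csr_matrix(np.array([[3.0, 0.0, 0.0, 0.0]], dtype=np.float32))]
print(collate_csr_rows(rows).to_dense())       # dense 2 x 4 batch, for inspection only

In the training scripts this path is reached implicitly: main_cls_cv.py imports DataLoader from data/sparseloader.py with its default collate_fn, and when --sparse is True the dataset items carry CSR feature rows, so default_collate falls through to the csr_matrix branch shown above.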
/Classification/data/untitled.py: -------------------------------------------------------------------------------- 1 | def convert2npz(input_filename, out_data_filename): 2 | input = open(input_filename,"r") 3 | output_feature = open(out_data_filename,"w") 4 | #output_query = open(out_query_filename,"w") 5 | #output_label = open(out_query_filename2,"w") 6 | 7 | while True: 8 | line = input.readline() 9 | if not line: 10 | break 11 | tokens = line.split(' ') 12 | tokens[-1] = tokens[-1].strip() 13 | label = tokens[0] 14 | qid = int(tokens[1].split(':')[1]) 15 | 16 | #output_label.write(label + '\n') 17 | #output_query.write(qid + '\n') 18 | output_feature.write(label+' ') 19 | output_feature.write(str(qid) + ' ') 20 | output_feature.write(' '.join(tokens[2:]) + '\n') 21 | 22 | input.close() 23 | #output_query.close() 24 | output_feature.close() 25 | #output_query2.close() 26 | 27 | convert2npz("set1.train.txt","yahoo.train") 28 | convert2npz("set1.test.txt","yahoo.test") -------------------------------------------------------------------------------- /Classification/main_cls_cv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import sklearn 4 | import argparse 5 | import copy 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from data.sparseloader import DataLoader 10 | from data.data import LibSVMData, LibCSVData, CriteoCSVData 11 | from data.sparse_data import LibSVMDataSp 12 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 13 | from models.dynamic_net import DynamicNet, ForwardType 14 | from sklearn.preprocessing import StandardScaler, MinMaxScaler 15 | from torch.utils.data.sampler import SubsetRandomSampler 16 | from torch.optim import SGD, Adam 17 | from misc.auc import auc 18 | 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--feat_d', type=int, required=True) 22 | parser.add_argument('--hidden_d', type=int, required=True) 23 | parser.add_argument('--boost_rate', type=float, required=True) 24 | parser.add_argument('--lr', type=float, required=True) 25 | parser.add_argument('--num_nets', type=int, required=True) 26 | parser.add_argument('--data', type=str, required=True) 27 | parser.add_argument('--tr', type=str, required=True) 28 | parser.add_argument('--te', type=str, required=True) 29 | parser.add_argument('--batch_size', type=int, required=True) 30 | parser.add_argument('--epochs_per_stage', type=int, required=True) 31 | parser.add_argument('--correct_epoch', type=int ,required=True) 32 | parser.add_argument('--L2', type=float, required=True) 33 | parser.add_argument('--sparse', default=False, type=lambda x: (str(x).lower() == 'true')) 34 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 35 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 36 | parser.add_argument('--model_order',default='second', type=str) 37 | parser.add_argument('--out_f', type=str, required=True) 38 | parser.add_argument('--cuda', action='store_true') 39 | 40 | opt = parser.parse_args() 41 | 42 | if not opt.cuda: 43 | torch.set_num_threads(16) 44 | 45 | # prepare the dataset 46 | def get_data(): 47 | if opt.data in ['a9a', 'ijcnn1']: 48 | train = LibSVMData(opt.tr, opt.feat_d, opt.normalization) 49 | test = LibSVMData(opt.te, opt.feat_d, opt.normalization) 50 | elif opt.data == 'covtype': 51 | train = LibSVMData(opt.tr, opt.feat_d,opt.normalization, 1, 2) 52 | test = LibSVMData(opt.te, opt.feat_d,
opt.normalization, 1, 2) 53 | elif opt.data == 'mnist28': 54 | train = LibSVMData(opt.tr, opt.feat_d, opt.normalization, 2, 8) 55 | test = LibSVMData(opt.te, opt.feat_d, opt.normalization, 2, 8) 56 | elif opt.data == 'higgs': 57 | train = LibSVMData(opt.tr, opt.feat_d,opt.normalization, 0, 1) 58 | test = LibSVMData(opt.te, opt.feat_d,opt.normalization, 0, 1) 59 | elif opt.data == 'real-sim': 60 | train = LibSVMDataSp(opt.tr, opt.feat_d) 61 | test = LibSVMDataSp(opt.te, opt.feat_d) 62 | elif opt.data in ['criteo', 'criteo2', 'Allstate']: 63 | train = LibCSVData(opt.tr, opt.feat_d, 1, 0) 64 | test = LibCSVData(opt.te, opt.feat_d, 1, 0) 65 | elif opt.data == 'yahoo.pair': 66 | train = LibCSVData(opt.tr, opt.feat_d) 67 | test = LibCSVData(opt.te, opt.feat_d) 68 | else: 69 | pass 70 | 71 | val = [] 72 | if opt.cv: 73 | val = copy.deepcopy(train) 74 | 75 | # Split the data from cut point 76 | print('Creating Validation set! \n') 77 | indices = list(range(len(train))) 78 | cut = int(len(train)*0.95) 79 | np.random.shuffle(indices) 80 | train_idx = indices[:cut] 81 | val_idx = indices[cut:] 82 | 83 | train.feat = train.feat[train_idx] 84 | train.label = train.label[train_idx] 85 | val.feat = val.feat[val_idx] 86 | val.label = val.label[val_idx] 87 | 88 | if opt.normalization: 89 | scaler = MinMaxScaler() #StandardScaler() 90 | scaler.fit(train.feat) 91 | train.feat = scaler.transform(train.feat) 92 | test.feat = scaler.transform(test.feat) 93 | if opt.cv: 94 | val.feat = scaler.transform(val.feat) 95 | 96 | print(f'#Train: {len(train)}, #Val: {len(val)} #Test: {len(test)}') 97 | return train, test, val 98 | 99 | 100 | def get_optim(params, lr, weight_decay): 101 | optimizer = Adam(params, lr, weight_decay=weight_decay) 102 | return optimizer 103 | 104 | def accuracy(net_ensemble, test_loader): 105 | correct = 0 106 | total = 0 107 | loss = 0 108 | for x, y in test_loader: 109 | if opt.cuda: 110 | x, y = x.cuda(), y.cuda() 111 | with torch.no_grad(): 112 | middle_feat, out = net_ensemble.forward(x) 113 | correct += (torch.sum(y[out > 0.] 
> 0) + torch.sum(y[out < .0] < 0)).item() 114 | total += y.numel() 115 | return correct / total 116 | 117 | def logloss(net_ensemble, test_loader): 118 | loss = 0 119 | total = 0 120 | loss_f = nn.BCEWithLogitsLoss() # Binary cross entopy loss with logits, reduction=mean by default 121 | for x, y in test_loader: 122 | if opt.cuda: 123 | x, y= x.cuda(), y.cuda().view(-1, 1) 124 | y = (y + 1) / 2 125 | with torch.no_grad(): 126 | _, out = net_ensemble.forward(x) 127 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 128 | loss += loss_f(out, y) 129 | total += 1 130 | 131 | return loss / total 132 | 133 | def auc_score(net_ensemble, test_loader): 134 | actual = [] 135 | posterior = [] 136 | for x, y in test_loader: 137 | if opt.cuda: 138 | x = x.cuda() 139 | with torch.no_grad(): 140 | _, out = net_ensemble.forward(x) 141 | prob = 1.0 - 1.0 / torch.exp(out) # Why not using the scores themselve than converting to prob 142 | prob = prob.cpu().numpy().tolist() 143 | posterior.extend(prob) 144 | actual.extend(y.numpy().tolist()) 145 | score = auc(actual, posterior) 146 | return score 147 | 148 | def init_gbnn(train): 149 | positive = negative = 0 150 | for i in range(len(train)): 151 | if train[i][1] > 0: 152 | positive += 1 153 | else: 154 | negative += 1 155 | blind_acc = max(positive, negative) / (positive + negative) 156 | print(f'Blind accuracy: {blind_acc}') 157 | #print(f'Blind Logloss: {blind_acc}') 158 | return float(np.log(positive / negative)) 159 | 160 | if __name__ == "__main__": 161 | 162 | train, test, val = get_data() 163 | print(opt.data + ' training and test datasets are loaded!') 164 | train_loader = DataLoader(train, opt.batch_size, shuffle = True, drop_last=False, num_workers=2) 165 | test_loader = DataLoader(test, opt.batch_size, shuffle=False, drop_last=False, num_workers=2) 166 | if opt.cv: 167 | val_loader = DataLoader(val, opt.batch_size, shuffle=True, drop_last=False, num_workers=2) 168 | # For CV use 169 | best_score = 0 170 | val_score = best_score 171 | best_stage = opt.num_nets-1 172 | 173 | c0 = init_gbnn(train) 174 | net_ensemble = DynamicNet(c0, opt.boost_rate) 175 | loss_f1 = nn.MSELoss(reduction='none') 176 | loss_f2 = nn.BCEWithLogitsLoss(reduction='none') 177 | loss_models = torch.zeros((opt.num_nets, 3)) 178 | 179 | all_ensm_losses = [] 180 | all_ensm_losses_te = [] 181 | all_mdl_losses = [] 182 | dynamic_br = [] 183 | 184 | for stage in range(opt.num_nets): 185 | t0 = time.time() 186 | #### Higgs 100K, 1M , 10M experiment: Subsampling the data each model training time ############ 187 | indices = list(range(len(train))) 188 | split = 1000000 189 | indices = sklearn.utils.shuffle(indices, random_state=41) 190 | train_idx = indices[:split] 191 | train_sampler = SubsetRandomSampler(train_idx) 192 | train_loader = DataLoader(train, opt.batch_size, sampler = train_sampler, drop_last=True, num_workers=2) 193 | ################################################################################################ 194 | 195 | model = MLP_2HL.get_model(stage, opt) # Initialize the model_k: f_k(x), multilayer perception v2 196 | if opt.cuda: 197 | model.cuda() 198 | 199 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 200 | net_ensemble.to_train() # Set the models in ensemble net to train mode 201 | 202 | stage_mdlloss = [] 203 | for epoch in range(opt.epochs_per_stage): 204 | for i, (x, y) in enumerate(train_loader): 205 | if opt.cuda: 206 | x, y= x.cuda(), y.cuda().view(-1, 1) 207 | middle_feat, out = net_ensemble.forward(x) 208 | out = 
torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 209 | if opt.model_order=='first': 210 | grad_direction = y / (1.0 + torch.exp(y * out)) 211 | else: 212 | h = 1/((1+torch.exp(y*out))*(1+torch.exp(-y*out))) 213 | grad_direction = y * (1.0 + torch.exp(-y * out)) 214 | out = torch.as_tensor(out) 215 | nwtn_weights = (torch.exp(out) + torch.exp(-out)).abs() 216 | _, out = model(x, middle_feat) 217 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 218 | loss = loss_f1(net_ensemble.boost_rate*out, grad_direction) # T 219 | loss = loss*h 220 | loss = loss.mean() 221 | model.zero_grad() 222 | loss.backward() 223 | optimizer.step() 224 | stage_mdlloss.append(loss.item()) 225 | 226 | net_ensemble.add(model) 227 | sml = np.mean(stage_mdlloss) 228 | 229 | 230 | stage_loss = [] 231 | lr_scaler = 2 232 | # fully-corrective step 233 | if stage != 0: 234 | # Adjusting corrective step learning rate 235 | if stage % 15 == 0: 236 | #lr_scaler *= 2 237 | opt.lr /= 2 238 | opt.L2 /= 2 239 | optimizer = get_optim(net_ensemble.parameters(), opt.lr / lr_scaler, opt.L2) 240 | for _ in range(opt.correct_epoch): 241 | for i, (x, y) in enumerate(train_loader): 242 | if opt.cuda: 243 | x, y = x.cuda(), y.cuda().view(-1, 1) 244 | _, out = net_ensemble.forward_grad(x) 245 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 246 | y = (y + 1.0) / 2.0 247 | loss = loss_f2(out, y).mean() 248 | optimizer.zero_grad() 249 | loss.backward() 250 | optimizer.step() 251 | stage_loss.append(loss.item()) 252 | 253 | 254 | sl_te = logloss(net_ensemble, test_loader) 255 | # Store dynamic boost rate 256 | dynamic_br.append(net_ensemble.boost_rate.item()) 257 | # store model 258 | net_ensemble.to_file(opt.out_f) 259 | net_ensemble = DynamicNet.from_file(opt.out_f, lambda stage: MLP_2HL.get_model(stage, opt)) 260 | 261 | elapsed_tr = time.time()-t0 262 | sl = 0 263 | if stage_loss != []: 264 | sl = np.mean(stage_loss) 265 | 266 | 267 | 268 | all_ensm_losses.append(sl) 269 | all_ensm_losses_te.append(sl_te) 270 | all_mdl_losses.append(sml) 271 | print(f'Stage - {stage}, training time: {elapsed_tr: .1f} sec, boost rate: {net_ensemble.boost_rate: .4f}, Training Loss: {sl: .4f}, Test Loss: {sl_te: .4f}') 272 | 273 | 274 | if opt.cuda: 275 | net_ensemble.to_cuda() 276 | net_ensemble.to_eval() # Set the models in ensemble net to eval mode 277 | 278 | # Train 279 | print('Acc results from stage := ' + str(stage) + '\n') 280 | # AUC 281 | if opt.cv: 282 | val_score = auc_score(net_ensemble, val_loader) 283 | if val_score > best_score: 284 | best_score = val_score 285 | best_stage = stage 286 | 287 | test_score = auc_score(net_ensemble, test_loader) 288 | print(f'Stage: {stage}, AUC@Val: {val_score:.4f}, AUC@Test: {test_score:.4f}') 289 | 290 | loss_models[stage, 1], loss_models[stage, 2] = val_score, test_score 291 | 292 | val_auc, te_auc = loss_models[best_stage, 1], loss_models[best_stage, 2] 293 | print(f'Best validation stage: {best_stage}, AUC@Val: {val_auc:.4f}, final AUC@Test: {te_auc:.4f}') 294 | 295 | loss_models = loss_models.detach().cpu().numpy() 296 | fname = 'tr_ts_' + opt.data +'_auc' 297 | np.save(fname, loss_models) 298 | 299 | fname = './results/' + opt.data + '_cls' 300 | np.savez(fname, training_loss=all_ensm_losses, test_loss=all_ensm_losses_te, model_losses=all_mdl_losses, dynamic_boostrate=dynamic_br, params=opt) 301 | 302 | -------------------------------------------------------------------------------- /Classification/misc/auc.py: 
-------------------------------------------------------------------------------- 1 | 2 | def tied_rank(x): 3 | """ 4 | Computes the tied rank of elements in x. 5 | 6 | This function computes the tied rank of elements in x. 7 | 8 | Parameters 9 | ---------- 10 | x : list of numbers, numpy array 11 | 12 | Returns 13 | ------- 14 | score : list of numbers 15 | The tied rank f each element in x 16 | 17 | """ 18 | sorted_x = sorted(zip(x,range(len(x)))) 19 | r = [0 for k in x] 20 | cur_val = sorted_x[0][0] 21 | last_rank = 0 22 | for i in range(len(sorted_x)): 23 | if cur_val != sorted_x[i][0]: 24 | cur_val = sorted_x[i][0] 25 | for j in range(last_rank, i): 26 | r[sorted_x[j][1]] = float(last_rank+1+i)/2.0 27 | last_rank = i 28 | if i==len(sorted_x)-1: 29 | for j in range(last_rank, i+1): 30 | r[sorted_x[j][1]] = float(last_rank+i+2)/2.0 31 | return r 32 | 33 | def auc(actual, posterior): 34 | """ 35 | Computes the area under the receiver-operater characteristic (AUC) 36 | 37 | This function computes the AUC error metric for binary classification. 38 | 39 | Parameters 40 | ---------- 41 | actual : list of binary numbers, numpy array 42 | The ground truth value 43 | posterior : same type as actual 44 | Defines a ranking on the binary numbers, from most likely to 45 | be positive to least likely to be positive. 46 | 47 | Returns 48 | ------- 49 | score : double 50 | The mean squared error between actual and posterior 51 | 52 | """ 53 | r = tied_rank(posterior) 54 | num_positive = len([0 for x in actual if x==1]) 55 | num_negative = len(actual)-num_positive 56 | sum_positive = sum([r[i] for i in range(len(r)) if actual[i]==1]) 57 | auc = ((sum_positive - num_positive*(num_positive+1)/2.0) / 58 | (num_negative*num_positive)) 59 | return auc -------------------------------------------------------------------------------- /Classification/misc/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def ranking_precision_score(y_true, y_score, k=10): 5 | """Precision at rank k 6 | 7 | Parameters 8 | ---------- 9 | y_true : array-like, shape = [n_samples] 10 | Ground truth (true relevance labels). 11 | 12 | y_score : array-like, shape = [n_samples] 13 | Predicted scores. 14 | 15 | k : int 16 | Rank. 17 | 18 | Returns 19 | ------- 20 | precision @k : float 21 | """ 22 | unique_y = np.unique(y_true) 23 | 24 | if len(unique_y) > 2: 25 | raise ValueError("Only supported for two relevance levels.") 26 | 27 | pos_label = unique_y[1] 28 | n_pos = np.sum(y_true == pos_label) 29 | 30 | order = np.argsort(y_score)[::-1] 31 | y_true = np.take(y_true, order[:k]) 32 | n_relevant = np.sum(y_true == pos_label) 33 | 34 | # Divide by min(n_pos, k) such that the best achievable score is always 1.0. 35 | return float(n_relevant) / min(n_pos, k) 36 | 37 | 38 | def average_precision_score(y_true, y_score, k=10): 39 | """Average precision at rank k 40 | 41 | Parameters 42 | ---------- 43 | y_true : array-like, shape = [n_samples] 44 | Ground truth (true relevance labels). 45 | 46 | y_score : array-like, shape = [n_samples] 47 | Predicted scores. 48 | 49 | k : int 50 | Rank. 
51 | 52 | Returns 53 | ------- 54 | average precision @k : float 55 | """ 56 | unique_y = np.unique(y_true) 57 | 58 | if len(unique_y) > 2: 59 | raise ValueError("Only supported for two relevance levels.") 60 | 61 | pos_label = unique_y[1] 62 | n_pos = np.sum(y_true == pos_label) 63 | 64 | order = np.argsort(y_score)[::-1][:min(n_pos, k)] 65 | y_true = np.asarray(y_true)[order] 66 | 67 | score = 0 68 | for i in xrange(len(y_true)): 69 | if y_true[i] == pos_label: 70 | # Compute precision up to document i 71 | # i.e, percentage of relevant documents up to document i. 72 | prec = 0 73 | for j in xrange(0, i + 1): 74 | if y_true[j] == pos_label: 75 | prec += 1.0 76 | prec /= (i + 1.0) 77 | score += prec 78 | 79 | if n_pos == 0: 80 | return 0 81 | 82 | return score / n_pos 83 | 84 | 85 | def dcg_score(y_true, y_score, k=10, gains="exponential"): 86 | """Discounted cumulative gain (DCG) at rank k 87 | 88 | Parameters 89 | ---------- 90 | y_true : array-like, shape = [n_samples] 91 | Ground truth (true relevance labels). 92 | 93 | y_score : array-like, shape = [n_samples] 94 | Predicted scores. 95 | 96 | k : int 97 | Rank. 98 | 99 | gains : str 100 | Whether gains should be "exponential" (default) or "linear". 101 | 102 | Returns 103 | ------- 104 | DCG @k : float 105 | """ 106 | order = np.argsort(y_score)[::-1] 107 | y_true = y_true[order[:k]] 108 | if y_true.any(): 109 | if gains == "exponential": 110 | gains = 2 ** y_true - 1 111 | elif gains == "linear": 112 | gains = y_true 113 | else: 114 | raise ValueError("Invalid gains option.") 115 | 116 | # highest rank is 1 so +2 instead of +1 117 | discounts = np.log2(np.arange(len(y_true)) + 2) 118 | return np.sum(gains / discounts) 119 | return 0 120 | 121 | def ndcg_score(y_true, y_score, k=10, gains="exponential"): 122 | """Normalized discounted cumulative gain (NDCG) at rank k 123 | 124 | Parameters 125 | ---------- 126 | y_true : array-like, shape = [n_samples] 127 | Ground truth (true relevance labels). 128 | 129 | y_score : array-like, shape = [n_samples] 130 | Predicted scores. 131 | 132 | k : int 133 | Rank. 134 | 135 | gains : str 136 | Whether gains should be "exponential" (default) or "linear". 137 | 138 | Returns 139 | ------- 140 | NDCG @k : float 141 | """ 142 | best = dcg_score(y_true, y_true, k, gains) 143 | if best == 0: 144 | return 0 145 | actual = dcg_score(y_true, y_score, k, gains) 146 | return actual / best 147 | 148 | 149 | # Alternative API. 150 | 151 | def dcg_from_ranking(y_true, ranking): 152 | """Discounted cumulative gain (DCG) at rank k 153 | 154 | Parameters 155 | ---------- 156 | y_true : array-like, shape = [n_samples] 157 | Ground truth (true relevance labels). 158 | 159 | ranking : array-like, shape = [k] 160 | Document indices, i.e., 161 | ranking[0] is the index of top-ranked document, 162 | ranking[1] is the index of second-ranked document, 163 | ... 164 | 165 | k : int 166 | Rank. 167 | 168 | Returns 169 | ------- 170 | DCG @k : float 171 | """ 172 | y_true = np.asarray(y_true) 173 | ranking = np.asarray(ranking) 174 | rel = y_true[ranking] 175 | gains = 2 ** rel - 1 176 | discounts = np.log2(np.arange(len(ranking)) + 2) 177 | return np.sum(gains / discounts) 178 | 179 | 180 | def ndcg_from_ranking(y_true, ranking): 181 | """Normalized discounted cumulative gain (NDCG) at rank k 182 | 183 | Parameters 184 | ---------- 185 | y_true : array-like, shape = [n_samples] 186 | Ground truth (true relevance labels). 
187 | 188 | ranking : array-like, shape = [k] 189 | Document indices, i.e., 190 | ranking[0] is the index of top-ranked document, 191 | ranking[1] is the index of second-ranked document, 192 | ... 193 | 194 | k : int 195 | Rank. 196 | 197 | Returns 198 | ------- 199 | NDCG @k : float 200 | """ 201 | k = len(ranking) 202 | best_ranking = np.argsort(y_true)[::-1] 203 | best = dcg_from_ranking(y_true, best_ranking[:k]) 204 | return dcg_from_ranking(y_true, ranking) / best 205 | 206 | 207 | 208 | 209 | -------------------------------------------------------------------------------- /Classification/models/dynamic_net.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import torch 3 | #import pickle 4 | import torch.nn as nn 5 | 6 | class ForwardType(Enum): 7 | SIMPLE = 0 8 | STACKED = 1 9 | CASCADE = 2 10 | GRADIENT = 3 11 | 12 | class DynamicNet(object): 13 | def __init__(self, c0, lr): 14 | self.models = [] 15 | self.c0 = c0 16 | self.lr = lr 17 | self.boost_rate = nn.Parameter(torch.tensor(lr, requires_grad=True, device="cuda")) 18 | 19 | def add(self, model): 20 | self.models.append(model) 21 | 22 | def parameters(self): 23 | params = [] 24 | for m in self.models: 25 | params.extend(m.parameters()) 26 | 27 | params.append(self.boost_rate) 28 | return params 29 | 30 | def zero_grad(self): 31 | for m in self.models: 32 | m.zero_grad() 33 | 34 | def to_cuda(self): 35 | for m in self.models: 36 | m.cuda() 37 | 38 | def to_eval(self): 39 | for m in self.models: 40 | m.eval() 41 | 42 | def to_train(self): 43 | for m in self.models: 44 | m.train(True) 45 | 46 | def forward(self, x): 47 | if len(self.models) == 0: 48 | return None, self.c0 49 | middle_feat_cum = None 50 | prediction = None 51 | with torch.no_grad(): 52 | for m in self.models: 53 | if middle_feat_cum is None: 54 | middle_feat_cum, prediction = m(x, middle_feat_cum) 55 | else: 56 | middle_feat_cum, pred = m(x, middle_feat_cum) 57 | prediction += pred 58 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 59 | 60 | def forward_grad(self, x): 61 | if len(self.models) == 0: 62 | return None, self.c0 63 | # at least one model 64 | middle_feat_cum = None 65 | prediction = None 66 | for m in self.models: 67 | if middle_feat_cum is None: 68 | middle_feat_cum, prediction = m(x, middle_feat_cum) 69 | else: 70 | middle_feat_cum, pred = m(x, middle_feat_cum) 71 | prediction += pred 72 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 73 | 74 | @classmethod 75 | def from_file(cls, path, builder): 76 | d = torch.load(path) 77 | net = DynamicNet(d['c0'], d['lr']) 78 | net.boost_rate = d['boost_rate'] 79 | for stage, m in enumerate(d['models']): 80 | submod = builder(stage) 81 | submod.load_state_dict(m) 82 | net.add(submod) 83 | return net 84 | 85 | def to_file(self, path): 86 | models = [m.state_dict() for m in self.models] 87 | d = {'models': models, 'c0': self.c0, 'lr': self.lr, 'boost_rate': self.boost_rate} 88 | torch.save(d, path) 89 | -------------------------------------------------------------------------------- /Classification/models/mlp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .splinear import SpLinear 6 | 7 | 8 | class MLP_1HL(nn.Module): 9 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 10 | super(MLP_1HL, self).__init__() 11 | self.in_layer = SpLinear(dim_in, 
dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 12 | self.out_layer = nn.Linear(dim_hidden1, 1) 13 | self.lrelu = nn.LeakyReLU(0.1) 14 | self.relu = nn.ReLU() 15 | if bn: 16 | self.bn = nn.BatchNorm1d(dim_hidden1) 17 | self.bn2 = nn.BatchNorm1d(dim_in) 18 | 19 | def forward(self, x, lower_f): 20 | if lower_f is not None: 21 | x = torch.cat([x, lower_f], dim=1) 22 | x = self.bn2(x) 23 | out = self.in_layer(x) 24 | return out, self.out_layer(self.relu(out)).squeeze() 25 | 26 | @classmethod 27 | def get_model(cls, stage, opt): 28 | if stage == 0: 29 | dim_in = opt.feat_d 30 | else: 31 | dim_in = opt.feat_d + opt.hidden_d 32 | model = MLP_1HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 33 | return model 34 | 35 | 36 | class MLP_2HL(nn.Module): 37 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 38 | super(MLP_2HL, self).__init__() 39 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 40 | self.dropout_layer = nn.Dropout(0.0) 41 | self.lrelu = nn.LeakyReLU(0.1) 42 | self.relu = nn.ReLU() 43 | self.hidden_layer = nn.Linear(dim_hidden1, dim_hidden2) 44 | self.out_layer = nn.Linear(dim_hidden2, 1) 45 | self.bn = nn.BatchNorm1d(dim_hidden1) 46 | self.bn2 = nn.BatchNorm1d(dim_in) 47 | 48 | def forward(self, x, lower_f): 49 | if lower_f is not None: 50 | x = torch.cat([x, lower_f], dim=1) 51 | x = self.bn2(x) 52 | out = self.lrelu(self.in_layer(x)) 53 | out = self.bn(out) 54 | out = self.hidden_layer(out) 55 | return out, self.out_layer(self.relu(out)).squeeze() 56 | 57 | @classmethod 58 | def get_model(cls, stage, opt): 59 | if stage == 0: 60 | dim_in = opt.feat_d 61 | else: 62 | dim_in = opt.feat_d + opt.hidden_d 63 | model = MLP_2HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 64 | return model 65 | 66 | class MLP_3HL(nn.Module): 67 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 68 | super(MLP_3HL, self).__init__() 69 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 70 | self.dropout_layer = nn.Dropout(0.0) 71 | self.lrelu = nn.LeakyReLU(0.1) 72 | self.relu = nn.ReLU() 73 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 74 | self.out_layer = nn.Linear(dim_hidden1, 1) 75 | self.bn = nn.BatchNorm1d(dim_hidden1) 76 | self.bn2 = nn.BatchNorm1d(dim_in) 77 | # print('Batch normalization is processed!') 78 | 79 | def forward(self, x, lower_f): 80 | if lower_f is not None: 81 | x = torch.cat([x, lower_f], dim=1) 82 | x = self.bn2(x) 83 | out = self.lrelu(self.in_layer(x)) 84 | out = self.bn(out) 85 | out = self.lrelu(self.hidden_layer(out)) 86 | out = self.bn(out) 87 | out = self.hidden_layer(out) 88 | return out, self.out_layer(self.relu(out)).squeeze() 89 | 90 | @classmethod 91 | def get_model(cls, stage, opt): 92 | if stage == 0: 93 | dim_in = opt.feat_d 94 | else: 95 | dim_in = opt.feat_d + opt.hidden_d 96 | model = MLP_3HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 97 | return model 98 | 99 | class MLP_4HL(nn.Module): 100 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 101 | super(MLP_3HL, self).__init__() 102 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 103 | self.dropout_layer = nn.Dropout(0.0) 104 | self.lrelu = nn.LeakyReLU(0.1) 105 | self.relu = nn.ReLU() 106 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 107 | self.out_layer = nn.Linear(dim_hidden1, 1) 108 | self.bn = nn.BatchNorm1d(dim_hidden1) 109 | self.bn2 = 
nn.BatchNorm1d(dim_in) 110 | # print('Batch normalization is processed!') 111 | 112 | def forward(self, x, lower_f): 113 | if lower_f is not None: 114 | x = torch.cat([x, lower_f], dim=1) 115 | x = self.bn2(x) 116 | out = self.lrelu(self.in_layer(x)) #HL-1 117 | out = self.bn(out) 118 | out = self.lrelu(self.hidden_layer(out)) #HL-2 119 | out = self.bn(out) 120 | out = self.lrelu(self.hidden_layer(out)) #HL-3 121 | out = self.bn(out) 122 | out = self.hidden_layer(out) #HL-4 123 | return out, self.out_layer(self.relu(out)).squeeze() 124 | 125 | @classmethod 126 | def get_model(cls, stage, opt): 127 | if stage == 0: 128 | dim_in = opt.feat_d 129 | else: 130 | dim_in = opt.feat_d + opt.hidden_d 131 | model = MLP_3HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 132 | return model 133 | 134 | 135 | class DNN(nn.Module): 136 | def __init__(self, dim_in, dim_hidden, n_hidden=20, sparse=False, bn=True, drop_out=0.3): 137 | super(DNN, self).__init__() 138 | if sparse: 139 | self.in_layer = SpLinear(dim_in, dim_hidden) 140 | else: 141 | self.in_layer = nn.Linear(dim_in, dim_hidden) 142 | self.in_act = nn.SELU() 143 | hidden_layers = [] 144 | for _ in range(n_hidden): 145 | hidden_layers.append(nn.Linear(dim_hidden, dim_hidden)) 146 | if bn: 147 | hidden_layers.append(nn.BatchNorm1d(dim_hidden)) 148 | hidden_layers.append(nn.SELU()) 149 | if drop_out > 0: 150 | hidden_layers.append(nn.Dropout(drop_out)) 151 | self.hidden_layers = nn.Sequential(*hidden_layers) 152 | self.out_layer = nn.Linear(dim_hidden, 1) 153 | 154 | def forward(self, x): 155 | out = self.in_act(self.in_layer(x)) 156 | out = self.hidden_layers(out) 157 | out = self.out_layer(out) 158 | return out.squeeze() 159 | -------------------------------------------------------------------------------- /Classification/models/splinear.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class SpLinearFunc(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, input, weight, bias=None): 10 | ctx.save_for_backward(input, weight, bias) 11 | output = input.mm(weight.t()) 12 | if bias is not None: 13 | output += bias.unsqueeze(0).expand_as(output) 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx, grad_output): 18 | input, weight, bias = ctx.saved_tensors 19 | grad_input = grad_weight = grad_bias = None 20 | 21 | if ctx.needs_input_grad[0]: 22 | grad_input = grad_output.mm(weight) 23 | if ctx.needs_input_grad[1]: 24 | grad_weight = (input.t().mm(grad_output)).t() 25 | if bias is not None and ctx.needs_input_grad[2]: 26 | grad_bias = grad_output.sum(0).squeeze(0) 27 | 28 | return grad_input, grad_weight, grad_bias 29 | 30 | splinear = SpLinearFunc.apply 31 | 32 | class SpLinear(nn.Module): 33 | def __init__(self, input_features, output_features, bias=True): 34 | super(SpLinear, self).__init__() 35 | self.input_features = input_features 36 | self.output_features = output_features 37 | self.weight = nn.Parameter(torch.Tensor(output_features, input_features)) 38 | if bias: 39 | self.bias = nn.Parameter(torch.Tensor(output_features)) 40 | else: 41 | self.register_parameter('bias', None) 42 | #TODO write a default initialization 43 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 44 | self.weight.data.uniform_(-stdv, stdv) 45 | 46 | def forward(self, input): 47 | return splinear(input, self.weight, self.bias) 48 | -------------------------------------------------------------------------------- /Classification/results/results_readme.txt: -------------------------------------------------------------------------------- 1 | Your results will be saved here. -------------------------------------------------------------------------------- /Classification/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### Feature Table ### 4 | # a9a 123 5 | # ijcnn1 22 6 | # covtype 54 7 | # mnist28 752 8 | # real-sim 20958 9 | # higgs 28 10 | dataset=higgs 11 | 12 | BASEDIR=$(dirname "$0") 13 | OUTDIR="${BASEDIR}/ckpt/" 14 | 15 | if [ ! -d "${OUTDIR}" ] 16 | then 17 | echo "Output dir ${OUTDIR} does not exist, creating..." 18 | mkdir -p ${OUTDIR} 19 | fi 20 | 21 | CUDA_VISIBLE_DEVICES=0 python -u main_cls_cv.py \ 22 | --feat_d 28 \ 23 | --hidden_d 16 \ 24 | --boost_rate 1 \ 25 | --lr 0.005 \ 26 | --L2 .0e-3 \ 27 | --num_nets 40 \ 28 | --data ${dataset} \ 29 | --tr ${BASEDIR}/../data/${dataset}.train \ 30 | --te ${BASEDIR}/../data/${dataset}.test \ 31 | --batch_size 2048 \ 32 | --epochs_per_stage 1 \ 33 | --correct_epoch 1 \ 34 | --model_order second \ 35 | --normalization True \ 36 | --cv True \ 37 | --sparse False \ 38 | --out_f ${OUTDIR}/${dataset}_cls.pth \ 39 | --cuda 40 | -------------------------------------------------------------------------------- /L2R/DataLoader/DataLoader.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import pandas as pd 5 | import numpy as np 6 | from sklearn import preprocessing 7 | 8 | 9 | def get_time(): 10 | return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') 11 | 12 | 13 | class L2R_DataLoader: 14 | 15 | def __init__(self, path): 16 | """ 17 | :param path: str 18 | """ 19 | self.path = path 20 | self.pickle_path = path[:-3] + 'pkl' 21 | self.df = None 22 | self.num_pairs = None 23 | self.num_sessions = None 24 | 25 | def get_num_pairs(self): 26 | if self.num_pairs is not None: 27 | return self.num_pairs 28 | self.num_pairs = 0 29 | for _, Y in self.generate_batch_per_query(self.df): 30 | Y = Y.reshape(-1, 1) 31 | pairs = Y - Y.T 32 | pos_pairs = np.sum(pairs > 0, (0, 1)) 33 | neg_pairs = np.sum(pairs < 0, (0, 1)) 34 | assert pos_pairs == neg_pairs 35 | self.num_pairs += pos_pairs + neg_pairs 36 | return self.num_pairs 37 | 38 | def get_num_sessions(self): 39 | return self.num_sessions 40 | 41 | def _load_mslr(self): 42 | print(get_time(), "load file from {}".format(self.path)) 43 | df = pd.read_csv(self.path, sep=" ", header=None) 44 | df.drop(columns=df.columns[-1], inplace=True) 45 | self.num_features = len(df.columns) - 2 46 | self.num_paris = None 47 | print(get_time(), "finish loading from {}".format(self.path)) 48 | print("dataframe shape: {}, features: {}".format(df.shape, self.num_features)) 49 | return df 50 | 51 | def _parse_feature_and_label(self, df): 52 | """ 53 | :param df: pandas.DataFrame 54 | :return: pandas.DataFrame 55 | """ 56 | print(get_time(), "parse dataframe ...", df.shape) 57 | for col in range(1, len(df.columns)): 58 | if ':' in str(df.iloc[:, col][0]): 59 | df.iloc[:, col] = df.iloc[:, col].apply(lambda x: x.split(":")[1]) 60 | df.columns = ['rel', 'qid'] + [str(f) for f in range(1, len(df.columns) - 1)] 61 | 62 | for col in [str(f) for f in 
range(1, len(df.columns) - 1)]: 63 | df[col] = df[col].astype(np.float32) 64 | 65 | print(get_time(), "finish parsing dataframe") 66 | self.df = df 67 | self.num_sessions = len(df.qid.unique()) 68 | return df 69 | 70 | def generate_query_pairs(self, df, qid): 71 | """ 72 | :param df: pandas.DataFrame, contains column qid, rel, fid from 1 to self.num_features 73 | :param qid: query id 74 | :returns: numpy.ndarray of x_i, y_i, x_j, y_j 75 | """ 76 | df_qid = df[df.qid == qid] 77 | rels = df_qid.rel.unique() 78 | x_i, x_j, y_i, y_j = [], [], [], [] 79 | for r in rels: 80 | df1 = df_qid[df_qid.rel == r] 81 | df2 = df_qid[df_qid.rel != r] 82 | df_merged = pd.merge(df1, df2, on='qid') 83 | df_merged.reindex(np.random.permutation(df_merged.index)) 84 | y_i.append(df_merged.rel_x.values.reshape(-1, 1)) 85 | y_j.append(df_merged.rel_y.values.reshape(-1, 1)) 86 | x_i.append(df_merged[['{}_x'.format(i) for i in range(1, self.num_features + 1)]].values) 87 | x_j.append(df_merged[['{}_y'.format(i) for i in range(1, self.num_features + 1)]].values) 88 | return np.vstack(x_i), np.vstack(y_i), np.vstack(x_j), np.vstack(y_j) 89 | 90 | def generate_query_pair_batch(self, df=None, batchsize=2000): 91 | """ 92 | :param df: pandas.DataFrame, contains column qid 93 | :returns: numpy.ndarray of x_i, y_i, x_j, y_j 94 | """ 95 | if df is None: 96 | df = self.df 97 | x_i_buf, y_i_buf, x_j_buf, y_j_buf = None, None, None, None 98 | qids = df.qid.unique() 99 | np.random.shuffle(qids) 100 | for qid in qids: 101 | x_i, y_i, x_j, y_j = self.generate_query_pairs(df, qid) 102 | if x_i_buf is None: 103 | x_i_buf, y_i_buf, x_j_buf, y_j_buf = x_i, y_i, x_j, y_j 104 | else: 105 | x_i_buf = np.vstack((x_i_buf, x_i)) 106 | y_i_buf = np.vstack((y_i_buf, y_i)) 107 | x_j_buf = np.vstack((x_j_buf, x_j)) 108 | y_j_buf = np.vstack((y_j_buf, y_j)) 109 | idx = 0 110 | while (idx + 1) * batchsize <= x_i_buf.shape[0]: 111 | start = idx * batchsize 112 | end = (idx + 1) * batchsize 113 | yield x_i_buf[start: end, :], y_i_buf[start: end, :], x_j_buf[start: end, :], y_j_buf[start: end, :] 114 | idx += 1 115 | 116 | x_i_buf = x_i_buf[idx * batchsize:, :] 117 | y_i_buf = y_i_buf[idx * batchsize:, :] 118 | x_j_buf = x_j_buf[idx * batchsize:, :] 119 | y_j_buf = y_j_buf[idx * batchsize:, :] 120 | 121 | yield x_i_buf, y_i_buf, x_j_buf, y_j_buf 122 | 123 | def generate_query_batch(self, df, batchsize): 124 | """ 125 | :param df: pandas.DataFrame, contains column qid 126 | :returns: numpy.ndarray qid, rel, x_i 127 | """ 128 | idx = 0 129 | while idx * batchsize < df.shape[0]: 130 | r = df.iloc[idx * batchsize: (idx + 1) * batchsize, :] 131 | yield r.qid.values, r.rel.values, r[['{}'.format(i) for i in range(1, self.num_features + 1)]].values 132 | idx += 1 133 | 134 | 135 | def generate_batch_per_query(self, df=None): 136 | """ 137 | :param df: pandas.DataFrame 138 | :return: X for features, y for relavance 139 | :rtype: numpy.ndarray, numpy.ndarray 140 | """ 141 | if df is None: 142 | df = self.df 143 | qids = df.qid.unique() 144 | np.random.shuffle(qids) 145 | for qid in qids: 146 | df_qid = df[df.qid == qid] 147 | yield df_qid[['{}'.format(i) for i in range(1, self.num_features + 1)]].values, df_qid.rel.values 148 | 149 | def load(self): 150 | """ 151 | :return: pandas.DataFrame 152 | """ 153 | if os.path.isfile(self.pickle_path): 154 | print(get_time(), "load from pickle file {}".format(self.pickle_path)) 155 | self.df = pd.read_pickle(self.pickle_path) 156 | self.num_features = len(self.df.columns) - 2 157 | self.num_paris = None 158 | 
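# A cached .pkl of the parsed dataframe is reused here when present; otherwise the raw
# LETOR-format text file is parsed and pickled in the else branch below. (num_paris above
# is a typo of num_pairs; get_num_pairs() recomputes the pair count lazily, so the
# attribute is effectively unused.)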
self.num_sessions = len(self.df.qid.unique()) 159 | else: 160 | self.df = self._parse_feature_and_label(self._load_mslr()) 161 | self.df.to_pickle(self.pickle_path) 162 | return self.df 163 | 164 | def train_scaler_and_transform(self): 165 | """Learn a scalar and apply transform.""" 166 | feature_columns = [str(i) for i in range(1, self.num_features + 1)] 167 | X_train = self.df[feature_columns] 168 | #scaler = preprocessing.StandardScaler().fit(X_train) 169 | scaler = preprocessing.MinMaxScaler().fit(X_train) 170 | self.df[feature_columns] = scaler.transform(X_train) 171 | return self.df, scaler 172 | 173 | def apply_scaler(self, scaler): 174 | print(get_time(), "apply scaler to transform feature for {}".format(self.path)) 175 | feature_columns = [str(i) for i in range(1, self.num_features + 1)] 176 | X_train = self.df[feature_columns] 177 | self.df[feature_columns] = scaler.transform(X_train) 178 | return self.df -------------------------------------------------------------------------------- /L2R/Misc/Calculations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import torch 4 | 5 | def loss_calc_(y_true, y_pred, gain_type, sigma, N, device): 6 | 7 | # compute the rank order of each document 8 | rank_df = pd.DataFrame({"y": y_true, "doc": np.arange(y_true.shape[0])}) 9 | rank_df = rank_df.sort_values("y").reset_index(drop=True) 10 | rank_order = rank_df.sort_values("doc").index.values + 1 11 | 12 | pos_pairs_score_diff = 1.0 + torch.exp(-sigma * (y_pred - y_pred.t())) 13 | 14 | y_tensor = torch.tensor(y_true, dtype=torch.float32, device=device).view(-1, 1) 15 | rel_diff = y_tensor - y_tensor.t() 16 | pos_pairs = (rel_diff > 0).type(torch.float32) 17 | neg_pairs = (rel_diff < 0).type(torch.float32) 18 | Sij = pos_pairs - neg_pairs 19 | if gain_type == "exp2": 20 | gain_diff = torch.pow(2.0, y_tensor) - torch.pow(2.0, y_tensor.t()) 21 | elif gain_type == "identity": 22 | gain_diff = y_tensor - y_tensor.t() 23 | else: 24 | raise ValueError("NDCG_gain method not supported yet {}".format(ndcg_gain_in_train)) 25 | 26 | rank_order_tensor = torch.tensor(rank_order, dtype=torch.float32, device=device).view(-1, 1) 27 | decay_diff = 1.0 / torch.log2(rank_order_tensor + 1.0) - 1.0 / torch.log2(rank_order_tensor.t() + 1.0) 28 | 29 | loss = (0.5*sigma*(1 - Sij)*(y_pred - y_pred.t()) + torch.log(pos_pairs_score_diff)) 30 | loss = torch.sum(loss, 1, keepdim=True) 31 | #import ipdb; ipdb.set_trace() 32 | return loss 33 | 34 | 35 | def grad_calc_(y_true, y_pred, gain_type, sigma, N, device): 36 | 37 | # compute the rank order of each document 38 | rank_df = pd.DataFrame({"y": y_true, "doc": np.arange(y_true.shape[0])}) 39 | rank_df = rank_df.sort_values("y").reset_index(drop=True) 40 | rank_order = rank_df.sort_values("doc").index.values + 1 41 | 42 | pos_pairs_score_diff = 1.0/(1.0 + torch.exp(sigma * (y_pred - y_pred.t()))) 43 | 44 | y_tensor = torch.tensor(y_true, dtype=torch.float32, device=device).view(-1, 1) 45 | rel_diff = y_tensor - y_tensor.t() 46 | pos_pairs = (rel_diff > 0).type(torch.float32) 47 | neg_pairs = (rel_diff < 0).type(torch.float32) 48 | Sij = pos_pairs - neg_pairs 49 | if gain_type == "exp2": 50 | gain_diff = torch.pow(2.0, y_tensor) - torch.pow(2.0, y_tensor.t()) 51 | elif gain_type == "identity": 52 | gain_diff = y_tensor - y_tensor.t() 53 | else: 54 | raise ValueError("NDCG_gain method not supported yet {}".format(ndcg_gain_in_train)) 55 | 56 | rank_order_tensor = torch.tensor(rank_order, 
dtype=torch.float32, device=device).view(-1, 1) 57 | decay_diff = 1.0 / torch.log2(rank_order_tensor + 1.0) - 1.0 / torch.log2(rank_order_tensor.t() + 1.0) 58 | 59 | grad_ord1 = sigma * (0.5 * (1 - Sij) - pos_pairs_score_diff) 60 | grad_ord2 = sigma*sigma*pos_pairs_score_diff*(1-pos_pairs_score_diff) 61 | 62 | #import ipdb; ipdb.set_trace() 63 | 64 | grad_ord1 = torch.sum(grad_ord1, 1, keepdim=True) 65 | grad_ord2 = torch.sum(grad_ord2, 1, keepdim=True) 66 | 67 | 68 | #print(grad_ord1.shape, y_pred.shape) 69 | assert grad_ord1.shape == y_pred.shape 70 | check_grad = torch.sum(grad_ord1, (0, 1)).item() 71 | check_grad2 = torch.sum(grad_ord2, (0, 1)).item() 72 | 73 | if check_grad == float('inf') or np.isnan(check_grad) or check_grad2 == float('inf') or np.isnan(check_grad2): 74 | import ipdb; ipdb.set_trace() 75 | 76 | return grad_ord1, grad_ord2 77 | 78 | 79 | def grad_calc_v2(y_true, y_pred, gain_type, sigma, N, device): 80 | # Normalize the gradients with NDCG delta adopted from Microsoft paper 81 | 82 | 83 | # Only pairs with positive rel values 84 | # compute the rank order of each document 85 | rank_df = pd.DataFrame({"y": y_true, "doc": np.arange(y_true.shape[0])}) 86 | rank_df = rank_df.sort_values("y").reset_index(drop=True) 87 | rank_order = rank_df.sort_values("doc").index.values + 1 88 | 89 | pos_pairs_score_diff = 1.0/(1.0 + torch.exp(sigma * (y_pred - y_pred.t()))) 90 | y_tensor = torch.tensor(y_true, dtype=torch.float32, device=device).view(-1, 1) 91 | 92 | if gain_type == "exp2": 93 | gain_diff = torch.pow(2.0, y_tensor) - torch.pow(2.0, y_tensor.t()) 94 | elif gain_type == "identity": 95 | gain_diff = y_tensor - y_tensor.t() 96 | else: 97 | raise ValueError("NDCG_gain method not supported yet {}".format(ndcg_gain_in_train)) 98 | 99 | rank_order_tensor = torch.tensor(rank_order, dtype=torch.float32, device=device).view(-1, 1) 100 | decay_diff = 1.0 / torch.log2(rank_order_tensor + 1.0) - 1.0 / torch.log2(rank_order_tensor.t() + 1.0) 101 | 102 | delta_ndcg = torch.abs(N * gain_diff * decay_diff) 103 | 104 | grad_ord1 = sigma * (-pos_pairs_score_diff * delta_ndcg) 105 | grad_ord1 = torch.sum(grad_ord1, 1, keepdim=True) 106 | 107 | grad_ord2 = (sigma*sigma)*pos_pairs_score_diff*(1-pos_pairs_score_diff)*delta_ndcg 108 | grad_ord2 = torch.sum(grad_ord2, 1, keepdim=True) 109 | 110 | 111 | #print(grad_ord1.shape, y_pred.shape) 112 | assert grad_ord1.shape == y_pred.shape 113 | check_grad = torch.sum(grad_ord1, (0, 1)).item() 114 | if check_grad == float('inf') or np.isnan(check_grad): 115 | import ipdb; ipdb.set_trace() 116 | 117 | return grad_ord1, grad_ord2 118 | 119 | def loss_calc_v2(y_true, y_pred, gain_type, sigma, N, device): 120 | # Normalize the loss with NDCG delta adopted from Microsoft paper 121 | 122 | rank_df = pd.DataFrame({"y": y_true, "doc": np.arange(y_true.shape[0])}) 123 | rank_df = rank_df.sort_values("y").reset_index(drop=True) 124 | rank_order = rank_df.sort_values("doc").index.values + 1 125 | 126 | 127 | pos_pairs_score_diff = 1.0 + torch.exp(-sigma * (y_pred - y_pred.t())) 128 | y_tensor = torch.tensor(y_true, dtype=torch.float32, device=device).view(-1, 1) 129 | 130 | if gain_type == "exp2": 131 | gain_diff = torch.pow(2.0, y_tensor) - torch.pow(2.0, y_tensor.t()) 132 | elif gain_type == "identity": 133 | gain_diff = y_tensor - y_tensor.t() 134 | else: 135 | raise ValueError("NDCG_gain method not supported yet {}".format(ndcg_gain_in_train)) 136 | 137 | rank_order_tensor = torch.tensor(rank_order, dtype=torch.float32, device=device).view(-1, 1) 138 | 
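# decay_diff (next line) is the change in rank-position discount from swapping documents
# i and j; together with gain_diff and the normalizer N = 1/maxDCG it forms |delta NDCG|,
# the LambdaRank-style weight applied to each pair's logistic loss below.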
decay_diff = 1.0 / torch.log2(rank_order_tensor + 1.0) - 1.0 / torch.log2(rank_order_tensor.t() + 1.0) 139 | 140 | delta_ndcg = torch.abs(N * gain_diff * decay_diff) 141 | 142 | loss = torch.log(pos_pairs_score_diff) * delta_ndcg 143 | loss = torch.sum(loss, 1, keepdim=True) 144 | 145 | return loss -------------------------------------------------------------------------------- /L2R/Misc/metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Metrics: 3 | 4 | NDCG: 5 | https://en.wikipedia.org/wiki/Discounted_cumulative_gain 6 | https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf 7 | """ 8 | 9 | import numpy as np 10 | 11 | 12 | class DCG(object): 13 | 14 | def __init__(self, k=10, gain_type='exp2'): 15 | """ 16 | :param k: int DCG@k 17 | :param gain_type: 'exp2' or 'identity' 18 | """ 19 | self.k = k 20 | self.discount = self._make_discount(256) 21 | if gain_type in ['exp2', 'identity']: 22 | self.gain_type = gain_type 23 | else: 24 | raise ValueError('gain type not equal to exp2 or identity') 25 | 26 | def evaluate(self, targets): 27 | """ 28 | :param targets: ranked list with relevance 29 | :return: float 30 | """ 31 | gain = self._get_gain(targets) 32 | discount = self._get_discount(min(self.k, len(gain))) 33 | return np.sum(np.divide(gain, discount)) 34 | 35 | def _get_gain(self, targets): 36 | t = targets[:self.k] 37 | if self.gain_type == 'exp2': 38 | return np.power(2.0, t) - 1.0 39 | else: 40 | return t 41 | 42 | def _get_discount(self, k): 43 | if k > len(self.discount): 44 | self.discount = self._make_discount(2 * len(self.discount)) 45 | return self.discount[:k] 46 | 47 | @staticmethod 48 | def _make_discount(n): 49 | x = np.arange(1, n+1, 1) 50 | discount = np.log2(x + 1) 51 | return discount 52 | 53 | 54 | class NDCG(DCG): 55 | 56 | def __init__(self, k=10, gain_type='exp2'): 57 | """ 58 | :param k: int NDCG@k 59 | :param gain_type: 'exp2' or 'identity' 60 | """ 61 | super(NDCG, self).__init__(k, gain_type) 62 | 63 | def evaluate(self, targets): 64 | """ 65 | :param targets: ranked list with relevance 66 | :return: float 67 | """ 68 | dcg = super(NDCG, self).evaluate(targets) 69 | ideal = np.sort(targets)[::-1] 70 | idcg = super(NDCG, self).evaluate(ideal) 71 | return dcg / idcg 72 | 73 | def maxDCG(self, targets): 74 | """ 75 | :param targets: ranked list with relevance 76 | :return: 77 | """ 78 | ideal = np.sort(targets)[::-1] 79 | return super(NDCG, self).evaluate(ideal) 80 | 81 | 82 | if __name__ == "__main__": 83 | targets = [3, 2, 3, 0, 1, 2, 3, 2] 84 | dcg6 = DCG(6, 'identity') 85 | ndcg6 = NDCG(6, 'identity') 86 | assert 6.861 < dcg6.evaluate(targets) < 6.862 87 | assert 0.785 < ndcg6.evaluate(targets) < 0.786 88 | ndcg10 = NDCG(10) 89 | assert 0 < ndcg10.evaluate(targets) < 1.0 90 | assert 0 < ndcg10.evaluate([1, 2, 3]) < 1.0 -------------------------------------------------------------------------------- /L2R/Readme.md: -------------------------------------------------------------------------------- 1 | - Data loading and creating dataloader are handled in GrowNet/L2R/data/data.py. If you want to try new data please put it into Microsoft data format. 2 | 3 | - Individual model class and ensemble architecture are in GrowNet/L2R/models: mlp.py and dynamic_net.py. 4 | You can increase number of hidden layers or change activation function from here: mlp.py 5 | 6 | - train.sh contains pairwise-loss implementation. 
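Concretely, the line to edit is the training command of the form `CUDA_VISIBLE_DEVICES=0 python -u main_l2r_pairwise_cv.py --data microsoft --feat_d 136 --hidden_d 64 ...` (flags abridged here; the released script may point at one of the other loss variants).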
If you want to try I-divergence or MSE loss implementations just change the python -u main_l2r_pairwise_cv.py to python -u main_l2r_idiv_cv.py (or main_l2r_mse_cv.py). You can also change the dtaset to yahoo, but when you do, change the feature dimension as well (from 136 to 518). You may want to alter the hidden layer dimension as well, say 128 or 256. -------------------------------------------------------------------------------- /L2R/Utils/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common function used in training Learn to Rank 3 | """ 4 | from argparse import ArgumentParser, ArgumentTypeError 5 | from collections import defaultdict 6 | import os 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from DataLoader.DataLoader import get_time, L2R_DataLoader 15 | from Misc.metrics import NDCG 16 | 17 | 18 | def get_device(): 19 | if torch.cuda.is_available(): 20 | device = "cuda:0" 21 | else: 22 | device = "cpu" 23 | print("Using device ", device) 24 | return device 25 | 26 | 27 | def get_ckptdir(net_name, net_structure, sigma=None): 28 | net_name = '{}-{}'.format(net_name, '-'.join([str(x) for x in net_structure])) 29 | if sigma: 30 | net_name += '-scale-{}'.format(sigma) 31 | ckptdir = os.path.join(os.path.dirname(__file__), 'ckptdir') 32 | if not os.path.exists(ckptdir): 33 | os.makedirs(ckptdir) 34 | ckptfile = os.path.join(ckptdir, net_name) 35 | print("checkpoint dir:", ckptfile) 36 | return ckptfile 37 | 38 | 39 | def save_to_ckpt(ckpt_file, epoch, model, optimizer, lr_scheduler): 40 | ckpt_file = ckpt_file + '_{}'.format(epoch) 41 | print(get_time(), 'save to ckpt {}'.format(ckpt_file)) 42 | torch.save({ 43 | 'epoch': epoch, 44 | 'model_state_dict': model.state_dict(), 45 | 'optimizer_state_dict': optimizer.state_dict(), 46 | 'lr_scheduler': lr_scheduler.state_dict(), 47 | }, ckpt_file) 48 | print(get_time(), 'finish save to ckpt {}'.format(ckpt_file)) 49 | 50 | 51 | def load_train_test_data(d_dir, data_fold, dataname, cv): 52 | """ 53 | :param data_fold: str, which fold's data was going to use to train 54 | :return: 55 | """ 56 | df_val = [] 57 | val_loader = None 58 | 59 | train_file, test_file = dataname + ".train.txt", dataname + ".test.txt" 60 | 61 | if data_fold: 62 | data_dir = os.path.join(d_dir, 'MSLR-WEB10K/') 63 | train_data = os.path.join(data_dir, data_fold, train_file) 64 | train_loader = L2R_DataLoader(train_data) 65 | df_train = train_loader.load() 66 | 67 | test_data = os.path.join(data_dir, data_fold, test_file) 68 | test_loader = L2R_DataLoader(test_data) 69 | df_test = test_loader.load() 70 | 71 | if cv: 72 | val_file = dataname + ".vali.txt" 73 | val_data = os.path.join(data_dir, data_fold, val_file) 74 | val_loader = L2R_DataLoader(val_data) 75 | df_val = val_loader.load() 76 | 77 | else: 78 | data_dir = d_dir + 'Yahoo/' 79 | train_data = os.path.join(os.path.dirname(__file__), data_dir, train_file) 80 | train_loader = L2R_DataLoader(train_data) 81 | df_train = train_loader.load() 82 | 83 | test_data = os.path.join(os.path.dirname(__file__), data_dir, test_file) 84 | test_loader = L2R_DataLoader(test_data) 85 | df_test = test_loader.load() 86 | 87 | if cv: 88 | val_file = dataname + ".vali.txt" 89 | val_data = os.path.join(os.path.dirname(__file__), data_dir, val_file) 90 | val_loader = L2R_DataLoader(val_data) 91 | df_val = val_loader.load() 92 | 93 | return train_loader, df_train, test_loader, df_test, 
val_loader, df_val 94 | 95 | 96 | def init_weights(m): 97 | if type(m) == nn.Linear: 98 | nn.init.xavier_uniform_(m.weight) 99 | m.bias.data.fill_(0.01) 100 | 101 | 102 | def eval_cross_entropy_loss(model, device, loader, phase="Eval", sigma=1.0): 103 | """ 104 | formula in https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf 105 | 106 | C = 0.5 * (1 - S_ij) * sigma * (si - sj) + log(1 + exp(-sigma * (si - sj))) 107 | when S_ij = 1: C = log(1 + exp(-sigma(si - sj))) 108 | when S_ij = -1: C = log(1 + exp(-sigma(sj - si))) 109 | sigma can change the shape of the curve 110 | """ 111 | print(get_time(), "{} Phase evaluate pairwise cross entropy loss".format(phase)) 112 | model.eval() 113 | with torch.set_grad_enabled(False): 114 | total_cost = 0 115 | total_pairs = loader.get_num_pairs() 116 | pairs_in_compute = 0 117 | for X, Y in loader.generate_batch_per_query(loader.df): 118 | Y = Y.reshape(-1, 1) 119 | rel_diff = Y - Y.T 120 | pos_pairs = (rel_diff > 0).astype(np.float32) 121 | num_pos_pairs = np.sum(pos_pairs, (0, 1)) 122 | # skip negative sessions, no relevant info: 123 | if num_pos_pairs == 0: 124 | continue 125 | neg_pairs = (rel_diff < 0).astype(np.float32) 126 | num_pairs = 2 * num_pos_pairs # num pos pairs and neg pairs are always the same 127 | pos_pairs = torch.tensor(pos_pairs, device=device) 128 | neg_pairs = torch.tensor(neg_pairs, device=device) 129 | Sij = pos_pairs - neg_pairs 130 | # only calculate the different pairs 131 | diff_pairs = pos_pairs + neg_pairs 132 | pairs_in_compute += num_pairs 133 | 134 | X_tensor = torch.Tensor(X).to(device) 135 | y_pred = model(X_tensor) 136 | y_pred_diff = y_pred - y_pred.t() 137 | 138 | # logsigmoid(x) = log(1 / (1 + exp(-x))) equivalent to log(1 + exp(-x)) 139 | C = 0.5 * (1 - Sij) * sigma * y_pred_diff - F.logsigmoid(-sigma * y_pred_diff) 140 | C = C * diff_pairs 141 | cost = torch.sum(C, (0, 1)) 142 | if cost.item() == float('inf') or np.isnan(cost.item()): 143 | import ipdb; ipdb.set_trace() 144 | total_cost += cost 145 | 146 | assert total_pairs == pairs_in_compute 147 | avg_cost = total_cost / total_pairs 148 | print( 149 | get_time(), 150 | "{} Phase pairwise corss entropy loss {:.6f}, total_paris {}".format( 151 | phase, avg_cost.item(), total_pairs 152 | )) 153 | 154 | 155 | def eval_ndcg_at_k(inference_model, device, df_valid, valid_loader, batch_size, k_list, gain_type, phase="Eval"): 156 | # print("Eval Phase evaluate NDCG @ {}".format(k_list)) 157 | ndcg_metrics = {k: NDCG(k, gain_type) for k in k_list} 158 | qids, rels, scores = [], [], [] 159 | inference_model.to_eval() # Set the models in ensemble net to eval mode 160 | with torch.no_grad(): 161 | for qid, rel, x in valid_loader.generate_query_batch(df_valid, batch_size): 162 | if x is None or x.shape[0] == 0: 163 | continue 164 | _, y_tensor = inference_model.forward(torch.Tensor(x).to(device)) 165 | scores.append(y_tensor.cpu().numpy().squeeze()) 166 | qids.append(qid) 167 | rels.append(rel) 168 | 169 | qids = np.hstack(qids) 170 | rels = np.hstack(rels) 171 | scores = np.hstack(scores) 172 | result_df = pd.DataFrame({'qid': qids, 'rel': rels, 'score': scores}) 173 | session_ndcgs = defaultdict(list) 174 | for qid in result_df.qid.unique(): 175 | result_qid = result_df[result_df.qid == qid].sort_values('score', ascending=False) 176 | rel_rank = result_qid.rel.values 177 | for k, ndcg in ndcg_metrics.items(): 178 | if ndcg.maxDCG(rel_rank) == 0: 179 | continue 180 | ndcg_k = ndcg.evaluate(rel_rank) 181 | if not np.isnan(ndcg_k): 182 | 
session_ndcgs[k].append(ndcg_k) 183 | 184 | ndcg_result = {k: np.mean(session_ndcgs[k]) for k in k_list} 185 | ndcg_result_print = ", ".join(["NDCG@{}: {:.5f}".format(k, ndcg_result[k]) for k in k_list]) 186 | print(get_time(), "{} Phase evaluate {}".format(phase, ndcg_result_print)) 187 | return ndcg_result 188 | 189 | def eval_spearman_kendall(inference_model, device, df_test, test_loader, test_group): 190 | 191 | # Switch the model into eval mode 192 | inference_model.to_eval() 193 | batch_size = df_test.shape[0] 194 | with torch.no_grad(): 195 | for qid, rel, x in test_loader.generate_query_batch(df_test, batch_size): 196 | _, y_tensor = inference_model.forward(torch.Tensor(x).to(device)) 197 | preds = y_tensor.cpu().numpy().tolist() 198 | labels = rel.tolist() 199 | 200 | mean_kt, weighted_mean_kt = mean_kendall_tau(labels, preds, test_group) 201 | mean_sr, weighted_mean_sr = mean_spearman_rho(labels, preds, test_group) 202 | 203 | return mean_sr, weighted_mean_sr, mean_kt, weighted_mean_kt 204 | 205 | 206 | def str2bool(v): 207 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 208 | return True 209 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 210 | return False 211 | else: 212 | raise ArgumentTypeError('Boolean value expected.') 213 | 214 | def check_for_single_queries(q, y): 215 | idx = [] 216 | uq = np.unique(q) 217 | for i in range(len(uq)): 218 | idd = np.where(q==uq[i])[0] 219 | if len(idd) > 1 and len(np.unique(y[idd]))>1: 220 | idx.append(idd) 221 | idx = np.concatenate(idx).ravel().tolist() 222 | return idx 223 | 224 | 225 | def get_args_parser(): 226 | """Common Args needed for different Learn to Rank training method. 227 | :rtype: ArgumentParser 228 | """ 229 | parser = ArgumentParser(description="additional training specification") 230 | parser.add_argument("--start_epoch", dest="start_epoch", type=int, default=0) 231 | parser.add_argument("--additional_epoch", dest="additional_epoch", type=int, default=100) 232 | parser.add_argument("--lr", dest="lr", type=float, default=0.0001) 233 | parser.add_argument("--optim", dest="optim", type=str, default="adam", choices=["adam", "sgd"]) 234 | parser.add_argument("--leaky_relu", dest="leaky_relu", type=str2bool, nargs="?", const=True, default=False) 235 | parser.add_argument( 236 | "--ndcg_gain_in_train", dest="ndcg_gain_in_train", 237 | type=str, default="exp2", choices=["exp2","identity"] 238 | ) 239 | parser.add_argument("--small_dataset", type=str2bool, nargs='?', const=True, default=False) 240 | parser.add_argument("--debug", type=str2bool, nargs='?', const=True, default=False) 241 | parser.add_argument("--double_precision", type=str2bool, nargs="?", const=True, default=False) 242 | parser.add_argument("--standardize", type=str2bool, nargs="?", const=True, default=False) 243 | return parser 244 | -------------------------------------------------------------------------------- /L2R/main_l2r_idiv_cv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import pandas as pd 4 | import argparse 5 | import torch 6 | import torch.nn as nn 7 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 8 | from models.dynamic_net import DynamicNet, ForwardType 9 | from torch.optim import SGD, Adam 10 | from DataLoader.DataLoader import L2R_DataLoader 11 | from Utils.utils import load_train_test_data, init_weights, get_device, eval_ndcg_at_k, check_for_single_queries 12 | import time 13 | 14 | parser = argparse.ArgumentParser() 15 | 
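# The flags registered below configure a GrowNet run: --feat_d / --hidden_d set the input and
# hidden dimensions of each weak learner, --num_nets the number of boosting stages,
# --epochs_per_stage and --correct_epoch the per-stage and fully-corrective training epochs,
# --boost_rate the initial (learnable) shrinkage, and --model_order first- vs second-order
# (Newton) residuals. L2R/train.sh shows typical values for the Microsoft dataset.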
parser.add_argument('--model_version', type=str, required=True) 16 | parser.add_argument('--model_order',default='second', type=str) 17 | parser.add_argument('--feat_d', type=int, required=True) 18 | parser.add_argument('--hidden_d', type=int, required=True) 19 | parser.add_argument('--boost_rate', type=float, required=True) 20 | parser.add_argument('--lr', type=float, required=True) 21 | parser.add_argument('--num_nets', type=int, required=True) 22 | parser.add_argument('--data', type=str, required=True) 23 | parser.add_argument('--data_dir', type=str, required=True) 24 | parser.add_argument('--batch_size', type=int, required=True) 25 | parser.add_argument('--epochs_per_stage', type=int, required=True) 26 | parser.add_argument('--correct_epoch', type=int ,required=True) 27 | parser.add_argument('--L2', type=float, required=True) 28 | parser.add_argument('--sigma', type=float, required=True) 29 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 30 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 31 | parser.add_argument('--sparse', action='store_true') 32 | parser.add_argument('--cuda', action='store_true') 33 | 34 | opt = parser.parse_args() 35 | 36 | if not opt.cuda: 37 | torch.set_num_threads(16) 38 | 39 | # prepare the dataset 40 | def get_data(): 41 | if opt.data == 'yahoo': 42 | data_fold = None 43 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 44 | elif opt.data == 'microsoft': 45 | data_fold = 'Fold1' 46 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 47 | else: 48 | pass 49 | 50 | if opt.normalization: 51 | print(opt.normalization) 52 | df_train, scaler = train_loader.train_scaler_and_transform() 53 | df_test = test_loader.apply_scaler(scaler) 54 | if opt.cv: 55 | df_val = val_loader.apply_scaler(scaler) 56 | 57 | print(f'#Train: {len(df_train)}, #Val: {len(df_val)} #Test: {len(df_test)}') 58 | 59 | return train_loader, df_train, test_loader, df_test, val_loader, df_val 60 | 61 | 62 | def get_optim(params, lr, weight_decay): 63 | #optimizer = SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 64 | optimizer = Adam(params, lr, weight_decay=weight_decay) 65 | return optimizer 66 | 67 | 68 | def init_gbnn(df_train): 69 | avg = (2**df_train['rel'] - 1)/16 70 | 71 | return avg.mean() 72 | 73 | 74 | if __name__ == "__main__": 75 | # prepare datasets 76 | device = get_device() 77 | #device_id = 1 78 | #device = 'cuda:' + str(device_id) 79 | print('Loading data...') 80 | train_loader, df_train, test_loader, df_test, val_loader, df_val = get_data() 81 | 82 | print(f'Start training with model version {opt.model_version} on {opt.data} dataset...') 83 | c0 = init_gbnn(df_train) 84 | net_ensemble = DynamicNet(c0, opt.boost_rate) 85 | loss_f = nn.MSELoss(reduction='none') 86 | all_scores = [] 87 | all_ensm_losses = [] 88 | all_mdl_losses = [] 89 | # NDCG parameters 90 | K = 10 91 | gain_type = 'identity' 92 | 93 | ### Validation parameters ### 94 | best_ndcg = 0 95 | val_ndcg = best_ndcg 96 | best_stage = opt.num_nets-1 97 | 98 | for stage in range(opt.num_nets): 99 | t0 = time.time() 100 | model = MLP_2HL.get_model(stage, opt) 101 | model.apply(init_weights) # Applying uniform xavier initialization for Linear layers 102 | if opt.cuda: 103 | model.cuda() 104 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 105 | 
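# Each boosting stage fits one fresh weak learner to the functional gradient of the
# I-divergence loss. With the exponential link p = exp(F(x)) and shifted targets y + 1,
# the derivatives are g = -(y - p) and h = p, so the second-order (Newton) target computed
# below is resid = -g/h = (y - p)/p, and the squared error on it is weighted by h.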
net_ensemble.to_train() # Set the models in ensemble net to train mode 106 | stage_resid = [] 107 | stage_mdlloss = [] 108 | for epoch in range(opt.epochs_per_stage): 109 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 110 | 111 | if opt.cuda: 112 | x = torch.tensor(x, dtype=torch.float32, device=device) 113 | y = torch.tensor(y+1, dtype=torch.float32, device=device).view(-1, 1) 114 | # Feeding input into ensemble Net 115 | middle_feat, out = net_ensemble.forward(x) 116 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 117 | 118 | # First proccess output through custom activation 119 | out = torch.exp(out) # Exponential 120 | grad_ord1 = -(y-out) 121 | grad_ord2 = out 122 | if opt.model_order=='second': 123 | resid = -grad_ord1/grad_ord2 124 | else: 125 | resid = -grad_ord1 126 | 127 | stage_resid.append(resid.sum().item()) 128 | _, out = model(x, middle_feat) 129 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 130 | 131 | loss = loss_f(net_ensemble.boost_rate*out, resid) 132 | loss = grad_ord2*loss 133 | loss = loss.mean() 134 | stage_mdlloss.append(loss.item()) 135 | model.zero_grad() 136 | loss.backward() 137 | optimizer.step() 138 | 139 | net_ensemble.add(model) 140 | sr = np.mean(stage_resid) 141 | sml = np.mean(stage_mdlloss) 142 | #print(f'Stage - {stage} resid: {sr}, and model loss: {sml}') 143 | 144 | # fully-corrective step 145 | stage_loss = [] 146 | lr_scaler = 2 147 | if stage > 0: 148 | # Adjusting corrective step learning rate 149 | if stage % 15 == 0: 150 | lr_scaler *= 2 151 | opt.L2 /= 2 152 | 153 | optimizer = get_optim(net_ensemble.parameters(), opt.lr / lr_scaler, opt.L2) 154 | #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.75) 155 | for _ in range(opt.correct_epoch): 156 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 157 | 158 | if opt.cuda: 159 | x = torch.tensor(x, dtype=torch.float32, device=device) 160 | y = torch.tensor(y+1, dtype=torch.float32, device=device).view(-1, 1) 161 | 162 | _, out = net_ensemble.forward_grad(x) 163 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 164 | out = torch.exp(out) # exponential 165 | #import ipdb; ipdb.set_trace() 166 | 167 | loss = torch.mean(y*torch.log(y/out) - (y-out)) 168 | optimizer.zero_grad() 169 | loss.backward() 170 | #scheduler.step() 171 | optimizer.step() 172 | stage_loss.append(loss.item()) 173 | sl = 0 174 | if stage_loss != []: 175 | sl = np.mean(stage_loss) 176 | 177 | all_ensm_losses.append(sl) 178 | all_mdl_losses.append(sml) 179 | print(f'Stage - {stage}, Boost rate: {net_ensemble.boost_rate} Loss: {sl}') 180 | elapsed_tr = time.time()-t0 181 | 182 | ndcg_result = eval_ndcg_at_k(net_ensemble, device, df_test, test_loader, 100000, [5, 10], gain_type) 183 | 184 | if opt.cv: 185 | val_result = eval_ndcg_at_k(net_ensemble, device, df_val, val_loader, 100000, [5, 10], gain_type, "Validation") 186 | if val_result[5] > best_ndcg: 187 | best_ndcg = val_result[5] 188 | best_stage = stage 189 | 190 | all_scores.append([ndcg_result[5], ndcg_result[10]]) 191 | elapsed_te = time.time()-t0 - elapsed_tr 192 | print(f'Stage: {stage} Training time: {elapsed_tr: .1f} sec and Test time: {elapsed_te: .1f} sec \n') 193 | 194 | te_ndcg_5, te_ndcg_10 = all_scores[best_stage][0], all_scores[best_stage][1] 195 | print(f'Best validation stage: {best_stage} final Test NDCG@5: {te_ndcg_5:.5f}, NDCG@10: {te_ndcg_10:.5f}') 196 | 197 | fname = './results/' + 
opt.data + '_NDCG_Idivergenceloss' 198 | np.savez(fname, all_scores, all_ensm_losses, all_mdl_losses) 199 | np.savez('./results/' + opt.data + '_GID_parameters', opt) 200 | 201 | -------------------------------------------------------------------------------- /L2R/main_l2r_mse_cv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import pandas as pd 4 | import argparse 5 | import torch 6 | import torch.nn as nn 7 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 8 | from models.dynamic_net import DynamicNet, ForwardType 9 | from torch.optim import SGD, Adam 10 | from DataLoader.DataLoader import L2R_DataLoader 11 | from Utils.utils import load_train_test_data, init_weights, get_device, eval_ndcg_at_k 12 | from Misc.Calculations import grad_calc_, loss_calc_, grad_calc_v2, loss_calc_v2 13 | import time 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_version', type=str, required=True) 17 | parser.add_argument('--model_order',default='second', type=str) 18 | parser.add_argument('--feat_d', type=int, required=True) 19 | parser.add_argument('--hidden_d', type=int, required=True) 20 | parser.add_argument('--boost_rate', type=float, required=True) 21 | parser.add_argument('--lr', type=float, required=True) 22 | parser.add_argument('--num_nets', type=int, required=True) 23 | parser.add_argument('--data', type=str, required=True) 24 | parser.add_argument('--data_dir', type=str, required=True) 25 | parser.add_argument('--batch_size', type=int, required=True) 26 | parser.add_argument('--epochs_per_stage', type=int, required=True) 27 | parser.add_argument('--correct_epoch', type=int ,required=True) 28 | parser.add_argument('--L2', type=float, required=True) 29 | parser.add_argument('--sigma', type=float, required=True) 30 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 31 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 32 | parser.add_argument('--sparse', action='store_true') 33 | parser.add_argument('--cuda', action='store_true') 34 | 35 | opt = parser.parse_args() 36 | 37 | if not opt.cuda: 38 | torch.set_num_threads(16) 39 | 40 | # prepare the dataset 41 | def get_data(): 42 | if opt.data == 'yahoo': 43 | data_fold = None 44 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 45 | elif opt.data == 'microsoft': 46 | data_fold = 'Fold1' 47 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 48 | else: 49 | pass 50 | 51 | if opt.normalization: 52 | print(opt.normalization) 53 | df_train, scaler = train_loader.train_scaler_and_transform() 54 | df_test = test_loader.apply_scaler(scaler) 55 | if opt.cv: 56 | df_val = val_loader.apply_scaler(scaler) 57 | 58 | print(f'#Train: {len(df_train.index)}, #Val: {len(df_val)} #Test: {len(df_test.index)}') 59 | 60 | return train_loader, df_train, test_loader, df_test, val_loader, df_val 61 | 62 | 63 | def get_optim(params, lr, weight_decay): 64 | #optimizer = SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 65 | optimizer = Adam(params, lr, weight_decay=weight_decay) 66 | return optimizer 67 | 68 | def init_gbnn(df_train): 69 | avg = (2**df_train['rel'] - 1)/16 70 | 71 | return avg.mean() 72 | if __name__ == "__main__": 73 | # prepare datasets 74 | device = get_device() 75 | print('Loading 
data...') 76 | train_loader, df_train, test_loader, df_test, val_loader, df_val = get_data() 77 | print(f'Start training with {opt.data} dataset...') 78 | c0 = init_gbnn(df_train) 79 | net_ensemble = DynamicNet(c0, opt.boost_rate) 80 | loss_f = nn.MSELoss() 81 | all_scores = [] 82 | all_ensm_losses = [] 83 | all_mdl_losses = [] 84 | # NDCG parameters 85 | K = 10 86 | gain_type = 'identity' 87 | 88 | ### Validation parameters ### 89 | best_ndcg = 0 90 | val_ndcg = best_ndcg 91 | best_stage = opt.num_nets-1 92 | 93 | 94 | for stage in range(opt.num_nets): 95 | t0 = time.time() 96 | model = MLP_2HL.get_model(stage, opt) 97 | model.apply(init_weights) # Applying uniform xavier initialization for Linear layers 98 | if opt.cuda: 99 | model.cuda() 100 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 101 | net_ensemble.to_train() # Set the models in ensemble net to train mode 102 | stage_resid = [] 103 | stage_mdlloss = [] 104 | for epoch in range(opt.epochs_per_stage): 105 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 106 | 107 | if opt.cuda: 108 | x = torch.tensor(x, dtype=torch.float32, device=device) 109 | y = torch.tensor(y, dtype=torch.float32, device=device).view(-1, 1) 110 | # Feeding input into ensemble Net 111 | middle_feat, out = net_ensemble.forward(x) 112 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 113 | resid = y - out # Negative of gradient direction: -grad/grad2 114 | stage_resid.append(resid.sum().detach().cpu().numpy()) 115 | _, out = model(x, middle_feat) 116 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 117 | loss = loss_f(net_ensemble.boost_rate*out, resid) 118 | 119 | stage_mdlloss.append(loss.item()) 120 | model.zero_grad() 121 | loss.backward() 122 | optimizer.step() 123 | 124 | net_ensemble.add(model) 125 | sr = np.mean(stage_resid) 126 | sml = np.mean(stage_mdlloss) 127 | #print(f'Stage - {stage} resid: {sr}, and model loss: {sml}') 128 | 129 | # fully-corrective step 130 | stage_loss = [] 131 | lr_scaler = 3 132 | if stage > 2: 133 | # Adjusting corrective step learning rate 134 | if stage % 15 == 0: 135 | #lr_scaler *= 2 136 | opt.lr /= 2 137 | opt.L2 /= 2 138 | 139 | optimizer = get_optim(net_ensemble.parameters(), opt.lr / lr_scaler, opt.L2) 140 | #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.75) 141 | for _ in range(opt.correct_epoch): 142 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 143 | 144 | if opt.cuda: 145 | x = torch.tensor(x, dtype=torch.float32, device=device) 146 | y = torch.tensor(y, dtype=torch.float32, device=device).view(-1, 1) 147 | 148 | _, out = net_ensemble.forward_grad(x) 149 | out = torch.as_tensor(out, dtype=torch.float32, device=device).view(-1, 1) 150 | loss = loss_f(out, y) 151 | #net_ensemble.zero_grad() 152 | optimizer.zero_grad() 153 | loss.backward() 154 | #scheduler.step() 155 | optimizer.step() 156 | stage_loss.append(loss.item()) 157 | sl = 0 158 | if stage_loss != []: 159 | sl = np.mean(stage_loss) 160 | 161 | all_ensm_losses.append(sl) 162 | all_mdl_losses.append(sml) 163 | print(f'Stage - {stage}, Boost rate: {net_ensemble.boost_rate} Loss: {sl}') 164 | 165 | elapsed_tr = time.time()-t0 166 | 167 | ndcg_result = eval_ndcg_at_k(net_ensemble, device, df_test, test_loader, 100000, [5, 10], gain_type) 168 | if opt.cv: 169 | val_result = eval_ndcg_at_k(net_ensemble, device, df_val, val_loader, 100000, [5, 10], gain_type, "Validation") 170 | if val_result[5] > best_ndcg: 
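# Model selection: NDCG@5 on the validation fold picks the best boosting stage; the test
# scores reported at the end are the ones recorded at that stage.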
171 | best_ndcg = val_result[5] 172 | best_stage = stage 173 | 174 | all_scores.append([ndcg_result[5], ndcg_result[10]]) 175 | elapsed_te = time.time()-t0 - elapsed_tr 176 | print(f'Stage: {stage} Training time: {elapsed_tr: .1f} sec and Test time: {elapsed_te: .1f} sec \n') 177 | 178 | ### Test results from CV ### 179 | te_ndcg_5, te_ndcg_10 = all_scores[best_stage][0], all_scores[best_stage][1] 180 | print(f'Best validation stage: {best_stage} final Test NDCG@5: {te_ndcg_5:.5f}, NDCG@10: {te_ndcg_10:.5f}') 181 | fname = opt.data + '_NDCG_MSEloss' 182 | np.savez(fname, all_scores, all_ensm_losses, all_mdl_losses) 183 | np.savez('./results/' + opt.data + '_MSE_parameters', opt) 184 | -------------------------------------------------------------------------------- /L2R/main_l2r_pairwise_cv.py: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env python 2 | import numpy as np 3 | import pandas as pd 4 | import argparse 5 | import torch 6 | import torch.nn as nn 7 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 8 | from models.dynamic_net import DynamicNet, ForwardType 9 | from torch.optim import SGD, Adam 10 | from DataLoader.DataLoader import L2R_DataLoader 11 | from Utils.utils import load_train_test_data, init_weights, get_device, eval_ndcg_at_k, check_for_single_queries 12 | from Misc.Calculations import grad_calc_, loss_calc_, grad_calc_v2, loss_calc_v2 13 | from Misc.metrics import NDCG, DCG 14 | import time 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--model_version', type=str, required=True) 18 | parser.add_argument('--model_order',default='second', type=str) 19 | parser.add_argument('--feat_d', type=int, required=True) 20 | parser.add_argument('--hidden_d', type=int, required=True) 21 | parser.add_argument('--boost_rate', type=float, required=True) 22 | parser.add_argument('--lr', type=float, required=True) 23 | parser.add_argument('--num_nets', type=int, required=True) 24 | parser.add_argument('--data', type=str, required=True) 25 | parser.add_argument('--data_dir', type=str, required=True) 26 | parser.add_argument('--batch_size', type=int, required=True) 27 | parser.add_argument('--epochs_per_stage', type=int, required=True) 28 | parser.add_argument('--correct_epoch', type=int ,required=True) 29 | parser.add_argument('--L2', type=float, required=True) 30 | parser.add_argument('--sigma', type=float, required=True) 31 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 32 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 33 | parser.add_argument('--sparse', action='store_true') 34 | parser.add_argument('--cuda', action='store_true') 35 | 36 | opt = parser.parse_args() 37 | 38 | if not opt.cuda: 39 | torch.set_num_threads(16) 40 | 41 | # prepare the dataset 42 | def get_data(): 43 | if opt.data == 'yahoo': 44 | data_fold = None 45 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 46 | elif opt.data == 'microsoft': 47 | data_fold = 'Fold1' 48 | train_loader, df_train, test_loader, df_test, val_loader, df_val = load_train_test_data(opt.data_dir, data_fold, opt.data, opt.cv) 49 | else: 50 | pass 51 | 52 | if opt.normalization: 53 | print(opt.normalization) 54 | df_train, scaler = train_loader.train_scaler_and_transform() 55 | df_test = test_loader.apply_scaler(scaler) 56 | if opt.cv: 57 | df_val = val_loader.apply_scaler(scaler) 58 | 59 | 
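# Note: the MinMax scaler is fit on the training split only (train_scaler_and_transform)
# and then re-applied to the test and validation splits, so no test statistics leak into training.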
print(f'#Train: {len(df_train.index)}, #Val: {len(df_val)} #Test: {len(df_test.index)}') 60 | 61 | return train_loader, df_train, test_loader, df_test, val_loader, df_val 62 | 63 | 64 | def get_optim(params, lr, weight_decay): 65 | #optimizer = SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 66 | optimizer = Adam(params, lr, weight_decay=weight_decay) 67 | return optimizer 68 | 69 | 70 | def init_gbnn(df_train): 71 | avg = (2**df_train['rel'] - 1)/16 72 | 73 | return avg.mean() 74 | 75 | if __name__ == "__main__": 76 | # prepare datasets 77 | device = get_device() 78 | print('Loading data...') 79 | train_loader, df_train, test_loader, df_test, val_loader, df_val = get_data() 80 | 81 | print(f'Start training with model version {opt.model_version} on {opt.data} dataset...') 82 | c0 = init_gbnn(df_train) 83 | net_ensemble = DynamicNet(c0, opt.boost_rate) 84 | loss_f = nn.MSELoss(reduction='none') 85 | all_scores = [] 86 | all_ensm_losses = [] 87 | all_mdl_losses = [] 88 | dynamic_br = [] 89 | execution_time = [] 90 | 91 | ### Validation parameters ### 92 | best_ndcg = 0 93 | val_ndcg = best_ndcg 94 | best_stage = opt.num_nets-1 95 | 96 | # NDCG parameters 97 | K = 10 98 | gain_type = 'identity' 99 | ideal_dcg = NDCG(2**(K-1), gain_type) 100 | 101 | for stage in range(opt.num_nets): 102 | t0 = time.time() 103 | model = MLP_2HL.get_model(stage, opt) 104 | model.apply(init_weights) # Applying uniform xavier initialization for Linear layers 105 | if opt.cuda: 106 | model.cuda() 107 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 108 | net_ensemble.to_train() # Set the models in ensemble net to train mode 109 | stage_resid = [] 110 | stage_mdlloss = [] 111 | for epoch in range(opt.epochs_per_stage): 112 | count = 0 113 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 114 | 115 | # Removing queries with a single doc. 
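# check_for_single_queries keeps only queries with more than one document and more than one
# distinct relevance label; other queries contribute no preference pairs and would produce
# zero (or undefined) LambdaRank gradients.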
116 | idx1 = check_for_single_queries(q, y) 117 | q, y, x = q[idx1], y[idx1], x[idx1] 118 | 119 | if opt.cuda: 120 | x = torch.tensor(x, dtype=torch.float32, device=device) 121 | #y = torch.tensor(y, dtype=torch.float32, device=device).view(-1, 1) 122 | 123 | # Feeding input into ensemble Net 124 | middle_feat, out = net_ensemble.forward(x) 125 | out = torch.as_tensor(out.view(-1, 1), dtype=torch.float32, device=device) 126 | # Indexing data by qid 127 | uq = np.unique(q) 128 | grad_batch = None 129 | for i in range(len(uq)): 130 | idx = np.where(q==uq[i])[0] 131 | y_i = y[idx] 132 | idx = torch.tensor(idx, device=device) 133 | out_i = torch.index_select(out, 0, idx) 134 | 135 | #if np.sum(y_i)==0 or len(y_i)<=1: 136 | # continue # All irrelevant docs, no useful info 137 | N = 1.0 / ideal_dcg.maxDCG(y_i) 138 | 139 | grad_ord1, grad_ord2 = grad_calc_(y_i, out_i, gain_type, opt.sigma, N, device) 140 | if opt.model_order=='second': 141 | resid = -grad_ord1/grad_ord2 142 | else: 143 | resid = -grad_ord1 144 | 145 | if grad_batch is None: 146 | grad_batch = resid 147 | grad_ord2_batch = grad_ord2 148 | else: 149 | grad_ord2_batch = torch.cat((grad_ord2_batch, grad_ord2), dim=0) 150 | grad_batch = torch.cat((grad_batch, resid), dim=0) 151 | 152 | _, out = model(x, middle_feat) 153 | out = torch.as_tensor(out.view(-1, 1), dtype=torch.float32, device=device) 154 | 155 | loss = loss_f(net_ensemble.boost_rate*out, grad_batch) 156 | loss = grad_ord2_batch*loss 157 | loss = loss.mean() 158 | model.zero_grad() 159 | loss.backward() 160 | optimizer.step() 161 | stage_resid.append(grad_batch.sum().item()) 162 | stage_mdlloss.append(loss.item()) 163 | #print('Model parameters after grad update \n') 164 | for name, param in model.named_parameters(): 165 | if param.requires_grad: 166 | if np.isnan(param.data.sum().detach().cpu().numpy()): 167 | import ipdb; ipdb.set_trace() 168 | 169 | 170 | net_ensemble.add(model) 171 | sr = -np.mean(stage_resid) 172 | sml = np.mean(stage_mdlloss) 173 | #print(f'Stage - {stage} resid: {sr}, and model loss: {sml}') 174 | 175 | # fully-corrective step 176 | stage_loss = [] 177 | lr_scaler = 2 178 | if stage >3: 179 | 180 | # Adjusting corrective step learning rate 181 | if stage % 15 == 0: 182 | #lr_scaler *= 2 183 | opt.lr /= 2 184 | opt.L2 /= 2 185 | 186 | optimizer = get_optim(net_ensemble.parameters(), opt.lr/lr_scaler, opt.L2) 187 | for _ in range(opt.correct_epoch): 188 | count = 0 189 | for q, y, x in train_loader.generate_query_batch(df_train, opt.batch_size): 190 | 191 | # Removing queries with a single doc 192 | idx1 = check_for_single_queries(q, y) 193 | q, y, x = q[idx1], y[idx1], x[idx1] 194 | 195 | if opt.cuda: 196 | x = torch.tensor(x, dtype=torch.float32, device=device) 197 | #y = torch.tensor(y, dtype=torch.float32, device=device).view(-1, 1) 198 | 199 | _, out = net_ensemble.forward_grad(x) 200 | out = torch.as_tensor(out.view(-1, 1), dtype=torch.float32, device=device) 201 | uq = np.unique(q) 202 | loss_batch = 0 203 | for i in range(len(uq)): 204 | idx = np.where(q==uq[i])[0] 205 | y_i = y[idx] 206 | idx = torch.tensor(idx, device=device) 207 | out_i = torch.index_select(out, 0, idx) 208 | 209 | #if np.sum(y_i)==0 or len(y_i)<=1: 210 | # continue # All irrelevant docs, no useful info 211 | N = 1.0 / ideal_dcg.maxDCG(y_i) 212 | loss_batch += loss_calc_(y_i, out_i, gain_type, opt.sigma, N, device).mean() 213 | 214 | loss_batch = loss_batch/len(uq) #opt.batch_size 215 | #import ipdb; ipdb.set_trace() 216 | optimizer.zero_grad() 217 | loss_batch.backward() 
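# The optimizer here was built over net_ensemble.parameters(), so this corrective update
# adjusts every previously added weak learner together with the learnable boost_rate,
# not just the model added in the current stage.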
218 | optimizer.step() 219 | stage_loss.append(loss_batch.item()) 220 | #net_ensemble.zero_grad() 221 | sl = 0 222 | if stage_loss != []: 223 | sl = np.mean(stage_loss) 224 | # Storing losses and dynamic boost rate 225 | dynamic_br.append(net_ensemble.boost_rate.item()) 226 | all_ensm_losses.append(sl) 227 | all_mdl_losses.append(sml) 228 | print(f'Stage - {stage}, Boost rate: {net_ensemble.boost_rate: .4f} Loss: {sl: .4f}') 229 | 230 | elapsed_tr = time.time()-t0 231 | 232 | net_ensemble.to_eval() # Set the models in ensemble net to eval mode 233 | 234 | ndcg_result = eval_ndcg_at_k(net_ensemble, device, df_test, test_loader, 100000, [5, 10], gain_type) 235 | if opt.cv: 236 | val_result = eval_ndcg_at_k(net_ensemble, device, df_val, val_loader, 100000, [5, 10], gain_type, "Validation") 237 | if val_result[5] > best_ndcg: 238 | best_ndcg = val_result[5] 239 | best_stage = stage 240 | 241 | 242 | all_scores.append([ndcg_result[5], ndcg_result[10]]) 243 | elapsed_te = time.time()-t0 - elapsed_tr 244 | # Storing training and test time 245 | execution_time.append([elapsed_tr, elapsed_te]) 246 | print(f'Stage: {stage} Training time: {elapsed_tr: .1f} sec and Test time: {elapsed_te: .1f} sec \n') 247 | 248 | ### Test results from CV ### 249 | te_ndcg_5, te_ndcg_10 = all_scores[best_stage][0], all_scores[best_stage][1] 250 | print(f'Best validation stage: {best_stage} final Test NDCG@5: {te_ndcg_5:.5f}, NDCG@10: {te_ndcg_10:.5f}') 251 | 252 | fname = './results/' + opt.data +'_'+ str(opt.hidden_d) + 'u_2hl_pairwiseloss' 253 | np.savez(fname, all_scores=all_scores, all_ensm_losses=all_ensm_losses, all_mdl_losses=all_mdl_losses, dynamic_br=dynamic_br, execution_time=execution_time, options=opt) 254 | -------------------------------------------------------------------------------- /L2R/models/dynamic_net.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import torch 3 | #import pickle 4 | import torch.nn as nn 5 | 6 | class ForwardType(Enum): 7 | SIMPLE = 0 8 | STACKED = 1 9 | CASCADE = 2 10 | GRADIENT = 3 11 | 12 | class DynamicNet(object): 13 | def __init__(self, c0, lr): 14 | self.models = [] 15 | self.c0 = c0 16 | self.lr = lr 17 | self.boost_rate = nn.Parameter(torch.tensor(lr, requires_grad=True, device="cuda:0")) 18 | 19 | def add(self, model): 20 | self.models.append(model) 21 | 22 | def parameters(self): 23 | params = [] 24 | for m in self.models: 25 | params.extend(m.parameters()) 26 | 27 | params.append(self.boost_rate) 28 | return params 29 | 30 | def zero_grad(self): 31 | for m in self.models: 32 | m.zero_grad() 33 | 34 | def to_cuda(self): 35 | for m in self.models: 36 | m.cuda() 37 | 38 | def to_eval(self): 39 | for m in self.models: 40 | m.eval() 41 | 42 | def to_train(self): 43 | for m in self.models: 44 | m.train(True) 45 | 46 | def forward(self, x): 47 | if len(self.models) == 0: 48 | return None, self.c0*torch.ones((len(x), 1)) 49 | middle_feat_cum = None 50 | prediction = None 51 | with torch.no_grad(): 52 | for m in self.models: 53 | if middle_feat_cum is None: 54 | middle_feat_cum, prediction = m(x, middle_feat_cum) 55 | else: 56 | middle_feat_cum, pred = m(x, middle_feat_cum) 57 | prediction += pred 58 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 59 | 60 | def forward_grad(self, x): 61 | if len(self.models) == 0: 62 | return None, self.c0 63 | # at least one model 64 | middle_feat_cum = None 65 | prediction = None 66 | for m in self.models: 67 | if middle_feat_cum is None: 68 | 
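# Unlike forward(), forward_grad() omits the torch.no_grad() context, so the fully-corrective
# step can backpropagate through every weak learner in the ensemble.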
middle_feat_cum, prediction = m(x, middle_feat_cum) 69 | else: 70 | middle_feat_cum, pred = m(x, middle_feat_cum) 71 | prediction += pred 72 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 73 | 74 | @classmethod 75 | def from_file(cls, path, builder): 76 | d = torch.load(path) 77 | net = DynamicNet(d['c0'], d['lr']) 78 | net.boost_rate = d['boost_rate'] 79 | for stage, m in enumerate(d['models']): 80 | submod = builder(stage) 81 | submod.load_state_dict(m) 82 | net.add(submod) 83 | return net 84 | 85 | def to_file(self, path): 86 | models = [m.state_dict() for m in self.models] 87 | d = {'models': models, 'c0': self.c0, 'lr': self.lr, 'boost_rate': self.boost_rate} 88 | torch.save(d, path) 89 | -------------------------------------------------------------------------------- /L2R/models/mlp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .splinear import SpLinear 6 | 7 | 8 | class MLP_1HL(nn.Module): 9 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 10 | super(MLP_1HL, self).__init__() 11 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 12 | self.out_layer = nn.Linear(dim_hidden1, 1) 13 | self.lrelu = nn.LeakyReLU(0.1) 14 | self.relu = nn.ReLU() 15 | if bn: 16 | self.bn = nn.BatchNorm1d(dim_hidden1) 17 | self.bn2 = nn.BatchNorm1d(dim_in) 18 | 19 | def forward(self, x, lower_f): 20 | if lower_f is not None: 21 | x = torch.cat([x, lower_f], dim=1) 22 | x = self.bn2(x) 23 | out = self.in_layer(x) 24 | return out, self.out_layer(self.relu(out)).squeeze() 25 | 26 | @classmethod 27 | def get_model(cls, stage, opt): 28 | if stage == 0: 29 | dim_in = opt.feat_d 30 | else: 31 | dim_in = opt.feat_d + opt.hidden_d 32 | model = MLP_1HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 33 | return model 34 | 35 | 36 | class MLP_2HL(nn.Module): 37 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 38 | super(MLP_2HL, self).__init__() 39 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 40 | self.dropout_layer = nn.Dropout(0.0) 41 | self.lrelu = nn.LeakyReLU(0.1) 42 | self.relu = nn.ReLU() 43 | self.hidden_layer = nn.Linear(dim_hidden1, dim_hidden2) 44 | self.out_layer = nn.Linear(int(dim_hidden2), 1) 45 | self.bn = nn.BatchNorm1d(dim_hidden1) 46 | self.bn2 = nn.BatchNorm1d(dim_in) 47 | 48 | def forward(self, x, lower_f): 49 | if lower_f is not None: 50 | x = torch.cat([x, lower_f], dim=1) 51 | x = self.bn2(x) 52 | out = self.lrelu(self.in_layer(x)) 53 | out = self.bn(out) 54 | out = self.hidden_layer(out) 55 | return out, self.out_layer(self.relu(out)).squeeze() 56 | 57 | @classmethod 58 | def get_model(cls, stage, opt): 59 | if stage == 0: 60 | dim_in = opt.feat_d 61 | else: 62 | dim_in = opt.feat_d + opt.hidden_d 63 | model = MLP_2HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 64 | return model 65 | 66 | class MLP_3HL(nn.Module): 67 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 68 | super(MLP_3HL, self).__init__() 69 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 70 | self.dropout_layer = nn.Dropout(0.0) 71 | self.lrelu = nn.LeakyReLU(0.1) 72 | self.relu = nn.ReLU() 73 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 74 | self.out_layer = nn.Linear(dim_hidden1, 1) 75 | self.bn = nn.BatchNorm1d(dim_hidden1) 76 | 
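# bn normalizes the hidden activations; bn2 (next line) normalizes the concatenated input
# [x, lower_f], i.e. the raw features plus the penultimate features propagated from the
# previous weak learner.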
self.bn2 = nn.BatchNorm1d(dim_in) 77 | # print('Batch normalization is processed!') 78 | 79 | def forward(self, x, lower_f): 80 | if lower_f is not None: 81 | x = torch.cat([x, lower_f], dim=1) 82 | x = self.bn2(x) 83 | out = self.lrelu(self.in_layer(x)) 84 | out = self.bn(out) 85 | out = self.lrelu(self.hidden_layer(out)) 86 | out = self.bn(out) 87 | out = self.hidden_layer(out) 88 | return out, self.out_layer(self.relu(out)).squeeze() 89 | 90 | @classmethod 91 | def get_model(cls, stage, opt): 92 | if stage == 0: 93 | dim_in = opt.feat_d 94 | else: 95 | dim_in = opt.feat_d + opt.hidden_d 96 | model = MLP_3HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 97 | return model 98 | 99 | class MLP_4HL(nn.Module): 100 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 101 | super(MLP_4HL, self).__init__() 102 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 103 | self.dropout_layer = nn.Dropout(0.0) 104 | self.lrelu = nn.LeakyReLU(0.1) 105 | self.relu = nn.ReLU() 106 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 107 | self.out_layer = nn.Linear(dim_hidden1, 1) 108 | self.bn = nn.BatchNorm1d(dim_hidden1) 109 | self.bn2 = nn.BatchNorm1d(dim_in) 110 | # print('Batch normalization is processed!') 111 | 112 | def forward(self, x, lower_f): 113 | if lower_f is not None: 114 | x = torch.cat([x, lower_f], dim=1) 115 | x = self.bn2(x) 116 | out = self.lrelu(self.in_layer(x)) #HL-1 117 | out = self.bn(out) 118 | out = self.lrelu(self.hidden_layer(out)) #HL-2 119 | out = self.bn(out) 120 | out = self.lrelu(self.hidden_layer(out)) #HL-3 121 | out = self.bn(out) 122 | out = self.hidden_layer(out) #HL-4 123 | return out, self.out_layer(self.relu(out)).squeeze() 124 | 125 | @classmethod 126 | def get_model(cls, stage, opt): 127 | if stage == 0: 128 | dim_in = opt.feat_d 129 | else: 130 | dim_in = opt.feat_d + opt.hidden_d 131 | model = MLP_4HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 132 | return model 133 | 134 | 135 | class DNN(nn.Module): 136 | def __init__(self, dim_in, dim_hidden, n_hidden=20, sparse=False, bn=True, drop_out=0.3): 137 | super(DNN, self).__init__() 138 | if sparse: 139 | self.in_layer = SpLinear(dim_in, dim_hidden) 140 | else: 141 | self.in_layer = nn.Linear(dim_in, dim_hidden) 142 | self.in_act = nn.SELU() 143 | hidden_layers = [] 144 | for _ in range(n_hidden): 145 | hidden_layers.append(nn.Linear(dim_hidden, dim_hidden)) 146 | if bn: 147 | hidden_layers.append(nn.BatchNorm1d(dim_hidden)) 148 | hidden_layers.append(nn.SELU()) 149 | if drop_out > 0: 150 | hidden_layers.append(nn.Dropout(drop_out)) 151 | self.hidden_layers = nn.Sequential(*hidden_layers) 152 | self.out_layer = nn.Linear(dim_hidden, 1) 153 | 154 | def forward(self, x): 155 | out = self.in_act(self.in_layer(x)) 156 | out = self.hidden_layers(out) 157 | out = self.out_layer(out) 158 | return out.squeeze() 159 | -------------------------------------------------------------------------------- /L2R/models/splinear.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class SpLinearFunc(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, input, weight, bias=None): 10 | ctx.save_for_backward(input, weight, bias) 11 | output = input.mm(weight.t()) 12 | if bias is not None: 13 | output += bias.unsqueeze(0).expand_as(output) 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx, 
grad_output): 18 | input, weight, bias = ctx.saved_tensors 19 | grad_input = grad_weight = grad_bias = None 20 | 21 | if ctx.needs_input_grad[0]: 22 | grad_input = grad_output.mm(weight) 23 | if ctx.needs_input_grad[1]: 24 | grad_weight = (input.t().mm(grad_output)).t() 25 | if bias is not None and ctx.needs_input_grad[2]: 26 | grad_bias = grad_output.sum(0).squeeze(0) 27 | 28 | return grad_input, grad_weight, grad_bias 29 | 30 | splinear = SpLinearFunc.apply 31 | 32 | class SpLinear(nn.Module): 33 | def __init__(self, input_features, output_features, bias=True): 34 | super(SpLinear, self).__init__() 35 | self.input_features = input_features 36 | self.output_features = output_features 37 | self.weight = nn.Parameter(torch.Tensor(output_features, input_features)) 38 | if bias: 39 | self.bias = nn.Parameter(torch.Tensor(output_features)) 40 | else: 41 | self.register_parameter('bias', None) 42 | #TODO write a default initialization 43 | stdv = 1. / math.sqrt(self.weight.size(1)) 44 | self.weight.data.uniform_(-stdv, stdv) 45 | 46 | def forward(self, input): 47 | return splinear(input, self.weight, self.bias) 48 | -------------------------------------------------------------------------------- /L2R/results/results_readme.txt: -------------------------------------------------------------------------------- 1 | Your results will be saved here. -------------------------------------------------------------------------------- /L2R/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | ### Feature Table ### 5 | # yahoo 519 - 1 6 | # microsoft 136 7 | dataset=microsoft 8 | 9 | BASEDIR=$(dirname "$0") 10 | OUTDIR="${BASEDIR}/ckpt/" 11 | 12 | if [ ! -d "${OUTDIR}" ] 13 | then 14 | echo "Output dir ${OUTDIR} does not exist, creating..." 15 | mkdir -p ${OUTDIR} 16 | fi 17 | 18 | CUDA_VISIBLE_DEVICES=0 python -u main_l2r_idiv_cv.py \ 19 | --data_dir ${BASEDIR}/../data \ 20 | --model_version main_l2r_idiv_cv.py \ 21 | --model_order second \ 22 | --feat_d 136 \ 23 | --hidden_d 64 \ 24 | --boost_rate 1 \ 25 | --lr 0.005 \ 26 | --L2 1.0e-3 \ 27 | --num_nets 40 \ 28 | --data ${dataset} \ 29 | --batch_size 10000 \ 30 | --epochs_per_stage 2 \ 31 | --correct_epoch 2 \ 32 | --normalization True \ 33 | --sigma 1. \ 34 | --cv True \ 35 | --cuda 36 | -------------------------------------------------------------------------------- /Model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sbadirli/GrowNet/6b045243477766bef1990218504f7de6645d24a3/Model.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GrowNet 2 | 3 | Original PyTorch implementation of "Gradient Boosting Neural Networks: GrowNet" 4 | 5 | Paper at: https://arxiv.org/pdf/2002.07971.pdf 6 | 7 |
8 | ![GrowNet model architecture](Model.png) 9 | 
11 | 12 | ## Getting Started 13 | 14 | In this paper, we combine the power of gradient boosting with the flexibility and 15 | versatility of neural networks and introduce a new modelling paradigm called GrowNet that can 16 | build up a DNN layer by layer. Instead of decision trees, we use shallow neural networks as our 17 | weak learners in a general gradient boosting framework that can be applied to a wide variety of tasks 18 | spanning classification, regression and ranking. We introduce further innovations like adding second 19 | order statistics to the training process, and also including a global corrective step that has been shown, 20 | both in theory and in empirical evaluation, to provide performance lift and precise fine-tuning to the specific task at hand. 21 | 22 | 23 | ## Prerequisites 24 | 25 | The code was implemented in Python 3.6.10 and utilizes the packages listed in the requirements.txt file (see it for the full list). The platform I used was linux-64. The most important packages you need are the following: 26 | ``` 27 | cudatoolkit=10.1.243 28 | numpy=1.18.1 29 | pandas=1.0.0 30 | python=3.6.10 31 | pytorch=1.4.0 32 | ``` 33 | 34 | ## Installing 35 | 36 | To run the code, you may create a conda environment (assuming you already have miniconda3 installed) by running the following command in a terminal: 37 | 38 | ``` 39 | conda create --name grownet --file requirements.txt 40 | ``` 41 | 42 | ## Data 43 | 44 | You can download the datasets used in the paper from [Google Drive](https://drive.google.com/open?id=1NnBpwvfSdqs-lRb5UFIC-q8P455o3vO3). Create a `data` folder under `GrowNet` and put the data under this folder. 45 | 46 | 47 | #### 1. Classification: 48 | The original HIGGS data is split into train and test sets (same as done in the XGBoost paper) using the higgs2libsvm.py script. 49 | 50 | #### 2. Learning to Rank (L2R): 51 | The data loading pipeline for the L2R task is implemented by taking the Microsoft (MSLR-WEB10K) dataset as a baseline. We also converted Yahoo into this format (the jupyter notebook "yahoo2mslr" does this conversion). Thus, if you want to use other L2R datasets with GrowNet, please convert them into the MSLR format. Below you can find a simple sample with just 10 features: 52 | ``` 53 | 0 qid:10 1:2 2:0 3:0 4:0 5:2 6:0.666667 7:0 8:0 9:0 10:0.666667 54 | ``` 55 | The first field is the label, the second is the query id, and the rest are the data features. 56 | 57 | #### 3. Regression: 58 | Training and test splits of the regression datasets are done in the jupyter notebook "reg_train_test_split". The data link above already contains the split data. 59 | 60 | ## Experiments 61 | 62 | To reproduce the results from the paper, first activate the conda virtual environment: 63 | 64 | ``` 65 | conda activate grownet 66 | ``` 67 | Then simply navigate to the task folder (Classification, L2R or Regression) and execute the following command in a terminal: 68 | 69 | ``` 70 | ./train.sh 71 | ``` 72 | 73 | You may change the dataset, the number of hidden layers, the number of hidden units per hidden layer, the batch size, the learning rate, etc. in train.sh. 74 | 75 | The results may vary by 1% or less between identical runs due to random initialization. 76 | 77 | ### Contact 78 | 79 | Feel free to drop me an email if you have any questions: s.badirli@gmail.com 80 | 81 | ### Acknowledgments 82 | 83 | * To his immense credit, my colleague, Xuanqing Liu (https://xuanqing94.github.io/), did an awesome job on the code development. 
84 | -------------------------------------------------------------------------------- /Regression/Readme.md: -------------------------------------------------------------------------------- 1 | - Data loading and dataloader creation are handled in GrowNet/Regression/data/data.py. If you want to try new data, please check the LibSVMRegData class in data.py for the expected format. 2 | 3 | - The individual model class and the ensemble architecture are in GrowNet/Regression/models: mlp.py and dynamic_net.py. 4 | You can increase the number of hidden layers or change the activation functions in mlp.py. 5 | 6 | - train.sh will reproduce the results for the YearPredictionMSD (music year prediction) data. You can change the dataset to slice_localization and adjust the feature dimension accordingly. You may also want to increase the hidden layer dimension to 128 or more for the slice localization data. -------------------------------------------------------------------------------- /Regression/data/data.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from torch.utils.data import Dataset 8 | from scipy.sparse import csr_matrix 9 | from sklearn.datasets import load_svmlight_file 10 | from sklearn import datasets 11 | from sklearn.model_selection import train_test_split 12 | 13 | 14 | class LibSVMData(Dataset): 15 | def __init__(self, root, dim, normalization, pos=1, neg=-1, out_pos=1, out_neg=-1): 16 | self.feat, self.label = load_svmlight_file(root) 17 | 18 | self.feat = csr_matrix((self.feat.data, self.feat.indices, self.feat.indptr), shape=(len(self.label), dim)) 19 | self.feat = self.feat.toarray().astype(np.float32) 20 | 21 | self.label = self.label.astype(np.float32) 22 | idx_pos = self.label == pos 23 | idx_neg = self.label == neg 24 | self.label[idx_pos] = out_pos 25 | self.label[idx_neg] = out_neg 26 | 27 | def __getitem__(self, index): 28 | arr = self.feat[index, :] 29 | return arr, self.label[index] 30 | def __len__(self): 31 | return len(self.label) 32 | 33 | class LibSVMRankData(Dataset): 34 | def __init__(self, root2data, root2qid, dim): 35 | self.feat, self.label = load_svmlight_file(root2data) 36 | self.qid = np.loadtxt(root2qid, dtype='int32') 37 | self.feat = self.feat.toarray().astype(np.float32) 38 | self.label = self.label.astype(np.float32) 39 | self.feat = self.feat[:, ~(self.feat == 0).all(0)] 40 | print(self.feat.shape[1]) 41 | 42 | def __getitem__(self, index): 43 | return self.feat[index, :], self.label[index], self.qid[index] 44 | 45 | def __len__(self): 46 | return len(self.label) 47 | 48 | class LibSVMRegData(Dataset): 49 | def __init__(self, root, dim, normalization): 50 | data = np.load(root) 51 | self.feat, self.label = data['features'], data['labels'] 52 | del data 53 | self.feat = self.feat.astype(np.float32) 54 | self.label = self.label.astype(np.float32) 55 | #self.feat = self.feat[:, ~(self.feat == 0).all(0)] 56 | #import ipdb; ipdb.set_trace() 57 | 58 | print(self.feat.shape[1]) 59 | 60 | def __getitem__(self, index): 61 | return self.feat[index, :], self.label[index] 62 | 63 | def __len__(self): 64 | return len(self.label) 65 | 66 | class LibCSVData(Dataset): 67 | def __init__(self, root, dim, pos=1, neg=-1): 68 | self.data = np.loadtxt(root, delimiter=',').astype(np.float32) 69 | self.feat = self.data[:, 1:] 70 | self.label = self.data[:, 0] 71 | self.label[self.label == pos] = 1 72 | self.label[self.label == neg] = -1 73 | 74 | def __getitem__(self, index): 75 | #arr = 
np.log(self.feat[index, :] + 1.0e-5) 76 | #arr = np.log10(self.feat[index, :] + 1.0e-5) 77 | arr = self.feat[index, :] 78 | return arr, self.label[index] 79 | 80 | def __len__(self): 81 | return len(self.label) 82 | class CriteoCSVData(Dataset): 83 | def __init__(self, root, dim, normalization, pos=1, neg=-1): 84 | # Reading the data into panda data frame 85 | self.data = pd.read_csv(root, header=None, dtype='float32') 86 | # extracting labels (0, 1) and weights 87 | self.label = self.data.iloc[:, -2] 88 | self.weights = self.data.iloc[:, -1] 89 | self.data = self.data.iloc[:, :-2] 90 | # transferring labels from {0, 1} to {-1, 1} 91 | self.label[self.label == pos] = 1 92 | self.label[self.label == neg] = -1 93 | 94 | # Applying log transformation 95 | mm = self.data.min().min() # to prevent 0 division 96 | if normalization: 97 | # Filling Nan values: Simple approach, mean of the that column or interpolation 98 | self.data = self.data.transform(lambda x: np.log(x - mm + 1)) 99 | #self.data = self.data.interpolate(method='polynomial', order=2) 100 | self.data = self.data.fillna(self.data.mean()) # To fill the rest of Nan values left untouched on the corners 101 | #self.data = (self.data - self.data.mean())/self.data.std() 102 | #self.feat = self.data.to_numpy('float32') 103 | self.data = self.data.to_numpy('float32') 104 | def __getitem__(self, index): 105 | #arr = np.log(self.feat[index, :] + 1.0e-5) 106 | #arr = np.log10(self.feat[index, :] + 1.0e-5) 107 | #arr = self.feat[index, :] 108 | arr = self.data[index, :] 109 | return arr, self.label[index], self.weights[index] 110 | 111 | def __len__(self): 112 | return len(self.label) 113 | -------------------------------------------------------------------------------- /Regression/data/sparse_data.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import os 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import Dataset 7 | from scipy.sparse import csr_matrix 8 | from sklearn.datasets import load_svmlight_file 9 | 10 | class LibSVMDataSp(Dataset): 11 | def __init__(self, root, dim_in, pos=1, neg=-1): 12 | self.feat, self.label = load_svmlight_file(root) 13 | self.feat = csr_matrix((self.feat.data, self.feat.indices, self.feat.indptr), shape=(len(self.label), dim_in)) 14 | self.feat = self.feat.astype(np.float32) 15 | self.label = self.label.astype(np.float32) 16 | self.label[self.label == pos] = 1 17 | self.label[self.label == neg] = -1 18 | 19 | def __getitem__(self, index): 20 | arr = self.feat[index, :] 21 | return arr, self.label[index] 22 | def __len__(self): 23 | return len(self.label) 24 | 25 | 26 | -------------------------------------------------------------------------------- /Regression/data/sparseloader.py: -------------------------------------------------------------------------------- 1 | import scipy 2 | import random 3 | import torch 4 | import torch.multiprocessing as multiprocessing 5 | # Changed _update_worker_pids into _set_worker_pids, due to new version of pytorch 6 | from torch._C import _set_worker_signal_handlers, _set_worker_pids, \ 7 | _remove_worker_pids, _error_if_any_worker_fails 8 | from torch.utils.data.sampler import SequentialSampler, RandomSampler, BatchSampler 9 | import signal 10 | import functools 11 | import collections 12 | import re 13 | import sys 14 | import threading 15 | import traceback 16 | import os 17 | import time 18 | from torch._six import * #string_classes, int_classes, FileNotFoundError 19 | 20 | 
IS_WINDOWS = sys.platform == "win32" 21 | if IS_WINDOWS: 22 | import ctypes 23 | from ctypes.wintypes import DWORD, BOOL, HANDLE 24 | 25 | if sys.version_info[0] == 2: 26 | import Queue as queue 27 | else: 28 | import queue 29 | 30 | 31 | class ExceptionWrapper(object): 32 | r"""Wraps an exception plus traceback to communicate across threads""" 33 | 34 | def __init__(self, exc_info): 35 | self.exc_type = exc_info[0] 36 | self.exc_msg = "".join(traceback.format_exception(*exc_info)) 37 | 38 | 39 | _use_shared_memory = False 40 | r"""Whether to use shared memory in default_collate""" 41 | 42 | MANAGER_STATUS_CHECK_INTERVAL = 5.0 43 | 44 | if IS_WINDOWS: 45 | # On Windows, the parent ID of the worker process remains unchanged when the manager process 46 | # is gone, and the only way to check it through OS is to let the worker have a process handle 47 | # of the manager and ask if the process status has changed. 48 | class ManagerWatchdog(object): 49 | def __init__(self): 50 | self.manager_pid = os.getppid() 51 | 52 | self.kernel32 = ctypes.WinDLL('kernel32', use_last_error=True) 53 | self.kernel32.OpenProcess.argtypes = (DWORD, BOOL, DWORD) 54 | self.kernel32.OpenProcess.restype = HANDLE 55 | self.kernel32.WaitForSingleObject.argtypes = (HANDLE, DWORD) 56 | self.kernel32.WaitForSingleObject.restype = DWORD 57 | 58 | # Value obtained from https://msdn.microsoft.com/en-us/library/ms684880.aspx 59 | SYNCHRONIZE = 0x00100000 60 | self.manager_handle = self.kernel32.OpenProcess(SYNCHRONIZE, 0, self.manager_pid) 61 | 62 | if not self.manager_handle: 63 | raise ctypes.WinError(ctypes.get_last_error()) 64 | 65 | def is_alive(self): 66 | return self.kernel32.WaitForSingleObject(self.manager_handle, 0) != 0 67 | else: 68 | class ManagerWatchdog(object): 69 | def __init__(self): 70 | self.manager_pid = os.getppid() 71 | 72 | def is_alive(self): 73 | return os.getppid() == self.manager_pid 74 | 75 | 76 | def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, init_fn, worker_id): 77 | global _use_shared_memory 78 | _use_shared_memory = True 79 | 80 | _set_worker_signal_handlers() 81 | 82 | torch.set_num_threads(1) 83 | random.seed(seed) 84 | torch.manual_seed(seed) 85 | 86 | if init_fn is not None: 87 | init_fn(worker_id) 88 | 89 | watchdog = ManagerWatchdog() 90 | 91 | while True: 92 | try: 93 | r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL) 94 | except queue.Empty: 95 | if watchdog.is_alive(): 96 | continue 97 | else: 98 | break 99 | if r is None: 100 | break 101 | idx, batch_indices = r 102 | try: 103 | samples = collate_fn([dataset[i] for i in batch_indices]) 104 | except Exception: 105 | data_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 106 | else: 107 | data_queue.put((idx, samples)) 108 | del samples 109 | 110 | 111 | def _worker_manager_loop(in_queue, out_queue, done_event, pin_memory, device_id): 112 | if pin_memory: 113 | torch.cuda.set_device(device_id) 114 | 115 | while True: 116 | try: 117 | r = in_queue.get() 118 | except Exception: 119 | if done_event.is_set(): 120 | return 121 | raise 122 | if r is None: 123 | break 124 | if isinstance(r[1], ExceptionWrapper): 125 | out_queue.put(r) 126 | continue 127 | idx, batch = r 128 | try: 129 | if pin_memory: 130 | batch = pin_memory_batch(batch) 131 | except Exception: 132 | out_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 133 | else: 134 | out_queue.put((idx, batch)) 135 | 136 | numpy_type_map = { 137 | 'float64': torch.DoubleTensor, 138 | 'float32': torch.FloatTensor, 139 | 'float16': torch.HalfTensor, 140 | 
'int64': torch.LongTensor, 141 | 'int32': torch.IntTensor, 142 | 'int16': torch.ShortTensor, 143 | 'int8': torch.CharTensor, 144 | 'uint8': torch.ByteTensor, 145 | } 146 | 147 | 148 | def default_collate(batch): 149 | r"""Puts each data field into a tensor with outer dimension batch size""" 150 | 151 | error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" 152 | elem_type = type(batch[0]) 153 | if isinstance(batch[0], torch.Tensor): 154 | out = None 155 | if _use_shared_memory: 156 | # If we're in a background process, concatenate directly into a 157 | # shared memory tensor to avoid an extra copy 158 | numel = sum([x.numel() for x in batch]) 159 | storage = batch[0].storage()._new_shared(numel) 160 | out = batch[0].new(storage) 161 | return torch.stack(batch, 0, out=out) 162 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 163 | and elem_type.__name__ != 'string_': 164 | elem = batch[0] 165 | if elem_type.__name__ == 'ndarray': 166 | # array of string classes and object 167 | if re.search('[SaUO]', elem.dtype.str) is not None: 168 | raise TypeError(error_msg.format(elem.dtype)) 169 | 170 | return torch.stack([torch.from_numpy(b) for b in batch], 0) 171 | if elem.shape == (): # scalars 172 | py_type = float if elem.dtype.name.startswith('float') else int 173 | return numpy_type_map[elem.dtype.name](list(map(py_type, batch))) 174 | elif isinstance(batch[0], int_classes): 175 | return torch.LongTensor(batch) 176 | elif isinstance(batch[0], float): 177 | return torch.DoubleTensor(batch) 178 | elif isinstance(batch[0], string_classes): 179 | return batch 180 | elif isinstance(batch[0], collections.Mapping): 181 | return {key: default_collate([d[key] for d in batch]) for key in batch[0]} 182 | elif isinstance(batch[0], collections.Sequence): 183 | transposed = zip(*batch) 184 | return [default_collate(samples) for samples in transposed] 185 | elif isinstance(batch[0], scipy.sparse.csr.csr_matrix): 186 | row_idx = [] 187 | col_idx = [] 188 | val = [] 189 | for i, b in enumerate(batch): 190 | col = b.indices 191 | row_idx.extend([i] * len(col)) 192 | col_idx.extend(col) 193 | val.extend(b.data) 194 | i = torch.LongTensor([row_idx, col_idx]) 195 | v = torch.FloatTensor(val) 196 | return torch.sparse.FloatTensor(i, v, torch.Size([len(batch), batch[0].shape[1]])) 197 | raise TypeError((error_msg.format(type(batch[0])))) 198 | 199 | 200 | def pin_memory_batch(batch): 201 | if isinstance(batch, torch.Tensor): 202 | return batch.pin_memory() 203 | elif isinstance(batch, string_classes): 204 | return batch 205 | elif isinstance(batch, collections.Mapping): 206 | return {k: pin_memory_batch(sample) for k, sample in batch.items()} 207 | elif isinstance(batch, collections.Sequence): 208 | return [pin_memory_batch(sample) for sample in batch] 209 | else: 210 | return batch 211 | 212 | 213 | _SIGCHLD_handler_set = False 214 | r"""Whether SIGCHLD handler is set for DataLoader worker failures. 
Only one 215 | handler needs to be set for all DataLoaders in a process.""" 216 | 217 | 218 | def _set_SIGCHLD_handler(): 219 | # Windows doesn't support SIGCHLD handler 220 | if sys.platform == 'win32': 221 | return 222 | # can't set signal in child threads 223 | if not isinstance(threading.current_thread(), threading._MainThread): 224 | return 225 | global _SIGCHLD_handler_set 226 | if _SIGCHLD_handler_set: 227 | return 228 | previous_handler = signal.getsignal(signal.SIGCHLD) 229 | if not callable(previous_handler): 230 | previous_handler = None 231 | 232 | def handler(signum, frame): 233 | _error_if_any_worker_fails() 234 | if previous_handler is not None: 235 | previous_handler(signum, frame) 236 | 237 | signal.signal(signal.SIGCHLD, handler) 238 | _SIGCHLD_handler_set = True 239 | 240 | 241 | class _DataLoaderIter(object): 242 | r"""Iterates once over the DataLoader's dataset, as specified by the sampler""" 243 | 244 | def __init__(self, loader): 245 | self.dataset = loader.dataset 246 | self.collate_fn = loader.collate_fn 247 | self.batch_sampler = loader.batch_sampler 248 | self.num_workers = loader.num_workers 249 | self.pin_memory = loader.pin_memory and torch.cuda.is_available() 250 | self.timeout = loader.timeout 251 | self.done_event = threading.Event() 252 | 253 | self.sample_iter = iter(self.batch_sampler) 254 | 255 | base_seed = torch.LongTensor(1).random_().item() 256 | 257 | if self.num_workers > 0: 258 | self.worker_init_fn = loader.worker_init_fn 259 | self.index_queues = [multiprocessing.Queue() for _ in range(self.num_workers)] 260 | self.worker_queue_idx = 0 261 | self.worker_result_queue = multiprocessing.SimpleQueue() 262 | self.batches_outstanding = 0 263 | self.worker_pids_set = False 264 | self.shutdown = False 265 | self.send_idx = 0 266 | self.rcvd_idx = 0 267 | self.reorder_dict = {} 268 | 269 | self.workers = [ 270 | multiprocessing.Process( 271 | target=_worker_loop, 272 | args=(self.dataset, self.index_queues[i], 273 | self.worker_result_queue, self.collate_fn, base_seed + i, 274 | self.worker_init_fn, i)) 275 | for i in range(self.num_workers)] 276 | 277 | if self.pin_memory or self.timeout > 0: 278 | self.data_queue = queue.Queue() 279 | if self.pin_memory: 280 | maybe_device_id = torch.cuda.current_device() 281 | else: 282 | # do not initialize cuda context if not necessary 283 | maybe_device_id = None 284 | self.worker_manager_thread = threading.Thread( 285 | target=_worker_manager_loop, 286 | args=(self.worker_result_queue, self.data_queue, self.done_event, self.pin_memory, 287 | maybe_device_id)) 288 | self.worker_manager_thread.daemon = True 289 | self.worker_manager_thread.start() 290 | else: 291 | self.data_queue = self.worker_result_queue 292 | 293 | for w in self.workers: 294 | w.daemon = True # ensure that the worker exits on process exit 295 | w.start() 296 | 297 | _set_worker_pids(id(self), tuple(w.pid for w in self.workers)) 298 | _set_SIGCHLD_handler() 299 | self.worker_pids_set = True 300 | 301 | # prime the prefetch loop 302 | for _ in range(2 * self.num_workers): 303 | self._put_indices() 304 | 305 | def __len__(self): 306 | return len(self.batch_sampler) 307 | 308 | def _get_batch(self): 309 | if self.timeout > 0: 310 | try: 311 | return self.data_queue.get(timeout=self.timeout) 312 | except queue.Empty: 313 | raise RuntimeError('DataLoader timed out after {} seconds'.format(self.timeout)) 314 | else: 315 | return self.data_queue.get() 316 | 317 | def __next__(self): 318 | if self.num_workers == 0: # same-process loading 319 | indices = 
next(self.sample_iter) # may raise StopIteration 320 | batch = self.collate_fn([self.dataset[i] for i in indices]) 321 | if self.pin_memory: 322 | batch = pin_memory_batch(batch) 323 | return batch 324 | 325 | # check if the next sample has already been generated 326 | if self.rcvd_idx in self.reorder_dict: 327 | batch = self.reorder_dict.pop(self.rcvd_idx) 328 | return self._process_next_batch(batch) 329 | 330 | if self.batches_outstanding == 0: 331 | self._shutdown_workers() 332 | raise StopIteration 333 | 334 | while True: 335 | assert (not self.shutdown and self.batches_outstanding > 0) 336 | idx, batch = self._get_batch() 337 | self.batches_outstanding -= 1 338 | if idx != self.rcvd_idx: 339 | # store out-of-order samples 340 | self.reorder_dict[idx] = batch 341 | continue 342 | return self._process_next_batch(batch) 343 | 344 | next = __next__ # Python 2 compatibility 345 | 346 | def __iter__(self): 347 | return self 348 | 349 | def _put_indices(self): 350 | assert self.batches_outstanding < 2 * self.num_workers 351 | indices = next(self.sample_iter, None) 352 | if indices is None: 353 | return 354 | self.index_queues[self.worker_queue_idx].put((self.send_idx, indices)) 355 | self.worker_queue_idx = (self.worker_queue_idx + 1) % self.num_workers 356 | self.batches_outstanding += 1 357 | self.send_idx += 1 358 | 359 | def _process_next_batch(self, batch): 360 | self.rcvd_idx += 1 361 | self._put_indices() 362 | if isinstance(batch, ExceptionWrapper): 363 | raise batch.exc_type(batch.exc_msg) 364 | return batch 365 | 366 | def __getstate__(self): 367 | 368 | raise NotImplementedError("_DataLoaderIter cannot be pickled") 369 | 370 | def _shutdown_workers(self): 371 | try: 372 | if not self.shutdown: 373 | self.shutdown = True 374 | self.done_event.set() 375 | for q in self.index_queues: 376 | q.put(None) 377 | # if some workers are waiting to put, make place for them 378 | try: 379 | while not self.worker_result_queue.empty(): 380 | self.worker_result_queue.get() 381 | except (FileNotFoundError, ImportError): 382 | pass 383 | self.worker_result_queue.put(None) 384 | finally: 385 | # removes pids no matter what 386 | if self.worker_pids_set: 387 | _remove_worker_pids(id(self)) 388 | self.worker_pids_set = False 389 | 390 | def __del__(self): 391 | if self.num_workers > 0: 392 | self._shutdown_workers() 393 | 394 | 395 | class DataLoader(object): 396 | r""" 397 | Data loader. Combines a dataset and a sampler, and provides 398 | single- or multi-process iterators over the dataset. 399 | 400 | Arguments: 401 | dataset (Dataset): dataset from which to load the data. 402 | batch_size (int, optional): how many samples per batch to load 403 | (default: 1). 404 | shuffle (bool, optional): set to ``True`` to have the data reshuffled 405 | at every epoch (default: False). 406 | sampler (Sampler, optional): defines the strategy to draw samples from 407 | the dataset. If specified, ``shuffle`` must be False. 408 | batch_sampler (Sampler, optional): like sampler, but returns a batch of 409 | indices at a time. Mutually exclusive with batch_size, shuffle, 410 | sampler, and drop_last. 411 | num_workers (int, optional): how many subprocesses to use for data 412 | loading. 0 means that the data will be loaded in the main process. 413 | (default: 0) 414 | collate_fn (callable, optional): merges a list of samples to form a mini-batch. 415 | pin_memory (bool, optional): If ``True``, the data loader will copy tensors 416 | into CUDA pinned memory before returning them. 
417 | drop_last (bool, optional): set to ``True`` to drop the last incomplete batch, 418 | if the dataset size is not divisible by the batch size. If ``False`` and 419 | the size of dataset is not divisible by the batch size, then the last batch 420 | will be smaller. (default: False) 421 | timeout (numeric, optional): if positive, the timeout value for collecting a batch 422 | from workers. Should always be non-negative. (default: 0) 423 | worker_init_fn (callable, optional): If not None, this will be called on each 424 | worker subprocess with the worker id (an int in ``[0, num_workers - 1]``) as 425 | input, after seeding and before data loading. (default: None) 426 | 427 | .. note:: By default, each worker will have its PyTorch seed set to 428 | ``base_seed + worker_id``, where ``base_seed`` is a long generated 429 | by main process using its RNG. However, seeds for other libraies 430 | may be duplicated upon initializing workers (w.g., NumPy), causing 431 | each worker to return identical random numbers. (See 432 | :ref:`dataloader-workers-random-seed` section in FAQ.) You may 433 | use ``torch.initial_seed()`` to access the PyTorch seed for each 434 | worker in :attr:`worker_init_fn`, and use it to set other seeds 435 | before data loading. 436 | 437 | .. warning:: If ``spawn`` start method is used, :attr:`worker_init_fn` cannot be an 438 | unpicklable object, e.g., a lambda function. 439 | """ 440 | 441 | __initialized = False 442 | 443 | def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, 444 | num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False, 445 | timeout=0, worker_init_fn=None): 446 | self.dataset = dataset 447 | self.batch_size = batch_size 448 | self.num_workers = num_workers 449 | self.collate_fn = collate_fn 450 | self.pin_memory = pin_memory 451 | self.drop_last = drop_last 452 | self.timeout = timeout 453 | self.worker_init_fn = worker_init_fn 454 | 455 | if timeout < 0: 456 | raise ValueError('timeout option should be non-negative') 457 | 458 | if batch_sampler is not None: 459 | if batch_size > 1 or shuffle or sampler is not None or drop_last: 460 | raise ValueError('batch_sampler option is mutually exclusive ' 461 | 'with batch_size, shuffle, sampler, and ' 462 | 'drop_last') 463 | self.batch_size = None 464 | self.drop_last = None 465 | 466 | if sampler is not None and shuffle: 467 | raise ValueError('sampler option is mutually exclusive with ' 468 | 'shuffle') 469 | 470 | if self.num_workers < 0: 471 | raise ValueError('num_workers option cannot be negative; ' 472 | 'use num_workers=0 to disable multiprocessing.') 473 | 474 | if batch_sampler is None: 475 | if sampler is None: 476 | if shuffle: 477 | sampler = RandomSampler(dataset) 478 | else: 479 | sampler = SequentialSampler(dataset) 480 | batch_sampler = BatchSampler(sampler, batch_size, drop_last) 481 | 482 | self.sampler = sampler 483 | self.batch_sampler = batch_sampler 484 | self.__initialized = True 485 | 486 | def __setattr__(self, attr, val): 487 | if self.__initialized and attr in ('batch_size', 'sampler', 'drop_last'): 488 | raise ValueError('{} attribute should not be set after {} is ' 489 | 'initialized'.format(attr, self.__class__.__name__)) 490 | 491 | super(DataLoader, self).__setattr__(attr, val) 492 | 493 | def __iter__(self): 494 | return _DataLoaderIter(self) 495 | 496 | def __len__(self): 497 | return len(self.batch_sampler) 498 | 499 | -------------------------------------------------------------------------------- 
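The vendored DataLoader above differs from the stock PyTorch loader mainly in its default_collate, which stacks scipy csr_matrix samples into a single torch.sparse.FloatTensor per mini-batch. Below is a minimal usage sketch (not a file shipped with the repository); it assumes the pinned Python 3.6 / PyTorch 1.4 environment from requirements.txt, that it is run from a task folder such as Regression/, and that the file path and feature dimension are illustrative placeholders.

```
# Sketch only: './data/example.libsvm' and dim_in=100 are hypothetical values.
from data.sparse_data import LibSVMDataSp   # dataset whose __getitem__ returns scipy CSR rows
from data.sparseloader import DataLoader    # the vendored loader defined above

train_set = LibSVMDataSp('./data/example.libsvm', dim_in=100)
# With num_workers=0 the batch is collated in the main process; default_collate
# detects csr_matrix samples and builds one sparse tensor per mini-batch.
train_loader = DataLoader(train_set, batch_size=256, shuffle=True, num_workers=0)

for x, y in train_loader:
    # x: torch.sparse.FloatTensor of size (batch, dim_in); y: dense tensor of +/-1 labels
    print(x.size(), y.size())
    break
```

The task scripts (e.g. main_reg_cv.py) build their train, validation and test loaders with this same DataLoader class.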
/Regression/data/untitled.py: -------------------------------------------------------------------------------- 1 | def convert2npz(input_filename, out_data_filename): # writes one 'label qid feature:value ...' text line per sample 2 | input = open(input_filename,"r") 3 | output_feature = open(out_data_filename,"w") 4 | #output_query = open(out_query_filename,"w") 5 | #output_label = open(out_query_filename2,"w") 6 | 7 | while True: 8 | line = input.readline() 9 | if not line: 10 | break 11 | tokens = line.split(' ') 12 | tokens[-1] = tokens[-1].strip() 13 | label = tokens[0] 14 | qid = int(tokens[1].split(':')[1]) 15 | 16 | #output_label.write(label + '\n') 17 | #output_query.write(str(qid) + '\n') 18 | output_feature.write(label+' ') 19 | output_feature.write(str(qid) + ' ') 20 | output_feature.write(' '.join(tokens[2:]) + '\n') 21 | 22 | input.close() 23 | #output_query.close() 24 | output_feature.close() 25 | #output_label.close() 26 | 27 | convert2npz("set1.train.txt","yahoo.train") 28 | convert2npz("set1.test.txt","yahoo.test") 29 | -------------------------------------------------------------------------------- /Regression/main_reg_cv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import argparse 4 | import copy 5 | import torch 6 | import torch.nn as nn 7 | import time 8 | from data.sparseloader import DataLoader 9 | from data.data import LibSVMData, LibCSVData, LibSVMRegData 10 | from data.sparse_data import LibSVMDataSp 11 | from models.mlp import MLP_1HL, MLP_2HL, MLP_3HL 12 | from models.dynamic_net import DynamicNet, ForwardType 13 | from sklearn.metrics import mean_squared_error 14 | from sklearn.preprocessing import StandardScaler, MinMaxScaler 15 | from torch.optim import SGD, Adam 16 | 17 | 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('--feat_d', type=int, required=True) 21 | parser.add_argument('--hidden_d', type=int, required=True) 22 | parser.add_argument('--boost_rate', type=float, required=True) 23 | parser.add_argument('--lr', type=float, required=True) 24 | parser.add_argument('--num_nets', type=int, required=True) 25 | parser.add_argument('--data', type=str, required=True) 26 | parser.add_argument('--tr', type=str, required=True) 27 | parser.add_argument('--te', type=str, required=True) 28 | parser.add_argument('--batch_size', type=int, required=True) 29 | parser.add_argument('--epochs_per_stage', type=int, required=True) 30 | parser.add_argument('--correct_epoch', type=int, required=True) 31 | parser.add_argument('--L2', type=float, required=True) 32 | parser.add_argument('--sparse', action='store_true') 33 | parser.add_argument('--normalization', default=False, type=lambda x: (str(x).lower() == 'true')) 34 | parser.add_argument('--cv', default=False, type=lambda x: (str(x).lower() == 'true')) 35 | parser.add_argument('--out_f', type=str, required=True) 36 | parser.add_argument('--cuda', action='store_true') 37 | 38 | opt = parser.parse_args() 39 | 40 | if not opt.cuda: 41 | torch.set_num_threads(16) 42 | 43 | # prepare the dataset 44 | def get_data(): 45 | if opt.data in ['ca_housing', 'ailerons', 'YearPredictionMSD', 'slice_localization']: 46 | train = LibSVMRegData(opt.tr, opt.feat_d, opt.normalization) 47 | test = LibSVMRegData(opt.te, opt.feat_d, opt.normalization) 48 | val = [] 49 | if opt.cv: 50 | val = copy.deepcopy(train) 51 | print('Creating Validation set! 
\n') 52 | indices = list(range(len(train))) 53 | cut = int(len(train)*0.95) 54 | np.random.shuffle(indices) 55 | train_idx = indices[:cut] 56 | val_idx = indices[cut:] 57 | 58 | train.feat = train.feat[train_idx] 59 | train.label = train.label[train_idx] 60 | val.feat = val.feat[val_idx] 61 | val.label = val.label[val_idx] 62 | else: 63 | pass 64 | 65 | if opt.normalization: 66 | scaler = StandardScaler() 67 | scaler.fit(train.feat) 68 | train.feat = scaler.transform(train.feat) 69 | test.feat = scaler.transform(test.feat) 70 | if opt.cv: 71 | val.feat = scaler.transform(val.feat) 72 | print(f'#Train: {len(train)}, #Val: {len(val)} #Test: {len(test)}') 73 | return train, test, val 74 | 75 | 76 | def get_optim(params, lr, weight_decay): 77 | optimizer = Adam(params, lr, weight_decay=weight_decay) 78 | #optimizer = SGD(params, lr, weight_decay=weight_decay) 79 | return optimizer 80 | 81 | 82 | def root_mse(net_ensemble, loader): 83 | loss = 0 84 | total = 0 85 | 86 | for x, y in loader: 87 | if opt.cuda: 88 | x = x.cuda() 89 | 90 | with torch.no_grad(): 91 | _, out = net_ensemble.forward(x) 92 | y = y.cpu().numpy().reshape(len(y), 1) 93 | out = out.cpu().numpy().reshape(len(y), 1) 94 | loss += mean_squared_error(y, out)* len(y) 95 | total += len(y) 96 | return np.sqrt(loss / total) 97 | 98 | 99 | def init_gbnn(train): 100 | positive = negative = 0 101 | for i in range(len(train)): 102 | if train[i][1] > 0: 103 | positive += 1 104 | else: 105 | negative += 1 106 | blind_acc = max(positive, negative) / (positive + negative) 107 | print(f'Blind accuracy: {blind_acc}') 108 | #print(f'Blind Logloss: {blind_acc}') 109 | return float(np.log(positive / negative)) 110 | 111 | if __name__ == "__main__": 112 | 113 | train, test, val = get_data() 114 | N = len(train) 115 | print(opt.data + ' training and test datasets are loaded!') 116 | train_loader = DataLoader(train, opt.batch_size, shuffle=True, drop_last=False, num_workers=2) 117 | test_loader = DataLoader(test, opt.batch_size, shuffle=False, drop_last=False, num_workers=2) 118 | if opt.cv: 119 | val_loader = DataLoader(val, opt.batch_size, shuffle=True, drop_last=False, num_workers=2) 120 | best_rmse = pow(10, 6) 121 | val_rmse = best_rmse 122 | best_stage = opt.num_nets-1 123 | c0 = np.mean(train.label) #init_gbnn(train) 124 | net_ensemble = DynamicNet(c0, opt.boost_rate) 125 | loss_f1 = nn.MSELoss() 126 | loss_models = torch.zeros((opt.num_nets, 3)) 127 | for stage in range(opt.num_nets): 128 | t0 = time.time() 129 | model = MLP_2HL.get_model(stage, opt) # Initialize the model_k: f_k(x), multilayer perception v2 130 | if opt.cuda: 131 | model.cuda() 132 | 133 | optimizer = get_optim(model.parameters(), opt.lr, opt.L2) 134 | net_ensemble.to_train() # Set the models in ensemble net to train mode 135 | stage_mdlloss = [] 136 | for epoch in range(opt.epochs_per_stage): 137 | for i, (x, y) in enumerate(train_loader): 138 | 139 | if opt.cuda: 140 | x= x.cuda() 141 | y = torch.as_tensor(y, dtype=torch.float32).cuda().view(-1, 1) 142 | middle_feat, out = net_ensemble.forward(x) 143 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 144 | grad_direction = -(out-y) 145 | 146 | _, out = model(x, middle_feat) 147 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 148 | loss = loss_f1(net_ensemble.boost_rate*out, grad_direction) # T 149 | 150 | model.zero_grad() 151 | loss.backward() 152 | optimizer.step() 153 | stage_mdlloss.append(loss.item()*len(y)) 154 | 155 | net_ensemble.add(model) 156 | sml = 
np.sqrt(np.sum(stage_mdlloss)/N) 157 | 158 | 159 | 160 | lr_scaler = 3 161 | # fully-corrective step 162 | stage_loss = [] 163 | if stage > 0: 164 | # Adjusting corrective step learning rate 165 | if stage % 15 == 0: 166 | #lr_scaler *= 2 167 | opt.lr /= 2 168 | opt.L2 /= 2 169 | optimizer = get_optim(net_ensemble.parameters(), opt.lr / lr_scaler, opt.L2) 170 | for _ in range(opt.correct_epoch): 171 | stage_loss = [] 172 | for i, (x, y) in enumerate(train_loader): 173 | if opt.cuda: 174 | x, y = x.cuda(), y.cuda().view(-1, 1) 175 | _, out = net_ensemble.forward_grad(x) 176 | out = torch.as_tensor(out, dtype=torch.float32).cuda().view(-1, 1) 177 | 178 | loss = loss_f1(out, y) 179 | optimizer.zero_grad() 180 | loss.backward() 181 | optimizer.step() 182 | stage_loss.append(loss.item()*len(y)) 183 | #print(net_ensemble.boost_rate) 184 | # store model 185 | elapsed_tr = time.time()-t0 186 | sl = 0 187 | if stage_loss != []: 188 | sl = np.sqrt(np.sum(stage_loss)/N) 189 | 190 | print(f'Stage - {stage}, training time: {elapsed_tr: .1f} sec, model MSE loss: {sml: .5f}, Ensemble Net MSE Loss: {sl: .5f}') 191 | 192 | net_ensemble.to_file(opt.out_f) 193 | net_ensemble = DynamicNet.from_file(opt.out_f, lambda stage: MLP_2HL.get_model(stage, opt)) 194 | 195 | if opt.cuda: 196 | net_ensemble.to_cuda() 197 | net_ensemble.to_eval() # Set the models in ensemble net to eval mode 198 | 199 | # Train 200 | tr_rmse = root_mse(net_ensemble, train_loader) 201 | if opt.cv: 202 | val_rmse = root_mse(net_ensemble, val_loader) 203 | if val_rmse < best_rmse: 204 | best_rmse = val_rmse 205 | best_stage = stage 206 | 207 | te_rmse = root_mse(net_ensemble, test_loader) 208 | 209 | print(f'Stage: {stage} RMSE@Tr: {tr_rmse:.5f}, RMSE@Val: {val_rmse:.5f}, RMSE@Te: {te_rmse:.5f}') 210 | 211 | loss_models[stage, 0], loss_models[stage, 1] = tr_rmse, te_rmse 212 | 213 | tr_rmse, te_rmse = loss_models[best_stage, 0], loss_models[best_stage, 1] 214 | print(f'Best validation stage: {best_stage} RMSE@Tr: {tr_rmse:.5f}, final RMSE@Te: {te_rmse:.5f}') 215 | loss_models = loss_models.detach().cpu().numpy() 216 | fname = './results/' + opt.data +'_rmse' 217 | np.savez(fname, rmse=loss_models, params=opt) 218 | 219 | -------------------------------------------------------------------------------- /Regression/models/dynamic_net.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import torch 3 | #import pickle 4 | import torch.nn as nn 5 | 6 | class ForwardType(Enum): 7 | SIMPLE = 0 8 | STACKED = 1 9 | CASCADE = 2 10 | GRADIENT = 3 11 | 12 | class DynamicNet(object): 13 | def __init__(self, c0, lr): 14 | self.models = [] 15 | self.c0 = c0 16 | self.lr = lr 17 | self.boost_rate = nn.Parameter(torch.tensor(lr, requires_grad=True, device="cuda")) 18 | 19 | def add(self, model): 20 | self.models.append(model) 21 | 22 | def parameters(self): 23 | params = [] 24 | for m in self.models: 25 | params.extend(m.parameters()) 26 | 27 | params.append(self.boost_rate) 28 | return params 29 | 30 | def zero_grad(self): 31 | for m in self.models: 32 | m.zero_grad() 33 | 34 | def to_cuda(self): 35 | for m in self.models: 36 | m.cuda() 37 | 38 | def to_eval(self): 39 | for m in self.models: 40 | m.eval() 41 | 42 | def to_train(self): 43 | for m in self.models: 44 | m.train(True) 45 | 46 | def forward(self, x): 47 | if len(self.models) == 0: 48 | return None, self.c0 49 | middle_feat_cum = None 50 | prediction = None 51 | with torch.no_grad(): 52 | for m in self.models: 53 | if middle_feat_cum 
is None: 54 | middle_feat_cum, prediction = m(x, middle_feat_cum) 55 | else: 56 | middle_feat_cum, pred = m(x, middle_feat_cum) 57 | prediction += pred 58 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 59 | 60 | def forward_grad(self, x): 61 | if len(self.models) == 0: 62 | return None, self.c0 63 | # at least one model 64 | middle_feat_cum = None 65 | prediction = None 66 | for m in self.models: 67 | if middle_feat_cum is None: 68 | middle_feat_cum, prediction = m(x, middle_feat_cum) 69 | else: 70 | middle_feat_cum, pred = m(x, middle_feat_cum) 71 | prediction += pred 72 | return middle_feat_cum, self.c0 + self.boost_rate * prediction 73 | 74 | @classmethod 75 | def from_file(cls, path, builder): 76 | d = torch.load(path) 77 | net = DynamicNet(d['c0'], d['lr']) 78 | net.boost_rate = d['boost_rate'] 79 | for stage, m in enumerate(d['models']): 80 | submod = builder(stage) 81 | submod.load_state_dict(m) 82 | net.add(submod) 83 | return net 84 | 85 | def to_file(self, path): 86 | models = [m.state_dict() for m in self.models] 87 | d = {'models': models, 'c0': self.c0, 'lr': self.lr, 'boost_rate': self.boost_rate} 88 | torch.save(d, path) 89 | -------------------------------------------------------------------------------- /Regression/models/mlp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .splinear import SpLinear 6 | 7 | 8 | class MLP_1HL(nn.Module): 9 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 10 | super(MLP_1HL, self).__init__() 11 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 12 | self.out_layer = nn.Linear(dim_hidden1, 1) 13 | self.lrelu = nn.LeakyReLU(0.1) 14 | self.relu = nn.ReLU() 15 | if bn: 16 | self.bn = nn.BatchNorm1d(dim_hidden1) 17 | self.bn2 = nn.BatchNorm1d(dim_in) 18 | 19 | def forward(self, x, lower_f): 20 | if lower_f is not None: 21 | x = torch.cat([x, lower_f], dim=1) 22 | x = self.bn2(x) 23 | out = self.in_layer(x) 24 | return out, self.out_layer(self.relu(out)).squeeze() 25 | 26 | @classmethod 27 | def get_model(cls, stage, opt): 28 | if stage == 0: 29 | dim_in = opt.feat_d 30 | else: 31 | dim_in = opt.feat_d + opt.hidden_d 32 | model = MLP_1HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 33 | return model 34 | 35 | 36 | class MLP_2HL(nn.Module): 37 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 38 | super(MLP_2HL, self).__init__() 39 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 40 | self.dropout_layer = nn.Dropout(0.0) 41 | self.lrelu = nn.LeakyReLU(0.1) 42 | self.relu = nn.ReLU() 43 | self.hidden_layer = nn.Linear(dim_hidden1, dim_hidden2) 44 | self.out_layer = nn.Linear(dim_hidden2, 1) 45 | self.bn = nn.BatchNorm1d(dim_hidden1) 46 | self.bn2 = nn.BatchNorm1d(dim_in) 47 | 48 | def forward(self, x, lower_f): 49 | if lower_f is not None: 50 | x = torch.cat([x, lower_f], dim=1) 51 | x = self.bn2(x) 52 | out = self.lrelu(self.in_layer(x)) 53 | out = self.bn(out) 54 | out = self.hidden_layer(out) 55 | return out, self.out_layer(self.relu(out)).squeeze() 56 | 57 | @classmethod 58 | def get_model(cls, stage, opt): 59 | if stage == 0: 60 | dim_in = opt.feat_d 61 | else: 62 | dim_in = opt.feat_d + opt.hidden_d 63 | model = MLP_2HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 64 | return model 65 | 66 | class MLP_3HL(nn.Module): 67 | def 
__init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 68 | super(MLP_3HL, self).__init__() 69 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 70 | self.dropout_layer = nn.Dropout(0.0) 71 | self.lrelu = nn.LeakyReLU(0.1) 72 | self.relu = nn.ReLU() 73 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 74 | self.out_layer = nn.Linear(dim_hidden1, 1) 75 | self.bn = nn.BatchNorm1d(dim_hidden1) 76 | self.bn2 = nn.BatchNorm1d(dim_in) 77 | # print('Batch normalization is processed!') 78 | 79 | def forward(self, x, lower_f): 80 | if lower_f is not None: 81 | x = torch.cat([x, lower_f], dim=1) 82 | x = self.bn2(x) 83 | out = self.lrelu(self.in_layer(x)) 84 | out = self.bn(out) 85 | out = self.lrelu(self.hidden_layer(out)) 86 | out = self.bn(out) 87 | out = self.hidden_layer(out) 88 | return out, self.out_layer(self.relu(out)).squeeze() 89 | 90 | @classmethod 91 | def get_model(cls, stage, opt): 92 | if stage == 0: 93 | dim_in = opt.feat_d 94 | else: 95 | dim_in = opt.feat_d + opt.hidden_d 96 | model = MLP_3HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 97 | return model 98 | 99 | class MLP_4HL(nn.Module): 100 | def __init__(self, dim_in, dim_hidden1, dim_hidden2, sparse=False, bn=True): 101 | super(MLP_4HL, self).__init__() 102 | self.in_layer = SpLinear(dim_in, dim_hidden1) if sparse else nn.Linear(dim_in, dim_hidden1) 103 | self.dropout_layer = nn.Dropout(0.0) 104 | self.lrelu = nn.LeakyReLU(0.1) 105 | self.relu = nn.ReLU() 106 | self.hidden_layer = nn.Linear(dim_hidden2, dim_hidden1) 107 | self.out_layer = nn.Linear(dim_hidden1, 1) 108 | self.bn = nn.BatchNorm1d(dim_hidden1) 109 | self.bn2 = nn.BatchNorm1d(dim_in) 110 | # print('Batch normalization is processed!') 111 | 112 | def forward(self, x, lower_f): 113 | if lower_f is not None: 114 | x = torch.cat([x, lower_f], dim=1) 115 | x = self.bn2(x) 116 | out = self.lrelu(self.in_layer(x)) #HL-1 117 | out = self.bn(out) 118 | out = self.lrelu(self.hidden_layer(out)) #HL-2 119 | out = self.bn(out) 120 | out = self.lrelu(self.hidden_layer(out)) #HL-3 121 | out = self.bn(out) 122 | out = self.hidden_layer(out) #HL-4 123 | return out, self.out_layer(self.relu(out)).squeeze() 124 | 125 | @classmethod 126 | def get_model(cls, stage, opt): 127 | if stage == 0: 128 | dim_in = opt.feat_d 129 | else: 130 | dim_in = opt.feat_d + opt.hidden_d 131 | model = MLP_4HL(dim_in, opt.hidden_d, opt.hidden_d, opt.sparse) 132 | return model 133 | 134 | 135 | class DNN(nn.Module): 136 | def __init__(self, dim_in, dim_hidden, n_hidden=20, sparse=False, bn=True, drop_out=0.3): 137 | super(DNN, self).__init__() 138 | if sparse: 139 | self.in_layer = SpLinear(dim_in, dim_hidden) 140 | else: 141 | self.in_layer = nn.Linear(dim_in, dim_hidden) 142 | self.in_act = nn.SELU() 143 | hidden_layers = [] 144 | for _ in range(n_hidden): 145 | hidden_layers.append(nn.Linear(dim_hidden, dim_hidden)) 146 | if bn: 147 | hidden_layers.append(nn.BatchNorm1d(dim_hidden)) 148 | hidden_layers.append(nn.SELU()) 149 | if drop_out > 0: 150 | hidden_layers.append(nn.Dropout(drop_out)) 151 | self.hidden_layers = nn.Sequential(*hidden_layers) 152 | self.out_layer = nn.Linear(dim_hidden, 1) 153 | 154 | def forward(self, x): 155 | out = self.in_act(self.in_layer(x)) 156 | out = self.hidden_layers(out) 157 | out = self.out_layer(out) 158 | return out.squeeze() 159 | -------------------------------------------------------------------------------- /Regression/models/splinear.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class SpLinearFunc(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, input, weight, bias=None): 10 | ctx.save_for_backward(input, weight, bias) 11 | output = input.mm(weight.t()) 12 | if bias is not None: 13 | output += bias.unsqueeze(0).expand_as(output) 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx, grad_output): 18 | input, weight, bias = ctx.saved_tensors 19 | grad_input = grad_weight = grad_bias = None 20 | 21 | if ctx.needs_input_grad[0]: 22 | grad_input = grad_output.mm(weight) 23 | if ctx.needs_input_grad[1]: 24 | grad_weight = (input.t().mm(grad_output)).t() 25 | if bias is not None and ctx.needs_input_grad[2]: 26 | grad_bias = grad_output.sum(0).squeeze(0) 27 | 28 | return grad_input, grad_weight, grad_bias 29 | 30 | splinear = SpLinearFunc.apply 31 | 32 | class SpLinear(nn.Module): 33 | def __init__(self, input_features, output_features, bias=True): 34 | super(SpLinear, self).__init__() 35 | self.input_features = input_features 36 | self.output_features = output_features 37 | self.weight = nn.Parameter(torch.Tensor(output_features, input_features)) 38 | if bias: 39 | self.bias = nn.Parameter(torch.Tensor(output_features)) 40 | else: 41 | self.register_parameter('bias', None) 42 | #TODO write a default initialization 43 | stdv = 1. / math.sqrt(self.weight.size(1)) 44 | self.weight.data.uniform_(-stdv, stdv) 45 | 46 | def forward(self, input): 47 | return splinear(input, self.weight, self.bias) 48 | -------------------------------------------------------------------------------- /Regression/results/results_readme.txt: -------------------------------------------------------------------------------- 1 | Your results will be saved here. -------------------------------------------------------------------------------- /Regression/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### Feature Table ### 4 | # ca_housing 8 5 | # YearPredictionMSD 90 6 | # slice_localization 384 7 | dataset=YearPredictionMSD 8 | 9 | BASEDIR=$(dirname "$0") 10 | OUTDIR="${BASEDIR}/ckpt/" 11 | 12 | if [ ! -d "${OUTDIR}" ] 13 | then 14 | echo "Output dir ${OUTDIR} does not exist, creating..." 
15 | mkdir -p ${OUTDIR} 16 | fi 17 | 18 | CUDA_VISIBLE_DEVICES=0 python -u main_reg_cv.py \ 19 | --feat_d 90 \ 20 | --hidden_d 32 \ 21 | --boost_rate 1 \ 22 | --lr 0.005 \ 23 | --L2 .0e-3 \ 24 | --num_nets 40 \ 25 | --data ${dataset} \ 26 | --tr ${BASEDIR}/../data/${dataset}_tr.npz \ 27 | --te ${BASEDIR}/../data/${dataset}_te.npz \ 28 | --batch_size 2048 \ 29 | --epochs_per_stage 1 \ 30 | --correct_epoch 1 \ 31 | --normalization True \ 32 | --cv True \ 33 | --out_f ${OUTDIR}/${dataset}_cls.pth \ 34 | --cuda 35 | -------------------------------------------------------------------------------- /baselines/reproduce_higgs.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | from sklearn.metrics import roc_auc_score 3 | from sklearn.datasets import load_svmlight_file 4 | 5 | # load data 6 | tr_x, tr_y = load_svmlight_file('./higgs.train') 7 | te_x, te_y = load_svmlight_file('./higgs.test') 8 | 9 | # grid search 10 | param = { 11 | 'objective': 'binary:logistic', 12 | 'tree_method': 'hist', 13 | 'learning_rate': 0.05, 14 | 'n_estimators': 800, 15 | 'max_depth': 7, 16 | 'reg_lambda': 0.02, 17 | } 18 | 19 | # regressor 20 | model = xgb.XGBRegressor(verbosity=2, seed=0, **param) 21 | model.fit(tr_x, tr_y) 22 | 23 | # predict on test data 24 | auc = roc_auc_score(te_y, model.predict(te_x)) 25 | print(auc) 26 | -------------------------------------------------------------------------------- /baselines/reproduce_slice.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xgboost as xgb 3 | from sklearn.metrics import mean_squared_error 4 | 5 | # load data 6 | tr_npz = np.load('./slice_localization_tr.npz') 7 | te_npz = np.load('./slice_localization_te.npz') 8 | 9 | # grid search 10 | param = { 11 | 'learning_rate': 0.1, 12 | 'n_estimators': 1024, 13 | 'max_depth': 7, 14 | 'reg_lambda': 0.0, 15 | } 16 | 17 | # regressor 18 | model = xgb.XGBRegressor(objective='reg:squarederror', 19 | verbosity=2, 20 | seed=0, 21 | **param) 22 | model.fit(tr_npz['features'], tr_npz['labels']) 23 | 24 | # predict on test data 25 | mse = mean_squared_error(te_npz['labels'], model.predict(te_npz['features'])) 26 | print(np.sqrt(mse)) 27 | -------------------------------------------------------------------------------- /baselines/reproduce_year.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xgboost as xgb 3 | from sklearn.metrics import mean_squared_error 4 | 5 | # load data 6 | tr_npz = np.load('./YearPredictionMSD_tr.npz') 7 | te_npz = np.load('./YearPredictionMSD_te.npz') 8 | 9 | # grid search 10 | param = { 11 | 'learning_rate': 0.05, 12 | 'n_estimators': 800, 13 | 'max_depth': 7, 14 | 'reg_lambda': 0.02, 15 | } 16 | 17 | # regressor 18 | model = xgb.XGBRegressor(objective='reg:squarederror', 19 | verbosity=2, 20 | seed=0, 21 | **param) 22 | model.fit(tr_npz['features'], tr_npz['labels']) 23 | 24 | # predict on test data 25 | mse = mean_squared_error(te_npz['labels'], model.predict(te_npz['features'])) 26 | print(np.sqrt(mse)) 27 | -------------------------------------------------------------------------------- /higgs2libsvm.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | input_filename = "HIGGS.csv" 4 | output_train = "higgs.train" 5 | output_test = "higgs.test" 6 | 7 | num_train = 10500000 8 | 9 | read_num = 0 10 | 11 | input = open(input_filename, "r") 12 | train 
= open(output_train, "w") 13 | test = open(output_test,"w") 14 | 15 | def WriteOneLine(tokens, output): 16 | label = int(float(tokens[0])) 17 | output.write(str(label)) 18 | for i in range(1,len(tokens)): 19 | feature_value = float(tokens[i]) 20 | output.write(' ' + str(i-1) + ':' + str(feature_value)) 21 | output.write('\n') 22 | 23 | line = input.readline() 24 | 25 | while line: 26 | tokens = line.split(',') 27 | if read_num < num_train: 28 | WriteOneLine(tokens, train) 29 | else: 30 | WriteOneLine(tokens, test) 31 | read_num += 1 32 | line = input.readline() 33 | 34 | input.close() 35 | train.close() 36 | test.close() 37 | -------------------------------------------------------------------------------- /reg_train_test_split.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn import datasets\n", 10 | "from sklearn.model_selection import train_test_split\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import xgboost as xgb\n", 14 | "\n", 15 | "datapath = r'.\\data'" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Slice Localization data split" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# Load the data: slice_localization\n", 32 | "df = pd.read_csv(datapath+'\\slice_localization_data.csv')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 4, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/html": [ 43 | "
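A note on the HIGGS pipeline above before the regression splits: higgs2libsvm.py writes 0/1 labels in LibSVM format, sending the first 10,500,000 rows of HIGGS.csv to higgs.train and the remaining 500,000 to higgs.test, and reproduce_higgs.py then fits an XGBRegressor with a binary:logistic objective, so its predictions are probability-like scores, which is all roc_auc_score needs. For readers who prefer the classifier interface, the sketch below is a hypothetical equivalent, not part of the repo; the hyper-parameters and file names mirror the scripts above, and random_state=0 stands in for the script's seed=0.

# Hypothetical classifier-style equivalent of baselines/reproduce_higgs.py.
# Assumes higgs.train / higgs.test were produced by higgs2libsvm.py above.
import xgboost as xgb
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import roc_auc_score

tr_x, tr_y = load_svmlight_file('./higgs.train')
te_x, te_y = load_svmlight_file('./higgs.test')

clf = xgb.XGBClassifier(
    objective='binary:logistic',
    tree_method='hist',
    learning_rate=0.05,
    n_estimators=800,
    max_depth=7,
    reg_lambda=0.02,
    random_state=0,
)
clf.fit(tr_x, tr_y)

# predict_proba returns columns [P(y=0), P(y=1)]; AUC is computed on the positive class.
print(roc_auc_score(te_y, clf.predict_proba(te_x)[:, 1]))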
\n", 61 | " | patientId | \n", 62 | "value0 | \n", 63 | "value1 | \n", 64 | "value2 | \n", 65 | "value3 | \n", 66 | "value4 | \n", 67 | "value5 | \n", 68 | "value6 | \n", 69 | "value7 | \n", 70 | "value8 | \n", 71 | "... | \n", 72 | "value375 | \n", 73 | "value376 | \n", 74 | "value377 | \n", 75 | "value378 | \n", 76 | "value379 | \n", 77 | "value380 | \n", 78 | "value381 | \n", 79 | "value382 | \n", 80 | "value383 | \n", 81 | "reference | \n", 82 | "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 87 | "0 | \n", 88 | "0.0 | \n", 89 | "0.0 | \n", 90 | "0.0 | \n", 91 | "0.0 | \n", 92 | "0.0 | \n", 93 | "0.0 | \n", 94 | "-0.25 | \n", 95 | "-0.25 | \n", 96 | "-0.25 | \n", 97 | "... | \n", 98 | "-0.25 | \n", 99 | "0.980381 | \n", 100 | "0.0 | \n", 101 | "0.0 | \n", 102 | "0.0 | \n", 103 | "0.0 | \n", 104 | "0.0 | \n", 105 | "-0.25 | \n", 106 | "-0.25 | \n", 107 | "21.803851 | \n", 108 | "
1 | \n", 111 | "0 | \n", 112 | "0.0 | \n", 113 | "0.0 | \n", 114 | "0.0 | \n", 115 | "0.0 | \n", 116 | "0.0 | \n", 117 | "0.0 | \n", 118 | "-0.25 | \n", 119 | "-0.25 | \n", 120 | "-0.25 | \n", 121 | "... | \n", 122 | "-0.25 | \n", 123 | "0.977008 | \n", 124 | "0.0 | \n", 125 | "0.0 | \n", 126 | "0.0 | \n", 127 | "0.0 | \n", 128 | "0.0 | \n", 129 | "-0.25 | \n", 130 | "-0.25 | \n", 131 | "21.745726 | \n", 132 | "
2 | \n", 135 | "0 | \n", 136 | "0.0 | \n", 137 | "0.0 | \n", 138 | "0.0 | \n", 139 | "0.0 | \n", 140 | "0.0 | \n", 141 | "0.0 | \n", 142 | "-0.25 | \n", 143 | "-0.25 | \n", 144 | "-0.25 | \n", 145 | "... | \n", 146 | "-0.25 | \n", 147 | "0.977008 | \n", 148 | "0.0 | \n", 149 | "0.0 | \n", 150 | "0.0 | \n", 151 | "0.0 | \n", 152 | "0.0 | \n", 153 | "-0.25 | \n", 154 | "-0.25 | \n", 155 | "21.687600 | \n", 156 | "
3 | \n", 159 | "0 | \n", 160 | "0.0 | \n", 161 | "0.0 | \n", 162 | "0.0 | \n", 163 | "0.0 | \n", 164 | "0.0 | \n", 165 | "0.0 | \n", 166 | "-0.25 | \n", 167 | "-0.25 | \n", 168 | "-0.25 | \n", 169 | "... | \n", 170 | "-0.25 | \n", 171 | "0.977008 | \n", 172 | "0.0 | \n", 173 | "0.0 | \n", 174 | "0.0 | \n", 175 | "0.0 | \n", 176 | "0.0 | \n", 177 | "-0.25 | \n", 178 | "-0.25 | \n", 179 | "21.629474 | \n", 180 | "
4 | \n", 183 | "0 | \n", 184 | "0.0 | \n", 185 | "0.0 | \n", 186 | "0.0 | \n", 187 | "0.0 | \n", 188 | "0.0 | \n", 189 | "0.0 | \n", 190 | "-0.25 | \n", 191 | "-0.25 | \n", 192 | "-0.25 | \n", 193 | "... | \n", 194 | "-0.25 | \n", 195 | "0.976833 | \n", 196 | "0.0 | \n", 197 | "0.0 | \n", 198 | "0.0 | \n", 199 | "0.0 | \n", 200 | "0.0 | \n", 201 | "-0.25 | \n", 202 | "-0.25 | \n", 203 | "21.571348 | \n", 204 | "
5 rows × 386 columns
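From here the notebook's job, per its name and the files consumed downstream, is to turn the raw CSV into the slice_localization_tr.npz / slice_localization_te.npz pair that baselines/reproduce_slice.py and the Regression data loader (LibSVMRegData) read through the 'features' and 'labels' keys. The sketch below illustrates that step and reuses df, datapath, np, and train_test_split from the notebook's earlier cells; the 80/20 ratio, the fixed random_state, and dropping patientId from the feature matrix are assumptions, not values read from the notebook.

# Minimal sketch of the slice-localization split (assumed 80/20 ratio).
# The 'features'/'labels' keys and *_tr.npz / *_te.npz names match what
# reproduce_slice.py and LibSVMRegData expect; everything else is assumed.
X = df.drop(columns=['patientId', 'reference']).values.astype(np.float32)
y = df['reference'].values.astype(np.float32)

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)

np.savez(datapath + '\\slice_localization_tr.npz', features=X_tr, labels=y_tr)
np.savez(datapath + '\\slice_localization_te.npz', features=X_te, labels=y_te)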
\n", 208 | "\n", 351 | " | 1 | \n", 352 | "2 | \n", 353 | "3 | \n", 354 | "4 | \n", 355 | "5 | \n", 356 | "6 | \n", 357 | "7 | \n", 358 | "8 | \n", 359 | "9 | \n", 360 | "10 | \n", 361 | "... | \n", 362 | "81 | \n", 363 | "82 | \n", 364 | "83 | \n", 365 | "84 | \n", 366 | "85 | \n", 367 | "86 | \n", 368 | "87 | \n", 369 | "88 | \n", 370 | "89 | \n", 371 | "90 | \n", 372 | "
[remaining rows of the df.head() preview condensed: 5 rows × 90 columns of YearPredictionMSD features (columns 1-90)]