├── qreg
│   ├── __init__.py
│   ├── setup.py
│   ├── dataset_fast.pxd
│   ├── dataset_fast.pyx
│   ├── sdca_qr_al_fast.pyx
│   ├── sdca_qr_fast.pyx
│   └── qreg.py
├── Makefile
├── setup.py
├── examples
│   ├── algorithms.py
│   ├── svr.py
│   └── methods.py
└── README.rst

/qreg/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1"
2 | 
3 | from .qreg import QRegressor
4 | from .qreg import QRegMTL
5 | from .qreg import toy_data
6 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | PYTHON ?= python
2 | CYTHON ?= cython
3 | 
4 | CYTHONSRC= $(wildcard qreg/*.pyx)
5 | CSRC= $(CYTHONSRC:.pyx=.cpp)
6 | 
7 | inplace:
8 | 	$(PYTHON) setup.py build_ext -i
9 | 
10 | all: cython inplace
11 | 
12 | cython: $(CSRC)
13 | 
14 | clean:
15 | 	rm -f qreg/*.c qreg/*.cpp qreg/*.html
16 | 	rm -f `find qreg -name "*.pyc"`
17 | 	rm -f `find qreg -name "*.so"`
18 | 	rm -rf `find qreg -name "*pycache*"`
19 | 	rm -rf build
20 | 	rm -rf *egg-info
21 | 	rm -rf dist
22 | 
23 | %.cpp: %.pyx
24 | 	$(CYTHON) --cplus $<
25 | 
--------------------------------------------------------------------------------
/qreg/setup.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | 
3 | import numpy
4 | 
5 | 
6 | def configuration(parent_package='', top_path=None):
7 |     from numpy.distutils.misc_util import Configuration
8 | 
9 |     config = Configuration('qreg', parent_package, top_path)
10 | 
11 |     srcdir = os.path.join(top_path, "qreg/")
12 |     print(srcdir)
13 | 
14 |     config.add_extension('dataset_fast',
15 |                          sources=['dataset_fast.cpp'],
16 |                          include_dirs=[numpy.get_include(), srcdir])
17 | 
18 |     config.add_extension('sdca_qr_fast',
19 |                          sources=['sdca_qr_fast.cpp'],
20 |                          include_dirs=[numpy.get_include(), srcdir])
21 | 
22 |     config.add_extension('sdca_qr_al_fast',
23 |                          sources=['sdca_qr_al_fast.cpp'],
24 |                          include_dirs=[numpy.get_include(), srcdir])
25 | 
26 |     return config
27 | 
28 | if __name__ == '__main__':
29 |     from numpy.distutils.core import setup
30 |     setup(**configuration(top_path='').todict())
31 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (C) 2012 Maxime Sangnier, Olivier Fercoq
4 | 
5 | import sys
6 | import os
7 | import setuptools
8 | from numpy.distutils.core import setup
9 | 
10 | 
11 | def configuration(parent_package='', top_path=None):
12 |     if os.path.exists('MANIFEST'):
13 |         os.remove('MANIFEST')
14 | 
15 |     from numpy.distutils.misc_util import Configuration
16 |     config = Configuration(None, parent_package, top_path)
17 | 
18 |     config.add_subpackage('qreg')
19 | 
20 |     return config
21 | 
22 | if __name__ == "__main__":
23 | 
24 |     old_path = os.getcwd()
25 |     local_path = os.path.dirname(os.path.abspath(sys.argv[0]))
26 | 
27 |     os.chdir(local_path)
28 |     sys.path.insert(0, local_path)
29 | 
30 |     setup(configuration=configuration,
31 |           name='qreg',
32 |           maintainer='Maxime Sangnier',
33 |           maintainer_email='maxime.sangnier@upmc.fr',
34 |           description='Data sparse quantile regression in Python',
35 |           license='New BSD',
36 |           url='https://github.com/msangnier/qreg',
37 |           version='0.1',
38 |           download_url='https://github.com/msangnier/qreg',
39 |           long_description=open('README.rst').read(),
40 |           zip_safe=False,
41 |           install_requires=['numpy', 'cvxopt'])
42 | 
--------------------------------------------------------------------------------
/qreg/dataset_fast.pxd:
--------------------------------------------------------------------------------
1 | # Author: Mathieu Blondel
2 | # License: BSD
3 | 
4 | cdef class Dataset:
5 | 
6 |     cdef int n_samples
7 |     cdef int n_features
8 | 
9 |     cpdef int get_n_samples(self)
10 |     cpdef int get_n_features(self)
11 | 
12 | 
13 | cdef class RowDataset(Dataset):
14 | 
15 |     cdef void get_row_ptr(self,
16 |                           int i,
17 |                           int** indices,
18 |                           double** data,
19 |                           int* n_nz) nogil
20 | 
21 |     cpdef get_row(self, int i)
22 | 
23 | 
24 | cdef class ColumnDataset(Dataset):
25 | 
26 |     cdef void get_column_ptr(self,
27 |                              int j,
28 |                              int** indices,
29 |                              double** data,
30 |                              int* n_nz) nogil
31 | 
32 |     cpdef get_column(self, int j)
33 | 
34 | 
35 | cdef class ContiguousDataset(RowDataset):
36 | 
37 |     cdef int* indices
38 |     cdef double* data
39 |     cdef object X
40 | 
41 |     cdef void get_row_ptr(self,
42 |                           int i,
43 |                           int** indices,
44 |                           double** data,
45 |                           int* n_nz) nogil
46 | 
47 | 
48 | cdef class FortranDataset(ColumnDataset):
49 | 
50 |     cdef int* indices
51 |     cdef double* data
52 |     cdef object X
53 | 
54 |     cdef void get_column_ptr(self,
55 |                              int j,
56 |                              int** indices,
57 |                              double** data,
58 |                              int* n_nz) nogil
59 | 
60 | 
61 | cdef class CSRDataset(RowDataset):
62 | 
63 |     cdef int* indices
64 |     cdef double* data
65 |     cdef int* indptr
66 |     cdef object X
67 | 
68 |     cdef void get_row_ptr(self,
69 |                           int i,
70 |                           int** indices,
71 |                           double** data,
72 |                           int* n_nz) nogil
73 | 
74 | 
75 | cdef class CSCDataset(ColumnDataset):
76 | 
77 |     cdef int* indices
78 |     cdef double* data
79 |     cdef int* indptr
80 |     cdef object X
81 | 
82 |     cdef void get_column_ptr(self,
83 |                              int j,
84 |                              int** indices,
85 |                              double** data,
86 |                              int* n_nz) nogil
87 | 
--------------------------------------------------------------------------------
/examples/algorithms.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | # Author: Maxime Sangnier
3 | # License: BSD
4 | 
5 | """
6 | Example of how to use the Quantile Regression toolbox with epsilon-insensitive loss (comparison of algorithms).
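Five solvers are run on the same joint quantile regression problem: 'qp' and
'sdca' with the pinball loss (eps=0), and 'qp-eps', 'coneqp-eps' and 'sdca-eps'
with the epsilon-insensitive loss. For each solver, the script plots the
estimated conditional quantiles and reports the objective value, training time
and number of support vectors, then compares the dual coefficients side by side.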
7 | """ 8 | 9 | import numpy as np 10 | from scipy.stats import norm 11 | import matplotlib.pyplot as plt 12 | from qreg import QRegressor, toy_data 13 | 14 | 15 | if __name__ == '__main__': 16 | probs = np.linspace(0.1, 0.9, 5) # Joint quantile regression 17 | eps = 0.25*len(probs) # Threshold for epsilon-loss 18 | algorithms = ['qp', 'sdca', 'qp-eps', 'coneqp-eps', 'sdca-eps'] # Algorithms to compare 19 | 20 | x_train, y_train, z_train = toy_data(50) 21 | x_test, y_test, z_test = toy_data(1000, t_min=-0.2, t_max=1.7, probs=probs) 22 | reg = QRegressor(C=1e2, probs=probs, gamma_out=1e-2, max_iter=1e4, verbose=False, lag_tol=1e-3, active_set=True) 23 | 24 | res = [] # List for resulting coefficients 25 | plt.figure(figsize=(12, 7)) 26 | for it, alg in enumerate(algorithms): 27 | if 'eps' in alg.lower(): 28 | reg.alg = alg[:-4] 29 | reg.eps = eps 30 | else: 31 | reg.alg = alg 32 | reg.eps = 0. 33 | 34 | # Fit on training data and predict on test data 35 | reg.fit(x_train, y_train) 36 | pred = reg.predict(x_test) 37 | 38 | # Plot the estimated conditional quantiles 39 | plt.subplot(1, len(algorithms), it+1) 40 | plt.plot(x_train, y_train, '.') 41 | for q in pred: 42 | plt.plot(x_test, q, '-') 43 | for q in z_test: 44 | plt.plot(x_test, q, '--') 45 | plt.title(alg.upper()) 46 | 47 | # Print the optimal objective value 48 | print(alg.upper() + ":") 49 | print(" objective value: %f" % reg.obj) 50 | print(" training time: %0.2fs" % reg.time) 51 | # Print number of support vectors 52 | print(" #SV: %d" % reg.num_sv()) 53 | 54 | # Save optimal objectives and coefficients 55 | res.append((reg.obj, reg.coefs)) 56 | 57 | # Comparison SDCA / CVXOPT 58 | plt.figure(figsize=(12, 7)) 59 | plt.subplot2grid((1, len(algorithms)*2), (0, 0), colspan=len(algorithms)) 60 | for alg, (obj, coefs) in zip(algorithms, res): 61 | # Plot the solutions of SDCA, CVXOPT and AL 62 | plt.plot(coefs.ravel()) 63 | plt.legend([alg.upper() for alg in algorithms]) 64 | plt.title('Dual coefs') 65 | plt.plot([0, coefs.size], [0, 0], ':') 66 | 67 | for it, (alg, (obj, coefs)) in enumerate(zip(algorithms, res)): 68 | # Plot the solutions of SDCA, CVXOPT and AL 69 | plt.subplot2grid((1, len(algorithms)*2), (0, len(algorithms)+it)) 70 | plt.imshow(np.fabs(coefs.T)) 71 | plt.title(alg.upper()) 72 | 73 | plt.show() 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | qreg 4 | ==== 5 | 6 | qreg is a Python library for data sparse and non-parametric quantile regression. It implements quantile regression with matrix-valued kernels and makes it possible to learn several quantile curves simultaneously with a sparsity requirement on supporting data. 7 | 8 | Highlights: 9 | 10 | - based on the library `lightning `_; 11 | - follows the `scikit-learn `_ style of programming; 12 | - computationally demanding parts implemented in `Cython `_. 13 | 14 | Example 15 | ------- 16 | 17 | .. 
code-block:: python 18 | 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | from qreg import QRegressor, toy_data 22 | 23 | # Quantile levels to prediect 24 | probs = np.linspace(0.1, 0.9, 5) 25 | 26 | # Train and test dataset 27 | x_train, y_train, z_train = toy_data(50) 28 | x_test, y_test, z_test = toy_data(1000, t_min=-0.2, t_max=1.7, probs=probs) 29 | 30 | # Define the quantile regressor 31 | reg = QRegressor(C=1e2, # Trade-off parameter 32 | probs=probs, # Quantile levels 33 | gamma_out=1e-2, # Inner kernel parameter 34 | eps=2, # Epsilon-loss level 35 | alg='sdca', # Algorithm (can change to 'qp') 36 | max_iter=1e4, # Maximal number of iteration 37 | active_set=True, # Active set strategy 38 | verbose=True) 39 | 40 | # Fit on training data and predict on test data 41 | reg.fit(x_train, y_train) 42 | pred = reg.predict(x_test) 43 | 44 | # Plot the estimated conditional quantiles 45 | plt.plot(x_train, y_train, '.') 46 | for q in pred: 47 | plt.plot(x_test, q, '-') 48 | for q in z_test: 49 | plt.plot(x_test, q, '--') 50 | 51 | # Print some information 52 | print("Objective value: %f" % reg.obj) 53 | print("Training time: %0.2fs" % reg.time) 54 | print("#SV: %d" % reg.num_sv()) 55 | print("Score: %f" % reg.score(x_test, y_test)) 56 | 57 | plt.show() 58 | 59 | Dependencies 60 | ------------ 61 | 62 | qreg needs Python >= 2.7, setuptools, Numpy, SciPy, scikit-learn, cvxopt and a working C/C++ compiler. 63 | 64 | Installation 65 | ------------ 66 | 67 | To install qreg from pip, type:: 68 | 69 | pip install https://github.com/msangnier/qreg/archive/master.zip 70 | 71 | To install qreg from source, type:: 72 | 73 | git clone https://github.com/msangnier/qreg.git 74 | cd qreg 75 | python setup.py build 76 | sudo python setup.py install 77 | 78 | Authors 79 | ------- 80 | 81 | Olivier Fercoq and Maxime Sangnier 82 | 83 | References 84 | ---------- 85 | 86 | - Data sparse nonparametric regression with epsilon-insensitive losses (2017), M. Sangnier, O. Fercoq, F. d'Alché-Buc. Asian Conference on Machine Learning (ACML). 87 | - Joint quantile regression in vector-valued RKHSs (2016), M. Sangnier, O. Fercoq, F. d'Alché-Buc. Neural Information Processing Systems (NIPS). 88 | 89 | -------------------------------------------------------------------------------- /examples/svr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Author: Maxime Sangnier 3 | # License: BSD 4 | 5 | """ 6 | Quantile regression with epsilon-insensitive loss (comparison to SVR). 
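For the single quantile level 0.5, the epsilon-insensitive quantile regression
problem coincides with epsilon-SVR (up to the factor-2 rescaling of C used
below), so the dual coefficients, intercepts and objective values returned by
scikit-learn's SVR and by the 'sdca' and 'qp' solvers of QRegressor should match.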
7 | """ 8 | 9 | import numpy as np 10 | from scipy.stats import norm 11 | from scipy.spatial.distance import pdist, squareform 12 | import matplotlib.pyplot as plt 13 | from qreg import QRegressor, toy_data 14 | from sklearn.svm import SVR 15 | 16 | 17 | if __name__ == '__main__': 18 | probs = [0.5] # Single quantile regression (match SVR) 19 | eps = 1e-1 # Threshold for epsilon-loss 20 | C = 1e2 # Trade-off parameter 21 | gamma_in = 1 # Gaussian parameter for input data 22 | max_iter = 1e8 # Large enough 23 | verbose = False 24 | 25 | # Data 26 | x_train, y_train, z_train = toy_data(50) 27 | x_train = x_train[:, np.newaxis] # Make x 2-dimensional 28 | 29 | # Methods to compare 30 | methods = [('SVR', SVR(C=C, gamma=gamma_in, epsilon=eps)), 31 | ('SDCA', QRegressor(C=C*2, probs=probs, gamma_in=gamma_in, eps=eps, coefs_init=None, 32 | max_iter=max_iter, verbose=verbose, max_time=3, alg='sdca')), 33 | ('QP', QRegressor(C=C*2, probs=probs, gamma_in=gamma_in, eps=eps, coefs_init=None, 34 | max_iter=max_iter, verbose=verbose, max_time=3, alg='qp'))] 35 | 36 | # Objective value 37 | K = np.exp(-gamma_in * squareform(pdist(x_train, 'sqeuclidean'))) # Kernel matrix 38 | obj_fun = lambda x: 0.5 * x.dot(K.dot(x)) - y_train.dot(x) + eps*np.linalg.norm(x, ord=1) 39 | 40 | # Figure for dual coefs and residues 41 | plt.figure(figsize=(15, 8)) 42 | plt.plot([0, y_train.size], [eps] * 2, 'k:', label='+eps') 43 | plt.plot([0, y_train.size], [-eps] * 2, 'k:', label='-eps') 44 | # plt.plot([0, y_train.size], [0] * 2, 'k-', label='') 45 | 46 | # Do the job 47 | for name, reg in methods: 48 | # Fit the model 49 | reg.fit(x_train, y_train) 50 | 51 | # Get the dual vector and intercept 52 | if 'svr' in name.lower(): 53 | dual = np.zeros(y_train.shape) 54 | dual[reg.support_] = reg.dual_coef_[0, :].copy() 55 | intercept = reg.intercept_[0] 56 | pred = reg.predict(x_train) 57 | else: 58 | dual = reg.coefs[0, :].copy() 59 | intercept = reg.intercept[0] 60 | pred = reg.predict(x_train)[0] 61 | 62 | # Print information 63 | print(name) 64 | # Objective value 65 | if 'sdca' in name.lower(): 66 | print(" objective value: %f (inner value: %f)" % (obj_fun(dual), reg.obj)) 67 | else: 68 | print(" objective value: %f" % obj_fun(dual)) 69 | # Others 70 | print(" contraint: 0 = %e" % dual.sum()) # Constraint 71 | print(" intercept: {}".format(intercept)) 72 | 73 | # Plot dual coefs and residues 74 | plt.plot(dual/C, '-*', label="dual "+name) 75 | plt.plot(y_train-pred, label="residues "+name) 76 | 77 | # Figure for dual coefs and residues 78 | plt.grid() 79 | plt.legend(loc="best") 80 | plt.show() 81 | -------------------------------------------------------------------------------- /examples/methods.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Author: Maxime Sangnier 3 | # License: BSD 4 | 5 | """ 6 | Quantile regression with operator-valued kernels and multi-task learning. 
7 | """ 8 | 9 | import numpy as np 10 | from scipy.stats import norm 11 | import matplotlib.pyplot as plt 12 | from qreg import QRegressor, QRegMTL, toy_data 13 | 14 | 15 | if __name__ == '__main__': 16 | probs = np.linspace(0.1, 0.9, 5) # Joint quantile regression 17 | x_train, y_train, z_train = toy_data(50) 18 | x_test, y_test, z_test = toy_data(1000, probs=probs) 19 | 20 | # QR with operator-valued kernel 21 | ovk = QRegressor(C=1e2, probs=probs, gamma_out=1e-2, alg='qp') 22 | 23 | # Fit on training data and predict on test data 24 | print("Learn QRegressor") 25 | ovk.fit(x_train, y_train) 26 | pred = ovk.predict(x_test) 27 | 28 | # Plot the estimated conditional quantiles 29 | plt.close('all') 30 | plt.figure(figsize=(12, 7)) 31 | plt.subplot(231) 32 | plt.plot(x_train, y_train, '.') 33 | for q in pred: 34 | plt.plot(x_test, q, '-') 35 | for q in z_test: 36 | plt.plot(x_test, q, '--') 37 | plt.title('Operator-valued kernel') 38 | 39 | # QR with multi-task learning 40 | mtl = QRegMTL(C=1e2, probs=probs, n_landmarks=0.2) 41 | 42 | # Fit on training data and predict on test data 43 | print("Learn QRegMTL (with location)") 44 | mtl.fit(x_train, y_train) 45 | pred = mtl.predict(x_test) 46 | 47 | # Plot the estimated conditional quantiles 48 | plt.subplot(232) 49 | plt.plot(x_train, y_train, '.') 50 | for q in pred: 51 | plt.plot(x_test, q, '-') 52 | for q in z_test: 53 | plt.plot(x_test, q, '--') 54 | plt.title('Multi-task learning (with location)') 55 | 56 | plt.subplot(235) 57 | plt.imshow(mtl.reg.D) 58 | plt.colorbar() 59 | plt.title('Learned metric (with location)') 60 | 61 | # QR with multi-task learning (without location regression) 62 | mtl = QRegMTL(C=1e4, probs=probs, n_landmarks=0.2, location=False) 63 | 64 | # Fit on training data and predict on test data 65 | print("Learn QRegMTL (without location)") 66 | mtl.fit(x_train, y_train) 67 | pred = mtl.predict(x_test) 68 | 69 | # Plot the estimated conditional quantiles 70 | plt.subplot(233) 71 | plt.plot(x_train, y_train, '.') 72 | for q in pred: 73 | plt.plot(x_test, q, '-') 74 | for q in z_test: 75 | plt.plot(x_test, q, '--') 76 | plt.title('Multi-task learning (without location)') 77 | 78 | plt.subplot(236) 79 | plt.imshow(mtl.reg.D) 80 | plt.colorbar() 81 | plt.title('Learned metric (without location)') 82 | 83 | # QR with multi-task learning (several parameters) 84 | Cs = np.logspace(-8, 8, num=8) 85 | plt.figure() 86 | for i, C in enumerate(Cs): 87 | print('Learn QRegMTL with C={}'.format(C)) 88 | mtl = QRegMTL(C=C, probs=probs, n_landmarks=0.2, location=False) 89 | mtl.fit(x_train, y_train) 90 | pred = mtl.predict(x_test) 91 | 92 | # Plot the estimated conditional quantiles 93 | plt.subplot(4, 4, 4*(i//4)+i+1) 94 | plt.plot(x_train, y_train, '.') 95 | for q in pred: 96 | plt.plot(x_test, q, '-') 97 | for q in z_test: 98 | plt.plot(x_test, q, '--') 99 | plt.title('C={}'.format(C)) 100 | 101 | plt.subplot(4, 4, 4*(i//4+1)+i+1) 102 | plt.imshow(mtl.reg.D) 103 | plt.colorbar() 104 | 105 | plt.show() 106 | 107 | -------------------------------------------------------------------------------- /qreg/dataset_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # 6 | # Author: Mathieu Blondel 7 | # License: BSD 8 | 9 | from libc cimport stdlib 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | np.import_array() 14 | 15 | import scipy.sparse as sp 16 | 17 | cdef class 
Dataset:
18 | 
19 |     cpdef int get_n_samples(self):
20 |         return self.n_samples
21 | 
22 |     cpdef int get_n_features(self):
23 |         return self.n_features
24 | 
25 | 
26 | cdef class RowDataset(Dataset):
27 | 
28 |     cdef void get_row_ptr(self,
29 |                           int i,
30 |                           int** indices,
31 |                           double** data,
32 |                           int* n_nz) nogil:
33 |         pass
34 | 
35 |     cpdef get_row(self, int i):
36 |         cdef double* data
37 |         cdef int* indices
38 |         cdef int n_nz
39 |         cdef np.npy_intp shape[1]
40 | 
41 |         self.get_row_ptr(i, &indices, &data, &n_nz)
42 | 
43 |         shape[0] = self.n_features
44 |         indices_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_INT, indices)
45 |         data_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, data)
46 | 
47 |         return indices_, data_, n_nz
48 | 
49 | 
50 | cdef class ColumnDataset(Dataset):
51 | 
52 |     cdef void get_column_ptr(self,
53 |                              int j,
54 |                              int** indices,
55 |                              double** data,
56 |                              int* n_nz) nogil:
57 |         pass
58 | 
59 |     cpdef get_column(self, int j):
60 |         cdef double* data
61 |         cdef int* indices
62 |         cdef int n_nz
63 |         cdef np.npy_intp shape[1]
64 | 
65 |         self.get_column_ptr(j, &indices, &data, &n_nz)
66 | 
67 |         shape[0] = self.n_samples
68 |         indices_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_INT, indices)
69 |         data_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, data)
70 | 
71 |         return indices_, data_, n_nz
72 | 
73 | 
74 | cdef class ContiguousDataset(RowDataset):
75 | 
76 |     def __init__(self, np.ndarray[double, ndim=2, mode='c'] X):
77 |         self.n_samples = X.shape[0]
78 |         self.n_features = X.shape[1]
79 |         self.data = <double*> X.data
80 |         self.X = X
81 | 
82 |     def __cinit__(self, np.ndarray[double, ndim=2, mode='c'] X):
83 |         cdef int j
84 |         cdef int n_features = X.shape[1]
85 |         self.indices = <int*> stdlib.malloc(sizeof(int) * n_features)
86 |         for j in xrange(n_features):
87 |             self.indices[j] = j
88 | 
89 |     def __dealloc__(self):
90 |         stdlib.free(self.indices)
91 | 
92 |     # This is used to reconstruct the object in order to make it picklable.
93 |     def __reduce__(self):
94 |         return (ContiguousDataset, (self.X, ))
95 | 
96 |     cdef void get_row_ptr(self,
97 |                           int i,
98 |                           int** indices,
99 |                           double** data,
100 |                           int* n_nz) nogil:
101 |         indices[0] = self.indices
102 |         data[0] = self.data + i * self.n_features
103 |         n_nz[0] = self.n_features
104 | 
105 | 
106 | cdef class FortranDataset(ColumnDataset):
107 | 
108 |     def __init__(self, np.ndarray[double, ndim=2, mode='fortran'] X):
109 |         self.n_samples = X.shape[0]
110 |         self.n_features = X.shape[1]
111 |         self.data = <double*> X.data
112 |         self.X = X
113 | 
114 |     def __cinit__(self, np.ndarray[double, ndim=2, mode='fortran'] X):
115 |         cdef int i
116 |         cdef int n_samples = X.shape[0]
117 |         self.indices = <int*> stdlib.malloc(sizeof(int) * n_samples)
118 |         for i in xrange(n_samples):
119 |             self.indices[i] = i
120 | 
121 |     def __dealloc__(self):
122 |         stdlib.free(self.indices)
123 | 
124 |     # This is used to reconstruct the object in order to make it picklable.
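    # (__reduce__ returns the class and its constructor argument, so that
    # unpickling calls FortranDataset(X) again and rebuilds the raw C pointers,
    # which cannot be pickled directly.)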
125 |     def __reduce__(self):
126 |         return (FortranDataset, (self.X, ))
127 | 
128 |     cdef void get_column_ptr(self,
129 |                              int j,
130 |                              int** indices,
131 |                              double** data,
132 |                              int* n_nz) nogil:
133 |         indices[0] = self.indices
134 |         data[0] = self.data + j * self.n_samples
135 |         n_nz[0] = self.n_samples
136 | 
137 | 
138 | cdef class CSRDataset(RowDataset):
139 | 
140 |     def __init__(self, X):
141 |         cdef np.ndarray[double, ndim=1, mode='c'] X_data = X.data
142 |         cdef np.ndarray[int, ndim=1, mode='c'] X_indices = X.indices
143 |         cdef np.ndarray[int, ndim=1, mode='c'] X_indptr = X.indptr
144 | 
145 |         self.n_samples = X.shape[0]
146 |         self.n_features = X.shape[1]
147 |         self.data = <double*> X_data.data
148 |         self.indices = <int*> X_indices.data
149 |         self.indptr = <int*> X_indptr.data
150 | 
151 |         self.X = X
152 | 
153 |     # This is used to reconstruct the object in order to make it picklable.
154 |     def __reduce__(self):
155 |         return (CSRDataset, (self.X, ))
156 | 
157 |     cdef void get_row_ptr(self,
158 |                           int i,
159 |                           int** indices,
160 |                           double** data,
161 |                           int* n_nz) nogil:
162 |         indices[0] = self.indices + self.indptr[i]
163 |         data[0] = self.data + self.indptr[i]
164 |         n_nz[0] = self.indptr[i + 1] - self.indptr[i]
165 | 
166 | 
167 | cdef class CSCDataset(ColumnDataset):
168 | 
169 |     def __init__(self, X):
170 |         cdef np.ndarray[double, ndim=1, mode='c'] X_data = X.data
171 |         cdef np.ndarray[int, ndim=1, mode='c'] X_indices = X.indices
172 |         cdef np.ndarray[int, ndim=1, mode='c'] X_indptr = X.indptr
173 | 
174 |         self.n_samples = X.shape[0]
175 |         self.n_features = X.shape[1]
176 |         self.data = <double*> X_data.data
177 |         self.indices = <int*> X_indices.data
178 |         self.indptr = <int*> X_indptr.data
179 | 
180 |         self.X = X
181 | 
182 |     # This is used to reconstruct the object in order to make it picklable.
183 |     def __reduce__(self):
184 |         return (CSCDataset, (self.X, ))
185 | 
186 |     cdef void get_column_ptr(self,
187 |                              int j,
188 |                              int** indices,
189 |                              double** data,
190 |                              int* n_nz) nogil:
191 |         indices[0] = self.indices + self.indptr[j]
192 |         data[0] = self.data + self.indptr[j]
193 |         n_nz[0] = self.indptr[j + 1] - self.indptr[j]
194 | 
195 | 
196 | def get_dataset(X, order="c"):
197 |     if isinstance(X, Dataset):
198 |         return X
199 | 
200 |     if sp.isspmatrix(X):
201 |         if order == "fortran":
202 |             X = X.tocsc()
203 |             ds = CSCDataset(X)
204 |         else:
205 |             X = X.tocsr()
206 |             ds = CSRDataset(X)
207 |     else:
208 |         if order == "fortran":
209 |             X = np.asfortranarray(X, dtype=np.float64)
210 |             ds = FortranDataset(X)
211 |         else:
212 |             X = np.ascontiguousarray(X, dtype=np.float64)
213 |             ds = ContiguousDataset(X)
214 |     return ds
215 | 
--------------------------------------------------------------------------------
/qreg/sdca_qr_al_fast.pyx:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | # cython: cdivision=True
3 | # cython: boundscheck=False
4 | # cython: wraparound=False
5 | #
6 | # Authors: Maxime Sangnier and Olivier Fercoq from Mathieu Blondel's sdca
7 | # License: BSD
8 | 
9 | import numpy as np
10 | cimport numpy as np
11 | ctypedef np.int64_t LONG
12 | from libc.math cimport fabs
13 | from dataset_fast cimport RowDataset
14 | 
15 | cdef void _add_l2(double* datain,
16 |                   int* indicesin,
17 |                   int n_nzin,
18 |                   double* dataout,
19 |                   int* indicesout,
20 |                   int n_nzout,
21 |                   double update,
22 |                   int n_dim,
23 |                   double* coefs,
24 |                   int coefi,
25 |                   double mu,
26 |                   double* regul) nogil:
27 | 
28 |     cdef int i, j, ii, jj, l, m
29 |     cdef double dot
30 |     m = coefi / n_dim
31 |     l = coefi - m * n_dim
32 | 
33 |     dot = 0.
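    # The loop below computes dot = (K * coefs)[coefi], where K is the
    # Kronecker product of Kin and Kout plus the mu-augmentation, with coefs
    # already holding the updated value; regul is then adjusted by
    # update * (2*dot - K_cc*update), i.e. the exact change of the quadratic
    # term coefs^T K coefs, so it never has to be recomputed from scratch.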
34 | for ii in xrange(n_nzin): 35 | i = indicesin[ii] 36 | for jj in xrange(n_nzout): 37 | j = indicesout[jj] 38 | # True update 39 | dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] if j != l \ 40 | else coefs[i*n_dim+j] * (datain[ii] * dataout[jj] + mu) # ALREADY DONE IN _PRED !!! 41 | # Update as if mu=0 (without augmentation) 42 | # dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] 43 | regul[0] += update * (2*dot - datain[m] * dataout[l] * update) 44 | 45 | 46 | cdef _sqnorms(RowDataset Kin, RowDataset Kout, 47 | np.ndarray[double, ndim=1, mode='c'] sqnorms): 48 | 49 | cdef int n_samples = Kin.get_n_samples() 50 | cdef int n_dim = Kout.get_n_features() 51 | cdef int i, j, ii, jj 52 | 53 | # Data pointers. 54 | cdef double* datain 55 | cdef double* dataout 56 | cdef int* indicesin 57 | cdef int* indicesout 58 | cdef int n_nzin 59 | cdef int n_nzout 60 | cdef double tempin, tempout 61 | 62 | for i in xrange(n_samples): 63 | tempin = 0. 64 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 65 | for ii in xrange(n_nzin): # Look for the ith element in Kin(i, :) 66 | if indicesin[ii] == i: 67 | tempin = datain[ii] 68 | break 69 | for j in xrange(n_dim): 70 | tempout = 0. 71 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 72 | for jj in xrange(n_nzout): # Look for the jth element in Kout(j, :) 73 | if indicesout[jj] == j: 74 | tempout = dataout[jj] 75 | break 76 | sqnorms[i*n_dim + j] = tempin * tempout 77 | 78 | 79 | cdef double _pred(double* datain, 80 | int* indicesin, 81 | int n_nzin, 82 | double* dataout, 83 | int* indicesout, 84 | int n_nzout, 85 | int n_dim, 86 | int coefi, 87 | double mu, 88 | double* coefs) nogil: 89 | 90 | cdef int i, j, ii, jj, l 91 | cdef double dot = 0 92 | l = coefi - (coefi / n_dim) * n_dim 93 | 94 | for ii in xrange(n_nzin): 95 | i = indicesin[ii] 96 | for jj in xrange(n_nzout): 97 | j = indicesout[jj] 98 | dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] if j != l \ 99 | else coefs[i*n_dim+j] * (datain[ii] * dataout[jj] + mu) 100 | 101 | return dot 102 | 103 | 104 | cdef void _solve_subproblem(double* datain, 105 | int* indicesin, 106 | int n_nzin, 107 | double* dataout, 108 | int* indicesout, 109 | int n_nzout, 110 | double y, 111 | double* dcoef, 112 | int dcoefi, 113 | double* xdm, # 1.T * dcoef 114 | int n_samples, 115 | int n_dim, 116 | double sqnorm, 117 | double scale, 118 | double stepsize_factor, 119 | double prob, 120 | double intercept, 121 | double mu, 122 | double* primal, 123 | double* dual, 124 | double* regul): 125 | 126 | cdef double pred, dcoef_old, residual, error, loss, update 127 | cdef double inv_d_stepsize, mult_stepsize 128 | 129 | dcoef_old = dcoef[dcoefi] 130 | 131 | mult_stepsize = sqnorm * stepsize_factor # is it the best? 132 | inv_d_stepsize = (sqnorm + mult_stepsize) / 0.95 133 | 134 | pred = _pred(datain, indicesin, n_nzin, 135 | dataout, indicesout, n_nzout, 136 | n_dim, dcoefi, mu, dcoef) 137 | 138 | residual = y - intercept - pred 139 | # loss = prob*residual if residual > 0 else (prob-1.)*residual 140 | update = dcoef_old + residual / inv_d_stepsize 141 | update = min(scale*prob, update) 142 | update = max(scale*(prob-1.), update) 143 | update -= dcoef_old 144 | dual[0] += (y-intercept) * update # True dual 145 | # dual[0] += y * update # Dual as if intercept=0 (without augmentation) 146 | 147 | # Use accumulated loss rather than true primal objective value, which is 148 | # expensive to compute. 
149 | # primal[0] += loss * scale 150 | 151 | if update != 0: 152 | dcoef[dcoefi] += update 153 | _add_l2(datain, indicesin, n_nzin, 154 | dataout, indicesout, n_nzout, 155 | update, n_dim, dcoef, dcoefi, mu, regul) 156 | xdm[0] += update 157 | 158 | 159 | #SUPPRIMER INDICESIN, INDICESOUT 160 | def _prox_sdca_al_fit(self, 161 | RowDataset Kin, 162 | RowDataset Kout, 163 | np.ndarray[double, ndim=1] y, 164 | np.ndarray[double, ndim=1] dual_coef, 165 | double alpha2, 166 | double C, 167 | double stepsize_factor, 168 | np.ndarray[double, ndim=1] probs, 169 | np.ndarray[double, ndim=1] intercept, # Dual vector of the linear constraint: + intercept.T * LC 170 | double mu, # Coef of the L2 penalization of the linear constraint: + mu/2 * ||LC||**2 171 | int max_iter, 172 | double tol, 173 | callback, 174 | int n_calls, 175 | int verbose, 176 | rng): 177 | 178 | cdef int n_samples = Kin.get_n_samples() 179 | cdef int n_dim = Kout.get_n_features() 180 | 181 | # Variables 182 | cdef double sigma, scale, primal, dual, regul, gap 183 | cdef int it, ii, i, j 184 | cdef int has_callback = callback is not None 185 | cdef LONG t 186 | 187 | # Pre-compute square norms. 188 | cdef np.ndarray[double, ndim=1, mode='c'] sqnorms 189 | sqnorms = np.zeros(n_samples*n_dim, dtype=np.float64) 190 | _sqnorms(Kin, Kout, sqnorms) 191 | sqnorms += mu 192 | 193 | # Pointers 194 | cdef double* dcoef = dual_coef.data 195 | 196 | cdef np.ndarray[double, ndim=1] xdm_data 197 | xdm_data = np.zeros(n_dim, dtype=np.float64) # 1.T * dcoef 198 | cdef double* xdm = xdm_data.data 199 | for j in xrange(n_dim): 200 | dot = 0. 201 | for i in xrange(n_samples): 202 | dot += dcoef[i*n_dim+j] 203 | xdm[j] = dot 204 | 205 | cdef np.ndarray[int, ndim=1] sindices 206 | sindices = np.arange(n_samples*n_dim, dtype=np.int32) 207 | 208 | # Data pointers. 209 | cdef int* indicesin 210 | cdef double* datain 211 | cdef int n_nzin 212 | cdef int* indicesout 213 | cdef double* dataout 214 | cdef int n_nzout 215 | 216 | scale = C * 1. / alpha2 217 | 218 | dual = 0 219 | regul = 0 220 | prev_obj = np.inf 221 | 222 | t = 0 223 | for it in xrange(max_iter): 224 | primal = 0 225 | 226 | rng.shuffle(sindices) 227 | 228 | for ii in xrange(n_samples*n_dim): 229 | ij = sindices[ii] 230 | i = ij / n_dim 231 | j = ij - i*n_dim 232 | 233 | if sqnorms[i*n_dim + j] == 0: 234 | continue 235 | 236 | # Retrieve rows 237 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 238 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 239 | 240 | _solve_subproblem(datain, indicesin, n_nzin, 241 | dataout, indicesout, n_nzout, 242 | y[i], dcoef, i*n_dim + j, 243 | xdm + j, n_samples, n_dim, 244 | sqnorms[i*n_dim + j], scale, stepsize_factor, 245 | probs[j], intercept[j], mu, 246 | &primal, &dual, ®ul) 247 | 248 | if has_callback and t % n_calls == 0: 249 | ret = callback(self) 250 | if ret is not None: 251 | break 252 | 253 | t += 1 254 | 255 | # if has_callback and t % n_calls == 0: 256 | # ret = callback(self) 257 | # if ret is not None: 258 | # break 259 | 260 | # tol is the objective value to reach 261 | # if tol < 0. and np.mod(it, 1e3) == 0: 262 | # # Minus dual objective value 263 | # obj = alpha2 * (regul/2. - dual) 264 | # 265 | # if verbose: 266 | # print "%8d: %5.2e %5.2e" % (it + 1, obj, obj-tol) 267 | # 268 | # # Objective value reached 269 | # if obj <= tol: 270 | # if verbose: 271 | # print "Ground truth objective value reached." 272 | # break 273 | 274 | if np.mod(it, 1e3) == 0: 275 | # Minus dual objective value 276 | obj = alpha2 * (regul/2. 
- dual) 277 | dobj = prev_obj - obj 278 | prev_obj = obj 279 | 280 | if verbose: 281 | print "%8d: %5.2e %5.2e" % (it + 1, obj, dobj / n_samples) 282 | 283 | if np.abs(dobj) / n_samples <= tol: 284 | break 285 | else: 286 | if verbose: 287 | print "Stop before convergence." 288 | -------------------------------------------------------------------------------- /qreg/sdca_qr_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # 6 | # Authors: Maxime Sangnier and Olivier Fercoq from Mathieu Blondel's sdca 7 | # License: BSD 8 | 9 | import numpy as np 10 | cimport numpy as np 11 | ctypedef np.int64_t LONG 12 | from libc.math cimport fabs, sqrt 13 | from libc.stdlib cimport malloc, free 14 | from dataset_fast cimport RowDataset 15 | from time import process_time 16 | 17 | #np.set_printoptions(precision=4) 18 | 19 | cdef void _add_l2(double* datain, 20 | int* indicesin, 21 | int n_nzin, 22 | double* dataout, 23 | int* indicesout, 24 | int n_nzout, 25 | double update, 26 | int n_dim, 27 | double* coefs, 28 | int coefi, 29 | double* regul) nogil: 30 | 31 | cdef int i, j, ii, jj, l, m 32 | cdef double dot 33 | m = coefi / n_dim 34 | l = coefi - m * n_dim 35 | 36 | dot = 0. 37 | for ii in xrange(n_nzin): 38 | i = indicesin[ii] 39 | for jj in xrange(n_nzout): 40 | j = indicesout[jj] 41 | dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] # ALREADY DONE IN _PRED !!! 42 | regul[0] += update * (2*dot - datain[m] * dataout[l] * update) 43 | 44 | 45 | cdef _sqnorms(RowDataset Kin, RowDataset Kout, 46 | np.ndarray[double, ndim=1, mode='c'] sqnorms): 47 | 48 | cdef int n_samples = Kin.get_n_samples() 49 | cdef int n_dim = Kout.get_n_features() 50 | cdef int i, j, ii, jj 51 | 52 | # Data pointers. 53 | cdef double* datain 54 | cdef double* dataout 55 | cdef int* indicesin 56 | cdef int* indicesout 57 | cdef int n_nzin 58 | cdef int n_nzout 59 | cdef double tempin, tempout 60 | 61 | for i in xrange(n_samples): 62 | tempin = 0. 63 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 64 | for ii in xrange(n_nzin): # Look for the ith element in Kin(i, :) 65 | if indicesin[ii] == i: 66 | tempin = datain[ii] 67 | break 68 | for j in xrange(n_dim): 69 | tempout = 0. 70 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 71 | for jj in xrange(n_nzout): # Look for the jth element in Kout(j, :) 72 | if indicesout[jj] == j: 73 | tempout = dataout[jj] 74 | break 75 | sqnorms[i*n_dim + j] = tempin * tempout 76 | 77 | 78 | cdef _sqnormsL(RowDataset Kin, double lambda_max, 79 | np.ndarray[double, ndim=1, mode='c'] sqnorms): 80 | 81 | cdef int n_samples = Kin.get_n_samples() 82 | cdef int i, ii 83 | 84 | # Data pointers. 85 | cdef double* datain 86 | cdef int* indicesin 87 | cdef int n_nzin 88 | cdef double tempin 89 | 90 | for i in xrange(n_samples): 91 | tempin = 0. 
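        # Only the diagonal entry Kin[i, i] is needed here: the block stepsize
        # bound is Kin[i, i] * lambda_max (presumably the largest eigenvalue of
        # Kout, precomputed by the caller).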
92 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 93 | for ii in xrange(n_nzin): # Look for the ith element in Kin(i, :) 94 | if indicesin[ii] == i: 95 | tempin = datain[ii] 96 | break 97 | sqnorms[i] = tempin * lambda_max 98 | 99 | 100 | cdef double _pred(double* datain, 101 | int* indicesin, 102 | int n_nzin, 103 | double* dataout, 104 | int* indicesout, 105 | int n_nzout, 106 | int n_dim, 107 | double* coefs) nogil: 108 | 109 | cdef int i, j, ii, jj 110 | cdef double dot = 0 111 | 112 | for ii in xrange(n_nzin): 113 | i = indicesin[ii] 114 | for jj in xrange(n_nzout): 115 | j = indicesout[jj] 116 | dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] 117 | 118 | return dot 119 | 120 | cdef double norm_square(double* y, int y_size) nogil: 121 | cdef double norm = 0. 122 | for it in range(y_size): 123 | norm += y[it]**2 124 | return norm 125 | 126 | cdef void clip(double mu, double* y, double* probs, double scale, int y_size, 127 | double* clip_y, double* circ_y) nogil: 128 | for it in range(y_size): 129 | clip_y[it] = min(scale*probs[it], max(scale*(probs[it]-1), mu*y[it])) 130 | circ_y[it] = y[it] if clip_y[it] < scale*probs[it] and clip_y[it] > scale*(probs[it]-1) else 0. 131 | 132 | cdef double solve_prox_equ(double mu, double l, double* y, double* probs, 133 | double scale, int y_size) nogil: 134 | cdef double tol = 1.48e-08 # Scipy value 135 | cdef int max_iter = 50 # Scipy value 136 | cdef double* v = malloc(y_size*sizeof(double)) # Truncated vector 137 | cdef double* u = malloc(y_size*sizeof(double)) # Zero truncation 138 | # cdef np.ndarray[double, ndim=1] v_data # Truncated vector 139 | # cdef np.ndarray[double, ndim=1] u_data # Zero truncation 140 | # v_data = np.zeros(y_size, dtype=np.float64) 141 | # u_data = np.zeros(y_size, dtype=np.float64) 142 | # cdef double* v = v_data.data 143 | # cdef double* u = u_data.data 144 | 145 | # print("------------------------------------------------------------------") 146 | # print("mu init", mu) 147 | 148 | for it in range(max_iter): 149 | clip(mu, y, probs, scale, y_size, v, u) 150 | v_norm = sqrt(norm_square(v, y_size)) 151 | phi = 1 + l / v_norm - 1/mu # Objective 152 | err = fabs(phi) 153 | if err < tol: 154 | break 155 | diff_phi = 1/mu**2 - l*mu*norm_square(u, y_size) / v_norm**3 # Derivative 156 | mu -= phi / diff_phi # Newton update 157 | # print("it", it, "mu", mu, "phi", phi, "diff_phi", diff_phi, 158 | # "norm", norm_square(y, y_size)) 159 | # Prevent divergence 160 | # if mu < 0 or mu > 1: 161 | # print("Error in mu") 162 | # return solve_prox_equ_bisect(l, y, probs, scale, y_size) 163 | 164 | free(v) 165 | free(u) 166 | return mu 167 | 168 | #cdef double solve_prox_equ_bisect(double l, double* y, double* probs, 169 | # double scale, int y_size): 170 | # cdef double tol = 1e-12 171 | # cdef int max_iter = 100 172 | # cdef np.ndarray[double, ndim=1] v_data # Truncated vector 173 | # cdef np.ndarray[double, ndim=1] u_data # Zero truncation 174 | # v_data = np.zeros(y_size, dtype=np.float64) 175 | # u_data = np.zeros(y_size, dtype=np.float64) 176 | # cdef double* v = v_data.data 177 | # cdef double* u = u_data.data 178 | # 179 | # mu1 = 1e-6 180 | # mu2 = 1 181 | # # Find a negative point 182 | # for it in range(max_iter): 183 | # clip(mu1, y, probs, scale, y_size, v, u) 184 | # v_norm = np.sqrt(norm_square(v, y_size)) 185 | # phi = 1 + l / v_norm - 1/mu1 # Objective 186 | # if phi < 0: 187 | # break 188 | # mu1 /= 10 189 | ## print(mu1, phi) 190 | # for it in range(max_iter): 191 | # mu = (mu1+mu2)/2 192 | # clip(mu, y, probs, 
scale, y_size, v, u) 193 | # v_norm = np.sqrt(norm_square(v, y_size)) 194 | # phi = 1 + l / v_norm - 1/mu # Objective 195 | # err = abs(phi) 196 | # if err < tol: 197 | # break 198 | # if phi > 0: 199 | # mu2 = mu 200 | # else: 201 | # mu1 = mu 202 | ## print("it", it, "mu", mu, "phi", phi) 203 | # return mu 204 | 205 | cdef void _solve_subproblem(double* datain, 206 | int* indicesin, 207 | int n_nzin, 208 | RowDataset Kout, 209 | double* dataout, 210 | int* indicesout, 211 | int n_nzout, 212 | double y, 213 | double* dcoef, 214 | int dcoefi, 215 | double* multiplier, 216 | double* residual, 217 | double* xdm, # 1.T * dcoef 218 | double* ydm, # 1.T * multiplier 219 | int n_samples, 220 | int n_dim, 221 | double sqnorm, 222 | double scale, 223 | double eps, 224 | double* group_norm, 225 | double* res_norm, 226 | int* coef_on_bound, 227 | double* approx_mu, 228 | double stepsize_factor, 229 | double* probs, 230 | int i, 231 | double* primal, 232 | double* dual, 233 | double* regul): 234 | 235 | cdef double pred, error, loss, eps_prox, new_norm, gnorm, res_coef 236 | cdef double multiplier_old, multiplier_update 237 | cdef double inv_d_stepsize, mult_stepsize 238 | cdef double tol_bound 239 | 240 | # Updates of dual coefs 241 | cdef double* update = malloc(n_dim*sizeof(double)) 242 | 243 | mult_stepsize = sqnorm * stepsize_factor # is it the best? 244 | inv_d_stepsize = (sqnorm + mult_stepsize) / 0.95 245 | eps_prox = eps/inv_d_stepsize 246 | gnorm = group_norm[0] 247 | 248 | res_norm[0] = 0 249 | coef_on_bound[0] = 1 250 | tol_bound = 1e-6 251 | 252 | for j in xrange(n_dim): 253 | multiplier_old = multiplier[j] 254 | 255 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 256 | pred = _pred(datain, indicesin, n_nzin, 257 | dataout, indicesout, n_nzout, 258 | n_dim, dcoef) 259 | 260 | # i-th element of the projection of 261 | # mutiplier + mult_stepsize * dcoef on [1, ..., 1] 262 | multiplier_update = (ydm[j] + mult_stepsize * xdm[j]) / n_samples 263 | multiplier_update -= multiplier_old 264 | 265 | residual[j] = y - pred 266 | update[j] = (dcoef[dcoefi+j] + ( 267 | residual[j] - (multiplier_old + 2. * multiplier_update)) 268 | / inv_d_stepsize) 269 | residual[j] -= multiplier_old # Minus intercept 270 | res_norm[0] += residual[j]**2 271 | 272 | # Compute the loss (first way to do, an other one is below) 273 | # # loss = probs[j]*residual[0] if residual[0] > 0 else (probs[j]-1.)*residual[0] 274 | # loss = probs[j]*max(0, residual[j]-eps/n_dim) + (probs[j]-1)*min(0, residual[j]+eps/n_dim) 275 | # primal[0] += loss # Accumulated loss 276 | 277 | # Update group norm 278 | gnorm += update[j]**2 - dcoef[dcoefi+j]**2 279 | 280 | # Update multiplier 281 | if multiplier_update != 0: 282 | multiplier[j] += multiplier_update 283 | ydm[j] += multiplier_update 284 | 285 | # Is coef on bound? 
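        # (up to tol_bound; the flag feeds the active-set strategy below, which
        # skips samples whose residual is large and whose whole dual block
        # already sits on the box boundary)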
286 | if probs[j] - dcoef[dcoefi+j]/scale > tol_bound and \ 287 | dcoef[dcoefi+j]/scale - probs[j]+1 > tol_bound: 288 | coef_on_bound[0] = 0 289 | 290 | res_norm[0] = sqrt(res_norm[0]) 291 | 292 | # l1-l2 proximal operator + box constraint 293 | # Method 1 (full) 294 | if eps > 0.: 295 | new_norm = sqrt(gnorm) 296 | if new_norm > eps_prox: 297 | mu = solve_prox_equ(1-eps_prox/new_norm, eps_prox, update, probs, 298 | scale, n_dim) 299 | for j in xrange(n_dim): 300 | update[j] *= mu 301 | # Box constraint projection 302 | update[j] = min(scale*probs[j], update[j]) 303 | update[j] = max(scale*(probs[j]-1.), update[j]) 304 | group_norm[0] += update[j]**2 - dcoef[dcoefi+j]**2 305 | else: 306 | for j in xrange(n_dim): 307 | update[j] = 0. 308 | group_norm[0] -= dcoef[dcoefi+j]**2 309 | else: 310 | # Box constraint projection 311 | for j in xrange(n_dim): 312 | update[j] = min(scale*probs[j], update[j]) 313 | update[j] = max(scale*(probs[j]-1.), update[j]) 314 | 315 | # Coef for computing the loss 316 | res_coef = 1 - min(eps, res_norm[0])/res_norm[0] if res_norm[0]>0 else 0 317 | 318 | for j in xrange(n_dim): 319 | # Compute the loss (second way to do, more accurate) 320 | loss = probs[j]*max(0, residual[j]*res_coef) + (probs[j]-1)*min(0, residual[j]*res_coef) 321 | primal[0] += loss # Accumulated loss 322 | 323 | update[j] -= dcoef[dcoefi+j] 324 | dual[0] += y * update[j] 325 | 326 | if update[j] != 0: 327 | dcoef[dcoefi+j] += update[j] 328 | 329 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 330 | _add_l2(datain, indicesin, n_nzin, 331 | dataout, indicesout, n_nzout, 332 | update[j], n_dim, dcoef, dcoefi+j, regul) 333 | xdm[j] += update[j] 334 | 335 | free(update) 336 | 337 | # Method 2 (totally approximated) 338 | # update *= approx_mu 339 | # # Box constraint projection 340 | # update = min(scale*prob, update) 341 | # update = max(scale*(prob-1.), update) 342 | # new_norm = group_norm[0]**2 - dcoef_old**2 + update**2 343 | 344 | # # Method 3 (partially approximated) 345 | # if eps > 0.: 346 | # new_norm = group_norm[0] + update**2 - dcoef_old**2 347 | # if sqrt(new_norm) > eps: 348 | ## if approx_mu[0] > 0.: 349 | ## update *= approx_mu[0] 350 | ## else: 351 | # if approx_mu[0] == 0.: 352 | # dcoef[dcoefi] = update 353 | # mu = solve_prox_equ(1-eps/sqrt(new_norm), eps, dcoef+i*n_dim, 354 | # probs, scale, n_dim) 355 | # dcoef[dcoefi] = dcoef_old 356 | ## print("mu vs approx_mu", mu, approx_mu[0]) 357 | # approx_mu[0] = mu 358 | ## update *= mu 359 | # update *= approx_mu[0] 360 | # else: 361 | # update = 0. 362 | # # Box constraint projection 363 | # update = min(scale*prob, update) 364 | # update = max(scale*(prob-1.), update) 365 | # group_norm[0] += update**2 - dcoef_old**2 366 | 367 | # Method 4 (partially approximated, best for now) 368 | # if eps > 0.: 369 | # if approx_mu[0] == 0.: 370 | # eps_prox = eps/inv_d_stepsize 371 | # new_norm = sqrt(group_norm[0] + update**2 - dcoef_old**2) 372 | # if new_norm > eps_prox: 373 | # mu_init = 1-eps_prox/new_norm 374 | # if abs(0.5-mu_init) < 0.45: 375 | # dcoef[dcoefi] = update 376 | # mu = solve_prox_equ(mu_init, eps_prox, dcoef+i*n_dim, 377 | # probs, scale, n_dim) 378 | # dcoef[dcoefi] = dcoef_old 379 | # approx_mu[0] = mu 380 | # else: 381 | # approx_mu[0] = mu_init # mu_init is close to the solution 382 | # #when it is close to 0 or 1 383 | # else: 384 | # approx_mu[0] = 0. 
385 | # update *= approx_mu[0] 386 | 387 | # Method 5 (partially approximated) 388 | # if eps > 0.: 389 | # new_norm = sqrt(group_norm[0] + update**2 - dcoef_old**2) 390 | # mu = 1 - eps/new_norm if new_norm > eps else 0. 391 | # mu = max(0, 1 - eps/sqrt(new_norm)) 392 | # if abs(0.5-mu) < 0.45: 393 | # print("top") 394 | # dcoef[dcoefi] = update 395 | # mu = solve_prox_equ(mu, eps, dcoef+i*n_dim, probs, scale, 396 | # n_dim) 397 | # dcoef[dcoefi] = dcoef_old 398 | # update *= mu 399 | 400 | # # Box constraint projection 401 | # update = min(scale*prob, update) 402 | # update = max(scale*(prob-1.), update) 403 | # group_norm[0] += update**2 - dcoef_old**2 404 | 405 | 406 | #SUPPRIMER INDICESIN, INDICESOUT 407 | def _prox_sdca_intercept_fit(self, 408 | RowDataset Kin, 409 | RowDataset Kout, 410 | np.ndarray[double, ndim=1] y, 411 | np.ndarray[double, ndim=1] dual_coef, 412 | double alpha2, 413 | double C, 414 | double eps, 415 | double stepsize_factor, 416 | np.ndarray[double, ndim=1] probs, 417 | int max_iter, 418 | double tol, 419 | callback, 420 | int n_calls, 421 | float max_time, 422 | int n_gap, 423 | float gap_time_ratio, 424 | int verbose, 425 | rng, 426 | np.ndarray[short int, ndim=1] status, 427 | int active_set, 428 | double lambda_max): 429 | # np.ndarray[double, ndim=1] inner_obj): 430 | 431 | cdef int n_samples = Kin.get_n_samples() 432 | cdef int n_dim = Kout.get_n_features() 433 | 434 | # Variables 435 | cdef double sigma, scale, primal, dual, regul, gap, dual_sparsity, old_gn, constraint 436 | cdef int it, ii, i, j 437 | cdef int has_callback = callback is not None 438 | cdef LONG t 439 | cdef double tol_bound 440 | cdef int check_gap, perf_active_set, n_act_coord, n_act_coord_prev 441 | 442 | # Pre-compute square norms. 443 | # cdef np.ndarray[double, ndim=1, mode='c'] sqnorms 444 | # sqnorms = np.zeros(n_samples*n_dim, dtype=np.float64) 445 | # _sqnorms(Kin, Kout, sqnorms) 446 | 447 | # Pre-compute Lipschitz constants 448 | cdef np.ndarray[double, ndim=1, mode='c'] sqnorms 449 | sqnorms = np.zeros(n_samples, dtype=np.float64) 450 | _sqnormsL(Kin, lambda_max, sqnorms) 451 | 452 | # Pointers 453 | cdef double* dcoef = dual_coef.data 454 | cdef double* cprobs = probs.data 455 | # cdef double* iobj = inner_obj.data 456 | cdef int* cstatus = status.data 457 | 458 | cdef np.ndarray[double, ndim=1] multiplier_data 459 | multiplier_data = np.zeros(n_dim*n_samples, dtype=np.float64) 460 | cdef double* multiplier = multiplier_data.data 461 | 462 | cdef np.ndarray[double, ndim=1] residual_data 463 | residual_data = np.zeros(n_dim*n_samples, dtype=np.float64) 464 | cdef double* residual = residual_data.data 465 | 466 | cdef np.ndarray[double, ndim=1] ydm_data 467 | ydm_data = np.zeros(n_dim, dtype=np.float64) # 1.T * multiplier 468 | cdef double* ydm = ydm_data.data 469 | 470 | cdef np.ndarray[double, ndim=1] xdm_data 471 | xdm_data = np.zeros(n_dim, dtype=np.float64) # 1.T * dcoef 472 | cdef double* xdm = xdm_data.data 473 | for j in xrange(n_dim): 474 | dot = 0. 
475 | for i in xrange(n_samples): 476 | dot += dcoef[i*n_dim+j] 477 | xdm[j] = dot 478 | 479 | cdef np.ndarray[int, ndim=1] sindices 480 | sindices = np.arange(n_samples, dtype=np.int32) 481 | sindices_size = n_samples 482 | 483 | cdef np.ndarray[double, ndim=1] group_norm_data 484 | group_norm_data = np.zeros(n_samples, dtype=np.float64) # squared norm for each group 485 | cdef double* group_norm = group_norm_data.data 486 | for i in range(n_samples): 487 | group_norm[i] = norm_square(dcoef+i*n_dim, n_dim) 488 | 489 | cdef np.ndarray[double, ndim=1] res_norm_data 490 | res_norm_data = np.zeros(n_samples, dtype=np.float64) # norm for each residue 491 | cdef double* res_norm = res_norm_data.data 492 | 493 | cdef np.ndarray[int, ndim=1] coef_on_bound_data 494 | coef_on_bound_data = np.zeros(n_samples, dtype=np.int32) # Is coef on box bound? 495 | cdef int* coef_on_bound= coef_on_bound_data.data 496 | 497 | cdef np.ndarray[double, ndim=1] mus_data 498 | mus_data = np.ones(n_samples, dtype=np.float64) # 1 for eps=0 499 | cdef double* mus = mus_data.data 500 | 501 | # Data pointers. 502 | cdef int* indicesin 503 | cdef double* datain 504 | cdef int n_nzin 505 | cdef int* indicesout 506 | cdef double* dataout 507 | cdef int n_nzout 508 | n_gap_auto = n_gap==0 509 | if n_gap_auto: 510 | n_gap = 100 511 | 512 | swap_active_set = 0 513 | 514 | scale = C * 1. / alpha2 515 | 516 | # dual = (y * dual_coef).sum() 517 | dual = (y * np.reshape(dual_coef, (n_samples, n_dim)).T).sum() 518 | # dual = 0. 519 | # for i in xrange(n_samples): 520 | # dot = 0. 521 | # for j in xrange(n_dim): 522 | # dot += dcoef[i*n_dim+j] 523 | # dual += y[i]*dot 524 | 525 | dual_sparsity = np.sqrt(group_norm_data).sum() 526 | regul = 0 527 | for i in xrange(n_samples): 528 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 529 | for j in xrange(n_dim): 530 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 531 | dot = 0. 532 | for ii in xrange(n_nzin): 533 | for jj in xrange(n_nzout): 534 | dot += dcoef[indicesin[ii]*n_dim+indicesout[jj]] * datain[ii] * dataout[jj] 535 | regul += dcoef[i*n_dim+j] * dot 536 | if verbose: 537 | # print("regul", regul, "dual", dual, "group_norm", dual_sparsity) 538 | constraint = np.sum(np.fabs(xdm_data)) 539 | obj = alpha2 * (regul/2. 
- dual + 100.*constraint + eps * dual_sparsity) 540 | print("Initial obj:", obj) 541 | 542 | ################ Test solve_prox_equ ############## 543 | # n = 6 544 | # cdef np.ndarray[double, ndim=1] b 545 | # cdef np.ndarray[double, ndim=1] yy 546 | # b = 0.8*np.ones(n) 547 | # a = b-1 548 | # l = 2 549 | # yy = np.random.randn(n)*5 550 | # 551 | # mu = 0.5 552 | # for it in range(50): 553 | # phi = 1 + l / np.linalg.norm(np.fmin(b, np.fmax(a, mu*yy))) - 1/mu 554 | # v = np.fmin(b, np.fmax(a, mu*yy)) 555 | # u = yy * (va) 556 | # diff_phi = 1/mu**2 - l*mu*np.linalg.norm(u)**2 / np.linalg.norm(v)**3 557 | # mu -= phi / diff_phi 558 | # 559 | # print("mu Newton", mu) 560 | # 561 | # cdef double* cb = b.data 562 | # cdef double* cy = yy.data 563 | # mu = solve_prox_equ(0.5, l, cy, cb, 1, n) 564 | # print("mu Newton", mu) 565 | # mu = solve_prox_equ_bisect(l, cy, cb, 1, n) 566 | # print("mu bisect", mu) 567 | ################ Test solve_prox_equ ############## 568 | 569 | # n_indices_max = 200 570 | # if sindices_size > n_indices_max: 571 | # rng.shuffle(sindices) 572 | # sindices = sindices[:n_indices_max] 573 | # sindices_size = n_indices_max 574 | 575 | t = 0 576 | i_check_gap = 0 577 | time_gap = 0 578 | n_act_coord = 0 579 | tol_bound = 1e-6 580 | 581 | start_it = process_time() 582 | for it in xrange(max_iter): 583 | primal = 0 584 | n_act_coord_prev = n_act_coord 585 | n_act_coord = 0 586 | 587 | check_gap = it+1 - (it+1)//n_gap * n_gap == 0 # np.mod(it+1, n_gap) 588 | # perf_active_set = it+2 - (it+2)//n_gap * n_gap != 0 # Iteration before checking the gap 589 | perf_active_set = 1-check_gap 590 | 591 | rng.shuffle(sindices) 592 | 593 | # Set values for mu (leave at 1 for first iteration) 594 | # if eps > 0. and it>0: 595 | # for i in range(n_samples): 596 | ## print(group_norm[i], norm_square(dcoef+i*n_dim, n_dim)) 597 | # if np.sqrt(group_norm[i]) > eps: 598 | # mus[i] = solve_prox_equ(1-eps/np.sqrt(group_norm[i]), eps, 599 | # dcoef+i*n_dim, cprobs, scale, n_dim) 600 | # else: 601 | # mus[i] = 0. 602 | # # print("mu", i, mus[i], 1-eps/np.sqrt(group_norm[i]), group_norm[i]) 603 | 604 | # Reset mus 605 | # if eps > 0. and it>0: 606 | # for i in range(n_samples): 607 | # mus[i] = 0. 608 | 609 | # for ii in xrange(n_samples*n_dim): 610 | # ij = sindices[ii] 611 | # i = ij / n_dim # Sample index 612 | # j = ij - i*n_dim # Dimension index 613 | for ii in xrange(sindices_size): 614 | i = sindices[ii] 615 | # mus[i] = 0. 616 | old_gn = group_norm[i] 617 | if sqnorms[i] == 0: 618 | continue 619 | 620 | if active_set==1: 621 | if it>100 and (\ 622 | (res_norm[i] < eps*0.9 and \ 623 | sqrt(group_norm[i]) / (n_dim*scale) < tol_bound) or \ 624 | (res_norm[i] > eps*1.1 and coef_on_bound[i] == 1) 625 | ) and perf_active_set==1: 626 | continue 627 | 628 | n_act_coord += 1 629 | 630 | # Retrieve rows 631 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 632 | # Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 633 | 634 | _solve_subproblem(datain, indicesin, n_nzin, 635 | Kout, dataout, indicesout, n_nzout, 636 | y[i], dcoef, i*n_dim, 637 | multiplier + i*n_dim, 638 | residual + i*n_dim, 639 | xdm, ydm, n_samples, n_dim, 640 | sqnorms[i], scale, eps, 641 | group_norm + i, res_norm + i, 642 | coef_on_bound + i, 643 | mus + i, stepsize_factor, 644 | cprobs, i, 645 | &primal, &dual, ®ul) 646 | 647 | if has_callback and t % n_calls == 0: 648 | ret = callback(self) 649 | if ret is not None: 650 | break 651 | 652 | t += 1 653 | 654 | if eps > 0.: 655 | if group_norm[i] < 0.: 656 | group_norm[i] = 0. 
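                # Maintain dual_sparsity = sum_i ||alpha_i||_2 incrementally;
                # it enters the dual objective through the eps * dual_sparsity term.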
657 | dual_sparsity += sqrt(group_norm[i]) - sqrt(old_gn) 658 | 659 | # if has_callback and t % n_calls == 0: 660 | # ret = callback(self) 661 | # if ret is not None: 662 | # break 663 | 664 | # Debug 665 | # Compute 0.5 * dcoef.T * kron(Kin, Kout) * dcoef - 666 | # kron(y, ones(n_dim)).T * dcoef 667 | # This should be equal to alpha2 * (regul/2. - dual) 668 | # if np.mod(it, 1e0) == 0: 669 | # obj = 0. 670 | # dot = 0. 671 | # for i in xrange(n_samples): 672 | # Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 673 | # for j in xrange(n_dim): 674 | # Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 675 | # dot += dcoef[i*n_dim+j] * y[i] 676 | # # print "%f < %f < %f" % (scale*(probs[j]-1), scale*dcoef[i*n_dim+j], scale*probs[j]) 677 | # for ii in xrange(n_samples): 678 | # for jj in xrange(n_dim): 679 | # obj += datain[ii] * dataout[jj] * dcoef[i*n_dim+j] * dcoef[ii*n_dim+jj] 680 | # obj *= 0.5 681 | # obj -= dot 682 | # print "It: %d obj: %f" % (it+1, obj) 683 | 684 | 685 | ################## Duality gap, print and active set ################## 686 | # if it+1 - (it+1)//n_gap * n_gap == 0: # np.mod(it+1, n_gap) 687 | if check_gap: 688 | i_check_gap += 1 689 | start_gap = process_time() 690 | 691 | # constraint = np.sum(np.fabs(xdm_data)) 692 | constraint = 0 693 | for j in xrange(n_dim): 694 | constraint += abs(xdm[j]) 695 | # obj = alpha2 * (regul/2. - dual + 100.*(constraint if constraint > 1e-4 else 0.)) # Minus dual objective value 696 | obj = alpha2 * (regul/2. - dual + 100.*constraint) # Minus dual objective value 697 | obj += alpha2 * eps * dual_sparsity 698 | 699 | # iobj[i_check_gap-1] = obj 700 | 701 | # Compute the intercept (not needed anymore since -multiplier_old has been added to residual) 702 | # rresidual = np.reshape(residual_data, (n_samples, n_dim)).T 703 | # By duality 704 | # intercept = np.reshape(multiplier_data, (n_samples, n_dim)).mean(axis=0) 705 | # Or… 706 | # if eps == 0.: 707 | # # Minimize primal problem 708 | # intercept = [ 709 | # np.percentile(res, 100.*prob) for (res, prob) in 710 | # zip(rresidual, probs)] 711 | # else: 712 | # # Use optimality conditions 713 | # tol_bound = 1e-3 # Tolerance for boundaries 714 | # coefs = np.reshape(dual_coef, (n_samples, n_dim)).T 715 | # ind_supp = np.where(np.sqrt(group_norm_data) / (n_dim * C) > tol_bound)[0] # Support vectors 716 | # ind_up = np.where(np.all( 717 | # (probs*C-coefs.T) / C > tol_bound, axis=1))[0] # Not on boundary sup 718 | # ind_down = np.where(np.all( 719 | # (coefs.T - (probs-1)*C) / C > tol_bound, axis=1))[0] # Not on boundary inf 720 | # # All conditions together: coefs of interest 721 | # # Intersection of ind_up, in_down and ind_supp 722 | # ind = [el for el in ind_up if el in ind_down and el in ind_supp] 723 | ## print("ind in sdca", ind) 724 | # if ind: 725 | # # Residues without intercept - expected values from dual coefs 726 | # intercept = (rresidual[:, ind]\ 727 | # -eps * coefs[:, ind]/\ 728 | # np.sqrt(group_norm_data[ind])).mean(axis=1) 729 | ## print("sdca residual") 730 | ## print(rresidual[:, ind]) 731 | ## print("sdca coefs") 732 | ## print(coefs[:, ind]) 733 | ## print("sdca group_norm") 734 | ## print(np.sqrt(group_norm_data[ind])) 735 | # else: 736 | # # If ind empty, do similarly as quantile regression 737 | # intercept = [ 738 | # np.percentile(res, 100.*prob) for (res, prob) in 739 | # zip(rresidual, probs)] 740 | ## print("intercept") 741 | ## print(intercept) 742 | ## print("multiplier") 743 | ## print(np.reshape(multiplier_data, (n_samples, n_dim)).mean(axis=0)) 
744 | 745 | # rresidual = (rresidual.T - intercept).T 746 | 747 | # Compute the primal objective (approximated for eps-loss) 748 | # primal2 = np.sum([ 749 | # prob*np.fmax(0, res-eps) for (res, prob) in 750 | # zip(rresidual, probs)]) 751 | # primal2 += np.sum([ 752 | # (prob-1)*np.fmin(0, res+eps) for (res, prob) in 753 | # zip(rresidual, probs)]) 754 | # print(primal2, primal) 755 | # Use accumulated loss 756 | primal2 = alpha2 * (regul/2. + primal*scale) 757 | gap = (primal2 + obj) / (C * n_samples) # Dual gap 758 | if gap < 0.: 759 | gap = 1 760 | 761 | # Active set (if enabled and intercept obtained by optimality conditions) 762 | # if active_set > 0: 763 | # if swap_active_set: 764 | # tol_bound = 1e-6 # Tolerance for boundaries (redefinition) 765 | ## coefs = np.reshape(dual_coef, (n_samples, n_dim)).T 766 | ## rresidual = np.reshape(residual_data, (n_samples, n_dim)).T 767 | ## rresidual_norm = np.linalg.norm(rresidual, axis=0) # Residues norm 768 | ## print("residual norm") 769 | ## print(rresidual_norm) 770 | ## print("online residual norm") 771 | ## print(res_norm_data) 772 | # 773 | # # Points with small residues and zero coefs 774 | ## ind_null = np.where(rresidual_norm_data < eps-tol_bound)[0] 775 | ## ind_null_coef = np.where(np.sqrt(group_norm_data) / (n_dim * C) < tol_bound)[0] 776 | ## ind_null_coef = [e for e in ind_null if e in ind_null_coef] 777 | # ind_null_coef = [j for j in xrange(n_samples) if 778 | # res_norm[j] < eps*0.9 and 779 | # sqrt(group_norm[j]) / (n_dim*C) < tol_bound] 780 | # 781 | # # Points with large residues and coefs on box borders 782 | ## ind_bound = np.where(res_norm_data > eps*1.1)[0] 783 | ## ind_bound_coef = np.where(np.all(np.logical_or( 784 | ## (probs*C-coefs.T) / C < tol_bound, 785 | ## (coefs.T - (probs-1)*C) / C < tol_bound), axis=1))[0] 786 | ## ind_bound_coef = [e for e in ind_bound if e in ind_bound_coef] 787 | # ind_bound_coef = [j for j in xrange(n_samples) if 788 | # res_norm[j] > eps*1.1 and 789 | # coef_on_bound[j] == 1] 790 | # 791 | # # All that points satisfy optimality conditions 792 | # ind_all_coef = ind_null_coef + ind_bound_coef 793 | # sindices = np.delete(np.arange(n_samples, dtype=np.int32), 794 | # ind_all_coef) 795 | # sindices_size = sindices.size 796 | ## print(np.sort(sindices)) 797 | # else: 798 | # if it > 1: 799 | # swap_active_set = 1 800 | # 801 | # sindices = np.arange(n_samples, dtype=np.int32) 802 | # sindices_size = n_samples 803 | # swap_active_set = 1 - swap_active_set # 0 <-> 1 804 | 805 | # swap_active_set += 1 806 | # if swap_active_set > 2: 807 | # swap_active_set = 0 808 | 809 | # if sindices_size > n_indices_max: 810 | # rng.shuffle(sindices) 811 | # sindices = sindices[:n_indices_max] 812 | # sindices_size = n_indices_max 813 | 814 | # print("all coefs", ind_all_coef) 815 | # print("active indexes", sindices) 816 | # print(coefs[:, ind_all_coef] / C) 817 | # print("# active coord", sindices.size) 818 | 819 | # Automatic tuning such that the time of computing the duality gap 820 | # don't exceed 100*gap_time_ratio % of the total time 821 | end_gap = process_time() 822 | elapsed_time = (end_gap - start_it) / i_check_gap # Time between 2 checks 823 | time_gap = ((i_check_gap-1)*time_gap + end_gap - start_gap) / i_check_gap 824 | if n_gap_auto: 825 | n_gap = max(100, int(n_gap * time_gap / (elapsed_time * gap_time_ratio))) 826 | 827 | # if swap_active_set: 828 | # n_gap = 10 829 | 830 | if verbose: 831 | print "%8d: %5.2e (gap) %5.2f (obj) %5.2e (constraint) %5.2f (gap time ratio) %d (# act coord)" 
832 | 
833 |             # Stopping criterion
834 |             if gap <= tol:
835 |                 if verbose:
836 |                     print("Optimal solution found.")
837 |                 status[0] = 1
838 |                 break
839 | 
840 |             if max_time > 0 and process_time() - start_it > max_time:
841 |                 if verbose:
842 |                     print("Max time reached.")
843 |                 status[0] = 3
844 |                 break
845 | 
846 |             # # tol is the objective value to reach
847 |             # if (verbose or tol < 0.) and np.mod(it+1, 1e3) == 0:
848 |             #     # Minus dual objective value
849 |             #     obj = alpha2 * (regul/2. - dual + 100.*np.sum(np.fabs(xdm_data)))
850 |             ##    obj = alpha2 * (regul/2. - dual)
851 |             #
852 |             #     if verbose:
853 |             #         print("%8d: %5.2e %5.2e" % (it + 1, obj-tol,
854 |             #                                     np.sum(np.fabs(xdm_data))))
855 |             #
856 |             #     # Objective value reached
857 |             #     if tol < 0. and obj <= tol:
858 |             #         if verbose:
859 |             #             print("Ground truth objective value reached.")
860 |             #         break
861 | 
862 |         else:
863 |             if verbose:
864 |                 print("Max iteration reached.")
865 |             status[0] = 2
866 |     # Debug sparsity (norm accumulation)
867 |     # print(group_norm_data)
868 |     # coefs = np.reshape(dual_coef, (n_samples, n_dim)).T
869 |     # print(np.sqrt((coefs**2).sum(axis=0)))
870 | 
871 |     # print("sindices", np.sort(sindices))
872 | 
--------------------------------------------------------------------------------
/qreg/qreg.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | # Author: Maxime Sangnier
3 | # License: BSD
4 | import sys
5 | 
6 | import numpy as np
7 | import scipy.spatial.distance as dist
8 | from scipy.linalg import eigvalsh
9 | from scipy.stats import norm
10 | from sklearn.utils import check_random_state
11 | from sklearn.base import BaseEstimator
12 | from sklearn.linear_model import LinearRegression, Ridge
13 | from sklearn.svm import SVR
14 | 
15 | from cvxopt import matrix, solvers
16 | from .dataset_fast import get_dataset
17 | from .sdca_qr_fast import _prox_sdca_intercept_fit
18 | from .sdca_qr_al_fast import _prox_sdca_al_fit
19 | 
20 | import time
21 | import warnings
22 | 
23 | # time.clock() has been removed in Python 3.8+
24 | # See: https://docs.python.org/3/whatsnew/3.8.html#api-and-feature-removals
25 | #if sys.version_info >= (3,8):
26 | #    get_time = time.perf_counter
27 | #else:
28 | #    get_time = time.clock
29 | 
30 | 
31 | def toy_data(n=50, t_min=0., t_max=1.5, noise=1., probs=[0.5]):
32 |     """
33 |     Parameters
34 |     n: number of points (t, y)
35 |     t_min: minimum input data t
36 |     t_max: maximum input data t
37 |     noise: noise level
38 |     probs: probabilities (quantile levels)
39 | 
40 |     Returns:
41 |     x: sorted random data in [t_min, t_max]
42 |     y: targets corresponding to x (following a noisy sine curve)
43 |     q: true quantiles corresponding to x
44 |     """
45 |     t_down, t_up = 0., 1.5  # Bounds for the noise
46 |     t = np.random.rand(n) * (t_max-t_min) + t_min
47 |     t = np.sort(t)
48 |     pattern = -np.sin(2*np.pi*t)  # Pattern of the signal
49 |     envelope = 1 + np.sin(2*np.pi*t/3)  # Envelope of the signal
50 |     pattern = pattern * envelope
51 |     # Noise with decreasing std (from noise+0.2 down to 0.2)
52 |     noise_std = 0.2 + noise*(t_up - t) / (t_up - t_down)
53 |     # Gaussian noise with decreasing std
54 |     add_noise = noise_std * np.random.randn(n)
55 |     observations = pattern + add_noise
56 |     quantiles = [pattern + norm.ppf(p, loc=np.zeros(n),
57 |                                     scale=np.fabs(noise_std)) for p in probs]
58 |     return t, observations, quantiles
59 | 
60 | 
61 | def proj_dual(coefs, C, probs):
62 |     n = coefs.shape[1]
63 |     for it in range(100):
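        # One pass of alternating projections: first onto the zero-sum
        # hyperplane (dual equality constraint), then onto the box
        # [C*(probs-1), C*probs] (dual box constraint); the fixed budget of
        # 100 passes approximates the projection onto their intersection.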
64 |         # Project onto the hyperplane
65 |         coefs = np.asarray([x - x.sum() / n for x in coefs])
66 |         coefs = np.asarray([np.fmin(C*probs, np.fmax(C*(probs-1), x)) for x in coefs.T]).T
67 | 
68 |     return coefs
69 | 
70 | 
71 | class QRegressor(BaseEstimator):
72 |     def __init__(self, C=1, probs=[0.5], eps=0., kernel='rbf', gamma_in=None,
73 |                  gamma_out=0., alg='coneqp', max_iter=100, tol=1e-6, lag_tol=1e-4,
74 |                  stepsize_factor=10., callback=None,
75 |                  n_calls=None, verbose=False, random_state=None,
76 |                  coefs_init="svr", nc_const=False, al_max_time=180.,
77 |                  max_time=None, n_gap=None, gap_time_ratio=1e-3,
78 |                  active_set=True, sv_tol=1e-3):
79 |         """
80 |         Quantile Regression.
81 | 
82 |         C: cost parameter (upper bound of dual variables). Positive scalar.
83 |         probs: probabilities (quantile levels)
84 |         eps: threshold for the epsilon-loss (if used)
85 |         kernel: input kernel ('rbf' or 'linear')
86 |         gamma_in: gamma parameter for the input RBF kernel
87 |         gamma_out: gamma parameter for the output RBF kernel
88 |         alg: algorithm, which can be:
89 |             - 'qp': CVXOPT (alternate optimization when eps > 0)
90 |             - 'coneqp': CVXOPT (cone programming when eps > 0)
91 |             - 'sdca': "A Coordinate Descent Primal-Dual Algorithm with Large
92 |               Step Size and Possibly Non Separable Functions", by Olivier Fercoq
93 |               and Pascal Bianchi
94 |             - 'al': augmented Lagrangian
95 |             - 'mtl': multi-task learning ("Parametric Task Learning", by Ichiro
96 |               Takeuchi, Tatsuya Hongo, Masashi Sugiyama and Shinichi Nakajima)
97 |         max_iter: maximum number of iterations
98 |         tol: prescribed tolerance
99 |         lag_tol: prescribed tolerance for the outer loop of the augmented
100 |             Lagrangian algorithm and for the QP with eps-loss
101 |         stepsize_factor: step size parameter for the sdca and al algorithms
102 |         callback: optional callable invoked during optimization
103 |         n_calls: number of iterations between two calls to callback
104 |             (if None, the number of samples)
105 |         verbose: print resolution information when set to True
106 |         random_state: seed or RandomState instance used by the stochastic algorithms
107 |         coefs_init: initial dual coefficients (numpy array, shape (n_probs,
If "svr", initialize with esp-conditional 108 | median (scikit-learn SVR) 109 | nc_const: add non-crossing consraints when set to true (only available 110 | with alg='qp') 111 | al_max_time: maximum training time (seconds) for al algorithm 112 | max_time: maximum training time (seconds) for sdca algorithm 113 | n_gap: number of iterations between two dual gap check (if None, automatic) 114 | gap_time_ratio: ratio time to compute dual gap / time for n_gap iterations 115 | (this quantity is used to adjust automatically n_gap) 116 | active_set: whether to use active set or not 117 | sv_tol: tolerance for detecting support vector before prediction 118 | """ 119 | self.C = C 120 | self.probs = probs 121 | self.eps = eps 122 | self.kernel = kernel 123 | self.gamma_in = gamma_in 124 | self.gamma_out = gamma_out 125 | self.alg = alg 126 | self.alpha = 1.0 # Do not change 127 | self.max_iter = max_iter 128 | self.tol = tol 129 | self.lag_tol = lag_tol 130 | self.stepsize_factor = stepsize_factor 131 | self.callback = callback 132 | self.n_calls = n_calls 133 | self.verbose = verbose 134 | self.random_state = random_state 135 | self.coefs_init = coefs_init 136 | self.nc_const = nc_const 137 | self.al_max_time = al_max_time 138 | self.max_time = max_time 139 | self.n_gap = n_gap 140 | self.gap_time_ratio = gap_time_ratio 141 | self.status = "" # Resolution status 142 | self.active_set = active_set 143 | self.sv_tol = sv_tol 144 | 145 | def predict(self, X): 146 | """ 147 | Predict the conditional quantiles 148 | 149 | Parameters: 150 | X: data in rows (numpy array) 151 | 152 | Returns: 153 | y: prediction for each prescribed quantile levels 154 | """ 155 | 156 | X = np.asarray(X) 157 | if X.ndim == 1: 158 | # X = np.asarray([X]).T 159 | # Data has a single feature 160 | X = X.reshape(-1, 1) 161 | 162 | # Indexes of support vectors 163 | ind_sv = self.ind_sv() 164 | 165 | # Compute kernels 166 | if self.kernel == 'rbf': 167 | Din = dist.cdist(self.X[ind_sv, :], X, 'sqeuclidean') 168 | Kin = np.exp(-self.gamma_in * Din) 169 | else: # Linear kernel 170 | Kin = np.dot(self.X[ind_sv, :], self.D.dot(X.T)) 171 | 172 | Dout = dist.pdist(np.asarray([self.probs]).T, 'sqeuclidean') 173 | Kout = np.exp(-self.gamma_out * dist.squareform(Dout)) \ 174 | if self.gamma_out != np.inf else np.eye(np.size(self.probs)) 175 | 176 | pred = np.dot(np.dot(Kout, self.coefs[:, ind_sv]), Kin).T 177 | pred += self.intercept 178 | return pred.T 179 | 180 | def fit(self, X, y): 181 | """ 182 | Fit the model. 
183 | 
184 |         X: data in rows (numpy array)
185 |         y: targets in rows (numpy array)
186 |         """
187 | 
188 |         # Was in __init__ before
189 |         self.kernel = self.kernel.lower()
190 |         self.probs = np.asarray(self.probs)
191 |         self.max_iter = int(self.max_iter)
192 |         if self.max_time is None:
193 |             self.max_time = 0
194 |         if self.n_gap is None:
195 |             self.n_gap = 0
196 |         if self.nc_const and self.alg != 'qp':
197 |             self.alg = 'qp'
198 |             warnings.warn("alg set to 'qp' (this is the only available " + \
199 |                           "algorithm to deal with the non-crossing constraints)")
200 | 
201 |         if self.kernel != 'rbf' and self.kernel != 'linear':
202 |             raise ValueError('Choose kernel between rbf and linear.')
203 | 
204 |         if self.alg == 'mtl':
205 |             self.kernel = 'linear'
206 |             self.gamma_out = np.inf
207 |             self.gamma_in = None
208 | 
209 |         # Data refactoring
210 |         self.X = np.asarray(X)
211 |         if self.X.ndim == 1:
212 |             # self.X = np.asarray([X]).T
213 |             # Data has a single feature
214 |             self.X = self.X.reshape(-1, 1)
215 |         y = np.ravel(y)
216 | 
217 |         # If no gamma_in is specified, take 0.5 / q, where q is the 0.7-quantile
218 |         # of the squared distances
219 |         if self.kernel == 'rbf':
220 |             Din = dist.pdist(self.X, 'sqeuclidean')
221 |             if self.gamma_in is None:
222 |                 self.gamma_in = 1. / (2. * np.percentile(Din, 70.))
223 | 
224 |         # Compute kernels
225 |         if self.kernel == 'rbf':
226 |             Kin = np.exp(-self.gamma_in * dist.squareform(Din))
227 |         else:  # Linear kernel
228 |             self.D = np.eye(self.X.shape[1])
229 |             Kin = np.dot(self.X, self.D.dot(self.X.T))
230 | 
231 |         Dout = dist.pdist(np.asarray([self.probs]).T, 'sqeuclidean')
232 |         Kout = np.exp(-self.gamma_out * dist.squareform(Dout)) \
233 |             if self.gamma_out != np.inf else np.eye(np.size(self.probs))
234 | 
235 |         # Check algorithm
236 |         if self.eps > 0 and self.alg != 'qp' and self.alg != 'coneqp' and self.alg != 'sdca':
237 |             raise ValueError('Use qp, coneqp or sdca for epsilon quantile regression.')
238 |         if self.nc_const and self.eps > 0:
239 |             raise ValueError('Not implemented yet.')
240 | 
241 |         # Initialization
242 |         # For QP, it seems to slow down convergence.
243 |         if self.coefs_init is None:
244 |             coefs_init = None
245 |         elif isinstance(self.coefs_init, str) and self.coefs_init.lower() == "svr":
246 |             # Estimate the conditional median
247 |             svr = SVR(C=self.C/2, kernel="precomputed", epsilon=self.eps)
248 |             svr.fit(Kin, y)
249 |             svr_dual = np.zeros(y.shape)
250 |             svr_dual[svr.support_] = svr.dual_coef_[0, :]
251 |             coefs_init = np.kron(svr_dual, np.ones(np.size(self.probs)))
252 |         else:
253 |             coefs_init = self.coefs_init.T.ravel()
254 | 
255 |         # Choose the algorithm
256 |         if self.alg == 'qp':  # Off-the-shelf solver (cvxopt)
257 |             if self.nc_const:
258 |                 self.qp_nc2(Kin, Kout, y)
259 |             else:
260 |                 K = np.kron(Kin, Kout)
261 |                 self.qp_eps(K, y)  #, coefs_init)
262 |         elif self.alg == 'coneqp':
263 |             if self.nc_const:
264 |                 self.qp_nc2(Kin, Kout, y)
265 |             else:
266 |                 K = np.kron(Kin, Kout)
267 |                 self.coneqp_eps(K, y)
268 |         elif self.alg == 'sdca':  # Stochastic dual coordinate descent
269 |             self.sdca(Kin, Kout, y, coefs_init)
270 |         elif self.alg == 'al':
271 |             self.al(Kin, Kout, y, 1, coefs_init)
272 |         elif self.alg == 'penal':
273 |             self.al(Kin, Kout, y, 4, coefs_init)
274 |         elif self.alg == 'mtl':
275 |             self.mtl(y)
276 |             # Recompute the kernel with learned D
277 |             Kin = np.dot(self.X, self.D.dot(self.X.T))
278 |         else:
279 |             raise ValueError('Unknown algorithm')
280 | 
281 |         # When there are no additional constraints, the quantile property is
282 |         # satisfied.
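        # (satisfied on the training set: by taking the intercept as the
        # empirical prob-quantile of the residuals y - pred below, a
        # proportion prob of the training points lies under each predicted
        # quantile)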
283 |         if not self.nc_const:
284 |             # Make the dual point feasible (mainly for SDCA)
285 |             self.coefs = proj_dual(self.coefs, self.C, self.probs)
286 | 
287 |         # Set the intercept
288 |         # Erase the previous intercept before prediction
289 |         self.intercept = 0.
290 |         # For usual quantile prediction
291 |         if self.eps == 0.:
292 |             self.intercept = [
293 |                 np.percentile(y-pred, 100.*prob) for
294 |                 (pred, prob) in zip(self.predict(self.X), self.probs)]
295 |             self.intercept = np.asarray(self.intercept)
296 |         else:
297 |             # For eps-quantile prediction
298 |             # Use optimality conditions to find:
299 |             #     residues = eps * coef / coef_norm.
300 |             # True for coefs that:
301 |             #     - are not 0
302 |             #     - are not on the boundaries
303 |             tol = 1e-3  # Tolerance for boundaries
304 |             group_norm = np.linalg.norm(self.coefs, axis=0)  # Norm of coefs vectors
305 |             ind_supp = np.where(
306 |                 group_norm / (self.probs.size * self.C) > tol
307 |             )[0]  # Support vectors
308 |             ind_up = np.where(np.all(
309 |                 (self.probs*self.C-self.coefs.T) / self.C > tol,
310 |                 axis=1))[0]  # Not on the upper boundary
311 |             ind_down = np.where(np.all(
312 |                 (self.coefs.T - (self.probs-1)*self.C) / self.C > tol,
313 |                 axis=1))[0]  # Not on the lower boundary
314 |             # All conditions together: coefs of interest
315 |             ind = list(set(ind_up) & set(ind_down) & set(ind_supp))
316 |             if ind:
317 |                 # Residues without intercept
318 |                 res = y[ind] - self.predict(self.X)[:, ind]
319 |                 # Expected values from dual coefs
320 |                 res_dual = self.eps * self.coefs[:, ind]/group_norm[ind]
321 |                 # Intercept
322 |                 self.intercept = (res-res_dual).mean(axis=1)
323 |                 # print("qreg residual")
324 |                 # print(res)
325 |                 # print("qreg coefs")
326 |                 # print(self.coefs[:, ind])
327 |                 # print("qreg group_norm")
328 |                 # print(group_norm[ind])
329 |             else:
330 |                 # If ind is empty, proceed as for plain quantile regression
331 |                 self.intercept = [
332 |                     np.percentile(y-pred, 100.*prob) for
333 |                     (pred, prob) in zip(self.predict(self.X), self.probs)]
334 |                 self.intercept = np.asarray(self.intercept)
335 | 
336 |         # Set optimal objective value
337 |         self.obj = 0.5 * np.trace(np.dot(
338 |             self.coefs.T, np.dot(Kout, np.dot(self.coefs, Kin)))) \
339 |             - np.sum(self.coefs * y)
340 |         self.obj += self.eps * np.linalg.norm(self.coefs, axis=0).sum()
341 | 
342 |     def score(self, X, y, sample_weight=None):
343 |         # Pinball loss
344 |         return 1 - self.pinball_loss(self.predict(X), y).mean()
345 |         # Pinball loss + Indicator (crossing_loss)
346 |         # p = self.predict(X)
347 |         # return 1 - self.pinball_loss(p, y).mean() + \
348 |         #     100. * self.crossing_loss(p).sum()
349 | 
350 |     def qp_nc(self, Kin, Kout, y):
351 |         ind = np.argsort(self.probs)  # Needed to sort constraints on quantile levels
352 | 
353 |         K = np.kron(Kin, Kout)
354 |         p = np.size(self.probs)  # Number of quantiles to predict
355 |         n = K.shape[0]  # Number of coefficients
356 |         m = n // p  # Number of training instances
357 |         probs = np.kron(np.ones(m), self.probs)  # Quantile levels
358 | 
359 |         D = -np.eye(p) + np.diag(np.ones(p-1), 1)  # Difference matrix
360 |         D = np.delete(D, -1, 0)
361 |         D = D.T[np.argsort(ind)].T
362 | 
363 |         U = np.kron(Kin, np.dot(Kout, D.T))  # Upper-right (and lower-left) block
364 |         L = np.kron(Kin, np.dot(D, np.dot(Kout, D.T)))  # Lower-right block
365 | 
366 |         K = matrix(np.r_[np.c_[K, U], np.c_[U.T, L]])  # Quad. part of the obj.
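        # Stacked variables: the first n entries are the pinball dual
        # coefficients; the remaining m*(p-1) entries stem from the
        # non-crossing constraints and are folded back through D.T when
        # setting self.coefs below.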
367 |         q = matrix(np.r_[-np.kron(y, np.ones(p)), np.zeros(m*(p-1))])  # Linear part of the objective
368 |         G = matrix(np.r_[np.c_[np.eye(n), np.zeros((n, m*(p-1)))],
369 |                          np.c_[-np.eye(n), np.zeros((n, m*(p-1)))],
370 |                          np.c_[np.zeros((m*(p-1), n)), -np.eye(m*(p-1))]])  # LHS of the inequ. constr.
371 |         h = matrix(np.r_[self.C*probs, self.C*(1-probs), np.zeros(m*(p-1))])  # RHS of the inequ.
372 |         A = matrix(np.c_[np.kron(np.ones(m), np.eye(p)),
373 |                          np.kron(np.ones(m), D.T)])  # LHS of the equ. constr.
374 |         b = matrix(np.zeros(p))  # RHS of the equality constraint
375 | 
376 |         # The following parameters control the execution of the default solver.
377 |         # options['show_progress'] True/False (default: True)
378 |         # options['maxiters'] positive integer (default: 100)
379 |         # options['refinement'] positive integer (default: 0)
380 |         # options['abstol'] scalar (default: 1e-7)
381 |         # options['reltol'] scalar (default: 1e-6)
382 |         # options['feastol'] scalar (default: 1e-7)
383 |         # Returns:
384 |         # {'dual infeasibility'
385 |         #  'dual objective'
386 |         #  'dual slack'
387 |         #  'gap'
388 |         #  'iterations'
389 |         #  'primal infeasibility'
390 |         #  'primal objective'
391 |         #  'primal slack'
392 |         #  'relgap'
393 |         #  's': <0x1 matrix, tc='d'>,
394 |         #  'status'
395 |         #  'x'
396 |         #  'y'
397 |         #  'z'
398 |         solvers.options['show_progress'] = self.verbose
399 |         if self.tol > 0:
400 |             solvers.options['reltol'] = self.tol
401 |         self.time = time.process_time()  # Store beginning time
402 |         sol = solvers.qp(K, q, G, h, A, b)  # Solve the dual opt. problem
403 |         self.time = time.process_time() - self.time  # Store training time
404 | 
405 |         # Set coefs
406 |         self.coefs = np.reshape(sol['x'][:n], (m, p)).T
407 |         self.coefs += np.dot(D.T, np.reshape(sol['x'][n:], (m, p-1)).T)
408 |         self.sol = sol
409 | 
410 |         # Set the intercept (the quantile property is not satisfied)
411 |         self.intercept = np.asarray(sol['y']).squeeze()
412 | 
413 |     def qp_nc2(self, Kin, Kout, y):
414 |         ind = np.argsort(self.probs)  # Needed to sort constraints on quantile levels
415 | 
416 |         K = np.kron(Kin, Kout)
417 |         p = np.size(self.probs)  # Number of quantiles to predict
418 |         n = K.shape[0]  # Number of coefficients
419 |         m = n // p  # Number of training instances
420 |         l = m * (p-1)  # Number of non-crossing dual variables
421 |         probs = np.kron(np.ones(m), self.probs)  # Quantile levels
422 | 
423 |         D = -np.eye(p) + np.diag(np.ones(p-1), 1)  # Difference matrix
424 |         D = np.delete(D, -1, 0)
425 |         D = D.T[np.argsort(ind)].T
426 | 
427 |         K = matrix(np.r_[np.c_[K, np.zeros((n, l))], np.zeros((l, n+l))])  # Quad. part of the obj.
428 |         q = matrix(np.r_[-np.kron(y, np.ones(p)), np.zeros(l)])  # Linear part of the objective
429 |         G = matrix(np.r_[np.c_[np.eye(n), -np.kron(np.eye(m), D.T)],
430 |                          np.c_[-np.eye(n), np.kron(np.eye(m), D.T)],
431 |                          np.c_[np.zeros((l, n)), -np.eye(l)]])  # LHS of the inequ. constr.
432 |         h = matrix(np.r_[self.C*probs, self.C*(1-probs), np.zeros(m*(p-1))])  # RHS of the inequ.
433 |         A = matrix(np.c_[np.kron(np.ones(m), np.eye(p)), np.zeros((p, l))])  # LHS of the equ. constr.
434 |         b = matrix(np.zeros(p))  # RHS of the equality constraint
435 | 
436 |         # See qp_nc for usage instructions
437 |         solvers.options['show_progress'] = self.verbose
438 |         if self.tol > 0:
439 |             solvers.options['reltol'] = self.tol
440 |         self.time = time.process_time()  # Store beginning time
441 |         sol = solvers.qp(K, q, G, h, A, b)  # Solve the dual opt. problem
442 |         self.time = time.process_time() - self.time  # Store training time
443 | 
444 |         # Set coefs
445 |         self.coefs = np.reshape(sol['x'][:n], (m, p)).T
446 |         self.sol = sol
447 | 
448 |         # Set the intercept (the quantile property is not satisfied)
449 |         self.intercept = np.asarray(sol['y']).squeeze()
450 | 
451 |     def qp(self, K, y):
452 |         p = np.size(self.probs)  # Number of quantiles to predict
453 |         n = K.shape[0]  # Number of variables
454 |         probs = np.kron(np.ones(n//p), self.probs)  # Quantile levels
455 | 
456 |         K = matrix(K)  # Quadratic part of the objective
457 |         q = matrix(-np.kron(y, np.ones(p)))  # Linear part of the objective
458 |         G = matrix(np.r_[np.eye(n), -np.eye(n)])  # LHS of the inequ. constr.
459 |         h = matrix(np.r_[self.C*probs, self.C*(1-probs)])  # RHS of the inequ.
460 |         A = matrix(np.kron(np.ones(n//p), np.eye(p)))  # LHS of the equ. constr.
461 |         b = matrix(np.zeros(p))  # RHS of the equality constraint
462 | 
463 |         # See qp_nc for usage instructions
464 |         solvers.options['show_progress'] = self.verbose
465 |         if self.tol > 0:
466 |             solvers.options['reltol'] = self.tol
467 |             # solvers.options['feastol'] = self.tol * 1./10
468 |         self.time = time.process_time()  # Store beginning time
469 |         sol = solvers.qp(K, q, G, h, A, b)  # Solve the dual opt. problem
470 |         self.time = time.process_time() - self.time  # Store training time
471 | 
472 | 
473 |         # Set coefs
474 |         self.coefs = np.reshape(sol['x'], (n//p, p)).T
475 |         self.sol = sol
476 | 
477 |         # Set the intercept
478 |         # self.intercept = np.asarray(sol['y']).squeeze()
479 | 
480 |         # Set optimal objective value
481 |         # Either this
482 |         # self.obj = np.asarray(0.5 * sol['x'].T * K * sol['x'] \
483 |         #     + q.T * sol['x'])
484 |         # self.obj = float(self.obj.squeeze())
485 |         # Or that
486 |         # self.obj = sol['primal objective']
487 | 
488 |     def qp_eps(self, K, y):  #, coefs_init):
489 |         p = np.size(self.probs)  # Number of quantiles to predict
490 |         n = K.shape[0]  # Number of variables
491 |         probs = np.kron(np.ones(n//p), self.probs)  # Quantile levels
492 | 
493 |         q = matrix(-np.kron(y, np.ones(p)))  # Linear part of the objective
494 |         G = matrix(np.r_[np.eye(n), -np.eye(n)])  # LHS of the inequ. constr.
495 |         h = matrix(np.r_[self.C*probs, self.C*(1-probs)])  # RHS of the inequ.
496 |         A = matrix(np.kron(np.ones(n//p), np.eye(p)))  # LHS of the equ. constr.
497 |         b = matrix(np.zeros(p))  # RHS of the equality constraint
498 |         # Initialization is disabled because it seems to slow down convergence
499 |         # initvals = None if self.coefs_init is None else matrix(coefs_init)
500 |         initvals = None
501 | 
502 |         # See qp_nc for usage instructions
503 |         solvers.options['show_progress'] = self.verbose
504 |         if self.tol > 0:
505 |             solvers.options['reltol'] = self.tol
506 |             # solvers.options['feastol'] = self.tol * 1./10
507 | 
508 |         self.time = time.process_time()  # Store beginning time
509 |         if self.eps == 0:
510 |             K = matrix(K)  # Quadratic part of the objective
511 |             sol = solvers.qp(K, q, G, h, A, b, initvals=initvals)  # Solve the dual opt. problem
512 |             coefs = np.reshape(sol['x'], (n//p, p)).T
513 |         else:
514 |             solvers.options['show_progress'] = False
515 |             mu = np.ones(n//p)  # Penalty for l1-l2 norm
516 |             coefs = np.r_[0]  # Initialization for computing improvement
517 | 
518 |             start_it = time.process_time()
519 |             for it in range(self.max_iter):
520 |                 mu = self.eps / mu
521 |                 Kmu = matrix(K + np.diag(np.kron(mu, np.ones(p))))  # Quadratic part of the objective
522 |                 sol = solvers.qp(Kmu, q, G, h, A, b, initvals=initvals)  # Solve the dual opt. problem
523 |                 improvement = np.linalg.norm(coefs.T.ravel() -
524 |                                              np.asarray(sol['x']).ravel()) / (self.C*p)
525 |                 coefs = np.reshape(sol['x'], (n//p, p)).T
526 |                 if self.verbose:
527 |                     print("it: %d improvement: %0.2e" % (it, improvement))
528 |                 if improvement < self.lag_tol:
529 |                     break
530 |                 if self.max_time > 0 and time.process_time() - start_it > self.max_time:
531 |                     break
532 |                 # Warm-start is disabled because it seems to slow down convergence
533 |                 # initvals = sol['x']
534 |                 mu = np.linalg.norm(coefs, axis=0)
535 |                 mu[mu < 1e-32] = 1e-32
536 |         self.time = time.process_time() - self.time  # Store training time
537 | 
538 |         # Set coefs
539 |         self.coefs = coefs
540 |         self.sol = sol
541 | 
542 |     def coneqp_eps(self, K, y):  #, coefs_init):
543 |         p = np.size(self.probs)  # Number of quantiles to predict
544 |         n = K.shape[0]  # Number of variables
545 |         m = n//p  # Number of points
546 |         probs = np.kron(np.ones(m), self.probs)  # Quantile levels
547 | 
548 |         # Initialization is disabled because it seems to slow down convergence
549 |         # initvals = None if self.coefs_init is None else matrix(coefs_init)
550 |         initvals = None
551 | 
552 |         # See qp_nc for usage instructions
553 |         solvers.options['show_progress'] = self.verbose
554 |         solvers.options['maxiters'] = self.max_iter
555 |         if self.tol > 0:
556 |             solvers.options['reltol'] = self.tol
557 |             # solvers.options['feastol'] = self.tol * 1./10
558 | 
559 |         self.time = time.process_time()  # Store beginning time
560 |         if self.eps == 0:
561 |             K = matrix(K)  # Quadratic part of the objective
562 |             q = matrix(-np.kron(y, np.ones(p)))  # Linear part of the objective
563 |             G = matrix(np.r_[np.eye(n), -np.eye(n)])  # LHS of the inequ. constr.
564 |             h = matrix(np.r_[self.C*probs, self.C*(1-probs)])  # RHS of the inequ.
565 |             A = matrix(np.kron(np.ones(m), np.eye(p)))  # LHS of the equ. constr.
566 |             b = matrix(np.zeros(p))  # RHS of the equality constraint
567 | 
568 |             sol = solvers.qp(K, q, G, h, A, b, initvals=initvals)  # Solve the dual opt. problem
569 |             coefs = np.reshape(sol['x'], (m, p)).T
570 |         else:
571 |             def buildG(m, p):
572 |                 n = m*p
573 | 
574 |                 # Get the norm bounds (m last variables)
575 |                 A = np.zeros(p+1)
576 |                 A[0] = -1
577 |                 A = np.kron(np.eye(m), A).T
578 |                 # Get the m p-long vectors
579 |                 B = np.kron(np.eye(m), np.c_[np.zeros(p), np.eye(p)].T)
580 |                 # Box constraint
581 |                 C = np.c_[np.r_[np.eye(n), -np.eye(n)], np.zeros((2*n, m))]
582 |                 # Set everything together
583 |                 C = np.r_[C, np.c_[B, A]]
584 |                 return C
585 | 
586 |             # 2*n non-negative variables
587 |             # [p+1]*m SOC variables
588 | 
589 |             K = matrix(np.r_[np.c_[K, np.zeros((n, m))], np.zeros((m, n+m))])  # Quadratic part of the objective
590 |             q = matrix(np.r_[-np.kron(y, np.ones(p)), np.ones(m)*self.eps])  # Linear part of the objective
591 |             G = matrix(buildG(m, p))  # LHS of the inequ. constr.
592 |             h = matrix(np.r_[self.C*probs, self.C*(1-probs), np.zeros(m*(p+1))])  # RHS of the inequ.
593 |             A = matrix(np.c_[np.kron(np.ones(m), np.eye(p)), np.zeros((p, m))])  # LHS of the equ. constr.
594 |             b = matrix(np.zeros(p))  # RHS of the equality constraint
595 |             dims = {'l': 2*n, 'q': [p+1]*m, 's': []}
596 | 
597 |             sol = solvers.coneqp(K, q, G, h, dims, A, b, initvals=initvals)  # Solve the dual opt. problem
598 |             coefs = np.reshape(sol['x'][:n], (m, p)).T
599 |         self.time = time.process_time() - self.time  # Store training time
600 | 
601 |         # Set coefs
602 |         self.coefs = coefs
603 |         self.sol = sol
604 | 
605 |     def sdca(self, Kin, Kout, y, coefs_init):
606 |         n_samples = Kin.shape[0]
607 |         n_dim = Kout.shape[0]
608 | 
609 |         # For block descent, the step size depends on the maximum eigenvalue of Kout
610 |         # Same as np.linalg.eigvalsh(Kout)[-1]
611 |         Kout_lambda_max = eigvalsh(Kout, eigvals=(n_dim-1, n_dim-1))[0]
612 | 
613 |         # Data
614 |         dsin = get_dataset(Kin, order="c")
615 |         dsout = get_dataset(Kout, order="c")
616 | 
617 |         # Initialization
618 |         # Used if done in fit
619 |         self.coefs = np.zeros(n_dim*n_samples, dtype=np.float64) if \
620 |             self.coefs_init is None else coefs_init
621 |         # What is below was moved to fit
622 |         # if self.coefs_init is None:
623 |         #     self.coefs = np.zeros(n_dim*n_samples, dtype=np.float64)
624 |         # elif isinstance(self.coefs_init, str) and self.coefs_init.lower() == "svr":
625 |         #     # Estimate the conditional median
626 |         #     svr = SVR(C=self.C/2, kernel="precomputed", epsilon=self.eps)
627 |         #     svr.fit(Kin, y)
628 |         #     svr_dual = np.zeros(y.shape)
629 |         #     svr_dual[svr.support_] = svr.dual_coef_[0, :]
630 |         #     self.coefs = np.kron(svr_dual, np.ones(n_dim))
631 |         # else:
632 |         #     self.coefs = self.coefs_init.T.ravel()
633 | 
634 |         # Array for objective values
635 |         # inner_obj = np.ones(self.max_iter)
636 | 
637 |         # Some parameters
638 |         n_calls = n_samples if self.n_calls is None else self.n_calls
639 |         rng = check_random_state(self.random_state)
640 |         status = np.zeros(1, dtype=np.int16)
641 | 
642 |         # Call to the solver
643 |         self.time = time.process_time()  # Store beginning time
644 |         _prox_sdca_intercept_fit(self, dsin, dsout, y, self.coefs, self.alpha,
645 |                                  self.C, self.eps, self.stepsize_factor,
646 |                                  self.probs, self.max_iter, self.tol,
647 |                                  self.callback, n_calls, self.max_time,
648 |                                  self.n_gap, self.gap_time_ratio,
649 |                                  self.verbose, rng, status, self.active_set,
650 |                                  Kout_lambda_max)
651 |         # , inner_obj)
652 |         self.time = time.process_time() - self.time  # Store training time
653 | 
654 |         # Set coefs
655 |         self.coefs = np.reshape(self.coefs, (n_samples, n_dim)).T
656 | 
657 |         # Save inner objective values
658 |         # self.inner_obj = inner_obj[inner_obj < 0]
659 | 
660 |         # Resolution status
661 |         if status[0] == 1:
662 |             self.status = "Optimal solution found"
663 |         elif status[0] == 2:
664 |             self.status = "Maximum iteration reached"
665 |         elif status[0] == 3:
666 |             self.status = "Maximum time reached"
667 |         else:
668 |             self.status = ""
669 | 
670 |         # Set the intercept
671 |         # self.intercept = 0.  # Erase the previous intercept before prediction
672 |         # self.intercept = [np.percentile(y-pred, 100.*prob) for (pred, prob)\
673 |         #                   in zip(self.predict(self.X), self.probs)]
674 |         # self.intercept = np.asarray(self.intercept)
675 | 
676 |         # Set optimal objective value
677 |         # self.obj = 0.5 * np.trace(
678 |         #     np.dot(self.coefs.T, np.dot(Kout, np.dot(self.coefs, Kin)))) \
679 |         #     - np.sum(self.coefs * y)
680 | 
681 |     def al(self, Kin, Kout, y, mugrow, coefs_init):
682 |         n_samples = Kin.shape[0]
683 |         n_dim = Kout.shape[0]
684 | 
685 |         dsin = get_dataset(Kin, order="c")
686 |         dsout = get_dataset(Kout, order="c")
687 | 
688 |         # Initialization
689 |         # Used if done in fit
690 |         coefs = np.zeros(n_dim*n_samples, dtype=np.float64) if \
691 |             self.coefs_init is None else coefs_init
692 |         # coefs = np.zeros(n_dim*n_samples, dtype=np.float64) \
693 |         #     if self.coefs_init is None else self.coefs_init.T.ravel()
694 |         b = np.zeros(n_dim)  # Intercept
695 | 
696 |         n_calls = n_samples if self.n_calls is None else self.n_calls
697 |         rng = check_random_state(self.random_state)
698 | 
699 |         # Parameters of the outer loop
700 |         if mugrow > 1:
701 |             mu = 2  # Factor of the Lagrangian penalization
702 |         elif mugrow == 1:
703 |             mu = 10
704 |         else:
705 |             raise ValueError("mugrow must be >= 1")
706 |         # mugrow = 4  # Growing factor of the penalization
707 |         prev_err = float('inf')  # Previous error for the outer loop
708 | 
709 |         # dual_tol = np.sqrt(n) * self.C * self.dual_tol  # Inner loop
710 |         # Loop
711 |         self.time = time.process_time()  # Store beginning time
712 |         for ito in range(self.max_iter):
713 |             _prox_sdca_al_fit(self, dsin, dsout, y, coefs, self.alpha,
714 |                               self.C, self.stepsize_factor, self.probs, b, mu,
715 |                               self.max_iter, self.tol, self.callback, n_calls,
716 |                               self.verbose, rng)
717 | 
718 |             # Update the intercept
719 |             # Gradient of the objective wrt the intercept
720 |             der = np.reshape(coefs, (n_samples, n_dim)).sum(axis=0)
721 |             b += mu * der  # Intercept update
722 |             mu *= mugrow  # mu update
723 | 
724 |             # Stopping criterion
725 |             lag_err = np.sum(der**2)  # Dual error
726 |             if lag_err < self.lag_tol or \
727 |                     np.abs(lag_err/prev_err - 1) < self.lag_tol:
728 |                 break
729 |             prev_err = lag_err  # Update the previous Lagrangian error
730 | 
731 |             # tol is the objective value to reach
732 |             if self.tol < 0:
733 |                 # Project coefs on the constraints
734 |                 proj_coefs = proj_dual(np.reshape(coefs, (n_samples, n_dim)).T,
735 |                                        self.C, self.probs)
736 | 
737 |                 # Compute the objective value
738 |                 obj = 0.5 * np.trace(np.dot(
739 |                     proj_coefs.T, np.dot(Kout, np.dot(proj_coefs, Kin)))) \
740 |                     - np.sum(proj_coefs * y)
741 | 
742 |                 if self.verbose:
743 |                     print("it: %d obj: %0.2f" % (ito, obj))
744 | 
745 |                 # Stopping criterion
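                # (obj is computed from the projected, feasible coefficients,
                # so it is comparable to a reference objective value)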
746 |                 if obj <= self.tol:
747 |                     if self.verbose:
748 |                         print("Ground truth objective value reached.")
749 |                     break
750 | 
751 |             # Maximum training time
752 |             current_time = time.process_time() - self.time  # Current training time
753 |             if current_time > self.al_max_time:
754 |                 if self.verbose:
755 |                     print("Maximum training time reached.")
756 |                 break
757 |         else:
758 |             if self.verbose:
759 |                 print('Did not converge after {} iterations.'.format(ito+1))
760 | 
761 |         self.time = time.process_time() - self.time  # Store training time
762 | 
763 |         # Set coefs
764 |         self.coefs = np.reshape(coefs, (n_samples, n_dim)).T
765 | 
766 |         # Set the intercept
767 |         # self.intercept = b
768 | 
769 |         # Set optimal objective value
770 |         # self.obj = 0.5 * np.trace(
771 |         #     np.dot(self.coefs.T, np.dot(Kout, np.dot(self.coefs, Kin)))) \
772 |         #     - np.sum(self.coefs * y)
773 | 
774 |     def mtl(self, y):
775 |         d = self.X.shape[1]  # Data dimension
776 |         p = self.probs.shape[0]  # Number of tasks
777 |         verbose = self.verbose
778 |         self.verbose = False
779 | 
780 |         self.D = np.eye(d) / d  # Initialize D
781 |         err = np.inf
782 | 
783 |         self.time = time.process_time()  # Store beginning time
784 |         for it in range(self.max_iter):
785 |             Kin = np.dot(self.X, self.D.dot(self.X.T))  # Compute input kernel
786 |             self.qp(np.kron(Kin, np.eye(p)), y)  # Solve QR problem (fixed D)
787 | 
788 |             if it < self.max_iter-1:
789 |                 # Update D
790 |                 B = self.coefs.dot(self.X).dot(self.D)  # Coefficients of the linear predictor
791 | 
792 |                 # Update with eigenvalue decomposition
793 |                 # C = B.T.dot(B)
794 |                 # e, V = np.linalg.eigh(C)  # Eigenvalues and eigenvectors
795 |                 # e[e<0] = 0
796 |                 # self.D = V.dot(np.diag(np.sqrt(e)).dot(V.T))
797 |                 # self.D /= np.trace(self.D)
798 | 
799 |                 # Update with singular value decomposition
800 |                 _, s, V = np.linalg.svd(B)
801 |                 s = np.r_[s, np.zeros(max(0, d-p))]
802 |                 D = np.dot(V.T, np.diag(s).dot(V)) / s.sum()
803 |                 err = np.linalg.norm(D-self.D)
804 |                 self.D = D
805 | 
806 |             if verbose:
807 |                 obj = -0.5 * np.trace(np.dot(
808 |                     self.coefs.T, np.dot(self.coefs, Kin))) \
809 |                     + np.sum(self.coefs * y)
810 |                 print(it, obj, err, self.tol)
811 | 
812 |             if err < self.tol:
813 |                 if verbose:
814 |                     print("Converged.")
815 |                 break
816 |         else:
817 |             if verbose:
818 |                 print('Did not converge after {} iterations.'.format(it+1))
819 | 
820 |         self.time = time.process_time() - self.time  # Store training time
821 |         self.verbose = verbose
822 | 
823 |     def pinball_loss(self, pred, y):
824 |         y = np.ravel(y)
825 |         residual = y - pred
826 |         loss = np.sum([prob*np.fmax(0, res) for (res, prob) in
827 |                        zip(residual, self.probs)], axis=1)
828 |         loss += np.sum([(prob-1)*np.fmin(0, res) for (res, prob) in
829 |                         zip(residual, self.probs)], axis=1)
830 |         loss = loss * 1./y.size
831 |         return loss
832 | 
833 |     def qloss(self, pred, y):
834 |         y = np.ravel(y)
835 |         residual = y - pred
836 |         loss = np.sum([res < 0 for (res, prob) in
837 |                        zip(residual, self.probs)], axis=1)
838 |         loss = loss * 1./y.size - self.probs
839 |         return loss
840 | 
841 |     def crossing_loss(self, pred):
842 |         ind = np.argsort(self.probs)
843 |         loss = np.sum([np.fmax(0, -np.diff(res)) for res in pred[ind].T],
844 |                       axis=0)
845 |         loss = loss * 1./pred.shape[1]
846 |         return loss
847 | 
848 |     def ind_sv(self):
849 |         group_norm = np.linalg.norm(self.coefs, axis=0) / (self.C * len(self.probs))
850 |         return np.where(group_norm > self.sv_tol)[0]
851 | 
852 |     def num_sv(self):
853 |         return self.ind_sv().size
854 | 
855 | 
856 | class QRegMTL(BaseEstimator):
857 |     def __init__(self, gamma_in=None, Creg=None, location=True,
858 |                  n_landmarks=None, **args):
859 |         """
860 |         Quantile Regression with multi-task learning.
861 | 
862 |         Ref: Parametric Task Learning, by Ichiro Takeuchi, Tatsuya Hongo,
863 |         Masashi Sugiyama and Shinichi Nakajima (NIPS 2013).
864 | 
865 |         Methodology:
866 |             First, estimate the conditional mean function E[Y|X=x] by least-squares
867 |             regression, and compute the residuals r_i = y_i - E[Y|X=x_i]. Then,
868 |             apply multi-task learning to (x_i, r_i) and estimate a conditional
869 |             quantile function by Q(x|p) = E[Y|X=x] + h(x|p), where h(.|p) is
870 |             the estimated quantile regression function fitted to the residuals.
871 | 
872 |         location: whether to use a location model (as proposed in the paper)
873 |         gamma_in: gamma parameter for the input RBF map
874 |         n_landmarks: number of landmarks for the input mapping. When None,
875 |             use all training points. When less than 1, consider it as a ratio
876 |             of training points. Otherwise, it is the number of landmarks.
877 |         Creg: cost parameter for the ridge regression (location model).
878 |             Positive scalar. When it is None, use least-squares regression.
879 |         C: cost parameter (upper bound of dual variables). Positive scalar.
880 |         probs: probabilities (quantile levels)
881 |         max_iter: maximum number of iterations
882 |         tol: prescribed tolerance
883 |         """
884 |         self.gamma_in = gamma_in
885 |         self.location = location
886 |         self.Creg = Creg
887 |         self.n_landmarks = n_landmarks
888 | 
889 |         if 'alg' in args:
890 |             del args['alg']
891 |         self.reg = QRegressor(alg='mtl', **args)
892 | 
893 |     def predict(self, X):
894 |         """
895 |         Predict the conditional quantiles
896 | 
897 |         Parameters:
898 |             X: data in rows (numpy array)
899 | 
900 |         Returns:
901 |             y: predictions, one row per prescribed quantile level
902 |         """
903 | 
904 |         X = np.asarray(X)
905 |         if X.ndim == 1:
906 |             # X = np.asarray([X]).T
907 |             # Data has a single feature
908 |             X = X.reshape(-1, 1)
909 | 
910 |         # Map the data with RBF kernel
911 |         Din = dist.cdist(X, self.X, 'sqeuclidean')
912 |         X_map = np.exp(-self.gamma_in * Din)
913 | 
914 |         # Prediction
915 |         pred = self.reg.predict(X_map)
916 |         if self.location:
917 |             pred += self.lsr.predict(X_map) * self.std_residue + \
918 |                 self.mean_residue
919 |         # pred += self.lsr.predict(X_map) if self.location else 0
920 | 
921 |         return pred
922 | 
923 |     def fit(self, X, y):
924 |         """
925 |         Fit the model.
926 | 
927 |         X: data in rows (numpy array)
928 |         y: targets in rows (numpy array)
929 |         """
930 | 
931 |         self.X = np.asarray(X)  # Training data as landmarks
932 |         if self.X.ndim == 1:
933 |             # self.X = np.asarray([X]).T
934 |             # Data has a single feature
935 |             self.X = self.X.reshape(-1, 1)
936 | 
937 |         # If no gamma_in is specified, take 0.5 / q, where q is the 0.7-quantile
938 |         # of the squared distances
939 |         Din = dist.pdist(self.X, 'sqeuclidean')
940 |         if self.gamma_in is None:
941 |             self.gamma_in = 1. / (2. * np.percentile(Din, 70.))
942 | 
943 |         # Map the data with RBF kernel
944 |         if not self.n_landmarks:  # n_landmarks = None => use all data
945 |             X_map = np.exp(-self.gamma_in * dist.squareform(Din))  # All data as landmarks
946 |         else:
947 |             if self.n_landmarks < 1:  # Ratio
948 |                 n_landmarks = int(np.floor(self.n_landmarks * self.X.shape[0]))
949 |             else:
950 |                 n_landmarks = self.n_landmarks
951 |             L = self.X[np.random.randint(self.X.shape[0], size=n_landmarks)]  # Random landmarks
952 |             Din = dist.cdist(self.X, L, 'sqeuclidean')
953 |             self.X = L  # Store landmarks
954 |             X_map = np.exp(-self.gamma_in * Din)
955 | 
956 |         # Least-squares regression
957 |         if self.location:
958 |             self.lsr = LinearRegression() if not self.Creg \
959 |                 else Ridge(alpha=1./self.Creg)
960 |             self.lsr.fit(X_map, y)
961 |             residue = y - self.lsr.predict(X_map)
962 | 
963 |             self.mean_residue = residue.mean()
964 |             self.std_residue = residue.std()
965 |             residue = (residue - self.mean_residue) / self.std_residue
966 |         else:
967 |             self.lsr = None
968 |             self.mean_residue = None
969 |             self.std_residue = None
970 |             residue = y
971 | 
972 |         # Fit on training data
973 |         self.reg.fit(X_map, residue)
974 | 
975 |     def score(self, X, y, sample_weight=None):
976 |         # Pinball loss
977 |         return 1 - self.pinball_loss(self.predict(X), y).mean()
978 | 
979 |     def get_params(self, deep=True):
980 |         p = super(QRegMTL, self).get_params()
981 |         p.update(self.reg.get_params())
982 |         return p
983 | 
984 |     def set_params(self, **parameters):
985 |         for parameter in ['gamma_in', 'location', 'Creg', 'reg', 'n_landmarks']:
986 |             if parameter in parameters:
987 |                 setattr(self, parameter, parameters[parameter])
988 |                 del parameters[parameter]
989 |         self.reg.set_params(**parameters)
990 |         return self
991 | 
992 |     def pinball_loss(self, pred, y):
993 |         return self.reg.pinball_loss(pred, y)
994 | 
995 |     def qloss(self, pred, y):
996 |         return self.reg.qloss(pred, y)
997 | 
998 |     def crossing_loss(self, pred):
999 |         return self.reg.crossing_loss(pred)
--------------------------------------------------------------------------------
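
Usage sketch (illustrative, not part of the repository sources): a minimal end-to-end run of the estimators defined above, assuming the Cython extensions have been built (e.g. with "make inplace") so that the qreg package is importable; the quantile levels and hyper-parameters below are arbitrary.

from qreg import QRegressor, QRegMTL, toy_data

probs = [0.1, 0.5, 0.9]                              # Quantile levels to estimate
x, y, true_quantiles = toy_data(n=100, probs=probs)  # 1-D toy data

reg = QRegressor(C=100., probs=probs, gamma_out=1e-2)  # Default alg='coneqp'
reg.fit(x, y)
pred = reg.predict(x)    # One row of predictions per quantile level
print(reg.score(x, y))   # 1 - mean pinball loss
print(reg.num_sv())      # Number of detected support vectors

mtl = QRegMTL(probs=probs, C=100.)  # Multi-task variant on RBF features
mtl.fit(x, y)
print(mtl.predict(x).shape)  # (len(probs), len(x))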