├── qreg
│   ├── __init__.py
│   ├── setup.py
│   ├── dataset_fast.pxd
│   ├── dataset_fast.pyx
│   ├── sdca_qr_al_fast.pyx
│   ├── sdca_qr_fast.pyx
│   └── qreg.py
├── Makefile
├── setup.py
├── examples
│   ├── algorithms.py
│   ├── svr.py
│   └── methods.py
└── README.rst

/qreg/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1"
2 | 
3 | from .qreg import QRegressor
4 | from .qreg import QRegMTL
5 | from .qreg import toy_data
6 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | PYTHON ?= python
2 | CYTHON ?= cython
3 | 
4 | CYTHONSRC= $(wildcard qreg/*.pyx)
5 | CSRC= $(CYTHONSRC:.pyx=.cpp)
6 | 
7 | inplace:
8 | 	$(PYTHON) setup.py build_ext -i
9 | 
10 | all: cython inplace
11 | 
12 | cython: $(CSRC)
13 | 
14 | clean:
15 | 	rm -f qreg/*.c qreg/*.cpp qreg/*.html
16 | 	rm -f `find qreg -name "*.pyc"`
17 | 	rm -f `find qreg -name "*.so"`
18 | 	rm -rf `find qreg -name "*pycache*"`
19 | 	rm -rf build
20 | 	rm -rf *egg-info
21 | 	rm -rf dist
22 | 
23 | %.cpp: %.pyx
24 | 	$(CYTHON) --cplus $<
25 | 
--------------------------------------------------------------------------------
/qreg/setup.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | 
3 | import numpy
4 | 
5 | 
6 | def configuration(parent_package='', top_path=None):
7 |     from numpy.distutils.misc_util import Configuration
8 | 
9 |     config = Configuration('qreg', parent_package, top_path)
10 | 
11 |     srcdir = os.path.join(top_path, "qreg/")
12 |     print(srcdir)
13 | 
14 |     config.add_extension('dataset_fast',
15 |                          sources=['dataset_fast.cpp'],
16 |                          include_dirs=[numpy.get_include(), srcdir])
17 | 
18 |     config.add_extension('sdca_qr_fast',
19 |                          sources=['sdca_qr_fast.cpp'],
20 |                          include_dirs=[numpy.get_include(), srcdir])
21 | 
22 |     config.add_extension('sdca_qr_al_fast',
23 |                          sources=['sdca_qr_al_fast.cpp'],
24 |                          include_dirs=[numpy.get_include(), srcdir])
25 | 
26 |     return config
27 | 
28 | if __name__ == '__main__':
29 |     from numpy.distutils.core import setup
30 |     setup(**configuration(top_path='').todict())
31 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (C) 2012 Maxime Sangnier, Olivier Fercoq
4 | 
5 | import sys
6 | import os
7 | import setuptools
8 | from numpy.distutils.core import setup
9 | 
10 | 
11 | def configuration(parent_package='', top_path=None):
12 |     if os.path.exists('MANIFEST'):
13 |         os.remove('MANIFEST')
14 | 
15 |     from numpy.distutils.misc_util import Configuration
16 |     config = Configuration(None, parent_package, top_path)
17 | 
18 |     config.add_subpackage('qreg')
19 | 
20 |     return config
21 | 
22 | if __name__ == "__main__":
23 | 
24 |     old_path = os.getcwd()
25 |     local_path = os.path.dirname(os.path.abspath(sys.argv[0]))
26 | 
27 |     os.chdir(local_path)
28 |     sys.path.insert(0, local_path)
29 | 
30 |     setup(configuration=configuration,
31 |           name='qreg',
32 |           maintainer='Maxime Sangnier',
33 |           maintainer_email='maxime.sangnier@upmc.fr',
34 |           description='Data sparse quantile regression in Python',
35 |           license='New BSD',
36 |           url='https://github.com/msangnier/qreg',
37 |           version='0.1',
38 |           download_url='https://github.com/msangnier/qreg',
39 |           long_description=open('README.rst').read(),
40 |           zip_safe=False,
41 |           install_requires=['numpy', 'cvxopt'])
42 | 
--------------------------------------------------------------------------------
/qreg/dataset_fast.pxd:
--------------------------------------------------------------------------------
1 | # Author: Mathieu Blondel
2 | # License: BSD
3 | 
4 | cdef class Dataset:
5 | 
6 |     cdef int n_samples
7 |     cdef int n_features
8 | 
9 |     cpdef int get_n_samples(self)
10 |     cpdef int get_n_features(self)
11 | 
12 | 
13 | cdef class RowDataset(Dataset):
14 | 
15 |     cdef void get_row_ptr(self,
16 |                           int i,
17 |                           int** indices,
18 |                           double** data,
19 |                           int* n_nz) nogil
20 | 
21 |     cpdef get_row(self, int i)
22 | 
23 | 
24 | cdef class ColumnDataset(Dataset):
25 | 
26 |     cdef void get_column_ptr(self,
27 |                              int j,
28 |                              int** indices,
29 |                              double** data,
30 |                              int* n_nz) nogil
31 | 
32 |     cpdef get_column(self, int j)
33 | 
34 | 
35 | cdef class ContiguousDataset(RowDataset):
36 | 
37 |     cdef int* indices
38 |     cdef double* data
39 |     cdef object X
40 | 
41 |     cdef void get_row_ptr(self,
42 |                           int i,
43 |                           int** indices,
44 |                           double** data,
45 |                           int* n_nz) nogil
46 | 
47 | 
48 | cdef class FortranDataset(ColumnDataset):
49 | 
50 |     cdef int* indices
51 |     cdef double* data
52 |     cdef object X
53 | 
54 |     cdef void get_column_ptr(self,
55 |                              int j,
56 |                              int** indices,
57 |                              double** data,
58 |                              int* n_nz) nogil
59 | 
60 | 
61 | cdef class CSRDataset(RowDataset):
62 | 
63 |     cdef int* indices
64 |     cdef double* data
65 |     cdef int* indptr
66 |     cdef object X
67 | 
68 |     cdef void get_row_ptr(self,
69 |                           int i,
70 |                           int** indices,
71 |                           double** data,
72 |                           int* n_nz) nogil
73 | 
74 | 
75 | cdef class CSCDataset(ColumnDataset):
76 | 
77 |     cdef int* indices
78 |     cdef double* data
79 |     cdef int* indptr
80 |     cdef object X
81 | 
82 |     cdef void get_column_ptr(self,
83 |                              int j,
84 |                              int** indices,
85 |                              double** data,
86 |                              int* n_nz) nogil
87 | 
--------------------------------------------------------------------------------
/examples/algorithms.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | # Author: Maxime Sangnier
3 | # License: BSD
4 | 
5 | """
6 | Example of how to use the Quantile Regression toolbox with epsilon-insensitive loss (comparison of algorithms).
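Five solvers are run on the same joint quantile regression problem: 'qp' and
'sdca' with the pinball loss (eps=0), and 'qp-eps', 'coneqp-eps' and 'sdca-eps'
with the epsilon-insensitive loss. For each solver, the script plots the
estimated conditional quantiles and reports the objective value, training time
and number of support vectors, then compares the dual coefficients side by side.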
7 | """ 8 | 9 | import numpy as np 10 | from scipy.stats import norm 11 | import matplotlib.pyplot as plt 12 | from qreg import QRegressor, toy_data 13 | 14 | 15 | if __name__ == '__main__': 16 | probs = np.linspace(0.1, 0.9, 5) # Joint quantile regression 17 | eps = 0.25*len(probs) # Threshold for epsilon-loss 18 | algorithms = ['qp', 'sdca', 'qp-eps', 'coneqp-eps', 'sdca-eps'] # Algorithms to compare 19 | 20 | x_train, y_train, z_train = toy_data(50) 21 | x_test, y_test, z_test = toy_data(1000, t_min=-0.2, t_max=1.7, probs=probs) 22 | reg = QRegressor(C=1e2, probs=probs, gamma_out=1e-2, max_iter=1e4, verbose=False, lag_tol=1e-3, active_set=True) 23 | 24 | res = [] # List for resulting coefficients 25 | plt.figure(figsize=(12, 7)) 26 | for it, alg in enumerate(algorithms): 27 | if 'eps' in alg.lower(): 28 | reg.alg = alg[:-4] 29 | reg.eps = eps 30 | else: 31 | reg.alg = alg 32 | reg.eps = 0. 33 | 34 | # Fit on training data and predict on test data 35 | reg.fit(x_train, y_train) 36 | pred = reg.predict(x_test) 37 | 38 | # Plot the estimated conditional quantiles 39 | plt.subplot(1, len(algorithms), it+1) 40 | plt.plot(x_train, y_train, '.') 41 | for q in pred: 42 | plt.plot(x_test, q, '-') 43 | for q in z_test: 44 | plt.plot(x_test, q, '--') 45 | plt.title(alg.upper()) 46 | 47 | # Print the optimal objective value 48 | print(alg.upper() + ":") 49 | print(" objective value: %f" % reg.obj) 50 | print(" training time: %0.2fs" % reg.time) 51 | # Print number of support vectors 52 | print(" #SV: %d" % reg.num_sv()) 53 | 54 | # Save optimal objectives and coefficients 55 | res.append((reg.obj, reg.coefs)) 56 | 57 | # Comparison SDCA / CVXOPT 58 | plt.figure(figsize=(12, 7)) 59 | plt.subplot2grid((1, len(algorithms)*2), (0, 0), colspan=len(algorithms)) 60 | for alg, (obj, coefs) in zip(algorithms, res): 61 | # Plot the solutions of SDCA, CVXOPT and AL 62 | plt.plot(coefs.ravel()) 63 | plt.legend([alg.upper() for alg in algorithms]) 64 | plt.title('Dual coefs') 65 | plt.plot([0, coefs.size], [0, 0], ':') 66 | 67 | for it, (alg, (obj, coefs)) in enumerate(zip(algorithms, res)): 68 | # Plot the solutions of SDCA, CVXOPT and AL 69 | plt.subplot2grid((1, len(algorithms)*2), (0, len(algorithms)+it)) 70 | plt.imshow(np.fabs(coefs.T)) 71 | plt.title(alg.upper()) 72 | 73 | plt.show() 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | qreg 4 | ==== 5 | 6 | qreg is a Python library for data sparse and non-parametric quantile regression. It implements quantile regression with matrix-valued kernels and makes it possible to learn several quantile curves simultaneously with a sparsity requirement on supporting data. 7 | 8 | Highlights: 9 | 10 | - based on the library `lightning `_; 11 | - follows the `scikit-learn `_ style of programming; 12 | - computationally demanding parts implemented in `Cython `_. 13 | 14 | Example 15 | ------- 16 | 17 | .. 
code-block:: python 18 | 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | from qreg import QRegressor, toy_data 22 | 23 | # Quantile levels to prediect 24 | probs = np.linspace(0.1, 0.9, 5) 25 | 26 | # Train and test dataset 27 | x_train, y_train, z_train = toy_data(50) 28 | x_test, y_test, z_test = toy_data(1000, t_min=-0.2, t_max=1.7, probs=probs) 29 | 30 | # Define the quantile regressor 31 | reg = QRegressor(C=1e2, # Trade-off parameter 32 | probs=probs, # Quantile levels 33 | gamma_out=1e-2, # Inner kernel parameter 34 | eps=2, # Epsilon-loss level 35 | alg='sdca', # Algorithm (can change to 'qp') 36 | max_iter=1e4, # Maximal number of iteration 37 | active_set=True, # Active set strategy 38 | verbose=True) 39 | 40 | # Fit on training data and predict on test data 41 | reg.fit(x_train, y_train) 42 | pred = reg.predict(x_test) 43 | 44 | # Plot the estimated conditional quantiles 45 | plt.plot(x_train, y_train, '.') 46 | for q in pred: 47 | plt.plot(x_test, q, '-') 48 | for q in z_test: 49 | plt.plot(x_test, q, '--') 50 | 51 | # Print some information 52 | print("Objective value: %f" % reg.obj) 53 | print("Training time: %0.2fs" % reg.time) 54 | print("#SV: %d" % reg.num_sv()) 55 | print("Score: %f" % reg.score(x_test, y_test)) 56 | 57 | plt.show() 58 | 59 | Dependencies 60 | ------------ 61 | 62 | qreg needs Python >= 2.7, setuptools, Numpy, SciPy, scikit-learn, cvxopt and a working C/C++ compiler. 63 | 64 | Installation 65 | ------------ 66 | 67 | To install qreg from pip, type:: 68 | 69 | pip install https://github.com/msangnier/qreg/archive/master.zip 70 | 71 | To install qreg from source, type:: 72 | 73 | git clone https://github.com/msangnier/qreg.git 74 | cd qreg 75 | python setup.py build 76 | sudo python setup.py install 77 | 78 | Authors 79 | ------- 80 | 81 | Olivier Fercoq and Maxime Sangnier 82 | 83 | References 84 | ---------- 85 | 86 | - Data sparse nonparametric regression with epsilon-insensitive losses (2017), M. Sangnier, O. Fercoq, F. d'Alché-Buc. Asian Conference on Machine Learning (ACML). 87 | - Joint quantile regression in vector-valued RKHSs (2016), M. Sangnier, O. Fercoq, F. d'Alché-Buc. Neural Information Processing Systems (NIPS). 88 | 89 | -------------------------------------------------------------------------------- /examples/svr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Author: Maxime Sangnier 3 | # License: BSD 4 | 5 | """ 6 | Quantile regression with epsilon-insensitive loss (comparison to SVR). 
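For the single quantile level 0.5, the epsilon-insensitive quantile regression
problem coincides with epsilon-SVR (up to the factor-2 rescaling of C used
below), so the dual coefficients, intercepts and objective values returned by
scikit-learn's SVR and by the 'sdca' and 'qp' solvers of QRegressor should match.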
7 | """ 8 | 9 | import numpy as np 10 | from scipy.stats import norm 11 | from scipy.spatial.distance import pdist, squareform 12 | import matplotlib.pyplot as plt 13 | from qreg import QRegressor, toy_data 14 | from sklearn.svm import SVR 15 | 16 | 17 | if __name__ == '__main__': 18 | probs = [0.5] # Single quantile regression (match SVR) 19 | eps = 1e-1 # Threshold for epsilon-loss 20 | C = 1e2 # Trade-off parameter 21 | gamma_in = 1 # Gaussian parameter for input data 22 | max_iter = 1e8 # Large enough 23 | verbose = False 24 | 25 | # Data 26 | x_train, y_train, z_train = toy_data(50) 27 | x_train = x_train[:, np.newaxis] # Make x 2-dimensional 28 | 29 | # Methods to compare 30 | methods = [('SVR', SVR(C=C, gamma=gamma_in, epsilon=eps)), 31 | ('SDCA', QRegressor(C=C*2, probs=probs, gamma_in=gamma_in, eps=eps, coefs_init=None, 32 | max_iter=max_iter, verbose=verbose, max_time=3, alg='sdca')), 33 | ('QP', QRegressor(C=C*2, probs=probs, gamma_in=gamma_in, eps=eps, coefs_init=None, 34 | max_iter=max_iter, verbose=verbose, max_time=3, alg='qp'))] 35 | 36 | # Objective value 37 | K = np.exp(-gamma_in * squareform(pdist(x_train, 'sqeuclidean'))) # Kernel matrix 38 | obj_fun = lambda x: 0.5 * x.dot(K.dot(x)) - y_train.dot(x) + eps*np.linalg.norm(x, ord=1) 39 | 40 | # Figure for dual coefs and residues 41 | plt.figure(figsize=(15, 8)) 42 | plt.plot([0, y_train.size], [eps] * 2, 'k:', label='+eps') 43 | plt.plot([0, y_train.size], [-eps] * 2, 'k:', label='-eps') 44 | # plt.plot([0, y_train.size], [0] * 2, 'k-', label='') 45 | 46 | # Do the job 47 | for name, reg in methods: 48 | # Fit the model 49 | reg.fit(x_train, y_train) 50 | 51 | # Get the dual vector and intercept 52 | if 'svr' in name.lower(): 53 | dual = np.zeros(y_train.shape) 54 | dual[reg.support_] = reg.dual_coef_[0, :].copy() 55 | intercept = reg.intercept_[0] 56 | pred = reg.predict(x_train) 57 | else: 58 | dual = reg.coefs[0, :].copy() 59 | intercept = reg.intercept[0] 60 | pred = reg.predict(x_train)[0] 61 | 62 | # Print information 63 | print(name) 64 | # Objective value 65 | if 'sdca' in name.lower(): 66 | print(" objective value: %f (inner value: %f)" % (obj_fun(dual), reg.obj)) 67 | else: 68 | print(" objective value: %f" % obj_fun(dual)) 69 | # Others 70 | print(" contraint: 0 = %e" % dual.sum()) # Constraint 71 | print(" intercept: {}".format(intercept)) 72 | 73 | # Plot dual coefs and residues 74 | plt.plot(dual/C, '-*', label="dual "+name) 75 | plt.plot(y_train-pred, label="residues "+name) 76 | 77 | # Figure for dual coefs and residues 78 | plt.grid() 79 | plt.legend(loc="best") 80 | plt.show() 81 | -------------------------------------------------------------------------------- /examples/methods.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Author: Maxime Sangnier 3 | # License: BSD 4 | 5 | """ 6 | Quantile regression with operator-valued kernels and multi-task learning. 
7 | """ 8 | 9 | import numpy as np 10 | from scipy.stats import norm 11 | import matplotlib.pyplot as plt 12 | from qreg import QRegressor, QRegMTL, toy_data 13 | 14 | 15 | if __name__ == '__main__': 16 | probs = np.linspace(0.1, 0.9, 5) # Joint quantile regression 17 | x_train, y_train, z_train = toy_data(50) 18 | x_test, y_test, z_test = toy_data(1000, probs=probs) 19 | 20 | # QR with operator-valued kernel 21 | ovk = QRegressor(C=1e2, probs=probs, gamma_out=1e-2, alg='qp') 22 | 23 | # Fit on training data and predict on test data 24 | print("Learn QRegressor") 25 | ovk.fit(x_train, y_train) 26 | pred = ovk.predict(x_test) 27 | 28 | # Plot the estimated conditional quantiles 29 | plt.close('all') 30 | plt.figure(figsize=(12, 7)) 31 | plt.subplot(231) 32 | plt.plot(x_train, y_train, '.') 33 | for q in pred: 34 | plt.plot(x_test, q, '-') 35 | for q in z_test: 36 | plt.plot(x_test, q, '--') 37 | plt.title('Operator-valued kernel') 38 | 39 | # QR with multi-task learning 40 | mtl = QRegMTL(C=1e2, probs=probs, n_landmarks=0.2) 41 | 42 | # Fit on training data and predict on test data 43 | print("Learn QRegMTL (with location)") 44 | mtl.fit(x_train, y_train) 45 | pred = mtl.predict(x_test) 46 | 47 | # Plot the estimated conditional quantiles 48 | plt.subplot(232) 49 | plt.plot(x_train, y_train, '.') 50 | for q in pred: 51 | plt.plot(x_test, q, '-') 52 | for q in z_test: 53 | plt.plot(x_test, q, '--') 54 | plt.title('Multi-task learning (with location)') 55 | 56 | plt.subplot(235) 57 | plt.imshow(mtl.reg.D) 58 | plt.colorbar() 59 | plt.title('Learned metric (with location)') 60 | 61 | # QR with multi-task learning (without location regression) 62 | mtl = QRegMTL(C=1e4, probs=probs, n_landmarks=0.2, location=False) 63 | 64 | # Fit on training data and predict on test data 65 | print("Learn QRegMTL (without location)") 66 | mtl.fit(x_train, y_train) 67 | pred = mtl.predict(x_test) 68 | 69 | # Plot the estimated conditional quantiles 70 | plt.subplot(233) 71 | plt.plot(x_train, y_train, '.') 72 | for q in pred: 73 | plt.plot(x_test, q, '-') 74 | for q in z_test: 75 | plt.plot(x_test, q, '--') 76 | plt.title('Multi-task learning (without location)') 77 | 78 | plt.subplot(236) 79 | plt.imshow(mtl.reg.D) 80 | plt.colorbar() 81 | plt.title('Learned metric (without location)') 82 | 83 | # QR with multi-task learning (several parameters) 84 | Cs = np.logspace(-8, 8, num=8) 85 | plt.figure() 86 | for i, C in enumerate(Cs): 87 | print('Learn QRegMTL with C={}'.format(C)) 88 | mtl = QRegMTL(C=C, probs=probs, n_landmarks=0.2, location=False) 89 | mtl.fit(x_train, y_train) 90 | pred = mtl.predict(x_test) 91 | 92 | # Plot the estimated conditional quantiles 93 | plt.subplot(4, 4, 4*(i//4)+i+1) 94 | plt.plot(x_train, y_train, '.') 95 | for q in pred: 96 | plt.plot(x_test, q, '-') 97 | for q in z_test: 98 | plt.plot(x_test, q, '--') 99 | plt.title('C={}'.format(C)) 100 | 101 | plt.subplot(4, 4, 4*(i//4+1)+i+1) 102 | plt.imshow(mtl.reg.D) 103 | plt.colorbar() 104 | 105 | plt.show() 106 | 107 | -------------------------------------------------------------------------------- /qreg/dataset_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # 6 | # Author: Mathieu Blondel 7 | # License: BSD 8 | 9 | from libc cimport stdlib 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | np.import_array() 14 | 15 | import scipy.sparse as sp 16 | 17 | cdef class 
Dataset:
18 | 
19 |     cpdef int get_n_samples(self):
20 |         return self.n_samples
21 | 
22 |     cpdef int get_n_features(self):
23 |         return self.n_features
24 | 
25 | 
26 | cdef class RowDataset(Dataset):
27 | 
28 |     cdef void get_row_ptr(self,
29 |                           int i,
30 |                           int** indices,
31 |                           double** data,
32 |                           int* n_nz) nogil:
33 |         pass
34 | 
35 |     cpdef get_row(self, int i):
36 |         cdef double* data
37 |         cdef int* indices
38 |         cdef int n_nz
39 |         cdef np.npy_intp shape[1]
40 | 
41 |         self.get_row_ptr(i, &indices, &data, &n_nz)
42 | 
43 |         shape[0] = self.n_features
44 |         indices_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_INT, indices)
45 |         data_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, data)
46 | 
47 |         return indices_, data_, n_nz
48 | 
49 | 
50 | cdef class ColumnDataset(Dataset):
51 | 
52 |     cdef void get_column_ptr(self,
53 |                              int j,
54 |                              int** indices,
55 |                              double** data,
56 |                              int* n_nz) nogil:
57 |         pass
58 | 
59 |     cpdef get_column(self, int j):
60 |         cdef double* data
61 |         cdef int* indices
62 |         cdef int n_nz
63 |         cdef np.npy_intp shape[1]
64 | 
65 |         self.get_column_ptr(j, &indices, &data, &n_nz)
66 | 
67 |         shape[0] = self.n_samples
68 |         indices_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_INT, indices)
69 |         data_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, data)
70 | 
71 |         return indices_, data_, n_nz
72 | 
73 | 
74 | cdef class ContiguousDataset(RowDataset):
75 | 
76 |     def __init__(self, np.ndarray[double, ndim=2, mode='c'] X):
77 |         self.n_samples = X.shape[0]
78 |         self.n_features = X.shape[1]
79 |         self.data = <double*> X.data
80 |         self.X = X
81 | 
82 |     def __cinit__(self, np.ndarray[double, ndim=2, mode='c'] X):
83 |         cdef int j
84 |         cdef int n_features = X.shape[1]
85 |         self.indices = <int*> stdlib.malloc(sizeof(int) * n_features)
86 |         for j in xrange(n_features):
87 |             self.indices[j] = j
88 | 
89 |     def __dealloc__(self):
90 |         stdlib.free(self.indices)
91 | 
92 |     # This is used to reconstruct the object in order to make it picklable.
93 |     def __reduce__(self):
94 |         return (ContiguousDataset, (self.X, ))
95 | 
96 |     cdef void get_row_ptr(self,
97 |                           int i,
98 |                           int** indices,
99 |                           double** data,
100 |                           int* n_nz) nogil:
101 |         indices[0] = self.indices
102 |         data[0] = self.data + i * self.n_features
103 |         n_nz[0] = self.n_features
104 | 
105 | 
106 | cdef class FortranDataset(ColumnDataset):
107 | 
108 |     def __init__(self, np.ndarray[double, ndim=2, mode='fortran'] X):
109 |         self.n_samples = X.shape[0]
110 |         self.n_features = X.shape[1]
111 |         self.data = <double*> X.data
112 |         self.X = X
113 | 
114 |     def __cinit__(self, np.ndarray[double, ndim=2, mode='fortran'] X):
115 |         cdef int i
116 |         cdef int n_samples = X.shape[0]
117 |         self.indices = <int*> stdlib.malloc(sizeof(int) * n_samples)
118 |         for i in xrange(n_samples):
119 |             self.indices[i] = i
120 | 
121 |     def __dealloc__(self):
122 |         stdlib.free(self.indices)
123 | 
124 |     # This is used to reconstruct the object in order to make it picklable.
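    # (__reduce__ returns the class and its constructor argument, so that
    # unpickling calls FortranDataset(X) again and rebuilds the raw C pointers,
    # which cannot be pickled directly.)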
125 |     def __reduce__(self):
126 |         return (FortranDataset, (self.X, ))
127 | 
128 |     cdef void get_column_ptr(self,
129 |                              int j,
130 |                              int** indices,
131 |                              double** data,
132 |                              int* n_nz) nogil:
133 |         indices[0] = self.indices
134 |         data[0] = self.data + j * self.n_samples
135 |         n_nz[0] = self.n_samples
136 | 
137 | 
138 | cdef class CSRDataset(RowDataset):
139 | 
140 |     def __init__(self, X):
141 |         cdef np.ndarray[double, ndim=1, mode='c'] X_data = X.data
142 |         cdef np.ndarray[int, ndim=1, mode='c'] X_indices = X.indices
143 |         cdef np.ndarray[int, ndim=1, mode='c'] X_indptr = X.indptr
144 | 
145 |         self.n_samples = X.shape[0]
146 |         self.n_features = X.shape[1]
147 |         self.data = <double*> X_data.data
148 |         self.indices = <int*> X_indices.data
149 |         self.indptr = <int*> X_indptr.data
150 | 
151 |         self.X = X
152 | 
153 |     # This is used to reconstruct the object in order to make it picklable.
154 |     def __reduce__(self):
155 |         return (CSRDataset, (self.X, ))
156 | 
157 |     cdef void get_row_ptr(self,
158 |                           int i,
159 |                           int** indices,
160 |                           double** data,
161 |                           int* n_nz) nogil:
162 |         indices[0] = self.indices + self.indptr[i]
163 |         data[0] = self.data + self.indptr[i]
164 |         n_nz[0] = self.indptr[i + 1] - self.indptr[i]
165 | 
166 | 
167 | cdef class CSCDataset(ColumnDataset):
168 | 
169 |     def __init__(self, X):
170 |         cdef np.ndarray[double, ndim=1, mode='c'] X_data = X.data
171 |         cdef np.ndarray[int, ndim=1, mode='c'] X_indices = X.indices
172 |         cdef np.ndarray[int, ndim=1, mode='c'] X_indptr = X.indptr
173 | 
174 |         self.n_samples = X.shape[0]
175 |         self.n_features = X.shape[1]
176 |         self.data = <double*> X_data.data
177 |         self.indices = <int*> X_indices.data
178 |         self.indptr = <int*> X_indptr.data
179 | 
180 |         self.X = X
181 | 
182 |     # This is used to reconstruct the object in order to make it picklable.
183 |     def __reduce__(self):
184 |         return (CSCDataset, (self.X, ))
185 | 
186 |     cdef void get_column_ptr(self,
187 |                              int j,
188 |                              int** indices,
189 |                              double** data,
190 |                              int* n_nz) nogil:
191 |         indices[0] = self.indices + self.indptr[j]
192 |         data[0] = self.data + self.indptr[j]
193 |         n_nz[0] = self.indptr[j + 1] - self.indptr[j]
194 | 
195 | 
196 | def get_dataset(X, order="c"):
197 |     if isinstance(X, Dataset):
198 |         return X
199 | 
200 |     if sp.isspmatrix(X):
201 |         if order == "fortran":
202 |             X = X.tocsc()
203 |             ds = CSCDataset(X)
204 |         else:
205 |             X = X.tocsr()
206 |             ds = CSRDataset(X)
207 |     else:
208 |         if order == "fortran":
209 |             X = np.asfortranarray(X, dtype=np.float64)
210 |             ds = FortranDataset(X)
211 |         else:
212 |             X = np.ascontiguousarray(X, dtype=np.float64)
213 |             ds = ContiguousDataset(X)
214 |     return ds
215 | 
--------------------------------------------------------------------------------
/qreg/sdca_qr_al_fast.pyx:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | # cython: cdivision=True
3 | # cython: boundscheck=False
4 | # cython: wraparound=False
5 | #
6 | # Authors: Maxime Sangnier and Olivier Fercoq from Mathieu Blondel's sdca
7 | # License: BSD
8 | 
9 | import numpy as np
10 | cimport numpy as np
11 | ctypedef np.int64_t LONG
12 | from libc.math cimport fabs
13 | from dataset_fast cimport RowDataset
14 | 
15 | cdef void _add_l2(double* datain,
16 |                   int* indicesin,
17 |                   int n_nzin,
18 |                   double* dataout,
19 |                   int* indicesout,
20 |                   int n_nzout,
21 |                   double update,
22 |                   int n_dim,
23 |                   double* coefs,
24 |                   int coefi,
25 |                   double mu,
26 |                   double* regul) nogil:
27 | 
28 |     cdef int i, j, ii, jj, l, m
29 |     cdef double dot
30 |     m = coefi / n_dim
31 |     l = coefi - m * n_dim
32 | 
33 |     dot = 0.
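    # The loop below computes dot = (K * coefs)[coefi], where K is the
    # Kronecker product of Kin and Kout plus the mu-augmentation, with coefs
    # already holding the updated value; regul is then adjusted by
    # update * (2*dot - K_cc*update), i.e. the exact change of the quadratic
    # term coefs^T K coefs, so it never has to be recomputed from scratch.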
34 | for ii in xrange(n_nzin): 35 | i = indicesin[ii] 36 | for jj in xrange(n_nzout): 37 | j = indicesout[jj] 38 | # True update 39 | dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] if j != l \ 40 | else coefs[i*n_dim+j] * (datain[ii] * dataout[jj] + mu) # ALREADY DONE IN _PRED !!! 41 | # Update as if mu=0 (without augmentation) 42 | # dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] 43 | regul[0] += update * (2*dot - datain[m] * dataout[l] * update) 44 | 45 | 46 | cdef _sqnorms(RowDataset Kin, RowDataset Kout, 47 | np.ndarray[double, ndim=1, mode='c'] sqnorms): 48 | 49 | cdef int n_samples = Kin.get_n_samples() 50 | cdef int n_dim = Kout.get_n_features() 51 | cdef int i, j, ii, jj 52 | 53 | # Data pointers. 54 | cdef double* datain 55 | cdef double* dataout 56 | cdef int* indicesin 57 | cdef int* indicesout 58 | cdef int n_nzin 59 | cdef int n_nzout 60 | cdef double tempin, tempout 61 | 62 | for i in xrange(n_samples): 63 | tempin = 0. 64 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 65 | for ii in xrange(n_nzin): # Look for the ith element in Kin(i, :) 66 | if indicesin[ii] == i: 67 | tempin = datain[ii] 68 | break 69 | for j in xrange(n_dim): 70 | tempout = 0. 71 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 72 | for jj in xrange(n_nzout): # Look for the jth element in Kout(j, :) 73 | if indicesout[jj] == j: 74 | tempout = dataout[jj] 75 | break 76 | sqnorms[i*n_dim + j] = tempin * tempout 77 | 78 | 79 | cdef double _pred(double* datain, 80 | int* indicesin, 81 | int n_nzin, 82 | double* dataout, 83 | int* indicesout, 84 | int n_nzout, 85 | int n_dim, 86 | int coefi, 87 | double mu, 88 | double* coefs) nogil: 89 | 90 | cdef int i, j, ii, jj, l 91 | cdef double dot = 0 92 | l = coefi - (coefi / n_dim) * n_dim 93 | 94 | for ii in xrange(n_nzin): 95 | i = indicesin[ii] 96 | for jj in xrange(n_nzout): 97 | j = indicesout[jj] 98 | dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] if j != l \ 99 | else coefs[i*n_dim+j] * (datain[ii] * dataout[jj] + mu) 100 | 101 | return dot 102 | 103 | 104 | cdef void _solve_subproblem(double* datain, 105 | int* indicesin, 106 | int n_nzin, 107 | double* dataout, 108 | int* indicesout, 109 | int n_nzout, 110 | double y, 111 | double* dcoef, 112 | int dcoefi, 113 | double* xdm, # 1.T * dcoef 114 | int n_samples, 115 | int n_dim, 116 | double sqnorm, 117 | double scale, 118 | double stepsize_factor, 119 | double prob, 120 | double intercept, 121 | double mu, 122 | double* primal, 123 | double* dual, 124 | double* regul): 125 | 126 | cdef double pred, dcoef_old, residual, error, loss, update 127 | cdef double inv_d_stepsize, mult_stepsize 128 | 129 | dcoef_old = dcoef[dcoefi] 130 | 131 | mult_stepsize = sqnorm * stepsize_factor # is it the best? 132 | inv_d_stepsize = (sqnorm + mult_stepsize) / 0.95 133 | 134 | pred = _pred(datain, indicesin, n_nzin, 135 | dataout, indicesout, n_nzout, 136 | n_dim, dcoefi, mu, dcoef) 137 | 138 | residual = y - intercept - pred 139 | # loss = prob*residual if residual > 0 else (prob-1.)*residual 140 | update = dcoef_old + residual / inv_d_stepsize 141 | update = min(scale*prob, update) 142 | update = max(scale*(prob-1.), update) 143 | update -= dcoef_old 144 | dual[0] += (y-intercept) * update # True dual 145 | # dual[0] += y * update # Dual as if intercept=0 (without augmentation) 146 | 147 | # Use accumulated loss rather than true primal objective value, which is 148 | # expensive to compute. 
149 | # primal[0] += loss * scale 150 | 151 | if update != 0: 152 | dcoef[dcoefi] += update 153 | _add_l2(datain, indicesin, n_nzin, 154 | dataout, indicesout, n_nzout, 155 | update, n_dim, dcoef, dcoefi, mu, regul) 156 | xdm[0] += update 157 | 158 | 159 | #SUPPRIMER INDICESIN, INDICESOUT 160 | def _prox_sdca_al_fit(self, 161 | RowDataset Kin, 162 | RowDataset Kout, 163 | np.ndarray[double, ndim=1] y, 164 | np.ndarray[double, ndim=1] dual_coef, 165 | double alpha2, 166 | double C, 167 | double stepsize_factor, 168 | np.ndarray[double, ndim=1] probs, 169 | np.ndarray[double, ndim=1] intercept, # Dual vector of the linear constraint: + intercept.T * LC 170 | double mu, # Coef of the L2 penalization of the linear constraint: + mu/2 * ||LC||**2 171 | int max_iter, 172 | double tol, 173 | callback, 174 | int n_calls, 175 | int verbose, 176 | rng): 177 | 178 | cdef int n_samples = Kin.get_n_samples() 179 | cdef int n_dim = Kout.get_n_features() 180 | 181 | # Variables 182 | cdef double sigma, scale, primal, dual, regul, gap 183 | cdef int it, ii, i, j 184 | cdef int has_callback = callback is not None 185 | cdef LONG t 186 | 187 | # Pre-compute square norms. 188 | cdef np.ndarray[double, ndim=1, mode='c'] sqnorms 189 | sqnorms = np.zeros(n_samples*n_dim, dtype=np.float64) 190 | _sqnorms(Kin, Kout, sqnorms) 191 | sqnorms += mu 192 | 193 | # Pointers 194 | cdef double* dcoef = dual_coef.data 195 | 196 | cdef np.ndarray[double, ndim=1] xdm_data 197 | xdm_data = np.zeros(n_dim, dtype=np.float64) # 1.T * dcoef 198 | cdef double* xdm = xdm_data.data 199 | for j in xrange(n_dim): 200 | dot = 0. 201 | for i in xrange(n_samples): 202 | dot += dcoef[i*n_dim+j] 203 | xdm[j] = dot 204 | 205 | cdef np.ndarray[int, ndim=1] sindices 206 | sindices = np.arange(n_samples*n_dim, dtype=np.int32) 207 | 208 | # Data pointers. 209 | cdef int* indicesin 210 | cdef double* datain 211 | cdef int n_nzin 212 | cdef int* indicesout 213 | cdef double* dataout 214 | cdef int n_nzout 215 | 216 | scale = C * 1. / alpha2 217 | 218 | dual = 0 219 | regul = 0 220 | prev_obj = np.inf 221 | 222 | t = 0 223 | for it in xrange(max_iter): 224 | primal = 0 225 | 226 | rng.shuffle(sindices) 227 | 228 | for ii in xrange(n_samples*n_dim): 229 | ij = sindices[ii] 230 | i = ij / n_dim 231 | j = ij - i*n_dim 232 | 233 | if sqnorms[i*n_dim + j] == 0: 234 | continue 235 | 236 | # Retrieve rows 237 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 238 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 239 | 240 | _solve_subproblem(datain, indicesin, n_nzin, 241 | dataout, indicesout, n_nzout, 242 | y[i], dcoef, i*n_dim + j, 243 | xdm + j, n_samples, n_dim, 244 | sqnorms[i*n_dim + j], scale, stepsize_factor, 245 | probs[j], intercept[j], mu, 246 | &primal, &dual, ®ul) 247 | 248 | if has_callback and t % n_calls == 0: 249 | ret = callback(self) 250 | if ret is not None: 251 | break 252 | 253 | t += 1 254 | 255 | # if has_callback and t % n_calls == 0: 256 | # ret = callback(self) 257 | # if ret is not None: 258 | # break 259 | 260 | # tol is the objective value to reach 261 | # if tol < 0. and np.mod(it, 1e3) == 0: 262 | # # Minus dual objective value 263 | # obj = alpha2 * (regul/2. - dual) 264 | # 265 | # if verbose: 266 | # print "%8d: %5.2e %5.2e" % (it + 1, obj, obj-tol) 267 | # 268 | # # Objective value reached 269 | # if obj <= tol: 270 | # if verbose: 271 | # print "Ground truth objective value reached." 272 | # break 273 | 274 | if np.mod(it, 1e3) == 0: 275 | # Minus dual objective value 276 | obj = alpha2 * (regul/2. 
- dual) 277 | dobj = prev_obj - obj 278 | prev_obj = obj 279 | 280 | if verbose: 281 | print "%8d: %5.2e %5.2e" % (it + 1, obj, dobj / n_samples) 282 | 283 | if np.abs(dobj) / n_samples <= tol: 284 | break 285 | else: 286 | if verbose: 287 | print "Stop before convergence." 288 | -------------------------------------------------------------------------------- /qreg/sdca_qr_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # 6 | # Authors: Maxime Sangnier and Olivier Fercoq from Mathieu Blondel's sdca 7 | # License: BSD 8 | 9 | import numpy as np 10 | cimport numpy as np 11 | ctypedef np.int64_t LONG 12 | from libc.math cimport fabs, sqrt 13 | from libc.stdlib cimport malloc, free 14 | from dataset_fast cimport RowDataset 15 | from time import process_time 16 | 17 | #np.set_printoptions(precision=4) 18 | 19 | cdef void _add_l2(double* datain, 20 | int* indicesin, 21 | int n_nzin, 22 | double* dataout, 23 | int* indicesout, 24 | int n_nzout, 25 | double update, 26 | int n_dim, 27 | double* coefs, 28 | int coefi, 29 | double* regul) nogil: 30 | 31 | cdef int i, j, ii, jj, l, m 32 | cdef double dot 33 | m = coefi / n_dim 34 | l = coefi - m * n_dim 35 | 36 | dot = 0. 37 | for ii in xrange(n_nzin): 38 | i = indicesin[ii] 39 | for jj in xrange(n_nzout): 40 | j = indicesout[jj] 41 | dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] # ALREADY DONE IN _PRED !!! 42 | regul[0] += update * (2*dot - datain[m] * dataout[l] * update) 43 | 44 | 45 | cdef _sqnorms(RowDataset Kin, RowDataset Kout, 46 | np.ndarray[double, ndim=1, mode='c'] sqnorms): 47 | 48 | cdef int n_samples = Kin.get_n_samples() 49 | cdef int n_dim = Kout.get_n_features() 50 | cdef int i, j, ii, jj 51 | 52 | # Data pointers. 53 | cdef double* datain 54 | cdef double* dataout 55 | cdef int* indicesin 56 | cdef int* indicesout 57 | cdef int n_nzin 58 | cdef int n_nzout 59 | cdef double tempin, tempout 60 | 61 | for i in xrange(n_samples): 62 | tempin = 0. 63 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 64 | for ii in xrange(n_nzin): # Look for the ith element in Kin(i, :) 65 | if indicesin[ii] == i: 66 | tempin = datain[ii] 67 | break 68 | for j in xrange(n_dim): 69 | tempout = 0. 70 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 71 | for jj in xrange(n_nzout): # Look for the jth element in Kout(j, :) 72 | if indicesout[jj] == j: 73 | tempout = dataout[jj] 74 | break 75 | sqnorms[i*n_dim + j] = tempin * tempout 76 | 77 | 78 | cdef _sqnormsL(RowDataset Kin, double lambda_max, 79 | np.ndarray[double, ndim=1, mode='c'] sqnorms): 80 | 81 | cdef int n_samples = Kin.get_n_samples() 82 | cdef int i, ii 83 | 84 | # Data pointers. 85 | cdef double* datain 86 | cdef int* indicesin 87 | cdef int n_nzin 88 | cdef double tempin 89 | 90 | for i in xrange(n_samples): 91 | tempin = 0. 
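        # Only the diagonal entry Kin[i, i] is needed here: the block stepsize
        # bound is Kin[i, i] * lambda_max (presumably the largest eigenvalue of
        # Kout, precomputed by the caller).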
92 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 93 | for ii in xrange(n_nzin): # Look for the ith element in Kin(i, :) 94 | if indicesin[ii] == i: 95 | tempin = datain[ii] 96 | break 97 | sqnorms[i] = tempin * lambda_max 98 | 99 | 100 | cdef double _pred(double* datain, 101 | int* indicesin, 102 | int n_nzin, 103 | double* dataout, 104 | int* indicesout, 105 | int n_nzout, 106 | int n_dim, 107 | double* coefs) nogil: 108 | 109 | cdef int i, j, ii, jj 110 | cdef double dot = 0 111 | 112 | for ii in xrange(n_nzin): 113 | i = indicesin[ii] 114 | for jj in xrange(n_nzout): 115 | j = indicesout[jj] 116 | dot += coefs[i*n_dim+j] * datain[ii] * dataout[jj] 117 | 118 | return dot 119 | 120 | cdef double norm_square(double* y, int y_size) nogil: 121 | cdef double norm = 0. 122 | for it in range(y_size): 123 | norm += y[it]**2 124 | return norm 125 | 126 | cdef void clip(double mu, double* y, double* probs, double scale, int y_size, 127 | double* clip_y, double* circ_y) nogil: 128 | for it in range(y_size): 129 | clip_y[it] = min(scale*probs[it], max(scale*(probs[it]-1), mu*y[it])) 130 | circ_y[it] = y[it] if clip_y[it] < scale*probs[it] and clip_y[it] > scale*(probs[it]-1) else 0. 131 | 132 | cdef double solve_prox_equ(double mu, double l, double* y, double* probs, 133 | double scale, int y_size) nogil: 134 | cdef double tol = 1.48e-08 # Scipy value 135 | cdef int max_iter = 50 # Scipy value 136 | cdef double* v = malloc(y_size*sizeof(double)) # Truncated vector 137 | cdef double* u = malloc(y_size*sizeof(double)) # Zero truncation 138 | # cdef np.ndarray[double, ndim=1] v_data # Truncated vector 139 | # cdef np.ndarray[double, ndim=1] u_data # Zero truncation 140 | # v_data = np.zeros(y_size, dtype=np.float64) 141 | # u_data = np.zeros(y_size, dtype=np.float64) 142 | # cdef double* v = v_data.data 143 | # cdef double* u = u_data.data 144 | 145 | # print("------------------------------------------------------------------") 146 | # print("mu init", mu) 147 | 148 | for it in range(max_iter): 149 | clip(mu, y, probs, scale, y_size, v, u) 150 | v_norm = sqrt(norm_square(v, y_size)) 151 | phi = 1 + l / v_norm - 1/mu # Objective 152 | err = fabs(phi) 153 | if err < tol: 154 | break 155 | diff_phi = 1/mu**2 - l*mu*norm_square(u, y_size) / v_norm**3 # Derivative 156 | mu -= phi / diff_phi # Newton update 157 | # print("it", it, "mu", mu, "phi", phi, "diff_phi", diff_phi, 158 | # "norm", norm_square(y, y_size)) 159 | # Prevent divergence 160 | # if mu < 0 or mu > 1: 161 | # print("Error in mu") 162 | # return solve_prox_equ_bisect(l, y, probs, scale, y_size) 163 | 164 | free(v) 165 | free(u) 166 | return mu 167 | 168 | #cdef double solve_prox_equ_bisect(double l, double* y, double* probs, 169 | # double scale, int y_size): 170 | # cdef double tol = 1e-12 171 | # cdef int max_iter = 100 172 | # cdef np.ndarray[double, ndim=1] v_data # Truncated vector 173 | # cdef np.ndarray[double, ndim=1] u_data # Zero truncation 174 | # v_data = np.zeros(y_size, dtype=np.float64) 175 | # u_data = np.zeros(y_size, dtype=np.float64) 176 | # cdef double* v = v_data.data 177 | # cdef double* u = u_data.data 178 | # 179 | # mu1 = 1e-6 180 | # mu2 = 1 181 | # # Find a negative point 182 | # for it in range(max_iter): 183 | # clip(mu1, y, probs, scale, y_size, v, u) 184 | # v_norm = np.sqrt(norm_square(v, y_size)) 185 | # phi = 1 + l / v_norm - 1/mu1 # Objective 186 | # if phi < 0: 187 | # break 188 | # mu1 /= 10 189 | ## print(mu1, phi) 190 | # for it in range(max_iter): 191 | # mu = (mu1+mu2)/2 192 | # clip(mu, y, probs, 
scale, y_size, v, u) 193 | # v_norm = np.sqrt(norm_square(v, y_size)) 194 | # phi = 1 + l / v_norm - 1/mu # Objective 195 | # err = abs(phi) 196 | # if err < tol: 197 | # break 198 | # if phi > 0: 199 | # mu2 = mu 200 | # else: 201 | # mu1 = mu 202 | ## print("it", it, "mu", mu, "phi", phi) 203 | # return mu 204 | 205 | cdef void _solve_subproblem(double* datain, 206 | int* indicesin, 207 | int n_nzin, 208 | RowDataset Kout, 209 | double* dataout, 210 | int* indicesout, 211 | int n_nzout, 212 | double y, 213 | double* dcoef, 214 | int dcoefi, 215 | double* multiplier, 216 | double* residual, 217 | double* xdm, # 1.T * dcoef 218 | double* ydm, # 1.T * multiplier 219 | int n_samples, 220 | int n_dim, 221 | double sqnorm, 222 | double scale, 223 | double eps, 224 | double* group_norm, 225 | double* res_norm, 226 | int* coef_on_bound, 227 | double* approx_mu, 228 | double stepsize_factor, 229 | double* probs, 230 | int i, 231 | double* primal, 232 | double* dual, 233 | double* regul): 234 | 235 | cdef double pred, error, loss, eps_prox, new_norm, gnorm, res_coef 236 | cdef double multiplier_old, multiplier_update 237 | cdef double inv_d_stepsize, mult_stepsize 238 | cdef double tol_bound 239 | 240 | # Updates of dual coefs 241 | cdef double* update = malloc(n_dim*sizeof(double)) 242 | 243 | mult_stepsize = sqnorm * stepsize_factor # is it the best? 244 | inv_d_stepsize = (sqnorm + mult_stepsize) / 0.95 245 | eps_prox = eps/inv_d_stepsize 246 | gnorm = group_norm[0] 247 | 248 | res_norm[0] = 0 249 | coef_on_bound[0] = 1 250 | tol_bound = 1e-6 251 | 252 | for j in xrange(n_dim): 253 | multiplier_old = multiplier[j] 254 | 255 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 256 | pred = _pred(datain, indicesin, n_nzin, 257 | dataout, indicesout, n_nzout, 258 | n_dim, dcoef) 259 | 260 | # i-th element of the projection of 261 | # mutiplier + mult_stepsize * dcoef on [1, ..., 1] 262 | multiplier_update = (ydm[j] + mult_stepsize * xdm[j]) / n_samples 263 | multiplier_update -= multiplier_old 264 | 265 | residual[j] = y - pred 266 | update[j] = (dcoef[dcoefi+j] + ( 267 | residual[j] - (multiplier_old + 2. * multiplier_update)) 268 | / inv_d_stepsize) 269 | residual[j] -= multiplier_old # Minus intercept 270 | res_norm[0] += residual[j]**2 271 | 272 | # Compute the loss (first way to do, an other one is below) 273 | # # loss = probs[j]*residual[0] if residual[0] > 0 else (probs[j]-1.)*residual[0] 274 | # loss = probs[j]*max(0, residual[j]-eps/n_dim) + (probs[j]-1)*min(0, residual[j]+eps/n_dim) 275 | # primal[0] += loss # Accumulated loss 276 | 277 | # Update group norm 278 | gnorm += update[j]**2 - dcoef[dcoefi+j]**2 279 | 280 | # Update multiplier 281 | if multiplier_update != 0: 282 | multiplier[j] += multiplier_update 283 | ydm[j] += multiplier_update 284 | 285 | # Is coef on bound? 
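        # (up to tol_bound; the flag feeds the active-set strategy below, which
        # skips samples whose residual is large and whose whole dual block
        # already sits on the box boundary)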
286 | if probs[j] - dcoef[dcoefi+j]/scale > tol_bound and \ 287 | dcoef[dcoefi+j]/scale - probs[j]+1 > tol_bound: 288 | coef_on_bound[0] = 0 289 | 290 | res_norm[0] = sqrt(res_norm[0]) 291 | 292 | # l1-l2 proximal operator + box constraint 293 | # Method 1 (full) 294 | if eps > 0.: 295 | new_norm = sqrt(gnorm) 296 | if new_norm > eps_prox: 297 | mu = solve_prox_equ(1-eps_prox/new_norm, eps_prox, update, probs, 298 | scale, n_dim) 299 | for j in xrange(n_dim): 300 | update[j] *= mu 301 | # Box constraint projection 302 | update[j] = min(scale*probs[j], update[j]) 303 | update[j] = max(scale*(probs[j]-1.), update[j]) 304 | group_norm[0] += update[j]**2 - dcoef[dcoefi+j]**2 305 | else: 306 | for j in xrange(n_dim): 307 | update[j] = 0. 308 | group_norm[0] -= dcoef[dcoefi+j]**2 309 | else: 310 | # Box constraint projection 311 | for j in xrange(n_dim): 312 | update[j] = min(scale*probs[j], update[j]) 313 | update[j] = max(scale*(probs[j]-1.), update[j]) 314 | 315 | # Coef for computing the loss 316 | res_coef = 1 - min(eps, res_norm[0])/res_norm[0] if res_norm[0]>0 else 0 317 | 318 | for j in xrange(n_dim): 319 | # Compute the loss (second way to do, more accurate) 320 | loss = probs[j]*max(0, residual[j]*res_coef) + (probs[j]-1)*min(0, residual[j]*res_coef) 321 | primal[0] += loss # Accumulated loss 322 | 323 | update[j] -= dcoef[dcoefi+j] 324 | dual[0] += y * update[j] 325 | 326 | if update[j] != 0: 327 | dcoef[dcoefi+j] += update[j] 328 | 329 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 330 | _add_l2(datain, indicesin, n_nzin, 331 | dataout, indicesout, n_nzout, 332 | update[j], n_dim, dcoef, dcoefi+j, regul) 333 | xdm[j] += update[j] 334 | 335 | free(update) 336 | 337 | # Method 2 (totally approximated) 338 | # update *= approx_mu 339 | # # Box constraint projection 340 | # update = min(scale*prob, update) 341 | # update = max(scale*(prob-1.), update) 342 | # new_norm = group_norm[0]**2 - dcoef_old**2 + update**2 343 | 344 | # # Method 3 (partially approximated) 345 | # if eps > 0.: 346 | # new_norm = group_norm[0] + update**2 - dcoef_old**2 347 | # if sqrt(new_norm) > eps: 348 | ## if approx_mu[0] > 0.: 349 | ## update *= approx_mu[0] 350 | ## else: 351 | # if approx_mu[0] == 0.: 352 | # dcoef[dcoefi] = update 353 | # mu = solve_prox_equ(1-eps/sqrt(new_norm), eps, dcoef+i*n_dim, 354 | # probs, scale, n_dim) 355 | # dcoef[dcoefi] = dcoef_old 356 | ## print("mu vs approx_mu", mu, approx_mu[0]) 357 | # approx_mu[0] = mu 358 | ## update *= mu 359 | # update *= approx_mu[0] 360 | # else: 361 | # update = 0. 362 | # # Box constraint projection 363 | # update = min(scale*prob, update) 364 | # update = max(scale*(prob-1.), update) 365 | # group_norm[0] += update**2 - dcoef_old**2 366 | 367 | # Method 4 (partially approximated, best for now) 368 | # if eps > 0.: 369 | # if approx_mu[0] == 0.: 370 | # eps_prox = eps/inv_d_stepsize 371 | # new_norm = sqrt(group_norm[0] + update**2 - dcoef_old**2) 372 | # if new_norm > eps_prox: 373 | # mu_init = 1-eps_prox/new_norm 374 | # if abs(0.5-mu_init) < 0.45: 375 | # dcoef[dcoefi] = update 376 | # mu = solve_prox_equ(mu_init, eps_prox, dcoef+i*n_dim, 377 | # probs, scale, n_dim) 378 | # dcoef[dcoefi] = dcoef_old 379 | # approx_mu[0] = mu 380 | # else: 381 | # approx_mu[0] = mu_init # mu_init is close to the solution 382 | # #when it is close to 0 or 1 383 | # else: 384 | # approx_mu[0] = 0. 
385 | # update *= approx_mu[0] 386 | 387 | # Method 5 (partially approximated) 388 | # if eps > 0.: 389 | # new_norm = sqrt(group_norm[0] + update**2 - dcoef_old**2) 390 | # mu = 1 - eps/new_norm if new_norm > eps else 0. 391 | # mu = max(0, 1 - eps/sqrt(new_norm)) 392 | # if abs(0.5-mu) < 0.45: 393 | # print("top") 394 | # dcoef[dcoefi] = update 395 | # mu = solve_prox_equ(mu, eps, dcoef+i*n_dim, probs, scale, 396 | # n_dim) 397 | # dcoef[dcoefi] = dcoef_old 398 | # update *= mu 399 | 400 | # # Box constraint projection 401 | # update = min(scale*prob, update) 402 | # update = max(scale*(prob-1.), update) 403 | # group_norm[0] += update**2 - dcoef_old**2 404 | 405 | 406 | #SUPPRIMER INDICESIN, INDICESOUT 407 | def _prox_sdca_intercept_fit(self, 408 | RowDataset Kin, 409 | RowDataset Kout, 410 | np.ndarray[double, ndim=1] y, 411 | np.ndarray[double, ndim=1] dual_coef, 412 | double alpha2, 413 | double C, 414 | double eps, 415 | double stepsize_factor, 416 | np.ndarray[double, ndim=1] probs, 417 | int max_iter, 418 | double tol, 419 | callback, 420 | int n_calls, 421 | float max_time, 422 | int n_gap, 423 | float gap_time_ratio, 424 | int verbose, 425 | rng, 426 | np.ndarray[short int, ndim=1] status, 427 | int active_set, 428 | double lambda_max): 429 | # np.ndarray[double, ndim=1] inner_obj): 430 | 431 | cdef int n_samples = Kin.get_n_samples() 432 | cdef int n_dim = Kout.get_n_features() 433 | 434 | # Variables 435 | cdef double sigma, scale, primal, dual, regul, gap, dual_sparsity, old_gn, constraint 436 | cdef int it, ii, i, j 437 | cdef int has_callback = callback is not None 438 | cdef LONG t 439 | cdef double tol_bound 440 | cdef int check_gap, perf_active_set, n_act_coord, n_act_coord_prev 441 | 442 | # Pre-compute square norms. 443 | # cdef np.ndarray[double, ndim=1, mode='c'] sqnorms 444 | # sqnorms = np.zeros(n_samples*n_dim, dtype=np.float64) 445 | # _sqnorms(Kin, Kout, sqnorms) 446 | 447 | # Pre-compute Lipschitz constants 448 | cdef np.ndarray[double, ndim=1, mode='c'] sqnorms 449 | sqnorms = np.zeros(n_samples, dtype=np.float64) 450 | _sqnormsL(Kin, lambda_max, sqnorms) 451 | 452 | # Pointers 453 | cdef double* dcoef = dual_coef.data 454 | cdef double* cprobs = probs.data 455 | # cdef double* iobj = inner_obj.data 456 | cdef int* cstatus = status.data 457 | 458 | cdef np.ndarray[double, ndim=1] multiplier_data 459 | multiplier_data = np.zeros(n_dim*n_samples, dtype=np.float64) 460 | cdef double* multiplier = multiplier_data.data 461 | 462 | cdef np.ndarray[double, ndim=1] residual_data 463 | residual_data = np.zeros(n_dim*n_samples, dtype=np.float64) 464 | cdef double* residual = residual_data.data 465 | 466 | cdef np.ndarray[double, ndim=1] ydm_data 467 | ydm_data = np.zeros(n_dim, dtype=np.float64) # 1.T * multiplier 468 | cdef double* ydm = ydm_data.data 469 | 470 | cdef np.ndarray[double, ndim=1] xdm_data 471 | xdm_data = np.zeros(n_dim, dtype=np.float64) # 1.T * dcoef 472 | cdef double* xdm = xdm_data.data 473 | for j in xrange(n_dim): 474 | dot = 0. 
475 | for i in xrange(n_samples): 476 | dot += dcoef[i*n_dim+j] 477 | xdm[j] = dot 478 | 479 | cdef np.ndarray[int, ndim=1] sindices 480 | sindices = np.arange(n_samples, dtype=np.int32) 481 | sindices_size = n_samples 482 | 483 | cdef np.ndarray[double, ndim=1] group_norm_data 484 | group_norm_data = np.zeros(n_samples, dtype=np.float64) # squared norm for each group 485 | cdef double* group_norm = group_norm_data.data 486 | for i in range(n_samples): 487 | group_norm[i] = norm_square(dcoef+i*n_dim, n_dim) 488 | 489 | cdef np.ndarray[double, ndim=1] res_norm_data 490 | res_norm_data = np.zeros(n_samples, dtype=np.float64) # norm for each residue 491 | cdef double* res_norm = res_norm_data.data 492 | 493 | cdef np.ndarray[int, ndim=1] coef_on_bound_data 494 | coef_on_bound_data = np.zeros(n_samples, dtype=np.int32) # Is coef on box bound? 495 | cdef int* coef_on_bound= coef_on_bound_data.data 496 | 497 | cdef np.ndarray[double, ndim=1] mus_data 498 | mus_data = np.ones(n_samples, dtype=np.float64) # 1 for eps=0 499 | cdef double* mus = mus_data.data 500 | 501 | # Data pointers. 502 | cdef int* indicesin 503 | cdef double* datain 504 | cdef int n_nzin 505 | cdef int* indicesout 506 | cdef double* dataout 507 | cdef int n_nzout 508 | n_gap_auto = n_gap==0 509 | if n_gap_auto: 510 | n_gap = 100 511 | 512 | swap_active_set = 0 513 | 514 | scale = C * 1. / alpha2 515 | 516 | # dual = (y * dual_coef).sum() 517 | dual = (y * np.reshape(dual_coef, (n_samples, n_dim)).T).sum() 518 | # dual = 0. 519 | # for i in xrange(n_samples): 520 | # dot = 0. 521 | # for j in xrange(n_dim): 522 | # dot += dcoef[i*n_dim+j] 523 | # dual += y[i]*dot 524 | 525 | dual_sparsity = np.sqrt(group_norm_data).sum() 526 | regul = 0 527 | for i in xrange(n_samples): 528 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 529 | for j in xrange(n_dim): 530 | Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 531 | dot = 0. 532 | for ii in xrange(n_nzin): 533 | for jj in xrange(n_nzout): 534 | dot += dcoef[indicesin[ii]*n_dim+indicesout[jj]] * datain[ii] * dataout[jj] 535 | regul += dcoef[i*n_dim+j] * dot 536 | if verbose: 537 | # print("regul", regul, "dual", dual, "group_norm", dual_sparsity) 538 | constraint = np.sum(np.fabs(xdm_data)) 539 | obj = alpha2 * (regul/2. 
- dual + 100.*constraint + eps * dual_sparsity) 540 | print("Initial obj:", obj) 541 | 542 | ################ Test solve_prox_equ ############## 543 | # n = 6 544 | # cdef np.ndarray[double, ndim=1] b 545 | # cdef np.ndarray[double, ndim=1] yy 546 | # b = 0.8*np.ones(n) 547 | # a = b-1 548 | # l = 2 549 | # yy = np.random.randn(n)*5 550 | # 551 | # mu = 0.5 552 | # for it in range(50): 553 | # phi = 1 + l / np.linalg.norm(np.fmin(b, np.fmax(a, mu*yy))) - 1/mu 554 | # v = np.fmin(b, np.fmax(a, mu*yy)) 555 | # u = yy * (va) 556 | # diff_phi = 1/mu**2 - l*mu*np.linalg.norm(u)**2 / np.linalg.norm(v)**3 557 | # mu -= phi / diff_phi 558 | # 559 | # print("mu Newton", mu) 560 | # 561 | # cdef double* cb = b.data 562 | # cdef double* cy = yy.data 563 | # mu = solve_prox_equ(0.5, l, cy, cb, 1, n) 564 | # print("mu Newton", mu) 565 | # mu = solve_prox_equ_bisect(l, cy, cb, 1, n) 566 | # print("mu bisect", mu) 567 | ################ Test solve_prox_equ ############## 568 | 569 | # n_indices_max = 200 570 | # if sindices_size > n_indices_max: 571 | # rng.shuffle(sindices) 572 | # sindices = sindices[:n_indices_max] 573 | # sindices_size = n_indices_max 574 | 575 | t = 0 576 | i_check_gap = 0 577 | time_gap = 0 578 | n_act_coord = 0 579 | tol_bound = 1e-6 580 | 581 | start_it = process_time() 582 | for it in xrange(max_iter): 583 | primal = 0 584 | n_act_coord_prev = n_act_coord 585 | n_act_coord = 0 586 | 587 | check_gap = it+1 - (it+1)//n_gap * n_gap == 0 # np.mod(it+1, n_gap) 588 | # perf_active_set = it+2 - (it+2)//n_gap * n_gap != 0 # Iteration before checking the gap 589 | perf_active_set = 1-check_gap 590 | 591 | rng.shuffle(sindices) 592 | 593 | # Set values for mu (leave at 1 for first iteration) 594 | # if eps > 0. and it>0: 595 | # for i in range(n_samples): 596 | ## print(group_norm[i], norm_square(dcoef+i*n_dim, n_dim)) 597 | # if np.sqrt(group_norm[i]) > eps: 598 | # mus[i] = solve_prox_equ(1-eps/np.sqrt(group_norm[i]), eps, 599 | # dcoef+i*n_dim, cprobs, scale, n_dim) 600 | # else: 601 | # mus[i] = 0. 602 | # # print("mu", i, mus[i], 1-eps/np.sqrt(group_norm[i]), group_norm[i]) 603 | 604 | # Reset mus 605 | # if eps > 0. and it>0: 606 | # for i in range(n_samples): 607 | # mus[i] = 0. 608 | 609 | # for ii in xrange(n_samples*n_dim): 610 | # ij = sindices[ii] 611 | # i = ij / n_dim # Sample index 612 | # j = ij - i*n_dim # Dimension index 613 | for ii in xrange(sindices_size): 614 | i = sindices[ii] 615 | # mus[i] = 0. 616 | old_gn = group_norm[i] 617 | if sqnorms[i] == 0: 618 | continue 619 | 620 | if active_set==1: 621 | if it>100 and (\ 622 | (res_norm[i] < eps*0.9 and \ 623 | sqrt(group_norm[i]) / (n_dim*scale) < tol_bound) or \ 624 | (res_norm[i] > eps*1.1 and coef_on_bound[i] == 1) 625 | ) and perf_active_set==1: 626 | continue 627 | 628 | n_act_coord += 1 629 | 630 | # Retrieve rows 631 | Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 632 | # Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 633 | 634 | _solve_subproblem(datain, indicesin, n_nzin, 635 | Kout, dataout, indicesout, n_nzout, 636 | y[i], dcoef, i*n_dim, 637 | multiplier + i*n_dim, 638 | residual + i*n_dim, 639 | xdm, ydm, n_samples, n_dim, 640 | sqnorms[i], scale, eps, 641 | group_norm + i, res_norm + i, 642 | coef_on_bound + i, 643 | mus + i, stepsize_factor, 644 | cprobs, i, 645 | &primal, &dual, ®ul) 646 | 647 | if has_callback and t % n_calls == 0: 648 | ret = callback(self) 649 | if ret is not None: 650 | break 651 | 652 | t += 1 653 | 654 | if eps > 0.: 655 | if group_norm[i] < 0.: 656 | group_norm[i] = 0. 
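                # Maintain dual_sparsity = sum_i ||alpha_i||_2 incrementally;
                # it enters the dual objective through the eps * dual_sparsity term.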
657 | dual_sparsity += sqrt(group_norm[i]) - sqrt(old_gn) 658 | 659 | # if has_callback and t % n_calls == 0: 660 | # ret = callback(self) 661 | # if ret is not None: 662 | # break 663 | 664 | # Debug 665 | # Compute 0.5 * dcoef.T * kron(Kin, Kout) * dcoef - 666 | # kron(y, ones(n_dim)).T * dcoef 667 | # This should be equal to alpha2 * (regul/2. - dual) 668 | # if np.mod(it, 1e0) == 0: 669 | # obj = 0. 670 | # dot = 0. 671 | # for i in xrange(n_samples): 672 | # Kin.get_row_ptr(i, &indicesin, &datain, &n_nzin) 673 | # for j in xrange(n_dim): 674 | # Kout.get_row_ptr(j, &indicesout, &dataout, &n_nzout) 675 | # dot += dcoef[i*n_dim+j] * y[i] 676 | # # print "%f < %f < %f" % (scale*(probs[j]-1), scale*dcoef[i*n_dim+j], scale*probs[j]) 677 | # for ii in xrange(n_samples): 678 | # for jj in xrange(n_dim): 679 | # obj += datain[ii] * dataout[jj] * dcoef[i*n_dim+j] * dcoef[ii*n_dim+jj] 680 | # obj *= 0.5 681 | # obj -= dot 682 | # print "It: %d obj: %f" % (it+1, obj) 683 | 684 | 685 | ################## Duality gap, print and active set ################## 686 | # if it+1 - (it+1)//n_gap * n_gap == 0: # np.mod(it+1, n_gap) 687 | if check_gap: 688 | i_check_gap += 1 689 | start_gap = process_time() 690 | 691 | # constraint = np.sum(np.fabs(xdm_data)) 692 | constraint = 0 693 | for j in xrange(n_dim): 694 | constraint += abs(xdm[j]) 695 | # obj = alpha2 * (regul/2. - dual + 100.*(constraint if constraint > 1e-4 else 0.)) # Minus dual objective value 696 | obj = alpha2 * (regul/2. - dual + 100.*constraint) # Minus dual objective value 697 | obj += alpha2 * eps * dual_sparsity 698 | 699 | # iobj[i_check_gap-1] = obj 700 | 701 | # Compute the intercept (not needed anymore since -multiplier_old has been added to residual) 702 | # rresidual = np.reshape(residual_data, (n_samples, n_dim)).T 703 | # By duality 704 | # intercept = np.reshape(multiplier_data, (n_samples, n_dim)).mean(axis=0) 705 | # Or… 706 | # if eps == 0.: 707 | # # Minimize primal problem 708 | # intercept = [ 709 | # np.percentile(res, 100.*prob) for (res, prob) in 710 | # zip(rresidual, probs)] 711 | # else: 712 | # # Use optimality conditions 713 | # tol_bound = 1e-3 # Tolerance for boundaries 714 | # coefs = np.reshape(dual_coef, (n_samples, n_dim)).T 715 | # ind_supp = np.where(np.sqrt(group_norm_data) / (n_dim * C) > tol_bound)[0] # Support vectors 716 | # ind_up = np.where(np.all( 717 | # (probs*C-coefs.T) / C > tol_bound, axis=1))[0] # Not on boundary sup 718 | # ind_down = np.where(np.all( 719 | # (coefs.T - (probs-1)*C) / C > tol_bound, axis=1))[0] # Not on boundary inf 720 | # # All conditions together: coefs of interest 721 | # # Intersection of ind_up, in_down and ind_supp 722 | # ind = [el for el in ind_up if el in ind_down and el in ind_supp] 723 | ## print("ind in sdca", ind) 724 | # if ind: 725 | # # Residues without intercept - expected values from dual coefs 726 | # intercept = (rresidual[:, ind]\ 727 | # -eps * coefs[:, ind]/\ 728 | # np.sqrt(group_norm_data[ind])).mean(axis=1) 729 | ## print("sdca residual") 730 | ## print(rresidual[:, ind]) 731 | ## print("sdca coefs") 732 | ## print(coefs[:, ind]) 733 | ## print("sdca group_norm") 734 | ## print(np.sqrt(group_norm_data[ind])) 735 | # else: 736 | # # If ind empty, do similarly as quantile regression 737 | # intercept = [ 738 | # np.percentile(res, 100.*prob) for (res, prob) in 739 | # zip(rresidual, probs)] 740 | ## print("intercept") 741 | ## print(intercept) 742 | ## print("multiplier") 743 | ## print(np.reshape(multiplier_data, (n_samples, n_dim)).mean(axis=0)) 
744 | 745 | # rresidual = (rresidual.T - intercept).T 746 | 747 | # Compute the primal objective (approximated for eps-loss) 748 | # primal2 = np.sum([ 749 | # prob*np.fmax(0, res-eps) for (res, prob) in 750 | # zip(rresidual, probs)]) 751 | # primal2 += np.sum([ 752 | # (prob-1)*np.fmin(0, res+eps) for (res, prob) in 753 | # zip(rresidual, probs)]) 754 | # print(primal2, primal) 755 | # Use accumulated loss 756 | primal2 = alpha2 * (regul/2. + primal*scale) 757 | gap = (primal2 + obj) / (C * n_samples) # Dual gap 758 | if gap < 0.: 759 | gap = 1 760 | 761 | # Active set (if enabled and intercept obtained by optimality conditions) 762 | # if active_set > 0: 763 | # if swap_active_set: 764 | # tol_bound = 1e-6 # Tolerance for boundaries (redefinition) 765 | ## coefs = np.reshape(dual_coef, (n_samples, n_dim)).T 766 | ## rresidual = np.reshape(residual_data, (n_samples, n_dim)).T 767 | ## rresidual_norm = np.linalg.norm(rresidual, axis=0) # Residues norm 768 | ## print("residual norm") 769 | ## print(rresidual_norm) 770 | ## print("online residual norm") 771 | ## print(res_norm_data) 772 | # 773 | # # Points with small residues and zero coefs 774 | ## ind_null = np.where(rresidual_norm_data < eps-tol_bound)[0] 775 | ## ind_null_coef = np.where(np.sqrt(group_norm_data) / (n_dim * C) < tol_bound)[0] 776 | ## ind_null_coef = [e for e in ind_null if e in ind_null_coef] 777 | # ind_null_coef = [j for j in xrange(n_samples) if 778 | # res_norm[j] < eps*0.9 and 779 | # sqrt(group_norm[j]) / (n_dim*C) < tol_bound] 780 | # 781 | # # Points with large residues and coefs on box borders 782 | ## ind_bound = np.where(res_norm_data > eps*1.1)[0] 783 | ## ind_bound_coef = np.where(np.all(np.logical_or( 784 | ## (probs*C-coefs.T) / C < tol_bound, 785 | ## (coefs.T - (probs-1)*C) / C < tol_bound), axis=1))[0] 786 | ## ind_bound_coef = [e for e in ind_bound if e in ind_bound_coef] 787 | # ind_bound_coef = [j for j in xrange(n_samples) if 788 | # res_norm[j] > eps*1.1 and 789 | # coef_on_bound[j] == 1] 790 | # 791 | # # All that points satisfy optimality conditions 792 | # ind_all_coef = ind_null_coef + ind_bound_coef 793 | # sindices = np.delete(np.arange(n_samples, dtype=np.int32), 794 | # ind_all_coef) 795 | # sindices_size = sindices.size 796 | ## print(np.sort(sindices)) 797 | # else: 798 | # if it > 1: 799 | # swap_active_set = 1 800 | # 801 | # sindices = np.arange(n_samples, dtype=np.int32) 802 | # sindices_size = n_samples 803 | # swap_active_set = 1 - swap_active_set # 0 <-> 1 804 | 805 | # swap_active_set += 1 806 | # if swap_active_set > 2: 807 | # swap_active_set = 0 808 | 809 | # if sindices_size > n_indices_max: 810 | # rng.shuffle(sindices) 811 | # sindices = sindices[:n_indices_max] 812 | # sindices_size = n_indices_max 813 | 814 | # print("all coefs", ind_all_coef) 815 | # print("active indexes", sindices) 816 | # print(coefs[:, ind_all_coef] / C) 817 | # print("# active coord", sindices.size) 818 | 819 | # Automatic tuning such that the time of computing the duality gap 820 | # don't exceed 100*gap_time_ratio % of the total time 821 | end_gap = process_time() 822 | elapsed_time = (end_gap - start_it) / i_check_gap # Time between 2 checks 823 | time_gap = ((i_check_gap-1)*time_gap + end_gap - start_gap) / i_check_gap 824 | if n_gap_auto: 825 | n_gap = max(100, int(n_gap * time_gap / (elapsed_time * gap_time_ratio))) 826 | 827 | # if swap_active_set: 828 | # n_gap = 10 829 | 830 | if verbose: 831 | print "%8d: %5.2e (gap) %5.2f (obj) %5.2e (constraint) %5.2f (gap time ratio) %d (# act coord)" 
832 | 
833 |             # Stopping criterion
834 |             if gap <= tol:
835 |                 if verbose:
836 |                     print("Optimal solution found.")
837 |                 status[0] = 1
838 |                 break
839 | 
840 |             if max_time > 0 and process_time() - start_it > max_time:
841 |                 if verbose:
842 |                     print("Max time reached.")
843 |                 status[0] = 3
844 |                 break
845 | 
846 |             # # tol is the objective value to reach
847 |             # if (verbose or tol < 0.) and np.mod(it+1, 1e3) == 0:
848 |             #     # Minus dual objective value
849 |             #     obj = alpha2 * (regul/2. - dual + 100.*np.sum(np.fabs(xdm_data)))
850 |             ##    obj = alpha2 * (regul/2. - dual)
851 |             #
852 |             #     if verbose:
853 |             #         print("%8d: %5.2e %5.2e" % (it + 1, obj-tol,
854 |             #                                     np.sum(np.fabs(xdm_data))))
855 |             #
856 |             #     # Objective value reached
857 |             #     if tol < 0. and obj <= tol:
858 |             #         if verbose:
859 |             #             print("Ground truth objective value reached.")
860 |             #         break
861 | 
862 |         else:
863 |             if verbose:
864 |                 print("Max iteration reached.")
865 |             status[0] = 2
866 |     # Debug sparsity (norm accumulation)
867 |     # print(group_norm_data)
868 |     # coefs = np.reshape(dual_coef, (n_samples, n_dim)).T
869 |     # print(np.sqrt((coefs**2).sum(axis=0)))
870 | 
871 |     # print("sindices", np.sort(sindices))
872 | 
--------------------------------------------------------------------------------
/qreg/qreg.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | # Author: Maxime Sangnier
3 | # License: BSD
4 | import sys
5 | 
6 | import numpy as np
7 | import scipy.spatial.distance as dist
8 | from scipy.linalg import eigvalsh
9 | from scipy.stats import norm
10 | from sklearn.utils import check_random_state
11 | from sklearn.base import BaseEstimator
12 | from sklearn.linear_model import LinearRegression, Ridge
13 | from sklearn.svm import SVR
14 | 
15 | from cvxopt import matrix, solvers
16 | from .dataset_fast import get_dataset
17 | from .sdca_qr_fast import _prox_sdca_intercept_fit
18 | from .sdca_qr_al_fast import _prox_sdca_al_fit
19 | 
20 | import time
21 | import warnings
22 | 
23 | # time.clock() has been removed in Python 3.8+
24 | # See: https://docs.python.org/3/whatsnew/3.8.html#api-and-feature-removals
25 | #if sys.version_info >= (3,8):
26 | #    get_time = time.perf_counter
27 | #else:
28 | #    get_time = time.clock
29 | 
30 | 
31 | def toy_data(n=50, t_min=0., t_max=1.5, noise=1., probs=[0.5]):
32 |     """
33 |     Parameters
34 |     n: number of points (t, y)
35 |     t_min: minimum input data t
36 |     t_max: maximum input data t
37 |     noise: noise level
38 |     probs: probabilities (quantile levels)
39 | 
40 |     Returns:
41 |     x: sorted random data in [t_min, t_max]
42 |     y: targets corresponding to x (following a noisy sine curve)
43 |     q: true quantiles corresponding to x
44 |     """
45 |     t_down, t_up = 0., 1.5  # Bounds for the noise
46 |     t = np.random.rand(n) * (t_max-t_min) + t_min
47 |     t = np.sort(t)
48 |     pattern = -np.sin(2*np.pi*t)  # Pattern of the signal
49 |     envelope = 1 + np.sin(2*np.pi*t/3)  # Envelope of the signal
50 |     pattern = pattern * envelope
51 |     # Noise with decreasing std (from noise+0.2 down to 0.2)
52 |     noise_std = 0.2 + noise*(t_up - t) / (t_up - t_down)
53 |     # Gaussian noise with decreasing std
54 |     add_noise = noise_std * np.random.randn(n)
55 |     observations = pattern + add_noise
56 |     quantiles = [pattern + norm.ppf(p, loc=np.zeros(n),
57 |                                     scale=np.fabs(noise_std)) for p in probs]
58 |     return t, observations, quantiles
59 | 
60 | 
61 | def proj_dual(coefs, C, probs):
62 |     n = coefs.shape[1]
63 |     for it in range(100):
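        # One pass of alternating projections: first onto the zero-sum
        # hyperplane (dual equality constraint), then onto the box
        # [C*(probs-1), C*probs] (dual box constraint); the fixed budget of
        # 100 passes approximates the projection onto their intersection.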
64 |         # Project onto the hyperplane
65 |         coefs = np.asarray([x - x.sum() / n for x in coefs])
66 |         coefs = np.asarray([np.fmin(C*probs, np.fmax(C*(probs-1), x)) for x in coefs.T]).T
67 | 
68 |     return coefs
69 | 
70 | 
71 | class QRegressor(BaseEstimator):
72 |     def __init__(self, C=1, probs=[0.5], eps=0., kernel='rbf', gamma_in=None,
73 |                  gamma_out=0., alg='coneqp', max_iter=100, tol=1e-6, lag_tol=1e-4,
74 |                  stepsize_factor=10., callback=None,
75 |                  n_calls=None, verbose=False, random_state=None,
76 |                  coefs_init="svr", nc_const=False, al_max_time=180.,
77 |                  max_time=None, n_gap=None, gap_time_ratio=1e-3,
78 |                  active_set=True, sv_tol=1e-3):
79 |         """
80 |         Quantile Regression.
81 | 
82 |         C: cost parameter (upper bound of dual variables). Positive scalar.
83 |         probs: probabilities (quantile levels)
84 |         eps: threshold for the epsilon-loss (if used)
85 |         kernel: input kernel ('rbf' or 'linear')
86 |         gamma_in: gamma parameter for the input RBF kernel
87 |         gamma_out: gamma parameter for the output RBF kernel
88 |         alg: algorithm, which can be:
89 |             - 'qp': CVXOPT (alternate optimization when eps > 0)
90 |             - 'coneqp': CVXOPT (cone programming when eps > 0)
91 |             - 'sdca': "A Coordinate Descent Primal-Dual Algorithm with Large
92 |               Step Size and Possibly Non Separable Functions", by Olivier Fercoq
93 |               and Pascal Bianchi
94 |             - 'al': augmented Lagrangian
95 |             - 'mtl': multi-task learning ("Parametric Task Learning", by Ichiro
96 |               Takeuchi, Tatsuya Hongo, Masashi Sugiyama and Shinichi Nakajima)
97 |         max_iter: maximum number of iterations
98 |         tol: prescribed tolerance
99 |         lag_tol: prescribed tolerance for the outer loop of the augmented
100 |             Lagrangian algorithm and for the QP with eps-loss
101 |         stepsize_factor: step size parameter for the sdca and al algorithms
102 |         callback: optional callable invoked during optimization
103 |         n_calls: number of iterations between two calls to callback
104 |             (if None, the number of samples)
105 |         verbose: print resolution information when set to True
106 |         random_state: seed or RandomState instance used by the stochastic algorithms
107 |         coefs_init: initial dual coefficients (numpy array, shape (n_probs,
If "svr", initialize with esp-conditional 108 | median (scikit-learn SVR) 109 | nc_const: add non-crossing consraints when set to true (only available 110 | with alg='qp') 111 | al_max_time: maximum training time (seconds) for al algorithm 112 | max_time: maximum training time (seconds) for sdca algorithm 113 | n_gap: number of iterations between two dual gap check (if None, automatic) 114 | gap_time_ratio: ratio time to compute dual gap / time for n_gap iterations 115 | (this quantity is used to adjust automatically n_gap) 116 | active_set: whether to use active set or not 117 | sv_tol: tolerance for detecting support vector before prediction 118 | """ 119 | self.C = C 120 | self.probs = probs 121 | self.eps = eps 122 | self.kernel = kernel 123 | self.gamma_in = gamma_in 124 | self.gamma_out = gamma_out 125 | self.alg = alg 126 | self.alpha = 1.0 # Do not change 127 | self.max_iter = max_iter 128 | self.tol = tol 129 | self.lag_tol = lag_tol 130 | self.stepsize_factor = stepsize_factor 131 | self.callback = callback 132 | self.n_calls = n_calls 133 | self.verbose = verbose 134 | self.random_state = random_state 135 | self.coefs_init = coefs_init 136 | self.nc_const = nc_const 137 | self.al_max_time = al_max_time 138 | self.max_time = max_time 139 | self.n_gap = n_gap 140 | self.gap_time_ratio = gap_time_ratio 141 | self.status = "" # Resolution status 142 | self.active_set = active_set 143 | self.sv_tol = sv_tol 144 | 145 | def predict(self, X): 146 | """ 147 | Predict the conditional quantiles 148 | 149 | Parameters: 150 | X: data in rows (numpy array) 151 | 152 | Returns: 153 | y: prediction for each prescribed quantile levels 154 | """ 155 | 156 | X = np.asarray(X) 157 | if X.ndim == 1: 158 | # X = np.asarray([X]).T 159 | # Data has a single feature 160 | X = X.reshape(-1, 1) 161 | 162 | # Indexes of support vectors 163 | ind_sv = self.ind_sv() 164 | 165 | # Compute kernels 166 | if self.kernel == 'rbf': 167 | Din = dist.cdist(self.X[ind_sv, :], X, 'sqeuclidean') 168 | Kin = np.exp(-self.gamma_in * Din) 169 | else: # Linear kernel 170 | Kin = np.dot(self.X[ind_sv, :], self.D.dot(X.T)) 171 | 172 | Dout = dist.pdist(np.asarray([self.probs]).T, 'sqeuclidean') 173 | Kout = np.exp(-self.gamma_out * dist.squareform(Dout)) \ 174 | if self.gamma_out != np.inf else np.eye(np.size(self.probs)) 175 | 176 | pred = np.dot(np.dot(Kout, self.coefs[:, ind_sv]), Kin).T 177 | pred += self.intercept 178 | return pred.T 179 | 180 | def fit(self, X, y): 181 | """ 182 | Fit the model. 
183 | 
184 |         X: data in rows (numpy array)
185 |         y: targets in rows (numpy array)
186 |         """
187 | 
188 |         # Was in __init__ before
189 |         self.kernel = self.kernel.lower()
190 |         self.probs = np.asarray(self.probs)
191 |         self.max_iter = int(self.max_iter)
192 |         if self.max_time is None:
193 |             self.max_time = 0
194 |         if self.n_gap is None:
195 |             self.n_gap = 0
196 |         if self.nc_const and self.alg != 'qp':
197 |             self.alg = 'qp'
198 |             warnings.warn("alg set to 'qp' (this is the only available " + \
199 |                           "algorithm to deal with the non-crossing constraints)")
200 | 
201 |         if self.kernel != 'rbf' and self.kernel != 'linear':
202 |             raise ValueError('Choose kernel between rbf and linear.')
203 | 
204 |         if self.alg == 'mtl':
205 |             self.kernel = 'linear'
206 |             self.gamma_out = np.inf
207 |             self.gamma_in = None
208 | 
209 |         # Data refactoring
210 |         self.X = np.asarray(X)
211 |         if self.X.ndim == 1:
212 |             # self.X = np.asarray([X]).T
213 |             # Data has a single feature
214 |             self.X = self.X.reshape(-1, 1)
215 |         y = np.ravel(y)
216 | 
217 |         # If no gamma_in is specified, take 0.5 / q, where q is the 0.7-quantile
218 |         # of the squared distances
219 |         if self.kernel == 'rbf':
220 |             Din = dist.pdist(self.X, 'sqeuclidean')
221 |             if self.gamma_in is None:
222 |                 self.gamma_in = 1. / (2. * np.percentile(Din, 70.))
223 | 
224 |         # Compute kernels
225 |         if self.kernel == 'rbf':
226 |             Kin = np.exp(-self.gamma_in * dist.squareform(Din))
227 |         else:  # Linear kernel
228 |             self.D = np.eye(self.X.shape[1])
229 |             Kin = np.dot(self.X, self.D.dot(self.X.T))
230 | 
231 |         Dout = dist.pdist(np.asarray([self.probs]).T, 'sqeuclidean')
232 |         Kout = np.exp(-self.gamma_out * dist.squareform(Dout)) \
233 |             if self.gamma_out != np.inf else np.eye(np.size(self.probs))
234 | 
235 |         # Check algorithm
236 |         if self.eps > 0 and self.alg != 'qp' and self.alg != 'coneqp' and self.alg != 'sdca':
237 |             raise ValueError('Use qp, coneqp or sdca for epsilon quantile regression.')
238 |         if self.nc_const and self.eps > 0:
239 |             raise ValueError('Not implemented yet.')
240 | 
241 |         # Initialization
242 |         # For QP, it seems to slow down convergence.
243 |         if self.coefs_init is None:
244 |             coefs_init = None
245 |         elif isinstance(self.coefs_init, str) and self.coefs_init.lower() == "svr":
246 |             # Estimate the conditional median
247 |             svr = SVR(C=self.C/2, kernel="precomputed", epsilon=self.eps)
248 |             svr.fit(Kin, y)
249 |             svr_dual = np.zeros(y.shape)
250 |             svr_dual[svr.support_] = svr.dual_coef_[0, :]
251 |             coefs_init = np.kron(svr_dual, np.ones(np.size(self.probs)))
252 |         else:
253 |             coefs_init = self.coefs_init.T.ravel()
254 | 
255 |         # Choose the algorithm
256 |         if self.alg == 'qp':  # Off-the-shelf solver (cvxopt)
257 |             if self.nc_const:
258 |                 self.qp_nc2(Kin, Kout, y)
259 |             else:
260 |                 K = np.kron(Kin, Kout)
261 |                 self.qp_eps(K, y)  #, coefs_init)
262 |         elif self.alg == 'coneqp':
263 |             if self.nc_const:
264 |                 self.qp_nc2(Kin, Kout, y)
265 |             else:
266 |                 K = np.kron(Kin, Kout)
267 |                 self.coneqp_eps(K, y)
268 |         elif self.alg == 'sdca':  # Stochastic dual coordinate descent
269 |             self.sdca(Kin, Kout, y, coefs_init)
270 |         elif self.alg == 'al':
271 |             self.al(Kin, Kout, y, 1, coefs_init)
272 |         elif self.alg == 'penal':
273 |             self.al(Kin, Kout, y, 4, coefs_init)
274 |         elif self.alg == 'mtl':
275 |             self.mtl(y)
276 |             # Recompute the kernel with learned D
277 |             Kin = np.dot(self.X, self.D.dot(self.X.T))
278 |         else:
279 |             raise ValueError('Unknown algorithm')
280 | 
281 |         # When there are no additional constraints, the quantile property is
282 |         # satisfied.
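        # (satisfied on the training set: by taking the intercept as the
        # empirical prob-quantile of the residuals y - pred below, a
        # proportion prob of the training points lies under each predicted
        # quantile)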
283 |         if not self.nc_const:
284 |             # Make the dual point feasible (mainly for SDCA)
285 |             self.coefs = proj_dual(self.coefs, self.C, self.probs)
286 | 
287 |         # Set the intercept
288 |         # Erase the previous intercept before prediction
289 |         self.intercept = 0.
290 |         # For usual quantile prediction
291 |         if self.eps == 0.:
292 |             self.intercept = [
293 |                 np.percentile(y-pred, 100.*prob) for
294 |                 (pred, prob) in zip(self.predict(self.X), self.probs)]
295 |             self.intercept = np.asarray(self.intercept)
296 |         else:
297 |             # For eps-quantile prediction
298 |             # Use optimality conditions to find:
299 |             #     residues = eps * coef / coef_norm.
300 |             # True for coefs that:
301 |             #     - are not 0
302 |             #     - are not on the boundaries
303 |             tol = 1e-3  # Tolerance for boundaries
304 |             group_norm = np.linalg.norm(self.coefs, axis=0)  # Norm of coefs vectors
305 |             ind_supp = np.where(
306 |                 group_norm / (self.probs.size * self.C) > tol
307 |             )[0]  # Support vectors
308 |             ind_up = np.where(np.all(
309 |                 (self.probs*self.C-self.coefs.T) / self.C > tol,
310 |                 axis=1))[0]  # Not on the upper boundary
311 |             ind_down = np.where(np.all(
312 |                 (self.coefs.T - (self.probs-1)*self.C) / self.C > tol,
313 |                 axis=1))[0]  # Not on the lower boundary
314 |             # All conditions together: coefs of interest
315 |             ind = list(set(ind_up) & set(ind_down) & set(ind_supp))
316 |             if ind:
317 |                 # Residues without intercept
318 |                 res = y[ind] - self.predict(self.X)[:, ind]
319 |                 # Expected values from dual coefs
320 |                 res_dual = self.eps * self.coefs[:, ind]/group_norm[ind]
321 |                 # Intercept
322 |                 self.intercept = (res-res_dual).mean(axis=1)
323 |                 # print("qreg residual")
324 |                 # print(res)
325 |                 # print("qreg coefs")
326 |                 # print(self.coefs[:, ind])
327 |                 # print("qreg group_norm")
328 |                 # print(group_norm[ind])
329 |             else:
330 |                 # If ind is empty, proceed as for plain quantile regression
331 |                 self.intercept = [
332 |                     np.percentile(y-pred, 100.*prob) for
333 |                     (pred, prob) in zip(self.predict(self.X), self.probs)]
334 |                 self.intercept = np.asarray(self.intercept)
335 | 
336 |         # Set optimal objective value
337 |         self.obj = 0.5 * np.trace(np.dot(
338 |             self.coefs.T, np.dot(Kout, np.dot(self.coefs, Kin)))) \
339 |             - np.sum(self.coefs * y)
340 |         self.obj += self.eps * np.linalg.norm(self.coefs, axis=0).sum()
341 | 
342 |     def score(self, X, y, sample_weight=None):
343 |         # Pinball loss
344 |         return 1 - self.pinball_loss(self.predict(X), y).mean()
345 |         # Pinball loss + Indicator (crossing_loss)
346 |         # p = self.predict(X)
347 |         # return 1 - self.pinball_loss(p, y).mean() + \
348 |         #     100. * self.crossing_loss(p).sum()
349 | 
350 |     def qp_nc(self, Kin, Kout, y):
351 |         ind = np.argsort(self.probs)  # Needed to sort constraints on quantile levels
352 | 
353 |         K = np.kron(Kin, Kout)
354 |         p = np.size(self.probs)  # Number of quantiles to predict
355 |         n = K.shape[0]  # Number of coefficients
356 |         m = n // p  # Number of training instances
357 |         probs = np.kron(np.ones(m), self.probs)  # Quantile levels
358 | 
359 |         D = -np.eye(p) + np.diag(np.ones(p-1), 1)  # Difference matrix
360 |         D = np.delete(D, -1, 0)
361 |         D = D.T[np.argsort(ind)].T
362 | 
363 |         U = np.kron(Kin, np.dot(Kout, D.T))  # Upper-right (and lower-left) block
364 |         L = np.kron(Kin, np.dot(D, np.dot(Kout, D.T)))  # Lower-right block
365 | 
366 |         K = matrix(np.r_[np.c_[K, U], np.c_[U.T, L]])  # Quad. part of the obj.
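        # Stacked variables: the first n entries are the pinball dual
        # coefficients; the remaining m*(p-1) entries stem from the
        # non-crossing constraints and are folded back through D.T when
        # setting self.coefs below.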
367 |         q = matrix(np.r_[-np.kron(y, np.ones(p)), np.zeros(m*(p-1))])  # Linear part of the objective
368 |         G = matrix(np.r_[np.c_[np.eye(n), np.zeros((n, m*(p-1)))],
369 |                          np.c_[-np.eye(n), np.zeros((n, m*(p-1)))],
370 |                          np.c_[np.zeros((m*(p-1), n)), -np.eye(m*(p-1))]])  # LHS of the inequ. constr.
371 |         h = matrix(np.r_[self.C*probs, self.C*(1-probs), np.zeros(m*(p-1))])  # RHS of the inequ.
372 |         A = matrix(np.c_[np.kron(np.ones(m), np.eye(p)),
373 |                          np.kron(np.ones(m), D.T)])  # LHS of the equ. constr.
374 |         b = matrix(np.zeros(p))  # RHS of the equality constraint
375 | 
376 |         # The following parameters control the execution of the default solver.
377 |         # options['show_progress'] True/False (default: True)
378 |         # options['maxiters'] positive integer (default: 100)
379 |         # options['refinement'] positive integer (default: 0)
380 |         # options['abstol'] scalar (default: 1e-7)
381 |         # options['reltol'] scalar (default: 1e-6)
382 |         # options['feastol'] scalar (default: 1e-7)
383 |         # Returns:
384 |         # {'dual infeasibility'
385 |         #  'dual objective'
386 |         #  'dual slack'
387 |         #  'gap'
388 |         #  'iterations'
389 |         #  'primal infeasibility'
390 |         #  'primal objective'
391 |         #  'primal slack'
392 |         #  'relgap'
393 |         #  's': <0x1 matrix, tc='d'>,
394 |         #  'status'
395 |         #  'x'
396 |         #  'y'
397 |         #  'z'
398 |         solvers.options['show_progress'] = self.verbose
399 |         if self.tol > 0:
400 |             solvers.options['reltol'] = self.tol
401 |         self.time = time.process_time()  # Store beginning time
402 |         sol = solvers.qp(K, q, G, h, A, b)  # Solve the dual opt. problem
403 |         self.time = time.process_time() - self.time  # Store training time
404 | 
405 |         # Set coefs
406 |         self.coefs = np.reshape(sol['x'][:n], (m, p)).T
407 |         self.coefs += np.dot(D.T, np.reshape(sol['x'][n:], (m, p-1)).T)
408 |         self.sol = sol
409 | 
410 |         # Set the intercept (the quantile property is not satisfied)
411 |         self.intercept = np.asarray(sol['y']).squeeze()
412 | 
413 |     def qp_nc2(self, Kin, Kout, y):
414 |         ind = np.argsort(self.probs)  # Needed to sort constraints on quantile levels
415 | 
416 |         K = np.kron(Kin, Kout)
417 |         p = np.size(self.probs)  # Number of quantiles to predict
418 |         n = K.shape[0]  # Number of coefficients
419 |         m = n // p  # Number of training instances
420 |         l = m * (p-1)  # Number of non-crossing dual variables
421 |         probs = np.kron(np.ones(m), self.probs)  # Quantile levels
422 | 
423 |         D = -np.eye(p) + np.diag(np.ones(p-1), 1)  # Difference matrix
424 |         D = np.delete(D, -1, 0)
425 |         D = D.T[np.argsort(ind)].T
426 | 
427 |         K = matrix(np.r_[np.c_[K, np.zeros((n, l))], np.zeros((l, n+l))])  # Quad. part of the obj.
428 |         q = matrix(np.r_[-np.kron(y, np.ones(p)), np.zeros(l)])  # Linear part of the objective
429 |         G = matrix(np.r_[np.c_[np.eye(n), -np.kron(np.eye(m), D.T)],
430 |                          np.c_[-np.eye(n), np.kron(np.eye(m), D.T)],
431 |                          np.c_[np.zeros((l, n)), -np.eye(l)]])  # LHS of the inequ. constr.
432 |         h = matrix(np.r_[self.C*probs, self.C*(1-probs), np.zeros(m*(p-1))])  # RHS of the inequ.
433 |         A = matrix(np.c_[np.kron(np.ones(m), np.eye(p)), np.zeros((p, l))])  # LHS of the equ. constr.
434 |         b = matrix(np.zeros(p))  # RHS of the equality constraint
435 | 
436 |         # See qp_nc for usage instructions
437 |         solvers.options['show_progress'] = self.verbose
438 |         if self.tol > 0:
439 |             solvers.options['reltol'] = self.tol
440 |         self.time = time.process_time()  # Store beginning time
441 |         sol = solvers.qp(K, q, G, h, A, b)  # Solve the dual opt. problem
442 |         self.time = time.process_time() - self.time  # Store training time
443 | 
444 |         # Set coefs
445 |         self.coefs = np.reshape(sol['x'][:n], (m, p)).T
446 |         self.sol = sol
447 | 
448 |         # Set the intercept (the quantile property is not satisfied)
449 |         self.intercept = np.asarray(sol['y']).squeeze()
450 | 
451 |     def qp(self, K, y):
452 |         p = np.size(self.probs)  # Number of quantiles to predict
453 |         n = K.shape[0]  # Number of variables
454 |         probs = np.kron(np.ones(n//p), self.probs)  # Quantile levels
455 | 
456 |         K = matrix(K)  # Quadratic part of the objective
457 |         q = matrix(-np.kron(y, np.ones(p)))  # Linear part of the objective
458 |         G = matrix(np.r_[np.eye(n), -np.eye(n)])  # LHS of the inequ. constr.
459 |         h = matrix(np.r_[self.C*probs, self.C*(1-probs)])  # RHS of the inequ.
460 |         A = matrix(np.kron(np.ones(n//p), np.eye(p)))  # LHS of the equ. constr.
461 |         b = matrix(np.zeros(p))  # RHS of the equality constraint
462 | 
463 |         # See qp_nc for usage instructions
464 |         solvers.options['show_progress'] = self.verbose
465 |         if self.tol > 0:
466 |             solvers.options['reltol'] = self.tol
467 |             # solvers.options['feastol'] = self.tol * 1./10
468 |         self.time = time.process_time()  # Store beginning time
469 |         sol = solvers.qp(K, q, G, h, A, b)  # Solve the dual opt. problem
470 |         self.time = time.process_time() - self.time  # Store training time
471 | 
472 | 
473 |         # Set coefs
474 |         self.coefs = np.reshape(sol['x'], (n//p, p)).T
475 |         self.sol = sol
476 | 
477 |         # Set the intercept
478 |         # self.intercept = np.asarray(sol['y']).squeeze()
479 | 
480 |         # Set optimal objective value
481 |         # Either this
482 |         # self.obj = np.asarray(0.5 * sol['x'].T * K * sol['x'] \
483 |         #     + q.T * sol['x'])
484 |         # self.obj = float(self.obj.squeeze())
485 |         # Or that
486 |         # self.obj = sol['primal objective']
487 | 
488 |     def qp_eps(self, K, y):  #, coefs_init):
489 |         p = np.size(self.probs)  # Number of quantiles to predict
490 |         n = K.shape[0]  # Number of variables
491 |         probs = np.kron(np.ones(n//p), self.probs)  # Quantile levels
492 | 
493 |         q = matrix(-np.kron(y, np.ones(p)))  # Linear part of the objective
494 |         G = matrix(np.r_[np.eye(n), -np.eye(n)])  # LHS of the inequ. constr.
495 |         h = matrix(np.r_[self.C*probs, self.C*(1-probs)])  # RHS of the inequ.
496 |         A = matrix(np.kron(np.ones(n//p), np.eye(p)))  # LHS of the equ. constr.
497 |         b = matrix(np.zeros(p))  # RHS of the equality constraint
498 |         # Initialization is disabled because it seems to slow down convergence
499 |         # initvals = None if self.coefs_init is None else matrix(coefs_init)
500 |         initvals = None
501 | 
502 |         # See qp_nc for usage instructions
503 |         solvers.options['show_progress'] = self.verbose
504 |         if self.tol > 0:
505 |             solvers.options['reltol'] = self.tol
506 |             # solvers.options['feastol'] = self.tol * 1./10
507 | 
508 |         self.time = time.process_time()  # Store beginning time
509 |         if self.eps == 0:
510 |             K = matrix(K)  # Quadratic part of the objective
511 |             sol = solvers.qp(K, q, G, h, A, b, initvals=initvals)  # Solve the dual opt. problem
512 |             coefs = np.reshape(sol['x'], (n//p, p)).T
513 |         else:
514 |             solvers.options['show_progress'] = False
515 |             mu = np.ones(n//p)  # Penalty for l1-l2 norm
516 |             coefs = np.r_[0]  # Initialization for computing improvement
517 | 
518 |             start_it = time.process_time()
519 |             for it in range(self.max_iter):
520 |                 mu = self.eps / mu
521 |                 Kmu = matrix(K + np.diag(np.kron(mu, np.ones(p))))  # Quadratic part of the objective
522 |                 sol = solvers.qp(Kmu, q, G, h, A, b, initvals=initvals)  # Solve the dual opt. problem
523 |                 improvement = np.linalg.norm(coefs.T.ravel() -
524 |                                              np.asarray(sol['x']).ravel()) / (self.C*p)
525 |                 coefs = np.reshape(sol['x'], (n//p, p)).T
526 |                 if self.verbose:
527 |                     print("it: %d improvement: %0.2e" % (it, improvement))
528 |                 if improvement < self.lag_tol:
529 |                     break
530 |                 if self.max_time > 0 and time.process_time() - start_it > self.max_time:
531 |                     break
532 |                 # Warm-start is disabled because it seems to slow down convergence
533 |                 # initvals = sol['x']
534 |                 mu = np.linalg.norm(coefs, axis=0)
535 |                 mu[mu < 1e-32] = 1e-32
536 |         self.time = time.process_time() - self.time  # Store training time
537 | 
538 |         # Set coefs
539 |         self.coefs = coefs
540 |         self.sol = sol
541 | 
542 |     def coneqp_eps(self, K, y):  #, coefs_init):
543 |         p = np.size(self.probs)  # Number of quantiles to predict
544 |         n = K.shape[0]  # Number of variables
545 |         m = n//p  # Number of points
546 |         probs = np.kron(np.ones(m), self.probs)  # Quantile levels
547 | 
548 |         # Initialization is disabled because it seems to slow down convergence
549 |         # initvals = None if self.coefs_init is None else matrix(coefs_init)
550 |         initvals = None
551 | 
552 |         # See qp_nc for usage instructions
553 |         solvers.options['show_progress'] = self.verbose
554 |         solvers.options['maxiters'] = self.max_iter
555 |         if self.tol > 0:
556 |             solvers.options['reltol'] = self.tol
557 |             # solvers.options['feastol'] = self.tol * 1./10
558 | 
559 |         self.time = time.process_time()  # Store beginning time
560 |         if self.eps == 0:
561 |             K = matrix(K)  # Quadratic part of the objective
562 |             q = matrix(-np.kron(y, np.ones(p)))  # Linear part of the objective
563 |             G = matrix(np.r_[np.eye(n), -np.eye(n)])  # LHS of the inequ. constr.
564 |             h = matrix(np.r_[self.C*probs, self.C*(1-probs)])  # RHS of the inequ.
565 |             A = matrix(np.kron(np.ones(m), np.eye(p)))  # LHS of the equ. constr.
566 |             b = matrix(np.zeros(p))  # RHS of the equality constraint
567 | 
568 |             sol = solvers.qp(K, q, G, h, A, b, initvals=initvals)  # Solve the dual opt. problem
569 |             coefs = np.reshape(sol['x'], (m, p)).T
570 |         else:
571 |             def buildG(m, p):
572 |                 n = m*p
573 | 
574 |                 # Get the norm bounds (m last variables)
575 |                 A = np.zeros(p+1)
576 |                 A[0] = -1
577 |                 A = np.kron(np.eye(m), A).T
578 |                 # Get the m p-long vectors
579 |                 B = np.kron(np.eye(m), np.c_[np.zeros(p), np.eye(p)].T)
580 |                 # Box constraint
581 |                 C = np.c_[np.r_[np.eye(n), -np.eye(n)], np.zeros((2*n, m))]
582 |                 # Set everything together
583 |                 C = np.r_[C, np.c_[B, A]]
584 |                 return C
585 | 
586 |             # 2*n non-negative variables
587 |             # [p+1]*m SOC variables
588 | 
589 |             K = matrix(np.r_[np.c_[K, np.zeros((n, m))], np.zeros((m, n+m))])  # Quadratic part of the objective
590 |             q = matrix(np.r_[-np.kron(y, np.ones(p)), np.ones(m)*self.eps])  # Linear part of the objective
591 |             G = matrix(buildG(m, p))  # LHS of the inequ. constr.
592 |             h = matrix(np.r_[self.C*probs, self.C*(1-probs), np.zeros(m*(p+1))])  # RHS of the inequ.
593 |             A = matrix(np.c_[np.kron(np.ones(m), np.eye(p)), np.zeros((p, m))])  # LHS of the equ. constr.
594 |             b = matrix(np.zeros(p))  # RHS of the equality constraint
595 |             dims = {'l': 2*n, 'q': [p+1]*m, 's': []}
596 | 
597 |             sol = solvers.coneqp(K, q, G, h, dims, A, b, initvals=initvals)  # Solve the dual opt. problem
598 |             coefs = np.reshape(sol['x'][:n], (m, p)).T
599 |         self.time = time.process_time() - self.time  # Store training time
600 | 
601 |         # Set coefs
602 |         self.coefs = coefs
603 |         self.sol = sol
604 | 
605 |     def sdca(self, Kin, Kout, y, coefs_init):
606 |         n_samples = Kin.shape[0]
607 |         n_dim = Kout.shape[0]
608 | 
609 |         # For block descent, the step size depends on the maximum eigenvalue of Kout
610 |         # Same as np.linalg.eigvalsh(Kout)[-1]
611 |         Kout_lambda_max = eigvalsh(Kout, eigvals=(n_dim-1, n_dim-1))[0]
612 | 
613 |         # Data
614 |         dsin = get_dataset(Kin, order="c")
615 |         dsout = get_dataset(Kout, order="c")
616 | 
617 |         # Initialization
618 |         # Used if done in fit
619 |         self.coefs = np.zeros(n_dim*n_samples, dtype=np.float64) if \
620 |             self.coefs_init is None else coefs_init
621 |         # What is below was moved to fit
622 |         # if self.coefs_init is None:
623 |         #     self.coefs = np.zeros(n_dim*n_samples, dtype=np.float64)
624 |         # elif isinstance(self.coefs_init, str) and self.coefs_init.lower() == "svr":
625 |         #     # Estimate the conditional median
626 |         #     svr = SVR(C=self.C/2, kernel="precomputed", epsilon=self.eps)
627 |         #     svr.fit(Kin, y)
628 |         #     svr_dual = np.zeros(y.shape)
629 |         #     svr_dual[svr.support_] = svr.dual_coef_[0, :]
630 |         #     self.coefs = np.kron(svr_dual, np.ones(n_dim))
631 |         # else:
632 |         #     self.coefs = self.coefs_init.T.ravel()
633 | 
634 |         # Array for objective values
635 |         # inner_obj = np.ones(self.max_iter)
636 | 
637 |         # Some parameters
638 |         n_calls = n_samples if self.n_calls is None else self.n_calls
639 |         rng = check_random_state(self.random_state)
640 |         status = np.zeros(1, dtype=np.int16)
641 | 
642 |         # Call to the solver
643 |         self.time = time.process_time()  # Store beginning time
644 |         _prox_sdca_intercept_fit(self, dsin, dsout, y, self.coefs, self.alpha,
645 |                                  self.C, self.eps, self.stepsize_factor,
646 |                                  self.probs, self.max_iter, self.tol,
647 |                                  self.callback, n_calls, self.max_time,
648 |                                  self.n_gap, self.gap_time_ratio,
649 |                                  self.verbose, rng, status, self.active_set,
650 |                                  Kout_lambda_max)
651 |         # , inner_obj)
652 |         self.time = time.process_time() - self.time  # Store training time
653 | 
654 |         # Set coefs
655 |         self.coefs = np.reshape(self.coefs, (n_samples, n_dim)).T
656 | 
657 |         # Save inner objective values
658 |         # self.inner_obj = inner_obj[inner_obj < 0]
659 | 
660 |         # Resolution status
661 |         if status[0] == 1:
662 |             self.status = "Optimal solution found"
663 |         elif status[0] == 2:
664 |             self.status = "Maximum iteration reached"
665 |         elif status[0] == 3:
666 |             self.status = "Maximum time reached"
667 |         else:
668 |             self.status = ""
669 | 
670 |         # Set the intercept
671 |         # self.intercept = 0.  # Erase the previous intercept before prediction
672 |         # self.intercept = [np.percentile(y-pred, 100.*prob) for (pred, prob)\
673 |         #                   in zip(self.predict(self.X), self.probs)]
674 |         # self.intercept = np.asarray(self.intercept)
675 | 
676 |         # Set optimal objective value
677 |         # self.obj = 0.5 * np.trace(
678 |         #     np.dot(self.coefs.T, np.dot(Kout, np.dot(self.coefs, Kin)))) \
679 |         #     - np.sum(self.coefs * y)
680 | 
681 |     def al(self, Kin, Kout, y, mugrow, coefs_init):
682 |         n_samples = Kin.shape[0]
683 |         n_dim = Kout.shape[0]
684 | 
685 |         dsin = get_dataset(Kin, order="c")
686 |         dsout = get_dataset(Kout, order="c")
687 | 
688 |         # Initialization
689 |         # Used if done in fit
690 |         coefs = np.zeros(n_dim*n_samples, dtype=np.float64) if \
691 |             self.coefs_init is None else coefs_init
692 |         # coefs = np.zeros(n_dim*n_samples, dtype=np.float64) \
693 |         #     if self.coefs_init is None else self.coefs_init.T.ravel()
694 |         b = np.zeros(n_dim)  # Intercept
695 | 
696 |         n_calls = n_samples if self.n_calls is None else self.n_calls
697 |         rng = check_random_state(self.random_state)
698 | 
699 |         # Parameters of the outer loop
700 |         if mugrow > 1:
701 |             mu = 2  # Factor of the Lagrangian penalization
702 |         elif mugrow == 1:
703 |             mu = 10
704 |         else:
705 |             raise ValueError("mugrow must be >= 1")
706 |         # mugrow = 4  # Growing factor of the penalization
707 |         prev_err = float('inf')  # Previous error for the outer loop
708 | 
709 |         # dual_tol = np.sqrt(n) * self.C * self.dual_tol  # Inner loop
710 |         # Loop
711 |         self.time = time.process_time()  # Store beginning time
712 |         for ito in range(self.max_iter):
713 |             _prox_sdca_al_fit(self, dsin, dsout, y, coefs, self.alpha,
714 |                               self.C, self.stepsize_factor, self.probs, b, mu,
715 |                               self.max_iter, self.tol, self.callback, n_calls,
716 |                               self.verbose, rng)
717 | 
718 |             # Update the intercept
719 |             # Gradient of the objective wrt the intercept
720 |             der = np.reshape(coefs, (n_samples, n_dim)).sum(axis=0)
721 |             b += mu * der  # Intercept update
722 |             mu *= mugrow  # mu update
723 | 
724 |             # Stopping criterion
725 |             lag_err = np.sum(der**2)  # Dual error
726 |             if lag_err < self.lag_tol or \
727 |                     np.abs(lag_err/prev_err - 1) < self.lag_tol:
728 |                 break
729 |             prev_err = lag_err  # Update the previous Lagrangian error
730 | 
731 |             # tol is the objective value to reach
732 |             if self.tol < 0:
733 |                 # Project coefs on the constraints
734 |                 proj_coefs = proj_dual(np.reshape(coefs, (n_samples, n_dim)).T,
735 |                                        self.C, self.probs)
736 | 
737 |                 # Compute the objective value
738 |                 obj = 0.5 * np.trace(np.dot(
739 |                     proj_coefs.T, np.dot(Kout, np.dot(proj_coefs, Kin)))) \
740 |                     - np.sum(proj_coefs * y)
741 | 
742 |                 if self.verbose:
743 |                     print("it: %d obj: %0.2f" % (ito, obj))
744 | 
745 |                 # Stopping criterion
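                # (obj is computed from the projected, feasible coefficients,
                # so it is comparable to a reference objective value)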
746 |                 if obj <= self.tol:
747 |                     if self.verbose:
748 |                         print("Ground truth objective value reached.")
749 |                     break
750 | 
751 |             # Maximum training time
752 |             current_time = time.process_time() - self.time  # Current training time
753 |             if current_time > self.al_max_time:
754 |                 if self.verbose:
755 |                     print("Maximum training time reached.")
756 |                 break
757 |         else:
758 |             if self.verbose:
759 |                 print('Did not converge after {} iterations.'.format(ito+1))
760 | 
761 |         self.time = time.process_time() - self.time  # Store training time
762 | 
763 |         # Set coefs
764 |         self.coefs = np.reshape(coefs, (n_samples, n_dim)).T
765 | 
766 |         # Set the intercept
767 |         # self.intercept = b
768 | 
769 |         # Set optimal objective value
770 |         # self.obj = 0.5 * np.trace(
771 |         #     np.dot(self.coefs.T, np.dot(Kout, np.dot(self.coefs, Kin)))) \
772 |         #     - np.sum(self.coefs * y)
773 | 
774 |     def mtl(self, y):
775 |         d = self.X.shape[1]  # Data dimension
776 |         p = self.probs.shape[0]  # Number of tasks
777 |         verbose = self.verbose
778 |         self.verbose = False
779 | 
780 |         self.D = np.eye(d) / d  # Initialize D
781 |         err = np.inf
782 | 
783 |         self.time = time.process_time()  # Store beginning time
784 |         for it in range(self.max_iter):
785 |             Kin = np.dot(self.X, self.D.dot(self.X.T))  # Compute input kernel
786 |             self.qp(np.kron(Kin, np.eye(p)), y)  # Solve QR problem (fixed D)
787 | 
788 |             if it < self.max_iter-1:
789 |                 # Update D
790 |                 B = self.coefs.dot(self.X).dot(self.D)  # Coefficients of the linear predictor
791 | 
792 |                 # Update with eigenvalue decomposition
793 |                 # C = B.T.dot(B)
794 |                 # e, V = np.linalg.eigh(C)  # Eigenvalues and eigenvectors
795 |                 # e[e<0] = 0
796 |                 # self.D = V.dot(np.diag(np.sqrt(e)).dot(V.T))
797 |                 # self.D /= np.trace(self.D)
798 | 
799 |                 # Update with singular value decomposition
800 |                 _, s, V = np.linalg.svd(B)
801 |                 s = np.r_[s, np.zeros(max(0, d-p))]
802 |                 D = np.dot(V.T, np.diag(s).dot(V)) / s.sum()
803 |                 err = np.linalg.norm(D-self.D)
804 |                 self.D = D
805 | 
806 |             if verbose:
807 |                 obj = -0.5 * np.trace(np.dot(
808 |                     self.coefs.T, np.dot(self.coefs, Kin))) \
809 |                     + np.sum(self.coefs * y)
810 |                 print(it, obj, err, self.tol)
811 | 
812 |             if err < self.tol:
813 |                 if verbose:
814 |                     print("Converged.")
815 |                 break
816 |         else:
817 |             if verbose:
818 |                 print('Did not converge after {} iterations.'.format(it+1))
819 | 
820 |         self.time = time.process_time() - self.time  # Store training time
821 |         self.verbose = verbose
822 | 
823 |     def pinball_loss(self, pred, y):
824 |         y = np.ravel(y)
825 |         residual = y - pred
826 |         loss = np.sum([prob*np.fmax(0, res) for (res, prob) in
827 |                        zip(residual, self.probs)], axis=1)
828 |         loss += np.sum([(prob-1)*np.fmin(0, res) for (res, prob) in
829 |                         zip(residual, self.probs)], axis=1)
830 |         loss = loss * 1./y.size
831 |         return loss
832 | 
833 |     def qloss(self, pred, y):
834 |         y = np.ravel(y)
835 |         residual = y - pred
836 |         loss = np.sum([res < 0 for (res, prob) in
837 |                        zip(residual, self.probs)], axis=1)
838 |         loss = loss * 1./y.size - self.probs
839 |         return loss
840 | 
841 |     def crossing_loss(self, pred):
842 |         ind = np.argsort(self.probs)
843 |         loss = np.sum([np.fmax(0, -np.diff(res)) for res in pred[ind].T],
844 |                       axis=0)
845 |         loss = loss * 1./pred.shape[1]
846 |         return loss
847 | 
848 |     def ind_sv(self):
849 |         group_norm = np.linalg.norm(self.coefs, axis=0) / (self.C * len(self.probs))
850 |         return np.where(group_norm > self.sv_tol)[0]
851 | 
852 |     def num_sv(self):
853 |         return self.ind_sv().size
854 | 
855 | 
856 | class QRegMTL(BaseEstimator):
857 |     def __init__(self, gamma_in=None, Creg=None, location=True,
858 |                  n_landmarks=None, **args):
859 |         """
860 |         Quantile Regression with multi-task learning.
861 | 
862 |         Ref: Parametric Task Learning, by Ichiro Takeuchi, Tatsuya Hongo,
863 |         Masashi Sugiyama and Shinichi Nakajima (NIPS 2013).
864 | 
865 |         Methodology:
866 |             First, estimate the conditional mean function E[Y|X=x] by least-squares
867 |             regression, and compute the residuals r_i = y_i - E[Y|X=x_i]. Then,
868 |             apply multi-task learning to (x_i, r_i) and estimate a conditional
869 |             quantile function by Q(x|p) = E[Y|X=x] + h(x|p), where h(.|p) is
870 |             the estimated quantile regression function fitted to the residuals.
871 | 
872 |         location: whether to use a location model (as proposed in the paper)
873 |         gamma_in: gamma parameter for the input RBF map
874 |         n_landmarks: number of landmarks for the input mapping. When None,
875 |             use all training points. When less than 1, consider it as a ratio
876 |             of training points. Otherwise, it is the number of landmarks.
877 |         Creg: cost parameter for the ridge regression (location model).
878 |             Positive scalar. When it is None, use least-squares regression.
879 |         C: cost parameter (upper bound of dual variables). Positive scalar.
880 |         probs: probabilities (quantile levels)
881 |         max_iter: maximum number of iterations
882 |         tol: prescribed tolerance
883 |         """
884 |         self.gamma_in = gamma_in
885 |         self.location = location
886 |         self.Creg = Creg
887 |         self.n_landmarks = n_landmarks
888 | 
889 |         if 'alg' in args:
890 |             del args['alg']
891 |         self.reg = QRegressor(alg='mtl', **args)
892 | 
893 |     def predict(self, X):
894 |         """
895 |         Predict the conditional quantiles
896 | 
897 |         Parameters:
898 |             X: data in rows (numpy array)
899 | 
900 |         Returns:
901 |             y: predictions, one row per prescribed quantile level
902 |         """
903 | 
904 |         X = np.asarray(X)
905 |         if X.ndim == 1:
906 |             # X = np.asarray([X]).T
907 |             # Data has a single feature
908 |             X = X.reshape(-1, 1)
909 | 
910 |         # Map the data with RBF kernel
911 |         Din = dist.cdist(X, self.X, 'sqeuclidean')
912 |         X_map = np.exp(-self.gamma_in * Din)
913 | 
914 |         # Prediction
915 |         pred = self.reg.predict(X_map)
916 |         if self.location:
917 |             pred += self.lsr.predict(X_map) * self.std_residue + \
918 |                 self.mean_residue
919 |         # pred += self.lsr.predict(X_map) if self.location else 0
920 | 
921 |         return pred
922 | 
923 |     def fit(self, X, y):
924 |         """
925 |         Fit the model.
926 | 
927 |         X: data in rows (numpy array)
928 |         y: targets in rows (numpy array)
929 |         """
930 | 
931 |         self.X = np.asarray(X)  # Training data as landmarks
932 |         if self.X.ndim == 1:
933 |             # self.X = np.asarray([X]).T
934 |             # Data has a single feature
935 |             self.X = self.X.reshape(-1, 1)
936 | 
937 |         # If no gamma_in is specified, take 0.5 / q, where q is the 0.7-quantile
938 |         # of the squared distances
939 |         Din = dist.pdist(self.X, 'sqeuclidean')
940 |         if self.gamma_in is None:
941 |             self.gamma_in = 1. / (2. * np.percentile(Din, 70.))
942 | 
943 |         # Map the data with RBF kernel
944 |         if not self.n_landmarks:  # n_landmarks = None => use all data
945 |             X_map = np.exp(-self.gamma_in * dist.squareform(Din))  # All data as landmarks
946 |         else:
947 |             if self.n_landmarks < 1:  # Ratio
948 |                 n_landmarks = int(np.floor(self.n_landmarks * self.X.shape[0]))
949 |             else:
950 |                 n_landmarks = self.n_landmarks
951 |             L = self.X[np.random.randint(self.X.shape[0], size=n_landmarks)]  # Random landmarks
952 |             Din = dist.cdist(self.X, L, 'sqeuclidean')
953 |             self.X = L  # Store landmarks
954 |             X_map = np.exp(-self.gamma_in * Din)
955 | 
956 |         # Least-squares regression
957 |         if self.location:
958 |             self.lsr = LinearRegression() if not self.Creg \
959 |                 else Ridge(alpha=1./self.Creg)
960 |             self.lsr.fit(X_map, y)
961 |             residue = y - self.lsr.predict(X_map)
962 | 
963 |             self.mean_residue = residue.mean()
964 |             self.std_residue = residue.std()
965 |             residue = (residue - self.mean_residue) / self.std_residue
966 |         else:
967 |             self.lsr = None
968 |             self.mean_residue = None
969 |             self.std_residue = None
970 |             residue = y
971 | 
972 |         # Fit on training data
973 |         self.reg.fit(X_map, residue)
974 | 
975 |     def score(self, X, y, sample_weight=None):
976 |         # Pinball loss
977 |         return 1 - self.pinball_loss(self.predict(X), y).mean()
978 | 
979 |     def get_params(self, deep=True):
980 |         p = super(QRegMTL, self).get_params()
981 |         p.update(self.reg.get_params())
982 |         return p
983 | 
984 |     def set_params(self, **parameters):
985 |         for parameter in ['gamma_in', 'location', 'Creg', 'reg', 'n_landmarks']:
986 |             if parameter in parameters:
987 |                 setattr(self, parameter, parameters[parameter])
988 |                 del parameters[parameter]
989 |         self.reg.set_params(**parameters)
990 |         return self
991 | 
992 |     def pinball_loss(self, pred, y):
993 |         return self.reg.pinball_loss(pred, y)
994 | 
995 |     def qloss(self, pred, y):
996 |         return self.reg.qloss(pred, y)
997 | 
998 |     def crossing_loss(self, pred):
999 |         return self.reg.crossing_loss(pred)
--------------------------------------------------------------------------------
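
Usage sketch (illustrative, not part of the repository sources): a minimal end-to-end run of the estimators defined above, assuming the Cython extensions have been built (e.g. with "make inplace") so that the qreg package is importable; the quantile levels and hyper-parameters below are arbitrary.

from qreg import QRegressor, QRegMTL, toy_data

probs = [0.1, 0.5, 0.9]                              # Quantile levels to estimate
x, y, true_quantiles = toy_data(n=100, probs=probs)  # 1-D toy data

reg = QRegressor(C=100., probs=probs, gamma_out=1e-2)  # Default alg='coneqp'
reg.fit(x, y)
pred = reg.predict(x)    # One row of predictions per quantile level
print(reg.score(x, y))   # 1 - mean pinball loss
print(reg.num_sv())      # Number of detected support vectors

mtl = QRegMTL(probs=probs, C=100.)  # Multi-task variant on RBF features
mtl.fit(x, y)
print(mtl.predict(x).shape)  # (len(probs), len(x))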