├── man ├── kha.Rd ├── spirals.Rd ├── reuters.Rd ├── plot.Rd ├── predict.kqr.Rd ├── as.kernelMatrix.Rd ├── musk.Rd ├── ranking-class.Rd ├── promotergene.Rd ├── specc-class.Rd ├── inchol-class.Rd ├── spam.Rd ├── ipop-class.Rd ├── kcca-class.Rd ├── income.Rd ├── prc-class.Rd ├── couple.Rd ├── kfa-class.Rd ├── kmmd-class.Rd ├── kernel-class.Rd ├── predict.gausspr.Rd ├── kha-class.Rd ├── kpca-class.Rd ├── onlearn.Rd ├── predict.ksvm.Rd ├── ipop.Rd ├── csi-class.Rd ├── inlearn.Rd ├── sigest.Rd ├── stringdot.Rd ├── onlearn-class.Rd ├── kcca.Rd ├── vm-class.Rd ├── dots.Rd ├── inchol.Rd ├── gausspr-class.Rd ├── lssvm-class.Rd └── kqr-class.Rd ├── src ├── Makevars ├── Makevars.win ├── misc.c ├── dprecond.c ├── dgpnrm.c ├── init.c ├── svm.h ├── inductionsort.cpp ├── dgpstep.c ├── ilcpfactory.h ├── isafactory.h ├── cweight.h ├── iweightfactory.h ├── brweight.h ├── kspectrumweight.h ├── dtrqsol.c ├── solvebqp.c ├── wkasailcp.h ├── expdecayweight.h ├── wmsufsort.h ├── errorcode.h ├── datatype.h ├── dbreakpt.c ├── cweight.cpp ├── ctable.h ├── brweight.cpp ├── wmsufsort.cpp ├── wkasailcp.cpp ├── lcp.h ├── expdecayweight.cpp ├── stringkernel.h ├── inductionsort.h ├── kspectrumweight.cpp ├── stack.h ├── ctable.cpp ├── dprsrch.c └── esa.h ├── data ├── income.rda ├── musk.rda ├── spam.rda ├── reuters.rda ├── spirals.rda ├── ticdata.rda └── promotergene.rda ├── inst ├── doc │ ├── kernlab.pdf │ └── kernlab.R ├── COPYRIGHTS └── CITATION ├── R ├── kernelmatrix.R ├── kcca.R ├── sigest.R ├── couplers.R ├── kfa.R └── kha.R ├── DESCRIPTION └── NAMESPACE /man/kha.Rd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/kernlab/HEAD/man/kha.Rd -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 2 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 2 | -------------------------------------------------------------------------------- /data/income.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/kernlab/HEAD/data/income.rda -------------------------------------------------------------------------------- /data/musk.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/kernlab/HEAD/data/musk.rda -------------------------------------------------------------------------------- /data/spam.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/kernlab/HEAD/data/spam.rda -------------------------------------------------------------------------------- /data/reuters.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/kernlab/HEAD/data/reuters.rda -------------------------------------------------------------------------------- /data/spirals.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/kernlab/HEAD/data/spirals.rda -------------------------------------------------------------------------------- /data/ticdata.rda: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/kernlab/HEAD/data/ticdata.rda -------------------------------------------------------------------------------- /inst/doc/kernlab.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/kernlab/HEAD/inst/doc/kernlab.pdf -------------------------------------------------------------------------------- /data/promotergene.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/kernlab/HEAD/data/promotergene.rda -------------------------------------------------------------------------------- /R/kernelmatrix.R: -------------------------------------------------------------------------------- 1 | 2 | setGeneric("as.kernelMatrix",function(x, center = FALSE) standardGeneric("as.kernelMatrix")) 3 | setMethod("as.kernelMatrix", signature(x = "matrix"), 4 | function(x, center = FALSE) 5 | { 6 | 7 | if(center){ 8 | m <- dim(x)[1] 9 | x <- t(t(x - colSums(x)/m) - rowSums(x)/m) + sum(x)/m^2 10 | } 11 | 12 | return(new("kernelMatrix",.Data = x)) 13 | }) 14 | --------------------------------------------------------------------------------
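The centering one-liner in as.kernelMatrix() above is easy to misread, so here is a minimal R sketch (mine, not part of the package) checking that, for a symmetric kernel matrix K, it reproduces the textbook feature-space centering K - JK - KJ + JKJ, where J is the m x m matrix with all entries 1/m:

m <- 10
K <- tcrossprod(matrix(rnorm(5 * m), m))   # a symmetric linear-kernel matrix
J <- matrix(1/m, m, m)
K1 <- t(t(K - colSums(K)/m) - rowSums(K)/m) + sum(K)/m^2
K2 <- K - J %*% K - K %*% J + J %*% K %*% J
max(abs(K1 - K2))                          # ~ 1e-15, identical up to rounding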
/man/spirals.Rd: -------------------------------------------------------------------------------- 1 | \name{spirals} 2 | \alias{spirals} 3 | \title{Spirals Dataset} 4 | \description{A toy data set representing 5 | two spirals with Gaussian noise. The data was created with 6 | the \code{mlbench.spirals} function in \code{mlbench}. 7 | } 8 | \usage{data(spirals)} 9 | \format{ 10 | A matrix with 300 observations and 2 variables. 11 | } 12 | 13 | \examples{ 14 | data(spirals) 15 | plot(spirals) 16 | } 17 | \keyword{datasets} 18 | -------------------------------------------------------------------------------- /inst/COPYRIGHTS: -------------------------------------------------------------------------------- 1 | COPYRIGHT STATUS 2 | ---------------- 3 | 4 | The R code in this package is 5 | 6 | Copyright (C) 2002 Alexandros Karatzoglou 7 | 8 | the C++ code in src/ is 9 | 10 | Copyright (C) 2002 Alexandros Karatzoglou and Chi-Jen Lin 11 | the fast string kernel code is Copyright (C) Choon Hui Teo, SVN Vishwanathan and Alexandros Karatzoglou 12 | 13 | MSufSort Version 2.2 14 | is Copyright (C) 2005 Michael A. Maniscalco 15 | 16 | -------------------------------------------------------------------------------- /src/misc.c: -------------------------------------------------------------------------------- 1 | #include <stdlib.h> 2 | #include <math.h> 3 | 4 | void *xmalloc(size_t size) 5 | { 6 | void *ptr = (void *) malloc(size); 7 | return ptr; 8 | } 9 | double mymax(double a, double b) 10 | { 11 | if (a > b) 12 | return a; 13 | return b; 14 | } 15 | double mymin(double a, double b) 16 | { 17 | if (a < b) 18 | return a; 19 | return b; 20 | } 21 | double sign(double a, double b) 22 | { 23 | if (b >= 0) 24 | return fabs(a); 25 | return -fabs(a); 26 | } 27 | -------------------------------------------------------------------------------- /man/reuters.Rd: -------------------------------------------------------------------------------- 1 | \name{reuters} 2 | \alias{reuters} 3 | \alias{rlabels} 4 | \title{Reuters Text Data} 5 | \description{A small sample from the Reuters news data set.} 6 | \usage{data(reuters)} 7 | 8 | \format{ 9 | A list of 40 text documents along with the labels. \code{reuters} 10 | contains the text documents and \code{rlabels} the labels in a vector. 11 | 12 | } 13 | \details{ 14 | This dataset contains a list of 40 text documents along with the 15 | labels. The data consist of 20 documents from the \code{acq} 16 | category and 20 documents from the \code{crude} category. The labels are 17 | stored in \code{rlabels}. 18 | 19 | 20 | } 21 | \source{Reuters} 22 | \keyword{datasets} 23 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry("Manual", 2 | other = unlist(citation(auto = meta), recursive = FALSE)) 3 | 4 | bibentry("Article", 5 | title = "kernlab -- An {S4} Package for Kernel Methods in {R}", 6 | author = c(person("Alexandros", "Karatzoglou"), 7 | person("Alex", "Smola"), 8 | person("Kurt", "Hornik", 9 | email = "Kurt.Hornik@R-project.org", 10 | comment = c(ORCID = "0000-0003-4198-9911")), 11 | person("Achim", "Zeileis", 12 | email = "Achim.Zeileis@R-project.org", 13 | comment = c(ORCID = "0000-0003-0918-3766"))), 14 | journal = "Journal of Statistical Software", 15 | year = "2004", 16 | volume = "11", 17 | number = "9", 18 | pages = "1--20", 19 | doi = "10.18637/jss.v011.i09" 20 | ) 21 | -------------------------------------------------------------------------------- /src/dprecond.c: -------------------------------------------------------------------------------- 1 | #include <stdlib.h> 2 | #include <string.h> 3 | #include <math.h> 4 | #ifndef USE_FC_LEN_T 5 | # define USE_FC_LEN_T 6 | #endif 7 | #include <R_ext/Lapack.h> 8 | /* LAPACK */ 9 | /* extern int dpotf2_(char *, int *, double *, int *, int *); */ 10 | 11 | double dcholfact(int n, double *A, double *L) 12 | { 13 | /* if A is p.d. , A = L*L' 14 | if A is p.s.d. , A + lambda*I = L*L'; */ 15 | int indef, i; 16 | static double lambda = 1e-3/512/512; 17 | memcpy(L, A, sizeof(double)*n*n); 18 | F77_CALL(dpotf2)("L", &n, L, &n, &indef FCONE); 19 | if (indef != 0) 20 | { 21 | memcpy(L, A, sizeof(double)*n*n); 22 | for (i=0;i<n;i++) 23 | L[i*n+i] += lambda; 24 | F77_CALL(dpotf2)("L", &n, L, &n, &indef FCONE); 25 | } 26 | return lambda; 27 | } 28 | -------------------------------------------------------------------------------- /src/dgpnrm.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | 3 | double dgpnrm(int n, double *x, double *xl, double *xu, double *g) 4 | { 5 | /* 6 | c ********** 7 | c 8 | c Function dgpnrm 9 | c 10 | c This function computes the infinite norm of the 11 | c projected gradient at x. 12 | c 13 | c parameters: 14 | c 15 | c n is an integer variable. 16 | c On entry n is the number of variables. 17 | c On exit n is unchanged. 18 | c 19 | c x is a double precision array of dimension n. 20 | c On entry x specifies the vector x. 21 | c On exit x is unchanged. 22 | c 23 | c xl is a double precision array of dimension n. 24 | c On entry xl is the vector of lower bounds. 25 | c On exit xl is unchanged. 26 | c 27 | c xu is a double precision array of dimension n. 28 | c On entry xu is the vector of upper bounds. 29 | c On exit xu is unchanged. 30 | c 31 | c g is a double precision array of dimension n. 32 | c On entry g specifies the gradient g. 33 | c On exit g is unchanged. 34 | c 35 | c ********** 36 | */ 37 | int i; 38 | double norm = 0; 39 | 40 | for (i=0;i<n;i++) 41 | if (!((g[i] <= 0 && x[i] == xu[i]) || 42 | (g[i] >= 0 && x[i] == xl[i]))) 43 | if (fabs(g[i]) > norm) 44 | norm = fabs(g[i]); 45 | return norm; 46 | } 47 | --------------------------------------------------------------------------------
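A hypothetical R transcription of dgpnrm() (my sketch, not package code) may be easier to read than the Fortran-style comment block: the infinity norm is taken only over components that are not pinned at a bound with the gradient pointing further out of the box [xl, xu].

dgpnrm_r <- function(x, xl, xu, g) {
  free <- !((x == xu & g <= 0) | (x == xl & g >= 0))
  if (any(free)) max(abs(g[free])) else 0
}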
/man/predict.kqr.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.kqr} 2 | \alias{predict.kqr} 3 | \alias{predict,kqr-method} 4 | \title{Predict method for kernel Quantile Regression object} 5 | 6 | 7 | \description{Prediction of test data for kernel quantile regression} 8 | 9 | 10 | \usage{ 11 | \S4method{predict}{kqr}(object, newdata) 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{object}{an S4 object of class \code{kqr} created by the 17 | \code{kqr} function} 18 | \item{newdata}{a data frame, matrix, or kernelMatrix containing new data} 19 | } 20 | 21 | \value{The value of the quantile given by the computed \code{kqr} 22 | model in a vector of length equal to the number of rows of \code{newdata}. 23 | } 24 | 25 | \author{Alexandros Karatzoglou\cr 26 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}} 27 | 28 | \keyword{methods} 29 | \keyword{regression} 30 | 31 | 32 | \examples{ 33 | # create data 34 | x <- sort(runif(300)) 35 | y <- sin(pi*x) + rnorm(300,0,sd=exp(sin(2*pi*x))) 36 | 37 | # first calculate the median 38 | qrm <- kqr(x, y, tau = 0.5, C=0.15) 39 | 40 | # predict and plot 41 | plot(x, y) 42 | ytest <- predict(qrm, x) 43 | lines(x, ytest, col="blue") 44 | 45 | # calculate 0.9 quantile 46 | qrm <- kqr(x, y, tau = 0.9, kernel = "rbfdot", 47 | kpar= list(sigma=10), C=0.15) 48 | ytest <- predict(qrm, x) 49 | lines(x, ytest, col="red") 50 | } 51 | 52 | --------------------------------------------------------------------------------
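An informal calibration check one might append to the example above (my addition, reusing the y and ytest objects from the tau = 0.9 fit): roughly a tau fraction of the responses should fall below the fitted quantile curve.

mean(y <= ytest)   # should be close to 0.9 for the tau = 0.9 fit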
/src/init.c: -------------------------------------------------------------------------------- 1 | #include <R.h> 2 | #include <Rinternals.h> 3 | #include <stdlib.h> // for NULL 4 | #include <R_ext/Rdynload.h> 5 | 6 | /* .Call calls */ 7 | extern SEXP fullsubstringk(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 8 | extern SEXP smo_optim(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 9 | extern SEXP stringtv(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 10 | extern SEXP subsequencek(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 11 | extern SEXP substringk(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 12 | extern SEXP tron_optim(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 13 | 14 | static const R_CallMethodDef CallEntries[] = { 15 | {"fullsubstringk", (DL_FUNC) &fullsubstringk, 6}, 16 | {"smo_optim", (DL_FUNC) &smo_optim, 23}, 17 | {"stringtv", (DL_FUNC) &stringtv, 7}, 18 | {"subsequencek", (DL_FUNC) &subsequencek, 6}, 19 | {"substringk", (DL_FUNC) &substringk, 6}, 20 | {"tron_optim", (DL_FUNC) &tron_optim, 27}, 21 | {NULL, NULL, 0} 22 | }; 23 | 24 | void R_init_kernlab(DllInfo *dll) 25 | { 26 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 27 | R_useDynamicSymbols(dll, FALSE); 28 | } 29 | -------------------------------------------------------------------------------- /man/as.kernelMatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{as.kernelMatrix} 2 | \docType{methods} 3 | \alias{kernelMatrix-class} 4 | \alias{as.kernelMatrix} 5 | \alias{as.kernelMatrix-methods} 6 | \alias{as.kernelMatrix,matrix-method} 7 | \title{Assign kernelMatrix class to matrix objects} 8 | 9 | \description{\code{as.kernelMatrix} in package \pkg{kernlab} can be used 10 | to coerce matrix objects representing a kernel matrix to the 11 | \code{kernelMatrix} class. These matrices can then be used with the kernelMatrix 12 | interfaces which most of the functions in \pkg{kernlab} support.} 13 | 14 | \usage{ 15 | \S4method{as.kernelMatrix}{matrix}(x, center = FALSE) 16 | } 17 | \arguments{ 18 | \item{x}{matrix to be assigned the \code{kernelMatrix} class } 19 | \item{center}{center the kernel matrix in feature space (default: FALSE) } 20 | } 21 | 22 | \author{ 23 | Alexandros Karatzoglou \cr 24 | \email{alexandros.karatzoglou@ci.tuwien.ac.at} 25 | } 26 | 27 | \seealso{\code{\link{kernelMatrix}}, \code{\link{dots}}} 28 | 29 | \keyword{methods} 30 | 31 | 32 | \examples{ 33 | ## Create toy data 34 | x <- rbind(matrix(rnorm(10),,2),matrix(rnorm(10,mean=3),,2)) 35 | y <- matrix(c(rep(1,5),rep(-1,5))) 36 | 37 | ### Use as.kernelMatrix to label the Gram matrix as a kernel matrix 38 | ### which is equivalent to using a linear kernel 39 | 40 | K <- as.kernelMatrix(crossprod(t(x))) 41 | 42 | K 43 | 44 | svp2 <- ksvm(K, y, type="C-svc") 45 | 46 | svp2 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/svm.h: -------------------------------------------------------------------------------- 1 | #ifndef _LIBSVM_H 2 | #define _LIBSVM_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | struct svm_node 9 | { 10 | int index; 11 | double value; 12 | }; 13 | 14 | struct svm_problem 15 | { 16 | int l, n; 17 | double *y; 18 | struct svm_node **x; 19 | }; 20 | 21 | enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR, C_BSVC, EPSILON_BSVR, SPOC, KBB }; /* svm_type */ 22 | enum { LINEAR, POLY, RBF, SIGMOID, R, LAPLACE, BESSEL, ANOVA, SPLINE }; /* kernel_type */ 23 | 24 | struct svm_parameter 25 | { 26 | int svm_type; 27 | int kernel_type; 28 | int degree; /* for poly */ 29 | double gamma; /* for poly/rbf/sigmoid */ 30 | double coef0; /* for poly/sigmoid */ 31 | 32 | /* these are for training only */ 33 | double cache_size; /* in MB */ 34 | double eps; /* stopping criteria */ 35 | double C; /* for C_SVC, EPSILON_SVR and NU_SVR */ 36 | int nr_weight; /* for C_SVC */ 37 | int *weight_label; /* for C_SVC */ 38 | double* weight; /* for C_SVC */ 39 | double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */ 40 | double p; /* for EPSILON_SVR */ 41 | int shrinking; /* use the shrinking heuristics */ 42 | int qpsize; 43 | double Cbegin, Cstep; /* for linear kernel */ 44 | double lim; /* for bessel kernel */ 45 | double *K; /* pointer to kernel matrix */ 46 | int m; 47 | }; 48 | 49 | struct BQP 50 | { 51 | double eps; 52 | int n; 53 | double *x, *C, *Q, *p; 54 | }; 55 | 56 | 57 | #ifdef __cplusplus 58 | } 59 | #endif 60 | 61 | #endif /* _LIBSVM_H */ 62 | -------------------------------------------------------------------------------- /man/musk.Rd: -------------------------------------------------------------------------------- 1 | \name{musk} 2 | \alias{musk} 3 | \docType{data} 4 | \title{Musk data set} 5 | \description{ 6 | This dataset describes a set of 92 molecules of which 47 are judged 7 | by human experts to be musks and the remaining 45 molecules are 8 | judged to be non-musks. 9 | } 10 | \usage{data(musk)} 11 | \format{ 12 | A data frame with 476 observations on the following 167 variables. 13 | 14 | Variables 1-162 are "distance features" along rays. The distances are 15 | measured in hundredths of Angstroms.
The distances may be negative or 16 | positive, since they are actually measured relative to an origin placed 17 | along each ray. The origin was defined by a "consensus musk" surface 18 | that is no longer used. Hence, any experiments with the data should 19 | treat these feature values as lying on an arbitrary continuous scale. In 20 | particular, the algorithm should not make any use of the zero point or 21 | the sign of each feature value. 22 | 23 | Variable 163 is the distance of the oxygen atom in the molecule to a 24 | designated point in 3-space. This is also called OXY-DIS. 25 | 26 | Variable 164 is the X-displacement from the designated point. 27 | 28 | Variable 165 is the Y-displacement from the designated point. 29 | 30 | Variable 166 is the Z-displacement from the designated point. 31 | 32 | Class: 0 for non-musk, and 1 for musk 33 | } 34 | 35 | 36 | \source{ 37 | UCI Machine Learning data repository \cr 38 | } 39 | 40 | \examples{ 41 | data(musk) 42 | 43 | muskm <- ksvm(Class~.,data=musk,kernel="rbfdot",C=1000) 44 | 45 | muskm 46 | 47 | } 48 | \keyword{datasets} 49 | -------------------------------------------------------------------------------- /src/inductionsort.cpp: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the MSufSort suffix sorting algorithm (Version 2.2). 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Michael A. Maniscalco 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Michael A. Maniscalco 23 | * 24 | * ***** END LICENSE BLOCK ***** */ 25 | 26 | #include "inductionsort.h" 27 | 28 | InductionSortObject::InductionSortObject(unsigned int inductionPosition, unsigned int inductionValue, 29 | unsigned int suffixIndex) 30 | { 31 | // sort value is 64 bits long. 32 | // bits are ... 
33 | // 63 - 60: induction position (0 - 15) 34 | // 59 - 29: induction value at induction position (0 - (2^30 -1)) 35 | // 28 - 0: suffix index for the suffix sorted by induction (0 - (2^30) - 1) 36 | m_sortValue[0] = inductionPosition << 28; 37 | m_sortValue[0] |= ((inductionValue & 0x3fffffff) >> 2); 38 | m_sortValue[1] = (inductionValue << 30); 39 | m_sortValue[1] |= suffixIndex; 40 | } 41 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: kernlab 2 | Version: 0.9-33 3 | Title: Kernel-Based Machine Learning Lab 4 | Authors@R: c(person("Alexandros", "Karatzoglou", role = c("aut", "cre"), 5 | email = "alexandros.karatzoglou@gmail.com"), 6 | person("Alex", "Smola", role = "aut"), 7 | person("Kurt", "Hornik", role = "aut", 8 | email = "Kurt.Hornik@R-project.org", 9 | comment = c(ORCID = "0000-0003-4198-9911")), 10 | person("National ICT Australia (NICTA)", 11 | role = "cph"), 12 | person(c("Michael", "A."), "Maniscalco", 13 | role = c("ctb", "cph")), 14 | person(c("Choon", "Hui"), "Teo", role = "ctb")) 15 | Description: Kernel-based machine learning methods for classification, 16 | regression, clustering, novelty detection, quantile regression 17 | and dimensionality reduction. Among other methods 'kernlab' 18 | includes Support Vector Machines, Spectral Clustering, Kernel 19 | PCA, Gaussian Processes and a QP solver. 20 | Depends: R (>= 2.10) 21 | Imports: methods, stats, grDevices, graphics 22 | LazyLoad: Yes 23 | License: GPL-2 24 | NeedsCompilation: yes 25 | Packaged: 2024-08-13 14:40:27 UTC; hornik 26 | Author: Alexandros Karatzoglou [aut, cre], 27 | Alex Smola [aut], 28 | Kurt Hornik [aut] (<https://orcid.org/0000-0003-4198-9911>), 29 | National ICT Australia (NICTA) [cph], 30 | Michael A. Maniscalco [ctb, cph], 31 | Choon Hui Teo [ctb] 32 | Maintainer: Alexandros Karatzoglou <alexandros.karatzoglou@gmail.com> 33 | Repository: CRAN 34 | Date/Publication: 2024-08-13 15:25:01 UTC 35 | -------------------------------------------------------------------------------- /man/ranking-class.Rd: -------------------------------------------------------------------------------- 1 | \name{ranking-class} 2 | \docType{class} 3 | \alias{ranking-class} 4 | \alias{edgegraph} 5 | \alias{convergence} 6 | \alias{convergence,ranking-method} 7 | \alias{edgegraph,ranking-method} 8 | \alias{show,ranking-method} 9 | 10 | \title{Class "ranking"} 11 | \description{Objects of the class \code{"ranking"} are created from the 12 | \code{ranking} function and extend the class \code{matrix}} 13 | \section{Objects from the Class}{ 14 | 15 | Objects can be created by calls of the form \code{new("ranking", ...)}. 16 | 17 | } 18 | \section{Slots}{ 19 | \describe{ 20 | \item{\code{.Data}:}{Object of class \code{"matrix"} containing the 21 | data ranking and scores} 22 | \item{\code{convergence}:}{Object of class \code{"matrix"} 23 | containing the convergence matrix} 24 | \item{\code{edgegraph}:}{Object of class \code{"matrix"} containing 25 | the edgegraph} 26 | } 27 | } 28 | \section{Extends}{ 29 | Class \code{"matrix"}, directly.
30 | } 31 | \section{Methods}{ 32 | \describe{ 33 | \item{show}{\code{signature(object = "ranking")}: displays the 34 | ranking score matrix} 35 | } 36 | } 37 | 38 | \author{Alexandros Karatzoglou \cr 39 | \email{alexandros.karatzoglou@ci.tuwien.ac.at} 40 | } 41 | 42 | \seealso{ 43 | \code{\link{ranking}} 44 | } 45 | \examples{ 46 | data(spirals) 47 | 48 | ## create data set to be ranked 49 | ran<-spirals[rowSums(abs(spirals)<0.55)==2,] 50 | 51 | ## rank points according to "relevance" to point 54 (up left) 52 | ranked<-ranking(ran,54,kernel="rbfdot", 53 | kpar=list(sigma=100),edgegraph=TRUE) 54 | 55 | ranked 56 | edgegraph(ranked)[1:10,1:10] 57 | } 58 | \keyword{classes} 59 | -------------------------------------------------------------------------------- /man/promotergene.Rd: -------------------------------------------------------------------------------- 1 | \name{promotergene} 2 | \alias{promotergene} 3 | \docType{data} 4 | \title{E. coli promoter gene sequences (DNA)} 5 | \description{ 6 | Promoters have a region where a protein (RNA polymerase) must make contact 7 | and the helical DNA sequence must have a valid conformation so that 8 | the two pieces of the contact region spatially align. 9 | The data contains DNA sequences of promoters and non-promoters. 10 | } 11 | \usage{data(promotergene)} 12 | \format{ 13 | A data frame with 106 observations and 58 variables. 14 | The first variable \code{Class} is a factor with levels \code{+} for a promoter gene 15 | and \code{-} for a non-promoter gene. 16 | The remaining 57 variables \code{V2 to V58} are factors describing the sequence. 17 | The DNA bases are coded as follows: \code{a} adenine \code{c} cytosine \code{g} 18 | guanine \code{t} thymine 19 | } 20 | 21 | \source{ 22 | \doi{10.24432/C5S01D} 23 | } 24 | \references{ 25 | Towell, G., Shavlik, J. and Noordewier, M. \cr 26 | \emph{Refinement of Approximate Domain Theories by Knowledge-Based 27 | Artificial Neural Networks.} \cr 28 | In Proceedings of the Eighth National Conference on Artificial Intelligence (AAAI-90) 29 | } 30 | 31 | 32 | \examples{ 33 | data(promotergene) 34 | 35 | ## Create classification model using Gaussian Processes 36 | 37 | prom <- gausspr(Class~.,data=promotergene,kernel="rbfdot", 38 | kpar=list(sigma=0.02),cross=4) 39 | prom 40 | 41 | ## Create model using Support Vector Machines 42 | 43 | promsv <- ksvm(Class~.,data=promotergene,kernel="laplacedot", 44 | kpar="automatic",C=60,cross=4) 45 | promsv 46 | } 47 | \keyword{datasets} 48 | -------------------------------------------------------------------------------- /src/dgpstep.c: -------------------------------------------------------------------------------- 1 | void dgpstep(int n, double *x, double *xl, double *xu, double alpha, double *w, double *s) 2 | { 3 | /* 4 | c ********** 5 | c 6 | c Subroutine dgpstep 7 | c 8 | c This subroutine computes the gradient projection step 9 | c 10 | c s = P[x + alpha*w] - x, 11 | c 12 | c where P is the projection on the n-dimensional interval [xl,xu]. 13 | c 14 | c parameters: 15 | c 16 | c n is an integer variable. 17 | c On entry n is the number of variables. 18 | c On exit n is unchanged. 19 | c 20 | c x is a double precision array of dimension n. 21 | c On entry x specifies the vector x. 22 | c On exit x is unchanged. 23 | c 24 | c xl is a double precision array of dimension n. 25 | c On entry xl is the vector of lower bounds. 26 | c On exit xl is unchanged. 27 | c 28 | c xu is a double precision array of dimension n. 29 | c On entry xu is the vector of upper bounds. 30 | c On exit xu is unchanged. 31 | c 32 | c alpha is a double precision variable. 33 | c On entry alpha specifies the scalar alpha. 34 | c On exit alpha is unchanged. 35 | c 36 | c w is a double precision array of dimension n. 37 | c On entry w specifies the vector w. 38 | c On exit w is unchanged. 39 | c 40 | c s is a double precision array of dimension n. 41 | c On entry s need not be specified. 42 | c On exit s contains the gradient projection step. 43 | c 44 | c ********** 45 | */ 46 | int i; 47 | 48 | for (i=0;i<n;i++) 49 | if (x[i] + alpha*w[i] < xl[i]) 50 | s[i] = xl[i] - x[i]; 51 | else 52 | if (x[i] + alpha*w[i] > xu[i]) 53 | s[i] = xu[i] - x[i]; 54 | else 55 | s[i] = alpha*w[i]; 56 | } 57 | --------------------------------------------------------------------------------
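Because dgpstep() is just a clipped step, a hypothetical R equivalent (my sketch, not package code) is a one-liner: project x + alpha*w onto the box [xl, xu] and subtract x.

dgpstep_r <- function(x, xl, xu, alpha, w) pmin(pmax(x + alpha * w, xl), xu) - x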
/src/ilcpfactory.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/I_LCPFactory.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | 37 | 38 | #ifndef ILCPFACTORY_H 39 | #define ILCPFACTORY_H 40 | 41 | #include "datatype.h" 42 | #include "errorcode.h" 43 | #include "lcp.h" 44 | 45 | class I_LCPFactory 46 | { 47 | 48 | public: 49 | 50 | /// Constructor 51 | I_LCPFactory(){} 52 | 53 | /// Destructor 54 | virtual ~I_LCPFactory(){} 55 | 56 | /// Methods 57 | virtual ErrorCode ComputeLCP(const SYMBOL *text, const UInt32 &length, 58 | const UInt32 *sa, LCP& lcp) = 0; 59 | 60 | }; 61 | #endif 62 | -------------------------------------------------------------------------------- /src/isafactory.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/I_SAFactory.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | 37 | 38 | //' Interface for Enhanced Suffix Array construction algorithms 39 | #ifndef I_SAFACTORY_H 40 | #define I_SAFACTORY_H 41 | 42 | #include "datatype.h" 43 | #include "errorcode.h" 44 | 45 | class I_SAFactory 46 | { 47 | 48 | public: 49 | 50 | ///Constructor 51 | I_SAFactory(){} 52 | 53 | ///Destructor 54 | virtual ~I_SAFactory(){} 55 | 56 | ///Methods 57 | virtual ErrorCode ConstructSA(SYMBOL *text, const UInt32 &len, UInt32 *&array) = 0; 58 | 59 | }; 60 | #endif 61 | -------------------------------------------------------------------------------- /man/specc-class.Rd: -------------------------------------------------------------------------------- 1 | \name{specc-class} 2 | \docType{class} 3 | \alias{specc-class} 4 | \alias{centers} 5 | \alias{size} 6 | \alias{withinss} 7 | \alias{centers,specc-method} 8 | \alias{withinss,specc-method} 9 | \alias{size,specc-method} 10 | \alias{kernelf,specc-method} 11 | 12 | 13 | \title{Class "specc"} 14 | \description{ The Spectral Clustering Class} 15 | \section{Objects from the Class}{ 16 | Objects can be created by calls of the form \code{new("specc", ...)}. 17 | or by calling the function \code{specc}. 18 | } 19 | \section{Slots}{ 20 | \describe{ 21 | \item{\code{.Data}:}{Object of class \code{"vector"} containing the cluster assignments} 22 | \item{\code{centers}:}{Object of class \code{"matrix"} containing 23 | the cluster centers} 24 | \item{\code{size}:}{Object of class \code{"vector"} containing the 25 | number of points in each cluster} 26 | \item{\code{withinss}:}{Object of class \code{"vector"} containing 27 | the within-cluster sum of squares for each cluster} 28 | \item{\code{kernelf}}{Object of class \code{kernel} containing the 29 | used kernel function.} 30 | } 31 | } 32 | \section{Methods}{ 33 | \describe{ 34 | \item{centers}{\code{signature(object = "specc")}: returns the 35 | cluster centers} 36 | \item{withinss}{\code{signature(object = "specc")}: returns the 37 | within-cluster sum of squares for each cluster} 38 | \item{size}{\code{signature(object = "specc")}: returns the number 39 | of points in each cluster } 40 | 41 | } 42 | } 43 | 44 | \author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} 45 | 46 | 47 | 48 | \seealso{ 49 | \code{\link{specc}}, 50 | \code{\link{kpca-class}} 51 | } 52 | \examples{ 53 | ## Cluster the spirals data set. 
54 | data(spirals) 55 | 56 | sc <- specc(spirals, centers=2) 57 | 58 | centers(sc) 59 | size(sc) 60 | } 61 | \keyword{classes} 62 | -------------------------------------------------------------------------------- /man/inchol-class.Rd: -------------------------------------------------------------------------------- 1 | \name{inchol-class} 2 | \docType{class} 3 | \alias{inchol-class} 4 | \alias{diagresidues} 5 | \alias{maxresiduals} 6 | \alias{pivots} 7 | \alias{diagresidues,inchol-method} 8 | \alias{maxresiduals,inchol-method} 9 | \alias{pivots,inchol-method} 10 | 11 | \title{Class "inchol" } 12 | \description{The reduced Cholesky decomposition object} 13 | 14 | \section{Objects from the Class}{Objects can be created by calls of the form \code{new("inchol", ...)}. 15 | or by calling the \code{inchol} function.} 16 | 17 | \section{Slots}{ 18 | \describe{ 19 | \item{\code{.Data}:}{Object of class \code{"matrix"} contains 20 | the decomposed matrix} 21 | 22 | \item{\code{pivots}:}{Object of class \code{"vector"} contains 23 | the pivots performed} 24 | 25 | \item{\code{diagresidues}:}{Object of class \code{"vector"} contains 26 | the diagonal residues} 27 | 28 | \item{\code{maxresiduals}:}{Object of class \code{"vector"} contains 29 | the maximum residues} 30 | } 31 | } 32 | \section{Extends}{ 33 | Class \code{"matrix"}, directly. 34 | } 35 | \section{Methods}{ 36 | \describe{ 37 | 38 | \item{diagresidues}{\code{signature(object = "inchol")}: returns 39 | the diagonal residues} 40 | 41 | \item{maxresiduals}{\code{signature(object = "inchol")}: returns 42 | the maximum residues} 43 | 44 | \item{pivots}{\code{signature(object = "inchol")}: returns 45 | the pivots performed} 46 | } 47 | } 48 | 49 | \author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} 50 | 51 | 52 | \seealso{ \code{\link{inchol}}, \code{\link{csi-class}}, \code{\link{csi}}} 53 | 54 | \examples{ 55 | data(iris) 56 | datamatrix <- as.matrix(iris[,-5]) 57 | # initialize kernel function 58 | rbf <- rbfdot(sigma=0.1) 59 | rbf 60 | Z <- inchol(datamatrix,kernel=rbf) 61 | dim(Z) 62 | pivots(Z) 63 | diagresidues(Z) 64 | maxresiduals(Z) 65 | } 66 | \keyword{classes} 67 | -------------------------------------------------------------------------------- /src/cweight.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved.
19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/ConstantWeight.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | // 12 Jul 2006 37 | // 12 Oct 2006 38 | 39 | #ifndef CWEIGHT_H 40 | #define CWEIGHT_H 41 | 42 | #include "datatype.h" 43 | #include "errorcode.h" 44 | #include "iweightfactory.h" 45 | #include 46 | 47 | 48 | //' Constant weight class 49 | class ConstantWeight : public I_WeightFactory 50 | { 51 | public: 52 | 53 | /// Constructor 54 | ConstantWeight(){} 55 | 56 | /// Destructor 57 | virtual ~ConstantWeight(){} 58 | 59 | /// Compute weight 60 | ErrorCode ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight); 61 | }; 62 | #endif 63 | -------------------------------------------------------------------------------- /src/iweightfactory.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/I_WeightFactory.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | 37 | #ifndef I_WEIGHTFACTORY_H 38 | #define I_WEIGHTFACTORY_H 39 | 40 | #include "datatype.h" 41 | #include "errorcode.h" 42 | 43 | 44 | /// Weight Factory interface for string kernel 45 | class I_WeightFactory 46 | { 47 | 48 | public: 49 | /// Constructor 50 | I_WeightFactory(){} 51 | 52 | /// Destructor 53 | virtual ~I_WeightFactory(){} 54 | 55 | /// Compute edge weight between floor interval and the end of matched substring. 56 | virtual ErrorCode ComputeWeight(const UInt32 &floor_len, 57 | const UInt32 &x_len, 58 | Real &weight) = 0; 59 | }; 60 | #endif 61 | -------------------------------------------------------------------------------- /src/brweight.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/BoundedRangeWeight.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | // 12 Jul 2006 37 | 38 | #ifndef BRWEIGHT_H 39 | #define BRWEIGHT_H 40 | 41 | #include "datatype.h" 42 | #include "errorcode.h" 43 | #include "iweightfactory.h" 44 | #include 45 | 46 | //' Bounded Range weight class 47 | class BoundedRangeWeight : public I_WeightFactory 48 | { 49 | 50 | Real n; 51 | public: 52 | 53 | /// Constructor 54 | BoundedRangeWeight(const Real &n_=1): n(n_){} 55 | 56 | /// Destructor 57 | virtual ~BoundedRangeWeight(){} 58 | 59 | /// Compute weight 60 | ErrorCode ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight); 61 | }; 62 | #endif 63 | -------------------------------------------------------------------------------- /man/spam.Rd: -------------------------------------------------------------------------------- 1 | \name{spam} 2 | \alias{spam} 3 | \title{Spam E-mail Database} 4 | \description{A data set collected at Hewlett-Packard Labs that classifies 4601 5 | e-mails as spam or non-spam. In addition to this class label there are 57 6 | variables indicating the frequency of certain words and characters in the 7 | e-mail.} 8 | \usage{data(spam)} 9 | \format{A data frame with 4601 observations and 58 variables. 10 | 11 | The first 48 variables contain the frequency of the variable name 12 | (e.g., business) in the e-mail. If the variable name starts with num (e.g., 13 | num650) then it indicates the frequency of the corresponding number (e.g., 650). 14 | The variables 49-54 indicate the frequency of the characters `;', `(', `[', `!', 15 | `$', and `#'. The variables 55-57 contain the average, longest 16 | and total run-length of capital letters. Variable 58 indicates the type of the 17 | mail and is either \code{"nonspam"} or \code{"spam"}, i.e. unsolicited 18 | commercial e-mail.} 19 | 20 | \details{ 21 | The data set contains 2788 e-mails classified as \code{"nonspam"} and 1813 22 | classified as \code{"spam"}. 23 | 24 | The ``spam'' concept is diverse: advertisements for products/web 25 | sites, make money fast schemes, chain letters, pornography... 26 | This collection of spam e-mails came from the collectors' postmaster and 27 | individuals who had filed spam. The collection of non-spam 28 | e-mails came from filed work and personal e-mails, and hence 29 | the word 'george' and the area code '650' are indicators of 30 | non-spam. These are useful when constructing a personalized 31 | spam filter. One would either have to blind such non-spam 32 | indicators or get a very wide collection of non-spam to 33 | generate a general purpose spam filter.
34 | } 35 | \source{ 36 | \doi{10.24432/C53G6X} 37 | } 38 | 39 | \references{ 40 | T. Hastie, R. Tibshirani, J.H. Friedman. 41 | \emph{The Elements of Statistical Learning}. 42 | Springer, 2001. 43 | } 44 | 45 | \keyword{datasets} 46 | -------------------------------------------------------------------------------- /src/kspectrumweight.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/KSpectrumWeight.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | // 12 Jul 2006 37 | 38 | #ifndef KSPECTRUMWEIGHT_H 39 | #define KSPECTRUMWEIGHT_H 40 | 41 | #include "datatype.h" 42 | #include "errorcode.h" 43 | #include "iweightfactory.h" 44 | #include 45 | 46 | //' K-spectrum weight class 47 | class KSpectrumWeight : public I_WeightFactory 48 | { 49 | 50 | Real k; 51 | 52 | public: 53 | 54 | /// Constructor 55 | KSpectrumWeight(const Real & k_=5.0):k(k_) {} 56 | 57 | /// Destructor 58 | virtual ~KSpectrumWeight(){} 59 | 60 | /// Compute weight 61 | ErrorCode ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight); 62 | }; 63 | #endif 64 | -------------------------------------------------------------------------------- /man/ipop-class.Rd: -------------------------------------------------------------------------------- 1 | \name{ipop-class} 2 | \docType{class} 3 | \alias{ipop-class} 4 | \alias{primal,ipop-method} 5 | \alias{dual,ipop-method} 6 | \alias{how,ipop-method} 7 | \alias{primal} 8 | \alias{dual} 9 | \alias{how} 10 | 11 | \title{Class "ipop"} 12 | \description{The quadratic problem solver class} 13 | \section{Objects from the Class}{ 14 | Objects can be created by calls of the form \code{new("ipop", ...)}. 15 | or by calling the \code{ipop} function. 
16 | } 17 | \section{Slots}{ 18 | \describe{ 19 | \item{\code{primal}:}{Object of class \code{"vector"} the primal 20 | solution of the problem} 21 | \item{\code{dual}:}{Object of class \code{"numeric"} the dual of the 22 | problem} 23 | \item{\code{how}:}{Object of class \code{"character"} convergence information} 24 | } 25 | } 26 | \section{Methods}{ 27 | \describe{ 28 | \item{primal}{\code{signature(object = "ipop")}: Return the primal of 29 | the problem} 30 | \item{dual}{\code{signature(object = "ipop")}: Return the dual of 31 | the problem} 32 | \item{how}{\code{signature(object = "ipop")}: Return information on 33 | convergence} 34 | } 35 | } 36 | \author{Alexandros Karatzoglou\cr 37 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}} 38 | 39 | \seealso{ 40 | \code{\link{ipop}} 41 | 42 | } 43 | \examples{ 44 | ## solve the Support Vector Machine optimization problem 45 | data(spam) 46 | 47 | ## sample a scaled part (300 points) of the spam data set 48 | m <- 300 49 | set <- sample(1:dim(spam)[1],m) 50 | x <- scale(as.matrix(spam[,-58]))[set,] 51 | y <- as.integer(spam[set,58]) 52 | y[y==2] <- -1 53 | 54 | ##set C parameter and kernel 55 | C <- 5 56 | rbf <- rbfdot(sigma = 0.1) 57 | 58 | ## create H matrix etc. 59 | H <- kernelPol(rbf,x,,y) 60 | c <- matrix(rep(-1,m)) 61 | A <- t(y) 62 | b <- 0 63 | l <- matrix(rep(0,m)) 64 | u <- matrix(rep(C,m)) 65 | r <- 0 66 | 67 | sv <- ipop(c,H,A,b,l,u,r) 68 | primal(sv) 69 | dual(sv) 70 | how(sv) 71 | 72 | } 73 | \keyword{classes} 74 | -------------------------------------------------------------------------------- /src/dtrqsol.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <R_ext/BLAS.h> 3 | 4 | extern double mymax(double, double); 5 | /* LEVEL 1 BLAS */ 6 | /*extern double ddot_(int *, double *, int *, double *, int *);*/ 7 | 8 | void dtrqsol(int n, double *x, double *p, double delta, double *sigma) 9 | { 10 | /* 11 | c ********** 12 | c 13 | c Subroutine dtrqsol 14 | c 15 | c This subroutine computes the largest (non-negative) solution 16 | c of the quadratic trust region equation 17 | c 18 | c ||x + sigma*p|| = delta. 19 | c 20 | c The code is only guaranteed to produce a non-negative solution 21 | c if ||x|| <= delta, and p != 0. If the trust region equation has 22 | c no solution, sigma = 0. 23 | c 24 | c parameters: 25 | c 26 | c n is an integer variable. 27 | c On entry n is the number of variables. 28 | c On exit n is unchanged. 29 | c 30 | c x is a double precision array of dimension n. 31 | c On entry x must contain the vector x. 32 | c On exit x is unchanged. 33 | c 34 | c p is a double precision array of dimension n. 35 | c On entry p must contain the vector p. 36 | c On exit p is unchanged. 37 | c 38 | c delta is a double precision variable. 39 | c On entry delta specifies the scalar delta. 40 | c On exit delta is unchanged. 41 | c 42 | c sigma is a double precision variable. 43 | c On entry sigma need not be specified. 44 | c On exit sigma contains the non-negative solution. 45 | c 46 | c ********** 47 | */ 48 | int inc = 1; 49 | double dsq = delta*delta, ptp, ptx, rad, xtx; 50 | ptx = F77_CALL(ddot)(&n, p, &inc, x, &inc); 51 | ptp = F77_CALL(ddot)(&n, p, &inc, p, &inc); 52 | xtx = F77_CALL(ddot)(&n, x, &inc, x, &inc); 53 | 54 | /* Guard against abnormal cases. */ 55 | rad = ptx*ptx + ptp*(dsq - xtx); 56 | rad = sqrt(mymax(rad, 0)); 57 | if (ptx > 0) 58 | *sigma = (dsq - xtx)/(ptx + rad); 59 | else 60 | if (rad > 0) 61 | *sigma = (rad - ptx)/ptp; 62 | else 63 | *sigma = 0; 64 | } 65 | --------------------------------------------------------------------------------
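A hypothetical R transcription of dtrqsol() (my sketch, not package code), using the same numerically stable choice between the two roots of the quadratic ||x + sigma*p||^2 = delta^2:

dtrqsol_r <- function(x, p, delta) {
  ptx <- sum(p * x); ptp <- sum(p * p); xtx <- sum(x * x)
  rad <- sqrt(max(ptx^2 + ptp * (delta^2 - xtx), 0))
  if (ptx > 0) (delta^2 - xtx) / (ptx + rad)
  else if (rad > 0) (rad - ptx) / ptp
  else 0
}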
/src/solvebqp.c: -------------------------------------------------------------------------------- 1 | #include <stdlib.h> 2 | #include <string.h> 3 | #ifndef USE_FC_LEN_T 4 | # define USE_FC_LEN_T 5 | #endif 6 | #include <R_ext/BLAS.h> 7 | /* LEVEL 1 BLAS */ 8 | /*extern double ddot_(int *, double *, int *, double *, int *); */ 9 | /* LEVEL 2 BLAS */ 10 | /*extern int dsymv_(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);*/ 11 | /* MINPACK 2 */ 12 | extern void dtron(int, double *, double *, double *, double, double, double, double, int, double); 13 | 14 | struct BQP 15 | { 16 | double eps; 17 | int n; 18 | double *x, *C, *Q, *p; 19 | }; 20 | 21 | int nfev, inc = 1; 22 | double one = 1, zero = 0, *A, *g0; 23 | 24 | int uhes(int n, double *x, double **H) 25 | { 26 | *H = A; 27 | return 0; 28 | } 29 | int ugrad(int n, double *x, double *g) 30 | { 31 | /* evaluate the gradient g = A*x + g0 */ 32 | memcpy(g, g0, sizeof(double)*n); 33 | F77_CALL(dsymv)("U", &n, &one, A, &n, x, &inc, &one, g, &inc FCONE); 34 | return 0; 35 | } 36 | int ufv(int n, double *x, double *f) 37 | { 38 | /* evaluate the function value f(x) = 0.5*x'*A*x + g0'*x */ 39 | double *t = (double *) malloc(sizeof(double)*n); 40 | F77_CALL(dsymv)("U", &n, &one, A, &n, x, &inc, &zero, t, &inc FCONE); 41 | *f = F77_CALL(ddot)(&n, x, &inc, g0, &inc) + 0.5 * F77_CALL(ddot)(&n, x, &inc, t, &inc); 42 | free(t); 43 | return ++nfev; 44 | } 45 | 46 | void solvebqp(struct BQP *qp) 47 | { 48 | /* driver for positive semidefinite quadratic programing version 49 | of tron */ 50 | int i, n, maxfev; 51 | double *x, *xl, *xu; 52 | double frtol, fatol, fmin, gtol, cgtol; 53 | 54 | n = qp->n; 55 | maxfev = 1000; /* ? */ 56 | nfev = 0; 57 | 58 | x = qp->x; 59 | xu = qp->C; 60 | A = qp->Q; 61 | g0 = qp->p; 62 | xl = (double *) malloc(sizeof(double)*n); 63 | for (i=0;i<n;i++) 64 | xl[i] = 0; 65 | 66 | fatol = 0; 67 | frtol = 1e-12; 68 | fmin = -1e+32; 69 | cgtol = 0.1; 70 | gtol = qp->eps; 71 | 72 | dtron(n, x, xl, xu, gtol, frtol, fatol, fmin, maxfev, cgtol); 73 | 74 | free(xl); 75 | } 76 | --------------------------------------------------------------------------------
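For orientation, an R analogue of the ufv()/ugrad() callbacks above (my sketch, not package code): solvebqp() asks dtron to minimize f(x) = 0.5*x'Ax + g0'x over the box 0 <= x <= C, and the callbacks merely evaluate that objective and its gradient A*x + g0.

qp_f <- function(x, A, g0) 0.5 * sum(x * (A %*% x)) + sum(g0 * x)
qp_g <- function(x, A, g0) drop(A %*% x) + g0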
/src/wkasailcp.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/W_kasai_lcp.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | 37 | 38 | #ifndef W_KASAI_LCP_H 39 | #define W_KASAI_LCP_H 40 | 41 | #include "datatype.h" 42 | #include "errorcode.h" 43 | #include "ilcpfactory.h" 44 | #include "lcp.h" 45 | 46 | 47 | /** 48 | * Kasai et al's LCP array computation algorithm 49 | * is slightly faster than Manzini's algorithm. However, 50 | * it needs inverse suffix array which costs extra memory. 51 | */ 52 | class W_kasai_lcp : public I_LCPFactory 53 | { 54 | 55 | public: 56 | 57 | /// Constructor 58 | W_kasai_lcp(){} 59 | 60 | /// Destructor 61 | virtual ~W_kasai_lcp(){} 62 | 63 | /// Compute LCP array. 64 | ErrorCode ComputeLCP(const SYMBOL *text, const UInt32 &len, 65 | const UInt32 *sa, LCP& lcp); 66 | 67 | }; 68 | #endif 69 | -------------------------------------------------------------------------------- /man/kcca-class.Rd: -------------------------------------------------------------------------------- 1 | \name{kcca-class} 2 | \docType{class} 3 | \alias{kcca-class} 4 | \alias{kcor} 5 | \alias{xcoef} 6 | \alias{ycoef} 7 | %%\alias{yvar} 8 | %%\alias{xvar} 9 | \alias{kcor,kcca-method} 10 | \alias{xcoef,kcca-method} 11 | \alias{xvar,kcca-method} 12 | \alias{ycoef,kcca-method} 13 | \alias{yvar,kcca-method} 14 | 15 | \title{Class "kcca"} 16 | \description{The "kcca" class } 17 | \section{Objects from the Class}{ 18 | Objects can be created by calls of the form \code{new("kcca", ...)}. 19 | or by calling the \code{kcca} function.
20 | } 21 | \section{Slots}{ 22 | \describe{ 23 | \item{\code{kcor}:}{Object of class \code{"vector"} describing the correlations} 24 | \item{\code{xcoef}:}{Object of class \code{"matrix"} estimated coefficients for the \code{x} variables} 25 | \item{\code{ycoef}:}{Object of class \code{"matrix"} estimated coefficients for the \code{y} variables } 26 | %% \item{\code{xvar}:}{Object of class \code{"matrix"} holds the 27 | %% canonical variates for \code{x}} 28 | %% \item{\code{yvar}:}{Object of class \code{"matrix"} holds the 29 | %% canonical variates for \code{y}} 30 | } 31 | } 32 | \section{Methods}{ 33 | \describe{ 34 | \item{kcor}{\code{signature(object = "kcca")}: returns the correlations} 35 | \item{xcoef}{\code{signature(object = "kcca")}: returns the estimated coefficients for the \code{x} variables} 36 | \item{ycoef}{\code{signature(object = "kcca")}: returns the estimated coefficients for the \code{y} variables } 37 | %% \item{xvar}{\code{signature(object = "kcca")}: returns the canonical 38 | %% variates for \code{x}} 39 | %% \item{yvar}{\code{signature(object = "kcca")}: returns the canonical 40 | %% variates for \code{y}} 41 | } 42 | } 43 | 44 | \author{Alexandros Karatzoglou \cr 45 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}} 46 | 47 | 48 | \seealso{ 49 | \code{\link{kcca}}, 50 | \code{\link{kpca-class}} 51 | } 52 | \examples{ 53 | 54 | ## dummy data 55 | x <- matrix(rnorm(30),15) 56 | y <- matrix(rnorm(30),15) 57 | 58 | kcca(x,y,ncomps=2) 59 | 60 | } 61 | \keyword{classes} 62 | -------------------------------------------------------------------------------- /src/expdecayweight.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/ExpDecayWeight.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | // 12 Jul 2006 37 | 38 | #ifndef EXPDECAYWEIGHT_H 39 | #define EXPDECAYWEIGHT_H 40 | 41 | #include "datatype.h" 42 | #include "errorcode.h" 43 | #include "iweightfactory.h" 44 | #include 45 | 46 | 47 | class ExpDecayWeight : public I_WeightFactory 48 | { 49 | 50 | public: 51 | 52 | Real lambda; 53 | 54 | /// Constructors 55 | 56 | //' NOTE: lambda shouldn't be equal to 1, otherwise there will be 57 | //' a divide-by-zero error.
58 | ExpDecayWeight(const Real &lambda_=2.0):lambda(lambda_) {} 59 | 60 | 61 | /// Destructor 62 | virtual ~ExpDecayWeight(){} 63 | 64 | 65 | /// Compute weight 66 | ErrorCode ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight); 67 | 68 | }; 69 | #endif 70 | -------------------------------------------------------------------------------- /src/wmsufsort.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/W_msufsort.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | // 13 Jul 2007 : use MSufSort v3.1 instead of v2.2 37 | 38 | // Wrapper for Michael Maniscalco's MSufSort version 3.1 algorithm 39 | #ifndef W_MSUFSORT_H 40 | #define W_MSUFSORT_H 41 | 42 | #include "datatype.h" 43 | #include "isafactory.h" 44 | #include "msufsort.h" 45 | 46 | 47 | class W_msufsort : public I_SAFactory 48 | { 49 | 50 | public: 51 | 52 | ///Variables 53 | 54 | //'Declaration of object POINTERS, no initialization needed. 55 | //'If Declaration of objects, initialize them in member initialization list. 56 | MSufSort *msuffixsorter; 57 | 58 | ///Constructor 59 | W_msufsort(); 60 | 61 | ///Destructor 62 | virtual ~W_msufsort(); 63 | 64 | ///Methods 65 | ErrorCode ConstructSA(SYMBOL *text, const UInt32 &len, UInt32 *&array); 66 | 67 | }; 68 | #endif 69 | -------------------------------------------------------------------------------- /man/income.Rd: -------------------------------------------------------------------------------- 1 | \name{income} 2 | \alias{income} 3 | \title{Income Data} 4 | \description{ 5 | Customer Income Data from a marketing survey. 6 | } 7 | \usage{data(income)} 8 | 9 | \format{ 10 | A data frame with 14 categorical variables (8993 observations). 
11 |
12 | Explanation of the variable names:
13 |
14 | \tabular{rllll}{
15 | \tab 1 \tab \code{INCOME} \tab annual income of household \tab \cr
16 | \tab \tab \tab (Personal income if single) \tab ordinal\cr
17 | \tab 2 \tab \code{SEX} \tab sex \tab nominal\cr
18 | \tab 3 \tab \code{MARITAL.STATUS} \tab marital status \tab nominal\cr
19 | \tab 4 \tab \code{AGE} \tab age \tab ordinal\cr
20 | \tab 5 \tab \code{EDUCATION} \tab educational grade \tab ordinal\cr
21 | \tab 6 \tab \code{OCCUPATION} \tab type of work \tab nominal \cr
22 | \tab 7 \tab \code{AREA} \tab how long the interviewed person has lived\tab
23 | \cr
24 | \tab \tab \tab in the San Francisco/Oakland/San Jose area \tab ordinal\cr
25 | \tab 8 \tab \code{DUAL.INCOMES} \tab dual incomes (if married) \tab nominal\cr
26 | \tab 9 \tab \code{HOUSEHOLD.SIZE} \tab persons living in the
27 | household \tab ordinal\cr
28 | \tab 10 \tab \code{UNDER18} \tab persons in household under 18 \tab ordinal\cr
29 | \tab 11 \tab \code{HOUSEHOLDER} \tab householder status \tab nominal\cr
30 | \tab 12 \tab \code{HOME.TYPE} \tab type of home \tab nominal\cr
31 | \tab 13 \tab \code{ETHNIC.CLASS} \tab ethnic classification \tab nominal\cr
32 | \tab 14 \tab \code{LANGUAGE} \tab language most often spoken at
33 | home \tab nominal\cr
34 | }
35 | }
36 | \details{
37 | A total of N=9409 questionnaires containing 502 questions were
38 | filled out by shopping mall customers in the San Francisco Bay area.
39 | The dataset is an extract from this survey. It consists of
40 | 14 demographic attributes and is a mixture of nominal and
41 | ordinal variables with many missing values.
42 | The goal is to predict the annual income of the household from the
43 | other 13 demographic attributes.
44 | }
45 | \source{
46 |  Impact Resources, Inc., Columbus, OH (1987).
47 | }
48 | \keyword{datasets}
49 |
--------------------------------------------------------------------------------
/man/prc-class.Rd:
--------------------------------------------------------------------------------
1 | \name{prc-class}
2 | \docType{class}
3 | \alias{prc-class}
4 |
5 | \alias{eig}
6 | \alias{pcv}
7 |
8 | \alias{eig,prc-method}
9 | \alias{kcall,prc-method}
10 | \alias{kernelf,prc-method}
11 | \alias{pcv,prc-method}
12 | \alias{xmatrix,prc-method}
13 |
14 | \title{Class "prc"}
15 | \description{Principal Components Class}
16 | \section{Objects of class "prc"}{Objects from the class cannot be created directly but are only contained
17 | in other classes.}
18 |
19 | \section{Slots}{
20 |   \describe{
21 |     \item{\code{pcv}:}{Object of class \code{"matrix"} containing the
22 |       principal component vectors }
23 |
24 |     \item{\code{eig}:}{Object of class \code{"vector"} containing the
25 |       corresponding eigenvalues}
26 |
27 |     \item{\code{kernelf}:}{Object of class \code{"kfunction"} containing
28 |       the kernel function used}
29 |
30 |     \item{\code{kpar}:}{Object of class \code{"list"} containing the
31 |       kernel parameters used }
32 |
33 |     \item{\code{xmatrix}:}{Object of class \code{"input"} containing
34 |       the data matrix used }
35 |
36 |     \item{\code{kcall}:}{Object of class \code{"ANY"} containing the
37 |       function call }
38 |
39 |     \item{\code{n.action}:}{Object of class \code{"ANY"} containing the
40 |       action performed on NA }
41 |   }
42 | }
43 | \section{Methods}{
44 |   \describe{
45 |
46 |     \item{eig}{\code{signature(object = "prc")}: returns the eigenvalues }
47 |
48 |     \item{kcall}{\code{signature(object = "prc")}: returns the
49 |       performed call}
50 |
51 |     \item{kernelf}{\code{signature(object = "prc")}: returns the used
52 |       kernel function}
53 |
54 |     \item{pcv}{\code{signature(object = "prc")}: returns the principal
55 |       component vectors }
56 |
57 |     \item{predict}{\code{signature(object = "prc")}: embeds new data }
58 |
59 |     \item{xmatrix}{\code{signature(object = "prc")}: returns the used
60 |       data matrix }
61 |   }
62 | }
63 |
64 | \author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
65 |
66 | \seealso{
67 |   \code{\link{kpca-class}},\code{\link{kha-class}}, \code{\link{kfa-class}}
68 | }
69 |
70 | \keyword{classes}
71 |
--------------------------------------------------------------------------------
/man/couple.Rd:
--------------------------------------------------------------------------------
1 | \name{couple}
2 | \alias{couple}
3 |
4 | \title{Probabilities Coupling function}
5 | \description{
6 |   \code{couple} is used to link class-probability estimates produced by
7 |   pairwise coupling in multi-class classification problems.
8 | }
9 | \usage{
10 | couple(probin, coupler = "minpair")
11 | }
12 |
13 | \arguments{
14 |   \item{probin}{ The pairwise coupled class-probability estimates}
15 |   \item{coupler}{The type of coupler to use. Currently \code{minpair},
16 |     \code{pkpd} and \code{vote} are supported (see the reference for more
17 |     details).
18 |     If \code{vote} is selected the returned value is a primitive estimate
19 |     based on the given votes.}
20 |
21 | }
22 | \details{
23 |   As binary classification problems are much easier to solve, many
24 |   techniques exist to decompose multi-class classification problems into
25 |   many binary classification problems (voting, error codes,
26 |   etc.).
Pairwise coupling (one against one) constructs a rule for
27 |   discriminating between every pair of classes and then selects the
28 |   class
29 |   with the most winning two-class decisions.
30 |   By using Platt's probability outputs for SVMs one can get a class
31 |   probability for each of the \eqn{k(k-1)/2} models created in the pairwise
32 |   classification. The \code{couple} function implements various techniques to combine
33 |   these probabilities.
34 | }
35 | \value{
36 |   A matrix with the resulting probability estimates.
37 | }
38 | \references{
39 |   Ting-Fan Wu, Chih-Jen Lin, Ruby C. Weng\cr
40 |   \emph{Probability Estimates for Multi-class Classification by Pairwise
41 |     Coupling}\cr
42 |   Neural Information Processing Symposium 2003 \cr
43 |   \url{https://papers.neurips.cc/paper/2454-probability-estimates-for-multi-class-classification-by-pairwise-coupling.pdf}
44 | }
45 | \author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at} }
46 |
47 |
48 |
49 |
50 | \seealso{ \code{\link{predict.ksvm}}, \code{\link{ksvm}}}
51 | \examples{
52 | ## create artificial pairwise probabilities
53 | pairs <- matrix(c(0.82,0.12,0.76,0.1,0.9,0.05),2)
54 |
55 | couple(pairs)
56 |
57 | couple(pairs, coupler="pkpd")
58 |
59 | couple(pairs, coupler = "vote")
60 | }
61 | \keyword{classif}
62 |
63 |
--------------------------------------------------------------------------------
/man/kfa-class.Rd:
--------------------------------------------------------------------------------
1 | \name{kfa-class}
2 | \docType{class}
3 | \alias{kfa-class}
4 | \alias{alpha,kfa-method}
5 | \alias{alphaindex,kfa-method}
6 | \alias{kcall,kfa-method}
7 | \alias{kernelf,kfa-method}
8 | \alias{predict,kfa-method}
9 | \alias{xmatrix,kfa-method}
10 |
11 | \title{Class "kfa"}
12 | \description{The class of the object returned by the Kernel Feature
13 |   Analysis \code{kfa} function}
14 | \section{Objects from the Class}{
15 | Objects can be created by calls of the form \code{new("kfa", ...)} or by
16 | calling the \code{kfa} function. The objects contain the features along with the
17 | alpha values.
18 | }
19 | \section{Slots}{
20 |   \describe{
21 |     \item{\code{alpha}:}{Object of class \code{"matrix"} containing the
22 |       alpha values }
23 |     \item{\code{alphaindex}:}{Object of class \code{"vector"} containing
24 |       the indices of the selected features}
25 |     \item{\code{kernelf}:}{Object of class \code{"kfunction"} containing
26 |       the kernel function used}
27 |     \item{\code{xmatrix}:}{Object of class \code{"matrix"} containing
28 |       the selected features}
29 |     \item{\code{kcall}:}{Object of class \code{"call"} containing the
30 |       \code{kfa} function call}
31 |     \item{\code{terms}:}{Object of class \code{"ANY"} containing the
32 |       formula terms}
33 |   }
34 | }
35 | \section{Methods}{
36 |   \describe{
37 |     \item{alpha}{\code{signature(object = "kfa")}: returns the alpha values }
38 |     \item{alphaindex}{\code{signature(object = "kfa")}: returns the
39 |       indices of the selected features}
40 |     \item{kcall}{\code{signature(object = "kfa")}: returns the function call }
41 |     \item{kernelf}{\code{signature(object = "kfa")}: returns the kernel
42 |       function used }
43 |     \item{predict}{\code{signature(object = "kfa")}: used to embed more
44 |       data points into the feature base}
45 |     \item{xmatrix}{\code{signature(object = "kfa")}: returns the
46 |       selected features.
}
47 |   }
48 | }
49 |
50 | \author{Alexandros Karatzoglou\cr
51 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
52 |
53 |
54 |
55 |
56 | \seealso{\code{\link{kfa}}, \code{\link{kpca-class}} }
57 |
58 | \examples{
59 | data(promotergene)
60 | f <- kfa(~.,data=promotergene)
61 | }
62 | \keyword{classes}
63 |
--------------------------------------------------------------------------------
/src/errorcode.h:
--------------------------------------------------------------------------------
1 | /* ***** BEGIN LICENSE BLOCK *****
2 | * Version: MPL 2.0
3 | *
4 | * This Source Code Form is subject to the terms of the Mozilla Public
5 | * License, v. 2.0. If a copy of the MPL was not distributed with this
6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 | *
8 | * Software distributed under the License is distributed on an "AS IS" basis,
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 | * for the specific language governing rights and limitations under the
11 | * License.
12 | *
13 | * The Original Code is the Suffix Array based String Kernel.
14 | *
15 | * The Initial Developer of the Original Code is
16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006
18 | * the Initial Developer. All Rights Reserved.
19 | *
20 | * Contributor(s):
21 | *
22 | *   Choon Hui Teo
23 | *   S V N Vishwanathan
24 | *
25 | * ***** END LICENSE BLOCK ***** */
26 |
27 |
28 | // File    : sask/Code/ErrorCode.h
29 | //
30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au)
31 | //           S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au)
32 | //
33 | // Created : 09 Feb 2006
34 | //
35 | // Updated : 24 Apr 2006
36 |
37 |
38 | #ifndef _ERRORCODE_H_
39 | #define _ERRORCODE_H_
40 |
41 | #include "datatype.h"
42 | #include <stdlib.h>
43 |
44 | // Verbosity level
45 | enum verbosity {QUIET, INFO, DEBUG1};
46 |
47 |
48 | #define ErrorCode UInt32
49 |
50 | /**
51 |  * for general use
52 |  */
53 | #define NOERROR           0
54 | #define GENERAL_ERROR     1
55 | #define MEM_ALLOC_FAILED  2
56 | #define INVALID_PARAM     3
57 | #define ARRAY_EMPTY       4
58 | #define OPERATION_FAILED  5
59 |
60 | /**
61 |  * SuffixArray
62 |  */
63 | #define MATCH_NOT_FOUND   101
64 | #define PARTIAL_MATCH     102
65 |
66 | /**
67 |  * LCP
68 |  */
69 | #define LCP_COMPACT_FAILED 201
70 |
71 |
72 | #define CHECKERROR(i) { \
73 |   if((i) != NOERROR) { \
74 |     exit(EXIT_FAILURE); \
75 |   } \
76 | }
77 |
78 |
79 | // #define MESSAGE(msg) { std::cout<<(msg)<<std::endl; }
80 |
81 | #endif
82 |
--------------------------------------------------------------------------------
/src/datatype.h:
--------------------------------------------------------------------------------
1 | /* ***** BEGIN LICENSE BLOCK *****
2 | * Version: MPL 2.0
3 | *
4 | * This Source Code Form is subject to the terms of the Mozilla Public
5 | * License, v. 2.0. If a copy of the MPL was not distributed with this
6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 | *
8 | * Software distributed under the License is distributed on an "AS IS" basis,
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 | * for the specific language governing rights and limitations under the
11 | * License.
12 | *
13 | * The Original Code is the Suffix Array based String Kernel.
14 | *
15 | * The Initial Developer of the Original Code is
16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006
18 | * the Initial Developer. All Rights Reserved.
19 | *
20 | * Contributor(s):
21 | *
22 | *   Choon Hui Teo
23 | *   S V N Vishwanathan
24 | *
25 | * ***** END LICENSE BLOCK ***** */
26 |
27 |
28 | // File    : sask/Code/DataType.h
29 | //
30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au)
31 | //           S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au)
32 | //
33 | // Created : 09 Feb 2006
34 | //
35 | // Updated : 24 Apr 2006
36 | //           11 Oct 2006
37 |
38 |
39 | #ifndef DATATYPE_H
40 | #define DATATYPE_H
41 |
42 | // #define UInt32 unsigned int
43 | // #define UInt64 unsigned long long
44 | // #define Byte1  unsigned char
45 | // #define Byte2  unsigned short
46 | // #define Real   double
47 |
48 | typedef unsigned int  UInt32;
49 |
50 | // Seems that even using __extension__ g++ 4.6 will complain that
51 | // ISO C++ 1998 does not support 'long long' ...
52 | /*
53 | #if defined __GNUC__ && __GNUC__ >= 2
54 | __extension__ typedef unsigned long long UInt64;
55 | #else
56 | typedef unsigned long long UInt64;
57 | #endif
58 | */
59 |
60 | #include <stdint.h>
61 | typedef uint64_t UInt64;
62 |
63 | typedef unsigned char Byte1;
64 | typedef unsigned short Byte2;
65 | typedef double Real;
66 |
67 | // #define SENTINEL  '\n'
68 | // #define SENTINEL2 '\0'
69 |
70 | const char SENTINEL  = '\n';
71 | const char SENTINEL2 = '\0';
72 |
73 | #ifndef UNICODE
74 | // #  define SYMBOL  Byte1
75 | typedef Byte1 SYMBOL;
76 | #else
77 | // #  define SYMBOL  Byte2
78 | typedef Byte2 SYMBOL;
79 | #endif
80 |
81 | #endif
82 |
--------------------------------------------------------------------------------
/man/kernel-class.Rd:
--------------------------------------------------------------------------------
1 | \name{kernel-class}
2 | \docType{class}
3 | \alias{rbfkernel-class}
4 | \alias{polykernel-class}
5 | \alias{vanillakernel-class}
6 | \alias{tanhkernel-class}
7 | \alias{anovakernel-class}
8 | \alias{besselkernel-class}
9 | \alias{laplacekernel-class}
10 | \alias{splinekernel-class}
11 | \alias{stringkernel-class}
12 | \alias{fourierkernel-class}
13 | \alias{kfunction-class}
14 |
15 | \alias{kernel-class}
16 | \alias{kpar,kernel-method}
17 | \title{Class "kernel", "rbfkernel", "polykernel", "tanhkernel", "vanillakernel"}
18 | \description{ The built-in kernel classes in \pkg{kernlab}}
19 | \section{Objects from the Class}{
20 | Objects can be created by calls of the form \code{new("rbfkernel")},
21 | \code{new("polykernel")}, \code{new("tanhkernel")},
22 | \code{new("vanillakernel")}, \code{new("anovakernel")},
23 | \code{new("besselkernel")}, \code{new("laplacekernel")},
24 | \code{new("splinekernel")}, \code{new("stringkernel")},
25 |
26 | or by calling the \code{rbfdot}, \code{polydot}, \code{tanhdot},
27 | \code{vanilladot}, \code{anovadot}, \code{besseldot}, \code{laplacedot},
28 | \code{splinedot}, \code{stringdot} functions.
29 | }
30 | \section{Slots}{
31 |   \describe{
32 |     \item{\code{.Data}:}{Object of class \code{"function"} containing
33 |       the kernel function }
34 |     \item{\code{kpar}:}{Object of class \code{"list"} containing the
35 |       kernel parameters }
36 |   }
37 | }
38 | \section{Extends}{
39 | Class \code{"kernel"}, directly.
40 | Class \code{"function"}, by class \code{"kernel"}.
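A user-defined kernel can play the same role as these built-in classes.
As a minimal sketch (an editorial illustration following the custom-kernel
pattern used elsewhere in \pkg{kernlab}; the function \code{k} below is
hypothetical), any R function of two vector arguments becomes usable
wherever a kernel object is expected once its class is set to "kernel":

k <- function(x, y) { (sum(x * y) + 1) * exp(-0.001 * sum((x - y)^2)) }
class(k) <- "kernel"
## k can now be supplied as a kernel argument,
## e.g. ksvm(Class ~ ., data = promotergene, kernel = k, C = 5)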
41 | }
42 | \section{Methods}{
43 |   \describe{
44 |     \item{kernelMatrix}{\code{signature(kernel = "rbfkernel", x =
45 |       "matrix")}: computes the kernel matrix}
46 |     \item{kernelMult}{\code{signature(kernel = "rbfkernel", x =
47 |       "matrix")}: computes the kernel expansion}
48 |     \item{kernelPol}{\code{signature(kernel = "rbfkernel", x =
49 |       "matrix")}: computes the quadratic kernel expression}
50 |     \item{kernelFast}{\code{signature(kernel = "rbfkernel", x =
51 |       "matrix")}: computes parts of or the full kernel matrix, mainly
52 |       used in kernel algorithms where columns of the kernel matrix are
53 |       computed per invocation }
54 |   }
55 | }
56 |
57 | \author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at} }
58 |
59 |
60 |
61 | \seealso{
62 |   \code{\link{dots}}
63 |
64 | }
65 | \examples{
66 |
67 | rbfkernel <- rbfdot(sigma = 0.1)
68 | rbfkernel
69 | is(rbfkernel)
70 | kpar(rbfkernel)
71 |
72 | }
73 | \keyword{classes}
74 |
--------------------------------------------------------------------------------
/man/predict.gausspr.Rd:
--------------------------------------------------------------------------------
1 | \name{predict.gausspr}
2 | \alias{predict.gausspr}
3 | \alias{predict,gausspr-method}
4 | \title{predict method for Gaussian Processes object}
5 |
6 |
7 | \description{Prediction of test data using Gaussian Processes}
8 |
9 |
10 | \usage{
11 | \S4method{predict}{gausspr}(object, newdata, type = "response", coupler = "minpair")
12 | }
13 |
14 | \arguments{
15 |
16 |   \item{object}{an S4 object of class \code{gausspr} created by the
17 |     \code{gausspr} function}
18 |   \item{newdata}{a data frame or matrix containing new data}
19 |   \item{type}{one of \code{response}, \code{probabilities}
20 |     indicating the type of output: predicted values or matrix of class
21 |     probabilities}
22 |   \item{coupler}{Coupling method used in the multiclass case, can be one
23 |     of \code{minpair} or \code{pkpd} (see reference for more details).}
24 |
25 | }
26 |
27 | \value{
28 |   \item{response}{predicted classes (the classes with majority vote)
29 |     or the response value in regression.}
30 |
31 |   \item{probabilities}{matrix of class probabilities (one column for each class and
32 |     one row for each input).}
33 | }
34 |
35 |
36 | \references{
37 |   \itemize{
38 |
39 |     \item
40 |       C. K. I. Williams and D. Barber \cr
41 |       Bayesian classification with Gaussian processes. \cr
42 |       IEEE Transactions on Pattern Analysis and Machine Intelligence, 20(12):1342-1351, 1998\cr
43 |       \url{https://homepages.inf.ed.ac.uk/ckiw/postscript/pami_final.ps.gz}
44 |
45 |     \item
46 |       T.F. Wu, C.J. Lin, R.C. Weng.
\cr
47 |       \emph{Probability estimates for Multi-class Classification by
48 |         Pairwise Coupling}\cr
49 |       \url{https://www.csie.ntu.edu.tw/~cjlin/papers/svmprob/svmprob.pdf}
50 |
51 |   }
52 | }
53 | \author{Alexandros Karatzoglou\cr
54 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
55 |
56 | \keyword{methods}
57 | \keyword{regression}
58 | \keyword{classif}
59 |
60 |
61 | \examples{
62 |
63 | ## example using the promotergene data set
64 | data(promotergene)
65 |
66 | ## create test and training set
67 | ind <- sample(1:dim(promotergene)[1],20)
68 | genetrain <- promotergene[-ind, ]
69 | genetest <- promotergene[ind, ]
70 |
71 | ## train a Gaussian process classifier
72 | gene <- gausspr(Class~.,data=genetrain,kernel="rbfdot",
73 |                 kpar=list(sigma=0.015))
74 | gene
75 |
76 | ## predict gene type probabilities on the test set
77 | genetype <- predict(gene,genetest,type="probabilities")
78 | genetype
79 | }
80 |
81 |
--------------------------------------------------------------------------------
/src/dbreakpt.c:
--------------------------------------------------------------------------------
1 | extern double mymin(double, double);
2 | extern double mymax(double, double);
3 |
4 | void dbreakpt(int n, double *x, double *xl, double *xu, double *w, int *nbrpt, double *brptmin, double *brptmax)
5 | {
6 | /*
7 | c     **********
8 | c
9 | c     Subroutine dbreakpt
10 | c
11 | c     This subroutine computes the number of break-points, and
12 | c     the minimal and maximal break-points of the projection of
13 | c     x + alpha*w on the n-dimensional interval [xl,xu].
14 | c
15 | c     parameters:
16 | c
17 | c     n is an integer variable.
18 | c        On entry n is the number of variables.
19 | c        On exit n is unchanged.
20 | c
21 | c     x is a double precision array of dimension n.
22 | c        On entry x specifies the vector x.
23 | c        On exit x is unchanged.
24 | c
25 | c     xl is a double precision array of dimension n.
26 | c        On entry xl is the vector of lower bounds.
27 | c        On exit xl is unchanged.
28 | c
29 | c     xu is a double precision array of dimension n.
30 | c        On entry xu is the vector of upper bounds.
31 | c        On exit xu is unchanged.
32 | c
33 | c     w is a double precision array of dimension n.
34 | c        On entry w specifies the vector w.
35 | c        On exit w is unchanged.
36 | c
37 | c     nbrpt is an integer variable.
38 | c        On entry nbrpt need not be specified.
39 | c        On exit nbrpt is the number of break points.
40 | c
41 | c     brptmin is a double precision variable
42 | c        On entry brptmin need not be specified.
43 | c        On exit brptmin is minimal break-point.
44 | c
45 | c     brptmax is a double precision variable
46 | c        On entry brptmax need not be specified.
47 | c        On exit brptmax is maximal break-point.
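c
c     A worked example (an editorial illustration, not part of
c     the original source): with n = 2, x = (0.5, 0.5),
c     xl = (0, 0), xu = (1, 1) and w = (1, -2), the first
c     component moves toward its upper bound with break-point
c     (1 - 0.5)/1 = 0.5, while the second moves toward its
c     lower bound with break-point (0 - 0.5)/(-2) = 0.25;
c     hence nbrpt = 2, brptmin = 0.25 and brptmax = 0.5.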
48 | c
49 | c     **********
50 | */
51 |   int i;
52 |   double brpt;
53 |
54 |   *nbrpt = 0;
55 |   for (i=0;i<n;i++)
56 |     if (x[i] < xu[i] && w[i] > 0)
57 |     {
58 |       (*nbrpt)++;
59 |       brpt = (xu[i] - x[i])/w[i];
60 |       if (*nbrpt == 1)
61 |         *brptmin = *brptmax = brpt;
62 |       else
63 |       {
64 |         *brptmin = mymin(brpt, *brptmin);
65 |         *brptmax = mymax(brpt, *brptmax);
66 |       }
67 |     }
68 |     else
69 |       if (x[i] > xl[i] && w[i] < 0)
70 |       {
71 |         (*nbrpt)++;
72 |         brpt = (xl[i] - x[i])/w[i];
73 |         if (*nbrpt == 1)
74 |           *brptmin = *brptmax = brpt;
75 |         else
76 |         {
77 |           *brptmin = mymin(brpt, *brptmin);
78 |           *brptmax = mymax(brpt, *brptmax);
79 |         }
80 |       }
81 |   if (*nbrpt == 0)
82 |     *brptmin = *brptmax = 0;
83 | }
84 |
--------------------------------------------------------------------------------
/src/cweight.cpp:
--------------------------------------------------------------------------------
1 | /* ***** BEGIN LICENSE BLOCK *****
2 | * Version: MPL 2.0
3 | *
4 | * This Source Code Form is subject to the terms of the Mozilla Public
5 | * License, v. 2.0. If a copy of the MPL was not distributed with this
6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 | *
8 | * Software distributed under the License is distributed on an "AS IS" basis,
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 | * for the specific language governing rights and limitations under the
11 | * License.
12 | *
13 | * The Original Code is the Suffix Array based String Kernel.
14 | *
15 | * The Initial Developer of the Original Code is
16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006
18 | * the Initial Developer. All Rights Reserved.
19 | *
20 | * Contributor(s):
21 | *
22 | *   Choon Hui Teo
23 | *   S V N Vishwanathan
24 | *
25 | * ***** END LICENSE BLOCK ***** */
26 |
27 |
28 | // File    : sask/Code/ConstantWeight.cpp
29 | //
30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au)
31 | //           S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au)
32 | //
33 | // Created : 09 Feb 2006
34 | //
35 | // Updated : 24 Apr 2006
36 | //           12 Jul 2006
37 | //           12 Oct 2006
38 |
39 |
40 | #ifndef CWEIGHT_CPP
41 | #define CWEIGHT_CPP
42 |
43 | #include "cweight.h"
44 | #include <assert.h>
45 |
46 | /**
47 |  * Constant weight function. Computes the number of common substrings. Every
48 |  * matched substring is of the same weight (i.e., 1).
49 |  *   W(y,t) := tau - gamma
50 |  *
51 |  * \param floor_len - (IN) Length of floor interval of matched substring.
52 |  *                         (cf. gamma in VisSmo02).
53 |  * \param x_len     - (IN) Length of the matched substring.
54 |  *                         (cf. tau in visSmo02).
55 |  * \param weight    - (OUT) The weight value.
56 |  *
57 |  */
58 | ErrorCode
59 | ConstantWeight::ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight)
60 | {
61 |   //' Input validation
62 |   assert(x_len >= floor_len);
63 |
64 |   //' x_len == floor_len when the substring found ends on an interval.
65 |
66 |   weight = (x_len - floor_len);
67 |
68 |   //   std::cout << "floor_len : " << floor_len
69 |   //             << "   x_len : " << x_len
70 |   //             << "   weight : " << weight << std::endl;
71 |
72 |   return NOERROR;
73 | }
74 |
75 | #endif
76 |
--------------------------------------------------------------------------------
/src/ctable.h:
--------------------------------------------------------------------------------
1 | /* ***** BEGIN LICENSE BLOCK *****
2 | * Version: MPL 2.0
3 | *
4 | * This Source Code Form is subject to the terms of the Mozilla Public
5 | * License, v. 2.0.
If a copy of the MPL was not distributed with this
6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 | *
8 | * Software distributed under the License is distributed on an "AS IS" basis,
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 | * for the specific language governing rights and limitations under the
11 | * License.
12 | *
13 | * The Original Code is the Suffix Array based String Kernel.
14 | *
15 | * The Initial Developer of the Original Code is
16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006
18 | * the Initial Developer. All Rights Reserved.
19 | *
20 | * Contributor(s):
21 | *
22 | *   Choon Hui Teo
23 | *   S V N Vishwanathan
24 | *
25 | * ***** END LICENSE BLOCK ***** */
26 |
27 |
28 | // File    : sask/Code/ChildTable.h
29 | //
30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au)
31 | //           S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au)
32 | //
33 | // Created : 09 Feb 2006
34 | //
35 | // Updated : 24 Apr 2006
36 |
37 |
38 | #ifndef CTABLE_H
39 | #define CTABLE_H
40 |
41 | #include <vector>
42 | #include <iostream>
43 |
44 | #include "datatype.h"
45 | #include "errorcode.h"
46 | #include "lcp.h"
47 |
48 | // using namespace std;
49 |
50 | /**
51 |  * ChildTable represents the parent-child relationship between
52 |  * the lcp-intervals of a suffix array.
53 |  * Reference: AboKurOhl04
54 |  */
55 | class ChildTable : public std::vector<UInt32>
56 | {
57 |
58 |  private:
59 |   // childtab needs lcptab to differentiate between up, down, and
60 |   // nextlIndex values.
61 |   LCP& _lcptab;
62 |
63 |  public:
64 |
65 |   // Constructors
66 |   ChildTable(const UInt32 &size, LCP& lcptab): std::vector<UInt32>(size), _lcptab(lcptab){}
67 |
68 |   // Destructor
69 |   virtual ~ChildTable() {}
70 |
71 |
72 |   // Get first l-index of an l-[i..j] interval
73 |   ErrorCode l_idx(const UInt32 &i, const UInt32 &j, UInt32 &idx);
74 |
75 |   // .up field
76 |   ErrorCode up(const UInt32 &idx, UInt32 &val);
77 |
78 |   // .down field
79 |   ErrorCode down(const UInt32 &idx, UInt32 &val);
80 |
81 |   // .next field can be retrieved by accessing the array directly.
82 |
83 |   friend std::ostream& operator << (std::ostream& os, const ChildTable& ct);
84 |
85 | };
86 | #endif
87 |
--------------------------------------------------------------------------------
/src/brweight.cpp:
--------------------------------------------------------------------------------
1 | /* ***** BEGIN LICENSE BLOCK *****
2 | * Version: MPL 2.0
3 | *
4 | * This Source Code Form is subject to the terms of the Mozilla Public
5 | * License, v. 2.0. If a copy of the MPL was not distributed with this
6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 | *
8 | * Software distributed under the License is distributed on an "AS IS" basis,
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 | * for the specific language governing rights and limitations under the
11 | * License.
12 | *
13 | * The Original Code is the Suffix Array based String Kernel.
14 | *
15 | * The Initial Developer of the Original Code is
16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006
18 | * the Initial Developer. All Rights Reserved.
19 | *
20 | * Contributor(s):
21 | *
22 | *   Choon Hui Teo
23 | *   S V N Vishwanathan
24 | *
25 | * ***** END LICENSE BLOCK ***** */
26 |
27 |
28 | // File    : sask/Code/BoundedRangeWeight.cpp
29 | //
30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au)
31 | //           S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au)
32 | //
33 | // Created : 09 Feb 2006
34 | //
35 | // Updated : 24 Apr 2006
36 | //           12 Jul 2006
37 |
38 | #ifndef BRWEIGHT_CPP
39 | #define BRWEIGHT_CPP
40 |
41 | #include "brweight.h"
42 | #include <assert.h>
43 |
44 |
45 | #define MIN(x,y) (((x) < (y)) ? (x) : (y))
46 | #define MAX(x,y) (((x) > (y)) ? (x) : (y))
47 |
48 |
49 | /**
50 |  * Bounded Range weight function.
51 |  *   W(y,t) := max(0,min(tau,n)-gamma)
52 |  *
53 |  * \param floor_len - (IN) Length of floor interval of matched substring.
54 |  *                         (cf. gamma in VisSmo02).
55 |  * \param x_len     - (IN) Length of the matched substring.
56 |  *                         (cf. tau in visSmo02).
57 |  * \param weight    - (OUT) The weight value.
58 |  *
59 |  */
60 | ErrorCode
61 | BoundedRangeWeight::ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight)
62 | {
63 |   //' Input validation
64 |   assert(x_len >= floor_len);
65 |
66 |   //' x_len == floor_len when the substring found ends on an interval.
67 |
68 |   Real tau = (Real)x_len;
69 |   Real gamma = (Real)floor_len;
70 |
71 |   weight = MAX(0,MIN(tau,n)-gamma);
72 |
73 |   // std::cout << "floor_len:"<<floor_len<<"  x_len:"<<x_len<<"  weight:"<<weight<<std::endl;
74 |
75 |   return NOERROR;
76 | }
77 |
78 | #endif
79 |
--------------------------------------------------------------------------------
/src/wmsufsort.cpp:
--------------------------------------------------------------------------------
1 | /* ***** BEGIN LICENSE BLOCK *****
2 | * Version: MPL 2.0
3 | *
4 | * This Source Code Form is subject to the terms of the Mozilla Public
5 | * License, v. 2.0. If a copy of the MPL was not distributed with this
6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 | *
8 | * Software distributed under the License is distributed on an "AS IS" basis,
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 | * for the specific language governing rights and limitations under the
11 | * License.
12 | *
13 | * The Original Code is the Suffix Array based String Kernel.
14 | *
15 | * The Initial Developer of the Original Code is
16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006
18 | * the Initial Developer. All Rights Reserved.
19 | *
20 | * Contributor(s):
21 | *
22 | *   Choon Hui Teo
23 | *   S V N Vishwanathan
24 | *
25 | * ***** END LICENSE BLOCK ***** */
26 |
27 |
28 | // File    : sask/Code/W_msufsort.cpp
29 | //
30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au)
31 | //           S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au)
32 | //
33 | // Created : 09 Feb 2006
34 | //
35 | // Updated : 24 Apr 2006
36 |
37 |
38 | //' Wrapper for Michael Maniscalco's MSufSort version 3.1 algorithm
39 | #ifndef W_MSUFSORT_CPP
40 | #define W_MSUFSORT_CPP
41 |
42 | #include <iostream>
43 | #include <string.h>
44 | #include <assert.h>
45 |
46 | #include "wmsufsort.h"
47 |
48 |
49 | W_msufsort::W_msufsort()
50 | {
51 |   msuffixsorter = new MSufSort();
52 | }
53 |
54 | W_msufsort::~W_msufsort()
55 | {
56 |   delete msuffixsorter;
57 | }
58 |
59 |
60 | /**
61 |  * Construct Suffix Array using Michael Maniscalco's algorithm
62 |  *
63 |  * \param _text - (IN) The text which resultant SA corresponds to.
64 |  * \param _len  - (IN) The length of the text.
65 |  * \param _sa   - (OUT) Suffix array instance.
66 |  */
67 | ErrorCode
68 | W_msufsort::ConstructSA(SYMBOL *text, const UInt32 &len, UInt32 *&array){
69 |
70 |   //' A temporary copy of text
71 |   SYMBOL *text_copy = new SYMBOL[len];
72 |
73 |   //' chteo: BUGBUG
74 |   //' redundant?
75 |   assert(text_copy != NULL);
76 |
77 |   memcpy(text_copy, text, sizeof(SYMBOL) * len);
78 |   msuffixsorter->Sort(text_copy, len);
79 |
80 |   //' Code adapted from MSufSort::verifySort()
81 |   for (UInt32 i = 0; i < len; i++) {
82 |     UInt32 tmp = msuffixsorter->ISA(i)-1;
83 |     array[tmp] = i;
84 |   }
85 |
86 |   //' Deallocate the memory allocated for #text_copy#
87 |   delete [] text_copy;
88 |
89 |   return NOERROR;
90 | }
91 | #endif
92 |
93 |
94 |
95 |
--------------------------------------------------------------------------------
/man/kpca-class.Rd:
--------------------------------------------------------------------------------
1 | \name{kpca-class}
2 | \docType{class}
3 | \alias{kpca-class}
4 | \alias{rotated}
5 | \alias{eig,kpca-method}
6 | \alias{kcall,kpca-method}
7 | \alias{kernelf,kpca-method}
8 | \alias{pcv,kpca-method}
9 | \alias{rotated,kpca-method}
10 | \alias{xmatrix,kpca-method}
11 |
12 | \title{Class "kpca"}
13 | \description{ The Kernel Principal Components Analysis class}
14 | \section{Objects of class "kpca"}{
15 | Objects can be created by calls of the form \code{new("kpca", ...)}
16 | or by calling the \code{kpca} function.
17 | }
18 | \section{Slots}{
19 |   \describe{
20 |     \item{\code{pcv}:}{Object of class \code{"matrix"} containing the
21 |       principal component vectors }
22 |     \item{\code{eig}:}{Object of class \code{"vector"} containing the
23 |       corresponding eigenvalues}
24 |     \item{\code{rotated}:}{Object of class \code{"matrix"} containing the
25 |       projection of the data on the principal components}
26 |     \item{\code{kernelf}:}{Object of class \code{"function"} containing
27 |       the kernel function used}
28 |     \item{\code{kpar}:}{Object of class \code{"list"} containing the
29 |       kernel parameters used }
30 |     \item{\code{xmatrix}:}{Object of class \code{"matrix"} containing
31 |       the data matrix used }
32 |     \item{\code{kcall}:}{Object of class \code{"ANY"} containing the
33 |       function call }
34 |     \item{\code{n.action}:}{Object of class \code{"ANY"} containing the
35 |       action performed on NA }
36 |   }
37 | }
38 | \section{Methods}{
39 |   \describe{
40 |
41 |     \item{eig}{\code{signature(object = "kpca")}: returns the eigenvalues }
42 |
43 |     \item{kcall}{\code{signature(object = "kpca")}: returns the
44 |       performed call}
45 |     \item{kernelf}{\code{signature(object = "kpca")}: returns the used
46 |       kernel function}
47 |     \item{pcv}{\code{signature(object = "kpca")}: returns the principal
48 |       component vectors }
49 |     \item{predict}{\code{signature(object = "kpca")}: embeds new data }
50 |     \item{rotated}{\code{signature(object = "kpca")}: returns the
51 |       projected data}
52 |     \item{xmatrix}{\code{signature(object = "kpca")}: returns the used
53 |       data matrix }
54 |   }
55 | }
56 |
57 | \author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
58 |
59 | \seealso{
60 |   \code{\link{ksvm-class}},
61 |   \code{\link{kcca-class}}
62 | }
63 | \examples{
64 | ## example using the iris data set
65 | data(iris)
66 | test <- sample(1:50,20)
67 |
68 | kpc <- kpca(~.,data=iris[-test,-5],kernel="rbfdot",
69 |             kpar=list(sigma=0.2),features=2)
70 |
71 | ## print the principal component vectors
72 | pcv(kpc)
73 | rotated(kpc)
74 | kernelf(kpc)
75 | eig(kpc)
76 | }
77 | \keyword{classes}
78 |
--------------------------------------------------------------------------------
/R/sigest.R:
--------------------------------------------------------------------------------
1 | ## sigma estimation for RBF kernels
2 | ## author: alexandros
3 |
4 | setGeneric("sigest", function(x, ...)
standardGeneric("sigest")) 5 | setMethod("sigest",signature(x="formula"), 6 | function (x, data=NULL, frac = 0.5, na.action = na.omit, scaled = TRUE){ 7 | call <- match.call() 8 | m <- match.call(expand.dots = FALSE) 9 | if (is.matrix(eval(m$data, parent.frame()))) 10 | m$data <- as.data.frame(data) 11 | ## m$... <- NULL 12 | m$formula <- m$x 13 | m$x <- NULL 14 | m$scaled <- NULL 15 | m$frac <- NULL 16 | m[[1L]] <- quote(stats::model.frame) 17 | m <- eval(m, parent.frame()) 18 | Terms <- attr(m, "terms") 19 | attr(Terms, "intercept") <- 0 20 | x <- model.matrix(Terms, m) 21 | if (length(scaled) == 1) 22 | scaled <- rep(scaled, ncol(x)) 23 | if (any(scaled)) { 24 | remove <- unique(c(which(labels(Terms) %in% names(attr(x, "contrasts"))), 25 | which(!scaled) 26 | ) 27 | ) 28 | scaled <- !attr(x, "assign") %in% remove 29 | } 30 | ret <- sigest(x, scaled = scaled, frac = frac, na.action = na.action) 31 | return (ret) 32 | }) 33 | setMethod("sigest",signature(x="matrix"), 34 | function (x, 35 | frac = 0.5, 36 | scaled = TRUE, 37 | na.action = na.omit) 38 | { 39 | x <- na.action(x) 40 | 41 | if (length(scaled) == 1) 42 | scaled <- rep(scaled, ncol(x)) 43 | if (any(scaled)) { 44 | co <- !apply(x[,scaled, drop = FALSE], 2, var) 45 | if (any(co)) { 46 | scaled <- rep(FALSE, ncol(x)) 47 | warning(paste("Variable(s)", 48 | paste("`",colnames(x[,scaled, drop = FALSE])[co], 49 | "'", sep="", collapse=" and "), 50 | "constant. Cannot scale data.") 51 | ) 52 | } else { 53 | xtmp <- scale(x[,scaled]) 54 | x[,scaled] <- xtmp 55 | } 56 | } 57 | 58 | m <- dim(x)[1] 59 | n <- floor(frac*m) 60 | index <- sample(1:m, n, replace = TRUE) 61 | index2 <- sample(1:m, n, replace = TRUE) 62 | temp <- x[index,, drop=FALSE] - x[index2,,drop=FALSE] 63 | dist <- rowSums(temp^2) 64 | srange <- 1/quantile(dist[dist!=0],probs=c(0.9,0.5,0.1)) 65 | 66 | ## ds <- sort(dist[dist!=0]) 67 | ## sl <- ds[ceiling(0.2*length(ds))] 68 | ## su <- ds[ceiling(0.8*length(ds))] 69 | ## srange <- c(1/su,1/median(ds), 1/sl) 70 | ## names(srange) <- NULL 71 | 72 | return(srange) 73 | }) 74 | -------------------------------------------------------------------------------- /src/wkasailcp.cpp: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 
19 | *
20 | * Contributor(s):
21 | *
22 | *   Choon Hui Teo
23 | *   S V N Vishwanathan
24 | *
25 | * ***** END LICENSE BLOCK ***** */
26 |
27 |
28 | // File    : sask/Code/W_kasai_lcp.cpp
29 | //
30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au)
31 | //           S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au)
32 | //
33 | // Created : 09 Feb 2006
34 | //
35 | // Updated : 24 Apr 2006
36 | //           11 Oct 2006
37 |
38 |
39 | #ifndef W_KASAI_LCP_CPP
40 | #define W_KASAI_LCP_CPP
41 |
42 | #include "wkasailcp.h"
43 | #include
44 |
45 | /**
46 |  * Compute LCP array. Algorithm adapted from Manzini's SWAT2004 paper.
47 |  * Modification: array indexing changed from 1-based to 0-based.
48 |  *
49 |  * \param text - (IN) The text which corresponds to SA.
50 |  * \param len  - (IN) Length of text.
51 |  * \param sa   - (IN) Suffix array.
52 |  * \param lcp  - (OUT) Computed LCP array.
53 |  */
54 |
55 | ErrorCode
56 | W_kasai_lcp::ComputeLCP(const SYMBOL *text, const UInt32 &len,
57 |                         const UInt32 *sa, LCP& lcp)
58 | {
59 |   //chteo: [111006:0141]
60 |   //std::vector<UInt32> isa(len);
61 |
62 |   UInt32 *isa = new UInt32[len];
63 |
64 |   //' Step 1: Compute inverse suffix array
65 |   for(UInt32 i=0; i<len; i++)
66 |     isa[sa[i]] = i;
67 |
68 |   //' Step 2: Compute LCP values in O(n) time, exploiting the fact
69 |   //'         that the lcp value of consecutive suffixes of the text
70 |   //'         can decrease by at most 1.
71 |   UInt32 h = 0;
72 |   for(UInt32 i=0; i<len; i++) {
73 |     UInt32 k = isa[i];
74 |     if(k == 0) {
75 |       lcp.array[k] = 0;
76 |     }
77 |     else {
78 |       UInt32 j = sa[k-1];
79 |       while(i+h < len && j+h < len && text[i+h] == text[j+h])
80 |         h++;
81 |       lcp.array[k] = h;
82 |     }
83 |
84 |     if(h>0) h--;
85 |   }
86 |
87 |   //chteo: [111006:0141]
88 |   delete [] isa; isa = 0;
89 |
90 |   return NOERROR;
91 | }
92 | #endif
93 |
--------------------------------------------------------------------------------
/man/onlearn.Rd:
--------------------------------------------------------------------------------
1 | \name{onlearn}
2 | \alias{onlearn}
3 | \alias{onlearn,onlearn-method}
4 |
5 | \title{Kernel Online Learning algorithms}
6 | \description{
7 | Online Kernel-based Learning algorithms for classification, novelty
8 | detection, and regression.
9 | }
10 | \usage{
11 | \S4method{onlearn}{onlearn}(obj, x, y = NULL, nu = 0.2, lambda = 1e-04)
12 | }
13 |
14 | \arguments{
15 |   \item{obj}{an object of class \code{onlearn} created by the
16 |     initialization function \code{inlearn} containing the kernel to be
17 |     used during learning and the parameters of the
18 |     learned model}
19 |   \item{x}{vector or matrix containing the data. Factors have
20 |     to be numerically coded. If \code{x} is a matrix the code is
21 |     run internally one sample at a time.}
22 |   \item{y}{the class label in case of classification. Only binary
23 |     classification is supported and class labels have to be -1 or +1.
24 |   }
25 |   \item{nu}{the parameter that, similarly to the \code{nu} parameter in SVM,
26 |     bounds the training error.}
27 |   \item{lambda}{the learning rate}
28 | }
29 | \details{
30 |   The online algorithms are based on a simple stochastic gradient descent
31 |   method in feature space.
32 |   The state of the algorithm is stored in an object of class
33 |   \code{onlearn} and has to be passed to the function at each iteration.
34 | }
35 | \value{
36 |   The function returns an \code{S4} object of class \code{onlearn}
37 |   containing the model parameters and the last fitted value, which can be
38 |   retrieved by the accessor method \code{fit}. The value returned in the
39 |   classification and novelty detection problem is the decision function
40 |   value phi.
41 |   The accessor method \code{alpha} returns the model parameters.
42 | }
43 | \references{ Kivinen J., Smola A.J., Williamson R.C. \cr
44 | \emph{Online Learning with Kernels}\cr
45 | IEEE Transactions on Signal Processing vol.
52, Issue 8, 2004\cr
46 | \url{https://alex.smola.org/papers/2004/KivSmoWil04.pdf}}
47 |
48 | \author{Alexandros Karatzoglou\cr
49 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
50 |
51 |
52 | \seealso{\code{\link{inlearn}}}
53 | \examples{
54 |
55 | ## create toy data set
56 | x <- rbind(matrix(rnorm(100),,2),matrix(rnorm(100)+3,,2))
57 | y <- matrix(c(rep(1,50),rep(-1,50)),,1)
58 |
59 | ## initialize onlearn object
60 | on <- inlearn(2,kernel="rbfdot",kpar=list(sigma=0.2),
61 |               type="classification")
62 |
63 | ind <- sample(1:100,100)
64 | ## learn one data point at a time
65 | for(i in ind)
66 |   on <- onlearn(on,x[i,],y[i],nu=0.03,lambda=0.1)
67 |
68 | ## or learn all the data
69 | on <- onlearn(on,x[ind,],y[ind],nu=0.03,lambda=0.1)
70 |
71 | sign(predict(on,x))
72 | }
73 |
74 | \keyword{classif}
75 | \keyword{neural}
76 | \keyword{regression}
77 | \keyword{ts}
78 |
--------------------------------------------------------------------------------
/src/lcp.h:
--------------------------------------------------------------------------------
1 | /* ***** BEGIN LICENSE BLOCK *****
2 | * Version: MPL 2.0
3 | *
4 | * This Source Code Form is subject to the terms of the Mozilla Public
5 | * License, v. 2.0. If a copy of the MPL was not distributed with this
6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 | *
8 | * Software distributed under the License is distributed on an "AS IS" basis,
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 | * for the specific language governing rights and limitations under the
11 | * License.
12 | *
13 | * The Original Code is the Suffix Array based String Kernel.
14 | *
15 | * The Initial Developer of the Original Code is
16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006
18 | * the Initial Developer. All Rights Reserved.
19 | *
20 | * Contributor(s):
21 | *
22 | *   Choon Hui Teo
23 | *   S V N Vishwanathan
24 | *
25 | * ***** END LICENSE BLOCK ***** */
26 |
27 |
28 | // File    : sask/Code/LCP.h
29 | //
30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au)
31 | //           S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au)
32 | //
33 | // Created : 09 Feb 2006
34 | //
35 | // Updated : 24 Apr 2006
36 | //           11 Oct 2006
37 |
38 |
39 | #ifndef LCP_H
40 | #define LCP_H
41 |
42 |
43 | #include "datatype.h"
44 | #include "errorcode.h"
45 | #include
46 | #include
47 | #include
48 | #include
49 | #include
50 |
51 | /**
52 |  * LCP array class
53 |  */
54 |
55 | class LCP
56 | {
57 |  private:
58 |   /// Compacted array
59 |   /* std::vector<Byte1>  _p_array; */
60 |   /* std::vector<UInt32> _idx_array; */
61 |   /* std::vector<UInt32> _val_array; */
62 |
63 |   Byte1  *_p_array;
64 |   UInt32 *_idx_array;
65 |   UInt32 *_val_array;
66 |   UInt32 _size;
67 |
68 |   bool _is_compact;
69 |
70 |   UInt32 *_beg;
71 |   UInt32 *_end;
72 |   UInt32 *_cache;
73 |
74 |   /* typedef std::vector<UInt32>::const_iterator const_itr; */
75 |
76 |   /* const_itr _beg; */
77 |   /* const_itr _end; */
78 |
79 |   /* const_itr _cache; */
80 |   UInt32 _dist;
81 |
82 |  public:
83 |
84 |   /// Original array - 4bytes
85 |   //std::vector<UInt32> array;
86 |   UInt32 *array;
87 |
88 |   /// Constructor
89 |   LCP(const UInt32 &size);
90 |
91 |   /// Destructor
92 |   virtual ~LCP();
93 |
94 |   /// Methods
95 |
96 |   /// Compact 4n bytes array into (1n+8p) bytes arrays
97 |   ErrorCode compact(void);
98 |
99 |   /// Retrieve lcp array value
100 |   //  ErrorCode lcp(const UInt32 &idx, UInt32 &value);
101 |
102 |   UInt32 operator[] (const UInt32& idx);
103 |
104 |   friend std::ostream& operator << (std::ostream& os, LCP& lcp);
105 |
106 | };
107 | #endif
108 |
--------------------------------------------------------------------------------
/man/predict.ksvm.Rd:
--------------------------------------------------------------------------------
1 | \name{predict.ksvm}
2 | \alias{predict.ksvm}
3 | \alias{predict,ksvm-method}
4 | \title{predict method for support vector object}
5 |
6 |
7 | \description{Prediction of test data using support vector machines}
8 |
9 |
10 | \usage{
11 | \S4method{predict}{ksvm}(object, newdata, type = "response", coupler = "minpair")
12 | }
13 |
14 | \arguments{
15 |
16 |   \item{object}{an S4 object of class \code{ksvm} created by the
17 |     \code{ksvm} function}
18 |   \item{newdata}{a data frame or matrix containing new data}
19 |   \item{type}{one of \code{response}, \code{probabilities},
20 |     \code{votes}, \code{decision}
21 |     indicating the type of output: predicted values, matrix of class
22 |     probabilities, matrix of vote counts, or matrix of decision values.}
23 |   \item{coupler}{Coupling method used in the multiclass case, can be one
24 |     of \code{minpair} or \code{pkpd} (see reference for more details).}
25 |
26 | }
27 |
28 | \value{
29 |   If \code{type(object)} is \code{C-svc},
30 |   \code{nu-svc}, \code{C-bsvc} or \code{spoc-svc}
31 |   the vector returned depends on the argument \code{type}:
32 |
33 |   \item{response}{predicted classes (the classes with majority vote).}
34 |
35 |   \item{probabilities}{matrix of class probabilities (one column for each class and
36 |     one row for each input).}
37 |
38 |   \item{votes}{matrix of vote counts (one column for each class and one row
39 |     for each new input)}
40 |
41 |   If \code{type(object)} is \code{eps-svr}, \code{eps-bsvr} or
42 |   \code{nu-svr}, a vector of predicted values is returned.
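  To make the \code{type} argument concrete, a brief usage sketch
  (an editorial illustration; the data set and parameter values are
  assumptions, and \code{prob.model = TRUE} is needed for
  \code{type = "probabilities"}):

  library(kernlab)
  data(iris)
  m <- ksvm(Species ~ ., data = iris, prob.model = TRUE)
  newx <- iris[c(1, 51, 101), ]
  predict(m, newx)                          ## "response": predicted classes
  predict(m, newx, type = "probabilities")  ## class-probability matrix
  predict(m, newx, type = "votes")          ## pairwise vote counts
  predict(m, newx, type = "decision")       ## decision values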
43 |   If \code{type(object)} is \code{one-classification}, a vector of
44 |   logical values is returned.
45 | }
46 |
47 |
48 | \references{
49 |   \itemize{
50 |     \item
51 |       T.F. Wu, C.J. Lin, R.C. Weng. \cr
52 |       \emph{Probability estimates for Multi-class Classification by
53 |         Pairwise Coupling}\cr
54 |       \url{https://www.csie.ntu.edu.tw/~cjlin/papers/svmprob/svmprob.pdf}
55 |
56 |     \item
57 |       H.T. Lin, C.J. Lin, R.C. Weng (2007),
58 |       A note on Platt's probabilistic outputs for support vector
59 |       machines.
60 |       \emph{Machine Learning}, \bold{68}, 267--276.
61 |       \doi{10.1007/s10994-007-5018-6}.
62 |   }
63 | }
64 | \author{Alexandros Karatzoglou\cr
65 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
66 |
67 | \keyword{methods}
68 | \keyword{regression}
69 | \keyword{classif}
70 |
71 |
72 | \examples{
73 |
74 | ## example using the promotergene data set
75 | data(promotergene)
76 |
77 | ## create test and training set
78 | ind <- sample(1:dim(promotergene)[1],20)
79 | genetrain <- promotergene[-ind, ]
80 | genetest <- promotergene[ind, ]
81 |
82 | ## train a support vector machine
83 | gene <- ksvm(Class~.,data=genetrain,kernel="rbfdot",
84 |              kpar=list(sigma=0.015),C=70,cross=4,prob.model=TRUE)
85 | gene
86 |
87 | ## predict gene type probabilities on the test set
88 | genetype <- predict(gene,genetest,type="probabilities")
89 | genetype
90 | }
91 |
92 |
--------------------------------------------------------------------------------
/man/ipop.Rd:
--------------------------------------------------------------------------------
1 | \name{ipop}
2 | \alias{ipop}
3 | \alias{ipop,ANY,matrix-method}
4 |
5 | \title{Quadratic Programming Solver}
6 | \description{
7 |   ipop solves the quadratic programming problem:\cr
8 |   \eqn{\min(c'*x + 1/2 * x' * H * x)}\cr
9 |   subject to: \cr
10 |   \eqn{b <= A * x <= b + r}\cr
11 |   \eqn{l <= x <= u}
12 | }
13 | \usage{
14 | ipop(c, H, A, b, l, u, r, sigf = 7, maxiter = 40, margin = 0.05,
15 |      bound = 10, verb = 0)
16 | }
17 |
18 | \arguments{
19 |   \item{c}{Vector or one column matrix appearing in the quadratic function}
20 |   \item{H}{Square matrix appearing in the quadratic function, or the
21 |     decomposed form \eqn{Z} of the \eqn{H} matrix where \eqn{Z} is a
22 |     \eqn{n x m} matrix with \eqn{n > m} and \eqn{ZZ' = H}.}
23 |   \item{A}{Matrix defining the constraints under which we minimize the
24 |     quadratic function}
25 |   \item{b}{Vector or one column matrix defining the constraints}
26 |   \item{l}{Lower bound vector or one column matrix}
27 |   \item{u}{Upper bound vector or one column matrix}
28 |   \item{r}{Vector or one column matrix defining the constraints}
29 |   \item{sigf}{Precision (default: 7 significant figures)}
30 |   \item{maxiter}{Maximum number of iterations}
31 |   \item{margin}{How close we get to the constraints}
32 |   \item{bound}{Clipping bound for the variables}
33 |   \item{verb}{Display convergence information during runtime}
34 | }
35 | \details{
36 |   ipop uses an interior point method to solve the quadratic programming
37 |   problem. \cr
38 |   The \eqn{H} matrix can also be provided in the decomposed form \eqn{Z},
39 |   where \eqn{ZZ' = H}; in that case the Sherman-Morrison-Woodbury formula
40 |   is used internally.
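  A sketch of the decomposed-form call (an editorial illustration; the
  incomplete-Cholesky factorization, the subsample size and the parameter
  values below are assumptions, not part of this page):

  library(kernlab)
  data(spam)
  m <- 200
  set <- sample(1:dim(spam)[1], m)
  x <- scale(as.matrix(spam[, -58]))[set, ]
  y <- as.integer(spam[set, 58]); y[y == 2] <- -1
  ## low-rank factor of the kernel matrix: Z %*% t(Z) ~ K
  Z <- inchol(x, kernel = rbfdot(sigma = 0.1), tol = 1e-4)
  ## row-scale by the labels so that Zy %*% t(Zy) ~ (y y') * K = H
  Zy <- as.matrix(Z) * y
  sv <- ipop(matrix(rep(-1, m)), Zy, t(y), 0,
             matrix(rep(0, m)), matrix(rep(5, m)), 0)
  primal(sv)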
41 | }
42 | \value{
43 |   An S4 object with the following slots
44 |   \item{primal}{Vector containing the primal solution of the quadratic problem}
45 |   \item{dual}{The dual solution of the problem}
46 |   \item{how}{Character string describing the type of convergence}
47 |
48 |   All slots can be accessed through accessor functions (see example)
49 | }
50 | \references{
51 |   R. J. Vanderbei\cr
52 |   \emph{LOQO: An interior point code for quadratic programming}\cr
53 |   Optimization Methods and Software 11, 451-484, 1999 \cr
54 |   \url{https://vanderbei.princeton.edu/ps/loqo5.pdf}
55 | }
56 | \author{Alexandros Karatzoglou (based on Matlab code by Alex Smola) \cr
57 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
58 |
59 |
60 | \seealso{\code{solve.QP}, \code{\link{inchol}}, \code{\link{csi}}}
61 | \examples{
62 | ## solve the Support Vector Machine optimization problem
63 | data(spam)
64 |
65 | ## sample a scaled part (500 points) of the spam data set
66 | m <- 500
67 | set <- sample(1:dim(spam)[1],m)
68 | x <- scale(as.matrix(spam[,-58]))[set,]
69 | y <- as.integer(spam[set,58])
70 | y[y==2] <- -1
71 |
72 | ## set C parameter and kernel
73 | C <- 5
74 | rbf <- rbfdot(sigma = 0.1)
75 |
76 | ## create H matrix etc.
77 | H <- kernelPol(rbf,x,,y)
78 | c <- matrix(rep(-1,m))
79 | A <- t(y)
80 | b <- 0
81 | l <- matrix(rep(0,m))
82 | u <- matrix(rep(C,m))
83 | r <- 0
84 |
85 | sv <- ipop(c,H,A,b,l,u,r)
86 | sv
87 | dual(sv)
88 |
89 | }
90 | \keyword{optimize}
91 |
92 |
--------------------------------------------------------------------------------
/src/expdecayweight.cpp:
--------------------------------------------------------------------------------
1 | /* ***** BEGIN LICENSE BLOCK *****
2 | * Version: MPL 2.0
3 | *
4 | * This Source Code Form is subject to the terms of the Mozilla Public
5 | * License, v. 2.0. If a copy of the MPL was not distributed with this
6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 | *
8 | * Software distributed under the License is distributed on an "AS IS" basis,
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 | * for the specific language governing rights and limitations under the
11 | * License.
12 | *
13 | * The Original Code is the Suffix Array based String Kernel.
14 | *
15 | * The Initial Developer of the Original Code is
16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006
18 | * the Initial Developer. All Rights Reserved.
19 | *
20 | * Contributor(s):
21 | *
22 | *   Choon Hui Teo
23 | *   S V N Vishwanathan
24 | *
25 | * ***** END LICENSE BLOCK ***** */
26 |
27 |
28 | // File    : sask/Code/ExpDecayWeight.cpp
29 | //
30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au)
31 | //           S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au)
32 | //
33 | // Created : 09 Feb 2006
34 | //
35 | // Updated : 24 Apr 2006
36 | //           12 Jul 2006
37 |
38 | #ifndef EXPDECAYWEIGHT_CPP
39 | #define EXPDECAYWEIGHT_CPP
40 |
41 | #include <math.h>
42 | #include <assert.h>
43 |
44 | #include "expdecayweight.h"
45 |
46 | using namespace std;
47 |
48 |
49 | /**
50 |  * Exponential Decay weight function.
51 |  *   W(y,t) := (lambda^{-gamma} - lambda^{-tau}) / (lambda - 1)
52 |  *
53 |  * \param floor_len - (IN) Length of floor interval of matched substring.
54 |  *                         (cf. gamma in VisSmo02).
55 |  * \param x_len     - (IN) Length of the matched substring.
56 |  *                         (cf. tau in visSmo02).
57 |  * \param weight    - (OUT) The weight value.
58 |  *
59 |  */
60 |
61 | ErrorCode
62 | ExpDecayWeight::ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight)
63 |
64 | // ErrorCode
65 | // ExpDecayWeight::ComputeWeight(const Real &floor_len, const Real &x_len, Real &weight)
66 | {
67 |   //' Input validation
68 |   assert(x_len >= floor_len);
69 |
70 |   //' x_len == floor_len when the substring found ends on an interval.
71 |   if(floor_len == x_len) {
72 |     //' substring ended on an interval, so, get the val from val[]
73 |     weight = 0.0;
74 |   }
75 |   else {
76 |     //weight = (pow(-(floor_len-1), lambda) - pow(-x_len, lambda)) / (1-lambda);
77 |     //weight = (pow(lambda,((Real)floor_len)) - pow(lambda, (Real)x_len+1)) / (1-lambda);
78 |     //	double a=floor_len*-1.0;
79 |     //	double b=x_len*-1.0;
80 |     //	weight = (pow(lambda,a) - pow(lambda, b)) / (lambda-1);
81 |     weight = (pow(lambda,Real(-1.0*floor_len)) - pow(lambda, Real(-1.0*x_len))) / (lambda-1);
82 |   }
83 |
84 |   //   std::cout << "floor_len : " << floor_len
85 |   //             << "   x_len : " << x_len
86 |   //             << "   pow1 : " << pow(lambda,-((Real)floor_len))
87 |   //             << "   pow2 : " << pow(lambda,-(Real)x_len)
88 |   //             << "   weight : " << weight << std::endl;
89 |
90 |   return NOERROR;
91 | }
92 |
93 | #endif
94 |
--------------------------------------------------------------------------------
/man/csi-class.Rd:
--------------------------------------------------------------------------------
1 | \name{csi-class}
2 | \docType{class}
3 | \alias{csi-class}
4 | \alias{Q}
5 | \alias{R}
6 | \alias{predgain}
7 | \alias{truegain}
8 | \alias{diagresidues,csi-method}
9 | \alias{maxresiduals,csi-method}
10 | \alias{pivots,csi-method}
11 | \alias{predgain,csi-method}
12 | \alias{truegain,csi-method}
13 | \alias{Q,csi-method}
14 | \alias{R,csi-method}
15 |
16 | \title{Class "csi"}
17 |
18 | \description{The reduced Cholesky decomposition object}
19 |
20 | \section{Objects from the Class}{Objects can be created by calls of the form \code{new("csi", ...)}
21 |   or by calling the \code{csi} function.}
22 |
23 | \section{Slots}{
24 |   \describe{
25 |
26 |     \item{\code{.Data}:}{Object of class \code{"matrix"} contains
27 |       the decomposed matrix}
28 |
29 |     \item{\code{pivots}:}{Object of class \code{"vector"} contains
30 |       the pivots performed}
31 |
32 |     \item{\code{diagresidues}:}{Object of class \code{"vector"} contains
33 |       the diagonal residuals}
34 |
35 |     \item{\code{maxresiduals}:}{Object of class \code{"vector"} contains
36 |       the maximum residuals}
37 |
38 |     \item{predgain}{Object of class \code{"vector"} contains
39 |       the predicted gain before adding each column}
40 |
41 |     \item{truegain}{Object of class \code{"vector"} contains
42 |       the actual gain after adding each column}
43 |
44 |     \item{Q}{Object of class \code{"matrix"} contains
45 |       Q from the QR decomposition of the kernel matrix}
46 |
47 |     \item{R}{Object of class \code{"matrix"} contains
48 |       R from the QR decomposition of the kernel matrix}
49 |
50 |   }
51 | }
52 |
53 | \section{Extends}{
54 | Class \code{"matrix"}, directly.
55 | }
56 | \section{Methods}{
57 | \describe{
58 |
59 | \item{diagresidues}{\code{signature(object = "csi")}: returns
60 | the diagonal residues}
61 |
62 | \item{maxresiduals}{\code{signature(object = "csi")}: returns
63 | the maximum residues}
64 |
65 | \item{pivots}{\code{signature(object = "csi")}: returns
66 | the pivots performed}
67 |
68 | \item{predgain}{\code{signature(object = "csi")}: returns
69 | the predicted gain before adding each column}
70 |
71 | \item{truegain}{\code{signature(object = "csi")}: returns
72 | the actual gain after adding each column}
73 |
74 | \item{Q}{\code{signature(object = "csi")}: returns
75 | Q from the QR decomposition of the kernel matrix}
76 |
77 | \item{R}{\code{signature(object = "csi")}: returns
78 | R from the QR decomposition of the kernel matrix}
79 | }
80 | }
81 |
82 | \author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
83 |
84 | \seealso{ \code{\link{csi}}, \code{\link{inchol-class}}}
85 |
86 | \examples{
87 | data(iris)
88 |
89 | ## create multidimensional y matrix
90 | yind <- t(matrix(1:3,3,150))
91 | ymat <- matrix(0, 150, 3)
92 | ymat[yind==as.integer(iris[,5])] <- 1
93 |
94 | datamatrix <- as.matrix(iris[,-5])
95 | # initialize kernel function
96 | rbf <- rbfdot(sigma=0.1)
97 | rbf
98 | Z <- csi(datamatrix,ymat, kernel=rbf, rank = 30)
99 | dim(Z)
100 | pivots(Z)
101 | # calculate kernel matrix
102 | K <- crossprod(t(Z))
103 | # difference between approximated and real kernel matrix
104 | (K - kernelMatrix(kernel=rbf, datamatrix))[6,]
105 |
106 | }
107 | \keyword{classes}
108 |
--------------------------------------------------------------------------------
/man/inlearn.Rd:
--------------------------------------------------------------------------------
1 | \name{inlearn}
2 | \alias{inlearn}
3 | \alias{inlearn,numeric-method}
4 | \title{Onlearn object initialization}
5 | \description{
6 | Initialization function for the Online Kernel Algorithm object \code{onlearn}.
7 | }
8 | \usage{
9 |
10 | \S4method{inlearn}{numeric}(d, kernel = "rbfdot", kpar = list(sigma = 0.1),
11 | type = "novelty", buffersize = 1000)
12 | }
13 | \arguments{
14 | \item{d}{the dimensionality of the data to be learned}
15 |
16 | \item{kernel}{the kernel function used in training and predicting.
17 | This parameter can be set to any function, of class kernel, which computes a dot product between two
18 | vector arguments. kernlab provides the most popular kernel functions
19 | which can be used by setting the kernel parameter to the following
20 | strings:
21 | \itemize{
22 | \item \code{rbfdot} Radial Basis kernel function "Gaussian"
23 | \item \code{polydot} Polynomial kernel function
24 | \item \code{vanilladot} Linear kernel function
25 | \item \code{tanhdot} Hyperbolic tangent kernel function
26 | \item \code{laplacedot} Laplacian kernel function
27 | \item \code{besseldot} Bessel kernel function
28 | \item \code{anovadot} ANOVA RBF kernel function
29 | }
30 | The kernel parameter can also be set to a user defined function of
31 | class kernel by passing the function name as an argument.
32 | }
33 |
34 | \item{kpar}{the list of hyper-parameters (kernel parameters).
35 | This is a list which contains the parameters to be used with the
36 | kernel function. Valid parameters for existing kernels are:
37 | \itemize{
38 | \item \code{sigma} inverse kernel width for the Radial Basis
39 | kernel function "rbfdot" and the Laplacian kernel "laplacedot".
40 | \item \code{degree, scale, offset} for the Polynomial kernel "polydot"
41 | \item \code{scale, offset} for the Hyperbolic tangent kernel
42 | function "tanhdot"
43 | \item \code{sigma, order, degree} for the Bessel kernel "besseldot".
44 | \item \code{sigma, degree} for the ANOVA kernel "anovadot".
45 | }
46 | Hyper-parameters for user defined kernels can be passed through the
47 | \code{kpar} parameter as well.}
48 |
49 | \item{type}{the type of problem to be learned by the online algorithm:
50 |
51 | \code{classification}, \code{regression}, \code{novelty}}
52 | \item{buffersize}{the size of the buffer to be used}
53 | }
54 | \details{
55 | The \code{inlearn} function is used to initialize a blank \code{onlearn} object.
56 | }
57 | \value{
58 | The function returns an \code{S4} object of class \code{onlearn} that
59 | can be used by the \code{onlearn} function.
60 | }
61 | \author{Alexandros Karatzoglou\cr
62 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
63 |
64 | \seealso{ \code{\link{onlearn}}, \code{\link{onlearn-class}} }
65 | \examples{
66 |
67 | ## create toy data set
68 | x <- rbind(matrix(rnorm(100),,2),matrix(rnorm(100)+3,,2))
69 | y <- matrix(c(rep(1,50),rep(-1,50)),,1)
70 |
71 | ## initialize onlearn object
72 | on <- inlearn(2, kernel = "rbfdot", kpar = list(sigma = 0.2),
73 | type = "classification")
74 |
75 | ## learn one data point at a time
76 | for(i in sample(1:100,100))
77 | on <- onlearn(on,x[i,],y[i],nu=0.03,lambda=0.1)
78 |
79 | sign(predict(on,x))
80 |
81 | }
82 | \keyword{classif}
83 | \keyword{neural}
84 | \keyword{regression}
85 | \keyword{ts}
86 |
--------------------------------------------------------------------------------
/src/stringkernel.h:
--------------------------------------------------------------------------------
1 | /* ***** BEGIN LICENSE BLOCK *****
2 | * Version: MPL 2.0
3 | *
4 | * This Source Code Form is subject to the terms of the Mozilla Public
5 | * License, v. 2.0. If a copy of the MPL was not distributed with this
6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 | *
8 | * Software distributed under the License is distributed on an "AS IS" basis,
9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 | * for the specific language governing rights and limitations under the
11 | * License.
12 | *
13 | * The Original Code is the Suffix Array based String Kernel.
14 | *
15 | * The Initial Developer of the Original Code is
16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
17 | * Portions created by the Initial Developer are Copyright (C) 2006
18 | * the Initial Developer. All Rights Reserved.
19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/StringKernel.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | // 12 Jul 2006 37 | // 10 Aug 2006 38 | 39 | 40 | #ifndef STRINGKERNEL_H 41 | #define STRINGKERNEL_H 42 | 43 | 44 | #include "datatype.h" 45 | #include "errorcode.h" 46 | #include "esa.h" 47 | #include "isafactory.h" 48 | #include "ilcpfactory.h" 49 | #include "iweightfactory.h" 50 | 51 | //#include "W_msufsort.h" 52 | #include "wkasailcp.h" 53 | 54 | #include "cweight.h" 55 | #include "expdecayweight.h" 56 | #include "brweight.h" 57 | #include "kspectrumweight.h" 58 | 59 | 60 | 61 | //' Types of substring weighting functions 62 | enum WeightFunction{CONSTANT, EXPDECAY, KSPECTRUM, BOUNDRANGE}; 63 | 64 | using namespace std; 65 | 66 | class StringKernel { 67 | 68 | 69 | public: 70 | /// Variables 71 | ESA *esa; 72 | I_WeightFactory *weigher; 73 | Real *val; //' val array. Storing precomputed val(t) values. 74 | Real *lvs; //' leaves array. Storing weights for leaves. 75 | 76 | 77 | /// Constructors 78 | StringKernel(); 79 | 80 | //' Given contructed suffix array 81 | StringKernel(ESA *esa_, int weightfn, Real param, int verb=INFO); 82 | 83 | //' Given text, build suffix array for it 84 | StringKernel(const UInt32 &size, SYMBOL *text, int weightfn, Real param, int verb=INFO); 85 | 86 | 87 | /// Destructor 88 | virtual ~StringKernel(); 89 | 90 | //' Methods 91 | 92 | /// Precompute the contribution of each intervals (or internal nodes) 93 | void PrecomputeVal(); 94 | 95 | /// Compute Kernel matrix 96 | void Compute_K(SYMBOL *xprime, const UInt32 &xprime_len, Real &value); 97 | 98 | /// Set leaves array, lvs[] 99 | void Set_Lvs(const Real *leafWeight, const UInt32 *len, const UInt32 &m); 100 | 101 | /// Set leaves array as lvs[i]=i for i=0 to esa->length 102 | void Set_Lvs(); 103 | 104 | private: 105 | 106 | int _verb; 107 | 108 | /// An iterative auxiliary function used in PrecomputeVal() 109 | void IterativeCompute(const UInt32 &left, const UInt32 &right); 110 | 111 | }; 112 | #endif 113 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib("kernlab", .registration = TRUE) 2 | 3 | import("methods") 4 | importFrom("stats", "coef", "delete.response", "fitted", "kmeans", 5 | "median", "model.extract", "model.matrix", "na.action", 6 | "na.omit", "predict", "quantile", "rnorm", "runif", "sd", 7 | "terms", "var") 8 | importFrom("graphics", "axis", "filled.contour", "plot", "points", "title") 9 | importFrom("grDevices", "hcl") 10 | 11 | export( 12 | ## kernel functions 13 | "rbfdot", 14 | "laplacedot", 15 | "besseldot", 16 | "polydot", 17 | "tanhdot", 18 | "vanilladot", 19 | "anovadot", 20 | "splinedot", 21 | "stringdot", 22 | "kernelMatrix", 23 | "kernelMult", 24 | "kernelPol", 25 | "kernelFast", 26 | "as.kernelMatrix", 27 | 28 | ## High level functions 29 | "kmmd", 30 | "kpca", 31 | "kcca", 32 | "kha", 33 | "specc", 34 | "kkmeans", 35 | "ksvm", 36 | "rvm", 37 | "gausspr", 38 | "ranking", 39 | "csi", 40 | "lssvm", 41 | "kqr", 42 | 43 | ## Utility functions 44 | "ipop", 45 | "inchol", 46 | "couple", 47 | "sigest", 48 | 49 | ## Accessor functions 50 | 51 | ## VM 52 | "type", 53 | 
"prior", 54 | "alpha", 55 | "alphaindex", 56 | "kernelf", 57 | "kpar", 58 | "param", 59 | "scaling", 60 | "xmatrix", 61 | "ymatrix", 62 | "lev", 63 | "kcall", 64 | "error", 65 | "cross", 66 | "SVindex", 67 | "nSV", 68 | "RVindex", 69 | "prob.model", 70 | "b", 71 | "obj", 72 | 73 | ## kpca 74 | "rotated", 75 | "eig", 76 | "pcv", 77 | 78 | ## ipop 79 | "primal", 80 | "dual", 81 | "how", 82 | 83 | ## kcca 84 | "kcor", 85 | "xcoef", 86 | "ycoef", 87 | ## "xvar", 88 | ## "yvar", 89 | 90 | ## specc 91 | "size", 92 | "centers", 93 | "withinss", 94 | 95 | ## rvm 96 | "mlike", 97 | "nvar", 98 | 99 | ## ranking 100 | "convergence", 101 | "edgegraph", 102 | 103 | ## onlearn 104 | "onlearn", 105 | "inlearn", 106 | "buffer", 107 | "rho", 108 | 109 | ## kfa 110 | "kfa", 111 | 112 | ## inc.chol 113 | "pivots", 114 | "diagresidues", 115 | "maxresiduals", 116 | 117 | ## csi 118 | "R", 119 | "Q", 120 | "truegain", 121 | "predgain", 122 | 123 | ## kmmd 124 | "H0", 125 | "AsympH0", 126 | "Radbound", 127 | "Asymbound", 128 | "mmdstats" 129 | ) 130 | 131 | exportMethods("coef", "fitted", "plot", "predict", "show") 132 | 133 | exportClasses("ksvm", "kmmd", "rvm", "ipop", "gausspr", "lssvm", "kpca", "kha", 134 | "kcca", "kernel", "rbfkernel", "laplacekernel", 135 | "besselkernel", "tanhkernel", "polykernel","fourierkernel", 136 | "vanillakernel", "anovakernel", "splinekernel", 137 | "stringkernel", "specc", "ranking", "inchol", "onlearn", 138 | "kfa", "csi","kqr", 139 | "kernelMatrix","kfunction") 140 | 141 | -------------------------------------------------------------------------------- /src/inductionsort.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the MSufSort suffix sorting algorithm (Version 2.2). 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Michael A. Maniscalco 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Michael A. 
Maniscalco
23 | *
24 | * ***** END LICENSE BLOCK ***** */
25 |
26 | #ifndef MSUFSORT_INDUCTION_SORTING_H
27 | #define MSUFSORT_INDUCTION_SORTING_H
28 |
29 | #include "introsort.h"
30 |
31 |
32 | class InductionSortObject
33 | {
34 | public:
35 | InductionSortObject(unsigned int inductionPosition = 0, unsigned int inductionValue = 0, unsigned int suffixIndex = 0);
36 |
37 | bool operator <= (InductionSortObject & object);
38 |
39 | bool operator == (InductionSortObject & object);
40 |
41 | InductionSortObject& operator = (InductionSortObject & object);
42 |
43 | bool operator >= (InductionSortObject & object);
44 |
45 | bool operator > (InductionSortObject & object);
46 |
47 | bool operator < (InductionSortObject & object);
48 |
49 | unsigned int m_sortValue[2];
50 | };
51 |
52 |
53 | inline bool InductionSortObject::operator <= (InductionSortObject & object)
54 | {
55 | if (m_sortValue[0] < object.m_sortValue[0])
56 | return true;
57 | else
58 | if (m_sortValue[0] == object.m_sortValue[0])
59 | return (m_sortValue[1] <= object.m_sortValue[1]);
60 | return false;
61 | }
62 |
63 |
64 |
65 | inline bool InductionSortObject::operator == (InductionSortObject & object)
66 | {
67 | return ((m_sortValue[0] == object.m_sortValue[0]) && (m_sortValue[1] == object.m_sortValue[1]));
68 | }
69 |
70 |
71 |
72 | inline bool InductionSortObject::operator >= (InductionSortObject & object)
73 | {
74 | if (m_sortValue[0] > object.m_sortValue[0])
75 | return true;
76 | else
77 | if (m_sortValue[0] == object.m_sortValue[0])
78 | return (m_sortValue[1] >= object.m_sortValue[1]);
79 | return false;
80 | }
81 |
82 |
83 |
84 | inline InductionSortObject & InductionSortObject::operator = (InductionSortObject & object)
85 | {
86 | m_sortValue[0] = object.m_sortValue[0];
87 | m_sortValue[1] = object.m_sortValue[1];
88 | return *this;
89 | }
90 |
91 |
92 |
93 |
94 | inline bool InductionSortObject::operator > (InductionSortObject & object)
95 | {
96 | if (m_sortValue[0] > object.m_sortValue[0])
97 | return true;
98 | else
99 | if (m_sortValue[0] == object.m_sortValue[0])
100 | return (m_sortValue[1] > object.m_sortValue[1]);
101 | return false;
102 | }
103 |
104 |
105 |
106 | inline bool InductionSortObject::operator < (InductionSortObject & object)
107 | {
108 | if (m_sortValue[0] < object.m_sortValue[0])
109 | return true;
110 | else
111 | if (m_sortValue[0] == object.m_sortValue[0])
112 | return (m_sortValue[1] < object.m_sortValue[1]);
113 | return false;
114 | }
115 |
116 |
117 |
118 |
119 | #endif
120 |
--------------------------------------------------------------------------------
/man/sigest.Rd:
--------------------------------------------------------------------------------
1 | \name{sigest}
2 | \alias{sigest}
3 | \alias{sigest,formula-method}
4 | \alias{sigest,matrix-method}
5 |
6 | \title{Hyperparameter estimation for the Gaussian Radial Basis kernel}
7 | \description{
8 | Estimates a good range of values for the "sigma" inverse width parameter in the Gaussian Radial Basis kernel
9 | for use with Support Vector Machines. The estimation is based on the
10 | data to be used.
11 | }
12 | \usage{
13 | \S4method{sigest}{formula}(x, data=NULL, frac = 0.5, na.action = na.omit, scaled = TRUE)
14 | \S4method{sigest}{matrix}(x, frac = 0.5, scaled = TRUE, na.action = na.omit)
15 | }
16 |
17 | \arguments{
18 | \item{x}{a symbolic description of the model upon which the estimation is
19 | based.
When not using a formula, x is a matrix or vector
20 | containing the data}
21 | \item{data}{an optional data frame containing the variables in the model.
22 | By default the variables are taken from the environment which
23 | `ksvm' is called from.}
24 |
25 | \item{frac}{Fraction of data to use for estimation. By default half
26 | of the data is used to estimate the range of the sigma hyperparameter.}
27 |
28 | \item{scaled}{A logical vector indicating the variables to be
29 | scaled. If \code{scaled} is of length 1, the value is recycled as
30 | many times as needed and all non-binary variables are scaled.
31 | Per default, data are scaled internally to zero mean and unit
32 | variance
33 | (since this is the default action in \code{ksvm} as well). The center and scale
34 | values are returned and used for later predictions. }
35 | \item{na.action}{A function to specify the action to be taken if \code{NA}s are
36 | found. The default action is \code{na.omit}, which leads to rejection of cases
37 | with missing values on any required variable. An alternative
38 | is \code{na.fail}, which causes an error if \code{NA} cases
39 | are found. (NOTE: If given, this argument must be named.)}
40 |
41 | }
42 |
43 |
44 |
45 | \details{
46 | \code{sigest} estimates the range of values for the sigma parameter
47 | which would return good results when used with a Support Vector
48 | Machine (\code{ksvm}). The estimation is based upon the 0.1 and 0.9 quantile
49 | of \eqn{\|x - x'\|^2}. Any value in between those two bounds will
50 | typically produce good results.
51 | }
52 | \value{
53 | Returns a vector of length 3 defining the range (0.1 quantile, median
54 | and 0.9 quantile) of
55 | the sigma hyperparameter.
56 | }
57 | \references{ B. Caputo, K. Sim, F. Furesjo, A. Smola, \cr
58 | \emph{Appearance-based object recognition using SVMs: which kernel should I use?}\cr
59 | Proc of NIPS workshop on Statistical methods for computational experiments in visual processing and computer vision, Whistler, 2002.
60 | }
61 | \author{Alexandros Karatzoglou \cr
62 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
63 |
64 |
65 |
66 | \seealso{\code{\link{ksvm}}}
67 | \examples{
68 |
69 | ## estimate good sigma values for promotergene
70 | data(promotergene)
71 | srange <- sigest(Class~.,data = promotergene)
72 | srange
73 |
74 | s <- srange[2]
75 | s
76 | ## create test and training set
77 | ind <- sample(1:dim(promotergene)[1],20)
78 | genetrain <- promotergene[-ind, ]
79 | genetest <- promotergene[ind, ]
80 |
81 | ## train a support vector machine
82 | gene <- ksvm(Class~.,data=genetrain,kernel="rbfdot",
83 | kpar=list(sigma = s),C=50,cross=3)
84 | gene
85 |
86 | ## predict gene type on the test set
87 | promoter <- predict(gene,genetest[,-1])
88 |
89 | ## Check results
90 | table(promoter,genetest[,1])
91 | }
92 | \keyword{classif}
93 | \keyword{regression}
94 |
--------------------------------------------------------------------------------
/man/stringdot.Rd:
--------------------------------------------------------------------------------
1 | \name{stringdot}
2 | \alias{stringdot}
3 | \title{String Kernel Functions}
4 | \description{
5 | String kernels.
6 | }
7 | \usage{
8 | stringdot(length = 4, lambda = 1.1, type = "spectrum", normalized = TRUE)
9 | }
10 |
11 | \arguments{
12 |
13 | \item{length}{The length of the substrings considered}
14 |
15 | \item{lambda}{The decay factor}
16 |
17 | \item{type}{Type of string kernel, currently the following kernels are
18 | supported: \cr
19 |
20 | \code{spectrum} the kernel considers only matching substrings of
21 | exactly length \eqn{n} (also known as the string kernel). Each such matching
22 | substring is given a constant weight. The length parameter in this
23 | kernel has to be \eqn{length > 1}.\cr
24 |
25 | \code{boundrange}
26 | this kernel considers only matching substrings of length less than or equal to a
27 | given number N. This type of string kernel requires a length
28 | parameter \eqn{length > 1}.\cr
29 |
30 | \code{constant}
31 | The kernel considers all matching substrings and assigns a constant weight (e.g. 1) to each
32 | of them. This \code{constant} kernel does not require any additional
33 | parameter.\cr
34 |
35 |
36 | \code{exponential}
37 | Exponential Decay kernel where the substring weight decays as the
38 | matching substring gets longer. The kernel requires a decay factor \eqn{
39 | \lambda > 1}.\cr
40 |
41 | \code{string} essentially identical to the spectrum kernel, only
42 | computed in a more conventional way.\cr
43 |
44 | \code{fullstring} essentially identical to the boundrange kernel
45 | only computed in a more conventional way. \cr
46 | }
47 | \item{normalized}{normalize the string kernel values (default: \code{TRUE})}
48 | }
49 | \details{
50 | The kernel generating functions are used to initialize a kernel function
51 | which calculates the dot (inner) product between two feature vectors in a
52 | Hilbert Space. These functions or their function generating names
53 | can be passed as a \code{kernel} argument on almost all
54 | functions in \pkg{kernlab} (e.g., \code{ksvm}, \code{kpca} etc.).
55 |
56 | The string kernels calculate similarities between two strings
57 | (e.g. texts or sequences) by matching the common substrings
58 | in the strings. Different types of string kernels exist and are
59 | mainly distinguished by how the matching is performed, i.e. some string
60 | kernels count the exact matches of \eqn{n} characters (spectrum
61 | kernel) between the strings, while others allow gaps (mismatch kernel) etc.
62 |
63 |
64 | }
65 | \value{
66 | Returns an S4 object of class \code{stringkernel} which extends the
67 | \code{function} class. The resulting function implements the given
68 | kernel calculating the inner (dot) product between two character vectors.
69 | \item{kpar}{a list containing the kernel parameters (hyperparameters)
70 | used.}
71 | The kernel parameters can be accessed by the \code{kpar} function.
72 | }
73 |
74 | \author{Alexandros Karatzoglou\cr
75 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
76 |
77 | \note{ The \code{spectrum} and \code{boundrange} kernels are faster and
78 | more efficient implementations of the \code{string} and
79 | \code{fullstring} kernels
80 | which will still be included in \code{kernlab} for the next two versions.
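A minimal sketch of this correspondence on toy strings (hypothetical
input; the two values are expected to agree up to numerical precision):
\preformatted{
sk1 <- stringdot(type = "spectrum", length = 3)
sk2 <- stringdot(type = "string", length = 3)
x <- "kernel methods"
y <- "string kernels"
sk1(x, y)
sk2(x, y)   ## should essentially match sk1(x, y)
}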
81 | 82 | } 83 | 84 | 85 | 86 | 87 | \seealso{ \code{\link{dots} }, \code{\link{kernelMatrix} }, \code{\link{kernelMult}}, \code{\link{kernelPol}}} 88 | \examples{ 89 | 90 | sk <- stringdot(type="string", length=5) 91 | 92 | sk 93 | 94 | 95 | 96 | } 97 | \keyword{symbolmath} 98 | 99 | -------------------------------------------------------------------------------- /src/kspectrumweight.cpp: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/KSpectrumWeight.cpp 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | // 12 Jul 2006 37 | 38 | #ifndef KSPECTRUMWEIGHT_CPP 39 | #define KSPECTRUMWEIGHT_CPP 40 | 41 | #include "kspectrumweight.h" 42 | #include 43 | 44 | 45 | 46 | /** 47 | * K-spectrum weight function. Compute number of common (exactly) k character substring. 48 | * 49 | * \param floor_len - (IN) Length of floor interval of matched substring. (cf. gamma in VisSmo02). 50 | * \param x_len - (IN) Length of the matched substring. (cf. tau in VisSmo02). 51 | * \param weight - (OUT) The weight value. 52 | * 53 | */ 54 | ErrorCode 55 | KSpectrumWeight::ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight) 56 | { 57 | //' Input validation 58 | assert(x_len >= floor_len); 59 | 60 | //' x_len == floor_len when the substring found ends on an interval. 61 | 62 | 63 | weight = 0.0; 64 | 65 | if(floor_len < k && x_len >= k) 66 | weight = 1.0; 67 | 68 | // std::cout << "floor_len : " << floor_len 69 | // << " x_len : " << x_len 70 | // << " weight : " << weight << std::endl; 71 | 72 | return NOERROR; 73 | } 74 | 75 | #endif 76 | 77 | 78 | //' Question: Why return only 0 or 1? 79 | //' Answer : In k-spectrum method, any length of matched substring other than k 80 | //' does not play a significant role in the string kernel. So, returning 1 81 | //' means that the substring weight equals to # of suffix in the current interval. 82 | //' When 0 is returned, it means that substring weight equals to the floor 83 | //' interval entry in val[]. (See the definition of substring weight in 84 | //' StringKernel.cpp) 85 | 86 | //' Question: Why is the following a correct implementation of k-spectrum ? 87 | //' Answer : [Val precomputation phase] Every Interval with lcp < k has val := 0. 
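//' (Worked example, hypothetical values: with k = 3, a matched substring
//' having floor_len = 2 and x_len = 4 receives weight 1 from ComputeWeight()
//' above, while floor_len = 3 and x_len = 4 receives weight 0.)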
88 | //' For intervals with (lcp==k) or (lcp>k but floor_lcp= k but floor interval 93 | //' has val := 0 (floor_lcp < k). Hence, returning weight:=1 will make substring 94 | //' weight equals to the size of the immediate ceil interval (# of substring in common). 95 | -------------------------------------------------------------------------------- /man/onlearn-class.Rd: -------------------------------------------------------------------------------- 1 | \name{onlearn-class} 2 | \docType{class} 3 | \alias{onlearn-class} 4 | \alias{alpha,onlearn-method} 5 | \alias{b,onlearn-method} 6 | \alias{buffer,onlearn-method} 7 | \alias{fit,onlearn-method} 8 | \alias{kernelf,onlearn-method} 9 | \alias{kpar,onlearn-method} 10 | \alias{predict,onlearn-method} 11 | \alias{rho,onlearn-method} 12 | \alias{rho} 13 | \alias{show,onlearn-method} 14 | \alias{type,onlearn-method} 15 | \alias{xmatrix,onlearn-method} 16 | \alias{buffer} 17 | 18 | \title{Class "onlearn"} 19 | \description{ The class of objects used by the Kernel-based Online 20 | learning algorithms} 21 | \section{Objects from the Class}{ 22 | Objects can be created by calls of the form \code{new("onlearn", ...)}. 23 | or by calls to the function \code{inlearn}. 24 | } 25 | \section{Slots}{ 26 | \describe{ 27 | \item{\code{kernelf}:}{Object of class \code{"function"} containing 28 | the used kernel function} 29 | \item{\code{buffer}:}{Object of class \code{"numeric"} containing 30 | the size of the buffer} 31 | \item{\code{kpar}:}{Object of class \code{"list"} containing the 32 | hyperparameters of the kernel function.} 33 | \item{\code{xmatrix}:}{Object of class \code{"matrix"} containing 34 | the data points (similar to support vectors) } 35 | \item{\code{fit}:}{Object of class \code{"numeric"} containing the 36 | decision function value of the last data point} 37 | \item{\code{onstart}:}{Object of class \code{"numeric"} used for indexing } 38 | \item{\code{onstop}:}{Object of class \code{"numeric"} used for indexing} 39 | \item{\code{alpha}:}{Object of class \code{"ANY"} containing the 40 | model parameters} 41 | \item{\code{rho}:}{Object of class \code{"numeric"} containing model 42 | parameter} 43 | \item{\code{b}:}{Object of class \code{"numeric"} containing the offset} 44 | \item{\code{pattern}:}{Object of class \code{"factor"} used for 45 | dealing with factors} 46 | \item{\code{type}:}{Object of class \code{"character"} containing 47 | the problem type (classification, regression, or novelty } 48 | } 49 | } 50 | \section{Methods}{ 51 | \describe{ 52 | \item{alpha}{\code{signature(object = "onlearn")}: returns the model 53 | parameters} 54 | \item{b}{\code{signature(object = "onlearn")}: returns the offset } 55 | \item{buffer}{\code{signature(object = "onlearn")}: returns the 56 | buffer size} 57 | \item{fit}{\code{signature(object = "onlearn")}: returns the last 58 | decision function value} 59 | \item{kernelf}{\code{signature(object = "onlearn")}: return the 60 | kernel function used} 61 | \item{kpar}{\code{signature(object = "onlearn")}: returns the 62 | hyper-parameters used} 63 | \item{onlearn}{\code{signature(obj = "onlearn")}: the learning function} 64 | \item{predict}{\code{signature(object = "onlearn")}: the predict function} 65 | \item{rho}{\code{signature(object = "onlearn")}: returns model parameter} 66 | \item{show}{\code{signature(object = "onlearn")}: show function} 67 | \item{type}{\code{signature(object = "onlearn")}: returns the type 68 | of problem} 69 | \item{xmatrix}{\code{signature(object = "onlearn")}: returns the 70 | 
stored data points}
71 | }
72 | }
73 |
74 | \author{Alexandros Karatzoglou\cr
75 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
76 |
77 |
78 | \seealso{
79 | \code{\link{onlearn}}, \code{\link{inlearn}}
80 | }
81 | \examples{
82 |
83 | ## create toy data set
84 | x <- rbind(matrix(rnorm(100),,2),matrix(rnorm(100)+3,,2))
85 | y <- matrix(c(rep(1,50),rep(-1,50)),,1)
86 |
87 | ## initialize onlearn object
88 | on <- inlearn(2,kernel="rbfdot",kpar=list(sigma=0.2),
89 | type="classification")
90 |
91 | ## learn one data point at a time
92 | for(i in sample(1:100,100))
93 | on <- onlearn(on,x[i,],y[i],nu=0.03,lambda=0.1)
94 |
95 | sign(predict(on,x))
96 |
97 | }
98 | \keyword{classes}
99 |
--------------------------------------------------------------------------------
/man/kcca.Rd:
--------------------------------------------------------------------------------
1 | \name{kcca}
2 | \alias{kcca}
3 | \alias{kcca,matrix-method}
4 | \title{Kernel Canonical Correlation Analysis}
5 | \description{
6 | Computes the canonical correlation analysis in feature space.
7 | }
8 | \usage{
9 | \S4method{kcca}{matrix}(x, y, kernel="rbfdot", kpar=list(sigma=0.1),
10 | gamma = 0.1, ncomps = 10, ...)
11 | }
12 | %- maybe also 'usage' for other objects documented here.
13 | \arguments{
14 | \item{x}{a matrix containing data indexed by row}
15 | \item{y}{a matrix containing data indexed by row}
16 | \item{kernel}{the kernel function used in training and predicting.
17 | This parameter can be set to any function, of class kernel,
18 | which computes an inner product in feature space between two
19 | vector arguments. kernlab provides the most popular kernel functions
20 | which can be used by setting the kernel parameter to the following
21 | strings:
22 | \itemize{
23 | \item \code{rbfdot} Radial Basis kernel function "Gaussian"
24 | \item \code{polydot} Polynomial kernel function
25 | \item \code{vanilladot} Linear kernel function
26 | \item \code{tanhdot} Hyperbolic tangent kernel function
27 | \item \code{laplacedot} Laplacian kernel function
28 | \item \code{besseldot} Bessel kernel function
29 | \item \code{anovadot} ANOVA RBF kernel function
30 | \item \code{splinedot} Spline kernel
31 | }
32 | The kernel parameter can also be set to a user defined function of
33 | class kernel by passing the function name as an argument.
34 | }
35 |
36 | \item{kpar}{the list of hyper-parameters (kernel parameters).
37 | This is a list which contains the parameters to be used with the
38 | kernel function. Valid parameters for existing kernels are:
39 | \itemize{
40 | \item \code{sigma} inverse kernel width for the Radial Basis
41 | kernel function "rbfdot" and the Laplacian kernel "laplacedot".
42 | \item \code{degree, scale, offset} for the Polynomial kernel "polydot"
43 | \item \code{scale, offset} for the Hyperbolic tangent kernel
44 | function "tanhdot"
45 | \item \code{sigma, order, degree} for the Bessel kernel "besseldot".
46 | \item \code{sigma, degree} for the ANOVA kernel "anovadot".
47 | }
48 |
49 | Hyper-parameters for user defined kernels can be passed through the
50 | kpar parameter as well.}
51 |
52 | \item{gamma}{regularization parameter (default: 0.1)}
53 |
54 | \item{ncomps}{number of canonical components (default: 10) }
55 |
56 | \item{\dots}{additional parameters for the \code{kpca} function}
57 | }
58 | \details{
59 | The kernel version of canonical correlation analysis.
60 | Kernel Canonical Correlation Analysis (KCCA) is a non-linear extension
61 | of CCA.
Given two random variables, KCCA aims at extracting the 62 | information which is shared by the two random variables. More 63 | precisely given \eqn{x} and \eqn{y} the purpose of KCCA is to provide 64 | nonlinear mappings \eqn{f(x)} and \eqn{g(y)} such that their 65 | correlation is maximized. 66 | } 67 | \value{ 68 | An S4 object containing the following slots: 69 | \item{kcor}{Correlation coefficients in feature space} 70 | \item{xcoef}{estimated coefficients for the \code{x} variables in the 71 | feature space} 72 | \item{ycoef}{estimated coefficients for the \code{y} variables in the 73 | feature space} 74 | %% \item{xvar}{The canonical variates for \code{x}} 75 | %% \item{yvar}{The canonical variates for \code{y}} 76 | 77 | } 78 | \references{ Malte Kuss, Thore Graepel \cr 79 | \emph{The Geometry Of Kernel Canonical Correlation Analysis}\cr 80 | \url{https://www.microsoft.com/en-us/research/publication/the-geometry-of-kernel-canonical-correlation-analysis/}} 81 | \author{ 82 | Alexandros Karatzoglou \cr 83 | \email{alexandros.karatzoglou@ci.tuwien.ac.at} 84 | } 85 | 86 | \seealso{\code{\link{cancor}}, \code{\link{kpca}}, \code{\link{kfa}}, \code{\link{kha}}} 87 | \examples{ 88 | 89 | ## dummy data 90 | x <- matrix(rnorm(30),15) 91 | y <- matrix(rnorm(30),15) 92 | 93 | kcca(x,y,ncomps=2) 94 | 95 | } 96 | \keyword{multivariate} 97 | 98 | -------------------------------------------------------------------------------- /src/stack.h: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the MSufSort suffix sorting algorithm (Version 2.2). 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Michael A. Maniscalco 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Michael A. Maniscalco 23 | * 24 | * ***** END LICENSE BLOCK ***** */ 25 | 26 | #ifndef MSUFSORT_STACK_H 27 | #define MSUFSORT_STACK_H 28 | 29 | //============================================================================================= 30 | // A quick and dirty stack class for use with the MSufSort algorithm 31 | // 32 | // Author: M.A. 
Maniscalco 33 | // Date: 7/30/04 34 | // email: michael@www.michael-maniscalco.com 35 | // 36 | //============================================================================================= 37 | 38 | #include "memory.h" 39 | 40 | 41 | template 42 | class Stack 43 | { 44 | public: 45 | Stack(unsigned int initialSize, unsigned int maxExpandSize, bool preAllocate = false): 46 | m_initialSize(initialSize), m_maxExpandSize(maxExpandSize), m_preAllocate(preAllocate) 47 | { 48 | Initialize(); 49 | } 50 | 51 | virtual ~Stack(){SetSize(0);} 52 | 53 | void Push(T value); 54 | 55 | T & Pop(); 56 | 57 | T & Top(); 58 | 59 | void SetSize(unsigned int stackSize); 60 | 61 | void Initialize(); 62 | 63 | unsigned int Count(); 64 | 65 | void Clear(); 66 | 67 | T * m_stack; 68 | 69 | T * m_stackPtr; 70 | 71 | T * m_endOfStack; 72 | 73 | unsigned int m_stackSize; 74 | 75 | unsigned int m_initialSize; 76 | 77 | unsigned int m_maxExpandSize; 78 | 79 | bool m_preAllocate; 80 | }; 81 | 82 | 83 | 84 | 85 | 86 | 87 | template 88 | inline void Stack::Clear() 89 | { 90 | m_stackPtr = m_stack; 91 | } 92 | 93 | 94 | 95 | 96 | template 97 | inline unsigned int Stack::Count() 98 | { 99 | return (unsigned int)(m_stackPtr - m_stack); 100 | } 101 | 102 | 103 | 104 | 105 | template 106 | inline void Stack::Initialize() 107 | { 108 | m_stack = m_endOfStack = m_stackPtr = 0; 109 | m_stackSize = 0; 110 | if (m_preAllocate) 111 | SetSize(m_initialSize); 112 | } 113 | 114 | 115 | 116 | 117 | template 118 | inline void Stack::Push(T value) 119 | { 120 | if (m_stackPtr >= m_endOfStack) 121 | { 122 | unsigned int newSize = (m_stackSize < m_maxExpandSize) ? m_stackSize + m_maxExpandSize : (m_stackSize << 1); 123 | SetSize(newSize); 124 | } 125 | *(m_stackPtr++) = value; 126 | } 127 | 128 | 129 | 130 | 131 | 132 | 133 | template 134 | inline T & Stack::Pop() 135 | { 136 | return *(--m_stackPtr); 137 | } 138 | 139 | 140 | 141 | template 142 | inline T & Stack::Top() 143 | { 144 | return *(m_stackPtr - 1); 145 | } 146 | 147 | 148 | 149 | 150 | 151 | template 152 | inline void Stack::SetSize(unsigned int stackSize) 153 | { 154 | if (m_stackSize == stackSize) 155 | return; 156 | 157 | T * newStack = 0; 158 | if (stackSize) 159 | { 160 | newStack = new T[stackSize]; 161 | unsigned int bytesToCopy = (unsigned int)(m_stackPtr - m_stack) * (unsigned int)sizeof(T); 162 | if (bytesToCopy) 163 | memcpy((void *)newStack, m_stack, bytesToCopy); 164 | 165 | m_stackPtr = &newStack[m_stackPtr - m_stack]; 166 | m_endOfStack = &newStack[stackSize]; 167 | m_stackSize = stackSize; 168 | } 169 | 170 | if (m_stack) 171 | delete [] m_stack; 172 | m_stack = newStack; 173 | } 174 | #endif 175 | -------------------------------------------------------------------------------- /src/ctable.cpp: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 
14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/ChildTable.cpp 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | #ifndef CTABLE_CPP 37 | #define CTABLE_CPP 38 | 39 | #include "ctable.h" 40 | #include 41 | 42 | /** 43 | * Return the value of idx-th "up" field of child table. 44 | * val = childtab[idx -1]; 45 | * 46 | * \param idx - (IN) The index of child table. 47 | * \param val - (OUT) The value of idx-th entry in child table's "up" field. 48 | */ 49 | ErrorCode 50 | ChildTable::up(const UInt32 &idx, UInt32 &val){ 51 | 52 | if(idx == size()) { 53 | // Special case: To get the first 0-index 54 | val = (*this)[idx-1]; 55 | return NOERROR; 56 | } 57 | 58 | // svnvish: BUGBUG 59 | // Do we need to this in production code? 60 | UInt32 lcp_idx = 0, lcp_prev_idx = 0; 61 | lcp_idx = _lcptab[idx]; 62 | lcp_prev_idx = _lcptab[idx-1]; 63 | 64 | assert(lcp_prev_idx > lcp_idx); 65 | val = (*this)[idx-1]; 66 | 67 | return NOERROR; 68 | } 69 | 70 | /** 71 | * Return the value of idx-th "down" field of child table. Deprecated. 72 | * Instead use val = childtab[idx]; 73 | * 74 | * \param idx - (IN) The index of child table. 75 | * \param val - (OUT) The value of idx-th entry in child table's "down" field. 76 | */ 77 | ErrorCode 78 | ChildTable::down(const UInt32 &idx, UInt32 &val){ 79 | 80 | // For a l-interval, l-[i..j], childtab[i].down == childtab[j+1].up 81 | // If l-[i..j] is last child-interval of its parent OR 0-[0..n], 82 | // childtab[i].nextlIndex == childtab[i].down 83 | 84 | // svnvish: BUGBUG 85 | // Do we need to this in production code? 86 | // UInt32 lcp_idx = 0, lcp_nextidx = 0; 87 | // lcp_nextidx = _lcptab[(*this)[idx]]; 88 | // lcp_idx = _lcptab[idx]; 89 | // assert(lcp_nextidx > lcp_idx); 90 | 91 | // childtab[i].down := childtab[i].nextlIndex 92 | val = (*this)[idx]; 93 | 94 | return NOERROR; 95 | } 96 | 97 | 98 | /** 99 | * Return the first l-index of a given l-[i..j] interval. 100 | * 101 | * \param i - (IN) Left bound of l-[i..j] 102 | * \param j - (IN) Right bound of l-[i..j] 103 | * \param idx - (OUT) The first l-index. 104 | */ 105 | 106 | ErrorCode 107 | ChildTable::l_idx(const UInt32 &i, const UInt32 &j, UInt32 &idx){ 108 | 109 | UInt32 up = (*this)[j]; 110 | 111 | if(i < up && up <= j){ 112 | idx = up; 113 | }else { 114 | idx = (*this)[i]; 115 | } 116 | return NOERROR; 117 | } 118 | 119 | 120 | /** 121 | * Dump array elements to output stream 122 | * 123 | * \param os - (IN) Output stream. 124 | * \param ct - (IN) ChildTable object. 
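 *             Each element is written on its own line in the form "ct[ i]: value".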
125 | */
126 | std::ostream&
127 | operator << (std::ostream& os, const ChildTable& ct){
128 |
129 | for( UInt32 i = 0; i < ct.size(); i++ ){
130 | os << "ct[ " << i << "]: " << ct[i] << std::endl;
131 | }
132 | return os;
133 | }
134 |
135 | #endif
136 |
--------------------------------------------------------------------------------
/man/vm-class.Rd:
--------------------------------------------------------------------------------
1 | \name{vm-class}
2 | \docType{class}
3 |
4 | \alias{vm-class}
5 | \alias{cross}
6 | \alias{alpha}
7 | \alias{error}
8 | \alias{type}
9 | \alias{kernelf}
10 | \alias{xmatrix}
11 | \alias{ymatrix}
12 | \alias{lev}
13 | \alias{kcall}
14 |
15 | \alias{alpha,vm-method}
16 | \alias{cross,vm-method}
17 | \alias{error,vm-method}
18 | \alias{fitted,vm-method}
19 | \alias{kernelf,vm-method}
20 | \alias{kpar,vm-method}
21 | \alias{lev,vm-method}
22 | \alias{kcall,vm-method}
23 | \alias{type,vm-method}
24 | \alias{xmatrix,vm-method}
25 | \alias{ymatrix,vm-method}
26 |
27 | \title{Class "vm" }
28 | \description{An S4 VIRTUAL class used as a base for the various vector
29 | machine classes in \pkg{kernlab}}
30 |
31 | \section{Objects from the Class}{
32 | Objects from the class cannot be created directly but only contained
33 | in other classes.
34 | }
35 |
36 | \section{Slots}{
37 | \describe{
38 |
39 | \item{\code{alpha}:}{Object of class \code{"listI"} containing the
40 | resulting alpha vector (list in case of multiclass classification) (support vectors)}
41 |
42 | \item{\code{type}:}{Object of class \code{"character"} containing
43 | the vector machine type e.g.,
44 | ("C-svc", "nu-svc", "C-bsvc", "spoc-svc",
45 | "one-svc", "eps-svr", "nu-svr", "eps-bsvr")}
46 |
47 | \item{\code{kernelf}:}{Object of class \code{"function"} containing
48 | the kernel function}
49 |
50 | \item{\code{kpar}:}{Object of class \code{"list"} containing the
51 | kernel function parameters (hyperparameters)}
52 |
53 | \item{\code{kcall}:}{Object of class \code{"call"} containing the function call}
54 |
55 | \item{\code{terms}:}{Object of class \code{"ANY"} containing the
56 | terms representation of the symbolic model used (when using a formula)}
57 |
58 | \item{\code{xmatrix}:}{Object of class \code{"input"} the data
59 | matrix used during computations (support vectors) (possibly scaled and without NA)}
60 |
61 | \item{\code{ymatrix}:}{Object of class \code{"output"} the response matrix/vector }
62 |
63 | \item{\code{fitted}:}{Object of class \code{"output"} with the fitted values,
64 | predictions using the training set.}
65 |
66 | \item{\code{lev}:}{Object of class \code{"vector"} with the levels of the
67 | response (in the case of classification)}
68 |
69 | \item{\code{nclass}:}{Object of class \code{"numeric"} containing
70 | the number of classes (in the case of classification)}
71 |
72 | \item{\code{error}:}{Object of class \code{"vector"} containing the
73 | training error}
74 |
75 | \item{\code{cross}:}{Object of class \code{"vector"} containing the
76 | cross-validation error }
77 |
78 | \item{\code{n.action}:}{Object of class \code{"ANY"} containing the
79 | action performed for NA }
80 | }
81 | }
82 | \section{Methods}{
83 | \describe{
84 |
85 | \item{alpha}{\code{signature(object = "vm")}: returns the complete
86 | alpha vector (with zero values)}
87 |
88 | \item{cross}{\code{signature(object = "vm")}: returns the
89 | cross-validation error }
90 |
91 | \item{error}{\code{signature(object = "vm")}: returns the training
92 | error }
93 |
94 | \item{fitted}{\code{signature(object
= "vm")}: returns the fitted
95 | values (predict on training set) }
96 |
97 | \item{kernelf}{\code{signature(object = "vm")}: returns the kernel
98 | function}
99 |
100 | \item{kpar}{\code{signature(object = "vm")}: returns the kernel
101 | parameters (hyperparameters)}
102 |
103 | \item{lev}{\code{signature(object = "vm")}: returns the levels in
104 | case of classification }
105 |
106 | \item{kcall}{\code{signature(object="vm")}: returns the function call}
107 |
108 | \item{type}{\code{signature(object = "vm")}: returns the problem type}
109 |
110 | \item{xmatrix}{\code{signature(object = "vm")}: returns the data
111 | matrix used (support vectors)}
112 |
113 | \item{ymatrix}{\code{signature(object = "vm")}: returns the
114 | response vector}
115 | }
116 | }
117 |
118 | \author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
119 |
120 |
121 | \seealso{
122 | \code{\link{ksvm-class}},
123 | \code{\link{rvm-class}},
124 | \code{\link{gausspr-class}}
125 | }
126 |
127 | \keyword{classes}
128 |
--------------------------------------------------------------------------------
/man/dots.Rd:
--------------------------------------------------------------------------------
1 | \name{dots}
2 | \alias{dots}
3 | \alias{kernels}
4 | \alias{rbfdot}
5 | \alias{polydot}
6 | \alias{tanhdot}
7 | \alias{vanilladot}
8 | \alias{laplacedot}
9 | \alias{besseldot}
10 | \alias{anovadot}
11 | \alias{fourierdot}
12 | \alias{splinedot}
13 | \alias{kpar}
14 | \alias{kfunction}
15 | \alias{show,kernel-method}
16 | \title{Kernel Functions}
17 | \description{
18 | The kernel generating functions provided in kernlab. \cr
19 | The Gaussian RBF kernel \eqn{k(x,x') = \exp(-\sigma \|x - x'\|^2)} \cr
20 | The Polynomial kernel \eqn{k(x,x') = (scale <x, x'> + offset)^{degree}}\cr
21 | The Linear kernel \eqn{k(x,x') = <x, x'>}\cr
22 | The Hyperbolic tangent kernel \eqn{k(x, x') = \tanh(scale <x, x'> + offset)}\cr
23 | The Laplacian kernel \eqn{k(x,x') = \exp(-\sigma \|x - x'\|)} \cr
24 | The Bessel kernel \eqn{k(x,x') = (- Bessel_{(\nu+1)}^n \sigma \|x - x'\|^2)} \cr
25 | The ANOVA RBF kernel \eqn{k(x,x') = \sum_{1\leq i_1 \ldots < i_D \leq
26 | N} \prod_{d=1}^D k(x_{id}, {x'}_{id})} where k(x,x') is a Gaussian
27 | RBF kernel. \cr
28 | The Spline kernel \eqn{ \prod_{d=1}^D 1 + x_i x_j + x_i x_j min(x_i,
29 | x_j) - \frac{x_i + x_j}{2} min(x_i,x_j)^2 +
30 | \frac{min(x_i,x_j)^3}{3}} \\
31 | The String kernels (see \code{stringdot}).
32 | }
33 | \usage{
34 | rbfdot(sigma = 1)
35 |
36 | polydot(degree = 1, scale = 1, offset = 1)
37 |
38 | tanhdot(scale = 1, offset = 1)
39 |
40 | vanilladot()
41 |
42 | laplacedot(sigma = 1)
43 |
44 | besseldot(sigma = 1, order = 1, degree = 1)
45 |
46 | anovadot(sigma = 1, degree = 1)
47 |
48 | splinedot()
49 | }
50 |
51 | \arguments{
52 | \item{sigma}{The inverse kernel width used by the Gaussian, the
53 | Laplacian, the Bessel and the ANOVA kernel }
54 | \item{degree}{The degree of the polynomial, bessel or ANOVA
55 | kernel function.
This has to be a positive integer.}
56 | \item{scale}{The scaling parameter of the polynomial and tangent
57 | kernel is a convenient way of normalizing
58 | patterns without the need to modify the data itself}
59 | \item{offset}{The offset used in a polynomial or hyperbolic tangent
60 | kernel}
61 | \item{order}{The order of the Bessel function to be used as a kernel}
62 | }
63 | \details{
64 | The kernel generating functions are used to initialize a kernel
65 | function
66 | which calculates the dot (inner) product between two feature vectors in a
67 | Hilbert Space. These functions can be passed as a \code{kernel} argument on almost all
68 | functions in \pkg{kernlab} (e.g., \code{ksvm}, \code{kpca} etc.).
69 |
70 | Although using one of the existing kernel functions as a
71 | \code{kernel} argument in various functions in \pkg{kernlab} has the
72 | advantage that optimized code is used to calculate various kernel expressions,
73 | any other function implementing a dot product of class \code{kernel} can also be used as a kernel
74 | argument. This allows the user to use, test and develop special kernels
75 | for a given data set or algorithm.
76 | For details on the string kernels see \code{stringdot}.
77 | }
78 | \value{
79 | Returns an S4 object of class \code{kernel} which extends the
80 | \code{function} class. The resulting function implements the given
81 | kernel calculating the inner (dot) product between two vectors.
82 | \item{kpar}{a list containing the kernel parameters (hyperparameters)
83 | used.}
84 | The kernel parameters can be accessed by the \code{kpar} function.
85 | }
86 |
87 | \author{Alexandros Karatzoglou\cr
88 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
89 |
90 | \note{If the offset in the Polynomial kernel is set to \eqn{0}, we obtain homogeneous polynomial
91 | kernels; for positive values, we have inhomogeneous
92 | kernels. Note that for negative values the kernel does not satisfy Mercer's
93 | condition and thus the optimizers may fail. \cr
94 |
95 | In the Hyperbolic tangent kernel if the offset is negative the likelihood of obtaining a kernel
96 | matrix that is not positive definite is much higher (since then even some
97 | diagonal elements may be negative), hence if this kernel has to be used, the
98 | offset should always be positive. Note, however, that this is no guarantee
99 | that the kernel will be positive.
100 | }
101 |
102 |
103 |
104 |
105 | \seealso{\code{stringdot}, \code{\link{kernelMatrix} }, \code{\link{kernelMult}}, \code{\link{kernelPol}}}
106 | \examples{
107 | rbfkernel <- rbfdot(sigma = 0.1)
108 | rbfkernel
109 |
110 | kpar(rbfkernel)
111 |
112 | ## create two vectors
113 | x <- rnorm(10)
114 | y <- rnorm(10)
115 |
116 | ## calculate dot product
117 | rbfkernel(x,y)
118 |
119 | }
120 | \keyword{symbolmath}
121 |
122 |
--------------------------------------------------------------------------------
/man/inchol.Rd:
--------------------------------------------------------------------------------
1 | \name{inchol}
2 | \alias{inchol}
3 | \alias{inchol,matrix-method}
4 | %- Also NEED an '\alias' for EACH other topic documented here.
5 | \title{Incomplete Cholesky decomposition}
6 | \description{
7 | \code{inchol} computes the incomplete Cholesky decomposition
8 | of the kernel matrix from a data matrix.
9 | }
10 | \usage{
11 | inchol(x, kernel="rbfdot", kpar=list(sigma=0.1), tol = 0.001,
12 | maxiter = dim(x)[1], blocksize = 50, verbose = 0)
13 | }
14 | %- maybe also 'usage' for other objects documented here.
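% Hedged sketch of the memory saving: since K ~= Z %*% t(Z) and Z usually has
% far fewer columns than K, a product K %*% v can be computed as
% Z %*% crossprod(Z, v) without ever forming the full n x n kernel matrix.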
15 | \arguments{
16 | \item{x}{The data matrix indexed by row}
17 | \item{kernel}{the kernel function used in training and predicting.
18 | This parameter can be set to any function, of class \code{kernel},
19 | which computes the inner product in feature space between two
20 | vector arguments. kernlab provides the most popular kernel functions
21 | which can be used by setting the kernel parameter to the following
22 | strings:
23 | \itemize{
24 | \item \code{rbfdot} Radial Basis kernel function "Gaussian"
25 | \item \code{polydot} Polynomial kernel function
26 | \item \code{vanilladot} Linear kernel function
27 | \item \code{tanhdot} Hyperbolic tangent kernel function
28 | \item \code{laplacedot} Laplacian kernel function
29 | \item \code{besseldot} Bessel kernel function
30 | \item \code{anovadot} ANOVA RBF kernel function
31 | \item \code{splinedot} Spline kernel
32 |
33 | }
34 | The kernel parameter can also be set to a user defined function of
35 | class kernel by passing the function name as an argument.
36 | }
37 |
38 | \item{kpar}{the list of hyper-parameters (kernel parameters).
39 | This is a list which contains the parameters to be used with the
40 | kernel function. Valid parameters for existing kernels are:
41 | \itemize{
42 | \item \code{sigma} inverse kernel width for the Radial Basis
43 | kernel function "rbfdot" and the Laplacian kernel "laplacedot".
44 | \item \code{degree, scale, offset} for the Polynomial kernel "polydot"
45 | \item \code{scale, offset} for the Hyperbolic tangent kernel
46 | function "tanhdot"
47 | \item \code{sigma, order, degree} for the Bessel kernel "besseldot".
48 | \item \code{sigma, degree} for the ANOVA kernel "anovadot".
49 | }
50 | Hyper-parameters for user defined kernels can be passed through the
51 | kpar parameter as well.
52 | }
53 |
54 | \item{tol}{algorithm stops when remaining pivots bring less accuracy
55 | than \code{tol} (default: 0.001)}
56 | \item{maxiter}{maximum number of iterations and columns in \eqn{Z}}
57 | \item{blocksize}{add this many columns to matrix per iteration}
58 | \item{verbose}{print info on algorithm convergence}
59 | }
60 | \details{An incomplete Cholesky decomposition calculates
61 | \eqn{Z}, where \eqn{K = ZZ'}, \eqn{K} being the kernel matrix.
62 | Since the rank of a kernel matrix is usually low, \eqn{Z} tends to be smaller
63 | than the complete kernel matrix. The decomposed matrix can be
64 | used to create memory efficient kernel-based algorithms without the
65 | need to compute and store a complete kernel matrix in memory.}
66 | \value{
67 | An S4 object of class "inchol" which is an extension of the class
68 | "matrix". The object is the decomposed kernel matrix along with
69 | the slots:
70 | \item{pivots}{Indices on which pivots were done}
71 | \item{diagresidues}{Residuals left on the diagonal}
72 | \item{maxresiduals}{Residuals picked for pivoting}
73 |
74 | Slots can be accessed either by \code{object@slot}
75 | or by accessor functions with the same name (e.g., \code{pivots(object)})}
76 |
77 | \references{
78 | Francis R. Bach, Michael I. Jordan\cr
79 | \emph{Kernel Independent Component Analysis}\cr
80 | Journal of Machine Learning Research 3, 1-48\cr
81 | \url{https://www.jmlr.org/papers/volume3/bach02a/bach02a.pdf}
82 | }
83 |
84 | \author{Alexandros Karatzoglou (based on Matlab code by
85 | S.V.N.
(Vishy) Vishwanathan and Alex Smola)\cr 86 | \email{alexandros.karatzoglou@ci.tuwien.ac.at}} 87 | 88 | \seealso{\code{\link{csi}}, \code{\link{inchol-class}}, \code{\link{chol}}} 89 | \examples{ 90 | 91 | data(iris) 92 | datamatrix <- as.matrix(iris[,-5]) 93 | # initialize kernel function 94 | rbf <- rbfdot(sigma=0.1) 95 | rbf 96 | Z <- inchol(datamatrix,kernel=rbf) 97 | dim(Z) 98 | pivots(Z) 99 | # calculate kernel matrix 100 | K <- crossprod(t(Z)) 101 | # difference between approximated and real kernel matrix 102 | (K - kernelMatrix(kernel=rbf, datamatrix))[6,] 103 | 104 | } 105 | \keyword{methods} 106 | \keyword{algebra} 107 | \keyword{array} 108 | -------------------------------------------------------------------------------- /R/couplers.R: -------------------------------------------------------------------------------- 1 | ## wrapper function for couplers 2 | ## author : alexandros karatzoglou 3 | 4 | couple <- function(probin, coupler = "minpair") 5 | { 6 | if(is.vector(probin)) 7 | probin <- matrix(probin,1) 8 | m <- dim(probin)[1] 9 | 10 | coupler <- match.arg(coupler, c("minpair", "pkpd", "vote", "ht")) 11 | 12 | # if(coupler == "ht") 13 | # multiprob <- sapply(1:m, function(x) do.call(coupler, list(probin[x ,], clscnt))) 14 | # else 15 | multiprob <- sapply(1:m, function(x) do.call(coupler, list(probin[x ,]))) 16 | 17 | return(t(multiprob)) 18 | } 19 | 20 | 21 | ht <- function(probin, clscnt, iter=1000) 22 | { 23 | nclass <- length(clscnt) 24 | probim <- matrix(0, nclass, nclass) 25 | for(i in 1:nclass) 26 | for(j in 1:nclass) 27 | if(j>i) 28 | { 29 | probim[i,j] <- probin[i] 30 | probim[j,i] <- 1 - probin[i] 31 | } 32 | 33 | p <- rep(1/nclass,nclass) 34 | u <- matrix((1/nclass)/((1/nclass)+(1/nclass)) ,nclass,nclass) 35 | iter <- 0 36 | 37 | while(TRUE) 38 | { 39 | iter <- iter + 1 40 | stoperror <- 0 41 | 42 | for(i in 1:nclass){ 43 | num <- den <- 0 44 | for(j in 1:nclass) 45 | { 46 | if (j!=i) 47 | { 48 | num <- num + (clscnt[i] + clscnt[j]) * probim[i,j] 49 | den <- den + (clscnt[i] + clscnt[j]) * u[i,j] 50 | } 51 | } 52 | alpha <- num/(den + 1e-308) 53 | p[i] <- p[i]*alpha 54 | stoperror <- stoperror + (alpha -1)^2 55 | if(0) 56 | { 57 | sum <- 0 58 | sum <- sum(p) + sum 59 | p <- p/sum 60 | for(ui in 1:nclass) 61 | for(uj in 1:nclass) 62 | u[ui, uj] <- p[ui]/(p[ui] + p[uj]) 63 | } 64 | else 65 | { 66 | for(j in 1:nclass) 67 | if (i!=j) 68 | { 69 | u[i,j] <- p[i]/(p[i] + p[j]) 70 | u[j,i] <- 1 - u[i,j] 71 | } 72 | } 73 | } 74 | if(stoperror < 1e-3) 75 | break 76 | if(iter > 400) 77 | { 78 | cat("Too many iterations: aborting", probin, iter, stoperror, p) 79 | break 80 | } 81 | } 82 | ## normalize prob. 83 | p <- p/sum(p) 84 | return(p) 85 | } 86 | 87 | 88 | minpair <- function(probin) 89 | { ## Count number of classes and construct prob. 
matrix 90 | nclass <- (1+sqrt(1 + 8*length(probin)))/2 91 | if(nclass%%1 != 0) stop("Vector has wrong length; only one-against-one problems are supported") 92 | probim <- matrix(0, nclass, nclass) 93 | probim[upper.tri(probim)] <- probin 94 | probim[lower.tri(probim)] <- 1 - probin 95 | 96 | sum <- colSums(probim^2) 97 | Q <- diag(sum) 98 | Q[upper.tri(Q)] <- - probin*(1 - probin) 99 | Q[lower.tri(Q)] <- - probin*(1 - probin) 100 | SQ <- matrix(0,nclass +1, nclass +1) 101 | SQ[1:(nclass+1) <= nclass, 1:(nclass+1) <= nclass] <- Q 102 | SQ[1:(nclass+1) > nclass, 1:(nclass+1) <= nclass] <- rep(1,nclass) 103 | SQ[1:(nclass+1) <= nclass, 1:(nclass+1) > nclass] <- rep(1,nclass) 104 | 105 | rhs <- rep(0,nclass+1) 106 | rhs[nclass + 1] <- 1 107 | 108 | p <- solve(SQ,rhs) 109 | 110 | p <- p[-(nclass+1)]/sum(p[-(nclass+1)]) 111 | return(p) 112 | } 113 | 114 | 115 | pkpd <- function(probin) 116 | { ## Count number of classes and construct prob. matrix 117 | nclass <- k <- (1+sqrt(1 + 8*length(probin)))/2 118 | if(nclass%%1 != 0) stop("Vector has wrong length; only one-against-one problems are supported") 119 | probim <- matrix(0, nclass, nclass) 120 | probim[upper.tri(probim)] <- probin 121 | probim[lower.tri(probim)] <- 1 - probin 122 | 123 | probim[probim==0] <- 1e-300 124 | R <- 1/probim 125 | diag(R) <- 0 126 | p <- 1/(rowSums(R) - (k-2)) 127 | 128 | p <- p/sum(p) 129 | return(p) 130 | } 131 | 132 | 133 | vote <- function(probin) 134 | { 135 | nclass <- (1+sqrt(1 + 8*length(probin)))/2 136 | if(nclass%%1 != 0) stop("Vector has wrong length; only one-against-one problems are supported") 137 | 138 | votev <- rep(0,nclass) 139 | p <- 0 140 | for(i in 1:(nclass-1)) 141 | { 142 | jj <- i+1 143 | for(j in jj:nclass) 144 | { 145 | p <- p+1 146 | votev[i][probin[p] >= 0.5] <- votev[i][probin[p] >= 0.5] + 1 147 | votev[j][probin[p] < 0.5] <- votev[j][probin[p] < 0.5] + 1 148 | } 149 | } 150 | 151 | p <- votev/sum(votev) 152 | return(p) 153 | } 154 | 155 | 156 | -------------------------------------------------------------------------------- /man/gausspr-class.Rd: -------------------------------------------------------------------------------- 1 | \name{gausspr-class} 2 | \docType{class} 3 | \alias{gausspr-class} 4 | \alias{alpha,gausspr-method} 5 | \alias{cross,gausspr-method} 6 | \alias{error,gausspr-method} 7 | \alias{kcall,gausspr-method} 8 | \alias{kernelf,gausspr-method} 9 | \alias{kpar,gausspr-method} 10 | \alias{lev,gausspr-method} 11 | \alias{type,gausspr-method} 12 | \alias{alphaindex,gausspr-method} 13 | \alias{xmatrix,gausspr-method} 14 | \alias{ymatrix,gausspr-method} 15 | \alias{scaling,gausspr-method} 16 | 17 | \title{Class "gausspr"} 18 | \description{The Gaussian Processes object class} 19 | \section{Objects from the Class}{ 20 | Objects can be created by calls of the form \code{new("gausspr", ...)}
21 | or by calling the \code{gausspr} function. 22 | } 23 | \section{Slots}{ 24 | \describe{ 25 | \item{\code{tol}:}{Object of class \code{"numeric"} contains 26 | the tolerance of the termination criteria} 27 | \item{\code{kernelf}:}{Object of class \code{"kfunction"} contains 28 | the kernel function used} 29 | \item{\code{kpar}:}{Object of class \code{"list"} contains the 30 | kernel parameter used } 31 | \item{\code{kcall}:}{Object of class \code{"list"} contains the used 32 | function call } 33 | \item{\code{type}:}{Object of class \code{"character"} contains 34 | type of problem } 35 | \item{\code{terms}:}{Object of class \code{"ANY"} contains the 36 | terms representation of the symbolic model used (when using a formula)} 37 | \item{\code{xmatrix}:}{Object of class \code{"input"} containing 38 | the data matrix used } 39 | \item{\code{ymatrix}:}{Object of class \code{"output"} containing the 40 | response matrix} 41 | \item{\code{fitted}:}{Object of class \code{"output"} containing the 42 | fitted values } 43 | \item{\code{lev}:}{Object of class \code{"vector"} containing the 44 | levels of the response (in case of classification) } 45 | \item{\code{nclass}:}{Object of class \code{"numeric"} containing 46 | the number of classes (in case of classification) } 47 | \item{\code{alpha}:}{Object of class \code{"listI"} containing the 48 | computed alpha values } 49 | \item{\code{alphaindex}}{Object of class \code{"list"} containing 50 | the indexes for the alphas in various classes (in multi-class 51 | problems).} 52 | \item{\code{sol}}{Object of class \code{"matrix"} containing the solution to the Gaussian Process formulation; it is used to compute the variance in regression problems.} 53 | \item{\code{scaling}}{Object of class \code{"ANY"} containing 54 | the scaling coefficients of the data (when \code{scaled = TRUE} is used).} 55 | \item{\code{nvar}:}{Object of class \code{"numeric"} containing the 56 | computed variance} 57 | \item{\code{error}:}{Object of class \code{"numeric"} containing the 58 | training error} 59 | \item{\code{cross}:}{Object of class \code{"numeric"} containing the 60 | cross validation error} 61 | \item{\code{n.action}:}{Object of class \code{"ANY"} containing the 62 | action performed on NAs } 63 | } 64 | } 65 | \section{Methods}{ 66 | \describe{ 67 | \item{alpha}{\code{signature(object = "gausspr")}: returns the alpha 68 | vector} 69 | \item{cross}{\code{signature(object = "gausspr")}: returns the cross 70 | validation error } 71 | \item{error}{\code{signature(object = "gausspr")}: returns the 72 | training error } 73 | \item{fitted}{\code{signature(object = "vm")}: returns the fitted values } 74 | \item{kcall}{\code{signature(object = "gausspr")}: returns the call performed} 75 | \item{kernelf}{\code{signature(object = "gausspr")}: returns the 76 | kernel function used} 77 | \item{kpar}{\code{signature(object = "gausspr")}: returns the kernel 78 | parameter used} 79 | \item{lev}{\code{signature(object = "gausspr")}: returns the 80 | response levels (in classification) } 81 | \item{type}{\code{signature(object = "gausspr")}: returns the type 82 | of problem} 83 | \item{xmatrix}{\code{signature(object = "gausspr")}: returns the 84 | data matrix used} 85 | \item{ymatrix}{\code{signature(object = "gausspr")}: returns the 86 | response matrix used} 87 | \item{scaling}{\code{signature(object = "gausspr")}: returns the 88 | scaling coefficients of the data (when \code{scaled = TRUE} is used)} 89 | 90 | } 91 | } 92 | 93 | \author{Alexandros Karatzoglou\cr
\email{alexandros.karatzoglou@ci.tuwien.ac.at}} 94 | 95 | 96 | 97 | \seealso{ 98 | \code{\link{gausspr}}, 99 | \code{\link{ksvm-class}}, 100 | \code{\link{vm-class}} 101 | } 102 | \examples{ 103 | 104 | # train model 105 | data(iris) 106 | test <- gausspr(Species~.,data=iris,var=2) 107 | test 108 | alpha(test) 109 | error(test) 110 | lev(test) 111 | } 112 | \keyword{classes} 113 | -------------------------------------------------------------------------------- /man/lssvm-class.Rd: -------------------------------------------------------------------------------- 1 | \name{lssvm-class} 2 | \docType{class} 3 | \alias{lssvm-class} 4 | \alias{alpha,lssvm-method} 5 | \alias{b,lssvm-method} 6 | \alias{cross,lssvm-method} 7 | \alias{error,lssvm-method} 8 | \alias{kcall,lssvm-method} 9 | \alias{kernelf,lssvm-method} 10 | \alias{kpar,lssvm-method} 11 | \alias{param,lssvm-method} 12 | \alias{lev,lssvm-method} 13 | \alias{type,lssvm-method} 14 | \alias{alphaindex,lssvm-method} 15 | \alias{xmatrix,lssvm-method} 16 | \alias{ymatrix,lssvm-method} 17 | \alias{scaling,lssvm-method} 18 | \alias{nSV,lssvm-method} 19 | 20 | \title{Class "lssvm"} 21 | \description{The Least Squares Support Vector Machine object class} 22 | \section{Objects from the Class}{ 23 | Objects can be created by calls of the form \code{new("lssvm", ...)} 24 | or by calling the \code{lssvm} function. 25 | } 26 | \section{Slots}{ 27 | \describe{ 28 | \item{\code{kernelf}:}{Object of class \code{"kfunction"} contains 29 | the kernel function used} 30 | \item{\code{kpar}:}{Object of class \code{"list"} contains the 31 | kernel parameter used } 32 | \item{\code{param}:}{Object of class \code{"list"} contains the 33 | regularization parameter used.} 34 | \item{\code{kcall}:}{Object of class \code{"call"} contains the used 35 | function call } 36 | \item{\code{type}:}{Object of class \code{"character"} contains 37 | type of problem } 38 | \item{\code{coef}:}{Object of class \code{"ANY"} contains 39 | the model parameter } 40 | \item{\code{terms}:}{Object of class \code{"ANY"} contains the 41 | terms representation of the symbolic model used (when using a formula)} 42 | \item{\code{xmatrix}:}{Object of class \code{"matrix"} containing 43 | the data matrix used } 44 | \item{\code{ymatrix}:}{Object of class \code{"output"} containing the 45 | response matrix} 46 | \item{\code{fitted}:}{Object of class \code{"output"} containing the 47 | fitted values } 48 | \item{\code{b}:}{Object of class \code{"numeric"} containing the 49 | offset } 50 | \item{\code{lev}:}{Object of class \code{"vector"} containing the 51 | levels of the response (in case of classification) } 52 | \item{\code{scaling}:}{Object of class \code{"ANY"} containing the 53 | scaling information performed on the data} 54 | \item{\code{nclass}:}{Object of class \code{"numeric"} containing 55 | the number of classes (in case of classification) } 56 | \item{\code{alpha}:}{Object of class \code{"listI"} containing the 57 | computed alpha values } 58 | \item{\code{alphaindex}}{Object of class \code{"list"} containing 59 | the indexes for the alphas in various classes (in multi-class problems).} 60 | \item{\code{error}:}{Object of class \code{"numeric"} containing the 61 | training error} 62 | \item{\code{cross}:}{Object of class \code{"numeric"} containing the 63 | cross validation error} 64 | \item{\code{n.action}:}{Object of class \code{"ANY"} containing the 65 | action performed on NAs } 66 | \item{\code{nSV}:}{Object of class \code{"numeric"} containing the 67 | number of model parameters } 68 | } 69 | } 70 |
\section{Methods}{ 71 | \describe{ 72 | \item{alpha}{\code{signature(object = "lssvm")}: returns the alpha 73 | vector} 74 | \item{cross}{\code{signature(object = "lssvm")}: returns the cross 75 | validation error } 76 | \item{error}{\code{signature(object = "lssvm")}: returns the 77 | training error } 78 | \item{fitted}{\code{signature(object = "vm")}: returns the fitted values } 79 | \item{kcall}{\code{signature(object = "lssvm")}: returns the call performed} 80 | \item{kernelf}{\code{signature(object = "lssvm")}: returns the 81 | kernel function used} 82 | \item{kpar}{\code{signature(object = "lssvm")}: returns the kernel 83 | parameter used} 84 | \item{param}{\code{signature(object = "lssvm")}: returns the regularization 85 | parameter used} 86 | \item{lev}{\code{signature(object = "lssvm")}: returns the 87 | response levels (in classification) } 88 | \item{type}{\code{signature(object = "lssvm")}: returns the type 89 | of problem} 90 | \item{scaling}{\code{signature(object = "lssvm")}: returns the 91 | scaling values } 92 | \item{xmatrix}{\code{signature(object = "lssvm")}: returns the 93 | data matrix used} 94 | \item{ymatrix}{\code{signature(object = "lssvm")}: returns the 95 | response matrix used} 96 | } 97 | } 98 | 99 | \author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} 100 | 101 | 102 | 103 | \seealso{ 104 | \code{\link{lssvm}}, 105 | \code{\link{ksvm-class}} 106 | } 107 | \examples{ 108 | 109 | # train model 110 | data(iris) 111 | test <- lssvm(Species~.,data=iris) 112 | test 113 | alpha(test) 114 | error(test) 115 | lev(test) 116 | } 117 | \keyword{classes} 118 | -------------------------------------------------------------------------------- /R/kfa.R: -------------------------------------------------------------------------------- 1 | 2 | ## This code takes the set x of vectors from the input space 3 | ## and does projection pursuit to find a good basis for x. 4 | ## 5 | ## The algorithm is described in Section 14.5 of 6 | ## Learning with Kernels by B. Schoelkopf and A. Smola, entitled 7 | ## Kernel Feature Analysis. 8 | ## 9 | ## created : 17.09.04 alexandros 10 | ## updated : 11 | 12 | setGeneric("kfa",function(x, ...) standardGeneric("kfa")) 13 | setMethod("kfa", signature(x = "formula"), 14 | function(x, data = NULL, na.action = na.omit, ...) 15 | { 16 | mt <- terms(x, data = data) 17 | if(attr(mt, "response") > 0) stop("response not allowed in formula") 18 | attr(mt, "intercept") <- 0 19 | cl <- match.call() 20 | mf <- match.call(expand.dots = FALSE) 21 | mf$formula <- mf$x 22 | mf$... <- NULL 23 | mf[[1L]] <- quote(stats::model.frame) 24 | mf <- eval(mf, parent.frame()) 25 | Terms <- attr(mf, "terms") 26 | na.act <- attr(mf, "na.action") 27 | x <- model.matrix(mt, mf) 28 | res <- kfa(x, ...)
29 | ## fix up call to refer to the generic, but leave arg name as `formula' 30 | cl[[1]] <- as.name("kfa") 31 | kcall(res) <- cl 32 | attr(Terms,"intercept") <- 0 33 | terms(res) <- Terms 34 | if(!is.null(na.act)) 35 | n.action(res) <- na.act 36 | 37 | return(res) 38 | }) 39 | 40 | setMethod("kfa",signature(x="matrix"), 41 | function(x, kernel="rbfdot", kpar=list(sigma=0.1), features = 0, subset = 59, normalize = TRUE, na.action = na.omit) 42 | { 43 | if(!is.matrix(x)) 44 | stop("x must be a matrix") 45 | 46 | x <- na.action(x) 47 | if(!is(kernel,"kernel")) 48 | { 49 | if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) 50 | kernel <- do.call(kernel, kpar) 51 | } 52 | 53 | if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") 54 | 55 | ## initialize variables 56 | m <- dim(x)[1] 57 | 58 | if(subset > m) 59 | subset <- m 60 | 61 | if (features==0) 62 | features <- subset 63 | 64 | alpha <- matrix(0,subset,features) 65 | alphazero <- rep(1,subset) 66 | alphafeat <- matrix(0,features,features) 67 | idx <- -(1:subset) 68 | randomindex <- sample(1:m, subset) 69 | K <- kernelMatrix(kernel,x[randomindex,,drop=FALSE],x) 70 | 71 | ## main loop 72 | for (i in 1:features) 73 | { 74 | K.cols <- K[-idx, , drop = FALSE] 75 | 76 | if(i > 1) 77 | projections <- K.cols * (alphazero[-idx]%*%t(rep(1,m))) + crossprod(t(alpha[-idx,1:(i-1),drop=FALSE]),K[idx, ,drop = FALSE]) 78 | else 79 | projections <- K.cols * (alphazero%*%t(rep(1,m))) 80 | 81 | Q <- apply(projections, 1, sd) 82 | Q.tmp <- rep(0,subset) 83 | Q.tmp[-idx] <- Q 84 | Qidx <- which.max(Q.tmp) 85 | Qmax <- Q.tmp[Qidx] 86 | 87 | if(i > 1) 88 | alphafeat[i,1:(i-1)] <- alpha[Qidx,1:(i-1)] 89 | 90 | alphafeat[i,i] <- alphazero[Qidx] 91 | 92 | if (i > 1) 93 | idx <- c(idx,Qidx) 94 | else 95 | idx <- Qidx 96 | 97 | if (i > 1) 98 | Qfeat <- c(Qfeat, Qmax) 99 | else 100 | Qfeat <- Qmax 101 | 102 | Ksub <- K[idx, idx, drop = FALSE] 103 | alphasub <- alphafeat[i,1:i] 104 | phisquare <- alphasub %*% Ksub %*% t(t(alphasub)) 105 | dotprod <- (alphazero * (K[,idx, drop = FALSE] %*% t(t(alphasub))) + alpha[,1:i]%*%(Ksub%*%t(t(alphasub))))/drop(phisquare) 106 | alpha[,1:i] <- alpha[,1:i] - dotprod %*%alphasub 107 | 108 | if(normalize){ 109 | sumalpha <- alphazero + rowSums(abs(alpha)) 110 | alphazero <- alphazero / sumalpha 111 | alpha <- alpha/ (sumalpha %*% t(rep(1,features))) 112 | } 113 | } 114 | 115 | obj <- new("kfa") 116 | alpha(obj) <- alphafeat 117 | alphaindex(obj) <- randomindex[idx] 118 | xmatrix(obj) <- x[alphaindex(obj),] 119 | kernelf(obj) <- kernel 120 | kcall(obj) <- match.call() 121 | return(obj) 122 | }) 123 | 124 | 125 | ## project a new matrix into the feature space 126 | 127 | setMethod("predict",signature(object="kfa"), 128 | function(object, x) 129 | { 130 | if (!is.null(terms(object))) 131 | { 132 | if(!is.matrix(x)) 133 | x <- model.matrix(delete.response(terms(object)), as.data.frame(x), na.action = n.action(object)) 134 | } 135 | else 136 | x <- if (is.vector(x)) t(t(x)) else as.matrix(x) 137 | 138 | if (!is.matrix(x)) stop("x must be a matrix, a vector or a data frame") 139 | tmpres <- kernelMult(kernelf(object), x, xmatrix(object), alpha(object)) 140 | return(tmpres - matrix(colSums(tmpres)/dim(tmpres)[1],dim(tmpres)[1],dim(tmpres)[2],byrow=TRUE)) 141 | 142 | 143 | }) 144 | 145 | setMethod("show",signature(object="kfa"), 146 | function(object) 147 | { 148 | cat(paste("Number of features :",dim(alpha(object))[2],"\n")) 149 | show(kernelf(object)) 150 | }) 151 | 152 | 153 | 154 |
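The kfa() matrix method above extracts a sparse kernel feature basis, and the predict() method that follows projects new data onto it. A minimal usage sketch (not part of the package sources; it assumes the kernlab package is attached, and the sigma and features values are arbitrary illustrative choices, not recommendations):

library(kernlab)

data(iris)
datamatrix <- as.matrix(iris[, -5])

## extract a small kernel feature basis with an RBF kernel
kfares <- kfa(datamatrix, kernel = "rbfdot", kpar = list(sigma = 0.1),
              features = 4)

## project the data onto the extracted (centered) basis
proj <- predict(kfares, datamatrix)
dim(proj)   # 150 x 4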
-------------------------------------------------------------------------------- /man/kqr-class.Rd: -------------------------------------------------------------------------------- 1 | \name{kqr-class} 2 | \docType{class} 3 | \alias{kqr-class} 4 | \alias{alpha,kqr-method} 5 | \alias{cross,kqr-method} 6 | \alias{error,kqr-method} 7 | \alias{kcall,kqr-method} 8 | \alias{kernelf,kqr-method} 9 | \alias{kpar,kqr-method} 10 | \alias{param,kqr-method} 11 | \alias{alphaindex,kqr-method} 12 | \alias{b,kqr-method} 13 | \alias{xmatrix,kqr-method} 14 | \alias{ymatrix,kqr-method} 15 | \alias{scaling,kqr-method} 16 | 17 | \title{Class "kqr"} 18 | \description{The Kernel Quantile Regression object class} 19 | \section{Objects from the Class}{ 20 | Objects can be created by calls of the form \code{new("kqr", ...)} 21 | or by calling the \code{kqr} function. 22 | } 23 | \section{Slots}{ 24 | \describe{ 25 | \item{\code{kernelf}:}{Object of class \code{"kfunction"} contains 26 | the kernel function used} 27 | \item{\code{kpar}:}{Object of class \code{"list"} contains the 28 | kernel parameter used } 29 | \item{\code{coef}:}{Object of class \code{"ANY"} containing the model parameters} 30 | \item{\code{param}:}{Object of class \code{"list"} contains the 31 | cost parameter C and the tau parameter used } 32 | \item{\code{kcall}:}{Object of class \code{"list"} contains the used 33 | function call } 34 | \item{\code{terms}:}{Object of class \code{"ANY"} contains the 35 | terms representation of the symbolic model used (when using a formula)} 36 | \item{\code{xmatrix}:}{Object of class \code{"input"} containing 37 | the data matrix used } 38 | \item{\code{ymatrix}:}{Object of class \code{"output"} containing the 39 | response matrix} 40 | \item{\code{fitted}:}{Object of class \code{"output"} containing the 41 | fitted values } 42 | \item{\code{alpha}:}{Object of class \code{"listI"} containing the 43 | computed alpha values } 44 | \item{\code{b}:}{Object of class \code{"numeric"} containing the 45 | offset of the model.} 46 | \item{\code{scaling}}{Object of class \code{"ANY"} containing 47 | the scaling coefficients of the data (when \code{scaled = TRUE} is used).} 48 | \item{\code{error}:}{Object of class \code{"numeric"} containing the 49 | training error} 50 | \item{\code{cross}:}{Object of class \code{"numeric"} containing the 51 | cross validation error} 52 | \item{\code{n.action}:}{Object of class \code{"ANY"} containing the 53 | action performed on NAs } 54 | \item{\code{nclass}:}{Inherited from class \code{vm}, not used in kqr} 55 | \item{\code{lev}:}{Inherited from class \code{vm}, not used in kqr} 56 | \item{\code{type}:}{Inherited from class \code{vm}, not used in kqr} 57 | } 58 | } 59 | \section{Methods}{ 60 | \describe{ 61 | \item{coef}{\code{signature(object = "kqr")}: returns the 62 | coefficients (alpha) of the model} 63 | \item{alpha}{\code{signature(object = "kqr")}: returns the alpha 64 | vector (identical to \code{coef})} 65 | \item{b}{\code{signature(object = "kqr")}: returns the offset beta 66 | of the model.} 67 | \item{cross}{\code{signature(object = "kqr")}: returns the cross 68 | validation error } 69 | \item{error}{\code{signature(object = "kqr")}: returns the 70 | training error } 71 | \item{fitted}{\code{signature(object = "vm")}: returns the fitted values } 72 | \item{kcall}{\code{signature(object = "kqr")}: returns the call performed} 73 | \item{kernelf}{\code{signature(object = "kqr")}: returns the 74 | kernel function used} 75 | \item{kpar}{\code{signature(object = "kqr")}:
returns the kernel 76 | parameter used} 77 | \item{param}{\code{signature(object = "kqr")}: returns the 78 | cost regularization parameter C and tau used} 79 | \item{xmatrix}{\code{signature(object = "kqr")}: returns the 80 | data matrix used} 81 | \item{ymatrix}{\code{signature(object = "kqr")}: returns the 82 | response matrix used} 83 | \item{scaling}{\code{signature(object = "kqr")}: returns the 84 | scaling coefficients of the data (when \code{scaled = TRUE} is used)} 85 | 86 | } 87 | } 88 | 89 | \author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} 90 | 91 | \seealso{ 92 | \code{\link{kqr}}, 93 | \code{\link{vm-class}}, 94 | \code{\link{ksvm-class}} 95 | } 96 | \examples{ 97 | 98 | 99 | # create data 100 | x <- sort(runif(300)) 101 | y <- sin(pi*x) + rnorm(300,0,sd=exp(sin(2*pi*x))) 102 | 103 | # first calculate the median 104 | qrm <- kqr(x, y, tau = 0.5, C=0.15) 105 | 106 | # predict and plot 107 | plot(x, y) 108 | ytest <- predict(qrm, x) 109 | lines(x, ytest, col="blue") 110 | 111 | # calculate 0.9 quantile 112 | qrm <- kqr(x, y, tau = 0.9, kernel = "rbfdot", 113 | kpar = list(sigma = 10), C = 0.15) 114 | ytest <- predict(qrm, x) 115 | lines(x, ytest, col="red") 116 | 117 | # print model coefficients and other information 118 | coef(qrm) 119 | b(qrm) 120 | error(qrm) 121 | kernelf(qrm) 122 | } 123 | \keyword{classes} 124 | -------------------------------------------------------------------------------- /src/dprsrch.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <stdlib.h> 3 | #ifndef USE_FC_LEN_T 4 | # define USE_FC_LEN_T 5 | #endif 6 | #include <R_ext/BLAS.h> 7 | extern double mymin(double, double); 8 | extern double mymax(double, double); 9 | extern void *xmalloc(size_t); 10 | /* LEVEL 1 BLAS */ 11 | /*extern double ddot_(int *, double *, int *, double *, int *);*/ 12 | /*extern int daxpy_(int *, double *, double *, int *, double *, int *);*/ 13 | /* LEVEL 2 BLAS */ 14 | /*extern int dsymv_(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);*/ 15 | /* MINPACK 2 */ 16 | extern void dbreakpt(int, double *, double *, double *, double *, int *, double *, double *); 17 | extern void dgpstep(int, double *, double *, double *, double, double *, double *); 18 | 19 | void dprsrch(int n, double *x, double *xl, double *xu, double *A, double *g, double *w) 20 | { 21 | /* 22 | c ********** 23 | c 24 | c Subroutine dprsrch 25 | c 26 | c This subroutine uses a projected search to compute a step 27 | c that satisfies a sufficient decrease condition for the quadratic 28 | c 29 | c q(s) = 0.5*s'*A*s + g'*s, 30 | c 31 | c where A is a symmetric matrix and g is a vector. Given the 32 | c parameter alpha, the step is 33 | c 34 | c s[alpha] = P[x + alpha*w] - x, 35 | c 36 | c where w is the search direction and P the projection onto the 37 | c n-dimensional interval [xl,xu]. The final step s = s[alpha] 38 | c satisfies the sufficient decrease condition 39 | c 40 | c q(s) <= mu_0*(g'*s), 41 | c 42 | c where mu_0 is a constant in (0,1). 43 | c 44 | c The search direction w must be a descent direction for the 45 | c quadratic q at x such that the quadratic is decreasing 46 | c in the ray x + alpha*w for 0 <= alpha <= 1. 47 | c 48 | c parameters: 49 | c 50 | c n is an integer variable. 51 | c On entry n is the number of variables. 52 | c On exit n is unchanged. 53 | c 54 | c x is a double precision array of dimension n. 55 | c On entry x specifies the vector x.
56 | c On exit x is set to the final point P[x + alpha*w]. 57 | c 58 | c xl is a double precision array of dimension n. 59 | c On entry xl is the vector of lower bounds. 60 | c On exit xl is unchanged. 61 | c 62 | c xu is a double precision array of dimension n. 63 | c On entry xu is the vector of upper bounds. 64 | c On exit xu is unchanged. 65 | c 66 | c A is a double precision array of dimension n*n. 67 | c On entry A specifies the matrix A 68 | c On exit A is unchanged. 69 | c 70 | c g is a double precision array of dimension n. 71 | c On entry g specifies the vector g. 72 | c On exit g is unchanged. 73 | c 74 | c w is a double precision array of dimension n. 75 | c On entry w specifies the search direction. 76 | c On exit w is the step s[alpha]. 77 | c 78 | c ********** 79 | */ 80 | 81 | double one = 1, zero = 0; 82 | 83 | /* Constant that defines sufficient decrease. */ 84 | /* Interpolation factor. */ 85 | double mu0 = 0.01, interpf = 0.5; 86 | 87 | double *wa1 = (double *) xmalloc(sizeof(double)*n); 88 | double *wa2 = (double *) xmalloc(sizeof(double)*n); 89 | 90 | /* Set the initial alpha = 1 because the quadratic function is 91 | decreasing in the ray x + alpha*w for 0 <= alpha <= 1 */ 92 | double alpha = 1, brptmin, brptmax, gts, q; 93 | int search = 1, nbrpt, nsteps = 0, i, inc = 1; 94 | 95 | /* Find the smallest break-point on the ray x + alpha*w. */ 96 | dbreakpt(n, x, xl, xu, w, &nbrpt, &brptmin, &brptmax); 97 | 98 | /* Reduce alpha until the sufficient decrease condition is 99 | satisfied or x + alpha*w is feasible. */ 100 | while (search && alpha > brptmin) 101 | { 102 | 103 | /* Calculate P[x + alpha*w] - x and check the sufficient 104 | decrease condition. */ 105 | nsteps++; 106 | dgpstep(n, x, xl, xu, alpha, w, wa1); 107 | F77_CALL(dsymv)("U", &n, &one, A, &n, wa1, &inc, &zero, wa2, &inc FCONE); 108 | gts = F77_CALL(ddot)(&n, g, &inc, wa1, &inc); 109 | q = 0.5*F77_CALL(ddot)(&n, wa1, &inc, wa2, &inc) + gts; 110 | if (q <= mu0*gts) 111 | search = 0; 112 | else 113 | 114 | /* This is a crude interpolation procedure that 115 | will be replaced in future versions of the code. */ 116 | alpha *= interpf; 117 | } 118 | 119 | /* Force at least one more constraint to be added to the active 120 | set if alpha < brptmin and the full step is not successful. 121 | There is sufficient decrease because the quadratic function 122 | is decreasing in the ray x + alpha*w for 0 <= alpha <= 1. */ 123 | if (alpha < 1 && alpha < brptmin) 124 | alpha = brptmin; 125 | 126 | /* Compute the final iterate and step. */ 127 | dgpstep(n, x, xl, xu, alpha, w, wa1); 128 | F77_CALL(daxpy)(&n, &alpha, w, &inc, x, &inc); 129 | for (i=0;i<n;i++) 130 | { 131 | x[i] = mymax(xl[i], mymin(x[i], xu[i])); 132 | w[i] = wa1[i]; 133 | } 134 | 135 | free(wa1); 136 | free(wa2); 137 | } 138 | -------------------------------------------------------------------------------- /R/kha.R: -------------------------------------------------------------------------------- 1 | ## Kernel Hebbian Algorithm 2 | ## author : alexandros karatzoglou 3 | 4 | setGeneric("kha",function(x, ...) standardGeneric("kha")) 5 | setMethod("kha", signature(x = "formula"), 6 | function(x, data = NULL, na.action = na.omit, ...) 7 | { 8 | mt <- terms(x, data = data) 9 | 10 | if(attr(mt, "response") > 0) stop("response not allowed in formula") 11 | attr(mt, "intercept") <- 0 12 | cl <- match.call() 13 | mf <- match.call(expand.dots = FALSE) 14 | mf$formula <- mf$x 15 | mf$... <- NULL 16 | mf[[1L]] <- quote(stats::model.frame) 17 | mf <- eval(mf, parent.frame()) 18 | na.act <- attr(mf, "na.action") 19 | Terms <- attr(mf, "terms") 20 | x <- model.matrix(mt, mf) 21 | res <- kha(x, ...) 22 | ## fix up call to refer to the generic, but leave arg name as `formula' 23 | cl[[1]] <- as.name("kha") 24 | kcall(res) <- cl 25 | attr(Terms,"intercept") <- 0 26 | terms(res) <- Terms 27 | if(!is.null(na.act)) 28 | n.action(res) <- na.act 29 | return(res) 30 | }) 31 | 32 | 33 | 34 | setMethod("kha",signature(x="matrix"), 35 | function(x, kernel = "rbfdot", kpar = list(sigma = 0.1), 36 | features = 5, eta = 0.005, th = 1e-4, maxiter = 10000, verbose = FALSE, na.action = na.omit, ...)
37 | { 38 | x <- na.action(x) 39 | x <- as.matrix(x) 40 | m <- nrow(x) 41 | ret <- new("kha") 42 | if(!is(kernel,"kernel")) 43 | { 44 | if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) 45 | kernel <- do.call(kernel, kpar) 46 | } 47 | if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") 48 | 49 | ## Initialize A dual variables 50 | A <- matrix(runif(features*m),m,features)*2 - 1 51 | Aold <- A 52 | 53 | ## compute square norm of data 54 | a <- rowSums(x^2) 55 | 56 | ## initialize the empirical sum kernel map 57 | eskm <- rep(0,m) 58 | 59 | for (i in 1:m) 60 | eskm[i] <- sum(kernelFast(kernel,x,x[i,,drop=FALSE], a)) 61 | 62 | eks <- sum(eskm) 63 | 64 | counter <- 0 65 | step <- th + 1 66 | 67 | 68 | while(step > th && counter < maxiter) 69 | { 70 | y <- rep(0, features) 71 | ot <- rep(0,m) 72 | 73 | ## Hebbian Iteration 74 | for (i in 1:m) 75 | { 76 | ## compute y output 77 | etkm <- as.vector(kernelFast(kernel,x,x[i,,drop=FALSE], a)) 78 | sum1 <- as.vector(etkm %*% A) 79 | sum2 <- as.vector(eskm%*%A)/m 80 | asum <- colSums(A) 81 | sum3 <- as.vector(eskm[i]*asum)/m 82 | sum4 <- as.vector(eks * asum)/m^2 83 | y <- sum1 - sum2 - sum3 + sum4 84 | 85 | ## update A 86 | yy <- y%*%t(y) 87 | yy[upper.tri(yy)] <- 0 88 | tA <- t(A) 89 | A <- t(tA - eta * yy%*%tA) 90 | A[i,] <- A[i,] + eta * y 91 | } 92 | 93 | if (counter %% 100 == 0) 94 | { 95 | step <- mean(abs(Aold - A)) 96 | Aold <- A 97 | if(verbose) 98 | cat("Iteration :", counter, "Converged :", step,"\n") 99 | } 100 | counter <- counter + 1 101 | } 102 | 103 | ## Normalize in Feature space 104 | cA <- t(A) - colSums(A) 105 | Fnorm <- rep(0,features) 106 | for (j in 1:m) 107 | Fnorm <- Fnorm + colSums(t(cA[,j] * cA) * as.vector(kernelFast(kernel,x,x[j,,drop=FALSE],a))) 108 | 109 | 110 | if(any(Fnorm==0)) 111 | { 112 | warning("Normalization vector contains zeros, replacing them with ones") 113 | Fnorm[which(Fnorm==0)] <- 1 114 | } 115 | 116 | A <- t(t(A)/sqrt(Fnorm)) 117 | 118 | pcv(ret) <- A 119 | eig(ret) <- Fnorm 120 | names(eig(ret)) <- paste("Comp.", 1:features, sep = "") 121 | eskm(ret) <- eskm 122 | kcall(ret) <- match.call() 123 | kernelf(ret) <- kernel 124 | xmatrix(ret) <- x 125 | return(ret) 126 | }) 127 | 128 | 129 | ## Project a new matrix into the feature space 130 | setMethod("predict",signature(object="kha"), 131 | function(object, x) 132 | { 133 | if (!is.null(terms(object))) 134 | { 135 | if(!is.matrix(x)) 136 | x <- model.matrix(delete.response(terms(object)), as.data.frame(x), na.action = n.action(object)) 137 | } 138 | else 139 | x <- if (is.vector(x)) t(t(x)) else as.matrix(x) 140 | 141 | if (is.vector(x)||is.data.frame(x)) 142 | x <- as.matrix(x) 143 | if (!is.matrix(x)) stop("x must be a matrix, a vector or a data frame") 144 | n <- nrow(x) 145 | m <- nrow(xmatrix(object)) 146 | A <- pcv(object) 147 | y <- matrix(0,n,dim(A)[2]) 148 | eks <- sum(eskm(object)) 149 | a <- rowSums(xmatrix(object)^2) 150 | 151 | ## Project data 152 | sum2 <- as.vector(eskm(object)%*%A)/m 153 | asum <- colSums(A) 154 | 155 | sum4 <- as.vector(eks * asum)/m^2 156 | 157 | for (i in 1:n) 158 | { 159 | ## compute y output 160 | etkm <- as.vector(kernelFast(kernelf(object),xmatrix(object),x[i,,drop=FALSE], a)) 161 | sum1 <- as.vector(etkm %*% A) 162 | sum3 <- sum(etkm)*asum/m 163 | y[i,] <- sum1 - sum2 - sum3 + sum4 164 | } 165 | 166 | return(y) 167 | }) 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /src/esa.h:
-------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 2.0 3 | * 4 | * This Source Code Form is subject to the terms of the Mozilla Public 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | * 8 | * Software distributed under the License is distributed on an "AS IS" basis, 9 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10 | * for the specific language governing rights and limitations under the 11 | * License. 12 | * 13 | * The Original Code is the Suffix Array based String Kernel. 14 | * 15 | * The Initial Developer of the Original Code is 16 | * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). 17 | * Portions created by the Initial Developer are Copyright (C) 2006 18 | * the Initial Developer. All Rights Reserved. 19 | * 20 | * Contributor(s): 21 | * 22 | * Choon Hui Teo 23 | * S V N Vishwanathan 24 | * 25 | * ***** END LICENSE BLOCK ***** */ 26 | 27 | 28 | // File : sask/Code/ESA.h 29 | // 30 | // Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) 31 | // S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) 32 | // 33 | // Created : 09 Feb 2006 34 | // 35 | // Updated : 24 Apr 2006 36 | 37 | 38 | #ifndef ESA_H 39 | #define ESA_H 40 | 41 | 42 | #include "datatype.h" 43 | #include "errorcode.h" 44 | #include "lcp.h" 45 | #include "ctable.h" 46 | #include "ilcpfactory.h" 47 | #include "isafactory.h" 48 | #include <vector> 49 | #include <utility> 50 | 51 | 52 | //#define SLINK 53 | 54 | // #define SSARRAY // does not yet work correctly, CW 55 | 56 | class ESA 57 | { 58 | 59 | private: 60 | 61 | int _verb; 62 | 63 | public: 64 | 65 | UInt32 size; //' The length of #text# 66 | SYMBOL *text; //' Text corresponds to SA 67 | #ifdef SSARRAY 68 | int *suftab; //' Suffix Array 69 | #else 70 | UInt32 *suftab; //' Suffix Array 71 | #endif 72 | LCP lcptab; //' LCP array 73 | ChildTable childtab; //' Child table (fields merged) 74 | UInt32 *suflink; //' Suffix link table.
Two fields: l,r 75 | 76 | 77 | //' --- for bucket table --- 78 | UInt32 bcktab_depth; //' Number of char defining each bucket 79 | UInt32 bcktab_size; //' size of bucket table 80 | UInt32 *bcktab_val; //' value column of bucket table 81 | 82 | UInt32 *bcktab_key4; //' 4-bytes key column of Bucket table 83 | UInt32 *coef4; 84 | UInt32 hash_value4; 85 | 86 | UInt64 *bcktab_key8; //' 8-bytes key column of Bucket table 87 | UInt64 *coef8; 88 | UInt64 hash_value8; 89 | //' --- 90 | 91 | 92 | /// Constructors 93 | ESA(const UInt32 & size_, SYMBOL *text_, int verb=INFO); 94 | 95 | /// Destructor 96 | virtual ~ESA(); 97 | 98 | /// Construct child table 99 | ErrorCode ConstructChildTable(); 100 | 101 | 102 | /// Get suffix link interval 103 | ErrorCode GetSuflink(const UInt32 &i, const UInt32 &j, 104 | UInt32 &sl_i, UInt32 &sl_j); 105 | 106 | 107 | /// Find the suffix link 108 | ErrorCode FindSuflink(const UInt32 &parent_i, const UInt32 &parent_j, 109 | const UInt32 &child_i, const UInt32 &child_j, 110 | UInt32 &sl_i, UInt32 &sl_j); 111 | 112 | /// Construct suffix link table 113 | ErrorCode ConstructSuflink(); 114 | 115 | /// Construct bucket table 116 | ErrorCode ConstructBcktab(const UInt32 &alphabet_size=256); 117 | 118 | 119 | /// Get all non-singleton child-intervals 120 | ErrorCode GetChildIntervals(const UInt32 &lb, const UInt32 &rb, 121 | std::vector<std::pair<UInt32, UInt32> > &q); 122 | 123 | /// Get intervals by index 124 | ErrorCode GetIntervalByIndex(const UInt32 &parent_i, const UInt32 &parent_j, 125 | const UInt32 &start_idx, UInt32 &child_i, 126 | UInt32 &child_j); 127 | 128 | /// Get intervals by character 129 | ErrorCode GetIntervalByChar(const UInt32 &parent_i, const UInt32 &parent_j, 130 | const SYMBOL &start_ch, const UInt32 &depth, 131 | UInt32 &child_i, UInt32 &child_j); 132 | /// Get lcp value 133 | ErrorCode GetLcp(const UInt32 &i, const UInt32 &j, UInt32 &val); 134 | 135 | /// Compare pattern to text[suftab[idx]..length]. 136 | ErrorCode Compare(const UInt32 &idx, const UInt32 &depth, SYMBOL *pattern, 137 | const UInt32 &p_len, UInt32 &matched_len); 138 | 139 | /// Find longest substring of pattern in enhanced suffix array. 140 | ErrorCode Match(const UInt32 &i, const UInt32 &j, SYMBOL *pattern, const UInt32 p_len, 141 | UInt32 &lb, UInt32 &rb, UInt32 &matched_len); 142 | 143 | /// Similar to Match() but returns also floor interval of [lb..rb] 144 | ErrorCode ExactSuffixMatch(const UInt32 &i, const UInt32 &j, const UInt32 &offset, 145 | SYMBOL *pattern, const UInt32 p_len, UInt32 &lb, UInt32 &rb, 146 | UInt32 &matched_len, UInt32 &floor_lb, UInt32 &floor_rb, 147 | UInt32 &floor_len); 148 | 149 | }; 150 | #endif 151 | --------------------------------------------------------------------------------
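A closing usage sketch for the couplers defined in R/couplers.R above (illustrative only, not part of the package sources; the pairwise probabilities are made up and the kernlab package is assumed to be attached). couple() takes the k(k-1)/2 pairwise class probabilities produced by a one-against-one classifier and returns coupled multiclass probabilities:

library(kernlab)

## three classes yield 3*(3-1)/2 = 3 pairwise probabilities; they are
## placed in the upper triangle column-wise as (1,2), (1,3), (2,3)
pairwise <- c(0.8, 0.7, 0.6)

couple(pairwise, coupler = "minpair")  # coupling via a small linear system
couple(pairwise, coupler = "pkpd")     # Price-Knerr-Personnaz-Dreyfus rule
couple(pairwise, coupler = "vote")     # simple pairwise voting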