├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── R
│   ├── PLS_est.R
│   ├── SPJ_PLS.R
│   ├── determine_K.R
│   ├── init_est.R
│   └── utilities_functions.R
├── README.md
├── classo.Rproj
├── data
│   └── sample_data.rda
└── man
    ├── PLS.cvxr.Rd
    ├── PLS.mosek.Rd
    ├── PLS.nlopt.Rd
    ├── SPJ_PLS.Rd
    ├── determine_K.Rd
    └── init_est.Rd

/.Rbuildignore:
--------------------------------------------------------------------------------
^.*\.Rproj$
^\.Rproj\.user$

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
## LyX
*.lyx~
*.lyx#

## SWP
*.bak
*.aaa

## R
*.Rhistory
*_cache*

## matlab
*.asv


## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out
*.toc
*.gz

## Intermediate documents:
*.dvi
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
# *.pdf

## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.brf
*.run.xml


## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync

## Auxiliary and intermediate files from other packages:

# algorithms
*.alg
*.loa

# achemso
acs-*.bib

# amsthm
*.thm

# beamer
*.nav
*.snm
*.vrb

#(e)ledmac/(e)ledpar
*.end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R

# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls

# hyperref
*.brf

# knitr
*-concordance.tex
*.tikz
*-tikzDictionary

# listings
*.lol

# makeidx
*.idx
*.ilg
*.ind
*.ist

# minitoc
*.maf
*.mtc
*.mtc0

# minted
*.pyg

# morewrites
*.mw

# nomencl
*.nlo

# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd

# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/

# todonotes
*.tdo

# xindy
*.xdy
*.Rproj.user
.Rproj.user

--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
Package: classo
Type: Package
Title: Classifier-Lasso for Panel Data Grouping
Version: 0.0.0.9100
Authors@R: c(
    person("Zhan", "Gao", email = "zhangao@usc.edu", role = c("aut", "cre")),
    person("Zhentao", "Shi", email = "zhentaoshi@cuhk.edu.hk", role = "aut"))
Maintainer: Zhan Gao <zhangao@usc.edu>
Description: Implements the classifier-Lasso (C-Lasso) method of Su, Shi and
    Phillips (2016) for identifying latent group structures in panel data.
License: MIT
Encoding: UTF-8
LazyData: true
Depends:
    SparseM,
    MASS,
    Matrix,
    CVXR,
    nloptr
Imports:
    ECOSolveR,
    robustHD
Suggests:
    knitr,
    rmarkdown,
    Rmosek
RoxygenNote: 6.1.1
VignetteBuilder: knitr

--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
# Generated by roxygen2: do not edit by hand

export(PLS.cvxr)
export(PLS.cvxr.solver)
export(PLS.mosek)
export(PLS.nlopt)
export(SPJ_PLS)
export(data.normalization)
export(demean)
export(determine_K)
export(group.coerce)
export(init_est)

--------------------------------------------------------------------------------
/R/PLS_est.R:
--------------------------------------------------------------------------------
#' PLS estimation by the iterative algorithm via Rmosek
#'
#' @param N The number of cross-sectional units in the panel.
#' @param TT The time series dimension in the panel.
#' @param y Dependent variable (TN * 1). T is the fast index.
#' @param X Independent variables (TN * P). T is the fast index. P is the number of regressors.
#' @param K The number of groups.
#' @param lambda The tuning parameter.
#' @param beta0 N*p matrix. The initial estimator for each i = 1, ..., N.
#' @param R Maximum number of iterations.
#' @param tol Tolerance level in the convergence criterion.
#' @param post_est A boolean: do post-lasso estimation or not.
#' @param bias_corr A boolean: do bias correction in the post-lasso estimation or not.
#'
#' @return A list containing the estimated coefficients and group structure:
#' \item{b.est}{N * p matrix containing the estimated slope for each cross-sectional unit.}
#' \item{a.out}{K * p matrix containing the estimated slope for each group.}
#' \item{group.est}{Group id for each individual.}
#' \item{converge}{A boolean indicating whether the convergence criterion is met.}
#'
#' @export
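#'
#' @examples
#' \dontrun{
#' # A minimal sketch on the bundled sample data (assumes Rmosek is installed
#' # and licensed); the tuning parameter follows the README example.
#' data("sample_data")
#' y <- as.matrix(sample_data[, 1])
#' x <- as.matrix(sample_data[, -1])
#' lambda <- as.numeric(0.5 * var(y) / (25^(1/3)))
#' pls_out <- PLS.mosek(N = 200, TT = 25, y = y, X = x, K = 3, lambda = lambda)
#' pls_out$a.out
#' }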

PLS.mosek <- function(N, TT, y, X, K, lambda, beta0 = NULL, R = 500,
    tol = 1e-04, post_est = TRUE, bias_corr = FALSE) {

    p <- dim(X)[2]

    if (is.null(beta0)) {
        # Use individual regression results as the initial value
        beta0 <- init_est(X, y, TT)
    }

    b.out <- array(beta0, c(N, p, K))
    a.out <- matrix(0, K, p)

    b.old <- matrix(1, N, p)
    a.old <- matrix(1, 1, p)

    for (r in 1:R) {

        for (k in 1:K) {

            # N * 1 vector: the known part of the penalty (gamma)
            penalty.out <- pen.generate(b.out, a.out, N, p, K, k)
            # do optimization
            mosek.out <- opt.mosek(y, X, penalty.out, N, TT, K, p, lambda)
            a.out[k, ] <- mosek.out$alpha
            b.out[, , k] <- matrix(mosek.out$beta, N, p, byrow = TRUE)

        }

        # Check the convergence criterion
        a.new <- a.out[K, ]
        b.new <- b.out[, , K]

        if (criterion(a.old, a.new, b.old, b.new, tol)) {
            break
        }
        # Update
        a.old <- a.out[K, ]
        b.old <- b.out[, , K]
    }

    # assign each b.out to the nearest a.out to get the group estimates

    a.out.exp <- aperm(array(a.out, c(K, p, N)), c(3, 2, 1))
    d.temp <- (b.out - a.out.exp)^2
    dist <- sqrt(apply(d.temp, c(1, 3), sum))
    group.est <- apply(dist, 1, which.min)

    # Post estimation
    if (post_est) {
        if (bias_corr) {
            a.out <- post.corr(group.est, a.out, y, X, K, p, N, TT)
        } else {
            a.out <- post.lasso(group.est, a.out, y, X, K, p, N, TT)
        }
    }

    b.est <- matrix(999, N, p)
    for (i in 1:N) {
        group <- group.est[i]
        b.est[i, ] <- a.out[group, ]
    }

    result <- list(b.est = b.est, a.out = a.out, group.est = group.est,
        converge = (r < R))

    return(result)
}

#' PLS estimation by the iterative algorithm via CVXR + ECOS
#'
#' @inheritParams PLS.mosek
#'
#' @return A list containing the estimated coefficients and group structure:
#' \item{b.est}{N * p matrix containing the estimated slope for each individual.}
#' \item{a.out}{K * p matrix containing the estimated slope for each group.}
#' \item{group.est}{Group id for each individual.}
#' \item{converge}{A boolean indicating whether the convergence criterion is met.}
#'
#' @export
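#'
#' @examples
#' \dontrun{
#' # A minimal sketch via the default open-source ECOS solver; mirrors the
#' # README example on the bundled sample data.
#' data("sample_data")
#' y <- as.matrix(sample_data[, 1])
#' x <- as.matrix(sample_data[, -1])
#' lambda <- as.numeric(0.5 * var(y) / (25^(1/3)))
#' pls_out <- PLS.cvxr(N = 200, TT = 25, y = y, X = x, K = 3, lambda = lambda)
#' pls_out$group.est
#' }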

PLS.cvxr <- function(N, TT, y, X, K, lambda, beta0 = NULL, R = 500, tol = 1e-04,
    post_est = TRUE, bias_corr = FALSE) {

    p <- dim(X)[2]

    if (is.null(beta0)) {
        # Use individual regression results as the initial value
        beta0 <- init_est(X, y, TT)
    }

    b.out <- array(beta0, c(N, p, K))
    a.out <- matrix(0, K, p)

    b.old <- matrix(1, N, p)
    a.old <- matrix(1, 1, p)

    for (r in 1:R) {

        for (k in 1:K) {

            # N * 1 vector: the known part of the penalty (gamma)
            gamma <- pen.generate(b.out, a.out, N, p, K, k)

            # An earlier formulation, replaced following a suggestion by
            # Dr. Narasimhan, built the objective term by term:
            #   b = Variable(N * p); a = Variable(p); obj = 0
            #   obj = obj + gamma[i] * norm2(b[id] - a)  for each i

            X.list = list()
            for (i in 1:N) {
                ind = ((i - 1) * TT + 1):(i * TT)
                id = ((i - 1) * p + 1):(i * p)
                X.list[[i]] = X[ind, ]
            }

            b = Variable(p, N)
            a = Variable(p)
            A <- matrix(1, nrow = 1, ncol = N)
            obj1 <- t(norm2(b - a %*% A, axis = 2)) %*% gamma

            XX = bdiag(X.list)

            obj = Minimize(sum_squares(y - XX %*% vec(b))/(N * TT) +
                obj1 * (lambda/N))
            Prob = Problem(obj)

            # Call ECOS directly on the compiled problem data, skipping the
            # DCP check in solve() to speed up the optimization.
            prob_data <- get_problem_data(Prob, solver = "ECOS")
            if (packageVersion("CVXR") > "0.99-7") {
                ECOS_dims <- ECOS.dims_to_solver_dict(prob_data$data[["dims"]])
            } else {
                ECOS_dims <- prob_data$data[["dims"]]
            }
            solver_output <- ECOSolveR::ECOS_csolve(c = prob_data$data[["c"]],
                                                    G = prob_data$data[["G"]],
                                                    h = prob_data$data[["h"]],
                                                    dims = ECOS_dims,
                                                    A = prob_data$data[["A"]],
                                                    b = prob_data$data[["b"]])

            if (packageVersion("CVXR") > "0.99-7") {
                direct_soln <- unpack_results(Prob, solver_output, prob_data$chain, prob_data$inverse_data)
            } else {
                direct_soln <- unpack_results(Prob, "ECOS", solver_output)
            }

            a.out[k, ] = direct_soln$getValue(a)
            b.out[, , k] = matrix(direct_soln$getValue(b), N, p, byrow = TRUE)

            # cvxr.out = solve(Prob, solver = solver)
            # a.out[k, ] = cvxr.out$getValue(a)
            # b.out[, , k] = matrix(cvxr.out$getValue(b), N, p, byrow = TRUE)

        }

        # Check the convergence criterion
        a.new <- a.out[K, ]
        b.new <- b.out[, , K]

        if (criterion(a.old, a.new, b.old, b.new, tol)) {
            break
        }
        # Update
        a.old <- a.out[K, ]
        b.old <- b.out[, , K]
    }

    # assign each b.out to the nearest a.out to get the group estimates

    a.out.exp <- aperm(array(a.out, c(K, p, N)), c(3, 2, 1))
    d.temp <- (b.out - a.out.exp)^2
    dist <- sqrt(apply(d.temp, c(1, 3), sum))
    group.est <- apply(dist, 1, which.min)

    # Post estimation
    if (post_est) {
        if (bias_corr) {
            a.out <- post.corr(group.est, a.out, y, X, K, p, N, TT)
        } else {
            a.out <- post.lasso(group.est, a.out, y, X, K, p, N, TT)
        }
    }

    b.est <- matrix(999, N, p)
    for (i in 1:N) {
        group <- group.est[i]
        b.est[i, ] <- a.out[group, ]
    }

    result <- list(b.est = b.est, a.out = a.out, group.est = group.est,
        converge = (r < R))

    return(result)
}


#' PLS estimation by the iterative algorithm via CVXR with a user-chosen solver
#'
#' @inheritParams PLS.cvxr
#' @param solver Name of the solver passed to CVXR (default "ECOS").
#'
#' @return A list containing the estimated coefficients and group structure:
#' \item{b.est}{N * p matrix containing the estimated slope for each individual.}
#' \item{a.out}{K * p matrix containing the estimated slope for each group.}
#' \item{group.est}{Group id for each individual.}
#' \item{converge}{A boolean indicating whether the convergence criterion is met.}
#'
#' @export

PLS.cvxr.solver <- function(N, TT, y, X, K, lambda, beta0 = NULL, R = 500, tol = 1e-04,
    post_est = TRUE, bias_corr = FALSE, solver = "ECOS") {

    p <- dim(X)[2]

    if (is.null(beta0)) {
        # Use individual regression results as the initial value
        beta0 <- init_est(X, y, TT)
    }

    b.out <- array(beta0, c(N, p, K))
    a.out <- matrix(0, K, p)

    b.old <- matrix(1, N, p)
    a.old <- matrix(1, 1, p)

    for (r in 1:R) {

        for (k in 1:K) {

            # N * 1 vector: the known part of the penalty (gamma)
            gamma <- pen.generate(b.out, a.out, N, p, K, k)

            X.list = list()
            for (i in 1:N) {
                ind = ((i - 1) * TT + 1):(i * TT)
                id = ((i - 1) * p + 1):(i * p)
                X.list[[i]] = X[ind, ]
            }

            b = Variable(p, N)
            a = Variable(p)
            A <- matrix(1, nrow = 1, ncol = N)
            obj1 <- t(norm2(b - a %*% A, axis = 2)) %*% gamma

            XX = bdiag(X.list)

            obj = Minimize(sum_squares(y - XX %*% vec(b))/(N * TT) +
                obj1 * (lambda/N))
            Prob = Problem(obj)

            cvxr.out = solve(Prob, solver = solver)
            a.out[k, ] = cvxr.out$getValue(a)
            b.out[, , k] = matrix(cvxr.out$getValue(b), N, p, byrow = TRUE)
        }

        # Check the convergence criterion
        a.new <- a.out[K, ]
        b.new <- b.out[, , K]

        if (criterion(a.old, a.new, b.old, b.new, tol)) {
            break
        }
        # Update
        a.old <- a.out[K, ]
        b.old <- b.out[, , K]
    }

    # assign each b.out to the nearest a.out to get the group estimates

    a.out.exp <- aperm(array(a.out, c(K, p, N)), c(3, 2, 1))
    d.temp <- (b.out - a.out.exp)^2
    dist <- sqrt(apply(d.temp, c(1, 3), sum))
    group.est <- apply(dist, 1, which.min)

    # Post estimation
    if (post_est) {
        if (bias_corr) {
            a.out <- post.corr(group.est, a.out, y, X, K, p, N, TT)
        } else {
            a.out <- post.lasso(group.est, a.out, y, X, K, p, N, TT)
        }
    }

    b.est <- matrix(999, N, p)
    for (i in 1:N) {
        group <- group.est[i]
        b.est[i, ] <- a.out[group, ]
    }

    result <- list(b.est = b.est, a.out = a.out, group.est = group.est,
        converge = (r < R))

    return(result)
}


#' PLS estimation by the iterative algorithm via nloptr
#'
#' @inheritParams PLS.mosek
#' @param algo The algorithm used by nloptr.
#'
#' @return A list containing the estimated coefficients and group structure:
#' \item{b.est}{N * p matrix containing the estimated slope for each individual.}
#' \item{a.out}{K * p matrix containing the estimated slope for each group.}
#' \item{group.est}{Group id for each individual.}
#' \item{converge}{A boolean indicating whether the convergence criterion is met.}
#'
#' @export

PLS.nlopt <- function(N, TT, y, X, K, lambda, beta0 = NULL, R = 500,
    tol = 1e-04, post_est = TRUE, bias_corr = FALSE, algo = "NLOPT_LN_NELDERMEAD") {

    p <- dim(X)[2]

    if (is.null(beta0)) {
        # Use individual regression results as the initial value (N x p matrix)
        beta0 <- init_est(X, y, TT)
    }

    # The nloptr parameter vector stacks (beta_1', ..., beta_N', alpha')
    init <- c(t(beta0), rep(1, p))

    b.out <- array(beta0, c(N, p, K))
    a.out <- matrix(0, K, p)

    b.old <- matrix(1, N, p)
    a.old <- matrix(1, 1, p)

    for (r in 1:R) {

        for (k in 1:K) {

            # N * 1 vector: the known part of the penalty (gamma)
            penalty.out <- pen.generate(b.out, a.out, N, p, K, k)

            opts = list(algorithm = algo, xtol_rel = 1e-05, maxeval = 2500)
            nlopt.out <- nloptr(x0 = init, eval_f = obj, eval_grad_f = obj.grad,
                opts = opts, N = N, TT = TT, p = p, y = y, X = X,
                lambda = lambda, penalty = penalty.out)
            coef.temp <- nlopt.out$solution

            a.out[k, ] <- coef.temp[(p * N + 1):(p * (N + 1))]
            b.out[, , k] <- matrix(coef.temp[1:(N * p)], N, p, byrow = TRUE)

        }

        # Check the convergence criterion
        a.new <- a.out[K, ]
        b.new <- b.out[, , K]

        if (criterion(a.old, a.new, b.old, b.new, tol)) {
            break
        }
        # Update
        a.old <- a.out[K, ]
        b.old <- b.out[, , K]
    }

    # assign each b.out to the nearest a.out to get the group estimates

    a.out.exp <- aperm(array(a.out, c(K, p, N)), c(3, 2, 1))
    d.temp <- (b.out - a.out.exp)^2
    dist <- sqrt(apply(d.temp, c(1, 3), sum))
    group.est <- apply(dist, 1, which.min)

    # Post estimation
    if (post_est) {
        if (bias_corr) {
            a.out <- post.corr(group.est, a.out, y, X, K, p, N, TT)
        } else {
            a.out <- post.lasso(group.est, a.out, y, X, K, p, N, TT)
        }
    }

    b.est <- matrix(999, N, p)
    for (i in 1:N) {
        group <- group.est[i]
        b.est[i, ] <- a.out[group, ]
    }

    result <- list(b.est = b.est, a.out = a.out, group.est = group.est,
        converge = (r < R))

    return(result)

}


##########################################################
obj <- function(coeff, N = NULL, TT = NULL, p = NULL, y = NULL, X = NULL,
    lambda = NULL, penalty = NULL) {

    # objective function: least squares plus the C-Lasso penalty.
    # coeff stacks (beta_1', ..., beta_N', alpha'); data and dimensions are
    # passed through nloptr's `...` mechanism rather than the global workspace.

    B <- matrix(coeff[1:(p * N)], nrow = p)
    ee <- X %*% B

    # select the diagonal blocks of ee: unit i paired with its own beta_i
    ii <- as.logical(c(rep(c(rep(1, TT), rep(0, TT * N)), N - 1), rep(1,
        TT)))
    ob <- (sum((y - ee[ii])^2))/(N * TT)

    ob <- ob + sum(sqrt(apply(matrix((coeff[1:(p * N)] - rep(coeff[(p *
        N + 1):(p * (N + 1))], N))^2, nrow = N, byrow = TRUE), 1, sum)) *
        penalty * (lambda/N))
    return(ob)

}
##########################################################

obj.grad <- function(coeff, N = NULL, TT = NULL, p = NULL, y = NULL, X = NULL,
    lambda = NULL, penalty = NULL) {

    # gradient of the objective function; only used by gradient-based NLopt
    # algorithms (NLOPT_LD_*), not by the default Nelder-Mead.

    B <- matrix(coeff[1:(p * N)], nrow = p)
    ee <- X %*% B
    ii <- as.logical(c(rep(c(rep(1, TT), rep(0, TT * N)), N - 1), rep(1,
        TT)))

    # least-squares part: -2/(N*TT) * X_i'(y_i - X_i beta_i) for each i
    res <- y - ee[ii]
    ob.grad.1 <- as.vector(t(apply(array(matrix(res, TT * N, p) * X,
        c(TT, N, p)), c(2, 3), sum))) * (-2/(N * TT))

    # penalty part: gamma_i * (lambda/N) * (beta_i - alpha)/||beta_i - alpha||
    b.minus.a <- coeff[1:(p * N)] - rep(coeff[(p * N + 1):(p * (N + 1))],
        N)
    norm2 <- sqrt(apply(matrix((b.minus.a)^2, nrow = N, byrow = TRUE),
        1, sum))
    ob.grad.1 <- ob.grad.1 + (b.minus.a/rep(norm2, rep(p, N))) *
        rep(penalty, each = p) * (lambda/N)

    ob.grad.2 <- apply(matrix((-b.minus.a/rep(norm2, rep(p, N))) *
        rep(penalty, each = p) * (lambda/N), nrow = N, byrow = TRUE), 2, sum)

    return(c(ob.grad.1, ob.grad.2))

}
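
# A quick numerical check of obj.grad (an illustrative sketch, not run by the
# package): for small N and TT, the analytic gradient can be compared with a
# finite-difference approximation, e.g. via the numDeriv package if available:
#   g_fd <- numDeriv::grad(obj, coeff, N = N, TT = TT, p = p, y = y, X = X,
#                          lambda = lambda, penalty = penalty)
#   max(abs(g_fd - obj.grad(coeff, N, TT, p, y, X, lambda, penalty)))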


##########################################################
pen.generate <- function(b, a, N, p, K, kk) {

    # generate the known part of the penalty term; output is an N * 1 vector:
    # for each i, the product of ||b_i - a_j|| over all groups j != kk

    a.out.exp <- aperm(array(a, c(K, p, N)), c(3, 2, 1))
    p.temp <- (b - a.out.exp)^2
    p.norm <- sqrt(apply(p.temp, c(1, 3), sum))

    ind <- setdiff(1:K, kk)

    pen <- apply(as.matrix(p.norm[, ind]), 1, prod)
    return(pen)

}


##########################################################
opt.mosek <- function(y, X, penalty, N, TT, K, p, lambda) {

    # call the Mosek solver to solve the optimization as a conic program

    # INPUT Args: dimensions N, TT, K, p; tuning parameter lambda;
    # data y (TN * 1), X (TN * P); penalty (N * 1) numeric

    # The problem is cast as a second-order cone program with auxiliary
    # variables nu_i = y_i - X_i beta_i and mu_i = beta_i - alpha, where
    # t_i >= ||nu_i||^2 is encoded by the rotated-cone trick
    # (r_i - s_i = 1, r_i + s_i = t_i, r_i >= ||(nu_i, s_i)||) and
    # w_i >= ||mu_i||, so that the objective becomes
    # sum_i t_i/(N*TT) + (lambda/N) * sum_i penalty_i * w_i.

    # set sense of optimization and tolerance
    prob <- list(sense = "min")
    prob$dparam <- list(INTPNT_CO_TOL_REL_GAP = 1e-5)
    # prob$dparam$intpnt_nl_tol_rel_gap <- 1e-05

    # objective: coefficients c. Order of variables: beta_i (i = 1,2,...,N),
    # nu_i (i = 1,2,...,N), mu_i (i = 1,2,...,N), alpha_k, s_i (i = 1,2,...,N),
    # r_i (i = 1,2,...,N), t_i (i = 1,2,...,N), w_i (i = 1,2,...,N)

    prob$c <- c(rep(0, N * (2 * p + TT + 2) + p), rep(1/(N * TT), N),
        penalty * c(lambda/N))

    # linear constraints: matrix A

    # There must be some smarter method to split the matrix without invoking
    # a loop. Try to modify it later.
    X.split <- list()
    for (i in 1:N) {
        ind <- ((i - 1) * TT + 1):(i * TT)
        X.split[[i]] <- X[ind, ]
    }

    # X beta + nu = y
    A.y <- cbind(bdiag(X.split), Diagonal(TT * N), Matrix(0, TT * N,
        N * (p + 4) + p))

    # beta - mu - alpha = 0 (block by block)
    A.0 <- cbind(Diagonal(N * p), Matrix(0, N * p, TT * N), -Diagonal(N *
        p), -matrix(diag(p), N * p, p, byrow = TRUE), Matrix(0, N * p,
        N * 4))

    # s_i - t_i/2 = -1/2 and r_i - t_i/2 = 1/2
    A.nhalf <- cbind(Matrix(0, N, N * (2 * p + TT) + p), Diagonal(N),
        Matrix(0, N, N), -Diagonal(N)/2, Matrix(0, N, N))

    A.phalf <- cbind(Matrix(0, N, N * (2 * p + TT) + p), Matrix(0, N,
        N), Diagonal(N), -Diagonal(N)/2, Matrix(0, N, N))

    A <- rbind(A.y, A.0, A.nhalf, A.phalf)
    prob$A <- as(A, "CsparseMatrix")

    # linear constraints: upper bounds and lower bounds
    prob$bc <- rbind(blc = c(y, rep(0, N * p), rep(-1/2, N), rep(1/2,
        N)), buc = c(y, rep(0, N * p), rep(-1/2, N), rep(1/2, N)))
    # t_i >= 0
    prob$bx <- rbind(blx = c(rep(-Inf, N * (2 * p + TT + 2) + p), rep(0,
        N), rep(-Inf, N)), bux = c(rep(Inf, N * (2 * p + TT + 4) + p)))

    # Conic constraints: r_i >= ||(nu_i, s_i)|| and w_i >= ||mu_i||
    CC <- list()
    bench <- N * (2 * p + TT) + p

    for (i in 1:N) {
        s.i <- bench + i
        r.i <- bench + N + i
        nu.i <- (N * p + (i - 1) * TT + 1):(N * p + i * TT)
        w.i <- bench + 3 * N + i
        mu.i <- (N * (TT + p) + (i - 1) * p + 1):(N * (TT + p) + i *
            p)
        CC <- cbind(CC, list("QUAD", c(r.i, nu.i, s.i)), list("QUAD",
            c(w.i, mu.i)))
    }
    prob$cones <- CC
    rownames(prob$cones) <- c("type", "sub")

    # Invoke the Mosek solver

    mosek.out <- mosek(prob, opts = list(verbose = 0))

    est <- mosek.out$sol$itr$xx
    beta <- est[1:(N * p)]
    alpha <- est[(N * (2 * p + TT) + 1):(N * (2 * p + TT) + p)]
    result <- list(beta = beta, alpha = alpha)
    return(result)
}

##########################################################
criterion <- function(a.old, a.new, b.old, b.new, tol) {

    # relative-change convergence criterion

    d <- FALSE

    a.cri <- sum(abs(a.old - a.new))/(sum(abs(a.old)) + 1e-04)
    b.cri <- mean(abs(b.old - b.new))/(mean(abs(b.old)) + 1e-04)

    if (a.cri < tol & b.cri < tol) {
        d <- TRUE
    }

    return(d)

}

##########################################################
post.lasso <- function(group.est, a.out, y, X, K, p, N, TT) {

    a.out.post <- matrix(0, K, p)

    for (k in 1:K) {

        group_k <- (group.est == k)

        # re-estimate by group-wise OLS only if the group has enough observations
        if (sum(group_k) >= p/TT) {

            Ind <- 1:N
            group.ind <- Ind[group.est == k]

            data.ind <- as.numeric(sapply(group.ind, function(i) {
                ((i - 1) * TT + 1):(i * TT)
            }))
            yy <- y[data.ind]
            XX <- X[data.ind, ]

            a.out.post[k, ] <- lsfit(XX, yy, intercept = FALSE)$coefficients

        } else {
            a.out.post[k, ] <- a.out[k, ]
        }
    }
    return(a.out.post)
}

##########################################################
post.corr <- function(group.est, a.out, y, X, K, p, N, TT) {

    a.out.corr <- matrix(0, K, p)

    for (k in 1:K) {

        group_k <- (group.est == k)

        if (sum(group_k) >= 2 * p/TT) {
            Ind <- 1:N
            group.ind <- Ind[group.est == k]

            data.ind <- as.numeric(sapply(group.ind, function(i) {
                ((i - 1) * TT + 1):(i * TT)
            }))
            yy <- y[data.ind]
            XX <- X[data.ind, ]

            # half-panel jackknife bias correction
            bias.k <- SPJ_PLS(TT, yy, XX)
            a.k <- lsfit(XX, yy, intercept = FALSE)$coefficients
            a.out.corr[k, ] <- 2 * a.k - bias.k
        } else {
            a.out.corr[k, ] <- a.out[k, ]
        }
    }
    return(a.out.corr)
}

--------------------------------------------------------------------------------
/R/SPJ_PLS.R:
--------------------------------------------------------------------------------
#' Half-panel jackknife
#'
#' Based on Dhaene and Jochmans (2015). Returns the average of the two
#' half-panel estimates, which is combined with the full-sample estimate
#' as 2 * a - bias for bias correction.
#'
#' @param t Number of time periods.
#' @param y (n*t vector) Response variable.
#' @param x (nt * p matrix) Independent variables.
#'
#' @export
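#'
#' @examples
#' \dontrun{
#' # A hypothetical sketch of the correction used in the post-estimation step,
#' # with y (nt * 1) and x (nt * p) for a group of units observed over t periods:
#' a <- lsfit(x, y, intercept = FALSE)$coefficients
#' bias <- SPJ_PLS(t, y, x)
#' a_corrected <- 2 * a - bias
#' }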
SPJ_PLS <- function(t, y, x){

    x <- as.matrix(x)
    n <- length(y) / t
    p <- ncol(x)

    # split the panel into two halves along the time dimension
    period1_i <- c(rep(1, floor(t/2)), rep(0, ceiling(t/2)))
    period1 <- as.logical( rep(period1_i, n) )
    period2 <- !period1

    theta_bar <- matrix(0, p, 2)

    for(tt in c(1,2)){

        if(tt == 1){
            x_half <- x[period1, ]
            y_half <- as.matrix(y[period1])
        } else {
            x_half <- x[period2, ]
            y_half <- as.matrix(y[period2])
        }

        t_half <- length(y_half) / n

        y_demean <- demean(y_half, n, t_half)
        x_demean <- demean(x_half, n, t_half)

        # b <- lsfit(x_demean, y_demean, intercept = FALSE)$coefficients
        b <- MASS::ginv( t(x_demean) %*% x_demean ) %*% ( t(x_demean) %*% y_demean )
        theta_bar[, tt] <- b

    }

    return(rowMeans(theta_bar))

}

--------------------------------------------------------------------------------
/R/determine_K.R:
--------------------------------------------------------------------------------
#' Determine the number of groups using an information criterion
#'
#' @param N Individual dimension.
#' @param TT Time dimension.
#' @param y y (TN * 1).
#' @param X X (TN * P).
#' @param y_raw y_raw (TN * 1). Raw data without standardization.
#' @param X_raw X_raw (TN * P). Raw data without standardization.
#' @param lambda_seq Candidate tuning parameters.
#' @param K_max Maximum number of groups.
#' @param rho Tuning parameter in the IC.
#' @param FUN The function to be used in the PLS estimation.
#' @param beta0 N*p matrix. Initial estimator.
#' @param MaxIter Maximum number of iterations.
#' @param tol Convergence criterion.
#'
#' @return A list containing the optimal K and lambda:
#' \item{lambda}{Optimal lambda.}
#' \item{K}{Optimal K.}
#'
#' @export

determine_K <- function(N,
                        TT,
                        y,
                        X,
                        y_raw,
                        X_raw,
                        lambda_seq,
                        K_max,
                        rho = 2 / 3 * (N * TT)^(-0.5),
                        FUN = PLS.cvxr,
                        beta0 = NULL,
                        MaxIter = 500,
                        tol = 1e-4) {

    p <- ncol(X)

    num_lambda <- length(lambda_seq)
    IC_total <- matrix(0, K_max, num_lambda)

    for(ll in 1:num_lambda){
        for(K in 1:K_max){

            print(paste(as.character(ll),
                        "/",
                        as.character(num_lambda),
                        "th parameter; K = ",
                        as.character(K),
                        "/",
                        as.character(K_max)))

            lambda <- lambda_seq[ll]

            if(K == 1){
                # K = 1 does not depend on lambda: fill the whole row
                a <- lsfit(X, y, intercept = FALSE)$coefficients
                bias <- SPJ_PLS(TT, y_raw, X_raw)
                a_corr <- 2*a - bias

                IC_total[K, ] <- mean( (y - X %*% a_corr)^2 )

                next
            }

            pls_out <- FUN(N,
                           TT,
                           y,
                           X,
                           K,
                           lambda,
                           beta0 = beta0,
                           R = MaxIter,
                           tol = tol,
                           post_est = FALSE,
                           bias_corr = FALSE)

            Q <- rep(0, K)

            # Post-estimation group by group
            for (k in 1:K){

                group_k <- (pls_out$group.est == k)

                # an empty group contributes no observations, hence no SSR
                if(sum(group_k) == 0) next

                Ind <- 1:N
                group_ind <- Ind[group_k]
                data_ind <- as.numeric( sapply(group_ind, function(i){((i-1)*TT+1):(i*TT)}) )
                yy_k <- y[data_ind]
                XX_k <- X[data_ind, ]

                if(sum(group_k) > 2*p/TT){

                    yy_raw_k <- y_raw[data_ind]
                    XX_raw_k <- X_raw[data_ind, ]

                    # bias correction
                    bias_k <- SPJ_PLS(TT, yy_raw_k, XX_raw_k)
                    a_k <- lsfit(XX_k, yy_k, intercept = FALSE)$coefficients
                    a_corr_k <- 2*a_k - bias_k

                } else {
                    a_corr_k <- pls_out$a.out[k, ]
                }

                Q[k] <- sum( (yy_k - XX_k %*% a_corr_k)^2 )
            }

            IC_total[K, ll] <- sum(Q) / (N*TT)

        }
    }

    IC <- log(IC_total) + rho * p * matrix(rep(1:K_max, num_lambda), nrow = K_max)

    # locate the minimizer of the IC (column-major indexing)
    min_ind <- which.min(IC)
    lambda_opt <- lambda_seq[ceiling(min_ind / K_max)]
    K_temp <- min_ind %% K_max
    if (K_temp == 0) K_temp <- K_max
    K_opt <- K_temp

    return(list(lambda = lambda_opt, K = K_opt))

}

--------------------------------------------------------------------------------
/R/init_est.R:
--------------------------------------------------------------------------------
#' Initial estimator
#'
#' Unit-by-unit OLS, used as the initial value for the iterative algorithms.
#'
#' @param X Independent variables (TN * p). T is the fast index.
#' @param y Dependent variable (TN * 1). T is the fast index.
#' @param TT The time series dimension in the panel.
#'
#' @return N * p matrix of individual OLS estimates.
#'
#' @export init_est

init_est <- function(X, y, TT){

    p <- ncol(X)
    N <- length(y) / TT

    beta0 <- matrix(0, N, p)
    for(i in 1:N){
        ind <- ( (i-1)*TT+1 ):(i*TT)
        yy <- y[ind]
        XX <- X[ind, ]

        # beta0[i, ] <- solve( t(XX) %*% XX ) %*% ( t(XX) %*% yy )
        beta0[i, ] <- lsfit(x = XX, y = yy, intercept = FALSE)$coefficients
    }

    return(beta0)
}

--------------------------------------------------------------------------------
/R/utilities_functions.R:
--------------------------------------------------------------------------------
#' @export data.normalization
#' @export demean
#' @export group.coerce

demean <- function(yy, N, TT){

    # Output is the demeaned data with the same dimension as the input:
    # NT * 1 or NT * p

    if(dim(yy)[1] != N*TT) stop("Dimension of inputs in demean is wrong!")

    p <- dim(yy)[2]

    if( p == 1){
        y.temp <- matrix(yy, nrow = TT)
        m <- apply(y.temp, 2, mean)
        y.temp <- y.temp - matrix( rep(m, times = TT), nrow = TT,
                                   ncol = N, byrow = TRUE)
        y <- matrix(y.temp, nrow = N*TT)
        return(y)
    }
    else{
        y <- matrix(0, N*TT, p)
        for(j in 1:p){
            y.temp <- matrix( yy[,j], nrow = TT)
            m <- apply(y.temp, 2, mean)
            y.temp <- y.temp - matrix( rep(m, times = TT), nrow = TT,
                                       ncol = N, byrow = TRUE)
            y[,j] <- matrix(y.temp, nrow = N*TT)
        }
        return(y)
    }
}

data.normalization <- function(yy, N, TT){

    # Output is the standardized data, together with a re-centered "raw"
    # version (standardized data plus the original time-series means),
    # each with the same dimension as the input: NT * 1 or NT * p

    if(dim(yy)[1] != N*TT) stop("Dimension of inputs in data.normalization is wrong!")

    p <- dim(yy)[2]

    if( p == 1){
        y.temp <- robustHD::standardize( matrix(yy, nrow = TT),
                                         scaleFun = function(x) {
                                             sqrt(sum((x - mean(x)) ^ 2) / length(x))
                                         })
        y.mean <- matrix( rep( colMeans( matrix(yy, nrow = TT)), TT), nrow = TT, byrow = TRUE)
        y.raw.temp <- y.temp + y.mean

        y <- matrix(y.temp, nrow = N * TT)
        y.raw <- matrix(y.raw.temp, nrow = N*TT)

    }
    else{
        y <- matrix(0, N*TT, p)
        y.raw <- matrix(0, N*TT, p)
        for(j in 1:p){
            y.temp <- robustHD::standardize( matrix(yy[, j], nrow = TT),
                                             scaleFun = function(x) {
                                                 sqrt(sum((x - mean(x)) ^ 2) / length(x))
                                             })

            y.mean <- matrix( rep(colMeans( matrix(yy[, j], nrow = TT)), TT), nrow = TT, byrow = TRUE)
            y.raw.temp <- y.temp + y.mean

            y[, j] <- matrix(y.temp, nrow = N * TT)
            y.raw[, j] <- matrix(y.raw.temp, nrow = N*TT)

        }
    }

    return(list(y = y, y.raw = y.raw))
}


group.coerce <- function(group.est, a.out, group0, a0, N, N.frac, K, p){

    # auxiliary function useful in simulation experiments: relabel the
    # estimated groups to match the true group labels

    a.est <- matrix(0,K,p)
    group <- rep(0,N)

    for(k in 1:K){
        dif <- apply( (matrix( a.out[k,], K, p, byrow = TRUE) - a0)^2, 1, sum)
        gg <- which.min(dif)

        a.est[gg, ] <- a.out[k, ]
        group[group.est == k] <- gg
    }

    if( sum(group == 0) ){
        warning("Some individuals are not classified!")
    }

    # correct classification ratio
    ratio <- sum( group == group0 ) / N
    # rmse
    weighted.se <- sum( ( a.est[,1] - a0[,1] )^2 * N.frac )

    return( list(ratio = ratio, se = weighted.se,
                 a = a.est, group = group) )
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Classifier-Lasso

This package implements in R the Classifier-Lasso method by

Su, L., Shi, Z., & Phillips, P. C. (2016): ["Identifying latent structures in panel data"](https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA12560), *Econometrica*, *84*(6), 2215-2264.

*This package is under active development...*

The classifier-Lasso method was originally implemented in `MATLAB`, using `CVX` as the modeling language and `MOSEK` as the convex solver. Replicable empirical examples from the paper are available [here](https://zhentaoshi.github.io/C-Lasso).

This package uses the open-source solver [ECOS](https://github.com/embotech/ecos) via [CVXR](https://github.com/anqif/CVXR) by default. We skip the Disciplined Convex Programming (DCP) check step to speed up the optimization.

To further speed up the computation, an R version using `Rmosek` to directly invoke `MOSEK` is elaborated in ["Implementing Convex Optimization in R: Two Econometric Examples"](https://www.researchgate.net/publication/326029597_Implementing_Convex_Optimization_in_R_Two_Econometric_Examples) with [demonstration code](https://github.com/zhan-gao/convex_prog_in_econometrics). In our experiments, this `R+Rmosek` implementation often solves the optimization problem in at most 1/3 of the time taken by the `MATLAB+CVX+MOSEK` implementation, and in at most 2/3 of the time taken by the `CVXR+ECOS` implementation without the DCP check.

## Installation

The current beta version can be installed from [GitHub](https://github.com/zhan-gao/classo) by:

``` r
library(devtools)
devtools::install_github("zhan-gao/classo", INSTALL_opts=c("--no-multiarch"))
library(classo)
```

Though not required for installation and use, `Rmosek` is highly recommended. In our extensive experience, `Rmosek` is often much faster than the other solvers available in R.

An installation gist for `MOSEK` can be found [here](https://gist.github.com/mikelove/67ea44d5be5a053e599257fe357483dc).
The installation of the latest version, `MOSEK 9.0`, includes `Rmosek`. It can be invoked in `R` following [this instruction]() (tested with success).

Alternatively, `Rmosek` can be downloaded from CRAN. We have tested the following lines with success on R 3.6.3:

``` r
install.packages("Rmosek")
library(Rmosek)
mosek_attachbuilder("path_to_the_bin_folder_of_MOSEK")
install.rmosek()
```

*Please make sure `Rmosek` is successfully installed and activated before using the `PLS.mosek()` function for estimation*.

## Examples

The sample data are generated by DGP 1 described in Su, Shi and Phillips (2016) with N = 200 and T = 25.

```r
data("sample_data")
# CAVEAT: Please convert data.frame to matrix to proceed.
y <- as.matrix(sample_data[, 1])
x <- as.matrix(sample_data[, -1])
n <- 200
tt <- 25
lambda <- as.numeric( 0.5 * var(y) / (tt^(1/3)) )
pls_out <- PLS.cvxr(n, tt, y, x, K = 3, lambda = lambda)

# Use Rmosek if it is successfully installed
# pls_out <- PLS.mosek(n, tt, y, x, K = 3, lambda = lambda)

# Estimated slope for each group. True coefficients: [1,1; 0.4,1.6; 1.6,0.4]
pls_out$a.out
```

```
          [,1]      [,2]
[1,] 1.0387521 0.9986867
[2,] 0.4017041 1.6014119
[3,] 1.6197497 0.3614408
```

```R
# Estimated group structure
# True group structure:
# group 2: 1 - 60
# group 1: 61 - 120
# group 3: 121 - 200
pls_out$group.est
```

```
  [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 [33] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1
 [65] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1
 [97] 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3
[129] 3 3 3 3 3 1 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[161] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[193] 3 3 3 3 3 3 3 3
```
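
## Choosing K and lambda

The number of groups `K` and the tuning parameter `lambda` can also be chosen by the information criterion implemented in `determine_K()`. A minimal sketch, continuing the example above (for illustration only: the candidate `lambda` grid below is arbitrary, and the standardized and raw data are taken to be the same series):

``` r
lambda_seq <- as.numeric( 0.5 * var(y) / (tt^(1/3)) ) * c(0.5, 1, 2)
ic_out <- determine_K(n, tt, y, x, y_raw = y, X_raw = x,
                      lambda_seq = lambda_seq, K_max = 4)
pls_out <- PLS.cvxr(n, tt, y, x, K = ic_out$K, lambda = ic_out$lambda)
```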

--------------------------------------------------------------------------------
/classo.Rproj:
--------------------------------------------------------------------------------
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 4
Encoding: UTF-8

RnwWeave: knitr
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source

--------------------------------------------------------------------------------
/data/sample_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhan-gao/classo/0683dcb7c7a12f57d3a96f23cf7ad37476377916/data/sample_data.rda

--------------------------------------------------------------------------------
/man/PLS.cvxr.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PLS_est.R
\name{PLS.cvxr}
\alias{PLS.cvxr}
\title{PLS estimation by the iterative algorithm via CVXR + ECOS}
\usage{
PLS.cvxr(N, TT, y, X, K, lambda, beta0 = NULL, R = 500, tol = 1e-04,
  post_est = TRUE, bias_corr = FALSE)
}
\arguments{
\item{N}{The number of cross-sectional units in the panel.}

\item{TT}{The time series dimension in the panel.}

\item{y}{Dependent variable (TN * 1). T is the fast index.}

\item{X}{Independent variables (TN * P). T is the fast index. P is the number of regressors.}

\item{K}{The number of groups.}

\item{lambda}{The tuning parameter.}

\item{beta0}{N*p matrix. The initial estimator for each i = 1, ..., N.}

\item{R}{Maximum number of iterations.}

\item{tol}{Tolerance level in the convergence criterion.}

\item{post_est}{A boolean: do post-lasso estimation or not.}

\item{bias_corr}{A boolean: do bias correction in the post-lasso estimation or not.}
}
\value{
A list containing the estimated coefficients and group structure:
\item{b.est}{N * p matrix containing the estimated slope for each individual.}
\item{a.out}{K * p matrix containing the estimated slope for each group.}
\item{group.est}{Group id for each individual.}
\item{converge}{A boolean indicating whether the convergence criterion is met.}
}
\description{
PLS estimation by the iterative algorithm via CVXR + ECOS
}

--------------------------------------------------------------------------------
/man/PLS.mosek.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PLS_est.R
\name{PLS.mosek}
\alias{PLS.mosek}
\title{PLS estimation by the iterative algorithm via Rmosek}
\usage{
PLS.mosek(N, TT, y, X, K, lambda, beta0 = NULL, R = 500, tol = 1e-04,
  post_est = TRUE, bias_corr = FALSE)
}
\arguments{
\item{N}{The number of cross-sectional units in the panel.}

\item{TT}{The time series dimension in the panel.}

\item{y}{Dependent variable (TN * 1). T is the fast index.}

\item{X}{Independent variables (TN * P). T is the fast index. P is the number of regressors.}

\item{K}{The number of groups.}

\item{lambda}{The tuning parameter.}

\item{beta0}{N*p matrix. The initial estimator for each i = 1, ..., N.}

\item{R}{Maximum number of iterations.}

\item{tol}{Tolerance level in the convergence criterion.}

\item{post_est}{A boolean: do post-lasso estimation or not.}

\item{bias_corr}{A boolean: do bias correction in the post-lasso estimation or not.}
}
\value{
A list containing the estimated coefficients and group structure:
\item{b.est}{N * p matrix containing the estimated slope for each cross-sectional unit.}
\item{a.out}{K * p matrix containing the estimated slope for each group.}
\item{group.est}{Group id for each individual.}
\item{converge}{A boolean indicating whether the convergence criterion is met.}
}
\description{
PLS estimation by the iterative algorithm via Rmosek
}

--------------------------------------------------------------------------------
/man/PLS.nlopt.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PLS_est.R
\name{PLS.nlopt}
\alias{PLS.nlopt}
\title{PLS estimation by the iterative algorithm via nloptr}
\usage{
PLS.nlopt(N, TT, y, X, K, lambda, beta0 = NULL, R = 500, tol = 1e-04,
  post_est = TRUE, bias_corr = FALSE, algo = "NLOPT_LN_NELDERMEAD")
}
\arguments{
\item{N}{The number of cross-sectional units in the panel.}

\item{TT}{The time series dimension in the panel.}

\item{y}{Dependent variable (TN * 1). T is the fast index.}

\item{X}{Independent variables (TN * P). T is the fast index. P is the number of regressors.}

\item{K}{The number of groups.}

\item{lambda}{The tuning parameter.}

\item{beta0}{N*p matrix. The initial estimator for each i = 1, ..., N.}

\item{R}{Maximum number of iterations.}

\item{tol}{Tolerance level in the convergence criterion.}

\item{post_est}{A boolean: do post-lasso estimation or not.}

\item{bias_corr}{A boolean: do bias correction in the post-lasso estimation or not.}

\item{algo}{The algorithm used by nloptr.}
}
\value{
A list containing the estimated coefficients and group structure:
\item{b.est}{N * p matrix containing the estimated slope for each individual.}
\item{a.out}{K * p matrix containing the estimated slope for each group.}
\item{group.est}{Group id for each individual.}
\item{converge}{A boolean indicating whether the convergence criterion is met.}
}
\description{
PLS estimation by the iterative algorithm via nloptr
}

--------------------------------------------------------------------------------
/man/SPJ_PLS.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SPJ_PLS.R
\name{SPJ_PLS}
\alias{SPJ_PLS}
\title{Half-panel jackknife}
\usage{
SPJ_PLS(t, y, x)
}
\arguments{
\item{t}{Number of time periods.}

\item{y}{(n*t vector) Response variable.}

\item{x}{(nt * p matrix) Independent variables.}
}
\description{
Half-panel jackknife, based on Dhaene and Jochmans (2015).
}

--------------------------------------------------------------------------------
/man/determine_K.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/determine_K.R
\name{determine_K}
\alias{determine_K}
\title{Determine the number of groups using an information criterion}
\usage{
determine_K(N, TT, y, X, y_raw, X_raw, lambda_seq, K_max, rho = 2/3 * (N
  * TT)^(-0.5), FUN = PLS.cvxr, beta0 = NULL, MaxIter = 500,
  tol = 1e-04)
}
\arguments{
\item{N}{Individual dimension.}

\item{TT}{Time dimension.}

\item{y}{y (TN * 1).}

\item{X}{X (TN * P).}

\item{y_raw}{y_raw (TN * 1). Raw data without standardization.}

\item{X_raw}{X_raw (TN * P). Raw data without standardization.}

\item{lambda_seq}{Candidate tuning parameters.}

\item{K_max}{Maximum number of groups.}

\item{rho}{Tuning parameter in the IC.}

\item{FUN}{The function to be used in the PLS estimation.}

\item{beta0}{N*p matrix. Initial estimator.}

\item{MaxIter}{Maximum number of iterations.}

\item{tol}{Convergence criterion.}
}
\value{
A list containing the optimal K and lambda:
\item{lambda}{Optimal lambda.}
\item{K}{Optimal K.}
}
\description{
Determine the number of groups using an information criterion
}

--------------------------------------------------------------------------------
/man/init_est.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/init_est.R
\name{init_est}
\alias{init_est}
\title{Initial estimator}
\usage{
init_est(X, y, TT)
}
\arguments{
\item{X}{Independent variables (TN * p). T is the fast index.}

\item{y}{Dependent variable (TN * 1). T is the fast index.}

\item{TT}{The time series dimension in the panel.}
}
\value{
N * p matrix of individual OLS estimates.
}
\description{
Unit-by-unit OLS, used as the initial value for the iterative algorithms.
}

--------------------------------------------------------------------------------