├── .gitattributes ├── .gitignore ├── Miscellaneous-R-Code.Rproj ├── ModelFitting ├── Bayesian │ ├── StanBugsJags │ │ ├── BetaMixedModel.stan │ │ ├── IRT_models │ │ │ ├── IRT_2PM.stan │ │ │ ├── IRT_3PM.stan │ │ │ ├── IRT_4PM.stan │ │ │ ├── IRT_rasch.stan │ │ │ ├── ReadMe.md │ │ │ └── test_stan_irt.R │ │ ├── MixedModelGeneric.stan │ │ ├── MixedModelSleepstudy_withREcorrelation.stan │ │ ├── ReadMe.md │ │ ├── cfa │ │ ├── jags_MixedModelBetaRegression.jags │ │ ├── linregwithprior.stan │ │ ├── lmbugs.bugs │ │ ├── lmjags.jags │ │ ├── multilevelmediation.stan │ │ ├── multilevelmediationYuanMacKinnon2009.bugs │ │ └── singlelevelmediationYuanMacKinnon2009.bugs │ ├── bugs_linreg.R │ ├── horseshoe │ │ ├── README.md │ │ ├── example.R │ │ ├── wei_bg.stan │ │ ├── wei_bg_joint.stan │ │ ├── wei_gau.stan │ │ ├── wei_gau_joint.stan │ │ ├── wei_hs.stan │ │ ├── wei_hs_joint.stan │ │ ├── wei_lap.stan │ │ └── wei_lap_joint.stan │ ├── jags_MixedModelBetaRegression.R │ ├── jags_linreg.R │ ├── multinomial │ │ ├── README.md │ │ ├── mnl_1.stan │ │ ├── mnl_2.stan │ │ ├── mnl_3.stan │ │ ├── mnl_4.stan │ │ ├── multinomial_stan_comparisons.R │ │ ├── stan_basic_mnl_conceptual.stan │ │ └── stan_mnl_altspecific.stan │ ├── r2bugs.distributions.R │ ├── rstanBetaRegression.R │ ├── rstan_MixedModelBetaRegression.R │ ├── rstan_MixedModelSleepstudy.R │ ├── rstan_MixedModelSleepstudy_withREcorrelation.R │ ├── rstan_MixtureModel.R │ ├── rstan_linregwithprior.R │ ├── rstan_multilevelMediation.R │ ├── rstan_topicModels0.R │ ├── rstant_testBEST.R │ ├── stochasticVolatility.R │ ├── topicModelgibbs.R │ ├── variationalBayesRegression.R │ └── variationalBayesRegression.Rmd ├── EM Examples │ ├── EM Mixture MV.R │ ├── EM Mixture.R │ ├── EM algorithm for ppca with missing.R │ ├── EM algorithm for ppca.R │ ├── EM algorithm for probit example.R │ ├── EM for pca.R │ └── EM for state space unobserved components.R ├── Mixed Models │ ├── mixedModelML │ │ ├── mixedModelML.R │ │ ├── mixedModelML.Rmd │ │ ├── mixedModelML.html │ │ ├── mixedModelML.md │ │ ├── mixedModelML.pdf │ │ └── mixedModelML_files │ │ │ ├── figure-html │ │ │ ├── diag.html │ │ │ ├── fitCSgam-1.png │ │ │ ├── gamSleepStudy-1.png │ │ │ ├── gammSleepStudy-1.png │ │ │ ├── unnamed-chunk-1-1.png │ │ │ ├── unnamed-chunk-3-1.png │ │ │ └── unnamed-chunk-3-2.png │ │ │ └── figure-latex │ │ │ ├── fitCSgam-1.pdf │ │ │ └── gammSleepStudy-1.pdf │ ├── mixedModels │ │ ├── anovamixed.Rmd │ │ ├── anovamixed.html │ │ ├── anovamixed.pdf │ │ ├── growthCurvevsMixedModel.R │ │ ├── growth_vs_mixed.Rmd │ │ ├── growth_vs_mixed.html │ │ ├── growth_vs_mixed_files │ │ │ ├── figure-html │ │ │ │ ├── lavaanmod-1.png │ │ │ │ ├── randomIntsSlopesMods-1.png │ │ │ │ └── visualizeTrends-1.png │ │ │ └── growthvsMixed_EstResults.RData │ │ ├── growth_vs_mixed_sim.Rmd │ │ ├── growth_vs_mixed_sim.html │ │ ├── growth_vs_mixed_sim.md │ │ ├── mixedModels.Rmd │ │ ├── mixedModels.md │ │ ├── mixedModels.pdf │ │ └── mixedModels_files │ │ │ ├── figure-html │ │ │ ├── sleepModFits-1.png │ │ │ ├── sleepModFitsReduced-1.png │ │ │ └── sleepstudyPlot-1.png │ │ │ ├── figure-latex │ │ │ ├── sleepModFits-1.pdf │ │ │ ├── sleepModFitsReduced-1.pdf │ │ │ └── sleepstudyPlot-1.pdf │ │ │ └── figure-markdown_github │ │ │ ├── sleepModFits-1.png │ │ │ ├── sleepModFitsReduced-1.png │ │ │ └── sleepstudyPlot-1.png │ ├── one_factor_RE.R │ ├── one_factor_RE.jl │ ├── one_factor_RE.m │ ├── twofactorRE.R │ ├── twofactorRE.jl │ └── twofactorRE.m ├── NBzeroinfl.R ├── RKHSReg │ ├── RKHSReg.R │ ├── RKHSReg.md │ └── update.Rmd ├── bivariateProbit.R ├── cfa_ml.R 
├── convert_code_to_html.R ├── crp.R ├── cubicsplines.R ├── elm.R ├── gp Examples │ ├── README.md │ ├── gaussSample.m │ ├── gaussianProcessStan.R │ ├── gaussianProcessStan.Rmd │ ├── gaussianprocessNoiseFree.R │ ├── gaussianprocessNoisy.R │ ├── gpStanModelCode.stan │ ├── gpStanModelCode_gammaExponential.stan │ ├── gpStanModelCode_generalizedSquaredExponential.stan │ ├── gpStanModelCode_rationalQuadratic.stan │ ├── gpStan_squaredExponentialFactorAnalysis.R │ ├── gpStan_squaredExponentialFactorAnalysis.stan │ ├── gprDemoChangeHparams.m │ ├── gprDemoNoiseFree.m │ └── stangp.RData ├── gradient_descent.R ├── heckman_selection.R ├── hmm_viterbi.R ├── hmm_viterbi.py ├── hurdle.R ├── ipw.R ├── lasso.R ├── markov_model.R ├── multinomial.R ├── naivebayes.R ├── nelder_mead.R ├── nelder_mead.ipynb ├── nelder_mead.py ├── newton_irls.R ├── one_line_models.R ├── ordinal_regression.R ├── penalized_ML.R ├── poiszeroinfl.R ├── quantile_regression.Rmd ├── quantile_regression.html ├── ridge.R ├── standard_lm.R ├── standard_logistic.R ├── stochastic_gradient_descent.R ├── survivalCox.R └── tobit.R ├── Other ├── Programming_Shenanigans │ ├── compoundInterest.R │ ├── fillbyLastRecursive.R │ ├── fizzbuzz.R │ ├── fizzbuzz.jl │ ├── fizzbuzz.py │ ├── matrixOperations.Rmd │ ├── matrixOperations.html │ ├── matrixOperations.md │ ├── matrixOperations_files │ │ └── figure-html │ │ │ └── unnamed-chunk-16-1.png │ ├── stringReverseRecursively.R │ ├── stringReverseRecursively.py │ ├── wordWrap.R │ └── wordWrap.py ├── Python Startup │ ├── mixedModel.py │ └── regressionExamples.py ├── getRollCall.R ├── ggtheme.R ├── shakespeareanInsulter.R ├── spurriousCorrelationwithRatios.R ├── xkcdscrape.R └── xkcdscrape.py ├── README.md ├── SC and TR ├── GAMS.R ├── MLcode.R ├── coursecode.r ├── mixedModelML │ ├── mixedModelML.R │ ├── mixedModelML.Rmd │ ├── mixedModelML.html │ ├── mixedModelML.md │ ├── mixedModelML.pdf │ └── mixedModelML_files │ │ ├── figure-html │ │ ├── diag.html │ │ ├── fitCSgam-1.png │ │ ├── gamSleepStudy-1.png │ │ ├── gammSleepStudy-1.png │ │ ├── unnamed-chunk-1-1.png │ │ ├── unnamed-chunk-3-1.png │ │ └── unnamed-chunk-3-2.png │ │ └── figure-latex │ │ ├── fitCSgam-1.pdf │ │ └── gammSleepStudy-1.pdf └── mixedModels │ ├── anovamixed.Rmd │ ├── anovamixed.html │ ├── anovamixed.pdf │ ├── growthCurvevsMixedModel.R │ ├── growth_vs_mixed.Rmd │ ├── growth_vs_mixed.html │ ├── growth_vs_mixed_files │ ├── figure-html │ │ ├── lavaanmod-1.png │ │ ├── randomIntsSlopesMods-1.png │ │ └── visualizeTrends-1.png │ └── growthvsMixed_EstResults.RData │ ├── growth_vs_mixed_sim.Rmd │ ├── growth_vs_mixed_sim.html │ ├── growth_vs_mixed_sim.md │ ├── mixedModels.Rmd │ ├── mixedModels.md │ ├── mixedModels.pdf │ ├── mixedModels_files │ ├── figure-html │ │ ├── sleepModFits-1.png │ │ ├── sleepModFitsReduced-1.png │ │ └── sleepstudyPlot-1.png │ ├── figure-latex │ │ ├── sleepModFits-1.pdf │ │ ├── sleepModFitsReduced-1.pdf │ │ └── sleepstudyPlot-1.pdf │ └── figure-markdown_github │ │ ├── sleepModFits-1.png │ │ ├── sleepModFitsReduced-1.png │ │ └── sleepstudyPlot-1.png │ └── pearlpic.png └── other.css /.gitattributes: -------------------------------------------------------------------------------- 1 | *.html linguist-vendored 2 | *.stan linguist-language=Stan 3 | 4 | 5 | # Source files 6 | # ============ 7 | *.Rdata text 8 | *.rdb binary 9 | *.rds binary 10 | *.Rd text 11 | *.Rdx binary 12 | *.Rmd text 13 | *.R text 14 | *.stan text -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | *_cache 5 | .DS_Store -------------------------------------------------------------------------------- /Miscellaneous-R-Code.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/BetaMixedModel.stan: -------------------------------------------------------------------------------- 1 | //see rstan_MixedModelBetaRegression.R 2 | data { 3 | int N; // number of observations 4 | int L; // number of batches 5 | vector[N] yield; // response 6 | int id[N]; // batch 7 | vector[N] temp; // temperature 8 | } 9 | 10 | transformed data { 11 | vector[N] tempCen; 12 | tempCen = temp - mean(temp); // centered explanatory variable 13 | } 14 | 15 | parameters { 16 | real Intercept; // "fixed" effects 17 | real betaTemp; 18 | 19 | real phi; // dispersion parameter 20 | 21 | real sd_int; // sd for ints 22 | real sd_beta; // sd for temp 23 | 24 | vector[L] gammaIntercept; // individual effects 25 | vector[L] gammaTemp; // individual effects 26 | } 27 | 28 | transformed parameters{ 29 | vector[N] A; // parameter for beta distn 30 | vector[N] B; // parameter for beta distn 31 | vector[N] yhat; // transformed linear predictor 32 | vector[L] IntRE; 33 | vector[L] SlopeRE; 34 | 35 | for (l in 1:L){ 36 | IntRE[l] = gammaIntercept[l]*sd_int; 37 | SlopeRE[l] = gammaTemp[l]*sd_beta ; 38 | } 39 | 40 | // model calculations 41 | for(n in 1:N) { 42 | yhat[n] = inv_logit((IntRE[id[n]] + Intercept) + (SlopeRE[id[n]] + betaTemp) * tempCen[n]); 43 | } 44 | 45 | A = yhat * phi; 46 | B = (1.0-yhat) * phi; 47 | } 48 | 49 | model { 50 | // priors 51 | Intercept ~ normal(0, 10); 52 | betaTemp ~ normal(0, 1); 53 | 54 | sd_int ~ cauchy(0, 2.5); 55 | sd_beta ~ cauchy(0, 2.5); 56 | phi ~ cauchy(0, 5); 57 | // matt trick used for following; 58 | // else slower and convergence issues 59 | gammaIntercept ~ normal(0, 1); // random intercepts for each batch 60 | gammaTemp ~ normal(0, 1); // random slopes for each batch 61 | 62 | // likelihood 63 | yield ~ beta(A, B); 64 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/IRT_2PM.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | int J; 4 | int Y[N,J]; 5 | } 6 | 7 | transformed data{ 8 | 9 | } 10 | 11 | parameters { 12 | vector[J] difficulty; 13 | vector[J] discrim; 14 | vector[N] Z; 15 | } 16 | 17 | transformed parameters { 18 | 19 | } 20 | 21 | model { 22 | matrix[N, J] lmat; 23 | 24 | # priors 25 | Z ~ normal(0, 1); 26 | discrim ~ student_t(3, 0, 5); 27 | difficulty ~ student_t(3, 0, 5); 28 | 29 | 30 | 31 | for (j in 1:J){ 32 | lmat[,j] = discrim[j] * (Z - difficulty[j]); 33 | } 34 | 35 | 36 | // likelihood 37 | for (j in 1:J) Y[,j] ~ bernoulli_logit(lmat[,j]); 38 | 39 | } 40 | 41 | generated quantities { 42 | 43 | } 44 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/IRT_3PM.stan: 
-------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | int J; 4 | int Y[N,J]; 5 | } 6 | 7 | transformed data{ 8 | 9 | } 10 | 11 | parameters { 12 | vector[J] difficulty; 13 | vector[J] discrim; 14 | vector[J] guess; 15 | vector[N] Z; 16 | } 17 | 18 | transformed parameters { 19 | 20 | } 21 | 22 | model { 23 | matrix[N, J] pmat; 24 | 25 | # priors 26 | Z ~ normal(0, 1); 27 | discrim ~ student_t(3, 0, 5); 28 | difficulty ~ student_t(3, 0, 5); 29 | guess ~ beta(1, 19); 30 | 31 | 32 | for (j in 1:J){ 33 | pmat[,j] = guess[j] + (1 - guess[j]) * inv_logit(discrim[j] * (Z - difficulty[j])); 34 | } 35 | 36 | 37 | // likelihood 38 | for (j in 1:J) Y[,j] ~ bernoulli(pmat[,j]); 39 | 40 | } 41 | 42 | generated quantities { 43 | 44 | } 45 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/IRT_4PM.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | int J; 4 | int Y[N,J]; 5 | } 6 | 7 | transformed data{ 8 | 9 | } 10 | 11 | parameters { 12 | vector[J] difficulty; 13 | vector[J] discrim; 14 | vector[J] guess; 15 | vector[J] ceiling; 16 | vector[N] Z; 17 | } 18 | 19 | transformed parameters { 20 | 21 | } 22 | 23 | model { 24 | matrix[N, J] pmat; 25 | 26 | # priors 27 | Z ~ normal(0, 1); 28 | discrim ~ student_t(3, 0, 5); 29 | difficulty ~ student_t(3, 0, 5); 30 | guess ~ beta(1, 19); 31 | ceiling ~ beta(49, 1); 32 | 33 | 34 | for (j in 1:J){ 35 | pmat[,j] = guess[j] + (ceiling[j] - guess[j]) * inv_logit(discrim[j] * (Z - difficulty[j])); 36 | } 37 | 38 | 39 | // likelihood 40 | for (j in 1:J) Y[,j] ~ bernoulli(pmat[,j]); 41 | 42 | } 43 | 44 | generated quantities { 45 | 46 | } 47 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/IRT_rasch.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | int J; 4 | int Y[N,J]; 5 | } 6 | 7 | transformed data{ 8 | 9 | } 10 | 11 | parameters { 12 | vector[J] difficulty; 13 | real discrim; 14 | vector[N] Z; 15 | } 16 | 17 | transformed parameters { 18 | 19 | } 20 | 21 | model { 22 | matrix[N, J] lmat; 23 | 24 | # priors 25 | Z ~ normal(0, 1); 26 | discrim ~ student_t(3, 0, 5); 27 | difficulty ~ student_t(3, 0, 5); 28 | 29 | 30 | 31 | for (j in 1:J){ 32 | lmat[,j] = discrim * (Z - difficulty[j]); 33 | } 34 | 35 | 36 | // likelihood 37 | for (j in 1:J) Y[,j] ~ bernoulli_logit(lmat[,j]); 38 | 39 | } 40 | 41 | generated quantities { 42 | 43 | } 44 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/ReadMe.md: -------------------------------------------------------------------------------- 1 | These are the basic 1 through 4 parameter IRT models with Stan, and comparisons to models from ltm and sirt packages in R in the test_stan_irt.R file. 2 | 3 | Some of these standard models are now easily implemented in [brms](https://cran.r-project.org/web/packages/brms/vignettes/brms_nonlinear.html#advanced-item-response-models). 
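For instance, a minimal brms sketch of the rasch model as a crossed logistic mixed model, mirroring the commented lme4/brms lines in test_stan_irt.R (assumes the item responses have been reshaped to long format, with hypothetical columns Response, Item, and Subject in a data frame dat_long):

library(brms)
# item easiness via the Item coefficients; person ability via the Subject intercepts
brm(Response ~ 0 + Item + (1 | Subject), data = dat_long, family = bernoulli)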
4 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/test_stan_irt.R: -------------------------------------------------------------------------------- 1 | library(tidyverse); library(rstan); library(shinystan); library(ltm) 2 | 3 | # Data setup 4 | data(Abortion, package='ltm') 5 | Abortion_ = sapply(Abortion, as.numeric) 6 | datalist = list(Y=Abortion_, N=nrow(Abortion_), J=ncol(Abortion_), Model=1) 7 | 8 | # 1PM 9 | test_rasch = stan(file='IRT_rasch.stan', data=datalist, iter=10) 10 | real_rasch = stan(fit=test_rasch, data=datalist, cores=4, iter=3000, warmup=1000, thin=8) 11 | 12 | print(real_rasch, par=c('difficulty', 'discrim'), digits=3) 13 | 14 | # rasch(Abortion, constraint=cbind(ncol(Abortion) + 1, 1)) 15 | rasch(Abortion) 16 | 17 | # std of random effect === discrimination; coefs are comparable to rasch(..., IRT.param=FALSE) 18 | # Abortion %>% 19 | # mutate(Subject=1:nrow(Abortion)) %>% 20 | # gather(key=Item, value=Response, -Subject) %>% 21 | # lme4::glmer(Response ~ -1 + Item + (1|Subject), data=., family=binomial) 22 | # or 23 | # brms::brm(Response ~ -1 + Item + (1|Subject), data=., family=bernoulli) 24 | 25 | 26 | # launch_shinystan(real_rasch) 27 | 28 | 29 | # 2PM 30 | test_2pm = stan(file='IRT_2PM.stan', data=datalist, iter=10) 31 | real_2pm = stan(fit=test_2pm, data=datalist, cores=4, iter=3000, warmup=1000, thin=8) 32 | 33 | print(real_2pm, par=c('difficulty', 'discrim'), digits=3) 34 | 35 | ltm(Abortion ~ z1) 36 | # launch_shinystan(real_2pm) 37 | 38 | 39 | # 3PM 40 | test_3pm = stan(file='IRT_3PM.stan', data=datalist, iter=10) 41 | real_3pm = stan(fit=test_3pm, data=datalist, cores=4, iter=4000, warmup=2000, thin=8) 42 | 43 | print(real_3pm, par=c('difficulty', 'discrim', 'guess')) 44 | 45 | tpm(Abortion) 46 | # launch_shinystan(real_3pm) 47 | 48 | 49 | # 4PM 50 | test_4pm = stan(file='IRT_4PM.stan', data=datalist, iter=10) 51 | real_4pm = stan(fit=test_4pm, data=datalist, cores=4, iter=4000, warmup=2000, thin=8) 52 | 53 | library(sirt) 54 | compare4pm = rasch.mml2(Abortion, est.a = 1:4 , est.c=1:4 , est.d = 1:4, 55 | min.a=0, min.b=-3, max.b=3, min.d = .95 , max.c = .25) # so nice to have no verbose option! 
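# note the bounds on c and d roughly echo the priors in the Stan files:
# guess ~ beta(1, 19) keeps c small, ceiling ~ beta(49, 1) keeps d near 1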
56 | compare4pm$item[, c('a','b','c','d')] # a = discrim, b = diff, c = guess, d = ceiling
57 | print(real_4pm, par=c('difficulty', 'discrim', 'guess', 'ceiling'))
58 | 
59 | # launch_shinystan(real_4pm)
60 | 
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/MixedModelGeneric.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; # sample size
3 | int Ncol_x; # number of columns of the covariate matrix
4 | int I; # number of groups
5 | vector[N] y; # target variable: center or standardize
6 | matrix[N, Ncol_x] X; # covariate matrix: center or standardize; currently set up for one covariate
7 | int Group[N]; # grouping factor
8 | }
9 | 
10 | transformed data {
11 | int P;
12 | // matrix[N,Ncol_x+1] MM; # in case you want to work with a model matrix
13 | //
14 | // MM = append_col(rep_vector(1, N), X);
15 | // P = cols(MM);
16 | P = Ncol_x + 1;
17 | }
18 | 
19 | parameters {
20 | vector[P] fixefs; # fixed effects
21 | real sigma_int_fe; # fixed effects scales
22 | real sigma_beta_fe;
23 | cholesky_factor_corr[P] Omega_FE_chol; # correlation matrix for fixed effects (chol decomp)
24 | 
25 | real sigma_int_re; # random effects scales
26 | real sigma_beta_re;
27 | real sigma_int_re_hyper; # random effects scales hyperprior
28 | real sigma_beta_re_hyper;
29 | 
30 | vector[2] gamma[I]; # individual effects; array allows vectorized multinormal
31 | cholesky_factor_corr[2] Omega_RE_chol; # correlation matrix for random intercepts and slopes (chol decomp)
32 | 
33 | real sigma_y; # residual sd
34 | }
35 | 
36 | transformed parameters {
37 | real Intercept;
38 | real beta;
39 | 
40 | Intercept = fixefs[1];
41 | beta = fixefs[2];
42 | }
43 | 
44 | model {
45 | matrix[P,P] DC_fe;
46 | matrix[2,2] DC_re;
47 | vector[P] sigma_fe;
48 | vector[2] sigma_re;
49 | vector[N] LP; # Linear predictor
50 | 
51 | # priors
52 | Omega_FE_chol ~ lkj_corr_cholesky(2.0);
53 | Omega_RE_chol ~ lkj_corr_cholesky(2.0);
54 | 
55 | ### hyperpriors
56 | sigma_int_re_hyper ~ exponential(.2);
57 | sigma_beta_re_hyper ~ exponential(.2);
58 | 
59 | ### priors
60 | sigma_int_fe ~ cauchy(0, 2.5);
61 | sigma_beta_fe ~ cauchy(0, 2.5);
62 | 
63 | sigma_int_re ~ cauchy(0, sigma_int_re_hyper);
64 | sigma_beta_re ~ cauchy(0, sigma_beta_re_hyper);
65 | 
66 | sigma_y ~ cauchy(0, 2.5);
67 | 
68 | # multivariate draw for fixed effects
69 | sigma_fe[1] = sigma_int_fe;
70 | sigma_fe[2] = sigma_beta_fe;
71 | DC_fe = diag_pre_multiply(sigma_fe, Omega_FE_chol);
72 | 
73 | fixefs ~ multi_normal_cholesky(rep_vector(0, 2), DC_fe);
74 | 
75 | 
76 | # multivariate draw for random effects
77 | sigma_re[1] = sigma_int_re;
78 | sigma_re[2] = sigma_beta_re;
79 | DC_re = diag_pre_multiply(sigma_re, Omega_RE_chol);
80 | 
81 | gamma ~ multi_normal_cholesky(fixefs, DC_re);
82 | 
83 | 
84 | # likelihood
85 | for (n in 1:N){
86 | # LP[n] = MM[n] * betavec; # tried matrix approach, but it was actually 2-3 times slower
87 | LP[n] = gamma[Group[n],1] + gamma[Group[n],2] * X[n,1];
88 | }
89 | y ~ normal(LP, sigma_y);
90 | }
91 | 
92 | generated quantities {
93 | corr_matrix[P] Omega_FE; # correlation of FE
94 | corr_matrix[2] Omega_RE; # correlation of RE
95 | 
96 | Omega_FE = tcrossprod(Omega_FE_chol);
97 | Omega_RE = tcrossprod(Omega_RE_chol);
98 | }
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/MixedModelSleepstudy_withREcorrelation.stan:
--------------------------------------------------------------------------------
1 | data { // data setup
2 | int N; // sample size
3 | int I; // number of subjects
4 | vector[N] RT; // Response: reaction time
5 | vector[N] Days; // Days in study
6 | int Subject[N]; // Subject
7 | }
8 | 
9 | transformed data {
10 | real IntBase;
11 | real RTsd;
12 | 
13 | IntBase = mean(RT); // Intercept starting point
14 | RTsd = sd(RT);
15 | }
16 | 
17 | parameters {
18 | real Intercept01; // fixed effects
19 | real beta01;
20 | vector[2] sigma_u; // sd for ints and slopes
21 | real sigma_y; // residual sd
22 | vector[2] gamma[I]; // individual effects
23 | cholesky_factor_corr[2] Omega_chol; // correlation matrix for random intercepts and slopes (chol decomp)
24 | }
25 | 
26 | transformed parameters {
27 | vector[I] gammaIntercept; // individual effects (named)
28 | vector[I] gammaDays;
29 | real Intercept;
30 | real beta;
31 | 
32 | Intercept = IntBase + Intercept01 * RTsd;
33 | beta = beta01 * 10;
34 | 
35 | for (i in 1:I){
36 | gammaIntercept[i] = gamma[i,1];
37 | gammaDays[i] = gamma[i,2];
38 | }
39 | 
40 | }
41 | 
42 | model {
43 | matrix[2,2] D;
44 | matrix[2,2] DC;
45 | vector[N] yhat; // Linear predictor
46 | vector[2] mu; // vector of Intercept and beta
47 | 
48 | D = diag_matrix(sigma_u);
49 | mu[1] = Intercept;
50 | mu[2] = beta;
51 | 
52 | // priors
53 | Intercept01 ~ normal(0, 1); // example of weakly informative priors;
54 | beta01 ~ normal(0, 1); // remove to essentially duplicate lme4 via improper prior
55 | 
56 | Omega_chol ~ lkj_corr_cholesky(2.0);
57 | 
58 | sigma_u ~ cauchy(0, 2.5); // prior for RE scale
59 | sigma_y ~ cauchy(0, 2.5); // prior for residual scale
60 | 
61 | DC = D * Omega_chol;
62 | 
63 | for (i in 1:I) // loop for Subject random effects
64 | gamma[i] ~ multi_normal_cholesky(mu, DC);
65 | 
66 | // likelihood
67 | for (n in 1:N)
68 | yhat[n] = gammaIntercept[Subject[n]] + gammaDays[Subject[n]] * Days[n];
69 | 
70 | RT ~ normal(yhat, sigma_y);
71 | }
72 | 
73 | generated quantities {
74 | matrix[2,2] Omega; // correlation of RE
75 | 
76 | Omega = tcrossprod(Omega_chol);
77 | }
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/ReadMe.md:
--------------------------------------------------------------------------------
1 | Read Me
2 | -------------
3 | 
4 | All of these can be found in context within their respective .R counterparts. But as there are usually a lot of other (R-specific) things going on, I separate them out here for easier inspection.
-------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/cfa: -------------------------------------------------------------------------------- 1 | data{ 2 | int N; // sample size 3 | int P; // number of variables 4 | int K; // number of factors 5 | matrix[N,P] X; // data matrix of order [N,P] 6 | } 7 | 8 | transformed data{ 9 | int L; 10 | L <- P-K; // Number of loadings 11 | } 12 | 13 | 14 | parameters{ 15 | vector[P] b; // intercepts 16 | vector[L] lambda01; // initial factor loadings 17 | matrix[N,K] FS; // factor scores, matrix of order [N,K] 18 | corr_matrix[K] phi; // factor correlations 19 | vector[K] sdFactorsUnif; 20 | vector[P] sdXUnif; 21 | //vector[L] sdLambdaUnif; 22 | } 23 | 24 | transformed parameters{ 25 | } 26 | 27 | model{ 28 | matrix[N,P] mu; 29 | matrix[K,K] Ld; 30 | vector[K] muFactors; 31 | vector[L] lambda; // factor loadings 32 | vector[K] sdFactors; // sd of factors 33 | vector[P] sdX; // variance for each variable 34 | //vector[L] sdLambda; // hyperprior variance for loading estimates 35 | 36 | // the following will equate the sd to (half) cauchy(0, 2.5) 37 | for (k in 1:K) sdFactors[k] <- 0 + 2.5*tan(sdFactorsUnif[k]); 38 | for (p in 1:P) sdX[p] <- 0 + 2.5*tan(sdXUnif[p]); 39 | // for (l in 1:L) sdLambda[l] <- 0 + 2.5*tan(sdLambdaUnif[l]); 40 | 41 | lambda <- lambda01 * 5; 42 | 43 | muFactors <- rep_vector(0,K); // Factor means, set to zero 44 | Ld <- diag_matrix(sdFactors) * cholesky_decompose(phi); 45 | 46 | for(n in 1:N){ 47 | mu[n,1] <- b[1] + FS[n,1]; // Agree 48 | mu[n,2] <- b[2] + FS[n,1]*lambda[1]; 49 | mu[n,3] <- b[3] + FS[n,1]*lambda[2]; 50 | mu[n,4] <- b[4] + FS[n,1]*lambda[3]; 51 | mu[n,5] <- b[5] + FS[n,1]*lambda[4]; 52 | 53 | mu[n,6] <- b[6] + FS[n,2]; // Consc 54 | mu[n,7] <- b[7] + FS[n,2]*lambda[5]; 55 | mu[n,8] <- b[8] + FS[n,2]*lambda[6]; 56 | mu[n,9] <- b[9] + FS[n,2]*lambda[7]; 57 | mu[n,10] <- b[10] + FS[n,2]*lambda[8]; 58 | 59 | mu[n,11] <- b[11] + FS[n,3]; // Extro 60 | mu[n,12] <- b[12] + FS[n,3]*lambda[9]; 61 | mu[n,13] <- b[13] + FS[n,3]*lambda[10]; 62 | mu[n,14] <- b[14] + FS[n,3]*lambda[11]; 63 | mu[n,15] <- b[15] + FS[n,3]*lambda[12]; 64 | 65 | mu[n,16] <- b[16] + FS[n,4]; // Neuro 66 | mu[n,17] <- b[17] + FS[n,4]*lambda[13]; 67 | mu[n,18] <- b[18] + FS[n,4]*lambda[14]; 68 | mu[n,19] <- b[19] + FS[n,4]*lambda[15]; 69 | mu[n,20] <- b[20] + FS[n,4]*lambda[16]; 70 | 71 | mu[n,21] <- b[21] + FS[n,5]; // Open 72 | mu[n,22] <- b[22] + FS[n,5]*lambda[17]; 73 | mu[n,23] <- b[23] + FS[n,5]*lambda[18]; 74 | mu[n,24] <- b[24] + FS[n,5]*lambda[19]; 75 | mu[n,25] <- b[25] + FS[n,5]*lambda[20]; 76 | } 77 | 78 | // priors 79 | phi ~ lkj_corr(2.0); 80 | // sdX ~ cauchy(0, 2.5); 81 | // sdLambda ~ cauchy(0, 2.5); 82 | // sdFactors ~ cauchy(0, 2.5); 83 | 84 | b ~ normal(0, 10); 85 | lambda01 ~ normal(0, 1); 86 | 87 | for(i in 1:N){ 88 | X[i] ~ normal(mu[i], sdX); 89 | FS[i] ~ multi_normal_cholesky(muFactors, Ld); 90 | } 91 | 92 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/jags_MixedModelBetaRegression.jags: -------------------------------------------------------------------------------- 1 | model { 2 | for (n in 1:N){ 3 | logit(mu[n]) <- (Intercept + IntRE[id[n]]) + (betaTemp + SlopeRE[id[n]])*tempCen[n] 4 | A[n] <- mu[n]*phi 5 | B[n] <- (1.0-mu[n])*phi 6 | yield[n] ~ dbeta(A[n], B[n]) 7 | } 8 | 9 | Intercept ~ dnorm(0, 1/10^2) 10 | betaTemp ~ dnorm(0, 1) 11 | 12 | for (l in 1:L){ 13 | IntRE[l] <- 
gammaIntercept[l]*sd_int
14 | SlopeRE[l] <- gammaTemp[l]*sd_beta
15 | 
16 | gammaIntercept[l] ~ dnorm(0, 1)
17 | gammaTemp[l] ~ dnorm(0, 1)
18 | }
19 | 
20 | # Half-cauchy as in Gelman 2006
21 | # If scale parameter in cauchy is 5, precision of z = 1/5^2 = 0.04
22 | # sigma int
23 | # sd_int <- zInt/sqrt(chSqInt) # prior for sigma; cauchy = normal/sqrt(chi^2)
24 | # zInt ~ dnorm(0, .04)I(0,)
25 | # chSqInt ~ dgamma(0.5, 0.5) # chi^2 with 1 d.f.
26 | sd_int ~ dt(0, .04, 1)I(0,)
27 | 
28 | # sigma beta
29 | # sd_beta <- zbeta/sqrt(chSqbeta)
30 | # zbeta ~ dnorm(0, .04)I(0,)
31 | # chSqbeta ~ dgamma(0.5, 0.5)
32 | sd_beta ~ dt(0, .04, 1)I(0,)
33 | 
34 | # phi
35 | # phi <- zphi/sqrt(chSqphi)
36 | # zphi ~ dnorm(0, .04)I(0,)
37 | # chSqphi ~ dgamma(0.5, 0.5)
38 | phi ~ dt(0, .04, 1)I(0,)
39 | }
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/linregwithprior.stan:
--------------------------------------------------------------------------------
1 | data { // Data block; declarations only
2 | int N; // Sample size
3 | int k; // Dimension of model matrix
4 | matrix [N, k] X; // Model Matrix
5 | vector[N] y; // response
6 | }
7 | 
8 | /* transformed data { // Transformed data block; declarations and statements. None needed here.
9 | }
10 | */
11 | 
12 | parameters { // Parameters block; declarations only
13 | vector[k] beta; // coefficient vector
14 | real sigma; // error scale
15 | }
16 | 
17 | transformed parameters { // Transformed parameters block; declarations and statements.
18 | }
19 | 
20 | model { // Model block; declarations and statements.
21 | vector[N] mu;
22 | mu = X * beta; // creation of linear predictor
23 | 
24 | // priors
25 | beta ~ normal(0, 10);
26 | sigma ~ cauchy(0, 5); // With sigma bounded at 0, this is half-cauchy
27 | 
28 | // likelihood
29 | y ~ normal(mu, sigma);
30 | }
31 | 
32 | generated quantities { // Generated quantities block; declarations and statements.
33 | real rss;
34 | real totalss;
35 | real R2; // Calculate Rsq as a demonstration
36 | vector[N] mu;
37 | 
38 | mu = X * beta;
39 | rss = dot_self(mu-y);
40 | totalss = dot_self(y-mean(y));
41 | R2 = 1 - rss/totalss;
42 | }
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/lmbugs.bugs:
--------------------------------------------------------------------------------
1 | model {
2 | for (n in 1:N){
3 | mu[n] <- beta[1]*X[n,1] + beta[2]*X[n,2] + beta[3]*X[n,3] + beta[4]*X[n,4]
4 | y[n] ~ dnorm(mu[n], inv.sigma.sq)
5 | }
6 | 
7 | for (k in 1:K){
8 | beta[k] ~ dnorm(0, .001) # prior for reg coefs
9 | }
10 | 
11 | # Half-cauchy as in Gelman 2006
12 | # Scale parameter is 5, so precision of z = 1/5^2 = 0.04
13 | sigma.y <- abs(z)/sqrt(chSq) # prior for sigma; cauchy = normal/sqrt(chi^2)
14 | z ~ dnorm(0, .04)
15 | chSq ~ dgamma(0.5, 0.5) # chi^2 with 1 d.f.
16 | # sigma.y ~ dgamma(.001,.001) # prior for sigma; a typical approach used.
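# (the half-Cauchy construction above works because, for z ~ N(0, 5^2) and
# chSq ~ chi^2(1), |z|/sqrt(chSq) ~ half-Cauchy(0, 5))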
17 | inv.sigma.sq <- pow(sigma.y, -2) # precision 18 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/lmjags.jags: -------------------------------------------------------------------------------- 1 | model { 2 | for (n in 1:N){ 3 | mu[n] <- beta[1]*X[n,1] + beta[2]*X[n,2] + beta[3]*X[n,3] + beta[4]*X[n,4] 4 | y[n] ~ dnorm(mu[n], inv.sigma.sq) 5 | } 6 | 7 | for (k in 1:K){ 8 | beta[k] ~ dnorm(0, .001) # prior for reg coefs 9 | } 10 | 11 | # Half-cauchy as in Gelman 2006 12 | # Scale parameter is 5, so precision of z = 1/5^2 = 0.04 13 | z ~ dnorm(0, .04)I(0,) 14 | chSq ~ dgamma(0.5, 0.5) # chi^2 with 1 d.f. 15 | sigma.y <- z/sqrt(chSq) # prior for sigma; cauchy = normal/sqrt(chi^2) 16 | inv.sigma.sq <- pow(sigma.y, -2) # precision 17 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/multilevelmediation.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | vector[N] X; 4 | vector[N] Med; 5 | vector[N] y; 6 | int J; # number of groups 7 | int Group[N]; # Groups 8 | } 9 | 10 | parameters{ 11 | real alpha_xm; # mediator model reg parameters 12 | real beta_xm; 13 | real sigma_xm; 14 | 15 | real alpha; # main model reg parameters 16 | real beta1; 17 | real beta2; 18 | real sigma_y; 19 | 20 | real sigma_alpha_xm; # sd for ranef 21 | real sigma_alpha; 22 | 23 | vector[J] gammaAlpha_xm; 24 | vector[J] gammaAlpha; 25 | } 26 | 27 | model { 28 | vector[N] mu_y; # linear predictors for Y and M 29 | vector[N] mu_Med; 30 | 31 | # priors 32 | alpha_xm ~ normal(0, 10); 33 | beta_xm ~ normal(0, 10); 34 | sigma_xm ~ cauchy(0,5); 35 | 36 | alpha ~ normal(0, 10); 37 | beta1 ~ normal(0, 10); 38 | beta2 ~ normal(0, 10); 39 | sigma_y ~ cauchy(0,5); 40 | 41 | sigma_alpha_xm ~ cauchy(0,5); 42 | sigma_alpha ~ cauchy(0,5); 43 | 44 | for (j in 1:J){ 45 | gammaAlpha_xm[j] ~ normal(0, sigma_alpha_xm); 46 | gammaAlpha[j] ~ normal(0, sigma_alpha); 47 | } 48 | 49 | for (n in 1:N){ 50 | mu_Med[n] = alpha_xm + gammaAlpha_xm[Group[n]] + beta_xm*X[n]; 51 | mu_y[n] = alpha + gammaAlpha[Group[n]] + beta1*X[n] + beta2*Med[n] ; 52 | } 53 | 54 | 55 | # sampling 56 | Med ~ normal(mu_Med, sigma_xm); 57 | y ~ normal(mu_y, sigma_y); 58 | } 59 | 60 | generated quantities{ 61 | real indEffect; 62 | 63 | # since no random slope for beta2 64 | indEffect = beta_xm *beta2; 65 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/multilevelmediationYuanMacKinnon2009.bugs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Bayesian/StanBugsJags/multilevelmediationYuanMacKinnon2009.bugs -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/singlelevelmediationYuanMacKinnon2009.bugs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Bayesian/StanBugsJags/singlelevelmediationYuanMacKinnon2009.bugs -------------------------------------------------------------------------------- /ModelFitting/Bayesian/horseshoe/README.md: -------------------------------------------------------------------------------- 1 | These are the scripts 
regarding the horseshoe prior from: 2 | 3 | "Hierarchical Bayesian Survival Analysis and Projective Covariate Selection in Cardiovascular Event Risk Prediction" by Peltola et al. (2014) 4 | 5 | [link to paper](http://ceur-ws.org/Vol-1218/bmaw2014_paper_8.pdf) 6 | [link to scripts](http://becs.aalto.fi/en/research/bayes/diabcvd/) -------------------------------------------------------------------------------- /ModelFitting/Bayesian/horseshoe/wei_bg.stan: -------------------------------------------------------------------------------- 1 | /* Variable naming: 2 | obs = observed 3 | cen = (right) censored 4 | N = number of samples 5 | M = number of covariates 6 | bg = established risk (or protective) factors 7 | tau = scale parameter 8 | */ 9 | // Tomi Peltola, tomi.peltola@aalto.fi 10 | 11 | functions { 12 | vector sqrt_vec(vector x) { 13 | vector[dims(x)[1]] res; 14 | 15 | for (m in 1:dims(x)[1]){ 16 | res[m] <- sqrt(x[m]); 17 | } 18 | 19 | return res; 20 | } 21 | 22 | vector bg_prior_lp(real r_global, vector r_local) { 23 | r_global ~ normal(0.0, 10.0); 24 | r_local ~ inv_chi_square(1.0); 25 | 26 | return r_global * sqrt_vec(r_local); 27 | } 28 | } 29 | 30 | data { 31 | int Nobs; 32 | int Ncen; 33 | int M_bg; 34 | vector[Nobs] yobs; 35 | vector[Ncen] ycen; 36 | matrix[Nobs, M_bg] Xobs_bg; 37 | matrix[Ncen, M_bg] Xcen_bg; 38 | } 39 | 40 | transformed data { 41 | real tau_mu; 42 | real tau_al; 43 | 44 | tau_mu <- 10.0; 45 | tau_al <- 10.0; 46 | } 47 | 48 | parameters { 49 | real tau_s_bg_raw; 50 | vector[M_bg] tau_bg_raw; 51 | 52 | real alpha_raw; 53 | vector[M_bg] beta_bg_raw; 54 | 55 | real mu; 56 | } 57 | 58 | transformed parameters { 59 | vector[M_bg] beta_bg; 60 | real alpha; 61 | 62 | beta_bg <- bg_prior_lp(tau_s_bg_raw, tau_bg_raw) .* beta_bg_raw; 63 | alpha <- exp(tau_al * alpha_raw); 64 | } 65 | 66 | model { 67 | yobs ~ weibull(alpha, exp(-(mu + Xobs_bg * beta_bg)/alpha)); 68 | increment_log_prob(weibull_ccdf_log(ycen, alpha, exp(-(mu + Xcen_bg * beta_bg)/alpha))); 69 | 70 | beta_bg_raw ~ normal(0.0, 1.0); 71 | alpha_raw ~ normal(0.0, 1.0); 72 | 73 | mu ~ normal(0.0, tau_mu); 74 | } 75 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/horseshoe/wei_gau.stan: -------------------------------------------------------------------------------- 1 | /* Variable naming: 2 | obs = observed 3 | cen = (right) censored 4 | N = number of samples 5 | M = number of covariates 6 | bg = established risk (or protective) factors 7 | biom = candidate biomarkers (candidate risk factors) 8 | tau = scale parameter 9 | */ 10 | // Tomi Peltola, tomi.peltola@aalto.fi 11 | 12 | functions { 13 | vector sqrt_vec(vector x) { 14 | vector[dims(x)[1]] res; 15 | 16 | for (m in 1:dims(x)[1]){ 17 | res[m] <- sqrt(x[m]); 18 | } 19 | 20 | return res; 21 | } 22 | 23 | vector gau_prior_lp(real r1_global, real r2_global, vector ones_biom) { 24 | r1_global ~ normal(0.0, 1.0); 25 | r2_global ~ inv_gamma(0.5, 0.5); 26 | 27 | return (r1_global * sqrt(r2_global)) * ones_biom; 28 | } 29 | 30 | vector bg_prior_lp(real r_global, vector r_local) { 31 | r_global ~ normal(0.0, 10.0); 32 | r_local ~ inv_chi_square(1.0); 33 | 34 | return r_global * sqrt_vec(r_local); 35 | } 36 | } 37 | 38 | data { 39 | int Nobs; 40 | int Ncen; 41 | int M_bg; 42 | int M_biom; 43 | vector[Nobs] yobs; 44 | vector[Ncen] ycen; 45 | matrix[Nobs, M_bg] Xobs_bg; 46 | matrix[Ncen, M_bg] Xcen_bg; 47 | matrix[Nobs, M_biom] Xobs_biom; 48 | matrix[Ncen, M_biom] Xcen_biom; 49 | } 50 | 51 | transformed data { 52 | 
real tau_mu; 53 | real tau_al; 54 | vector[M_biom] ones_biom; 55 | 56 | tau_mu <- 10.0; 57 | tau_al <- 10.0; 58 | 59 | for (m in 1:M_biom) { 60 | ones_biom[m] <- 1.0; 61 | } 62 | } 63 | 64 | parameters { 65 | real tau_s_bg_raw; 66 | vector[M_bg] tau_bg_raw; 67 | 68 | real tau_s1_biom_raw; 69 | real tau_s2_biom_raw; 70 | 71 | real alpha_raw; 72 | vector[M_bg] beta_bg_raw; 73 | vector[M_biom] beta_biom_raw; 74 | 75 | real mu; 76 | } 77 | 78 | transformed parameters { 79 | vector[M_biom] beta_biom; 80 | vector[M_bg] beta_bg; 81 | real alpha; 82 | 83 | beta_biom <- gau_prior_lp(tau_s1_biom_raw, tau_s2_biom_raw, ones_biom) .* beta_biom_raw; 84 | beta_bg <- bg_prior_lp(tau_s_bg_raw, tau_bg_raw) .* beta_bg_raw; 85 | alpha <- exp(tau_al * alpha_raw); 86 | } 87 | 88 | model { 89 | yobs ~ weibull(alpha, exp(-(mu + Xobs_bg * beta_bg + Xobs_biom * beta_biom)/alpha)); 90 | increment_log_prob(weibull_ccdf_log(ycen, alpha, exp(-(mu + Xcen_bg * beta_bg + Xcen_biom * beta_biom)/alpha))); 91 | 92 | beta_biom_raw ~ normal(0.0, 1.0); 93 | beta_bg_raw ~ normal(0.0, 1.0); 94 | alpha_raw ~ normal(0.0, 1.0); 95 | 96 | mu ~ normal(0.0, tau_mu); 97 | } 98 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/horseshoe/wei_hs.stan: -------------------------------------------------------------------------------- 1 | /* Variable naming: 2 | obs = observed 3 | cen = (right) censored 4 | N = number of samples 5 | M = number of covariates 6 | bg = established risk (or protective) factors 7 | biom = candidate biomarkers (candidate risk factors) 8 | tau = scale parameter 9 | */ 10 | // Tomi Peltola, tomi.peltola@aalto.fi 11 | 12 | functions { 13 | vector sqrt_vec(vector x) { 14 | vector[dims(x)[1]] res; 15 | 16 | for (m in 1:dims(x)[1]){ 17 | res[m] <- sqrt(x[m]); 18 | } 19 | 20 | return res; 21 | } 22 | 23 | vector hs_prior_lp(real r1_global, real r2_global, vector r1_local, vector r2_local) { 24 | r1_global ~ normal(0.0, 1.0); 25 | r2_global ~ inv_gamma(0.5, 0.5); 26 | 27 | r1_local ~ normal(0.0, 1.0); 28 | r2_local ~ inv_gamma(0.5, 0.5); 29 | 30 | return (r1_global * sqrt(r2_global)) * r1_local .* sqrt_vec(r2_local); 31 | } 32 | 33 | vector bg_prior_lp(real r_global, vector r_local) { 34 | r_global ~ normal(0.0, 10.0); 35 | r_local ~ inv_chi_square(1.0); 36 | 37 | return r_global * sqrt_vec(r_local); 38 | } 39 | } 40 | 41 | data { 42 | int Nobs; 43 | int Ncen; 44 | int M_bg; 45 | int M_biom; 46 | vector[Nobs] yobs; 47 | vector[Ncen] ycen; 48 | matrix[Nobs, M_bg] Xobs_bg; 49 | matrix[Ncen, M_bg] Xcen_bg; 50 | matrix[Nobs, M_biom] Xobs_biom; 51 | matrix[Ncen, M_biom] Xcen_biom; 52 | } 53 | 54 | transformed data { 55 | real tau_mu; 56 | real tau_al; 57 | 58 | tau_mu <- 10.0; 59 | tau_al <- 10.0; 60 | } 61 | 62 | parameters { 63 | real tau_s_bg_raw; 64 | vector[M_bg] tau_bg_raw; 65 | 66 | real tau_s1_biom_raw; 67 | real tau_s2_biom_raw; 68 | vector[M_biom] tau1_biom_raw; 69 | vector[M_biom] tau2_biom_raw; 70 | 71 | real alpha_raw; 72 | vector[M_bg] beta_bg_raw; 73 | vector[M_biom] beta_biom_raw; 74 | 75 | real mu; 76 | } 77 | 78 | transformed parameters { 79 | vector[M_biom] beta_biom; 80 | vector[M_bg] beta_bg; 81 | real alpha; 82 | 83 | beta_biom <- hs_prior_lp(tau_s1_biom_raw, tau_s2_biom_raw, tau1_biom_raw, tau2_biom_raw) .* beta_biom_raw; 84 | beta_bg <- bg_prior_lp(tau_s_bg_raw, tau_bg_raw) .* beta_bg_raw; 85 | alpha <- exp(tau_al * alpha_raw); 86 | } 87 | 88 | model { 89 | yobs ~ weibull(alpha, exp(-(mu + Xobs_bg * beta_bg + Xobs_biom * beta_biom)/alpha)); 90 | 
increment_log_prob(weibull_ccdf_log(ycen, alpha, exp(-(mu + Xcen_bg * beta_bg + Xcen_biom * beta_biom)/alpha)));
91 | 
92 | beta_biom_raw ~ normal(0.0, 1.0);
93 | beta_bg_raw ~ normal(0.0, 1.0);
94 | alpha_raw ~ normal(0.0, 1.0);
95 | 
96 | mu ~ normal(0.0, tau_mu);
97 | }
98 | 
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/horseshoe/wei_lap.stan:
--------------------------------------------------------------------------------
1 | /* Variable naming:
2 | obs = observed
3 | cen = (right) censored
4 | N = number of samples
5 | M = number of covariates
6 | bg = established risk (or protective) factors
7 | biom = candidate biomarkers (candidate risk factors)
8 | tau = scale parameter
9 | */
10 | // Tomi Peltola, tomi.peltola@aalto.fi
11 | 
12 | functions {
13 | vector sqrt_vec(vector x) {
14 | vector[dims(x)[1]] res;
15 | 
16 | for (m in 1:dims(x)[1]){
17 | res[m] <- sqrt(x[m]);
18 | }
19 | 
20 | return res;
21 | }
22 | 
23 | vector lap_prior_lp(real r1_global, real r2_global, vector r_local) {
24 | r1_global ~ normal(0.0, 1.0);
25 | r2_global ~ inv_gamma(0.5, 0.5);
26 | 
27 | r_local ~ exponential(1.0);
28 | 
29 | return (r1_global * sqrt(r2_global)) * sqrt_vec(r_local);
30 | }
31 | 
32 | vector bg_prior_lp(real r_global, vector r_local) {
33 | r_global ~ normal(0.0, 10.0);
34 | r_local ~ inv_chi_square(1.0);
35 | 
36 | return r_global * sqrt_vec(r_local);
37 | }
38 | }
39 | 
40 | data {
41 | int Nobs;
42 | int Ncen;
43 | int M_bg;
44 | int M_biom;
45 | vector[Nobs] yobs;
46 | vector[Ncen] ycen;
47 | matrix[Nobs, M_bg] Xobs_bg;
48 | matrix[Ncen, M_bg] Xcen_bg;
49 | matrix[Nobs, M_biom] Xobs_biom;
50 | matrix[Ncen, M_biom] Xcen_biom;
51 | }
52 | 
53 | transformed data {
54 | real tau_mu;
55 | real tau_al;
56 | 
57 | tau_mu <- 10.0;
58 | tau_al <- 10.0;
59 | }
60 | 
61 | parameters {
62 | real tau_s_bg_raw;
63 | vector[M_bg] tau_bg_raw;
64 | 
65 | real tau_s1_biom_raw;
66 | real tau_s2_biom_raw;
67 | vector[M_biom] tau_biom_raw;
68 | 
69 | real alpha_raw;
70 | vector[M_bg] beta_bg_raw;
71 | vector[M_biom] beta_biom_raw;
72 | 
73 | real mu;
74 | }
75 | 
76 | transformed parameters {
77 | vector[M_biom] beta_biom;
78 | vector[M_bg] beta_bg;
79 | real alpha;
80 | 
81 | beta_biom <- lap_prior_lp(tau_s1_biom_raw, tau_s2_biom_raw, tau_biom_raw) .* beta_biom_raw;
82 | beta_bg <- bg_prior_lp(tau_s_bg_raw, tau_bg_raw) .* beta_bg_raw;
83 | alpha <- exp(tau_al * alpha_raw);
84 | }
85 | 
86 | model {
87 | yobs ~ weibull(alpha, exp(-(mu + Xobs_bg * beta_bg + Xobs_biom * beta_biom)/alpha));
88 | increment_log_prob(weibull_ccdf_log(ycen, alpha, exp(-(mu + Xcen_bg * beta_bg + Xcen_biom * beta_biom)/alpha)));
89 | 
90 | beta_biom_raw ~ normal(0.0, 1.0);
91 | beta_bg_raw ~ normal(0.0, 1.0);
92 | alpha_raw ~ normal(0.0, 1.0);
93 | 
94 | mu ~ normal(0.0, tau_mu);
95 | }
96 | 
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/jags_MixedModelBetaRegression.R:
--------------------------------------------------------------------------------
1 | ### The following is an attempt to reproduce the Stan approach to a mixed model
2 | ### with a beta distribution for the response in
3 | ### rstan_MixedModelBetaRegression.R. For more detail about what the JAGS code
4 | ### is doing, see the Stan code in that file. See the helpfile for
5 | ### GasolineYield for information on the data.
6 | 
7 | ### A couple of changes to note.
JAGS doesn't have the Cauchy distribution, so I
8 | ### show, in the code, the approach outlined in Gelman 2006. Instead of that,
9 | ### one can also use a similarly truncated t-distribution, which is what the
10 | ### code below actually uses. Unlike the Stan code, the variance components
11 | ### and phi priors are all half-Cauchy(0, 5). I'll have to do a little more
12 | ### playing with it, but it seemed initially that JAGS took a lot more
13 | ### iterations to start looking as healthy as the Stan results in terms of
14 | ### effective sample size (though this is the case for many models), having
15 | ### particular trouble with the random intercepts and variance estimate
16 | ### (though Stan had issues there too). But just for comparison's sake, it
17 | ### will reproduce the Stan parameter results using the same number of
18 | ### iterations, so I leave it shorter. Keep in mind also that we're not
19 | ### dealing with a lot of data here.
20 | 
21 | ##################
22 | ### Data setup ###
23 | ##################
24 | data('GasolineYield', package='betareg')
25 | jagsdat = list('yield'=GasolineYield$yield, 'tempCen'=c(scale(GasolineYield$temp, scale=F)),
26 | 'N'=nrow(GasolineYield), 'id'=as.numeric(GasolineYield$batch), L=10)
27 | 
28 | 
29 | #######################
30 | ### Jags Model Code ###
31 | #######################
32 | 
33 | # write to file
34 | sink('betaMixedModeljags.txt')
35 | cat(
36 | 'model {
37 | for (n in 1:N){
38 | logit(mu[n]) <- (Intercept + IntRE[id[n]]) + (betaTemp + SlopeRE[id[n]])*tempCen[n]
39 | A[n] <- mu[n]*phi
40 | B[n] <- (1.0-mu[n])*phi
41 | yield[n] ~ dbeta(A[n], B[n])
42 | }
43 | 
44 | Intercept ~ dnorm(0, 1/10^2)
45 | betaTemp ~ dnorm(0, 1)
46 | 
47 | for (l in 1:L){
48 | IntRE[l] <- gammaIntercept[l]*sd_int
49 | SlopeRE[l] <- gammaTemp[l]*sd_beta
50 | 
51 | gammaIntercept[l] ~ dnorm(0, 1)
52 | gammaTemp[l] ~ dnorm(0, 1)
53 | }
54 | 
55 | # Half-cauchy as in Gelman 2006
56 | # If scale parameter in cauchy is 5, precision of z = 1/5^2 = 0.04
57 | # sigma int
58 | # sd_int <- zInt/sqrt(chSqInt) # prior for sigma; cauchy = normal/sqrt(chi^2)
59 | # zInt ~ dnorm(0, .04)I(0,)
60 | # chSqInt ~ dgamma(0.5, 0.5) # chi^2 with 1 d.f.
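# equivalently, dt(0, .04, 1) is a t with 1 d.f. -- a Cauchy with precision
# 1/5^2, i.e. scale 5 -- and I(0,) truncates it to the positive half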
60 | sd_int ~ dt(0, .04, 1)I(0,) 61 | 62 | # sigma beta 63 | # sd_beta <- zbeta/sqrt(chSqbeta) 64 | # zbeta ~ dnorm(0, .04)I(0,) 65 | # chSqbeta ~ dgamma(0.5, 0.5) 66 | sd_beta ~ dt(0, .04, 1)I(0,) 67 | 68 | # phi 69 | # phi <- zphi/sqrt(chSqphi) 70 | # zphi ~ dnorm(0, .04)I(0,) 71 | # chSqphi ~ dgamma(0.5, 0.5) 72 | phi ~ dt(0, .04, 1)I(0,) 73 | }' 74 | ) 75 | sink() 76 | 77 | 78 | 79 | ##################### 80 | ### Run the model ### 81 | ##################### 82 | library(rjags) 83 | mixedbeta0 <- jags.model(file='betaMixedModeljags.txt', data=jagsdat, # inits=inits 84 | n.chains=4, n.adapt=2000) 85 | 86 | # update(mixedbetamod, 10000) 87 | 88 | mixedbeta = coda.samples(mixedbeta0, c('Intercept','betaTemp','IntRE', 'SlopeRE', 89 | 'phi', 'sd_int', 'sd_beta'), n.iter=200000, 90 | thin=200, n.chains=4) 91 | summary(mixedbeta) 92 | 93 | ### cleaner view of coefficients of interest 94 | round(summary(mixedbeta)[[1]], 4) 95 | 96 | ### effective sample size 97 | effectiveSize(mixedbeta) 98 | 99 | ### trace and density plots 100 | plot(mixedbeta) 101 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/README.md: -------------------------------------------------------------------------------- 1 | 2 | Multinomial logistic regression in Stan 3 | ======================================= 4 | 5 | Files included: 6 | 7 | - multinomial\_stan\_comparisons.R: Shows a variety of ways one could write a basic multinomial model with an eye toward speed comparisons. 8 | - mnl\_1 to 4: stan scripts used in the previous. 9 | - stan\_basic\_mnl\_conceptual.stan: This is straightforward conceptual stan code for a basic multinomial model. It duplicates the initial example in the manual. As noted there, without a suitable prior it is not identified 10 | - stan\_basic\_mnl\_altspecific.stan: This extends the standard multinomial model to one that can include both class specific and class constant effects. 
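A minimal sketch of running one of these with rstan (hypothetical objects: X is an N x D model matrix including an intercept column, y an integer response coded 1 to K):

library(rstan)
datalist = list(N = nrow(X), D = ncol(X), K = max(y), y = y, x = X)
fit = stan(file = 'mnl_1.stan', data = datalist, cores = 4)
print(fit, par = 'beta', digits = 3)
# multinomial_stan_comparisons.R runs this sort of thing across mnl_1 through mnl_4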
11 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/mnl_1.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; # number of classes 3 | int N; # nrow of x 4 | int D; # ncol of x 5 | int y[N]; # target as integer 6 | vector[D] x[N]; # array of D 7 | } 8 | 9 | transformed data { 10 | row_vector[D] zeros; # create reference level coefs of zero 11 | zeros = rep_row_vector(0, D); 12 | } 13 | 14 | parameters { 15 | matrix[K-1,D] beta_raw; # estimated coefs 16 | } 17 | 18 | transformed parameters{ 19 | matrix[K, D] beta; 20 | beta = append_row(zeros, beta_raw); 21 | } 22 | 23 | model { 24 | # prior 25 | to_vector(beta_raw) ~ normal(0, 10); 26 | 27 | # likelihood 28 | for (n in 1:N) 29 | y[n] ~ categorical_logit(beta * x[n]); 30 | } 31 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/mnl_2.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; # number of classes 3 | int N; # nrow of x 4 | int D; # ncol of x 5 | int y[N]; # target as integer 6 | matrix[N,D] x; 7 | } 8 | 9 | transformed data{ 10 | matrix[D,N] xt; 11 | row_vector[D] zeros; 12 | 13 | xt = x'; 14 | zeros = rep_row_vector(0, D); 15 | } 16 | 17 | parameters { 18 | matrix[K-1,D] beta_raw; 19 | } 20 | 21 | transformed parameters{ 22 | matrix[K, D] beta; 23 | beta = append_row(zeros, beta_raw); 24 | } 25 | 26 | model { 27 | matrix[K,N] L; 28 | 29 | L = beta * xt; 30 | to_vector(beta_raw) ~ normal(0, 10); 31 | 32 | for (n in 1:N) 33 | y[n] ~ categorical_logit(L[,n]); 34 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/mnl_3.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; 3 | int N; 4 | int D; 5 | int y[N]; 6 | matrix[N,D] x; 7 | } 8 | 9 | transformed data{ 10 | vector[D] zeros; 11 | 12 | zeros = rep_vector(0, D); 13 | } 14 | 15 | parameters { 16 | matrix[D,K-1] beta_raw; 17 | } 18 | 19 | transformed parameters{ 20 | matrix[D, K] beta; 21 | beta = append_col(zeros, beta_raw); 22 | } 23 | 24 | model { 25 | matrix[N, K] L; 26 | 27 | L = x * beta; 28 | to_vector(beta_raw) ~ normal(0, 10); 29 | 30 | for (n in 1:N) 31 | y[n] ~ categorical_logit(to_vector(L[n])); 32 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/mnl_4.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; 3 | int N; 4 | int D; 5 | int y[N]; 6 | matrix[N,D] x; 7 | } 8 | 9 | transformed data{ 10 | matrix[D,N] xt; 11 | 12 | xt = x'; 13 | } 14 | 15 | parameters { 16 | matrix[K-1,D] beta_raw; 17 | 18 | } 19 | 20 | transformed parameters{ 21 | matrix[K, D] beta; 22 | 23 | for (d in 1:D) { 24 | beta[1,d] = -sum(beta_raw[,d]); 25 | beta[2:K,d] = beta_raw[,d]; 26 | } 27 | } 28 | 29 | model { 30 | matrix[K,N] L; 31 | 32 | L = beta * xt; 33 | 34 | to_vector(beta_raw) ~ normal(0, 10); 35 | 36 | for (n in 1:N) 37 | y[n] ~ categorical_logit(L[,n]); 38 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/stan_basic_mnl_conceptual.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; 3 | int N; 4 | int D; 5 | int y[N]; 6 | vector[D] x[N]; 7 | } 8 | 9 | parameters { 10 | 
matrix[K,D] beta; 11 | } 12 | 13 | model { 14 | to_vector(beta) ~ normal(0, 10); 15 | 16 | for (n in 1:N) 17 | # y[n] ~ categorical(softmax(beta * x[n])); 18 | y[n] ~ categorical_logit(beta * x[n]); 19 | } 20 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/stan_mnl_altspecific.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; # number of choices 3 | int N; # number of individuals 4 | int D; # number of indiv specific vars 5 | int G; # number of alt specific vars 6 | int T; # number of alt constant vars 7 | 8 | int y[N*K]; # choices 9 | vector[N*K] choice; # choice made (logical) 10 | 11 | matrix[N,D] X; # data for indiv specific effects 12 | matrix[N*K, G] Y; # data for alt specific effects 13 | matrix[N*(K-1),T] Z; # data for alt constant effects 14 | 15 | } 16 | 17 | transformed data { 18 | 19 | } 20 | 21 | parameters { 22 | matrix[D, K-1] beta; # individual specific coefs 23 | matrix[G, K] gamma; # choice specific coefs for alt-specific variables 24 | vector[T] theta; # choice constant coefs for alt-specific variables 25 | 26 | } 27 | 28 | transformed parameters{ 29 | 30 | } 31 | 32 | model { 33 | matrix[N, K-1] Vx; # Utility for individual vars 34 | 35 | vector[N*K] Vy0; 36 | matrix[N, K-1] Vy; # Utility for alt-specific/alt-varying vars 37 | 38 | vector[N*(K-1)] Vz0; 39 | matrix[N, (K-1)] Vz; # Utility for alt-specific/alt-constant vars 40 | 41 | matrix[N,K-1] V; # combined utilities 42 | 43 | vector[N] baseProbVec; # reference group probabilities 44 | real ll0; # intermediate log likelihood 45 | real loglik; # final log likelihood 46 | 47 | 48 | to_vector(beta) ~ normal(0, 10); # diffuse priors on coefficients 49 | to_vector(gamma) ~ normal(0, 10); # diffuse priors on coefficients 50 | to_vector(theta) ~ normal(0, 10); # diffuse priors on coefficients 51 | 52 | 53 | Vx = X * beta; 54 | 55 | for(alt in 1:K){ 56 | vector[G] par; 57 | int start; 58 | int end; 59 | 60 | par = gamma[,alt]; 61 | start = N*alt-N+1; 62 | end = N*alt; 63 | Vy0[start:end] = Y[start:end,] * par; 64 | if(alt>1) Vy[,alt-1] = Vy0[start:end] - Vy0[1:N]; 65 | } 66 | 67 | Vz0 = Z * theta; 68 | 69 | for(alt in 1:(K-1)){ 70 | int start; 71 | int end; 72 | 73 | start = N*alt-N+1; 74 | end = N*alt; 75 | Vz[,alt] = Vz0[start:end]; 76 | } 77 | 78 | V = Vx + Vy + Vz; 79 | 80 | for(n in 1:N) baseProbVec[n] = 1/(1 + sum(exp(V[n]))); 81 | ll0 = dot_product(to_vector(V), choice[(N+1):(N*K)]); # just going to assume no neg index 82 | loglik = sum(log(baseProbVec)) + ll0; 83 | target += loglik; 84 | 85 | } 86 | 87 | 88 | generated quantities { 89 | matrix[N,K-1] fitted_nonref; 90 | vector[N] fitted_ref; 91 | matrix[N,K] fitted; 92 | 93 | matrix[N,K-1] Vx; # Utility for individual vars 94 | 95 | vector[N*K] Vy0; 96 | matrix[N,K-1] Vy; # Utility for alt-specific/alt-varying vars 97 | 98 | vector[N*(K-1)] Vz0; 99 | matrix[N, (K-1)] Vz; # Utility for alt-specific/alt-constant vars 100 | 101 | matrix[N,K-1] V; # combined utilities 102 | 103 | vector[N] baseProbVec; # reference group probabilities 104 | 105 | Vx = X * beta; 106 | 107 | for(alt in 1:K){ 108 | vector[G] par; 109 | int start; 110 | int end; 111 | 112 | par = gamma[,alt]; 113 | start = N*alt-N+1; 114 | end = N*alt; 115 | Vy0[start:end] = Y[start:end,] * par; 116 | if(alt>1) Vy[,alt-1] = Vy0[start:end] - Vy0[1:N]; 117 | } 118 | 119 | Vz0 = Z * theta; 120 | 121 | for(alt in 1:(K-1)){ 122 | int start; 123 | int end; 124 | 125 | start = N*alt-N+1; 
126 | end = N*alt; 127 | Vz[,alt] = Vz0[start:end]; 128 | } 129 | 130 | V = Vx + Vy + Vz; 131 | 132 | for(n in 1:N) baseProbVec[n] = 1/(1 + sum(exp(V[n]))); 133 | fitted_nonref = exp(V) .* rep_matrix(baseProbVec, K-1); 134 | for(n in 1:N) fitted_ref[n] = 1-sum(fitted_nonref[n]); 135 | fitted = append_col(fitted_ref, fitted_nonref); 136 | 137 | } 138 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/r2bugs.distributions.R: -------------------------------------------------------------------------------- 1 | # From Translating Probability Density Functions: From R to BUGS and Back Again 2 | # by David S. LeBauer, Michael C. Dietze, Benjamin M. Bolker 3 | 4 | r2bugs.distributions <- function(priors, direction = 'r2bugs') { 5 | priors$distn <- as.character(priors$distn) 6 | priors$parama <- as.numeric(priors$parama) 7 | priors$paramb <- as.numeric(priors$paramb) 8 | ## index dataframe according to distribution 9 | norm <- priors$distn %in% c('norm', 'lnorm') # these have same transform 10 | weib <- grepl("weib", priors$distn) # matches r and bugs version 11 | gamma <- priors$distn == 'gamma' 12 | chsq <- grepl("chisq", priors$distn) # matches r and bugs version 13 | bin <- priors$distn %in% c('binom', 'bin') # matches r and bugs version 14 | nbin <- priors$distn %in% c('nbinom', 'negbin') # matches r and bugs version 15 | ## Normal, log-Normal: Convert sd to precision 16 | exponent <- ifelse(direction == "r2bugs", -2, -0.5) 17 | priors$paramb[norm] <- priors$paramb[norm] ^ exponent 18 | ## Weibull 19 | if(direction == 'r2bugs'){ 20 | ## Convert R parameter b to BUGS parameter lambda by l = (1/b)^a 21 | priors$paramb[weib] <- (1 / priors$paramb[weib]) ^ priors$parama[weib] 22 | } else if (direction == 'bugs2r') { 23 | ## Convert BUGS parameter lambda to BUGS parameter b by b = l^(-1/a) 24 | priors$paramb[weib] <- priors$paramb[weib] ^ (- 1 / priors$parama[weib] ) 25 | } 26 | ## Reverse parameter order for binomial and negative binomial 27 | priors[bin | nbin, c('parama', 'paramb')] <- 28 | priors[bin | nbin, c('paramb', 'parama')] 29 | ## Translate distribution names 30 | if(direction == "r2bugs"){ 31 | priors$distn[weib] <- "weib" 32 | priors$distn[chsq] <- "chisqr" 33 | priors$distn[bin] <- "bin" 34 | priors$distn[nbin] <- "negbin" 35 | } else if(direction == "bugs2r"){ 36 | priors$distn[weib] <- "weibull" 37 | priors$distn[chsq] <- "chisq" 38 | priors$distn[bin] <- "binom" 39 | priors$distn[nbin] <- "nbinom" 40 | } 41 | return(priors) 42 | } 43 | 44 | ### Example ### 45 | r.distn <- data.frame(distn = "norm", parama = 10, paramb = 2) 46 | r2bugs.distributions(r.distn) -------------------------------------------------------------------------------- /ModelFitting/Bayesian/rstan_MixedModelSleepstudy.R: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------# 2 | # A mixed model via stan/rstan with comparison to lme4 output. In this model # 3 | # there is a random intercept as well as a random slope for the single predictor. 
# 4 | #---------------------------------------------------------------------------------# 5 | 6 | ############# 7 | ### Setup ### 8 | ############# 9 | 10 | ### Data ### 11 | library(lme4) 12 | data(sleepstudy) 13 | # ?sleepstudy 14 | 15 | # Create a model for later comparison 16 | mod_lme = lmer(Reaction ~ Days + (1 | Subject) + (0 + Days | Subject), sleepstudy) 17 | 18 | dat = list(N=nrow(sleepstudy), I=length(unique(sleepstudy$Subject)), 19 | Subject=as.numeric(sleepstudy$Subject), Days = sleepstudy$Days, 20 | RT=sleepstudy$Reaction) 21 | 22 | 23 | ### Stan code ### 24 | stanmodelcode = ' 25 | data { // data setup 26 | int<lower=1> N; // sample size 27 | int<lower=1> I; // number of subjects 28 | vector[N] RT; // Response: reaction time 29 | vector[N] Days; // Days in study 30 | int<lower=1, upper=I> Subject[N]; // Subject 31 | } 32 | 33 | transformed data { 34 | real startInt; // Create starting point for Intercept (mean day 0) 35 | 36 | startInt = 0; 37 | for (n in 1:N) 38 | if (Days[n] == 0) startInt = startInt + RT[n]/I; 39 | } 40 | 41 | parameters { 42 | real Intercept; // fixed effects 43 | real beta; 44 | real<lower=0> sd_int; // sd for ints and slopes 45 | real<lower=0> sd_beta; 46 | real<lower=0> sigma_y; // residual sd 47 | 48 | vector[I] gammaIntercept; // individual effects 49 | vector[I] gammaDays; 50 | } 51 | 52 | transformed parameters { 53 | vector[N] yhat; 54 | 55 | for (n in 1:N) // Linear predictor 56 | yhat[n] = gammaIntercept[Subject[n]] + gammaDays[Subject[n]] * Days[n]; 57 | } 58 | 59 | model { 60 | // priors 61 | Intercept ~ normal(startInt, 100); // example of weakly informative priors; 62 | beta ~ normal(0, 100); // remove to essentially duplicate lme4 via improper prior 63 | 64 | gammaIntercept ~ normal(Intercept, sd_int); 65 | gammaDays ~ normal(beta, sd_beta); 66 | 67 | sd_int ~ cauchy(0, 5); 68 | sd_beta ~ cauchy(0, 5); 69 | sigma_y ~ cauchy(0, 5); 70 | 71 | // likelihood 72 | RT ~ normal(yhat, sigma_y); 73 | } 74 | ' 75 | ############# 76 | ### Model ### 77 | ############# 78 | 79 | library(rstan) 80 | fit = stan(model_code = stanmodelcode, model_name = "example", 81 | data = dat, iter = 7000, warmup=2000, thin=20, chains = 4, 82 | verbose = F) 83 | 84 | ### Summarize 85 | print(fit, digits_summary=3, pars=c('Intercept','beta','sigma_y', 'sd_int', 'sd_beta'), 86 | probs = c(0, .025, .5, .975, 1)) 87 | 88 | ### Compare 89 | mod_lme 90 | 91 | print(fit, digits_summary=3, pars=c('gammaIntercept', 'gammaDays')) 92 | 93 | ### Diagnostic plots 94 | shinystan::launch_shinystan(fit) 95 | 96 | ############################### 97 | ### A parallelized approach ### 98 | ############################### 99 | 100 | fit2 = stan(model_code = stanmodelcode, model_name = "mixedreg", #init=0, 101 | fit = fit, # if using the same model code, this will use the previous compilation 102 | data = dat, iter = 7000, warmup=2000, thin=20, cores=4, 103 | verbose = T) 104 | 105 | 106 | # examine some diagnostics 107 | samplerpar = get_sampler_params(fit2)[[1]] 108 | summary(samplerpar) 109 | 110 | 111 | print(fit2, pars= c('Intercept','beta','sigma_y', 'sd_int', 'sd_beta','lp__'), digits=3, 112 | probs = c(.01, .025, .05, .5, .95, 0.975, .99)) 113 | 114 | # Compare again 115 | mod_lme 116 | 117 | # diagnostics 118 | shinystan::launch_shinystan(fit2) 119 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/rstan_MixtureModel.R: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------# 2 |
# Mixture model (draft); The Stan manual notes issues with mixture modeling, esp. # 3 | # label switching (as does the bugs book), but the code in the manual chapter on # 4 | # mixture modeling does not appear able to reproduce the old faithful mixture or # 5 | # even a simulated mixture of normals, even with some heavy hand-holding on # 6 | # starting points, limits, etc. On occasion the means can be recovered, but the # 7 | # variances are all over the place and notably influenced by the prior. The # 8 | # Stan manual chapter on problematic posteriors covers many of the issues. # 9 | # # 10 | # The following Stan code, however, is verbatim from GitHub and should work fine, # 11 | # albeit slowly. # 12 | # github stan/src/models/basic_estimators/normal_mixture_k_prop.stan # 13 | #---------------------------------------------------------------------------------# 14 | 15 | data(faithful) 16 | head(faithful) 17 | par(mfrow=c(1,2)) 18 | apply(faithful, 2, ggplot2::qplot, geom='density') 19 | layout(1) 20 | 21 | 22 | y = rnorm(500, c(50,80), c(5,10)) 23 | ggplot2::qplot(y, geom='density') 24 | psych::describe(data.frame(y[seq(1,500, 2)], y[seq(2,500, 2)])) 25 | 26 | 27 | # take your pick 28 | standat = list(N=nrow(faithful), K=2, y=faithful$waiting) 29 | 30 | standat = list(N=nrow(faithful), K=2, y=faithful$eruptions) 31 | 32 | standat = list(N=length(y), K=2, y=y) 33 | 34 | 35 | stanmodelcode = ' 36 | data { 37 | int<lower=1> K; # K components 38 | int<lower=1> N; # N observations 39 | real y[N]; # variable of interest 40 | } 41 | 42 | parameters { 43 | simplex[K] theta; # mixing proportions 44 | simplex[K] mu_prop; 45 | real mu_loc; 46 | real<lower=0> mu_scale; 47 | real<lower=0> sigma[K]; # sds of the components 48 | } 49 | 50 | transformed parameters { 51 | ordered[K] mu; 52 | mu = mu_loc + mu_scale * cumulative_sum(mu_prop); # means of the components 53 | } 54 | 55 | model { 56 | // prior 57 | mu_loc ~ cauchy(0,5); 58 | mu_scale ~ cauchy(0,5); 59 | sigma ~ cauchy(0,5); 60 | 61 | // likelihood 62 | { 63 | real ps[K]; 64 | vector[K] log_theta; 65 | log_theta = log(theta); 66 | 67 | for (n in 1:N) { 68 | for (k in 1:K) { 69 | ps[k] = log_theta[k] + normal_lpdf(y[n] | mu[k], sigma[k]); 70 | } 71 | target += log_sum_exp(ps); 72 | } 73 | } 74 | } 75 | ' 76 | 77 | 78 | ################ 79 | ### Test Run ### 80 | ################ 81 | library(rstan) 82 | 83 | # the following may take several minutes per chain depending on the data 84 | test = stan(model_code = stanmodelcode, model_name = "example", 85 | data = standat, iter = 7000, warmup=2000, thin=5, chains = 2, cores=2, 86 | verbose = F) 87 | 88 | # shinystan::launch_shinystan(test) 89 | print(test, digits=3) 90 | 91 | ### compare to flexmix: coefs in flexmod1 = mu in test 92 | library(flexmix) 93 | flexmod1 = flexmix(standat$y~1, k=2, control=list(tolerance=1e-12, iter.max=1000)) 94 | summary(flexmod1) 95 | parameters(flexmod1) 96 | 97 | 98 | ###################### 99 | ### Production run ### 100 | ###################### 101 | 102 | # This can take a notably long time (i.e. hours) depending on the data.
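# A side note on the sampler settings used here and below: heavy thinning like this is
# rarely necessary with Stan's NUTS sampler, so if run time is a concern, it may be
# worth trying fewer iterations without thinning before committing to a long run.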
103 | fit = stan(model_code = stanmodelcode, model_name = "mixture", fit = test, 104 | data = standat, iter = 62000, warmup=12000, thin=50, cores = 4, 105 | verbose = T) 106 | print(fit, digits=3) 107 | shinystan::launch_shinystan(fit) 108 | 109 | -------------------------------------------------------------------------------- /ModelFitting/EM Examples/EM algorithm for probit example.R: -------------------------------------------------------------------------------- 1 | ########################################################## 2 | ### The following regards models for a binary response ### 3 | ### See Murphy, 2012, Machine Learning: A Probabilistic ### 4 | ### Perspective, Chapter 11.4. ### 5 | ########################################################## 6 | 7 | mydata = haven::read_dta("https://stats.idre.ucla.edu/stat/stata/dae/binary.dta") 8 | 9 | ################################# 10 | ### glm for a standard probit ### 11 | ################################# 12 | 13 | myprobit = glm(admit ~ gre + gpa + rank, family=binomial(link = "probit"), control=list(maxit=500, epsilon=1e-8), data = mydata) 14 | 15 | ## model summary 16 | summary(myprobit) 17 | coefsMAIN = coef(myprobit) 18 | 19 | ### input data ### 20 | X = as.matrix(cbind(1, mydata[,2:4])) 21 | y = mydata$admit # extract as a vector; with haven, mydata[,1] would remain a tibble 22 | init = c(0,0,0,0) 23 | 24 | ########################################## 25 | ### MLE function for a standard probit ### 26 | ########################################## 27 | 28 | myprobMLE = function(params, X, y){ 29 | # Arguments are starting parameters (coefficients), model matrix, response 30 | 31 | b = params 32 | mu = X%*%b 33 | ll = sum(y*pnorm(mu, log.p=T) + (1-y)*pnorm(-mu, log.p=T)) # compute the log likelihood 34 | ll 35 | } 36 | 37 | ### Fit with optim 38 | outMLE = optim(init, myprobMLE, X=X, y=y, control=list(fnscale=-1, maxit=1000, reltol=1e-8)) # make tolerance really low to duplicate glm result 39 | # outMLE 40 | coefsMLE = outMLE$par 41 | 42 | ### compare 43 | rbind(coefsMLE, coefsMAIN) 44 | 45 | ################################################# 46 | ### EM for latent variable approach to probit ### 47 | ################################################# 48 | 49 | probEM = function(params, X, y, tol=.00001, maxits=100, showits=T){ 50 | # Arguments are starting parameters (coefficients), model matrix, response, 51 | # tolerance, maximum iterations, and whether to show iterations 52 | 53 | # starting points 54 | b = params 55 | mu = X%*%b 56 | it = 0 57 | converged = FALSE 58 | z = rnorm(length(y)) # z is the latent variable ~N(0,1) 59 | 60 | if (showits) # Show iterations 61 | cat(paste("Iterations of EM:", "\n")) 62 | while ((!converged) & (it < maxits)) { # while no convergence and we haven't reached our max iterations do this stuff 63 | zOld = z # create 'old' values for comparison 64 | 65 | z = ifelse(y==1, mu+dnorm(mu)/pnorm(mu), mu-dnorm(mu)/pnorm(-mu)) # E step: create a new z based on current values 66 | 67 | b = solve(t(X)%*%X) %*% t(X)%*%z # M step: estimate b 68 | mu = X%*%b 69 | 70 | ll = sum(y*pnorm(mu, log.p=T) + (1-y)*pnorm(-mu, log.p=T)) 71 | 72 | it = it + 1 73 | if (showits & (it == 1 | it%%5 == 0)) 74 | cat(paste(format(it), "...", "\n", sep = "")) 75 | converged = max(abs(zOld - z)) <= tol 76 | } 77 | 78 | if (showits) # Show last iteration 79 | cat(paste0(format(it), "...", "\n")) 80 | 81 | return(list(b=t(b), ll=ll )) 82 | } 83 | 84 | outEM = probEM(params=init, X=X, y=y, tol=1e-8, maxit=100); outEM # can lower tolerance to duplicate glm result 85 | coefsEM = outEM$b 86 | 87 | ### compare 88 |
rbind(coefsMAIN, coefsMLE, coefsEM) 89 | rbind(logLik(myprobit), outMLE$value, outEM$ll) 90 | 91 | 92 | ### Show estimates over niter iterations and visualize 93 | X2 = X; X2[,2:3] = scale(X2[,2:3]) 94 | niter = 20 95 | outEM = sapply( 1:niter, function(x) probEM(params=init, X=X2, y=y, tol=1e-8, maxit=x, showits=F)$b ) 96 | colnames(outEM) = paste0("iter", 1:niter) 97 | 98 | library(ggplot2); library(reshape2) 99 | gdat = melt(outEM); colnames(gdat) = c('coef', 'iter', 'value') 100 | gdat$iter2 = rep(1:niter, e=4) 101 | gdat$coef = factor(gdat$coef, labels=c('Intercept','gre','gpa','rank')) 102 | str(gdat) 103 | 104 | ggplot(aes(x=iter2, y=value), data=gdat) + 105 | geom_line(aes(group=coef, color=coef)) + 106 | theme_minimal() -------------------------------------------------------------------------------- /ModelFitting/EM Examples/EM for pca.R: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------# 2 | # The following is an EM algorithm for principal components analysis. # 3 | # See Murphy, 2012, Machine Learning: A Probabilistic Perspective, # 4 | # 12.2.5. Some of the constructed output is based on the pca function # 5 | # used below. # 6 | # --------------------------------------------------------------------# 7 | 8 | ##################### 9 | ### Main Function ### 10 | ##################### 11 | PCAEM = function(X, nComp=2, tol=.00001, maxits=100, showits=T){ 12 | # Arguments X: numeric data, nComp: number of components 13 | # tol = tolerance level, maxits: maximum iterations, showits: show iterations 14 | require(pracma) # for an orthonormal basis of W; the pcaMethods package has one as well 15 | 16 | # starting points and other initializations 17 | N = nrow(X) 18 | D = ncol(X) 19 | L = nComp 20 | Xt = t(X) 21 | Z = t(replicate(L, rnorm(N))) # latent variables 22 | W = replicate(L, rnorm(D)) # loadings 23 | 24 | it = 0 25 | converged = FALSE 26 | 27 | if (showits) 28 | cat(paste("Iterations of EM:", "\n")) 29 | while ((!converged) & (it < maxits)) { # while no convergence and we haven't reached our max iterations do this stuff 30 | Z.old = Z # create 'old' values for comparison 31 | Z = solve(t(W)%*%W) %*% crossprod(W, Xt) # E 32 | W = Xt%*%t(Z) %*% solve(tcrossprod(Z)) # M 33 | 34 | it = it + 1 35 | if (showits & (it == 1 | it%%5 == 0)) # if showits, show first and every 5th iteration 36 | cat(paste(format(it), "...", "\n", sep = "")) 37 | converged = max(abs(Z.old-Z)) <= tol 38 | } 39 | 40 | # calculate reconstruction error 41 | Xrecon = W %*% Z 42 | reconerr = sum((Xrecon-t(X))^2) 43 | 44 | # orthogonalize 45 | W = orth(W) 46 | evs = eigen(cov(X %*% W)) 47 | evals = evs$values 48 | evecs = evs$vectors 49 | 50 | W = W %*% evecs 51 | Z = X %*% W 52 | 53 | if (showits) # Show last iteration 54 | cat(paste0(format(it), "...", "\n")) 55 | 56 | return(list(scores=Z, loadings=W, reconerr=reconerr, Xrecon=t(Xrecon))) 57 | } 58 | 59 | ############### 60 | ### Example ### 61 | ############### 62 | 63 | ### Get data and run 64 | # state.x77 is the data; various state demographics 65 | X = scale(state.x77) 66 | outEM = PCAEM(X=X, nComp=2, tol=1e-12, maxit=1000) 67 | outEM 68 | 69 | # Extract reconstructed values and loadings for comparison 70 | Xrecon = outEM$Xrecon 71 | loadingsEM = outEM$loadings 72 | scoresEM = outEM$scores 73 | 74 | # mean squared reconstruction error 75 | mean((Xrecon-X)^2) # outEM$reconerr/prod(dim(X)) 76 | 77 | 78 | ### compare results to output from pcaMethods; note that signs for loadings/scores may
be different 79 | library(pcaMethods) 80 | outpcam = pca(X, nPcs=2, method='svd', scale='none', center=F) 81 | loadings_pcam = loadings(outpcam) 82 | scores_pcam = scores(outpcam) 83 | 84 | # compare loadings and scores 85 | sum((abs(loadings_pcam)-abs(loadingsEM))^2) 86 | abs(round(cbind(scores_pcam, scoresEM), 2)) 87 | 88 | # compare reconstructed data sets 89 | Xrecon2 = scores_pcam %*% t(loadings_pcam) 90 | mean((Xrecon2-X)^2) 91 | mean(abs(Xrecon2-Xrecon)) 92 | 93 | # plots 94 | plot(Xrecon2[,1], X[,1]) 95 | plot(Xrecon2[,2], X[,2]) 96 | 97 | plot(Xrecon[,1], Xrecon2[,1]) 98 | -------------------------------------------------------------------------------- /ModelFitting/EM Examples/EM for state space unobserved components.R: -------------------------------------------------------------------------------- 1 | ### The following regards chapter 11 in Statistical Modeling and Computation, 2 | ### the first example for an unobserved components model. The data regards 3 | ### inflation based on the U.S. consumer price index (infl = 4 | ### 400*log(cpi_t/cpi_{t-1})), from the second quarter of 1947 to the second 5 | ### quarter of 2011. You can acquire the data here 6 | ### (http://www.maths.uq.edu.au/~kroese/statbook/Statespace/USCPI.csv) or in 7 | ### Datasets repo. just note that it has 2 mystery columns and one mystery row 8 | ### presumably supplied by Excel. You can also get the CPI data yourself at 9 | ### http://www.bls.gov/cpi/ in a frustrating fashion, or in a much easier 10 | ### fashion here 11 | ### http://research.stlouisfed.org/fred2/series/CPIAUCSL/downloaddata. 12 | 13 | 14 | ### For the following I use n instead of t or T because those are transpose and 15 | ### TRUE in R. The model is basically y = τ + ϵ, with ϵ ~ N(0, σ^2), 16 | ### and τ = τ_{n-1} + υ_n with υ ~ N(0, ω^2). Thus each y is 17 | ### associated with a latent variable that follows a random walk over time. 18 | ### ω^2 serves as a smoothing parameter, which itself may be estimated but 19 | ### which is fixed in the following. See the text for more details. 
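### As a quick illustration of the data generating process (a toy simulation, not
### from the text, with arbitrary variance values), one can draw from the model
### directly: tau is a random walk, and y adds observation noise around it.
set.seed(123)
tau_sim = cumsum(rnorm(200, 0, sqrt(.5)))   # tau_n = tau_{n-1} + upsilon_n
y_sim   = tau_sim + rnorm(200, 0, sqrt(2))  # y_n = tau_n + epsilon_n
plot(y_sim, pch = 19, col = 'gray70'); lines(tau_sim, col = '#ff5500')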
20 | 21 | d = read.csv('https://raw.githubusercontent.com/m-clark/Datasets/master/us%20cpi/USCPI.csv', header=F) 22 | inflation = d[,1] 23 | summary(inflation) 24 | 25 | 26 | ################################ 27 | ### EM for state space model ### 28 | ################################ 29 | statespaceEM = function( 30 | params, 31 | y, 32 | omega2_0, 33 | omega2, 34 | tol = .00001, 35 | maxits = 100, 36 | showits = T 37 | ) { 38 | 39 | # Arguments are starting parameters (variance as 'sigma2'), data, tolerance, 40 | # maximum iterations, and whether to show iterations 41 | 42 | # Not really needed here, but would be a good idea generally to take advantage 43 | # of sparse representation for large data 44 | require(spam) 45 | 46 | # Starting points 47 | n = length(y) 48 | sigma2 = params$sigma2 49 | 50 | # Other initializations 51 | H = diag(n) 52 | for (i in 1:(ncol(H) - 1)) { 53 | H[i + 1, i] = -1 54 | } 55 | 56 | Omega2 = as.spam(diag(omega2, n)) 57 | Omega2[1, 1] = omega2_0 58 | H = as.spam(H) 59 | HinvOmega2H = t(H) %*% chol2inv(chol(Omega2)) %*% H # tau ~ N(0, HinvOmega2H^-1) 60 | 61 | 62 | it = 0 63 | converged = FALSE 64 | 65 | if (showits) # Show iterations 66 | cat(paste("Iterations of EM:", "\n")) 67 | 68 | while ((!converged) & (it < maxits)) { 69 | sigma2Old = sigma2[1] 70 | Sigma2invOld = diag(n)/sigma2Old 71 | 72 | K = HinvOmega2H + Sigma2invOld # E 73 | tau = solve(K, y/sigma2Old) # tau|y, sigma2_{n-1}, omega2 ~ N(0, K^-1) 74 | 75 | K_inv_tr = sum(1/eigen(K)$values) 76 | 77 | sigma2 = 1/n * (K_inv_tr + crossprod(y-tau)) # M 78 | 79 | converged = max(abs(sigma2 - sigma2Old)) <= tol 80 | 81 | # if showits true, & it = 1 or divisible by 5, print message 82 | it = it + 1 83 | if (showits & (it == 1 | it%%5 == 0)) 84 | cat(paste(format(it), "...", "\n", sep = "")) 85 | } 86 | 87 | Kfinal = HinvOmega2H + diag(n) / sigma2[1] 88 | 89 | taufinal = solve(Kfinal, (y / sigma2[1])) # use the final sigma2 for the final tau estimate 90 | 91 | out = list(sigma2 = sigma2, tau = taufinal) 92 | } 93 | 94 | # debugonce(statespaceEM) 95 | ssMod_1 = statespaceEM( 96 | params = data.frame(sigma2 = var(inflation)), 97 | y = inflation, 98 | tol = 1e-4, 99 | omega2_0 = 9, 100 | omega2 = 1 ^ 2 101 | ) 102 | 103 | ssMod_.5 = statespaceEM( 104 | params = data.frame(sigma2 = var(inflation)), 105 | y = inflation, 106 | tol = 1e-4, 107 | omega2_0 = 9, 108 | omega2 = .5 ^ 2 109 | ) 110 | 111 | ssMod_1$sigma2 112 | ssMod_.5$sigma2 113 | 114 | 115 | library(lubridate) 116 | series = ymd(paste0(rep(1947:2014, e = 4), '-', c('01', '04', '07', '10'), '-', '01')) 117 | seriestext = series[1:length(inflation)] 118 | 119 | # fig.
11.1 in the text 120 | 121 | library(tidyverse) 122 | 123 | data.frame( 124 | series = series[1:length(inflation)], 125 | inflation, 126 | Mod_1 = ssMod_1$tau, 127 | Mod_.5 = ssMod_.5$tau 128 | ) %>% 129 | ggplot(aes(x = series, y = inflation)) + 130 | geom_point(color = 'gray50') + 131 | geom_line(aes(y = Mod_1), color = '#ff5500') + 132 | geom_line(aes(y = Mod_.5), color = 'skyblue3') + 133 | scale_x_date(date_breaks = '10 years') + 134 | theme_light() 135 | 136 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/diag.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/diag.html -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/fitCSgam-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/fitCSgam-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/gamSleepStudy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/gamSleepStudy-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/gammSleepStudy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/gammSleepStudy-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-1.png 
-------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-2.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-latex/fitCSgam-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-latex/fitCSgam-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-latex/gammSleepStudy-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-latex/gammSleepStudy-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/anovamixed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/anovamixed.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/lavaanmod-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/lavaanmod-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/randomIntsSlopesMods-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/randomIntsSlopesMods-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/visualizeTrends-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/visualizeTrends-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/growthvsMixed_EstResults.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/growthvsMixed_EstResults.RData 
-------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepModFits-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepModFits-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepModFitsReduced-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepModFitsReduced-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepstudyPlot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepstudyPlot-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepModFits-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepModFits-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepModFitsReduced-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepModFitsReduced-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepstudyPlot-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepstudyPlot-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepModFits-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepModFits-1.png 
-------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepModFitsReduced-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepModFitsReduced-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepstudyPlot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepstudyPlot-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/one_factor_RE.R: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | ### An approach for one factor random effects model via maximum likelihood in R ### 3 | ### Matlab and Julia; It's based on Statistical Modeling and Computation (2014) ### 4 | ### Chapter 10, example 10.10; Unfortunately I did this before knowing they had ### 5 | ### both matlab and R code on their website, though the R code here is a little ### 6 | ### cleaner and has comments. The data regards crop yield from 10 randomly ### 7 | ### selected locations and three collections at each. See one_factor_RE.m and ### 8 | ### one_factor_RE.jl for the related Matlab and Julia files, and the respective ### 9 | ### twofactorRE.* for the associated two factor random effects examples. 
### 10 | ################################################################################### 11 | 12 | 13 | 14 | ##################### 15 | ### Main function ### 16 | ##################### 17 | 18 | one_factor_re_loglike = function(mu, sigma2_mu, sigma2){ 19 | # Args are mu: intercept; sigma2_mu: variance of intercept; sigma2: residual 20 | # variance of y 21 | # I follow their notation for consistency 22 | d = nrow(y) 23 | ni = ncol(y) 24 | 25 | # covariance matrix of observations 26 | Sigmai = sigma2 * diag(ni) + sigma2_mu * matrix(1, ni, ni) 27 | 28 | # log likelihood 29 | l = rep(NA, 10) 30 | # iterate over the rows 31 | for(i in 1:d){ 32 | l[i] = .5 * t(y[i, ] - mu) %*% chol2inv(chol(Sigmai)) %*% (y[i, ] - mu) 33 | } 34 | 35 | ll = -(ni*d) / 2*log(2*pi) - d / 2*log(det(Sigmai)) - sum(l) 36 | 37 | return(-ll) 38 | } 39 | 40 | 41 | ################### 42 | ### Data set up ### 43 | ################### 44 | y = matrix(c(22.6,20.5,20.8, 45 | 22.6,21.2,20.5, 46 | 17.3,16.2,16.6, 47 | 21.4,23.7,23.2, 48 | 20.9,22.2,22.6, 49 | 14.5,10.5,12.3, 50 | 20.8,19.1,21.3, 51 | 17.4,18.6,18.6, 52 | 25.1,24.8,24.9, 53 | 14.9,16.3,16.6), 10, 3, byrow=T) 54 | 55 | 56 | ################################ 57 | ### Starting values and test ### 58 | ################################ 59 | 60 | starts = list( 61 | mu = mean(y), 62 | sigma2_mu = var(rowMeans(y)), 63 | sigma2 = mean(apply(y, 1, var)) 64 | ) 65 | 66 | ### test 67 | one_factor_re_loglike(starts[[1]], starts[[2]], starts[[3]]) 68 | 69 | 70 | ####################### 71 | ### Run and compare ### 72 | ####################### 73 | 74 | ### bbmle has mle2 function for maximum likelihood estimation based on 75 | ### underlying R functions like optim. LBFGS-B is used to place lower bounds on 76 | ### the variance estimates 77 | library(bbmle) 78 | 79 | mlout = mle2( 80 | one_factor_re_loglike, 81 | start = starts, 82 | method = 'L-BFGS-B', 83 | lower = c( 84 | mu = -Inf, 85 | sigma2_mu = 0, 86 | sigma2 = 0 87 | ), 88 | trace = T 89 | ) 90 | 91 | ### Compare 92 | library(lme4) 93 | library(tidyverse) 94 | 95 | d = data.frame(y) %>% 96 | pivot_longer(everything(), names_to = 'x', values_to = 'value') %>% 97 | arrange(x) %>% 98 | group_by(x) %>% 99 | mutate(group = 1:n()) 100 | 101 | lme = lmer(value ~ 1 | group, data = d, REML = F) 102 | 103 | summary(mlout) 104 | summary(lme) 105 | -2 * logLik(lme) 106 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/one_factor_RE.jl: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | ### see onefactorRE.R for details and onefactorRE.m for matlab code. The ### 3 | ### commented part of the function reflects the R code. 
### 4 | ############################################################################ 5 | 6 | 7 | 8 | ##################### 9 | ### Main function ### 10 | ##################### 11 | 12 | function one_factor_re_loglike(par::Vector) 13 | d, ni = size(y) 14 | mu = par[1] 15 | sigma2_mu = par[2] 16 | sigma2 = par[3] 17 | 18 | Sigmai = sigma2*eye(ni) + sigma2_mu*ones(ni,ni) 19 | l = -(ni*d)/2*log(2*pi) - d/2*log(det(Sigmai)) 20 | 21 | for i in 1:d 22 | yi = y[i,:]' 23 | l = l - .5(yi-mu)' * (Sigmai\(yi-mu)) 24 | end 25 | 26 | # l = zeros(10) 27 | 28 | # for i in 1:d 29 | # yi = y[i,:]' 30 | # l[i,:] = .5* (yi - mu)' * (Sigmai\(yi - mu)) 31 | # end 32 | # l = -(ni*d)/2*log(2*pi) - d/2*log(det(Sigmai)) - sum(l) 33 | 34 | l = -l[1] # having to do this line hurts 35 | return l 36 | end 37 | 38 | 39 | ################### 40 | ### Data set up ### 41 | ################### 42 | y = [22.6 20.5 20.8 43 | 22.6 21.2 20.5 44 | 17.3 16.2 16.6 45 | 21.4 23.7 23.2 46 | 20.9 22.2 22.6 47 | 14.5 10.5 12.3 48 | 20.8 19.1 21.3 49 | 17.4 18.6 18.6 50 | 25.1 24.8 24.9 51 | 14.9 16.3 16.6] 52 | 53 | 54 | ################################ 55 | ### Starting values and test ### 56 | ################################ 57 | mu0 = mean(y) 58 | sigma2_mu0 = var(mean(y, 2)) 59 | sigma20 = mean(var(y, 2)) 60 | theta0 = [mu0, sigma2_mu0, sigma20] 61 | 62 | ### test 63 | one_factor_re_loglike(theta0) 64 | 65 | 66 | ########### 67 | ### Run ### 68 | ########### 69 | using Optim 70 | res = optimize(one_factor_re_loglike, theta0, method=:l_bfgs) 71 | 72 | 73 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/one_factor_RE.m: -------------------------------------------------------------------------------- 1 | % matlab from Statistical Modeling and Computation (2014 p 311). See the 2 | % associated one_factor_RE.R file for details. 3 | 4 | function l = one_factor_re_loglike(mu, sigma2_mu, sigma2, y) 5 | [d ni] = size(y); 6 | Sigmai = sigma2*eye(ni) + sigma2_mu*ones(ni,ni); 7 | l = -(ni*d) / 2*log(2*pi) - d / 2*log(det(Sigmai)); 8 | for i=1:d 9 | yi = y(i, :)'; 10 | l = l - .5*(yi - mu)' * (Sigmai\(yi - mu)); 11 | end 12 | end 13 | 14 | 15 | y = [22.6 20.5 20.8; 16 | 22.6 21.2 20.5; 17 | 17.3 16.2 16.6; 18 | 21.4 23.7 23.2; 19 | 20.9 22.2 22.6; 20 | 14.5 10.5 12.3; 21 | 20.8 19.1 21.3; 22 | 17.4 18.6 18.6; 23 | 25.1 24.8 24.9; 24 | 14.9 16.3 16.6]; 25 | 26 | 27 | f = @(theta) -one_factor_re_loglike(theta(1), theta(2), theta(3), y); 28 | ybar = mean(y, 2); 29 | theta0 = [mean(ybar) var(ybar) mean(var(y, 0, 2))]; 30 | thetahat = fminsearch(f, theta0); 31 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/twofactorRE.R: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | ### An approach for two factor random effects model via maximum likelihood in R ### 3 | ### Matlab and Julia; It's based on Statistical Modeling and Computation (2014) ### 4 | ### Chapter 10, example 10.10; The data regards the breeding value of a set of ### 5 | ### five sires in raising pigs. Each sire is mated to a random group of dams, ### 6 | ### with the response being the average daily weight gain in pounds of two ### 7 | ### piglets in each litter. See one_factor_RE.R for a one factor model, and ### 8 | ### two_factor_RE.m and two_factor_RE.jl for the matlab and julia versions of ### 9 | ### this example.
Note that the text has a typo on the sigma2 variance estimate ### 10 | ### (value should be .0023 not .023). ### 11 | ################################################################################### 12 | 13 | 14 | 15 | ##################### 16 | ### Main function ### 17 | ##################### 18 | # the function takes the log variances eta* as input to keep positive 19 | sfran_loglike = function(mu, eta_alpha, eta_gamma, eta) { 20 | # Args 21 | # mu: intercept 22 | # eta_alpha: log variance of random effect one 23 | # eta_gamma: log variance of random effect two 24 | # eta: log residual variance of y 25 | 26 | sigma2_alpha = exp(eta_alpha) 27 | sigma2_gamma = exp(eta_gamma) 28 | sigma2 = exp(eta) 29 | n = length(y) 30 | 31 | # covariance matrix of observations 32 | Sigma = sigma2 * diag(n) + sigma2_alpha * tcrossprod(Xalpha) + 33 | sigma2_gamma * tcrossprod(Xgamma) 34 | 35 | 36 | # log likelihood 37 | ll = -n / 2 * log(2 * pi) - sum(log(diag(chol(Sigma)))) - 38 | .5 * t(y - mu) %*% chol2inv(chol(Sigma)) %*% (y - mu) 39 | 40 | return(-ll) 41 | } 42 | 43 | 44 | ################### 45 | ### Data set up ### 46 | ################### 47 | y = c(1.39,1.29,1.12,1.16,1.52,1.62,1.88,1.87,1.24,1.18, 48 | .95,.96,.82,.92,1.18,1.20,1.47,1.41,1.57,1.65) 49 | 50 | # for use in lme4, but also a more conceptual representation of the data 51 | d = expand.grid(sire = rep(1:5, 2), dam = 1:2) 52 | d = data.frame(d[order(d$sire), ], y) 53 | 54 | 55 | ################################ 56 | ### Starting values and test ### 57 | ################################ 58 | # note the log scale, since the function exponentiates the eta* arguments 59 | starts = list( 60 | mu = mean(y), 61 | eta_alpha = log(var(tapply(y, d$sire, mean))), 62 | eta_gamma = log(var(y) / 3), 63 | eta = log(var(y) / 3) 64 | ) 65 | 66 | Xalpha = diag(5) %x% rep(1,4) 67 | 68 | Xgamma = diag(10) %x% rep(1,2) 69 | 70 | # test 71 | sfran_loglike(starts[[1]], starts[[2]], starts[[3]], starts[[4]]) 72 | 73 | 74 | ####################### 75 | ### Run and compare ### 76 | ####################### 77 | library(bbmle) 78 | 79 | mlout = mle2(sfran_loglike, start=starts, method='BFGS') 80 | 81 | ### lme4 comparison 82 | library(lme4) 83 | 84 | lme = lmer(y ~ (1 | sire) + (1 | dam:sire), d, REML = F) 85 | 86 | summary(mlout) 87 | exp(coef(mlout)[-1]) 88 | 89 | summary(lme) 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/twofactorRE.jl: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | ### see twofactorRE.R for details and twofactorRE.m for matlab code.
### 3 | ############################################################################ 4 | 5 | 6 | 7 | function sfran2_loglike(par::Vector) 8 | n = length(y) 9 | mu = par[1] 10 | 11 | sigma2_alpha = exp(par[2]) 12 | sigma2_gamma = exp(par[3]) 13 | sigma2 = exp(par[4]) 14 | 15 | Sigma = sigma2*eye(n) + sigma2_alpha*(Xalpha * Xalpha') + sigma2_gamma * (Xgamma * Xgamma') 16 | 17 | l = -n/2*log(2*pi) - sum(log(diag(chol(Sigma)))) - .5*(y-mu)' * (Sigma\(y-mu)) 18 | 19 | l = -l[1] 20 | return l 21 | end 22 | 23 | 24 | ################## 25 | ### Data setup ### 26 | ################## 27 | y = [1.39,1.29,1.12,1.16,1.52,1.62,1.88,1.87,1.24,1.18, 28 | .95,.96,.82,.92,1.18,1.20,1.47,1.41,1.57,1.65] 29 | 30 | # See the R file for a conceptual data representation 31 | 32 | Xalpha = kron(eye(5), ones(4,1)) 33 | 34 | Xgamma = kron(eye(10), ones(2,1)) 35 | 36 | 37 | ################################ 38 | ### Starting values and test ### 39 | ################################ 40 | yhat = mean(reshape(y, 4, 5), 1) 41 | 42 | theta0 = [mean(y), log(var(yhat)), log(var(y)/3), log(var(y)/3)] 43 | 44 | sfran2_loglike(theta0) 45 | 46 | 47 | ########### 48 | ### Run ### 49 | ########### 50 | using Optim 51 | res = optimize(sfran2_loglike, theta0, method = :l_bfgs) 52 | exp([-2.92,-3.44, -6.079]) 53 | 54 | 55 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/twofactorRE.m: -------------------------------------------------------------------------------- 1 | % matlab from Statistical Modeling and Computation (2014 p 314). See the 2 | % associated twofactorRE.R file for details. 3 | 4 | function l = sfran2_loglike(mu, eta_alpha, eta_gamma, eta, y, Xalpha, Xgamma) 5 | sigma2_alpha = exp(eta_alpha); 6 | sigma2_gamma = exp(eta_gamma); 7 | sigma2 = exp(eta); 8 | 9 | n = length(y); 10 | 11 | Sigma = sigma2*speye(n) + sigma2_alpha * (Xalpha * Xalpha') + sigma2_gamma * (Xgamma*Xgamma'); 12 | 13 | l = -n/2 * log(2*pi) - sum(log(diag(chol(Sigma)))) - .5*(y - mu)' * (Sigma\(y - mu)); 14 | 15 | end 16 | 17 | y = [1.39 1.29 1.12 1.16 1.52 1.62 1.88 1.87 1.24 1.18 .95 .96 .82 .92 1.18 1.20 1.47 1.41 1.57 1.65]; 18 | 19 | Xalpha = kron(speye(5), ones(4,1)); 20 | 21 | Xgamma = kron(speye(10), ones(2,1)); 22 | 23 | f = @(theta) -sfran2_loglike(theta(1), theta(2), theta(3), theta(4), y, Xalpha, Xgamma); 24 | yhat = mean(reshape(y, 4, 5)); 25 | theta0 = [mean(y) log(var(yhat)) log(var(y)/3) log(var(y)/3)]; 26 | thetahat = fminsearch(f, theta0) 27 | -------------------------------------------------------------------------------- /ModelFitting/NBzeroinfl.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Zero-inflated Negative Binomial Model" 3 | #' author: "Michael Clark" 4 | #' date: "" 5 | #' --- 6 | #' 7 | #' 8 | #' Log likelihood function to estimate parameters for a Zero-inflated Negative Binomial model. With 9 | #' examples and comparison to pscl package output. Also includes approach based on Hilbe GLM text.
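#' The likelihood maximized below mixes a point mass at zero with a negative binomial count
#' component: P(y = 0) = pi + (1 - pi) * NB(0; mu, theta) and P(y = k) = (1 - pi) * NB(k; mu, theta)
#' for k > 0, with pi modeled on the logit scale and mu on the log scale.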
10 | #' see also: https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/poiszeroinfl.R 11 | 12 | ZINB = function(y, X, par) { 13 | # arguments are response y, predictor matrix X, and parameter named starting points of 'logit', 'negbin', and 'theta' 14 | 15 | # Extract parameters 16 | logitpars = par[grep('logit', names(par))] 17 | negbinpars = par[grep('negbin', names(par))] 18 | theta = exp(par[grep('theta', names(par))]) 19 | 20 | # Logit part; in this function Xlogit = Xnegbin but one could split X argument into Xlogit and Xnegbin for example 21 | Xlogit = X 22 | LPlogit = Xlogit %*% logitpars 23 | logi0 = plogis(LPlogit) 24 | 25 | # Negbin part 26 | Xnegbin = X 27 | munb = exp(Xnegbin %*% negbinpars) 28 | 29 | # LLs 30 | logliklogit = log( logi0 + exp(log(1 - logi0) + suppressWarnings(dnbinom(0, size = theta, mu = munb, log = TRUE))) ) 31 | logliknegbin = log(1 - logi0) + suppressWarnings(dnbinom(y, size = theta, mu = munb, log = TRUE)) 32 | 33 | # Hilbe formulation 34 | # theta part 35 | # alpha = 1/theta 36 | # m = 1/alpha 37 | # p = 1/(1 + alpha*munb) 38 | 39 | # logliklogit = log( logi0 + (1 - logi0)*(p^m) ) 40 | # logliknegbin = log(1-logi0) + log(gamma(m+y)) - log(gamma(m)) + m*log(p) + y*log(1-p) # gamma(y+1) not needed 41 | 42 | y0 = y == 0 # 0 values 43 | yc = y > 0 # Count part 44 | 45 | loglik = sum(logliklogit[y0]) + sum(logliknegbin[yc]) 46 | -loglik 47 | } 48 | 49 | 50 | #' Get the data 51 | library(haven) 52 | 53 | fish = read_dta("http://www.stata-press.com/data/r11/fish.dta") 54 | 55 | 56 | #' Get starting values or simply do zeros 57 | #' for this function, a named vector for the starting values 58 | #' for zinb: 'logit', 'negbin', 'theta' 59 | init.mod = model.matrix(count ~ persons + livebait, data = fish) # to get X matrix 60 | 61 | startlogi = glm(count == 0 ~ persons + livebait, data = fish, family = "binomial") 62 | startcount = glm(count ~ persons + livebait, data = fish, family = "poisson") 63 | 64 | starts = c( 65 | negbin = coef(startcount), 66 | logit = coef(startlogi), 67 | theta = 1 68 | ) 69 | # starts = c(negbin = rep(0, 3), 70 | # logit = rep(0, 3), 71 | # theta = log(1)) 72 | 73 | 74 | #' Estimate with optim function 75 | optNB1 = optim( 76 | par = starts , 77 | fn = ZINB, 78 | X = init.mod, 79 | y = fish$count, 80 | method = "BFGS", 81 | control = list(maxit = 5000, reltol = 1e-12), 82 | hessian = TRUE 83 | ) 84 | # optNB1 85 | 86 | #' Comparison 87 | # Extract for clean display 88 | B = optNB1$par 89 | se = sqrt(diag(solve((optNB1$hessian)))) 90 | Z = B/se 91 | p = pnorm(abs(Z), lower = FALSE)*2 92 | 93 | # pscl results 94 | library(pscl) 95 | zinbmod1 = zeroinfl(count ~ persons + livebait, data = fish, dist = "negbin") 96 | summary(zinbmod1) 97 | round(data.frame(B, se, Z, p), 4) # note that theta here is actually log(theta) 98 | 99 | 100 | #' Optional data set 101 | 102 | 103 | #' Get the data 104 | data("bioChemists", package = "pscl") 105 | 106 | #' Get starting values or simply do zeros 107 | init.mod = model.matrix(art ~ fem + mar + kid5 + phd + ment, data = bioChemists) # to get X matrix 108 | startlogi = glm(art==0 ~ fem + mar + kid5 + phd + ment, data = bioChemists, family = "binomial") 109 | startcount = glm(art ~ fem + mar + kid5 + phd + ment, data = bioChemists, family = "quasipoisson") 110 | 111 | starts = c( 112 | negbin = coef(startcount), 113 | logit = coef(startlogi), 114 | theta = summary(startcount)$dispersion 115 | ) 116 | # starts = c(negbin = rep(0, 6), 117 | # logit = rep(0, 6), 118 | # theta = 1) 119 | 120 
| 121 | #' Estimate with optim function 122 | optNB2 = optim( 123 | par = starts , 124 | fn = ZINB, 125 | X = init.mod, 126 | y = bioChemists$art, 127 | method = "BFGS", 128 | control = list(maxit = 5000, reltol = 1e-12), 129 | hessian = TRUE 130 | ) 131 | # optNB2 132 | 133 | 134 | #' Comparison 135 | # Extract for clean display. 136 | B = optNB2$par 137 | se = sqrt(diag(solve((optNB2$hessian)))) 138 | Z = B/se 139 | p = pnorm(abs(Z), lower = FALSE)*2 140 | 141 | # pscl results 142 | library(pscl) 143 | zinbmod = zeroinfl(art ~ . | ., data = bioChemists, dist = "negbin") 144 | summary(zinbmod) 145 | round(data.frame(B,se, Z, p), 4) -------------------------------------------------------------------------------- /ModelFitting/bivariateProbit.R: -------------------------------------------------------------------------------- 1 | ### Stata's seems to be the primary audience concerned with these models, but I 2 | ### thought I'd play around with one here (I've never had reason to use a probit 3 | ### model in practice). Stata examples come from the UCLA ATS website and the 4 | ### Stata manual so one can investigate the Stata result for comparison. 5 | 6 | 7 | 8 | # standard probit -------------------------------------------------------- 9 | 10 | probitLL = function(beta, X, y){ 11 | mu = X %*% beta 12 | 13 | # these produce identical results, but the second is the typical depiction 14 | # ll = sum(dbinom( 15 | # y, 16 | # size = 1, 17 | # prob = pnorm(mu), 18 | # log = T 19 | # )) 20 | 21 | ll = sum(y * pnorm(mu, log = T) + (1 - y) * log(1 - pnorm(mu))) 22 | 23 | -ll 24 | } 25 | 26 | ### Example 1 27 | 28 | # Example at https://stats.idre.ucla.edu/stata/dae/probit-regression/ 29 | 30 | admit = haven::read_dta('https://stats.idre.ucla.edu/stat/stata/dae/binary.dta') 31 | 32 | head(admit) 33 | 34 | X = model.matrix(admit~ gre + gpa + factor(rank), admit) 35 | y = admit$admit 36 | 37 | init = rep(0, ncol(X)) 38 | 39 | optimResult = optim( 40 | fn = probitLL, 41 | par = init, 42 | X = X, 43 | y = y, 44 | method = 'BFGS' 45 | ) 46 | 47 | optimResult 48 | 49 | 50 | ### Example 2: from Stata manual on standard probit 51 | 52 | # http://www.stata.com/manuals13/rprobit.pdf 53 | # "We have data on the make, weight, and mileage rating of 22 foreign and 52 54 | # domestic automobiles. We wish to fit a probit model explaining whether a car 55 | # is foreign based on its weight and mileage." 
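### (As a check on the optim solution below: once the data is loaded, a standard glm fit,
### e.g. glm(foreign ~ weight + mpg, family = binomial(link = 'probit'), data = auto),
### should produce essentially the same coefficients.)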
56 | 57 | auto = haven::read_dta('http://www.stata-press.com/data/r13/auto.dta') 58 | 59 | head(auto) 60 | 61 | X = model.matrix(foreign~ weight + mpg, auto) 62 | y = auto$foreign 63 | 64 | init = rep(0, ncol(X)) 65 | 66 | optimResult = optim( 67 | fn = probitLL, 68 | par = init, 69 | X = X, 70 | y = y 71 | ) 72 | 73 | optimResult 74 | 75 | 76 | 77 | # Bivariate probit -------------------------------------------------------- 78 | 79 | bivariateProbitLL = function(pars, X, y1, y2) { 80 | rho = pars[1] 81 | mu1 = X %*% pars[2:(ncol(X) + 1)] 82 | mu2 = X %*% pars[(ncol(X) + 2):length(pars)] 83 | q1 = ifelse(y1 == 1, 1,-1) 84 | q2 = ifelse(y2 == 1, 1,-1) 85 | 86 | require(mnormt) 87 | eta1 = q1 * mu1 88 | eta2 = q2 * mu2 89 | 90 | ll = matrix(NA, nrow = nrow(X)) 91 | for (i in 1:length(ll)) { 92 | corr = q1[i] * q2[i] * rho 93 | corr = matrix(c(1, corr, corr, 1), 2) 94 | ll[i] = log( 95 | pmnorm( 96 | x = c(eta1[i], eta2[i]), 97 | mean = c(0, 0), 98 | varcov = corr 99 | ) 100 | ) 101 | } 102 | 103 | # the loop is probably clearer, and there is no difference in time, but here's a oneliner 104 | # ll = mapply(function(e1, e2, q1, q2) log(pmnorm(x=c(e1, e2), varcov = matrix(c(1,q1*q2*rho,q1*q2*rho,1),2))), 105 | # eta1, eta2, q1, q2) 106 | 107 | -sum(ll) 108 | } 109 | 110 | 111 | ### Example 3: from stata manual on bivariate probit 112 | # "We wish to model the bivariate outcomes of whether children attend private 113 | # school and whether the head of the household voted for an increase in property 114 | # tax based on the other covariates." 115 | 116 | school = haven::read_dta('http://www.stata-press.com/data/r13/school.dta') 117 | 118 | head(school) 119 | 120 | X = model.matrix(private ~ years + logptax + loginc, school) 121 | y1 = school$private 122 | y2 = school$vote 123 | 124 | init = c(0, rep(0, ncol(X)*2)) 125 | 126 | # you'll probably get a warning or two, ignore; takes a couple seconds 127 | optimResult = optim( 128 | fn = bivariateProbitLL, 129 | par = init, 130 | X = X, 131 | y1 = y1, 132 | y2 = y2, 133 | method = 'BFGS' 134 | ) 135 | 136 | 137 | loglik = optimResult$value 138 | rho = optimResult$par[1] 139 | coefsPrivate = optimResult$par[2:(ncol(X) + 1)] 140 | coefsVote = optimResult$par[(ncol(X) + 2):length(init)] 141 | names(coefsPrivate) = names(coefsVote) = c('Int', 'years', 'logptax', 'loginc') 142 | 143 | list( 144 | loglik = loglik, 145 | rho = rho, 146 | Private = coefsPrivate, 147 | Vote = coefsVote 148 | ) 149 | 150 | -------------------------------------------------------------------------------- /ModelFitting/convert_code_to_html.R: -------------------------------------------------------------------------------- 1 | 2 | # Preliminaries ----------------------------------------------------------- 3 | 4 | library(tidyverse) 5 | 6 | 7 | fs = list.files('ModelFitting/', pattern = '\\.R$') 8 | 9 | 10 | 11 | # single file 12 | rmarkdown::render( 13 | "ModelFitting/nelder_mead.R", 14 | output_dir = '../m-clark.github.io/docs/models/', 15 | # output_yaml = 'render.yaml', # ignored 16 | # params = list(title = 'blah'), 17 | knit_meta = list(comment = NA) 18 | ) 19 | 20 | 21 | # all in theory, paths issue most likely; not worth the trouble 22 | map(fs, rmarkdown::render, output_dir = '../m-clark.github.io/docs/models/') 23 | 24 | 25 | # selection 26 | fs_select = fs[grepl(fs, pattern = 'gradient')] 27 | 28 | map(paste0('ModelFitting/', fs_select), rmarkdown::render, output_dir = '../m-clark.github.io/docs/models/') 29 | 30 | 31 | 32 | 
-------------------------------------------------------------------------------- /ModelFitting/cubicsplines.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Cubic Spline Model" 3 | #' author: "Michael Clark" 4 | #' date: "" 5 | #' --- 6 | #' 7 | #' 8 | #' See Wood (2017) Generalized Additive Models or my [document](https://m-clark.github.io/generalized-additive-models/). 9 | 10 | library(tidyverse) # for processing and plotting 11 | 12 | #' # Create the data 13 | size = c(1.42,1.58,1.78,1.99,1.99,1.99,2.13,2.13,2.13, 14 | 2.32,2.32,2.32,2.32,2.32,2.43,2.43,2.78,2.98,2.98) 15 | 16 | wear = c(4.0,4.2,2.5,2.6,2.8,2.4,3.2,2.4,2.6,4.8,2.9, 17 | 3.8,3.0,2.7,3.1,3.3,3.0,2.8,1.7) 18 | 19 | x = size - min(size) 20 | x = x / max(x) 21 | d = data.frame(wear, x) 22 | 23 | #' Cubic spline function 24 | rk <- function(x, z) { 25 | ((z-0.5)^2 - 1/12) * ((x-0.5)^2 - 1/12)/4 - 26 | ((abs(x-z)-0.5)^4 - (abs(x-z)-0.5)^2/2 + 7/240) / 24 27 | } 28 | 29 | #' Generate the model matrix. 30 | splX <- function(x, knots) { 31 | q = length(knots) + 2 # number of parameters 32 | n = length(x) # number of observations 33 | X = matrix(1, n, q) # initialized model matrix 34 | X[ ,2] = x # set second column to x 35 | X[ ,3:q] = outer(x, knots, FUN = rk) # remaining to cubic spline basis 36 | X 37 | } 38 | 39 | splS <- function(knots) { 40 | q = length(knots) + 2 41 | S = matrix(0, q, q) # initialize matrix 42 | S[3:q, 3:q] = outer(knots, knots, FUN = rk) # fill in non-zero part 43 | S 44 | } 45 | 46 | #' Matrix square root function. Note that there are various packages with their own. 47 | matSqrt <- function(S) { 48 | d = eigen(S, symmetric = T) 49 | rS = d$vectors %*% diag(d$values^.5) %*% t(d$vectors) 50 | rS 51 | } 52 | 53 | #' Penalized fitting function. 
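#' Why the augmentation works: stacking sqrt(lambda) * matSqrt(S) beneath the model matrix
#' and appending zeros to y makes the ordinary least squares criterion equal to
#' ||y - Xb||^2 + lambda * b'Sb, so a plain lm() call solves the penalized problem.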
54 | prsFit <- function(y, x, knots, lambda) { 55 | q = length(knots) + 2 # dimension of basis 56 | n = length(x) # number of observations 57 | Xa = rbind(splX(x, knots), matSqrt(splS(knots))*sqrt(lambda)) # augmented model matrix 58 | y[(n+1):(n+q)] = 0 # augment the data vector 59 | 60 | lm(y ~ Xa - 1) # fit and return penalized regression spline 61 | } 62 | 63 | 64 | 65 | #' # Example 1 66 | 67 | 68 | #' Unpenalized 69 | #' 70 | knots = 1:4/5 71 | X = splX(x, knots) # generate model matrix 72 | mod1 = lm(wear ~ X - 1) # fit model 73 | 74 | xp = 0:100/100 # x values for prediction 75 | Xp = splX(xp, knots) # prediction matrix 76 | 77 | 78 | #' Visualize 79 | 80 | ggplot(aes(x = x, y = wear), data = data.frame(x, wear)) + 81 | geom_point(color = "#FF5500") + 82 | geom_line(aes(x = xp, y = Xp %*% coef(mod1)), 83 | data = data.frame(xp, Xp), 84 | color = "#00AAFF") + 85 | labs(x = 'Scaled Engine size', y = 'Wear Index') + 86 | theme_minimal() 87 | 88 | 89 | 90 | #' # Example 2 91 | 92 | 93 | # Add penalty lambda 94 | knots = 1:7/8 95 | d2 = data.frame(x = xp) 96 | 97 | for (i in c(.1, .01, .001, .0001, .00001, .000001)){ 98 | # fit penalized regression 99 | mod2 = prsFit( 100 | y = wear, 101 | x = x, 102 | knots = knots, 103 | lambda = i 104 | ) 105 | # spline choosing lambda 106 | Xp = splX(xp, knots) # matrix to map parameters to fitted values at xp 107 | LP = Xp %*% coef(mod2) 108 | d2[, paste0('lambda = ', i)] = LP[, 1] 109 | } 110 | 111 | #' Examine 112 | # head(d2) 113 | 114 | #' Visualize via ggplot 115 | d3 = d2 %>% 116 | pivot_longer(cols = -x, 117 | names_to = 'lambda', 118 | values_to = 'value') %>% 119 | mutate(lambda = fct_inorder(lambda)) 120 | 121 | ggplot(d3) + 122 | geom_point(aes(x = x, y = wear), col = '#FF5500', data = d) + 123 | geom_line(aes(x = x, y = value), col = "#00AAFF") + 124 | facet_wrap(~lambda) + 125 | theme_minimal() 126 | 127 | -------------------------------------------------------------------------------- /ModelFitting/elm.R: -------------------------------------------------------------------------------- 1 | # A very simple implementation of extreme learning machine for regression for 2 | # demonstration. See elmNN and ELMR for some R package implementations. I add 3 | # comparison to generalized additive models (elm/neural networks and GAMs are 4 | # adaptive basis function models). 5 | 6 | # http://www.extreme-learning-machines.org 7 | # G.-B. Huang, Q.-Y. Zhu and C.-K. 
Siew, "Extreme Learning Machine: Theory and Applications" 8 | 9 | elm <- function(X, y, n_hidden=NULL, active_fun=tanh) { 10 | # X: an N observations x p features matrix 11 | # y: the target 12 | # n_hidden: the number of hidden nodes 13 | # active_fun: activation function 14 | pp1 = ncol(X) + 1 15 | w0 = matrix(rnorm(pp1*n_hidden), pp1, n_hidden) # random weights 16 | h = active_fun(cbind(1, scale(X)) %*% w0) # compute hidden layer 17 | B = MASS::ginv(h) %*% y # find weights for hidden layer 18 | fit = h %*% B # fitted values 19 | list(fit= fit, loss=crossprod(fit - y), B=B, w0=w0) 20 | } 21 | 22 | 23 | 24 | # one variable, complex function ------------------------------------------- 25 | library(tidyverse); library(mgcv) 26 | set.seed(123) 27 | n = 5000 28 | x = runif(n) 29 | # x = rnorm(n) 30 | mu = sin(2*(4*x-2)) + 2*exp(-(16^2)*((x-.5)^2)) 31 | y = rnorm(n, mu, .3) 32 | # qplot(x, y) 33 | d = data.frame(x,y) 34 | 35 | X_ = as.matrix(x, ncol=1) 36 | 37 | test = elm(X_, y, n_hidden=100) 38 | str(test) 39 | # qplot(x, y) + geom_line(aes(y=test$fit), color='#1e90ff') 40 | cor(test$fit[,1], y)^2 41 | 42 | gam_comparison = gam(y~s(x)) 43 | summary(gam_comparison)$r.sq 44 | 45 | 46 | d %>% 47 | mutate(fit_elm = test$fit, 48 | fit_gam = fitted(gam_comparison)) %>% 49 | ggplot() + 50 | geom_point(aes(x, y), alpha=.1) + 51 | geom_line(aes(x, y=fit_elm), color='#1e90ff') + 52 | geom_line(aes(x, y=fit_gam), color='darkred') 53 | 54 | 55 | 56 | 57 | # motorcycle accident data ------------------------------------------------ 58 | 59 | data('mcycle', package='MASS') 60 | x = mcycle[,1] 61 | X_ = matrix(x, ncol=1) 62 | y = mcycle[,2] 63 | 64 | test = elm(X_, y, n_hidden=100) 65 | cor(test$fit[,1], y)^2 66 | 67 | gam_comparison = gam(y~s(x)) 68 | summary(gam_comparison)$r.sq 69 | 70 | qplot(x, y) + 71 | geom_line(aes(y=test$fit), color='#1e90ff') + 72 | geom_line(aes(y=fitted(gam_comparison)), color='darkred') 73 | 74 | 75 | 76 | 77 | # add covariates ---------------------------------------------------------- 78 | 79 | d = gamSim(eg=7, n=10000) 80 | X_ = as.matrix(d[,2:5]) 81 | y = d[,1] 82 | 83 | n_nodes = c(10, 25, 100, 250, 500, 1000) 84 | test = lapply(n_nodes, function(n) elm(X_, y, n_hidden=n)) # this will take a few seconds 85 | final_n = which.min(sapply(test, function(x) x$loss)) 86 | best = test[[final_n]] 87 | # str(best) 88 | qplot(best$fit[,1], y, alpha=.2) 89 | cor(best$fit[,1], y)^2 90 | 91 | gam_comparison = gam(y~s(x0) + s(x1) + s(x2) + s(x3), data=d) 92 | gam.check(gam_comparison) 93 | summary(gam_comparison)$r.sq 94 | 95 | 96 | test_data0 = gamSim(eg=7) # default n = 400 97 | test_data = cbind(1, scale(test_data0[,2:5])) 98 | 99 | elm_prediction = tanh(test_data %*% best$w0) %*% best$B # remember to use your specific activation function here 100 | gam_prediction = predict(gam_comparison, newdata=test_data0) 101 | cor(data.frame(elm_prediction, gam_prediction), test_data0$y)^2 102 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/README.md: -------------------------------------------------------------------------------- 1 | Since I first did the code for these models, the Stan group has done some notable improvement in this area, both in terms of the language itself, the example chapter for Gaussian processes, and even implementing a specific function for the squared exponential kernel. While the offerings here may still be of use, they may also be dated, so check the recent Stan manual before getting too carried away. 
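For reference, a minimal sketch of a GP regression using the built-in squared exponential kernel from more recent Stan versions (the `cov_exp_quad` function, later renamed `gp_exp_quad_cov`; the priors and parameter names here are illustrative, so check the manual for your installed version):

```stan
data {
  int<lower=1> N;
  real x[N];
  vector[N] y;
}
parameters {
  real<lower=0> eta;    // marginal standard deviation
  real<lower=0> rho;    // length scale
  real<lower=0> sigma;  // observation noise sd
}
model {
  matrix[N, N] K = cov_exp_quad(x, eta, rho);
  for (n in 1:N)
    K[n, n] = K[n, n] + square(sigma);  // add noise variance to the diagonal

  eta   ~ normal(0, 1);
  rho   ~ inv_gamma(5, 5);
  sigma ~ normal(0, 1);

  y ~ multi_normal(rep_vector(0, N), K);
}
```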
2 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gaussSample.m: -------------------------------------------------------------------------------- 1 | function S = gaussSample(arg1, arg2, arg3) 2 | % Returns n samples (in the rows) from a multivariate Gaussian distribution 3 | % 4 | % Examples: 5 | % S = gaussSample(mu, Sigma, 10) 6 | % S = gaussSample(model, 100) 7 | % S = gaussSample(struct('mu',[0], 'Sigma', eye(1)), 3) 8 | 9 | % This file is from pmtk3.googlecode.com 10 | 11 | 12 | switch nargin 13 | case 3, mu = arg1; Sigma = arg2; n = arg3; 14 | case 2, model = arg1; mu = model.mu; Sigma = model.Sigma; n = arg2; 15 | case 1, model = arg1; mu = model.mu; Sigma = model.Sigma; n = 1; 16 | otherwise 17 | error('bad num args') 18 | end 19 | 20 | A = chol(Sigma, 'lower'); 21 | Z = randn(length(mu), n); 22 | S = bsxfun(@plus, mu(:), A*Z)'; 23 | 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gaussianProcessStan.R: -------------------------------------------------------------------------------- 1 | # Data and parameter setup ------------------------------------------------ 2 | 3 | # Data 4 | set.seed(1234) 5 | N = 20 6 | Ntest = 200 7 | x = rnorm(N, sd=1) 8 | y = scale(sin(x) + rnorm(N, sd=.1))[,1] 9 | xtest = seq(min(x)-1, max(x)+1, l=Ntest) 10 | plot(x,y, pch=19, col='#ff5500') 11 | 12 | # parameters 13 | eta_sq = 1 14 | rho_sq = 1 15 | sigma_sq = .1 16 | 17 | # Covariance function same as implemented in the Stan code. 18 | Kfn <- function (x, eta_sq, rho_sq, sigma_sq) { 19 | N = length(x) 20 | Sigma = matrix(NA, N, N) 21 | 22 | # off diag elements 23 | for (i in 1:(N-1)) { 24 | for (j in (i+1):N) { 25 | Sigma[i,j] <- eta_sq * exp(-rho_sq * (x[i] - x[j])^2); 26 | Sigma[j,i] <- Sigma[i,j]; 27 | } 28 | } 29 | 30 | # diagonal elements 31 | for (k in 1:N) 32 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter 33 | Sigma 34 | } 35 | 36 | 37 | 38 | # Vis prior functions ----------------------------------------------------- 39 | xinit = seq(-5,5,.2) 40 | xprior = MASS::mvrnorm(3, 41 | mu=rep(0, length(xinit)), 42 | Sigma=Kfn(x=xinit, 43 | eta_sq = eta_sq, 44 | rho_sq = rho_sq, 45 | sigma_sq = sigma_sq)) 46 | 47 | 48 | library(reshape2) 49 | gdat = melt(data.frame(x=xinit, y=t(xprior)), id='x') 50 | 51 | library(ggvis) 52 | gdat %>% 53 | ggvis(~x, ~value) %>% 54 | group_by(variable) %>% 55 | layer_paths(strokeOpacity:=.5) %>% 56 | add_axis('x', grid=F) %>% 57 | add_axis('y', grid=F) 58 | 59 | 60 | 61 | # Stan model code --------------------------------------------------------- 62 | # models/covariance functions available 63 | # gpStanModelCode_generalizedSquaredExponential.stan 64 | # gpStanModelCode_gammaExponential.stan 65 | # gpStanModelCode_rationalQuadratic.stan 66 | 67 | gp = 'ModelFitting/gp Examples/gpStanModelCode_generalizedSquaredExponential.stan' 68 | 69 | # Compile Check ----------------------------------------------------------- 70 | 71 | standata = list(N=N, x=x, y=y, xtest=xtest, Ntest=200) 72 | 73 | 74 | library(rstan) 75 | fit0 = stan(data=standata, file = gp, iter = 1, chains=1) 76 | 77 | 78 | 79 | # Main Run ---------------------------------------------------------------- 80 | 81 | iterations = 12000 82 | wu = 2000 83 | th = 20 84 | chains = 4 85 | 86 | 87 | # With N = 20 Ntest = 200 takes about 2 min for gen squared expo, 10+min for 300 88 | 89 | p = proc.time() 90 | fit = stan(data=standata, file=gp, iter = iterations, warmup = wu, thin=th, 91 | 
chains=chains, fit = fit0, cores=chains)
92 | (proc.time() - p)/3600
93 |
94 |
95 |
96 | # Summarize and Vis -------------------------------------------------------
97 |
98 | # takes a bit to print
99 | # print(fit, par=c('eta_sq','rho_sq','sigma_sq'))
100 |
101 | # library(shinyStan)
102 | # launch_shinystan(fit)
103 |
104 |
105 | # Extract and visualize posterior predictive draws
106 | yRep = extract(fit, 'yRep')$yRep
107 |
108 | gdat = data.frame(x,y)
109 | gdat2 = melt(data.frame(x = sort(xtest), y=t(yRep[sample(2000, 3),])), id='x')
110 |
111 | gdat2 %>%
112 | ggvis(~x, ~value) %>%
113 | group_by(variable) %>%
114 | layer_paths(strokeOpacity:=.25) %>%
115 | layer_points(x=~x, y=~y, fill:='#ff5500', data=gdat) %>%
116 | add_axis('x', grid=F) %>%
117 | add_axis('y', grid=F)
118 |
119 |
120 | # Visualize fit
121 | yRepMean = get_posterior_mean(fit, 'yRep')[,5]
122 | quantiles = data.frame(t(apply(yRep, 2, quantile, p=c(.025,.975)))); colnames(quantiles) = c('ll','ul')
123 |
124 | gdat3 = data.frame(x = sort(xtest), y=yRepMean, quantiles)
125 | ttle = stringr::str_extract(gp, "(?<=_)(.*)(?=\\.)")
126 |
127 | library(kernlab)
128 | comparisonModel = gausspr(x, y, kernel=rbfdot(sigma=1), var=.1, scaled=F, tol=1e-5)
129 | comparisonY = predict(comparisonModel, newdata=xtest)
130 |
131 | library(mgcv)
132 | comparisonModel2 = gam(y~s(x, bs='gp', m=c(2,2,2)))
133 | comparisonY2 = predict(comparisonModel2, newdata=data.frame(x=xtest))
134 |
135 | gdat3 %>%
136 | ggvis(~x, ~y) %>%
137 | layer_ribbons(y=~ll, y2=~ul, fillOpacity:=.1) %>%
138 | layer_paths(strokeOpacity:=.5, stroke:='blue') %>%
139 | layer_paths(y=~comparisonY, strokeOpacity:=.5, stroke:='red') %>%
140 | layer_paths(y=~comparisonY2, strokeOpacity:=.5, stroke:='green') %>%
141 | layer_points(x=~x, y=~y, fill:='#ff5500', fillOpacity:=.5, data=gdat) %>%
142 | add_axis('x', grid=F) %>%
143 | add_axis("x", orient = "top", ticks = 0, title = ttle,
144 | properties = axis_props(
145 | axis = list(stroke = "white"),
146 | labels = list(fontSize = 0))) %>%
147 | add_axis('y', grid=F)
148 |
149 |
-------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStanModelCode.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | vector[N] x; # covariate 4 | vector[N] y; # target 5 | int Ntest; # prediction set sample size 6 | vector[Ntest] xtest; # prediction values for covariate 7 | } 8 | 9 | transformed data { 10 | vector[N] mu; 11 | 12 | mu <- rep_vector(0, N); # mean function 13 | } 14 | 15 | parameters { 16 | real eta_sq; # parameters of squared exponential covariance function 17 | real inv_rho_sq; # rho the length scale 18 | real sigma_sq; # eta_sq + sigma_sq = var of predicted y; eta_sq is variance explained by the function 19 | 20 | real eta_sq_scale; # hyperpriors 21 | real inv_rho_sq_scale; 22 | real sigma_sq_scale; 23 | } 24 | 25 | model { 26 | matrix[N,N] Sigma; 27 | 28 | # off-diagonal elements for covariance matrix 29 | for (i in 1:(N-1)) { 30 | for (j in (i+1):N) { 31 | Sigma[i,j] <- eta_sq * exp( - pow(x[i] - x[j],2) / inv_rho_sq ); 32 | Sigma[j,i] <- Sigma[i,j]; 33 | } 34 | } 35 | 36 | # diagonal elements 37 | for (k in 1:N) 38 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 39 | 40 | # hyperpriors 41 | eta_sq_scale ~ exponential(.2); 42 | inv_rho_sq_scale ~ exponential(.2); 43 | sigma_sq_scale ~ exponential(.2); 44 | 45 | # priors 46 | eta_sq ~ cauchy(0, eta_sq_scale); 47 | inv_rho_sq ~
cauchy(0, inv_rho_sq_scale); 48 | sigma_sq ~ cauchy(0, sigma_sq_scale); 49 | 50 | # sampling distribution 51 | y ~ multi_normal(mu, Sigma); 52 | } 53 | 54 | generated quantities { 55 | vector[Ntest] muTest; # The following produces the posterior predictive draws 56 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 57 | matrix[Ntest,Ntest] L; 58 | { 59 | matrix[N,N] Sigma; 60 | matrix[Ntest,Ntest] Omega; 61 | matrix[N,Ntest] K; 62 | matrix[Ntest,N] K_transpose_div_Sigma; 63 | matrix[Ntest,Ntest] Tau; 64 | 65 | # K all elements 66 | for (i in 1:N) 67 | for (j in 1:Ntest) 68 | K[i,j] <- eta_sq * exp(-pow(x[i] - xtest[j], 2)/inv_rho_sq); 69 | 70 | # Sigma off-diagonal elements 71 | for (i in 1:(N-1)) { 72 | for (j in (i+1):N) { 73 | Sigma[i,j] <- eta_sq * exp(- pow(x[i] - x[j],2)/inv_rho_sq); 74 | Sigma[j,i] <- Sigma[i,j]; 75 | } 76 | } 77 | 78 | #Omega off-diagonal elements 79 | for (i in 1:(Ntest-1)) { 80 | for (j in (i+1):Ntest) { 81 | Omega[i,j] <- eta_sq * exp(- pow(xtest[i] - xtest[j],2)/inv_rho_sq); 82 | Omega[j,i] <- Omega[i,j]; 83 | } 84 | } 85 | 86 | #Sigma diagonal elements 87 | for (k in 1:N) 88 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 89 | 90 | #Omega diagonal elements 91 | for (k in 1:Ntest) 92 | Omega[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 93 | 94 | K_transpose_div_Sigma <- K' / Sigma; 95 | muTest <- K_transpose_div_Sigma * y; 96 | Tau <- Omega - K_transpose_div_Sigma * K; 97 | 98 | for (i in 1:(Ntest-1)) 99 | for (j in (i+1):Ntest) 100 | Tau[i,j] <- Tau[j,i]; 101 | 102 | L <- cholesky_decompose(Tau); 103 | } 104 | 105 | yRep <- multi_normal_cholesky_rng(muTest, L); 106 | } 107 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStanModelCode_gammaExponential.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | vector[N] x; # covariate 4 | vector[N] y; # target 5 | int Ntest; # prediction set sample size 6 | vector[Ntest] xtest; # prediction values for covariate 7 | } 8 | 9 | transformed data { 10 | vector[N] mu; 11 | 12 | mu <- rep_vector(0, N); # mean function 13 | } 14 | 15 | parameters { 16 | real eta_sq; # parameters of squared exponential covariance function 17 | real l_; 18 | real sigma_sq; 19 | real gamma; 20 | 21 | real eta_sq_scale; # hyperpriors 22 | real l_scale; 23 | real sigma_sq_scale; 24 | } 25 | 26 | transformed parameters { 27 | } 28 | 29 | model { 30 | matrix[N,N] Sigma; 31 | 32 | # off-diagonal elements for covariance matrix 33 | for (i in 1:(N-1)) { 34 | for (j in (i+1):N) { 35 | Sigma[i,j] <- eta_sq * exp(-pow(fabs(x[i] - x[j])/l_, gamma)); 36 | Sigma[j,i] <- Sigma[i,j]; 37 | } 38 | } 39 | 40 | # diagonal elements 41 | for (k in 1:N) 42 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 43 | 44 | # hyperpriors 45 | eta_sq_scale ~ exponential(.2); 46 | l_scale ~ exponential(.2); 47 | sigma_sq_scale ~ exponential(.2); 48 | 49 | # priors 50 | eta_sq ~ cauchy(0, eta_sq_scale); 51 | l_ ~ cauchy(0, l_scale); 52 | sigma_sq ~ cauchy(0, sigma_sq_scale); 53 | gamma ~ cauchy(0, 1); 54 | 55 | # sampling distribution 56 | y ~ multi_normal(mu, Sigma); 57 | } 58 | 59 | generated quantities { 60 | vector[Ntest] muTest; # The following produces the posterior predictive draws 61 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 
62 | matrix[Ntest,Ntest] L; 63 | { 64 | matrix[N, N] Sigma; 65 | matrix[Ntest, Ntest] Omega; 66 | matrix[N,Ntest] K; 67 | matrix[Ntest,N] K_transpose_div_Sigma; 68 | matrix[Ntest, Ntest] Tau; 69 | 70 | # Sigma 71 | for (i in 1:N) 72 | for (j in 1:N) 73 | Sigma[i,j] <- eta_sq * exp(-pow(fabs(x[i] - x[j])/l_, gamma)) + if_else(i==j, sigma_sq, 0.0); 74 | 75 | # Omega 76 | for (i in 1:Ntest) 77 | for (j in 1:Ntest) 78 | Omega[i,j] <- eta_sq * exp(-pow(fabs(xtest[i] - xtest[j])/l_, gamma)) + if_else(i==j, sigma_sq, 0.0); 79 | 80 | # K 81 | for (i in 1:N) 82 | for (j in 1:Ntest) 83 | K[i,j] <- eta_sq * exp(-pow(fabs(x[i] - xtest[j])/l_, gamma)); 84 | 85 | K_transpose_div_Sigma <- K' / Sigma; 86 | muTest <- K_transpose_div_Sigma * y; 87 | Tau <- Omega - K_transpose_div_Sigma * K; 88 | 89 | for (i in 1:(Ntest-1)) 90 | for (j in (i+1):Ntest) 91 | Tau[i,j] <- Tau[j,i]; 92 | 93 | L <- cholesky_decompose(Tau); 94 | } 95 | 96 | yRep <- multi_normal_cholesky_rng(muTest, L); 97 | } -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStanModelCode_generalizedSquaredExponential.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | vector[N] x; # covariate 4 | vector[N] y; # target 5 | int Ntest; # prediction set sample size 6 | vector[Ntest] xtest; # prediction values for covariate 7 | } 8 | 9 | transformed data { 10 | vector[N] mu; 11 | 12 | mu <- rep_vector(0, N); # mean function 13 | } 14 | 15 | parameters { 16 | real eta_sq; # parameters of squared exponential covariance function 17 | real inv_rho_sq; # rho the length scale 18 | real sigma_sq; # eta_sq + sigma_sq = var of predicted y; eta_sq is variance explained by the function 19 | 20 | real eta_sq_scale; # hyperpriors 21 | real inv_rho_sq_scale; 22 | real sigma_sq_scale; 23 | } 24 | 25 | transformed parameters { 26 | real rho_sq; 27 | rho_sq <- inv(inv_rho_sq); 28 | } 29 | 30 | model { 31 | matrix[N,N] Sigma; 32 | 33 | # off-diagonal elements for covariance matrix 34 | for (i in 1:(N-1)) { 35 | for (j in (i+1):N) { 36 | Sigma[i,j] <- eta_sq * exp(-rho_sq * pow(x[i] - x[j],2)); 37 | Sigma[j,i] <- Sigma[i,j]; 38 | } 39 | } 40 | 41 | # diagonal elements 42 | for (k in 1:N) 43 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 44 | 45 | # hyperpriors 46 | eta_sq_scale ~ exponential(.2); 47 | inv_rho_sq_scale ~ exponential(.2); 48 | sigma_sq_scale ~ exponential(.2); 49 | 50 | # priors 51 | eta_sq ~ cauchy(0, eta_sq_scale); 52 | inv_rho_sq ~ cauchy(0, inv_rho_sq_scale); 53 | sigma_sq ~ cauchy(0, sigma_sq_scale); 54 | 55 | # sampling distribution 56 | y ~ multi_normal(mu, Sigma); 57 | } 58 | 59 | generated quantities { 60 | vector[Ntest] muTest; # The following produces the posterior predictive draws 61 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 
62 | matrix[Ntest,Ntest] L; 63 | { 64 | matrix[N,N] Sigma; 65 | matrix[Ntest,Ntest] Omega; 66 | matrix[N,Ntest] K; 67 | matrix[Ntest,N] K_transpose_div_Sigma; 68 | matrix[Ntest,Ntest] Tau; 69 | 70 | # Sigma 71 | for (i in 1:N) 72 | for (j in 1:N) 73 | Sigma[i,j] <- eta_sq * exp(-pow(x[i] - x[j], 2)) + if_else(i==j, sigma_sq, 0.0); 74 | 75 | # Omega 76 | for (i in 1:Ntest) 77 | for (j in 1:Ntest) 78 | Omega[i,j] <- eta_sq * exp(-pow(xtest[i] - xtest[j], 2)) + if_else(i==j, sigma_sq, 0.0); 79 | 80 | # K 81 | for (i in 1:N) 82 | for (j in 1:Ntest) 83 | K[i,j] <- eta_sq * exp(-pow(x[i] - xtest[j], 2)); 84 | 85 | K_transpose_div_Sigma <- K' / Sigma; 86 | muTest <- K_transpose_div_Sigma * y; 87 | Tau <- Omega - K_transpose_div_Sigma * K; 88 | 89 | for (i in 1:(Ntest-1)) 90 | for (j in (i+1):Ntest) 91 | Tau[i,j] <- Tau[j,i]; 92 | 93 | L <- cholesky_decompose(Tau); 94 | } 95 | 96 | yRep <- multi_normal_cholesky_rng(muTest, L); 97 | 98 | } -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStanModelCode_rationalQuadratic.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | vector[N] x; # covariate 4 | vector[N] y; # target 5 | int Ntest; # prediction set sample size 6 | vector[Ntest] xtest; # prediction values for covariate 7 | } 8 | 9 | transformed data { 10 | vector[N] mu; 11 | 12 | mu <- rep_vector(0, N); # mean function 13 | } 14 | 15 | parameters { 16 | real alpha; # parameters of rational quadratic covariance function 17 | real l_; 18 | real eta_sq; 19 | real sigma_sq; 20 | 21 | real alpha_scale; # hyperpriors 22 | real l_scale; 23 | real eta_sq_scale; 24 | real sigma_sq_scale; 25 | } 26 | 27 | transformed parameters { 28 | } 29 | 30 | model { 31 | matrix[N,N] Sigma; 32 | 33 | # off-diagonal elements for covariance matrix 34 | for (i in 1:(N-1)) { 35 | for (j in (i+1):N) { 36 | Sigma[i,j] <- eta_sq * (1 + (1/(2*alpha)) * pow(fabs(x[i] - x[j]) /l_, 2))^-alpha; 37 | Sigma[j,i] <- Sigma[i,j]; 38 | } 39 | } 40 | 41 | # diagonal elements 42 | for (k in 1:N) 43 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 44 | 45 | # hyperpriors 46 | eta_sq_scale ~ exponential(.2); 47 | l_scale ~ exponential(.2); 48 | sigma_sq_scale ~ exponential(.2); 49 | alpha_scale ~ exponential(.2); 50 | 51 | # priors 52 | eta_sq ~ cauchy(0, eta_sq_scale); 53 | l_ ~ cauchy(0, l_scale); 54 | sigma_sq ~ cauchy(0, sigma_sq_scale); 55 | alpha ~ cauchy(0, alpha_scale); 56 | 57 | # sampling distribution 58 | y ~ multi_normal(mu, Sigma); 59 | } 60 | 61 | generated quantities { 62 | vector[Ntest] muTest; # The following produces the posterior predictive draws 63 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 
64 | matrix[Ntest,Ntest] L; 65 | { 66 | matrix[N, N] Sigma; 67 | matrix[Ntest, Ntest] Omega; 68 | matrix[N,Ntest] K; 69 | matrix[Ntest,N] K_transpose_div_Sigma; 70 | matrix[Ntest, Ntest] Tau; 71 | 72 | # Sigma 73 | for (i in 1:N) 74 | for (j in 1:N) 75 | Sigma[i,j] <- eta_sq * (1 + (1/(2*alpha)) * pow(fabs(x[i] - x[j]) /l_, 2))^-alpha + if_else(i==j, sigma_sq, 0.0); 76 | 77 | # Omega 78 | for (i in 1:Ntest) 79 | for (j in 1:Ntest) 80 | Omega[i,j] <- eta_sq * (1 + (1/(2*alpha)) * pow(fabs(xtest[i] - xtest[j]) /l_, 2))^-alpha + if_else(i==j, sigma_sq, 0.0); 81 | 82 | # K 83 | for (i in 1:N) 84 | for (j in 1:Ntest) 85 | K[i,j] <- eta_sq * (1 + (1/(2*alpha)) * pow(fabs(x[i] - xtest[j]) /l_, 2))^-alpha; 86 | 87 | K_transpose_div_Sigma <- K' / Sigma; 88 | muTest <- K_transpose_div_Sigma * y; 89 | Tau <- Omega - K_transpose_div_Sigma * K; 90 | 91 | for (i in 1:(Ntest-1)) 92 | for (j in (i+1):Ntest) 93 | Tau[i,j] <- Tau[j,i]; 94 | 95 | L <- cholesky_decompose(Tau); 96 | } 97 | 98 | yRep <- multi_normal_cholesky_rng(muTest, L); 99 | } -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStan_squaredExponentialFactorAnalysis.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | int D; # number of covariates 4 | matrix[N,D] X; # covariate matrix 5 | vector[N] y; # target 6 | int Ntest; # prediction set sample size 7 | matrix[Ntest,D] Xtest; # prediction values for covariate 8 | int K; # number of factors 9 | } 10 | 11 | transformed data { 12 | vector[N] mu; 13 | 14 | mu <- rep_vector(0, N); # mean function 15 | } 16 | 17 | parameters { 18 | real eta_sq; # parameters of squared exponential covariance function 19 | real sigma_sq; # eta_sq + sigma_sq = var of predicted y; eta_sq is variance explained by the function so can put an upper limit 20 | vector[D] l_sq; # characteristic length 21 | real lambda; # factor loadings, for this problem a single value, as one of them will be fixed to 1 for identification purposes 22 | 23 | // real eta_sq_scale; # hyperpriors 24 | // real sigma_sq_scale; 25 | // real l_sq_scale; 26 | } 27 | 28 | transformed parameters { 29 | vector[D] inv_l_sq; 30 | matrix[D,K] L; 31 | 32 | L[1,1] <- 1; 33 | L[2,1] <- lambda; 34 | 35 | for (d in 1:D) inv_l_sq[d] <- 1/l_sq[d]; 36 | } 37 | 38 | model { 39 | matrix[N,N] Sigma; 40 | 41 | 42 | # off-diagonal elements for covariance matrix 43 | for (i in 1:(N-1)) { 44 | for (j in (i+1):N) { 45 | Sigma[i,j] <- eta_sq * exp(-.5 * quad_form(tcrossprod(L) + diag_matrix(inv_l_sq), to_vector(X[i] - X[j]))); 46 | Sigma[j,i] <- Sigma[i,j]; 47 | } 48 | } 49 | 50 | # diagonal elements 51 | for (k in 1:N) 52 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 53 | 54 | # hyperpriors 55 | // eta_sq_scale ~ exponential(.2); 56 | // sigma_sq_scale ~ exponential(.2); 57 | // l_sq_scale ~ exponential(.2); 58 | 59 | # priors 60 | eta_sq ~ cauchy(0, 5); 61 | sigma_sq ~ cauchy(0, 5); 62 | l_sq ~ cauchy(0, 5); 63 | lambda ~ normal(0, 1); 64 | 65 | # sampling distribution 66 | y ~ multi_normal(mu, Sigma); 67 | } 68 | 69 | generated quantities { 70 | vector[Ntest] muTest; # The following produces the posterior predictive draws 71 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 
72 | matrix[Ntest,Ntest] Q; 73 | { 74 | matrix[N,N] Sigma; 75 | matrix[Ntest,Ntest] Omega; 76 | matrix[N,Ntest] K_; 77 | matrix[Ntest,N] K_transpose_div_Sigma; 78 | matrix[Ntest,Ntest] Tau; 79 | 80 | # Sigma 81 | for (i in 1:N) 82 | for (j in 1:N) 83 | Sigma[i,j] <- eta_sq * exp(-.5 * quad_form(tcrossprod(L) + diag_matrix(inv_l_sq), to_vector(X[i] - X[j]))) + if_else(i==j, sigma_sq, 0.0); 84 | 85 | # Omega 86 | for (i in 1:Ntest) 87 | for (j in 1:Ntest) 88 | Omega[i,j] <- eta_sq * exp(-.5 * quad_form(tcrossprod(L) + diag_matrix(inv_l_sq), to_vector(Xtest[i] - Xtest[j]))) + if_else(i==j, sigma_sq, 0.0); 89 | 90 | # K 91 | for (i in 1:N) 92 | for (j in 1:Ntest) 93 | K_[i,j] <- eta_sq * exp(-.5 * quad_form(tcrossprod(L) + diag_matrix(inv_l_sq), to_vector(X[i] - Xtest[j]))); 94 | 95 | K_transpose_div_Sigma <- K_' / Sigma; 96 | muTest <- K_transpose_div_Sigma * y; 97 | Tau <- Omega - K_transpose_div_Sigma * K_; 98 | 99 | for (i in 1:(Ntest-1)) 100 | for (j in (i+1):Ntest) 101 | Tau[i,j] <- Tau[j,i]; 102 | 103 | Q <- cholesky_decompose(Tau); 104 | } 105 | 106 | yRep <- multi_normal_cholesky_rng(muTest, Q); 107 | } -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gprDemoChangeHparams.m: -------------------------------------------------------------------------------- 1 | %% Visualize the effect of change the hyper-params for a 1d GP regression 2 | % based on demo_gpr by Carl Rasmussen 3 | % 4 | %% Generate data 5 | 6 | % This file is from pmtk3.googlecode.com 7 | 8 | n = 20; 9 | rand('state',18); 10 | randn('state',20); 11 | covfunc = {'covSum', {'covSEiso','covNoise'}}; 12 | loghyper = [log(1.0); log(1.0); log(0.1)]; 13 | x = 15*(rand(n,1)-0.5); 14 | y = chol(feval(covfunc{:}, loghyper, x))'*randn(n,1); % Cholesky decomp. 15 | 16 | xstar = linspace(-7.5, 7.5, 201)'; 17 | 18 | hyps = [log(1), log(1), log(0.1);... 19 | log(0.3),log(1.08),log(0.00005);... 
20 | log(3),log(1.16),log(0.89)]; 21 | 22 | %% compute post pred and plot marginals 23 | for i=1:size(hyps,1) 24 | loghyper = hyps(i,:)'; 25 | [mu, S2] = gpr(loghyper, covfunc, x, y, xstar); 26 | S2 = S2 - exp(2*loghyper(3)); % remove observation noise 27 | 28 | figure; 29 | f = [mu+2*sqrt(S2);flipdim(mu-2*sqrt(S2),1)]; 30 | fill([xstar; flipdim(xstar,1)], f, [7 7 7]/8, 'EdgeColor', [7 7 7]/8); 31 | hold on 32 | plot(xstar,mu,'k-','LineWidth',2); 33 | plot(x, y, 'k+', 'MarkerSize', 17); 34 | axis([-8 8 -3 3]) 35 | printPmtkFigure(sprintf('gprDemoChangeHparams%d', i)); 36 | end 37 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gprDemoNoiseFree.m: -------------------------------------------------------------------------------- 1 | %% Reproduce figure 2.2 from GP book 2 | % 3 | %% 4 | 5 | % This file is from pmtk3.googlecode.com 6 | 7 | setSeed(0); 8 | L = 1; 9 | xs = (-5:0.2:5)'; 10 | ns = length(xs); 11 | keps = 1e-8; 12 | muFn = @(x) 0*x(:).^2; 13 | Kfn = @(x,z) 1*exp(-sq_dist(x'/L,z'/L)/2); 14 | 15 | 16 | % plot sampled functions from the prior 17 | figure; hold on 18 | for i=1:3 19 | model = struct('mu', muFn(xs), 'Sigma', Kfn(xs, xs) + 1e-15*eye(size(xs, 1))); 20 | fs = gaussSample(model, 1); 21 | plot(xs, fs, 'k-', 'linewidth', 2) 22 | end 23 | printPmtkFigure('gprDemoNoiseFreePrior') 24 | 25 | 26 | % generate noise-less training data 27 | Xtrain = [-4, -3, -2, -1, 1]'; 28 | ftrain = sin(Xtrain); 29 | 30 | % compute posterior predictive 31 | K = Kfn(Xtrain, Xtrain); % K 32 | Ks = Kfn(Xtrain, xs); %K_* 33 | Kss = Kfn(xs, xs) + keps*eye(length(xs)); % K_** (keps is essential!) 34 | Ki = inv(K); 35 | postMu = muFn(xs) + Ks'*Ki*(ftrain - muFn(Xtrain)); 36 | postCov = Kss - Ks'*Ki*Ks; 37 | 38 | figure; hold on 39 | % plot marginal posterior variance as gray band 40 | mu = postMu(:); 41 | S2 = diag(postCov); 42 | f = [mu+2*sqrt(S2);flipdim(mu-2*sqrt(S2),1)]; 43 | fill([xs; flipdim(xs,1)], f, [7 7 7]/8, 'EdgeColor', [7 7 7]/8); 44 | 45 | % plot samples from posterior predictive 46 | for i=1:3 47 | model = struct('mu', postMu(:)', 'Sigma', postCov); 48 | fs = gaussSample(model, 1); 49 | plot(xs, fs, 'k-', 'linewidth', 2) 50 | h=plot(Xtrain, ftrain, 'kx', 'markersize', 12, 'linewidth', 3); 51 | end 52 | printPmtkFigure('gprDemoNoiseFreePost') 53 | 54 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/stangp.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/gp Examples/stangp.RData -------------------------------------------------------------------------------- /ModelFitting/gradient_descent.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Gradient Descent" 3 | #' author: "Michael Clark" 4 | #' css: '../other.css' 5 | #' highlight: pygments 6 | #' date: "" 7 | #' --- 8 | 9 | #' Gradient descent for a standard linear regression model. The function takes 10 | #' arguments starting points for the parameters to be estimated, a tolerance or 11 | #' maximum iteration value to provide a stopping point, stepsize (or starting 12 | #' stepsize for adaptive approach), whether to print out iterations, and whether 13 | #' to plot the loss over each iteration. 14 | #' 15 | #' 16 | #' 17 | #' # Data Setup 18 | #' 19 | #' Create some basic data for standard regression. 
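#' (As a reminder of what follows: for linear regression the loss is the
#' residual sum of squares, (y - X*beta)'(y - X*beta), and its gradient with
#' respect to beta is 2*X'(X*beta - y); the constant 2 from differentiation is
#' simply absorbed into the stepsize in the `gd` function below.)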
20 | 21 | set.seed(8675309) 22 | 23 | n = 1000 24 | x1 = rnorm(n) 25 | x2 = rnorm(n) 26 | y = 1 + .5*x1 + .2*x2 + rnorm(n) 27 | X = cbind(Intercept = 1, x1, x2) # model matrix 28 | 29 | 30 | 31 | #' # Gradient Descent Algorithm 32 | 33 | 34 | gd = function( 35 | par, 36 | X, 37 | y, 38 | tolerance = 1e-3, 39 | maxit = 1000, 40 | stepsize = 1e-3, 41 | adapt = FALSE, 42 | verbose = TRUE, 43 | plotLoss = TRUE 44 | ) { 45 | 46 | # initialize 47 | beta = par; names(beta) = colnames(X) 48 | loss = crossprod(X %*% beta - y) 49 | tol = 1 50 | iter = 1 51 | 52 | while(tol > tolerance && iter < maxit){ 53 | 54 | LP = X %*% beta 55 | grad = t(X) %*% (LP - y) 56 | betaCurrent = beta - stepsize * grad 57 | tol = max(abs(betaCurrent - beta)) 58 | beta = betaCurrent 59 | loss = append(loss, crossprod(LP - y)) 60 | iter = iter + 1 61 | 62 | if (adapt) 63 | stepsize = ifelse( 64 | loss[iter] < loss[iter - 1], 65 | stepsize * 1.2, 66 | stepsize * .8 67 | ) 68 | 69 | if (verbose && iter %% 10 == 0) 70 | message(paste('Iteration:', iter)) 71 | } 72 | 73 | if (plotLoss) 74 | plot(loss, type = 'l', bty = 'n') 75 | 76 | list( 77 | par = beta, 78 | loss = loss, 79 | RSE = sqrt(crossprod(LP - y) / (nrow(X) - ncol(X))), 80 | iter = iter, 81 | fitted = LP 82 | ) 83 | } 84 | 85 | 86 | #' ## Run 87 | #' 88 | #' Set starting values. 89 | 90 | init = rep(0, 3) 91 | 92 | #' For any particular data you'd have to fiddle with the `stepsize`, which could 93 | #' be assessed via cross-validation, or alternatively one can use an 94 | #' adaptive approach, a simple one of which is implemented in this function. 95 | 96 | gd_result = gd( 97 | init, 98 | X = X, 99 | y = y, 100 | tolerance = 1e-8, 101 | stepsize = 1e-4, 102 | adapt = TRUE 103 | ) 104 | 105 | str(gd_result) 106 | 107 | #' ## Comparison 108 | #' 109 | #' We can compare to standard linear regression. 
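#' Since this is ordinary least squares, the closed-form solution is also
#' available as a quick check and should match both sets of estimates below
#' (`coef_ols` is just an illustrative name).

coef_ols = solve(crossprod(X)) %*% crossprod(X, y)  # (X'X)^{-1} X'y; illustrative name
t(coef_ols)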
110 | 111 | rbind( 112 | gd = round(gd_result$par[, 1], 5), 113 | lm = coef(lm(y ~ x1 + x2)) 114 | ) 115 | 116 | # summary(lm(y ~ x1 + x2)) 117 | 118 | 119 | #' # Source 120 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/gradient_descent.R 121 | -------------------------------------------------------------------------------- /ModelFitting/hmm_viterbi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | From the wikipedia page with slight modification 5 | https://en.wikipedia.org/wiki/Viterbi_algorithm#Example 6 | """ 7 | 8 | def viterbi(obs, states, start_p, trans_p, emit_p): 9 | V = [{}] 10 | 11 | for st in states: 12 | V[0][st] = {"prob": start_p[st] * emit_p[st][obs[0]], "prev": None} 13 | 14 | # Run Viterbi when t > 0 15 | 16 | for t in range(1, len(obs)): 17 | V.append({}) 18 | 19 | for st in states: 20 | max_tr_prob = max(V[t-1][prev_st]["prob"]*trans_p[prev_st][st] for prev_st in states) 21 | 22 | for prev_st in states: 23 | if V[t-1][prev_st]["prob"] * trans_p[prev_st][st] == max_tr_prob: 24 | max_prob = max_tr_prob * emit_p[st][obs[t]] 25 | V[t][st] = {"prob": max_prob, "prev": prev_st} 26 | break 27 | 28 | for line in dptable(V): 29 | print(line) 30 | 31 | opt = [] 32 | 33 | # The highest probability 34 | max_prob = max(value["prob"] for value in V[-1].values()) 35 | previous = None 36 | 37 | # Get most probable state and its backtrack 38 | for st, data in V[-1].items(): 39 | if data["prob"] == max_prob: 40 | opt.append(st) 41 | previous = st 42 | break 43 | 44 | # Follow the backtrack till the first observation 45 | for t in range(len(V) - 2, -1, -1): 46 | opt.insert(0, V[t + 1][previous]["prev"]) 47 | previous = V[t + 1][previous]["prev"] 48 | 49 | print('The steps of states are ' + ' '.join(opt) + ' with highest probability of %s' % max_prob) 50 | 51 | 52 | def dptable(V): 53 | # Print a table of steps from dictionary 54 | yield " ".join(("%12d" % i) for i in range(len(V))) 55 | 56 | for state in V[0]: 57 | yield "%.7s: " % state + " ".join("%.7s" % ("%f" % v[state]["prob"]) for v in V) 58 | 59 | #The function viterbi takes the following arguments: obs is the sequence of observations, e.g. ['normal', 'cold', 'dizzy']; states is the set of hidden states; start_p is the start probability; trans_p are the transition probabilities; and emit_p are the emission probabilities. For simplicity of code, we assume that the observation sequence obs is non-empty and that trans_p[i][j] and emit_p[i][j] is defined for all states i,j. 
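# For the running example below (obs = ('normal', 'cold', 'dizzy')), the
# algorithm returns the state path Healthy -> Healthy -> Fever with highest
# probability 0.01512.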
60 | 61 | #In the running example, the forward/Viterbi algorithm is used as follows: 62 | 63 | 64 | obs = ('normal', 'cold', 'dizzy') 65 | states = ('Healthy', 'Fever') 66 | start_p = {'Healthy': 0.6, 'Fever': 0.4} 67 | trans_p = { 68 | 'Healthy' : {'Healthy': 0.7, 'Fever': 0.3}, 69 | 'Fever' : {'Healthy': 0.4, 'Fever': 0.6} 70 | } 71 | emit_p = { 72 | 'Healthy' : {'normal': 0.5, 'cold': 0.4, 'dizzy': 0.1}, 73 | 'Fever' : {'normal': 0.1, 'cold': 0.3, 'dizzy': 0.6} 74 | } 75 | viterbi(obs,states,start_p,trans_p,emit_p) 76 | -------------------------------------------------------------------------------- /ModelFitting/hurdle.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Hurdle Models" 3 | #' author: "Michael Clark" 4 | #' date: "" 5 | #' --- 6 | #' 7 | #' 8 | #' # Poisson 9 | hurdpoisloglik = function(y, X, par) { 10 | # Extract parameters 11 | logitpars = par[grep('logit', names(par))] 12 | poispars = par[grep('pois', names(par))] 13 | 14 | # Logit model part 15 | Xlogit = X 16 | ylogit = ifelse(y == 0, 0, 1) 17 | 18 | LPlogit = Xlogit %*% logitpars 19 | mulogit = plogis(LPlogit) 20 | 21 | # Calculate the likelihood 22 | logliklogit = -sum( ylogit*log(mulogit) + (1 - ylogit)*log(1 - mulogit) ) 23 | 24 | # Poisson part 25 | Xpois = X[y > 0, ] 26 | ypois = y[y > 0] 27 | 28 | mupois = exp(Xpois %*% poispars) 29 | 30 | # Calculate the likelihood 31 | loglik0 = -mupois 32 | loglikpois = -sum(dpois(ypois, lambda = mupois, log = TRUE)) + sum(log(1 - exp(loglik0))) 33 | 34 | # combine likelihoods 35 | loglik = loglikpois + logliklogit 36 | loglik 37 | } 38 | 39 | 40 | hurdNBloglik = function(y, X, par) { 41 | # Extract parameters 42 | logitpars = par[grep('logit', names(par))] 43 | NegBinpars = par[grep('NegBin', names(par))] 44 | 45 | theta = exp(par[grep('theta', names(par))]) 46 | 47 | # Logit model part 48 | Xlogit = X 49 | ylogit = ifelse(y == 0, 0, 1) 50 | 51 | LPlogit = Xlogit%*%logitpars 52 | mulogit = plogis(LPlogit) 53 | 54 | # Calculate the likelihood 55 | logliklogit = -sum( ylogit*log(mulogit) + (1 - ylogit)*log(1 - mulogit) ) 56 | 57 | #NB part 58 | XNB = X[y > 0, ] 59 | yNB = y[y > 0] 60 | 61 | muNB = exp(XNB %*% NegBinpars) 62 | 63 | # Calculate the likelihood 64 | loglik0 = dnbinom(0, mu = muNB, size = theta, log = TRUE) 65 | loglik1 = dnbinom(yNB, mu = muNB, size = theta, log = TRUE) 66 | loglikNB = -( sum(loglik1) - sum(log(1 - exp(loglik0))) ) 67 | 68 | # combine likelihoods 69 | loglik = loglikNB + logliklogit 70 | loglik 71 | } 72 | 73 | 74 | #' # Data Import 75 | 76 | #' Import a simple data set. Example from the Stata help file for zinb command; 77 | #' can compare results with hnblogit command 78 | 79 | library(haven) 80 | 81 | fish = read_dta("http://www.stata-press.com/data/r11/fish.dta") 82 | 83 | # Get some starting values. 84 | 85 | init_mod = glm( 86 | count ~ persons + livebait, 87 | data = fish, 88 | family = poisson, 89 | x = TRUE, 90 | y = TRUE 91 | ) 92 | 93 | #' for these functions, a named vector for the starting values 94 | starts = c(logit = coef(init_mod), pois = coef(init_mod)) 95 | 96 | #' # Poisson hurdle 97 | 98 | #' Use `optim` to estimate parameters. I fiddle with some options to reproduce the 99 | #' hurdle function as much as possible. 
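#' For reference, the count part of `hurdpoisloglik` above is a zero-truncated
#' Poisson: for a positive count y, P(Y = y | Y > 0) = dpois(y, mu) / (1 - exp(-mu)),
#' since exp(-mu) is the Poisson probability of a zero. A quick numeric
#' illustration with arbitrary values:

dpois(2, lambda = 3) / (1 - exp(-3))  # truncated probability of y = 2 given y > 0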
100 | #' 101 | optPois1 = optim( 102 | par = starts, 103 | fn = hurdpoisloglik, 104 | X = init_mod$x, 105 | y = init_mod$y, 106 | control = list(maxit = 5000, reltol = 1e-12), 107 | hessian = TRUE 108 | ) 109 | # optPois1 110 | 111 | #' Extract the elements from the output to create a summary table. 112 | B = optPois1$par 113 | se = sqrt(diag(solve(optPois1$hessian))) 114 | Z = B/se 115 | p = ifelse(Z >= 0, pnorm(Z, lower = FALSE)*2, pnorm(Z)*2) 116 | summarytable = round(data.frame(B, se, Z, p), 3) 117 | 118 | list(summary = summarytable, ll = optPois1$value) 119 | 120 | #' Compare to hurdle from pscl package. 121 | library(pscl) 122 | 123 | poismod = hurdle( 124 | count ~ persons + livebait, 125 | data = fish, 126 | zero.dist = "binomial", 127 | dist = "poisson" 128 | ) 129 | 130 | summary(poismod) 131 | 132 | 133 | #' # Negative Binomial hurdle 134 | 135 | 136 | starts = c( 137 | logit = coef(init_mod), 138 | NegBin = coef(init_mod), 139 | theta = 1 140 | ) 141 | 142 | optNB1 = optim( 143 | par = starts, 144 | fn = hurdNBloglik, 145 | X = init_mod$x, 146 | y = init_mod$y, 147 | control = list(maxit = 5000, reltol = 1e-12), 148 | method = "BFGS", 149 | hessian = TRUE 150 | ) 151 | # optNB1 152 | 153 | B = optNB1$par 154 | se = sqrt(diag(solve(optNB1$hessian))) 155 | Z = B/se 156 | p = ifelse(Z >= 0, pnorm(Z, lower = FALSE)*2, pnorm(Z)*2) 157 | summarytable = round(data.frame(B, se, Z, p), 3) 158 | list(summary = summarytable, ll = optNB1$value) 159 | 160 | NBmod = hurdle( 161 | count ~ persons + livebait, 162 | data = fish, 163 | zero.dist = "binomial", 164 | dist = "negbin" 165 | ) 166 | 167 | summary(NBmod) 168 | 169 | -------------------------------------------------------------------------------- /ModelFitting/ipw.R: -------------------------------------------------------------------------------- 1 | # Demonstration of a simple marginal structural model for estimation of 2 | # so-called 'causal' effects using inverse probability weighting. 3 | 4 | # Example data is from, and comparison made to, the ipw package. 
See more here: 5 | # https://www.jstatsoft.org/article/view/v043i13/v43i13.pdf 6 | 7 | 8 | # Preliminaries ----------------------------------------------------------- 9 | 10 | library(tidyverse) 11 | library(ipw) 12 | 13 | # Data Setup -------------------------------------------------------------- 14 | 15 | # this example is from the helpfile at ?ipwpoint 16 | set.seed(16) 17 | n <- 1000 18 | simdat <- data.frame(l = rnorm(n, 10, 5)) 19 | a.lin <- simdat$l - 10 20 | pa <- plogis(a.lin) 21 | 22 | simdat <- simdat %>% 23 | mutate( 24 | a = rbinom(n, 1, prob = pa), 25 | y = 10 * a + 0.5 * l + rnorm(n, -10, 5) 26 | ) 27 | 28 | 29 | ipw_result <- ipwpoint( 30 | exposure = a, 31 | family = "binomial", 32 | link = "logit", 33 | numerator = ~ 1, 34 | denominator = ~ l, 35 | data = simdat 36 | ) 37 | 38 | summary(ipw_result$ipw.weights) 39 | ipwplot(ipw_result$ipw.weights) 40 | 41 | 42 | # Create the weights by hand for demonstration ---------------------------- 43 | 44 | ps_num = fitted(glm(a ~ 1, data = simdat, family = 'binomial')) 45 | ps_num[simdat$a == 0] = 1 - ps_num[simdat$a == 0] 46 | 47 | ps_den = fitted(glm(a ~ l, data = simdat, family = 'binomial')) 48 | ps_den[simdat$a == 0] = 1 - ps_den[simdat$a == 0] 49 | 50 | wts = ps_num / ps_den 51 | 52 | # compare 53 | rbind(summary(wts), summary(ipw_result$ipw.weights)) 54 | 55 | # Add inverse probability weights to the data if desired 56 | simdat <- simdat %>% 57 | mutate(sw = ipw_result$ipw.weights) 58 | 59 | 60 | # Marginal Structural Model ----------------------------------------------- 61 | 62 | # Marginal structural model for the causal effect of `a` on `y` corrected for 63 | # confounding by `l` using inverse probability weighting with robust standard 64 | # error from the survey package. 65 | 66 | library("survey") 67 | 68 | msm <- svyglm( 69 | y ~ a, 70 | design = svydesign(~ 1, weights = ~ sw, data = simdat) 71 | ) 72 | 73 | summary(msm) 74 | 75 | # create the likelihood function for using the weights 76 | maxlike = function( 77 | par, # parameters to be estimated; first is taken to be sigma 78 | X, # model matrix 79 | y, # target variable 80 | wts # estimated weights 81 | ) { 82 | beta = par[-1] 83 | lp = X %*% beta 84 | sigma = exp(par[1]) # eponentiated value to stay positive 85 | ll = dnorm(y, mean = lp, sd = sigma, log = TRUE) # weighted likelihood 86 | 87 | -sum(ll*wts) 88 | 89 | # same as 90 | # ll = dnorm(y, mean = lp, sd = sigma)^wts 91 | # -sum(log(ll)) 92 | } 93 | 94 | X = cbind(1, simdat$a) 95 | y = simdat$y 96 | 97 | result = optim( 98 | par = c(sigma = 0, intercept = 0, b = 0), 99 | fn = maxlike, 100 | X = X, 101 | y = y, 102 | wts = wts, 103 | hessian = TRUE, 104 | method = 'BFGS', 105 | control = list(abstol = 1e-12) 106 | ) 107 | 108 | dispersion = exp(result$par[1])^2 109 | beta = result$par[-1] 110 | 111 | 112 | # Compute standard errors ------------------------------------------------- 113 | 114 | # the following is the survey package raw version to get the appropriate 115 | # standard errors, which the ipw approach uses 116 | glm_basic = glm(y ~ a, data = simdat, weights = wts) # to get unscaled cov 117 | res = resid(glm_basic, type = 'working') # residuals 118 | glm_vcov_unsc = summary(glm_basic)$cov.unscaled # weighted vcov unscaled by dispersion solve(crossprod(qr(X))) 119 | estfun = X * res * wts 120 | x = estfun %*% glm_vcov_unsc 121 | 122 | # get standard errors 123 | se = sqrt(diag(crossprod(x)*n/(n-1))) # a robust standard error 124 | se_robust = sqrt(diag(sandwich::sandwich(glm_basic))) # much easier way 
to get it 125 | se_msm = sqrt(diag(vcov(msm))) 126 | 127 | tibble(se, se_robust, se_msm) 128 | 129 | 130 | # Compare results --------------------------------------------------------- 131 | 132 | tibble( 133 | Estimate = beta, 134 | init_se = sqrt(diag(solve(result$hessian)))[c('intercept', 'b')], # same as scaled se from glm_basic 135 | se_robust = se_robust, 136 | t = Estimate/se, 137 | p = 2*pt(abs(t), df = n-ncol(X), lower.tail = FALSE), 138 | dispersion = dispersion 139 | ) 140 | 141 | # compare to msm 142 | broom::tidy(msm) 143 | -------------------------------------------------------------------------------- /ModelFitting/lasso.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: " L1 (lasso) regularization" 3 | #' author: "Michael Clark" 4 | #' css: '../other.css' 5 | #' highlight: pygments 6 | #' date: "" 7 | #' --- 8 | #' 9 | #' See Tibshirani (1996) for the source, or Murphy PML (2012) for a nice 10 | #' overview (watch for typos in depictions). A more conceptual depiction of the 11 | #' lasso can be found in penalized_ML.R. 12 | #' 13 | #' # Coordinate descent 14 | #' 15 | #' 16 | lasso <- function( 17 | X, # model matrix 18 | y, # target 19 | lambda = .1, # penalty parameter 20 | soft = TRUE, # soft vs. hard thresholding 21 | tol = 1e-6, # tolerance 22 | iter = 100, # number of max iterations 23 | verbose = TRUE # print out iteration number 24 | ) { 25 | 26 | # soft thresholding function 27 | soft_thresh <- function(a, b) { 28 | out = rep(0, length(a)) 29 | out[a > b] = a[a > b] - b 30 | out[a < -b] = a[a < -b] + b 31 | out 32 | } 33 | 34 | w = solve(crossprod(X) + diag(lambda, ncol(X))) %*% crossprod(X,y) 35 | tol_curr = 1 36 | J = ncol(X) 37 | a = rep(0, J) 38 | c_ = rep(0, J) 39 | i = 1 40 | 41 | while (tol < tol_curr && i < iter) { 42 | w_old = w 43 | a = colSums(X^2) 44 | l = length(y)*lambda # for consistency with glmnet approach 45 | c_ = sapply(1:J, function(j) sum( X[,j] * (y - X[,-j] %*% w_old[-j]) )) 46 | if (soft) { 47 | for (j in 1:J) { 48 | w[j] = soft_thresh(c_[j]/a[j], l/a[j]) 49 | } 50 | } 51 | else { 52 | w = w_old 53 | w[c_< l & c_ > -l] = 0 54 | } 55 | 56 | tol_curr = crossprod(w - w_old) 57 | i = i + 1 58 | if (verbose && i%%10 == 0) message(i) 59 | } 60 | 61 | w 62 | } 63 | 64 | #' # Data setup 65 | #' 66 | #' 67 | set.seed(8675309) 68 | N = 500 69 | p = 10 70 | X = scale(matrix(rnorm(N*p), ncol=p)) 71 | b = c(.5, -.5, .25, -.25, .125, -.125, rep(0, p-6)) 72 | y = scale(X %*% b + rnorm(N, sd=.5)) 73 | lambda = .1 74 | 75 | 76 | # debugonce(lasso) 77 | 78 | #' Note, if `lambda=0`, result is the same as `lm.fit`. 79 | #' 80 | #' 81 | result_soft = lasso( 82 | X, 83 | y, 84 | lambda = lambda, 85 | tol = 1e-12, 86 | soft = TRUE 87 | ) 88 | 89 | result_hard = lasso( 90 | X, 91 | y, 92 | lambda = lambda, 93 | tol = 1e-12, 94 | soft = FALSE 95 | ) 96 | 97 | 98 | 99 | 100 | #' `glmnet` is by default a mixture of ridge and lasso penalties, setting alpha 101 | #' = 1 reduces to lasso (alpha=0 would be ridge). We set the lambda to a couple 102 | #' values while only wanting the one set to the same lambda value as above (s). 
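#' As an aside before the comparison, the soft-thresholding step in the
#' coordinate descent updates above can be written in one vectorized line;
#' `soft_vec` is an illustrative name for a function equivalent to the
#' loop-based `soft_thresh` inside `lasso`:

soft_vec = function(a, b) sign(a) * pmax(abs(a) - b, 0)  # illustrative vectorized version
soft_vec(c(-2, -.5, 0, .5, 2), b = 1)                    # values inside [-1, 1] shrink to zero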
103 | 104 | 105 | library(glmnet) 106 | 107 | glmnet_res = coef( 108 | glmnet( 109 | X, 110 | y, 111 | alpha = 1, 112 | lambda = c(10, 1, lambda), 113 | thresh = 1e-12, 114 | intercept = FALSE 115 | ), 116 | s = lambda 117 | ) 118 | 119 | library(lassoshooting) 120 | 121 | ls_res = lassoshooting( 122 | X = X, 123 | y = y, 124 | lambda = length(y) * lambda, 125 | thr = 1e-12 126 | ) 127 | 128 | 129 | #' # Comparison 130 | 131 | data.frame( 132 | lm = coef(lm(y ~ . - 1, data.frame(X))), 133 | lasso_soft = result_soft, 134 | lasso_hard = result_hard, 135 | lspack = ls_res$coef, 136 | glmnet = glmnet_res[-1, 1], 137 | truth = b 138 | ) 139 | 140 | 141 | 142 | #' # Source 143 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/lasso.R 144 | 145 | 146 | #' for some more detailed R code, check out 147 | #' http://jocelynchi.com/a-coordinate-descent-algorithm-for-the-lasso-problem (now defunct, but might find relevant article at the website) 148 | 149 | -------------------------------------------------------------------------------- /ModelFitting/naivebayes.R: -------------------------------------------------------------------------------- 1 | # Naive bayes demo for binary data 2 | 3 | 4 | # Initialization ---------------------------------------------------------- 5 | 6 | # generate some data 7 | 8 | set.seed(123) 9 | 10 | x = matrix(sample(0:1, 50, replace = TRUE), ncol = 5) 11 | xf = data.frame(lapply(data.frame(x), factor)) 12 | y = sample(0:1, 10, prob = c(.25, .75), replace = TRUE) 13 | 14 | 15 | # use e1071 for comparison 16 | 17 | library(e1071) 18 | 19 | m = naiveBayes(xf, y) 20 | 21 | m 22 | 23 | 24 | 25 | # Base R approach ----------------------------------------------------------- 26 | 27 | lapply(xf, function(var) 28 | t(prop.table(table(' ' = var, y), margin = 2))) 29 | -------------------------------------------------------------------------------- /ModelFitting/nelder_mead.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | import copy 5 | 6 | ''' 7 | Francois Chollet's Nelder-Mead in Python. 8 | https://github.com/fchollet/nelder-mead/blob/master/nelder_mead.py 9 | Pure Python/Numpy implementation of the Nelder-Mead algorithm. 10 | Reference: https://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method 11 | ''' 12 | 13 | 14 | def nelder_mead( 15 | f, 16 | x_start, 17 | step = 0.1, 18 | no_improve_thr = 10e-6, 19 | no_improv_break = 10, 20 | max_iter = 0, 21 | alpha = 1., 22 | gamma = 2., 23 | rho = 0.5, 24 | sigma = 0.5 25 | ): 26 | ''' 27 | @param f (function): function to optimize, must return a scalar score 28 | and operate over a numpy array of the same dimensions as x_start 29 | @param x_start (numpy array): initial position 30 | @param step (float): look-around radius in initial step 31 | @no_improv_thr, no_improv_break (float, int): break after no_improv_break iterations with 32 | an improvement lower than no_improv_thr 33 | @max_iter (int): always break after this number of iterations. 34 | Set it to 0 to loop indefinitely. 
35 | @alpha, gamma, rho, sigma (floats): parameters of the algorithm 36 | (see Wikipedia page for reference) 37 | return: tuple (best parameter array, best score) 38 | ''' 39 | 40 | # init 41 | dim = len(x_start) 42 | prev_best = f(x_start) 43 | no_improv = 0 44 | res = [[x_start, prev_best]] 45 | 46 | for i in range(dim): 47 | x = copy.copy(x_start) 48 | x[i] = x[i] + step 49 | score = f(x) 50 | res.append([x, score]) 51 | 52 | # simplex iter 53 | iters = 0 54 | while 1: 55 | # order 56 | res.sort(key=lambda x: x[1]) 57 | best = res[0][1] 58 | 59 | # break after max_iter 60 | if max_iter and iters >= max_iter: 61 | return res[0] 62 | iters += 1 63 | 64 | # break after no_improv_break iterations with no improvement 65 | print('...best so far:', best) 66 | 67 | if best < prev_best - no_improve_thr: 68 | no_improv = 0 69 | prev_best = best 70 | else: 71 | no_improv += 1 72 | 73 | if no_improv >= no_improv_break: 74 | return res[0] 75 | 76 | # centroid 77 | x0 = [0.] * dim 78 | for tup in res[:-1]: 79 | for i, c in enumerate(tup[0]): 80 | x0[i] += c / (len(res)-1) 81 | 82 | # reflection 83 | xr = x0 + alpha*(x0 - res[-1][0]) 84 | rscore = f(xr) 85 | if res[0][1] <= rscore < res[-2][1]: 86 | del res[-1] 87 | res.append([xr, rscore]) 88 | continue 89 | 90 | # expansion 91 | if rscore < res[0][1]: 92 | xe = x0 + gamma*(x0 - res[-1][0]) 93 | escore = f(xe) 94 | if escore < rscore: 95 | del res[-1] 96 | res.append([xe, escore]) 97 | continue 98 | else: 99 | del res[-1] 100 | res.append([xr, rscore]) 101 | continue 102 | 103 | # contraction 104 | xc = x0 + rho*(x0 - res[-1][0]) 105 | cscore = f(xc) 106 | if cscore < res[-1][1]: 107 | del res[-1] 108 | res.append([xc, cscore]) 109 | continue 110 | 111 | # reduction 112 | x1 = res[0][0] 113 | nres = [] 114 | for tup in res: 115 | redx = x1 + sigma*(tup[0] - x1) 116 | score = f(redx) 117 | nres.append([redx, score]) 118 | res = nres 119 | 120 | 121 | if __name__ == "__main__": 122 | # test 123 | import math 124 | import numpy as np 125 | 126 | def f(x): 127 | return math.sin(x[0]) * math.cos(x[1]) * (1. / (abs(x[2]) + 1)) 128 | 129 | nelder_mead(f, np.array([0., 0., 0.])) 130 | -------------------------------------------------------------------------------- /ModelFitting/newton_irls.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: " GLM estimation" 3 | #' subtitle: "Newton and IRLS" 4 | #' author: "Michael Clark" 5 | #' css: '../other.css' 6 | #' highlight: pygments 7 | #' date: "" 8 | #' --- 9 | #' 10 | #' # GLM estimation examples 11 | 12 | #' Examples of maximum likelihood estimation via a variety of means. See the 13 | #' gradientdescent.R script for that approach. Here we demonstrate Newton's and 14 | #' Iterated Reweighted Least Squares approaches via logistic regression. 15 | #' 16 | #' 17 | #' For the following, I had Murphy's PML text open and more or less followed the 18 | #' algorithms in chapter 8. Note that for Newton's method, this doesn't 19 | #' implement a line search to find a more optimal stepsize at a given iteration. 20 | #' 21 | #' # Data Prep 22 | #' 23 | #' Predict graduate school admission based on gre, gpa, and school rank 24 | #' (higher=more prestige). See corresponding demo here: 25 | #' https://stats.idre.ucla.edu/stata/dae/logistic-regression/. The only 26 | #' difference is that I treat rank as numeric rather than categorical. 
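#' For reference, the updates implemented below: Newton's method iterates
#' beta <- beta - stepsize * solve(H) %*% g, with gradient g = X'(mu - y) and
#' Hessian H = X'SX, where S = diag(mu(1 - mu)); IRLS instead solves the
#' equivalent weighted least squares problem with working response
#' z = eta + (y - mu)/s at each iteration.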
27 |
28 |
29 | admit = haven::read_dta('https://stats.idre.ucla.edu/stat/stata/dae/binary.dta')
30 |
31 | comparison_model = glm(admit ~ gre + gpa + rank, data = admit, family = binomial)
32 |
33 | summary(comparison_model)
34 |
35 | X = model.matrix(comparison_model)
36 | y = comparison_model$y
37 |
38 |
39 | #' # Newton's method
40 |
41 | newton <- function(
42 | X,
43 | y,
44 | tol = 1e-12,
45 | iter = 500,
46 | stepsize = .5
47 | ) {
48 |
49 | # Args:
50 | # X: model matrix
51 | # y: target
52 | # tol: tolerance
53 | # iter: maximum number of iterations
54 | # stepsize: (0, 1)
55 |
56 | # initialize
57 | int = log(mean(y) / (1 - mean(y))) # intercept
58 | beta = c(int, rep(0, ncol(X) - 1))
59 | currtol = 1
60 | it = 0
61 | ll = 0
62 |
63 | while (currtol > tol && it < iter) {
64 | it = it + 1
65 | ll_old = ll
66 |
67 | mu = plogis(X %*% beta)[,1]
68 | g = crossprod(X, mu-y) # gradient
69 | S = diag(mu*(1-mu))
70 | H = t(X) %*% S %*% X # hessian
71 | beta = beta - stepsize * solve(H) %*% g
72 |
73 | ll = sum(dbinom(y, prob = mu, size = 1, log = TRUE))
74 | currtol = abs(ll - ll_old)
75 | }
76 |
77 | list(
78 | beta = beta,
79 | iter = it,
80 | tol = currtol,
81 | loglik = ll
82 | )
83 | }
84 |
85 |
86 | newton_result = newton(
87 | X = X,
88 | y = y,
89 | stepsize = .9,
90 | tol = 1e-8 # tol set to 1e-8 as in glm default
91 | )
92 |
93 | newton_result
94 | comparison_model
95 |
96 | rbind(
97 | newton = unlist(newton_result),
98 | glm_default = c(
99 | beta = coef(comparison_model),
100 | comparison_model$iter,
101 | tol = NA,
102 | loglik = logLik(comparison_model)
103 | )
104 | )
105 |
106 |
107 | #' # IRLS
108 | #' Note that `glm` is actually using IRLS, so the results from this should be
109 | #' fairly spot on.
110 |
111 | irls <- function(X, y, tol = 1e-12, iter = 500) {
112 |
113 | # initialize
114 | int = log(mean(y) / (1 - mean(y))) # intercept
115 | beta = c(int, rep(0, ncol(X) - 1))
116 | currtol = 1
117 | it = 0
118 | ll = 0
119 |
120 | while (currtol > tol && it < iter) {
121 | it = it + 1
122 | ll_old = ll
123 |
124 | eta = X %*% beta
125 | mu = plogis(eta)[,1]
126 | s = mu * (1 - mu)
127 | S = diag(s)
128 | z = eta + (y-mu)/s
129 | beta = solve(t(X) %*% S %*% X) %*% (t(X) %*% (S %*% z))
130 |
131 | ll = sum(
132 | dbinom(
133 | y,
134 | prob = plogis(X %*% beta),
135 | size = 1,
136 | log = TRUE
137 | )
138 | )
139 |
140 | currtol = abs(ll - ll_old)
141 | }
142 |
143 | list(
144 | beta = beta,
145 | iter = it,
146 | tol = currtol,
147 | loglik = ll,
148 | weights = plogis(X %*% beta) * (1 - plogis(X %*% beta))
149 | )
150 | }
151 |
152 | #' `tol` set to 1e-8 as in `glm` default.
153 | irls_result = irls(X = X, y = y, tol = 1e-8)
154 |
155 | str(irls_result)
156 | comparison_model
157 |
158 | #' # Comparison
159 | #'
160 | #' Compare all results.
161 | rbind( 162 | newton = unlist(newton_result), 163 | irls = unlist(irls_result[-length(irls_result)]), 164 | glm_default = c( 165 | beta = coef(comparison_model), 166 | comparison_model$iter, 167 | tol = NA, 168 | loglik = logLik(comparison_model) 169 | ) 170 | ) 171 | 172 | 173 | #' compare weights 174 | head(cbind(irls_result$weights, 175 | comparison_model$weights)) 176 | 177 | 178 | 179 | #' # Source 180 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/newton_irls.R -------------------------------------------------------------------------------- /ModelFitting/ordinal_regression.R: -------------------------------------------------------------------------------- 1 | # The following demonstrates a standard cumulative link ordinal regression model 2 | # via maximum likelihood. Default is with probit link function. Alternatively 3 | # you can compare it with a logit link, which will result in values roughly 4 | # 1.7*parameters estimates from the probit. 5 | 6 | 7 | ll_ord = function(par, X, y, probit = TRUE) { 8 | K = length(unique(y)) # number of classes K 9 | ncuts = K-1 # number of cutpoints/thresholds 10 | cuts = par[(1:ncuts)] # cutpoints 11 | beta = par[-(1:ncuts)] # regression coefficients 12 | lp = X %*% beta # linear predictor 13 | ll = rep(0, length(y)) # log likelihood 14 | pfun = ifelse(probit, pnorm, plogis) # which link to use 15 | 16 | for(k in 1:K){ 17 | if (k==1) { 18 | ll[y==k] = pfun((cuts[k] - lp[y==k]), log = TRUE) 19 | } 20 | else if (k < K) { 21 | ll[y==k] = log(pfun(cuts[k] - lp[y==k]) - pfun(cuts[k-1] - lp[y==k])) 22 | } 23 | else { 24 | ll[y==k] = log(1 - pfun(cuts[k-1] - lp[y==k])) 25 | } 26 | } 27 | 28 | -sum(ll) 29 | } 30 | 31 | # data generation from the probit perspective, where the underlying continuous 32 | # latent variable is normally distributed 33 | 34 | set.seed(808) 35 | N = 1000 # Sample size 36 | x = cbind(x1 = rnorm(N), x2 = rnorm(N)) # predictor variables 37 | beta = c(1,-1) # coefficients 38 | y_star = rnorm(N, mean = x %*% beta) # the underlying latent variable 39 | y_1 = y_star > -1.5 # -1.5 first cutpoint 40 | y_2 = y_star > .75 # .75 second cutpoint 41 | y_3 = y_star > 1.75 # 1.75 third cutpoint 42 | y = y_1 + y_2 + y_3 + 1 # target 43 | 44 | table(y) 45 | 46 | d = data.frame(x, y = factor(y)) 47 | 48 | init = c(-1, 1, 2, 0, 0) # initial values 49 | 50 | result_probit = optim( 51 | init, 52 | ll_ord, 53 | y = y, 54 | X = x, 55 | probit = TRUE, 56 | control = list(reltol = 1e-10) 57 | ) 58 | 59 | result_logit = optim( 60 | init, 61 | ll_ord, 62 | y = y, 63 | X = x, 64 | probit = FALSE, 65 | control = list(reltol = 1e-10) 66 | ) 67 | 68 | # compare with ordinal package 69 | library(ordinal) 70 | result_ordpack_probit = clm(y ~ x1 + x2, data = d, link = 'probit') 71 | result_ordpack_logit = clm(y ~ x1 + x2, data = d, link = 'logit') 72 | 73 | 74 | resprobit = data.frame(method = c('ll_ord', 'ordpack'), 75 | rbind(coef(result_probit), coef(result_ordpack_probit))) 76 | colnames(resprobit) = c('method', paste0('cut_', 1:3), 'beta1', 'beta2') 77 | 78 | resprobit 79 | 80 | reslogit = data.frame(method = c('ll_ord', 'ordpack'), 81 | rbind(coef(result_logit), coef(result_ordpack_logit))) 82 | colnames(reslogit) = c('method', paste0('cut_', 1:3), 'beta1', 'beta2') 83 | 84 | reslogit 85 | -------------------------------------------------------------------------------- /ModelFitting/poiszeroinfl.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | 
#' title: "Zero-inflated Poisson Model" 3 | #' author: "Michael Clark" 4 | #' date: "" 5 | #' --- 6 | #' 7 | 8 | 9 | #' Log likelihood function to estimate parameters for a Zero-inflated Poisson model. With examples 10 | #' and comparison to pscl package output. Also includes approach based on Hilbe GLM text. 11 | #' see also: https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/NBzeroinfl.R 12 | 13 | 14 | ZIP = function(y, X, par) { 15 | # arguments are response y, predictor matrix X, and parameter named starting points of 'logit' and 'pois' 16 | 17 | # Extract parameters 18 | logitpars = par[grep('logit', names(par))] 19 | poispars = par[grep('pois', names(par))] 20 | 21 | # Logit part; in this function Xlogit = Xpois but one could split X argument into Xlogi and Xpois for example 22 | Xlogit = X 23 | LPlogit = Xlogit %*% logitpars 24 | logi0 = plogis(LPlogit) # alternative 1/(1+exp(-LPlogit)) 25 | 26 | # Poisson part 27 | Xpois = X 28 | mupois = exp(Xpois %*% poispars) 29 | 30 | # LLs 31 | logliklogit = log( logi0 + exp(log(1 - logi0) - mupois) ) 32 | loglikpois = log(1 - logi0) + dpois(y, lambda = mupois, log = TRUE) 33 | 34 | # Hilbe formulation 35 | # logliklogit = log(logi0 + (1 - logi0)*exp(- mupois) ) 36 | # loglikpois = log(1-logi0) -mupois + log(mupois)*y #not necessary: - log(gamma(y+1)) 37 | 38 | y0 = y == 0 # 0 values 39 | yc = y > 0 # Count part 40 | 41 | loglik = sum(logliklogit[y0]) + sum(loglikpois[yc]) 42 | -loglik 43 | } 44 | 45 | #' Get the data 46 | library(haven) 47 | library(pscl) 48 | 49 | fish = read_dta("http://www.stata-press.com/data/r11/fish.dta") 50 | 51 | 52 | #' Get starting values or simply do zeros 53 | #' for this function, a named vector for the starting values 54 | #' for zip: need 'logit', 'pois' 55 | init.mod = glm( 56 | count ~ persons + livebait, 57 | data = fish, 58 | x = TRUE, 59 | y = TRUE, 60 | "poisson" 61 | ) 62 | 63 | # starts = c(logit = coef(init.mod), pois = coef(init.mod)) 64 | starts = c(rep(0, 3), rep(0, 3)) 65 | names(starts) = c(paste0('pois.', names(coef(init.mod))), 66 | paste0('logit.', names(coef(init.mod)))) 67 | 68 | 69 | #' Estimate with optim function 70 | optPois1 = optim( 71 | par = starts , 72 | fn = ZIP, 73 | X = init.mod$x, 74 | y = init.mod$y, 75 | method = "BFGS", 76 | control = list(maxit = 5000, reltol = 1e-12), 77 | hessian = TRUE 78 | ) 79 | 80 | # optPois1 81 | 82 | 83 | #' Comparison 84 | # Extract for clean display 85 | B = optPois1$par 86 | se = sqrt(diag(solve((optPois1$hessian)))) 87 | Z = B/se 88 | p = pnorm(abs(Z), lower = FALSE)*2 89 | 90 | # pscl results 91 | zipoismod = zeroinfl(count ~ persons + livebait, data = fish, dist = "poisson") 92 | summary(zipoismod) 93 | round(data.frame(B, se, Z, p), 4) 94 | -------------------------------------------------------------------------------- /ModelFitting/quantile_regression.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Quantile Regression" 3 | author: "Michael Clark" 4 | date: "February 15, 2016" 5 | output: 6 | html_document: 7 | css: ../other.css 8 | theme: united 9 | --- 10 | 11 | ```{r setup, include=FALSE} 12 | knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE, R.options=list(width=120)) 13 | ``` 14 | 15 | ### Simple quantile regression function and demo 16 | 17 | Loss function 18 | 19 | ```{r} 20 | library(tidyverse) 21 | 22 | qreg = function(par, X, y, tau){ 23 | lp = X%*%par 24 | res = y - lp 25 | loss = ifelse(res < 0 , -(1-tau)*res, tau*res) 26 | sum(loss) 27 
| } 28 | ``` 29 | 30 | 31 | 32 | ### Data Prep 33 | 34 | We'll use the `quantreg` package for comparison, and the classic data set on Belgian household income and food expenditure. Scale income if you want a meaningful centercept (the intercept evaluated at mean income). 35 | 36 | ```{r} 37 | library(quantreg) 38 | data(engel) 39 | ### engel$income = scale(engel$income) 40 | ``` 41 | 42 | ### Median estimation 43 | 44 | Compare `optim` output with the `quantreg` package. 45 | 46 | ```{r} 47 | optim( 48 | par = c(0, 0), 49 | fn = qreg, 50 | X = cbind(1, engel$income), 51 | y = engel$foodexp, 52 | tau = .5 53 | )$par 54 | 55 | rq(foodexp ~ income, tau = .5, data = engel) 56 | ``` 57 | 58 | 59 | ### Other quantiles 60 | 61 | ```{r} 62 | # quantiles 63 | qs = c(.05, .1, .25, .5, .75, .9, .95) 64 | 65 | resrq = coef(rq(foodexp ~ income, tau = qs, data = engel)) 66 | 67 | resoptim = sapply(qs, function(tau) 68 | optim( 69 | par = c(0, 0), 70 | fn = qreg, 71 | X = cbind(1, engel$income), 72 | y = engel$foodexp, 73 | tau = tau 74 | )$par) 75 | 76 | 77 | # compare results 78 | 79 | rbind(resrq, resoptim) %>% round(2) 80 | ``` 81 | 82 | ### Visualize 83 | 84 | ```{r, results='hide'} 85 | engel %>% 86 | qplot(data = ., 87 | income, 88 | foodexp, 89 | color = I(scales::alpha('orange', .25))) + 90 | geom_abline(aes( 91 | intercept = X1, 92 | slope = X2, 93 | color = group 94 | ), 95 | data = data.frame(t(resoptim), group = factor(qs))) 96 | ``` 97 | -------------------------------------------------------------------------------- /ModelFitting/ridge.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "L2 (ridge) regularization" 3 | #' author: "Michael Clark" 4 | #' css: '../other.css' 5 | #' highlight: pygments 6 | #' date: "" 7 | #' --- 8 | #' 9 | #' Compare to lasso.R. A more conceptual depiction of the lasso can be found in 10 | #' penalized_ML.R. 11 | 12 | 13 | ridge <- function(w, X, y, lambda = .1) { 14 | # X: model matrix; 15 | # y: target; 16 | # lambda: penalty parameter; 17 | # w: the weights/coefficients 18 | 19 | crossprod(y - X %*% w) + lambda * length(y) * crossprod(w) 20 | } 21 | 22 | 23 | set.seed(8675309) 24 | N = 500 25 | p = 10 26 | X = scale(matrix(rnorm(N * p), ncol = p)) 27 | b = c(.5, -.5, .25, -.25, .125, -.125, rep(0, 4)) 28 | y = scale(X %*% b + rnorm(N, sd = .5)) 29 | 30 | #' Note, if `lambda=0`, the result is the same as `lm.fit`. 31 | #' 32 | #' 33 | result_ridge = optim( 34 | rep(0, ncol(X)), 35 | ridge, 36 | X = X, 37 | y = y, 38 | lambda = .1, 39 | method = 'BFGS' 40 | ) 41 | 42 | #' Analytical result. 43 | #' 44 | result_ridge2 = solve(crossprod(X) + diag(length(y)*.1, ncol(X))) %*% crossprod(X, y) 45 | 46 | #' Alternative with augmented data (note sigma ignored as it equals 1, but otherwise 47 | #' X/sigma and y/sigma). 48 | #' 49 | X2 = rbind(X, diag(sqrt(length(y)*.1), ncol(X))) 50 | y2 = c(y, rep(0, ncol(X))) 51 | result_ridge3 = solve(crossprod(X2)) %*% crossprod(X2, y2) 52 | 53 | 54 | 55 | 56 | #' `glmnet` uses by default a mixture of ridge and lasso penalties; setting alpha 57 | #' = 1 reduces to the lasso, while alpha = 0 gives ridge. 58 | 59 | 60 | library(glmnet) 61 | glmnet_res = coef( 62 | glmnet( 63 | X, 64 | y, 65 | alpha = 0, 66 | lambda = c(10, 1, .1), 67 | thresh = 1e-12, 68 | intercept = F 69 | ), 70 | s = .1 71 | ) 72 | 73 | #' # Comparison 74 | 75 | data.frame( 76 | lm = coef(lm(y ~ .
- 1, data.frame(X))), 77 | ridge = result_ridge$par, 78 | ridge2 = result_ridge2, 79 | ridge3 = result_ridge3, 80 | glmnet = glmnet_res[-1, 1], 81 | truth = b 82 | ) 83 | 84 | 85 | 86 | 87 | #' # Source 88 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/ridge.R -------------------------------------------------------------------------------- /ModelFitting/standard_logistic.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Standard Logistic Models" 3 | #' author: "Michael Clark" 4 | #' css: '../other.css' 5 | #' highlight: pygments 6 | #' date: "" 7 | #' --- 8 | #' 9 | #' 10 | #' A standard logistic regression model via maximum likelihood or exponential 11 | #' loss. It can serve as an entry point into the wider world of 12 | #' computational statistics, as maximum likelihood is the fundamental approach used 13 | #' in most applied statistics and is also a key aspect of the Bayesian 14 | #' approach. Exponential loss is not confined to the standard glm setting, but 15 | #' is widely used in more predictive/'algorithmic' approaches, e.g. in 16 | #' machine learning and elsewhere. 17 | #' 18 | #' This follows the standard_lm.R script. 19 | #' 20 | #' 21 | #' 22 | #' # Data Setup 23 | #' 24 | 25 | set.seed(1235) # ensures replication 26 | 27 | 28 | # predictors and target 29 | 30 | N = 10000 # sample size 31 | k = 2 # number of desired predictors 32 | X = matrix(rnorm(N * k), ncol = k) 33 | 34 | # the linear predictor 35 | lp = -.5 + .2 * X[, 1] + .1 * X[, 2] # increasing N will get estimated values closer to these 36 | 37 | y = rbinom(N, size = 1, prob = plogis(lp)) 38 | 39 | dfXy = data.frame(X, y) 40 | 41 | 42 | 43 | #' 44 | #' # Functions 45 | #' 46 | #' A maximum likelihood approach. 47 | 48 | logreg_ML = function(par, X, y) { 49 | # arguments- 50 | # par: parameters to be estimated 51 | # X: predictor matrix with intercept column 52 | # y: response 53 | 54 | # setup 55 | beta = par # coefficients 56 | N = nrow(X) 57 | 58 | # linear predictor 59 | LP = X %*% beta # linear predictor 60 | mu = plogis(LP) # inverse logit link, i.e. the estimated probability 61 | 62 | # calculate likelihood 63 | L = dbinom(y, size = 1, prob = mu, log = TRUE) # log likelihood 64 | # L = y*log(mu) + (1 - y)*log(1-mu) # alternate log likelihood form 65 | 66 | -sum(L) # optim by default is minimization, and we want to maximize the likelihood 67 | # (see also fnscale in optim.control) 68 | } 69 | 70 | # An equivalent approach via exponential loss function. 71 | 72 | logreg_exp = function(par, X, y) { 73 | # arguments- 74 | # par: parameters to be estimated 75 | # X: predictor matrix with intercept column 76 | # y: response 77 | 78 | # setup 79 | beta = par # coefficients 80 | 81 | # linear predictor 82 | LP = X %*% beta # linear predictor 83 | 84 | # calculate the exponential loss (convert y to -1:1 from 0:1) 85 | sum(exp(-ifelse(y, 1, -1) * .5 * LP)) 86 | } 87 | 88 | 89 | #' # Obtain Model Estimates 90 | #' Setup for use with `optim`.
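#'
#' An aside on why the two objectives should agree: with y coded -1/1, the population minimizer of E[exp(-y*f/2)] is f = log(p/(1-p)), i.e. the logistic-scale linear predictor, so minimizing exponential loss should recover roughly the same coefficients as maximum likelihood. The following is a minimal numeric sketch of that claim (the probability .7 is illustrative only, not part of the original script):

p_check = .7

# minimize the expected exponential loss at a single fixed probability...
optimize(function(f) p_check * exp(-f / 2) + (1 - p_check) * exp(f / 2), c(-5, 5))$minimum

# ...and compare to the logit of that probability; both are ~ .847
qlogis(p_check)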
91 | 92 | X = cbind(1, X) 93 | 94 | # initial values 95 | 96 | init = rep(0, ncol(X)) 97 | names(init) = c('intercept', 'b1', 'b2') 98 | 99 | optlmML = optim( 100 | par = init, 101 | fn = logreg_ML, 102 | X = X, 103 | y = y, 104 | control = list(reltol = 1e-8) 105 | ) 106 | 107 | optglmClass = optim( 108 | par = init, 109 | fn = logreg_exp, 110 | X = X, 111 | y = y, 112 | control = list(reltol = 1e-15) 113 | ) 114 | 115 | pars_ML = optlmML$par 116 | pars_exp = optglmClass$par 117 | 118 | 119 | #' # Comparison 120 | #' 121 | #' Compare to `glm`. 122 | 123 | modglm = glm(y ~ ., dfXy, family = binomial) 124 | 125 | rbind( 126 | pars_ML, 127 | pars_exp, 128 | pars_GLM = coef(modglm) 129 | ) 130 | 131 | 132 | #' # Source 133 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/standard_logistic.R -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/compoundInterest.R: -------------------------------------------------------------------------------- 1 | # a (yearly) compound interest calculator to demonstrate recursion in a simple fashion 2 | 3 | compoundInterest <- function(principal, years, rate, yearlybump) { 4 | if(years==0) return(principal) 5 | 6 | if(years>0) compoundInterest(principal=principal*(1+rate) + yearlybump, years=years-1, rate=rate, yearlybump=yearlybump) 7 | } 8 | 9 | compoundInterest(principal=10000, years=10, rate=.05, yearlybump=1000) 10 | 11 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/fillbyLastRecursive.R: -------------------------------------------------------------------------------- 1 | # A simple function to fill NAs by the last value; not something one should 2 | # regularly do but it does come up from time to time for variables such as age, year 3 | # etc. Just an exercise in recursion. 4 | 5 | y = rnorm(100) 6 | y2 = y 7 | y2[sample(100, 30)] = NA 8 | 9 | fillbyLast = function(x){ 10 | if (length(x) == 1){ 11 | return(x) 12 | } else { 13 | if (is.na(x[2])){ 14 | x[2] = x[1] 15 | } 16 | c(x[1], fillbyLast(x[2:length(x)])) 17 | } 18 | } 19 | 20 | y3 = fillbyLast(y2) 21 | cbind(y, y2, y3) 22 | 23 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/fizzbuzz.R: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | ### See for example: http://c2.com/cgi/wiki?FizzBuzzTest ### 3 | ### The stated goal is to take a sequence and change anything ### 4 | ### that is a multiple of 3 to 'Fizz', multiples of 5 to ### 5 | ### 'Buzz', and any multiples of both to 'FizzBuzz'. ### 6 | ### R makes it easy to generalize to any sequence/multiples ### 7 | ### and does so very efficiently.
### 8 | ################################################################# 9 | 10 | 11 | ## clean, clear, gets the job done; also generalizes beyond common example 12 | fizzbuzz = function(min, max, num1, num2){ 13 | x_ = min:max 14 | x = x_ 15 | x[x_%%num1 == 0] = "Fizz" 16 | x[x_%%num2 == 0] = "Buzz" 17 | x[x_%%(num1*num2) == 0] = "FizzBuzz" # note: num1*num2 is their least common multiple only when num1 and num2 are coprime 18 | x 19 | } 20 | 21 | fizzbuzz(1, 100, 3, 5) 22 | fizzbuzz(-50, 50, 7, 4) 23 | 24 | ## One-liner, just because you can 25 | fizzbuzzOneLine = function(min, max, num1, num2){ 26 | sapply(min:max, function(val) ifelse(val%%(num1*num2) == 0, "FizzBuzz", 27 | ifelse(val%%num1==0, 'Fizz', 28 | ifelse(val%%num2==0, 'Buzz', val)))) 29 | } 30 | 31 | fizzbuzzOneLine(1, 100, 3, 5) 32 | 33 | ## recursive for even more flexibility 34 | fizzbuzzRecursive = function(x, nums=c(15,3,5), nams=c('FizzBuzz','Fizz', 'Buzz')){ 35 | if (length(nums) == 0) {paste(x)} 36 | else { 37 | x = sapply(x, function(val) ifelse(!is.na(suppressWarnings(as.numeric(val))) && as.numeric(val) %% nums[1] == 0, nams[1], val)) 38 | fizzbuzzRecursive(x, nums[-1], nams[-1]) 39 | } 40 | } 41 | 42 | fizzbuzzRecursive(1:100, nums=c(15,3,5), nams=c('FizzBuzz','Fizz', 'Buzz')) 43 | 44 | 45 | cbind(fizzbuzz(1, 100, 3, 5), fizzbuzzOneLine(1, 100, 3, 5), fizzbuzzRecursive(1:100, nums=c(15,3,5), nams=c('FizzBuzz','Fizz', 'Buzz'))) 46 | cbind(fizzbuzz(-50, 50, 7, 4), fizzbuzzOneLine(-50, 50, 7, 4), fizzbuzzRecursive(-50:50, nums=c(28,7,4), nams=c('FizzBuzz','Fizz', 'Buzz'))) 47 | 48 | fizzbuzzRecursive(-50:50, nums=c(28, 21, 12, 7, 4, 3), nams=c('FizzBuzz','FizzKapow', 'BuzzKapow', 'Fizz', 'Buzz', 'Kapow')) 49 | 50 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/fizzbuzz.jl: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | ### See for example: http://c2.com/cgi/wiki?FizzBuzzTest ### 3 | ### The stated goal is to take a sequence and change anything ### 4 | ### that is a multiple of 3 to 'Fizz', multiples of 5 to ### 5 | ### 'Buzz', and any multiples of both to 'FizzBuzz'. ### 6 | ################################################################# 7 | 8 | 9 | # R example. R makes it easy to generalize to any sequence/multiples and does so very efficiently. 10 | 11 | #fizzbuzz = function(min, max, num1, num2){ 12 | # x_ = min:max 13 | # x = x_ 14 | # 15 | # x[x_%%num1 == 0] = "Fizz" 16 | # x[x_%%num2 == 0] = "Buzz" 17 | # x[x_%%(num1*num2) == 0] = "FizzBuzz" 18 | # x 19 | # } 20 | 21 | # fizzbuzz(1, 100, 3, 5) 22 | # fizzbuzz(-50, 50, 7, 4) 23 | 24 | 25 | 26 | # The following is one way to FizzBuzz with a standard loop approach. 27 | 28 | function fizzbuzz1(nmin, nmax, num1, num2) 29 | x0 = [nmin:nmax] 30 | x = Array(Any, length(x0)) 31 | 32 | for i in 1:length(x0) 33 | if x0[i]%(num1*num2)==0 34 | x[i] = "FizzBuzz" 35 | elseif x0[i]%num2==0 36 | x[i] = "Buzz" 37 | elseif x0[i]%num1==0 38 | x[i] = "Fizz" 39 | else 40 | x[i] = x0[i] 41 | end 42 | end 43 | 44 | return x 45 | end 46 | 47 | print(fizzbuzz1(1, 100, 3, 5)') 48 | print(fizzbuzz1(-50, 50, 7, 4)') 49 | 50 | 51 | 52 | # One can get fizzbuzz without a loop, but initializing x as x0 as in the R 53 | # script will make it one type regardless of the explicit 'Any' declaration 54 | # ('any' type declaration will be overwritten by Int or Float64, even using 55 | # convert). Could add a
double 'or' (as in the next example) or maybe 56 | # construct a special type, but that's unsatisfactory. No real code efficiency 57 | # is gained over the loop. 58 | 59 | function fizzbuzz2(nmin, nmax, num1, num2) 60 | x0 = [nmin:nmax] 61 | x = Array(Any, length(x0)) 62 | 63 | check1 = x0%num1.==0 64 | check2 = x0.%num2.==0 65 | check3 = x0.%(num1*num2).==0 66 | checkAll = check1 | check2 | check3 67 | 68 | x[check1] = "Fizz" 69 | x[check2] = "Buzz" 70 | x[check3] = "FizzBuzz" 71 | x[!checkAll] = x0[!checkAll] 72 | return x 73 | end 74 | 75 | print(fizzbuzz2(1, 100, 3, 5)') 76 | print(fizzbuzz2(-50, 50, 7, 4)') 77 | 78 | 79 | 80 | # A compromise approach that retains the 'any' type. Aside from the loop it's 81 | # pretty much the same as the R script. 82 | 83 | function fizzbuzz3(nmin, nmax, num1, num2) 84 | x0 = [nmin:nmax] 85 | x = Array(Any, length(x0)) 86 | 87 | for i in 1:length(x0) 88 | x[i] = x0[i] 89 | end 90 | 91 | x[x0%num1.==0] = "Fizz" 92 | x[x0%num2.==0] = "Buzz" 93 | x[x0%(num1*num2).==0] = "FizzBuzz" 94 | return x 95 | end 96 | 97 | print(fizzbuzz3(1, 100, 3, 5)') 98 | print(fizzbuzz3(-50, 50, 7, 4)') 99 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/fizzbuzz.py: -------------------------------------------------------------------------------- 1 | # Python Fizz Buzz. Replicate of the R code. 2 | 3 | def fizzbuzz(min, max, num1, num2): 4 | x = list(range(min, max+1)) 5 | for i in range(len(x)): 6 | if x[i] % (num1*num2)==0 : x[i] = 'FizzBuzz' 7 | elif x[i] % (num1)==0 : x[i] = 'Fizz' 8 | elif x[i] % (num2)==0 : x[i] = 'Buzz' 9 | return(x) 10 | 11 | fizzbuzz(1, 100, 3, 5) 12 | fizzbuzz(-50, 50, 7, 4) 13 | 14 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/matrixOperations_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/Other/Programming_Shenanigans/matrixOperations_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/stringReverseRecursively.R: -------------------------------------------------------------------------------- 1 | # reverse a string recursively 2 | 3 | revString = function(string){ 4 | require(stringr) 5 | if (nchar(string)==1) string 6 | else paste0(str_sub(string, -1), revString(str_sub(string, end=-2))) 7 | } 8 | 9 | 10 | string = paste0(letters, collapse = '') 11 | 12 | revString(string) 13 | 14 | # base R approach 15 | revString = function(string){ 16 | if (nchar(string)==1) string 17 | else paste0(substr(string, nchar(string), nchar(string)), 18 | revString(substr(string, 1, nchar(string)-1))) 19 | } 20 | 21 | 22 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/stringReverseRecursively.py: -------------------------------------------------------------------------------- 1 | def reverseString(aStr): 2 | if len(aStr) == 1: 3 | return aStr 4 | return aStr[-1] + reverseString(aStr[:-1]) 5 | 6 | import string 7 | 8 | string.ascii_letters 9 | reverseString(string.ascii_letters) 10 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/wordWrap.R: -------------------------------------------------------------------------------- 1 | wordWrap 
= function(text, lineLength){ 2 | require(stringr) 3 | idx = str_locate(str_sub(text, start = lineLength),' ')[,'start'] 4 | if(is.na(idx)) str_trim(text) 5 | else paste0(str_trim(str_sub(text, 1, lineLength+idx-1)), '\n', wordWrap(str_sub(text, start=lineLength+idx), lineLength)) 6 | } 7 | 8 | 9 | li = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.' 10 | 11 | cat(wordWrap(li, 80)) 12 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/wordWrap.py: -------------------------------------------------------------------------------- 1 | def wordWrap(text, lineLength): 2 | idx = text[lineLength:].find(' ') 3 | if idx == -1: 4 | return(text.strip()) 5 | 6 | return(text[:lineLength+idx].strip() + '\n' + wordWrap(text[lineLength+idx:], lineLength)) 7 | 8 | import string 9 | li = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.' 10 | 11 | print(wordWrap(li, 80)) 12 | -------------------------------------------------------------------------------- /Other/Python Startup/mixedModel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jun 20 17:25:18 2015 4 | 5 | @author: MC 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | import scipy as sp 10 | import statsmodels.api as sm 11 | import statsmodels.formula.api as smf 12 | 13 | np.random.seed(1234) 14 | N = 1000 15 | nGroups = 40 16 | nPerGroup = N//nGroups 17 | x = np.random.normal(size=N) 18 | group = np.repeat(np.arange(0, nGroups), nPerGroup) 19 | ranEff = np.random.normal(scale=.5, size=nGroups) 20 | coefs = np.array([2,.2]).reshape(2,1) 21 | randInts = coefs[0] + ranEff[group] 22 | 23 | X = np.array([randInts, x]).T 24 | 25 | y = randInts + x*coefs[1] + np.random.normal(scale=.75, size=N) 26 | y.shape 27 | 28 | df = pd.DataFrame({'x':x, 'group':group, 'y':y}) 29 | 30 | ### Model 31 | mod0 = smf.mixedlm('y ~ x', data=df, groups=df['group']) 32 | mod = mod0.fit() 33 | print(mod.summary()) # intercept RE is in sd 34 | 35 | reModel = mod.random_effects['Intercept'][:] 36 | sm.qqplot(reModel, sp.stats.norm, line='45', fit=True, scale=.25) 37 | -------------------------------------------------------------------------------- /Other/Python Startup/regressionExamples.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jun 20 09:08:20 2015 4 | 5 | @author: MC 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import scipy as sp 11 | from sklearn.preprocessing import scale 12 | import statsmodels.api as sm 13 | import statsmodels.formula.api as smf 14 | # import seaborn as sea 15 | import matplotlib.pyplot as plt 16 | 17 |
# Regression example using np arrays, np matrices and statsmodels; serves as a 18 | # quick reminder for me of some basic statistical modeling in python; 19 | # Unfortunately numpy doesn't deal with vectors as matrices, assumes row 20 | # entry when creating arrays, and elementwise rather than matrix operations, 21 | # which combined means regularly transposing and reshaping, using built-in methods. 22 | 23 | 24 | ### Data setup 25 | np.random.seed(1234) 26 | 27 | N = 100 28 | x = np.random.normal(size=N) 29 | X = np.array((np.ones(N), (x))).T 30 | # compare to R: X = cbind(1, rnorm(N)) 31 | 32 | # true value of coefficients 33 | beta = np.array((5, .5)).reshape(2,1) 34 | 35 | # outcome of interest 36 | y = np.dot(X,beta) + np.random.normal(scale=.5, size=N).reshape(N,1) 37 | 38 | # example scale 39 | scale(y) 40 | 41 | ### Inspect 42 | X.shape 43 | X[:5, 1] 44 | beta.shape 45 | 46 | plt.plot(x, y, 'ro') 47 | 48 | 49 | ### Regression models 50 | 51 | ## numpy linalg 52 | np.linalg.lstsq(X, y)[0] 53 | 54 | ## Normal equations numpy array 55 | # note that Python will finally get a matrix multiplier (@ symbol) in 3.5 56 | xx = np.linalg.inv(np.dot(X.T, X)) 57 | xy = np.dot(X.T, y) 58 | 59 | # alternative 60 | xx = np.linalg.inv(X.T.dot(X)) 61 | xy = X.T.dot(y) 62 | 63 | np.dot(xx, xy) 64 | 65 | ## Normal equations numpy matrix 66 | # is only 2d but perhaps more intuitive and multiplication is matrix multiplication by default 67 | Xmat = np.matrix(X) 68 | np.linalg.inv(Xmat.T * Xmat) * np.dot(Xmat.T, y) 69 | 70 | ## statsmodels 71 | res0 = sm.OLS(y, X) 72 | res = sm.OLS(y, X).fit() 73 | 74 | res.summary() 75 | res.aic 76 | res.fittedvalues 77 | res.rsquared_adj 78 | 79 | ## using scipy linregress for simple regression 80 | slope, intercept, r_value, p_value, std_err = sp.stats.linregress(x, y[:,0]) 81 | [intercept, slope] 82 | 83 | ## pandas data frames with formula approach 84 | df = pd.DataFrame(X) 85 | df['y'] = y 86 | df.columns = ['Intercept', 'x', 'y'] 87 | df.head() 88 | 89 | res = smf.ols('y ~ x', data=df).fit() 90 | res.summary() 91 | 92 | plt.plot(x, y, 'ro', 93 | x, res.fittedvalues) 94 | 95 | -------------------------------------------------------------------------------- /Other/getRollCall.R: -------------------------------------------------------------------------------- 1 | ### This function allows one to get multiple rollcall data objects (and in 2 | ### parallel) via the readKH function in the pscl package. See the readKH 3 | ### function helpfile, voteview.com, and also the readme in rollcall folder in 4 | ### the Datasets repo, where the results of this are available for all 5 | ### legislatures. A little messy but works ok. With 7 threads it took ~90 6 | ### seconds to get all data possible, ~2 minutes for csv. So if 7 | ### you're not requesting quite a few, you wouldn't need to set cores very high. 8 | 9 | getRollCall = function(congress, HoRSenBoth='Both', matrix=F, cores=2, Dir=NULL, csv=F){ 10 | # Arguments: 11 | # congress- integer vector 12 | # HoRSenBoth- character, branch of Congress; House, Senate, or Both 13 | # matrix- logical, if matrix instead of rollcall format is desired. See pscl::convertCodes. See csv arg.
14 | # cores- integer, number of cores/threads for parallel processing (Windows) 15 | # Dir- character, where to write out RData files of rollcall objects, or csv if matrix and csv=T 16 | # csv- logical, write matrix of binary votes to file; only if matrix=T and Dir provided 17 | 18 | # requires stringr and pscl packages 19 | 20 | # initial checks 21 | if (csv && !matrix) message('matrix must be TRUE for csv files to be written. *csv files will not be written.') 22 | if (csv && is.null(Dir)) message('Dir must be specified for csv. *csv files will not be written.') 23 | 24 | ### create urls 25 | # initial check for first 9 since the early files are numbered 01 02 etc. 26 | congressforurl = as.character(congress) 27 | congressforurl[congress %in% 1:9] = paste0(0, congressforurl[congress %in% 1:9]) 28 | 29 | urls = paste0('ftp://voteview.com/', c('hou', 'sen'), rep(congressforurl, e=2), 'kh.ord') 30 | 31 | # deal with current congress as it comes from different website; made to work in subsequent years 32 | currentYear = as.numeric(substr(Sys.Date(), 1, 4)) 33 | curfutureCongress = data.frame(Year=2013:currentYear, 34 | Congress=rep(113:200, e=2, length=length(2013:currentYear))) 35 | currCongress = curfutureCongress[curfutureCongress$Year==currentYear, 'Congress'] 36 | 37 | current = ifelse(currCongress %in% congress, T, F) 38 | 39 | # replace url for current congress with different web address 40 | if (current) { 41 | urls[grep(currCongress, urls)] = c( 42 | paste0('http://adric.sscnet.ucla.edu/rollcall/static/','H', currCongress,'.ord'), 43 | paste0('http://adric.sscnet.ucla.edu/rollcall/static/','S', currCongress,'.ord')) 44 | } 45 | 46 | # reduce as requested 47 | if (HoRSenBoth=='House') { 48 | urls = urls[grep('hou|H', urls)] 49 | } else if (HoRSenBoth=='Senate') { 50 | urls = urls[grep('sen|S', urls)] 51 | } 52 | 53 | 54 | ### autocreate descriptions for objects and files just for giggles 55 | suppressPackageStartupMessages(require(stringr)) 56 | descripN = as.numeric(str_extract(urls, '[0-9]+')) 57 | descripB = str_extract(urls, 'hou|H|sen|S') 58 | 59 | toOrd = function(x){ 60 | lasttwo = x %% 100 61 | lastone = x %% 10 62 | suffix = sapply(as.character(lastone), switch, 63 | '1' = 'st', 64 | '2' = 'nd', 65 | '3' = 'rd', 66 | 'th', 67 | simplify=T) 68 | suffix[lasttwo %in% 11:13] = 'th' 69 | paste0(x, suffix) 70 | } 71 | 72 | descriptions = paste0(toOrd(descripN), ' U.S.
', ifelse(descripB %in% c('hou','H'), 'House of Representatives', 'Senate')) 73 | 74 | 75 | ### set up parallel 76 | library(parallel) 77 | cl = makeCluster(cores) 78 | clusterEvalQ(cl, library(pscl)) 79 | 80 | suppressPackageStartupMessages(require(pscl)) 81 | rcList = clusterMap(cl, fun=readKH, urls, desc=descriptions) 82 | 83 | rcenv = new.env() # so export doesn't look in global 84 | 85 | ### write out RData files 86 | if (!is.null(Dir) & !csv){ 87 | filelist = paste0(Dir, descriptions, '.RData') 88 | descriptionsBrief = paste0(descripB, descripN) 89 | 90 | clusterExport(cl, c('rcList','filelist', 'descriptionsBrief'), envir = rcenv) 91 | 92 | parSapply(cl, 1:length(rcList), function(i) { 93 | assign(descriptionsBrief[i], rcList[[i]]); save(list=descriptionsBrief[i], file=filelist[i]) 94 | }) 95 | } 96 | 97 | 98 | ### convert to binary 99 | if (matrix){ 100 | clusterExport(cl, c('rcList'), envir=rcenv) 101 | rcList = parLapply(cl, rcList, convertCodes) 102 | if (csv && !is.null(Dir)){ 103 | filelist = paste0(Dir, descriptions, '.csv') 104 | clusterMap(cl, write.csv, rcList, filelist) 105 | } 106 | } 107 | 108 | on.exit(stopCluster(cl)) 109 | rcList 110 | } 111 | -------------------------------------------------------------------------------- /Other/ggtheme.R: -------------------------------------------------------------------------------- 1 | ### A clean theme that gets rid of unnecessary gridlines and colored backgrounds. 2 | 3 | require(ggplot2) 4 | ggtheme = theme(legend.background = element_blank(), 5 | legend.key = element_blank(), 6 | panel.grid.minor = element_blank(), 7 | panel.grid.major = element_blank(), 8 | panel.background = element_blank(), 9 | panel.border = element_blank(), 10 | strip.background = element_blank(), 11 | plot.background = element_blank()) 12 | detach(package:ggplot2) -------------------------------------------------------------------------------- /Other/shakespeareanInsulter.R: -------------------------------------------------------------------------------- 1 | #-----------------------------------------------------------------------------------# 2 | # If you would like to be insulted every time you start up R, feel free to put this # 3 | # in your Rprofile.site file. The lists can be found in various places on the web, # 4 | # e.g. 
http://www.pangloss.com/seidel/Shaker/ # 5 | #-----------------------------------------------------------------------------------# 6 | 7 | 8 | shakesInsult = function(){ 9 | w1 = c('artless','bawdy','beslubbering','bootless','churlish','cockered','clouted','craven','currish','dankish','dissembling','droning','errant','fawning','fobbing','froward','frothy','gleeking','goatish','gorbellied','impertinent','infectious','jarring','loggerheaded','lumpish','mammering','mangled','mewling','paunchy','pribbling','puking','puny','qualling','rank','reeky','roguish','ruttish','saucy','spleeny','spongy','surly','tottering','unmuzzled','vain','venomed','villainous','warped','wayward','weedy','yeasty') 10 | 11 | w2 = c('base-court','bat-fowling','beef-witted','beetle-headed','boil-brained','clapper-clawed','clay-brained','common-kissing','crook-pated','dismal-dreaming','dizzy-eyed','doghearted','dread-bolted','earth-vexing','elf-skinned','fat-kidneyed','fen-sucked','flap-mouthed','fly-bitten','folly-fallen','fool-born','full-gorged','guts-griping','half-faced','hasty-witted','hedge-born','hell-hated','idle-headed','ill-breeding','ill-nurtured','knotty-pated','milk-livered','motley-minded','onion-eyed','plume-plucked','pottle-deep','pox-marked','reeling-ripe','rough-hewn','rude-growing','rump-fed','shard-borne','sheep-biting','spur-galled','swag-bellied','tardy-gaited','tickle-brained','toad-spotted','urchin-snouted','weather-bitten') 12 | 13 | w3 = c('apple-john','baggage','barnacle','bladder','boar-pig','bugbear','bum-bailey','canker-blossom','clack-dish','clotpole','coxcomb','codpiece','death-token','dewberry','flap-dragon','flax-wench','flirt-gill','foot-licker','fustilarian','giglet','gudgeon','haggard','harpy','hedge-pig','horn-beast','hugger-mugger','joithead','lewdster','lout','maggot-pie','malt-worm','mammet','measle','minnow','miscreant','moldwarp','mumble-news','nut-hook','pigeon-egg','pignut','puttock','pumpion','ratsbane','scut','skainsmate','strumpet','varlot','vassal','whey-face','wagtail') 14 | 15 | return( paste( 'Thou', sample(w1, 1), sample(w2, 1), sample(w3, 1), collapse=" ")) 16 | } 17 | 18 | cat('Your Shakespearean insult for the day is...', shakesInsult(),"\n") 19 | 20 | # 21 | # alternatively, use the cowsay package, to let various animals insult you! 22 | # 23 | require(cowsay) 24 | say(what = shakesInsult(), by = "random") 25 | -------------------------------------------------------------------------------- /Other/spurriousCorrelationwithRatios.R: -------------------------------------------------------------------------------- 1 | # a function that will take an input n and generate n x, y, and z values. The 2 | # correlation of interest is between x and y, when either or both are a ratio 3 | # based on z.
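# Why the spurious correlation arises: even if x and y are independent, x/z and
# y/z share the denominator z, so variation in z alone induces correlation
# between the two ratios. Pearson's classic approximation for mutually
# independent x, y, z is r(x/z, y/z) ~ vz^2 / sqrt((vx^2 + vz^2)*(vy^2 + vz^2)),
# where the v are coefficients of variation. A quick sketch checking that
# approximation against simulation (the sample size and distributions below are
# illustrative, not part of the original simulation):

cv = function(x) sd(x) / mean(x)
x0 = rpois(1e5, 10); y0 = rpois(1e5, 10); z0 = runif(1e5, 50, 100)
cv(z0)^2 / sqrt((cv(x0)^2 + cv(z0)^2) * (cv(y0)^2 + cv(z0)^2))  # approximate expected spurious correlation
cor(x0/z0, y0/z0)                                               # simulated value should be close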
4 | 5 | spurcorr = function(n){ 6 | x = rpois(n, 10) 7 | y = rpois(n, 10) 8 | z = runif(n, 50, 100) 9 | 10 | cbind(N=n, origCorr=cor(x,y), xzyzCorr=cor(x/z, y/z), xzyCorr=cor(x/z,y), xyzCorr=cor(x,y/z)) 11 | } 12 | 13 | # replicate at various sample sizes 14 | out = replicate(1000, sapply(c(10, 100, 500, 1000, 2500), spurcorr)) 15 | 16 | origcorrs = apply(out, 3, function(mat) mat[2,]) ;rownames(origcorrs) = paste(c(10, 100, 500, 1000, 2500)) 17 | spurrcorrsBoth = apply(out, 3, function(mat) mat[3,]);rownames(spurrcorrsBoth) = paste(c(10, 100, 500, 1000, 2500)) 18 | spurrcorrsXonly = apply(out, 3, function(mat) mat[4,]);rownames(spurrcorrsXonly) = paste(c(10, 100, 500, 1000, 2500)) 19 | spurrcorrsYonly = apply(out, 3, function(mat) mat[5,]);rownames(spurrcorrsYonly) = paste(c(10, 100, 500, 1000, 2500)) 20 | 21 | data.frame(origZero=rowMeans(origcorrs), Xratio=rowMeans(spurrcorrsXonly), 22 | Yratio=rowMeans(spurrcorrsYonly), Both=rowMeans(spurrcorrsBoth)) 23 | 24 | 25 | -------------------------------------------------------------------------------- /Other/xkcdscrape.R: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | ### The following function will go to the xkcd website, scrape the html, look for ### 3 | ### a particular part based on a div tag, snag it, and write it out to a file for ### 4 | ### subsequent text analysis. ### 5 | ### ### 6 | ### The function requires only one argument, number, indicating the number of ### 7 | ### comic strips. ### 8 | ##################################################################################### 9 | 10 | ### Inspiration from: 11 | ### https://github.com/CabbagesAndKings/xkcd-Topics/blob/master/scripts/getTranscripts.sh 12 | 13 | xkcdscrape = function(number){ 14 | # create url string 15 | url = paste0("http://xkcd.com/", number,'/') 16 | 17 | # creates a single element list argument of unparsed html 18 | x = scrape(url, parse=F) 19 | x = as.character(x[[1]]) # not necessary, but simplifies things a bit 20 | 21 | # find the point in the text matching the regex, i.e. the transcript div (matched non-greedily up to its closing tag) 22 | transcript = regexpr('<div id="transcript"(.*?)</div>', x) 23 | 24 | # writes what is sent to console (via cat) to a xkcd#.txt file 25 | sink(paste0('xkcdRScrape/xkcd', number, '.txt')) # open connection to a file of the name formed by paste0; create the folder first.
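# while the sink is active, anything cat() prints is diverted to the file rather than the console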
26 | cat(unlist(regmatches(x, transcript))) # send to the console the point specified by transcript in the html text 27 | sink() # close connection 28 | } 29 | 30 | library(parallel) 31 | cl = makeCluster(3) # number of cores 32 | clusterEvalQ(cl, library(scrapeR)) # load scrapeR package on the cores 33 | clusterExport(cl, 'xkcdscrape') # export the function to the cores 34 | 35 | n = 1283 # number of xkcd comics by late Oct 2013 36 | 37 | # a standard non-parallel way to do it without an explicit loop and more straightforward/easier code 38 | # sapply(1:n, xkcdscrape) # each number 1:n is fed to the function 39 | 40 | # the parallelized version (timed via system.time function) 41 | system.time({ 42 | parSapply(cl, 1:n, xkcdscrape) 43 | }) 44 | 45 | 46 | 47 | ### Example loop for comparison; slow 48 | # Check website for number published 49 | # n = 1283 50 | # library(scrapeR) 51 | # system.time({ 52 | # for (i in 1:n){ 53 | # url = paste0("http://xkcd.com/", i,'/') 54 | # x = scrape(url, parse=F) 55 | # transcript = regexpr('<div id="transcript"(.*?)</div>', x[[1]]) 56 | # sink(paste0('xkcdRScrape/xkcd', i)) 57 | # cat(regmatches(x[[1]], transcript)) 58 | # sink() 59 | # } 60 | # }) -------------------------------------------------------------------------------- /Other/xkcdscrape.py: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | ### The following function will go to the xkcd website, scrape the html, look for ### 3 | ### a particular part based on a div tag, snag it, and write it out to a file for ### 4 | ### subsequent text analysis. ### 5 | ##################################################################################### 6 | 7 | ### Inspiration from: 8 | ### https://github.com/CabbagesAndKings/xkcd-Topics/blob/master/scripts/getTranscripts.sh 9 | ### See the R version in my repo. Being new to Python I thought this would be a nice challenge. 10 | ### I make no claims as to its efficiency.
11 | 12 | ############### 13 | ### Preface ### 14 | ############### 15 | import os 16 | os.getcwd() 17 | 18 | # change working directory to wherever you want the text files to be stored 19 | os.chdir('C:/Users/mclark19/Desktop/CSR/Clients/Me/xkcdscrape/') 20 | 21 | import time # for time comparisons if desired 22 | import urllib2 23 | import BeautifulSoup 24 | 25 | Soup = BeautifulSoup.BeautifulSoup 26 | 27 | ############### 28 | ### Example ### 29 | ############### 30 | ### read in web contents 31 | url = urllib2.urlopen("http://www.xkcd.com/1/") 32 | xkcd1 = url.read() 33 | 34 | ### create a beautifulSoup object 35 | out = Soup(xkcd1) 36 | 37 | ### Inspect in a standard html form 38 | print(out.prettify()) 39 | 40 | ### example of primary process: extract the div with id='transcript' 41 | out.find('div', {'id' : 'transcript'}) 42 | 43 | ###################################### 44 | ### Main Process via Standard Loop ### 45 | ###################################### 46 | 47 | ### Preliminaries 48 | # number of xkcd comics 49 | n = 1283 50 | 51 | # test n 52 | # n = 100 53 | 54 | ### set initial time 55 | t0 = time.time() 56 | 57 | ### Run the loop over all comics 58 | for i in xrange(1, n+1): 59 | filename = 'xkcdpyScrape/xkcd' + str(i) + '.txt' 60 | if i == 404: # this deals with an xkcd joke for comic #404 61 | out = " " 62 | else: 63 | # read in the web contents 64 | url = urllib2.urlopen('http://www.xkcd.com/' + str(i)) 65 | xkcd = url.read() 66 | 67 | # create a soup object 68 | xkcdsoup = Soup(xkcd) 69 | 70 | # scrape the div with id = 'transcript' 71 | out = xkcdsoup.find('div', {'id' : 'transcript'}) 72 | 73 | # Write out the file 74 | f = open(filename, 'w+') 75 | f.write(str(out)) 76 | f.close() 77 | 78 | time.time() - t0 # roughly 20 seconds for 100, about 4.5 minutes for all 79 | 80 | #################### 81 | ### Parallelized ### 82 | #################### 83 | 84 | ################################################################################## 85 | ### Create scrapexkcd function that will do what's in the loop above given a ### 86 | ### start and end point. See the loop above for details. Arguments include the ### 87 | ### start and end number specific to the comics one desires to download. 
### 88 | ################################################################################## 89 | 90 | def scrapexkcd(start, end): 91 | import urllib2 92 | import BeautifulSoup 93 | for i in xrange(start, end): 94 | filename = 'xkcdpyScrape/parallel/xkcd' + str(i) + '.txt' 95 | if i == 404: 96 | out = " " 97 | else: 98 | url = urllib2.urlopen('http://www.xkcd.com/' + str(i)) 99 | xkcd = url.read() 100 | xkcdsoup = BeautifulSoup.BeautifulSoup(xkcd) 101 | out = xkcdsoup.find('div', {'id' : 'transcript'}) 102 | f = open(filename, 'w+') 103 | f.write(str(out)) 104 | f.close() 105 | 106 | start = 1 107 | end = 1283 + 1 108 | 109 | # test end 110 | end = 100 111 | 112 | import pp # for parallelization 113 | 114 | # set number of workers 115 | ncpus = 3 116 | 117 | # Create jobserver 118 | job_server = pp.Server(ncpus=ncpus) 119 | 120 | # Divide the task into subtasks 121 | parts = 3 122 | step = (end - start) / parts + 1 123 | 124 | jobs = [] 125 | 126 | t0 = time.time() 127 | 128 | for index in xrange(parts): 129 | # create start and endpoints for the function 130 | starti = start + index*step 131 | endi = min(start + (index+1)*step, end) 132 | # Submit a job which will scrape the site 133 | job_server.submit(scrapexkcd, (starti, endi)) 134 | 135 | #wait for jobs in all groups to finish 136 | job_server.wait() 137 | 138 | time.time() - t0 # time since t0 139 | 140 | # print stats 141 | job_server.print_stats() -------------------------------------------------------------------------------- /SC and TR/MLcode.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------------------------# 2 | # See http://www.nd.edu/~mclark19/learn/ML.pdf for the handout on machine # 3 | # learning this code regards. 
# 4 | #-------------------------------------------------------------------------------# 5 | 6 | sqerrloss = function(beta, X, y){ 7 | mu = X%*%beta 8 | sum((y-mu)^2) 9 | } 10 | 11 | set.seed(123) 12 | X = cbind(1, rnorm(100), rnorm(100)) 13 | y = rowSums(X[,-1] + rnorm(100)) 14 | out1 = optim(par=c(0,0,0), fn=sqerrloss, X=X, y=y) 15 | out2 = lm(y ~ X[,2] + X[,3]) # check with lm 16 | rbind(c(out1$par, out1$value), c(coef(out2),sum(resid(out2)^2)) ) 17 | 18 | 19 | 20 | 21 | sqerrloss_reg = function(beta, X, y, lambda=.1){ 22 | mu = X%*%beta 23 | sum((y-mu)^2) + lambda*sum(abs(beta[-1])) 24 | } 25 | 26 | out3 = optim(par=c(0,0,0), fn=sqerrloss_reg, X=X, y=y) 27 | rbind(c(out1$par, out1$value), 28 | c(coef(out2),sum(resid(out2)^2)), 29 | c(out3$par, out3$value) ) 30 | 31 | 32 | 33 | 34 | 35 | 36 | wine = read.csv('http://www.nd.edu/~mclark19/learn/data/goodwine.csv') 37 | summary(wine) 38 | 39 | 40 | 41 | library(doSNOW) 42 | cl = makeCluster(11, type = "SOCK"); registerDoSNOW(cl) # keep the cluster object so it can be stopped later 43 | 44 | 45 | 46 | library(corrplot) 47 | corrplot(cor(wine[,-c(13,15)]), method="number", tl.cex=.5) 48 | 49 | 50 | 51 | library(caret) 52 | set.seed(1234) #so that the indices will be the same when re-run 53 | trainIndices = createDataPartition(wine$good, p=.8, list=F) 54 | wanted = !colnames(wine) %in% c("free.sulfur.dioxide", "density", "quality", "color", "white" ) 55 | wine_train = wine[trainIndices, wanted] #remove quality and color, as well as density and others 56 | 57 | wine_test = wine[-trainIndices, wanted] 58 | # prep_test = preProcess(wine_test[,-10], method="range") 59 | # wine_test = data.frame(predict(prep_test, wine_test[,-10]), good=wine_test[ ,10]) 60 | 61 | 62 | 63 | wine_trainplot = predict(preProcess(wine_train[,-10], method="range"), 64 | wine_train[,-10]) 65 | featurePlot(wine_trainplot, wine_train$good, "box") 66 | 67 | 68 | 69 | set.seed(1234) 70 | cv_opts = trainControl(method="cv", number=10) 71 | knn_opts = data.frame(.k=c(seq(3, 11, 2), 25, 51, 101)) #odd to avoid ties 72 | results_knn = train(good~., data=wine_train, method="knn", 73 | preProcess="range", trControl=cv_opts, 74 | tuneGrid = knn_opts) 75 | 76 | results_knn 77 | 78 | 79 | 80 | preds_knn = predict(results_knn, wine_test[,-10]) 81 | confusionMatrix(preds_knn, wine_test[,10], positive='Good') 82 | conf_knn = confusionMatrix(preds_knn, wine_test[,10], positive='Good') #create an object to use in Sexpr 83 | 84 | 85 | 86 | dotPlot(varImp(results_knn)) 87 | 88 | 89 | 90 | set.seed(1234) 91 | results_nnet = train(good~., data=wine_train, method="avNNet", 92 | trControl=cv_opts, preProcess="range", 93 | tuneLength=5, trace=F, maxit=1000) 94 | results_nnet 95 | 96 | 97 | 98 | preds_nnet = predict(results_nnet, wine_test[,-10]) 99 | confusionMatrix(preds_nnet, wine_test[,10], positive='Good') 100 | conf_nnet = confusionMatrix(preds_nnet, wine_test[,10], positive='Good') #create an object to use in Sexpr 101 | 102 | 103 | 104 | set.seed(1234) 105 | rf_opts = data.frame(.mtry=c(2:6)) 106 | results_rf = train(good~., data=wine_train, method="rf", 107 | preProcess='range',trControl=cv_opts, tuneGrid=rf_opts, 108 | n.tree=1000) 109 | results_rf 110 | 111 | 112 | 113 | preds_rf = predict(results_rf, wine_test[,-10]) 114 | confusionMatrix(preds_rf, wine_test[,10], positive='Good') 115 | conf_rf = confusionMatrix(preds_rf, wine_test[,10], positive='Good') #create an object to use in Sexpr 116 | 117 | 118 | 119 | set.seed(1234) 120 | results_svm = train(good~., data=wine_train, method="svmLinear", 121 | preProcess="range", trControl=cv_opts,
tuneLength=5) 122 | results_svm 123 | 124 | 125 | 126 | preds_svm = predict(results_svm, wine_test[,-10]) 127 | confusionMatrix(preds_svm, wine_test[,10], positive='Good') 128 | conf_svm = confusionMatrix(preds_svm, wine_test[,10], positive='Good') #create an object to use in Sexpr 129 | 130 | 131 | stopCluster(cl) -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/diag.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/diag.html -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/fitCSgam-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/fitCSgam-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/gamSleepStudy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/gamSleepStudy-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/gammSleepStudy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/gammSleepStudy-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and
TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-2.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-latex/fitCSgam-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-latex/fitCSgam-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-latex/gammSleepStudy-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-latex/gammSleepStudy-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/anovamixed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/anovamixed.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/growth_vs_mixed_files/figure-html/lavaanmod-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/growth_vs_mixed_files/figure-html/lavaanmod-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/growth_vs_mixed_files/figure-html/randomIntsSlopesMods-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/growth_vs_mixed_files/figure-html/randomIntsSlopesMods-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/growth_vs_mixed_files/figure-html/visualizeTrends-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/growth_vs_mixed_files/figure-html/visualizeTrends-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/growth_vs_mixed_files/growthvsMixed_EstResults.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/growth_vs_mixed_files/growthvsMixed_EstResults.RData -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-html/sleepModFits-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-html/sleepModFits-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-html/sleepModFitsReduced-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-html/sleepModFitsReduced-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-html/sleepstudyPlot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-html/sleepstudyPlot-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-latex/sleepModFits-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-latex/sleepModFits-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-latex/sleepModFitsReduced-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-latex/sleepModFitsReduced-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-latex/sleepstudyPlot-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-latex/sleepstudyPlot-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepModFits-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepModFits-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepModFitsReduced-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepModFitsReduced-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepstudyPlot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and 
TR/mixedModels/mixedModels_files/figure-markdown_github/sleepstudyPlot-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/pearlpic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/pearlpic.png -------------------------------------------------------------------------------- /other.css: -------------------------------------------------------------------------------- 1 | .emph, em { 2 | color: #E32D00; /*#ff5500 #D14300*/ 3 | font-style: normal; 4 | font-weight: 450; 5 | } 6 | 7 | strong { 8 | color: #404040; 9 | } 10 | 11 | 12 | .pack { 13 | color: #1f65b7; /* #990071 #AC9CFF #e41a1c*/ 14 | font-weight: 400; 15 | } 16 | 17 | .func { 18 | color: #007020; /*#007199 #00CBB6; #984ea3; can just use `` instead*/ 19 | font-weight: 400; 20 | } 21 | 22 | .objclass { 23 | color: #947100; /*#AAB400 #4daf4a; #FFC5D0*/ 24 | font-weight: 400; 25 | } 26 | 27 | a { 28 | color: #1e90ff; /*dodgerblue*/ 29 | } 30 | 31 | pre { 32 | box-sizing: border-box; 33 | left: 0; /* This changes where the code chunk box actually starts */ 34 | /* padding: 10px 0 10px 60px; Change the last value here to move the text left or right */ 35 | position: relative; 36 | width: 100%; /* This changes where the code chunk box ends on the right side */ 37 | font-size: 75%; /*changes output size; and comments*/ 38 | /*border: 10px solid #ff5500;*/ /*code block and results border*/ 39 | /*background-color:#ff5500;*/ /* results background*/ 40 | 41 | } 42 | 43 | #TOC { 44 | font-size: 75%; 45 | } 46 | 47 | 48 | 49 | --------------------------------------------------------------------------------