├── .gitattributes ├── .gitignore ├── Miscellaneous-R-Code.Rproj ├── ModelFitting ├── Bayesian │ ├── StanBugsJags │ │ ├── BetaMixedModel.stan │ │ ├── IRT_models │ │ │ ├── IRT_2PM.stan │ │ │ ├── IRT_3PM.stan │ │ │ ├── IRT_4PM.stan │ │ │ ├── IRT_rasch.stan │ │ │ ├── ReadMe.md │ │ │ └── test_stan_irt.R │ │ ├── MixedModelGeneric.stan │ │ ├── MixedModelSleepstudy_withREcorrelation.stan │ │ ├── ReadMe.md │ │ ├── cfa │ │ ├── jags_MixedModelBetaRegression.jags │ │ ├── linregwithprior.stan │ │ ├── lmbugs.bugs │ │ ├── lmjags.jags │ │ ├── multilevelmediation.stan │ │ ├── multilevelmediationYuanMacKinnon2009.bugs │ │ └── singlelevelmediationYuanMacKinnon2009.bugs │ ├── bugs_linreg.R │ ├── horseshoe │ │ ├── README.md │ │ ├── example.R │ │ ├── wei_bg.stan │ │ ├── wei_bg_joint.stan │ │ ├── wei_gau.stan │ │ ├── wei_gau_joint.stan │ │ ├── wei_hs.stan │ │ ├── wei_hs_joint.stan │ │ ├── wei_lap.stan │ │ └── wei_lap_joint.stan │ ├── jags_MixedModelBetaRegression.R │ ├── jags_linreg.R │ ├── multinomial │ │ ├── README.md │ │ ├── mnl_1.stan │ │ ├── mnl_2.stan │ │ ├── mnl_3.stan │ │ ├── mnl_4.stan │ │ ├── multinomial_stan_comparisons.R │ │ ├── stan_basic_mnl_conceptual.stan │ │ └── stan_mnl_altspecific.stan │ ├── r2bugs.distributions.R │ ├── rstanBetaRegression.R │ ├── rstan_MixedModelBetaRegression.R │ ├── rstan_MixedModelSleepstudy.R │ ├── rstan_MixedModelSleepstudy_withREcorrelation.R │ ├── rstan_MixtureModel.R │ ├── rstan_linregwithprior.R │ ├── rstan_multilevelMediation.R │ ├── rstan_topicModels0.R │ ├── rstant_testBEST.R │ ├── stochasticVolatility.R │ ├── topicModelgibbs.R │ ├── variationalBayesRegression.R │ └── variationalBayesRegression.Rmd ├── EM Examples │ ├── EM Mixture MV.R │ ├── EM Mixture.R │ ├── EM algorithm for ppca with missing.R │ ├── EM algorithm for ppca.R │ ├── EM algorithm for probit example.R │ ├── EM for pca.R │ └── EM for state space unobserved components.R ├── Mixed Models │ ├── mixedModelML │ │ ├── mixedModelML.R │ │ ├── mixedModelML.Rmd │ │ ├── mixedModelML.html │ │ ├── mixedModelML.md │ │ ├── mixedModelML.pdf │ │ └── mixedModelML_files │ │ │ ├── figure-html │ │ │ ├── diag.html │ │ │ ├── fitCSgam-1.png │ │ │ ├── gamSleepStudy-1.png │ │ │ ├── gammSleepStudy-1.png │ │ │ ├── unnamed-chunk-1-1.png │ │ │ ├── unnamed-chunk-3-1.png │ │ │ └── unnamed-chunk-3-2.png │ │ │ └── figure-latex │ │ │ ├── fitCSgam-1.pdf │ │ │ └── gammSleepStudy-1.pdf │ ├── mixedModels │ │ ├── anovamixed.Rmd │ │ ├── anovamixed.html │ │ ├── anovamixed.pdf │ │ ├── growthCurvevsMixedModel.R │ │ ├── growth_vs_mixed.Rmd │ │ ├── growth_vs_mixed.html │ │ ├── growth_vs_mixed_files │ │ │ ├── figure-html │ │ │ │ ├── lavaanmod-1.png │ │ │ │ ├── randomIntsSlopesMods-1.png │ │ │ │ └── visualizeTrends-1.png │ │ │ └── growthvsMixed_EstResults.RData │ │ ├── growth_vs_mixed_sim.Rmd │ │ ├── growth_vs_mixed_sim.html │ │ ├── growth_vs_mixed_sim.md │ │ ├── mixedModels.Rmd │ │ ├── mixedModels.md │ │ ├── mixedModels.pdf │ │ └── mixedModels_files │ │ │ ├── figure-html │ │ │ ├── sleepModFits-1.png │ │ │ ├── sleepModFitsReduced-1.png │ │ │ └── sleepstudyPlot-1.png │ │ │ ├── figure-latex │ │ │ ├── sleepModFits-1.pdf │ │ │ ├── sleepModFitsReduced-1.pdf │ │ │ └── sleepstudyPlot-1.pdf │ │ │ └── figure-markdown_github │ │ │ ├── sleepModFits-1.png │ │ │ ├── sleepModFitsReduced-1.png │ │ │ └── sleepstudyPlot-1.png │ ├── one_factor_RE.R │ ├── one_factor_RE.jl │ ├── one_factor_RE.m │ ├── twofactorRE.R │ ├── twofactorRE.jl │ └── twofactorRE.m ├── NBzeroinfl.R ├── RKHSReg │ ├── RKHSReg.R │ ├── RKHSReg.md │ └── update.Rmd ├── bivariateProbit.R ├── cfa_ml.R 
├── convert_code_to_html.R ├── crp.R ├── cubicsplines.R ├── elm.R ├── gp Examples │ ├── README.md │ ├── gaussSample.m │ ├── gaussianProcessStan.R │ ├── gaussianProcessStan.Rmd │ ├── gaussianprocessNoiseFree.R │ ├── gaussianprocessNoisy.R │ ├── gpStanModelCode.stan │ ├── gpStanModelCode_gammaExponential.stan │ ├── gpStanModelCode_generalizedSquaredExponential.stan │ ├── gpStanModelCode_rationalQuadratic.stan │ ├── gpStan_squaredExponentialFactorAnalysis.R │ ├── gpStan_squaredExponentialFactorAnalysis.stan │ ├── gprDemoChangeHparams.m │ ├── gprDemoNoiseFree.m │ └── stangp.RData ├── gradient_descent.R ├── heckman_selection.R ├── hmm_viterbi.R ├── hmm_viterbi.py ├── hurdle.R ├── ipw.R ├── lasso.R ├── markov_model.R ├── multinomial.R ├── naivebayes.R ├── nelder_mead.R ├── nelder_mead.ipynb ├── nelder_mead.py ├── newton_irls.R ├── one_line_models.R ├── ordinal_regression.R ├── penalized_ML.R ├── poiszeroinfl.R ├── quantile_regression.Rmd ├── quantile_regression.html ├── ridge.R ├── standard_lm.R ├── standard_logistic.R ├── stochastic_gradient_descent.R ├── survivalCox.R └── tobit.R ├── Other ├── Programming_Shenanigans │ ├── compoundInterest.R │ ├── fillbyLastRecursive.R │ ├── fizzbuzz.R │ ├── fizzbuzz.jl │ ├── fizzbuzz.py │ ├── matrixOperations.Rmd │ ├── matrixOperations.html │ ├── matrixOperations.md │ ├── matrixOperations_files │ │ └── figure-html │ │ │ └── unnamed-chunk-16-1.png │ ├── stringReverseRecursively.R │ ├── stringReverseRecursively.py │ ├── wordWrap.R │ └── wordWrap.py ├── Python Startup │ ├── mixedModel.py │ └── regressionExamples.py ├── getRollCall.R ├── ggtheme.R ├── shakespeareanInsulter.R ├── spurriousCorrelationwithRatios.R ├── xkcdscrape.R └── xkcdscrape.py ├── README.md ├── SC and TR ├── GAMS.R ├── MLcode.R ├── coursecode.r ├── mixedModelML │ ├── mixedModelML.R │ ├── mixedModelML.Rmd │ ├── mixedModelML.html │ ├── mixedModelML.md │ ├── mixedModelML.pdf │ └── mixedModelML_files │ │ ├── figure-html │ │ ├── diag.html │ │ ├── fitCSgam-1.png │ │ ├── gamSleepStudy-1.png │ │ ├── gammSleepStudy-1.png │ │ ├── unnamed-chunk-1-1.png │ │ ├── unnamed-chunk-3-1.png │ │ └── unnamed-chunk-3-2.png │ │ └── figure-latex │ │ ├── fitCSgam-1.pdf │ │ └── gammSleepStudy-1.pdf └── mixedModels │ ├── anovamixed.Rmd │ ├── anovamixed.html │ ├── anovamixed.pdf │ ├── growthCurvevsMixedModel.R │ ├── growth_vs_mixed.Rmd │ ├── growth_vs_mixed.html │ ├── growth_vs_mixed_files │ ├── figure-html │ │ ├── lavaanmod-1.png │ │ ├── randomIntsSlopesMods-1.png │ │ └── visualizeTrends-1.png │ └── growthvsMixed_EstResults.RData │ ├── growth_vs_mixed_sim.Rmd │ ├── growth_vs_mixed_sim.html │ ├── growth_vs_mixed_sim.md │ ├── mixedModels.Rmd │ ├── mixedModels.md │ ├── mixedModels.pdf │ ├── mixedModels_files │ ├── figure-html │ │ ├── sleepModFits-1.png │ │ ├── sleepModFitsReduced-1.png │ │ └── sleepstudyPlot-1.png │ ├── figure-latex │ │ ├── sleepModFits-1.pdf │ │ ├── sleepModFitsReduced-1.pdf │ │ └── sleepstudyPlot-1.pdf │ └── figure-markdown_github │ │ ├── sleepModFits-1.png │ │ ├── sleepModFitsReduced-1.png │ │ └── sleepstudyPlot-1.png │ └── pearlpic.png └── other.css /.gitattributes: -------------------------------------------------------------------------------- 1 | *.html linguist-vendored 2 | *.stan linguist-language=Stan 3 | 4 | 5 | # Source files 6 | # ============ 7 | *.Rdata text 8 | *.rdb binary 9 | *.rds binary 10 | *.Rd text 11 | *.Rdx binary 12 | *.Rmd text 13 | *.R text 14 | *.stan text -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | *_cache 5 | .DS_Store -------------------------------------------------------------------------------- /Miscellaneous-R-Code.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/BetaMixedModel.stan: -------------------------------------------------------------------------------- 1 | //see rstan_MixedModelBetaRegression.R 2 | data { 3 | int N; // number of observations 4 | int L; // number of batches 5 | vector[N] yield; // response 6 | int id[N]; // batch 7 | vector[N] temp; // temperature 8 | } 9 | 10 | transformed data { 11 | vector[N] tempCen; 12 | tempCen = temp - mean(temp); // centered explanatory variable 13 | } 14 | 15 | parameters { 16 | real Intercept; // "fixed" effects 17 | real betaTemp; 18 | 19 | real phi; // dispersion parameter 20 | 21 | real sd_int; // sd for ints 22 | real sd_beta; // sd for temp 23 | 24 | vector[L] gammaIntercept; // individual effects 25 | vector[L] gammaTemp; // individual effects 26 | } 27 | 28 | transformed parameters{ 29 | vector[N] A; // parameter for beta distn 30 | vector[N] B; // parameter for beta distn 31 | vector[N] yhat; // transformed linear predictor 32 | vector[L] IntRE; 33 | vector[L] SlopeRE; 34 | 35 | for (l in 1:L){ 36 | IntRE[l] = gammaIntercept[l]*sd_int; 37 | SlopeRE[l] = gammaTemp[l]*sd_beta ; 38 | } 39 | 40 | // model calculations 41 | for(n in 1:N) { 42 | yhat[n] = inv_logit((IntRE[id[n]] + Intercept) + (SlopeRE[id[n]] + betaTemp) * tempCen[n]); 43 | } 44 | 45 | A = yhat * phi; 46 | B = (1.0-yhat) * phi; 47 | } 48 | 49 | model { 50 | // priors 51 | Intercept ~ normal(0, 10); 52 | betaTemp ~ normal(0, 1); 53 | 54 | sd_int ~ cauchy(0, 2.5); 55 | sd_beta ~ cauchy(0, 2.5); 56 | phi ~ cauchy(0, 5); 57 | // matt trick used for following; 58 | // else slower and convergence issues 59 | gammaIntercept ~ normal(0, 1); // random intercepts for each batch 60 | gammaTemp ~ normal(0, 1); // random slopes for each batch 61 | 62 | // likelihood 63 | yield ~ beta(A, B); 64 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/IRT_2PM.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | int J; 4 | int Y[N,J]; 5 | } 6 | 7 | transformed data{ 8 | 9 | } 10 | 11 | parameters { 12 | vector[J] difficulty; 13 | vector[J] discrim; 14 | vector[N] Z; 15 | } 16 | 17 | transformed parameters { 18 | 19 | } 20 | 21 | model { 22 | matrix[N, J] lmat; 23 | 24 | # priors 25 | Z ~ normal(0, 1); 26 | discrim ~ student_t(3, 0, 5); 27 | difficulty ~ student_t(3, 0, 5); 28 | 29 | 30 | 31 | for (j in 1:J){ 32 | lmat[,j] = discrim[j] * (Z - difficulty[j]); 33 | } 34 | 35 | 36 | // likelihood 37 | for (j in 1:J) Y[,j] ~ bernoulli_logit(lmat[,j]); 38 | 39 | } 40 | 41 | generated quantities { 42 | 43 | } 44 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/IRT_3PM.stan: 
-------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | int J; 4 | int Y[N,J]; 5 | } 6 | 7 | transformed data{ 8 | 9 | } 10 | 11 | parameters { 12 | vector[J] difficulty; 13 | vector[J] discrim; 14 | vector[J] guess; 15 | vector[N] Z; 16 | } 17 | 18 | transformed parameters { 19 | 20 | } 21 | 22 | model { 23 | matrix[N, J] pmat; 24 | 25 | # priors 26 | Z ~ normal(0, 1); 27 | discrim ~ student_t(3, 0, 5); 28 | difficulty ~ student_t(3, 0, 5); 29 | guess ~ beta(1, 19); 30 | 31 | 32 | for (j in 1:J){ 33 | pmat[,j] = guess[j] + (1 - guess[j]) * inv_logit(discrim[j] * (Z - difficulty[j])); 34 | } 35 | 36 | 37 | // likelihood 38 | for (j in 1:J) Y[,j] ~ bernoulli(pmat[,j]); 39 | 40 | } 41 | 42 | generated quantities { 43 | 44 | } 45 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/IRT_4PM.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | int J; 4 | int Y[N,J]; 5 | } 6 | 7 | transformed data{ 8 | 9 | } 10 | 11 | parameters { 12 | vector[J] difficulty; 13 | vector[J] discrim; 14 | vector[J] guess; 15 | vector[J] ceiling; 16 | vector[N] Z; 17 | } 18 | 19 | transformed parameters { 20 | 21 | } 22 | 23 | model { 24 | matrix[N, J] pmat; 25 | 26 | # priors 27 | Z ~ normal(0, 1); 28 | discrim ~ student_t(3, 0, 5); 29 | difficulty ~ student_t(3, 0, 5); 30 | guess ~ beta(1, 19); 31 | ceiling ~ beta(49, 1); 32 | 33 | 34 | for (j in 1:J){ 35 | pmat[,j] = guess[j] + (ceiling[j] - guess[j]) * inv_logit(discrim[j] * (Z - difficulty[j])); 36 | } 37 | 38 | 39 | // likelihood 40 | for (j in 1:J) Y[,j] ~ bernoulli(pmat[,j]); 41 | 42 | } 43 | 44 | generated quantities { 45 | 46 | } 47 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/IRT_rasch.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | int J; 4 | int Y[N,J]; 5 | } 6 | 7 | transformed data{ 8 | 9 | } 10 | 11 | parameters { 12 | vector[J] difficulty; 13 | real discrim; 14 | vector[N] Z; 15 | } 16 | 17 | transformed parameters { 18 | 19 | } 20 | 21 | model { 22 | matrix[N, J] lmat; 23 | 24 | # priors 25 | Z ~ normal(0, 1); 26 | discrim ~ student_t(3, 0, 5); 27 | difficulty ~ student_t(3, 0, 5); 28 | 29 | 30 | 31 | for (j in 1:J){ 32 | lmat[,j] = discrim * (Z - difficulty[j]); 33 | } 34 | 35 | 36 | // likelihood 37 | for (j in 1:J) Y[,j] ~ bernoulli_logit(lmat[,j]); 38 | 39 | } 40 | 41 | generated quantities { 42 | 43 | } 44 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/ReadMe.md: -------------------------------------------------------------------------------- 1 | These are the basic 1 through 4 parameter IRT models with Stan, and comparisons to models from ltm and sirt packages in R in the test_stan_irt.R file. 2 | 3 | Some of these standard models are now easily implemented in [brms](https://cran.r-project.org/web/packages/brms/vignettes/brms_nonlinear.html#advanced-item-response-models). 
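For instance, a minimal brms sketch of the rasch model as a crossed logistic mixed model, mirroring the commented lme4/brms lines in test_stan_irt.R (assumes the item responses have been reshaped to long format, with hypothetical columns Response, Item, and Subject in a data frame dat_long):

library(brms)
# item easiness via the Item coefficients; person ability via the Subject intercepts
brm(Response ~ 0 + Item + (1 | Subject), data = dat_long, family = bernoulli)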
4 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/IRT_models/test_stan_irt.R: -------------------------------------------------------------------------------- 1 | library(tidyverse); library(rstan); library(shinystan); library(ltm) 2 | 3 | # Data setup 4 | data(Abortion, package='ltm') 5 | Abortion_ = sapply(Abortion, as.numeric) 6 | datalist = list(Y=Abortion_, N=nrow(Abortion_), J=ncol(Abortion_), Model=1) 7 | 8 | # 1PM 9 | test_rasch = stan(file='IRT_rasch.stan', data=datalist, iter=10) 10 | real_rasch = stan(fit=test_rasch, data=datalist, cores=4, iter=3000, warmup=1000, thin=8) 11 | 12 | print(real_rasch, par=c('difficulty', 'discrim'), digits=3) 13 | 14 | # rasch(Abortion, constraint=cbind(ncol(Abortion) + 1, 1)) 15 | rasch(Abortion) 16 | 17 | # std of random effect === discrimination; coefs are comparable to rasch(..., IRT.param=FALSE) 18 | # Abortion %>% 19 | # mutate(Subject=1:nrow(Abortion)) %>% 20 | # gather(key=Item, value=Response, -Subject) %>% 21 | # lme4::glmer(Response ~ -1 + Item + (1|Subject), data=., family=binomial) 22 | # or 23 | # brms::brm(Response ~ -1 + Item + (1|Subject), data=., family=bernoulli) 24 | 25 | 26 | # launch_shinystan(real_rasch) 27 | 28 | 29 | # 2PM 30 | test_2pm = stan(file='IRT_2PM.stan', data=datalist, iter=10) 31 | real_2pm = stan(fit=test_2pm, data=datalist, cores=4, iter=3000, warmup=1000, thin=8) 32 | 33 | print(real_2pm, par=c('difficulty', 'discrim'), digits=3) 34 | 35 | ltm(Abortion ~ z1) 36 | # launch_shinystan(real_2pm) 37 | 38 | 39 | # 3PM 40 | test_3pm = stan(file='IRT_3PM.stan', data=datalist, iter=10) 41 | real_3pm = stan(fit=test_3pm, data=datalist, cores=4, iter=4000, warmup=2000, thin=8) 42 | 43 | print(real_3pm, par=c('difficulty', 'discrim', 'guess')) 44 | 45 | tpm(Abortion) 46 | # launch_shinystan(real_3pm) 47 | 48 | 49 | # 4PM 50 | test_4pm = stan(file='IRT_4PM.stan', data=datalist, iter=10) 51 | real_4pm = stan(fit=test_4pm, data=datalist, cores=4, iter=4000, warmup=2000, thin=8) 52 | 53 | library(sirt) 54 | compare4pm = rasch.mml2(Abortion, est.a = 1:4 , est.c=1:4 , est.d = 1:4, 55 | min.a=0, min.b=-3, max.b=3, min.d = .95 , max.c = .25) # so nice to have no verbose option! 
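# note the bounds on c and d roughly echo the priors in the Stan files:
# guess ~ beta(1, 19) keeps c small, ceiling ~ beta(49, 1) keeps d near 1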
56 | compare4pm$item[, c('a','b','c','d')] # a = discrim, b = diff, c = guess, d = ceiling
57 | print(real_4pm, par=c('difficulty', 'discrim', 'guess', 'ceiling'))
58 | 
59 | # launch_shinystan(real_4pm)
60 | 
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/MixedModelGeneric.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; # sample size
3 | int Ncol_x; # number of columns of the covariate matrix
4 | int I; # number of groups
5 | vector[N] y; # target variable: center or standardize
6 | matrix[N, Ncol_x] X; # covariate matrix: center or standardize; currently set up for one covariate
7 | int Group[N]; # grouping factor
8 | }
9 | 
10 | transformed data {
11 | int P;
12 | // matrix[N,Ncol_x+1] MM; # in case you want to work with a model matrix
13 | //
14 | // MM = append_col(rep_vector(1, N), X);
15 | // P = cols(MM);
16 | P = Ncol_x + 1;
17 | }
18 | 
19 | parameters {
20 | vector[P] fixefs; # fixed effects
21 | real sigma_int_fe; # fixed effects scales
22 | real sigma_beta_fe;
23 | cholesky_factor_corr[P] Omega_FE_chol; # correlation matrix for fixed effects (chol decomp)
24 | 
25 | real sigma_int_re; # random effects scales
26 | real sigma_beta_re;
27 | real sigma_int_re_hyper; # random effects scales hyperprior
28 | real sigma_beta_re_hyper;
29 | 
30 | vector[2] gamma[I]; # individual effects; array allows vectorized multinormal
31 | cholesky_factor_corr[2] Omega_RE_chol; # correlation matrix for random intercepts and slopes (chol decomp)
32 | 
33 | real sigma_y; # residual sd
34 | }
35 | 
36 | transformed parameters {
37 | real Intercept;
38 | real beta;
39 | 
40 | Intercept = fixefs[1];
41 | beta = fixefs[2];
42 | }
43 | 
44 | model {
45 | matrix[P,P] DC_fe;
46 | matrix[2,2] DC_re;
47 | vector[P] sigma_fe;
48 | vector[2] sigma_re;
49 | vector[N] LP; # Linear predictor
50 | 
51 | # priors
52 | Omega_FE_chol ~ lkj_corr_cholesky(2.0);
53 | Omega_RE_chol ~ lkj_corr_cholesky(2.0);
54 | 
55 | ### hyperpriors
56 | sigma_int_re_hyper ~ exponential(.2);
57 | sigma_beta_re_hyper ~ exponential(.2);
58 | 
59 | ### priors
60 | sigma_int_fe ~ cauchy(0, 2.5);
61 | sigma_beta_fe ~ cauchy(0, 2.5);
62 | 
63 | sigma_int_re ~ cauchy(0, sigma_int_re_hyper);
64 | sigma_beta_re ~ cauchy(0, sigma_beta_re_hyper);
65 | 
66 | sigma_y ~ cauchy(0, 2.5);
67 | 
68 | # multivariate draw for fixed effects
69 | sigma_fe[1] = sigma_int_fe;
70 | sigma_fe[2] = sigma_beta_fe;
71 | DC_fe = diag_pre_multiply(sigma_fe, Omega_FE_chol);
72 | 
73 | fixefs ~ multi_normal_cholesky(rep_vector(0, 2), DC_fe);
74 | 
75 | 
76 | # multivariate draw for random effects
77 | sigma_re[1] = sigma_int_re;
78 | sigma_re[2] = sigma_beta_re;
79 | DC_re = diag_pre_multiply(sigma_re, Omega_RE_chol);
80 | 
81 | gamma ~ multi_normal_cholesky(fixefs, DC_re);
82 | 
83 | 
84 | # likelihood
85 | for (n in 1:N){
86 | # LP[n] = MM[n] * betavec; # tried matrix approach, but it was actually 2-3 times slower
87 | LP[n] = gamma[Group[n],1] + gamma[Group[n],2] * X[n,1];
88 | }
89 | y ~ normal(LP, sigma_y);
90 | }
91 | 
92 | generated quantities {
93 | corr_matrix[P] Omega_FE; # correlation of FE
94 | corr_matrix[2] Omega_RE; # correlation of RE
95 | 
96 | Omega_FE = tcrossprod(Omega_FE_chol);
97 | Omega_RE = tcrossprod(Omega_RE_chol);
98 | }
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/MixedModelSleepstudy_withREcorrelation.stan:
--------------------------------------------------------------------------------
1 | data { // data setup
2 | int N; // sample size
3 | int I; // number of subjects
4 | vector[N] RT; // Response: reaction time
5 | vector[N] Days; // Days in study
6 | int Subject[N]; // Subject
7 | }
8 | 
9 | transformed data {
10 | real IntBase;
11 | real RTsd;
12 | 
13 | IntBase = mean(RT); // Intercept starting point
14 | RTsd = sd(RT);
15 | }
16 | 
17 | parameters {
18 | real Intercept01; // fixed effects
19 | real beta01;
20 | vector[2] sigma_u; // sd for ints and slopes
21 | real sigma_y; // residual sd
22 | vector[2] gamma[I]; // individual effects
23 | cholesky_factor_corr[2] Omega_chol; // correlation matrix for random intercepts and slopes (chol decomp)
24 | }
25 | 
26 | transformed parameters {
27 | vector[I] gammaIntercept; // individual effects (named)
28 | vector[I] gammaDays;
29 | real Intercept;
30 | real beta;
31 | 
32 | Intercept = IntBase + Intercept01 * RTsd;
33 | beta = beta01 * 10;
34 | 
35 | for (i in 1:I){
36 | gammaIntercept[i] = gamma[i,1];
37 | gammaDays[i] = gamma[i,2];
38 | }
39 | 
40 | }
41 | 
42 | model {
43 | matrix[2,2] D;
44 | matrix[2,2] DC;
45 | vector[N] yhat; // Linear predictor
46 | vector[2] mu; // vector of Intercept and beta
47 | 
48 | D = diag_matrix(sigma_u);
49 | mu[1] = Intercept;
50 | mu[2] = beta;
51 | 
52 | // priors
53 | Intercept01 ~ normal(0, 1); // example of weakly informative priors;
54 | beta01 ~ normal(0, 1); // remove to essentially duplicate lme4 via improper prior
55 | 
56 | Omega_chol ~ lkj_corr_cholesky(2.0);
57 | 
58 | sigma_u ~ cauchy(0, 2.5); // prior for RE scale
59 | sigma_y ~ cauchy(0, 2.5); // prior for residual scale
60 | 
61 | DC = D * Omega_chol;
62 | 
63 | for (i in 1:I) // loop for Subject random effects
64 | gamma[i] ~ multi_normal_cholesky(mu, DC);
65 | 
66 | // likelihood
67 | for (n in 1:N)
68 | yhat[n] = gammaIntercept[Subject[n]] + gammaDays[Subject[n]] * Days[n];
69 | 
70 | RT ~ normal(yhat, sigma_y);
71 | }
72 | 
73 | generated quantities {
74 | matrix[2,2] Omega; // correlation of RE
75 | 
76 | Omega = tcrossprod(Omega_chol);
77 | }
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/ReadMe.md:
--------------------------------------------------------------------------------
1 | Read Me
2 | -------------
3 | 
4 | All of these can be found in context within their respective .R counterparts. But as there are usually a lot of other (R-specific) things going on, I separate them out here for easier inspection.
-------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/cfa: -------------------------------------------------------------------------------- 1 | data{ 2 | int N; // sample size 3 | int P; // number of variables 4 | int K; // number of factors 5 | matrix[N,P] X; // data matrix of order [N,P] 6 | } 7 | 8 | transformed data{ 9 | int L; 10 | L <- P-K; // Number of loadings 11 | } 12 | 13 | 14 | parameters{ 15 | vector[P] b; // intercepts 16 | vector[L] lambda01; // initial factor loadings 17 | matrix[N,K] FS; // factor scores, matrix of order [N,K] 18 | corr_matrix[K] phi; // factor correlations 19 | vector[K] sdFactorsUnif; 20 | vector[P] sdXUnif; 21 | //vector[L] sdLambdaUnif; 22 | } 23 | 24 | transformed parameters{ 25 | } 26 | 27 | model{ 28 | matrix[N,P] mu; 29 | matrix[K,K] Ld; 30 | vector[K] muFactors; 31 | vector[L] lambda; // factor loadings 32 | vector[K] sdFactors; // sd of factors 33 | vector[P] sdX; // variance for each variable 34 | //vector[L] sdLambda; // hyperprior variance for loading estimates 35 | 36 | // the following will equate the sd to (half) cauchy(0, 2.5) 37 | for (k in 1:K) sdFactors[k] <- 0 + 2.5*tan(sdFactorsUnif[k]); 38 | for (p in 1:P) sdX[p] <- 0 + 2.5*tan(sdXUnif[p]); 39 | // for (l in 1:L) sdLambda[l] <- 0 + 2.5*tan(sdLambdaUnif[l]); 40 | 41 | lambda <- lambda01 * 5; 42 | 43 | muFactors <- rep_vector(0,K); // Factor means, set to zero 44 | Ld <- diag_matrix(sdFactors) * cholesky_decompose(phi); 45 | 46 | for(n in 1:N){ 47 | mu[n,1] <- b[1] + FS[n,1]; // Agree 48 | mu[n,2] <- b[2] + FS[n,1]*lambda[1]; 49 | mu[n,3] <- b[3] + FS[n,1]*lambda[2]; 50 | mu[n,4] <- b[4] + FS[n,1]*lambda[3]; 51 | mu[n,5] <- b[5] + FS[n,1]*lambda[4]; 52 | 53 | mu[n,6] <- b[6] + FS[n,2]; // Consc 54 | mu[n,7] <- b[7] + FS[n,2]*lambda[5]; 55 | mu[n,8] <- b[8] + FS[n,2]*lambda[6]; 56 | mu[n,9] <- b[9] + FS[n,2]*lambda[7]; 57 | mu[n,10] <- b[10] + FS[n,2]*lambda[8]; 58 | 59 | mu[n,11] <- b[11] + FS[n,3]; // Extro 60 | mu[n,12] <- b[12] + FS[n,3]*lambda[9]; 61 | mu[n,13] <- b[13] + FS[n,3]*lambda[10]; 62 | mu[n,14] <- b[14] + FS[n,3]*lambda[11]; 63 | mu[n,15] <- b[15] + FS[n,3]*lambda[12]; 64 | 65 | mu[n,16] <- b[16] + FS[n,4]; // Neuro 66 | mu[n,17] <- b[17] + FS[n,4]*lambda[13]; 67 | mu[n,18] <- b[18] + FS[n,4]*lambda[14]; 68 | mu[n,19] <- b[19] + FS[n,4]*lambda[15]; 69 | mu[n,20] <- b[20] + FS[n,4]*lambda[16]; 70 | 71 | mu[n,21] <- b[21] + FS[n,5]; // Open 72 | mu[n,22] <- b[22] + FS[n,5]*lambda[17]; 73 | mu[n,23] <- b[23] + FS[n,5]*lambda[18]; 74 | mu[n,24] <- b[24] + FS[n,5]*lambda[19]; 75 | mu[n,25] <- b[25] + FS[n,5]*lambda[20]; 76 | } 77 | 78 | // priors 79 | phi ~ lkj_corr(2.0); 80 | // sdX ~ cauchy(0, 2.5); 81 | // sdLambda ~ cauchy(0, 2.5); 82 | // sdFactors ~ cauchy(0, 2.5); 83 | 84 | b ~ normal(0, 10); 85 | lambda01 ~ normal(0, 1); 86 | 87 | for(i in 1:N){ 88 | X[i] ~ normal(mu[i], sdX); 89 | FS[i] ~ multi_normal_cholesky(muFactors, Ld); 90 | } 91 | 92 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/jags_MixedModelBetaRegression.jags: -------------------------------------------------------------------------------- 1 | model { 2 | for (n in 1:N){ 3 | logit(mu[n]) <- (Intercept + IntRE[id[n]]) + (betaTemp + SlopeRE[id[n]])*tempCen[n] 4 | A[n] <- mu[n]*phi 5 | B[n] <- (1.0-mu[n])*phi 6 | yield[n] ~ dbeta(A[n], B[n]) 7 | } 8 | 9 | Intercept ~ dnorm(0, 1/10^2) 10 | betaTemp ~ dnorm(0, 1) 11 | 12 | for (l in 1:L){ 13 | IntRE[l] <- 
gammaIntercept[l]*sd_int
14 | SlopeRE[l] <- gammaTemp[l]*sd_beta
15 | 
16 | gammaIntercept[l] ~ dnorm(0, 1)
17 | gammaTemp[l] ~ dnorm(0, 1)
18 | }
19 | 
20 | # Half-cauchy as in Gelman 2006
21 | # If scale parameter in cauchy is 5, precision of z = 1/5^2 = 0.04
22 | # sigma int
23 | # sd_int <- zInt/sqrt(chSqInt) # prior for sigma; cauchy = normal/sqrt(chi^2)
24 | # zInt ~ dnorm(0, .04)I(0,)
25 | # chSqInt ~ dgamma(0.5, 0.5) # chi^2 with 1 d.f.
26 | sd_int ~ dt(0, .04, 1)I(0,)
27 | 
28 | # sigma beta
29 | # sd_beta <- zbeta/sqrt(chSqbeta)
30 | # zbeta ~ dnorm(0, .04)I(0,)
31 | # chSqbeta ~ dgamma(0.5, 0.5)
32 | sd_beta ~ dt(0, .04, 1)I(0,)
33 | 
34 | # phi
35 | # phi <- zphi/sqrt(chSqphi)
36 | # zphi ~ dnorm(0, .04)I(0,)
37 | # chSqphi ~ dgamma(0.5, 0.5)
38 | phi ~ dt(0, .04, 1)I(0,)
39 | }
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/linregwithprior.stan:
--------------------------------------------------------------------------------
1 | data { // Data block; declarations only
2 | int N; // Sample size
3 | int k; // Dimension of model matrix
4 | matrix [N, k] X; // Model Matrix
5 | vector[N] y; // response
6 | }
7 | 
8 | /* transformed data { // Transformed data block; declarations and statements. None needed here.
9 | }
10 | */
11 | 
12 | parameters { // Parameters block; declarations only
13 | vector[k] beta; // coefficient vector
14 | real sigma; // error scale
15 | }
16 | 
17 | transformed parameters { // Transformed parameters block; declarations and statements.
18 | }
19 | 
20 | model { // Model block; declarations and statements.
21 | vector[N] mu;
22 | mu = X * beta; // creation of linear predictor
23 | 
24 | // priors
25 | beta ~ normal(0, 10);
26 | sigma ~ cauchy(0, 5); // With sigma bounded at 0, this is half-cauchy
27 | 
28 | // likelihood
29 | y ~ normal(mu, sigma);
30 | }
31 | 
32 | generated quantities { // Generated quantities block; declarations and statements.
33 | real rss;
34 | real totalss;
35 | real R2; // Calculate Rsq as a demonstration
36 | vector[N] mu;
37 | 
38 | mu = X * beta;
39 | rss = dot_self(mu-y);
40 | totalss = dot_self(y-mean(y));
41 | R2 = 1 - rss/totalss;
42 | }
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/StanBugsJags/lmbugs.bugs:
--------------------------------------------------------------------------------
1 | model {
2 | for (n in 1:N){
3 | mu[n] <- beta[1]*X[n,1] + beta[2]*X[n,2] + beta[3]*X[n,3] + beta[4]*X[n,4]
4 | y[n] ~ dnorm(mu[n], inv.sigma.sq)
5 | }
6 | 
7 | for (k in 1:K){
8 | beta[k] ~ dnorm(0, .001) # prior for reg coefs
9 | }
10 | 
11 | # Half-cauchy as in Gelman 2006
12 | # Scale parameter is 5, so precision of z = 1/5^2 = 0.04
13 | sigma.y <- abs(z)/sqrt(chSq) # prior for sigma; cauchy = normal/sqrt(chi^2)
14 | z ~ dnorm(0, .04)
15 | chSq ~ dgamma(0.5, 0.5) # chi^2 with 1 d.f.
16 | # sigma.y ~ dgamma(.001,.001) # prior for sigma; a typical approach used.
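# (the half-Cauchy construction above works because, for z ~ N(0, 5^2) and
# chSq ~ chi^2(1), |z|/sqrt(chSq) ~ half-Cauchy(0, 5))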
17 | inv.sigma.sq <- pow(sigma.y, -2) # precision 18 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/lmjags.jags: -------------------------------------------------------------------------------- 1 | model { 2 | for (n in 1:N){ 3 | mu[n] <- beta[1]*X[n,1] + beta[2]*X[n,2] + beta[3]*X[n,3] + beta[4]*X[n,4] 4 | y[n] ~ dnorm(mu[n], inv.sigma.sq) 5 | } 6 | 7 | for (k in 1:K){ 8 | beta[k] ~ dnorm(0, .001) # prior for reg coefs 9 | } 10 | 11 | # Half-cauchy as in Gelman 2006 12 | # Scale parameter is 5, so precision of z = 1/5^2 = 0.04 13 | z ~ dnorm(0, .04)I(0,) 14 | chSq ~ dgamma(0.5, 0.5) # chi^2 with 1 d.f. 15 | sigma.y <- z/sqrt(chSq) # prior for sigma; cauchy = normal/sqrt(chi^2) 16 | inv.sigma.sq <- pow(sigma.y, -2) # precision 17 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/multilevelmediation.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; 3 | vector[N] X; 4 | vector[N] Med; 5 | vector[N] y; 6 | int J; # number of groups 7 | int Group[N]; # Groups 8 | } 9 | 10 | parameters{ 11 | real alpha_xm; # mediator model reg parameters 12 | real beta_xm; 13 | real sigma_xm; 14 | 15 | real alpha; # main model reg parameters 16 | real beta1; 17 | real beta2; 18 | real sigma_y; 19 | 20 | real sigma_alpha_xm; # sd for ranef 21 | real sigma_alpha; 22 | 23 | vector[J] gammaAlpha_xm; 24 | vector[J] gammaAlpha; 25 | } 26 | 27 | model { 28 | vector[N] mu_y; # linear predictors for Y and M 29 | vector[N] mu_Med; 30 | 31 | # priors 32 | alpha_xm ~ normal(0, 10); 33 | beta_xm ~ normal(0, 10); 34 | sigma_xm ~ cauchy(0,5); 35 | 36 | alpha ~ normal(0, 10); 37 | beta1 ~ normal(0, 10); 38 | beta2 ~ normal(0, 10); 39 | sigma_y ~ cauchy(0,5); 40 | 41 | sigma_alpha_xm ~ cauchy(0,5); 42 | sigma_alpha ~ cauchy(0,5); 43 | 44 | for (j in 1:J){ 45 | gammaAlpha_xm[j] ~ normal(0, sigma_alpha_xm); 46 | gammaAlpha[j] ~ normal(0, sigma_alpha); 47 | } 48 | 49 | for (n in 1:N){ 50 | mu_Med[n] = alpha_xm + gammaAlpha_xm[Group[n]] + beta_xm*X[n]; 51 | mu_y[n] = alpha + gammaAlpha[Group[n]] + beta1*X[n] + beta2*Med[n] ; 52 | } 53 | 54 | 55 | # sampling 56 | Med ~ normal(mu_Med, sigma_xm); 57 | y ~ normal(mu_y, sigma_y); 58 | } 59 | 60 | generated quantities{ 61 | real indEffect; 62 | 63 | # since no random slope for beta2 64 | indEffect = beta_xm *beta2; 65 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/multilevelmediationYuanMacKinnon2009.bugs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Bayesian/StanBugsJags/multilevelmediationYuanMacKinnon2009.bugs -------------------------------------------------------------------------------- /ModelFitting/Bayesian/StanBugsJags/singlelevelmediationYuanMacKinnon2009.bugs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Bayesian/StanBugsJags/singlelevelmediationYuanMacKinnon2009.bugs -------------------------------------------------------------------------------- /ModelFitting/Bayesian/horseshoe/README.md: -------------------------------------------------------------------------------- 1 | These are the scripts 
regarding the horseshoe prior from: 2 | 3 | "Hierarchical Bayesian Survival Analysis and Projective Covariate Selection in Cardiovascular Event Risk Prediction" by Peltola et al. (2014) 4 | 5 | [link to paper](http://ceur-ws.org/Vol-1218/bmaw2014_paper_8.pdf) 6 | [link to scripts](http://becs.aalto.fi/en/research/bayes/diabcvd/) -------------------------------------------------------------------------------- /ModelFitting/Bayesian/horseshoe/wei_bg.stan: -------------------------------------------------------------------------------- 1 | /* Variable naming: 2 | obs = observed 3 | cen = (right) censored 4 | N = number of samples 5 | M = number of covariates 6 | bg = established risk (or protective) factors 7 | tau = scale parameter 8 | */ 9 | // Tomi Peltola, tomi.peltola@aalto.fi 10 | 11 | functions { 12 | vector sqrt_vec(vector x) { 13 | vector[dims(x)[1]] res; 14 | 15 | for (m in 1:dims(x)[1]){ 16 | res[m] <- sqrt(x[m]); 17 | } 18 | 19 | return res; 20 | } 21 | 22 | vector bg_prior_lp(real r_global, vector r_local) { 23 | r_global ~ normal(0.0, 10.0); 24 | r_local ~ inv_chi_square(1.0); 25 | 26 | return r_global * sqrt_vec(r_local); 27 | } 28 | } 29 | 30 | data { 31 | int Nobs; 32 | int Ncen; 33 | int M_bg; 34 | vector[Nobs] yobs; 35 | vector[Ncen] ycen; 36 | matrix[Nobs, M_bg] Xobs_bg; 37 | matrix[Ncen, M_bg] Xcen_bg; 38 | } 39 | 40 | transformed data { 41 | real tau_mu; 42 | real tau_al; 43 | 44 | tau_mu <- 10.0; 45 | tau_al <- 10.0; 46 | } 47 | 48 | parameters { 49 | real tau_s_bg_raw; 50 | vector[M_bg] tau_bg_raw; 51 | 52 | real alpha_raw; 53 | vector[M_bg] beta_bg_raw; 54 | 55 | real mu; 56 | } 57 | 58 | transformed parameters { 59 | vector[M_bg] beta_bg; 60 | real alpha; 61 | 62 | beta_bg <- bg_prior_lp(tau_s_bg_raw, tau_bg_raw) .* beta_bg_raw; 63 | alpha <- exp(tau_al * alpha_raw); 64 | } 65 | 66 | model { 67 | yobs ~ weibull(alpha, exp(-(mu + Xobs_bg * beta_bg)/alpha)); 68 | increment_log_prob(weibull_ccdf_log(ycen, alpha, exp(-(mu + Xcen_bg * beta_bg)/alpha))); 69 | 70 | beta_bg_raw ~ normal(0.0, 1.0); 71 | alpha_raw ~ normal(0.0, 1.0); 72 | 73 | mu ~ normal(0.0, tau_mu); 74 | } 75 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/horseshoe/wei_gau.stan: -------------------------------------------------------------------------------- 1 | /* Variable naming: 2 | obs = observed 3 | cen = (right) censored 4 | N = number of samples 5 | M = number of covariates 6 | bg = established risk (or protective) factors 7 | biom = candidate biomarkers (candidate risk factors) 8 | tau = scale parameter 9 | */ 10 | // Tomi Peltola, tomi.peltola@aalto.fi 11 | 12 | functions { 13 | vector sqrt_vec(vector x) { 14 | vector[dims(x)[1]] res; 15 | 16 | for (m in 1:dims(x)[1]){ 17 | res[m] <- sqrt(x[m]); 18 | } 19 | 20 | return res; 21 | } 22 | 23 | vector gau_prior_lp(real r1_global, real r2_global, vector ones_biom) { 24 | r1_global ~ normal(0.0, 1.0); 25 | r2_global ~ inv_gamma(0.5, 0.5); 26 | 27 | return (r1_global * sqrt(r2_global)) * ones_biom; 28 | } 29 | 30 | vector bg_prior_lp(real r_global, vector r_local) { 31 | r_global ~ normal(0.0, 10.0); 32 | r_local ~ inv_chi_square(1.0); 33 | 34 | return r_global * sqrt_vec(r_local); 35 | } 36 | } 37 | 38 | data { 39 | int Nobs; 40 | int Ncen; 41 | int M_bg; 42 | int M_biom; 43 | vector[Nobs] yobs; 44 | vector[Ncen] ycen; 45 | matrix[Nobs, M_bg] Xobs_bg; 46 | matrix[Ncen, M_bg] Xcen_bg; 47 | matrix[Nobs, M_biom] Xobs_biom; 48 | matrix[Ncen, M_biom] Xcen_biom; 49 | } 50 | 51 | transformed data { 52 | 
real tau_mu; 53 | real tau_al; 54 | vector[M_biom] ones_biom; 55 | 56 | tau_mu <- 10.0; 57 | tau_al <- 10.0; 58 | 59 | for (m in 1:M_biom) { 60 | ones_biom[m] <- 1.0; 61 | } 62 | } 63 | 64 | parameters { 65 | real tau_s_bg_raw; 66 | vector[M_bg] tau_bg_raw; 67 | 68 | real tau_s1_biom_raw; 69 | real tau_s2_biom_raw; 70 | 71 | real alpha_raw; 72 | vector[M_bg] beta_bg_raw; 73 | vector[M_biom] beta_biom_raw; 74 | 75 | real mu; 76 | } 77 | 78 | transformed parameters { 79 | vector[M_biom] beta_biom; 80 | vector[M_bg] beta_bg; 81 | real alpha; 82 | 83 | beta_biom <- gau_prior_lp(tau_s1_biom_raw, tau_s2_biom_raw, ones_biom) .* beta_biom_raw; 84 | beta_bg <- bg_prior_lp(tau_s_bg_raw, tau_bg_raw) .* beta_bg_raw; 85 | alpha <- exp(tau_al * alpha_raw); 86 | } 87 | 88 | model { 89 | yobs ~ weibull(alpha, exp(-(mu + Xobs_bg * beta_bg + Xobs_biom * beta_biom)/alpha)); 90 | increment_log_prob(weibull_ccdf_log(ycen, alpha, exp(-(mu + Xcen_bg * beta_bg + Xcen_biom * beta_biom)/alpha))); 91 | 92 | beta_biom_raw ~ normal(0.0, 1.0); 93 | beta_bg_raw ~ normal(0.0, 1.0); 94 | alpha_raw ~ normal(0.0, 1.0); 95 | 96 | mu ~ normal(0.0, tau_mu); 97 | } 98 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/horseshoe/wei_hs.stan: -------------------------------------------------------------------------------- 1 | /* Variable naming: 2 | obs = observed 3 | cen = (right) censored 4 | N = number of samples 5 | M = number of covariates 6 | bg = established risk (or protective) factors 7 | biom = candidate biomarkers (candidate risk factors) 8 | tau = scale parameter 9 | */ 10 | // Tomi Peltola, tomi.peltola@aalto.fi 11 | 12 | functions { 13 | vector sqrt_vec(vector x) { 14 | vector[dims(x)[1]] res; 15 | 16 | for (m in 1:dims(x)[1]){ 17 | res[m] <- sqrt(x[m]); 18 | } 19 | 20 | return res; 21 | } 22 | 23 | vector hs_prior_lp(real r1_global, real r2_global, vector r1_local, vector r2_local) { 24 | r1_global ~ normal(0.0, 1.0); 25 | r2_global ~ inv_gamma(0.5, 0.5); 26 | 27 | r1_local ~ normal(0.0, 1.0); 28 | r2_local ~ inv_gamma(0.5, 0.5); 29 | 30 | return (r1_global * sqrt(r2_global)) * r1_local .* sqrt_vec(r2_local); 31 | } 32 | 33 | vector bg_prior_lp(real r_global, vector r_local) { 34 | r_global ~ normal(0.0, 10.0); 35 | r_local ~ inv_chi_square(1.0); 36 | 37 | return r_global * sqrt_vec(r_local); 38 | } 39 | } 40 | 41 | data { 42 | int Nobs; 43 | int Ncen; 44 | int M_bg; 45 | int M_biom; 46 | vector[Nobs] yobs; 47 | vector[Ncen] ycen; 48 | matrix[Nobs, M_bg] Xobs_bg; 49 | matrix[Ncen, M_bg] Xcen_bg; 50 | matrix[Nobs, M_biom] Xobs_biom; 51 | matrix[Ncen, M_biom] Xcen_biom; 52 | } 53 | 54 | transformed data { 55 | real tau_mu; 56 | real tau_al; 57 | 58 | tau_mu <- 10.0; 59 | tau_al <- 10.0; 60 | } 61 | 62 | parameters { 63 | real tau_s_bg_raw; 64 | vector[M_bg] tau_bg_raw; 65 | 66 | real tau_s1_biom_raw; 67 | real tau_s2_biom_raw; 68 | vector[M_biom] tau1_biom_raw; 69 | vector[M_biom] tau2_biom_raw; 70 | 71 | real alpha_raw; 72 | vector[M_bg] beta_bg_raw; 73 | vector[M_biom] beta_biom_raw; 74 | 75 | real mu; 76 | } 77 | 78 | transformed parameters { 79 | vector[M_biom] beta_biom; 80 | vector[M_bg] beta_bg; 81 | real alpha; 82 | 83 | beta_biom <- hs_prior_lp(tau_s1_biom_raw, tau_s2_biom_raw, tau1_biom_raw, tau2_biom_raw) .* beta_biom_raw; 84 | beta_bg <- bg_prior_lp(tau_s_bg_raw, tau_bg_raw) .* beta_bg_raw; 85 | alpha <- exp(tau_al * alpha_raw); 86 | } 87 | 88 | model { 89 | yobs ~ weibull(alpha, exp(-(mu + Xobs_bg * beta_bg + Xobs_biom * beta_biom)/alpha)); 90 | 
increment_log_prob(weibull_ccdf_log(ycen, alpha, exp(-(mu + Xcen_bg * beta_bg + Xcen_biom * beta_biom)/alpha)));
91 | 
92 | beta_biom_raw ~ normal(0.0, 1.0);
93 | beta_bg_raw ~ normal(0.0, 1.0);
94 | alpha_raw ~ normal(0.0, 1.0);
95 | 
96 | mu ~ normal(0.0, tau_mu);
97 | }
98 | 
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/horseshoe/wei_lap.stan:
--------------------------------------------------------------------------------
1 | /* Variable naming:
2 | obs = observed
3 | cen = (right) censored
4 | N = number of samples
5 | M = number of covariates
6 | bg = established risk (or protective) factors
7 | biom = candidate biomarkers (candidate risk factors)
8 | tau = scale parameter
9 | */
10 | // Tomi Peltola, tomi.peltola@aalto.fi
11 | 
12 | functions {
13 | vector sqrt_vec(vector x) {
14 | vector[dims(x)[1]] res;
15 | 
16 | for (m in 1:dims(x)[1]){
17 | res[m] <- sqrt(x[m]);
18 | }
19 | 
20 | return res;
21 | }
22 | 
23 | vector lap_prior_lp(real r1_global, real r2_global, vector r_local) {
24 | r1_global ~ normal(0.0, 1.0);
25 | r2_global ~ inv_gamma(0.5, 0.5);
26 | 
27 | r_local ~ exponential(1.0);
28 | 
29 | return (r1_global * sqrt(r2_global)) * sqrt_vec(r_local);
30 | }
31 | 
32 | vector bg_prior_lp(real r_global, vector r_local) {
33 | r_global ~ normal(0.0, 10.0);
34 | r_local ~ inv_chi_square(1.0);
35 | 
36 | return r_global * sqrt_vec(r_local);
37 | }
38 | }
39 | 
40 | data {
41 | int Nobs;
42 | int Ncen;
43 | int M_bg;
44 | int M_biom;
45 | vector[Nobs] yobs;
46 | vector[Ncen] ycen;
47 | matrix[Nobs, M_bg] Xobs_bg;
48 | matrix[Ncen, M_bg] Xcen_bg;
49 | matrix[Nobs, M_biom] Xobs_biom;
50 | matrix[Ncen, M_biom] Xcen_biom;
51 | }
52 | 
53 | transformed data {
54 | real tau_mu;
55 | real tau_al;
56 | 
57 | tau_mu <- 10.0;
58 | tau_al <- 10.0;
59 | }
60 | 
61 | parameters {
62 | real tau_s_bg_raw;
63 | vector[M_bg] tau_bg_raw;
64 | 
65 | real tau_s1_biom_raw;
66 | real tau_s2_biom_raw;
67 | vector[M_biom] tau_biom_raw;
68 | 
69 | real alpha_raw;
70 | vector[M_bg] beta_bg_raw;
71 | vector[M_biom] beta_biom_raw;
72 | 
73 | real mu;
74 | }
75 | 
76 | transformed parameters {
77 | vector[M_biom] beta_biom;
78 | vector[M_bg] beta_bg;
79 | real alpha;
80 | 
81 | beta_biom <- lap_prior_lp(tau_s1_biom_raw, tau_s2_biom_raw, tau_biom_raw) .* beta_biom_raw;
82 | beta_bg <- bg_prior_lp(tau_s_bg_raw, tau_bg_raw) .* beta_bg_raw;
83 | alpha <- exp(tau_al * alpha_raw);
84 | }
85 | 
86 | model {
87 | yobs ~ weibull(alpha, exp(-(mu + Xobs_bg * beta_bg + Xobs_biom * beta_biom)/alpha));
88 | increment_log_prob(weibull_ccdf_log(ycen, alpha, exp(-(mu + Xcen_bg * beta_bg + Xcen_biom * beta_biom)/alpha)));
89 | 
90 | beta_biom_raw ~ normal(0.0, 1.0);
91 | beta_bg_raw ~ normal(0.0, 1.0);
92 | alpha_raw ~ normal(0.0, 1.0);
93 | 
94 | mu ~ normal(0.0, tau_mu);
95 | }
96 | 
--------------------------------------------------------------------------------
/ModelFitting/Bayesian/jags_MixedModelBetaRegression.R:
--------------------------------------------------------------------------------
1 | ### The following is an attempt to reproduce the Stan approach to a mixed model
2 | ### with a beta distribution for the response in
3 | ### rstan_MixedModelBetaRegression.R. For more detail about what the JAGS code
4 | ### is doing, see the Stan code in that file. See the helpfile for
5 | ### GasolineYield for information on the data.
6 | 
7 | ### A couple of changes to note.
JAGS doesn't have the Cauchy distribution, so I
8 | ### show, in the code, the approach outlined in Gelman 2006. Instead of that,
9 | ### one can also use a similarly truncated t-distribution, which is what the
10 | ### code below actually uses. Unlike the Stan code, the variance components
11 | ### and phi priors are all half-Cauchy(0, 5). I'll have to do a little more
12 | ### playing with it, but it seemed initially that JAGS took a lot more
13 | ### iterations to start looking as healthy as the Stan results in terms of
14 | ### effective sample size (though this is the case for many models), having
15 | ### particular trouble with the random intercepts and variance estimate
16 | ### (though Stan had issues there too). But just for comparison's sake, it
17 | ### will reproduce the Stan parameter results using the same number of
18 | ### iterations, so I leave it shorter. Keep in mind also that we're not
19 | ### dealing with a lot of data here.
20 | 
21 | ##################
22 | ### Data setup ###
23 | ##################
24 | data('GasolineYield', package='betareg')
25 | jagsdat = list('yield'=GasolineYield$yield, 'tempCen'=c(scale(GasolineYield$temp, scale=F)),
26 | 'N'=nrow(GasolineYield), 'id'=as.numeric(GasolineYield$batch), L=10)
27 | 
28 | 
29 | #######################
30 | ### Jags Model Code ###
31 | #######################
32 | 
33 | # write to file
34 | sink('betaMixedModeljags.txt')
35 | cat(
36 | 'model {
37 | for (n in 1:N){
38 | logit(mu[n]) <- (Intercept + IntRE[id[n]]) + (betaTemp + SlopeRE[id[n]])*tempCen[n]
39 | A[n] <- mu[n]*phi
40 | B[n] <- (1.0-mu[n])*phi
41 | yield[n] ~ dbeta(A[n], B[n])
42 | }
43 | 
44 | Intercept ~ dnorm(0, 1/10^2)
45 | betaTemp ~ dnorm(0, 1)
46 | 
47 | for (l in 1:L){
48 | IntRE[l] <- gammaIntercept[l]*sd_int
49 | SlopeRE[l] <- gammaTemp[l]*sd_beta
50 | 
51 | gammaIntercept[l] ~ dnorm(0, 1)
52 | gammaTemp[l] ~ dnorm(0, 1)
53 | }
54 | 
55 | # Half-cauchy as in Gelman 2006
56 | # If scale parameter in cauchy is 5, precision of z = 1/5^2 = 0.04
57 | # sigma int
58 | # sd_int <- zInt/sqrt(chSqInt) # prior for sigma; cauchy = normal/sqrt(chi^2)
59 | # zInt ~ dnorm(0, .04)I(0,)
60 | # chSqInt ~ dgamma(0.5, 0.5) # chi^2 with 1 d.f.
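# equivalently, dt(0, .04, 1) is a t with 1 d.f. -- a Cauchy with precision
# 1/5^2, i.e. scale 5 -- and I(0,) truncates it to the positive half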
60 | sd_int ~ dt(0, .04, 1)I(0,) 61 | 62 | # sigma beta 63 | # sd_beta <- zbeta/sqrt(chSqbeta) 64 | # zbeta ~ dnorm(0, .04)I(0,) 65 | # chSqbeta ~ dgamma(0.5, 0.5) 66 | sd_beta ~ dt(0, .04, 1)I(0,) 67 | 68 | # phi 69 | # phi <- zphi/sqrt(chSqphi) 70 | # zphi ~ dnorm(0, .04)I(0,) 71 | # chSqphi ~ dgamma(0.5, 0.5) 72 | phi ~ dt(0, .04, 1)I(0,) 73 | }' 74 | ) 75 | sink() 76 | 77 | 78 | 79 | ##################### 80 | ### Run the model ### 81 | ##################### 82 | library(rjags) 83 | mixedbeta0 <- jags.model(file='betaMixedModeljags.txt', data=jagsdat, # inits=inits 84 | n.chains=4, n.adapt=2000) 85 | 86 | # update(mixedbetamod, 10000) 87 | 88 | mixedbeta = coda.samples(mixedbeta0, c('Intercept','betaTemp','IntRE', 'SlopeRE', 89 | 'phi', 'sd_int', 'sd_beta'), n.iter=200000, 90 | thin=200, n.chains=4) 91 | summary(mixedbeta) 92 | 93 | ### cleaner view of coefficients of interest 94 | round(summary(mixedbeta)[[1]], 4) 95 | 96 | ### effective sample size 97 | effectiveSize(mixedbeta) 98 | 99 | ### trace and density plots 100 | plot(mixedbeta) 101 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/README.md: -------------------------------------------------------------------------------- 1 | 2 | Multinomial logistic regression in Stan 3 | ======================================= 4 | 5 | Files included: 6 | 7 | - multinomial\_stan\_comparisons.R: Shows a variety of ways one could write a basic multinomial model with an eye toward speed comparisons. 8 | - mnl\_1 to 4: stan scripts used in the previous. 9 | - stan\_basic\_mnl\_conceptual.stan: This is straightforward conceptual stan code for a basic multinomial model. It duplicates the initial example in the manual. As noted there, without a suitable prior it is not identified 10 | - stan\_basic\_mnl\_altspecific.stan: This extends the standard multinomial model to one that can include both class specific and class constant effects. 
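A minimal sketch of running one of these with rstan (hypothetical objects: X is an N x D model matrix including an intercept column, y an integer response coded 1 to K):

library(rstan)
datalist = list(N = nrow(X), D = ncol(X), K = max(y), y = y, x = X)
fit = stan(file = 'mnl_1.stan', data = datalist, cores = 4)
print(fit, par = 'beta', digits = 3)
# multinomial_stan_comparisons.R runs this sort of thing across mnl_1 through mnl_4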
11 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/mnl_1.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; # number of classes 3 | int N; # nrow of x 4 | int D; # ncol of x 5 | int y[N]; # target as integer 6 | vector[D] x[N]; # array of D 7 | } 8 | 9 | transformed data { 10 | row_vector[D] zeros; # create reference level coefs of zero 11 | zeros = rep_row_vector(0, D); 12 | } 13 | 14 | parameters { 15 | matrix[K-1,D] beta_raw; # estimated coefs 16 | } 17 | 18 | transformed parameters{ 19 | matrix[K, D] beta; 20 | beta = append_row(zeros, beta_raw); 21 | } 22 | 23 | model { 24 | # prior 25 | to_vector(beta_raw) ~ normal(0, 10); 26 | 27 | # likelihood 28 | for (n in 1:N) 29 | y[n] ~ categorical_logit(beta * x[n]); 30 | } 31 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/mnl_2.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; # number of classes 3 | int N; # nrow of x 4 | int D; # ncol of x 5 | int y[N]; # target as integer 6 | matrix[N,D] x; 7 | } 8 | 9 | transformed data{ 10 | matrix[D,N] xt; 11 | row_vector[D] zeros; 12 | 13 | xt = x'; 14 | zeros = rep_row_vector(0, D); 15 | } 16 | 17 | parameters { 18 | matrix[K-1,D] beta_raw; 19 | } 20 | 21 | transformed parameters{ 22 | matrix[K, D] beta; 23 | beta = append_row(zeros, beta_raw); 24 | } 25 | 26 | model { 27 | matrix[K,N] L; 28 | 29 | L = beta * xt; 30 | to_vector(beta_raw) ~ normal(0, 10); 31 | 32 | for (n in 1:N) 33 | y[n] ~ categorical_logit(L[,n]); 34 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/mnl_3.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; 3 | int N; 4 | int D; 5 | int y[N]; 6 | matrix[N,D] x; 7 | } 8 | 9 | transformed data{ 10 | vector[D] zeros; 11 | 12 | zeros = rep_vector(0, D); 13 | } 14 | 15 | parameters { 16 | matrix[D,K-1] beta_raw; 17 | } 18 | 19 | transformed parameters{ 20 | matrix[D, K] beta; 21 | beta = append_col(zeros, beta_raw); 22 | } 23 | 24 | model { 25 | matrix[N, K] L; 26 | 27 | L = x * beta; 28 | to_vector(beta_raw) ~ normal(0, 10); 29 | 30 | for (n in 1:N) 31 | y[n] ~ categorical_logit(to_vector(L[n])); 32 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/mnl_4.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; 3 | int N; 4 | int D; 5 | int y[N]; 6 | matrix[N,D] x; 7 | } 8 | 9 | transformed data{ 10 | matrix[D,N] xt; 11 | 12 | xt = x'; 13 | } 14 | 15 | parameters { 16 | matrix[K-1,D] beta_raw; 17 | 18 | } 19 | 20 | transformed parameters{ 21 | matrix[K, D] beta; 22 | 23 | for (d in 1:D) { 24 | beta[1,d] = -sum(beta_raw[,d]); 25 | beta[2:K,d] = beta_raw[,d]; 26 | } 27 | } 28 | 29 | model { 30 | matrix[K,N] L; 31 | 32 | L = beta * xt; 33 | 34 | to_vector(beta_raw) ~ normal(0, 10); 35 | 36 | for (n in 1:N) 37 | y[n] ~ categorical_logit(L[,n]); 38 | } -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/stan_basic_mnl_conceptual.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; 3 | int N; 4 | int D; 5 | int y[N]; 6 | vector[D] x[N]; 7 | } 8 | 9 | parameters { 10 | 
matrix[K,D] beta; 11 | } 12 | 13 | model { 14 | to_vector(beta) ~ normal(0, 10); 15 | 16 | for (n in 1:N) 17 | # y[n] ~ categorical(softmax(beta * x[n])); 18 | y[n] ~ categorical_logit(beta * x[n]); 19 | } 20 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/multinomial/stan_mnl_altspecific.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int K; # number of choices 3 | int N; # number of individuals 4 | int D; # number of indiv specific vars 5 | int G; # number of alt specific vars 6 | int T; # number of alt constant vars 7 | 8 | int y[N*K]; # choices 9 | vector[N*K] choice; # choice made (logical) 10 | 11 | matrix[N,D] X; # data for indiv specific effects 12 | matrix[N*K, G] Y; # data for alt specific effects 13 | matrix[N*(K-1),T] Z; # data for alt constant effects 14 | 15 | } 16 | 17 | transformed data { 18 | 19 | } 20 | 21 | parameters { 22 | matrix[D, K-1] beta; # individual specific coefs 23 | matrix[G, K] gamma; # choice specific coefs for alt-specific variables 24 | vector[T] theta; # choice constant coefs for alt-specific variables 25 | 26 | } 27 | 28 | transformed parameters{ 29 | 30 | } 31 | 32 | model { 33 | matrix[N, K-1] Vx; # Utility for individual vars 34 | 35 | vector[N*K] Vy0; 36 | matrix[N, K-1] Vy; # Utility for alt-specific/alt-varying vars 37 | 38 | vector[N*(K-1)] Vz0; 39 | matrix[N, (K-1)] Vz; # Utility for alt-specific/alt-constant vars 40 | 41 | matrix[N,K-1] V; # combined utilities 42 | 43 | vector[N] baseProbVec; # reference group probabilities 44 | real ll0; # intermediate log likelihood 45 | real loglik; # final log likelihood 46 | 47 | 48 | to_vector(beta) ~ normal(0, 10); # diffuse priors on coefficients 49 | to_vector(gamma) ~ normal(0, 10); # diffuse priors on coefficients 50 | to_vector(theta) ~ normal(0, 10); # diffuse priors on coefficients 51 | 52 | 53 | Vx = X * beta; 54 | 55 | for(alt in 1:K){ 56 | vector[G] par; 57 | int start; 58 | int end; 59 | 60 | par = gamma[,alt]; 61 | start = N*alt-N+1; 62 | end = N*alt; 63 | Vy0[start:end] = Y[start:end,] * par; 64 | if(alt>1) Vy[,alt-1] = Vy0[start:end] - Vy0[1:N]; 65 | } 66 | 67 | Vz0 = Z * theta; 68 | 69 | for(alt in 1:(K-1)){ 70 | int start; 71 | int end; 72 | 73 | start = N*alt-N+1; 74 | end = N*alt; 75 | Vz[,alt] = Vz0[start:end]; 76 | } 77 | 78 | V = Vx + Vy + Vz; 79 | 80 | for(n in 1:N) baseProbVec[n] = 1/(1 + sum(exp(V[n]))); 81 | ll0 = dot_product(to_vector(V), choice[(N+1):(N*K)]); # just going to assume no neg index 82 | loglik = sum(log(baseProbVec)) + ll0; 83 | target += loglik; 84 | 85 | } 86 | 87 | 88 | generated quantities { 89 | matrix[N,K-1] fitted_nonref; 90 | vector[N] fitted_ref; 91 | matrix[N,K] fitted; 92 | 93 | matrix[N,K-1] Vx; # Utility for individual vars 94 | 95 | vector[N*K] Vy0; 96 | matrix[N,K-1] Vy; # Utility for alt-specific/alt-varying vars 97 | 98 | vector[N*(K-1)] Vz0; 99 | matrix[N, (K-1)] Vz; # Utility for alt-specific/alt-constant vars 100 | 101 | matrix[N,K-1] V; # combined utilities 102 | 103 | vector[N] baseProbVec; # reference group probabilities 104 | 105 | Vx = X * beta; 106 | 107 | for(alt in 1:K){ 108 | vector[G] par; 109 | int start; 110 | int end; 111 | 112 | par = gamma[,alt]; 113 | start = N*alt-N+1; 114 | end = N*alt; 115 | Vy0[start:end] = Y[start:end,] * par; 116 | if(alt>1) Vy[,alt-1] = Vy0[start:end] - Vy0[1:N]; 117 | } 118 | 119 | Vz0 = Z * theta; 120 | 121 | for(alt in 1:(K-1)){ 122 | int start; 123 | int end; 124 | 125 | start = N*alt-N+1; 
126 | end = N*alt; 127 | Vz[,alt] = Vz0[start:end]; 128 | } 129 | 130 | V = Vx + Vy + Vz; 131 | 132 | for(n in 1:N) baseProbVec[n] = 1/(1 + sum(exp(V[n]))); 133 | fitted_nonref = exp(V) .* rep_matrix(baseProbVec, K-1); 134 | for(n in 1:N) fitted_ref[n] = 1-sum(fitted_nonref[n]); 135 | fitted = append_col(fitted_ref, fitted_nonref); 136 | 137 | } 138 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/r2bugs.distributions.R: -------------------------------------------------------------------------------- 1 | # From Translating Probability Density Functions: From R to BUGS and Back Again 2 | # by David S. LeBauer, Michael C. Dietze, Benjamin M. Bolker 3 | 4 | r2bugs.distributions <- function(priors, direction = 'r2bugs') { 5 | priors$distn <- as.character(priors$distn) 6 | priors$parama <- as.numeric(priors$parama) 7 | priors$paramb <- as.numeric(priors$paramb) 8 | ## index dataframe according to distribution 9 | norm <- priors$distn %in% c('norm', 'lnorm') # these have same transform 10 | weib <- grepl("weib", priors$distn) # matches r and bugs version 11 | gamma <- priors$distn == 'gamma' 12 | chsq <- grepl("chisq", priors$distn) # matches r and bugs version 13 | bin <- priors$distn %in% c('binom', 'bin') # matches r and bugs version 14 | nbin <- priors$distn %in% c('nbinom', 'negbin') # matches r and bugs version 15 | ## Normal, log-Normal: Convert sd to precision 16 | exponent <- ifelse(direction == "r2bugs", -2, -0.5) 17 | priors$paramb[norm] <- priors$paramb[norm] ^ exponent 18 | ## Weibull 19 | if(direction == 'r2bugs'){ 20 | ## Convert R parameter b to BUGS parameter lambda by l = (1/b)^a 21 | priors$paramb[weib] <- (1 / priors$paramb[weib]) ^ priors$parama[weib] 22 | } else if (direction == 'bugs2r') { 23 | ## Convert BUGS parameter lambda to BUGS parameter b by b = l^(-1/a) 24 | priors$paramb[weib] <- priors$paramb[weib] ^ (- 1 / priors$parama[weib] ) 25 | } 26 | ## Reverse parameter order for binomial and negative binomial 27 | priors[bin | nbin, c('parama', 'paramb')] <- 28 | priors[bin | nbin, c('paramb', 'parama')] 29 | ## Translate distribution names 30 | if(direction == "r2bugs"){ 31 | priors$distn[weib] <- "weib" 32 | priors$distn[chsq] <- "chisqr" 33 | priors$distn[bin] <- "bin" 34 | priors$distn[nbin] <- "negbin" 35 | } else if(direction == "bugs2r"){ 36 | priors$distn[weib] <- "weibull" 37 | priors$distn[chsq] <- "chisq" 38 | priors$distn[bin] <- "binom" 39 | priors$distn[nbin] <- "nbinom" 40 | } 41 | return(priors) 42 | } 43 | 44 | ### Example ### 45 | r.distn <- data.frame(distn = "norm", parama = 10, paramb = 2) 46 | r2bugs.distributions(r.distn) -------------------------------------------------------------------------------- /ModelFitting/Bayesian/rstan_MixedModelSleepstudy.R: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------# 2 | # A mixed model via stan/rstan with comparison to lme4 output. In this model # 3 | # there is a random intercept as well as a random slope for the single predictor. 
# 4 | #---------------------------------------------------------------------------------# 5 | 6 | ############# 7 | ### Setup ### 8 | ############# 9 | 10 | ### Data ### 11 | library(lme4) 12 | data(sleepstudy) 13 | # ?sleepstudy 14 | 15 | # Create a model for later comparison 16 | mod_lme = lmer(Reaction ~ Days + (1 | Subject) + (0 + Days | Subject), sleepstudy) 17 | 18 | dat = list(N=nrow(sleepstudy), I=length(unique(sleepstudy$Subject)), 19 | Subject=as.numeric(sleepstudy$Subject), Days = sleepstudy$Days, 20 | RT=sleepstudy$Reaction) 21 | 22 | 23 | ### Stan code ### 24 | stanmodelcode = ' 25 | data { // data setup 26 | int<lower=1> N; // sample size 27 | int<lower=1> I; // number of subjects 28 | vector[N] RT; // Response: reaction time 29 | vector[N] Days; // Days in study 30 | int<lower=1, upper=I> Subject[N]; // Subject 31 | } 32 | 33 | transformed data { 34 | real startInt; // Create starting point for Intercept (mean day 0) 35 | 36 | startInt = 0; 37 | for (n in 1:N) 38 | if (Days[n] == 0) startInt = startInt + RT[n]/I; 39 | } 40 | 41 | parameters { 42 | real Intercept; // fixed effects 43 | real beta; 44 | real<lower=0> sd_int; // sd for ints and slopes 45 | real<lower=0> sd_beta; 46 | real<lower=0> sigma_y; // residual sd 47 | 48 | vector[I] gammaIntercept; // individual effects 49 | vector[I] gammaDays; 50 | } 51 | 52 | transformed parameters { 53 | vector[N] yhat; 54 | 55 | for (n in 1:N) // Linear predictor 56 | yhat[n] = gammaIntercept[Subject[n]] + gammaDays[Subject[n]] * Days[n]; 57 | } 58 | 59 | model { 60 | // priors 61 | Intercept ~ normal(startInt, 100); // example of weakly informative priors; 62 | beta ~ normal(0, 100); // remove to essentially duplicate lme4 via improper prior 63 | 64 | gammaIntercept ~ normal(Intercept, sd_int); 65 | gammaDays ~ normal(beta, sd_beta); 66 | 67 | sd_int ~ cauchy(0, 5); 68 | sd_beta ~ cauchy(0, 5); 69 | sigma_y ~ cauchy(0, 5); 70 | 71 | // likelihood 72 | RT ~ normal(yhat, sigma_y); 73 | } 74 | ' 75 | ############# 76 | ### Model ### 77 | ############# 78 | 79 | library(rstan) 80 | fit = stan(model_code = stanmodelcode, model_name = "example", 81 | data = dat, iter = 7000, warmup=2000, thin=20, chains = 4, 82 | verbose = F) 83 | 84 | ### Summarize 85 | print(fit, digits_summary=3, pars=c('Intercept','beta','sigma_y', 'sd_int', 'sd_beta'), 86 | probs = c(0, .025, .5, .975, 1)) 87 | 88 | ### Compare 89 | mod_lme 90 | 91 | print(fit, digits_summary=3, pars=c('gammaIntercept', 'gammaDays')) 92 | 93 | ### Diagnostic plots 94 | shinystan::launch_shinystan(fit) 95 | 96 | ############################### 97 | ### A parallelized approach ### 98 | ############################### 99 | 100 | fit2 = stan(model_code = stanmodelcode, model_name = "mixedreg", #init=0, 101 | fit = fit, # if using the same model code, this will use the previous compilation 102 | data = dat, iter = 7000, warmup=2000, thin=20, cores=4, 103 | verbose = T) 104 | 105 | 106 | # examine some diagnostics 107 | samplerpar = get_sampler_params(fit2)[[1]] 108 | summary(samplerpar) 109 | 110 | 111 | print(fit2, pars= c('Intercept','beta','sigma_y', 'sd_int', 'sd_beta','lp__'), digits=3, 112 | probs = c(.01, .025, .05, .5, .95, 0.975, .99)) 113 | 114 | # Compare again 115 | mod_lme 116 | 117 | # diagnostics 118 | shinystan::launch_shinystan(fit2) 119 | -------------------------------------------------------------------------------- /ModelFitting/Bayesian/rstan_MixtureModel.R: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------# 2 |
# Mixture model (draft); The Stan manual notes issues with mixture modeling, esp. # 3 | # label switching (as does the bugs book), but the code in the manual chapter on # 4 | # mixture modeling does not appear able to reproduce the old faithful mixture or # 5 | # even a simulated mixture of normals, even with some heavy hand-holding on # 6 | # starting points, limits, etc. On occasion the means can be recovered, but the # 7 | # variances are all over the place and notably influenced by the prior. The # 8 | # Stan manual chapter on problematic posteriors covers many of the issues. # 9 | # # 10 | # The following Stan code, however, is verbatim from GitHub and should work fine, # 11 | # albeit slowly. # 12 | # github stan/src/models/basic_estimators/normal_mixture_k_prop.stan # 13 | #---------------------------------------------------------------------------------# 14 | 15 | data(faithful) 16 | head(faithful) 17 | par(mfrow=c(1,2)) 18 | apply(faithful, 2, ggplot2::qplot, geom='density') 19 | layout(1) 20 | 21 | 22 | y = rnorm(500, c(50,80), c(5,10)) 23 | ggplot2::qplot(y, geom='density') 24 | psych::describe(data.frame(y[seq(1,500, 2)], y[seq(2,500, 2)])) 25 | 26 | 27 | # take your pick 28 | standat = list(N=nrow(faithful), K=2, y=faithful$waiting) 29 | 30 | standat = list(N=nrow(faithful), K=2, y=faithful$eruptions) 31 | 32 | standat = list(N=length(y), K=2, y=y) 33 | 34 | 35 | stanmodelcode = ' 36 | data { 37 | int<lower=1> K; # K components 38 | int<lower=1> N; # N observations 39 | real y[N]; # variable of interest 40 | } 41 | 42 | parameters { 43 | simplex[K] theta; # mixing proportions 44 | simplex[K] mu_prop; 45 | real mu_loc; 46 | real<lower=0> mu_scale; 47 | real<lower=0> sigma[K]; # sds of the components 48 | } 49 | 50 | transformed parameters { 51 | ordered[K] mu; 52 | mu = mu_loc + mu_scale * cumulative_sum(mu_prop); # means of the components 53 | } 54 | 55 | model { 56 | // prior 57 | mu_loc ~ cauchy(0,5); 58 | mu_scale ~ cauchy(0,5); 59 | sigma ~ cauchy(0,5); 60 | 61 | // likelihood 62 | { 63 | real ps[K]; 64 | vector[K] log_theta; 65 | log_theta = log(theta); 66 | 67 | for (n in 1:N) { 68 | for (k in 1:K) { 69 | ps[k] = log_theta[k] + normal_lpdf(y[n] | mu[k], sigma[k]); 70 | } 71 | target += log_sum_exp(ps); 72 | } 73 | } 74 | } 75 | ' 76 | 77 | 78 | ################ 79 | ### Test Run ### 80 | ################ 81 | library(rstan) 82 | 83 | # the following may take several minutes per chain depending on the data 84 | test = stan(model_code = stanmodelcode, model_name = "example", 85 | data = standat, iter = 7000, warmup=2000, thin=5, chains = 2, cores=2, 86 | verbose = F) 87 | 88 | # shinystan::launch_shinystan(test) 89 | print(test, digits=3) 90 | 91 | ### compare to flexmix: coefs in flexmod1 = mu in test 92 | library(flexmix) 93 | flexmod1 = flexmix(standat$y~1, k=2, control=list(tolerance=1e-12, iter.max=1000)) 94 | summary(flexmod1) 95 | parameters(flexmod1) 96 | 97 | 98 | ###################### 99 | ### Production run ### 100 | ###################### 101 | 102 | # This can take a notably long time (i.e. hours) depending on the data.
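# A side note on the sampler settings used here and below: heavy thinning like this is
# rarely necessary with Stan's NUTS sampler, so if run time is a concern, it may be
# worth trying fewer iterations without thinning before committing to a long run.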
103 | fit = stan(model_code = stanmodelcode, model_name = "mixture", fit = test, 104 | data = standat, iter = 62000, warmup=12000, thin=50, cores = 4, 105 | verbose = T) 106 | print(fit, digits=3) 107 | shinystan::launch_shinystan(fit) 108 | 109 | -------------------------------------------------------------------------------- /ModelFitting/EM Examples/EM algorithm for probit example.R: -------------------------------------------------------------------------------- 1 | ########################################################## 2 | ### The following regards models for a binary response ### 3 | ### See Murphy, 2012, Machine Learning: A Probabilistic ### 4 | ### Perspective, Chapter 11.4. ### 5 | ########################################################## 6 | 7 | mydata = haven::read_dta("https://stats.idre.ucla.edu/stat/stata/dae/binary.dta") 8 | 9 | ################################# 10 | ### glm for a standard probit ### 11 | ################################# 12 | 13 | myprobit = glm(admit ~ gre + gpa + rank, family=binomial(link = "probit"), control=list(maxit=500, epsilon=1e-8), data = mydata) 14 | 15 | ## model summary 16 | summary(myprobit) 17 | coefsMAIN = coef(myprobit) 18 | 19 | ### input data ### 20 | X = as.matrix(cbind(1, mydata[,2:4])) 21 | y = mydata$admit # extract as a vector; with haven, mydata[,1] would remain a tibble 22 | init = c(0,0,0,0) 23 | 24 | ########################################## 25 | ### MLE function for a standard probit ### 26 | ########################################## 27 | 28 | myprobMLE = function(params, X, y){ 29 | # Arguments are starting parameters (coefficients), model matrix, response 30 | 31 | b = params 32 | mu = X%*%b 33 | ll = sum(y*pnorm(mu, log.p=T) + (1-y)*pnorm(-mu, log.p=T)) # compute the log likelihood 34 | ll 35 | } 36 | 37 | ### Fit with optim 38 | outMLE = optim(init, myprobMLE, X=X, y=y, control=list(fnscale=-1, maxit=1000, reltol=1e-8)) # make tolerance really low to duplicate glm result 39 | # outMLE 40 | coefsMLE = outMLE$par 41 | 42 | ### compare 43 | rbind(coefsMLE, coefsMAIN) 44 | 45 | ################################################# 46 | ### EM for latent variable approach to probit ### 47 | ################################################# 48 | 49 | probEM = function(params, X, y, tol=.00001, maxits=100, showits=T){ 50 | # Arguments are starting parameters (coefficients), model matrix, response, 51 | # tolerance, maximum iterations, and whether to show iterations 52 | 53 | # starting points 54 | b = params 55 | mu = X%*%b 56 | it = 0 57 | converged = FALSE 58 | z = rnorm(length(y)) # z is the latent variable ~N(0,1) 59 | 60 | if (showits) # Show iterations 61 | cat(paste("Iterations of EM:", "\n")) 62 | while ((!converged) & (it < maxits)) { # while no convergence and we haven't reached our max iterations do this stuff 63 | zOld = z # create 'old' values for comparison 64 | 65 | z = ifelse(y==1, mu+dnorm(mu)/pnorm(mu), mu-dnorm(mu)/pnorm(-mu)) # E step: create a new z based on current values 66 | 67 | b = solve(t(X)%*%X) %*% t(X)%*%z # M step: estimate b 68 | mu = X%*%b 69 | 70 | ll = sum(y*pnorm(mu, log.p=T) + (1-y)*pnorm(-mu, log.p=T)) 71 | 72 | it = it + 1 73 | if (showits & (it == 1 | it%%5 == 0)) 74 | cat(paste(format(it), "...", "\n", sep = "")) 75 | converged = max(abs(zOld - z)) <= tol 76 | } 77 | 78 | if (showits) # Show last iteration 79 | cat(paste0(format(it), "...", "\n")) 80 | 81 | return(list(b=t(b), ll=ll )) 82 | } 83 | 84 | outEM = probEM(params=init, X=X, y=y, tol=1e-8, maxit=100); outEM # can lower tolerance to duplicate glm result 85 | coefsEM = outEM$b 86 | 87 | ### compare 88 |
rbind(coefsMAIN, coefsMLE, coefsEM) 89 | rbind(logLik(myprobit), outMLE$value, outEM$ll) 90 | 91 | 92 | ### Show estimates over niter iterations and visualize 93 | X2 = X; X2[,2:3] = scale(X2[,2:3]) 94 | niter = 20 95 | outEM = sapply( 1:niter, function(x) probEM(params=init, X=X2, y=y, tol=1e-8, maxit=x, showits=F)$b ) 96 | colnames(outEM) = paste0("iter", 1:niter) 97 | 98 | library(ggplot2); library(reshape2) 99 | gdat = melt(outEM); colnames(gdat) = c('coef', 'iter', 'value') 100 | gdat$iter2 = rep(1:niter, e=4) 101 | gdat$coef = factor(gdat$coef, labels=c('Intercept','gre','gpa','rank')) 102 | str(gdat) 103 | 104 | ggplot(aes(x=iter2, y=value), data=gdat) + 105 | geom_line(aes(group=coef, color=coef)) + 106 | theme_minimal() -------------------------------------------------------------------------------- /ModelFitting/EM Examples/EM for pca.R: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------# 2 | # The following is an EM algorithm for principal components analysis. # 3 | # See Murphy, 2012, Machine Learning: A Probabilistic Perspective, # 4 | # 12.2.5. Some of the constructed output is based on the pca function # 5 | # used below. # 6 | # --------------------------------------------------------------------# 7 | 8 | ##################### 9 | ### Main Function ### 10 | ##################### 11 | PCAEM = function(X, nComp=2, tol=.00001, maxits=100, showits=T){ 12 | # Arguments X: numeric data, nComp: number of components 13 | # tol = tolerance level, maxits: maximum iterations, showits: show iterations 14 | require(pracma) # for an orthonormal basis of W; the pcaMethods package has one as well 15 | 16 | # starting points and other initializations 17 | N = nrow(X) 18 | D = ncol(X) 19 | L = nComp 20 | Xt = t(X) 21 | Z = t(replicate(L, rnorm(N))) # latent variables 22 | W = replicate(L, rnorm(D)) # loadings 23 | 24 | it = 0 25 | converged = FALSE 26 | 27 | if (showits) 28 | cat(paste("Iterations of EM:", "\n")) 29 | while ((!converged) & (it < maxits)) { # while no convergence and we haven't reached our max iterations do this stuff 30 | Z.old = Z # create 'old' values for comparison 31 | Z = solve(t(W)%*%W) %*% crossprod(W, Xt) # E 32 | W = Xt%*%t(Z) %*% solve(tcrossprod(Z)) # M 33 | 34 | it = it + 1 35 | if (showits & (it == 1 | it%%5 == 0)) # if showits, show first and every 5th iteration 36 | cat(paste(format(it), "...", "\n", sep = "")) 37 | converged = max(abs(Z.old-Z)) <= tol 38 | } 39 | 40 | # calculate reconstruction error 41 | Xrecon = W %*% Z 42 | reconerr = sum((Xrecon-t(X))^2) 43 | 44 | # orthogonalize 45 | W = orth(W) 46 | evs = eigen(cov(X %*% W)) 47 | evals = evs$values 48 | evecs = evs$vectors 49 | 50 | W = W %*% evecs 51 | Z = X %*% W 52 | 53 | if (showits) # Show last iteration 54 | cat(paste0(format(it), "...", "\n")) 55 | 56 | return(list(scores=Z, loadings=W, reconerr=reconerr, Xrecon=t(Xrecon))) 57 | } 58 | 59 | ############### 60 | ### Example ### 61 | ############### 62 | 63 | ### Get data and run 64 | # state.x77 is the data; various state demographics 65 | X = scale(state.x77) 66 | outEM = PCAEM(X=X, nComp=2, tol=1e-12, maxit=1000) 67 | outEM 68 | 69 | # Extract reconstructed values and loadings for comparison 70 | Xrecon = outEM$Xrecon 71 | loadingsEM = outEM$loadings 72 | scoresEM = outEM$scores 73 | 74 | # mean squared reconstruction error 75 | mean((Xrecon-X)^2) # outEM$reconerr/prod(dim(X)) 76 | 77 | 78 | ### compare results to output from pcaMethods; note that signs for loadings/scores may
be different 79 | library(pcaMethods) 80 | outpcam = pca(X, nPcs=2, method='svd', scale='none', center=F) 81 | loadings_pcam = loadings(outpcam) 82 | scores_pcam = scores(outpcam) 83 | 84 | # compare loadings and scores 85 | sum((abs(loadings_pcam)-abs(loadingsEM))^2) 86 | abs(round(cbind(scores_pcam, scoresEM), 2)) 87 | 88 | # compare reconstructed data sets 89 | Xrecon2 = scores_pcam %*% t(loadings_pcam) 90 | mean((Xrecon2-X)^2) 91 | mean(abs(Xrecon2-Xrecon)) 92 | 93 | # plots 94 | plot(Xrecon2[,1], X[,1]) 95 | plot(Xrecon2[,2], X[,2]) 96 | 97 | plot(Xrecon[,1], Xrecon2[,1]) 98 | -------------------------------------------------------------------------------- /ModelFitting/EM Examples/EM for state space unobserved components.R: -------------------------------------------------------------------------------- 1 | ### The following regards chapter 11 in Statistical Modeling and Computation, 2 | ### the first example for an unobserved components model. The data regards 3 | ### inflation based on the U.S. consumer price index (infl = 4 | ### 400*log(cpi_t/cpi_{t-1})), from the second quarter of 1947 to the second 5 | ### quarter of 2011. You can acquire the data here 6 | ### (http://www.maths.uq.edu.au/~kroese/statbook/Statespace/USCPI.csv) or in 7 | ### Datasets repo. just note that it has 2 mystery columns and one mystery row 8 | ### presumably supplied by Excel. You can also get the CPI data yourself at 9 | ### http://www.bls.gov/cpi/ in a frustrating fashion, or in a much easier 10 | ### fashion here 11 | ### http://research.stlouisfed.org/fred2/series/CPIAUCSL/downloaddata. 12 | 13 | 14 | ### For the following I use n instead of t or T because those are transpose and 15 | ### TRUE in R. The model is basically y = τ + ϵ, with ϵ ~ N(0, σ^2), 16 | ### and τ = τ_{n-1} + υ_n with υ ~ N(0, ω^2). Thus each y is 17 | ### associated with a latent variable that follows a random walk over time. 18 | ### ω^2 serves as a smoothing parameter, which itself may be estimated but 19 | ### which is fixed in the following. See the text for more details. 
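### As a quick illustration of the data generating process (a toy simulation, not
### from the text, with arbitrary variance values), one can draw from the model
### directly: tau is a random walk, and y adds observation noise around it.
set.seed(123)
tau_sim = cumsum(rnorm(200, 0, sqrt(.5)))   # tau_n = tau_{n-1} + upsilon_n
y_sim   = tau_sim + rnorm(200, 0, sqrt(2))  # y_n = tau_n + epsilon_n
plot(y_sim, pch = 19, col = 'gray70'); lines(tau_sim, col = '#ff5500')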
20 | 21 | d = read.csv('https://raw.githubusercontent.com/m-clark/Datasets/master/us%20cpi/USCPI.csv', header=F) 22 | inflation = d[,1] 23 | summary(inflation) 24 | 25 | 26 | ################################ 27 | ### EM for state space model ### 28 | ################################ 29 | statespaceEM = function( 30 | params, 31 | y, 32 | omega2_0, 33 | omega2, 34 | tol = .00001, 35 | maxits = 100, 36 | showits = T 37 | ) { 38 | 39 | # Arguments are starting parameters (variance as 'sigma2'), data, tolerance, 40 | # maximum iterations, and whether to show iterations 41 | 42 | # Not really needed here, but would be a good idea generally to take advantage 43 | # of sparse representation for large data 44 | require(spam) 45 | 46 | # Starting points 47 | n = length(y) 48 | sigma2 = params$sigma2 49 | 50 | # Other initializations 51 | H = diag(n) 52 | for (i in 1:(ncol(H) - 1)) { 53 | H[i + 1, i] = -1 54 | } 55 | 56 | Omega2 = as.spam(diag(omega2, n)) 57 | Omega2[1, 1] = omega2_0 58 | H = as.spam(H) 59 | HinvOmega2H = t(H) %*% chol2inv(chol(Omega2)) %*% H # tau ~ N(0, HinvOmega2H^-1) 60 | 61 | 62 | it = 0 63 | converged = FALSE 64 | 65 | if (showits) # Show iterations 66 | cat(paste("Iterations of EM:", "\n")) 67 | 68 | while ((!converged) & (it < maxits)) { 69 | sigma2Old = sigma2[1] 70 | Sigma2invOld = diag(n)/sigma2Old 71 | 72 | K = HinvOmega2H + Sigma2invOld # E 73 | tau = solve(K, y/sigma2Old) # tau|y, sigma2_{n-1}, omega2 ~ N(0, K^-1) 74 | 75 | K_inv_tr = sum(1/eigen(K)$values) 76 | 77 | sigma2 = 1/n * (K_inv_tr + crossprod(y-tau)) # M 78 | 79 | converged = max(abs(sigma2 - sigma2Old)) <= tol 80 | 81 | # if showits true, & it = 1 or divisible by 5, print message 82 | it = it + 1 83 | if (showits & (it == 1 | it%%5 == 0)) 84 | cat(paste(format(it), "...", "\n", sep = "")) 85 | } 86 | 87 | Kfinal = HinvOmega2H + diag(n) / sigma2[1] 88 | 89 | taufinal = solve(Kfinal, (y / sigma2[1])) # use the final sigma2 for the final tau estimate 90 | 91 | out = list(sigma2 = sigma2, tau = taufinal) 92 | } 93 | 94 | # debugonce(statespaceEM) 95 | ssMod_1 = statespaceEM( 96 | params = data.frame(sigma2 = var(inflation)), 97 | y = inflation, 98 | tol = 1e-4, 99 | omega2_0 = 9, 100 | omega2 = 1 ^ 2 101 | ) 102 | 103 | ssMod_.5 = statespaceEM( 104 | params = data.frame(sigma2 = var(inflation)), 105 | y = inflation, 106 | tol = 1e-4, 107 | omega2_0 = 9, 108 | omega2 = .5 ^ 2 109 | ) 110 | 111 | ssMod_1$sigma2 112 | ssMod_.5$sigma2 113 | 114 | 115 | library(lubridate) 116 | series = ymd(paste0(rep(1947:2014, e = 4), '-', c('01', '04', '07', '10'), '-', '01')) 117 | seriestext = series[1:length(inflation)] 118 | 119 | # fig.
11.1 in the text 120 | 121 | library(tidyverse) 122 | 123 | data.frame( 124 | series = series[1:length(inflation)], 125 | inflation, 126 | Mod_1 = ssMod_1$tau, 127 | Mod_.5 = ssMod_.5$tau 128 | ) %>% 129 | ggplot(aes(x = series, y = inflation)) + 130 | geom_point(color = 'gray50') + 131 | geom_line(aes(y = Mod_1), color = '#ff5500') + 132 | geom_line(aes(y = Mod_.5), color = 'skyblue3') + 133 | scale_x_date(date_breaks = '10 years') + 134 | theme_light() 135 | 136 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/diag.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/diag.html -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/fitCSgam-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/fitCSgam-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/gamSleepStudy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/gamSleepStudy-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/gammSleepStudy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/gammSleepStudy-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-1.png 
-------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-2.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-latex/fitCSgam-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-latex/fitCSgam-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-latex/gammSleepStudy-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModelML/mixedModelML_files/figure-latex/gammSleepStudy-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/anovamixed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/anovamixed.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/lavaanmod-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/lavaanmod-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/randomIntsSlopesMods-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/randomIntsSlopesMods-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/visualizeTrends-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/figure-html/visualizeTrends-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/growthvsMixed_EstResults.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/growth_vs_mixed_files/growthvsMixed_EstResults.RData 
-------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepModFits-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepModFits-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepModFitsReduced-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepModFitsReduced-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepstudyPlot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-html/sleepstudyPlot-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepModFits-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepModFits-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepModFitsReduced-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepModFitsReduced-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepstudyPlot-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-latex/sleepstudyPlot-1.pdf -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepModFits-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepModFits-1.png 
-------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepModFitsReduced-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepModFitsReduced-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepstudyPlot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/Mixed Models/mixedModels/mixedModels_files/figure-markdown_github/sleepstudyPlot-1.png -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/one_factor_RE.R: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | ### An approach for one factor random effects model via maximum likelihood in R ### 3 | ### Matlab and Julia; It's based on Statistical Modeling and Computation (2014) ### 4 | ### Chapter 10, example 10.10; Unfortunately I did this before knowing they had ### 5 | ### both matlab and R code on their website, though the R code here is a little ### 6 | ### cleaner and has comments. The data regards crop yield from 10 randomly ### 7 | ### selected locations and three collections at each. See one_factor_RE.m and ### 8 | ### one_factor_RE.jl for the related Matlab and Julia files, and the respective ### 9 | ### twofactorRE.* for the associated two factor random effects examples. 
### 10 | ################################################################################### 11 | 12 | 13 | 14 | ##################### 15 | ### Main function ### 16 | ##################### 17 | 18 | one_factor_re_loglike = function(mu, sigma2_mu, sigma2){ 19 | # Args are mu: intercept; sigma2_mu: variance of intercept; sigma2: residual 20 | # variance of y 21 | # I follow their notation for consistency 22 | d = nrow(y) 23 | ni = ncol(y) 24 | 25 | # covariance matrix of observations 26 | Sigmai = sigma2 * diag(ni) + sigma2_mu * matrix(1, ni, ni) 27 | 28 | # log likelihood 29 | l = rep(NA, 10) 30 | # iterate over the rows 31 | for(i in 1:d){ 32 | l[i] = .5 * t(y[i, ] - mu) %*% chol2inv(chol(Sigmai)) %*% (y[i, ] - mu) 33 | } 34 | 35 | ll = -(ni*d) / 2*log(2*pi) - d / 2*log(det(Sigmai)) - sum(l) 36 | 37 | return(-ll) 38 | } 39 | 40 | 41 | ################### 42 | ### Data set up ### 43 | ################### 44 | y = matrix(c(22.6,20.5,20.8, 45 | 22.6,21.2,20.5, 46 | 17.3,16.2,16.6, 47 | 21.4,23.7,23.2, 48 | 20.9,22.2,22.6, 49 | 14.5,10.5,12.3, 50 | 20.8,19.1,21.3, 51 | 17.4,18.6,18.6, 52 | 25.1,24.8,24.9, 53 | 14.9,16.3,16.6), 10, 3, byrow=T) 54 | 55 | 56 | ################################ 57 | ### Starting values and test ### 58 | ################################ 59 | 60 | starts = list( 61 | mu = mean(y), 62 | sigma2_mu = var(rowMeans(y)), 63 | sigma2 = mean(apply(y, 1, var)) 64 | ) 65 | 66 | ### test 67 | one_factor_re_loglike(starts[[1]], starts[[2]], starts[[3]]) 68 | 69 | 70 | ####################### 71 | ### Run and compare ### 72 | ####################### 73 | 74 | ### bbmle has mle2 function for maximum likelihood estimation based on 75 | ### underlying R functions like optim. LBFGS-B is used to place lower bounds on 76 | ### the variance estimates 77 | library(bbmle) 78 | 79 | mlout = mle2( 80 | one_factor_re_loglike, 81 | start = starts, 82 | method = 'L-BFGS-B', 83 | lower = c( 84 | mu = -Inf, 85 | sigma2_mu = 0, 86 | sigma2 = 0 87 | ), 88 | trace = T 89 | ) 90 | 91 | ### Compare 92 | library(lme4) 93 | library(tidyverse) 94 | 95 | d = data.frame(y) %>% 96 | pivot_longer(everything(), names_to = 'x', values_to = 'value') %>% 97 | arrange(x) %>% 98 | group_by(x) %>% 99 | mutate(group = 1:n()) 100 | 101 | lme = lmer(value ~ 1 | group, data = d, REML = F) 102 | 103 | summary(mlout) 104 | summary(lme) 105 | -2 * logLik(lme) 106 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/one_factor_RE.jl: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | ### see onefactorRE.R for details and onefactorRE.m for matlab code. The ### 3 | ### commented part of the function reflects the R code. 
### 4 | ############################################################################ 5 | 6 | 7 | 8 | ##################### 9 | ### Main function ### 10 | ##################### 11 | 12 | function one_factor_re_loglike(par::Vector) 13 | d, ni = size(y) 14 | mu = par[1] 15 | sigma2_mu = par[2] 16 | sigma2 = par[3] 17 | 18 | Sigmai = sigma2*eye(ni) + sigma2_mu*ones(ni,ni) 19 | l = -(ni*d)/2*log(2*pi) - d/2*log(det(Sigmai)) 20 | 21 | for i in 1:d 22 | yi = y[i,:]' 23 | l = l - .5(yi-mu)' * (Sigmai\(yi-mu)) 24 | end 25 | 26 | # l = zeros(10) 27 | 28 | # for i in 1:d 29 | # yi = y[i,:]' 30 | # l[i,:] = .5* (yi - mu)' * (Sigmai\(yi - mu)) 31 | # end 32 | # l = -(ni*d)/2*log(2*pi) - d/2*log(det(Sigmai)) - sum(l) 33 | 34 | l = -l[1] # having to do this line hurts 35 | return l 36 | end 37 | 38 | 39 | ################### 40 | ### Data set up ### 41 | ################### 42 | y = [22.6 20.5 20.8 43 | 22.6 21.2 20.5 44 | 17.3 16.2 16.6 45 | 21.4 23.7 23.2 46 | 20.9 22.2 22.6 47 | 14.5 10.5 12.3 48 | 20.8 19.1 21.3 49 | 17.4 18.6 18.6 50 | 25.1 24.8 24.9 51 | 14.9 16.3 16.6] 52 | 53 | 54 | ################################ 55 | ### Starting values and test ### 56 | ################################ 57 | mu0 = mean(y) 58 | sigma2_mu0 = var(mean(y, 2)) 59 | sigma20 = mean(var(y, 2)) 60 | theta0 = [mu0, sigma2_mu0, sigma20] 61 | 62 | ### test 63 | one_factor_re_loglike(theta0) 64 | 65 | 66 | ########### 67 | ### Run ### 68 | ########### 69 | using Optim 70 | res = optimize(one_factor_re_loglike, theta0, method=:l_bfgs) 71 | 72 | 73 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/one_factor_RE.m: -------------------------------------------------------------------------------- 1 | % matlab from Statistical Modeling and Computation (2014 p 311). See the 2 | % associated one_factor_RE.R file for details. 3 | 4 | function l = one_factor_re_loglike(mu, sigma2_mu, sigma2, y) 5 | [d ni] = size(y); 6 | Sigmai = sigma2*eye(ni) + sigma2_mu*ones(ni,ni); 7 | l = -(ni*d) / 2*log(2*pi) - d / 2*log(det(Sigmai)); 8 | for i=1:d 9 | yi = y(i, :)'; 10 | l = l - .5*(yi - mu)' * (Sigmai\(yi - mu)); 11 | end 12 | end 13 | 14 | 15 | y = [22.6 20.5 20.8; 16 | 22.6 21.2 20.5; 17 | 17.3 16.2 16.6; 18 | 21.4 23.7 23.2; 19 | 20.9 22.2 22.6; 20 | 14.5 10.5 12.3; 21 | 20.8 19.1 21.3; 22 | 17.4 18.6 18.6; 23 | 25.1 24.8 24.9; 24 | 14.9 16.3 16.6]; 25 | 26 | 27 | f = @(theta) -one_factor_re_loglike(theta(1), theta(2), theta(3), y); 28 | ybar = mean(y, 2); 29 | theta0 = [mean(ybar) var(ybar) mean(var(y, 0, 2))]; 30 | thetahat = fminsearch(f, theta0); 31 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/twofactorRE.R: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | ### An approach for two factor random effects model via maximum likelihood in R ### 3 | ### Matlab and Julia; It's based on Statistical Modeling and Computation (2014) ### 4 | ### Chapter 10, example 10.10; The data regards the breeding value of a set of ### 5 | ### five sires in raising pigs. Each sire is mated to a random group of dams, ### 6 | ### with the response being the average daily weight gain in pounds of two ### 7 | ### piglets in each litter. See one_factor_RE.R for a one factor model, and ### 8 | ### two_factor_RE.m and two_factor_RE.jl for the matlab and julia versions of ### 9 | ### this example.
Note that the text has a typo on the sigma2 variance estimate ### 10 | ### (value should be .0023 not .023). ### 11 | ################################################################################### 12 | 13 | 14 | 15 | ##################### 16 | ### Main function ### 17 | ##################### 18 | # the function takes the log variances eta* as input to keep positive 19 | sfran_loglike = function(mu, eta_alpha, eta_gamma, eta) { 20 | # Args 21 | # mu: intercept 22 | # eta_alpha: log variance of random effect one 23 | # eta_gamma: log variance of random effect two 24 | # eta: log residual variance of y 25 | 26 | sigma2_alpha = exp(eta_alpha) 27 | sigma2_gamma = exp(eta_gamma) 28 | sigma2 = exp(eta) 29 | n = length(y) 30 | 31 | # covariance matrix of observations 32 | Sigma = sigma2 * diag(n) + sigma2_alpha * tcrossprod(Xalpha) + 33 | sigma2_gamma * tcrossprod(Xgamma) 34 | 35 | 36 | # log likelihood 37 | ll = -n / 2 * log(2 * pi) - sum(log(diag(chol(Sigma)))) - 38 | .5 * t(y - mu) %*% chol2inv(chol(Sigma)) %*% (y - mu) 39 | 40 | return(-ll) 41 | } 42 | 43 | 44 | ################### 45 | ### Data set up ### 46 | ################### 47 | y = c(1.39,1.29,1.12,1.16,1.52,1.62,1.88,1.87,1.24,1.18, 48 | .95,.96,.82,.92,1.18,1.20,1.47,1.41,1.57,1.65) 49 | 50 | # for use in lme4, but also a more conceptual representation of the data 51 | d = expand.grid(sire = rep(1:5, 2), dam = 1:2) 52 | d = data.frame(d[order(d$sire), ], y) 53 | 54 | 55 | ################################ 56 | ### Starting values and test ### 57 | ################################ 58 | # note the log scale, since the function exponentiates the eta* arguments 59 | starts = list( 60 | mu = mean(y), 61 | eta_alpha = log(var(tapply(y, d$sire, mean))), 62 | eta_gamma = log(var(y) / 3), 63 | eta = log(var(y) / 3) 64 | ) 65 | 66 | Xalpha = diag(5) %x% rep(1,4) 67 | 68 | Xgamma = diag(10) %x% rep(1,2) 69 | 70 | # test 71 | sfran_loglike(starts[[1]], starts[[2]], starts[[3]], starts[[4]]) 72 | 73 | 74 | ####################### 75 | ### Run and compare ### 76 | ####################### 77 | library(bbmle) 78 | 79 | mlout = mle2(sfran_loglike, start=starts, method='BFGS') 80 | 81 | ### lme4 comparison 82 | library(lme4) 83 | 84 | lme = lmer(y ~ (1 | sire) + (1 | dam:sire), d, REML = F) 85 | 86 | summary(mlout) 87 | exp(coef(mlout)[-1]) 88 | 89 | summary(lme) 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/twofactorRE.jl: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | ### see twofactorRE.R for details and twofactorRE.m for matlab code.
### 3 | ############################################################################ 4 | 5 | 6 | 7 | function sfran2_loglike(par::Vector) 8 | n = length(y) 9 | mu = par[1] 10 | 11 | sigma2_alpha = exp(par[2]) 12 | sigma2_gamma = exp(par[3]) 13 | sigma2 = exp(par[4]) 14 | 15 | Sigma = sigma2*eye(n) + sigma2_alpha*(Xalpha * Xalpha') + sigma2_gamma * (Xgamma * Xgamma') 16 | 17 | l = -n/2*log(2*pi) - sum(log(diag(chol(Sigma)))) - .5*(y-mu)' * (Sigma\(y-mu)) 18 | 19 | l = -l[1] 20 | return l 21 | end 22 | 23 | 24 | ################## 25 | ### Data setup ### 26 | ################## 27 | y = [1.39,1.29,1.12,1.16,1.52,1.62,1.88,1.87,1.24,1.18, 28 | .95,.96,.82,.92,1.18,1.20,1.47,1.41,1.57,1.65] 29 | 30 | # See the R file for a conceptual data representation 31 | 32 | Xalpha = kron(eye(5), ones(4,1)) 33 | 34 | Xgamma = kron(eye(10), ones(2,1)) 35 | 36 | 37 | ################################ 38 | ### Starting values and test ### 39 | ################################ 40 | yhat = mean(reshape(y, 4, 5), 1) 41 | 42 | theta0 = [mean(y), log(var(yhat)), log(var(y)/3), log(var(y)/3)] 43 | 44 | sfran2_loglike(theta0) 45 | 46 | 47 | ########### 48 | ### Run ### 49 | ########### 50 | using Optim 51 | res = optimize(sfran2_loglike, theta0, method = :l_bfgs) 52 | exp([-2.92,-3.44, -6.079]) 53 | 54 | 55 | -------------------------------------------------------------------------------- /ModelFitting/Mixed Models/twofactorRE.m: -------------------------------------------------------------------------------- 1 | % matlab from Statistical Modeling and Computation (2014 p 314). See the 2 | % associated twofactorRE.R file for details. 3 | 4 | function l = sfran2_loglike(mu, eta_alpha, eta_gamma, eta, y, Xalpha, Xgamma) 5 | sigma2_alpha = exp(eta_alpha); 6 | sigma2_gamma = exp(eta_gamma); 7 | sigma2 = exp(eta); 8 | 9 | n = length(y); 10 | 11 | Sigma = sigma2*speye(n) + sigma2_alpha * (Xalpha * Xalpha') + sigma2_gamma * (Xgamma*Xgamma'); 12 | 13 | l = -n/2 * log(2*pi) - sum(log(diag(chol(Sigma)))) - .5*(y - mu)' * (Sigma\(y - mu)); 14 | 15 | end 16 | 17 | y = [1.39 1.29 1.12 1.16 1.52 1.62 1.88 1.87 1.24 1.18 .95 .96 .82 .92 1.18 1.20 1.47 1.41 1.57 1.65]; 18 | 19 | Xalpha = kron(speye(5), ones(4,1)); 20 | 21 | Xgamma = kron(speye(10), ones(2,1)); 22 | 23 | f = @(theta) -sfran2_loglike(theta(1), theta(2), theta(3), theta(4), y, Xalpha, Xgamma); 24 | yhat = mean(reshape(y, 4, 5)); 25 | theta0 = [mean(y) log(var(yhat)) log(var(y)/3) log(var(y)/3)]; 26 | thetahat = fminsearch(f, theta0) 27 | -------------------------------------------------------------------------------- /ModelFitting/NBzeroinfl.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Zero-inflated Negative Binomial Model" 3 | #' author: "Michael Clark" 4 | #' date: "" 5 | #' --- 6 | #' 7 | #' 8 | #' Log likelihood function to estimate parameters for a Zero-inflated Negative Binomial model. With 9 | #' examples and comparison to pscl package output. Also includes approach based on Hilbe GLM text.
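#' The likelihood maximized below mixes a point mass at zero with a negative binomial count
#' component: P(y = 0) = pi + (1 - pi) * NB(0; mu, theta) and P(y = k) = (1 - pi) * NB(k; mu, theta)
#' for k > 0, with pi modeled on the logit scale and mu on the log scale.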
10 | #' see also: https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/poiszeroinfl.R 11 | 12 | ZINB = function(y, X, par) { 13 | # arguments are response y, predictor matrix X, and parameter named starting points of 'logit', 'negbin', and 'theta' 14 | 15 | # Extract parameters 16 | logitpars = par[grep('logit', names(par))] 17 | negbinpars = par[grep('negbin', names(par))] 18 | theta = exp(par[grep('theta', names(par))]) 19 | 20 | # Logit part; in this function Xlogit = Xnegbin but one could split X argument into Xlogit and Xnegbin for example 21 | Xlogit = X 22 | LPlogit = Xlogit %*% logitpars 23 | logi0 = plogis(LPlogit) 24 | 25 | # Negbin part 26 | Xnegbin = X 27 | munb = exp(Xnegbin %*% negbinpars) 28 | 29 | # LLs 30 | logliklogit = log( logi0 + exp(log(1 - logi0) + suppressWarnings(dnbinom(0, size = theta, mu = munb, log = TRUE))) ) 31 | logliknegbin = log(1 - logi0) + suppressWarnings(dnbinom(y, size = theta, mu = munb, log = TRUE)) 32 | 33 | # Hilbe formulation 34 | # theta part 35 | # alpha = 1/theta 36 | # m = 1/alpha 37 | # p = 1/(1 + alpha*munb) 38 | 39 | # logliklogit = log( logi0 + (1 - logi0)*(p^m) ) 40 | # logliknegbin = log(1-logi0) + log(gamma(m+y)) - log(gamma(m)) + m*log(p) + y*log(1-p) # gamma(y+1) not needed 41 | 42 | y0 = y == 0 # 0 values 43 | yc = y > 0 # Count part 44 | 45 | loglik = sum(logliklogit[y0]) + sum(logliknegbin[yc]) 46 | -loglik 47 | } 48 | 49 | 50 | #' Get the data 51 | library(haven) 52 | 53 | fish = read_dta("http://www.stata-press.com/data/r11/fish.dta") 54 | 55 | 56 | #' Get starting values or simply do zeros 57 | #' for this function, a named vector for the starting values 58 | #' for zinb: 'logit', 'negbin', 'theta' 59 | init.mod = model.matrix(count ~ persons + livebait, data = fish) # to get X matrix 60 | 61 | startlogi = glm(count == 0 ~ persons + livebait, data = fish, family = "binomial") 62 | startcount = glm(count ~ persons + livebait, data = fish, family = "poisson") 63 | 64 | starts = c( 65 | negbin = coef(startcount), 66 | logit = coef(startlogi), 67 | theta = 1 68 | ) 69 | # starts = c(negbin = rep(0, 3), 70 | # logit = rep(0, 3), 71 | # theta = log(1)) 72 | 73 | 74 | #' Estimate with optim function 75 | optNB1 = optim( 76 | par = starts , 77 | fn = ZINB, 78 | X = init.mod, 79 | y = fish$count, 80 | method = "BFGS", 81 | control = list(maxit = 5000, reltol = 1e-12), 82 | hessian = TRUE 83 | ) 84 | # optNB1 85 | 86 | #' Comparison 87 | # Extract for clean display 88 | B = optNB1$par 89 | se = sqrt(diag(solve((optNB1$hessian)))) 90 | Z = B/se 91 | p = pnorm(abs(Z), lower = FALSE)*2 92 | 93 | # pscl results 94 | library(pscl) 95 | zinbmod1 = zeroinfl(count ~ persons + livebait, data = fish, dist = "negbin") 96 | summary(zinbmod1) 97 | round(data.frame(B, se, Z, p), 4) # note that theta here is actually log(theta) 98 | 99 | 100 | #' Optional data set 101 | 102 | 103 | #' Get the data 104 | data("bioChemists", package = "pscl") 105 | 106 | #' Get starting values or simply do zeros 107 | init.mod = model.matrix(art ~ fem + mar + kid5 + phd + ment, data = bioChemists) # to get X matrix 108 | startlogi = glm(art==0 ~ fem + mar + kid5 + phd + ment, data = bioChemists, family = "binomial") 109 | startcount = glm(art ~ fem + mar + kid5 + phd + ment, data = bioChemists, family = "quasipoisson") 110 | 111 | starts = c( 112 | negbin = coef(startcount), 113 | logit = coef(startlogi), 114 | theta = summary(startcount)$dispersion 115 | ) 116 | # starts = c(negbin = rep(0, 6), 117 | # logit = rep(0, 6), 118 | # theta = 1) 119 | 120 
| 121 | #' Estimate with optim function 122 | optNB2 = optim( 123 | par = starts , 124 | fn = ZINB, 125 | X = init.mod, 126 | y = bioChemists$art, 127 | method = "BFGS", 128 | control = list(maxit = 5000, reltol = 1e-12), 129 | hessian = TRUE 130 | ) 131 | # optNB2 132 | 133 | 134 | #' Comparison 135 | # Extract for clean display. 136 | B = optNB2$par 137 | se = sqrt(diag(solve((optNB2$hessian)))) 138 | Z = B/se 139 | p = pnorm(abs(Z), lower = FALSE)*2 140 | 141 | # pscl results 142 | library(pscl) 143 | zinbmod = zeroinfl(art ~ . | ., data = bioChemists, dist = "negbin") 144 | summary(zinbmod) 145 | round(data.frame(B,se, Z, p), 4) -------------------------------------------------------------------------------- /ModelFitting/bivariateProbit.R: -------------------------------------------------------------------------------- 1 | ### Stata's seems to be the primary audience concerned with these models, but I 2 | ### thought I'd play around with one here (I've never had reason to use a probit 3 | ### model in practice). Stata examples come from the UCLA ATS website and the 4 | ### Stata manual so one can investigate the Stata result for comparison. 5 | 6 | 7 | 8 | # standard probit -------------------------------------------------------- 9 | 10 | probitLL = function(beta, X, y){ 11 | mu = X %*% beta 12 | 13 | # these produce identical results, but the second is the typical depiction 14 | # ll = sum(dbinom( 15 | # y, 16 | # size = 1, 17 | # prob = pnorm(mu), 18 | # log = T 19 | # )) 20 | 21 | ll = sum(y * pnorm(mu, log = T) + (1 - y) * log(1 - pnorm(mu))) 22 | 23 | -ll 24 | } 25 | 26 | ### Example 1 27 | 28 | # Example at https://stats.idre.ucla.edu/stata/dae/probit-regression/ 29 | 30 | admit = haven::read_dta('https://stats.idre.ucla.edu/stat/stata/dae/binary.dta') 31 | 32 | head(admit) 33 | 34 | X = model.matrix(admit~ gre + gpa + factor(rank), admit) 35 | y = admit$admit 36 | 37 | init = rep(0, ncol(X)) 38 | 39 | optimResult = optim( 40 | fn = probitLL, 41 | par = init, 42 | X = X, 43 | y = y, 44 | method = 'BFGS' 45 | ) 46 | 47 | optimResult 48 | 49 | 50 | ### Example 2: from Stata manual on standard probit 51 | 52 | # http://www.stata.com/manuals13/rprobit.pdf 53 | # "We have data on the make, weight, and mileage rating of 22 foreign and 52 54 | # domestic automobiles. We wish to fit a probit model explaining whether a car 55 | # is foreign based on its weight and mileage." 
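### (As a check on the optim solution below: once the data is loaded, a standard glm fit,
### e.g. glm(foreign ~ weight + mpg, family = binomial(link = 'probit'), data = auto),
### should produce essentially the same coefficients.)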
56 | 57 | auto = haven::read_dta('http://www.stata-press.com/data/r13/auto.dta') 58 | 59 | head(auto) 60 | 61 | X = model.matrix(foreign~ weight + mpg, auto) 62 | y = auto$foreign 63 | 64 | init = rep(0, ncol(X)) 65 | 66 | optimResult = optim( 67 | fn = probitLL, 68 | par = init, 69 | X = X, 70 | y = y 71 | ) 72 | 73 | optimResult 74 | 75 | 76 | 77 | # Bivariate probit -------------------------------------------------------- 78 | 79 | bivariateProbitLL = function(pars, X, y1, y2) { 80 | rho = pars[1] 81 | mu1 = X %*% pars[2:(ncol(X) + 1)] 82 | mu2 = X %*% pars[(ncol(X) + 2):length(pars)] 83 | q1 = ifelse(y1 == 1, 1,-1) 84 | q2 = ifelse(y2 == 1, 1,-1) 85 | 86 | require(mnormt) 87 | eta1 = q1 * mu1 88 | eta2 = q2 * mu2 89 | 90 | ll = matrix(NA, nrow = nrow(X)) 91 | for (i in 1:length(ll)) { 92 | corr = q1[i] * q2[i] * rho 93 | corr = matrix(c(1, corr, corr, 1), 2) 94 | ll[i] = log( 95 | pmnorm( 96 | x = c(eta1[i], eta2[i]), 97 | mean = c(0, 0), 98 | varcov = corr 99 | ) 100 | ) 101 | } 102 | 103 | # the loop is probably clearer, and there is no difference in time, but here's a oneliner 104 | # ll = mapply(function(e1, e2, q1, q2) log(pmnorm(x=c(e1, e2), varcov = matrix(c(1,q1*q2*rho,q1*q2*rho,1),2))), 105 | # eta1, eta2, q1, q2) 106 | 107 | -sum(ll) 108 | } 109 | 110 | 111 | ### Example 3: from stata manual on bivariate probit 112 | # "We wish to model the bivariate outcomes of whether children attend private 113 | # school and whether the head of the household voted for an increase in property 114 | # tax based on the other covariates." 115 | 116 | school = haven::read_dta('http://www.stata-press.com/data/r13/school.dta') 117 | 118 | head(school) 119 | 120 | X = model.matrix(private ~ years + logptax + loginc, school) 121 | y1 = school$private 122 | y2 = school$vote 123 | 124 | init = c(0, rep(0, ncol(X)*2)) 125 | 126 | # you'll probably get a warning or two, ignore; takes a couple seconds 127 | optimResult = optim( 128 | fn = bivariateProbitLL, 129 | par = init, 130 | X = X, 131 | y1 = y1, 132 | y2 = y2, 133 | method = 'BFGS' 134 | ) 135 | 136 | 137 | loglik = optimResult$value 138 | rho = optimResult$par[1] 139 | coefsPrivate = optimResult$par[2:(ncol(X) + 1)] 140 | coefsVote = optimResult$par[(ncol(X) + 2):length(init)] 141 | names(coefsPrivate) = names(coefsVote) = c('Int', 'years', 'logptax', 'loginc') 142 | 143 | list( 144 | loglik = loglik, 145 | rho = rho, 146 | Private = coefsPrivate, 147 | Vote = coefsVote 148 | ) 149 | 150 | -------------------------------------------------------------------------------- /ModelFitting/convert_code_to_html.R: -------------------------------------------------------------------------------- 1 | 2 | # Preliminaries ----------------------------------------------------------- 3 | 4 | library(tidyverse) 5 | 6 | 7 | fs = list.files('ModelFitting/', pattern = '\\.R$') 8 | 9 | 10 | 11 | # single file 12 | rmarkdown::render( 13 | "ModelFitting/nelder_mead.R", 14 | output_dir = '../m-clark.github.io/docs/models/', 15 | # output_yaml = 'render.yaml', # ignored 16 | # params = list(title = 'blah'), 17 | knit_meta = list(comment = NA) 18 | ) 19 | 20 | 21 | # all in theory, paths issue most likely; not worth the trouble 22 | map(fs, rmarkdown::render, output_dir = '../m-clark.github.io/docs/models/') 23 | 24 | 25 | # selection 26 | fs_select = fs[grepl(fs, pattern = 'gradient')] 27 | 28 | map(paste0('ModelFitting/', fs_select), rmarkdown::render, output_dir = '../m-clark.github.io/docs/models/') 29 | 30 | 31 | 32 | 
-------------------------------------------------------------------------------- /ModelFitting/cubicsplines.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Cubic Spline Model" 3 | #' author: "Michael Clark" 4 | #' date: "" 5 | #' --- 6 | #' 7 | #' 8 | #' See Wood (2017) Generalized Additive Models or my [document](https://m-clark.github.io/generalized-additive-models/). 9 | 10 | library(tidyverse) # for processing and plotting 11 | 12 | #' # Create the data 13 | size = c(1.42,1.58,1.78,1.99,1.99,1.99,2.13,2.13,2.13, 14 | 2.32,2.32,2.32,2.32,2.32,2.43,2.43,2.78,2.98,2.98) 15 | 16 | wear = c(4.0,4.2,2.5,2.6,2.8,2.4,3.2,2.4,2.6,4.8,2.9, 17 | 3.8,3.0,2.7,3.1,3.3,3.0,2.8,1.7) 18 | 19 | x = size - min(size) 20 | x = x / max(x) 21 | d = data.frame(wear, x) 22 | 23 | #' Cubic spline function 24 | rk <- function(x, z) { 25 | ((z-0.5)^2 - 1/12) * ((x-0.5)^2 - 1/12)/4 - 26 | ((abs(x-z)-0.5)^4 - (abs(x-z)-0.5)^2/2 + 7/240) / 24 27 | } 28 | 29 | #' Generate the model matrix. 30 | splX <- function(x, knots) { 31 | q = length(knots) + 2 # number of parameters 32 | n = length(x) # number of observations 33 | X = matrix(1, n, q) # initialized model matrix 34 | X[ ,2] = x # set second column to x 35 | X[ ,3:q] = outer(x, knots, FUN = rk) # remaining to cubic spline basis 36 | X 37 | } 38 | 39 | splS <- function(knots) { 40 | q = length(knots) + 2 41 | S = matrix(0, q, q) # initialize matrix 42 | S[3:q, 3:q] = outer(knots, knots, FUN = rk) # fill in non-zero part 43 | S 44 | } 45 | 46 | #' Matrix square root function. Note that there are various packages with their own. 47 | matSqrt <- function(S) { 48 | d = eigen(S, symmetric = T) 49 | rS = d$vectors %*% diag(d$values^.5) %*% t(d$vectors) 50 | rS 51 | } 52 | 53 | #' Penalized fitting function. 
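#' Why the augmentation works: stacking sqrt(lambda) * matSqrt(S) beneath the model matrix
#' and appending zeros to y makes the ordinary least squares criterion equal to
#' ||y - Xb||^2 + lambda * b'Sb, so a plain lm() call solves the penalized problem.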
54 | prsFit <- function(y, x, knots, lambda) { 55 | q = length(knots) + 2 # dimension of basis 56 | n = length(x) # number of observations 57 | Xa = rbind(splX(x, knots), matSqrt(splS(knots))*sqrt(lambda)) # augmented model matrix 58 | y[(n+1):(n+q)] = 0 # augment the data vector 59 | 60 | lm(y ~ Xa - 1) # fit and return penalized regression spline 61 | } 62 | 63 | 64 | 65 | #' # Example 1 66 | 67 | 68 | #' Unpenalized 69 | #' 70 | knots = 1:4/5 71 | X = splX(x, knots) # generate model matrix 72 | mod1 = lm(wear ~ X - 1) # fit model 73 | 74 | xp = 0:100/100 # x values for prediction 75 | Xp = splX(xp, knots) # prediction matrix 76 | 77 | 78 | #' Visualize 79 | 80 | ggplot(aes(x = x, y = wear), data = data.frame(x, wear)) + 81 | geom_point(color = "#FF5500") + 82 | geom_line(aes(x = xp, y = Xp %*% coef(mod1)), 83 | data = data.frame(xp, Xp), 84 | color = "#00AAFF") + 85 | labs(x = 'Scaled Engine size', y = 'Wear Index') + 86 | theme_minimal() 87 | 88 | 89 | 90 | #' # Example 2 91 | 92 | 93 | # Add penalty lambda 94 | knots = 1:7/8 95 | d2 = data.frame(x = xp) 96 | 97 | for (i in c(.1, .01, .001, .0001, .00001, .000001)){ 98 | # fit penalized regression 99 | mod2 = prsFit( 100 | y = wear, 101 | x = x, 102 | knots = knots, 103 | lambda = i 104 | ) 105 | # spline choosing lambda 106 | Xp = splX(xp, knots) # matrix to map parameters to fitted values at xp 107 | LP = Xp %*% coef(mod2) 108 | d2[, paste0('lambda = ', i)] = LP[, 1] 109 | } 110 | 111 | #' Examine 112 | # head(d2) 113 | 114 | #' Visualize via ggplot 115 | d3 = d2 %>% 116 | pivot_longer(cols = -x, 117 | names_to = 'lambda', 118 | values_to = 'value') %>% 119 | mutate(lambda = fct_inorder(lambda)) 120 | 121 | ggplot(d3) + 122 | geom_point(aes(x = x, y = wear), col = '#FF5500', data = d) + 123 | geom_line(aes(x = x, y = value), col = "#00AAFF") + 124 | facet_wrap(~lambda) + 125 | theme_minimal() 126 | 127 | -------------------------------------------------------------------------------- /ModelFitting/elm.R: -------------------------------------------------------------------------------- 1 | # A very simple implementation of extreme learning machine for regression for 2 | # demonstration. See elmNN and ELMR for some R package implementations. I add 3 | # comparison to generalized additive models (elm/neural networks and GAMs are 4 | # adaptive basis function models). 5 | 6 | # http://www.extreme-learning-machines.org 7 | # G.-B. Huang, Q.-Y. Zhu and C.-K. 
Siew, "Extreme Learning Machine: Theory and Applications" 8 | 9 | elm <- function(X, y, n_hidden=NULL, active_fun=tanh) { 10 | # X: an N observations x p features matrix 11 | # y: the target 12 | # n_hidden: the number of hidden nodes 13 | # active_fun: activation function 14 | pp1 = ncol(X) + 1 15 | w0 = matrix(rnorm(pp1*n_hidden), pp1, n_hidden) # random weights 16 | h = active_fun(cbind(1, scale(X)) %*% w0) # compute hidden layer 17 | B = MASS::ginv(h) %*% y # find weights for hidden layer 18 | fit = h %*% B # fitted values 19 | list(fit= fit, loss=crossprod(fit - y), B=B, w0=w0) 20 | } 21 | 22 | 23 | 24 | # one variable, complex function ------------------------------------------- 25 | library(tidyverse); library(mgcv) 26 | set.seed(123) 27 | n = 5000 28 | x = runif(n) 29 | # x = rnorm(n) 30 | mu = sin(2*(4*x-2)) + 2*exp(-(16^2)*((x-.5)^2)) 31 | y = rnorm(n, mu, .3) 32 | # qplot(x, y) 33 | d = data.frame(x,y) 34 | 35 | X_ = as.matrix(x, ncol=1) 36 | 37 | test = elm(X_, y, n_hidden=100) 38 | str(test) 39 | # qplot(x, y) + geom_line(aes(y=test$fit), color='#1e90ff') 40 | cor(test$fit[,1], y)^2 41 | 42 | gam_comparison = gam(y~s(x)) 43 | summary(gam_comparison)$r.sq 44 | 45 | 46 | d %>% 47 | mutate(fit_elm = test$fit, 48 | fit_gam = fitted(gam_comparison)) %>% 49 | ggplot() + 50 | geom_point(aes(x, y), alpha=.1) + 51 | geom_line(aes(x, y=fit_elm), color='#1e90ff') + 52 | geom_line(aes(x, y=fit_gam), color='darkred') 53 | 54 | 55 | 56 | 57 | # motorcycle accident data ------------------------------------------------ 58 | 59 | data('mcycle', package='MASS') 60 | x = mcycle[,1] 61 | X_ = matrix(x, ncol=1) 62 | y = mcycle[,2] 63 | 64 | test = elm(X_, y, n_hidden=100) 65 | cor(test$fit[,1], y)^2 66 | 67 | gam_comparison = gam(y~s(x)) 68 | summary(gam_comparison)$r.sq 69 | 70 | qplot(x, y) + 71 | geom_line(aes(y=test$fit), color='#1e90ff') + 72 | geom_line(aes(y=fitted(gam_comparison)), color='darkred') 73 | 74 | 75 | 76 | 77 | # add covariates ---------------------------------------------------------- 78 | 79 | d = gamSim(eg=7, n=10000) 80 | X_ = as.matrix(d[,2:5]) 81 | y = d[,1] 82 | 83 | n_nodes = c(10, 25, 100, 250, 500, 1000) 84 | test = lapply(n_nodes, function(n) elm(X_, y, n_hidden=n)) # this will take a few seconds 85 | final_n = which.min(sapply(test, function(x) x$loss)) 86 | best = test[[final_n]] 87 | # str(best) 88 | qplot(best$fit[,1], y, alpha=.2) 89 | cor(best$fit[,1], y)^2 90 | 91 | gam_comparison = gam(y~s(x0) + s(x1) + s(x2) + s(x3), data=d) 92 | gam.check(gam_comparison) 93 | summary(gam_comparison)$r.sq 94 | 95 | 96 | test_data0 = gamSim(eg=7) # default n = 400 97 | test_data = cbind(1, scale(test_data0[,2:5])) 98 | 99 | elm_prediction = tanh(test_data %*% best$w0) %*% best$B # remember to use your specific activation function here 100 | gam_prediction = predict(gam_comparison, newdata=test_data0) 101 | cor(data.frame(elm_prediction, gam_prediction), test_data0$y)^2 102 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/README.md: -------------------------------------------------------------------------------- 1 | Since I first did the code for these models, the Stan group has done some notable improvement in this area, both in terms of the language itself, the example chapter for Gaussian processes, and even implementing a specific function for the squared exponential kernel. While the offerings here may still be of use, they may also be dated, so check the recent Stan manual before getting too carried away. 
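For reference, a minimal sketch of a GP regression using the built-in squared exponential kernel from more recent Stan versions (the `cov_exp_quad` function, later renamed `gp_exp_quad_cov`; the priors and parameter names here are illustrative, so check the manual for your installed version):

```stan
data {
  int<lower=1> N;
  real x[N];
  vector[N] y;
}
parameters {
  real<lower=0> eta;    // marginal standard deviation
  real<lower=0> rho;    // length scale
  real<lower=0> sigma;  // observation noise sd
}
model {
  matrix[N, N] K = cov_exp_quad(x, eta, rho);
  for (n in 1:N)
    K[n, n] = K[n, n] + square(sigma);  // add noise variance to the diagonal

  eta   ~ normal(0, 1);
  rho   ~ inv_gamma(5, 5);
  sigma ~ normal(0, 1);

  y ~ multi_normal(rep_vector(0, N), K);
}
```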
2 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gaussSample.m: -------------------------------------------------------------------------------- 1 | function S = gaussSample(arg1, arg2, arg3) 2 | % Returns n samples (in the rows) from a multivariate Gaussian distribution 3 | % 4 | % Examples: 5 | % S = gaussSample(mu, Sigma, 10) 6 | % S = gaussSample(model, 100) 7 | % S = gaussSample(struct('mu',[0], 'Sigma', eye(1)), 3) 8 | 9 | % This file is from pmtk3.googlecode.com 10 | 11 | 12 | switch nargin 13 | case 3, mu = arg1; Sigma = arg2; n = arg3; 14 | case 2, model = arg1; mu = model.mu; Sigma = model.Sigma; n = arg2; 15 | case 1, model = arg1; mu = model.mu; Sigma = model.Sigma; n = 1; 16 | otherwise 17 | error('bad num args') 18 | end 19 | 20 | A = chol(Sigma, 'lower'); 21 | Z = randn(length(mu), n); 22 | S = bsxfun(@plus, mu(:), A*Z)'; 23 | 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gaussianProcessStan.R: -------------------------------------------------------------------------------- 1 | # Data and parameter setup ------------------------------------------------ 2 | 3 | # Data 4 | set.seed(1234) 5 | N = 20 6 | Ntest = 200 7 | x = rnorm(N, sd=1) 8 | y = scale(sin(x) + rnorm(N, sd=.1))[,1] 9 | xtest = seq(min(x)-1, max(x)+1, l=Ntest) 10 | plot(x,y, pch=19, col='#ff5500') 11 | 12 | # parameters 13 | eta_sq = 1 14 | rho_sq = 1 15 | sigma_sq = .1 16 | 17 | # Covariance function same as implemented in the Stan code. 18 | Kfn <- function (x, eta_sq, rho_sq, sigma_sq) { 19 | N = length(x) 20 | Sigma = matrix(NA, N, N) 21 | 22 | # off diag elements 23 | for (i in 1:(N-1)) { 24 | for (j in (i+1):N) { 25 | Sigma[i,j] <- eta_sq * exp(-rho_sq * (x[i] - x[j])^2); 26 | Sigma[j,i] <- Sigma[i,j]; 27 | } 28 | } 29 | 30 | # diagonal elements 31 | for (k in 1:N) 32 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter 33 | Sigma 34 | } 35 | 36 | 37 | 38 | # Vis prior functions ----------------------------------------------------- 39 | xinit = seq(-5,5,.2) 40 | xprior = MASS::mvrnorm(3, 41 | mu=rep(0, length(xinit)), 42 | Sigma=Kfn(x=xinit, 43 | eta_sq = eta_sq, 44 | rho_sq = rho_sq, 45 | sigma_sq = sigma_sq)) 46 | 47 | 48 | library(reshape2) 49 | gdat = melt(data.frame(x=xinit, y=t(xprior)), id='x') 50 | 51 | library(ggvis) 52 | gdat %>% 53 | ggvis(~x, ~value) %>% 54 | group_by(variable) %>% 55 | layer_paths(strokeOpacity:=.5) %>% 56 | add_axis('x', grid=F) %>% 57 | add_axis('y', grid=F) 58 | 59 | 60 | 61 | # Stan model code --------------------------------------------------------- 62 | # models/covariance functions available 63 | # gpStanModelCode_generalizedSquaredExponential.stan 64 | # gpStanModelCode_gammaExponential.stan 65 | # gpStanModelCode_rationalQuadratic.stan 66 | 67 | gp = 'ModelFitting/gp Examples/gpStanModelCode_generalizedSquaredExponential.stan' 68 | 69 | # Compile Check ----------------------------------------------------------- 70 | 71 | standata = list(N=N, x=x, y=y, xtest=xtest, Ntest=200) 72 | 73 | 74 | library(rstan) 75 | fit0 = stan(data=standata, file = gp, iter = 1, chains=1) 76 | 77 | 78 | 79 | # Main Run ---------------------------------------------------------------- 80 | 81 | iterations = 12000 82 | wu = 2000 83 | th = 20 84 | chains = 4 85 | 86 | 87 | # With N = 20 Ntest = 200 takes about 2 min for gen squared expo, 10+min for 300 88 | 89 | p = proc.time() 90 | fit = stan(data=standata, file=gp, iter = iterations, warmup = wu, thin=th, 91 | 
chains=chains, fit = fit0, cores=chains)
92 | (proc.time() - p)/3600
93 |
94 |
95 |
96 | # Summarize and Vis -------------------------------------------------------
97 |
98 | # takes a bit to print
99 | # print(fit, par=c('eta_sq','rho_sq','sigma_sq'))
100 |
101 | # library(shinyStan)
102 | # launch_shinystan(fit)
103 |
104 |
105 | # Extract and visualize posterior predictive draws
106 | yRep = extract(fit, 'yRep')$yRep
107 |
108 | gdat = data.frame(x,y)
109 | gdat2 = melt(data.frame(x = sort(xtest), y=t(yRep[sample(2000, 3),])), id='x')
110 |
111 | gdat2 %>%
112 | ggvis(~x, ~value) %>%
113 | group_by(variable) %>%
114 | layer_paths(strokeOpacity:=.25) %>%
115 | layer_points(x=~x, y=~y, fill:='#ff5500', data=gdat) %>%
116 | add_axis('x', grid=F) %>%
117 | add_axis('y', grid=F)
118 |
119 |
120 | # Visualize fit
121 | yRepMean = get_posterior_mean(fit, 'yRep')[,5]
122 | quantiles = data.frame(t(apply(yRep, 2, quantile, p=c(.025,.975)))); colnames(quantiles) = c('ll','ul')
123 |
124 | gdat3 = data.frame(x = sort(xtest), y=yRepMean, quantiles)
125 | ttle = stringr::str_extract(gp, "(?<=_)(.*)(?=\\.)")
126 |
127 | library(kernlab)
128 | comparisonModel = gausspr(x, y, kernel=rbfdot(sigma=1), var=.1, scaled=F, tol=1e-5)
129 | comparisonY = predict(comparisonModel, newdata=xtest)
130 |
131 | library(mgcv)
132 | comparisonModel2 = gam(y~s(x, bs='gp', m=c(2,2,2)))
133 | comparisonY2 = predict(comparisonModel2, newdata=data.frame(x=xtest))
134 |
135 | gdat3 %>%
136 | ggvis(~x, ~y) %>%
137 | layer_ribbons(y=~ll, y2=~ul, fillOpacity:=.1) %>%
138 | layer_paths(strokeOpacity:=.5, stroke:='blue') %>%
139 | layer_paths(y=~comparisonY, strokeOpacity:=.5, stroke:='red') %>%
140 | layer_paths(y=~comparisonY2, strokeOpacity:=.5, stroke:='green') %>%
141 | layer_points(x=~x, y=~y, fill:='#ff5500', fillOpacity:=.5, data=gdat) %>%
142 | add_axis('x', grid=F) %>%
143 | add_axis("x", orient = "top", ticks = 0, title = ttle,
144 | properties = axis_props(
145 | axis = list(stroke = "white"),
146 | labels = list(fontSize = 0))) %>%
147 | add_axis('y', grid=F)
148 |
149 |
-------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStanModelCode.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | vector[N] x; # covariate 4 | vector[N] y; # target 5 | int Ntest; # prediction set sample size 6 | vector[Ntest] xtest; # prediction values for covariate 7 | } 8 | 9 | transformed data { 10 | vector[N] mu; 11 | 12 | mu <- rep_vector(0, N); # mean function 13 | } 14 | 15 | parameters { 16 | real eta_sq; # parameters of squared exponential covariance function 17 | real inv_rho_sq; # rho the length scale 18 | real sigma_sq; # eta_sq + sigma_sq = var of predicted y; eta_sq is variance explained by the function 19 | 20 | real eta_sq_scale; # hyperpriors 21 | real inv_rho_sq_scale; 22 | real sigma_sq_scale; 23 | } 24 | 25 | model { 26 | matrix[N,N] Sigma; 27 | 28 | # off-diagonal elements for covariance matrix 29 | for (i in 1:(N-1)) { 30 | for (j in (i+1):N) { 31 | Sigma[i,j] <- eta_sq * exp( - pow(x[i] - x[j],2) / inv_rho_sq ); 32 | Sigma[j,i] <- Sigma[i,j]; 33 | } 34 | } 35 | 36 | # diagonal elements 37 | for (k in 1:N) 38 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 39 | 40 | # hyperpriors 41 | eta_sq_scale ~ exponential(.2); 42 | inv_rho_sq_scale ~ exponential(.2); 43 | sigma_sq_scale ~ exponential(.2); 44 | 45 | # priors 46 | eta_sq ~ cauchy(0, eta_sq_scale); 47 | inv_rho_sq ~
cauchy(0, inv_rho_sq_scale); 48 | sigma_sq ~ cauchy(0, sigma_sq_scale); 49 | 50 | # sampling distribution 51 | y ~ multi_normal(mu, Sigma); 52 | } 53 | 54 | generated quantities { 55 | vector[Ntest] muTest; # The following produces the posterior predictive draws 56 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 57 | matrix[Ntest,Ntest] L; 58 | { 59 | matrix[N,N] Sigma; 60 | matrix[Ntest,Ntest] Omega; 61 | matrix[N,Ntest] K; 62 | matrix[Ntest,N] K_transpose_div_Sigma; 63 | matrix[Ntest,Ntest] Tau; 64 | 65 | # K all elements 66 | for (i in 1:N) 67 | for (j in 1:Ntest) 68 | K[i,j] <- eta_sq * exp(-pow(x[i] - xtest[j], 2)/inv_rho_sq); 69 | 70 | # Sigma off-diagonal elements 71 | for (i in 1:(N-1)) { 72 | for (j in (i+1):N) { 73 | Sigma[i,j] <- eta_sq * exp(- pow(x[i] - x[j],2)/inv_rho_sq); 74 | Sigma[j,i] <- Sigma[i,j]; 75 | } 76 | } 77 | 78 | #Omega off-diagonal elements 79 | for (i in 1:(Ntest-1)) { 80 | for (j in (i+1):Ntest) { 81 | Omega[i,j] <- eta_sq * exp(- pow(xtest[i] - xtest[j],2)/inv_rho_sq); 82 | Omega[j,i] <- Omega[i,j]; 83 | } 84 | } 85 | 86 | #Sigma diagonal elements 87 | for (k in 1:N) 88 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 89 | 90 | #Omega diagonal elements 91 | for (k in 1:Ntest) 92 | Omega[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 93 | 94 | K_transpose_div_Sigma <- K' / Sigma; 95 | muTest <- K_transpose_div_Sigma * y; 96 | Tau <- Omega - K_transpose_div_Sigma * K; 97 | 98 | for (i in 1:(Ntest-1)) 99 | for (j in (i+1):Ntest) 100 | Tau[i,j] <- Tau[j,i]; 101 | 102 | L <- cholesky_decompose(Tau); 103 | } 104 | 105 | yRep <- multi_normal_cholesky_rng(muTest, L); 106 | } 107 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStanModelCode_gammaExponential.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | vector[N] x; # covariate 4 | vector[N] y; # target 5 | int Ntest; # prediction set sample size 6 | vector[Ntest] xtest; # prediction values for covariate 7 | } 8 | 9 | transformed data { 10 | vector[N] mu; 11 | 12 | mu <- rep_vector(0, N); # mean function 13 | } 14 | 15 | parameters { 16 | real eta_sq; # parameters of squared exponential covariance function 17 | real l_; 18 | real sigma_sq; 19 | real gamma; 20 | 21 | real eta_sq_scale; # hyperpriors 22 | real l_scale; 23 | real sigma_sq_scale; 24 | } 25 | 26 | transformed parameters { 27 | } 28 | 29 | model { 30 | matrix[N,N] Sigma; 31 | 32 | # off-diagonal elements for covariance matrix 33 | for (i in 1:(N-1)) { 34 | for (j in (i+1):N) { 35 | Sigma[i,j] <- eta_sq * exp(-pow(fabs(x[i] - x[j])/l_, gamma)); 36 | Sigma[j,i] <- Sigma[i,j]; 37 | } 38 | } 39 | 40 | # diagonal elements 41 | for (k in 1:N) 42 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 43 | 44 | # hyperpriors 45 | eta_sq_scale ~ exponential(.2); 46 | l_scale ~ exponential(.2); 47 | sigma_sq_scale ~ exponential(.2); 48 | 49 | # priors 50 | eta_sq ~ cauchy(0, eta_sq_scale); 51 | l_ ~ cauchy(0, l_scale); 52 | sigma_sq ~ cauchy(0, sigma_sq_scale); 53 | gamma ~ cauchy(0, 1); 54 | 55 | # sampling distribution 56 | y ~ multi_normal(mu, Sigma); 57 | } 58 | 59 | generated quantities { 60 | vector[Ntest] muTest; # The following produces the posterior predictive draws 61 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 
62 | matrix[Ntest,Ntest] L; 63 | { 64 | matrix[N, N] Sigma; 65 | matrix[Ntest, Ntest] Omega; 66 | matrix[N,Ntest] K; 67 | matrix[Ntest,N] K_transpose_div_Sigma; 68 | matrix[Ntest, Ntest] Tau; 69 | 70 | # Sigma 71 | for (i in 1:N) 72 | for (j in 1:N) 73 | Sigma[i,j] <- eta_sq * exp(-pow(fabs(x[i] - x[j])/l_, gamma)) + if_else(i==j, sigma_sq, 0.0); 74 | 75 | # Omega 76 | for (i in 1:Ntest) 77 | for (j in 1:Ntest) 78 | Omega[i,j] <- eta_sq * exp(-pow(fabs(xtest[i] - xtest[j])/l_, gamma)) + if_else(i==j, sigma_sq, 0.0); 79 | 80 | # K 81 | for (i in 1:N) 82 | for (j in 1:Ntest) 83 | K[i,j] <- eta_sq * exp(-pow(fabs(x[i] - xtest[j])/l_, gamma)); 84 | 85 | K_transpose_div_Sigma <- K' / Sigma; 86 | muTest <- K_transpose_div_Sigma * y; 87 | Tau <- Omega - K_transpose_div_Sigma * K; 88 | 89 | for (i in 1:(Ntest-1)) 90 | for (j in (i+1):Ntest) 91 | Tau[i,j] <- Tau[j,i]; 92 | 93 | L <- cholesky_decompose(Tau); 94 | } 95 | 96 | yRep <- multi_normal_cholesky_rng(muTest, L); 97 | } -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStanModelCode_generalizedSquaredExponential.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | vector[N] x; # covariate 4 | vector[N] y; # target 5 | int Ntest; # prediction set sample size 6 | vector[Ntest] xtest; # prediction values for covariate 7 | } 8 | 9 | transformed data { 10 | vector[N] mu; 11 | 12 | mu <- rep_vector(0, N); # mean function 13 | } 14 | 15 | parameters { 16 | real eta_sq; # parameters of squared exponential covariance function 17 | real inv_rho_sq; # rho the length scale 18 | real sigma_sq; # eta_sq + sigma_sq = var of predicted y; eta_sq is variance explained by the function 19 | 20 | real eta_sq_scale; # hyperpriors 21 | real inv_rho_sq_scale; 22 | real sigma_sq_scale; 23 | } 24 | 25 | transformed parameters { 26 | real rho_sq; 27 | rho_sq <- inv(inv_rho_sq); 28 | } 29 | 30 | model { 31 | matrix[N,N] Sigma; 32 | 33 | # off-diagonal elements for covariance matrix 34 | for (i in 1:(N-1)) { 35 | for (j in (i+1):N) { 36 | Sigma[i,j] <- eta_sq * exp(-rho_sq * pow(x[i] - x[j],2)); 37 | Sigma[j,i] <- Sigma[i,j]; 38 | } 39 | } 40 | 41 | # diagonal elements 42 | for (k in 1:N) 43 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 44 | 45 | # hyperpriors 46 | eta_sq_scale ~ exponential(.2); 47 | inv_rho_sq_scale ~ exponential(.2); 48 | sigma_sq_scale ~ exponential(.2); 49 | 50 | # priors 51 | eta_sq ~ cauchy(0, eta_sq_scale); 52 | inv_rho_sq ~ cauchy(0, inv_rho_sq_scale); 53 | sigma_sq ~ cauchy(0, sigma_sq_scale); 54 | 55 | # sampling distribution 56 | y ~ multi_normal(mu, Sigma); 57 | } 58 | 59 | generated quantities { 60 | vector[Ntest] muTest; # The following produces the posterior predictive draws 61 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 
62 | matrix[Ntest,Ntest] L; 63 | { 64 | matrix[N,N] Sigma; 65 | matrix[Ntest,Ntest] Omega; 66 | matrix[N,Ntest] K; 67 | matrix[Ntest,N] K_transpose_div_Sigma; 68 | matrix[Ntest,Ntest] Tau; 69 | 70 | # Sigma 71 | for (i in 1:N) 72 | for (j in 1:N) 73 | Sigma[i,j] <- eta_sq * exp(-pow(x[i] - x[j], 2)) + if_else(i==j, sigma_sq, 0.0); 74 | 75 | # Omega 76 | for (i in 1:Ntest) 77 | for (j in 1:Ntest) 78 | Omega[i,j] <- eta_sq * exp(-pow(xtest[i] - xtest[j], 2)) + if_else(i==j, sigma_sq, 0.0); 79 | 80 | # K 81 | for (i in 1:N) 82 | for (j in 1:Ntest) 83 | K[i,j] <- eta_sq * exp(-pow(x[i] - xtest[j], 2)); 84 | 85 | K_transpose_div_Sigma <- K' / Sigma; 86 | muTest <- K_transpose_div_Sigma * y; 87 | Tau <- Omega - K_transpose_div_Sigma * K; 88 | 89 | for (i in 1:(Ntest-1)) 90 | for (j in (i+1):Ntest) 91 | Tau[i,j] <- Tau[j,i]; 92 | 93 | L <- cholesky_decompose(Tau); 94 | } 95 | 96 | yRep <- multi_normal_cholesky_rng(muTest, L); 97 | 98 | } -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStanModelCode_rationalQuadratic.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | vector[N] x; # covariate 4 | vector[N] y; # target 5 | int Ntest; # prediction set sample size 6 | vector[Ntest] xtest; # prediction values for covariate 7 | } 8 | 9 | transformed data { 10 | vector[N] mu; 11 | 12 | mu <- rep_vector(0, N); # mean function 13 | } 14 | 15 | parameters { 16 | real alpha; # parameters of rational quadratic covariance function 17 | real l_; 18 | real eta_sq; 19 | real sigma_sq; 20 | 21 | real alpha_scale; # hyperpriors 22 | real l_scale; 23 | real eta_sq_scale; 24 | real sigma_sq_scale; 25 | } 26 | 27 | transformed parameters { 28 | } 29 | 30 | model { 31 | matrix[N,N] Sigma; 32 | 33 | # off-diagonal elements for covariance matrix 34 | for (i in 1:(N-1)) { 35 | for (j in (i+1):N) { 36 | Sigma[i,j] <- eta_sq * (1 + (1/(2*alpha)) * pow(fabs(x[i] - x[j]) /l_, 2))^-alpha; 37 | Sigma[j,i] <- Sigma[i,j]; 38 | } 39 | } 40 | 41 | # diagonal elements 42 | for (k in 1:N) 43 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 44 | 45 | # hyperpriors 46 | eta_sq_scale ~ exponential(.2); 47 | l_scale ~ exponential(.2); 48 | sigma_sq_scale ~ exponential(.2); 49 | alpha_scale ~ exponential(.2); 50 | 51 | # priors 52 | eta_sq ~ cauchy(0, eta_sq_scale); 53 | l_ ~ cauchy(0, l_scale); 54 | sigma_sq ~ cauchy(0, sigma_sq_scale); 55 | alpha ~ cauchy(0, alpha_scale); 56 | 57 | # sampling distribution 58 | y ~ multi_normal(mu, Sigma); 59 | } 60 | 61 | generated quantities { 62 | vector[Ntest] muTest; # The following produces the posterior predictive draws 63 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 
64 | matrix[Ntest,Ntest] L; 65 | { 66 | matrix[N, N] Sigma; 67 | matrix[Ntest, Ntest] Omega; 68 | matrix[N,Ntest] K; 69 | matrix[Ntest,N] K_transpose_div_Sigma; 70 | matrix[Ntest, Ntest] Tau; 71 | 72 | # Sigma 73 | for (i in 1:N) 74 | for (j in 1:N) 75 | Sigma[i,j] <- eta_sq * (1 + (1/(2*alpha)) * pow(fabs(x[i] - x[j]) /l_, 2))^-alpha + if_else(i==j, sigma_sq, 0.0); 76 | 77 | # Omega 78 | for (i in 1:Ntest) 79 | for (j in 1:Ntest) 80 | Omega[i,j] <- eta_sq * (1 + (1/(2*alpha)) * pow(fabs(xtest[i] - xtest[j]) /l_, 2))^-alpha + if_else(i==j, sigma_sq, 0.0); 81 | 82 | # K 83 | for (i in 1:N) 84 | for (j in 1:Ntest) 85 | K[i,j] <- eta_sq * (1 + (1/(2*alpha)) * pow(fabs(x[i] - xtest[j]) /l_, 2))^-alpha; 86 | 87 | K_transpose_div_Sigma <- K' / Sigma; 88 | muTest <- K_transpose_div_Sigma * y; 89 | Tau <- Omega - K_transpose_div_Sigma * K; 90 | 91 | for (i in 1:(Ntest-1)) 92 | for (j in (i+1):Ntest) 93 | Tau[i,j] <- Tau[j,i]; 94 | 95 | L <- cholesky_decompose(Tau); 96 | } 97 | 98 | yRep <- multi_normal_cholesky_rng(muTest, L); 99 | } -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gpStan_squaredExponentialFactorAnalysis.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int N; # initial sample size 3 | int D; # number of covariates 4 | matrix[N,D] X; # covariate matrix 5 | vector[N] y; # target 6 | int Ntest; # prediction set sample size 7 | matrix[Ntest,D] Xtest; # prediction values for covariate 8 | int K; # number of factors 9 | } 10 | 11 | transformed data { 12 | vector[N] mu; 13 | 14 | mu <- rep_vector(0, N); # mean function 15 | } 16 | 17 | parameters { 18 | real eta_sq; # parameters of squared exponential covariance function 19 | real sigma_sq; # eta_sq + sigma_sq = var of predicted y; eta_sq is variance explained by the function so can put an upper limit 20 | vector[D] l_sq; # characteristic length 21 | real lambda; # factor loadings, for this problem a single value, as one of them will be fixed to 1 for identification purposes 22 | 23 | // real eta_sq_scale; # hyperpriors 24 | // real sigma_sq_scale; 25 | // real l_sq_scale; 26 | } 27 | 28 | transformed parameters { 29 | vector[D] inv_l_sq; 30 | matrix[D,K] L; 31 | 32 | L[1,1] <- 1; 33 | L[2,1] <- lambda; 34 | 35 | for (d in 1:D) inv_l_sq[d] <- 1/l_sq[d]; 36 | } 37 | 38 | model { 39 | matrix[N,N] Sigma; 40 | 41 | 42 | # off-diagonal elements for covariance matrix 43 | for (i in 1:(N-1)) { 44 | for (j in (i+1):N) { 45 | Sigma[i,j] <- eta_sq * exp(-.5 * quad_form(tcrossprod(L) + diag_matrix(inv_l_sq), to_vector(X[i] - X[j]))); 46 | Sigma[j,i] <- Sigma[i,j]; 47 | } 48 | } 49 | 50 | # diagonal elements 51 | for (k in 1:N) 52 | Sigma[k,k] <- eta_sq + sigma_sq; # + jitter for pos def 53 | 54 | # hyperpriors 55 | // eta_sq_scale ~ exponential(.2); 56 | // sigma_sq_scale ~ exponential(.2); 57 | // l_sq_scale ~ exponential(.2); 58 | 59 | # priors 60 | eta_sq ~ cauchy(0, 5); 61 | sigma_sq ~ cauchy(0, 5); 62 | l_sq ~ cauchy(0, 5); 63 | lambda ~ normal(0, 1); 64 | 65 | # sampling distribution 66 | y ~ multi_normal(mu, Sigma); 67 | } 68 | 69 | generated quantities { 70 | vector[Ntest] muTest; # The following produces the posterior predictive draws 71 | vector[Ntest] yRep; # see GP section of Stan man- 'Analytical Form...' 
72 | matrix[Ntest,Ntest] Q; 73 | { 74 | matrix[N,N] Sigma; 75 | matrix[Ntest,Ntest] Omega; 76 | matrix[N,Ntest] K_; 77 | matrix[Ntest,N] K_transpose_div_Sigma; 78 | matrix[Ntest,Ntest] Tau; 79 | 80 | # Sigma 81 | for (i in 1:N) 82 | for (j in 1:N) 83 | Sigma[i,j] <- eta_sq * exp(-.5 * quad_form(tcrossprod(L) + diag_matrix(inv_l_sq), to_vector(X[i] - X[j]))) + if_else(i==j, sigma_sq, 0.0); 84 | 85 | # Omega 86 | for (i in 1:Ntest) 87 | for (j in 1:Ntest) 88 | Omega[i,j] <- eta_sq * exp(-.5 * quad_form(tcrossprod(L) + diag_matrix(inv_l_sq), to_vector(Xtest[i] - Xtest[j]))) + if_else(i==j, sigma_sq, 0.0); 89 | 90 | # K 91 | for (i in 1:N) 92 | for (j in 1:Ntest) 93 | K_[i,j] <- eta_sq * exp(-.5 * quad_form(tcrossprod(L) + diag_matrix(inv_l_sq), to_vector(X[i] - Xtest[j]))); 94 | 95 | K_transpose_div_Sigma <- K_' / Sigma; 96 | muTest <- K_transpose_div_Sigma * y; 97 | Tau <- Omega - K_transpose_div_Sigma * K_; 98 | 99 | for (i in 1:(Ntest-1)) 100 | for (j in (i+1):Ntest) 101 | Tau[i,j] <- Tau[j,i]; 102 | 103 | Q <- cholesky_decompose(Tau); 104 | } 105 | 106 | yRep <- multi_normal_cholesky_rng(muTest, Q); 107 | } -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gprDemoChangeHparams.m: -------------------------------------------------------------------------------- 1 | %% Visualize the effect of change the hyper-params for a 1d GP regression 2 | % based on demo_gpr by Carl Rasmussen 3 | % 4 | %% Generate data 5 | 6 | % This file is from pmtk3.googlecode.com 7 | 8 | n = 20; 9 | rand('state',18); 10 | randn('state',20); 11 | covfunc = {'covSum', {'covSEiso','covNoise'}}; 12 | loghyper = [log(1.0); log(1.0); log(0.1)]; 13 | x = 15*(rand(n,1)-0.5); 14 | y = chol(feval(covfunc{:}, loghyper, x))'*randn(n,1); % Cholesky decomp. 15 | 16 | xstar = linspace(-7.5, 7.5, 201)'; 17 | 18 | hyps = [log(1), log(1), log(0.1);... 19 | log(0.3),log(1.08),log(0.00005);... 
20 | log(3),log(1.16),log(0.89)]; 21 | 22 | %% compute post pred and plot marginals 23 | for i=1:size(hyps,1) 24 | loghyper = hyps(i,:)'; 25 | [mu, S2] = gpr(loghyper, covfunc, x, y, xstar); 26 | S2 = S2 - exp(2*loghyper(3)); % remove observation noise 27 | 28 | figure; 29 | f = [mu+2*sqrt(S2);flipdim(mu-2*sqrt(S2),1)]; 30 | fill([xstar; flipdim(xstar,1)], f, [7 7 7]/8, 'EdgeColor', [7 7 7]/8); 31 | hold on 32 | plot(xstar,mu,'k-','LineWidth',2); 33 | plot(x, y, 'k+', 'MarkerSize', 17); 34 | axis([-8 8 -3 3]) 35 | printPmtkFigure(sprintf('gprDemoChangeHparams%d', i)); 36 | end 37 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/gprDemoNoiseFree.m: -------------------------------------------------------------------------------- 1 | %% Reproduce figure 2.2 from GP book 2 | % 3 | %% 4 | 5 | % This file is from pmtk3.googlecode.com 6 | 7 | setSeed(0); 8 | L = 1; 9 | xs = (-5:0.2:5)'; 10 | ns = length(xs); 11 | keps = 1e-8; 12 | muFn = @(x) 0*x(:).^2; 13 | Kfn = @(x,z) 1*exp(-sq_dist(x'/L,z'/L)/2); 14 | 15 | 16 | % plot sampled functions from the prior 17 | figure; hold on 18 | for i=1:3 19 | model = struct('mu', muFn(xs), 'Sigma', Kfn(xs, xs) + 1e-15*eye(size(xs, 1))); 20 | fs = gaussSample(model, 1); 21 | plot(xs, fs, 'k-', 'linewidth', 2) 22 | end 23 | printPmtkFigure('gprDemoNoiseFreePrior') 24 | 25 | 26 | % generate noise-less training data 27 | Xtrain = [-4, -3, -2, -1, 1]'; 28 | ftrain = sin(Xtrain); 29 | 30 | % compute posterior predictive 31 | K = Kfn(Xtrain, Xtrain); % K 32 | Ks = Kfn(Xtrain, xs); %K_* 33 | Kss = Kfn(xs, xs) + keps*eye(length(xs)); % K_** (keps is essential!) 34 | Ki = inv(K); 35 | postMu = muFn(xs) + Ks'*Ki*(ftrain - muFn(Xtrain)); 36 | postCov = Kss - Ks'*Ki*Ks; 37 | 38 | figure; hold on 39 | % plot marginal posterior variance as gray band 40 | mu = postMu(:); 41 | S2 = diag(postCov); 42 | f = [mu+2*sqrt(S2);flipdim(mu-2*sqrt(S2),1)]; 43 | fill([xs; flipdim(xs,1)], f, [7 7 7]/8, 'EdgeColor', [7 7 7]/8); 44 | 45 | % plot samples from posterior predictive 46 | for i=1:3 47 | model = struct('mu', postMu(:)', 'Sigma', postCov); 48 | fs = gaussSample(model, 1); 49 | plot(xs, fs, 'k-', 'linewidth', 2) 50 | h=plot(Xtrain, ftrain, 'kx', 'markersize', 12, 'linewidth', 3); 51 | end 52 | printPmtkFigure('gprDemoNoiseFreePost') 53 | 54 | -------------------------------------------------------------------------------- /ModelFitting/gp Examples/stangp.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/ModelFitting/gp Examples/stangp.RData -------------------------------------------------------------------------------- /ModelFitting/gradient_descent.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Gradient Descent" 3 | #' author: "Michael Clark" 4 | #' css: '../other.css' 5 | #' highlight: pygments 6 | #' date: "" 7 | #' --- 8 | 9 | #' Gradient descent for a standard linear regression model. The function takes 10 | #' arguments starting points for the parameters to be estimated, a tolerance or 11 | #' maximum iteration value to provide a stopping point, stepsize (or starting 12 | #' stepsize for adaptive approach), whether to print out iterations, and whether 13 | #' to plot the loss over each iteration. 14 | #' 15 | #' 16 | #' 17 | #' # Data Setup 18 | #' 19 | #' Create some basic data for standard regression. 
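#' (As a reminder of what follows: for linear regression the loss is the
#' residual sum of squares, (y - X*beta)'(y - X*beta), and its gradient with
#' respect to beta is 2*X'(X*beta - y); the constant 2 from differentiation is
#' simply absorbed into the stepsize in the `gd` function below.)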
20 | 21 | set.seed(8675309) 22 | 23 | n = 1000 24 | x1 = rnorm(n) 25 | x2 = rnorm(n) 26 | y = 1 + .5*x1 + .2*x2 + rnorm(n) 27 | X = cbind(Intercept = 1, x1, x2) # model matrix 28 | 29 | 30 | 31 | #' # Gradient Descent Algorithm 32 | 33 | 34 | gd = function( 35 | par, 36 | X, 37 | y, 38 | tolerance = 1e-3, 39 | maxit = 1000, 40 | stepsize = 1e-3, 41 | adapt = FALSE, 42 | verbose = TRUE, 43 | plotLoss = TRUE 44 | ) { 45 | 46 | # initialize 47 | beta = par; names(beta) = colnames(X) 48 | loss = crossprod(X %*% beta - y) 49 | tol = 1 50 | iter = 1 51 | 52 | while(tol > tolerance && iter < maxit){ 53 | 54 | LP = X %*% beta 55 | grad = t(X) %*% (LP - y) 56 | betaCurrent = beta - stepsize * grad 57 | tol = max(abs(betaCurrent - beta)) 58 | beta = betaCurrent 59 | loss = append(loss, crossprod(LP - y)) 60 | iter = iter + 1 61 | 62 | if (adapt) 63 | stepsize = ifelse( 64 | loss[iter] < loss[iter - 1], 65 | stepsize * 1.2, 66 | stepsize * .8 67 | ) 68 | 69 | if (verbose && iter %% 10 == 0) 70 | message(paste('Iteration:', iter)) 71 | } 72 | 73 | if (plotLoss) 74 | plot(loss, type = 'l', bty = 'n') 75 | 76 | list( 77 | par = beta, 78 | loss = loss, 79 | RSE = sqrt(crossprod(LP - y) / (nrow(X) - ncol(X))), 80 | iter = iter, 81 | fitted = LP 82 | ) 83 | } 84 | 85 | 86 | #' ## Run 87 | #' 88 | #' Set starting values. 89 | 90 | init = rep(0, 3) 91 | 92 | #' For any particular data you'd have to fiddle with the `stepsize`, which could 93 | #' be assessed via cross-validation, or alternatively one can use an 94 | #' adaptive approach, a simple one of which is implemented in this function. 95 | 96 | gd_result = gd( 97 | init, 98 | X = X, 99 | y = y, 100 | tolerance = 1e-8, 101 | stepsize = 1e-4, 102 | adapt = TRUE 103 | ) 104 | 105 | str(gd_result) 106 | 107 | #' ## Comparison 108 | #' 109 | #' We can compare to standard linear regression. 
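#' Since this is ordinary least squares, the closed-form solution is also
#' available as a quick check and should match both sets of estimates below
#' (`coef_ols` is just an illustrative name).

coef_ols = solve(crossprod(X)) %*% crossprod(X, y)  # (X'X)^{-1} X'y; illustrative name
t(coef_ols)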
110 | 111 | rbind( 112 | gd = round(gd_result$par[, 1], 5), 113 | lm = coef(lm(y ~ x1 + x2)) 114 | ) 115 | 116 | # summary(lm(y ~ x1 + x2)) 117 | 118 | 119 | #' # Source 120 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/gradient_descent.R 121 | -------------------------------------------------------------------------------- /ModelFitting/hmm_viterbi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | From the wikipedia page with slight modification 5 | https://en.wikipedia.org/wiki/Viterbi_algorithm#Example 6 | """ 7 | 8 | def viterbi(obs, states, start_p, trans_p, emit_p): 9 | V = [{}] 10 | 11 | for st in states: 12 | V[0][st] = {"prob": start_p[st] * emit_p[st][obs[0]], "prev": None} 13 | 14 | # Run Viterbi when t > 0 15 | 16 | for t in range(1, len(obs)): 17 | V.append({}) 18 | 19 | for st in states: 20 | max_tr_prob = max(V[t-1][prev_st]["prob"]*trans_p[prev_st][st] for prev_st in states) 21 | 22 | for prev_st in states: 23 | if V[t-1][prev_st]["prob"] * trans_p[prev_st][st] == max_tr_prob: 24 | max_prob = max_tr_prob * emit_p[st][obs[t]] 25 | V[t][st] = {"prob": max_prob, "prev": prev_st} 26 | break 27 | 28 | for line in dptable(V): 29 | print(line) 30 | 31 | opt = [] 32 | 33 | # The highest probability 34 | max_prob = max(value["prob"] for value in V[-1].values()) 35 | previous = None 36 | 37 | # Get most probable state and its backtrack 38 | for st, data in V[-1].items(): 39 | if data["prob"] == max_prob: 40 | opt.append(st) 41 | previous = st 42 | break 43 | 44 | # Follow the backtrack till the first observation 45 | for t in range(len(V) - 2, -1, -1): 46 | opt.insert(0, V[t + 1][previous]["prev"]) 47 | previous = V[t + 1][previous]["prev"] 48 | 49 | print('The steps of states are ' + ' '.join(opt) + ' with highest probability of %s' % max_prob) 50 | 51 | 52 | def dptable(V): 53 | # Print a table of steps from dictionary 54 | yield " ".join(("%12d" % i) for i in range(len(V))) 55 | 56 | for state in V[0]: 57 | yield "%.7s: " % state + " ".join("%.7s" % ("%f" % v[state]["prob"]) for v in V) 58 | 59 | #The function viterbi takes the following arguments: obs is the sequence of observations, e.g. ['normal', 'cold', 'dizzy']; states is the set of hidden states; start_p is the start probability; trans_p are the transition probabilities; and emit_p are the emission probabilities. For simplicity of code, we assume that the observation sequence obs is non-empty and that trans_p[i][j] and emit_p[i][j] is defined for all states i,j. 
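# For the running example below (obs = ('normal', 'cold', 'dizzy')), the
# algorithm returns the state path Healthy -> Healthy -> Fever with highest
# probability 0.01512.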
60 | 61 | #In the running example, the forward/Viterbi algorithm is used as follows: 62 | 63 | 64 | obs = ('normal', 'cold', 'dizzy') 65 | states = ('Healthy', 'Fever') 66 | start_p = {'Healthy': 0.6, 'Fever': 0.4} 67 | trans_p = { 68 | 'Healthy' : {'Healthy': 0.7, 'Fever': 0.3}, 69 | 'Fever' : {'Healthy': 0.4, 'Fever': 0.6} 70 | } 71 | emit_p = { 72 | 'Healthy' : {'normal': 0.5, 'cold': 0.4, 'dizzy': 0.1}, 73 | 'Fever' : {'normal': 0.1, 'cold': 0.3, 'dizzy': 0.6} 74 | } 75 | viterbi(obs,states,start_p,trans_p,emit_p) 76 | -------------------------------------------------------------------------------- /ModelFitting/hurdle.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Hurdle Models" 3 | #' author: "Michael Clark" 4 | #' date: "" 5 | #' --- 6 | #' 7 | #' 8 | #' # Poisson 9 | hurdpoisloglik = function(y, X, par) { 10 | # Extract parameters 11 | logitpars = par[grep('logit', names(par))] 12 | poispars = par[grep('pois', names(par))] 13 | 14 | # Logit model part 15 | Xlogit = X 16 | ylogit = ifelse(y == 0, 0, 1) 17 | 18 | LPlogit = Xlogit %*% logitpars 19 | mulogit = plogis(LPlogit) 20 | 21 | # Calculate the likelihood 22 | logliklogit = -sum( ylogit*log(mulogit) + (1 - ylogit)*log(1 - mulogit) ) 23 | 24 | # Poisson part 25 | Xpois = X[y > 0, ] 26 | ypois = y[y > 0] 27 | 28 | mupois = exp(Xpois %*% poispars) 29 | 30 | # Calculate the likelihood 31 | loglik0 = -mupois 32 | loglikpois = -sum(dpois(ypois, lambda = mupois, log = TRUE)) + sum(log(1 - exp(loglik0))) 33 | 34 | # combine likelihoods 35 | loglik = loglikpois + logliklogit 36 | loglik 37 | } 38 | 39 | 40 | hurdNBloglik = function(y, X, par) { 41 | # Extract parameters 42 | logitpars = par[grep('logit', names(par))] 43 | NegBinpars = par[grep('NegBin', names(par))] 44 | 45 | theta = exp(par[grep('theta', names(par))]) 46 | 47 | # Logit model part 48 | Xlogit = X 49 | ylogit = ifelse(y == 0, 0, 1) 50 | 51 | LPlogit = Xlogit%*%logitpars 52 | mulogit = plogis(LPlogit) 53 | 54 | # Calculate the likelihood 55 | logliklogit = -sum( ylogit*log(mulogit) + (1 - ylogit)*log(1 - mulogit) ) 56 | 57 | #NB part 58 | XNB = X[y > 0, ] 59 | yNB = y[y > 0] 60 | 61 | muNB = exp(XNB %*% NegBinpars) 62 | 63 | # Calculate the likelihood 64 | loglik0 = dnbinom(0, mu = muNB, size = theta, log = TRUE) 65 | loglik1 = dnbinom(yNB, mu = muNB, size = theta, log = TRUE) 66 | loglikNB = -( sum(loglik1) - sum(log(1 - exp(loglik0))) ) 67 | 68 | # combine likelihoods 69 | loglik = loglikNB + logliklogit 70 | loglik 71 | } 72 | 73 | 74 | #' # Data Import 75 | 76 | #' Import a simple data set. Example from the Stata help file for zinb command; 77 | #' can compare results with hnblogit command 78 | 79 | library(haven) 80 | 81 | fish = read_dta("http://www.stata-press.com/data/r11/fish.dta") 82 | 83 | # Get some starting values. 84 | 85 | init_mod = glm( 86 | count ~ persons + livebait, 87 | data = fish, 88 | family = poisson, 89 | x = TRUE, 90 | y = TRUE 91 | ) 92 | 93 | #' for these functions, a named vector for the starting values 94 | starts = c(logit = coef(init_mod), pois = coef(init_mod)) 95 | 96 | #' # Poisson hurdle 97 | 98 | #' Use `optim` to estimate parameters. I fiddle with some options to reproduce the 99 | #' hurdle function as much as possible. 
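#' For reference, the count part of `hurdpoisloglik` above is a zero-truncated
#' Poisson: for a positive count y, P(Y = y | Y > 0) = dpois(y, mu) / (1 - exp(-mu)),
#' since exp(-mu) is the Poisson probability of a zero. A quick numeric
#' illustration with arbitrary values:

dpois(2, lambda = 3) / (1 - exp(-3))  # truncated probability of y = 2 given y > 0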
100 | #' 101 | optPois1 = optim( 102 | par = starts, 103 | fn = hurdpoisloglik, 104 | X = init_mod$x, 105 | y = init_mod$y, 106 | control = list(maxit = 5000, reltol = 1e-12), 107 | hessian = TRUE 108 | ) 109 | # optPois1 110 | 111 | #' Extract the elements from the output to create a summary table. 112 | B = optPois1$par 113 | se = sqrt(diag(solve(optPois1$hessian))) 114 | Z = B/se 115 | p = ifelse(Z >= 0, pnorm(Z, lower = FALSE)*2, pnorm(Z)*2) 116 | summarytable = round(data.frame(B, se, Z, p), 3) 117 | 118 | list(summary = summarytable, ll = optPois1$value) 119 | 120 | #' Compare to hurdle from pscl package. 121 | library(pscl) 122 | 123 | poismod = hurdle( 124 | count ~ persons + livebait, 125 | data = fish, 126 | zero.dist = "binomial", 127 | dist = "poisson" 128 | ) 129 | 130 | summary(poismod) 131 | 132 | 133 | #' # Negative Binomial hurdle 134 | 135 | 136 | starts = c( 137 | logit = coef(init_mod), 138 | NegBin = coef(init_mod), 139 | theta = 1 140 | ) 141 | 142 | optNB1 = optim( 143 | par = starts, 144 | fn = hurdNBloglik, 145 | X = init_mod$x, 146 | y = init_mod$y, 147 | control = list(maxit = 5000, reltol = 1e-12), 148 | method = "BFGS", 149 | hessian = TRUE 150 | ) 151 | # optNB1 152 | 153 | B = optNB1$par 154 | se = sqrt(diag(solve(optNB1$hessian))) 155 | Z = B/se 156 | p = ifelse(Z >= 0, pnorm(Z, lower = FALSE)*2, pnorm(Z)*2) 157 | summarytable = round(data.frame(B, se, Z, p), 3) 158 | list(summary = summarytable, ll = optNB1$value) 159 | 160 | NBmod = hurdle( 161 | count ~ persons + livebait, 162 | data = fish, 163 | zero.dist = "binomial", 164 | dist = "negbin" 165 | ) 166 | 167 | summary(NBmod) 168 | 169 | -------------------------------------------------------------------------------- /ModelFitting/ipw.R: -------------------------------------------------------------------------------- 1 | # Demonstration of a simple marginal structural model for estimation of 2 | # so-called 'causal' effects using inverse probability weighting. 3 | 4 | # Example data is from, and comparison made to, the ipw package. 
See more here: 5 | # https://www.jstatsoft.org/article/view/v043i13/v43i13.pdf 6 | 7 | 8 | # Preliminaries ----------------------------------------------------------- 9 | 10 | library(tidyverse) 11 | library(ipw) 12 | 13 | # Data Setup -------------------------------------------------------------- 14 | 15 | # this example is from the helpfile at ?ipwpoint 16 | set.seed(16) 17 | n <- 1000 18 | simdat <- data.frame(l = rnorm(n, 10, 5)) 19 | a.lin <- simdat$l - 10 20 | pa <- plogis(a.lin) 21 | 22 | simdat <- simdat %>% 23 | mutate( 24 | a = rbinom(n, 1, prob = pa), 25 | y = 10 * a + 0.5 * l + rnorm(n, -10, 5) 26 | ) 27 | 28 | 29 | ipw_result <- ipwpoint( 30 | exposure = a, 31 | family = "binomial", 32 | link = "logit", 33 | numerator = ~ 1, 34 | denominator = ~ l, 35 | data = simdat 36 | ) 37 | 38 | summary(ipw_result$ipw.weights) 39 | ipwplot(ipw_result$ipw.weights) 40 | 41 | 42 | # Create the weights by hand for demonstration ---------------------------- 43 | 44 | ps_num = fitted(glm(a ~ 1, data = simdat, family = 'binomial')) 45 | ps_num[simdat$a == 0] = 1 - ps_num[simdat$a == 0] 46 | 47 | ps_den = fitted(glm(a ~ l, data = simdat, family = 'binomial')) 48 | ps_den[simdat$a == 0] = 1 - ps_den[simdat$a == 0] 49 | 50 | wts = ps_num / ps_den 51 | 52 | # compare 53 | rbind(summary(wts), summary(ipw_result$ipw.weights)) 54 | 55 | # Add inverse probability weights to the data if desired 56 | simdat <- simdat %>% 57 | mutate(sw = ipw_result$ipw.weights) 58 | 59 | 60 | # Marginal Structural Model ----------------------------------------------- 61 | 62 | # Marginal structural model for the causal effect of `a` on `y` corrected for 63 | # confounding by `l` using inverse probability weighting with robust standard 64 | # error from the survey package. 65 | 66 | library("survey") 67 | 68 | msm <- svyglm( 69 | y ~ a, 70 | design = svydesign(~ 1, weights = ~ sw, data = simdat) 71 | ) 72 | 73 | summary(msm) 74 | 75 | # create the likelihood function for using the weights 76 | maxlike = function( 77 | par, # parameters to be estimated; first is taken to be sigma 78 | X, # model matrix 79 | y, # target variable 80 | wts # estimated weights 81 | ) { 82 | beta = par[-1] 83 | lp = X %*% beta 84 | sigma = exp(par[1]) # eponentiated value to stay positive 85 | ll = dnorm(y, mean = lp, sd = sigma, log = TRUE) # weighted likelihood 86 | 87 | -sum(ll*wts) 88 | 89 | # same as 90 | # ll = dnorm(y, mean = lp, sd = sigma)^wts 91 | # -sum(log(ll)) 92 | } 93 | 94 | X = cbind(1, simdat$a) 95 | y = simdat$y 96 | 97 | result = optim( 98 | par = c(sigma = 0, intercept = 0, b = 0), 99 | fn = maxlike, 100 | X = X, 101 | y = y, 102 | wts = wts, 103 | hessian = TRUE, 104 | method = 'BFGS', 105 | control = list(abstol = 1e-12) 106 | ) 107 | 108 | dispersion = exp(result$par[1])^2 109 | beta = result$par[-1] 110 | 111 | 112 | # Compute standard errors ------------------------------------------------- 113 | 114 | # the following is the survey package raw version to get the appropriate 115 | # standard errors, which the ipw approach uses 116 | glm_basic = glm(y ~ a, data = simdat, weights = wts) # to get unscaled cov 117 | res = resid(glm_basic, type = 'working') # residuals 118 | glm_vcov_unsc = summary(glm_basic)$cov.unscaled # weighted vcov unscaled by dispersion solve(crossprod(qr(X))) 119 | estfun = X * res * wts 120 | x = estfun %*% glm_vcov_unsc 121 | 122 | # get standard errors 123 | se = sqrt(diag(crossprod(x)*n/(n-1))) # a robust standard error 124 | se_robust = sqrt(diag(sandwich::sandwich(glm_basic))) # much easier way 
to get it 125 | se_msm = sqrt(diag(vcov(msm))) 126 | 127 | tibble(se, se_robust, se_msm) 128 | 129 | 130 | # Compare results --------------------------------------------------------- 131 | 132 | tibble( 133 | Estimate = beta, 134 | init_se = sqrt(diag(solve(result$hessian)))[c('intercept', 'b')], # same as scaled se from glm_basic 135 | se_robust = se_robust, 136 | t = Estimate/se, 137 | p = 2*pt(abs(t), df = n-ncol(X), lower.tail = FALSE), 138 | dispersion = dispersion 139 | ) 140 | 141 | # compare to msm 142 | broom::tidy(msm) 143 | -------------------------------------------------------------------------------- /ModelFitting/lasso.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: " L1 (lasso) regularization" 3 | #' author: "Michael Clark" 4 | #' css: '../other.css' 5 | #' highlight: pygments 6 | #' date: "" 7 | #' --- 8 | #' 9 | #' See Tibshirani (1996) for the source, or Murphy PML (2012) for a nice 10 | #' overview (watch for typos in depictions). A more conceptual depiction of the 11 | #' lasso can be found in penalized_ML.R. 12 | #' 13 | #' # Coordinate descent 14 | #' 15 | #' 16 | lasso <- function( 17 | X, # model matrix 18 | y, # target 19 | lambda = .1, # penalty parameter 20 | soft = TRUE, # soft vs. hard thresholding 21 | tol = 1e-6, # tolerance 22 | iter = 100, # number of max iterations 23 | verbose = TRUE # print out iteration number 24 | ) { 25 | 26 | # soft thresholding function 27 | soft_thresh <- function(a, b) { 28 | out = rep(0, length(a)) 29 | out[a > b] = a[a > b] - b 30 | out[a < -b] = a[a < -b] + b 31 | out 32 | } 33 | 34 | w = solve(crossprod(X) + diag(lambda, ncol(X))) %*% crossprod(X,y) 35 | tol_curr = 1 36 | J = ncol(X) 37 | a = rep(0, J) 38 | c_ = rep(0, J) 39 | i = 1 40 | 41 | while (tol < tol_curr && i < iter) { 42 | w_old = w 43 | a = colSums(X^2) 44 | l = length(y)*lambda # for consistency with glmnet approach 45 | c_ = sapply(1:J, function(j) sum( X[,j] * (y - X[,-j] %*% w_old[-j]) )) 46 | if (soft) { 47 | for (j in 1:J) { 48 | w[j] = soft_thresh(c_[j]/a[j], l/a[j]) 49 | } 50 | } 51 | else { 52 | w = w_old 53 | w[c_< l & c_ > -l] = 0 54 | } 55 | 56 | tol_curr = crossprod(w - w_old) 57 | i = i + 1 58 | if (verbose && i%%10 == 0) message(i) 59 | } 60 | 61 | w 62 | } 63 | 64 | #' # Data setup 65 | #' 66 | #' 67 | set.seed(8675309) 68 | N = 500 69 | p = 10 70 | X = scale(matrix(rnorm(N*p), ncol=p)) 71 | b = c(.5, -.5, .25, -.25, .125, -.125, rep(0, p-6)) 72 | y = scale(X %*% b + rnorm(N, sd=.5)) 73 | lambda = .1 74 | 75 | 76 | # debugonce(lasso) 77 | 78 | #' Note, if `lambda=0`, result is the same as `lm.fit`. 79 | #' 80 | #' 81 | result_soft = lasso( 82 | X, 83 | y, 84 | lambda = lambda, 85 | tol = 1e-12, 86 | soft = TRUE 87 | ) 88 | 89 | result_hard = lasso( 90 | X, 91 | y, 92 | lambda = lambda, 93 | tol = 1e-12, 94 | soft = FALSE 95 | ) 96 | 97 | 98 | 99 | 100 | #' `glmnet` is by default a mixture of ridge and lasso penalties, setting alpha 101 | #' = 1 reduces to lasso (alpha=0 would be ridge). We set the lambda to a couple 102 | #' values while only wanting the one set to the same lambda value as above (s). 
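#' As an aside before the comparison, the soft-thresholding step in the
#' coordinate descent updates above can be written in one vectorized line;
#' `soft_vec` is an illustrative name for a function equivalent to the
#' loop-based `soft_thresh` inside `lasso`:

soft_vec = function(a, b) sign(a) * pmax(abs(a) - b, 0)  # illustrative vectorized version
soft_vec(c(-2, -.5, 0, .5, 2), b = 1)                    # values inside [-1, 1] shrink to zero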
103 | 104 | 105 | library(glmnet) 106 | 107 | glmnet_res = coef( 108 | glmnet( 109 | X, 110 | y, 111 | alpha = 1, 112 | lambda = c(10, 1, lambda), 113 | thresh = 1e-12, 114 | intercept = FALSE 115 | ), 116 | s = lambda 117 | ) 118 | 119 | library(lassoshooting) 120 | 121 | ls_res = lassoshooting( 122 | X = X, 123 | y = y, 124 | lambda = length(y) * lambda, 125 | thr = 1e-12 126 | ) 127 | 128 | 129 | #' # Comparison 130 | 131 | data.frame( 132 | lm = coef(lm(y ~ . - 1, data.frame(X))), 133 | lasso_soft = result_soft, 134 | lasso_hard = result_hard, 135 | lspack = ls_res$coef, 136 | glmnet = glmnet_res[-1, 1], 137 | truth = b 138 | ) 139 | 140 | 141 | 142 | #' # Source 143 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/lasso.R 144 | 145 | 146 | #' for some more detailed R code, check out 147 | #' http://jocelynchi.com/a-coordinate-descent-algorithm-for-the-lasso-problem (now defunct, but might find relevant article at the website) 148 | 149 | -------------------------------------------------------------------------------- /ModelFitting/naivebayes.R: -------------------------------------------------------------------------------- 1 | # Naive bayes demo for binary data 2 | 3 | 4 | # Initialization ---------------------------------------------------------- 5 | 6 | # generate some data 7 | 8 | set.seed(123) 9 | 10 | x = matrix(sample(0:1, 50, replace = TRUE), ncol = 5) 11 | xf = data.frame(lapply(data.frame(x), factor)) 12 | y = sample(0:1, 10, prob = c(.25, .75), replace = TRUE) 13 | 14 | 15 | # use e1071 for comparison 16 | 17 | library(e1071) 18 | 19 | m = naiveBayes(xf, y) 20 | 21 | m 22 | 23 | 24 | 25 | # Base R approach ----------------------------------------------------------- 26 | 27 | lapply(xf, function(var) 28 | t(prop.table(table(' ' = var, y), margin = 2))) 29 | -------------------------------------------------------------------------------- /ModelFitting/nelder_mead.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | import copy 5 | 6 | ''' 7 | Francois Chollet's Nelder-Mead in Python. 8 | https://github.com/fchollet/nelder-mead/blob/master/nelder_mead.py 9 | Pure Python/Numpy implementation of the Nelder-Mead algorithm. 10 | Reference: https://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method 11 | ''' 12 | 13 | 14 | def nelder_mead( 15 | f, 16 | x_start, 17 | step = 0.1, 18 | no_improve_thr = 10e-6, 19 | no_improv_break = 10, 20 | max_iter = 0, 21 | alpha = 1., 22 | gamma = 2., 23 | rho = 0.5, 24 | sigma = 0.5 25 | ): 26 | ''' 27 | @param f (function): function to optimize, must return a scalar score 28 | and operate over a numpy array of the same dimensions as x_start 29 | @param x_start (numpy array): initial position 30 | @param step (float): look-around radius in initial step 31 | @no_improv_thr, no_improv_break (float, int): break after no_improv_break iterations with 32 | an improvement lower than no_improv_thr 33 | @max_iter (int): always break after this number of iterations. 34 | Set it to 0 to loop indefinitely. 
35 | @alpha, gamma, rho, sigma (floats): parameters of the algorithm 36 | (see Wikipedia page for reference) 37 | return: tuple (best parameter array, best score) 38 | ''' 39 | 40 | # init 41 | dim = len(x_start) 42 | prev_best = f(x_start) 43 | no_improv = 0 44 | res = [[x_start, prev_best]] 45 | 46 | for i in range(dim): 47 | x = copy.copy(x_start) 48 | x[i] = x[i] + step 49 | score = f(x) 50 | res.append([x, score]) 51 | 52 | # simplex iter 53 | iters = 0 54 | while 1: 55 | # order 56 | res.sort(key=lambda x: x[1]) 57 | best = res[0][1] 58 | 59 | # break after max_iter 60 | if max_iter and iters >= max_iter: 61 | return res[0] 62 | iters += 1 63 | 64 | # break after no_improv_break iterations with no improvement 65 | print('...best so far:', best) 66 | 67 | if best < prev_best - no_improve_thr: 68 | no_improv = 0 69 | prev_best = best 70 | else: 71 | no_improv += 1 72 | 73 | if no_improv >= no_improv_break: 74 | return res[0] 75 | 76 | # centroid 77 | x0 = [0.] * dim 78 | for tup in res[:-1]: 79 | for i, c in enumerate(tup[0]): 80 | x0[i] += c / (len(res)-1) 81 | 82 | # reflection 83 | xr = x0 + alpha*(x0 - res[-1][0]) 84 | rscore = f(xr) 85 | if res[0][1] <= rscore < res[-2][1]: 86 | del res[-1] 87 | res.append([xr, rscore]) 88 | continue 89 | 90 | # expansion 91 | if rscore < res[0][1]: 92 | xe = x0 + gamma*(x0 - res[-1][0]) 93 | escore = f(xe) 94 | if escore < rscore: 95 | del res[-1] 96 | res.append([xe, escore]) 97 | continue 98 | else: 99 | del res[-1] 100 | res.append([xr, rscore]) 101 | continue 102 | 103 | # contraction 104 | xc = x0 + rho*(x0 - res[-1][0]) 105 | cscore = f(xc) 106 | if cscore < res[-1][1]: 107 | del res[-1] 108 | res.append([xc, cscore]) 109 | continue 110 | 111 | # reduction 112 | x1 = res[0][0] 113 | nres = [] 114 | for tup in res: 115 | redx = x1 + sigma*(tup[0] - x1) 116 | score = f(redx) 117 | nres.append([redx, score]) 118 | res = nres 119 | 120 | 121 | if __name__ == "__main__": 122 | # test 123 | import math 124 | import numpy as np 125 | 126 | def f(x): 127 | return math.sin(x[0]) * math.cos(x[1]) * (1. / (abs(x[2]) + 1)) 128 | 129 | nelder_mead(f, np.array([0., 0., 0.])) 130 | -------------------------------------------------------------------------------- /ModelFitting/newton_irls.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: " GLM estimation" 3 | #' subtitle: "Newton and IRLS" 4 | #' author: "Michael Clark" 5 | #' css: '../other.css' 6 | #' highlight: pygments 7 | #' date: "" 8 | #' --- 9 | #' 10 | #' # GLM estimation examples 11 | 12 | #' Examples of maximum likelihood estimation via a variety of means. See the 13 | #' gradientdescent.R script for that approach. Here we demonstrate Newton's and 14 | #' Iterated Reweighted Least Squares approaches via logistic regression. 15 | #' 16 | #' 17 | #' For the following, I had Murphy's PML text open and more or less followed the 18 | #' algorithms in chapter 8. Note that for Newton's method, this doesn't 19 | #' implement a line search to find a more optimal stepsize at a given iteration. 20 | #' 21 | #' # Data Prep 22 | #' 23 | #' Predict graduate school admission based on gre, gpa, and school rank 24 | #' (higher=more prestige). See corresponding demo here: 25 | #' https://stats.idre.ucla.edu/stata/dae/logistic-regression/. The only 26 | #' difference is that I treat rank as numeric rather than categorical. 
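#' For reference, the updates implemented below: Newton's method iterates
#' beta <- beta - stepsize * solve(H) %*% g, with gradient g = X'(mu - y) and
#' Hessian H = X'SX, where S = diag(mu(1 - mu)); IRLS instead solves the
#' equivalent weighted least squares problem with working response
#' z = eta + (y - mu)/s at each iteration.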
27 |
28 |
29 | admit = haven::read_dta('https://stats.idre.ucla.edu/stat/stata/dae/binary.dta')
30 |
31 | comparison_model = glm(admit ~ gre + gpa + rank, data = admit, family = binomial)
32 |
33 | summary(comparison_model)
34 |
35 | X = model.matrix(comparison_model)
36 | y = comparison_model$y
37 |
38 |
39 | #' # Newton's method
40 |
41 | newton <- function(
42 | X,
43 | y,
44 | tol = 1e-12,
45 | iter = 500,
46 | stepsize = .5
47 | ) {
48 |
49 | # Args:
50 | # X: model matrix
51 | # y: target
52 | # tol: tolerance
53 | # iter: maximum number of iterations
54 | # stepsize: (0, 1)
55 |
56 | # initialize
57 | int = log(mean(y) / (1 - mean(y))) # intercept
58 | beta = c(int, rep(0, ncol(X) - 1))
59 | currtol = 1
60 | it = 0
61 | ll = 0
62 |
63 | while (currtol > tol && it < iter) {
64 | it = it + 1
65 | ll_old = ll
66 |
67 | mu = plogis(X %*% beta)[,1]
68 | g = crossprod(X, mu-y) # gradient
69 | S = diag(mu*(1-mu))
70 | H = t(X) %*% S %*% X # hessian
71 | beta = beta - stepsize * solve(H) %*% g
72 |
73 | ll = sum(dbinom(y, prob = mu, size = 1, log = TRUE))
74 | currtol = abs(ll - ll_old)
75 | }
76 |
77 | list(
78 | beta = beta,
79 | iter = it,
80 | tol = currtol,
81 | loglik = ll
82 | )
83 | }
84 |
85 |
86 | newton_result = newton(
87 | X = X,
88 | y = y,
89 | stepsize = .9,
90 | tol = 1e-8 # tol set to 1e-8 as in glm default
91 | )
92 |
93 | newton_result
94 | comparison_model
95 |
96 | rbind(
97 | newton = unlist(newton_result),
98 | glm_default = c(
99 | beta = coef(comparison_model),
100 | comparison_model$iter,
101 | tol = NA,
102 | loglik = logLik(comparison_model)
103 | )
104 | )
105 |
106 |
107 | #' # IRLS
108 | #' Note that `glm` is actually using IRLS, so the results from this should be
109 | #' fairly spot on.
110 |
111 | irls <- function(X, y, tol = 1e-12, iter = 500) {
112 |
113 | # initialize
114 | int = log(mean(y) / (1 - mean(y))) # intercept
115 | beta = c(int, rep(0, ncol(X) - 1))
116 | currtol = 1
117 | it = 0
118 | ll = 0
119 |
120 | while (currtol > tol && it < iter) {
121 | it = it + 1
122 | ll_old = ll
123 |
124 | eta = X %*% beta
125 | mu = plogis(eta)[,1]
126 | s = mu * (1 - mu)
127 | S = diag(s)
128 | z = eta + (y-mu)/s
129 | beta = solve(t(X) %*% S %*% X) %*% (t(X) %*% (S %*% z))
130 |
131 | ll = sum(
132 | dbinom(
133 | y,
134 | prob = plogis(X %*% beta),
135 | size = 1,
136 | log = TRUE
137 | )
138 | )
139 |
140 | currtol = abs(ll - ll_old)
141 | }
142 |
143 | list(
144 | beta = beta,
145 | iter = it,
146 | tol = currtol,
147 | loglik = ll,
148 | weights = plogis(X %*% beta) * (1 - plogis(X %*% beta))
149 | )
150 | }
151 |
152 | #' `tol` set to 1e-8 as in `glm` default.
153 | irls_result = irls(X = X, y = y, tol = 1e-8)
154 |
155 | str(irls_result)
156 | comparison_model
157 |
158 | #' # Comparison
159 | #'
160 | #' Compare all results.
161 | rbind( 162 | newton = unlist(newton_result), 163 | irls = unlist(irls_result[-length(irls_result)]), 164 | glm_default = c( 165 | beta = coef(comparison_model), 166 | comparison_model$iter, 167 | tol = NA, 168 | loglik = logLik(comparison_model) 169 | ) 170 | ) 171 | 172 | 173 | #' compare weights 174 | head(cbind(irls_result$weights, 175 | comparison_model$weights)) 176 | 177 | 178 | 179 | #' # Source 180 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/newton_irls.R -------------------------------------------------------------------------------- /ModelFitting/ordinal_regression.R: -------------------------------------------------------------------------------- 1 | # The following demonstrates a standard cumulative link ordinal regression model 2 | # via maximum likelihood. Default is with probit link function. Alternatively 3 | # you can compare it with a logit link, which will result in values roughly 4 | # 1.7*parameters estimates from the probit. 5 | 6 | 7 | ll_ord = function(par, X, y, probit = TRUE) { 8 | K = length(unique(y)) # number of classes K 9 | ncuts = K-1 # number of cutpoints/thresholds 10 | cuts = par[(1:ncuts)] # cutpoints 11 | beta = par[-(1:ncuts)] # regression coefficients 12 | lp = X %*% beta # linear predictor 13 | ll = rep(0, length(y)) # log likelihood 14 | pfun = ifelse(probit, pnorm, plogis) # which link to use 15 | 16 | for(k in 1:K){ 17 | if (k==1) { 18 | ll[y==k] = pfun((cuts[k] - lp[y==k]), log = TRUE) 19 | } 20 | else if (k < K) { 21 | ll[y==k] = log(pfun(cuts[k] - lp[y==k]) - pfun(cuts[k-1] - lp[y==k])) 22 | } 23 | else { 24 | ll[y==k] = log(1 - pfun(cuts[k-1] - lp[y==k])) 25 | } 26 | } 27 | 28 | -sum(ll) 29 | } 30 | 31 | # data generation from the probit perspective, where the underlying continuous 32 | # latent variable is normally distributed 33 | 34 | set.seed(808) 35 | N = 1000 # Sample size 36 | x = cbind(x1 = rnorm(N), x2 = rnorm(N)) # predictor variables 37 | beta = c(1,-1) # coefficients 38 | y_star = rnorm(N, mean = x %*% beta) # the underlying latent variable 39 | y_1 = y_star > -1.5 # -1.5 first cutpoint 40 | y_2 = y_star > .75 # .75 second cutpoint 41 | y_3 = y_star > 1.75 # 1.75 third cutpoint 42 | y = y_1 + y_2 + y_3 + 1 # target 43 | 44 | table(y) 45 | 46 | d = data.frame(x, y = factor(y)) 47 | 48 | init = c(-1, 1, 2, 0, 0) # initial values 49 | 50 | result_probit = optim( 51 | init, 52 | ll_ord, 53 | y = y, 54 | X = x, 55 | probit = TRUE, 56 | control = list(reltol = 1e-10) 57 | ) 58 | 59 | result_logit = optim( 60 | init, 61 | ll_ord, 62 | y = y, 63 | X = x, 64 | probit = FALSE, 65 | control = list(reltol = 1e-10) 66 | ) 67 | 68 | # compare with ordinal package 69 | library(ordinal) 70 | result_ordpack_probit = clm(y ~ x1 + x2, data = d, link = 'probit') 71 | result_ordpack_logit = clm(y ~ x1 + x2, data = d, link = 'logit') 72 | 73 | 74 | resprobit = data.frame(method = c('ll_ord', 'ordpack'), 75 | rbind(coef(result_probit), coef(result_ordpack_probit))) 76 | colnames(resprobit) = c('method', paste0('cut_', 1:3), 'beta1', 'beta2') 77 | 78 | resprobit 79 | 80 | reslogit = data.frame(method = c('ll_ord', 'ordpack'), 81 | rbind(coef(result_logit), coef(result_ordpack_logit))) 82 | colnames(reslogit) = c('method', paste0('cut_', 1:3), 'beta1', 'beta2') 83 | 84 | reslogit 85 | -------------------------------------------------------------------------------- /ModelFitting/poiszeroinfl.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | 
#' title: "Zero-inflated Poisson Model" 3 | #' author: "Michael Clark" 4 | #' date: "" 5 | #' --- 6 | #' 7 | 8 | 9 | #' Log likelihood function to estimate parameters for a Zero-inflated Poisson model. With examples 10 | #' and comparison to pscl package output. Also includes approach based on Hilbe GLM text. 11 | #' see also: https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/NBzeroinfl.R 12 | 13 | 14 | ZIP = function(y, X, par) { 15 | # arguments are response y, predictor matrix X, and parameter named starting points of 'logit' and 'pois' 16 | 17 | # Extract parameters 18 | logitpars = par[grep('logit', names(par))] 19 | poispars = par[grep('pois', names(par))] 20 | 21 | # Logit part; in this function Xlogit = Xpois but one could split X argument into Xlogi and Xpois for example 22 | Xlogit = X 23 | LPlogit = Xlogit %*% logitpars 24 | logi0 = plogis(LPlogit) # alternative 1/(1+exp(-LPlogit)) 25 | 26 | # Poisson part 27 | Xpois = X 28 | mupois = exp(Xpois %*% poispars) 29 | 30 | # LLs 31 | logliklogit = log( logi0 + exp(log(1 - logi0) - mupois) ) 32 | loglikpois = log(1 - logi0) + dpois(y, lambda = mupois, log = TRUE) 33 | 34 | # Hilbe formulation 35 | # logliklogit = log(logi0 + (1 - logi0)*exp(- mupois) ) 36 | # loglikpois = log(1-logi0) -mupois + log(mupois)*y #not necessary: - log(gamma(y+1)) 37 | 38 | y0 = y == 0 # 0 values 39 | yc = y > 0 # Count part 40 | 41 | loglik = sum(logliklogit[y0]) + sum(loglikpois[yc]) 42 | -loglik 43 | } 44 | 45 | #' Get the data 46 | library(haven) 47 | library(pscl) 48 | 49 | fish = read_dta("http://www.stata-press.com/data/r11/fish.dta") 50 | 51 | 52 | #' Get starting values or simply do zeros 53 | #' for this function, a named vector for the starting values 54 | #' for zip: need 'logit', 'pois' 55 | init.mod = glm( 56 | count ~ persons + livebait, 57 | data = fish, 58 | x = TRUE, 59 | y = TRUE, 60 | "poisson" 61 | ) 62 | 63 | # starts = c(logit = coef(init.mod), pois = coef(init.mod)) 64 | starts = c(rep(0, 3), rep(0, 3)) 65 | names(starts) = c(paste0('pois.', names(coef(init.mod))), 66 | paste0('logit.', names(coef(init.mod)))) 67 | 68 | 69 | #' Estimate with optim function 70 | optPois1 = optim( 71 | par = starts , 72 | fn = ZIP, 73 | X = init.mod$x, 74 | y = init.mod$y, 75 | method = "BFGS", 76 | control = list(maxit = 5000, reltol = 1e-12), 77 | hessian = TRUE 78 | ) 79 | 80 | # optPois1 81 | 82 | 83 | #' Comparison 84 | # Extract for clean display 85 | B = optPois1$par 86 | se = sqrt(diag(solve((optPois1$hessian)))) 87 | Z = B/se 88 | p = pnorm(abs(Z), lower = FALSE)*2 89 | 90 | # pscl results 91 | zipoismod = zeroinfl(count ~ persons + livebait, data = fish, dist = "poisson") 92 | summary(zipoismod) 93 | round(data.frame(B, se, Z, p), 4) 94 | -------------------------------------------------------------------------------- /ModelFitting/quantile_regression.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Quantile Regression" 3 | author: "Michael Clark" 4 | date: "February 15, 2016" 5 | output: 6 | html_document: 7 | css: ../other.css 8 | theme: united 9 | --- 10 | 11 | ```{r setup, include=FALSE} 12 | knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE, R.options=list(width=120)) 13 | ``` 14 | 15 | ### Simple quantile regression function and demo 16 | 17 | Loss function 18 | 19 | ```{r} 20 | library(tidyverse) 21 | 22 | qreg = function(par, X, y, tau){ 23 | lp = X%*%par 24 | res = y - lp 25 | loss = ifelse(res < 0 , -(1-tau)*res, tau*res) 26 | sum(loss) 27 
| } 28 | ``` 29 | 30 | 31 | 32 | ### Data Prep 33 | 34 | We'll use the `quantreg` package for comparison, and the classic data set on Belgian household income and food expenditure. Scale income if you want a meaningful centercept (the intercept evaluated at mean income). 35 | 36 | ```{r} 37 | library(quantreg) 38 | data(engel) 39 | ### engel$income = scale(engel$income) 40 | ``` 41 | 42 | ### Median estimation 43 | 44 | Compare `optim` output with the `quantreg` package. 45 | 46 | ```{r} 47 | optim( 48 | par = c(0, 0), 49 | fn = qreg, 50 | X = cbind(1, engel$income), 51 | y = engel$foodexp, 52 | tau = .5 53 | )$par 54 | 55 | rq(foodexp ~ income, tau = .5, data = engel) 56 | ``` 57 | 58 | 59 | ### Other quantiles 60 | 61 | ```{r} 62 | # quantiles 63 | qs = c(.05, .1, .25, .5, .75, .9, .95) 64 | 65 | resrq = coef(rq(foodexp ~ income, tau = qs, data = engel)) 66 | 67 | resoptim = sapply(qs, function(tau) 68 | optim( 69 | par = c(0, 0), 70 | fn = qreg, 71 | X = cbind(1, engel$income), 72 | y = engel$foodexp, 73 | tau = tau 74 | )$par) 75 | 76 | 77 | # compare results 78 | 79 | rbind(resrq, resoptim) %>% round(2) 80 | ``` 81 | 82 | ### Visualize 83 | 84 | ```{r, results='hide'} 85 | engel %>% 86 | qplot(data = ., 87 | income, 88 | foodexp, 89 | color = I(scales::alpha('orange', .25))) + 90 | geom_abline(aes( 91 | intercept = X1, 92 | slope = X2, 93 | color = group 94 | ), 95 | data = data.frame(t(resoptim), group = factor(qs))) 96 | ``` 97 | -------------------------------------------------------------------------------- /ModelFitting/ridge.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "L2 (ridge) regularization" 3 | #' author: "Michael Clark" 4 | #' css: '../other.css' 5 | #' highlight: pygments 6 | #' date: "" 7 | #' --- 8 | #' 9 | #' Compare to lasso.R. A more conceptual depiction of the lasso can be found in 10 | #' penalized_ML.R. 11 | 12 | 13 | ridge <- function(w, X, y, lambda = .1) { 14 | # X: model matrix; 15 | # y: target; 16 | # lambda: penalty parameter; 17 | # w: the weights/coefficients 18 | 19 | crossprod(y - X %*% w) + lambda * length(y) * crossprod(w) 20 | } 21 | 22 | 23 | set.seed(8675309) 24 | N = 500 25 | p = 10 26 | X = scale(matrix(rnorm(N * p), ncol = p)) 27 | b = c(.5, -.5, .25, -.25, .125, -.125, rep(0, 4)) 28 | y = scale(X %*% b + rnorm(N, sd = .5)) 29 | 30 | #' Note, if `lambda=0`, the result is the same as `lm.fit`. 31 | #' 32 | #' 33 | result_ridge = optim( 34 | rep(0, ncol(X)), 35 | ridge, 36 | X = X, 37 | y = y, 38 | lambda = .1, 39 | method = 'BFGS' 40 | ) 41 | 42 | #' Analytical result. 43 | #' 44 | result_ridge2 = solve(crossprod(X) + diag(length(y)*.1, ncol(X))) %*% crossprod(X, y) 45 | 46 | #' Alternative with augmented data (note sigma ignored as it equals 1, but otherwise 47 | #' X/sigma and y/sigma). 48 | #' 49 | X2 = rbind(X, diag(sqrt(length(y)*.1), ncol(X))) 50 | y2 = c(y, rep(0, ncol(X))) 51 | result_ridge3 = solve(crossprod(X2)) %*% crossprod(X2, y2) 52 | 53 | 54 | 55 | 56 | #' `glmnet` uses by default a mixture of ridge and lasso penalties; setting alpha 57 | #' = 1 reduces to the lasso, while alpha = 0 gives ridge. 58 | 59 | 60 | library(glmnet) 61 | glmnet_res = coef( 62 | glmnet( 63 | X, 64 | y, 65 | alpha = 0, 66 | lambda = c(10, 1, .1), 67 | thresh = 1e-12, 68 | intercept = F 69 | ), 70 | s = .1 71 | ) 72 | 73 | #' # Comparison 74 | 75 | data.frame( 76 | lm = coef(lm(y ~ .
- 1, data.frame(X))), 77 | ridge = result_ridge$par, 78 | ridge2 = result_ridge2, 79 | ridge3 = result_ridge3, 80 | glmnet = glmnet_res[-1, 1], 81 | truth = b 82 | ) 83 | 84 | 85 | 86 | 87 | #' # Source 88 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/ridge.R -------------------------------------------------------------------------------- /ModelFitting/standard_logistic.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Standard Logistic Models" 3 | #' author: "Michael Clark" 4 | #' css: '../other.css' 5 | #' highlight: pygments 6 | #' date: "" 7 | #' --- 8 | #' 9 | #' 10 | #' A standard logistic regression model via maximum likelihood or exponential 11 | #' loss. It can serve as an entry point into the wider world of 12 | #' computational statistics, as maximum likelihood is the fundamental approach used 13 | #' in most applied statistics and is also a key aspect of the Bayesian 14 | #' approach. Exponential loss is not confined to the standard glm setting, but 15 | #' is widely used in more predictive/'algorithmic' approaches, e.g. in 16 | #' machine learning and elsewhere. 17 | #' 18 | #' This follows the standard_lm.R script. 19 | #' 20 | #' 21 | #' 22 | #' # Data Setup 23 | #' 24 | 25 | set.seed(1235) # ensures replication 26 | 27 | 28 | # predictors and target 29 | 30 | N = 10000 # sample size 31 | k = 2 # number of desired predictors 32 | X = matrix(rnorm(N * k), ncol = k) 33 | 34 | # the linear predictor 35 | lp = -.5 + .2 * X[, 1] + .1 * X[, 2] # increasing N will get estimated values closer to these 36 | 37 | y = rbinom(N, size = 1, prob = plogis(lp)) 38 | 39 | dfXy = data.frame(X, y) 40 | 41 | 42 | 43 | #' 44 | #' # Functions 45 | #' 46 | #' A maximum likelihood approach. 47 | 48 | logreg_ML = function(par, X, y) { 49 | # arguments- 50 | # par: parameters to be estimated 51 | # X: predictor matrix with intercept column 52 | # y: response 53 | 54 | # setup 55 | beta = par # coefficients 56 | N = nrow(X) 57 | 58 | # linear predictor 59 | LP = X %*% beta # linear predictor 60 | mu = plogis(LP) # inverse logit link, i.e. the estimated probability 61 | 62 | # calculate likelihood 63 | L = dbinom(y, size = 1, prob = mu, log = TRUE) # log likelihood 64 | # L = y*log(mu) + (1 - y)*log(1-mu) # alternate log likelihood form 65 | 66 | -sum(L) # optim by default is minimization, and we want to maximize the likelihood 67 | # (see also fnscale in optim.control) 68 | } 69 | 70 | # An equivalent approach via exponential loss function. 71 | 72 | logreg_exp = function(par, X, y) { 73 | # arguments- 74 | # par: parameters to be estimated 75 | # X: predictor matrix with intercept column 76 | # y: response 77 | 78 | # setup 79 | beta = par # coefficients 80 | 81 | # linear predictor 82 | LP = X %*% beta # linear predictor 83 | 84 | # calculate the exponential loss (convert y to -1:1 from 0:1) 85 | sum(exp(-ifelse(y, 1, -1) * .5 * LP)) 86 | } 87 | 88 | 89 | #' # Obtain Model Estimates 90 | #' Setup for use with `optim`.
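#'
#' An aside on why the two objectives should agree: with y coded -1/1, the population minimizer of E[exp(-y*f/2)] is f = log(p/(1-p)), i.e. the logistic-scale linear predictor, so minimizing exponential loss should recover roughly the same coefficients as maximum likelihood. The following is a minimal numeric sketch of that claim (the probability .7 is illustrative only, not part of the original script):

p_check = .7

# minimize the expected exponential loss at a single fixed probability...
optimize(function(f) p_check * exp(-f / 2) + (1 - p_check) * exp(f / 2), c(-5, 5))$minimum

# ...and compare to the logit of that probability; both are ~ .847
qlogis(p_check)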
91 | 92 | X = cbind(1, X) 93 | 94 | # initial values 95 | 96 | init = rep(0, ncol(X)) 97 | names(init) = c('intercept', 'b1', 'b2') 98 | 99 | optlmML = optim( 100 | par = init, 101 | fn = logreg_ML, 102 | X = X, 103 | y = y, 104 | control = list(reltol = 1e-8) 105 | ) 106 | 107 | optglmClass = optim( 108 | par = init, 109 | fn = logreg_exp, 110 | X = X, 111 | y = y, 112 | control = list(reltol = 1e-15) 113 | ) 114 | 115 | pars_ML = optlmML$par 116 | pars_exp = optglmClass$par 117 | 118 | 119 | #' # Comparison 120 | #' 121 | #' Compare to `glm`. 122 | 123 | modglm = glm(y ~ ., dfXy, family = binomial) 124 | 125 | rbind( 126 | pars_ML, 127 | pars_exp, 128 | pars_GLM = coef(modglm) 129 | ) 130 | 131 | 132 | #' # Source 133 | #' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/standard_logistic.R -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/compoundInterest.R: -------------------------------------------------------------------------------- 1 | # a (yearly) compound interest calculator to demonstrate recursion in a simple fashion 2 | 3 | compoundInterest <- function(principal, years, rate, yearlybump) { 4 | if(years==0) return(principal) 5 | 6 | if(years>0) compoundInterest(principal=principal*(1+rate) + yearlybump, years=years-1, rate=rate, yearlybump=yearlybump) 7 | } 8 | 9 | compoundInterest(principal=10000, years=10, rate=.05, yearlybump=1000) 10 | 11 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/fillbyLastRecursive.R: -------------------------------------------------------------------------------- 1 | # A simple function to fill NAs by the last value; not something one should 2 | # regularly do but it does come up from time to time for variables such as age, year 3 | # etc. Just an exercise in recursion. 4 | 5 | y = rnorm(100) 6 | y2 = y 7 | y2[sample(100, 30)] = NA 8 | 9 | fillbyLast = function(x){ 10 | if (length(x) == 1){ 11 | return(x) 12 | } else { 13 | if (is.na(x[2])){ 14 | x[2] = x[1] 15 | } 16 | c(x[1], fillbyLast(x[2:length(x)])) 17 | } 18 | } 19 | 20 | y3 = fillbyLast(y2) 21 | cbind(y, y2, y3) 22 | 23 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/fizzbuzz.R: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | ### See for example: http://c2.com/cgi/wiki?FizzBuzzTest ### 3 | ### The stated goal is to take a sequence and change anything ### 4 | ### that is a multiple of 3 to 'Fizz', multiples of 5 to ### 5 | ### 'Buzz', and any multiples of both to 'FizzBuzz'. ### 6 | ### R makes it easy to generalize to any sequence/multiples ### 7 | ### and does so very efficiently.
### 8 | ################################################################# 9 | 10 | 11 | ## clean, clear, gets the job done; also generalizes beyond common example 12 | fizzbuzz = function(min, max, num1, num2){ 13 | x_ = min:max 14 | x = x_ 15 | x[x_%%num1 == 0] = "Fizz" 16 | x[x_%%num2 == 0] = "Buzz" 17 | x[x_%%(num1*num2) == 0] = "FizzBuzz" # note: num1*num2 is their least common multiple only when num1 and num2 are coprime 18 | x 19 | } 20 | 21 | fizzbuzz(1, 100, 3, 5) 22 | fizzbuzz(-50, 50, 7, 4) 23 | 24 | ## One-liner, just because you can 25 | fizzbuzzOneLine = function(min, max, num1, num2){ 26 | sapply(min:max, function(val) ifelse(val%%(num1*num2) == 0, "FizzBuzz", 27 | ifelse(val%%num1==0, 'Fizz', 28 | ifelse(val%%num2==0, 'Buzz', val)))) 29 | } 30 | 31 | fizzbuzzOneLine(1, 100, 3, 5) 32 | 33 | ## recursive for even more flexibility 34 | fizzbuzzRecursive = function(x, nums=c(15,3,5), nams=c('FizzBuzz','Fizz', 'Buzz')){ 35 | if (length(nums) == 0) {paste(x)} 36 | else { 37 | x = sapply(x, function(val) ifelse(!is.na(suppressWarnings(as.numeric(val))) && as.numeric(val) %% nums[1] == 0, nams[1], val)) 38 | fizzbuzzRecursive(x, nums[-1], nams[-1]) 39 | } 40 | } 41 | 42 | fizzbuzzRecursive(1:100, nums=c(15,3,5), nams=c('FizzBuzz','Fizz', 'Buzz')) 43 | 44 | 45 | cbind(fizzbuzz(1, 100, 3, 5), fizzbuzzOneLine(1, 100, 3, 5), fizzbuzzRecursive(1:100, nums=c(15,3,5), nams=c('FizzBuzz','Fizz', 'Buzz'))) 46 | cbind(fizzbuzz(-50, 50, 7, 4), fizzbuzzOneLine(-50, 50, 7, 4), fizzbuzzRecursive(-50:50, nums=c(28,7,4), nams=c('FizzBuzz','Fizz', 'Buzz'))) 47 | 48 | fizzbuzzRecursive(-50:50, nums=c(28, 21, 12, 7, 4, 3), nams=c('FizzBuzz','FizzKapow', 'BuzzKapow', 'Fizz', 'Buzz', 'Kapow')) 49 | 50 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/fizzbuzz.jl: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | ### See for example: http://c2.com/cgi/wiki?FizzBuzzTest ### 3 | ### The stated goal is to take a sequence and change anything ### 4 | ### that is a multiple of 3 to 'Fizz', multiples of 5 to ### 5 | ### 'Buzz', and any multiples of both to 'FizzBuzz'. ### 6 | ################################################################# 7 | 8 | 9 | # R example. R makes it easy to generalize to any sequence/multiples and does so very efficiently. 10 | 11 | #fizzbuzz = function(min, max, num1, num2){ 12 | # x_ = min:max 13 | # x = x_ 14 | # 15 | # x[x_%%num1 == 0] = "Fizz" 16 | # x[x_%%num2 == 0] = "Buzz" 17 | # x[x_%%(num1*num2) == 0] = "FizzBuzz" 18 | # x 19 | # } 20 | 21 | # fizzbuzz(1, 100, 3, 5) 22 | # fizzbuzz(-50, 50, 7, 4) 23 | 24 | 25 | 26 | # The following is one way to FizzBuzz with a standard loop approach. 27 | 28 | function fizzbuzz1(nmin, nmax, num1, num2) 29 | x0 = [nmin:nmax] 30 | x = Array(Any, length(x0)) 31 | 32 | for i in 1:length(x0) 33 | if x0[i]%(num1*num2)==0 34 | x[i] = "FizzBuzz" 35 | elseif x0[i]%num2==0 36 | x[i] = "Buzz" 37 | elseif x0[i]%num1==0 38 | x[i] = "Fizz" 39 | else 40 | x[i] = x0[i] 41 | end 42 | end 43 | 44 | return x 45 | end 46 | 47 | print(fizzbuzz1(1, 100, 3, 5)') 48 | print(fizzbuzz1(-50, 50, 7, 4)') 49 | 50 | 51 | 52 | # One can get fizzbuzz without a loop, but initializing x as x0 as in the R 53 | # script will make it one type regardless of the explicit 'Any' declaration 54 | # ('any' type declaration will be overwritten by Int or Float64, even using 55 | # convert). Could add a
double 'or' (as in the next example) or maybe 56 | # construct a special type, but that's unsatisfactory. No real code efficiency 57 | # is gained over the loop. 58 | 59 | function fizzbuzz2(nmin, nmax, num1, num2) 60 | x0 = [nmin:nmax] 61 | x = Array(Any, length(x0)) 62 | 63 | check1 = x0%num1.==0 64 | check2 = x0.%num2.==0 65 | check3 = x0.%(num1*num2).==0 66 | checkAll = check1 | check2 | check3 67 | 68 | x[check1] = "Fizz" 69 | x[check2] = "Buzz" 70 | x[check3] = "FizzBuzz" 71 | x[!checkAll] = x0[!checkAll] 72 | return x 73 | end 74 | 75 | print(fizzbuzz2(1, 100, 3, 5)') 76 | print(fizzbuzz2(-50, 50, 7, 4)') 77 | 78 | 79 | 80 | # A compromise approach that retains the 'any' type. Aside from the loop it's 81 | # pretty much the same as the R script. 82 | 83 | function fizzbuzz3(nmin, nmax, num1, num2) 84 | x0 = [nmin:nmax] 85 | x = Array(Any, length(x0)) 86 | 87 | for i in 1:length(x0) 88 | x[i] = x0[i] 89 | end 90 | 91 | x[x0%num1.==0] = "Fizz" 92 | x[x0%num2.==0] = "Buzz" 93 | x[x0%(num1*num2).==0] = "FizzBuzz" 94 | return x 95 | end 96 | 97 | print(fizzbuzz3(1, 100, 3, 5)') 98 | print(fizzbuzz3(-50, 50, 7, 4)') 99 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/fizzbuzz.py: -------------------------------------------------------------------------------- 1 | # Python Fizz Buzz. Replicate of the R code. 2 | 3 | def fizzbuzz(min, max, num1, num2): 4 | x = list(range(min, max+1)) 5 | for i in range(len(x)): 6 | if x[i] % (num1*num2)==0 : x[i] = 'FizzBuzz' 7 | elif x[i] % (num1)==0 : x[i] = 'Fizz' 8 | elif x[i] % (num2)==0 : x[i] = 'Buzz' 9 | return(x) 10 | 11 | fizzbuzz(1, 100, 3, 5) 12 | fizzbuzz(-50, 50, 7, 4) 13 | 14 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/matrixOperations_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/Other/Programming_Shenanigans/matrixOperations_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/stringReverseRecursively.R: -------------------------------------------------------------------------------- 1 | # reverse a string recursively 2 | 3 | revString = function(string){ 4 | require(stringr) 5 | if (nchar(string)==1) string 6 | else paste0(str_sub(string, -1), revString(str_sub(string, end=-2))) 7 | } 8 | 9 | 10 | string = paste0(letters, collapse = '') 11 | 12 | revString(string) 13 | 14 | # base R approach 15 | revString = function(string){ 16 | if (nchar(string)==1) string 17 | else paste0(substr(string, nchar(string), nchar(string)), 18 | revString(substr(string, 1, nchar(string)-1))) 19 | } 20 | 21 | 22 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/stringReverseRecursively.py: -------------------------------------------------------------------------------- 1 | def reverseString(aStr): 2 | if len(aStr) == 1: 3 | return aStr 4 | return aStr[-1] + reverseString(aStr[:-1]) 5 | 6 | import string 7 | 8 | string.ascii_letters 9 | reverseString(string.ascii_letters) 10 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/wordWrap.R: -------------------------------------------------------------------------------- 1 | wordWrap 
= function(text, lineLength){ 2 | require(stringr) 3 | idx = str_locate(str_sub(text, start = lineLength),' ')[,'start'] 4 | if(is.na(idx)) str_trim(text) 5 | else paste0(str_trim(str_sub(text, 1, lineLength+idx-1)), '\n', wordWrap(str_sub(text, start=lineLength+idx), lineLength)) 6 | } 7 | 8 | 9 | li = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.' 10 | 11 | cat(wordWrap(li, 80)) 12 | -------------------------------------------------------------------------------- /Other/Programming_Shenanigans/wordWrap.py: -------------------------------------------------------------------------------- 1 | def wordWrap(text, lineLength): 2 | idx = text[lineLength:].find(' ') 3 | if idx == -1: 4 | return(text.strip()) 5 | 6 | return(text[:lineLength+idx].strip() + '\n' + wordWrap(text[lineLength+idx:], lineLength)) 7 | 8 | import string 9 | li = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.' 10 | 11 | print(wordWrap(li, 80)) 12 | -------------------------------------------------------------------------------- /Other/Python Startup/mixedModel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jun 20 17:25:18 2015 4 | 5 | @author: MC 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | import scipy as sp 10 | import statsmodels.api as sm 11 | import statsmodels.formula.api as smf 12 | 13 | np.random.seed(1234) 14 | N = 1000 15 | nGroups = 40 16 | nPerGroup = N//nGroups 17 | x = np.random.normal(size=N) 18 | group = np.repeat(np.arange(0, nGroups), nPerGroup) 19 | ranEff = np.random.normal(scale=.5, size=nGroups) 20 | coefs = np.array([2,.2]).reshape(2,1) 21 | randInts = coefs[0] + ranEff[group] 22 | 23 | X = np.array([randInts, x]).T 24 | 25 | y = randInts + x*coefs[1] + np.random.normal(scale=.75, size=N) 26 | y.shape 27 | 28 | df = pd.DataFrame({'x':x, 'group':group, 'y':y}) 29 | 30 | ### Model 31 | mod0 = smf.mixedlm('y ~ x', data=df, groups=df['group']) 32 | mod = mod0.fit() 33 | print(mod.summary()) # intercept RE is in sd 34 | 35 | reModel = mod.random_effects['Intercept'][:] 36 | sm.qqplot(reModel, sp.stats.norm, line='45', fit=True, scale=.25) 37 | -------------------------------------------------------------------------------- /Other/Python Startup/regressionExamples.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jun 20 09:08:20 2015 4 | 5 | @author: MC 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import scipy as sp 11 | from sklearn.preprocessing import scale 12 | import statsmodels.api as sm 13 | import statsmodels.formula.api as smf 14 | # import seaborn as sea 15 | import matplotlib.pyplot as plt 16 | 17 |
# Regression example using np arrays, np matrices and statsmodels; serves as a 18 | # quick reminder for me of some basic statistical modeling in python; 19 | # Unfortunately numpy doesn't deal with vectors as matrices, assumes row 20 | # entry when creating arrays, and elementwise rather than matrix operations, 21 | # which combined means regularly transposing and reshaping, using built-in methods. 22 | 23 | 24 | ### Data setup 25 | np.random.seed(1234) 26 | 27 | N = 100 28 | x = np.random.normal(size=N) 29 | X = np.array((np.ones(N), (x))).T 30 | # compare to R: X = cbind(1, rnorm(N)) 31 | 32 | # true value of coefficients 33 | beta = np.array((5, .5)).reshape(2,1) 34 | 35 | # outcome of interest 36 | y = np.dot(X,beta) + np.random.normal(scale=.5, size=N).reshape(N,1) 37 | 38 | # example scale 39 | scale(y) 40 | 41 | ### Inspect 42 | X.shape 43 | X[:5, 1] 44 | beta.shape 45 | 46 | plt.plot(x, y, 'ro') 47 | 48 | 49 | ### Regression models 50 | 51 | ## numpy linalg 52 | np.linalg.lstsq(X, y)[0] 53 | 54 | ## Normal equations numpy array 55 | # note that Python will finally get a matrix multiplier (@ symbol) in 3.5 56 | xx = np.linalg.inv(np.dot(X.T, X)) 57 | xy = np.dot(X.T, y) 58 | 59 | # alternative 60 | xx = np.linalg.inv(X.T.dot(X)) 61 | xy = X.T.dot(y) 62 | 63 | np.dot(xx, xy) 64 | 65 | ## Normal equations numpy matrix 66 | # is only 2d but perhaps more intuitive and multiplication is matrix multiplication by default 67 | Xmat = np.matrix(X) 68 | np.linalg.inv(Xmat.T * Xmat) * np.dot(Xmat.T, y) 69 | 70 | ## statsmodels 71 | res0 = sm.OLS(y, X) 72 | res = sm.OLS(y, X).fit() 73 | 74 | res.summary() 75 | res.aic 76 | res.fittedvalues 77 | res.rsquared_adj 78 | 79 | ## using scipy linregress for simple regression 80 | slope, intercept, r_value, p_value, std_err = sp.stats.linregress(x, y[:,0]) 81 | [intercept, slope] 82 | 83 | ## pandas data frames with formula approach 84 | df = pd.DataFrame(X) 85 | df['y'] = y 86 | df.columns = ['Intercept', 'x', 'y'] 87 | df.head() 88 | 89 | res = smf.ols('y ~ x', data=df).fit() 90 | res.summary() 91 | 92 | plt.plot(x, y, 'ro', 93 | x, res.fittedvalues) 94 | 95 | -------------------------------------------------------------------------------- /Other/getRollCall.R: -------------------------------------------------------------------------------- 1 | ### This function allows one to get multiple rollcall data objects (and in 2 | ### parallel) via the readKH function in the pscl package. See the readKH 3 | ### function helpfile, voteview.com, and also the readme in rollcall folder in 4 | ### the Datasets repo, where the results of this are available for all 5 | ### legislatures. A little messy but works ok. With 7 threads it took ~90 6 | ### seconds to get all data possible, ~2 minutes for csv. So if 7 | ### you're not requesting quite a few, you wouldn't need to set cores very high. 8 | 9 | getRollCall = function(congress, HoRSenBoth='Both', matrix=F, cores=2, Dir=NULL, csv=F){ 10 | # Arguments: 11 | # congress- integer vector 12 | # HoRSenBoth- character, branch of Congress; House, Senate, or Both 13 | # matrix- logical, if matrix instead of rollcall format is desired. See pscl::convertCodes. See csv arg.
14 | # cores- integer, number of cores/threads for parallel processing (Windows) 15 | # Dir- character, where to write out RData files of rollcall objects, or csv if matrix and csv=T 16 | # csv- logical, write matrix of binary votes to file; only if matrix=T and Dir provided 17 | 18 | # requires stringr and pscl packages 19 | 20 | # initial checks 21 | if (csv && !matrix) message('matrix must be TRUE for csv files to be written. *csv files will not be written.') 22 | if (csv && is.null(Dir)) message('Dir must be specified for csv. *csv files will not be written.') 23 | 24 | ### create urls 25 | # initial check for first 9 since the early files are numbered 01 02 etc. 26 | congressforurl = as.character(congress) 27 | congressforurl[congress %in% 1:9] = paste0(0, congressforurl[congress %in% 1:9]) 28 | 29 | urls = paste0('ftp://voteview.com/', c('hou', 'sen'), rep(congressforurl, e=2), 'kh.ord') 30 | 31 | # deal with current congress as it comes from different website; made to work in subsequent years 32 | currentYear = as.numeric(substr(Sys.Date(), 1, 4)) 33 | curfutureCongress = data.frame(Year=2013:currentYear, 34 | Congress=rep(113:200, e=2, length=length(2013:currentYear))) 35 | currCongress = curfutureCongress[curfutureCongress$Year==currentYear, 'Congress'] 36 | 37 | current = ifelse(currCongress %in% congress, T, F) 38 | 39 | # replace url for current congress with different web address 40 | if (current) { 41 | urls[grep(currCongress, urls)] = c( 42 | paste0('http://adric.sscnet.ucla.edu/rollcall/static/','H', currCongress,'.ord'), 43 | paste0('http://adric.sscnet.ucla.edu/rollcall/static/','S', currCongress,'.ord')) 44 | } 45 | 46 | # reduce as requested 47 | if (HoRSenBoth=='House') { 48 | urls = urls[grep('hou|H', urls)] 49 | } else if (HoRSenBoth=='Senate') { 50 | urls = urls[grep('sen|S', urls)] 51 | } 52 | 53 | 54 | ### autocreate descriptions for objects and files just for giggles 55 | suppressPackageStartupMessages(require(stringr)) 56 | descripN = as.numeric(str_extract(urls, '[0-9]+')) 57 | descripB = str_extract(urls, 'hou|H|sen|S') 58 | 59 | toOrd = function(x){ 60 | lasttwo = x %% 100 61 | lastone = x %% 10 62 | suffix = sapply(as.character(lastone), switch, 63 | '1' = 'st', 64 | '2' = 'nd', 65 | '3' = 'rd', 66 | 'th', 67 | simplify=T) 68 | suffix[lasttwo %in% 11:13] = 'th' 69 | paste0(x, suffix) 70 | } 71 | 72 | descriptions = paste0(toOrd(descripN), ' U.S.
', ifelse(descripB %in% c('hou','H'), 'House of Representatives', 'Senate')) 73 | 74 | 75 | ### set up parallel 76 | library(parallel) 77 | cl = makeCluster(cores) 78 | clusterEvalQ(cl, library(pscl)) 79 | 80 | suppressPackageStartupMessages(require(pscl)) 81 | rcList = clusterMap(cl, fun=readKH, urls, desc=descriptions) 82 | 83 | rcenv = new.env() # so export doesn't look in global 84 | 85 | ### write out RData files 86 | if (!is.null(Dir) & !csv){ 87 | filelist = paste0(Dir, descriptions, '.RData') 88 | descriptionsBrief = paste0(descripB, descripN) 89 | 90 | clusterExport(cl, c('rcList','filelist', 'descriptionsBrief'), envir = rcenv) 91 | 92 | parSapply(cl, 1:length(rcList), function(i) { 93 | assign(descriptionsBrief[i], rcList[[i]]); save(list=descriptionsBrief[i], file=filelist[i]) 94 | }) 95 | } 96 | 97 | 98 | ### convert to binary 99 | if (matrix){ 100 | clusterExport(cl, c('rcList'), envir=rcenv) 101 | rcList = parLapply(cl, rcList, convertCodes) 102 | if (csv && !is.null(Dir)){ 103 | filelist = paste0(Dir, descriptions, '.csv') 104 | clusterMap(cl, write.csv, rcList, filelist) 105 | } 106 | } 107 | 108 | on.exit(stopCluster(cl)) 109 | rcList 110 | } 111 | -------------------------------------------------------------------------------- /Other/ggtheme.R: -------------------------------------------------------------------------------- 1 | ### A clean theme that gets rid of unnecessary gridlines and colored backgrounds. 2 | 3 | require(ggplot2) 4 | ggtheme = theme(legend.background = element_blank(), 5 | legend.key = element_blank(), 6 | panel.grid.minor = element_blank(), 7 | panel.grid.major = element_blank(), 8 | panel.background = element_blank(), 9 | panel.border = element_blank(), 10 | strip.background = element_blank(), 11 | plot.background = element_blank()) 12 | detach(package:ggplot2) -------------------------------------------------------------------------------- /Other/shakespeareanInsulter.R: -------------------------------------------------------------------------------- 1 | #-----------------------------------------------------------------------------------# 2 | # If you would like to be insulted every time you start up R, feel free to put this # 3 | # in your Rprofile.site file. The lists can be found in various places on the web, # 4 | # e.g. 
http://www.pangloss.com/seidel/Shaker/ # 5 | #-----------------------------------------------------------------------------------# 6 | 7 | 8 | shakesInsult = function(){ 9 | w1 = c('artless','bawdy','beslubbering','bootless','churlish','cockered','clouted','craven','currish','dankish','dissembling','droning','errant','fawning','fobbing','froward','frothy','gleeking','goatish','gorbellied','impertinent','infectious','jarring','loggerheaded','lumpish','mammering','mangled','mewling','paunchy','pribbling','puking','puny','qualling','rank','reeky','roguish','ruttish','saucy','spleeny','spongy','surly','tottering','unmuzzled','vain','venomed','villainous','warped','wayward','weedy','yeasty') 10 | 11 | w2 = c('base-court','bat-fowling','beef-witted','beetle-headed','boil-brained','clapper-clawed','clay-brained','common-kissing','crook-pated','dismal-dreaming','dizzy-eyed','doghearted','dread-bolted','earth-vexing','elf-skinned','fat-kidneyed','fen-sucked','flap-mouthed','fly-bitten','folly-fallen','fool-born','full-gorged','guts-griping','half-faced','hasty-witted','hedge-born','hell-hated','idle-headed','ill-breeding','ill-nurtured','knotty-pated','milk-livered','motley-minded','onion-eyed','plume-plucked','pottle-deep','pox-marked','reeling-ripe','rough-hewn','rude-growing','rump-fed','shard-borne','sheep-biting','spur-galled','swag-bellied','tardy-gaited','tickle-brained','toad-spotted','urchin-snouted','weather-bitten') 12 | 13 | w3 = c('apple-john','baggage','barnacle','bladder','boar-pig','bugbear','bum-bailey','canker-blossom','clack-dish','clotpole','coxcomb','codpiece','death-token','dewberry','flap-dragon','flax-wench','flirt-gill','foot-licker','fustilarian','giglet','gudgeon','haggard','harpy','hedge-pig','horn-beast','hugger-mugger','joithead','lewdster','lout','maggot-pie','malt-worm','mammet','measle','minnow','miscreant','moldwarp','mumble-news','nut-hook','pigeon-egg','pignut','puttock','pumpion','ratsbane','scut','skainsmate','strumpet','varlot','vassal','whey-face','wagtail') 14 | 15 | return( paste( 'Thou', sample(w1, 1), sample(w2, 1), sample(w3, 1), collapse=" ")) 16 | } 17 | 18 | cat('Your Shakespearean insult for the day is...', shakesInsult(),"\n") 19 | 20 | # 21 | # alternatively, use the cowsay package, to let various animals insult you! 22 | # 23 | require(cowsay) 24 | say(what = shakesInsult(), by = "random") 25 | -------------------------------------------------------------------------------- /Other/spurriousCorrelationwithRatios.R: -------------------------------------------------------------------------------- 1 | # a function that will take an input n and generate n x, y, and z values. The 2 | # correlation of interest is between x and y, when either or both are a ratio 3 | # based on z.
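# Why the spurious correlation arises: even if x and y are independent, x/z and
# y/z share the denominator z, so variation in z alone induces correlation
# between the two ratios. Pearson's classic approximation for mutually
# independent x, y, z is r(x/z, y/z) ~ vz^2 / sqrt((vx^2 + vz^2)*(vy^2 + vz^2)),
# where the v are coefficients of variation. A quick sketch checking that
# approximation against simulation (the sample size and distributions below are
# illustrative, not part of the original simulation):

cv = function(x) sd(x) / mean(x)
x0 = rpois(1e5, 10); y0 = rpois(1e5, 10); z0 = runif(1e5, 50, 100)
cv(z0)^2 / sqrt((cv(x0)^2 + cv(z0)^2) * (cv(y0)^2 + cv(z0)^2))  # approximate expected spurious correlation
cor(x0/z0, y0/z0)                                               # simulated value should be close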
4 | 5 | spurcorr = function(n){ 6 | x = rpois(n, 10) 7 | y = rpois(n, 10) 8 | z = runif(n, 50, 100) 9 | 10 | cbind(N=n, origCorr=cor(x,y), xzyzCorr=cor(x/z, y/z), xzyCorr=cor(x/z,y), xyzCorr=cor(x,y/z)) 11 | } 12 | 13 | # replicate at various sample sizes 14 | out = replicate(1000, sapply(c(10, 100, 500, 1000, 2500), spurcorr)) 15 | 16 | origcorrs = apply(out, 3, function(mat) mat[2,]) ;rownames(origcorrs) = paste(c(10, 100, 500, 1000, 2500)) 17 | spurrcorrsBoth = apply(out, 3, function(mat) mat[3,]);rownames(spurrcorrsBoth) = paste(c(10, 100, 500, 1000, 2500)) 18 | spurrcorrsXonly = apply(out, 3, function(mat) mat[4,]);rownames(spurrcorrsXonly) = paste(c(10, 100, 500, 1000, 2500)) 19 | spurrcorrsYonly = apply(out, 3, function(mat) mat[5,]);rownames(spurrcorrsYonly) = paste(c(10, 100, 500, 1000, 2500)) 20 | 21 | data.frame(origZero=rowMeans(origcorrs), Xratio=rowMeans(spurrcorrsXonly), 22 | Yratio=rowMeans(spurrcorrsYonly), Both=rowMeans(spurrcorrsBoth)) 23 | 24 | 25 | -------------------------------------------------------------------------------- /Other/xkcdscrape.R: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | ### The following function will go to the xkcd website, scrape the html, look for ### 3 | ### a particular part based on a div tag, snag it, and write it out to a file for ### 4 | ### subsequent text analysis. ### 5 | ### ### 6 | ### The function requires only one argument, number, indicating the number of ### 7 | ### comic strips. ### 8 | ##################################################################################### 9 | 10 | ### Inspiration from: 11 | ### https://github.com/CabbagesAndKings/xkcd-Topics/blob/master/scripts/getTranscripts.sh 12 | 13 | xkcdscrape = function(number){ 14 | # create url string 15 | url = paste0("http://xkcd.com/", number,'/') 16 | 17 | # creates a single element list argument of unparsed html 18 | x = scrape(url, parse=F) 19 | x = as.character(x[[1]]) # not necessary, but simplifies things a bit 20 | 21 | # find the point in the text matching the regex, i.e. the transcript div (matched non-greedily up to its closing tag) 22 | transcript = regexpr('<div id="transcript"(.*?)</div>', x) 23 | 24 | # writes what is sent to console (via cat) to a xkcd#.txt file 25 | sink(paste0('xkcdRScrape/xkcd', number, '.txt')) # open connection to a file of the name formed by paste0; create the folder first.
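# while the sink is active, anything cat() prints is diverted to the file rather than the console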
26 | cat(unlist(regmatches(x, transcript))) # send to the console the point specified by transcript in the html text 27 | sink() # close connection 28 | } 29 | 30 | library(parallel) 31 | cl = makeCluster(3) # number of cores 32 | clusterEvalQ(cl, library(scrapeR)) # load scrapeR package on the cores 33 | clusterExport(cl, 'xkcdscrape') # export the function to the cores 34 | 35 | n = 1283 # number of xkcd comics by late Oct 2013 36 | 37 | # a standard non-parallel way to do it without an explicit loop and more straightforward/easier code 38 | # sapply(1:n, xkcdscrape) # each number 1:n is fed to the function 39 | 40 | # the parallelized version (timed via system.time function) 41 | system.time({ 42 | parSapply(cl, 1:n, xkcdscrape) 43 | }) 44 | 45 | 46 | 47 | ### Example loop for comparison; slow 48 | # Check website for number published 49 | # n = 1283 50 | # library(scrapeR) 51 | # system.time({ 52 | # for (i in 1:n){ 53 | # url = paste0("http://xkcd.com/", i,'/') 54 | # x = scrape(url, parse=F) 55 | # transcript = regexpr('<div id="transcript"(.*?)</div>', x[[1]]) 56 | # sink(paste0('xkcdRScrape/xkcd', i)) 57 | # cat(regmatches(x[[1]], transcript)) 58 | # sink() 59 | # } 60 | # }) -------------------------------------------------------------------------------- /Other/xkcdscrape.py: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | ### The following function will go to the xkcd website, scrape the html, look for ### 3 | ### a particular part based on a div tag, snag it, and write it out to a file for ### 4 | ### subsequent text analysis. ### 5 | ##################################################################################### 6 | 7 | ### Inspiration from: 8 | ### https://github.com/CabbagesAndKings/xkcd-Topics/blob/master/scripts/getTranscripts.sh 9 | ### See the R version in my repo. Being new to Python I thought this would be a nice challenge. 10 | ### I make no claims as to its efficiency.
11 | 12 | ############### 13 | ### Preface ### 14 | ############### 15 | import os 16 | os.getcwd() 17 | 18 | # change working directory to wherever you want the text files to be stored 19 | os.chdir('C:/Users/mclark19/Desktop/CSR/Clients/Me/xkcdscrape/') 20 | 21 | import time # for time comparisons if desired 22 | import urllib2 23 | import BeautifulSoup 24 | 25 | Soup = BeautifulSoup.BeautifulSoup 26 | 27 | ############### 28 | ### Example ### 29 | ############### 30 | ### read in web contents 31 | url = urllib2.urlopen("http://www.xkcd.com/1/") 32 | xkcd1 = url.read() 33 | 34 | ### create a beautifulSoup object 35 | out = Soup(xkcd1) 36 | 37 | ### Inspect in a standard html form 38 | print(out.prettify()) 39 | 40 | ### example of primary process: extract the div with id='transcript' 41 | out.find('div', {'id' : 'transcript'}) 42 | 43 | ###################################### 44 | ### Main Process via Standard Loop ### 45 | ###################################### 46 | 47 | ### Preliminaries 48 | # number of xkcd comics 49 | n = 1283 50 | 51 | # test n 52 | # n = 100 53 | 54 | ### set initial time 55 | t0 = time.time() 56 | 57 | ### Run the loop over all comics 58 | for i in xrange(1, n+1): 59 | filename = 'xkcdpyScrape/xkcd' + str(i) + '.txt' 60 | if i == 404: # this deals with an xkcd joke for comic #404 61 | out = " " 62 | else: 63 | # read in the web contents 64 | url = urllib2.urlopen('http://www.xkcd.com/' + str(i)) 65 | xkcd = url.read() 66 | 67 | # create a soup object 68 | xkcdsoup = Soup(xkcd) 69 | 70 | # scrape the div with id = 'transcript' 71 | out = xkcdsoup.find('div', {'id' : 'transcript'}) 72 | 73 | # Write out the file 74 | f = open(filename, 'w+') 75 | f.write(str(out)) 76 | f.close() 77 | 78 | time.time() - t0 # roughly 20 seconds for 100, about 4.5 minutes for all 79 | 80 | #################### 81 | ### Parallelized ### 82 | #################### 83 | 84 | ################################################################################## 85 | ### Create scrapexkcd function that will do what's in the loop above given a ### 86 | ### start and end point. See the loop above for details. Arguments include the ### 87 | ### start and end number specific to the comics one desires to download. 
### 88 | ################################################################################## 89 | 90 | def scrapexkcd(start, end): 91 | import urllib2 92 | import BeautifulSoup 93 | for i in xrange(start, end): 94 | filename = 'xkcdpyScrape/parallel/xkcd' + str(i) + '.txt' 95 | if i == 404: 96 | out = " " 97 | else: 98 | url = urllib2.urlopen('http://www.xkcd.com/' + str(i)) 99 | xkcd = url.read() 100 | xkcdsoup = BeautifulSoup.BeautifulSoup(xkcd) 101 | out = xkcdsoup.find('div', {'id' : 'transcript'}) 102 | f = open(filename, 'w+') 103 | f.write(str(out)) 104 | f.close() 105 | 106 | start = 1 107 | end = 1283 + 1 108 | 109 | # test end 110 | end = 100 111 | 112 | import pp # for parallelization 113 | 114 | # set number of workers 115 | ncpus = 3 116 | 117 | # Create jobserver 118 | job_server = pp.Server(ncpus=ncpus) 119 | 120 | # Divide the task into subtasks 121 | parts = 3 122 | step = (end - start) / parts + 1 123 | 124 | jobs = [] 125 | 126 | t0 = time.time() 127 | 128 | for index in xrange(parts): 129 | # create start and endpoints for the function 130 | starti = start + index*step 131 | endi = min(start + (index+1)*step, end) 132 | # Submit a job which will scrape the site 133 | job_server.submit(scrapexkcd, (starti, endi)) 134 | 135 | #wait for jobs in all groups to finish 136 | job_server.wait() 137 | 138 | time.time() - t0 # time since t0 139 | 140 | # print stats 141 | job_server.print_stats() -------------------------------------------------------------------------------- /SC and TR/MLcode.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------------------------# 2 | # See http://www.nd.edu/~mclark19/learn/ML.pdf for the handout on machine # 3 | # learning this code regards. 
# 4 | #-------------------------------------------------------------------------------# 5 | 6 | sqerrloss = function(beta, X, y){ 7 | mu = X%*%beta 8 | sum((y-mu)^2) 9 | } 10 | 11 | set.seed(123) 12 | X = cbind(1, rnorm(100), rnorm(100)) 13 | y = rowSums(X[,-1] + rnorm(100)) 14 | out1 = optim(par=c(0,0,0), fn=sqerrloss, X=X, y=y) 15 | out2 = lm(y ~ X[,2] + X[,3]) # check with lm 16 | rbind(c(out1$par, out1$value), c(coef(out2),sum(resid(out2)^2)) ) 17 | 18 | 19 | 20 | 21 | sqerrloss_reg = function(beta, X, y, lambda=.1){ 22 | mu = X%*%beta 23 | sum((y-mu)^2) + lambda*sum(abs(beta[-1])) 24 | } 25 | 26 | out3 = optim(par=c(0,0,0), fn=sqerrloss_reg, X=X, y=y) 27 | rbind(c(out1$par, out1$value), 28 | c(coef(out2),sum(resid(out2)^2)), 29 | c(out3$par, out3$value) ) 30 | 31 | 32 | 33 | 34 | 35 | 36 | wine = read.csv('http://www.nd.edu/~mclark19/learn/data/goodwine.csv') 37 | summary(wine) 38 | 39 | 40 | 41 | library(doSNOW) 42 | cl = makeCluster(11, type = "SOCK"); registerDoSNOW(cl) # keep the cluster object so it can be stopped later 43 | 44 | 45 | 46 | library(corrplot) 47 | corrplot(cor(wine[,-c(13,15)]), method="number", tl.cex=.5) 48 | 49 | 50 | 51 | library(caret) 52 | set.seed(1234) #so that the indices will be the same when re-run 53 | trainIndices = createDataPartition(wine$good, p=.8, list=F) 54 | wanted = !colnames(wine) %in% c("free.sulfur.dioxide", "density", "quality", "color", "white" ) 55 | wine_train = wine[trainIndices, wanted] #remove quality and color, as well as density and others 56 | 57 | wine_test = wine[-trainIndices, wanted] 58 | # prep_test = preProcess(wine_test[,-10], method="range") 59 | # wine_test = data.frame(predict(prep_test, wine_test[,-10]), good=wine_test[ ,10]) 60 | 61 | 62 | 63 | wine_trainplot = predict(preProcess(wine_train[,-10], method="range"), 64 | wine_train[,-10]) 65 | featurePlot(wine_trainplot, wine_train$good, "box") 66 | 67 | 68 | 69 | set.seed(1234) 70 | cv_opts = trainControl(method="cv", number=10) 71 | knn_opts = data.frame(.k=c(seq(3, 11, 2), 25, 51, 101)) #odd to avoid ties 72 | results_knn = train(good~., data=wine_train, method="knn", 73 | preProcess="range", trControl=cv_opts, 74 | tuneGrid = knn_opts) 75 | 76 | results_knn 77 | 78 | 79 | 80 | preds_knn = predict(results_knn, wine_test[,-10]) 81 | confusionMatrix(preds_knn, wine_test[,10], positive='Good') 82 | conf_knn = confusionMatrix(preds_knn, wine_test[,10], positive='Good') #create an object to use in Sexpr 83 | 84 | 85 | 86 | dotPlot(varImp(results_knn)) 87 | 88 | 89 | 90 | set.seed(1234) 91 | results_nnet = train(good~., data=wine_train, method="avNNet", 92 | trControl=cv_opts, preProcess="range", 93 | tuneLength=5, trace=F, maxit=1000) 94 | results_nnet 95 | 96 | 97 | 98 | preds_nnet = predict(results_nnet, wine_test[,-10]) 99 | confusionMatrix(preds_nnet, wine_test[,10], positive='Good') 100 | conf_nnet = confusionMatrix(preds_nnet, wine_test[,10], positive='Good') #create an object to use in Sexpr 101 | 102 | 103 | 104 | set.seed(1234) 105 | rf_opts = data.frame(.mtry=c(2:6)) 106 | results_rf = train(good~., data=wine_train, method="rf", 107 | preProcess='range',trControl=cv_opts, tuneGrid=rf_opts, 108 | n.tree=1000) 109 | results_rf 110 | 111 | 112 | 113 | preds_rf = predict(results_rf, wine_test[,-10]) 114 | confusionMatrix(preds_rf, wine_test[,10], positive='Good') 115 | conf_rf = confusionMatrix(preds_rf, wine_test[,10], positive='Good') #create an object to use in Sexpr 116 | 117 | 118 | 119 | set.seed(1234) 120 | results_svm = train(good~., data=wine_train, method="svmLinear", 121 | preProcess="range", trControl=cv_opts,
tuneLength=5) 122 | results_svm 123 | 124 | 125 | 126 | preds_svm = predict(results_svm, wine_test[,-10]) 127 | confusionMatrix(preds_svm, wine_test[,10], positive='Good') 128 | conf_svm = confusionMatrix(preds_svm, wine_test[,10], positive='Good') #create an object to use in Sexpr 129 | 130 | 131 | stopCluster(cl) -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/diag.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/diag.html -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/fitCSgam-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/fitCSgam-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/gamSleepStudy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/gamSleepStudy-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/gammSleepStudy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/gammSleepStudy-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and
TR/mixedModelML/mixedModelML_files/figure-html/unnamed-chunk-3-2.png -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-latex/fitCSgam-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-latex/fitCSgam-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModelML/mixedModelML_files/figure-latex/gammSleepStudy-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModelML/mixedModelML_files/figure-latex/gammSleepStudy-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/anovamixed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/anovamixed.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/growth_vs_mixed_files/figure-html/lavaanmod-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/growth_vs_mixed_files/figure-html/lavaanmod-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/growth_vs_mixed_files/figure-html/randomIntsSlopesMods-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/growth_vs_mixed_files/figure-html/randomIntsSlopesMods-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/growth_vs_mixed_files/figure-html/visualizeTrends-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/growth_vs_mixed_files/figure-html/visualizeTrends-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/growth_vs_mixed_files/growthvsMixed_EstResults.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/growth_vs_mixed_files/growthvsMixed_EstResults.RData -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-html/sleepModFits-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-html/sleepModFits-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-html/sleepModFitsReduced-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-html/sleepModFitsReduced-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-html/sleepstudyPlot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-html/sleepstudyPlot-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-latex/sleepModFits-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-latex/sleepModFits-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-latex/sleepModFitsReduced-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-latex/sleepModFitsReduced-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-latex/sleepstudyPlot-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-latex/sleepstudyPlot-1.pdf -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepModFits-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepModFits-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepModFitsReduced-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepModFitsReduced-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/mixedModels_files/figure-markdown_github/sleepstudyPlot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and 
TR/mixedModels/mixedModels_files/figure-markdown_github/sleepstudyPlot-1.png -------------------------------------------------------------------------------- /SC and TR/mixedModels/pearlpic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-clark/Miscellaneous-R-Code/4c0fa04adfbd97b3ca62a49ca69a39ff31c6f61f/SC and TR/mixedModels/pearlpic.png -------------------------------------------------------------------------------- /other.css: -------------------------------------------------------------------------------- 1 | .emph, em { 2 | color: #E32D00; /*#ff5500 #D14300*/ 3 | font-style: normal; 4 | font-weight: 450; 5 | } 6 | 7 | strong { 8 | color: #404040; 9 | } 10 | 11 | 12 | .pack { 13 | color: #1f65b7; /* #990071 #AC9CFF #e41a1c*/ 14 | font-weight: 400; 15 | } 16 | 17 | .func { 18 | color: #007020; /*#007199 #00CBB6; #984ea3; can just use `` instead*/ 19 | font-weight: 400; 20 | } 21 | 22 | .objclass { 23 | color: #947100; /*#AAB400 #4daf4a; #FFC5D0*/ 24 | font-weight: 400; 25 | } 26 | 27 | a { 28 | color: #1e90ff; /*dodgerblue*/ 29 | } 30 | 31 | pre { 32 | box-sizing: border-box; 33 | left: 0; /* This changes where the code chunk box actually starts */ 34 | /* padding: 10px 0 10px 60px; Change the last value here to move the text left or right */ 35 | position: relative; 36 | width: 100%; /* This changes where the code chunk box ends on the right side */ 37 | font-size: 75%; /*changes output size; and comments*/ 38 | /*border: 10px solid #ff5500;*/ /*code block and results border*/ 39 | /*background-color:#ff5500;*/ /* results background*/ 40 | 41 | } 42 | 43 | #TOC { 44 | font-size: 75%; 45 | } 46 | 47 | 48 | 49 | --------------------------------------------------------------------------------