├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ └── R-CMD-check.yaml ├── .gitignore ├── DESCRIPTION ├── MNLpred.Rproj ├── NAMESPACE ├── NEWS.md ├── R ├── gles-data.R ├── mnl_fd2_ova.R ├── mnl_fd_ova.R └── mnl_pred_ova.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── data └── gles.RData ├── inst └── CITATION ├── man ├── figures │ ├── README-first_differences_plot-1.png │ ├── README-prediction_plot1-1.png │ ├── README-prediction_plot2-1.png │ └── README-static_fd_plot-1.png ├── gles.Rd ├── mnl_fd2_ova.Rd ├── mnl_fd_ova.Rd └── mnl_pred_ova.Rd ├── tests ├── testthat.R └── testthat │ └── test_inputvariants.R └── vignettes ├── .gitignore ├── OVA_Predictions_For_MNL.Rmd └── bibliography.bib /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^MNLpred\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^cran-comments\.md$ 5 | feedback 6 | issues 7 | README_files 8 | PDF 9 | ^CRAN-RELEASE$ 10 | release 11 | ^\.github$ 12 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: windows-latest, r: 'release'} 26 | - {os: macOS-latest, r: 'release'} 27 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 28 | - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 29 | 30 | env: 31 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 32 | RSPM: ${{ matrix.config.rspm }} 33 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | steps: 36 | - uses: actions/checkout@v2 37 | 38 | - uses: r-lib/actions/setup-r@v1 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | 42 | - uses: r-lib/actions/setup-pandoc@v1 43 | 44 | - name: Query dependencies 45 | run: | 46 | install.packages('remotes') 47 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 48 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 49 | shell: Rscript {0} 50 | 51 | - name: Cache R packages 52 | if: runner.os != 'Windows' 53 | uses: actions/cache@v2 54 | with: 55 | path: ${{ env.R_LIBS_USER }} 56 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 57 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 58 | 59 | - name: Install system dependencies 60 | if: runner.os == 'Linux' 61 | run: | 62 | while read -r cmd 63 | do 64 | eval sudo $cmd 65 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 66 | 67 | - name: Install dependencies 68 | run: | 69 | 
remotes::install_deps(dependencies = TRUE) 70 | remotes::install_cran("rcmdcheck") 71 | shell: Rscript {0} 72 | 73 | - name: Check 74 | env: 75 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 76 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 77 | shell: Rscript {0} 78 | 79 | - name: Upload check results 80 | if: failure() 81 | uses: actions/upload-artifact@main 82 | with: 83 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 84 | path: check 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | inst/doc 3 | feedback 4 | release 5 | 6 | issues/ 7 | 8 | .Rhistory 9 | 10 | PDF 11 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: MNLpred 2 | Title: Simulated Predicted Probabilities for Multinomial Logit Models 3 | Version: 0.0.8 4 | Authors@R: 5 | person(given = "Manuel", 6 | family = "Neumann", 7 | role = c("aut", "cre"), 8 | email = "manuel.neumann@mzes.uni-mannheim.de", 9 | comment = c(ORCID = "0000-0002-7953-3939")) 10 | Depends: R (>= 3.5.0) 11 | Description: Functions to easily return simulated predicted probabilities and 12 | first differences for multinomial logit models. It takes a specified 13 | scenario and a multinomial model to predict probabilities with a set of 14 | coefficients, drawn from a simulated sampling distribution. The simulated 15 | predictions allow for meaningful plots with means and confidence intervals. 16 | The methodological approach is based on the principles laid out by King, 17 | Tomz, and Wittenberg (2000) and Hanmer and Ozan Kalkan 18 | (2016) . 
19 | License: GPL-3 20 | Encoding: UTF-8 21 | LazyData: true 22 | RoxygenNote: 7.1.1 23 | Suggests: 24 | knitr, 25 | rmarkdown, 26 | testthat, 27 | nnet, 28 | magrittr, 29 | ggplot2, 30 | scales 31 | VignetteBuilder: knitr 32 | Imports: 33 | MASS, 34 | stats 35 | -------------------------------------------------------------------------------- /MNLpred.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageCheckArgs: --as-cran 22 | PackageRoxygenize: rd,collate,namespace,vignette 23 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(mnl_fd2_ova) 4 | export(mnl_fd_ova) 5 | export(mnl_pred_ova) 6 | importFrom(MASS,mvrnorm) 7 | importFrom(stats,coef) 8 | importFrom(stats,na.omit) 9 | importFrom(stats,quantile) 10 | importFrom(utils,setTxtProgressBar) 11 | importFrom(utils,txtProgressBar) 12 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # MNLpred 0.0.8 2 | 3 | - Skipping a package test that causes issues on CRAN. 4 | 5 | # MNLpred 0.0.7 6 | 7 | - Fixing an erroneously triggered error message that did not correctly 8 | evaluate the class of the IV when only one IV is supplied in the model. 9 | 10 | # MNLpred 0.0.6 11 | 12 | - Fixes a bug so that the function also works with just one IV. 
13 | 14 | # MNLpred 0.0.5 15 | 16 | - Arguments 'xvari', 'scenname', 'scenvalue', and 'scenvalues' are deprecated 17 | in favor of 'x', 'z', 'z_value', and 'z_values' respectively. 18 | - New error messages. 19 | - Major speed enhancement. 20 | 21 | # MNLpred 0.0.4 22 | 23 | - Error message for factor variables in data. 24 | 25 | # MNLpred 0.0.3 26 | 27 | - Package includes sample data for examples, vignettes, etc. 28 | - Functions return more descriptive error messages. 29 | - Functions return progress bars. 30 | 31 | 32 | # MNLpred 0.0.2 33 | Package continues to work with the new major tibble release. 34 | 35 | # MNLpred 0.0.1 36 | -------------------------------------------------------------------------------- /R/gles-data.R: -------------------------------------------------------------------------------- 1 | #' German Longitudinal Election Study 2 | #' 3 | #' A sample of 1,000 respondents in the Rolling Cross Sectional 4 | #' study in the German Longitudinal Election Study in 2017. 5 | #' 6 | #' @docType data 7 | #' 8 | #' @usage data(gles) 9 | #' 10 | #' @format A data frame with 1,000 observations and 6 variables: 11 | #' \describe{ 12 | #' \item{vote}{Voting decision for party} 13 | #' \item{egoposition_immigration}{Ego-position toward immigration (0 = very open to 10 = very restrictive)} 14 | #' \item{ostwest}{Dummy for respondents from Eastern Germany (= 1)} 15 | #' \item{political_interest}{Measurement for political interest (0 = low, 4 = high)} 16 | #' \item{income}{Self-reported income satisfaction (0 = low, 4 = high)} 17 | #' \item{gender}{Self-reported gender (binary coding with 1 = female)} 18 | #' } 19 | #' 20 | #' 21 | #' @keywords datasets 22 | #' 23 | #' @references Roßteutscher, Sigrid et al. 2019. 24 | #' “Rolling Cross-Section-Wahlkampfstudie mit Nachwahl-Panelwelle (GLES 2017).” 25 | #' ZA6803 Datenfile Version 4.0.1. 26 | #' (\href{https://www.doi.org/10.4232/1.13213}{GESIS Datenarchiv}).
27 | #' 28 | #' @source \href{https://www.doi.org/10.4232/1.13213}{GESIS Datenarchiv} 29 | #' 30 | #' @examples 31 | #' data(gles) 32 | #' table(gles$vote) 33 | "gles" 34 | -------------------------------------------------------------------------------- /R/mnl_fd2_ova.R: -------------------------------------------------------------------------------- 1 | #' Multinomial First Differences Predictions For Two Values (Observed Value Approach) 2 | #' 3 | #' @param model the multinomial model, from a \code{\link{multinom}}()-function call (see the \code{\link{nnet}} package) 4 | #' @param data the data with which the model was estimated 5 | #' @param x the name of the variable that should be varied 6 | #' @param value1 first value for the difference 7 | #' @param value2 second value for the difference 8 | #' @param xvari former argument for \code{x} (deprecated). 9 | #' @param nsim numbers of simulations 10 | #' @param seed set a seed for replication purposes. 11 | #' @param probs a vector with two numbers, defining the significance levels. Default to 5\% significance level: \code{c(0.025, 0.975)} 12 | #' 13 | #' @return The function returns a list with several elements. Most importantly the list includes the simulated draws `S`, the simulated predictions `P`, the first differences of the predictions `P_fd`, a data set for plotting `plotdata` the predicted probabilities, and one for the first differences `plotdata_fd`. 
#' @export
#'
#' @examples
#' library(nnet)
#' library(MASS)
#'
#' dataset <- data.frame(y = c(rep("a", 10), rep("b", 10), rep("c", 10)),
#'                       x1 = rnorm(30),
#'                       x2 = rnorm(30, mean = 1),
#'                       x3 = sample(1:10, 30, replace = TRUE))
#'
#' mod <- multinom(y ~ x1 + x2 + x3, data = dataset, Hess = TRUE)
#'
#' fdi1 <- mnl_fd2_ova(model = mod, data = dataset,
#'                     x = "x1",
#'                     value1 = min(dataset$x1),
#'                     value2 = max(dataset$x1),
#'                     nsim = 10)
#'
#' @importFrom stats coef na.omit quantile
#' @importFrom utils setTxtProgressBar txtProgressBar
#' @importFrom MASS mvrnorm


mnl_fd2_ova <- function(model,
                        data,
                        x,
                        value1,
                        value2,
                        xvari,
                        nsim = 1000,
                        seed = "random",
                        probs = c(0.025, 0.975)){

  # List that collects all results and is returned in the end.
  output <- list()

  # Warnings for deprecated arguments
  if (!missing(xvari)) {
    warning("The argument 'xvari' is deprecated; please use 'x' instead.\n\n",
            call. = FALSE)
    x <- xvari
  }

  # Errors:
  if (is.null(model) == TRUE) {
    stop("Please supply a model")
  }

  if (sum(grepl("multinom", model$call)) == 0) {
    stop("Please supply a multinom()-model")
  }

  if (is.null(data) == TRUE) {
    stop("Please supply a data set")
  }

  if (is.null(x) == TRUE | is.character(x) == FALSE) {
    stop("Please supply a character of your x-variable of interest")
  }

  if (is.null(value1) == TRUE | is.null(value2) == TRUE) {
    stop("Please supply values to compute differences")
  }

  if (is.null(model$Hessian) == TRUE) {
    stop("There is no Hessian matrix. Please specify Hess = TRUE in your multinom() call.")
  }

  # Names of variables in model (without the "list" character in the vector)
  variables <- as.character(attr(model$terms, "variables"))[-1]

  if (!(x %in% variables) == TRUE){
    stop("x-variable is not an independent variable in the model. There might be a typo.")
  }

  # > Handling the IVs ---------------------------------------------------------
  # Names of the independent variables (everything after the DV)
  iv <- variables[2:length(variables)]
  output[["IV"]] <- iv

  # Variables have to be numeric
  if (length(iv) > 1) {
    if (sum(apply(data[, iv], 2, class) %in% c("numeric", "integer")) < ncol(data[, iv])) {
      stop("Please supply data that consists of numeric values. The package can not handle factor or character variables, yet. For workarounds, please take a look at the github issues (https://github.com/ManuelNeumann/MNLpred/issues/1). The problem will hopefully be fixed with the 0.1.0 release.")
    }
  } else {
    # data[[iv]] is the direct (and safer) equivalent of the former
    # eval(parse(text = paste0("data$", iv))) construction.
    if (class(data[[iv]]) %in% c("numeric", "integer") == FALSE) {
      stop("Please supply data that consists of numeric values. The package can not handle factor or character variables, yet. For workarounds, please take a look at the github issues (https://github.com/ManuelNeumann/MNLpred/issues/1). The problem will hopefully be fixed with the 0.1.0 release.")
    }
  }

  # > Handling the DV ----------------------------------------------------------
  # Name of the dependent variable (first entry of the terms)
  dv <- variables[1]
  output[["DV"]] <- dv

  # > Full observations (listwise deletion) ------------------------------------
  data_redux <- na.omit(data[, c(dv, iv)])

  # Number of full observations (computed once; the original computed it twice)
  obs <- nrow(data_redux)
  output[["Observations"]] <- obs

  # > Working with the model ---------------------------------------------------
  # Get matrix of coefficients out of the model
  coefmatrix <- coef(model)

  # Number of coefficients per choice category
  ncoef <- ncol(coefmatrix)

  # Model coefficients as a vector (row-wise, matching the Hessian ordering)
  mu <- as.vector(t(coefmatrix))

  # Variance-covariance matrix of estimates (inverse of the Hessian)
  varcov <- solve(model$Hessian)

  # Set seed if needed:
  if (seed != "random") {
    set.seed(seed = seed)
  }

  # Simulate a sampling distribution of the coefficients
  S <- mvrnorm(nsim, mu, varcov)
  output[["S"]] <- S

  # Artificial variation of the independent variable of interest:
  # exactly the two scenario values the first difference is built from.
  variation <- c(value1, value2)
  output[["ScenarioValues"]] <- variation
  nseq <- length(variation)

  # Choice categories of the dependent variable
  categories <- sort(unique(data[[dv]]))
  J <- length(categories)
  output[["ChoiceCategories"]] <- categories
  output[["nChoices"]] <- J

  # Number of interaction terms (detected via ":" in the coefficient names)
  ninteraction <- sum(grepl(":", model$coefnames))

  # Matrix of observations
  X <- matrix(NA, ncol = ncoef, nrow = obs)
  colnames(X) <- model$coefnames
  # 1 for the Intercept
  X[, 1] <- 1
  # Values of the independent variables
  X[, 2:(length(iv)+1)] <- as.matrix(data_redux[, iv])

  # Prepare array and fill each scenario slice with the observed values
  ovacases <- array(NA, c(dim(X), nseq))
  ovacases[,,] <- X

  # Column position of the variable which should vary
  # (x was already validated above, so no NULL check is needed here):
  varidim <- which(colnames(X) == x)

  # Artificially alter the variable in each dimension according to
  # the two scenario values:
  for (i in 1:nseq) {
    ovacases[, varidim, i] <- variation[i]
  }

  # Compute interactions:
  if (ninteraction != 0) {

    # Positions of the interaction terms
    interactionterms <- which(grepl(":", model$coefnames) == TRUE)

    # Recompute each interaction column from its two constituent columns
    # so that the artificially altered x propagates into the interactions:
    for (i in interactionterms) {
      # First variable name of the interaction:
      firstint <- gsub(":.*", "", model$coefnames[i])
      # Second variable name of the interaction:
      secondint <- gsub(".*:", "", model$coefnames[i])

      # Get position in matrix:
      intdim1 <- which(colnames(X) == firstint)
      intdim2 <- which(colnames(X) == secondint)

      # Compute interaction term:
      for (j in 1:nseq) {
        ovacases[, i, j] <- ovacases[, intdim1, j] * ovacases[, intdim2, j]
      }
    }
  }

  # Prepare array of linear predictors: obs x nsim x scenario x choice
  ovaV <- array(NA, c(obs, nsim, nseq, J))

  # Add progress bar
  pb_multiplication <- txtProgressBar(min = 0, max = nseq, initial = 0)

  # Loop over all scenarios
  cat("Multiplying values with simulated estimates:\n")

  for (i in 1:nseq) {
    # The baseline category's linear predictor is identically zero, so it can
    # be assigned directly (the original multiplied with a zero matrix, which
    # produced the same all-zero result at O(obs * ncoef * nsim) cost).
    ovaV[, , i, 1] <- 0

    # For each other choice, multiply the cases with the simulated estimates.
    # Direct slicing replaces the former eval(parse(...)) construction;
    # drop = FALSE keeps the slice a matrix even for a single coefficient.
    for (k in 2:J) {
      coefstart <- (k-2)*ncoef + 1
      coefend <- (k-1)*ncoef
      ovaV[, , i, k] <- apply(S[, coefstart:coefend, drop = FALSE], 1,
                              function(s) ovacases[, , i] %*% s)
    }

    # Progress bar:
    setTxtProgressBar(pb_multiplication, i)
  }

  # Multinomial link function:

  pb_link <- txtProgressBar(min = 0, max = nseq, initial = 0)
  cat("\nApplying link function:\n")

  # 1. Part: Sum of exponentiated linear predictors over all choices
  Sexp <- rowSums(exp(ovaV), dims = 3L)

  # Create P (array with predictions: nsim x choice x scenario)
  P <- array(NA, c(nsim, J, nseq))

  # 2. Part: take the exponent and divide through the sum of all (Sexp),
  # then average over the observed cases (observed value approach)
  for (l in 1:nseq) {
    for (m in 1:J) {
      P[, m, l] <- colMeans(exp(ovaV[, , l, m]) / Sexp[, , l])
      if (sum(is.na(P[, m, l])) != 0) {
        stop("Some of the log-odds are very large and the exponent cannot be computed. Please check your model specification for any problems, such as perfectly separated variables.")
      }
    }

    setTxtProgressBar(pb_link, l)
  }

  output[["P"]] <- P

  # Aggregate the simulations into a data frame for plotting
  plotdat <- data.frame(iv = rep(variation, J),
                        categories = rep(categories, each = length(variation)),
                        mean = NA,
                        lower = NA,
                        upper = NA)

  # Aggregate: mean and quantile bounds per choice category
  start <- 1
  for (i in 1:J) {
    end <- i*length(variation)
    plotdat[c(start:end), "mean"] <- colMeans(P[, i, ])
    plotdat[c(start:end), "lower"] <- apply(P[, i, ], 2, quantile, probs = probs[1])
    plotdat[c(start:end), "upper"] <- apply(P[, i, ], 2, quantile, probs = probs[2])
    start <- end + 1
  }

  # Rename the variables in the plot data
  colnames(plotdat)[1:2] <- c(x, dv)

  # Put the data in the output
  output[["plotdata"]] <- plotdat

  # First differences: scenario 2 minus scenario 1, per simulation draw
  P_fd <- array(NA, dim = c(nsim, J))

  for (i in 1:J) {
    P_fd[, i] <- P[, i, 2] - P[, i, 1]
  }

  output[["P_fd"]] <- P_fd

  # Plot data for the first differences (one row per choice category)
  plotdat_fd <- data.frame(categories = categories,
                           mean = NA,
                           lower = NA,
                           upper = NA)

  for (i in 1:J) {
    plotdat_fd[i, "mean"] <- mean(P_fd[, i])
    plotdat_fd[i, "lower"] <- quantile(P_fd[, i], probs = probs[1])
    plotdat_fd[i, "upper"] <- quantile(P_fd[, i], probs = probs[2])
  }

  output[["plotdata_fd"]] <- plotdat_fd

  cat("\nDone!\n\n")
  return(output)
}
--------------------------------------------------------------------------------
/R/mnl_fd_ova.R:
--------------------------------------------------------------------------------
#' Multinomial First Differences Prediction (Observed Value Approach)
#'
#' This function predicts values for two different scenarios over a range of
#' values. It then takes the differences between the different simulations to
#' return first differences for each value.
#'
#' The function uses the \code{\link{mnl_pred_ova}} function for each scenario.
#' The results of these predictions are also returned and can therefore be
#' easily accessed. If you need predictions for multiple scenarios, you can use
#' this function to both plot the predictions for each scenario and the
#' differences between them.
#'
#' @param model the multinomial model, from a \code{\link{multinom}}()-function call (see the \code{\link{nnet}} package)
#' @param data the data with which the model was estimated
#' @param x the name of the variable that should be varied (the x-axis variable in prediction plots)
#' @param z define the variable for which you want to compute the difference.
#' @param z_values determine the two values at which value you want to fix the scenario (\code{z}). The first differences will be computed by subtracting the values of the first supplied scenario from the second one.
#' @param xvari former argument for \code{x} (deprecated).
#' @param scenname former argument for \code{z} (deprecated).
#' @param scenvalues former argument for \code{z_values} (deprecated).
#' @param by define the steps of \code{x}.
#' @param nsim numbers of simulations
#' @param seed set a seed for replication purposes.
#' @param probs a vector with two numbers, defining the significance levels. Default to 5\% significance level: \code{c(0.025, 0.975)}
#'
#' @return The function returns a list with several elements. Most importantly the list includes the simulated draws `S`, the simulated predictions `P`, and a data set for plotting `plotdata`.
#' @export
#'
#' @examples
#' library(nnet)
#' library(MASS)
#'
#' dataset <- data.frame(y = c(rep("a", 10), rep("b", 10), rep("c", 10)),
#'                       x1 = rnorm(30),
#'                       x2 = rnorm(30, mean = 1),
#'                       x3 = sample(1:10, 30, replace = TRUE))
#'
#' mod <- multinom(y ~ x1 + x2 + x3, data = dataset, Hess = TRUE)
#'
#' fdif <- mnl_fd_ova(model = mod, data = dataset,
#'                    x = "x1", z = "x3",
#'                    z_values = c(min(dataset$x3), max(dataset$x3)),
#'                    nsim = 10)
#'

mnl_fd_ova <- function(model,
                       data,
                       x,
                       z,
                       z_values,
                       xvari,
                       scenname,
                       scenvalues,
                       by = NULL,
                       nsim = 1000,
                       seed = "random",
                       probs = c(0.025, 0.975)){

  # List that collects all results and is returned in the end.
  output <- list()

  # Warnings for deprecated arguments
  if (!missing(xvari)) {
    warning("The argument 'xvari' is deprecated; please use 'x' instead.\n\n",
            call. = FALSE)
    x <- xvari
  }

  if (!missing(scenname)) {
    warning("The argument 'scenname' is deprecated; please use 'z' instead.\n\n",
            call. = FALSE)
    z <- scenname
  }

  if (!missing(scenvalues)) {
    warning("The argument 'scenvalues' is deprecated; please use 'z_values' instead.\n\n",
            call. = FALSE)
    z_values <- scenvalues
  }

  # Errors:
  if (is.null(model) == TRUE) {
    stop("Please supply a model")
  }

  if (sum(grepl("multinom", model$call)) == 0) {
    stop("Please supply a multinom()-model")
  }

  if (is.null(data) == TRUE) {
    stop("Please supply a data set")
  }

  if (is.null(x) == TRUE | is.character(x) == FALSE) {
    stop("Please supply a character of your x-variable of interest")
  }

  if (is.null(z) == TRUE | is.character(z) == FALSE) {
    stop("Please supply a character of the scenario variable of interest")
  }

  if (is.null(z_values) == TRUE |
      length(z_values) != 2 |
      is.vector(z_values, mode = "numeric") == FALSE) {
    # (Fixed wording: the original message read "Please two numeric values")
    stop("Please supply two numeric values that are used for the different scenarios")
  }

  # Draw one common seed so that BOTH scenarios are simulated with the same
  # coefficient draws; otherwise the first differences would mix simulation
  # noise with the scenario effect.
  if (seed == "random") {
    seed <- sample(1:10000, 1)
  }

  # Predictions for first scenario
  cat("First scenario:\n")

  pred1 <- mnl_pred_ova(model = model,
                        data = data,
                        x = x,
                        z = z,
                        z_value = z_values[1],
                        by = by, nsim = nsim, seed = seed,
                        probs = probs)
  output[["Prediction1"]] <- pred1

  # Predictions for second scenario
  cat("Second scenario:\n")

  pred2 <- mnl_pred_ova(model = model,
                        data = data,
                        x = x,
                        z = z,
                        z_value = z_values[2],
                        by = by, nsim = nsim, seed = seed,
                        probs = probs)
  output[["Prediction2"]] <- pred2

  # Stack both scenarios' plot data for combined prediction plots
  plotdat <- rbind(pred1$plotdata,
                   pred2$plotdata)

  output[["plotdata"]] <- plotdat

  # First differences: second scenario minus first scenario, per draw
  P_fd <- array(NA, dim = c(nsim, pred1$nChoices, pred1$nVariation))

  for (i in 1:pred1$nChoices) {
    P_fd[, i, ] <- pred2$P[, i, ] - pred1$P[, i, ]
  }

  output[["P"]] <- P_fd

  # Plot data for the first differences, built on scenario 1's frame
  plotdata_fd <- pred1$plotdata[, c(1:2, 4:6)]
  plotdata_fd[, c("mean", "lower", "upper")] <- NA

  start <- 1
  for (i in 1:pred1$nChoices) {
    end <- i * pred1$nVariation
    plotdata_fd[c(start:end), "mean"] <- colMeans(P_fd[, i, ])
    plotdata_fd[c(start:end), "lower"] <- apply(P_fd[, i, ], 2, quantile, probs = probs[1])
    plotdata_fd[c(start:end), "upper"] <- apply(P_fd[, i, ], 2, quantile, probs = probs[2])
    start <- end + 1
  }

  output[["plotdata_fd"]] <- plotdata_fd

  return(output)

}
--------------------------------------------------------------------------------
/R/mnl_pred_ova.R:
--------------------------------------------------------------------------------
#' Multinomial Prediction Function (Observed Value Approach)
#'
#' This function predicts probabilities for all choices of a multinomial logit
#' model over a specified span of values.
#'
#' @param model the multinomial model, from a \code{\link{multinom}}()-function call (see the \code{\link{nnet}} package)
#' @param data the data with which the model was estimated
#' @param x the name of the variable that should be varied (the x-axis variable in prediction plots)
#' @param z if you want to hold a specific variable stable over all scenarios, you can name it here (optional).
#' @param z_value determine at which value you want to fix the \code{z}.
#' @param xvari former argument for \code{x} (deprecated).
#' @param scenname former argument for \code{z} (deprecated).
#' @param scenvalue former argument for \code{z_value} (deprecated).
#' @param by define the steps of \code{x}.
#' @param nsim numbers of simulations
#' @param seed set a seed for replication purposes.
#' @param probs a vector with two numbers, defining the significance levels. Default to 5\% significance level: \code{c(0.025, 0.975)}
#'
#' @return The function returns a list with several elements.
Most importantly the list includes the simulated draws `S`, the simulated predictions `P`, and a data set for plotting `plotdata`. 20 | #' @export 21 | #' 22 | #' @examples 23 | #' library(nnet) 24 | #' library(MASS) 25 | #' 26 | #' dataset <- data.frame(y = c(rep("a", 10), rep("b", 10), rep("c", 10)), 27 | #' x1 = rnorm(30), 28 | #' x2 = rnorm(30, mean = 1), 29 | #' x3 = sample(1:10, 30, replace = TRUE)) 30 | #' 31 | #' mod <- multinom(y ~ x1 + x2 + x3, data = dataset, Hess = TRUE) 32 | #' 33 | #' pred <- mnl_pred_ova(model = mod, data = dataset, 34 | #' x = "x1", 35 | #' nsim = 10) 36 | #' 37 | #' 38 | #' @importFrom stats coef na.omit quantile 39 | #' @importFrom utils setTxtProgressBar txtProgressBar 40 | #' @importFrom MASS mvrnorm 41 | 42 | 43 | mnl_pred_ova <- function(model, 44 | data, 45 | x, 46 | by = NULL, 47 | z = NULL, 48 | z_value = NULL, 49 | xvari, 50 | scenname, 51 | scenvalue, 52 | nsim = 1000, 53 | seed = "random", 54 | probs = c(0.025, 0.975)){ 55 | 56 | # Create list that is returned in the end. 57 | output <- list() 58 | 59 | # Warnings for deprecated arguments 60 | if (!missing(xvari)) { 61 | warning("The argument 'xvari' is deprecated; please use 'x' instead.\n\n", 62 | call. = FALSE) 63 | x <- xvari 64 | } 65 | 66 | if (!missing(scenname)) { 67 | warning("The argument 'scenname' is deprecated; please use 'z' instead.\n\n", 68 | call. = FALSE) 69 | z <- scenname 70 | } 71 | 72 | if (!missing(scenvalue)) { 73 | warning("The argument 'scenvalue' is deprecated; please use 'z_value' instead.\n\n", 74 | call. 
= FALSE) 75 | z_value <- scenvalue 76 | } 77 | 78 | 79 | # Errors: 80 | if (is.null(model) == TRUE) { 81 | stop("Please supply a model") 82 | } 83 | 84 | if (sum(grepl("multinom", model$call)) == 0) { 85 | stop("Please supply a multinom()-model") 86 | } 87 | 88 | if (is.null(data) == TRUE) { 89 | stop("Please supply a data set") 90 | } 91 | 92 | if (is.null(x) == TRUE | is.character(x) == FALSE) { 93 | stop("Please supply a character string of your x-variable of interest") 94 | } 95 | 96 | if (is.null(model$Hessian) == TRUE) { 97 | stop("There is no Hessian matrix. Please specify Hess = TRUE in your multinom() call.") 98 | } 99 | 100 | # Names of variables in model (without the "list" character in the vector) 101 | variables <- as.character(attr(model$terms, "variables"))[-1] 102 | 103 | if(!(x %in% variables) == TRUE){ 104 | stop("x-variable is not an independent variable in the model. There might be a typo.") 105 | } 106 | 107 | # Check if scenario is supplied correctly 108 | if (is.null(z) == FALSE & is.character(z) == FALSE) { 109 | stop("Please supply a character string of your scenario of interest") 110 | } 111 | 112 | if(is.null(z) == FALSE){ 113 | if (!(z %in% variables) == TRUE) { 114 | stop("The scenario variable is not an independent variable in the model. There might be a typo.") 115 | } 116 | } 117 | 118 | # > Handeling the IVs -------------------------------------------------------- 119 | # Name of independent variables 120 | iv <- variables[2:length(variables)] 121 | output[["IV"]] <- iv 122 | 123 | # Variables have to be numeric 124 | if (length(iv) > 1) { 125 | if (sum(apply(data[, iv], 2, class) %in% c("numeric", "integer")) < ncol(data[, iv])) { 126 | stop("Please supply data that consists of numeric values. The package can not handle factor or character variables, yet. For workarounds, please take a look at the github issues (https://github.com/ManuelNeumann/MNLpred/issues/1). 
The problem will hopefully be fixed with the 0.1.0 release.") 127 | } 128 | } else { 129 | if (class(eval(parse(text = paste0("data$", iv)))) %in% c("numeric", "integer") == FALSE) { 130 | stop("Please supply data that consists of numeric values. The package can not handle factor or character variables, yet. For workarounds, please take a look at the github issues (https://github.com/ManuelNeumann/MNLpred/issues/1). The problem will hopefully be fixed with the 0.1.0 release.") 131 | } 132 | } 133 | 134 | # > Handeling the DVs -------------------------------------------------------- 135 | # Name of dependent variable 136 | dv <- variables[1] 137 | output[["DV"]] <- dv 138 | 139 | 140 | # > Full observations (listwise deletion) ------------------------------------ 141 | data_redux <- na.omit(data[, c(dv, iv)]) 142 | 143 | # Number of full observations 144 | obs <- nrow(data_redux) 145 | output[["Observations"]] <- obs 146 | 147 | # > Working with the model --------------------------------------------------- 148 | # Get matrix of coefficients out of the model 149 | coefmatrix <- coef(model) 150 | 151 | # Number of coefficients 152 | ncoef <- ncol(coefmatrix) 153 | 154 | # Model coefficients as a vector 155 | mu <- as.vector(t(coef(model))) 156 | 157 | # Variance-covariance matrix of estimates 158 | varcov <- solve(model$Hessian) 159 | 160 | # Set seed if needed: 161 | if (seed != "random") { 162 | set.seed(seed = seed) 163 | } 164 | 165 | # Simulate a sampling distribution 166 | S <- mvrnorm(nsim, mu, varcov) 167 | output[["S"]] <- S 168 | 169 | # Artificial variation of independent variable of interest (x) 170 | if (is.null(by) == TRUE) { 171 | by <- abs(min(eval(parse(text = paste0("data$", x))), na.rm = TRUE) - 172 | max(eval(parse(text = paste0("data$", x))), na.rm = TRUE)) 173 | } 174 | 175 | variation <- seq(from = min(eval(parse(text = paste0("data$", x))), 176 | na.rm = TRUE), 177 | to = max(eval(parse(text = paste0("data$", x))), 178 | na.rm = TRUE), 179 | by 
= by) 180 | 181 | output[["Variation"]] <- variation 182 | 183 | # Length of sequence 184 | nseq <- length(variation) 185 | 186 | if (nseq == 1) { 187 | stop("Please supply a dataset or a x-variable with variation") 188 | } 189 | 190 | output[["nVariation"]] <- nseq 191 | 192 | # Choice categories of the dependent variable 193 | categories <- sort(unique(eval(parse(text = paste0("data$", dv))))) 194 | J <- length(categories) 195 | 196 | if (J < 3) { 197 | stop("Please supply a dataset with a dependent variable that has a sufficient number of outcomes (> 2)") 198 | } 199 | 200 | output[["ChoiceCategories"]] <- categories 201 | output[["nChoices"]] <- J 202 | 203 | # Numbers of interactions 204 | ninteraction <- sum(grepl(":", model$coefnames)) 205 | 206 | # Matrix of observations 207 | X <- matrix(NA, ncol = ncoef, nrow = obs) 208 | colnames(X) <- model$coefnames 209 | # 1 for the Intercept 210 | X[, 1] <- 1 211 | # Values of the independent variables 212 | X[, 2:(length(iv)+1)] <- as.matrix(data_redux[, iv]) 213 | 214 | 215 | # Prepare array to fill in the matrix with the observed values 216 | ovacases <- array(NA, c(dim(X), nseq)) 217 | # Fill in the matrices: 218 | ovacases[,,] <- X 219 | 220 | # Select the position of the variable which should vary: 221 | varidim <- which(colnames(X) == x) 222 | 223 | 224 | # Artificially alter the variable in each dimension according to 225 | # the preferred sequence: 226 | for (i in 1:nseq) { 227 | ovacases[, varidim, i] <- variation[i] 228 | } 229 | 230 | # Hold a second variable steady (if need be) 231 | if(is.null(z) == FALSE) { 232 | scendim <- which(colnames(X) == z) 233 | 234 | for (i in 1:nseq) { 235 | ovacases[, scendim, i] <- z_value 236 | } 237 | } 238 | 239 | # Compute interactions: 240 | if (ninteraction != 0) { 241 | 242 | # Get position of interaction terms 243 | interactionterms <- which(grepl(":", model$coefnames) == TRUE) 244 | 245 | # Compute the terms: 246 | for (i in c(interactionterms)) { 247 | # First 
variable name of the interaction: 248 | firstint <- gsub(":.*", "", model$coefnames[i]) 249 | # Second variable name of the interaction: 250 | secondint <- gsub(".*:", "", model$coefnames[i]) 251 | 252 | # Get position in matrix: 253 | intdim1 <- which(colnames(X) == firstint) 254 | intdim2 <- which(colnames(X) == secondint) 255 | 256 | # Compute interaction term: 257 | for(j in 1:nseq) { 258 | ovacases[, i, j] <- ovacases[, intdim1, j]*ovacases[, intdim2, j] 259 | } 260 | } 261 | } 262 | 263 | # Prepare array of observed values: 264 | ovaV <- array(NA, c(obs, nsim, nseq, J)) 265 | 266 | # Add progress bar 267 | pb_multiplication <- txtProgressBar(min = 0, max = nseq, initial = 0) 268 | 269 | # Loop over all scenarios 270 | cat("Multiplying values with simulated estimates:\n") 271 | 272 | # Loop over all scenarios 273 | for(i in 1:nseq){ 274 | ovaV[, , i, 1] <- apply(matrix(0, 275 | nrow = nsim, 276 | ncol = ncol(X)), 277 | 1, 278 | function(s) ovacases[, , i] %*% s) 279 | # ^ This will be zero because of the baseline category ^ 280 | 281 | # For each choice, the cases will now be multiplied with the simulated estimates 282 | for (k in 2:J) { 283 | coefstart <- (k-2)*ncoef+1 284 | coefend <- (k-1)*ncoef 285 | element <- parse(text = paste0("ovaV[,, i,", k, "] <- apply(S[, ", 286 | coefstart, 287 | ":", 288 | coefend, 289 | "], 1, function(s) ovacases[,, i] %*% s)")) 290 | eval(element) 291 | } 292 | 293 | # Progress bar: 294 | setTxtProgressBar(pb_multiplication, i) 295 | } 296 | 297 | # Multinomial link function: 298 | 299 | # 1. Part: Sum over cases 300 | Sexp <- rowSums(exp(ovaV), dims = 3L) 301 | 302 | # Create P (array with predictions) 303 | P <- array(NA, c(nsim, J, nseq)) 304 | 305 | # 2. 
Part: take the exponent and divide through the sum of all (Sexp) 306 | # Add progress bar 307 | pb_link <- txtProgressBar(min = 0, max = nseq, initial = 0) 308 | 309 | # Loop over all scenarios 310 | cat("\nApplying link function:\n") 311 | 312 | for (l in 1:nseq) { 313 | for (m in 1:J) { 314 | P[, m, l] <- colMeans(exp(ovaV[, , l, m]) / Sexp[, , l]) 315 | if (sum(is.na(P[, m, l])) != 0) { 316 | stop("Stop") 317 | } 318 | } 319 | 320 | # Progress bar: 321 | setTxtProgressBar(pb_link, l) 322 | } 323 | 324 | 325 | output[["P"]] <- P 326 | 327 | # Aggregate the simulations 328 | # Create tibble for plot 329 | # if (is.null(z_value) == TRUE) { 330 | # plotdat <- tibble::tibble(iv = rep(variation, J), 331 | # categories = rep(categories, each = length(variation)), 332 | # mean = NA, 333 | # lower = NA, 334 | # upper = NA) 335 | # } else { 336 | # plotdat <- tibble::tibble(iv = rep(variation, J), 337 | # categories = rep(categories, each = length(variation)), 338 | # scen = rep(z_value, each = length(categories)), 339 | # mean = NA, 340 | # lower = NA, 341 | # upper = NA) 342 | # } 343 | 344 | if (is.null(z_value) == TRUE) { 345 | plotdat <- data.frame(iv = rep(variation, J), 346 | categories = rep(categories, each = length(variation)), 347 | mean = NA, 348 | lower = NA, 349 | upper = NA) 350 | } else { 351 | plotdat <- data.frame(iv = rep(variation, J), 352 | categories = rep(categories, each = length(variation)), 353 | scen = rep(z_value, each = length(variation)), 354 | mean = NA, 355 | lower = NA, 356 | upper = NA) 357 | } 358 | 359 | 360 | 361 | # Aggregate 362 | start <- 1 363 | 364 | for (i in 1:J) { 365 | end <- i*length(variation) 366 | plotdat[c(start:end), "mean"] <- colMeans(P[, i,]) 367 | plotdat[c(start:end), "lower"] <- apply(P[, i,], 2, quantile, probs = probs[1]) 368 | plotdat[c(start:end), "upper"] <- apply(P[, i,], 2, quantile, probs = probs[2]) 369 | start <- end+1 370 | } 371 | 372 | # Rename the variables in the plot data 373 | if (is.null(z) == 
TRUE) { 374 | colnames(plotdat)[1:2] <- c(x, dv) 375 | } else { 376 | colnames(plotdat)[1:3] <- c(x, dv, z) 377 | } 378 | 379 | 380 | # Put the data in the output 381 | output[["plotdata"]] <- plotdat 382 | 383 | cat("\nDone!\n\n") 384 | 385 | return(output) 386 | } 387 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | bibliography: vignettes/bibliography.bib 4 | --- 5 | 6 | 7 | 8 | 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | fig.path = "man/figures/README-" 15 | ) 16 | ``` 17 | 18 | # MNLpred - Simulated Predictions From Multinomial Logistic Models 19 | 20 | 21 | [![GPLv3 license](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://perso.crans.org/besson/LICENSE.html) 22 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/MNLpred)](https://cran.r-project.org/package=MNLpred) 23 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4525342.svg)](https://doi.org/10.5281/zenodo.4525342) 24 | [![downloads](https://cranlogs.r-pkg.org/badges/MNLpred)](https://www.r-pkg.org/badges/version/MNLpred) 25 | [![total_downloads](https://cranlogs.r-pkg.org/badges/grand-total/MNLpred)](https://www.r-pkg.org/badges/version/MNLpred) 26 | [![R build status](https://github.com/ManuelNeumann/MNLpred/workflows/R-CMD-check/badge.svg)](https://github.com/ManuelNeumann/MNLpred/actions) 27 | 28 | 29 | This package provides functions that make it easy to get plottable predictions from multinomial logit models. The predictions are based on simulated draws of regression estimates from their respective sampling distribution. 30 | 31 | At first I will present the theoretical and statistical background, before using sample data to demonstrate the functions of the package. 
32 | 33 | ## The Multinomial Logit Model 34 | 35 | For the statistical and theoretical background of the multinomial logit regression please refer to the vignette or sources like [these lecture notes by Germán Rodríguez](https://data.princeton.edu/wws509/notes/c6s2). 36 | 37 | Due to the inconvenience of integrating math equations in the README file, this is not the place to write comprehensively about it. 38 | 39 | These are the important characteristics of the model: 40 | 41 | - The multinomial logit regression is used to model nominal outcomes. It provides the opportunity to assign specific choices a probability, based on a set of independent variables. 42 | - The model needs an assigned baseline category to be identifiable. All other choices are evaluated in contrast to this reference. 43 | - The model returns a set of coefficients for each choice category. 44 | - Like all logit models, the multinomial logit model returns log-odds which are difficult to interpret in terms of effect sizes and uncertainties. 45 | 46 | This package helps to interpret the model in meaningful ways. 47 | 48 | ## Using the Package 49 | 50 | ### Installing 51 | 52 | The package can be both installed from CRAN or the github repository: 53 | 54 | ```{r} 55 | # Uncomment if necessary: 56 | 57 | # install.packages("MNLpred") 58 | # devtools::install_github("ManuelNeumann/MNLpred") 59 | ``` 60 | 61 | 62 | ### How Does the Function Work? 63 | 64 | As we have seen above, the multinomial logit can be used to get an insight into the probabilities to choose one option out of a set of alternatives. We have also seen that we need a baseline category to identify the model. This is mathematically necessary, but does not come in handy for purposes of interpretation. 65 | 66 | It is far more helpful and easier to understand to come up with predicted probabilities and first differences for values of interest [see e.g., @king2000 for approaches in social sciences]. 
Based on simulations, this package helps to easily predict probabilities and their uncertainty in the form of confidence intervals for each choice category over a specified scenario. The functions use the observed values to compute the predicted probabilities, as is recommended by @hanmer2013. 67 | 68 | The procedure follows the following steps: 69 | 70 | 1. Estimate a multinomial model and save the coefficients and the variance covariance matrix (based on the Hessian-matrix of the model). 71 | 1. To simulate uncertainty, make n draws of coefficients from a simulated sampling distribution based on the coefficients and the variance covariance matrix. 72 | 1. Predict probabilities by multiplying the drawn coefficients with a specified scenario (the observed values). 73 | 1. Take the mean and the quantiles of the simulated predicted probabilities. 74 | 75 | The presented functions follow these steps. Additionally, they use the so-called observed value approach. This means that the "scenario" uses all observed values that informed the model. Therefore the function takes these more detailed steps: 76 | 77 | 1. For all (complete) cases n predictions are computed based on their observed independent values and the n sets of coefficients. 78 | 1. Next, the predicted values of all observations for each simulation are averaged. 79 | 1. Take the mean and the quantiles of the simulated predicted probabilities (same as above). 80 | 81 | For first differences, the simulated predictions are subtracted from each other. 82 | 83 | To showcase these steps, I present a reproducible example of how the functions can be used. 84 | 85 | ### Example 86 | 87 | The example uses data from the German Longitudinal Election Study (GLES, @rosteutscher2019). 88 | 89 | The data contains 1,000 respondents' characteristics and their vote choice. 
90 | 91 | For this task, we need the following packages: 92 | 93 | ```{r setup} 94 | # Required packages 95 | library(magrittr) # for pipes 96 | library(nnet) # for the multinom()-function 97 | library(MASS) # for the multivariate normal distribution 98 | 99 | # The package 100 | library(MNLpred) 101 | 102 | # Plotting the predicted probabilities: 103 | library(ggplot2) 104 | library(scales) 105 | ``` 106 | 107 | Now we load the data: 108 | 109 | ```{r data, echo=TRUE} 110 | # The data: 111 | data("gles") 112 | ``` 113 | 114 | The next step is to compute the actual model. The function of the `MNLpred` package is based on models that were estimated with the `multinom()`-function of the `nnet` package. The `multinom()` function is convenient because it does not need transformed datasets. The syntax is very easy and resembles the ordinary regression functions. It is important that the Hessian matrix is returned with `Hess = TRUE`. The matrix is needed to simulate the sampling distribution. 115 | 116 | As we have seen above, we need a baseline or reference category for the model to work. Therefore, be aware of what your baseline category is. If you use a dependent variable that is of type `character`, the categories will be ordered in alphabetical order. If you have a `factor` at hand, you can define your baseline category, for example with the `relevel()` function. 117 | 118 | Now, let's estimate the model: 119 | 120 | ```{r model} 121 | # Multinomial logit model: 122 | mod1 <- multinom(vote ~ egoposition_immigration + 123 |                    political_interest + 124 |                    income + gender + ostwest, 125 |                  data = gles, 126 |                  Hess = TRUE) 127 | ``` 128 | The results show the coefficients and standard errors. As we can see, there are five sets of coefficients. They describe the relationship between the reference category (`AfD`) and the vote choices for the parties `CDU/CSU`, `FDP`, `Gruene`, `LINKE`, and `SPD`. 
129 | 130 | ```{r results} 131 | summary(mod1) 132 | ``` 133 | 134 | A first rough review of the coefficients shows that a more restrictive ego-position toward immigration leads to a lower probability of the voters to choose any other party than the AfD. It is hard to evaluate whether the effect is statistically significant and what the probabilities for each choice look like. For this it is helpful to predict the probabilities for certain scenarios and plot the means and confidence intervals for visual analysis. 135 | 136 | Let's say we are interested in the relationship between the ego-position toward immigration and the probability to choose any of the parties. It would be helpful to plot the predicted probabilities for the span of the positions. 137 | 138 | ```{r math} 139 | summary(gles$egoposition_immigration) 140 | ``` 141 | 142 | As we can see, the ego positions were recorded on a scale from 0 to 10. Higher numbers represent more restrictive positions. 143 | We pick this score as the x-variable (`x`) and use the `mnl_pred_ova()` function to get predicted probabilities for each position in this range. 144 | 145 | The function needs a multinomial logit model (`model`), data (`data`), the variable of interest `x`, the steps for which the probabilities should be predicted (`by`). Additionally, a `seed` can be defined for replication purposes, the numbers of simulations can be defined (`nsim`), and the confidence intervals (`probs`). 146 | 147 | If we want to hold another variable stable, we can specify so with `z` and `z_value`. See also the `mnl_fd_ova()` function below. 148 | 149 | ```{r mnl_pred_ova} 150 | pred1 <- mnl_pred_ova(model = mod1, 151 |                       data = gles, 152 |                       x = "egoposition_immigration", 153 |                       by = 1, 154 |                       seed = 68159, 155 |                       nsim = 100, # faster 156 |                       probs = c(0.025, 0.975)) # default 157 | ``` 158 | 159 | The function returns a list with several elements. 
Most importantly, it returns a `plotdata` data set: 160 | 161 | ```{r return} 162 | pred1$plotdata %>% head() 163 | ``` 164 | 165 | As we can see, it includes the range of the x variable, a mean, a lower, and an upper bound of the confidence interval. Concerning the choice category, the data is in a long format. This makes it easy to plot it with the `ggplot` syntax. The choice category can now easily be used to differentiate the lines in the plot by using `linetype = vote` in the `aes()`. Another option is to use `facet_wrap()` or `facet_grid()` to differentiate the predictions: 166 | 167 | ```{r prediction_plot1} 168 | ggplot(data = pred1$plotdata, aes(x = egoposition_immigration, 169 | y = mean, 170 | ymin = lower, ymax = upper)) + 171 | geom_ribbon(alpha = 0.1) + # Confidence intervals 172 | geom_line() + # Mean 173 | facet_wrap(.~ vote, scales = "free_y", ncol = 2) + 174 | scale_y_continuous(labels = percent_format(accuracy = 1)) + # % labels 175 | scale_x_continuous(breaks = c(0:10), 176 | minor_breaks = FALSE) + 177 | theme_bw() + 178 | labs(y = "Predicted probabilities", 179 | x = "Ego-position toward immigration") # Always label your axes ;) 180 | ``` 181 | 182 | If we want first differences between two scenarios, we can use the function `mnl_fd2_ova()`. The function takes similar arguments as the function above, but now the values for the scenarios of interest have to be supplied. Imagine we want to know what difference it makes to position oneself on the most tolerant or most restrictive end of the `egoposition_immigration` scale. This can be done as follows: 183 | 184 | ```{r static_fd} 185 | fdif1 <- mnl_fd2_ova(model = mod1, 186 | data = gles, 187 | x = "egoposition_immigration", 188 | value1 = min(gles$egoposition_immigration), 189 | value2 = max(gles$egoposition_immigration), 190 | seed = 68159, 191 | nsim = 100) 192 | ``` 193 | 194 | The first differences can then be depicted in a graph. 
195 | 196 | ```{r static_fd_plot} 197 | ggplot(fdif1$plotdata_fd, aes(x = categories, 198 | y = mean, 199 | ymin = lower, ymax = upper)) + 200 | geom_pointrange() + 201 | geom_hline(yintercept = 0) + 202 | scale_y_continuous(labels = percent_format()) + 203 | theme_bw() + 204 | labs(y = "Predicted probabilities", 205 | x = "Party vote") 206 | ``` 207 | 208 | 209 | We are often not only interested in the static difference, but the difference across a span of values, given a difference in a second variable. This is especially helpful when we look at dummy variables. For example, we could be interested in the effect of `gender` on the vote decision over the different ego-positions. With the `mnl_fd_ova()` function, we can predict the probabilities for two scenarios and subtract them. The function returns the differences and the confidence intervals of the differences. The different scenarios can be held stable with `z` and the `z_values`. `z_values` takes a vector of two numeric values. These values are held stable for the variable that is named in `z`. 210 | 211 | ```{r first_diffferences_prediction} 212 | fdif2 <- mnl_fd_ova(model = mod1, 213 | data = gles, 214 | x = "egoposition_immigration", 215 | by = 1, 216 | z = "gender", 217 | z_values = c(0,1), 218 | seed = 68159, 219 | nsim = 100) 220 | ``` 221 | As before, the function returns a list including a data set that can be used to plot the differences. 222 | 223 | ```{r fd_return} 224 | fdif2$plotdata_fd %>% head() 225 | ``` 226 | 227 | Since the function calls the `mnl_pred_ova()` function internally, it also returns the output of the two predictions in the list element `Prediction1` and `Prediction2`. The plot data for the predictions is already bound together row wise to easily plot the predicted probabilities. 
228 | 229 | ```{r prediction_plot2} 230 | ggplot(data = fdif2$plotdata, aes(x = egoposition_immigration, 231 | y = mean, 232 | ymin = lower, ymax = upper, 233 | group = as.factor(gender), 234 | linetype = as.factor(gender))) + 235 | geom_ribbon(alpha = 0.1) + 236 | geom_line() + 237 | facet_wrap(. ~ vote, scales = "free_y", ncol = 2) + 238 | scale_y_continuous(labels = percent_format(accuracy = 1)) + # % labels 239 | scale_x_continuous(breaks = c(0:10), 240 | minor_breaks = FALSE) + 241 | scale_linetype_discrete(name = "Gender", 242 | breaks = c(0, 1), 243 | labels = c("Male", "Female")) + 244 | theme_bw() + 245 | labs(y = "Predicted probabilities", 246 | x = "Ego-position toward immigration") # Always label your axes ;) 247 | ``` 248 | 249 | As we can see, the differences between `female` and `male` differ, depending on the party and ego-position. So let's take a look at the differences: 250 | 251 | ```{r first_differences_plot} 252 | ggplot(data = fdif2$plotdata_fd, aes(x = egoposition_immigration, 253 | y = mean, 254 | ymin = lower, ymax = upper)) + 255 | geom_ribbon(alpha = 0.1) + 256 | geom_line() + 257 | geom_hline(yintercept = 0) + 258 | facet_wrap(. ~ vote, ncol = 3) + 259 | scale_y_continuous(labels = percent_format(accuracy = 1)) + # % labels 260 | scale_x_continuous(breaks = c(0:10), 261 | minor_breaks = FALSE) + 262 | theme_bw() + 263 | labs(y = "Predicted probabilities", 264 | x = "Ego-position toward immigration") # Always label your axes ;) 265 | ``` 266 | 267 | We can see that the differences are for some parties at no point statistically significant from 0. 268 | 269 | 270 | ## Conclusion 271 | Multinomial logit models are important to model nominal choices. They are, however, restricted by being in need of a baseline category. Additionally, the log-character of the estimates makes it difficult to interpret them in meaningful ways. Predicting probabilities for all choices for scenarios, based on the observed data provides much more insight. 
The functions of this package provide easy to use functions that return data that can be used to plot predicted probabilities. The function uses a model from the `multinom()` function and uses the observed value approach and a supplied scenario to predict values over the range of fitting values. The functions simulate sampling distributions and therefore provide meaningful confidence intervals. `mnl_pred_ova()` can be used to predict probabilities for a certain scenario. `mnl_fd_ova()` can be used to predict probabilities for two scenarios and their first differences. 272 | 273 | ## Acknowledgment 274 | 275 | My code is inspired by the method courses in the [Political Science master's program at the University of Mannheim](https://www.sowi.uni-mannheim.de/en/academics/prospective-students/ma-in-political-science/)(cool place, check it out!). The skeleton of the code is based on a tutorial taught by [Marcel Neunhoeffer](https://www.marcel-neunhoeffer.com/) (lecture: "Advanced Quantitative Methods" by [Thomas Gschwend](https://www.sowi.uni-mannheim.de/gschwend/)). 
276 | 277 | ## DOI 278 | 279 | General DOI (always links to most recent version): [10.5281/zenodo.4525342](https://doi.org/10.5281/zenodo.4525342) 280 | 281 | DOIs for different versions: 282 | 283 | - Version 0.0.6: [10.5281/zenodo.4580427](https://doi.org/10.5281/zenodo.4580427) 284 | - Version 0.0.5: [10.5281/zenodo.4525343](https://doi.org/10.5281/zenodo.4525343) 285 | 286 | ## References 287 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # MNLpred - Simulated Predictions From Multinomial Logistic Models 5 | 6 | 7 | 8 | [![GPLv3 9 | license](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://perso.crans.org/besson/LICENSE.html) 10 | [![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/MNLpred)](https://cran.r-project.org/package=MNLpred) 11 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4525342.svg)](https://doi.org/10.5281/zenodo.4525342) 12 | [![downloads](https://cranlogs.r-pkg.org/badges/MNLpred)](https://www.r-pkg.org/badges/version/MNLpred) 13 | [![total\_downloads](https://cranlogs.r-pkg.org/badges/grand-total/MNLpred)](https://www.r-pkg.org/badges/version/MNLpred) 14 | [![R build 15 | status](https://github.com/ManuelNeumann/MNLpred/workflows/R-CMD-check/badge.svg)](https://github.com/ManuelNeumann/MNLpred/actions) 16 | 17 | 18 | This package provides functions that make it easy to get plottable 19 | predictions from multinomial logit models. The predictions are based on 20 | simulated draws of regression estimates from their respective sampling 21 | distribution. 22 | 23 | At first I will present the theoretical and statistical background, 24 | before using sample data to demonstrate the functions of the package. 
25 | 26 | ## The Multinomial Logit Model 27 | 28 | For the statistical and theoretical background of the multinomial logit 29 | regression please refer to the vignette or sources like [these lecture 30 | notes by Germán 31 | Rodríguez](https://data.princeton.edu/wws509/notes/c6s2). 32 | 33 | Due to the inconvenience of integrating math equations in the README 34 | file, this is not the place to write comprehensively about it. 35 | 36 | These are the important characteristics of the model: 37 | 38 | - The multinomial logit regression is used to model nominal outcomes. 39 | It provides the opportunity to assign specific choices a 40 | probability, based on a set of independent variables. 41 | - The model needs an assigned baseline category to be identifiable. 42 | All other choices are evaluated in contrast to this reference. 43 | - The model returns a set of coefficients for each choice category. 44 | - Like all logit models, the multinomial logit model returns log-odds 45 | which are difficult to interpret in terms of effect sizes and 46 | uncertainties. 47 | 48 | This package helps to interpret the model in meaningful ways. 49 | 50 | ## Using the Package 51 | 52 | ### Installing 53 | 54 | The package can be both installed from CRAN or the github repository: 55 | 56 | ``` r 57 | # Uncomment if necessary: 58 | 59 | # install.packages("MNLpred") 60 | # devtools::install_github("ManuelNeumann/MNLpred") 61 | ``` 62 | 63 | ### How Does the Function Work? 64 | 65 | As we have seen above, the multinomial logit can be used to get an 66 | insight into the probabilities to choose one option out of a set of 67 | alternatives. We have also seen that we need a baseline category to 68 | identify the model. This is mathematically necessary, but does not come 69 | in handy for purposes of interpretation. 
70 | 71 | It is far more helpful and easier to understand to come up with 72 | predicted probabilities and first differences for values of interest 73 | (see e.g., King, Tomz, and Wittenberg 2000 for approaches in social 74 | sciences). Based on simulations, this package helps to easily predict 75 | probabilities and their uncertainty in forms of confidence intervals for 76 | each choice category over a specified scenario. The functions use the 77 | observed values to compute the predicted probabilities, as is 78 | recommended by Hanmer and Ozan Kalkan (2013). 79 | 80 | The procedure follows the following steps: 81 | 82 | 1. Estimate a multinomial model and save the coefficients and the 83 | variance covariance matrix (based on the Hessian-matrix of the 84 | model). 85 | 2. To simulate uncertainty, make n draws of coefficients from a 86 | simulated sampling distribution based on the coefficients and the 87 | variance covariance matrix. 88 | 3. Predict probabilities by multiplying the drawn coefficients with a 89 | specified scenario (the observed values). 90 | 4. Take the mean and the quantiles of the simulated predicted 91 | probabilities. 92 | 93 | The presented functions follow these steps. Additionally, they use the 94 | so called observed value approach. This means that the “scenario” uses 95 | all observed values that informed the model. Therefore the function 96 | takes these more detailed steps: 97 | 98 | 1. For all (complete) cases n predictions are computed based on their 99 | observed independent values and the n sets of coefficients. 100 | 2. Next, the predicted values of all observations for each simulation 101 | are averaged. 102 | 3. Take the mean and the quantiles of the simulated predicted 103 | probabilities (same as above). 104 | 105 | For first differences, the simulated predictions are subtracted from 106 | each other. 107 | 108 | To showcase these steps, I present a reproducible example of how the 109 | functions can be used. 
110 | 111 | ### Example 112 | 113 | The example uses data from the German Longitudinal Election Study (GLES, 114 | Roßteutscher et al. (2019)). 115 | 116 | The contains 1,000 respondents characteristics and their vote choice. 117 | 118 | For this task, we need the following packages: 119 | 120 | ``` r 121 | # Required packages 122 | library(magrittr) # for pipes 123 | library(nnet) # for the multinom()-function 124 | library(MASS) # for the multivariate normal distribution 125 | 126 | # The package 127 | library(MNLpred) 128 | 129 | # Plotting the predicted probabilities: 130 | library(ggplot2) 131 | library(scales) 132 | ``` 133 | 134 | Now we load the data: 135 | 136 | ``` r 137 | # The data: 138 | data("gles") 139 | ``` 140 | 141 | The next step is to compute the actual model. The function of the 142 | `MNLpred` package is based on models that were estimated with the 143 | `multinom()`-function of the `nnet` package. The `multinom()` function 144 | is convenient because it does not need transformed datasets. The syntax 145 | is very easy and resembles the ordinary regression functions. Important 146 | is that the Hessian matrix is returned with `Hess = TRUE`. The matrix is 147 | needed to simulate the sampling distribution. 148 | 149 | As we have seen above, we need a baseline or reference category for the 150 | model to work. Therefore, be aware what your baseline category is. If 151 | you use a dependent variable that is of type `character`, the categories 152 | will be ordered in alphabetical order. If you have a `factor`at hand, 153 | you can define your baseline category, for example with the 154 | `relevel()`function. 
155 | 156 | Now, let’s estimate the model: 157 | 158 | ``` r 159 | # Multinomial logit model: 160 | mod1 <- multinom(vote ~ egoposition_immigration + 161 | political_interest + 162 | income + gender + ostwest, 163 | data = gles, 164 | Hess = TRUE) 165 | #> # weights: 42 (30 variable) 166 | #> initial value 1791.759469 167 | #> iter 10 value 1644.501289 168 | #> iter 20 value 1553.803188 169 | #> iter 30 value 1538.792079 170 | #> final value 1537.906674 171 | #> converged 172 | ``` 173 | 174 | The results show the coefficients and standard errors. As we can see, 175 | there are five sets of coefficients. They describe the relationship 176 | between the reference category (`AfD`) and the vote choices for the 177 | parties `CDU/CSU`, `FDP`, `Gruene`, `LINKE`, and `SPD`. 178 | 179 | ``` r 180 | summary(mod1) 181 | #> Call: 182 | #> multinom(formula = vote ~ egoposition_immigration + political_interest + 183 | #> income + gender + ostwest, data = gles, Hess = TRUE) 184 | #> 185 | #> Coefficients: 186 | #> (Intercept) egoposition_immigration political_interest income 187 | #> CDU/CSU 3.101201 -0.4419104 -0.29177070 0.33114348 188 | #> FDP 2.070618 -0.4106626 -0.19044703 0.18496691 189 | #> Gruene 3.232074 -0.8482213 -0.03023454 0.24330589 190 | #> LINKE 4.990008 -0.7477359 -0.04503371 -0.24206850 191 | #> SPD 3.799394 -0.6425427 -0.03514426 0.08211066 192 | #> gender ostwest 193 | #> CDU/CSU 1.296949 0.79760035 194 | #> FDP 1.252112 1.01378955 195 | #> Gruene 1.831714 0.76299897 196 | #> LINKE 1.368591 -0.02428322 197 | #> SPD 1.497019 0.74026388 198 | #> 199 | #> Std. 
Errors: 200 | #> (Intercept) egoposition_immigration political_interest income 201 | #> CDU/CSU 0.8568928 0.06504100 0.1694270 0.1872533 202 | #> FDP 0.9508589 0.07083385 0.1882974 0.2063590 203 | #> Gruene 0.9854950 0.07887427 0.1969704 0.2149368 204 | #> LINKE 0.9505656 0.07755359 0.1962954 0.2058036 205 | #> SPD 0.8880256 0.06912678 0.1779570 0.1924002 206 | #> gender ostwest 207 | #> CDU/CSU 0.3530225 0.3120178 208 | #> FDP 0.3794295 0.3615071 209 | #> Gruene 0.3875116 0.3671397 210 | #> LINKE 0.3885663 0.3493941 211 | #> SPD 0.3625652 0.3269007 212 | #> 213 | #> Residual Deviance: 3075.813 214 | #> AIC: 3135.813 215 | ``` 216 | 217 | A first rough review of the coefficients shows that a more restrictive 218 | ego-position toward immigration leads to a lower probability of the 219 | voters to choose any other party than the AfD. It is hard to evaluate 220 | whether the effect is statistically significant and how the 221 | probabilities for each choice look like. For this it is helpful to 222 | predict the probabilities for certain scenarios and plot the means and 223 | confidence intervals for visual analysis. 224 | 225 | Let’s say we are interested in the relationship between the ego-position 226 | toward immigration and the probability to choose any of the parties. It 227 | would be helpful to plot the predicted probabilities for the span of the 228 | positions. 229 | 230 | ``` r 231 | summary(gles$egoposition_immigration) 232 | #> Min. 1st Qu. Median Mean 3rd Qu. Max. 233 | #> 0.000 3.000 4.000 4.361 6.000 10.000 234 | ``` 235 | 236 | As we can see, the ego positions were recorded on a scale from 0 to 10. 237 | Higher numbers represent more restrictive positions. We pick this score 238 | as the x-variable (`x`) and use the `mnl_pred_ova()` function to get 239 | predicted probabilities for each position in this range. 
240 | 241 | The function needs a multinomial logit model (`model`), data (`data`), 242 | the variable of interest `x`, the steps for which the probabilities 243 | should be predicted (`by`). Additionally, a `seed` can be defined for 244 | replication purposes, the numbers of simulations can be defined 245 | (`nsim`), and the confidence intervals (`probs`). 246 | 247 | If we want to hold another variable stable, we can specify so with 248 | `z`and `z_value`. See also the `mnl_fd_ova()` function below. 249 | 250 | ``` r 251 | pred1 <- mnl_pred_ova(model = mod1, 252 | data = gles, 253 | x = "egoposition_immigration", 254 | by = 1, 255 | seed = 68159, 256 | nsim = 100, # faster 257 | probs = c(0.025, 0.975)) # default 258 | #> Multiplying values with simulated estimates: 259 | #> ================================================================================ 260 | #> Applying link function: 261 | #> ================================================================================ 262 | #> Done! 263 | ``` 264 | 265 | The function returns a list with several elements. Most importantly, it 266 | returns a `plotdata` data set: 267 | 268 | ``` r 269 | pred1$plotdata %>% head() 270 | #> egoposition_immigration vote mean lower upper 271 | #> 1 0 AfD 0.002419192 0.001025942 0.004913258 272 | #> 2 1 AfD 0.004625108 0.002172854 0.008685788 273 | #> 3 2 AfD 0.008653845 0.004472502 0.014698651 274 | #> 4 3 AfD 0.015796304 0.008923480 0.025148142 275 | #> 5 4 AfD 0.028022769 0.017756232 0.040861062 276 | #> 6 5 AfD 0.048081830 0.033241425 0.063794110 277 | ``` 278 | 279 | As we can see, it includes the range of the x variable, a mean, a lower, 280 | and an upper bound of the confidence interval. Concerning the choice 281 | category, the data is in a long format. This makes it easy to plot it 282 | with the `ggplot` syntax. The choice category can now easily be used to 283 | differentiate the lines in the plot by using `linetype = vote` in the 284 | `aes()`. 
Another option is to use `facet_wrap()` or `facet_grid()` to 285 | differentiate the predictions: 286 | 287 | ``` r 288 | ggplot(data = pred1$plotdata, aes(x = egoposition_immigration, 289 | y = mean, 290 | ymin = lower, ymax = upper)) + 291 | geom_ribbon(alpha = 0.1) + # Confidence intervals 292 | geom_line() + # Mean 293 | facet_wrap(.~ vote, scales = "free_y", ncol = 2) + 294 | scale_y_continuous(labels = percent_format(accuracy = 1)) + # % labels 295 | scale_x_continuous(breaks = c(0:10), 296 | minor_breaks = FALSE) + 297 | theme_bw() + 298 | labs(y = "Predicted probabilities", 299 | x = "Ego-position toward immigration") # Always label your axes ;) 300 | ``` 301 | 302 | ![](man/figures/README-prediction_plot1-1.png) 303 | 304 | If we want first differences between two scenarios, we can use the 305 | function `mnl_fd2_ova()`. The function takes similar arguments as the 306 | function above, but now the values for the scenarios of interest have to 307 | be supplied. Imagine we want to know what difference it makes to 308 | position oneself on the most tolerant or most restrictive end of the 309 | `egoposition_immigration` scale. This can be done as follows: 310 | 311 | ``` r 312 | fdif1 <- mnl_fd2_ova(model = mod1, 313 | data = gles, 314 | x = "egoposition_immigration", 315 | value1 = min(gles$egoposition_immigration), 316 | value2 = max(gles$egoposition_immigration), 317 | seed = 68159, 318 | nsim = 100) 319 | #> Multiplying values with simulated estimates: 320 | #> ================================================================================ 321 | #> Applying link function: 322 | #> ================================================================================ 323 | #> Done! 324 | ``` 325 | 326 | The first differences can then be depicted in a graph. 
327 | 328 | ``` r 329 | ggplot(fdif1$plotdata_fd, aes(x = categories, 330 | y = mean, 331 | ymin = lower, ymax = upper)) + 332 | geom_pointrange() + 333 | geom_hline(yintercept = 0) + 334 | scale_y_continuous(labels = percent_format()) + 335 | theme_bw() + 336 | labs(y = "Predicted probabilities", 337 | x = "Party vote") 338 | ``` 339 | 340 | ![](man/figures/README-static_fd_plot-1.png) 341 | 342 | We are often not only interested in the static difference, but the 343 | difference across a span of values, given a difference in a second 344 | variable. This is especially helpful when we look at dummy variables. 345 | For example, we could be interested in the effect of `gender` on the 346 | vote decision over the different ego-positions. With the `mnl_fd_ova()` 347 | function, we can predict the probabilities for two scenarios and 348 | subtract them. The function returns the differences and the confidence 349 | intervals of the differences. The different scenarios can be held stable 350 | with `z` and the `z_values`. `z_values` takes a vector of two numeric 351 | values. These values are held stable for the variable that is named in 352 | `z`. 353 | 354 | ``` r 355 | fdif2 <- mnl_fd_ova(model = mod1, 356 | data = gles, 357 | x = "egoposition_immigration", 358 | by = 1, 359 | z = "gender", 360 | z_values = c(0,1), 361 | seed = 68159, 362 | nsim = 100) 363 | #> First scenario: 364 | #> Multiplying values with simulated estimates: 365 | #> ================================================================================ 366 | #> Applying link function: 367 | #> ================================================================================ 368 | #> Done! 369 | #> 370 | #> Second scenario: 371 | #> Multiplying values with simulated estimates: 372 | #> ================================================================================ 373 | #> Applying link function: 374 | #> ================================================================================ 375 | #> Done! 
376 | ``` 377 | 378 | As before, the function returns a list including a data set that can be 379 | used to plot the differences. 380 | 381 | ``` r 382 | fdif2$plotdata_fd %>% head() 383 | #> egoposition_immigration vote mean lower upper 384 | #> 1 0 AfD -0.002861681 -0.006172786 -0.001206796 385 | #> 2 1 AfD -0.005396410 -0.010531302 -0.002537904 386 | #> 3 2 AfD -0.009942124 -0.017921934 -0.005038667 387 | #> 4 3 AfD -0.017827751 -0.029548569 -0.009657235 388 | #> 5 4 AfD -0.030957366 -0.046960384 -0.017810853 389 | #> 6 5 AfD -0.051691516 -0.071903589 -0.031438906 390 | ``` 391 | 392 | Since the function calls the `mnl_pred_ova()` function internally, it 393 | also returns the output of the two predictions in the list element 394 | `Prediction1` and `Prediction2`. The plot data for the predictions is 395 | already bound together row wise to easily plot the predicted 396 | probabilities. 397 | 398 | ``` r 399 | ggplot(data = fdif2$plotdata, aes(x = egoposition_immigration, 400 | y = mean, 401 | ymin = lower, ymax = upper, 402 | group = as.factor(gender), 403 | linetype = as.factor(gender))) + 404 | geom_ribbon(alpha = 0.1) + 405 | geom_line() + 406 | facet_wrap(. ~ vote, scales = "free_y", ncol = 2) + 407 | scale_y_continuous(labels = percent_format(accuracy = 1)) + # % labels 408 | scale_x_continuous(breaks = c(0:10), 409 | minor_breaks = FALSE) + 410 | scale_linetype_discrete(name = "Gender", 411 | breaks = c(0, 1), 412 | labels = c("Male", "Female")) + 413 | theme_bw() + 414 | labs(y = "Predicted probabilities", 415 | x = "Ego-position toward immigration") # Always label your axes ;) 416 | ``` 417 | 418 | ![](man/figures/README-prediction_plot2-1.png) 419 | 420 | As we can see, the differences between `female` and `male` differ, 421 | depending on the party and ego-position. 
So let’s take a look at the 422 | differences: 423 | 424 | ``` r 425 | ggplot(data = fdif2$plotdata_fd, aes(x = egoposition_immigration, 426 | y = mean, 427 | ymin = lower, ymax = upper)) + 428 | geom_ribbon(alpha = 0.1) + 429 | geom_line() + 430 | geom_hline(yintercept = 0) + 431 | facet_wrap(. ~ vote, ncol = 3) + 432 | scale_y_continuous(labels = percent_format(accuracy = 1)) + # % labels 433 | scale_x_continuous(breaks = c(0:10), 434 | minor_breaks = FALSE) + 435 | theme_bw() + 436 | labs(y = "Predicted probabilities", 437 | x = "Ego-position toward immigration") # Always label your axes ;) 438 | ``` 439 | 440 | ![](man/figures/README-first_differences_plot-1.png) 441 | 442 | We can see that the differences are for some parties at no point 443 | statistically significant from 0. 444 | 445 | ## Conclusion 446 | 447 | Multinomial logit models are important to model nominal choices. They 448 | are, however, restricted by being in need of a baseline category. 449 | Additionally, the log-character of the estimates makes it difficult to 450 | interpret them in meaningful ways. Predicting probabilities for all 451 | choices for scenarios, based on the observed data provides much more 452 | insight. The functions of this package provide easy to use functions 453 | that return data that can be used to plot predicted probabilities. The 454 | function uses a model from the `multinom()` function and uses the 455 | observed value approach and a supplied scenario to predict values over 456 | the range of fitting values. The functions simulate sampling 457 | distributions and therefore provide meaningful confidence intervals. 458 | `mnl_pred_ova()` can be used to predict probabilities for a certain 459 | scenario. `mnl_fd_ova()` can be used to predict probabilities for two 460 | scenarios and their first differences. 
461 | 462 | ## Acknowledgment 463 | 464 | My code is inspired by the method courses in the [Political Science 465 | master’s program at the University of 466 | Mannheim](https://www.sowi.uni-mannheim.de/en/academics/prospective-students/ma-in-political-science/)(cool 467 | place, check it out!). The skeleton of the code is based on a tutorial 468 | taught by [Marcel Neunhoeffer](https://www.marcel-neunhoeffer.com/) 469 | (lecture: “Advanced Quantitative Methods” by [Thomas 470 | Gschwend](https://www.sowi.uni-mannheim.de/gschwend/)). 471 | 472 | ## DOI 473 | 474 | General DOI (always links to most recent version): 475 | [10.5281/zenodo.4525342](https://doi.org/10.5281/zenodo.4525342) 476 | 477 | DOIs for different versions: 478 | 479 | - Version 0.0.6: 480 | [10.5281/zenodo.4580427](https://doi.org/10.5281/zenodo.4580427) 481 | - Version 0.0.5: 482 | [10.5281/zenodo.4525343](https://doi.org/10.5281/zenodo.4525343) 483 | 484 | ## References 485 | 486 |
487 | 488 |
489 | 490 | Hanmer, Michael J., and Kerem Ozan Kalkan. 2013. “Behind the Curve: 491 | Clarifying the Best Approach to Calculating Predicted Probabilities and 492 | Marginal Effects from Limited Dependent Variable Models.” *American 493 | Journal of Political Science* 57 (1): 263–77. 494 | . 495 | 496 |
497 | 498 |
499 | 500 | King, Gary, Michael Tomz, and Jason Wittenberg. 2000. “Making the Most 501 | of Statistical Analyses: Improving Interpretation and Presentation.” 502 | *American Journal of Political Science* 44 (2): 341–55. 503 | . 504 | 505 |
506 | 507 |
508 | 509 | Roßteutscher, Sigrid, Harald Schoen, Rüdiger Schmitt-Beck, Christof 510 | Wolf, and Alexander Staudt. 2019. “Rolling Cross-Section-Wahlkampfstudie 511 | Mit Nachwahl-Panelwelle (GLES 2017).” GESIS Datenarchiv. 512 | . 513 | 514 |
515 | 516 |
517 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Submission 2 | 3 | This is a submission for a new patch release (0.0.8). 4 | 5 | The 0.0.8 release fixes a bug where the functions stopped because of an erroneously 6 | triggered error message. 7 | 8 | The resubmission is necessary because a test failed in a 9 | r-devel-linux-x86_64-fedora-gcc environment. This test is now skipped because 10 | the error is not replicable on a local fedora-gcc-devel container. 11 | 12 | 13 | ## Test environments 14 | 15 | * Windows Server 2008 R2 SP1, R-devel, 32/64 bit (R-hub) 16 | * Windows x86_64-w64-mingw32 (64-bit), R-devel (win-builder) 17 | * windows latest, release (github actions) 18 | * Ubuntu 20.04, release (github actions) 19 | * Ubuntu 20.04, devel (github actions) 20 | * Fedora Linux, R-devel, GCC (R-hub) 21 | * macOS latest, release (github actions) 22 | 23 | 24 | ## R CMD check results 25 | There were no ERRORs or WARNINGs. 26 | 27 | There was one NOTE: 28 | 29 | Maintainer: 'Manuel Neumann ' 30 | 31 | Days since last update: 3 32 | 33 | Found the following (possibly) invalid URLs: 34 | URL: https://doi.org/10.2307/2669316 35 | From: inst/doc/OVA_Predictions_For_MNL.html 36 | README.md 37 | Status: 403 38 | Message: Forbidden 39 | URL: https://doi.org/10.4232/1.13213 40 | From: inst/doc/OVA_Predictions_For_MNL.html 41 | README.md 42 | Status: Error 43 | Message: SSL certificate problem: unable to get local issuer certificate 44 | URL: https://www.doi.org/10.4232/1.13213 45 | From: man/gles.Rd 46 | Status: Error 47 | Message: SSL certificate problem: unable to get local issuer certificate 48 | 49 | Found the following (possibly) invalid DOIs: 50 | DOI: 10.2307/2669316 51 | From: DESCRIPTION 52 | Status: Forbidden 53 | Message: 403 54 | 55 | * All DOIs and URLs were manually checked and are valid. 
56 | 57 | 58 | ## Downstream dependencies 59 | There are currently no downstream dependencies for this package. 60 | -------------------------------------------------------------------------------- /data/gles.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ManuelNeumann/MNLpred/119672dcffea9b9a0e0caf187894b6f9fac8bb33/data/gles.RData -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite MNLpred in publications please use:") 2 | 3 | citEntry(entry = "Manual", 4 | title = "MNLpred - Simulated Predicted Probabilities for Multinomial Logit Models", 5 | author = as.person("Manuel Neumann"), 6 | year = "2021", 7 | url = "https://CRAN.R-project.org/package=MNLpred", 8 | note = "Version 0.0.8", 9 | doi = "10.5281/zenodo.4525342", 10 | textVersion = 11 | paste("Neumann, Manuel (2021).", 12 | "MNLpred - Simulated Predicted Probabilities for Multinomial Logit Models", 13 | "(Version 0.0.8).", 14 | "DOI 10.5281/zenodo.4525342.", 15 | "URL https://CRAN.R-project.org/package=MNLpred.") 16 | ) 17 | -------------------------------------------------------------------------------- /man/figures/README-first_differences_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ManuelNeumann/MNLpred/119672dcffea9b9a0e0caf187894b6f9fac8bb33/man/figures/README-first_differences_plot-1.png -------------------------------------------------------------------------------- /man/figures/README-prediction_plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ManuelNeumann/MNLpred/119672dcffea9b9a0e0caf187894b6f9fac8bb33/man/figures/README-prediction_plot1-1.png
-------------------------------------------------------------------------------- /man/figures/README-prediction_plot2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ManuelNeumann/MNLpred/119672dcffea9b9a0e0caf187894b6f9fac8bb33/man/figures/README-prediction_plot2-1.png -------------------------------------------------------------------------------- /man/figures/README-static_fd_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ManuelNeumann/MNLpred/119672dcffea9b9a0e0caf187894b6f9fac8bb33/man/figures/README-static_fd_plot-1.png -------------------------------------------------------------------------------- /man/gles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gles-data.R 3 | \docType{data} 4 | \name{gles} 5 | \alias{gles} 6 | \title{German Longitudinal Election Study} 7 | \format{ 8 | A data frame with 1,000 observations and 6 variables: 9 | \describe{ 10 | \item{vote}{Voting decision for party} 11 | \item{egoposition_immigration}{Ego-position toward immigration (0 = very open to 10 = very restrictive )} 12 | \item{ostwest}{Dummy for respondents from Eastern Germany (= 1)} 13 | \item{political_interest}{Measurement for political interest (0 = low, 4 = high)} 14 | \item{income}{Self-reported income satisfaction (0 = low, 4 = high)} 15 | \item{gender}{Self-reported gender (binary coding with 1 = female)} 16 | } 17 | } 18 | \source{ 19 | \href{https://www.doi.org/10.4232/1.13213}{GESIS Datenarchiv} 20 | } 21 | \usage{ 22 | data(gles) 23 | } 24 | \description{ 25 | A sample of 1,000 respondents in the Rolling Cross Sectional 26 | study in the German Longitudinal Election Study in 2017.
27 | } 28 | \examples{ 29 | data(gles) 30 | table(gles$vote) 31 | } 32 | \references{ 33 | Roßteutscher, Sigrid et al. 2019. 34 | “Rolling Cross-Section-Wahlkampfstudie mit Nachwahl-Panelwelle (GLES 2017).” 35 | ZA6803 Datenfile Version 4.0.1. 36 | (\href{https://www.doi.org/10.4232/1.13213}{GESIS Datenarchiv}). 37 | } 38 | \keyword{datasets} 39 | -------------------------------------------------------------------------------- /man/mnl_fd2_ova.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mnl_fd2_ova.R 3 | \name{mnl_fd2_ova} 4 | \alias{mnl_fd2_ova} 5 | \title{Multinomial First Differences Predictions For Two Values (Observed Value Approach)} 6 | \usage{ 7 | mnl_fd2_ova( 8 | model, 9 | data, 10 | x, 11 | value1, 12 | value2, 13 | xvari, 14 | nsim = 1000, 15 | seed = "random", 16 | probs = c(0.025, 0.975) 17 | ) 18 | } 19 | \arguments{ 20 | \item{model}{the multinomial model, from a \code{\link{multinom}}()-function call (see the \code{\link{nnet}} package)} 21 | 22 | \item{data}{the data with which the model was estimated} 23 | 24 | \item{x}{the name of the variable that should be varied} 25 | 26 | \item{value1}{first value for the difference} 27 | 28 | \item{value2}{second value for the difference} 29 | 30 | \item{xvari}{former argument for \code{x} (deprecated).} 31 | 32 | \item{nsim}{numbers of simulations} 33 | 34 | \item{seed}{set a seed for replication purposes.} 35 | 36 | \item{probs}{a vector with two numbers, defining the significance levels. Default to 5\% significance level: \code{c(0.025, 0.975)}} 37 | } 38 | \value{ 39 | The function returns a list with several elements. Most importantly the list includes the simulated draws `S`, the simulated predictions `P`, the first differences of the predictions `P_fd`, a data set for plotting `plotdata` the predicted probabilities, and one for the first differences `plotdata_fd`. 
40 | } 41 | \description{ 42 | Multinomial First Differences Predictions For Two Values (Observed Value Approach) 43 | } 44 | \examples{ 45 | library(nnet) 46 | library(MASS) 47 | 48 | dataset <- data.frame(y = c(rep("a", 10), rep("b", 10), rep("c", 10)), 49 | x1 = rnorm(30), 50 | x2 = rnorm(30, mean = 1), 51 | x3 = sample(1:10, 30, replace = TRUE)) 52 | 53 | mod <- multinom(y ~ x1 + x2 + x3, data = dataset, Hess = TRUE) 54 | 55 | fdi1 <- mnl_fd2_ova(model = mod, data = dataset, 56 | x = "x1", 57 | value1 = min(dataset$x1), 58 | value2 = max(dataset$x1), 59 | nsim = 10) 60 | 61 | 62 | 63 | } 64 | -------------------------------------------------------------------------------- /man/mnl_fd_ova.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mnl_fd_ova.R 3 | \name{mnl_fd_ova} 4 | \alias{mnl_fd_ova} 5 | \title{Multinomial First Differences Prediction (Observed Value Approach)} 6 | \usage{ 7 | mnl_fd_ova( 8 | model, 9 | data, 10 | x, 11 | z, 12 | z_values, 13 | xvari, 14 | scenname, 15 | scenvalues, 16 | by = NULL, 17 | nsim = 1000, 18 | seed = "random", 19 | probs = c(0.025, 0.975) 20 | ) 21 | } 22 | \arguments{ 23 | \item{model}{the multinomial model, from a \code{\link{multinom}}()-function call (see the \code{\link{nnet}} package)} 24 | 25 | \item{data}{the data with which the model was estimated} 26 | 27 | \item{x}{the name of the variable that should be varied (the x-axis variable in prediction plots)} 28 | 29 | \item{z}{define the variable for which you want to compute the difference.} 30 | 31 | \item{z_values}{determine the two values at which value you want to fix the scenario (\code{z}). 
The first differences will be computed by subtracting the values of the first supplied scenario from the second one.} 32 | 33 | \item{xvari}{former argument for \code{x} (deprecated).} 34 | 35 | \item{scenname}{former argument for \code{z} (deprecated).} 36 | 37 | \item{scenvalues}{former argument for \code{z_values} (deprecated).} 38 | 39 | \item{by}{define the steps of \code{x}.} 40 | 41 | \item{nsim}{numbers of simulations} 42 | 43 | \item{seed}{set a seed for replication purposes.} 44 | 45 | \item{probs}{a vector with two numbers, defining the significance levels. Default to 5\% significance level: \code{c(0.025, 0.975)}} 46 | } 47 | \value{ 48 | The function returns a list with several elements. Most importantly the list includes the simulated draws `S`, the simulated predictions `P`, and a data set for plotting `plotdata`. 49 | } 50 | \description{ 51 | This function predicts values for two different scenarios over a range of 52 | values. It then takes the differences between the different simulations to 53 | return first differences for each value. 54 | } 55 | \details{ 56 | The function uses the \code{\link{mnl_pred_ova}} function for each scenario. 57 | The results of these predictions are also returned and can therefore be 58 | easily accessed. If you need predictions for multiple scenarios, you can use 59 | this function to both plot the predictions for each scenario and the 60 | differences between them. 
61 | } 62 | \examples{ 63 | library(nnet) 64 | library(MASS) 65 | 66 | dataset <- data.frame(y = c(rep("a", 10), rep("b", 10), rep("c", 10)), 67 | x1 = rnorm(30), 68 | x2 = rnorm(30, mean = 1), 69 | x3 = sample(1:10, 30, replace = TRUE)) 70 | 71 | mod <- multinom(y ~ x1 + x2 + x3, data = dataset, Hess = TRUE) 72 | 73 | fdif <- mnl_fd_ova(model = mod, data = dataset, 74 | x = "x1", z = "x3", 75 | z_values = c(min(dataset$x3), max(dataset$x3)), 76 | nsim = 10) 77 | 78 | } 79 | -------------------------------------------------------------------------------- /man/mnl_pred_ova.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mnl_pred_ova.R 3 | \name{mnl_pred_ova} 4 | \alias{mnl_pred_ova} 5 | \title{Multinomial Prediction Function (Observed Value Approach)} 6 | \usage{ 7 | mnl_pred_ova( 8 | model, 9 | data, 10 | x, 11 | by = NULL, 12 | z = NULL, 13 | z_value = NULL, 14 | xvari, 15 | scenname, 16 | scenvalue, 17 | nsim = 1000, 18 | seed = "random", 19 | probs = c(0.025, 0.975) 20 | ) 21 | } 22 | \arguments{ 23 | \item{model}{the multinomial model, from a \code{\link{multinom}}()-function call (see the \code{\link{nnet}} package)} 24 | 25 | \item{data}{the data with which the model was estimated} 26 | 27 | \item{x}{the name of the variable that should be varied (the x-axis variable in prediction plots)} 28 | 29 | \item{by}{define the steps of \code{x}.} 30 | 31 | \item{z}{if you want to hold a specific variable stable over all scenarios, you can name it here (optional).} 32 | 33 | \item{z_value}{determine at which value you want to fix the \code{z}.} 34 | 35 | \item{xvari}{former argument for \code{x} (deprecated).} 36 | 37 | \item{scenname}{former argument for \code{z} (deprecated).} 38 | 39 | \item{scenvalue}{former argument for \code{z_value} (deprecated).} 40 | 41 | \item{nsim}{numbers of simulations} 42 | 43 | \item{seed}{set a seed for replication 
purposes.} 44 | 45 | \item{probs}{a vector with two numbers, defining the significance levels. Default to 5\% significance level: \code{c(0.025, 0.975)}} 46 | } 47 | \value{ 48 | The function returns a list with several elements. Most importantly the list includes the simulated draws `S`, the simulated predictions `P`, and a data set for plotting `plotdata`. 49 | } 50 | \description{ 51 | This function predicts probabilities for all choices of a multinomial logit 52 | model over a specified span of values. 53 | } 54 | \examples{ 55 | library(nnet) 56 | library(MASS) 57 | 58 | dataset <- data.frame(y = c(rep("a", 10), rep("b", 10), rep("c", 10)), 59 | x1 = rnorm(30), 60 | x2 = rnorm(30, mean = 1), 61 | x3 = sample(1:10, 30, replace = TRUE)) 62 | 63 | mod <- multinom(y ~ x1 + x2 + x3, data = dataset, Hess = TRUE) 64 | 65 | pred <- mnl_pred_ova(model = mod, data = dataset, 66 | x = "x1", 67 | nsim = 10) 68 | 69 | 70 | } 71 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(MNLpred) 3 | 4 | test_check("MNLpred") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_inputvariants.R: -------------------------------------------------------------------------------- 1 | context("Variations in the input") 2 | library(MNLpred) 3 | library(nnet) 4 | library(MASS) 5 | 6 | mod1 <- multinom(vote ~ egoposition_immigration + 7 | political_interest + 8 | income + gender + ostwest, 9 | data = gles, 10 | Hess = TRUE) 11 | 12 | mod2 <- multinom(vote ~ egoposition_immigration + 13 | political_interest + 14 | income + gender + ostwest, 15 | data = gles) 16 | 17 | # Add factors: 18 | gles$factor <- as.factor(sample(c("A", "B", "C"), nrow(gles), replace = T)) 19 | 20 | mod3 <- multinom(vote ~ egoposition_immigration + 21 | political_interest + 22 | income + gender + ostwest + 
23 | factor, 24 | data = gles, 25 | Hess = TRUE) 26 | 27 | # Just one IV 28 | mod4 <- multinom(vote ~ egoposition_immigration, 29 | data = gles, 30 | Hess = TRUE) 31 | 32 | data_1iv <- data.frame(x1 = c(1:4), 33 | x2 = rep(c("m", "n"), 2), 34 | y = factor(c("a", "b", "c", "d"))) 35 | 36 | mod5a_1iv <- multinom(y ~ x1, 37 | data = data_1iv, 38 | Hess = TRUE) 39 | 40 | mod5b_1iv <- multinom(y ~ x2, 41 | data = data_1iv, 42 | Hess = TRUE) 43 | 44 | 45 | # Tests 46 | test_that("mnl_pred_ova() returns two predictions when by = NULL", { 47 | 48 | expect_equal(mnl_pred_ova(model = mod1, 49 | data = gles, 50 | x = "egoposition_immigration", 51 | by = NULL, 52 | seed = "random", # default 53 | nsim = 2, # faster 54 | probs = c(0.025, 0.975))$nVariation, 2) 55 | }) 56 | 57 | test_that("mnl_pred_ova() returns dataframes with correct number of rows", { 58 | 59 | pred1 <- mnl_pred_ova(model = mod1, 60 | data = gles, 61 | x = "egoposition_immigration", 62 | by = 1, 63 | seed = "random", # default 64 | nsim = 2, # faster 65 | probs = c(0.025, 0.975)) 66 | pred2 <- mnl_pred_ova(model = mod1, 67 | data = gles, 68 | x = "egoposition_immigration", 69 | z = "gender", 70 | z_value = 1, 71 | by = 1, 72 | seed = "random", # default 73 | nsim = 2, # faster 74 | probs = c(0.025, 0.975)) 75 | expect_equal(nrow(pred1$plotdata), length(unique(gles$vote)) * length(seq(min(gles$egoposition_immigration), 76 | max(gles$egoposition_immigration), 77 | by = 1))) 78 | }) 79 | 80 | test_that("mnl_pred_ova() returns error message when variables contain typos", { 81 | 82 | expect_error(mnl_pred_ova(model = mod1, 83 | data = gles, 84 | x = "immigration", 85 | by = 1, 86 | seed = "random", # default 87 | nsim = 2, # faster 88 | probs = c(0.025, 0.975)), 89 | regexp = "There might be a typo.") 90 | }) 91 | 92 | test_that("mnl_pred_ova() returns error message when variables contain typos", { 93 | 94 | expect_error(mnl_pred_ova(model = mod1, 95 | data = gles, 96 | x = "egoposition_immigration", 97 | by = 1, 
98 | z = "gndr", 99 | z_value = 1, 100 | seed = "random", # default 101 | nsim = 2, # faster 102 | probs = c(0.025, 0.975)), 103 | regexp = "There might be a typo.") 104 | }) 105 | 106 | test_that("mnl_pred_ova() returns error message when there is no Hessian matrix", { 107 | 108 | expect_error(mnl_pred_ova(model = mod2, 109 | data = gles, 110 | x = "egoposition_immigration", 111 | by = 1, 112 | seed = "random", # default 113 | nsim = 2, # faster 114 | probs = c(0.025, 0.975)), 115 | regexp = "Hess = TRUE") 116 | }) 117 | 118 | test_that("mnl_pred_ova() stops if non-numeric variables are supplied with the data", { 119 | 120 | expect_error(mnl_pred_ova(model = mod3, 121 | data = gles, 122 | x = "egoposition_immigration", 123 | nsim = 2), 124 | regexp = "Please supply data that consists of numeric values.") 125 | 126 | }) 127 | 128 | 129 | # Fixing bug with apply() and one IV (v0.0.6) 130 | test_that("mnl_pred_ov() works with just one iv", { 131 | 132 | expect_type(mnl_pred_ova(model = mod4, 133 | data = gles, 134 | x = "egoposition_immigration", 135 | nsim = 2), "list") 136 | }) 137 | 138 | test_that("mnl_pred_ov() does correctly evaluate the class of one iv (negative example)",{ 139 | 140 | expect_error(mnl_pred_ova(model = mod5b_1iv, 141 | data = data_1iv, 142 | x = "x2", 143 | nsim = 2), 144 | "Please supply data that consists of numeric values. 
The package can not handle factor or character variables, yet.*") 145 | }) 146 | 147 | test_that("mnl_pred_ov() does correctly evaluate the class of one iv", { 148 | skip_on_cran() 149 | expect_length(mnl_pred_ova(model = mod5a_1iv, 150 | data = data_1iv, 151 | x = "x1", 152 | nsim = 2)$IV, 153 | n = 1) 154 | }) 155 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/OVA_Predictions_For_MNL.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Observed Value Predictions for Multinomial Logit Models" 3 | author: "Manuel Neumann" 4 | output: rmarkdown::html_vignette 5 | bibliography: bibliography.bib 6 | vignette: > 7 | %\VignetteIndexEntry{Observed Value Predictions for Multinomial Logit Models} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r, include = FALSE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | This package provides functions that make it easy to get plottable predictions from multinomial logit models. The predictions are based on simulated draws of regression estimates from their respective sampling distribution. 20 | 21 | At first I will present the theoretical and statistical background, before using sample data to demonstrate the functions of the package. 22 | 23 | ## The multinomial logit model 24 | 25 | This is a short introduction in the theoretical and statistical background of the multinomial logit. 26 | 27 | Dependent variables can not necessarily be ordered. In political science, for example, the variable of interest is often the individual's vote choice, based on the set of parties that are presented. 
Of interest is then how somebody comes up with their choice. 28 | 29 | More generally spoken, many questions deal with a nominal outcome variable and we want to test assumptions about the function that may lead to a respective outcome. 30 | 31 | For these questions, the multinomial logit model is often a fitting option. Similar to an ordinary logit model, the multinomial logit model assumes that the probability to choose one over the other outcomes can be modeled with a linear function and a fitting logit link function. The difference of the multinomial logit is that it models the choice of *each* category as a function of the characteristics of the observation. 32 | 33 | In formal terms, we assume $$\Pr(y_i = j|X_i)$$ is a linear combination of $$X_i\beta_j$$, whereby $$\beta_j$$ is a choice specific vector. This means we are interested in the probability that the observed choice of the individual $$y_i$$ is the choice category $$j$$ dependent on characteristics of the observation's characteristics $$X_i$$. Therefore we estimate a choice specific vector $\beta_j$. Since the probability is restricted to be between $$0$$ and $$1$$, we use $$exp(X_i\beta_j)$$ as a fitting link function. Additionally, we bring the exponents into relationship with each other and normalize them by dividing through the sum of them. 34 | 35 | Since we cannot compare all choices against each other, the model is not identified so far. Instead, we have to choose a baseline category and fix it to $$0$$. Therefore we estimate the probability of all choices to be chosen in comparison to the baseline choice. 36 | 37 | Eventually, we end up with the following probability function: 38 | 39 | $$\Pr(y_i|X_i)= \frac{exp(X_i\beta_j)}{\sum^{J}_{m=1}exp(X_i \beta_m)}$$, whereby $$\beta_1 = 0$$ 40 | This is the link function that is used for estimation. 
41 | 42 | For a more detailed insight into the multinomial model refer to sources like [these lecture notes by Germán Rodríguez](https://data.princeton.edu/wws509/notes/c6s2). 43 | 44 | ## Using the package 45 | 46 | ### How does the function work? 47 | 48 | As we have seen above, the multinomial logit can be used to get an insight into the probabilities to choose one option out of a set of alternatives. We have also seen that we need a baseline category to identify the model. This is mathematically necessary, but does not come in handy for purposes of interpretation. 49 | 50 | It is far more helpful and easier to understand to come up with predicted probabilities and first differences for values of interest [see e.g., @king2000 for approaches in social sciences]. Based on simulations, this package helps to easily predict probabilities and confidence intervals for each choice category over a specified scenario. The functions use the observed values to compute the predicted probabilities, as is recommended by @hanmer2013. 51 | 52 | The procedure follows the following steps: 53 | 54 | 1. Estimate a multinomial model and save the coefficients and the variance covariance matrix (based on the Hessian-matrix of the model). 55 | 1. To simulate uncertainty, make $n$ draws of coefficients from a simulated sampling distribution based on the coefficients and the variance covariance matrix. 56 | 1. Predict probabilities by multiplying the drawn coefficients with a specified scenario (so far these are the observed values). 57 | 1. Take the mean and the quantiles of the simulated predicted probabilities. 58 | 59 | The presented functions follow these steps. Additionally, they use the so called observed value approach. This means that the "scenario" uses all observed values that informed the model. Therefore the function takes these more detailed steps: 60 | 61 | 1. 
For all (complete) cases $n$ predictions are computed based on their observed independent values and the $n$ sets of coefficients. 62 | 1. Next the predicted values of all observations for each simulation are averaged. 63 | 1. Take the mean and the quantiles of the simulated predicted probabilities (same as above). 64 | 65 | For first differences, the simulated predictions are subtracted from each other. 66 | 67 | To showcase these steps, I present a reproducible example of how the functions can be used. 68 | 69 | ### Example 70 | 71 | The example uses data from the German Longitudinal Election Study (GLES, @rosteutscher2019). 72 | 73 | The data contains 1,000 respondents' characteristics and their vote choice. 74 | 75 | For this task, we need the following packages: 76 | 77 | ```{r setup} 78 | # Required packages 79 | library(magrittr) # for pipes 80 | library(nnet) # for the multinom()-function 81 | library(MASS) # for the multivariate normal distribution 82 | 83 | # The package 84 | library(MNLpred) 85 | 86 | # Plotting the predicted probabilities: 87 | library(ggplot2) 88 | library(scales) 89 | ``` 90 | 91 | Now we load the data: 92 | 93 | ```{r data, echo=TRUE} 94 | # The data: 95 | data("gles") 96 | ``` 97 | 98 | The next step is to compute the actual model. The function of the `MNLpred` package is based on models that were estimated with the `multinom()`-function of the `nnet` package. The `multinom()` function is convenient because it does not need transformed datasets. The syntax is very easy and resembles the ordinary regression functions. It is important that the Hessian matrix is returned with `Hess = TRUE`. The matrix is needed to simulate the sampling distribution. 99 | 100 | As we have seen above, we need a baseline or reference category for the model to work. Therefore, be aware what your baseline category is. If you use a dependent variable that is of type `character`, the categories will be ordered in alphabetical order. 
If you have a `factor` at hand, you can define your baseline category, for example with the `relevel()` function. 101 | 102 | Now, let's estimate the model: 103 | 104 | ```{r model} 105 | # Multinomial logit model: 106 | mod1 <- multinom(vote ~ egoposition_immigration + 107 | political_interest + 108 | income + gender + ostwest, 109 | data = gles, 110 | Hess = TRUE) 111 | ``` 112 | The results show the coefficients and standard errors. As we can see, there are five sets of coefficients. They describe the relationship between the reference category (`AfD`) and the vote choices for the parties `CDU/CSU`, `FDP`, `Gruene`, `LINKE`, and `SPD`. 113 | 114 | ```{r results} 115 | summary(mod1) 116 | ``` 117 | 118 | A first rough review of the coefficients shows that a more restrictive ego-position toward immigration leads to a lower probability of the voters to choose any other party than the AfD. It is hard to evaluate whether the effect is statistically significant and what the probabilities for each choice look like. For this it is helpful to predict the probabilities for certain scenarios and plot the means and confidence intervals for visual analysis. 119 | 120 | Let's say we are interested in the relationship between the ego-position toward immigration and the probability to choose any of the parties. It would be helpful to plot the predicted probabilities for the span of the positions. 121 | 122 | ```{r math} 123 | summary(gles$egoposition_immigration) 124 | ``` 125 | 126 | As we can see, the ego positions were recorded on a scale from 0 to 10. Higher numbers represent more restrictive positions. 127 | We pick this score as the x-variable (`x`) and use the `mnl_pred_ova()` function to get predicted probabilities for each position in this range. 128 | 129 | The function needs a multinomial logit model (`model`), data (`data`), the variable of interest `x`, the steps for which the probabilities should be predicted (`by`). 
Additionally, a `seed` can be defined for replication purposes, the number of simulations can be defined (`nsim`), and the confidence intervals (`probs`). 130 | 131 | If we want to hold another variable stable, we can specify so with `z` and `z_value`. See also the `mnl_fd_ova()` function below. 132 | 133 | ```{r mnl_pred_ova} 134 | pred1 <- mnl_pred_ova(model = mod1, 135 | data = gles, 136 | x = "egoposition_immigration", 137 | by = 1, 138 | seed = "random", # default 139 | nsim = 100, # faster 140 | probs = c(0.025, 0.975)) # default 141 | ``` 142 | 143 | The function returns a list with several elements. Most importantly, it returns a `plotdata` data set: 144 | 145 | ```{r return} 146 | pred1$plotdata %>% head() 147 | ``` 148 | 149 | As we can see, it includes the range of the x variable, a mean, a lower, and an upper bound of the confidence interval. Concerning the choice category, the data is in a long format. This makes it easy to plot it with the `ggplot` syntax. The choice category can now easily be used to differentiate the lines in the plot by using `linetype = vote` in the `aes()`. Another option is to use `facet_wrap()` or `facet_grid()` to differentiate the predictions: 150 | 151 | ```{r prediction_plot1} 152 | ggplot(data = pred1$plotdata, aes(x = egoposition_immigration, 153 | y = mean, 154 | ymin = lower, ymax = upper)) + 155 | geom_ribbon(alpha = 0.1) + # Confidence intervals 156 | geom_line() + # Mean 157 | facet_wrap(.~ vote, scales = "free_y", ncol = 2) + 158 | scale_y_continuous(labels = percent_format(accuracy = 1)) + # % labels 159 | scale_x_continuous(breaks = c(0:10)) + 160 | theme_bw() + 161 | labs(y = "Predicted probabilities", 162 | x = "Ego-position toward immigration") # Always label your axes ;) 163 | ``` 164 | 165 | If we want first differences between two scenarios, we can use the function `mnl_fd2_ova()`. 
The function takes similar arguments as the function above, but now the values for the scenarios of interest have to be supplied. Imagine we want to know what difference it makes to position oneself on the most tolerant or most restrictive end of the `egoposition_immigration` scale. This can be done as follows: 166 | 167 | ```{r static_fd} 168 | fdif1 <- mnl_fd2_ova(model = mod1, 169 | data = gles, 170 | x = "egoposition_immigration", 171 | value1 = min(gles$egoposition_immigration), 172 | value2 = max(gles$egoposition_immigration), 173 | nsim = 100) 174 | ``` 175 | 176 | The first differences can then be depicted in a graph. 177 | 178 | ```{r static_fd_plot} 179 | ggplot(fdif1$plotdata_fd, aes(x = categories, 180 | y = mean, 181 | ymin = lower, ymax = upper)) + 182 | geom_pointrange() + 183 | geom_hline(yintercept = 0) + 184 | scale_y_continuous(labels = percent_format()) + 185 | theme_bw() + 186 | labs(y = "Predicted probabilities", 187 | x = "Party vote") 188 | ``` 189 | 190 | 191 | We are often not only interested in the static difference, but the difference across a span of values, given a difference in a second variable. This is especially helpful when we look at dummy variables. For example, we could be interested in the effect of `gender` on the vote decision over the different ego-positions. With the `mnl_fd_ova()` function, we can predict the probabilities for two scenarios and subtract them. The function returns the differences and the confidence intervals of the differences. The different scenarios can be held stable with `z` and the `z_values`. `z_values` takes a vector of two numeric values. These values are held stable for the variable that is named in `z`. 
192 | 193 | ```{r first_diffferences_prediction} 194 | fdif2 <- mnl_fd_ova(model = mod1, 195 | data = gles, 196 | x = "egoposition_immigration", 197 | by = 1, 198 | z = "gender", 199 | z_values = c(0,1), 200 | nsim = 100) 201 | ``` 202 | As before, the function returns a list including a data set that can be used to plot the differences. 203 | 204 | ```{r fd_return} 205 | fdif2$plotdata_fd %>% head() 206 | ``` 207 | 208 | Since the function calls the `mnl_pred_ova()` function internally, it also returns the output of the two predictions in the list element `Prediction1` and `Prediction2`. The plot data for the predictions is already bound together row wise to easily plot the predicted probabilities. 209 | 210 | ```{r prediction_plot2} 211 | ggplot(data = fdif2$plotdata, aes(x = egoposition_immigration, 212 | y = mean, 213 | ymin = lower, ymax = upper, 214 | group = as.factor(gender), 215 | linetype = as.factor(gender))) + 216 | geom_ribbon(alpha = 0.1) + 217 | geom_line() + 218 | facet_wrap(. ~ vote, scales = "free_y", ncol = 2) + 219 | scale_y_continuous(labels = percent_format(accuracy = 1)) + # % labels 220 | scale_x_continuous(breaks = c(0:10)) + 221 | scale_linetype_discrete(name = "Gender", 222 | breaks = c(0, 1), 223 | labels = c("Male", "Female")) + 224 | theme_bw() + 225 | labs(y = "Predicted probabilities", 226 | x = "Ego-position toward immigration") # Always label your axes ;) 227 | ``` 228 | 229 | As we can see, the differences between `female` and `male` differ, depending on the party and ego-position. So let's take a look at the differences: 230 | 231 | ```{r first_differences_plot} 232 | ggplot(data = fdif2$plotdata_fd, aes(x = egoposition_immigration, 233 | y = mean, 234 | ymin = lower, ymax = upper)) + 235 | geom_ribbon(alpha = 0.1) + 236 | geom_line() + 237 | geom_hline(yintercept = 0) + 238 | facet_wrap(. 
~ vote, ncol = 3) + 239 | scale_y_continuous(labels = percent_format(accuracy = 1)) + # % labels 240 | scale_x_continuous(breaks = c(0:10)) + 241 | theme_bw() + 242 | labs(y = "Predicted probabilities", 243 | x = "Ego-position toward immigration") # Always label your axes ;) 244 | ``` 245 | 246 | We can see that, for some parties, the differences are at no point statistically significantly different from 0. 247 | 248 | 249 | ## Conclusion 250 | Multinomial logit models are important to model nominal choices. They are, however, restricted by being in need of a baseline category. Additionally, the log-character of the estimates makes it difficult to interpret them in meaningful ways. Predicting probabilities for all choices for scenarios, based on the observed data provides much more insight. The functions of this package provide easy to use functions that return data that can be used to plot predicted probabilities. The function uses a model from the `multinom()` function and uses the observed value approach and a supplied scenario to predict values over the range of fitting values. The functions simulate sampling distributions and therefore provide meaningful confidence intervals. `mnl_pred_ova()` can be used to predict probabilities for a certain scenario. `mnl_fd_ova()` can be used to predict probabilities for two scenarios and their first differences. 251 | 252 | ## Acknowledgment 253 | 254 | My code is inspired by the method courses in the [Political Science master's program at the University of Mannheim](https://www.sowi.uni-mannheim.de/en/academics/prospective-students/ma-in-political-science/) (cool place, check it out!). The skeleton of the code is based on a tutorial taught by [Marcel Neunhoeffer](https://www.marcel-neunhoeffer.com/) (lecture: "Advanced Quantitative Methods" by [Thomas Gschwend](https://www.sowi.uni-mannheim.de/gschwend/)). 
255 | 256 | ## References 257 | -------------------------------------------------------------------------------- /vignettes/bibliography.bib: -------------------------------------------------------------------------------- 1 | # Bibliography for vignette 2 | 3 | @article{hanmer2013, 4 | title = {Behind the {Curve}: {Clarifying} the {Best} {Approach} to {Calculating} {Predicted} {Probabilities} and {Marginal} {Effects} from {Limited} {Dependent} {Variable} {Models}}, 5 | volume = {57}, 6 | issn = {00925853}, 7 | shorttitle = {Behind the {Curve}}, 8 | doi = {10.1111/j.1540-5907.2012.00602.x}, 9 | number = {1}, 10 | journal = {American Journal of Political Science}, 11 | author = {Hanmer, Michael J. and Ozan Kalkan, Kerem}, 12 | month = jan, 13 | year = {2013}, 14 | pages = {263--277} 15 | } 16 | 17 | 18 | @article{king2000, 19 | title = {Making the {Most} of {Statistical} {Analyses}: {Improving} {Interpretation} and {Presentation}}, 20 | volume = {44}, 21 | doi = {10.2307/2669316}, 22 | number = {2}, 23 | journal = {American Journal of Political Science}, 24 | author = {King, Gary and Tomz, Michael and Wittenberg, Jason}, 25 | year = {2000}, 26 | pages = {341--355} 27 | } 28 | 29 | 30 | 31 | @misc{rosteutscher2019, 32 | title = {Rolling {Cross}-{Section}-{Wahlkampfstudie} mit {Nachwahl}-{Panelwelle} ({GLES} 2017)}, 33 | publisher = {GESIS Datenarchiv}, 34 | author = {Roßteutscher, Sigrid and Schoen, Harald and Schmitt-Beck, Rüdiger and Wolf, Christof and Staudt, Alexander}, 35 | year = {2019}, 36 | doi = {10.4232/1.13213}, 37 | note = {version: ZA6803 Datenfile Version 4.0.1} 38 | } 39 | --------------------------------------------------------------------------------