├── NAMESPACE ├── .Rbuildignore ├── .gitignore ├── data └── policyTable.RData ├── autoPricing.Rproj ├── man ├── .plotRatingFactor.Rd ├── addVariable.Rd ├── .addVariable.Rd ├── policyTable.Rd ├── .getCoeffs.Rd ├── getCoeffs.Rd ├── .consistCorrection.Rd ├── .forwardBackSig.Rd ├── .getVarSignificance.Rd ├── .plotTimeConsist.Rd ├── plotTimeConsist.Rd ├── .modelFamilySpec.Rd ├── forwardBackStepAIC.Rd └── stepIC.Rd ├── NEWS ├── R ├── policyTable.R ├── addVariable.R ├── modelFamilySpec.R ├── autoPricing-package.R ├── getCoeffs.R ├── getVarSignificance.R ├── plotRatingFactor.R ├── forwardBackSig.R ├── consistCorrection.R ├── plotTimeConsist.R └── stepIC.R ├── DESCRIPTION ├── README.md └── LICENSE /NAMESPACE: -------------------------------------------------------------------------------- 1 | export(stepIC) -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /data/policyTable.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MangoTheCat/autoPricing/master/data/policyTable.RData -------------------------------------------------------------------------------- /autoPricing.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /man/.plotRatingFactor.Rd: -------------------------------------------------------------------------------- 1 | \name{.plotRatingFactor} 2 | \alias{.plotRatingFactor} 3 | \title{This is a function to plot rating factors...} 4 | \usage{.plotRatingFactor(ratingFactor="BonusMalus", theCoefficients=myCoeffs)} 5 | \description{This is a function to plot rating factors} 6 | \value{There is no output to this function} 7 | \note{This is an internal function} 8 | \keyword{plotting} 9 | \keyword{rating} 10 | \keyword{factors} 11 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 12 | \arguments{\item{\code{ratingFactor}}{a character string of the rating factor to be plotted} 13 | \item{\code{theCoefficients}}{output from the \code{getCoeffs()} function}} 14 | -------------------------------------------------------------------------------- /man/addVariable.Rd: -------------------------------------------------------------------------------- 1 | \name{addVariable} 2 | \alias{addVariable} 3 | \title{Function to create a formula which adds a variable to an input formula...} 4 | \usage{addVariable(theFormula, addVar)} 5 | \description{Function to create a formula which adds a variable to an input formula} 6 | \value{The output is a formula object which is the input formula added to the variable} 7 | \note{This should really be an internal function} 8 | \keyword{formula} 9 | \keyword{add} 10 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 11 | \arguments{\item{\code{theFormula}}{this is the formula that the variable should be added to} 12 | \item{\code{addVar}}{this is the variable that you should add}} 13 | -------------------------------------------------------------------------------- /man/.addVariable.Rd: -------------------------------------------------------------------------------- 1 | \name{.addVariable} 2 | \alias{.addVariable} 3 | \title{Function to create a formula which adds a variable to an input formula...} 4 | \usage{.addVariable(theFormula, addVar)} 5 | \description{Function to create a formula which adds a variable to an input formula} 6 | \value{The output is a formula object which is the input formula added to the variable} 7 | \note{This should really be an internal function} 8 | \keyword{formula} 9 | \keyword{add} 10 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 11 | \arguments{\item{\code{theFormula}}{this is the formula that the variable should be added to} 12 | \item{\code{addVar}}{this is the variable that you should add}} 13 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | Date: 12/01/2012 2 | Author: Chibisi Chima-Okereke 3 | Package: autoPricing 4 | Version: 0.4 5 | # 6 | 7 | The policyTable dataset is added to allow users to play with the functions 8 | 9 | 10 | Version: 0.1 11 | # 12 | This is the first iteration of the package 13 | 14 | Version 0.2 15 | # 16 | Some basic alterations to make the code more robust 17 | 18 | Version 0.3 19 | # 20 | Added the function consistCorrection.R to rebalance for variables that are initial significant but not consistent 21 | 22 | Version 0.4 23 | # 24 | 1. Now using the update() function to iterate through the models 25 | 2. Now Chi-Squared, F-Test, are included in the test options 26 | 3. Now using cat() instead of print() for comments 27 | 4. Using AIC(), BIC() functions in addition to extractAIC() function 28 | -------------------------------------------------------------------------------- /man/policyTable.Rd: -------------------------------------------------------------------------------- 1 | \name{policyTable} 2 | \title{This is a simulated dataset, it is a modified version of that given by Kaas, R., Goovaerts, M., 3 | Dhaene, J., Denuit, M. in Modern Actuarial Risk Theory, and can be found at 4 | \url{http://www1.fee.uva.nl/ke/act/people/kaas/Cars.txt}} 5 | \description{This data will be included the package} 6 | \alias{autoPricing} 7 | \docType{data} 8 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 9 | \references{the data is an altered version of the set used in KASS et al Modern 10 | Actuarial Risk Theory located at \url{http://www1.fee.uva.nl/ke/act/people/kaas/Cars.txt}} 11 | \keyword{glm,} 12 | \keyword{actuarial} 13 | \keyword{pricing,} 14 | \keyword{automating,} 15 | \keyword{data} 16 | \seealso{\code{\link{stepIC}}} 17 | -------------------------------------------------------------------------------- /R/policyTable.R: -------------------------------------------------------------------------------- 1 | #' This data will be included the package 2 | #' 3 | #' @name policyTable 4 | #' @aliases autoPricing 5 | #' @docType data 6 | #' @title This is a simulated dataset, it is a modified version of that given by Kaas, R., Goovaerts, M., 7 | #' Dhaene, J., Denuit, M. in Modern Actuarial Risk Theory, and can be found at 8 | #' \url{http://www1.fee.uva.nl/ke/act/people/kaas/Cars.txt} 9 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 10 | #' @references the data is an altered version of the set used in KASS et al Modern 11 | #' Actuarial Risk Theory located at \url{http://www1.fee.uva.nl/ke/act/people/kaas/Cars.txt} 12 | #' @keywords glm, actuarial pricing, automating, data 13 | #' @seealso \code{\link{stepIC}} 14 | NULL 15 | -------------------------------------------------------------------------------- /man/.getCoeffs.Rd: -------------------------------------------------------------------------------- 1 | \name{.getCoeffs} 2 | \alias{.getCoeffs} 3 | \title{Function for formatting coefficients...} 4 | \usage{.getCoeffs(myExposure="Exposure", myGlm=theGlm, theForm, sourceData)} 5 | \description{Function for formatting coefficients} 6 | \value{The output is a nicely formatted table of the coefficients} 7 | \note{This formula is used extensively in the \code{stepIC} function} 8 | \keyword{glm} 9 | \keyword{coefficients} 10 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 11 | \arguments{\item{\code{myExposure}}{the name of the column of the dataset that has the exposure (in years)} 12 | \item{\code{myGlm}}{the glm object} 13 | \item{\code{theForm}}{should be the formula if theGlm$formula is NULL} 14 | \item{\code{sourceData}}{should be the source data if theGlm$data is NULL}} 15 | -------------------------------------------------------------------------------- /man/getCoeffs.Rd: -------------------------------------------------------------------------------- 1 | \name{getCoeffs} 2 | \alias{getCoeffs} 3 | \title{Function for formatting coefficients...} 4 | \usage{getCoeffs(myExposure="Exposure", myGlm=theGlm, theForm, sourceData)} 5 | \description{Function for formatting coefficients} 6 | \value{The output is a nicely formatted table of the coefficients} 7 | \note{This formula is used extensively in the \code{forwardBackStepAIC} function} 8 | \keyword{glm} 9 | \keyword{coefficients} 10 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 11 | \arguments{\item{\code{myExposure}}{the name of the column of the dataset that has the exposure (in years)} 12 | \item{\code{myGlm}}{the glm object} 13 | \item{\code{theForm}}{should be the formula if theGlm$formula is NULL} 14 | \item{\code{sourceData}}{should be the source data if theGlm$data is NULL}} 15 | -------------------------------------------------------------------------------- /man/.consistCorrection.Rd: -------------------------------------------------------------------------------- 1 | \name{.consistCorrection} 2 | \alias{.consistCorrection} 3 | \title{This function rebalances a raw variable that is significant but not consistent...} 4 | \usage{.consistCorrection(glm1=currentModel, currVar=currentVar)} 5 | \description{This function rebalances a raw variable that is significant but not consistent} 6 | \value{The output is a list of revised glm and a character either \code{"Consistent"} or \code{"Not Consistent"}} 7 | \note{This is an internal function} 8 | \keyword{glm} 9 | \keyword{consistency} 10 | \keyword{significance} 11 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 12 | \arguments{\item{\code{glm1}}{the glm that contains the rating factor to be analysed output from the \code{.forwardBackSig} function} 13 | \item{\code{currVar}}{the rating factor to be analysed}} 14 | -------------------------------------------------------------------------------- /R/addVariable.R: -------------------------------------------------------------------------------- 1 | #' Function to create a formula which adds a variable to an input formula 2 | #' 3 | #' 4 | #' @param \code{theFormula} this is the formula that the variable should be added to 5 | #' @param \code{addVar} this is the variable that you should add 6 | #' @return The output is a formula object which is the input formula added to the variable 7 | #' @note This should really be an internal function 8 | #' 9 | #' @keywords formula add 10 | #' @include autoPricing-package.R 11 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 12 | # 13 | .addVariable <- function(theFormula, addVar){ 14 | theFormula <- as.character(theFormula) 15 | theRHS <- paste(c(theFormula[3], addVar), collapse = " + ", sep = "") 16 | theFormula <- as.formula(paste(theFormula[2], " ~ ", theRHS, sep = "")) 17 | return(theFormula) 18 | } 19 | -------------------------------------------------------------------------------- /man/.forwardBackSig.Rd: -------------------------------------------------------------------------------- 1 | \name{.forwardBackSig} 2 | \alias{.forwardBackSig} 3 | \title{Master function to find out if the variable is significant when you add a variable...} 4 | \usage{.forwardBackSig(theModel, currVar, alg="forward", ...)} 5 | \description{Master function to find out if the variable is significant when you add a variable} 6 | \value{The model that will be used and whether the \code{currVar} was significant or not} 7 | \note{This is an internal function and should not be used by the user} 8 | \keyword{glm} 9 | \keyword{forward} 10 | \keyword{backward} 11 | \seealso{\code{\link{glm}}} 12 | \seealso{\code{\link{stepIC}}} 13 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 14 | \arguments{\item{\code{theModel}}{this is the candidate model} 15 | \item{\code{currVar}}{this is the current variable being investigated} 16 | \item{\code{alg}}{this is the algoirthm being used either forward or backward}} 17 | -------------------------------------------------------------------------------- /man/.getVarSignificance.Rd: -------------------------------------------------------------------------------- 1 | \name{.getVarSignificance} 2 | \alias{.getVarSignificance} 3 | \title{Function to get the significance or not of a variable when given the the appropriate glms...} 4 | \usage{.getVarSignificance(aGlm1=glm1, aGlm2=glm2, selType="AIC")} 5 | \description{Function to get the significance or not of a variable when given the the appropriate glms} 6 | \value{The output is logical, TRUE means that the variable is significant, FALSE means that the variable is not significant} 7 | \note{This is an internal function} 8 | \keyword{glm} 9 | \keyword{consistency} 10 | \keyword{significance} 11 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 12 | \arguments{\item{\code{aGlm1}}{the glm that contains the rating factor to be analysed} 13 | \item{\code{aGlm2}}{the glm without the rating factor to be analysed} 14 | \item{\code{selType}}{This is the selection criterion type, either "AIC", "BIC", "Chisq", or "F"}} 15 | -------------------------------------------------------------------------------- /man/.plotTimeConsist.Rd: -------------------------------------------------------------------------------- 1 | \name{.plotTimeConsist} 2 | \alias{.plotTimeConsist} 3 | \title{Function for plotting consistency...} 4 | \usage{.plotTimeConsist(ratingFactor="BonusMalus", timeDef="Year", 5 | theGlm=myGlm, constThresh=60, expVar="Exposure", showPlots=TRUE)} 6 | \description{Function for plotting consistency} 7 | \value{The output is a numeric denoting the median consistency of the rating factor with year} 8 | \note{...} 9 | \seealso{\code{\link{stepIC}}} 10 | \keyword{consistency} 11 | \keyword{glm} 12 | \keyword{time} 13 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 14 | \arguments{\item{\code{ratingFactor}}{a character string denoting the rating factor to carry out time consistency analysis} 15 | \item{\code{timeDef}}{a character string denoting the Year variable} 16 | \item{\code{theGlm}}{a glm object to be analysed} 17 | \item{\code{constThresh}}{a numeric denoting the threshold percentage of consistency} 18 | \item{\code{expVar}}{set this to the exposure column name} 19 | \item{\code{showPlots}}{logical for whether to plot the variables or not}} 20 | -------------------------------------------------------------------------------- /man/plotTimeConsist.Rd: -------------------------------------------------------------------------------- 1 | \name{plotTimeConsist} 2 | \alias{plotTimeConsist} 3 | \title{Function for plotting consistency...} 4 | \usage{plotTimeConsist(ratingFactor="BonusMalus", timeDef="Year", theGlm=myGlm, constThresh=60, expVar="Exposure", showPlots=TRUE)} 5 | \description{Function for plotting consistency} 6 | \value{The output is a numeric denoting the median consistency of the rating factor with year} 7 | \note{...} 8 | \seealso{\code{\link{forwardBackStepAIC}}} 9 | \keyword{consistency} 10 | \keyword{glm} 11 | \keyword{time} 12 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 13 | \arguments{\item{\code{ratingFactor}}{a character string denoting the rating factor to carry out time consistency analysis} 14 | \item{\code{timeDef}}{a character string denoting the Year variable} 15 | \item{\code{theGlm}}{a glm object to be analysed} 16 | \item{\code{constThresh}}{a numeric denoting the threshold percentage of consistency} 17 | \item{\code{expVar}}{set this to the exposure column name} 18 | \item{\code{showPlots}}{logical for whether to plot the variables or not}} 19 | -------------------------------------------------------------------------------- /man/.modelFamilySpec.Rd: -------------------------------------------------------------------------------- 1 | \name{.modelFamilySpec} 2 | \alias{.modelFamilySpec} 3 | \title{Function to specify the model family/link distributions/functions...} 4 | \usage{.modelFamilySpec(distr="poisson", myLink="log", theta)} 5 | \description{Function to specify the model family/link distributions/functions} 6 | \value{The output is an object of class "family"} 7 | \note{This is just a wrapper function for the family() function} 8 | \keyword{addition} 9 | \keyword{arithmetic} 10 | \references{http://finzi.psych.upenn.edu/R/library/stats/html/family.html} 11 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 12 | \arguments{\item{\code{distr}}{character string denoting the distribution to use options are "poisson", 13 | "binomial", "gaussian", "Gamma", "inverse.gaussian", "quasi", "quasibinomial", 14 | "quasipoisson", "negative.binomial"} 15 | \item{\code{myLink}}{character string denoting the link function, e.g. "identity", 16 | "log", "logit", "inverse", "1/mu^2"} 17 | \item{\code{theta}}{should be set to the value of theta if the "negative.binomal" distribution is selected}} 18 | \examples{modelFamilySpec("poisson", "log")} 19 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: autoPricing 2 | Maintainer: Chibisi Chima-Okereke 3 | License: GPL-2 4 | Title: This package is for carrying out automated glm pricing analysis 5 | for frequency and severity models 6 | ZipData: yes 7 | Type: Package 8 | LazyLoad: true 9 | Author: Chibisi Chima-Okereke 10 | Import: MASS, reshape, fBasics, gridExtra, multcomp 11 | Description: The purpose of this package is to carry out automated GLM 12 | analysis for actuarial pricing. The idea is that it uses the 13 | forward or backward algorithms and information criteria to obtain a 14 | frequency and severity model. It allows tables to be specified that 15 | map the aggregation structure of the variable from those currently 16 | specified to a less granular set of categories. This allows those 17 | explanatory variables to be dynamically re-specified during the 18 | pricing process if the native variable does not improve the fit of 19 | the model. The re-specification of the variable categories is done 20 | if the native variable does not decrease the information criterion; 21 | the mapping table and a Tukey test on the categorical variable is 22 | used to aggregate categories that are statistically and logically 23 | similar. 24 | Version: 0.4 25 | Extends: 26 | sp 27 | URL: http://www.mango-solutions.com 28 | Depends: 29 | MASS, 30 | reshape, 31 | fBasics, 32 | gridExtra, 33 | multcomp 34 | Collate: 35 | 'stepIC.R' 36 | 'plotTimeConsist.R' 37 | 'plotRatingFactor.R' 38 | 'modelFamilySpec.R' 39 | 'getVarSignificance.R' 40 | 'getCoeffs.R' 41 | 'forwardBackSig.R' 42 | 'policyTable.R' 43 | 'consistCorrection.R' 44 | 'autoPricing-package.R' 45 | 'addVariable.R' 46 | -------------------------------------------------------------------------------- /R/modelFamilySpec.R: -------------------------------------------------------------------------------- 1 | #' Function to specify the model family/link distributions/functions 2 | #' 3 | #' 4 | #' 5 | #' @param \code{distr} character string denoting the distribution to use options are "poisson", 6 | #' "binomial", "gaussian", "Gamma", "inverse.gaussian", "quasi", "quasibinomial", 7 | #' "quasipoisson", "negative.binomial" 8 | #' @param \code{myLink} character string denoting the link function, e.g. "identity", 9 | #' "log", "logit", "inverse", "1/mu^2" 10 | #' @param \code{theta} should be set to the value of theta if the "negative.binomal" distribution is selected 11 | #' @return The output is an object of class "family" 12 | #' @note This is just a wrapper function for the family() function 13 | #' 14 | #' @keywords addition arithmetic 15 | #' @references 16 | #' http://finzi.psych.upenn.edu/R/library/stats/html/family.html 17 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 18 | #' @examples 19 | #' modelFamilySpec("poisson", "log") 20 | # 21 | .modelFamilySpec <- function(distr = "poisson", myLink = "log", theta = NULL){ 22 | 23 | # theta is only specified for negative binomial distribution 24 | if(distr == "negative.binomial"){ 25 | if(is.null(theta)){stop("Cannot proceed without specifying theta")} 26 | myTheta <- theta 27 | myFamily <- lapply(myTheta, distr, link = "log")[[1]] 28 | 29 | }else{ 30 | 31 | myFamily <- lapply(myLink, distr)[[1]] 32 | } 33 | return(myFamily) 34 | } 35 | 36 | # Examples of specifications for models 37 | #binomial(link = "logit") 38 | #gaussian(link = "identity") 39 | #Gamma(link = "inverse") 40 | #inverse.gaussian(link = "1/mu^2") 41 | #poisson(link = "log") 42 | #quasi(link = "identity", variance = "constant") 43 | #quasibinomial(link = "logit") 44 | #quasipoisson(link = "log") 45 | # This next one require MASS package 46 | #negative.binomial(theta = stop("'theta' must be specified"), link = "log") 47 | -------------------------------------------------------------------------------- /R/autoPricing-package.R: -------------------------------------------------------------------------------- 1 | #' This package is for carrying out automated glm pricing analysis for frequency and severity 2 | #' 3 | #' \tabular{ll}{ 4 | #' Package: \tab autoPricing\cr 5 | #' Type: \tab Package\cr 6 | #' Version: \tab 0.4\cr 7 | #' Date: \tab 2011-01-12\cr 8 | #' License: \tab GPL (>= 2)\cr 9 | #' LazyLoad: \tab yes\cr 10 | #' } 11 | #' 12 | #' The purpose of this package is to carry out automated GLM analysis for actuarial 13 | #' pricing. The idea is that it uses the forward or backward algorithms and information criteria to 14 | #' obtain a frequency and severity model. It allows tables to be specified that map the aggregation 15 | #' structure of the variable from those currently specified to a less granular set of categories. 16 | #' This allows those explanatory variables to be dynamically re-specified during the pricing process 17 | #' if the native variable does not improve the fit of the model. The re-specification of the 18 | #' variable categories is done if the native variable does not decrease the information criterion; 19 | #' the mapping table and a Tukey test on the categorical variable is used to aggregate categories 20 | #' that are statistically and logically similar. 21 | #' 22 | #' 23 | #' The \code{\link{stepIC}} function is the main function for this package, it carrys out the 24 | #' step information criterion process. 25 | #' 26 | #' 27 | #' @name autoPricing-package 28 | #' @aliases autoPricing 29 | #' @docType package 30 | #' @title This package is for carrying out automated glm pricing analysis for frequency and severity 31 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 32 | #' @keywords glm, actuarial pricing, automating 33 | #' @include addVariable.R 34 | #' @include consistCorrection.R 35 | #' @include policyTable.R 36 | #' @include forwardBackSig.R 37 | #' @include getCoeffs.R 38 | #' @include getVarSignificance.R 39 | #' @include modelFamilySpec.R 40 | #' @include plotRatingFactor.R 41 | #' @include plotTimeConsist.R 42 | #' @include stepIC.R 43 | #' @seealso \code{\link{stepIC}} 44 | #' 45 | -------------------------------------------------------------------------------- /R/getCoeffs.R: -------------------------------------------------------------------------------- 1 | #' Function for formatting coefficients 2 | #' 3 | #' @param \code{myExposure} the name of the column of the dataset that has the exposure (in years) 4 | #' @param \code{myGlm} the glm object 5 | #' @param \code{theForm} should be the formula if theGlm$formula is NULL 6 | #' @param \code{sourceData} should be the source data if theGlm$data is NULL 7 | #' @param \code{aCurrVar} this is the current variable being analysed 8 | #' @return The output is a nicely formatted table of the coefficients 9 | #' @note This formula is used extensively in the \code{stepIC} function 10 | #' 11 | #' @keywords glm coefficients 12 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 13 | # 14 | .getCoeffs <- function(myExposure = "Exposure", myGlm = theGlm, theForm = NULL, sourceData = NULL, aCurrVar){ 15 | 16 | theData <- myGlm$data 17 | 18 | if(is.null(myGlm$formula)){myGlm$formula <- theForm} 19 | if(is.null(myGlm$data)){theData <- sourceData} 20 | 21 | 22 | myVars <- gsub(" ", "", unlist(strsplit(as.character(myGlm$formula)[3], "\\+"))) 23 | 24 | getMyVars <- function(numVars = 1){ 25 | return(myVars[which(lapply(strsplit(myVars, ":"), length) == numVars)]) 26 | } 27 | 28 | singleVars <- getMyVars(numVars = 1) 29 | singleVars <- singleVars[singleVars != 1] 30 | singleVars <- singleVars[singleVars != 0] 31 | 32 | # 33 | # Creating the newdata 34 | restOfFactors <- singleVars[singleVars != aCurrVar] 35 | newData <- lapply(restOfFactors, function(x){ levels(theData[,x])[1] }) 36 | names(newData) <- restOfFactors 37 | newData[[aCurrVar]] <- levels(theData[, aCurrVar]) 38 | newData <- data.frame(do.call(cbind, newData)) 39 | newData[,myExposure] <- 1 40 | 41 | # These are the predicted Coefficients 42 | aCoeffs <- predict(myGlm, newdata = newData, type = "term", se.fit = TRUE) 43 | bCoeffs <- data.frame("Estimate" = aCoeffs$fit[, aCurrVar], "StdError" = aCoeffs$se.fit[, aCurrVar]) 44 | # Re-basing the fitted Coefficient 45 | bCoeffs[,1] <- bCoeffs[, 1] - bCoeffs[1, 1] 46 | bCoeffs <- data.frame("Categories" = levels(theData[,aCurrVar]), bCoeffs) 47 | #names(bCoeffs)[1] <- "Categories" 48 | 49 | # This is to get the percentage exposure 50 | tabFormula <- as.formula(paste(myExposure, "~", aCurrVar)) 51 | propExposure <- xtabs(tabFormula, data = theData[,c(aCurrVar, myExposure)]) 52 | propExposure <- 100*propExposure/sum(propExposure) 53 | bCoeffs$PercentageExposure <- propExposure 54 | 55 | return(bCoeffs) 56 | 57 | } 58 | -------------------------------------------------------------------------------- /R/getVarSignificance.R: -------------------------------------------------------------------------------- 1 | #' Function to get the significance or not of a variable when given the the appropriate glms 2 | #' 3 | #' @param \code{aGlm1} the glm that contains the rating factor to be analysed 4 | #' @param \code{aGlm2} the glm without the rating factor to be analysed 5 | #' @param \code{selType} This is the selection criterion type, either "AIC", "BIC", "Chisq", or "F" 6 | #' @return The output is logical, TRUE means that the variable is significant, FALSE means that the variable is not significant 7 | #' @note This is an internal function 8 | #' 9 | #' @keywords glm consistency significance 10 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 11 | 12 | .getVarSignificance <- function(aGlm1 = glm1, aGlm2 = glm2, selType = "AIC"){ 13 | 14 | handicap <- get("handicap", envir = funcEnv) 15 | 16 | if(selType == "AIC"){ 17 | ICglm1 <- extractAIC(aGlm1)[2] 18 | ICglm2 <- extractAIC(aGlm2)[2] 19 | cat("AIC is being used for model selection\n\n") 20 | cat("Factor Model: ", paste(as.character(aGlm1$formula)[c(2,1,3)], collapse = " "), "\n") 21 | cat(paste("AIC of candidate model is ", round(ICglm1), "\n", sep = "")) 22 | cat("Sub model: ", paste(as.character(aGlm2$formula)[c(2,1,3)], collapse = " "), "\n") 23 | cat(paste("AIC of sub model is ", round(ICglm2), "\n\n\n", sep = "")) 24 | output <- round(ICglm1) + handicap < round(ICglm2) 25 | } 26 | 27 | if(selType == "BIC"){ 28 | ICglm1 <- BIC(aGlm1) 29 | ICglm2 <- BIC(aGlm2) 30 | cat("BIC is being used for model selection\n\n") 31 | cat("Factor Model: ", paste(as.character(aGlm1$formula)[c(2,1,3)], collapse = " "), "\n") 32 | cat(paste("BIC of candidate model is ", round(ICglm1), "\n\n", sep = "")) 33 | cat("Sub model: ", paste(as.character(aGlm2$formula)[c(2,1,3)], collapse = " "), "\n") 34 | cat(paste("BIC of sub model is ", round(ICglm2), "\n\n\n", sep = "")) 35 | output <- round(ICglm1) + handicap < round(ICglm2) 36 | } 37 | 38 | if(selType == "Chisq"){ 39 | anovaTable <- anova(aGlm2, aGlm1, test = "Chisq") 40 | cat("Chi-Squared Test is being used for model selection\n\n") 41 | cat("ANOVA output for Chi-Squared Test comparing the models\n\n") 42 | print(anovaTable) 43 | output <- (anovaTable[2 ,"Pr(>Chi)"] < 0.05 & anovaTable[2, "Deviance"] > 0) 44 | } 45 | 46 | if(selType == "F"){ 47 | anovaTable <- anova(aGlm2, aGlm1, test = "F") 48 | cat("F-Test is being used for model selection\n\n") 49 | cat("ANOVA output for F-Test comparing the models\n\n") 50 | print(anovaTable) 51 | output <- (anovaTable[2 ,"Pr(>F)"] < 0.05 & anovaTable[2, "Deviance"] > 0) 52 | } 53 | 54 | return(output) 55 | 56 | } 57 | -------------------------------------------------------------------------------- /R/plotRatingFactor.R: -------------------------------------------------------------------------------- 1 | #' This is a function to plot rating factors 2 | #' 3 | #' @param \code{ratingFactor} a character string of the rating factor to be plotted 4 | #' @param \code{theCoefficients} output from the \code{getCoeffs()} function 5 | #' @return There is no output to this function 6 | #' @note This is an internal function 7 | #' 8 | #' @keywords plotting rating factors 9 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 10 | # 11 | .plotRatingFactor <- function(ratingFactor = "BonusMalus", theCoefficients = myCoeffs){ 12 | # 13 | 14 | # Defining Colors 15 | # EMB Yellow 16 | embYellow <- c(250, 255, 102)/255 17 | embYellow <- rgb(embYellow[1], embYellow[2], embYellow[3], alpha = 0.7) 18 | myColors <- colors()[c(26, 451, 142, 83, 254, 491, 454, 32, 381, 494, 121, 548, 19 | 504, 150, 493, 591, 115, 135, 75, 393)] 20 | # 21 | theCoefficients$High95 <- theCoefficients$Estimate + theCoefficients$StdError*1.96 22 | theCoefficients$Low95 <- theCoefficients$Estimate - theCoefficients$StdError*1.96 23 | # 24 | fullYRange <- c(0, max(2*theCoefficients$PercentageExposure)) 25 | yRange <- c(fullYRange[2]/2, fullYRange[2]) 26 | xRange <- range(c(theCoefficients$Estimate, theCoefficients$High95, theCoefficients$Low95)) 27 | if(xRange[2] > 0){xRange[2] <- xRange[2]*1.2}else{xRange[2] <- xRange[2] - .2*xRange[2] } 28 | # 29 | convertScale <- function(conPoints = 1, parm2Perc = TRUE){ 30 | 31 | myM <- diff(yRange)/diff(xRange) 32 | myC <- yRange[1] - myM*xRange[1] 33 | 34 | if(parm2Perc == TRUE){outPut <- myM*conPoints + myC}else{ 35 | 36 | if(parm2Perc == FALSE){outPut <- (conPoints - myC)/myM}else{outPut <- NULL} 37 | } 38 | return(outPut) 39 | } 40 | # 41 | fullYRange <- c(fullYRange[1], 1.2*ceiling(fullYRange[2])) 42 | # 43 | #Plotting 44 | # 45 | #windows() 46 | par(mar = c(5,4,4,5) + .1) 47 | xCoords <- barplot(theCoefficients$PercentageExposure, col = embYellow, ylim = fullYRange, yaxt = "n", 48 | main = paste("Predicted Values for the ", ratingFactor, " rating factor", sep = ""), names.arg = theCoefficients$Level1) 49 | lines(xCoords, convertScale(theCoefficients$Estimate, parm2Perc = TRUE), lwd = 2, col = "blue") 50 | lines(xCoords, convertScale(theCoefficients$High95, parm2Perc = TRUE), lwd = 2, col = colors()[33], lty = 2) 51 | lines(xCoords, convertScale(theCoefficients$Low95, parm2Perc = TRUE), lwd = 2, col = colors()[33], lty = 2) 52 | points(xCoords, convertScale(theCoefficients$Estimate, parm2Perc = TRUE), cex = 1.7, lwd = 2, pch = 21, bg = "blue", col = "brown") 53 | axis(4) 54 | mtext("Exposure %", side = 4, line = 3) 55 | #axis(2, labels = c("", round(convertScale(axTicks(2), parm2Perc = FALSE), 2)[-1]), at = axTicks(2)) 56 | axis(2, labels = c(round(convertScale(axTicks(2), parm2Perc = FALSE), 2)), at = axTicks(2)) 57 | mtext("Fitted Coefficient (Base = 0)", side = 2, line = 3) 58 | } # end of plotting function 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # autoPricing 2 | 3 | > The purpose of this package is to carry out automated GLM 4 | analysis for actuarial pricing. The idea is that it uses the 5 | forward or backward algorithms and information criteria to obtain a 6 | frequency and severity model. It allows tables to be specified that 7 | map the aggregation structure of the variable from those currently 8 | specified to a less granular set of categories. This allows those 9 | explanatory variables to be dynamically re-specified during the 10 | pricing process if the native variable does not improve the fit of 11 | the model. The re-specification of the variable categories is done 12 | if the native variable does not decrease the information criterion; 13 | the mapping table and a Tukey test on the categorical variable is 14 | used to aggregate categories that are statistically and logically 15 | similar. 16 | 17 | ## Installation 18 | Installation from github requires the devtools package to be installed. 19 | 20 | ## Usage 21 | 22 | ```R 23 | #Loading the data 24 | data(policyTable) 25 | 26 | #Preparing rating factor names and mapping tables 27 | myRatingFactors <- c("BonusMalus", "WeightClass", "Region", "Age", "Mileage", "Usage") 28 | ratingFactorLevels <- lapply(myRatingFactors, function(x){matrix(as.character(levels(policyTable[,x])))}) 29 | names(ratingFactorLevels) <- myRatingFactors 30 | ratingFactorLevels$Mileage <- cbind(ratingFactorLevels$Mileage, c("0-12500", "0-12500", "> 12500")) 31 | ratingFactorLevels$BonusMalus <- cbind(ratingFactorLevels$BonusMalus, as.character(sort(rep(LETTERS[1:7], 2)))) 32 | weightClass <- c("650-935", "650-935", "650-935", "650-935", "1030-1315", "1030-1315", "1030-1315", "1030-1315", "1410-1600", "1410-1600", "1410-1600") 33 | ratingFactorLevels$WeightClass <- cbind(ratingFactorLevels$WeightClass, weightClass) 34 | 35 | #Example 1: Executing forward algorithm for poisson risk model 36 | outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 37 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 38 | consistThresh = 60, theData = policyTable, analysisType = "frequency", 39 | myDistr = "poisson", theLink = "log", exposureName = "Exposure", 40 | handicap = 0, myDocumentTitle = "Automated Pricing GLM", theAlg = "forward", 41 | plotCharts = TRUE) 42 | # Example 2: Writing process to PDF and log file for documentation purposes 43 | myFolder <- getwd() 44 | pdf(file = file.path(myFolder, "GLMOutput.pdf"), height = 7, width = 11) 45 | par(mfrow = c(1,1), cex.main = 1, cex.axis = .9, cex.lab = 1, cex = 1) 46 | sink(file = file.path(myFolder, "GLMOutput.doc")) 47 | outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 48 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 49 | consistThresh = 60, theData = policyTable, analysisType = "frequency", 50 | myDistr = "poisson", theLink = "log", exposureName = "Exposure", 51 | handicap = 0, myDocumentTitle = "Automated Pricing GLM", theAlg = "forward", 52 | plotCharts = TRUE) 53 | sink() 54 | dev.off() 55 | ``` 56 | 57 | ## Licence 58 | GPL 2 © [Mango Solutions](https://github.com/mangothecat) -------------------------------------------------------------------------------- /man/forwardBackStepAIC.Rd: -------------------------------------------------------------------------------- 1 | \name{forwardBackStepAIC} 2 | \alias{forwardBackStepAIC} 3 | \title{Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for...} 4 | \usage{forwardBackStepAIC(ratingFact, countVar, sevVar, factLevels=1, timeVar, IC="BIC", consistThresh=60, theData, 5 | analysisType="frequency", myDistr="poisson", theLink="log", exposureName="Exposure", handicap=0, 6 | myDocumentTitle="Automated Pricing GLM", theAlg="forward", plotCharts=TRUE, ...)} 7 | \description{Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for 8 | actuarial pricing models the routine also dynamically adjusts the categories for variables that are not significant. This function 9 | is intended for analysing categorical explanatory varibles.} 10 | \value{The output is the frequency or severity model from the chosen algorithm \code{theAlg}} 11 | \note{please pay attention to the structure of the table requires, since it is non-standard as far as in most 12 | modelling systems a separate policy and claims table is used for the analysis} 13 | \keyword{glm} 14 | \keyword{actuarial} 15 | \keyword{pricing} 16 | \keyword{automation} 17 | \keyword{backward} 18 | \keyword{forward} 19 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 20 | \arguments{\item{\code{ratingFact}}{this a character vector denoting the column headers of rating factors in your table (\code{theData}). 21 | Please be aware that the package \code{autoPricing} does not currently support interaction terms in your model.} 22 | \item{\code{countVar}}{this is a character denoting the claims count column} 23 | \item{\code{sevVar}}{this is a character denoting the column header of the severity variable} 24 | \item{\code{factLevels}}{this is a list of matrices (or data.frames) that denote the mapping of the rating 25 | factors from their current categories to their logical aggregatable variables} 26 | \item{\code{timeVar}}{this is a character denoting the column header of the year variable. This variable must be specified because 27 | time consistency analysis is carried out by using interaction terms with the year variable.} 28 | \item{\code{IC}}{This is a character string denoting whether the information criterion used should be AIC 29 | (Akaike Information Criterion) or BIC (Bayesian Information Criterion)} 30 | \item{\code{consistThresh}}{Set this to the threshold for median consistency, denoting that the model fit of interaction 31 | of the variables with Year is consistent to \code{consistThresh} or greater before it is accepted} 32 | \item{\code{theData}}{this is the data set that will be used for the analysis containing the rating factors, exposure, 33 | severity, claim counts, and year} 34 | \item{\code{myDistr}}{this is a character string denoting which distribution should be used in the analysis e.g. "poisson", "Gamma"} 35 | \item{\code{theLink}}{this is the link function to be used in the analysis} 36 | \item{\code{exposureName}}{this is a character string denoting the column name of the exposure (in Years)} 37 | \item{\code{handicap}}{extra penalty for the information criterion, it is added to the IC of the candiate 38 | model to alter how dificult it is to accept variables} 39 | \item{\code{myDocumentTitle}}{This is a character string for the title of the document} 40 | \item{\code{theAlg}}{this is the algorithm to be used either "forward" or "backward".} 41 | \item{\code{plotCharts}}{this is a logical variable as to whether the charts should be plotted}} 42 | \examples{#Loading the data 43 | data(policyTable) 44 | 45 | #Preparing rating factor names and mapping tables 46 | myRatingFactors <- c("BonusMalus", "WeightClass", "Region", "Age", "Mileage", "Usage") 47 | ratingFactorLevels <- lapply(myRatingFactors, function(x){matrix(as.character(levels(policyTable[,x])))}) 48 | names(ratingFactorLevels) <- myRatingFactors 49 | ratingFactorLevels$Mileage <- cbind(ratingFactorLevels$Mileage, c("0-12500", "0-12500", "> 12500")) 50 | 51 | # Example 1: Executing forward algorithm for poisson risk model 52 | outputModelForwardFreq <- forwardBackStepAIC(ratingFact = myRatingFactors, countVar = "NoClaims", 53 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", IC = "BIC", 54 | consistThresh = 60, theData = policyTable, analysisType = "frequency", 55 | myDistr = "poisson", theLink = "log", exposureName = "Exposure", 56 | handicap = 0, myDocumentTitle = "Automated Pricing GLM", theAlg = "forward", 57 | plotCharts = TRUE) 58 | 59 | 60 | # Example 2: Writing process to PDF and log file for documentation purposes 61 | myFolder <- getwd() 62 | 63 | pdf(file = file.path(paste(myFolder, "GLMOutput.pdf", sep = "")), height = 7, width = 11) 64 | par(mfrow = c(1,1), cex.main = 1, cex.axis = .9, cex.lab = 1, cex = 1) 65 | sink(file = file.path(paste(myFolder, "GLMOutput.doc", sep = ""))) 66 | outputModelForwardFreq <- forwardBackStepAIC(ratingFact = myRatingFactors, countVar = "NoClaims", 67 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", IC = "BIC", 68 | consistThresh = 60, theData = policyTable, analysisType = "frequency", 69 | myDistr = "poisson", theLink = "log", exposureName = "Exposure", 70 | handicap = 0, myDocumentTitle = "Automated Pricing GLM", theAlg = "forward", 71 | plotCharts = TRUE) 72 | sink() 73 | dev.off()} 74 | -------------------------------------------------------------------------------- /R/forwardBackSig.R: -------------------------------------------------------------------------------- 1 | #' Master function to find out if the variable is significant when you add a variable 2 | #' 3 | #' @param \code{theModel} this is the candidate model 4 | #' @param \code{currVar} this is the current variable being investigated 5 | #' @param \code{alg} this is the algoirthm being used either forward or backward 6 | #' @return The model that will be used and whether the \code{currVar} was significant or not 7 | #' @note This is an internal function and should not be used by the user 8 | #' 9 | #' @keywords glm forward backward 10 | #' @include autoPricing-package.R 11 | #' @seealso \code{\link{glm}} 12 | #' @seealso \code{\link{stepIC}} 13 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 14 | # 15 | .forwardBackSig <- function(theModel, currVar, alg = "forward", ...){ 16 | # 17 | myWeights <- get("myWeights", envir = funcEnv) 18 | factLevels <- get("factLevels", envir = funcEnv) 19 | theData <- get("theData", envir = funcEnv) 20 | myK <- get("myK", envir = funcEnv) 21 | handicap <- get("handicap", envir = funcEnv) 22 | plotCharts <- get("plotCharts", envir = funcEnv) 23 | theFormula <- theModel$formula 24 | # 25 | if(alg == "forward"){ 26 | glm1 <- update(theModel, as.formula(paste("~.+", currVar, sep = ""))) 27 | glm2 <- theModel 28 | } 29 | # 30 | if(alg == "backward"){ 31 | glm1 <- theModel 32 | glm2 <- update(theModel, as.formula(paste("~.-", currVar, sep = ""))) 33 | } 34 | 35 | 36 | # 37 | # This is the important part where it decided what happens to 38 | # the variable based on its information criterion 39 | # 40 | factorChanged <- FALSE 41 | assign("factorChanged", factorChanged, envir = funcEnv) 42 | bSelType <- get("selType", envir = funcEnv) 43 | if(.getVarSignificance(aGlm1 = glm1, aGlm2 = glm2, selType = bSelType)){ 44 | cat(paste("\nVariable ", currVar, " is significant \n\n", sep = "")) 45 | 46 | return(list(glm1, "Significant")) 47 | }else{ 48 | if(ncol(factLevels[[currVar]]) > 1){ 49 | 50 | consolidTable <- factLevels[[currVar]] 51 | 52 | #Extract Significance Test for factor level of variable concerned 53 | variableComparison <- summary(glht(glm1, linfct = eval(parse( 54 | text = paste("mcp(", currVar, ' = "Tukey")', sep = ""))) ))$test 55 | compNames <- names(variableComparison$coefficients) 56 | variableComparison <- data.frame("Comparison" = compNames, "pvalues" = as.vector(variableComparison$pvalue)) 57 | # 58 | cat("Output of summary(glht(glm, variable = 'Tukey')) in the {multcomp} package \n") 59 | print(variableComparison) 60 | cat("\nThis is the mapping table to be used to aggregate the factor levels before retesting the glm \n") 61 | print(consolidTable) 62 | poorVars <- as.character(variableComparison[variableComparison$pvalues > 0.05,]$Comparison) 63 | poorVars <- strsplit(poorVars, " - ") 64 | # 65 | factorChanged <- FALSE 66 | tempData <- get("tempData", envir = funcEnv) 67 | for(i in seq(along = poorVars)){ 68 | poorVals <- consolidTable[which(consolidTable[,1] %in% poorVars[[i]]), ][,2] 69 | # 70 | if(all(poorVals == poorVals[1])){ 71 | levels(tempData[,currVar])[which(levels(tempData[,currVar]) %in% poorVars[[i]])] <- poorVals[1] 72 | cat(paste("\naltered levels ... ", paste(levels(tempData[,currVar]), collapse = ", "), "\n" ) ) 73 | factorChanged <- TRUE 74 | }else{} 75 | } 76 | cat(paste("\nFactor has changed? ... ", factorChanged, "\n\n", sep = "")) 77 | if(factorChanged){ 78 | glm1 <- glm(glm1$formula, data = tempData, family = modelFamily, offset = log(Exposure), weights = myWeights) 79 | 80 | if( .getVarSignificance(aGlm1 = glm1, aGlm2 = glm2, selType = bSelType) ){ 81 | 82 | cat("Alternative form of the variable ", currVar, " is significant\n") 83 | assign("tempData", tempData, envir = funcEnv) 84 | assign("factorChanged", factorChanged, envir = funcEnv) 85 | return(list(glm1, "Significant")) 86 | }else{ 87 | if(plotCharts){ 88 | .plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 89 | .getCoeffs(myExposure = "Exposure", myGlm = glm1, aCurrVar = as.character(currVar))) 90 | } 91 | cat(paste("Variable ", currVar, " is not significant", "\n\n\n", sep = "")) 92 | assign("factorChanged", factorChanged, envir = funcEnv) 93 | return(list(glm2, "Not Significant"))} 94 | 95 | }else{ 96 | if(plotCharts){ 97 | .plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 98 | .getCoeffs(myExposure = "Exposure", myGlm = glm1, aCurrVar = as.character(currVar))) 99 | } 100 | cat(paste("Variable ", currVar, " is not significant \n\n\n", sep = "")) 101 | assign("factorChanged", factorChanged, envir = funcEnv) 102 | return(list(glm2, "Not Significant"))} 103 | 104 | }else{ 105 | if(plotCharts){ 106 | .plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 107 | .getCoeffs(myExposure = "Exposure", myGlm = glm1, aCurrVar = as.character(currVar))) 108 | } 109 | cat(paste("No further aggregation can be carried out since mapping table is not specified so variable ", 110 | currVar, " is not significant \n\n\n", sep = "")) 111 | assign("factorChanged", factorChanged, envir = funcEnv) 112 | return(list(glm2, "Not Significant")) 113 | } 114 | } 115 | 116 | } 117 | 118 | -------------------------------------------------------------------------------- /R/consistCorrection.R: -------------------------------------------------------------------------------- 1 | #' This function rebalances a raw variable that is significant but not consistent 2 | #' 3 | #' @param \code{glm1} the glm that contains the rating factor to be analysed output from the \code{.forwardBackSig} function 4 | #' @param \code{currVar} the rating factor to be analysed 5 | #' @return The output is a list of revised glm and a character either \code{"Consistent"} or \code{"Not Consistent"} 6 | #' @note This is an internal function 7 | #' 8 | #' @keywords glm consistency significance 9 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 10 | # 11 | .consistCorrection <- function(glm1 = currentModel, currVar = currentVar){ 12 | 13 | cat("Significant But Not Consistent Analysis (SBNC) Analysis:\n") 14 | cat("########################################################\n\n") 15 | cat("This part of the analysis is taking place because the variable ", currVar, " is significant but not consistent ...\n") 16 | cat("... so the variable is being rebalanced to see if it will be consistent\n\n") 17 | tempData <- get("theData", envir = funcEnv) 18 | myWeights <- get("myWeights", envir = funcEnv) 19 | factLevels <- get("factLevels", envir = funcEnv) 20 | timeVar <- get("timeVar", envir = funcEnv) 21 | exposureName <- get("exposureName", envir = funcEnv) 22 | consistThresh <- get("consistThresh", envir = funcEnv) 23 | 24 | glm2 <- update(theModel, as.formula(paste("~.-", currVar, sep = ""))) 25 | 26 | #myForm2 <- as.character(glm2$formula)[c(2,1,3)] 27 | #ratingFactors <- strsplit(myForm2[3], " \\+ ")[[1]] 28 | #myForm2 <- paste(paste(myForm2[1:2], collapse = " "), paste(ratingFactors[ratingFactors != currVar], collapse = " + ")) 29 | #glm2 <- glm(formula(myForm2), data = tempData, family = glm2$family, offset = log(Exposure), weights = myWeights) 30 | # 31 | ##################################### 32 | 33 | consolidTable <- factLevels[[currVar]] 34 | #Extract Significance Test for factor level of variable concerned 35 | variableComparison <- summary(glht(glm1, linfct = eval(parse( 36 | text = paste("mcp(", currVar, ' = "Tukey")', sep = ""))) ))$test 37 | compNames <- names(variableComparison$coefficients) 38 | variableComparison <- data.frame("Comparison" = compNames, "pvalues" = as.vector(variableComparison$pvalue)) 39 | # 40 | cat("Output of summary(glht(glm, variable = 'Tukey')) in the {multcomp} package\n") 41 | print(variableComparison) 42 | cat("\nThis is the mapping table to be used to aggregate the factor levels before retesting the glm\n") 43 | print(consolidTable) 44 | poorVars <- as.character(variableComparison[variableComparison$pvalues > 0.05,]$Comparison) 45 | poorVars <- strsplit(poorVars, " - ") 46 | # 47 | factorChanged <- FALSE 48 | for(i in seq(along = poorVars)){ 49 | poorVals <- consolidTable[which(consolidTable[,1] %in% poorVars[[i]]), ][,2] 50 | # 51 | if(all(poorVals == poorVals[1])){ 52 | levels(tempData[,currVar])[which(levels(tempData[,currVar]) %in% poorVars[[i]])] <- poorVals[1] 53 | cat("altered levels ... ", paste(levels(tempData[,currVar]), collapse = ", "), "\n" ) 54 | factorChanged <- TRUE 55 | }else{} 56 | }#end for 57 | cat(paste("\n\nFactor has changed? ... ", factorChanged, "\n", sep = "")) 58 | if(factorChanged){ 59 | glm1 <- glm(glm1$formula, data = tempData, family = modelFamily, offset = log(Exposure), weights = myWeights) 60 | # 61 | #print(paste("Extended Model IC ", round(extractAIC(glm1, k = myK)[2]), " sub model ", 62 | # round(extractAIC(glm2, k = myK)[2]), sep = "")) 63 | bSelType <- get("selType", envir = funcEnv) 64 | #aicBetter <- (round(extractAIC(glm1, k = myK)[2]) + handicap < round(extractAIC(glm2, k = myK)[2])) 65 | if( .getVarSignificance(aGlm1 = glm1, aGlm2 = glm2, selType = bSelType) ){ 66 | cat(paste("An altered form of variable ", currVar, " is significant with IC: ", "\n", sep = "")) 67 | assign("tempData", tempData, envir = funcEnv) 68 | assign("factorChanged", factorChanged, envir = funcEnv) 69 | .plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 70 | .getCoeffs(myExposure = "Exposure", myGlm = glm1)) 71 | currentModelTime <- formula(paste(paste(as.character(glm1$formula)[c(2,1,3)], collapse = ""), 72 | " + ", paste(timeVar, ":", currVar, sep = ""), sep = "")) 73 | consistOut <- .plotTimeConsist(ratingFactor = as.character(currVar), timeDef = timeVar, 74 | theGlm = currentModelTime, constThresh = consistThresh, expVar = exposureName, 75 | showPlots = TRUE) 76 | if(consistOut > consistThresh){ 77 | cat(paste("Atered variable ", currVar, " is significant and consistent", "\n\n", sep = "")) 78 | return(list(glm1, "Consistent"))}else{ 79 | cat(paste("Atered variable ", currVar, " is significant but not consistent", "\n\n", sep = "")) 80 | return(list(glm2, "Not Consistent"))} 81 | }else{ 82 | if(plotCharts){ 83 | .plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 84 | .getCoeffs(myExposure = "Exposure", myGlm = glm1)) 85 | } 86 | cat(paste("Atered initial significant variable ", currVar, " is not significant -- odd!!", "\n\n", sep = "")) 87 | assign("factorChanged", factorChanged, envir = funcEnv) 88 | return(list(glm2, "Not Consistent"))} 89 | 90 | }else{ 91 | if(plotCharts){ 92 | .plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 93 | .getCoeffs(myExposure = "Exposure", myGlm = glm1)) 94 | } 95 | cat(paste("Variable ", currVar, " is not changed therefore not consistent", "\n\n", sep = ""))# 96 | assign("factorChanged", factorChanged, envir = funcEnv) 97 | return(list(glm2, "Not Consistent")) 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /man/stepIC.Rd: -------------------------------------------------------------------------------- 1 | \name{stepIC} 2 | \alias{stepIC} 3 | \title{Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for...} 4 | \usage{stepIC(ratingFact, countVar, sevVar, factLevels=1, timeVar, 5 | selType="BIC", consistThresh=60, theData, analysisType="frequency", 6 | myDistr="poisson", theLink="log", theAlg="forward", 7 | exposureName="Exposure", handicap=0, plotCharts=TRUE, 8 | myDocumentTitle="Automated Pricing GLM", ...)} 9 | \description{Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for 10 | actuarial pricing models the routine also dynamically adjusts the categories for variables that are not significant if an adequate 11 | mapping table is provided. This function is intended for analysing categorical explanatory varibles.} 12 | \value{The output is the frequency or severity model from the chosen algorithm \code{theAlg}} 13 | \note{please pay attention to the structure of the data requirements, since it is different from how 14 | actuarial data for GLM analysis is usually shaped and use the provided dataset "policyTable" as a guide 15 | for how the data ashould be formatted.} 16 | \keyword{glm} 17 | \keyword{actuarial} 18 | \keyword{pricing} 19 | \keyword{automation} 20 | \keyword{backward} 21 | \keyword{forward} 22 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}} 23 | \arguments{\item{\code{ratingFact}}{a character vector denoting the column headers of rating factors in your table (\code{theData}). 24 | Please be aware that the package \code{autoPricing} does not currently support interaction terms in your model.} 25 | \item{\code{countVar}}{a character string denoting the name of the claims count column} 26 | \item{\code{sevVar}}{a character string denoting the column header of the severity variable} 27 | \item{\code{factLevels}}{this is a list of matrices (or data.frames) that denote the mapping of the rating 28 | factors from their current categories to their logical aggregatable variables} 29 | \item{\code{timeVar}}{a character denoting the column header of the year variable. This variable must be specified because 30 | time consistency analysis is carried out by using interaction terms with the year variable.} 31 | \item{\code{IC}}{a character string denoting whether the information criterion used should be "AIC" 32 | (Akaike Information Criterion) or "BIC" (Bayesian Information Criterion)} 33 | \item{\code{consistThresh}}{Set this to the threshold for median consistency, denoting that the model fit of interaction 34 | of the variables with Year is consistent to \code{consistThresh} or greater before it is accepted} 35 | \item{\code{theData}}{this is the data set that will be used for the analysis containing the rating factors, exposure, 36 | severity, claim counts, and year} 37 | \item{\code{analysisType}}{flag indicating whether the analysis is for a "frequency" or "severity" model} 38 | \item{\code{myDistr}}{this is a character string denoting which distribution should be used in the analysis e.g. "poisson", "Gamma"} 39 | \item{\code{theLink}}{this is the link function to be used in the analysis} 40 | \item{\code{theAlg}}{this is the algorithm to be used either "forward" or "backward".} 41 | \item{\code{exposureName}}{this is a character string denoting the column name of the exposure (in Years)} 42 | \item{\code{handicap}}{extra penalty for the information criterion, it is added to the IC of the candiate 43 | model to alter how dificult it is to accept variables} 44 | \item{\code{myDocumentTitle}}{This is a character string for the title of the document} 45 | \item{\code{plotCharts}}{this is a logical variable as to whether the charts should be plotted}} 46 | \examples{#Loading the data 47 | data(policyTable) 48 | 49 | #Preparing rating factor names and mapping tables 50 | myRatingFactors <- c("BonusMalus", "WeightClass", "Region", "Age", "Mileage", "Usage") 51 | ratingFactorLevels <- lapply(myRatingFactors, function(x){matrix(as.character(levels(policyTable[,x])))}) 52 | names(ratingFactorLevels) <- myRatingFactors 53 | ratingFactorLevels$Mileage <- cbind(ratingFactorLevels$Mileage, c("0-12500", "0-12500", "> 12500")) 54 | ratingFactorLevels$BonusMalus <- cbind(ratingFactorLevels$BonusMalus, as.character(sort(rep(LETTERS[1:7], 2)))) 55 | weightClass <- c("650-935", "650-935", "650-935", "650-935", "1030-1315", "1030-1315", "1030-1315", "1030-1315", "1410-1600", "1410-1600", "1410-1600") 56 | ratingFactorLevels$WeightClass <- cbind(ratingFactorLevels$WeightClass, weightClass) 57 | 58 | #Example 1: Executing forward algorithm for poisson risk model 59 | outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 60 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 61 | consistThresh = 60, theData = policyTable, analysisType = "frequency", 62 | myDistr = "poisson", theLink = "log", exposureName = "Exposure", 63 | handicap = 0, myDocumentTitle = "Automated Pricing GLM", theAlg = "forward", 64 | plotCharts = TRUE) 65 | 66 | 67 | # Example 2: Writing process to PDF and log file for documentation purposes 68 | myFolder <- getwd() 69 | 70 | pdf(file = file.path(myFolder, "GLMOutput.pdf"), height = 7, width = 11) 71 | par(mfrow = c(1,1), cex.main = 1, cex.axis = .9, cex.lab = 1, cex = 1) 72 | sink(file = file.path(myFolder, "GLMOutput.doc")) 73 | outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 74 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 75 | consistThresh = 60, theData = policyTable, analysisType = "frequency", 76 | myDistr = "poisson", theLink = "log", exposureName = "Exposure", 77 | handicap = 0, myDocumentTitle = "Automated Pricing GLM", theAlg = "forward", 78 | plotCharts = TRUE) 79 | sink() 80 | dev.off()} 81 | -------------------------------------------------------------------------------- /R/plotTimeConsist.R: -------------------------------------------------------------------------------- 1 | #' Function for plotting consistency 2 | #' 3 | #' @param \code{ratingFactor} a character string denoting the rating factor to carry out time consistency analysis 4 | #' @param \code{timeDef} a character string denoting the Year variable 5 | #' @param \code{theGlm} a glm object to be analysed 6 | #' @param \code{constThresh} a numeric denoting the threshold percentage of consistency 7 | #' @param \code{expVar} set this to the exposure column name 8 | #' @param \code{showPlots} logical for whether to plot the variables or not 9 | #' @return The output is a numeric denoting the median consistency of the rating factor with year 10 | #' @note ... 11 | #' @seealso \code{\link{stepIC}} 12 | #' 13 | #' @keywords consistency glm time 14 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 15 | # 16 | .plotTimeConsist <- function(ratingFactor = "BonusMalus", timeDef = "Year", theGlm = myGlm, 17 | constThresh = 60, expVar = "Exposure", showPlots = TRUE){ 18 | # 19 | exposureName <- get("exposureName", envir = funcEnv) 20 | theData <- get("theData", envir = funcEnv) 21 | names(theData)[names(theData) == exposureName] <- "Exposure" 22 | myWeights <- get("myWeights", envir = funcEnv) 23 | theGlm <- glm(theGlm, data = theData, family = get("modelFamily", envir = funcEnv), offset = log(Exposure), weights = myWeights) 24 | myTerms <- gsub(" ", "", strsplit(as.character(theGlm$formula)[3], "\\+")[[1]]) 25 | myTerms <- unlist(lapply(myTerms, function(x){x <- strsplit(x, ":")[[1]]; if(length(x) > 1){return(NULL)}else{return(x)}})) 26 | myTerms <- myTerms[!is.na(myTerms)] 27 | myTerms <- myTerms[!(myTerms %in% c(timeDef, ratingFactor))] 28 | newData <- expand.grid(timeDef = levels(theGlm$data[,get("timeDef")]), ratingFactor = levels(theGlm$data[,get("ratingFactor")]), expVar = 1) 29 | names(newData) <- c(timeDef, ratingFactor, expVar) 30 | # 31 | if(length(myTerms) > 0){ 32 | myTerms <- lapply(myTerms, function(x){y <- data.frame(levels(theGlm$data[,x])[1]); names(y) <- x; return(y)}) 33 | myTerms <- do.call(cbind, myTerms) 34 | # 35 | newData <- data.frame(newData, myTerms) 36 | } 37 | myWeights <- rep(1, nrow(newData)) 38 | newData <- data.frame(newData, myWeights) 39 | tempSum <- predict(theGlm, newdata = newData, type = "terms", se.fit = FALSE) 40 | # Summing over the exposure year, rating factor, and year:rating factor interaction terms 41 | tempSum <- tempSum[,c(timeDef, ratingFactor, paste(timeDef, ":", ratingFactor, sep = ""))] 42 | # 43 | newData <- newData[,c(timeDef, ratingFactor)] 44 | newData$Coefficients <- apply(tempSum, 1, sum) 45 | # 46 | myFormula <- formula(paste("Coefficients", " ~ ", timeDef, " + ", ratingFactor, sep = "")) 47 | testSum <- xtabs(myFormula, data = newData) 48 | # Defining Colors 49 | # EMB Yellow 50 | embYellow <- c(250, 255, 102)/255 51 | embYellow <- rgb(embYellow[1], embYellow[2], embYellow[3], alpha = 0.7) 52 | # 53 | myColors <- timPalette(length(levels(theGlm$data[,timeDef]))) 54 | myProps <- prop.table(xtabs(formula(paste(expVar, " ~ ", timeDef," + ", 55 | ratingFactor, sep = "")), data = theGlm$data))*100 56 | # 57 | fullYRange <- c(0, max(2*apply(myProps, 2, sum))) 58 | yRange <- c(fullYRange[2]/2, fullYRange[2]) 59 | xRange <- range(testSum) 60 | if(xRange[2] > 0){xRange[2] <- xRange[2]*1.2}else{xRange[2] <- xRange[2] - .2*xRange[2] } 61 | # 62 | convertScale <- function(conPoints = 1, parm2Perc = TRUE){ 63 | 64 | myM <- diff(yRange)/diff(xRange) 65 | myC <- yRange[1] - myM*xRange[1] 66 | 67 | if(parm2Perc == TRUE){outPut <- myM*conPoints + myC}else{ 68 | 69 | if(parm2Perc == FALSE){outPut <- (conPoints - myC)/myM}else{outPut <- NULL} 70 | } 71 | return(outPut) 72 | } 73 | # 74 | # 75 | fullYRange <- c(fullYRange[1], 1.4*ceiling(fullYRange[2])) 76 | plotCoeffs <- convertScale(testSum) 77 | # 78 | if(showPlots == TRUE){ 79 | par(mar=c(5,4,4,5) + .1) 80 | xCoords <- barplot(myProps, col = myColors[1:nrow(myProps)], ylim = fullYRange, yaxt = "n", 81 | main = paste("Consistency Plot for ", ratingFactor, " rating factor", sep = ""), legend = rownames(myProps)) 82 | 83 | for(i in 1:nrow(plotCoeffs)){ 84 | lines(xCoords, plotCoeffs[i,], lwd = 2, col = myColors[i]) 85 | points(xCoords, plotCoeffs[i,], cex = 1, lwd = 1.2, pch = 21, bg = myColors[i], col = "black") 86 | #print(i) 87 | axis(4) 88 | mtext("Exposure %", side = 4, line = 3) 89 | axis(2, labels = c("", round(convertScale(axTicks(2), parm2Perc = FALSE), 1)[-1]), at = axTicks(2)) 90 | mtext(paste("Predicted Values for model including Year:", as.character(ratingFactor)," interaction", sep = ""), side = 2, line = 3) 91 | }# end for 92 | }#end show plot 93 | # Calculating consistency 94 | if(ncol(testSum) > 2){diffMatrix <- data.frame(t(apply(testSum, 1, diff)))}else{ 95 | diffMatrix <- data.frame(apply(testSum, 1, diff)) 96 | } 97 | names(diffMatrix) <- paste(colnames(testSum)[1:(length(colnames(testSum)) - 1)], colnames(testSum)[2:(length(colnames(testSum)))], sep = " - ") 98 | 99 | myCondition <- function(x){x[which(x > 0)] <- 1 100 | x[which(x < 0)] <- -1 101 | x[which(x == 0)] <- 0 102 | return(x)} 103 | 104 | consistencyMatrix <- apply(diffMatrix, 2, myCondition) 105 | 106 | myProps <- function(x){ 107 | percConsist <- max(data.frame(prop.table(table(x)))[,2]) 108 | # 109 | return(percConsist) 110 | } 111 | # 112 | myConsist <- apply(consistencyMatrix, 2, myProps)*100 113 | totConsist <- round(median(myConsist)) 114 | myConsist <- round(myConsist) 115 | # 116 | if(showPlots == TRUE){ 117 | myXs <- barplot(myConsist, border = "NA", col = "white", ylim = c(0, 120), 118 | main = "Percentage consistency between adjacent levels", 119 | ylab = "Percentage", xlab = "Comparison") 120 | points(myConsist ~ myXs, cex = 1, lwd = 1.2, pch = 21, bg = myColors[1], col = "black") 121 | lines(myConsist ~ myXs, cex = 1, lwd = 1.2, col = myColors[1]) 122 | abline(h = constThresh, lty = 2, col = "grey") 123 | # 124 | if(totConsist >= constThresh){text(x = 1, y = constThresh*1.05, labels = " Consistency Pass")}else{ 125 | text(x = 1, y = constThresh*1.05, labels = " Consistency Fail")} 126 | axis(1, labels = FALSE, at = myXs) 127 | box() 128 | }#end if plot 129 | myCoeffs <- data.frame(summary(theGlm)$coeff) 130 | names(myCoeffs) <- c("Estimate", "StdError", "ZValue","P(Z > |z|)") 131 | # 132 | return(totConsist) 133 | } # end of plotting function 134 | # 135 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /R/stepIC.R: -------------------------------------------------------------------------------- 1 | #' Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for 2 | #' actuarial pricing models the routine also dynamically adjusts the categories for variables that are not significant if an adequate 3 | #' mapping table is provided. This function is intended for analysing categorical explanatory varibles. 4 | #' 5 | #' 6 | #' @param \code{ratingFact} a character vector denoting the column headers of rating factors in your table (\code{theData}). 7 | #' Please be aware that the package \code{autoPricing} does not currently support interaction terms in your model. 8 | #' @param \code{countVar} a character string denoting the name of the claims count column 9 | #' @param \code{sevVar} a character string denoting the column header of the severity variable 10 | #' @param \code{factLevels} this is a list of matrices (or data.frames) that denote the mapping of the rating 11 | #' factors from their current categories to their logical aggregatable variables 12 | #' @param \code{timeVar} a character denoting the column header of the year variable. This variable must be specified because 13 | #' time consistency analysis is carried out by using interaction terms with the year variable. 14 | #' @param \code{selType} a character string denoting whether the information criterion used to denote for model comparisons should be "AIC" 15 | #' (Akaike Information Criterion) or "BIC" (Bayesian Information Criterion). Other selection types are 16 | #' available, "Chisq" for Chi-Squared, and "F" for F-Test but these are not recommended for multiple comparison reasons. 17 | #' @param \code{consistThresh} Set this to the threshold for median consistency, denoting that the model fit of interaction 18 | #' of the variables with Year is consistent to \code{consistThresh} or greater before it is accepted 19 | #' @param \code{theData} this is the data set that will be used for the analysis containing the rating factors, exposure, 20 | #' severity, claim counts, and year 21 | #' @param \code{analysisType} flag indicating whether the analysis is for a "frequency" or "severity" model 22 | #' @param \code{myDistr} this is a character string denoting which distribution should be used in the analysis e.g. "poisson", "Gamma" 23 | #' @param \code{theLink} this is the link function to be used in the analysis 24 | #' @param \code{theAlg} this is the algorithm to be used either "forward" or "backward". 25 | #' @param \code{exposureName} this is a character string denoting the column name of the exposure (in Years) 26 | #' @param \code{handicap} extra penalty for the information criterion, it is added to the IC of the candiate 27 | #' model to alter how dificult it is to accept variables 28 | #' @return The output is the frequency or severity model from the chosen algorithm \code{theAlg} 29 | #' @param \code{myDocumentTitle} This is a character string for the title of the document 30 | #' @param \code{plotCharts} this is a logical variable as to whether the charts should be plotted 31 | #' @param \code{randomiseOrder} this is whether the order of the stepwise analysis should be randomised or not. 32 | #' This feature may be useful to check the model which is selected if variables are 33 | #' tested in a random randomised order 34 | #' @param \code{justFit} Determines whether the automatic fitting process is used or whether the specified model is simply fitted 35 | #' @note please pay attention to the structure of the data requirements, since it is different from how 36 | #' actuarial data for GLM analysis is usually shaped and use the provided dataset "policyTable" as a guide 37 | #' for how the data ashould be formatted. 38 | #' 39 | #' @keywords glm actuarial pricing automation backward forward 40 | #' @include autoPricing-package.R 41 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com} 42 | #' @examples 43 | #' 44 | #' #Loading the data 45 | #' data(policyTable) 46 | #' 47 | #' #Preparing rating factor names and mapping tables 48 | #' myRatingFactors <- c("BonusMalus", "WeightClass", "Region", "Age", "Mileage", "Usage") 49 | #' ratingFactorLevels <- lapply(myRatingFactors, function(x){matrix(as.character(levels(policyTable[,x])))}) 50 | #' names(ratingFactorLevels) <- myRatingFactors 51 | #' ratingFactorLevels$Mileage <- cbind(ratingFactorLevels$Mileage, c("0-12500", "0-12500", "> 12500")) 52 | #' ratingFactorLevels$BonusMalus <- cbind(ratingFactorLevels$BonusMalus, as.character(sort(rep(LETTERS[1:7], 2)))) 53 | #' weightClass <- c("650-935", "650-935", "650-935", "650-935", "1030-1315", "1030-1315", "1030-1315", "1030-1315", "1410-1600", "1410-1600", "1410-1600") 54 | #' ratingFactorLevels$WeightClass <- cbind(ratingFactorLevels$WeightClass, weightClass) 55 | #' 56 | #' #Example 1: Executing forward algorithm for poisson risk model 57 | #' outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 58 | #' sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 59 | #' consistThresh = 60, theData = policyTable, analysisType = "frequency", 60 | #' myDistr = "poisson", theLink = "log", exposureName = "Exposure", 61 | #' handicap = 0, myDocumentTitle = "Automated Pricing GLM", theAlg = "forward", 62 | #' plotCharts = TRUE) 63 | #' 64 | #' 65 | #' # Example 2: Writing process to PDF and log file for documentation purposes 66 | #' myFolder <- getwd() 67 | #' 68 | #' pdf(file = file.path(myFolder, "GLMOutput.pdf"), height = 7, width = 11) 69 | #' par(mfrow = c(1,1), cex.main = 1, cex.axis = .9, cex.lab = 1, cex = 1) 70 | #' sink(file = file.path(myFolder, "GLMOutput.doc")) 71 | #' outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 72 | #' sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 73 | #' consistThresh = 60, theData = policyTable, analysisType = "frequency", 74 | #' myDistr = "poisson", theLink = "log", exposureName = "Exposure", 75 | #' handicap = 0, myDocumentTitle = "Automated Pricing GLM", theAlg = "forward", 76 | #' plotCharts = TRUE) 77 | #' sink() 78 | #' dev.off() 79 | # 80 | stepIC <- function(ratingFact, countVar, 81 | sevVar, factLevels = 1, timeVar, selType = "BIC", 82 | consistThresh = 60, theData, analysisType = "frequency", 83 | myDistr = "poisson", theLink = "log", theAlg = "forward", 84 | exposureName = "Exposure", handicap = 0, 85 | plotCharts = TRUE, myDocumentTitle = "Automated Pricing GLM", 86 | randomiseOrder = FALSE, justFit = FALSE, ...){ 87 | 88 | # Specifying the distribution and link functions 89 | assign("modelFamily", .modelFamilySpec(distr = myDistr, myLink = theLink, theta = NULL), envir = .GlobalEnv) 90 | 91 | # Aggregating the data ... 92 | # 93 | factorList <- lapply(c(timeVar, ratingFact), function(x){y <- theData[,x];return(y)}) 94 | names(factorList) <- c(timeVar, ratingFact) 95 | theData <- aggregate(theData[,c(countVar, sevVar, exposureName)], by = factorList, FUN = sum) 96 | names(theData)[names(theData) == exposureName] <- c("Exposure") 97 | # 98 | # reformating the data for severity models 99 | if(analysisType == "severity"){ 100 | theData <- theData[theData[,sevVar] > 0, ] 101 | averageClaim <- theData[,sevVar]/theData[,countVar] 102 | theData$averageClaim <- averageClaim 103 | depVar <- "averageClaim" 104 | myWeights <- theData[,countVar] 105 | } 106 | if(analysisType == "frequency"){ 107 | depVar <- countVar 108 | myWeights <- rep(1, nrow(theData)) 109 | } 110 | # 111 | # assigning data into protected Environment 112 | assign("funcEnv", new.env(), envir = .GlobalEnv) 113 | assign("theData", theData, envir = funcEnv) 114 | assign("tempData", theData, envir = funcEnv) 115 | assign("timeVar", timeVar, envir = funcEnv) 116 | assign("handicap", handicap, envir = funcEnv) 117 | assign("myWeights", myWeights, envir = funcEnv) 118 | assign("selType", selType, envir = funcEnv) 119 | #assign("exposure", theData[,exposureName], envir = funcEnv) 120 | assign("exposureName", exposureName, envir = funcEnv) 121 | assign("plotCharts", plotCharts, envir = funcEnv) 122 | assign("consistThresh", consistThresh, envir = funcEnv) 123 | # 124 | 125 | # NOTE WELL, this is a primary jumping off point, it divides the path of the analysis into auto-fit and simple model fit 126 | 127 | ## This is the if statement that determines whether the model will go throught the auto-fit selection 128 | # or whether it will simply just fit the model engaged with the parameter "justFit". 129 | if(justFit == TRUE){ 130 | cat("The justFit parameter has been set to ", justFit, "therefore the auto-fitting process will not be carried out and the specified model will be fully fitted.\n") 131 | finalModel <- glm(formula(paste(depVar, " ~ ", paste(c(timeVar, ratingFact), collapse = " + "), sep = "")), data = theData, 132 | family = modelFamily, offset = log(Exposure), weights = get("myWeights", envir = funcEnv)) 133 | }else{ 134 | 135 | cat("\n", selType, " will be used as model selection criterion in this investigation.\n") 136 | if(!(selType == "AIC" | selType == "BIC")){ 137 | cat("Warning: Information criteria based techniques are not being used for this stepwise model selection process. This is not recommended.\n\n") 138 | } 139 | 140 | # Document Title: 141 | # 142 | myDocumentTitle <- paste(myDocumentTitle, " for ", analysisType, " \n(Model Distr: ", myDistr, 143 | ", Link: ", theLink, ")", "\nMethod = ", theAlg, sep = "") 144 | 145 | cat("************************************************************************************************************\n") 146 | cat(paste("This analysis is a ", analysisType, " analysis using a ", myDistr, " distribution and a ", theLink, " link", 147 | ", for the ", theAlg, " algorithm \n", sep = "")) 148 | cat("************************************************************************************************************\n\n") 149 | #print("") 150 | #print("") 151 | # If you do not have the correct number of factor Levels 152 | if(length(ratingFact) != length(ratingFactorLevels)){ 153 | factLevels <- as.list(rep(1, length(ratingFact))) 154 | cat("Warning: factors levels spec have been reset since they have the wrong number of levels \n") 155 | } 156 | assign("factLevels", factLevels, envir = funcEnv) 157 | origData <- theData 158 | 159 | if(theAlg == "forward"){ 160 | prevForms <- paste(depVar, " ~ ", paste(c(timeVar, ratingFact), collapse = " + ", sep = ""), sep = "") 161 | prevModel <- glm(formula(paste(depVar, " ~ ", timeVar, sep = "")), data = theData, 162 | family = modelFamily, offset = log(Exposure), weights = myWeights) 163 | } 164 | if(theAlg == "backward"){ 165 | prevForms <- paste(depVar, " ~ ", paste(c(timeVar, ratingFact), collapse = " + ", sep = ""), sep = "") 166 | prevModel <- glm(formula(prevForms), data = theData, 167 | family = modelFamily, offset = log(Exposure), weights = myWeights) 168 | } 169 | 170 | #if(selType == "AIC"){myK <- 2} 171 | if(selType == "BIC"){myK <- log(nrow(theData))}else{myK <- 2} 172 | 173 | assign("myK", myK, envir = funcEnv) 174 | # One way to order variables 175 | cat("Beginning one-way analyses to determine order that variables will be analysed \n") 176 | cat("********************************************************************************\n\n") 177 | 178 | aBaseModel <- glm(formula(paste(depVar, " ~ ", timeVar, sep = "")), data = theData, 179 | family = modelFamily, offset = log(Exposure), weights = get("myWeights", envir = funcEnv)) 180 | 181 | theIC <- lapply(ratingFact, function(x){ 182 | aModel <- update(aBaseModel, as.formula(paste("~.+", x, sep = ""))) 183 | output <- data.frame("RatingFactor" = x, "IC" = extractAIC(aModel, k = myK)[2]) 184 | cat(paste(paste(as.character(aModel$formula)[c(2,1,3)], collapse = " "), "\n", sep = "")) 185 | return(output)}) 186 | ICTable <- do.call(rbind, theIC) 187 | 188 | if(!randomiseOrder){ 189 | if(theAlg == "forward"){ICTable <- ICTable[order(ICTable$IC, decreasing = FALSE),]} 190 | if(theAlg == "backward"){ICTable <- ICTable[order(ICTable$IC, decreasing = TRUE),]}}else{ 191 | icOrder <- sample(rownames(ICTable), size = length(rownames(ICTable))) 192 | ICTable <- ICTable[icOrder,] 193 | } 194 | 195 | ratingFact <- as.character(ICTable$RatingFactor) 196 | rownames(ICTable) <- 1:nrow(ICTable) 197 | cat("\nAnalysis order ...\n") 198 | print(ICTable) 199 | # 200 | # Plotting Model Details 201 | if(plotCharts){ 202 | grid.arrange( 203 | tableGrob(data.frame(myDocumentTitle), show.rownames = FALSE, show.colnames = FALSE, gpar.corefill = gpar(fill="white", 204 | col = NA, row = NA), gpar.coretext=gpar(col = "black", cex = 3)), 205 | tableGrob(ICTable, 206 | show.csep=TRUE, show.rsep=TRUE, show.box=TRUE, separator="grey", name="test", gp=gpar(fontsize=12, lwd=2), 207 | equal.width=FALSE, grep=TRUE, global=TRUE), 208 | nrow = 2) 209 | } 210 | # 211 | myReturn <- TRUE 212 | 213 | loopCount <- 1 214 | 215 | # while loop 216 | while(length(ratingFact) > 0){ 217 | 218 | currentVar <- ratingFact[1] 219 | 220 | cat(paste("\nAnalysing the rating factor ... ", currentVar, " \n\n", sep = "")) 221 | 222 | # Find out if the variable is significant: 223 | 224 | currentModel <- .forwardBackSig(theModel = prevModel, currVar = currentVar, alg = theAlg) 225 | theData <- get("tempData", envir = funcEnv) 226 | assign("theData", get("tempData", envir = funcEnv), envir = funcEnv) 227 | if(currentModel[[2]] == "Significant"){ 228 | currentModel <- currentModel[[1]] 229 | ICPass <- TRUE 230 | ratingFact <- ratingFact[currentVar != ratingFact] 231 | #assign("ratingFact", ratingFact, envir = funcEnv) 232 | }else{ 233 | currentModel <- currentModel[[1]] 234 | ICPass <- FALSE 235 | ratingFact <- ratingFact[currentVar != ratingFact] 236 | #assign("ratingFact", ratingFact, envir = funcEnv) 237 | } 238 | 239 | if(ICPass){ 240 | currentModelTime <- formula(paste(paste(as.character(currentModel$formula)[c(2,1,3)], collapse = ""), 241 | " + ", paste(timeVar, ":", currentVar, sep = ""), sep = "")) 242 | ############################################################################################ 243 | 244 | # Plotting Factor 245 | myCoeffs <- data.frame(summary(currentModel)$coeff) 246 | names(myCoeffs) <- c("Estimate", "StdError", "ZValue","P(Z > |z|)") 247 | if(plotCharts){ 248 | .plotRatingFactor(ratingFactor = as.character(currentVar), theCoefficients = 249 | .getCoeffs(myExposure = exposureName, myGlm = currentModel, 250 | aCurrVar = as.character(currentVar))) 251 | } 252 | consistOut <- .plotTimeConsist(ratingFactor = as.character(currentVar), timeDef = timeVar, 253 | theGlm = currentModelTime, constThresh = consistThresh, expVar = exposureName, 254 | showPlots = plotCharts) 255 | consistency <- (consistOut >= consistThresh) 256 | factorChanged <- get("factorChanged", envir = funcEnv) 257 | 258 | # This is run if rating factor is significant, not consistent, 259 | # and not changed and there is a mapping table. We already know that it is significant 260 | if(!consistency & !factorChanged & ncol(factLevels[[currentVar]]) > 1){ 261 | 262 | aConsistencyTest <- .consistCorrection(glm1 = currentModel, currVar = currentVar) 263 | theData <- get("tempData", envir = funcEnv) 264 | cat("\n########################################################\n") 265 | cat("End of SBNC Analysis\n") 266 | currentModel <- aConsistencyTest[[1]] 267 | if(aConsistencyTest[[2]] == "Consistent"){consistency <- TRUE}else{consistency <- FALSE} 268 | myCoeffs <- data.frame(summary(currentModel)$coeff) 269 | names(myCoeffs) <- c("Estimate", "StdError", "ZValue","P(Z > |z|)") 270 | } 271 | 272 | 273 | if(ICPass & consistency){ 274 | cat("Model is consistent and has improved IC\n") 275 | prevModel <- currentModel 276 | }else{ 277 | cat(paste("Information Criteria pass: ", ICPass, " & consistency: ", consistency, "\n", sep = "")) 278 | }#End Consistency Pass 279 | 280 | }else{ 281 | cat("Current variable has failed so final model is sub model\n") 282 | prevModel <- currentModel 283 | } 284 | 285 | #print("") 286 | cat(paste("\nThe loop count is ... ", loopCount, "\n\n", sep = "")) 287 | cat(paste("Finishing variable ... ", currentVar, "\n", sep = "")) 288 | #print("") 289 | loopCount <- loopCount + 1 290 | }# end while 291 | # 292 | # Final check to see if all the variable are significant 293 | cat(paste("Carrying out the final checks, the IC for the candidate model is ", 294 | round(extractAIC(prevModel, k = myK)[2]),", the full candidate formula is ...\n", sep = "")) 295 | finalRatingFactors <- gsub(" ", "", strsplit(paste(prevModel$formula)[3], "\\+")[[1]][-1]) 296 | cat(paste(as.character(prevModel$formula)[c(2,1,3)], collapse = " "), "\n") 297 | # 298 | cat("The sub models are\n\n") 299 | 300 | cSelType <- selType 301 | for(i in finalRatingFactors){ 302 | cat("Testing rating factor ", i, " .....\n") 303 | cat("#########################################\n") 304 | subModel <- update(prevModel, as.formula(paste("~.-", i, sep = ""))) 305 | if(!.getVarSignificance(aGlm1 = prevModel, aGlm2 = subModel, selType = cSelType)){ 306 | prevModel <- subModel 307 | cat("Variable ", i, " is not significant in the final check so it has been eliminated\n") 308 | cat("####################################################################################\n\n") 309 | }else{ 310 | cat("Variable ", i, " is significant in the final check so it will be included in the model\n") 311 | cat("##########################################################################################\n\n") 312 | } 313 | 314 | } 315 | 316 | finalModel <- prevModel 317 | 318 | ############################################################################# 319 | # 320 | cat("\nThe final model summary follows ...\n") 321 | cat("************************************************************************************************\n") 322 | print(summary(finalModel)$coeff) 323 | # 324 | #print("") 325 | #print("") 326 | # Do a complete dump of diagnostics here, AIC, BIC, Chisq, and F if Type = severity 327 | cat("\n\n*********************************************************************************************************\n") 328 | cat("The final valid model is ... ", as.character(finalModel$formula)[c(2,1,3)], "\n") 329 | cat("*********************************************************************************************************\n") 330 | 331 | cat("AIC for the final model is ", round(AIC(finalModel)), "\n") 332 | cat("BIC for the final model is ", round(BIC(finalModel)), "\n") 333 | cat("Chi-Squared ANOVA for the final model is ... \n") 334 | print(anova(finalModel, test = "Chisq")) 335 | 336 | # This is a nicer coefficient table than the default 337 | raingFactorList <- gsub(" ", "", strsplit(as.character(finalModel$formula)[3], split = "\\+")[[1]]) 338 | coeffsOut <- lapply(raingFactorList, function(x){data.frame("Factor" = x, .getCoeffs(myExposure = "Exposure", myGlm = finalModel, aCurrVar = x))}) 339 | coeffsOut <- do.call(rbind, coeffsOut) 340 | rownames(coeffsOut) <- 1:nrow(coeffsOut) 341 | assign("coeffsOut", coeffsOut, envir = funcEnv) 342 | cat("\nOutputting print friendly model output table\n\n") 343 | print(coeffsOut) 344 | 345 | cat("\n***********************************************************************************************\n") 346 | cat("End of Model Summary\n") 347 | cat("***********************************************************************************************\n") 348 | #print("") 349 | # 350 | finalModelTable <- data.frame("Formula" = paste(as.character(finalModel$formula)[c(2,1,3)], collapse = " "), 351 | "AIC" = AIC(finalModel), "BIC" = BIC(finalModel)) 352 | # 353 | finalTitle <- paste("Final Model for ", analysisType, " analysis \nusing a ", myDistr, " distribution\n and a ", 354 | theLink, " link for the ", theAlg, " algorithm", sep = "") 355 | if(plotCharts){ 356 | grid.arrange( 357 | tableGrob(data.frame(finalTitle), show.rownames = FALSE, show.colnames = FALSE, gpar.corefill = gpar(fill="white", 358 | col = NA, row = NA), gpar.coretext=gpar(col = "black", cex = 3)), 359 | tableGrob(finalModelTable, show.rownames = FALSE, show.csep=TRUE, show.rsep=TRUE, 360 | show.box=TRUE, separator="grey", name="test", gp=gpar(fontsize=12, lwd=2), equal.width=FALSE, grep=TRUE, global=TRUE), 361 | nrow = 2 362 | ) 363 | } 364 | } # end of justFit it else 365 | # Final Coefficient Table 366 | return(finalModel) 367 | rm(list = ls());gc() 368 | } 369 | --------------------------------------------------------------------------------