├── NAMESPACE
├── .Rbuildignore
├── .gitignore
├── data
    └── policyTable.RData
├── autoPricing.Rproj
├── man
    ├── .plotRatingFactor.Rd
    ├── addVariable.Rd
    ├── .addVariable.Rd
    ├── policyTable.Rd
    ├── .getCoeffs.Rd
    ├── getCoeffs.Rd
    ├── .consistCorrection.Rd
    ├── .forwardBackSig.Rd
    ├── .getVarSignificance.Rd
    ├── .plotTimeConsist.Rd
    ├── plotTimeConsist.Rd
    ├── .modelFamilySpec.Rd
    ├── forwardBackStepAIC.Rd
    └── stepIC.Rd
├── NEWS
├── R
    ├── policyTable.R
    ├── addVariable.R
    ├── modelFamilySpec.R
    ├── autoPricing-package.R
    ├── getCoeffs.R
    ├── getVarSignificance.R
    ├── plotRatingFactor.R
    ├── forwardBackSig.R
    ├── consistCorrection.R
    ├── plotTimeConsist.R
    └── stepIC.R
├── DESCRIPTION
├── README.md
└── LICENSE


/NAMESPACE:
--------------------------------------------------------------------------------
1 | export(stepIC)


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/data/policyTable.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MangoTheCat/autoPricing/master/data/policyTable.RData


--------------------------------------------------------------------------------
/autoPricing.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/man/.plotRatingFactor.Rd:
--------------------------------------------------------------------------------
 1 | \name{.plotRatingFactor}
 2 | \alias{.plotRatingFactor}
 3 | \title{This is a function to plot rating factors...}
 4 | \usage{.plotRatingFactor(ratingFactor="BonusMalus", theCoefficients=myCoeffs)}
 5 | \description{This is a function to plot rating factors}
 6 | \value{There is no output to this function}
 7 | \note{This is an internal function}
 8 | \keyword{plotting}
 9 | \keyword{rating}
10 | \keyword{factors}
11 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
12 | \arguments{\item{\code{ratingFactor}}{a character string of the rating factor to be plotted}
13 | \item{\code{theCoefficients}}{output from the \code{getCoeffs()} function}}
14 | 


--------------------------------------------------------------------------------
/man/addVariable.Rd:
--------------------------------------------------------------------------------
 1 | \name{addVariable}
 2 | \alias{addVariable}
 3 | \title{Function to create a formula which adds a variable to an input formula...}
 4 | \usage{addVariable(theFormula, addVar)}
 5 | \description{Function to create a formula which adds a variable to an input formula}
 6 | \value{The output is a formula object which is the input formula added to the variable}
 7 | \note{This should really be an internal function}
 8 | \keyword{formula}
 9 | \keyword{add}
10 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
11 | \arguments{\item{\code{theFormula}}{this is the formula that the variable should be added to}
12 | \item{\code{addVar}}{this is the variable that you should add}}
13 | 


--------------------------------------------------------------------------------
/man/.addVariable.Rd:
--------------------------------------------------------------------------------
 1 | \name{.addVariable}
 2 | \alias{.addVariable}
 3 | \title{Function to create a formula which adds a variable to an input formula...}
 4 | \usage{.addVariable(theFormula, addVar)}
 5 | \description{Function to create a formula which adds a variable to an input formula}
 6 | \value{The output is a formula object which is the input formula added to the variable}
 7 | \note{This should really be an internal function}
 8 | \keyword{formula}
 9 | \keyword{add}
10 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
11 | \arguments{\item{\code{theFormula}}{this is the formula that the variable should be added to}
12 | \item{\code{addVar}}{this is the variable that you should add}}
13 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
 1 | Date: 12/01/2012
 2 | Author: Chibisi Chima-Okereke
 3 | Package: autoPricing
 4 | Version: 0.4
 5 | #
 6 | 
 7 | The policyTable dataset is added to allow users to play with the functions
 8 | 
 9 | 
10 | Version: 0.1
11 | #
12 | This is the first iteration of the package
13 | 
14 | Version 0.2
15 | #
16 | Some basic alterations to make the code more robust
17 | 
18 | Version 0.3
19 | #
20 | Added the function consistCorrection.R to rebalance for variables that are initial significant but not consistent
21 | 
22 | Version 0.4
23 | #
24 | 1. Now using the update() function to iterate through the models
25 | 2. Now Chi-Squared, F-Test, are included in the test options
26 | 3. Now using cat() instead of print() for comments
27 | 4. Using AIC(), BIC() functions in addition to extractAIC() function
28 | 


--------------------------------------------------------------------------------
/man/policyTable.Rd:
--------------------------------------------------------------------------------
 1 | \name{policyTable}
 2 | \title{This is a simulated dataset, it is a modified version of that given by Kaas, R., Goovaerts, M., 
 3 | Dhaene, J., Denuit, M. in Modern  Actuarial Risk Theory, and can be found at 
 4 | \url{http://www1.fee.uva.nl/ke/act/people/kaas/Cars.txt}}
 5 | \description{This data will be included the package}
 6 | \alias{autoPricing}
 7 | \docType{data}
 8 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
 9 | \references{the data is an altered  version of the set used in KASS et al Modern 
10 | Actuarial Risk Theory located at \url{http://www1.fee.uva.nl/ke/act/people/kaas/Cars.txt}}
11 | \keyword{glm,}
12 | \keyword{actuarial}
13 | \keyword{pricing,}
14 | \keyword{automating,}
15 | \keyword{data}
16 | \seealso{\code{\link{stepIC}}}
17 | 


--------------------------------------------------------------------------------
/R/policyTable.R:
--------------------------------------------------------------------------------
 1 | #' This data will be included the package
 2 | #'
 3 | #' @name policyTable
 4 | #' @aliases autoPricing
 5 | #' @docType data
 6 | #' @title This is a simulated dataset, it is a modified version of that given by Kaas, R., Goovaerts, M., 
 7 | #'            Dhaene, J., Denuit, M. in Modern  Actuarial Risk Theory, and can be found at 
 8 | #'            \url{http://www1.fee.uva.nl/ke/act/people/kaas/Cars.txt}
 9 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
10 | #' @references the data is an altered  version of the set used in KASS et al Modern 
11 | #'            Actuarial Risk Theory located at \url{http://www1.fee.uva.nl/ke/act/people/kaas/Cars.txt}
12 | #' @keywords glm, actuarial pricing, automating, data
13 | #' @seealso \code{\link{stepIC}}
14 | NULL
15 | 


--------------------------------------------------------------------------------
/man/.getCoeffs.Rd:
--------------------------------------------------------------------------------
 1 | \name{.getCoeffs}
 2 | \alias{.getCoeffs}
 3 | \title{Function for formatting coefficients...}
 4 | \usage{.getCoeffs(myExposure="Exposure", myGlm=theGlm, theForm, sourceData)}
 5 | \description{Function for formatting coefficients}
 6 | \value{The output is a nicely formatted table of the coefficients}
 7 | \note{This formula is used extensively in the \code{stepIC} function}
 8 | \keyword{glm}
 9 | \keyword{coefficients}
10 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
11 | \arguments{\item{\code{myExposure}}{the name of the column of the dataset that has the exposure (in years)}
12 | \item{\code{myGlm}}{the glm object}
13 | \item{\code{theForm}}{should be the formula if theGlm$formula is NULL}
14 | \item{\code{sourceData}}{should be the source data if theGlm$data is NULL}}
15 | 


--------------------------------------------------------------------------------
/man/getCoeffs.Rd:
--------------------------------------------------------------------------------
 1 | \name{getCoeffs}
 2 | \alias{getCoeffs}
 3 | \title{Function for formatting coefficients...}
 4 | \usage{getCoeffs(myExposure="Exposure", myGlm=theGlm, theForm, sourceData)}
 5 | \description{Function for formatting coefficients}
 6 | \value{The output is a nicely formatted table of the coefficients}
 7 | \note{This formula is used extensively in the \code{forwardBackStepAIC} function}
 8 | \keyword{glm}
 9 | \keyword{coefficients}
10 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
11 | \arguments{\item{\code{myExposure}}{the name of the column of the dataset that has the exposure (in years)}
12 | \item{\code{myGlm}}{the glm object}
13 | \item{\code{theForm}}{should be the formula if theGlm$formula is NULL}
14 | \item{\code{sourceData}}{should be the source data if theGlm$data is NULL}}
15 | 


--------------------------------------------------------------------------------
/man/.consistCorrection.Rd:
--------------------------------------------------------------------------------
 1 | \name{.consistCorrection}
 2 | \alias{.consistCorrection}
 3 | \title{This function rebalances a raw variable that is significant but not consistent...}
 4 | \usage{.consistCorrection(glm1=currentModel, currVar=currentVar)}
 5 | \description{This function rebalances a raw variable that is significant but not consistent}
 6 | \value{The output is a list of revised glm and a character either \code{"Consistent"} or \code{"Not Consistent"}}
 7 | \note{This is an internal function}
 8 | \keyword{glm}
 9 | \keyword{consistency}
10 | \keyword{significance}
11 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
12 | \arguments{\item{\code{glm1}}{the glm that contains the rating factor to be analysed output from the \code{.forwardBackSig} function}
13 | \item{\code{currVar}}{the rating factor to be analysed}}
14 | 


--------------------------------------------------------------------------------
/R/addVariable.R:
--------------------------------------------------------------------------------
 1 | #' Function to create a formula which adds a variable to an input formula
 2 | #'
 3 | #'
 4 | #' @param \code{theFormula} this is the formula that the variable should be added to 
 5 | #' @param \code{addVar} this is the variable that you should add
 6 | #' @return The output is a formula object which is the input formula added to the variable
 7 | #' @note This should really be an internal function
 8 | #'
 9 | #' @keywords formula add
10 | #' @include autoPricing-package.R
11 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
12 | #
13 | .addVariable <- function(theFormula, addVar){
14 |   theFormula <- as.character(theFormula)
15 |   theRHS <- paste(c(theFormula[3], addVar), collapse = " + ", sep = "")
16 |   theFormula <- as.formula(paste(theFormula[2], " ~ ", theRHS, sep = ""))
17 |   return(theFormula)
18 | }
19 | 


--------------------------------------------------------------------------------
/man/.forwardBackSig.Rd:
--------------------------------------------------------------------------------
 1 | \name{.forwardBackSig}
 2 | \alias{.forwardBackSig}
 3 | \title{Master function to find out if the variable is significant when you add a variable...}
 4 | \usage{.forwardBackSig(theModel, currVar, alg="forward", ...)}
 5 | \description{Master function to find out if the variable is significant when you add a variable}
 6 | \value{The model that will be used and whether the \code{currVar} was significant or not}
 7 | \note{This is an internal function and should not be used by the user}
 8 | \keyword{glm}
 9 | \keyword{forward}
10 | \keyword{backward}
11 | \seealso{\code{\link{glm}}}
12 | \seealso{\code{\link{stepIC}}}
13 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
14 | \arguments{\item{\code{theModel}}{this is the candidate model}
15 | \item{\code{currVar}}{this is the current variable being investigated}
16 | \item{\code{alg}}{this is the algoirthm being used either forward or backward}}
17 | 


--------------------------------------------------------------------------------
/man/.getVarSignificance.Rd:
--------------------------------------------------------------------------------
 1 | \name{.getVarSignificance}
 2 | \alias{.getVarSignificance}
 3 | \title{Function to get the significance or not of a variable when given the the appropriate glms...}
 4 | \usage{.getVarSignificance(aGlm1=glm1, aGlm2=glm2, selType="AIC")}
 5 | \description{Function to get the significance or not of a variable when given the the appropriate glms}
 6 | \value{The output is logical, TRUE means that the variable is significant, FALSE means that the variable is not significant}
 7 | \note{This is an internal function}
 8 | \keyword{glm}
 9 | \keyword{consistency}
10 | \keyword{significance}
11 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
12 | \arguments{\item{\code{aGlm1}}{the glm that contains the rating factor to be analysed}
13 | \item{\code{aGlm2}}{the glm without the rating factor to be analysed}
14 | \item{\code{selType}}{This is the selection criterion type, either "AIC", "BIC", "Chisq", or "F"}}
15 | 


--------------------------------------------------------------------------------
/man/.plotTimeConsist.Rd:
--------------------------------------------------------------------------------
 1 | \name{.plotTimeConsist}
 2 | \alias{.plotTimeConsist}
 3 | \title{Function for plotting consistency...}
 4 | \usage{.plotTimeConsist(ratingFactor="BonusMalus", timeDef="Year",
 5 |     theGlm=myGlm, constThresh=60, expVar="Exposure", showPlots=TRUE)}
 6 | \description{Function for plotting consistency}
 7 | \value{The output is a numeric denoting the median consistency of the rating factor with year}
 8 | \note{...}
 9 | \seealso{\code{\link{stepIC}}}
10 | \keyword{consistency}
11 | \keyword{glm}
12 | \keyword{time}
13 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
14 | \arguments{\item{\code{ratingFactor}}{a character string denoting the rating factor to carry out time consistency analysis}
15 | \item{\code{timeDef}}{a character string denoting the Year variable}
16 | \item{\code{theGlm}}{a glm object to be analysed}
17 | \item{\code{constThresh}}{a numeric denoting the threshold percentage of consistency}
18 | \item{\code{expVar}}{set this to the exposure column name}
19 | \item{\code{showPlots}}{logical for whether to plot the variables or not}}
20 | 


--------------------------------------------------------------------------------
/man/plotTimeConsist.Rd:
--------------------------------------------------------------------------------
 1 | \name{plotTimeConsist}
 2 | \alias{plotTimeConsist}
 3 | \title{Function for plotting consistency...}
 4 | \usage{plotTimeConsist(ratingFactor="BonusMalus", timeDef="Year", theGlm=myGlm, constThresh=60, expVar="Exposure", showPlots=TRUE)}
 5 | \description{Function for plotting consistency}
 6 | \value{The output is a numeric denoting the median consistency of the rating factor with year}
 7 | \note{...}
 8 | \seealso{\code{\link{forwardBackStepAIC}}}
 9 | \keyword{consistency}
10 | \keyword{glm}
11 | \keyword{time}
12 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
13 | \arguments{\item{\code{ratingFactor}}{a character string denoting the rating factor to carry out time consistency analysis}
14 | \item{\code{timeDef}}{a character string denoting the Year variable}
15 | \item{\code{theGlm}}{a glm object to be analysed}
16 | \item{\code{constThresh}}{a numeric denoting the threshold percentage of consistency}
17 | \item{\code{expVar}}{set this to the exposure column name}
18 | \item{\code{showPlots}}{logical for whether to plot the variables or not}}
19 | 


--------------------------------------------------------------------------------
/man/.modelFamilySpec.Rd:
--------------------------------------------------------------------------------
 1 | \name{.modelFamilySpec}
 2 | \alias{.modelFamilySpec}
 3 | \title{Function to specify the model family/link distributions/functions...}
 4 | \usage{.modelFamilySpec(distr="poisson", myLink="log", theta)}
 5 | \description{Function to specify the model family/link distributions/functions}
 6 | \value{The output is an object of class "family"}
 7 | \note{This is just a wrapper function for the family() function}
 8 | \keyword{addition}
 9 | \keyword{arithmetic}
10 | \references{http://finzi.psych.upenn.edu/R/library/stats/html/family.html}
11 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
12 | \arguments{\item{\code{distr}}{character string denoting the distribution to use options are "poisson", 
13 | "binomial", "gaussian", "Gamma", "inverse.gaussian", "quasi", "quasibinomial", 
14 | "quasipoisson", "negative.binomial"}
15 | \item{\code{myLink}}{character string denoting the link function, e.g. "identity", 
16 | "log", "logit", "inverse", "1/mu^2"}
17 | \item{\code{theta}}{should be set to the value of theta if the "negative.binomal" distribution is selected}}
18 | \examples{modelFamilySpec("poisson", "log")}
19 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: autoPricing
 2 | Maintainer: Chibisi Chima-Okereke <cchima-okereke@mango-solutions.com>
 3 | License: GPL-2
 4 | Title: This package is for carrying out automated glm pricing analysis
 5 |     for frequency and severity models
 6 | ZipData: yes
 7 | Type: Package
 8 | LazyLoad: true
 9 | Author: Chibisi Chima-Okereke <cchima-okereke@mango-solutions.com>
10 | Import: MASS, reshape, fBasics, gridExtra, multcomp
11 | Description: The purpose of this package is to carry out automated GLM
12 |     analysis for actuarial pricing. The idea is that it uses the
13 |     forward or backward algorithms and information criteria to obtain a
14 |     frequency and severity model. It allows tables to be specified that
15 |     map the aggregation structure of the variable from those currently
16 |     specified to a less granular set of categories. This allows those
17 |     explanatory variables to be dynamically re-specified during the
18 |     pricing process if the native variable does not improve the fit of
19 |     the model. The re-specification of the variable categories is done
20 |     if the native variable does not decrease the information criterion;
21 |     the mapping table and a Tukey test on the categorical variable is
22 |     used to aggregate categories that are statistically and logically
23 |     similar.
24 | Version: 0.4
25 | Extends:
26 |     sp
27 | URL: http://www.mango-solutions.com
28 | Depends:
29 |     MASS,
30 |     reshape,
31 |     fBasics,
32 |     gridExtra,
33 |     multcomp
34 | Collate:
35 |     'stepIC.R'
36 |     'plotTimeConsist.R'
37 |     'plotRatingFactor.R'
38 |     'modelFamilySpec.R'
39 |     'getVarSignificance.R'
40 |     'getCoeffs.R'
41 |     'forwardBackSig.R'
42 |     'policyTable.R'
43 |     'consistCorrection.R'
44 |     'autoPricing-package.R'
45 |     'addVariable.R'
46 | 


--------------------------------------------------------------------------------
/R/modelFamilySpec.R:
--------------------------------------------------------------------------------
 1 | #' Function to specify the model family/link distributions/functions
 2 | #'
 3 | #'
 4 | #'
 5 | #' @param \code{distr} character string denoting the distribution to use options are "poisson", 
 6 | #'                    "binomial", "gaussian", "Gamma", "inverse.gaussian", "quasi", "quasibinomial", 
 7 | #'                     "quasipoisson", "negative.binomial"
 8 | #' @param \code{myLink} character string denoting the link function, e.g. "identity", 
 9 | #'                    "log", "logit", "inverse", "1/mu^2"
10 | #' @param \code{theta} should be set to the value of theta if the "negative.binomal" distribution is selected
11 | #' @return The output is an object of class "family"
12 | #' @note This is just a wrapper function for the family() function
13 | #'
14 | #' @keywords addition arithmetic
15 | #' @references
16 | #' http://finzi.psych.upenn.edu/R/library/stats/html/family.html
17 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
18 | #' @examples
19 | #' modelFamilySpec("poisson", "log")
20 | #
21 | .modelFamilySpec <- function(distr = "poisson", myLink = "log", theta = NULL){
22 |   
23 |   # theta is only specified for negative binomial distribution
24 |   if(distr == "negative.binomial"){
25 |     if(is.null(theta)){stop("Cannot proceed without specifying theta")}
26 |     myTheta <- theta
27 |     myFamily <- lapply(myTheta, distr, link = "log")[[1]]
28 |     
29 |   }else{
30 |   
31 |   myFamily <- lapply(myLink, distr)[[1]]
32 |   }
33 |   return(myFamily)
34 | }
35 |   
36 | # Examples of specifications for models
37 | #binomial(link = "logit")
38 | #gaussian(link = "identity")
39 | #Gamma(link = "inverse")
40 | #inverse.gaussian(link = "1/mu^2")
41 | #poisson(link = "log")
42 | #quasi(link = "identity", variance = "constant")
43 | #quasibinomial(link = "logit")
44 | #quasipoisson(link = "log")
45 | # This next one require MASS package
46 | #negative.binomial(theta = stop("'theta' must be specified"), link = "log")
47 | 


--------------------------------------------------------------------------------
/R/autoPricing-package.R:
--------------------------------------------------------------------------------
 1 | #' This package is for carrying out automated glm pricing analysis for frequency and severity 
 2 | #'
 3 | #' \tabular{ll}{
 4 | #' Package: \tab autoPricing\cr
 5 | #' Type: \tab Package\cr
 6 | #' Version: \tab 0.4\cr
 7 | #' Date: \tab 2011-01-12\cr
 8 | #' License: \tab GPL (>= 2)\cr
 9 | #' LazyLoad: \tab yes\cr
10 | #' }
11 | #'
12 | #' The purpose of this package is to carry out automated GLM analysis for actuarial 
13 | #' pricing. The idea is that it uses the forward or backward algorithms and information criteria to 
14 | #' obtain a frequency and severity model. It allows tables to be specified that map the aggregation 
15 | #' structure of the variable from those currently specified to a less granular set of categories. 
16 | #' This allows those explanatory variables to be dynamically re-specified during the pricing process 
17 | #' if the native variable does not improve the fit of the model. The re-specification of the 
18 | #' variable categories is done if the native variable does not decrease the information criterion; 
19 | #' the mapping table and a Tukey test on the categorical variable is used to aggregate categories 
20 | #' that are statistically and logically similar.
21 | #'
22 | #'
23 | #' The \code{\link{stepIC}} function is the main function for this package, it carrys out the 
24 | #'      step information criterion process.
25 | #'
26 | #'
27 | #' @name autoPricing-package
28 | #' @aliases autoPricing
29 | #' @docType package
30 | #' @title This package is for carrying out automated glm pricing analysis for frequency and severity 
31 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
32 | #' @keywords glm, actuarial pricing, automating
33 | #' @include addVariable.R
34 | #' @include consistCorrection.R
35 | #' @include policyTable.R
36 | #' @include forwardBackSig.R
37 | #' @include getCoeffs.R
38 | #' @include getVarSignificance.R
39 | #' @include modelFamilySpec.R
40 | #' @include plotRatingFactor.R
41 | #' @include plotTimeConsist.R
42 | #' @include stepIC.R
43 | #' @seealso \code{\link{stepIC}}
44 | #'
45 | 


--------------------------------------------------------------------------------
/R/getCoeffs.R:
--------------------------------------------------------------------------------
 1 | #' Function for formatting coefficients
 2 | #'
 3 | #' @param \code{myExposure} the name of the column of the dataset that has the exposure (in years)
 4 | #' @param \code{myGlm} the glm object
 5 | #' @param \code{theForm} should be the formula if theGlm$formula is NULL
 6 | #' @param \code{sourceData} should be the source data if theGlm$data is NULL
 7 | #' @param \code{aCurrVar} this is the current variable being analysed
 8 | #' @return The output is a nicely formatted table of the coefficients
 9 | #' @note This formula is used extensively in the \code{stepIC} function
10 | #'
11 | #' @keywords glm coefficients
12 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
13 | #
14 | .getCoeffs <- function(myExposure = "Exposure", myGlm = theGlm, theForm = NULL, sourceData = NULL, aCurrVar){
15 | 
16 | 	theData <- myGlm$data
17 | 	
18 | 	if(is.null(myGlm$formula)){myGlm$formula <- theForm}
19 | 	if(is.null(myGlm$data)){theData <- sourceData}
20 | 	
21 | 	
22 | 	myVars <- gsub(" ", "", unlist(strsplit(as.character(myGlm$formula)[3], "\\+")))
23 | 	
24 | 	getMyVars <- function(numVars = 1){
25 | 	  return(myVars[which(lapply(strsplit(myVars, ":"), length) == numVars)])  
26 | 	}
27 | 	
28 | 	singleVars <- getMyVars(numVars = 1)
29 | 	singleVars <- singleVars[singleVars != 1]
30 | 	singleVars <- singleVars[singleVars != 0]
31 | 
32 | #
33 | 	# Creating the newdata
34 | 	restOfFactors <- singleVars[singleVars != aCurrVar]
35 | 	newData <- lapply(restOfFactors, function(x){ levels(theData[,x])[1] })
36 | 	names(newData) <- restOfFactors
37 | 	newData[[aCurrVar]] <- levels(theData[, aCurrVar])
38 | 	newData <- data.frame(do.call(cbind, newData))
39 | 	newData[,myExposure] <- 1
40 | 	
41 | 	# These are the predicted Coefficients
42 | 	aCoeffs <- predict(myGlm, newdata = newData, type = "term", se.fit = TRUE)
43 | 	bCoeffs <- data.frame("Estimate" = aCoeffs$fit[, aCurrVar], "StdError" = aCoeffs$se.fit[, aCurrVar])
44 | 	# Re-basing the fitted Coefficient
45 | 	bCoeffs[,1] <- bCoeffs[, 1] - bCoeffs[1, 1]
46 | 	bCoeffs <- data.frame("Categories" = levels(theData[,aCurrVar]), bCoeffs)
47 | 	#names(bCoeffs)[1] <- "Categories"
48 | 	
49 | 	# This is to get the percentage exposure
50 | 	tabFormula <- as.formula(paste(myExposure, "~", aCurrVar))
51 | 	propExposure <- xtabs(tabFormula, data = theData[,c(aCurrVar, myExposure)])
52 | 	propExposure <- 100*propExposure/sum(propExposure)
53 | 	bCoeffs$PercentageExposure <- propExposure
54 | 	
55 | 	return(bCoeffs)
56 | 
57 | }
58 | 


--------------------------------------------------------------------------------
/R/getVarSignificance.R:
--------------------------------------------------------------------------------
 1 | #' Function to get the significance or not of a variable when given the the appropriate glms
 2 | #'
 3 | #' @param \code{aGlm1} the glm that contains the rating factor to be analysed
 4 | #' @param \code{aGlm2} the glm without the rating factor to be analysed
 5 | #' @param \code{selType} This is the selection criterion type, either "AIC", "BIC", "Chisq", or "F"
 6 | #' @return The output is logical, TRUE means that the variable is significant, FALSE means that the variable is not significant
 7 | #' @note This is an internal function
 8 | #'
 9 | #' @keywords glm consistency significance
10 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
11 | 
12 | .getVarSignificance <- function(aGlm1 = glm1, aGlm2 = glm2, selType = "AIC"){
13 | 	
14 | 	handicap <- get("handicap", envir = funcEnv)
15 | 	
16 | 	if(selType == "AIC"){
17 | 		ICglm1 <- extractAIC(aGlm1)[2]
18 | 		ICglm2 <- extractAIC(aGlm2)[2]
19 | 		cat("AIC is being used for model selection\n\n")
20 | 		cat("Factor Model: ", paste(as.character(aGlm1$formula)[c(2,1,3)], collapse = " "), "\n")
21 | 		cat(paste("AIC of candidate model is ", round(ICglm1), "\n", sep = ""))
22 | 		cat("Sub model: ", paste(as.character(aGlm2$formula)[c(2,1,3)], collapse = " "), "\n")
23 | 		cat(paste("AIC of sub model is ", round(ICglm2), "\n\n\n", sep = ""))
24 | 		output <- round(ICglm1) + handicap < round(ICglm2)
25 | 	}
26 | 	
27 | 	if(selType == "BIC"){
28 | 		ICglm1 <- BIC(aGlm1)
29 | 		ICglm2 <- BIC(aGlm2)
30 | 		cat("BIC is being used for model selection\n\n")
31 | 		cat("Factor Model: ", paste(as.character(aGlm1$formula)[c(2,1,3)], collapse = " "), "\n")
32 | 		cat(paste("BIC of candidate model is ", round(ICglm1), "\n\n", sep = ""))
33 | 		cat("Sub model: ", paste(as.character(aGlm2$formula)[c(2,1,3)], collapse = " "), "\n")
34 | 		cat(paste("BIC of sub model is ", round(ICglm2), "\n\n\n", sep = ""))
35 | 		output <- round(ICglm1) + handicap < round(ICglm2)
36 | 	}
37 | 	
38 | 	if(selType == "Chisq"){
39 | 		anovaTable <- anova(aGlm2, aGlm1, test = "Chisq")
40 | 		cat("Chi-Squared Test is being used for model selection\n\n")
41 | 		cat("ANOVA output for Chi-Squared Test comparing the models\n\n")
42 | 		print(anovaTable)
43 | 		output <- (anovaTable[2 ,"Pr(>Chi)"] < 0.05 & anovaTable[2, "Deviance"] > 0)
44 | 	}
45 | 	
46 | 	if(selType == "F"){
47 | 		anovaTable <- anova(aGlm2, aGlm1, test = "F")
48 | 		cat("F-Test is being used for model selection\n\n")
49 | 		cat("ANOVA output for F-Test comparing the models\n\n")
50 | 		print(anovaTable)
51 | 		output <- (anovaTable[2 ,"Pr(>F)"] < 0.05 & anovaTable[2, "Deviance"] > 0)
52 | 	}
53 | 	
54 | 	return(output)
55 | 	
56 | }
57 | 


--------------------------------------------------------------------------------
/R/plotRatingFactor.R:
--------------------------------------------------------------------------------
 1 | #' This is a function to plot rating factors
 2 | #'
 3 | #' @param \code{ratingFactor} a character string of the rating factor to be plotted
 4 | #' @param \code{theCoefficients} output from the \code{getCoeffs()} function 
 5 | #' @return There is no output to this function
 6 | #' @note This is an internal function
 7 | #'
 8 | #' @keywords plotting rating factors
 9 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
10 | #
11 | .plotRatingFactor <- function(ratingFactor = "BonusMalus", theCoefficients = myCoeffs){
12 | #
13 | 
14 | # Defining Colors
15 | # EMB Yellow
16 | embYellow <- c(250, 255, 102)/255
17 | embYellow <- rgb(embYellow[1], embYellow[2], embYellow[3], alpha = 0.7)
18 | myColors <- colors()[c(26, 451, 142, 83, 254, 491, 454, 32, 381, 494, 121, 548,
19 |                                                 504, 150, 493, 591, 115, 135, 75, 393)]
20 | #
21 | theCoefficients$High95 <- theCoefficients$Estimate + theCoefficients$StdError*1.96
22 | theCoefficients$Low95 <- theCoefficients$Estimate - theCoefficients$StdError*1.96
23 | #
24 | fullYRange <- c(0, max(2*theCoefficients$PercentageExposure))
25 | yRange <- c(fullYRange[2]/2, fullYRange[2])
26 | xRange <- range(c(theCoefficients$Estimate, theCoefficients$High95, theCoefficients$Low95))
27 | if(xRange[2] > 0){xRange[2] <- xRange[2]*1.2}else{xRange[2] <- xRange[2] - .2*xRange[2] }
28 | #
29 | convertScale <- function(conPoints = 1, parm2Perc = TRUE){
30 |   
31 |  myM <- diff(yRange)/diff(xRange)
32 |  myC <- yRange[1] - myM*xRange[1]
33 |  
34 |  if(parm2Perc == TRUE){outPut <- myM*conPoints + myC}else{
35 |    
36 |    if(parm2Perc == FALSE){outPut <- (conPoints - myC)/myM}else{outPut <- NULL}
37 |  }
38 | return(outPut)
39 | }
40 | #
41 | fullYRange <- c(fullYRange[1], 1.2*ceiling(fullYRange[2]))
42 | #
43 | #Plotting
44 | #
45 | #windows()
46 | par(mar = c(5,4,4,5) + .1)
47 | xCoords <- barplot(theCoefficients$PercentageExposure, col = embYellow, ylim = fullYRange, yaxt = "n", 
48 |         main = paste("Predicted Values for the ", ratingFactor, " rating factor", sep = ""), names.arg = theCoefficients$Level1)
49 | lines(xCoords, convertScale(theCoefficients$Estimate, parm2Perc = TRUE), lwd = 2, col = "blue")
50 | lines(xCoords, convertScale(theCoefficients$High95, parm2Perc = TRUE), lwd = 2, col = colors()[33], lty = 2)
51 | lines(xCoords, convertScale(theCoefficients$Low95, parm2Perc = TRUE), lwd = 2, col = colors()[33], lty = 2)
52 | points(xCoords, convertScale(theCoefficients$Estimate, parm2Perc = TRUE), cex = 1.7, lwd = 2, pch = 21, bg = "blue", col = "brown")
53 | axis(4)
54 | mtext("Exposure %", side = 4, line = 3)
55 | #axis(2, labels = c("", round(convertScale(axTicks(2), parm2Perc = FALSE), 2)[-1]), at = axTicks(2))
56 | axis(2, labels = c(round(convertScale(axTicks(2), parm2Perc = FALSE), 2)), at = axTicks(2))
57 | mtext("Fitted Coefficient (Base = 0)", side = 2, line = 3)
58 | } # end of plotting function
59 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # autoPricing
 2 | 
 3 | > The purpose of this package is to carry out automated GLM
 4 |     analysis for actuarial pricing. The idea is that it uses the
 5 |     forward or backward algorithms and information criteria to obtain a
 6 |     frequency and severity model. It allows tables to be specified that
 7 |     map the aggregation structure of the variable from those currently
 8 |     specified to a less granular set of categories. This allows those
 9 |     explanatory variables to be dynamically re-specified during the
10 |     pricing process if the native variable does not improve the fit of
11 |     the model. The re-specification of the variable categories is done
12 |     if the native variable does not decrease the information criterion;
13 |     the mapping table and a Tukey test on the categorical variable is
14 |     used to aggregate categories that are statistically and logically
15 |     similar.
16 |     
17 | ## Installation
18 | Installation from github requires the devtools package to be installed.
19 | 
20 | ## Usage
21 | 
22 | ```R
23 | #Loading the data
24 | data(policyTable)
25 | 
26 | #Preparing rating factor names and mapping tables 
27 | myRatingFactors <- c("BonusMalus", "WeightClass", "Region", "Age", "Mileage", "Usage")
28 | ratingFactorLevels <- lapply(myRatingFactors, function(x){matrix(as.character(levels(policyTable[,x])))})
29 | names(ratingFactorLevels) <- myRatingFactors
30 | ratingFactorLevels$Mileage <- cbind(ratingFactorLevels$Mileage, c("0-12500", "0-12500", "> 12500"))
31 | ratingFactorLevels$BonusMalus <- cbind(ratingFactorLevels$BonusMalus, as.character(sort(rep(LETTERS[1:7], 2))))
32 | weightClass <- c("650-935", "650-935", "650-935", "650-935", "1030-1315", "1030-1315", "1030-1315", "1030-1315", "1410-1600", "1410-1600", "1410-1600")
33 | ratingFactorLevels$WeightClass <- cbind(ratingFactorLevels$WeightClass, weightClass)
34 | 
35 | #Example 1: Executing forward algorithm for poisson risk model
36 | outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 
37 |                   sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 
38 |                   consistThresh = 60, theData = policyTable, analysisType = "frequency",
39 |                   myDistr = "poisson", theLink = "log", exposureName = "Exposure",
40 |                   handicap = 0, myDocumentTitle  = "Automated Pricing GLM", theAlg = "forward", 
41 |                   plotCharts = TRUE)
42 | # Example 2: Writing process to PDF and log file for documentation purposes
43 | myFolder <- getwd()
44 | pdf(file = file.path(myFolder, "GLMOutput.pdf"), height = 7, width = 11)
45 | par(mfrow = c(1,1), cex.main = 1, cex.axis = .9, cex.lab = 1, cex = 1)
46 | sink(file = file.path(myFolder, "GLMOutput.doc"))
47 | outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 
48 |                   sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 
49 |                   consistThresh = 60, theData = policyTable, analysisType = "frequency",
50 |                   myDistr = "poisson", theLink = "log", exposureName = "Exposure",
51 |                   handicap = 0, myDocumentTitle  = "Automated Pricing GLM", theAlg = "forward", 
52 |                   plotCharts = TRUE)
53 | sink()
54 | dev.off()
55 | ```
56 | 
57 | ## Licence
58 | GPL 2 © [Mango Solutions](https://github.com/mangothecat)


--------------------------------------------------------------------------------
/man/forwardBackStepAIC.Rd:
--------------------------------------------------------------------------------
 1 | \name{forwardBackStepAIC}
 2 | \alias{forwardBackStepAIC}
 3 | \title{Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for...}
 4 | \usage{forwardBackStepAIC(ratingFact, countVar, sevVar, factLevels=1, timeVar, IC="BIC", consistThresh=60, theData,
 5 |     analysisType="frequency", myDistr="poisson", theLink="log", exposureName="Exposure", handicap=0,
 6 |     myDocumentTitle="Automated Pricing GLM", theAlg="forward", plotCharts=TRUE, ...)}
 7 | \description{Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for 
 8 | actuarial pricing models the routine also dynamically adjusts the categories for variables that are not significant. This function
 9 | is intended for analysing categorical explanatory varibles.}
10 | \value{The output is the frequency or severity model from the chosen algorithm \code{theAlg}}
11 | \note{please pay attention to the structure of the table requires, since it is non-standard as far as in most 
12 | modelling systems a separate policy and claims table is used for the analysis}
13 | \keyword{glm}
14 | \keyword{actuarial}
15 | \keyword{pricing}
16 | \keyword{automation}
17 | \keyword{backward}
18 | \keyword{forward}
19 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
20 | \arguments{\item{\code{ratingFact}}{this a character vector denoting the column headers of rating factors in your table (\code{theData}).
21 | Please be aware that the package \code{autoPricing} does not currently support interaction terms in your model.}
22 | \item{\code{countVar}}{this is a character denoting the claims count column}
23 | \item{\code{sevVar}}{this is a character denoting the column header of the severity variable}
24 | \item{\code{factLevels}}{this is a list of matrices (or data.frames) that denote the mapping of the rating 
25 | factors from their current categories to their logical aggregatable variables}
26 | \item{\code{timeVar}}{this is a character denoting the column header of the year variable. This variable must be specified because
27 | time consistency analysis is carried out by using interaction terms with the year variable.}
28 | \item{\code{IC}}{This is a character string denoting whether the information criterion used should be AIC 
29 | (Akaike Information Criterion) or BIC (Bayesian Information Criterion)}
30 | \item{\code{consistThresh}}{Set this to the threshold for median consistency, denoting that the model fit of interaction
31 | of the variables with Year is consistent to \code{consistThresh} or greater before it is accepted}
32 | \item{\code{theData}}{this is the data set that will be used for the analysis containing the rating factors, exposure, 
33 | severity, claim counts, and year}
34 | \item{\code{myDistr}}{this is a character string denoting which distribution should be used in the analysis e.g. "poisson", "Gamma"}
35 | \item{\code{theLink}}{this is the link function to be used in the analysis}
36 | \item{\code{exposureName}}{this is a character string denoting the column name of the exposure (in Years)}
37 | \item{\code{handicap}}{extra penalty for the information criterion, it is added to the IC of the candiate 
38 | model to alter how dificult it is to accept variables}
39 | \item{\code{myDocumentTitle}}{This is a character string for the title of the document}
40 | \item{\code{theAlg}}{this is the algorithm to be used either "forward" or "backward".}
41 | \item{\code{plotCharts}}{this is a logical variable as to whether the charts should be plotted}}
42 | \examples{#Loading the data
43 | data(policyTable)
44 | 
45 | #Preparing rating factor names and mapping tables 
46 | myRatingFactors <- c("BonusMalus", "WeightClass", "Region", "Age", "Mileage", "Usage")
47 | ratingFactorLevels <- lapply(myRatingFactors, function(x){matrix(as.character(levels(policyTable[,x])))})
48 | names(ratingFactorLevels) <- myRatingFactors
49 | ratingFactorLevels$Mileage <- cbind(ratingFactorLevels$Mileage, c("0-12500", "0-12500", "> 12500"))
50 | 
51 | # Example 1: Executing forward algorithm for poisson risk model
52 | outputModelForwardFreq <- forwardBackStepAIC(ratingFact = myRatingFactors, countVar = "NoClaims", 
53 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", IC = "BIC", 
54 | consistThresh = 60, theData = policyTable, analysisType = "frequency",
55 | myDistr = "poisson", theLink = "log", exposureName = "Exposure",
56 | handicap = 0, myDocumentTitle  = "Automated Pricing GLM", theAlg = "forward", 
57 | plotCharts = TRUE)
58 | 
59 | 
60 | # Example 2: Writing process to PDF and log file for documentation purposes
61 | myFolder <- getwd()
62 | 
63 | pdf(file = file.path(paste(myFolder, "GLMOutput.pdf", sep = "")), height = 7, width = 11)
64 | par(mfrow = c(1,1), cex.main = 1, cex.axis = .9, cex.lab = 1, cex = 1)
65 | sink(file = file.path(paste(myFolder, "GLMOutput.doc", sep = "")))
66 | outputModelForwardFreq <- forwardBackStepAIC(ratingFact = myRatingFactors, countVar = "NoClaims", 
67 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", IC = "BIC", 
68 | consistThresh = 60, theData = policyTable, analysisType = "frequency",
69 | myDistr = "poisson", theLink = "log", exposureName = "Exposure",
70 | handicap = 0, myDocumentTitle  = "Automated Pricing GLM", theAlg = "forward", 
71 | plotCharts = TRUE)
72 | sink()
73 | dev.off()}
74 | 


--------------------------------------------------------------------------------
/R/forwardBackSig.R:
--------------------------------------------------------------------------------
  1 | #' Master function to find out if the variable is significant when you add a variable
  2 | #'
  3 | #' @param \code{theModel} this is the candidate model
  4 | #' @param \code{currVar} this is the current variable being investigated
  5 | #' @param \code{alg} this is the algoirthm being used either forward or backward
  6 | #' @return The model that will be used and whether the \code{currVar} was significant or not
  7 | #' @note This is an internal function and should not be used by the user
  8 | #'
  9 | #' @keywords glm forward backward
 10 | #' @include autoPricing-package.R
 11 | #' @seealso \code{\link{glm}}
 12 | #' @seealso \code{\link{stepIC}}
 13 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
 14 | #
 15 | .forwardBackSig <- function(theModel, currVar, alg = "forward", ...){
 16 |   #
 17 |   myWeights <- get("myWeights", envir = funcEnv)
 18 |   factLevels <- get("factLevels", envir = funcEnv)
 19 |   theData <- get("theData", envir = funcEnv)
 20 |   myK <- get("myK", envir = funcEnv)
 21 |   handicap <- get("handicap", envir = funcEnv)
 22 |   plotCharts <- get("plotCharts", envir = funcEnv)
 23 |   theFormula <- theModel$formula
 24 |   #
 25 |   if(alg == "forward"){
 26 |   glm1 <- update(theModel, as.formula(paste("~.+", currVar, sep = "")))
 27 |   glm2 <- theModel
 28 |   }
 29 |   #
 30 |   if(alg == "backward"){
 31 |   glm1 <- theModel
 32 |   glm2 <- update(theModel, as.formula(paste("~.-", currVar, sep = "")))
 33 |   }
 34 |   
 35 |   
 36 |   #
 37 |   # This is the important part where it decided what happens to 
 38 |   #         the variable based on its information criterion
 39 |   #
 40 | 	factorChanged <- FALSE
 41 | 	assign("factorChanged", factorChanged, envir = funcEnv)
 42 | 	bSelType <- get("selType", envir = funcEnv)
 43 |   if(.getVarSignificance(aGlm1 = glm1, aGlm2 = glm2, selType = bSelType)){
 44 |     cat(paste("\nVariable ", currVar, " is significant \n\n", sep = ""))
 45 | 
 46 |     return(list(glm1, "Significant"))
 47 |     }else{
 48 |       if(ncol(factLevels[[currVar]]) > 1){
 49 |       
 50 | 	consolidTable <- factLevels[[currVar]]
 51 | 	
 52 |     #Extract Significance Test for factor level of variable concerned
 53 |     variableComparison <- summary(glht(glm1, linfct = eval(parse(
 54 |           text = paste("mcp(", currVar, ' = "Tukey")', sep = ""))) ))$test
 55 |     compNames <- names(variableComparison$coefficients)
 56 |     variableComparison <- data.frame("Comparison" = compNames, "pvalues" = as.vector(variableComparison$pvalue))
 57 |     #
 58 |     cat("Output of summary(glht(glm, variable = 'Tukey')) in the {multcomp} package \n")
 59 |     print(variableComparison)
 60 |     cat("\nThis is the mapping table to be used to aggregate the factor levels before retesting the glm \n")
 61 |     print(consolidTable)
 62 |     poorVars <- as.character(variableComparison[variableComparison$pvalues > 0.05,]$Comparison)
 63 |     poorVars <- strsplit(poorVars, " - ")
 64 |     #
 65 |     factorChanged <- FALSE
 66 |     tempData <- get("tempData", envir = funcEnv)
 67 |     for(i in seq(along = poorVars)){
 68 |     poorVals <- consolidTable[which(consolidTable[,1] %in% poorVars[[i]]), ][,2]
 69 |     #
 70 |     if(all(poorVals == poorVals[1])){
 71 |       levels(tempData[,currVar])[which(levels(tempData[,currVar]) %in% poorVars[[i]])] <- poorVals[1]
 72 |       cat(paste("\naltered levels ... ", paste(levels(tempData[,currVar]), collapse = ", "), "\n" ) )
 73 |       factorChanged <- TRUE
 74 |       }else{}
 75 |     }
 76 |     cat(paste("\nFactor has changed?  ... ", factorChanged, "\n\n", sep = ""))
 77 |     if(factorChanged){
 78 |       glm1 <- glm(glm1$formula, data = tempData, family = modelFamily, offset = log(Exposure), weights = myWeights)
 79 | 	
 80 |       if( .getVarSignificance(aGlm1 = glm1, aGlm2 = glm2, selType = bSelType) ){
 81 | 
 82 | 		cat("Alternative form of the variable ", currVar, " is significant\n")
 83 |         assign("tempData", tempData, envir = funcEnv)
 84 | 		assign("factorChanged", factorChanged, envir = funcEnv)
 85 |         return(list(glm1, "Significant"))
 86 |       }else{
 87 |         if(plotCharts){
 88 |         .plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 
 89 |                                 .getCoeffs(myExposure = "Exposure", myGlm = glm1, aCurrVar = as.character(currVar)))
 90 |         }
 91 |         cat(paste("Variable ", currVar, " is not significant", "\n\n\n", sep = ""))
 92 | 		assign("factorChanged", factorChanged, envir = funcEnv)
 93 |         return(list(glm2, "Not Significant"))}
 94 |       
 95 |     }else{
 96 |       if(plotCharts){
 97 |       .plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 
 98 |                                 .getCoeffs(myExposure = "Exposure", myGlm = glm1, aCurrVar = as.character(currVar)))
 99 |       }
100 |       cat(paste("Variable ", currVar, " is not significant \n\n\n", sep = ""))
101 | 	  assign("factorChanged", factorChanged, envir = funcEnv)
102 |         return(list(glm2, "Not Significant"))}
103 |     
104 |     }else{
105 |       if(plotCharts){
106 |       .plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 
107 |                                 .getCoeffs(myExposure = "Exposure", myGlm = glm1, aCurrVar = as.character(currVar)))
108 |       }
109 |       cat(paste("No further aggregation can be carried out since mapping table is not specified so variable ", 
110 |                   currVar, " is not significant \n\n\n", sep = ""))
111 |  		assign("factorChanged", factorChanged, envir = funcEnv)
112 |         return(list(glm2, "Not Significant"))
113 |     }
114 |     }
115 |   
116 | }
117 | 
118 | 


--------------------------------------------------------------------------------
/R/consistCorrection.R:
--------------------------------------------------------------------------------
  1 | #' This function rebalances a raw variable that is significant but not consistent
  2 | #'
  3 | #' @param \code{glm1} the glm that contains the rating factor to be analysed output from the \code{.forwardBackSig} function
  4 | #' @param \code{currVar} the rating factor to be analysed 
  5 | #' @return The output is a list of revised glm and a character either \code{"Consistent"} or \code{"Not Consistent"}
  6 | #' @note This is an internal function
  7 | #'
  8 | #' @keywords glm consistency significance
  9 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
 10 | #
 11 | .consistCorrection <- function(glm1 = currentModel, currVar = currentVar){
 12 | 	
 13 | 	cat("Significant But Not Consistent Analysis (SBNC) Analysis:\n")
 14 | 	cat("########################################################\n\n")
 15 | 	cat("This part of the analysis is taking place because the variable ", currVar, " is significant but not consistent ...\n")
 16 | 	cat("... so the variable is being rebalanced to see if it will be consistent\n\n")
 17 | 	tempData <- get("theData", envir = funcEnv)
 18 | 	myWeights <- get("myWeights", envir = funcEnv)
 19 | 	factLevels <- get("factLevels", envir = funcEnv)
 20 | 	timeVar <- get("timeVar", envir = funcEnv)
 21 | 	exposureName <- get("exposureName", envir = funcEnv)
 22 | 	consistThresh <- get("consistThresh", envir = funcEnv)
 23 | 	
 24 | 	glm2 <- update(theModel, as.formula(paste("~.-", currVar, sep = "")))
 25 | 	
 26 | 	#myForm2 <- as.character(glm2$formula)[c(2,1,3)]
 27 | 	#ratingFactors <- strsplit(myForm2[3], " \\+ ")[[1]]
 28 | 	#myForm2 <- paste(paste(myForm2[1:2], collapse = " "), paste(ratingFactors[ratingFactors != currVar], collapse = " + "))
 29 | 	#glm2 <- glm(formula(myForm2), data = tempData, family = glm2$family, offset = log(Exposure), weights = myWeights)
 30 | 	#
 31 | 	#####################################
 32 | 	
 33 | 		consolidTable <- factLevels[[currVar]]
 34 | 		#Extract Significance Test for factor level of variable concerned
 35 | 		variableComparison <- summary(glht(glm1, linfct = eval(parse(
 36 | 										text = paste("mcp(", currVar, ' = "Tukey")', sep = ""))) ))$test
 37 | 		compNames <- names(variableComparison$coefficients)
 38 | 		variableComparison <- data.frame("Comparison" = compNames, "pvalues" = as.vector(variableComparison$pvalue))
 39 | 		#
 40 | 		cat("Output of summary(glht(glm, variable = 'Tukey')) in the {multcomp} package\n")
 41 | 		print(variableComparison)
 42 | 		cat("\nThis is the mapping table to be used to aggregate the factor levels before retesting the glm\n")
 43 | 		print(consolidTable)
 44 | 		poorVars <- as.character(variableComparison[variableComparison$pvalues > 0.05,]$Comparison)
 45 | 		poorVars <- strsplit(poorVars, " - ")
 46 | 		#
 47 | 		factorChanged <- FALSE
 48 | 		for(i in seq(along = poorVars)){
 49 | 			poorVals <- consolidTable[which(consolidTable[,1] %in% poorVars[[i]]), ][,2]
 50 | 			#
 51 | 			if(all(poorVals == poorVals[1])){
 52 | 				levels(tempData[,currVar])[which(levels(tempData[,currVar]) %in% poorVars[[i]])] <- poorVals[1]
 53 | 				cat("altered levels ... ", paste(levels(tempData[,currVar]), collapse = ", "), "\n" )
 54 | 				factorChanged <- TRUE
 55 | 			}else{}
 56 | 		}#end for
 57 | 		cat(paste("\n\nFactor has changed?  ... ", factorChanged, "\n", sep = ""))
 58 | 		if(factorChanged){
 59 | 			glm1 <- glm(glm1$formula, data = tempData, family = modelFamily, offset = log(Exposure), weights = myWeights)
 60 | 			#
 61 | 			#print(paste("Extended Model IC ", round(extractAIC(glm1, k = myK)[2]), " sub model ", 
 62 | 		    #	round(extractAIC(glm2, k = myK)[2]), sep = ""))
 63 | 			bSelType <- get("selType", envir = funcEnv)
 64 | 			#aicBetter <- (round(extractAIC(glm1, k = myK)[2]) + handicap < round(extractAIC(glm2, k = myK)[2]))
 65 | 			if( .getVarSignificance(aGlm1 = glm1, aGlm2 = glm2, selType = bSelType) ){
 66 | 				cat(paste("An altered form of variable ", currVar, " is significant with IC: ", "\n", sep = ""))
 67 | 				assign("tempData", tempData, envir = funcEnv)
 68 | 				assign("factorChanged", factorChanged, envir = funcEnv)
 69 | 				.plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 
 70 | 								.getCoeffs(myExposure = "Exposure", myGlm = glm1))
 71 | 				currentModelTime <- formula(paste(paste(as.character(glm1$formula)[c(2,1,3)], collapse = ""), 
 72 | 								" + ", paste(timeVar, ":", currVar, sep = ""), sep = ""))
 73 | 				consistOut <- .plotTimeConsist(ratingFactor = as.character(currVar), timeDef = timeVar, 
 74 | 						theGlm = currentModelTime, constThresh = consistThresh, expVar = exposureName,
 75 | 						showPlots = TRUE)
 76 | 				if(consistOut > consistThresh){
 77 | 					cat(paste("Atered variable ", currVar, " is significant and consistent", "\n\n", sep = ""))
 78 | 					return(list(glm1, "Consistent"))}else{
 79 | 					cat(paste("Atered variable ", currVar, " is significant but not consistent", "\n\n", sep = ""))
 80 | 					return(list(glm2, "Not Consistent"))}
 81 | 			}else{
 82 | 				if(plotCharts){
 83 | 					.plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 
 84 | 									.getCoeffs(myExposure = "Exposure", myGlm = glm1))
 85 | 				}
 86 | 				cat(paste("Atered initial significant variable ", currVar, " is not significant -- odd!!", "\n\n", sep = ""))
 87 | 				assign("factorChanged", factorChanged, envir = funcEnv)
 88 | 				return(list(glm2, "Not Consistent"))}
 89 | 			
 90 | 		}else{
 91 | 			if(plotCharts){
 92 | 				.plotRatingFactor(ratingFactor = as.character(currVar), theCoefficients = 
 93 | 								.getCoeffs(myExposure = "Exposure", myGlm = glm1))
 94 | 			}
 95 | 			cat(paste("Variable ", currVar, " is not changed therefore not consistent", "\n\n", sep = ""))#
 96 | 			assign("factorChanged", factorChanged, envir = funcEnv)
 97 | 			return(list(glm2, "Not Consistent"))
 98 | 		}
 99 | 	
100 | }
101 | 


--------------------------------------------------------------------------------
/man/stepIC.Rd:
--------------------------------------------------------------------------------
 1 | \name{stepIC}
 2 | \alias{stepIC}
 3 | \title{Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for...}
 4 | \usage{stepIC(ratingFact, countVar, sevVar, factLevels=1, timeVar,
 5 |     selType="BIC", consistThresh=60, theData, analysisType="frequency",
 6 |     myDistr="poisson", theLink="log", theAlg="forward",
 7 |     exposureName="Exposure", handicap=0, plotCharts=TRUE,
 8 |     myDocumentTitle="Automated Pricing GLM", ...)}
 9 | \description{Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for 
10 | actuarial pricing models the routine also dynamically adjusts the categories for variables that are not significant if an adequate 
11 | mapping table is provided. This function is intended for analysing categorical explanatory varibles.}
12 | \value{The output is the frequency or severity model from the chosen algorithm \code{theAlg}}
13 | \note{please pay attention to the structure of the data requirements, since it is different from how 
14 | actuarial data for GLM analysis is usually shaped and use the provided dataset "policyTable" as a guide
15 | for how the data ashould be formatted.}
16 | \keyword{glm}
17 | \keyword{actuarial}
18 | \keyword{pricing}
19 | \keyword{automation}
20 | \keyword{backward}
21 | \keyword{forward}
22 | \author{Chibisi Chima-Okereke \email{cchima-okereke@mango-solutions.com}}
23 | \arguments{\item{\code{ratingFact}}{a character vector denoting the column headers of rating factors in your table (\code{theData}).
24 | Please be aware that the package \code{autoPricing} does not currently support interaction terms in your model.}
25 | \item{\code{countVar}}{a character string denoting the name of the claims count column}
26 | \item{\code{sevVar}}{a character string denoting the column header of the severity variable}
27 | \item{\code{factLevels}}{this is a list of matrices (or data.frames) that denote the mapping of the rating 
28 | factors from their current categories to their logical aggregatable variables}
29 | \item{\code{timeVar}}{a character denoting the column header of the year variable. This variable must be specified because
30 | time consistency analysis is carried out by using interaction terms with the year variable.}
31 | \item{\code{IC}}{a character string denoting whether the information criterion used should be "AIC" 
32 | (Akaike Information Criterion) or "BIC" (Bayesian Information Criterion)}
33 | \item{\code{consistThresh}}{Set this to the threshold for median consistency, denoting that the model fit of interaction
34 | of the variables with Year is consistent to \code{consistThresh} or greater before it is accepted}
35 | \item{\code{theData}}{this is the data set that will be used for the analysis containing the rating factors, exposure, 
36 | severity, claim counts, and year}
37 | \item{\code{analysisType}}{flag indicating whether the analysis is for a "frequency" or "severity" model}
38 | \item{\code{myDistr}}{this is a character string denoting which distribution should be used in the analysis e.g. "poisson", "Gamma"}
39 | \item{\code{theLink}}{this is the link function to be used in the analysis}
40 | \item{\code{theAlg}}{this is the algorithm to be used either "forward" or "backward".}
41 | \item{\code{exposureName}}{this is a character string denoting the column name of the exposure (in Years)}
42 | \item{\code{handicap}}{extra penalty for the information criterion, it is added to the IC of the candiate 
43 | model to alter how dificult it is to accept variables}
44 | \item{\code{myDocumentTitle}}{This is a character string for the title of the document}
45 | \item{\code{plotCharts}}{this is a logical variable as to whether the charts should be plotted}}
46 | \examples{#Loading the data
47 | data(policyTable)
48 | 
49 | #Preparing rating factor names and mapping tables 
50 | myRatingFactors <- c("BonusMalus", "WeightClass", "Region", "Age", "Mileage", "Usage")
51 | ratingFactorLevels <- lapply(myRatingFactors, function(x){matrix(as.character(levels(policyTable[,x])))})
52 | names(ratingFactorLevels) <- myRatingFactors
53 | ratingFactorLevels$Mileage <- cbind(ratingFactorLevels$Mileage, c("0-12500", "0-12500", "> 12500"))
54 | ratingFactorLevels$BonusMalus <- cbind(ratingFactorLevels$BonusMalus, as.character(sort(rep(LETTERS[1:7], 2))))
55 | weightClass <- c("650-935", "650-935", "650-935", "650-935", "1030-1315", "1030-1315", "1030-1315", "1030-1315", "1410-1600", "1410-1600", "1410-1600")
56 | ratingFactorLevels$WeightClass <- cbind(ratingFactorLevels$WeightClass, weightClass)
57 | 
58 | #Example 1: Executing forward algorithm for poisson risk model
59 | outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 
60 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 
61 | consistThresh = 60, theData = policyTable, analysisType = "frequency",
62 | myDistr = "poisson", theLink = "log", exposureName = "Exposure",
63 | handicap = 0, myDocumentTitle  = "Automated Pricing GLM", theAlg = "forward", 
64 | plotCharts = TRUE)
65 | 
66 | 
67 | # Example 2: Writing process to PDF and log file for documentation purposes
68 | myFolder <- getwd()
69 | 
70 | pdf(file = file.path(myFolder, "GLMOutput.pdf"), height = 7, width = 11)
71 | par(mfrow = c(1,1), cex.main = 1, cex.axis = .9, cex.lab = 1, cex = 1)
72 | sink(file = file.path(myFolder, "GLMOutput.doc"))
73 | outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 
74 | sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 
75 | consistThresh = 60, theData = policyTable, analysisType = "frequency",
76 | myDistr = "poisson", theLink = "log", exposureName = "Exposure",
77 | handicap = 0, myDocumentTitle  = "Automated Pricing GLM", theAlg = "forward", 
78 | plotCharts = TRUE)
79 | sink()
80 | dev.off()}
81 | 


--------------------------------------------------------------------------------
/R/plotTimeConsist.R:
--------------------------------------------------------------------------------
  1 | #' Function for plotting consistency
  2 | #'
  3 | #' @param \code{ratingFactor} a character string denoting the rating factor to carry out time consistency analysis
  4 | #' @param \code{timeDef} a character string denoting the Year variable
  5 | #' @param \code{theGlm} a glm object to be analysed
  6 | #' @param \code{constThresh} a numeric denoting the threshold percentage of consistency
  7 | #' @param \code{expVar} set this to the exposure column name
  8 | #' @param \code{showPlots} logical for whether to plot the variables or not
  9 | #' @return The output is a numeric denoting the median consistency of the rating factor with year
 10 | #' @note ...
 11 | #' @seealso \code{\link{stepIC}} 
 12 | #'
 13 | #' @keywords consistency glm time
 14 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
 15 | #
 16 | .plotTimeConsist <- function(ratingFactor = "BonusMalus", timeDef = "Year", theGlm = myGlm, 
 17 |                                                           constThresh = 60, expVar = "Exposure", showPlots = TRUE){
 18 | #
 19 | exposureName <- get("exposureName", envir = funcEnv)
 20 | theData <- get("theData", envir = funcEnv)
 21 | names(theData)[names(theData) == exposureName] <- "Exposure"
 22 | myWeights <- get("myWeights", envir = funcEnv)
 23 | theGlm <- glm(theGlm, data = theData, family = get("modelFamily", envir = funcEnv), offset = log(Exposure), weights = myWeights)
 24 | myTerms <- gsub(" ", "", strsplit(as.character(theGlm$formula)[3], "\\+")[[1]])
 25 | myTerms <- unlist(lapply(myTerms, function(x){x <- strsplit(x, ":")[[1]]; if(length(x) > 1){return(NULL)}else{return(x)}}))
 26 | myTerms <- myTerms[!is.na(myTerms)]
 27 | myTerms <- myTerms[!(myTerms %in% c(timeDef, ratingFactor))]
 28 | newData <- expand.grid(timeDef = levels(theGlm$data[,get("timeDef")]), ratingFactor = levels(theGlm$data[,get("ratingFactor")]), expVar = 1)
 29 | names(newData) <- c(timeDef, ratingFactor, expVar)
 30 | #
 31 | if(length(myTerms) > 0){
 32 | myTerms <- lapply(myTerms, function(x){y <- data.frame(levels(theGlm$data[,x])[1]); names(y) <- x; return(y)})
 33 | myTerms <- do.call(cbind, myTerms)
 34 | #
 35 | newData <- data.frame(newData, myTerms)
 36 | }
 37 | myWeights <- rep(1, nrow(newData))
 38 | newData <- data.frame(newData, myWeights)
 39 | tempSum <- predict(theGlm, newdata = newData, type = "terms", se.fit = FALSE)
 40 | # Summing over the exposure year, rating factor, and year:rating factor interaction terms
 41 | tempSum <- tempSum[,c(timeDef, ratingFactor, paste(timeDef, ":", ratingFactor, sep = ""))]
 42 | #
 43 | newData <- newData[,c(timeDef, ratingFactor)]
 44 | newData$Coefficients <- apply(tempSum, 1, sum)
 45 | #
 46 | myFormula <- formula(paste("Coefficients", " ~ ", timeDef, " + ", ratingFactor, sep = ""))
 47 | testSum <- xtabs(myFormula, data = newData)
 48 | # Defining Colors
 49 | # EMB Yellow
 50 | embYellow <- c(250, 255, 102)/255
 51 | embYellow <- rgb(embYellow[1], embYellow[2], embYellow[3], alpha = 0.7)
 52 | #
 53 | myColors <- timPalette(length(levels(theGlm$data[,timeDef])))
 54 | myProps <- prop.table(xtabs(formula(paste(expVar, " ~ ", timeDef," + ", 
 55 |                                        ratingFactor, sep = "")), data = theGlm$data))*100
 56 | #
 57 | fullYRange <- c(0, max(2*apply(myProps, 2, sum))) 
 58 | yRange <- c(fullYRange[2]/2, fullYRange[2])
 59 | xRange <- range(testSum)
 60 | if(xRange[2] > 0){xRange[2] <- xRange[2]*1.2}else{xRange[2] <- xRange[2] - .2*xRange[2] }
 61 | #
 62 | convertScale <- function(conPoints = 1, parm2Perc = TRUE){
 63 |   
 64 |  myM <- diff(yRange)/diff(xRange)
 65 |  myC <- yRange[1] - myM*xRange[1]
 66 |  
 67 |  if(parm2Perc == TRUE){outPut <- myM*conPoints + myC}else{
 68 |    
 69 |    if(parm2Perc == FALSE){outPut <- (conPoints - myC)/myM}else{outPut <- NULL}
 70 |  }
 71 | return(outPut)
 72 | }
 73 | #
 74 | #
 75 | fullYRange <- c(fullYRange[1], 1.4*ceiling(fullYRange[2]))
 76 | plotCoeffs <- convertScale(testSum)
 77 | #
 78 | if(showPlots == TRUE){
 79 | par(mar=c(5,4,4,5) + .1)
 80 | xCoords <- barplot(myProps, col = myColors[1:nrow(myProps)], ylim = fullYRange, yaxt = "n", 
 81 |         main = paste("Consistency Plot for ", ratingFactor, " rating factor", sep = ""), legend = rownames(myProps))
 82 | 
 83 | for(i in 1:nrow(plotCoeffs)){
 84 | lines(xCoords, plotCoeffs[i,], lwd = 2, col = myColors[i])
 85 | points(xCoords, plotCoeffs[i,], cex = 1, lwd = 1.2, pch = 21, bg = myColors[i], col = "black")
 86 | #print(i)  
 87 | axis(4)
 88 | mtext("Exposure %", side = 4, line = 3)
 89 | axis(2, labels = c("", round(convertScale(axTicks(2), parm2Perc = FALSE), 1)[-1]), at = axTicks(2))
 90 | mtext(paste("Predicted Values for model including Year:", as.character(ratingFactor)," interaction", sep = ""), side = 2, line = 3)
 91 | }# end for
 92 | }#end show plot
 93 | # Calculating consistency
 94 | if(ncol(testSum) > 2){diffMatrix <- data.frame(t(apply(testSum, 1, diff)))}else{
 95 |   diffMatrix <- data.frame(apply(testSum, 1, diff))
 96 | }
 97 | names(diffMatrix) <- paste(colnames(testSum)[1:(length(colnames(testSum)) - 1)], colnames(testSum)[2:(length(colnames(testSum)))], sep = " - ")
 98 | 
 99 | myCondition <- function(x){x[which(x > 0)] <- 1
100 |                            x[which(x < 0)] <- -1
101 |                            x[which(x == 0)] <- 0
102 |                            return(x)}
103 | 
104 | consistencyMatrix <- apply(diffMatrix, 2, myCondition)
105 | 
106 | myProps <- function(x){
107 | percConsist <- max(data.frame(prop.table(table(x)))[,2])
108 | #
109 | return(percConsist)
110 | }
111 | #
112 | myConsist <- apply(consistencyMatrix, 2, myProps)*100
113 | totConsist <- round(median(myConsist))
114 | myConsist <- round(myConsist)
115 | #
116 | if(showPlots == TRUE){
117 | myXs <- barplot(myConsist, border = "NA", col = "white", ylim = c(0, 120),
118 |                 main = "Percentage consistency between adjacent levels", 
119 |                 ylab = "Percentage", xlab = "Comparison")
120 | points(myConsist ~ myXs, cex = 1, lwd = 1.2, pch = 21, bg = myColors[1], col = "black")
121 | lines(myConsist ~ myXs, cex = 1, lwd = 1.2, col = myColors[1])
122 | abline(h = constThresh, lty = 2, col = "grey")
123 | #
124 | if(totConsist >= constThresh){text(x = 1, y = constThresh*1.05, labels = "           Consistency Pass")}else{
125 | text(x = 1, y = constThresh*1.05, labels = "            Consistency Fail")}
126 | axis(1, labels = FALSE, at = myXs)
127 | box()
128 | }#end if plot
129 | myCoeffs <- data.frame(summary(theGlm)$coeff)
130 | names(myCoeffs) <- c("Estimate", "StdError", "ZValue","P(Z > |z|)")
131 | #
132 | return(totConsist)
133 | } # end of plotting function
134 | #
135 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/R/stepIC.R:
--------------------------------------------------------------------------------
  1 | #'  Function for forward and backward model selection algorithm using information criteria to obtain frequency and severity models for 
  2 | #'  actuarial pricing models the routine also dynamically adjusts the categories for variables that are not significant if an adequate 
  3 | #'  mapping table is provided. This function is intended for analysing categorical explanatory varibles.
  4 | #'
  5 | #'
  6 | #' @param \code{ratingFact} a character vector denoting the column headers of rating factors in your table (\code{theData}).
  7 | #'                          Please be aware that the package \code{autoPricing} does not currently support interaction terms in your model.
  8 | #' @param \code{countVar} a character string denoting the name of the claims count column
  9 | #' @param \code{sevVar} a character string denoting the column header of the severity variable
 10 | #' @param \code{factLevels} this is a list of matrices (or data.frames) that denote the mapping of the rating 
 11 | #'                          factors from their current categories to their logical aggregatable variables 
 12 | #' @param \code{timeVar} a character denoting the column header of the year variable. This variable must be specified because
 13 | #'                          time consistency analysis is carried out by using interaction terms with the year variable.
 14 | #' @param \code{selType} a character string denoting whether the information criterion used to denote for model comparisons should be "AIC" 
 15 | #'                          (Akaike Information Criterion) or "BIC" (Bayesian Information Criterion). Other selection types are
 16 | #' 							available, "Chisq" for Chi-Squared, and "F" for F-Test but these are not recommended for multiple comparison reasons.
 17 | #' @param \code{consistThresh} Set this to the threshold for median consistency, denoting that the model fit of interaction
 18 | #'                            of the variables with Year is consistent to \code{consistThresh} or greater before it is accepted
 19 | #' @param \code{theData} this is the data set that will be used for the analysis containing the rating factors, exposure, 
 20 | #'                            severity, claim counts, and year
 21 | #' @param \code{analysisType} flag indicating whether the analysis is for a "frequency" or "severity" model
 22 | #' @param \code{myDistr} this is a character string denoting which distribution should be used in the analysis e.g. "poisson", "Gamma"
 23 | #' @param \code{theLink} this is the link function to be used in the analysis
 24 | #' @param \code{theAlg} this is the algorithm to be used either "forward" or "backward".
 25 | #' @param \code{exposureName} this is a character string denoting the column name of the exposure (in Years)
 26 | #' @param \code{handicap} extra penalty for the information criterion, it is added to the IC of the candiate 
 27 | #'                            model to alter how dificult it is to accept variables
 28 | #' @return The output is the frequency or severity model from the chosen algorithm \code{theAlg}
 29 | #' @param \code{myDocumentTitle} This is a character string for the title of the document
 30 | #' @param \code{plotCharts} this is a logical variable as to whether the charts should be plotted
 31 | #' @param \code{randomiseOrder} this is whether the order of the stepwise analysis should be randomised or not.
 32 | #' 							This feature may be useful to check the model which is selected if variables are
 33 | #' 							tested in a random randomised order
 34 | #' @param \code{justFit} Determines whether the automatic fitting process is used or whether the specified model is simply fitted
 35 | #' @note please pay attention to the structure of the data requirements, since it is different from how 
 36 | #'                           actuarial data for GLM analysis is usually shaped and use the provided dataset "policyTable" as a guide
 37 | #'                          for how the data ashould be formatted.
 38 | #'
 39 | #' @keywords glm actuarial pricing automation backward forward
 40 | #' @include autoPricing-package.R
 41 | #' @author Chibisi Chima-Okereke \email{cchima-okereke@@mango-solutions.com}
 42 | #' @examples
 43 | #'
 44 | #' #Loading the data
 45 | #' data(policyTable)
 46 | #'
 47 | #' #Preparing rating factor names and mapping tables 
 48 | #' myRatingFactors <- c("BonusMalus", "WeightClass", "Region", "Age", "Mileage", "Usage")
 49 | #' ratingFactorLevels <- lapply(myRatingFactors, function(x){matrix(as.character(levels(policyTable[,x])))})
 50 | #' names(ratingFactorLevels) <- myRatingFactors
 51 | #' ratingFactorLevels$Mileage <- cbind(ratingFactorLevels$Mileage, c("0-12500", "0-12500", "> 12500"))
 52 | #' ratingFactorLevels$BonusMalus <- cbind(ratingFactorLevels$BonusMalus, as.character(sort(rep(LETTERS[1:7], 2))))
 53 | #' weightClass <- c("650-935", "650-935", "650-935", "650-935", "1030-1315", "1030-1315", "1030-1315", "1030-1315", "1410-1600", "1410-1600", "1410-1600")
 54 | #' ratingFactorLevels$WeightClass <- cbind(ratingFactorLevels$WeightClass, weightClass)
 55 | #' 
 56 | #' #Example 1: Executing forward algorithm for poisson risk model
 57 | #' outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 
 58 | #'                   sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 
 59 | #'                   consistThresh = 60, theData = policyTable, analysisType = "frequency",
 60 | #'                   myDistr = "poisson", theLink = "log", exposureName = "Exposure",
 61 | #'                   handicap = 0, myDocumentTitle  = "Automated Pricing GLM", theAlg = "forward", 
 62 | #'                   plotCharts = TRUE)
 63 | #'
 64 | #'
 65 | #' # Example 2: Writing process to PDF and log file for documentation purposes
 66 | #' myFolder <- getwd()
 67 | #'
 68 | #' pdf(file = file.path(myFolder, "GLMOutput.pdf"), height = 7, width = 11)
 69 | #' par(mfrow = c(1,1), cex.main = 1, cex.axis = .9, cex.lab = 1, cex = 1)
 70 | #' sink(file = file.path(myFolder, "GLMOutput.doc"))
 71 | #' outputModelForwardFreq <- stepIC(ratingFact = myRatingFactors, countVar = "NoClaims", 
 72 | #'                   sevVar = "GrossIncurred", factLevels = ratingFactorLevels, timeVar = "Year", selType = "BIC", 
 73 | #'                   consistThresh = 60, theData = policyTable, analysisType = "frequency",
 74 | #'                   myDistr = "poisson", theLink = "log", exposureName = "Exposure",
 75 | #'                   handicap = 0, myDocumentTitle  = "Automated Pricing GLM", theAlg = "forward", 
 76 | #'                   plotCharts = TRUE)
 77 | #' sink()
 78 | #' dev.off()
 79 | #
 80 | stepIC <- function(ratingFact, countVar, 
 81 |                      sevVar, factLevels = 1, timeVar, selType = "BIC",
 82 |                      consistThresh = 60, theData, analysisType = "frequency",
 83 |                      myDistr = "poisson", theLink = "log", theAlg = "forward", 
 84 |                      exposureName = "Exposure", handicap = 0, 
 85 |                      plotCharts = TRUE, myDocumentTitle  = "Automated Pricing GLM",
 86 | 					 randomiseOrder = FALSE, justFit = FALSE, ...){
 87 | 
 88 | 	# Specifying the distribution and link functions
 89 | 	assign("modelFamily", .modelFamilySpec(distr = myDistr, myLink = theLink, theta = NULL), envir = .GlobalEnv)
 90 | 				 
 91 | 	# Aggregating the data ...
 92 |   #
 93 |   factorList <- lapply(c(timeVar, ratingFact), function(x){y <- theData[,x];return(y)})
 94 |   names(factorList) <- c(timeVar, ratingFact)
 95 |   theData <- aggregate(theData[,c(countVar, sevVar, exposureName)], by = factorList, FUN = sum)
 96 |   names(theData)[names(theData) == exposureName] <- c("Exposure")
 97 |   #
 98 |   # reformating the data for severity models
 99 |   if(analysisType == "severity"){
100 |     theData <- theData[theData[,sevVar] > 0, ]
101 |     averageClaim <- theData[,sevVar]/theData[,countVar]
102 |     theData$averageClaim <- averageClaim
103 |     depVar <- "averageClaim"
104 |     myWeights <- theData[,countVar]
105 |   }
106 |   if(analysisType == "frequency"){
107 |     depVar <- countVar
108 |     myWeights <- rep(1, nrow(theData))
109 |   }
110 |   #
111 |   # assigning data into protected Environment
112 |   assign("funcEnv", new.env(), envir = .GlobalEnv)
113 |   assign("theData", theData, envir = funcEnv)
114 |   assign("tempData", theData, envir = funcEnv)
115 |   assign("timeVar", timeVar, envir = funcEnv)
116 |   assign("handicap", handicap, envir = funcEnv)
117 |   assign("myWeights", myWeights, envir = funcEnv)
118 |   assign("selType", selType, envir = funcEnv)
119 |   #assign("exposure", theData[,exposureName], envir = funcEnv)
120 |   assign("exposureName", exposureName, envir = funcEnv)
121 |   assign("plotCharts", plotCharts, envir = funcEnv)
122 |   assign("consistThresh", consistThresh, envir = funcEnv)
123 |   #
124 | 
125 | # NOTE WELL, this is a primary jumping off point, it divides the path of the analysis into auto-fit and simple model fit
126 | 
127 | 	## This is the if statement that determines whether the model will go throught the auto-fit selection
128 | 	#				or whether it will simply just fit the model engaged with the parameter "justFit". 
129 | 	if(justFit == TRUE){
130 | 		cat("The justFit parameter has been set to ", justFit, "therefore the auto-fitting process will not be carried out and the specified model will be fully fitted.\n")
131 | 		finalModel <- glm(formula(paste(depVar, " ~ ", paste(c(timeVar, ratingFact), collapse = " + "), sep = "")), data = theData, 
132 | 				family = modelFamily, offset = log(Exposure), weights = get("myWeights", envir = funcEnv))
133 | 	}else{ 
134 | 		
135 | cat("\n", selType, " will be used as model selection criterion in this investigation.\n")
136 | if(!(selType == "AIC" | selType == "BIC")){
137 | 	cat("Warning: Information criteria based techniques are not being used for this stepwise model selection process. This is not recommended.\n\n")
138 | }
139 | 
140 |   # Document Title:
141 |   #
142 |   myDocumentTitle <- paste(myDocumentTitle, " for ", analysisType, " \n(Model Distr: ", myDistr, 
143 |                            ", Link: ", theLink, ")", "\nMethod = ", theAlg, sep = "")
144 |   
145 |   cat("************************************************************************************************************\n")
146 |   cat(paste("This analysis is a ",  analysisType, " analysis using a ", myDistr, " distribution and a ", theLink, " link", 
147 |               ", for the ", theAlg, " algorithm \n", sep = ""))
148 |   cat("************************************************************************************************************\n\n")
149 |   #print("")
150 |   #print("")
151 |   # If you do not have the correct number of factor Levels
152 |   if(length(ratingFact) != length(ratingFactorLevels)){
153 |     factLevels <- as.list(rep(1, length(ratingFact)))
154 |     cat("Warning: factors levels spec have been reset since they have the wrong number of levels \n")
155 |     }
156 |   assign("factLevels", factLevels, envir = funcEnv)
157 |   origData <- theData 
158 |   
159 |   if(theAlg == "forward"){
160 |   prevForms <- paste(depVar, " ~ ", paste(c(timeVar, ratingFact), collapse = " + ", sep = ""), sep = "")
161 |   prevModel <- glm(formula(paste(depVar, " ~ ", timeVar, sep = "")), data = theData, 
162 |                                         family = modelFamily, offset = log(Exposure), weights = myWeights)
163 |   }
164 |   if(theAlg == "backward"){
165 |   prevForms <- paste(depVar, " ~ ", paste(c(timeVar, ratingFact), collapse = " + ", sep = ""), sep = "")
166 |   prevModel <- glm(formula(prevForms), data = theData, 
167 |                                         family = modelFamily, offset = log(Exposure), weights = myWeights)
168 |   }
169 |   
170 |   #if(selType == "AIC"){myK <- 2}
171 |   if(selType == "BIC"){myK <- log(nrow(theData))}else{myK <- 2}
172 |   
173 |   assign("myK", myK, envir = funcEnv)
174 |   # One way to order variables
175 |   cat("Beginning one-way analyses to determine order that variables will be analysed \n")
176 |   cat("********************************************************************************\n\n")
177 |   
178 |   aBaseModel <- glm(formula(paste(depVar, " ~ ", timeVar, sep = "")), data = theData, 
179 | 		  family = modelFamily, offset = log(Exposure), weights = get("myWeights", envir = funcEnv))
180 |   
181 |   theIC <- lapply(ratingFact, function(x){
182 | 			  aModel <- update(aBaseModel, as.formula(paste("~.+", x, sep = "")))
183 | 			  output <- data.frame("RatingFactor" = x, "IC" = extractAIC(aModel, k = myK)[2])
184 | 			  cat(paste(paste(as.character(aModel$formula)[c(2,1,3)], collapse = " "), "\n", sep = ""))
185 | 			  return(output)})
186 |   ICTable <- do.call(rbind, theIC)
187 |   
188 | 	if(!randomiseOrder){
189 |   if(theAlg == "forward"){ICTable <- ICTable[order(ICTable$IC, decreasing = FALSE),]}
190 |   if(theAlg == "backward"){ICTable <- ICTable[order(ICTable$IC, decreasing = TRUE),]}}else{
191 |   icOrder <- sample(rownames(ICTable), size = length(rownames(ICTable)))
192 |   ICTable <- ICTable[icOrder,]
193 | 	}
194 | 
195 |   ratingFact <- as.character(ICTable$RatingFactor)
196 |   rownames(ICTable) <- 1:nrow(ICTable)
197 |   cat("\nAnalysis order ...\n")
198 |   print(ICTable)
199 |   #
200 |   # Plotting Model Details
201 |   if(plotCharts){
202 |   grid.arrange(
203 |   tableGrob(data.frame(myDocumentTitle), show.rownames = FALSE, show.colnames = FALSE, gpar.corefill = gpar(fill="white", 
204 |                                           col = NA, row = NA), gpar.coretext=gpar(col = "black", cex = 3)),
205 |   tableGrob(ICTable,
206 |   show.csep=TRUE, show.rsep=TRUE, show.box=TRUE, separator="grey", name="test", gp=gpar(fontsize=12, lwd=2), 
207 |             equal.width=FALSE, grep=TRUE, global=TRUE),
208 |   nrow = 2)
209 |   }
210 |   #
211 |   myReturn <- TRUE
212 | 
213 |   loopCount <- 1
214 |   
215 |   # while loop
216 |   while(length(ratingFact) > 0){
217 | 
218 |   currentVar <- ratingFact[1]
219 |   
220 |   cat(paste("\nAnalysing the rating factor ... ", currentVar, " \n\n", sep = ""))
221 | 
222 |   # Find out if the variable is significant:
223 |   
224 |   currentModel <- .forwardBackSig(theModel = prevModel, currVar = currentVar, alg = theAlg)
225 |   theData <- get("tempData", envir = funcEnv)
226 |   assign("theData", get("tempData", envir = funcEnv), envir = funcEnv)
227 |   if(currentModel[[2]] == "Significant"){
228 |     currentModel <- currentModel[[1]]
229 |     ICPass <- TRUE
230 |     ratingFact <- ratingFact[currentVar != ratingFact]
231 |     #assign("ratingFact", ratingFact, envir = funcEnv)
232 |   }else{
233 |     currentModel <- currentModel[[1]]
234 |     ICPass <- FALSE
235 |     ratingFact <- ratingFact[currentVar != ratingFact]
236 |     #assign("ratingFact", ratingFact, envir = funcEnv)
237 |   }
238 |   
239 |   if(ICPass){
240 |   currentModelTime <- formula(paste(paste(as.character(currentModel$formula)[c(2,1,3)], collapse = ""), 
241 |                                     " + ", paste(timeVar, ":", currentVar, sep = ""), sep = ""))
242 | ############################################################################################
243 |   
244 |   # Plotting Factor
245 |   myCoeffs <- data.frame(summary(currentModel)$coeff)
246 |   names(myCoeffs) <- c("Estimate", "StdError", "ZValue","P(Z > |z|)")
247 |   if(plotCharts){
248 |   .plotRatingFactor(ratingFactor = as.character(currentVar), theCoefficients = 
249 |                                 .getCoeffs(myExposure = exposureName, myGlm = currentModel, 
250 | 										aCurrVar = as.character(currentVar)))
251 |   }
252 |   consistOut <- .plotTimeConsist(ratingFactor = as.character(currentVar), timeDef = timeVar, 
253 |                                 theGlm = currentModelTime, constThresh = consistThresh, expVar = exposureName,
254 |                                 showPlots = plotCharts)
255 |   consistency <- (consistOut >= consistThresh)
256 |   factorChanged <- get("factorChanged", envir = funcEnv)
257 |   
258 |   # This is run if rating factor is significant, not consistent, 
259 | 	# and not changed and there is a mapping table. We already know that it is significant
260 |   if(!consistency & !factorChanged & ncol(factLevels[[currentVar]]) > 1){
261 | 	  
262 | 	  aConsistencyTest <- .consistCorrection(glm1 = currentModel, currVar = currentVar)
263 | 	  theData <- get("tempData", envir = funcEnv)
264 | 	  cat("\n########################################################\n")
265 | 	  cat("End of SBNC Analysis\n")
266 | 	  currentModel <- aConsistencyTest[[1]]
267 | 	  if(aConsistencyTest[[2]] == "Consistent"){consistency <- TRUE}else{consistency <- FALSE}
268 | 	  myCoeffs <- data.frame(summary(currentModel)$coeff)
269 | 	  names(myCoeffs) <- c("Estimate", "StdError", "ZValue","P(Z > |z|)")
270 |   }
271 |   
272 |   
273 |   if(ICPass & consistency){
274 |     cat("Model is consistent and has improved IC\n")
275 |     prevModel <- currentModel
276 |   }else{
277 |     cat(paste("Information Criteria pass: ", ICPass, " & consistency: ", consistency, "\n", sep = ""))
278 |   }#End Consistency Pass
279 |   
280 |   }else{
281 |     cat("Current variable has failed so final model is sub model\n")
282 |     prevModel <- currentModel
283 |   }
284 | 
285 |   #print("")
286 |   cat(paste("\nThe loop count is ... ", loopCount, "\n\n", sep = ""))
287 |   cat(paste("Finishing variable ... ", currentVar, "\n", sep = ""))
288 |   #print("")
289 |   loopCount <- loopCount + 1
290 |   }# end while
291 | #
292 |   # Final check to see if all the variable are significant
293 |   cat(paste("Carrying out the final checks, the IC for the candidate model is ", 
294 |               round(extractAIC(prevModel, k = myK)[2]),", the full candidate formula is ...\n", sep = ""))
295 |   finalRatingFactors <- gsub(" ", "", strsplit(paste(prevModel$formula)[3], "\\+")[[1]][-1])
296 |   cat(paste(as.character(prevModel$formula)[c(2,1,3)], collapse = " "), "\n")
297 |   #
298 |   cat("The sub models are\n\n")
299 |   
300 |   cSelType <- selType
301 |   for(i in finalRatingFactors){
302 | 	  cat("Testing rating factor ", i, " .....\n")
303 | 	  cat("#########################################\n")
304 | 	  subModel <- update(prevModel, as.formula(paste("~.-", i, sep = "")))
305 | 	  if(!.getVarSignificance(aGlm1 = prevModel, aGlm2 = subModel, selType = cSelType)){
306 | 		  prevModel <- subModel
307 | 		  cat("Variable ", i, " is not significant in the final check so it has been eliminated\n")
308 | 		  cat("####################################################################################\n\n")
309 | 	  }else{
310 | 		  cat("Variable ", i, " is significant in the final check so it will be included in the model\n")
311 | 		  cat("##########################################################################################\n\n")
312 | 	  }
313 | 	  
314 |   }
315 |   
316 |   finalModel <- prevModel
317 |   
318 |   #############################################################################
319 | #
320 | cat("\nThe final model summary follows ...\n")
321 | cat("************************************************************************************************\n")
322 | print(summary(finalModel)$coeff)
323 | #
324 | #print("")
325 | #print("")
326 | # Do a complete dump of diagnostics here, AIC, BIC, Chisq, and F if Type = severity
327 | cat("\n\n*********************************************************************************************************\n")
328 | cat("The final valid model is ... ", as.character(finalModel$formula)[c(2,1,3)], "\n")
329 | cat("*********************************************************************************************************\n")
330 | 
331 | cat("AIC for the final model is ", round(AIC(finalModel)), "\n")
332 | cat("BIC for the final model is ", round(BIC(finalModel)), "\n")
333 | cat("Chi-Squared ANOVA for the final model is ... \n")
334 | print(anova(finalModel, test = "Chisq"))
335 | 
336 | # This is a nicer coefficient table than the default
337 | raingFactorList <- gsub(" ", "", strsplit(as.character(finalModel$formula)[3], split = "\\+")[[1]])
338 | coeffsOut <- lapply(raingFactorList, function(x){data.frame("Factor" = x, .getCoeffs(myExposure = "Exposure", myGlm = finalModel, aCurrVar = x))})
339 | coeffsOut <- do.call(rbind, coeffsOut)
340 | rownames(coeffsOut) <- 1:nrow(coeffsOut)
341 | assign("coeffsOut", coeffsOut, envir = funcEnv)
342 | cat("\nOutputting print friendly model output table\n\n")
343 | print(coeffsOut)
344 | 
345 | cat("\n***********************************************************************************************\n")
346 | cat("End of Model Summary\n")
347 | cat("***********************************************************************************************\n")
348 | #print("")
349 | #
350 | finalModelTable <- data.frame("Formula" = paste(as.character(finalModel$formula)[c(2,1,3)], collapse = " "), 
351 | 		"AIC" = AIC(finalModel), "BIC" = BIC(finalModel))
352 | #
353 | finalTitle <- paste("Final Model for ",  analysisType, " analysis \nusing a ", myDistr, " distribution\n and a ", 
354 |                     theLink, " link for the ", theAlg, " algorithm", sep = "")
355 | if(plotCharts){
356 | grid.arrange(
357 | tableGrob(data.frame(finalTitle), show.rownames = FALSE, show.colnames = FALSE, gpar.corefill = gpar(fill="white", 
358 |                                           col = NA, row = NA), gpar.coretext=gpar(col = "black", cex = 3)),
359 | tableGrob(finalModelTable, show.rownames = FALSE, show.csep=TRUE, show.rsep=TRUE, 
360 | show.box=TRUE, separator="grey", name="test", gp=gpar(fontsize=12, lwd=2), equal.width=FALSE, grep=TRUE, global=TRUE),
361 | nrow = 2
362 | )
363 | }
364 | } # end of justFit it else
365 | # Final Coefficient Table
366 | return(finalModel)
367 | rm(list = ls());gc()
368 | }
369 | 


--------------------------------------------------------------------------------