├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── R
│   ├── decision-tree.R
│   └── explain.R
├── README.md
├── explain-ml-pricing.Rproj
└── manuscript
    ├── elsarticle.cls
    ├── elsevier-harvard-without-titles.csl
    ├── elsevier-harvard.csl
    ├── elsevier-vancouver.csl
    ├── elsevier-with-titles-alphabetical.csl
    ├── elsevier-with-titles.csl
    ├── elsevier-without-titles.csl
    ├── explain-pricing.bib
    ├── figures
    │   ├── breakdown-plot.png
    │   ├── fi-plot.png
    │   ├── pdp-plot.png
    │   ├── tree-plot1.png
    │   └── tree-plot2.png
    ├── manuscript.Rmd
    └── numcompress.sty
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | **/.DS_Store
6 | model_artifacts
7 |
--------------------------------------------------------------------------------
/manuscript/figures/fi-plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/figures/fi-plot.png
--------------------------------------------------------------------------------
/manuscript/figures/pdp-plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/figures/pdp-plot.png
--------------------------------------------------------------------------------
/manuscript/figures/tree-plot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/figures/tree-plot1.png
--------------------------------------------------------------------------------
/manuscript/figures/tree-plot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/figures/tree-plot2.png
--------------------------------------------------------------------------------
/manuscript/figures/breakdown-plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/figures/breakdown-plot.png
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: placeholder
2 | Type: Website
3 | Title: Does not matter.
4 | Version: 0.0.1
5 | Imports:
6 | tinytex,
7 | withr,
8 | rmarkdown,
9 | rticles,
10 | bookdown,
11 | knitr,
12 | kableExtra,
13 | magrittr
--------------------------------------------------------------------------------
/explain-ml-pricing.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 |
3 | language: R
4 | latex: false
5 | cache:
6 | packages: yes
7 | directories:
8 | - $HOME/bin
9 | - $HOME/.TinyTeX
10 |
11 | script:
12 | - Rscript -e 'if (tinytex::tinytex_root() == "") tinytex::install_tinytex()'
13 | - Rscript -e 'tinytex::tlmgr_install("cm-super")'
14 | - Rscript -e 'dir.create("manuscript/dist")'
15 | - Rscript -e 'rmarkdown::render("manuscript/manuscript.Rmd", output_dir = "manuscript/dist/")'
16 |
17 | deploy:
18 | provider: pages
19 | skip_cleanup: true
20 | github_token: $GITHUB_TOKEN
21 | on:
22 | branch: master
23 | local_dir: manuscript/dist
24 |
--------------------------------------------------------------------------------
/R/decision-tree.R:
--------------------------------------------------------------------------------
1 | library(rpart)
2 | library(rpart.plot)
3 | library(dplyr)  # provides %>%, mutate_at(), and mutate() used below
4 |
5 | # Pull the holdout data from the "cork" pins board (same source as R/explain.R).
6 | pins::board_register_github(name = "cork", repo = "kasaai/cork")
7 | testing_data <- pins::pin_get("toy-model-testing-data", board = "cork")
8 |
9 | set.seed(2020)
10 | # Translate the Portuguese factor levels to English for plotting.
11 | testing_data <- testing_data %>%
12 |   mutate_at(c("age_range", "sex", "vehicle_category"),
13 |             ~ .x %>%
14 |               sub("Entre 18 e 25 anos", "18-25", .) %>%
15 |               sub("Entre 26 e 35 anos", "26-35", .) %>%
16 |               sub("Entre 36 e 45 anos", "36-45", .) %>%
17 |               sub("Entre 46 e 55 anos", "46-55", .) %>%
18 |               sub("Maior que 55 anos", "56+", .) %>%
19 |               sub("Não informada", "No information", .) %>%
20 |               sub("Feminino", "Female", .) %>%
21 |               sub("Masculino", "Male", .) %>%
22 |               sub("Passeio importado", "Passenger imported", .) %>%
23 |               sub("Passeio nacional", "Passenger domestic", .) %>%
24 |               # fixed = TRUE so the parentheses are matched literally
25 |               # rather than as a regex group
26 |               sub("Pick-up (nacional e importado)", "Pick-up trucks", .,
27 |                   fixed = TRUE)
28 |   ) %>%
29 |   # sample_n(10000) %>%
30 |   mutate(make = factor(make))
31 |
32 | # Shallow regression tree (depth 2) for the first tree figure.
33 | tree_model1 <- rpart(loss_per_exposure ~ age_range + vehicle_age + sex,
34 |                      data = testing_data,
35 |                      weights = exposure, method = "anova",
36 |                      control = rpart.control(minsplit = 2, minbucket = 1,
37 |                                              cp = 0.0001, maxdepth = 2))
38 |
39 | png("manuscript/figures/tree-plot1.png", width = 400, height = 300)
40 | rpart.plot(tree_model1, type = 1, faclen = 10, tweak = 1.05, branch = 0)
41 | dev.off()
42 |
43 | # Deeper tree (depth 10) with vehicle_category added, for the second figure.
44 | tree_model2 <- rpart(loss_per_exposure ~ age_range + vehicle_age + sex +
45 |                        vehicle_category,
46 |                      data = testing_data,
47 |                      weights = exposure, method = "anova",
48 |                      control = rpart.control(minsplit = 2, minbucket = 1,
49 |                                              cp = 0.0001, maxdepth = 10))
50 |
51 | png("manuscript/figures/tree-plot2.png", width = 700, height = 400)
52 | rpart.plot(tree_model2, type = 1, faclen = 10, tweak = 1.05, branch = 0)
53 | dev.off()
54 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Towards Explainability of Machine Learning Models in Insurance Pricing
2 |
3 |
4 | [![Build Status](https://travis-ci.org/kasaai/explain-ml-pricing.svg?branch=master)](https://travis-ci.org/kasaai/explain-ml-pricing)
5 |
6 |
7 | Live draft preview: [kasaai.github.io/explain-ml-pricing/manuscript.pdf](https://kasaai.github.io/explain-ml-pricing/manuscript.pdf)
8 |
9 | Slack: [slack.kasa.ai](https://slack.kasa.ai/) `#explain-ml-pricing`
10 |
11 | Risk classification for insurance rating has traditionally been done with one-way, or univariate, analysis techniques. In recent years, many insurers have moved towards using generalized linear models (GLMs), a multivariate predictive modeling technique that addresses many shortcomings of univariate approaches and is currently considered the state of the art in insurance risk classification. At the same time, machine learning (ML) techniques such as random forests and deep neural networks have gained popularity in many industries due to their superior predictive performance over linear models. However, these ML techniques, often considered to be completely “black box”, have been less successful in gaining adoption in insurance pricing, which is a regulated discipline and requires a certain amount of transparency in models. While recent efforts in ML research have attempted to provide mechanisms for explaining or interpreting these complex models, to the best of our knowledge none has focused on the insurance pricing use case, which we plan to address in this project. We envision that this work will be a step in enabling insurers to use and file more accurate predictive models, which lead to fairer prices. In addition, it is our intent that this work will assist practitioners in complying with relevant Actuarial Standards of Practice related to ratemaking, modeling, and clear communication of relevant assumptions. This will have additional benefits to regulators and other stakeholders tasked with reviewing actuarial work products.
12 |
13 | The main goal of this project is to describe and demonstrate model explanation techniques for machine learning models in the context of insurance pricing. To that end, this project will involve:
14 |
15 | - Building predictive models for pricing (e.g., a GLM and a random forest).
16 | - Surveying the ML model explanation literature and identifying relevant techniques (e.g., partial dependence plots and local interpretable model-agnostic explanations).
17 | - Applying the techniques to the models built (see the sketch below).
18 | - Discussing how to communicate the artifacts of these techniques.
19 | - Discussing potential recommendations to regulators who will review ML models.
20 |
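For a flavor of what this looks like in code, here is a minimal sketch using the DALEX ecosystem (the `model` object and column names are placeholders; the code actually used for the paper lives in `R/explain.R`):

```r
library(DALEX)

# Wrap any fitted pricing model (GLM, random forest, neural net, ...) in a
# model-agnostic explainer.
explainer <- DALEX::explain(
  model = model,
  data = testing_data,
  y = testing_data$loss_per_exposure
)

# Partial dependence: the model's average prediction as one rating variable
# sweeps over a grid of values.
pdp <- ingredients::partial_dependency(explainer, variables = "vehicle_age")
plot(pdp)

# Permutation feature importance: how much the loss deteriorates when a
# variable's values are shuffled.
fi <- ingredients::feature_importance(explainer)
plot(fi)
```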
21 | -----
22 |
23 | Repository for the paper "Towards Explainability of Machine Learning Models in Insurance Pricing." This work is supported by the Casualty Actuarial Society.
24 |
25 | -----
26 |
27 | Please note that this project is released with a [Contributor Code of
28 | Conduct](https://github.com/kasaai/quests/blob/master/CODE_OF_CONDUCT.md). By participating in this project
29 | you agree to abide by its terms.
30 |
--------------------------------------------------------------------------------
/manuscript/elsevier-without-titles.csl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/elsevier-without-titles.csl
--------------------------------------------------------------------------------
/R/explain.R:
--------------------------------------------------------------------------------
1 | library(tidyverse)
2 | library(DALEX)
3 | library(ggplot2)
4 | library(scales)
5 | library(patchwork)
6 |
7 | # Need to use the following branch of {ingredients}
8 | # remotes::install_github("kevinykuo/ingredients", ref = "weights")
9 |
10 | pins::board_register_github(name = "cork", repo = "kasaai/cork")
11 | testing_data <- pins::pin_get("toy-model-testing-data", board = "cork")
12 | piggyback::pb_download(file = "model_artifacts/toy-model.tar.gz", repo = "kasaai/cork")
13 | untar("model_artifacts/toy-model.tar.gz", exdir = "model_artifacts")
14 | toy_model <- keras::load_model_tf("model_artifacts/toy-model")
15 |
16 | predictors <- c(
17 | "sex", "age_range", "vehicle_age", "make",
18 | "vehicle_category", "region"
19 | )
20 |
21 | custom_predict <- function(model, newdata) {
22 | predict(model, newdata, batch_size = 10000)
23 | }
24 |
25 | explainer_nn <- DALEX::explain(
26 | model = toy_model,
27 | data = testing_data,
28 | y = testing_data$loss_per_exposure,
29 | weights = testing_data$exposure,
30 | predict_function = custom_predict,
31 | label = "neural_net"
32 | )
33 |
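# The explainer bundles the fitted model, the holdout data, the exposure
# weights, and the batched predict wrapper into a single object, so the
# ingredients/iBreakDown functions below can treat the neural net as a
# generic black-box prediction function.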
34 | pdp_vehicle_age <- ingredients::partial_dependency(
35 | explainer_nn,
36 | "vehicle_age",
37 | N = 10000,
38 | variable_splits = list(vehicle_age = seq(0, 35, by = 0.1))
39 | )
40 |
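# Partial dependence fixes vehicle_age at each grid point in variable_splits
# (0 to 35 by 0.1), predicts on N sampled rows of the holdout data, and
# averages the predictions, tracing the model's average response to vehicle
# age in isolation.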
41 | pdp_plot <- as.data.frame(pdp_vehicle_age) %>%
42 | ggplot(aes(x = `_x_`, y = `_yhat_`)) +
43 | geom_line() +
44 | ylab("Average Predicted Loss Cost") +
45 |   theme_bw() +
46 |   theme(axis.ticks.x = element_blank(),
47 |         axis.text.x = element_blank(),
48 |         axis.title.x = element_blank())
49 |
50 | vehicle_age_histogram <- testing_data %>%
51 | ggplot(aes(x = vehicle_age)) +
52 | geom_histogram(alpha = 0.8) +
53 | theme_bw() +
54 | ylab("Count") +
55 | xlab("Vehicle Age")
56 |
57 | pdp_plot <- pdp_plot / vehicle_age_histogram +
58 | plot_layout(heights = c(2, 1))
59 |
60 | ggsave("manuscript/figures/pdp-plot.png", plot = pdp_plot)
61 |
62 | fi <- ingredients::feature_importance(
63 | explainer_nn,
64 | loss_function = function(observed, predicted, weights) {
65 | sqrt(
66 |       sum((observed - predicted)^2 * weights) / sum(weights)
67 | )
68 | },
69 | # weights = testing_data$exposure,
70 | variables = predictors,
71 | B = 10,
72 | n_sample = 50000
73 | )
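# feature_importance() permutes one predictor at a time (B = 10 repetitions)
# and recomputes the loss above; the increase over the full-model loss is that
# variable's importance. The loss is an exposure-weighted RMSE; with
# hypothetical toy values:
#   obs <- c(100, 0); pred <- c(80, 10); w <- c(2, 1)
#   sqrt(sum((obs - pred)^2 * w) / sum(w))  # sqrt((800 + 100) / 3) ~= 17.32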
74 |
75 | fi_plot <- fi %>%
76 | as.data.frame() %>%
77 | (function(df) {
78 | df <- df %>%
79 | group_by(variable) %>%
80 | summarize(dropout_loss = mean(dropout_loss))
81 | full_model_loss <- df %>%
82 | filter(variable == "_full_model_") %>%
83 | pull(dropout_loss)
84 | df %>%
85 | filter(!variable %in% c("_full_model_", "_baseline_")) %>%
86 | ggplot(aes(x = reorder(variable, dropout_loss), y = dropout_loss)) +
87 | geom_bar(stat = "identity", alpha = 0.8) +
88 | geom_hline(yintercept = full_model_loss, col = "red", linetype = "dashed")+
89 | scale_y_continuous(limits = c(full_model_loss, NA),
90 | oob = rescale_none
91 | ) +
92 | xlab("Variable") +
93 | ylab("Dropout Loss (RMSE)") +
94 | coord_flip() +
95 | theme_bw() +
96 | NULL
97 | })
98 |
99 | ggsave("manuscript/figures/fi-plot.png", plot = fi_plot)
100 |
101 | sample_row <- testing_data[1,] %>%
102 | select(!!predictors)
103 | breakdown <- iBreakDown::break_down(explainer_nn, sample_row)
104 |
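# break_down() decomposes the single prediction for sample_row into additive
# per-variable contributions, starting from the intercept (the average
# prediction) and accumulating to the model's prediction; the waterfall plot
# built below visualizes each step of that decomposition.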
105 | df <- breakdown %>%
106 | as.data.frame() %>%
107 | mutate(start = lag(cumulative, default = first(contribution)),
108 | label = formatC(contribution, digits = 2, format = "f")) %>%
109 | mutate_at("label",
110 | ~ ifelse(!variable %in% c("intercept", "prediction") & .x > 0,
111 | paste0("+", .x),
112 | .x)) %>%
113 | mutate_at(c("variable", "variable_value"),
114 | ~ .x %>%
115 | sub("Entre 18 e 25 anos", "18-25", .) %>%
116 | sub("Passeio nacional", "Domestic passener", .) %>%
117 | sub("Masculino", "Male", .))
118 |
119 | breakdown_plot <- df %>%
120 | ggplot(aes(reorder(variable, position), fill = sign,
121 | xmin = position - 0.40,
122 | xmax = position + 0.40,
123 | ymin = start,
124 | ymax = cumulative)) +
125 | geom_rect(alpha = 0.4) +
126 | geom_errorbarh(data = df %>% filter(variable_value != ""),
127 | mapping = aes(xmax = position - 1.40,
128 | xmin = position + 0.40,
129 | y = cumulative), height = 0,
130 | linetype = "dotted",
131 | color = "blue") +
132 | geom_rect(
133 | data = df %>% filter(variable %in% c("intercept", "prediction")),
134 | mapping = aes(xmin = position - 0.4,
135 | xmax = position + 0.4,
136 | ymin = start,
137 | ymax = cumulative),
138 | color = "black") +
139 | scale_fill_manual(values = c("blue", "orange", NA)) +
140 | coord_flip() +
141 | theme_bw() +
142 | theme(legend.position = "none") +
143 | geom_text(
144 | aes(label = label,
145 | y = pmax(df$cumulative, df$cumulative - df$contribution)),
146 | nudge_y = 10,
147 | hjust = "inward",
148 | color = "black"
149 | ) +
150 | xlab("Variable") +
151 | ylab("Contribution") +
152 | theme(axis.text.y = element_text(size = 10))
153 |
154 | ggsave("manuscript/figures/breakdown-plot.png", plot = breakdown_plot)
155 |
--------------------------------------------------------------------------------
/manuscript/elsevier-with-titles-alphabetical.csl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/elsevier-with-titles-alphabetical.csl
--------------------------------------------------------------------------------
/manuscript/elsevier-with-titles.csl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/elsevier-with-titles.csl
--------------------------------------------------------------------------------
/manuscript/elsevier-vancouver.csl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/elsevier-vancouver.csl
--------------------------------------------------------------------------------
/manuscript/numcompress.sty:
--------------------------------------------------------------------------------
1 | %%
2 | %% This is file 'numcompress'.
3 | %%
4 | %% Copyright (C) 2009-2012 River Valley Technologies
5 | %%
6 | %%
7 | %% This package may be distributed under the terms of the LaTeX Project
8 | %% Public License, as described in lppl.txt in the base LaTeX distribution.
9 | %% Either version 1.0 or, at your option, any later version.
10 | %%
11 | %% $Id: numcompress.sty 187 2012-08-18 09:36:35Z rishi $
12 | %% $URL: http://lenova.river-valley.com/svn/elsbst/trunk/numcompress.sty $
13 | %%
14 | \NeedsTeXFormat{LaTeX2e}
15 | \def\Fileversion$#1: #2 ${\gdef\fileversion{#2}}
16 | \def\Filedate$#1: #2-#3-#4 #5 #6 #7 ${\gdef\filedate{#2/#3/#4}}
17 | \Fileversion$Rev: 187 $
18 | \Filedate$LastChangedDate: 2012-08-18 15:06:35 +0530 (Sat, 18 Aug 2012) $
19 | \ProvidesPackage{numcompress}
20 | [\filedate\space\fileversion\space numcompress (CVR)]
21 | \PackageWarningNoLine{numcompress}
22 | {****************************************\MessageBreak
23 | Package numcompress v,\fileversion\space loaded\MessageBreak
24 | [Compress numbers (CVR)]\MessageBreak
25 | ****************************************}
26 | \newif\ifdots \dotsfalse
27 | \newif\ifnumcompress \numcompresstrue
28 |
29 | \DeclareOption{dots}{\global\dotstrue}
30 | \DeclareOption{nodots}{\global\dotsfalse}
31 | \DeclareOption{compress}{\global\numcompresstrue}
32 | \DeclareOption{nocompress}{\global\numcompressfalse}
33 |
34 | \ProcessOptions
35 |
36 | \def\removeDot#1{\def\tmp{#1}%
37 | \ifx\tmp\@empty\else\@removeDot#1\@nil\fi}
38 |
39 | \def\@removeDot#1\@nil{\edef\fchar{\expandafter\@car#1\@nil}%
40 | \edef\rchar{\expandafter\@cdr#1!\@nil}%
41 | \def\@xmltempa{.}\def\@xmltempb{!}%
42 | \ifx\fchar\@xmltempb\relax\else%
43 | \ifx\fchar\@xmltempa\relax\else%
44 | \fchar\ignorespaces\fi\removeDot{\rchar}\fi}
45 |
46 | \def\First[#1]{\csname First#1\endcsname}
47 | \def\Second[#1]{\csname Second#1\endcsname}
48 |
49 | \def\parseFirstPage#1{\@tempcnta=0
50 | \@tfor\@digits:=#1\do{%
51 | {\global\advance\@tempcnta by 1
52 | \expandafter\xdef\csname
53 | First\the\@tempcnta\endcsname{\@digits}%
54 | \xdef\flength{\the\@tempcnta}}}}
55 |
56 | \def\parseSecondPage#1{\@tempcnta=0
57 | \@tfor\@digits:=#1\do{%
58 | {\global\advance\@tempcnta by 1
59 | \expandafter\xdef\csname
60 | Second\the\@tempcnta\endcsname{\@digits}%
61 | \xdef\llength{\the\@tempcnta}}}}
62 |
63 | \newif\ifdissimilar\dissimilarfalse
64 | \def\checkequal#1#2{\edef\Farg{#1}\edef\Sarg{#2}%
65 | \edef\One{A}%
66 | \ifcat\One\Farg \relax\else%
67 | \ifdissimilar\Sarg\else%
68 | \ifnum\Farg=\Sarg\relax\else\Sarg\dissimilartrue\fi\fi\fi}
69 | %
70 | \let\@@fpage\@empty
71 | \let\@@lpage\@empty
72 | \def\fpage@compress#1{%
73 | \gdef\@@fpage{#1}%
74 | \edef\llength{0}%
75 | \parseFirstPage{#1}%
76 | \ifnum\flength=\llength%
77 | \gdef\@fpage{\@@fpage}%
78 | \gdef\@lpage{%
79 | \@ifundefined{Second1}{}{\checkequal{\First[1]}{\Second[1]}}%
80 | \@ifundefined{Second2}{}{\checkequal{\First[2]}{\Second[2]}}%
81 | \@ifundefined{Second3}{}{\checkequal{\First[3]}{\Second[3]}}%
82 | \@ifundefined{Second4}{}{\checkequal{\First[4]}{\Second[4]}}%
83 | \@ifundefined{Second5}{}{\checkequal{\First[5]}{\Second[5]}}%
84 | }%
85 | \else%
86 | \gdef\@fpage{\@@fpage}%
87 | \gdef\@lpage{\@@lpage}%
88 | \fi}
89 |
90 | \def\lpage@compress#1{%
91 | \gdef\@@lpage{#1}%
92 | \parseSecondPage{#1}%
93 | \ifnum\flength=\llength%
94 | \gdef\@fpage{\@@fpage}%
95 | \gdef\@lpage{%
96 | \edef\One{A}%
97 | \edef\xFirst{\First[1]}%
98 | \edef\xSecond{\Second[1]}%
99 | \ifcat\One\xSecond\relax%
100 | \ifx\xFirst\xSecond%
101 | \@ifundefined{Second1}{}{\checkequal{\First[1]}{\Second[1]}}%
102 | \@ifundefined{Second2}{}{\checkequal{\First[2]}{\Second[2]}}%
103 | \@ifundefined{Second3}{}{\checkequal{\First[3]}{\Second[3]}}%
104 | \@ifundefined{Second4}{}{\checkequal{\First[4]}{\Second[4]}}%
105 | \@ifundefined{Second5}{}{\checkequal{\First[5]}{\Second[5]}}%
106 | \else#1\fi%
107 | \else%
108 | \ifx\xFirst\xSecond%
109 | \@ifundefined{Second1}{}{\checkequal{\First[1]}{\Second[1]}}%
110 | \@ifundefined{Second2}{}{\checkequal{\First[2]}{\Second[2]}}%
111 | \@ifundefined{Second3}{}{\checkequal{\First[3]}{\Second[3]}}%
112 | \@ifundefined{Second4}{}{\checkequal{\First[4]}{\Second[4]}}%
113 | \@ifundefined{Second5}{}{\checkequal{\First[5]}{\Second[5]}}%
114 | \else#1\fi%
115 | \fi%
116 | }%
117 | \else
118 | \gdef\@fpage{\@@fpage}%
119 | \gdef\@lpage{%
120 | \edef\Targ{#1}%
121 | \edef\One{A}%
122 | \edef\xFirst{\First[1]}%
123 | \edef\xSecond{\Second[1]}%
124 | \ifx\xFirst\xSecond
125 | \ifcat\One\xSecond\relax\else\@@lpage\fi%
126 | \else#1\fi%
127 | }%
128 | \fi}
129 |
130 | %\newwrite\xx
131 | %\immediate\openout\xx=tmpbib.tex
132 | \gdef\@@lpage@compress#1--#2\@nil{\lpage@compress{#1}}
133 | \gdef\@@@pages#1#2{\def\next{#2}%
134 | % \immediate\write\xx{[\the\c@NAT@ctr.]\space [#1][#2]}%
135 | \fpage@compress{#1}%\ifx\next\@empty\relax\else
136 | \@@lpage@compress#2\@nil%\fi
137 | {\@fpage\ifx\next\@empty\relax\else
138 | --\@lpage\fi}\resetall}
139 |
140 | \gdef\@@@page#1{#1\resetall}
141 |
142 | \def\mk@empty#1{\@tempcnta=1
143 | \loop\ifnum\@tempcnta<6
144 | \expandafter\let\csname#1\the\@tempcnta\endcsname\relax
145 | \advance\@tempcnta by 1 \repeat}
146 | \def\resetall{\let\@lpage\@empty\let\@fpage\@empty
147 | \def\flength{0}\def\llength{0}%
148 | \let\@@fpage\@empty\let\@@lpage\@empty
149 | \mk@empty{First}\mk@empty{Second}}
150 |
151 |
152 | \ifdots
153 | \gdef\xfnm[#1]{\unskip\space#1}
154 | \def\bibinfo#1#2{\@ifnextchar.{\@@bibinfo{#1}{#2}}{\@@@bibinfo{#1}{#2}}}
155 | \def\@@@bibinfo#1#2{\def\next{#1}%
156 | \def\@@@pg{pages}\def\@@@au{author}%
157 | \ifx\next\@@@pg\bibpages{#2}\else
158 | \ifx\next\@@@au\bibauthor{#2}\else
159 | #2\fi\fi}
160 | \def\@@bibinfo#1#2.{\def\next{#1}%
161 | \def\@@@pg{pages}\def\@@@au{author}%
162 | \ifx\next\@@@pg\bibpages{#2}.\else
163 | \ifx\next\@@@au\bibauthor{#2}\else
164 | #2.\fi\fi}
165 | \else
166 | \gdef\xfnm[#1]{\unskip\space\removeDot{#1}}
167 | \def\bibinfo#1#2{\def\next{#1}%
168 | \def\@@@pg{pages}\def\@@@au{author}%
169 | \ifx\next\@@@pg\bibpages{#2}\else
170 | \ifx\next\@@@au\bibauthor{#2}\else
171 | #2\fi\fi}
172 | \fi
173 |
174 | \ifnumcompress
175 | \def\bibpages#1{\@@bibpages#1--\\\@nil}
176 | \def\@@bibpages#1--#2\@nil{%
177 | \ifx\\#2\relax\@@@page{#1}\else
178 | \@@@pages{#1}{#2}\fi}
179 | \else
180 | \def\bibpages#1{#1}
181 | \fi
182 |
183 | \def\bibauthor#1{#1}
184 |
185 | \endinput
186 |
187 | %%
188 | %% End of package 'numcompress.sty'
189 | %%
190 |
--------------------------------------------------------------------------------
/manuscript/elsevier-harvard.csl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/elsevier-harvard.csl
--------------------------------------------------------------------------------
/manuscript/elsevier-harvard-without-titles.csl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasaai/explain-ml-pricing/HEAD/manuscript/elsevier-harvard-without-titles.csl
--------------------------------------------------------------------------------
/manuscript/explain-pricing.bib:
--------------------------------------------------------------------------------
1 | @article{kuoDeepTriangleDeep2018,
2 | title={DeepTriangle: A deep learning approach to loss reserving},
3 | author={Kuo, Kevin},
4 | journal={Risks},
5 | volume={7},
6 | number={3},
7 | pages={97},
8 | year={2019},
9 | publisher={Multidisciplinary Digital Publishing Institute}
10 | }
11 |
12 | @article{wuthrichMachineLearning2018,
13 | title = {Machine Learning in Individual Claims Reserving},
14 | volume = {2018},
15 | number = {6},
16 | journal = {Scandinavian Actuarial Journal},
17 | author = {W{\"u}thrich, Mario V.},
18 | year = {2018},
19 | pages = {465--480}
20 | }
21 |
22 | @article{gabrielliNeuralNetwork2019a,
23 | title = {Neural Network Embedding of the Over-Dispersed {{Poisson}} Reserving Model},
24 | journal = {Scandinavian Actuarial Journal},
25 | author = {Gabrielli, Andrea and Richman, Ronald and W{\"u}thrich, Mario V.},
26 | year = {2019},
27 | pages = {1--29}
28 | }
29 |
30 | @article{lecunDeepLearning2015,
31 | title = {Deep Learning},
32 | volume = {521},
33 | number = {7553},
34 | journal = {Nature},
35 | author = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
36 | year = {2015},
37 | pages = {436}
38 | }
39 |
40 | @techreport{gabrielliNeuralNetwork2019,
41 | address = {{Rochester, NY}},
42 | type = {{{SSRN Scholarly Paper}}},
43 | title = {A {{Neural Network Boosted Double Over}}-{{Dispersed Poisson Claims Reserving Model}}},
44 | abstract = {We present an actuarial loss reserving technique that takes into account both claim counts and claim amounts. Separate (over-dispersed) Poisson models for the claim counts and the claim amounts are combined by a joint embedding into a neural network architecture. As starting point of the neural network calibration we use exactly these two separate (over-dispersed) Poisson models. Such a nested model can be interpreted as a boosting machine. It allows us for joint modeling and mutual learning of claim counts and claim amounts beyond the two individual (over-dispersed) Poisson models. Moreover, this choice of neural network initialization guarantees stability and accelerates representation learning.},
45 | language = {en},
46 | number = {ID 3365517},
47 | institution = {{Social Science Research Network}},
48 | author = {Gabrielli, Andrea},
49 | month = apr,
50 | year = {2019},
51 | keywords = {boosting,chain-ladder reserves,claim amounts,claim counts,claims reserving in insurance,cross-classified over-dispersed Poisson model,double chain-ladder,embedding,learning across portfolios,neural network}
52 | }
53 |
54 | @article{doshi-velezRigorousScience2017,
55 | archivePrefix = {arXiv},
56 | eprinttype = {arxiv},
57 | eprint = {1702.08608},
58 | primaryClass = {cs, stat},
59 | title = {Towards {{A Rigorous Science}} of {{Interpretable Machine Learning}}},
60 | abstract = {As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.},
61 | journal = {arXiv:1702.08608 [cs, stat]},
62 | author = {{Doshi-Velez}, Finale and Kim, Been},
63 | month = feb,
64 | year = {2017},
65 | keywords = {Computer Science - Machine Learning,Statistics - Machine Learning,Computer Science - Artificial Intelligence}
66 | }
67 |
68 | @book{chollet2018deep,
69 | title = {Deep Learning with {{R}}},
70 | isbn = {978-1-61729-554-6},
71 | publisher = {{Manning Publications Company}},
72 | author = {Chollet, F. and Allaire, J.J.},
73 | year = {2018},
74 | lccn = {2018285360}
75 | }
76 |
77 | @inproceedings{biranExplanationJustification2017,
78 | title = {Explanation and Justification in Machine Learning: {{A}} Survey},
79 | volume = {8},
80 | shorttitle = {Explanation and Justification in Machine Learning},
81 | booktitle = {{{IJCAI}}-17 Workshop on Explainable {{AI}} ({{XAI}})},
82 | author = {Biran, Or and Cotton, Courtenay},
83 | year = {2017},
84 | pages = {1}
85 | }
86 |
87 | @inproceedings{kimExamplesAre2016,
88 | title = {Examples Are Not Enough, Learn to Criticize! Criticism for Interpretability},
89 | booktitle = {Advances in {{Neural Information Processing Systems}}},
90 | author = {Kim, Been and Khanna, Rajiv and Koyejo, Oluwasanmi O.},
91 | year = {2016},
92 | pages = {2280--2288}
93 | }
94 |
95 | @article{lange_1966,
96 | author = {Lange, Jeffrey T.},
97 | title = {General Liability Insurance Ratemaking},
98 | journal = {Proceedings of the Casualty Actuarial Society},
99 | address = {{Arlington, Virginia}},
100 | volume = {LIII},
101 | year = {1966},
102 | pages = {26--53}
103 | }
104 |
105 | @article{nelder_wedderburn_1972,
106 | author = {Nelder, J. A. and Wedderburn, R. W. M.},
107 | title = {Generalized Linear Models},
108 | journal = {Journal of the Royal Statistical Society},
109 | series = {Series A (General)},
110 | volume = {135},
111 | number = {3},
112 | year = {1972},
113 | pages = {370--384}
114 | }
115 |
116 | @article{anderson_2005,
117 | author = {Anderson, D. and Feldblum, S. and Modlin, C. and Schirmacher, D. and Schirmacher, E. and Thandi, N.},
118 | title = {A Practitioner's Guide to Generalized Linear Models},
119 | edition = {2},
120 | year = {2005},
121 | pages = {4--39}
122 | }
123 |
124 | @article{mildenhall_1999,
125 | author = {Mildenhall, Stephen J.},
126 | title = {A Systematic Relationship Between Minimum Bias and Generalized Linear Models},
127 | journal = {Proceedings of the Casualty Actuarial Society},
128 | address = {{Arlington, Virginia}},
129 | year = {1999},
130 | pages = {393--487},
131 | volume = {LXXXVI}
132 | }
133 |
134 | @article{bailey_simon_1960, title={Two Studies in Automobile Insurance Ratemaking}, volume={1}, DOI={10.1017/S0515036100009569}, number={4}, journal={ASTIN Bulletin}, publisher={Cambridge University Press}, author={Bailey, Robert A. and Simon, LeRoy J.}, year={1960}, pages={192–217}}
135 |
136 | @article{brown_1988,
137 | title = {Minimum Bias with Generalized Linear Models},
138 | author = {Robert L. Brown},
139 |   journal = {Proceedings of the Casualty Actuarial Society}, volume = {LXXV},
140 | year = {1988},
141 | pages = {187--217}
142 | }
143 |
144 | @article{wang_raj_2017,
145 | title = {On the Origin of Deep Learning},
146 | author = {Wang, Haohan and Raj, Bhiksha},
147 | journal = {arXiv:1702.07800v4 [cs.LG]},
148 | year = {2017}
149 | }
150 |
151 | @book{nilsson_2009,
152 | title = {The Quest for Artificial Intelligence},
153 | author = {Nilsson, Nils J.},
154 | year = {2009},
155 | publisher = {Cambridge University Press}
156 | }
157 |
158 | @article{robertson_2009,
159 | title = {NCCI's 2007 Hazard Group Mapping},
160 | author = {Robertson, J.P.},
161 | journal = {Variance},
162 | volume = {3},
163 | issue = {2},
164 | year = {2009},
165 | pages = {194--213}
166 | }
167 |
168 | @article{gao_2018,
169 | title = {Claims Frequency Modeling Using Telematics Car Driving Data},
170 |   author = {Gao, Guangyuan and Meng, Shengwang and W{\"u}thrich, Mario V.},
171 | year = {2018},
172 | journal = {Scandinavian Actuarial Journal}
173 | }
174 |
175 | @article{roel_2018,
176 |   title = {Unraveling the Predictive Power of Telematics Data in Car Insurance Pricing},
177 |   author = {Verbelen, Roel and Antonio, Katrien and Claeskens, Gerda},
178 |   year = {2018},
179 |   journal = {Journal of the Royal Statistical Society: Series C (Applied Statistics)}
180 | }
181 |
182 | @article{gao_2018_2,
183 | title = {Feature Extraction from Telematics Car Driving Heatmaps},
184 |   author = {Gao, Guangyuan and W{\"u}thrich, Mario V.},
185 | year = {2018},
186 | journal = {European Actuarial Journal},
187 | volume = {8},
188 | issue = {2},
189 | pages = {383--406}
190 | }
191 |
192 | @article{gao_2019,
193 |   author = {Gao, Guangyuan and W{\"u}thrich, Mario V.},
194 | year = {2019},
195 | month = {01},
196 | pages = {6},
197 | title = {Convolutional Neural Network Classification of Telematics Car Driving Data},
198 | volume = {7},
199 | journal = {Risks},
200 | doi = {10.3390/risks7010006}
201 | }
202 |
203 | @article{wuthrich_2017,
204 | title = {Covariate Selection from Telematics Car Driving Data},
205 |   author = {W{\"u}thrich, Mario V.},
206 | year = {2017},
207 | journal = {European Actuarial Journal},
208 | volume = {7},
209 | issue = {1},
210 | pages = {89--108}
211 |
212 | }
213 |
214 | @article{yang_2018,
215 | title = {Insurance Premium Prediction via Gradient Tree-Boosted Tweedie Compound Poisson Models},
216 | author = {Yang, Yi and Qian, Wei and Zou, Hui},
217 | year = {2018},
218 | journal = {Journal of Business and Economic Statistics},
219 | volume = {36},
220 | issue = {3},
221 | pages = {456--470}
222 | }
223 |
224 | @article{henckaerts_2018,
225 | title = {A Data Driven Binning Strategy for the Construction of Insurance Tariff Classes},
226 | author = {Henckaerts, Roel and Antonio, Katrien and Clijsters, Maxime and Verbelen, Roel},
227 | year = {2018},
228 | journal = {Scandinavian Actuarial Journal},
229 | volume = {2018},
230 | issue = {8},
231 | pages = {681--705}
232 | }
233 |
234 | @article{noll_2018,
235 |   author = {Noll, Alexander and Salzmann, Robert and W{\"u}thrich, Mario V.},
236 | title = {Case Study: French Motor Third-Party Liability Claims},
237 | journal = {SSRN Electronic Journal},
238 | year = {2018}
239 | }
240 |
241 | @article{spedicato_2018,
242 | author = {Spedicato, Giorgio and Dutang, Christophe and Petrini, Leonardo},
243 | title = {Machine Learning Methods to Perform Pricing Optimization: A Comparison with Standard Generalized Linear Models},
244 | journal = {Variance},
245 | volume = {12},
246 | issue = {1},
247 | year = {2018}
248 | }
249 |
250 | @article{dugas_2003,
251 | author = {Dugas, Charles and Bengio, Y. and Chapados, Nicolas and Vincent, P. and Denoncourt, G. and Fournier, C.},
252 | year = {2003},
253 | month = {12},
254 | title = {Statistical Learning Algorithms Applied to Automobile Insurance Ratemaking},
255 | doi = {10.1142/9789812794246_0004}
256 | }
257 |
258 | @article{goldburd_2016,
259 | title={Generalized linear models for insurance rating},
260 | author={Goldburd, Mark and Khare, Anand and Tevet, Dan},
261 | journal={Casualty Actuarial Society, CAS Monographs Series},
262 | number={5},
263 | year={2016}
264 | }
265 |
266 | @inproceedings{mango_1998,
267 | title={An application of Game Theory: Property Catastrophe Risk Load},
268 | author={Mango, Donald F},
269 | booktitle={Proceedings of the Casualty Actuarial Society},
270 | volume={LXXXV},
271 | pages={157--186},
272 | year={1998},
273 | }
274 |
275 | @article{asop_12,
276 | title = {Actuarial Standard of Practice No. 12 - Risk Classification (for All Practice Areas)},
277 | year = {2011},
278 | publisher = {Actuarial Standards Board},
279 | month = {May},
280 | url = {http://www.actuarialstandardsboard.org/wp-content/uploads/2014/02/asop012_132.pdf}
281 | }
282 |
283 | @article{asop_41,
284 | title = {Actuarial Standard of Practice No. 41 - Actuarial Communications},
285 | year = {2010},
286 | publisher = {Actuarial Standards Board},
287 | month = {December},
288 | url = {http://www.actuarialstandardsboard.org/wp-content/uploads/2014/02/asop041_120.pdf}
289 | }
290 |
291 | @article{asop_56,
292 | title = {Actuarial Standard of Practice No. 56 - Modeling},
293 | year = {2019},
294 | publisher = {Actuarial Standards Board},
295 | month = {December},
296 | url = {http://www.actuarialstandardsboard.org/wp-content/uploads/2020/01/asop056_195.pdf}
297 | }
298 |
299 | @article{cas_syllabus_2018,
300 | title = {Syllabus of Basic Education},
301 | author = {{Casualty Actuarial Society}},
302 | volume = {2018},
303 | year = {2018},
304 | publisher = {Casualty Actuarial Society},
305 | url = {https://www.casact.org/admissions/syllabus/ArchivedSyllabi/2018Syllabus.pdf}
306 | }
307 |
308 | @article{naic_summer_2017,
309 | title = {2017 Proceedings of the National Association of Insurance Commissioners},
310 | author = {{National Association of Insurance Commissioners}},
311 | year = {2017},
312 | month = {August},
313 | section = {8},
314 | pages = {148--160}
315 | }
316 |
317 | @article{naic_white_paper,
318 | title = {Regulatory Review of Predictive Models 10/15/19 Exposure Draft},
319 | author = {{National Association of Insurance Commissioners}},
320 | year = {2019},
321 | month = {October},
322 | url = {https://content.naic.org/sites/default/files/inline-files/Predictive%20Model%20White%20Paper%20Exposed%2010-15-19.docx}
323 | }
324 |
325 |
326 | @article{hiltonConversationalProcesses1990,
327 | title = {Conversational Processes and Causal Explanation.},
328 | volume = {107},
329 | number = {1},
330 | journal = {Psychological Bulletin},
331 | author = {Hilton, Denis J.},
332 | year = {1990},
333 | pages = {65}
334 | }
335 |
336 | @article{millerExplanationArtificial2017,
337 | archivePrefix = {arXiv},
338 | eprinttype = {arxiv},
339 | eprint = {1706.07269},
340 | primaryClass = {cs},
341 | title = {Explanation in {{Artificial Intelligence}}: {{Insights}} from the {{Social Sciences}}},
342 | shorttitle = {Explanation in {{Artificial Intelligence}}},
343 | abstract = {There has been a recent resurgence in the area of explainable artificial intelligence as researchers and practitioners seek to make their algorithms more understandable. Much of this research is focused on explicitly explaining decisions or actions to a human observer, and it should not be controversial to say that looking at how humans explain to each other can serve as a useful starting point for explanation in artificial intelligence. However, it is fair to say that most work in explainable artificial intelligence uses only the researchers' intuition of what constitutes a `good' explanation. There exists vast and valuable bodies of research in philosophy, psychology, and cognitive science of how people define, generate, select, evaluate, and present explanations, which argues that people employ certain cognitive biases and social expectations towards the explanation process. This paper argues that the field of explainable artificial intelligence should build on this existing research, and reviews relevant papers from philosophy, cognitive psychology/science, and social psychology, which study these topics. It draws out some important findings, and discusses ways that these can be infused with work on explainable artificial intelligence.},
344 | journal = {arXiv:1706.07269 [cs]},
345 | author = {Miller, Tim},
346 | month = jun,
347 | year = {2017},
348 | keywords = {Computer Science - Artificial Intelligence}
349 | }
350 |
351 | @article{millerExplainableAI2017,
352 | title = {Explainable {{AI}}: {{Beware}} of Inmates Running the Asylum or: {{How I}} Learnt to Stop Worrying and Love the Social and Behavioural Sciences},
353 | shorttitle = {Explainable {{AI}}},
354 | journal = {arXiv preprint arXiv:1712.00547},
355 | author = {Miller, Tim and Howe, Piers and Sonenberg, Liz},
356 | year = {2017}
357 | }
358 |
359 | @misc{biecekPMVEE,
360 | title = {Predictive Models: Explore, Explain, and Debug},
361 | author = {Biecek, Przemyslaw and Burzykowski, Tomasz},
362 | howpublished = {\url{https://pbiecek.github.io/PM_VEE/}},
363 | note = {Accessed: 2019-10-15},
364 | year = {2019}
365 | }
366 |
367 | @article{breimanRandomForests2001,
368 | title = {Random Forests},
369 | volume = {45},
370 | number = {1},
371 | journal = {Machine learning},
372 | author = {Breiman, Leo},
373 | year = {2001},
374 | pages = {5--32}
375 | }
376 |
377 | @article{fisherAllModels2018,
378 | archivePrefix = {arXiv},
379 | eprinttype = {arxiv},
380 | eprint = {1801.01489},
381 | primaryClass = {stat},
382 | title = {All {{Models}} Are {{Wrong}}, but {{Many}} Are {{Useful}}: {{Learning}} a {{Variable}}'s {{Importance}} by {{Studying}} an {{Entire Class}} of {{Prediction Models Simultaneously}}},
383 | shorttitle = {All {{Models}} Are {{Wrong}}, but {{Many}} Are {{Useful}}},
384 | journal = {arXiv:1801.01489 [stat]},
385 | author = {Fisher, Aaron and Rudin, Cynthia and Dominici, Francesca},
386 | month = jan,
387 | year = {2018},
388 | keywords = {Statistics - Methodology}
389 | }
390 |
391 | @article{friedmanGreedyFunction2001,
392 | title = {Greedy Function Approximation: A Gradient Boosting Machine},
393 | shorttitle = {Greedy Function Approximation},
394 | journal = {Annals of statistics},
395 | author = {Friedman, Jerome H.},
396 | year = {2001},
397 | pages = {1189--1232}
398 | }
399 |
400 | @article{molnar2018interpretable,
401 | title={Interpretable machine learning: A guide for making black box models explainable},
402 | author={Molnar, Christoph and others},
403 |   journal={E-book at <https://christophm.github.io/interpretable-ml-book/>, version dated},
404 | volume={10},
405 | year={2018}
406 | }
407 |
408 | @Manual{rlang,
409 | title = {R: A Language and Environment for Statistical Computing},
410 | author = {{R Core Team}},
411 | organization = {R Foundation for Statistical Computing},
412 | address = {Vienna, Austria},
413 | year = {2019},
414 | url = {https://www.R-project.org/},
415 | }
416 |
417 | @misc{tensorflow2015-whitepaper,
418 | title={{TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems},
419 | url={http://tensorflow.org/},
420 | note={Software available from tensorflow.org},
421 | author={
422 | Martin~Abadi and
423 | Ashish~Agarwal and
424 | Paul~Barham and
425 | Eugene~Brevdo and
426 | Zhifeng~Chen and
427 | Craig~Citro and
428 | Greg~S.~Corrado and
429 | Andy~Davis and
430 | Jeffrey~Dean and
431 | Matthieu~Devin and
432 | Sanjay~Ghemawat and
433 | Ian~Goodfellow and
434 | Andrew~Harp and
435 | Geoffrey~Irving and
436 | Michael~Isard and
437 | Yangqing Jia and
438 | Rafal~Jozefowicz and
439 | Lukasz~Kaiser and
440 | Manjunath~Kudlur and
441 | Josh~Levenberg and
442 | Dan~Man\'{e} and
443 | Rajat~Monga and
444 | Sherry~Moore and
445 | Derek~Murray and
446 | Chris~Olah and
447 | Mike~Schuster and
448 | Jonathon~Shlens and
449 | Benoit~Steiner and
450 | Ilya~Sutskever and
451 | Kunal~Talwar and
452 | Paul~Tucker and
453 | Vincent~Vanhoucke and
454 | Vijay~Vasudevan and
455 | Fernanda~Vi\'{e}gas and
456 | Oriol~Vinyals and
457 | Pete~Warden and
458 | Martin~Wattenberg and
459 | Martin~Wicke and
460 | Yuan~Yu and
461 | Xiaoqiang~Zheng},
462 | year={2015},
463 | }
464 |
465 | @article{goldstein2015peeking,
466 | title={Peeking inside the black box: Visualizing statistical learning with plots of individual conditional expectation},
467 | author={Goldstein, Alex and Kapelner, Adam and Bleich, Justin and Pitkin, Emil},
468 | journal={Journal of Computational and Graphical Statistics},
469 | volume={24},
470 | number={1},
471 | pages={44--65},
472 | year={2015},
473 | publisher={Taylor \& Francis}
474 | }
475 |
476 | @misc{apley2016visualizing,
477 | title={Visualizing the Effects of Predictor Variables in Black Box Supervised Learning Models},
478 | author={Daniel W. Apley and Jingyu Zhu},
479 | year={2016},
480 | eprint={1612.08468},
481 | archivePrefix={arXiv},
482 | primaryClass={stat.ME}
483 | }
484 |
485 | @inproceedings{ribeiro2016should,
486 | title={Why should i trust you?: Explaining the predictions of any classifier},
487 | author={Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos},
488 | booktitle={Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining},
489 | pages={1135--1144},
490 | year={2016},
491 | organization={ACM}
492 | }
493 |
494 | @incollection{NIPS2017_7062,
495 | title = {A Unified Approach to Interpreting Model Predictions},
496 | author = {Lundberg, Scott M and Lee, Su-In},
497 | booktitle = {Advances in Neural Information Processing Systems 30},
498 | editor = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
499 | pages = {4765--4774},
500 | year = {2017},
501 | publisher = {Curran Associates, Inc.},
502 | url = {http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf}
503 | }
504 |
--------------------------------------------------------------------------------
/manuscript/elsarticle.cls:
--------------------------------------------------------------------------------
1 | %%
2 | %% This is file `elsarticle.cls',
3 | %% generated with the docstrip utility.
4 | %%
5 | %% The original source files were:
6 | %%
7 | %% elsarticle.dtx (with options: `class')
8 | %%
9 | %% Copyright 2007, 2008 Elsevier Ltd.
10 | %%
11 | %% This file is part of the 'Elsarticle Bundle'.
12 | %% -------------------------------------------
13 | %%
14 | %% It may be distributed under the conditions of the LaTeX Project Public
15 | %% License, either version 1.2 of this license or (at your option) any
16 | %% later version. The latest version of this license is in
17 | %% http://www.latex-project.org/lppl.txt
18 | %% and version 1.2 or later is part of all distributions of LaTeX
19 | %% version 1999/12/01 or later.
20 | %%
21 | %% The list of all files belonging to the 'Elsarticle Bundle' is
22 | %% given in the file `manifest.txt'.
23 | %%
24 | %% $Id: elsarticle.cls,v 1.18 2008-08-28 06:03:47 cvr Exp $
25 | %%
26 | \def\RCSfile{elsarticle}%
27 | \def\RCSversion{1.0.1}%
28 | \def\RCSdate{2008/05/22}%
29 | \def\@shortjnl{\relax}
30 | \def\@journal{Elsevier Science} \def\@company{Elsevier Science}
31 | \def\@issn{000-0000}
32 | \def\@shortjid{elsarticle}
33 | \NeedsTeXFormat{LaTeX2e}[1995/12/01]
34 | \ProvidesClass{\@shortjid}[\RCSdate, \RCSversion: \@journal]
35 | \def\ABD{\AtBeginDocument}
36 | \newif\ifpreprint \preprintfalse
37 | \newif\iflongmktitle \longmktitlefalse
38 |
39 | \def\@blstr{1}
40 | \newdimen\@bls
41 | \@bls=\baselineskip
42 |
43 | \def\@finalWarning{%
44 | *****************************************************\MessageBreak
45 | This document is typeset in the CRC style which\MessageBreak
46 | is not suitable for submission.\MessageBreak
47 | \MessageBreak
48 | Please typeset again using 'preprint' option\MessageBreak
49 | for creating PDF suitable for submission.\MessageBreak
50 | ******************************************************\MessageBreak
51 | }
52 |
53 | \DeclareOption{preprint}{\global\preprinttrue
54 | \gdef\@blstr{1}\xdef\jtype{0}%
55 | \AtBeginDocument{\@twosidefalse\@mparswitchfalse}}
56 | \DeclareOption{final}{\gdef\@blstr{1}\global\preprintfalse}
57 | \DeclareOption{review}{\global\preprinttrue\gdef\@blstr{1.5}}
58 | \DeclareOption{authoryear}{\xdef\@biboptions{authoryear}}
59 | \DeclareOption{number}{\xdef\@biboptions{square,numbers}}
60 | \DeclareOption{numbers}{\xdef\@biboptions{square,numbers}}
61 | \DeclareOption{longtitle}{\global\longmktitletrue}
62 | \DeclareOption{5p}{\xdef\jtype{5}\global\preprintfalse
63 | \ExecuteOptions{twocolumn}}
64 | \def\jtype{0}
65 | \DeclareOption{3p}{\xdef\jtype{3}\global\preprintfalse}
66 | \DeclareOption{1p}{\xdef\jtype{1}\global\preprintfalse
67 | \AtBeginDocument{\@twocolumnfalse}}
68 | \DeclareOption{times}{\IfFileExists{txfonts.sty}%
69 | {\AtEndOfClass{\RequirePackage{txfonts}%
70 | \gdef\ttdefault{cmtt}%
71 | \let\iint\relax
72 | \let\iiint\relax
73 | \let\iiiint\relax
74 | \let\idotsint\relax
75 | \let\openbox\relax}}{\RequirePackage{times}}}
76 | \ExecuteOptions{a4paper,10pt,oneside,onecolumn,number,preprint}
77 | \DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}}
78 | \ProcessOptions
79 | \LoadClass{article}
80 | \RequirePackage{graphicx}
81 | \let\comma\@empty
82 | \let\tnotesep\@empty
83 | \def\title#1{\gdef\@title{#1}}
84 | \let\@title\@empty
85 |
86 | \def\elsLabel#1{\@bsphack\protected@write\@auxout{}%
87 | {\string\Newlabel{#1}{\@currentlabel}}\@esphack}
88 | \def\Newlabel#1#2{\expandafter\xdef\csname X@#1\endcsname{#2}}
89 |
90 | \def\Ref#1{\@ifundefined{X@#1}{0}{\csname X@#1\endcsname}%
91 | }
92 |
93 | \def\tnotemark[#1]{\textsuperscript{\@for\@@tmark:=#1\do{%
94 | \edef\tnotenum{\@ifundefined{X@\@@tmark}{1}{\Ref{\@@tmark}}}%
95 | \ifcase\tnotenum\or\ding{73}\or,\ding{73}\ding{73}\fi}}%
96 | }
97 | \let\@tnotemark\@empty
98 |
99 | \let\@tnotes\@empty
100 | \RequirePackage{pifont}
101 | \newcounter{tnote}
102 | \def\tnotetext[#1]#2{\g@addto@macro\@tnotes{%
103 | \refstepcounter{tnote}\elsLabel{#1}%
104 | \def\thefootnote{\ifcase\c@tnote\or\ding{73}\or\ding{73}\ding{73}\fi}%
105 | \footnotetext{#2}}}
106 |
107 | \newcounter{fnote}
108 | \def\fnmark[#1]{\let\comma\@empty
109 | \def\@fnmark{\@for\@@fnmark:=#1\do{%
110 | \edef\fnotenum{\@ifundefined{X@\@@fnmark}{1}{\Ref{\@@fnmark}}}%
111 | \unskip\comma\fnotenum\let\comma,}}%
112 | }
113 |
114 | \let\@fnotes\@empty\let\@fnmark\@empty
115 | \def\fntext[#1]#2{\g@addto@macro\@fnotes{%
116 | \refstepcounter{fnote}\elsLabel{#1}%
117 | \def\thefootnote{\thefnote}%
118 | \global\setcounter{footnote}{\thefnote}%
119 | \footnotetext{#2}}}
120 |
121 | \def\cormark[#1]{\edef\cnotenum{\Ref{#1}}\unskip%
122 | \textsuperscript{\sep\ifcase\cnotenum\or
123 | $\ast$\or$\ast\ast$\fi\hspace{-1pt}}\let\sep=,}
124 |
125 | \let\@cormark\@empty
126 | \let\@cornotes\@empty
127 | \newcounter{cnote}
128 | \def\cortext[#1]#2{\g@addto@macro\@cornotes{%
129 | \refstepcounter{cnote}\elsLabel{#1}%
130 | \def\thefootnote{\ifcase\thecnote\or$\ast$\or
131 | $\ast\ast$\fi}%
132 | \footnotetext{#2}}}
133 |
134 | \def\corref#1{\cormark[#1]}
135 | \def\fnref#1{\fnmark[#1]}
136 | \def\tnoteref#1{\tnotemark[#1]}
137 |
138 | \def\resetTitleCounters{\c@cnote=0
139 | \c@fnote=0 \c@tnote=0 \c@footnote=0}
140 |
141 | \let\eadsep\@empty
142 | \let\@elseads\@empty
143 | \let\@elsuads\@empty
144 | \let\@cormark\@empty
145 | \def\ead{\@ifnextchar[{\@uad}{\@ead}}
146 | \gdef\@ead#1{\bgroup\def\_{\string\_}\def\{{\string\{}%
147 | \def\}{\string\}}%
148 | \edef\tmp{\the\@eadauthor}
149 | \immediate\write\@auxout{\string\emailauthor
150 | {#1}{\expandafter\strip@prefix\meaning\tmp}}%
151 | \egroup
152 | }
153 | \newcounter{ead}
154 | \gdef\emailauthor#1#2{\stepcounter{ead}%
155 | \g@addto@macro\@elseads{\raggedright%
156 | \let\corref\@gobble
157 | \eadsep\texttt{#1} (#2)\def\eadsep{\unskip,\space}}%
158 | }
159 | \gdef\@uad[#1]#2{\bgroup
160 | \edef\tmp{\the\@eadauthor}
161 | \immediate\write\@auxout{\string\urlauthor
162 | {#2}{\expandafter\strip@prefix\meaning\tmp}}%
163 | \egroup
164 | }
165 | \def\urlauthor#1#2{\g@addto@macro\@elsuads{\let\corref\@gobble%
166 | \raggedright\eadsep\texttt{#1}\space(#2)%
167 | \def\eadsep{\unskip,\space}}%
168 | }
169 |
170 | \def\elsauthors{}
171 | \def\pprinttitle{}
172 | \let\authorsep\@empty
173 | \let\sep\@empty
174 | \newcounter{author}
175 | \def\author{\@ifnextchar[{\@@author}{\@author}}
176 |
177 | \newtoks\@eadauthor
178 | \def\@@author[#1]#2{\g@addto@macro\elsauthors{%
179 | \def\baselinestretch{1}%
180 | \authorsep#2\unskip\textsuperscript{%#1%
181 | \@for\@@affmark:=#1\do{%
182 | \edef\affnum{\@ifundefined{X@\@@affmark}{1}{\Ref{\@@affmark}}}%
183 | \unskip\sep\affnum\let\sep=,}%
184 | \ifx\@fnmark\@empty\else\unskip\sep\@fnmark\let\sep=,\fi}%
185 | \def\authorsep{\unskip,\space}%
186 | \global\let\sep\@empty
187 | \global\let\@fnmark\@empty}%
188 | \@eadauthor={#2}
189 | }
190 |
191 | \def\@author#1{\g@addto@macro\elsauthors{\normalsize%
192 | \def\baselinestretch{1}%
193 | \upshape\authorsep#1\unskip\textsuperscript{%
194 | \ifx\@fnmark\@empty\else\unskip\sep\@fnmark\let\sep=,\fi}%
195 | \def\authorsep{\unskip,\space}%
196 | \global\let\@fnmark\@empty
197 | \global\let\sep\@empty}%
198 | \@eadauthor={{#1}}
199 | }
200 |
201 | \def\elsaddress{}
202 | \def\addsep{\par\vskip6pt}
203 | \def\address{\@ifnextchar[{\@@address}{\@address}}
204 |
205 | \newcounter{affn}
206 | \renewcommand\theaffn{\alph{affn}}
207 |
208 | \long\def\@@address[#1]#2{\g@addto@macro\elsaddress{%
209 | \def\baselinestretch{1}%
210 | \refstepcounter{affn}\elsLabel{#1}%
211 | \textsuperscript{\theaffn}#2\par}}
212 |
213 | \long\def\@address#1{\g@addto@macro\elsauthors{%
214 | \def\baselinestretch{1}%
215 | \addsep\footnotesize\itshape#1\def\addsep{\par\vskip6pt}%
216 | \def\authorsep{\par\vskip8pt}}}
217 |
218 | \newbox\absbox
219 | \renewenvironment{abstract}{\global\setbox\absbox=\vbox\bgroup
220 | \hsize=\textwidth\def\baselinestretch{1}%
221 | \noindent\unskip\textbf{Abstract}
222 | \par\medskip\noindent\unskip\ignorespaces}
223 | {\egroup}
224 |
225 | \newbox\keybox
226 | \def\keyword{%
227 | \def\sep{\unskip, }%
228 | \def\MSC{\@ifnextchar[{\@MSC}{\@MSC[2000]}}
229 | \def\@MSC[##1]{\par\leavevmode\hbox {\it ##1~MSC:\space}}%
230 | \def\PACS{\par\leavevmode\hbox {\it PACS:\space}}%
231 | \def\JEL{\par\leavevmode\hbox {\it JEL:\space}}%
232 | \global\setbox\keybox=\vbox\bgroup\hsize=\textwidth
233 | \normalsize\normalfont\def\baselinestretch{1}
234 | \parskip\z@
235 | \noindent\textit{Key words: }
236 | \raggedright % Keywords are not justified.
237 | \ignorespaces}
238 | \def\endkeyword{\par \egroup}
239 |
240 | \newdimen\Columnwidth
241 | \Columnwidth=\columnwidth
242 |
243 | \def\printFirstPageNotes{%
244 | \iflongmktitle
245 | \let\columnwidth=\textwidth\fi
246 | \ifx\@tnotes\@empty\else\@tnotes\fi
247 | \ifx\@cornotes\@empty\else\@cornotes\fi
248 | \ifx\@elseads\@empty\relax\else
249 | \let\thefootnote\relax
250 | \footnotetext{\ifnum\theead=1\relax
251 | \textit{Email address:\space}\else
252 | \textit{Email addresses:\space}\fi
253 | \@elseads}\fi
254 | \ifx\@elsuads\@empty\relax\else
255 | \let\thefootnote\relax
256 | \footnotetext{\textit{URL:\space}%
257 | \@elsuads}\fi
258 | \ifx\@fnotes\@empty\else\@fnotes\fi
259 | \iflongmktitle\if@twocolumn
260 | \let\columnwidth=\Columnwidth\fi\fi
261 | }
262 |
263 | \long\def\pprintMaketitle{\clearpage
264 | \iflongmktitle\if@twocolumn\let\columnwidth=\textwidth\fi\fi
265 | \resetTitleCounters
266 | \def\baselinestretch{1}%
267 | \printFirstPageNotes
268 | \begin{center}%
269 | \thispagestyle{pprintTitle}%
270 | \def\baselinestretch{1}%
271 | \Large\@title\par\vskip18pt
272 | \normalsize\elsauthors\par\vskip10pt
273 | \footnotesize\itshape\elsaddress\par\vskip36pt
274 | %\hrule\vskip12pt
275 | \ifvoid\absbox\else\unvbox\absbox\par\vskip10pt\fi
276 | \ifvoid\keybox\else\unvbox\keybox\par\vskip10pt\fi
277 | %\hrule\vskip12pt
278 | \end{center}%
279 | \gdef\thefootnote{\arabic{footnote}}%
280 | }
281 |
282 | \def\printWarning{%
283 | \mbox{}\par\vfill\par\bgroup
284 | \fboxsep12pt\fboxrule1pt
285 | \hspace*{.18\textwidth}
286 | \fcolorbox{gray50}{gray10}{\box\warnbox}
287 | \egroup\par\vfill\thispagestyle{empty}
288 | \setcounter{page}{0}
289 | \clearpage}
290 |
291 | \long\def\finalMaketitle{%
292 | \resetTitleCounters
293 | \def\baselinestretch{1}%
294 | \MaketitleBox
295 | \thispagestyle{pprintTitle}%
296 | \gdef\thefootnote{\arabic{footnote}}%
297 | }
298 |
299 | \long\def\MaketitleBox{%
300 | \resetTitleCounters
301 | \def\baselinestretch{1}%
302 | \begin{center}%
303 | \def\baselinestretch{1}%
304 | \Large\@title\par\vskip18pt
305 | \normalsize\elsauthors\par\vskip10pt
306 | \footnotesize\itshape\elsaddress\par\vskip36pt
307 | %\hrule\vskip12pt
308 | \ifvoid\absbox\else\unvbox\absbox\par\vskip10pt\fi
309 | \ifvoid\keybox\else\unvbox\keybox\par\vskip10pt\fi
310 | %\hrule\vskip12pt
311 | \end{center}%
312 | }
313 |
314 | \def\FNtext#1{\par\bgroup\footnotesize#1\egroup}
315 | \newdimen\space@left
316 | \def\alarm#1{\typeout{******************************}%
317 | \typeout{#1}%
318 | \typeout{******************************}%
319 | }
320 | \long\def\getSpaceLeft{%\global\@twocolumnfalse%
321 | \global\setbox0=\vbox{\hsize=\textwidth\MaketitleBox}%
322 | \global\setbox1=\vbox{\hsize=\textwidth
323 | \let\footnotetext\FNtext
324 | \printFirstPageNotes}%
325 | \xdef\noteheight{\the\ht1}%
326 | \xdef\titleheight{\the\ht0}%
327 | \@tempdima=\vsize
328 | \advance\@tempdima-\noteheight
329 | \advance\@tempdima-1\baselineskip
330 | }
331 |
332 | \skip\footins=24pt
333 |
334 | \newbox\els@boxa
335 | \newbox\els@boxb
336 |
337 | \ifpreprint
338 | \def\maketitle{\pprintMaketitle}
339 | \else
340 | \ifnum\jtype=1
341 | \def\maketitle{%
342 | \iflongmktitle\getSpaceLeft
343 | \global\setbox\els@boxa=\vsplit0 to \@tempdima
344 | \box\els@boxa\par\resetTitleCounters
345 | \thispagestyle{pprintTitle}%
346 | \printFirstPageNotes
347 | \box0%
348 | \else
349 | \finalMaketitle\printFirstPageNotes
350 | \fi
351 | \gdef\thefootnote{\arabic{footnote}}}%
352 | \else
353 | \ifnum\jtype=5
354 | \def\maketitle{%
355 | \iflongmktitle\getSpaceLeft
356 | \global\setbox\els@boxa=\vsplit0 to \@tempdima
357 | \box\els@boxa\par\resetTitleCounters
358 | \thispagestyle{pprintTitle}%
359 | \printFirstPageNotes
360 | \twocolumn[\box0]%\printFirstPageNotes
361 | \else
362 | \twocolumn[\finalMaketitle]\printFirstPageNotes
363 | \fi
364 | \gdef\thefootnote{\arabic{footnote}}}
365 | \else
366 | \if@twocolumn
367 | \def\maketitle{%
368 | \iflongmktitle\getSpaceLeft
369 | \global\setbox\els@boxa=\vsplit0 to \@tempdima
370 | \box\els@boxa\par\resetTitleCounters
371 | \thispagestyle{pprintTitle}%
372 | \printFirstPageNotes
373 | \twocolumn[\box0]%
374 | \else
375 | \twocolumn[\finalMaketitle]\printFirstPageNotes
376 | \fi
377 | \gdef\thefootnote{\arabic{footnote}}}%
378 | \else
379 | \def\maketitle{%
380 | \iflongmktitle\getSpaceLeft
381 | \global\setbox\els@boxa=\vsplit0 to \@tempdima
382 | \box\els@boxa\par\resetTitleCounters
383 | \thispagestyle{pprintTitle}%
384 | \printFirstPageNotes
385 | \box0%
386 | \else
387 | \finalMaketitle\printFirstPageNotes
388 | \fi
389 | \gdef\thefootnote{\arabic{footnote}}}%
390 | \fi
391 | \fi
392 | \fi
393 | \fi
394 | \def\ps@pprintTitle{%
395 | \let\@oddhead\@empty
396 | \let\@evenhead\@empty
397 | \def\@oddfoot{\footnotesize\itshape
398 | Preprint submitted to \ifx\@journal\@empty Elsevier
399 | \else\@journal\fi\hfill\today}%
400 | \let\@evenfoot\@oddfoot}
401 |
402 | \def\@seccntDot{.}
403 | \def\@seccntformat#1{\csname the#1\endcsname\@seccntDot\hskip 0.5em}
404 |
405 | \renewcommand\section{\@startsection {section}{1}{\z@}%
406 | {18\p@ \@plus 6\p@ \@minus 3\p@}%
407 | {9\p@ \@plus 6\p@ \@minus 3\p@}%
408 | {\normalsize\bfseries\boldmath}}
409 | \renewcommand\subsection{\@startsection{subsection}{2}{\z@}%
410 | {12\p@ \@plus 6\p@ \@minus 3\p@}%
411 | {3\p@ \@plus 6\p@ \@minus 3\p@}%
412 | {\normalfont\normalsize\itshape}}
413 | \renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}%
414 | {12\p@ \@plus 6\p@ \@minus 3\p@}%
415 | {\p@}%
416 | {\normalfont\normalsize\itshape}}
417 |
418 | \def\paragraph{\secdef{\els@aparagraph}{\els@bparagraph}}
419 | \def\els@aparagraph[#1]#2{\elsparagraph[#1]{#2.}}
420 | \def\els@bparagraph#1{\elsparagraph*{#1.}}
421 |
422 | \newcommand\elsparagraph{\@startsection{paragraph}{4}{0\z@}%
423 | {10\p@ \@plus 6\p@ \@minus 3\p@}%
424 | {-6\p@}%
425 | {\normalfont\itshape}}
426 | \newdimen\leftMargin
427 | \leftMargin=2em
428 | \newtoks\@enLab %\newtoks\@enfont
429 | \def\@enQmark{?}
430 | \def\@enLabel#1#2{%
431 | \edef\@enThe{\noexpand#1{\@enumctr}}%
432 | \@enLab\expandafter{\the\@enLab\csname the\@enumctr\endcsname}%
433 | \@enloop}
434 | \def\@enSpace{\afterassignment\@enSp@ce\let\@tempa= }
435 | \def\@enSp@ce{\@enLab\expandafter{\the\@enLab\space}\@enloop}
436 | \def\@enGroup#1{\@enLab\expandafter{\the\@enLab{#1}}\@enloop}
437 | \def\@enOther#1{\@enLab\expandafter{\the\@enLab#1}\@enloop}
438 | \def\@enloop{\futurelet\@entemp\@enloop@}
439 | \def\@enloop@{%
440 | \ifx A\@entemp \def\@tempa{\@enLabel\Alph }\else
441 | \ifx a\@entemp \def\@tempa{\@enLabel\alph }\else
442 | \ifx i\@entemp \def\@tempa{\@enLabel\roman }\else
443 | \ifx I\@entemp \def\@tempa{\@enLabel\Roman }\else
444 | \ifx 1\@entemp \def\@tempa{\@enLabel\arabic}\else
445 | \ifx \@sptoken\@entemp \let\@tempa\@enSpace \else
446 | \ifx \bgroup\@entemp \let\@tempa\@enGroup \else
447 | \ifx \@enum@\@entemp \let\@tempa\@gobble \else
448 | \let\@tempa\@enOther
449 | \fi\fi\fi\fi\fi\fi\fi\fi
450 | \@tempa}
451 | \newlength{\@sep} \newlength{\@@sep}
452 | \setlength{\@sep}{.5\baselineskip plus.2\baselineskip
453 | minus.2\baselineskip}
454 | \setlength{\@@sep}{.1\baselineskip plus.01\baselineskip
455 | minus.05\baselineskip}
456 | \providecommand{\sfbc}{\rmfamily\upshape}
457 | \providecommand{\sfn}{\rmfamily\upshape}
458 | \def\@enfont{\ifnum \@enumdepth >1\let\@nxt\sfn \else\let\@nxt\sfbc \fi\@nxt}
459 | \def\enumerate{%
460 | \ifnum \@enumdepth >3 \@toodeep\else
461 | \advance\@enumdepth \@ne
462 | \edef\@enumctr{enum\romannumeral\the\@enumdepth}\fi
463 | \@ifnextchar[{\@@enum@}{\@enum@}}
464 | \def\@@enum@[#1]{%
465 | \@enLab{}\let\@enThe\@enQmark
466 | \@enloop#1\@enum@
467 | \ifx\@enThe\@enQmark\@warning{The counter will not be printed.%
468 | ^^J\space\@spaces\@spaces\@spaces The label is: \the\@enLab}\fi
469 | \expandafter\edef\csname label\@enumctr\endcsname{\the\@enLab}%
470 | \expandafter\let\csname the\@enumctr\endcsname\@enThe
471 | \csname c@\@enumctr\endcsname7
472 | \expandafter\settowidth
473 | \csname leftmargin\romannumeral\@enumdepth\endcsname
474 | {\the\@enLab\hskip\labelsep}%
475 | \@enum@}
476 | \def\@enum@{\list{{\@enfont\csname label\@enumctr\endcsname}}%
477 | {\usecounter{\@enumctr}\def\makelabel##1{\hss\llap{##1}}%
478 | \ifnum \@enumdepth>1\setlength{\topsep}{\@@sep}\else
479 | \setlength{\topsep}{\@sep}\fi
480 | \ifnum \@enumdepth>1\setlength{\itemsep}{0pt plus1pt minus1pt}%
481 | \else \setlength{\itemsep}{\@@sep}\fi
482 | %\setlength\leftmargin{\leftMargin}%%%{1.8em}
483 | \setlength{\parsep}{0pt plus1pt minus1pt}%
484 | \setlength{\parskip}{0pt plus1pt minus1pt}
485 | }}
486 |
487 | \def\endenumerate{\par\ifnum \@enumdepth >1\addvspace{\@@sep}\else
488 | \addvspace{\@sep}\fi \endlist}
489 |
490 | \def\sitem{\@noitemargtrue\@item[\@itemlabel *]}
491 |
492 | \def\itemize{\@ifnextchar[{\@Itemize}{\@Itemize[]}}
493 |
494 | \def\@Itemize[#1]{\def\next{#1}%
495 | \ifnum \@itemdepth >\thr@@\@toodeep\else
496 | \advance\@itemdepth\@ne
497 | \ifx\next\@empty\else\expandafter\def\csname
498 | labelitem\romannumeral\the\@itemdepth\endcsname{#1}\fi%
499 | \edef\@itemitem{labelitem\romannumeral\the\@itemdepth}%
500 | \expandafter\list\csname\@itemitem\endcsname
501 | {\def\makelabel##1{\hss\llap{##1}}}%
502 | \fi}
503 | \def\newdefinition#1{%
504 | \@ifnextchar[{\@odfn{#1}}{\@ndfn{#1}}}%]
505 | \def\@ndfn#1#2{%
506 | \@ifnextchar[{\@xndfn{#1}{#2}}{\@yndfn{#1}{#2}}}
507 | \def\@xndfn#1#2[#3]{%
508 | \expandafter\@ifdefinable\csname #1\endcsname
509 | {\@definecounter{#1}\@newctr{#1}[#3]%
510 | \expandafter\xdef\csname the#1\endcsname{%
511 | \expandafter\noexpand\csname the#3\endcsname \@dfncountersep
512 | \@dfncounter{#1}}%
513 | \global\@namedef{#1}{\@dfn{#1}{#2}}%
514 | \global\@namedef{end#1}{\@enddefinition}}}
515 | \def\@yndfn#1#2{%
516 | \expandafter\@ifdefinable\csname #1\endcsname
517 | {\@definecounter{#1}%
518 | \expandafter\xdef\csname the#1\endcsname{\@dfncounter{#1}}%
519 | \global\@namedef{#1}{\@dfn{#1}{#2}}%
520 | \global\@namedef{end#1}{\@enddefinition}}}
521 | \def\@odfn#1[#2]#3{%
522 | \@ifundefined{c@#2}{\@nocounterr{#2}}%
523 | {\expandafter\@ifdefinable\csname #1\endcsname
524 | {\global\@namedef{the#1}{\@nameuse{the#2}}
525 | \global\@namedef{#1}{\@dfn{#2}{#3}}%
526 | \global\@namedef{end#1}{\@enddefinition}}}}
527 | \def\@dfn#1#2{%
528 | \refstepcounter{#1}%
529 | \@ifnextchar[{\@ydfn{#1}{#2}}{\@xdfn{#1}{#2}}}
530 | \def\@xdfn#1#2{%
531 | \@begindefinition{#2}{\csname the#1\endcsname}\ignorespaces}
532 | \def\@ydfn#1#2[#3]{%
533 | \@opargbegindefinition{#2}{\csname the#1\endcsname}{#3}\ignorespaces}
534 | \def\@dfncounter#1{\noexpand\arabic{#1}}
535 | \def\@dfncountersep{.}
536 | \def\@begindefinition#1#2{\trivlist
537 | \item[\hskip\labelsep{\bfseries #1\ #2.}]\upshape}
538 | \def\@opargbegindefinition#1#2#3{\trivlist
539 | \item[\hskip\labelsep{\bfseries #1\ #2\ (#3).}]\upshape}
540 | \def\@enddefinition{\endtrivlist}
541 |
542 | \def\@begintheorem#1#2{\trivlist
543 | \let\baselinestretch\@blstr
544 | \item[\hskip \labelsep{\bfseries #1\ #2.}]\itshape}
545 | \def\@opargbegintheorem#1#2#3{\trivlist
546 | \let\baselinestretch\@blstr
547 | \item[\hskip \labelsep{\bfseries #1\ #2\ (#3).}]\itshape}
548 |
549 | \def\newproof#1{%
550 | \@ifnextchar[{\@oprf{#1}}{\@nprf{#1}}}
551 | \def\@nprf#1#2{%
552 | \@ifnextchar[{\@xnprf{#1}{#2}}{\@ynprf{#1}{#2}}}
553 | \def\@xnprf#1#2[#3]{%
554 | \expandafter\@ifdefinable\csname #1\endcsname
555 | {\@definecounter{#1}\@newctr{#1}[#3]%
556 | \expandafter\xdef\csname the#1\endcsname{%
557 | \expandafter\noexpand\csname the#3\endcsname \@prfcountersep
558 | \@prfcounter{#1}}%
559 | \global\@namedef{#1}{\@prf{#1}{#2}}%
560 | \global\@namedef{end#1}{\@endproof}}}
561 | \def\@ynprf#1#2{%
562 | \expandafter\@ifdefinable\csname #1\endcsname
563 | {\@definecounter{#1}%
564 | \expandafter\xdef\csname the#1\endcsname{\@prfcounter{#1}}%
565 | \global\@namedef{#1}{\@prf{#1}{#2}}%
566 | \global\@namedef{end#1}{\@endproof}}}
567 | \def\@oprf#1[#2]#3{%
568 | \@ifundefined{c@#2}{\@nocounterr{#2}}%
569 | {\expandafter\@ifdefinable\csname #1\endcsname
570 | {\global\@namedef{the#1}{\@nameuse{the#2}}%
571 | \global\@namedef{#1}{\@prf{#2}{#3}}%
572 | \global\@namedef{end#1}{\@endproof}}}}
573 | \def\@prf#1#2{%
574 | \refstepcounter{#1}%
575 | \@ifnextchar[{\@yprf{#1}{#2}}{\@xprf{#1}{#2}}}
576 | \def\@xprf#1#2{%
577 | \@beginproof{#2}{\csname the#1\endcsname}\ignorespaces}
578 | \def\@yprf#1#2[#3]{%
579 | \@opargbeginproof{#2}{\csname the#1\endcsname}{#3}\ignorespaces}
580 | \def\@prfcounter#1{\noexpand\arabic{#1}}
581 | \def\@prfcountersep{.}
582 | \def\@beginproof#1#2{\trivlist\let\baselinestretch\@blstr
583 | \item[\hskip \labelsep{\scshape #1.}]\rmfamily}
584 | \def\@opargbeginproof#1#2#3{\trivlist\let\baselinestretch\@blstr
585 | \item[\hskip \labelsep{\scshape #1\ (#3).}]\rmfamily}
586 | \def\@endproof{\endtrivlist}
587 | \newcommand*{\qed}{\hbox{}\hfill$\Box$}
588 |
589 | \@ifundefined{@biboptions}{\xdef\@biboptions{numbers}}{}
590 | \InputIfFileExists{\jobname.spl}{}{}
591 | \RequirePackage[\@biboptions]{natbib}
592 |
593 | \newwrite\splwrite
594 | \immediate\openout\splwrite=\jobname.spl
595 | \def\biboptions#1{\def\next{#1}\immediate\write\splwrite{%
596 | \string\g@addto@macro\string\@biboptions{%
597 | ,\expandafter\strip@prefix\meaning\next}}}
598 |
599 | \let\baselinestretch=\@blstr
600 |
601 | \ifnum\jtype=1
602 | \RequirePackage{geometry}
603 | \geometry{twoside,
604 | paperwidth=210mm,
605 | paperheight=297mm,
606 | textheight=562pt,
607 | textwidth=384pt,
608 | centering,
609 | headheight=50pt,
610 | headsep=12pt,
611 | footskip=12pt,
612 | footnotesep=24pt plus 2pt minus 12pt,
613 | }
614 | \global\let\bibfont=\footnotesize
615 | \global\bibsep=0pt
616 | \if@twocolumn\global\@twocolumnfalse\fi
617 | \else\ifnum\jtype=3
618 | \RequirePackage{geometry}
619 | \geometry{twoside,
620 | paperwidth=210mm,
621 | paperheight=297mm,
622 | textheight=622pt,
623 | textwidth=468pt,
624 | centering,
625 | headheight=50pt,
626 | headsep=12pt,
627 | footskip=12pt,
628 | footnotesep=24pt plus 2pt minus 12pt,
629 | columnsep=2pc
630 | }
631 | \global\let\bibfont=\footnotesize
632 | \global\bibsep=0pt
633 | \if@twocolumn\input{fleqn.clo}\fi
634 | \else\ifnum\jtype=5
635 | \RequirePackage{geometry}
636 | \geometry{twoside,
637 | paperwidth=210mm,
638 | paperheight=297mm,
639 | textheight=682pt,
640 | textwidth=522pt,
641 | centering,
642 | headheight=50pt,
643 | headsep=12pt,
644 | footskip=18pt,
645 | footnotesep=24pt plus 2pt minus 12pt,
646 | columnsep=18pt
647 | }%
648 | \global\let\bibfont=\footnotesize
649 | \global\bibsep=0pt
650 | \input{fleqn.clo}
651 | \global\@twocolumntrue
652 | %%
653 | %% End of option '5p'
654 | %%
655 | \fi\fi\fi
656 |
657 | \def\journal#1{\gdef\@journal{#1}}
658 | \let\@journal\@empty
659 | \newenvironment{frontmatter}{}{\maketitle}
660 |
661 | \long\def\@makecaption#1#2{%
662 | \vskip\abovecaptionskip\footnotesize
663 | \sbox\@tempboxa{#1: #2}%
664 | \ifdim \wd\@tempboxa >\hsize
665 | #1: #2\par
666 | \else
667 | \global \@minipagefalse
668 | \hb@xt@\hsize{\hfil\box\@tempboxa\hfil}%
669 | \fi
670 | \vskip\belowcaptionskip}
671 |
672 | \AtBeginDocument{\@ifpackageloaded{hyperref}
673 | {\def\@linkcolor{blue}
674 | \def\@anchorcolor{blue}
675 | \def\@citecolor{blue}
676 | \def\@filecolor{blue}
677 | \def\@urlcolor{blue}
678 | \def\@menucolor{blue}
679 | \def\@pagecolor{blue}
680 | \begingroup
681 | \@makeother\`%
682 | \@makeother\=%
683 | \edef\x{%
684 | \edef\noexpand\x{%
685 | \endgroup
686 | \noexpand\toks@{%
687 | \catcode 96=\noexpand\the\catcode`\noexpand\`\relax
688 | \catcode 61=\noexpand\the\catcode`\noexpand\=\relax
689 | }%
690 | }%
691 | \noexpand\x
692 | }%
693 | \x
694 | \@makeother\`
695 | \@makeother\=
696 | }{}}
697 | %%
698 |
699 | \endinput
700 | %%
701 | %% End of file `elsarticle.cls'.
702 |
--------------------------------------------------------------------------------
/manuscript/manuscript.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Towards Explainability of Machine Learning Models in Insurance Pricing
3 | author:
4 | - name: Kevin Kuo
5 | email: kevin@kasa.ai
6 | affiliation: a
7 | footnote: Corresponding Author
8 | - name: Daniel Lupton
9 | email: dlupton@taylorandmulder.com
10 | affiliation: b
11 | address:
12 | - code: a
13 | address: Kasa AI, 3040 78th Ave SE \#1271, Mercer Island, WA 98040, USA
14 | - code: b
15 | address: Taylor & Mulder, 10508 Rivers Bend Lane, Potomac, MD 20854, USA
16 | abstract: |
17 | Machine learning methods have garnered increasing interest among actuaries in
18 | recent years. However, their adoption by practitioners has been limited, partly
19 | due to the lack of transparency of these methods, as compared to generalized
20 | linear models. In this paper, we discuss the need for model interpretability in
21 | property & casualty insurance ratemaking, propose a framework for explaining
22 | models, and present a case study to illustrate the framework.
23 |
24 | journal: "Variance"
25 | date: "`r Sys.Date()`"
26 | bibliography: explain-pricing.bib
27 | #linenumbers: true
28 | numbersections: true
29 | # csl: elsevier-harvard.csl
30 | output:
31 | bookdown::pdf_book:
32 | base_format: rticles::elsevier_article
33 | layout: preprint, 3p, twocolumn, letterpaper, 10pt
34 | ---
35 |
36 | # Introduction
37 |
38 | Risk classification for property & casualty (P&C) insurance rating has traditionally
39 | been done with one-way, or univariate, analysis techniques. In recent years, many
40 | insurers have moved towards using generalized linear models (GLM), a multivariate
41 | predictive modeling technique, which addresses many shortcomings of univariate
42 | approaches, and is currently considered the gold standard in insurance risk
43 | classification. At the same time, machine learning (ML) techniques such as deep
44 | neural networks have gained popularity in many industries due to their superior
45 | predictive performance over linear models [@lecunDeepLearning2015]. In fact, there
46 | is a fast-growing body of literature on applying ML to P&C reserving
47 | [@kuoDeepTriangleDeep2018; @wuthrichMachineLearning2018; @gabrielliNeuralNetwork2019a;
48 | @gabrielliNeuralNetwork2019]. However, these ML techniques, often considered to be
49 | completely “black box”, have been less successful in gaining adoption in pricing,
50 | which is a regulated discipline and requires a certain amount of transparency in models.
51 |
52 | If insurers could gain more insight into how ML models behave in risk classification
53 | contexts, it would increase their ability to reassure regulators and the public
54 | that accepted ratemaking principles are met. Being able to charge more accurate
55 | premiums would, in turn, make the risk transfer system more efficient and contribute
56 | to the betterment of society. In this paper, we aim to take a step towards liberating
57 | actuaries from the confines of linear models in pricing projects, by proposing a
58 | framework for explaining ML models for ratemaking that regulators, practitioners,
59 | and researchers in actuarial science can build upon.
60 |
61 | The rest of this paper is organized as follows: Section \ref{ratemaking} provides
62 | an overview of P&C ratemaking, Section \ref{need} discusses the importance of
63 | interpretation, Section \ref{interpretability} discusses model interpretability
64 | in the context of ratemaking and proposes specific tasks for model explanation,
65 | Section \ref{application} describes current model interpretation techniques and
66 | applies them to the tasks defined in the previous section, and Section
67 | \ref{conclusion} concludes.
68 |
69 | # Property and Casualty Ratemaking {#ratemaking}
70 |
71 | ## History of Ratemaking
72 |
73 | Early classification ratemaking procedures were typically univariate in nature.
74 | For example, @lange_1966 notes that (at that time) most major lines of insurance
75 | used univariate methods based around the same principle: distributing an overall
76 | indication to territorial relativities or classification relativities based on
77 | the extent to which they deviated from the average experience.
78 |
79 | @bailey_simon_1960 introduced minimum bias methods, which were expanded
80 | throughout the 60s, 70s, and 80s. As computing power developed, minimum bias
81 | began to give way to GLMs, with papers such as @brown_1988 and @mildenhall_1999
82 | bridging the gap between the methods.
83 |
84 | Arguably, GLMs predate minimum bias procedures by a significant margin. The term
85 | was coined by @nelder_wedderburn_1972, but generalizations of least squares
86 | linear regression date back at least to the 1930s. Like minimum bias methods,
87 | GLMs did not become mainstream in actuarial science for some time. For example,
88 | the syllabus of basic education of the Casualty Actuarial Society (CAS) does not
89 | seem to include any mention of GLMs
90 | prior to @brown_1988 in the 1990 syllabus. From there, GLMs
91 | seem to have received only passing mention until 2006 with the introduction of
92 | @anderson_2005 to the syllabus. Beginning in 2016, the CAS introduced @goldburd_2016
93 | to the syllabus, which offers a comprehensive guide to GLMs.
94 |
95 | ## Machine Learning in Ratemaking
96 |
97 | Paralleling the development of GLMs was the development of machine learning
98 | algorithms throughout the middle part of the 20th century. Detailed histories
99 | of machine learning may be found in sources such as @nilsson_2009 and
100 | @wang_raj_2017. As with GLMs, machine learning was relatively unpopular
101 | in actuarial science until the last ten years, as computing power has become cheaper
102 | and more readily available and machine learning software packages have obviated
103 | the need to develop each analysis from scratch.
104 | Due to the breadth of machine learning as a field, it is difficult to identify
105 | the first time it entered the CAS syllabus; however, cluster analysis (in the
106 | form of k-means) seems to have been first included in 2011 with @robertson_2009.
107 | More recently, the CAS MAS-I and MAS-II exams introduced in 2018 have included
108 | machine learning explicitly.
109 |
110 | Within the area of ratemaking, machine learning is still in its infancy. A
111 | significant portion of machine learning applications to ratemaking has been in
112 | the context of automobile telematics, such as @gao_2018, @gao_2018_2, @gao_2019,
113 | @roel_2018, or @wuthrich_2017. Presumably this focus has been a result of the
114 | high-dimensionality and complexity of telematics data, making it a field in
115 | which the unique abilities of machine learning techniques give a clear
116 | advantage over traditional approaches.
117 |
118 | Outside of telematics, @yang_2018 use a gradient tree-boosting approach to
119 | capture non-linearities that would be a challenge for GLMs. @henckaerts_2018
120 | make use of generalized additive models (GAM) to improve predictions of GLMs.
121 | Many researchers, in an apparent effort to demonstrate the range of possibilities
122 | and advantages of machine learning, have approached the topic by comparing many
123 | different machine learning algorithms within a single study, such as in
124 | @dugas_2003, @noll_2018, and @spedicato_2018. These studies make use of such varied
125 | techniques as regression trees, boosting machines, support vector machines, and
126 | neural networks.
127 |
128 | ## Ratemaking Process
129 |
130 | Regardless of the method employed for determining the risk of various
131 | classifications, the actual process of setting rate relativities typically
132 | involves some variation of the following steps:
133 |
134 | 1. Obtain relevant policy-level data
135 | 2. Prepare data for analysis
136 | 3. Perform analysis on the data, employing desired method(s) to estimate
137 | needed rates
138 | 4. Select final rates based on rate indications
139 | 5. Present rates to the regulator, including an explanation of the steps followed to
140 | derive the rates
141 | 6. Answer questions from regulators regarding the method employed
142 |
143 | The focus of this paper is on steps 5 and 6.
144 | In many states, rate filings that exceed certain thresholds for
145 | magnitude of rate changes or filings that make use of new or sophisticated
146 | predictive models may be subject to particular regulatory scrutiny. In these cases,
147 | it is necessary to be able to explain the results of the modeling process in a
148 | way that is understandable without sacrificing statistical rigor.
149 |
150 | It should be noted that communicating results is not simply a means of passing
151 | regulatory muster. Generating interpretable modeling output is an important,
152 | even essential, facet of model checking. Actuaries are bound by relevant standards
153 | to be able to exercise appropriate judgment in selecting risk characteristics as
154 | part of a risk classification system per Actuarial Standard of Practice 12
155 | ("Risk Classification"). Therefore, the techniques discussed
156 | in this paper may be viewed through the lens of providing useful information to
157 | regulators, but they should also be considered as part of a thorough vetting of
158 | any rating model.
159 |
160 | Although the focus of this paper is on communication to regulators, it should be
161 | said that selecting final rates based on indications (step 4 in the list above) may
162 | pose a unique challenge for black-box models. This, too, provides strong motivation
163 | for techniques that could add to the modeler's, or any stakeholder's,
164 | understanding of the model, such as the relative importance of variables or the
165 | shapes of response curves. Such techniques could be usefully employed in making
166 | decisions about how best to select rates.
167 |
168 | Similarly, although the focus of this paper is on communication in a pricing
169 | context, the techniques explored in this paper (and many of the concerns discussed)
170 | may also be relevant to other contexts, such as claim-level reserving or analytics,
171 | or other applications of machine learning to the insurance industry.
172 |
173 | # The Need to See Inside the Black Box {#need}
174 |
175 | Within the actuarial profession, Actuarial Standard of Practice 41 ("Actuarial
176 | Communications") notes that "...another actuary qualified in the same practice
177 | area [should be able to] make an objective appraisal of the reasonableness of
178 | the actuary's work as presented in the actuarial report." [@asop_41] Underlying
179 | this requirement is an assumption that the hypothetical other actuary qualified
180 | in the same practice area is adequately familiar with the relevant techniques
181 | employed. Although the syllabus of basic education is constantly changing, there
182 | has at times been an assumption that all techniques and assumptions that have
183 | ever been a part of the syllabus of basic education needn't be explained from
184 | first principles in general actuarial communications, and that an actuary
185 | practicing in the same field should be able to make an objective appraisal of
186 | the results from the methods found in the syllabus.
187 | This is notable because, beginning with the introduction
188 | of the CAS MAS-I and MAS-II examination in July of 2018, several machine learning
189 | models were formally included in the syllabus of basic education. These exams
190 | cover a wide range of topics, such as splines, clustering algorithms, decision
191 | trees, boosting, and principal components analysis [@cas_syllabus_2018].
192 |
193 | Nevertheless, machine learning poses something of a special challenge for ASOP
194 | 41 for several reasons:
195 |
196 | 1. Machine learning models can be very ad hoc compared to
197 | traditional statistical models.
198 | 2. Because many machine learning models do not assume an underlying probability
199 | distribution or stochastic process, they may not admit of standard metrics for model comparison
200 | (e.g., it's not straightforward to calculate an AIC over a neural network).
201 | 3. Machine learning methods are often combined into ensembles that
202 | may not be easily separated and that may, as a collection, cease to resemble
203 | a single standard version of a model.
204 | 4. Machine learning models can be "black boxes" insofar as the final form of
205 | the response curve cannot be easily predicted and may depend heavily on the
206 | available data (which may not, in turn, be available to the reviewer).
207 |
208 | This last item raises a final interesting issue. GLMs and their ilk are often
209 | fitted using one of a handful of standard and well-understood approaches (e.g.,
210 | maximum likelihood estimation). However, this is not possible in general with
211 | machine learning models, as machine learning algorithms often use loss surfaces
212 | that are so complex that it may not be feasible to locate the global
213 | minimum of the surface. Certainly, closed-form representations of the loss
214 | surfaces are not generally available. For this reason, the training phase of a
215 | machine learning model is, in many ways, just as important to one's
216 | understanding as the model form and the data on which the model is fitted.
217 | Because the final model result is inseparable from these three components
218 | (training method, model form, and data), it is not generally adequate to just
219 | know the method employed to make an objective appraisal of the reasonableness
220 | of the result.
221 |
222 | These issues also pose particular challenges with respect to other standards.
223 | For instance, as discussed previously, ASOP 12 requires
224 | actuaries to be able to exercise appropriate judgment about risk classification
225 | systems. The recent ASOP 56 ("Models") speaks to more general concerns in all
226 | practice areas that might make use of models. ASOP 56 requires the actuary to
227 | "make reasonable efforts to confirm that the model structure, data, assumptions,
228 | governance and controls, and model testing and output validation are consistent
229 | with the intended purpose." [@asop_56] All of these efforts may be hampered if
230 | it is not possible to peer into the black box of the model.
231 |
232 | It should also be noted that these comments only apply within the actuarial profession. Outside of
233 | the actuarial profession, communication of results may be more challenging. A
234 | 2017 survey conducted by the Casualty Actuarial and Statistical Task Force of
235 | the National Association of Insurance Commissioners (NAIC) found that the plurality of
236 | responding regulators identified "Filing complexity and/or a lack of resources
237 | or expertise" as a key challenge that impedes their ability to review GLMs or
238 | other predictive models [@naic_summer_2017]. Given that machine learning
239 | algorithms are generally regarded as more complex than GLMs, this implies that
240 | the challenge of communicating machine learning model results is significant.
241 |
242 | In response to the same survey, 33 state regulators noted that it would be
243 | helpful or very helpful for the NAIC to develop information and tools to assist
244 | in reviewing rate filings based on GLMs, and 34 noted that it would be helpful
245 | to develop similar items to assist in reviewing "Other Advanced Modeling
246 | Techniques." One outgrowth of this need was the development of a white paper,
247 | @naic_white_paper, on best practices for regulatory review of predictive models.
248 | The white paper focuses on review of GLMs, particularly with respect to private
249 | passenger automobile and homeowners' insurance. Some of the guidance offered in
250 | this regard is therefore not strictly applicable to the review of machine
251 | learning models. For example, as previously noted, p-values are not a concept
252 | that translates well to deterministic machine learning algorithms. However,
253 | among the guidance applicable to machine learning algorithms are the following:
254 |
255 | * Determine the extent to which the model causes premium disruption for individual
256 | policyholders and how the insurer will
257 | explain the disruption to individual consumers that inquire about it.
258 | * Determine that individual input characteristics to a predictive model are
259 | related to the expected loss or expense
260 | differences in risk. Each input characteristic should have an intuitive or
261 | demonstrable actual relationship to expected loss or expense.
262 | * Determine that individual outputs from a predictive model and their associated
263 | selected relativities are not unfairly discriminatory.
264 |
265 | The last of these items is an entire topic unto itself. The methods and
266 | concepts introduced in this paper are useful for exploring the question of whether
267 | rates are appropriately related to risk of loss as defined by the variables used
268 | in the model, but there are many other aspects of discrimination-free pricing that are
269 | outside the scope of this paper. The methods in this paper may help in understanding
270 | the model, which is a necessary precursor to addressing the question of unfair
271 | discrimination.
272 |
273 | The items in this list are by no means exhaustive, but they pertain to the concept of model
274 | interpretability for ratemaking that we develop next.
275 |
276 | # Interpretability in the Ratemaking Context {#interpretability}
277 |
278 | In this section, we attempt to develop a working definition of interpretability
279 | for ratemaking applications. While we will not provide a comprehensive survey of
280 | the prolific and fast-evolving ML interpretability literature, we draw from it
281 | as appropriate in setting the stage for our discussion. Even among researchers
282 | in the subject, there is not a consensus on the definition of interpretability;
283 | here are a few from frequently cited papers:
284 |
285 | - Ability to explain or to present in understandable terms to a human
286 | [@doshi-velezRigorousScience2017];
287 | - The degree to which an observer can understand the cause of a decision
288 | [@biranExplanationJustification2017]; and
289 | - A method is interpretable if a user can correctly and efficiently predict the
290 | method’s results [@kimExamplesAre2016].
291 |
292 | We motivate our discussion by considering several aspects of interpretability.
293 | As we proceed through the points below, we aim to arrive at a more scoped and
294 | relevant definition of what it means for a pricing
295 | model to be interpretable. In the remainder of this section, we clarify a couple of concepts regarding
296 | interpretable *classes* of models and the computational transparency of ML
297 | models, outline frameworks for understanding the communication goals of interpretability,
298 | and then discuss a potential framework for implementing ML
299 | interpretability in practice.
300 |
301 | ## Not All Linear Models are Interpretable {#linear-models}
302 |
303 | In the actuarial science literature, the GLM is probably the most oft-cited
304 | example of an easily interpretable model. Given a set of inputs, we can easily
305 | reason about what the output of the model is. As an illustrative example,
306 | consider a claim severity model with driver age, sex, and vehicle age as
307 | predictors; assuming a log link function and letting $Y$ denote the response, we
308 | have
309 |
310 | \begin{align}
311 | & \log(E[Y]) = \beta_0 + \beta_1 \cdot \text{age} \nonumber \\
312 | & \quad + \beta_2 \cdot \text{vehicle\_age} + \beta_3 \cdot \text{sex}_{\text{male}}.
313 | \end{align}
314 |
315 | Here, we can tell, for example, what the model would predict for the expected
316 | severity if we were to increase age by a certain amount, *all else being equal*,
317 | because the relationship between the predictor and the response is simply
318 | multiplication by the coefficient $\beta_1$ followed by application of the
319 | inverse link function.
320 |
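Concretely, under the log link a one-unit increase in age, with sex and
vehicle age held fixed, scales the expected severity by a constant factor,
since $E[Y \mid \text{age} + 1] / E[Y \mid \text{age}] = e^{\beta_1}$. A
hypothetical estimate of $\beta_1 = 0.02$, say, would correspond to roughly a
2% increase in expected severity for each additional year of age.
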
321 | Another commonly cited example of an interpretable model is a decision tree. An
322 | illustrative example is shown in Figure \@ref(fig:tree-plot1). Here, the
323 | prediction is arrived at by following a sequence of if-else decisions.
324 |
325 | ```{r tree-plot1, echo = FALSE, fig.cap="A simple decision tree for loss cost prediction.", out.width="100%", fig.align='center'}
326 | knitr::include_graphics("figures/tree-plot1.png")
327 | ```
328 |
329 | Now, it is worth pointing out that, when declaring that GLMs or decision trees
330 | are interpretable models, we are implicitly assuming that we are considering
331 | only a handful of predictors. In fact, the ease with which we can reason about a
332 | model declines as the number of predictors, transformations of them, and
333 | interactions increases, as in the following (somewhat pathological) example:
334 |
335 | \begin{align}\label{eq:badglm}
336 | & \log(E[Y]) = \beta_0 + \beta_1 \cdot \text{age} + \beta_2 \cdot \text{vehicle\_age} \nonumber\\
337 | & \quad + \beta_3\cdot\text{vehicle\_age}^2 \nonumber \\
338 | & \quad + \beta_4 \cdot \text{age} \cdot \text{vehicle\_age} \nonumber \\
339 | & \quad + \beta_5 \cdot \text{sex}_{\text{male}} + \beta_6 \cdot \text{sex}_{\text{male}} \cdot \text{age}.
340 | \end{align}
341 |
342 | Similarly, one can see that in Figure \@ref(fig:tree-plot2), larger trees are
343 | tough to reason about. In other words, even when working within the framework
344 | of an "interpretable" class of models, we may still end up with something that
345 | many would consider "black box."
346 |
347 | ```{r tree-plot2, echo = FALSE, fig.cap="A more complex decision tree. This is still much simpler than typical realistic examples.", out.width="60%", fig.align='center', fig.env='figure*'}
348 | knitr::include_graphics("figures/tree-plot2.png")
349 | ```
350 |
351 | ## The Machinery is Not a Secret
352 |
353 | Another occasional misconception is that we have no visibility into how some ML
354 | models *compute* predictions, which renders them uninterpretable. Outside of
355 | proprietary algorithms, all common ML models, including neural networks,
356 | gradient boosted trees, and random forests, are well studied and have large
357 | bodies of literature documenting their inner workings. As an example, a fitted
358 | feedforward neural network is simply a composition of linear transformations
359 | followed by nonlinear activation functions. As in Equation \ref{eq:badglm}, one
360 | can write down the mathematical equation for calculating the prediction given
361 | some inputs, but it may be difficult for a human to reason about it. We show
362 | later that we can still provide explanations of completely "black box" models,
363 | but it is important to note that ML model predictions are still governed by
364 | mathematical rules, and are deterministic in most cases.
365 |
366 | ## Explanations are Contextual
367 |
368 | @hiltonConversationalProcesses1990 proposed a framework, later interpreted by
369 | @millerExplanationArtificial2017 in the context of ML, for understanding model explanations
370 | as *conversations* or *social interactions*. One consequence of this framing
371 | is that explanations need to be *relevant* to the *audience*. This framework is
372 | consistent with ASOP 41, which formulates a similar requirement in terms of an
373 | *intended user* of the actuarial communication. In developing,
374 | filing, and operationalizing a pricing model, one needs to accommodate a variety
375 | of stakeholders, each of whom has a different set of questions, assumptions,
376 | and technical capacity. First, there are internal stakeholders at the company,
377 | which includes management and underwriters. While some of the individuals in
378 | this audience may be technical, they are likely less familiar with predictive
379 | modeling techniques than the actuaries and data scientists who build the models.
380 | Next, we have the regulators, who may have limited resources to review the
381 | models, and will focus on a specific list of questions motivated by statute
382 | and public policy. Finally, we have potential policyholders, who have an interest (perhaps
383 | more so than the other parties) as they are responsible for paying the premiums.
384 |
385 | It is interesting to note that the modelers, who are most familiar with the
386 | models, tend to be the same people designing and communicating the explanations.
387 | This poses a challenge that @millerExplainableAI2017 call "inmates running
388 | the asylum", where the modelers design explanations for *themselves* rather
389 | than the intended audience. For example, they may be interested in technical
390 | questions, such as extrapolation behavior, and shape the explanations accordingly,
391 | which may be irrelevant to a prospective policyholder.
392 |
393 | Another point outlined in Miller's survey [@millerExplanationArtificial2017] is
394 | that explanations are *contrastive*. In other words, people are often interested
395 | in not why something happened, but rather why it happened instead of something
396 | else. For example, policyholders might not care exactly how their auto premiums
397 | are computed, but would like to know why they are being charged more than their
398 | coworkers who drive similar vehicles. As an extension, policyholders may want
399 | to know what they can change in order to obtain lower premiums.
400 |
401 | ## Asking and Answering the Right Questions
402 |
403 | With the above considerations in mind, we propose a potential framework for
404 | interpreting ML models for insurance pricing: the actuarial profession, in
405 | collaboration with regulators and representatives of the public, would define a set
406 | of questions to be answered by explanations accompanying ML models, along
407 | with acceptance criteria and examples of successful explanations. In other
408 | words, interpretability for our purposes is defined as the ability of a model's
409 | explanations to answer the posed questions.
410 |
411 | It should be noted that no ideal set of questions exists that would encompass
412 | all potential models. Rather, the actuary must consider what aspects of the
413 | model would raise questions from the perspective of the model's intended users.
414 | We propose that relevant stakeholders, by providing example questions and
415 | answers, would inherently provide guidance by which actuaries can reasonably
416 | anticipate the kinds of specific questions most important to those stakeholders
417 | and address them proactively.
418 |
419 | These questions should relate to existing guidelines, such as those described in
420 | @naic_white_paper and outlined in Section \ref{need}, standards of practice, and
421 | regulation, and in fact should not be specific only to ML models. By
422 | conceptualizing a set of questions, we reduce the burden of both companies and
423 | regulators; this is especially important for the latter, who are already
424 | resource constrained and facing an increasing variety of models being filed. This
425 | format should also be familiar to actuaries who are accustomed to adhering to
426 | specific guidelines in, for example, ASOPs. Like the ASOPs, we envision that
427 | these questions and guidelines will be continually updated to reflect feedback
428 | obtained and advances in research.
429 |
430 | While the realization of a set of such guidelines is an ambitious undertaking
431 | beyond the scope of this paper, we present in the next section a sample set of
432 | questions and techniques one can leverage to answer them. The goal of these case
433 | studies is twofold: to more concretely illustrate the proposed framework, and
434 | to expose the actuarial audience to modern ML interpretation techniques.
435 |
436 | # Applying Model Interpretation Techniques {#application}
437 |
438 | Now that we have established a framework for model interpretation in the form of
439 | asking and answering relevant questions, we demonstrate examples of such
440 | exchanges via an illustrative case study. Analytically, our starting point is a
441 | fitted deep neural network model for predicting loss costs. As the modeling
442 | details are of secondary importance, they are available in Appendix
443 | \ref{model-dev}. The questions that we ask of the model are as follows:
444 |
445 | 1. What are the most important predictors in the model? Put another way, to
446 | what extent do the predictors improve the accuracy of the model?
447 | 2. How does the predicted loss cost change, on average, as we change an input?
448 | 3. For a particular policyholder, how does each characteristic contribute to
449 | the loss cost prediction?
450 |
451 | The techniques we utilize to answer these questions are permutation variable
452 | importance, partial dependence plots, and additive variable attributions,
453 | respectively.
454 | In our discussion, we adopt the organization of techniques and some notation
455 | presented in @molnar2018interpretable and @biecekPMVEE, which are comprehensive
456 | references on the most established ML interpretation techniques.
457 |
458 | ## A Simplified View of Interpretation Techniques
459 |
460 | Before we dive into answering the questions, we present a brief taxonomy of ML
461 | interpretation techniques. Rather than attempting an exhaustive classification,
462 | the goal is to orient ourselves among broad categories of techniques, so we can
463 | map them to tasks indicated by the questions being asked. For our purposes,
464 | model interpretation techniques can be categorized across two dimensions:
465 | intrinsic vs. post-hoc and global vs. local.
466 |
467 | ### Intrinsic vs. Post-hoc
468 |
469 | Intrinsic model interpretation draws conclusions from the structure of the
470 | fitted model and is what we typically associate with "interpretable" classes
471 | of models. This is viable only for models with simple structures, such as the
472 | sparse linear model and shallow decision tree we see in Section
473 | \ref{linear-models}, where we arrive at explanations by reading off parameter
474 | estimates or a few decision rules. For algorithms that produce models
475 | with complex structure that do not lend themselves easily to intrinsic
476 | exploration, we can appeal to post-hoc techniques. This class of techniques
477 | interrogates the model by presenting it with data for scoring and observing the
478 | prediction behavior of the model. These techniques are concerned with only the
479 | inputs and outputs, and hence are *agnostic* of the model itself, which means
480 | they can also be applied to simple models. Since most useful ML models have a
481 | level of complexity beyond the threshold of intrinsic interpretability, we focus
482 | on model-agnostic techniques in our case study. As we will see later on, the
483 | data that we present to the models are usually some perturbed variations of test
484 | data.
485 |
486 | ### Global vs. Local
487 |
488 | Along the other dimension, we categorize model interpretations as global, or
489 | model-level, and local, or instance-level. The former class provides insights
490 | with respect to the model as a whole. Some examples of these explanations include
491 | variable importances and sensitivities, on average, of the predicted response
492 | with respect to individual predictors. Note that these methods may be compared to
493 | those described in Chapter 7 of @goldburd_2016, which focuses on global
494 | interpretation of GLMs.
495 |
496 | In our case study, questions 1 and 2 are
497 | associated with global interpretations. On the other hand, question 3 pertains to an
498 | individual prediction, which would fall in the local, or instance-level,
499 | category. In addition to individual variable attribution, we can also inquire
500 | about what would happen to the current predicted response if we were to perturb
501 | specific predictor variables.
502 |
503 | ## Answering the Questions
504 |
505 | Having aligned the questions with the categories of interpretation techniques,
506 | we now introduce a selection of appropriate techniques to answer them.
507 |
508 | ### Variable Importance
509 |
510 | "What are the most important predictors in the model?"
511 |
512 | For linear models and their generalizations, and some ML models, measures of
513 | variable importance can be obtained from the fitted model structure. In the
514 | case of GLMs, one might observe the magnitudes of the estimated coefficients or
515 | $t$-statistics, whereas
516 | for random forests, one might use out-of-bag errors [@breimanRandomForests2001].
517 | For more complex models, such as the neural network in our case study, we need
518 | to devise another approach.
519 |
520 | We follow the methodology of permutation feature importance as described in
521 | @fisherAllModels2018, and utilize the notation introduced by @biecekPMVEE.
522 | The gist of the technique is as follows: to see how important a variable is,
523 | we make predictions without it and see how much worse off we are in terms of
524 | accuracy. One way to achieve this would be to re-fit the model many times (as
525 | many times as the number of variables). However, this may be intractable with
526 | lengthy model training times or large numbers of variables, so a more popular
527 | approach is to instead keep the same fitted model but permute the values of each
528 | predictor.
529 |
530 | More formally, let $y$ denote the vector of responses, $X$ denote the matrix of
531 | predictor variables, $\widehat{f}$ denote the fitted model, and
532 | $L = \mathcal{L}(\widehat{f}(X), y)$, where $\widehat{f}$ applies to $X$
533 | rowwise, denote the value of the loss function, which is mean squared error
534 | in the case of regression. Now, if $\widetilde{X}^{j}$ denotes the predictor
535 | matrix where the $j$th variable has been permuted, then we can compute the
536 | loss with the permuted dataset as
537 | $L^{-j} = \mathcal{L}(\widehat{f}(\widetilde{X}^{j}), y)$.
538 | Here, by *permuting* a variable, we mean that we randomly rearrange the values
539 | in the column of data associated with the variable.
540 | With this, we define
541 | the variable importance $VI^{j}$ as $L^{-j} - L$.
542 |
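To make the procedure concrete, the following is a minimal sketch in R. The
fitted model `fit`, its `predict()` method, the data frame `test`, and the
response name `loss_cost` are hypothetical placeholders; this illustrates the
idea rather than reproducing our case study code.

```{r perm-vi-sketch, eval=FALSE}
# Minimal sketch of permutation variable importance: VI^j = L^{-j} - L.
mse <- function(y, y_hat) mean((y - y_hat)^2)

permutation_importance <- function(fit, data, response) {
  y <- data[[response]]
  predictors <- setdiff(names(data), response)
  baseline <- mse(y, predict(fit, data))     # L, the loss on intact data
  vapply(predictors, function(j) {
    permuted <- data
    permuted[[j]] <- sample(permuted[[j]])   # randomly rearrange column j
    mse(y, predict(fit, permuted)) - baseline
  }, numeric(1))
}

sort(permutation_importance(fit, test, "loss_cost"), decreasing = TRUE)
```

In practice, one would typically average the permuted loss over several random
permutations to reduce the Monte Carlo noise in the importance estimates.
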
543 | In Figure \@ref(fig:fi), we show a plot of variable importances. In our
544 | particular example, we see that the "make" variable contributes most to the
545 | accuracy of the model with "sex" contributing the least. This provides a way
546 | for the audience to quickly glance at the most relevant variables, and ask
547 | further questions as necessary.
548 |
549 | Note that these measures do not provide information regarding the direction
550 | of the effect of the predictors on the response. Also, when there are correlated
551 | variables, one should be careful about interpretation, as the result may be
552 | biased by unrealistic records in the permuted dataset. Another ramification
553 | of a group of correlated variables is that their inclusion may cause each to
554 | appear less important than if only one is included in the model.
555 |
556 | ```{r fi, out.width='100%', fig.align = "center", fig.cap="Permutation feature importances for the neural network model.", echo = FALSE}
557 | knitr::include_graphics("figures/fi-plot.png")
558 | ```
559 |
560 | ### Partial Dependence Plots
561 |
562 | "How does the predicted loss cost change, on average, as we change an input?"
563 |
564 | For this question, we again consider first how it would be answered in the GLM
565 | setting. When the input predictor in question is continuous, we can answer the
566 | question by looking at the estimated coefficient, which provides the change in
567 | the response per unit change in the predictor (on the scale of the linear
568 | predictor). For non-parametric models and neural networks, where no coefficients
569 | are available, we can appeal to partial dependence plots (PDP), first proposed
570 | by @friedmanGreedyFunction2001 for gradient boosting machines (GBM).
571 |
572 | ```{r pdp, out.width='70%', fig.align = "center", fig.cap="Partial dependence plot for the neural network model.", echo = FALSE, fig.env='figure*', fig.pos='h'}
573 | knitr::include_graphics("figures/pdp-plot.png")
574 | ```
575 |
576 | ```{r breakdown, out.width='80%', fig.align = "center", fig.cap="Variable contribution plot for the neural network model.", echo = FALSE, fig.env = 'figure*', fig.pos = 'h'}
577 | knitr::include_graphics("figures/breakdown-plot.png")
578 | ```
579 |
580 | To describe PDP, we need to introduce some additional notation. Let $x^j$ denote
581 | the input variable of interest. Then we define the partial dependence function as
582 |
583 | \begin{equation}
584 | h(z) = E_{X^{-j}}[\widehat{f}(x|x^j = z)],
585 | \end{equation}
586 |
587 | where the expectation is taken over the distribution of the other predictor
588 | variables. In other words, we marginalize them out so we can focus on the
589 | relationship between the predicted response and the variable of interest.
590 | Empirically, we estimate $h$ by
591 |
592 | \begin{equation}
593 | \widehat{h}(z) = \frac{1}{N}\sum_{i = 1}^{N}\widehat{f}(x_i|x_i^{j} = z),
594 | \end{equation}
595 |
596 | where $N$ is the number of records in the dataset.
597 |
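The empirical estimate admits an equally short sketch in R, under the same
assumptions about the hypothetical `fit` and `test` objects as before (the
column name `vehicle_age` is likewise a placeholder):

```{r pdp-sketch, eval=FALSE}
# Minimal sketch of the empirical partial dependence estimate:
# fix x^j = z for every record, predict, and average.
partial_dependence <- function(fit, data, variable, grid) {
  vapply(grid, function(z) {
    data[[variable]] <- z       # overwrite column j with the grid value z
    mean(predict(fit, data))    # average prediction over all N records
  }, numeric(1))
}

pd <- partial_dependence(fit, test, "vehicle_age", grid = 0:25)
```
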
598 | In Figure \@ref(fig:pdp), we exhibit the PDP for the "vehicle age" variable.
599 | We see that the average predicted loss cost decreases with vehicle age until
600 | the latter is around 18. Note that the accompanying histogram shows that the
601 | data is quite thin for vehicle age greater than 18, so the apparent upward
602 | trend to the right is driven by just a few data points.
603 |
604 | This information allows the modeler and stakeholders to consider whether it
605 | is reasonable for the anticipated loss cost to follow this shape.
606 |
607 | The question posed here is particularly important for regulators, who would
608 | like to know whether each variable affects the prediction in the direction that
609 | is expected, based on intuition, experience, and existing models. During the
610 | model development stage, PDP can also be used as a reasonableness test for
611 | candidate models by identifying unexpected relationships for the analyst to
612 | investigate.
613 |
614 | As with permutation feature importance, one should be careful when interpreting
615 | PDP when there are strongly correlated variables. Since we average over the
616 | marginal distribution of the rest of the variables, we may take into account
617 | unrealistic data (e.g., a high vehicle age paired with a vehicle model that is brand new). To
618 | address this drawback, alternative visualization techniques have been proposed,
619 | such as accumulated local effect (ALE) plots, which take expectations over
620 | conditional, rather than marginal, distributions [@apley2016visualizing].
621 |
622 | ### Variable Attribution
623 |
624 | "For a particular policyholder, how does each characteristic contribute to the
625 | loss cost prediction?"
626 |
627 | In the previous two examples, we look at model-level explanations; now we
628 | move on to one where we investigate one particular prediction instance. As
629 | before, we consider how we would approach the question for linear models. For
630 | a GLM with a log link common in ratemaking applications, for example, we start
631 | with the base rate, and the exponentiated coefficients then have
632 | multiplicative effects on the final rate. As in the previous examples,
633 | for ML models in general we do not have directly interpretable weights.
634 | Instead, one way to arrive at variable contributions is to calculate the change
635 | in the expected model prediction for each predictor, conditioned on other
636 | predictors.
637 |
638 |
639 | Formally, for a fitted model $\widehat{f}$, a given ordering of the variables
640 | $X^1,\dots,X^p$, where $p$ is the number of predictor variables,
641 | and a specific instance $x_*$, we
642 | would like to decompose the model prediction $\widehat{f}(x_*)$ into
643 |
644 | \begin{equation}\label{eq:contrib-decompose}
645 | \widehat{f}(x_* ) = v_0 + \sum_{j=1}^p v(j, x_*),
646 | \end{equation}
647 |
648 | where $v_0$ denotes the average model response, and $v(j, x_*)$ denotes the
649 | contribution of the $j$th variable in instance $x_*$, defined as
650 |
651 | \begin{align}\label{eq:var-contrib}
652 | & v(j, x_*) = E_X[\widehat{f}(X) | X^1 = x_*^1, \dots, X^j = x_*^j] \nonumber\\
653 | & \quad - E_X[\widehat{f}(X) | X^1 = x_*^1, \dots, X^{j-1} = x_*^{j-1}].
654 | \end{align}
655 |
656 | Hence, the contribution of the $j$th variable to the prediction is the
657 | incremental change in the expected model prediction when we set $X^j = x_*^j$,
658 | given that the first $j - 1$ variables already take their values in $x_*$. Note here that this
659 | definition implies that the order in which we consider the variables affects
660 | the results. Empirically, the expectations in Equation \@ref(eq:var-contrib) are
661 | calculated by sampling the test dataset.
662 |
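This sequential conditioning can be sketched in R as follows, where `x_star`
is a one-row data frame holding the instance of interest and `vars` is the
chosen ordering of predictor names; `fit`, `test`, and the variable names are
hypothetical, and the implementation by @biecekPMVEE that we use for Figure
\@ref(fig:breakdown) treats the sampling and ordering considerations more
carefully.

```{r breakdown-sketch, eval=FALSE}
# Sketch of additive variable attributions for a single instance.
variable_attribution <- function(fit, data, x_star, vars) {
  v0 <- mean(predict(fit, data))            # average model response
  expected <- numeric(length(vars))
  for (k in seq_along(vars)) {
    data[[vars[k]]] <- x_star[[vars[k]]]    # condition on one more variable
    expected[k] <- mean(predict(fit, data))
  }
  setNames(diff(c(v0, expected)), vars)     # incremental changes v(j, x_star)
}

variable_attribution(fit, test, x_star, vars = c("sex", "age_range", "make"))
```

By construction, $v_0$ plus the sum of the attributions recovers
$\widehat{f}(x_*)$ when `vars` includes all predictors, consistent with
Equation \@ref(eq:contrib-decompose).
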
663 | In Figure \@ref(fig:breakdown), we exhibit a waterfall plot of variable
664 | contributions. The "intercept" value denotes the average model prediction and
665 | represents the $v_0$ term in Equation \@ref(eq:contrib-decompose). The predicted
666 | loss cost for this particular policyholder is slightly less than average; the
667 | characteristics that make this policyholder more risky are the fact that he is
668 | a male between the ages of 18 and 25; counteracting the risky driver
669 | characteristics are the vehicle properties: it is a GM vehicle built
670 | domestically and is seven years old.
671 |
672 | Instance-level explanations are useful for investigating specific problematic
673 | predictions generated by the model. Regulators and model reviewers may be
674 | interested in variable contributions for the safest and riskiest policyholders
675 | to see if they conform to intuition. A policyholder with a particularly high
676 | premium may wish to find out which of their characteristics contribute to it,
677 | and may follow up with a question about how they can lower it, which would
678 | require another type of explanation.
679 |
680 | As noted earlier, the ordering of variables has an impact on the contributions
681 | calculated, especially for models that are non-additive, which could cause
682 | inconsistent explanations. There are several approaches to mitigate this
683 | issue, such as selecting variables with the largest contributions first,
684 | incorporating interaction terms, and averaging over possible orderings. The last
685 | of these ideas is implemented by @NIPS2017_7062 using Shapley values from
686 | cooperative game theory, and is referred to as Shapley additive explanations
687 | (SHAP). These approaches are discussed further in @biecekPMVEE and its references.
688 | Shapley values were also used in @mango_1998, which has appeared on the CAS syllabus
689 | starting in 2004, in the context of determining how to allocate catastrophe
690 | risk loads between multiple accounts.
691 |
692 | ## Other Techniques
693 |
694 | In this paper, we demonstrate just a few model-agnostic ML interpretation
695 | techniques. These represent a small subset of existing techniques, each of
696 | which has additional variations. In the remainder of this section, we point
697 | out a few common techniques not covered in our case study.
698 |
699 | Individual conditional expectation (ICE) plots disaggregate PDPs into their
700 | instance-level components for a more granular view into predictor sensitivities
701 | [@goldstein2015peeking]. To accommodate correlated variables in PDP, accumulated
702 | local effect (ALE) plots compute expected changes in model response over the
703 | conditional, rather than marginal, distribution of the other variables
704 | [@apley2016visualizing].
705 |
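To illustrate the relationship between ICE and PDP, the PDP sketch shown
earlier needs only one change: skip the averaging, so that each record traces
its own curve (object names again hypothetical):

```{r ice-sketch, eval=FALSE}
# Sketch of ICE curves: one row per record, one column per grid value.
ice_curves <- function(fit, data, variable, grid) {
  sapply(grid, function(z) {
    data[[variable]] <- z
    predict(fit, data)        # one prediction per record at x^j = z
  })
}

curves <- ice_curves(fit, test, "vehicle_age", grid = 0:25)
colMeans(curves)              # the pointwise mean recovers the PDP
```
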
706 | Local interpretable model-agnostic explanations (LIME) [@ribeiro2016should]
707 | build simple surrogate models using model predictions, with higher training
708 | weights given to observations near the point of interest, in effect replacing
709 | the complex ML model with easily interpretable linear regressions or decision trees in
710 | neighborhoods of specific points for the purpose of explanation. Taking the
711 | concept further, one can also train a global surrogate model across the entire
712 | domain of interest.
713 |
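To make the surrogate idea concrete, the sketch below fits a weighted linear
model around an instance of interest. It is a simplified variant that
reweights existing records by proximity rather than sampling perturbations
around the instance (as the published LIME algorithm does), and it assumes all
predictors are numeric; `fit`, `test_numeric`, and `x_star` are hypothetical.

```{r lime-sketch, eval=FALSE}
# Rough sketch of a local linear surrogate around an instance x_star.
local_surrogate <- function(fit, data, x_star, kernel_width = 1) {
  y_hat <- predict(fit, data)   # black-box predictions to be mimicked
  # Gaussian proximity weights based on distance to x_star
  dist <- sqrt(rowSums(sweep(as.matrix(data), 2, unlist(x_star))^2))
  w <- exp(-(dist / kernel_width)^2)
  lm(y_hat ~ ., data = cbind(data, y_hat = y_hat), weights = w)
}

# The surrogate's coefficients describe the model locally around x_star.
coef(local_surrogate(fit, test_numeric, x_star))
```
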
714 | # Conclusion {#conclusion}
715 |
716 | Actuarial standards of practice, most notably ASOP 41, place responsibility on
717 | the actuary to clearly communicate actuarial work products, including insurance
718 | pricing models. These responsibilities create special challenges for
719 | communicating machine learning models, which are often seen as "black boxes" due
720 | in part to their complexity, nonlinearity, flexible construction, and ad hoc
721 | nature.
722 |
723 | In this paper, we discuss questions of model validation that are of key
724 | importance in communicating a model and that may be particularly
725 | difficult to answer for machine learning models compared to GLMs or traditional pricing
726 | models. Specifically,
727 |
728 | * How does the model impact individual insurance consumers?
729 | * How are the predictor variables related to expected losses?
730 |
731 | We contextualize these questions in terms of different frameworks for defining
732 | interpretability. We conceptualize interpretability in terms of the ability of
733 | a model (or modeler) to answer a set of idealized questions that would be
734 | refined over time. We then offer potential (families of) model-agnostic techniques for
735 | providing answers to these questions.
736 |
737 | Much work remains to be done in terms of defining the role of machine learning
738 | algorithms in actuarial practice. Lack of interpretability has been a key
739 | barrier preventing wider adoption and exploration of these techniques. The
740 | methods proposed in this paper could therefore represent important strides in
741 | unlocking the potential of machine learning within the insurance industry.
742 |
743 | Acknowledgments {.unnumbered}
744 | ==========
745 |
746 | We thank Navdeep Gill, Daniel Falbel, Morgan Bugbee, the volunteers of the CAS
747 | project oversight group, and two anonymous reviewers for helpful discussions
748 | and/or feedback.
749 | This work is supported by the Casualty Actuarial Society.
750 |
751 | References {#references .unnumbered}
752 | ==========
753 |
754 |
755 | \appendix
756 |
757 | Appendix {.unnumbered}
758 | =============
759 |
760 |
761 | ```{r variables, results='asis', echo = FALSE}
762 | library(magrittr)
763 | variables <- tibble::tribble(
764 | ~Variable, ~Type, ~Transformation,
765 | "Age range", "Categorical", "One-hot encode",
766 | "Sex", "Categorical", "One-hot encode",
767 | "Vehicle category", "Categorical", "One-hot encode",
768 | "Make", "Categorical", "Embed in $\\mathbb{R}^2$",
769 | "Vehicle age", "Numeric", "Center and scale",
770 | "Region", "Categorical", "Embed in $\\mathbb{R}^2$"
771 | )
772 | knitr::kable(
773 | variables,
774 | caption = "Input variables and their transformations.",
775 | booktabs = TRUE,
776 | table.env = "table",
777 | escape = FALSE,
778 | linesep = ""
779 | ) %>%
780 | kableExtra::kable_styling(latex_options = "hold_position")
781 | ```
782 |
783 |
784 | # Model Development {#model-dev}
785 |
786 | In this appendix, we describe the ML model and the data used to train it. Note
787 | that, for the purposes of this paper, the goal of the modeling procedure is to
788 | obtain a model that produces predictions for the explanation techniques to
789 | interrogate, rather than to maximize predictive performance. As a result, we do
790 | not follow standard practices for tuning and validation. However, for the sake
791 | of completeness and reproducibility, we include an overview of the process here.
792 | Implementation is done using the R [@rlang] interface to TensorFlow
793 | [@tensorflow2015-whitepaper]. The model explanation visualizations utilize the
794 | implementation by @biecekPMVEE; the code to reproduce them is available on GitHub[^1].
795 |
796 | [^1]: https://github.com/kasaai/explain-ml-pricing
797 |
798 | ## Data
799 |
800 | We use data from the AUTOSEG ("Automobile Statistics System") database of
801 | Brazil's Superintendence of Private Insurance (SUSEP). The organization maintains
802 | policy-characteristics-level data, including claim counts and amounts, for all
803 | insured automobiles in Brazil. The data contains variables ranging from policyholder
804 | characteristics to losses by peril. We use data from the first half of
805 | 2012, comprising 1,707,651 records. One-fifth of the data is reserved for
806 | testing; the remainder is further split into an analysis set (3/4) and an
807 | assessment set (1/4), the latter used to determine early stopping.
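
A sketch of this splitting scheme using the `rsample` package, with `autoseg`
as a placeholder for the prepared policy-level data frame:

```{r split-sketch, eval = FALSE}
library(rsample)

# Hold out one-fifth of the data for testing
split_test <- initial_split(autoseg, prop = 0.8)
non_test <- training(split_test)
test <- testing(split_test)

# Split the remainder 3/4 analysis, 1/4 assessment (for early stopping)
split_assessment <- initial_split(non_test, prop = 0.75)
analysis_set <- training(split_assessment)
assessment_set <- testing(split_assessment)
```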
808 |
809 | ## Model
810 |
811 | Table \@ref(tab:variables) shows the input variables to our model and their
812 | associated transformations. For "make" and "region", we map each level to a
813 | point in $\mathbb{R}^2$ through embedding layers. The model predicts expected
814 | loss cost for all perils combined. The architecture is a feedforward neural
815 | network with two hidden layers of 64 units each. The hidden layers use ReLU
816 | activations, while the output layer uses softplus. Exposures for each record
817 | are used as sample weights during training. We fit the model via the Adam
818 | optimizer with an initial learning rate of 0.1 and a mini-batch size of
819 | 10,000, and trigger early stopping when the mean squared error on the
820 | assessment set does not improve for five epochs.
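
As a rough sketch, this architecture can be expressed with the R interface to
Keras as follows; input shapes, level counts, and object names are placeholders
rather than the exact training code, which is available in the repository
referenced above:

```{r model-sketch, eval = FALSE}
library(keras)

# Dense features: one-hot encoded categoricals plus centered/scaled vehicle age
dense_input <- layer_input(shape = n_dense_features, name = "dense_features")
make_input <- layer_input(shape = 1, name = "make")
region_input <- layer_input(shape = 1, name = "region")

# Map each make/region level to a point in R^2 via embeddings
make_embedded <- make_input %>%
  layer_embedding(input_dim = n_makes, output_dim = 2) %>%
  layer_flatten()
region_embedded <- region_input %>%
  layer_embedding(input_dim = n_regions, output_dim = 2) %>%
  layer_flatten()

# Two hidden layers of 64 ReLU units; softplus output for nonnegative loss cost
output <- layer_concatenate(list(dense_input, make_embedded, region_embedded)) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 1, activation = "softplus")

model <- keras_model(
  inputs = list(dense_input, make_input, region_input),
  outputs = output
)

model %>% compile(
  optimizer = optimizer_adam(lr = 0.1),  # `learning_rate` in newer keras versions
  loss = "mse"
)

model %>% fit(
  x = train_inputs,                # list matching the three inputs above
  y = train_loss_cost,
  sample_weight = train_exposure,  # exposures as sample weights
  batch_size = 10000,
  epochs = 100,
  validation_data = list(assessment_inputs, assessment_loss_cost),
  callbacks = list(callback_early_stopping(monitor = "val_loss", patience = 5))
)
```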
821 |
--------------------------------------------------------------------------------