├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── CC-BY-NC-SA-4.0.txt ├── CONTRIBUTING.md ├── DESCRIPTION ├── LICENSE.txt ├── MIT-License.txt ├── R ├── coef-plot.R ├── create_text_classifier.R ├── effect-plot.R ├── get-SpamTube-dataset.R ├── get-bike-sharing-dataset.R ├── get-cervical-cancer-dataset.R ├── get-text-classifier.R ├── ggplot-theme.R ├── knitr_options.R ├── lime.R └── utils.R ├── README.md ├── _build_and_deploy.sh ├── cover ├── cover-amazon.xcf ├── cover-leanpub.xcf ├── cover-lulu.jpg ├── cover-lulu.png ├── cover-lulu.xcf ├── cover.png └── isbn_barcode.pdf ├── data ├── TubeSpam.csv ├── bike-sharing-daily.csv ├── bike.RData ├── bike.csv ├── cached-anchors-cervical-balanced.RDS ├── cached-anchors-cervical.RDS ├── cached-anchors-edge.RDS ├── cached-anchors.RDS ├── cached-sbrl-bike.RDS ├── cached-sbrl-cervical.RDS ├── cervical.RData ├── cervical.csv ├── influence-df.RData ├── risk_factors_cervical_cancer.csv ├── speed_dating_data.csv └── ycomments.RData ├── iml-book.Rproj ├── initialize-gh-pages.sh ├── manuscript ├── 00.0-preface.Rmd ├── 01-introduction.Rmd ├── 01.2-short-stories.Rmd ├── 01.3-ml-definitions.Rmd ├── 02-interpretability.Rmd ├── 03-datasets.Rmd ├── 04.1-interpretable-models.Rmd ├── 04.2-interpretable-linear.Rmd ├── 04.3-interpretable-logistic.Rmd ├── 04.4-interpretable-lm-extensions.Rmd ├── 04.5-interpretable-tree.Rmd ├── 04.6-interpretable-rules.Rmd ├── 04.7-interpretable-rulefit.Rmd ├── 04.8-interpretable-other.Rmd ├── 05.1-agnostic.Rmd ├── 05.2-agnostic-pdp.Rmd ├── 05.3-agnostic-ice.Rmd ├── 05.4-agnostic-ale.Rmd ├── 05.5-agnostic-interaction.Rmd ├── 05.6-agnostic-permfeatimp.Rmd ├── 05.7-agnostic-global-surrogate.Rmd ├── 05.8-agnostic-lime.Rmd ├── 05.8.1-agnostic-Anchors.Rmd ├── 05.9-agnostic-shapley.Rmd ├── 05.9b-agnostic-shap.Rmd ├── 06.0-example.Rmd ├── 06.1-example-based-counterfactual.Rmd ├── 06.2-example-based-adversarial.Rmd ├── 06.3-example-based-proto.Rmd ├── 06.4-example-based-archetypes.Rmd ├── 06.5-example-based-influence-fct.Rmd ├── 07.0-neuralnet.Rmd ├── 07.1-feature-visualization.Rmd ├── 07.2-concepts.Rmd ├── 07.3-feature-attribution.Rmd ├── 07.4-distillation.Rmd ├── 08-future.Rmd ├── 09-contribute.Rmd ├── 09b-translations.Rmd ├── 10-acknowledgements.Rmd ├── Book.txt ├── Makefile ├── Sample.txt ├── _bookdown.yml ├── _output.yml ├── css │ ├── cookieconsent.min.css │ └── style.css ├── html │ └── header.html ├── images │ ├── a484.png │ ├── access-denied.jpg │ ├── access-denied.xcf │ ├── activation-optim.png │ ├── adversarial-1pixel.png │ ├── adversarial-ostrich.jpg │ ├── adversarial-panda.jpg │ ├── adversarial-toaster.png │ ├── adversarial-turtle.png │ ├── agnostic.png │ ├── amazon-freq-bought-together.png │ ├── analyze.png │ ├── anchors-process.jpg │ ├── anchors-visualization.png │ ├── anchors.jpg │ ├── arch-compare.png │ ├── big-picture.png │ ├── big-picture.xcf │ ├── broden.png │ ├── burnt-earth.jpg │ ├── burnt-earth.xcf │ ├── by-nc-sa.png │ ├── cfexp-nsgaII.jpg │ ├── cnn features-1.xcf │ ├── cnn-features.png │ ├── cooks-analyzed-1.png │ ├── cover-amazon-bordered.jpg │ ├── detective.png │ ├── dissection-dog-exemplary.jpg │ ├── dissection-dogs.jpeg │ ├── dissection-network.png │ ├── doctor-840127_1280.xcf │ ├── dog_and_book.jpeg │ ├── doge-stuck.jpg │ ├── doge-stuck.xcf │ ├── enrollment.png │ ├── eureka.png │ ├── explain.png │ ├── feature-visualization-units.png │ ├── graph.jpg │ ├── hospital.png │ ├── ice-bike-derivative-1.png │ ├── ice-cervical-derivative-1.png │ ├── iml.png │ ├── inceptionv1.svg │ ├── influence-single-1.png │ ├── 
interaction-cervical-1.png │ ├── interaction2-cervical-age-1.png │ ├── learn-one-rule.png │ ├── learner.png │ ├── lime-images-package-example-1.png │ ├── lime-tabular-example-explain-plot-2-1.png │ ├── lime-text-explanations-1.png │ ├── machine-learning-xkcd.png │ ├── mri.png │ ├── pen.jpg │ ├── potato-chips.jpg │ ├── programing-ml.png │ ├── proto-critique2.jpg │ ├── rotation-dissect.png │ ├── rulefit.jpg │ ├── scientist.png │ ├── shap-clustering.png │ ├── shap-dependence-interaction.png │ ├── shap-dependence.png │ ├── shap-explain-1.png │ ├── shap-explain-2.png │ ├── shap-importance-extended.png │ ├── shap-importance.png │ ├── shap-simplified-features.jpg │ ├── shap-superpixel.jpg │ ├── shapley-bike-plot-1.png │ ├── shapley-cervical-plot-1.png │ ├── shapley-coalitions.png │ ├── shapley-instance-intervention.png │ ├── shapley-instance.png │ ├── spheres.jpg │ ├── strong.png │ ├── tcav.png │ ├── title_page.jpg │ ├── trippy.png │ ├── units.jpg │ └── wise.png ├── index.Rmd ├── javascript │ └── cookieconsent.min.js ├── krantz.cls └── xgboost.model ├── pkg └── sbrl_1.2.tar.gz ├── review.md └── scripts ├── dl-feature-attribution ├── activation-maximization.py ├── edge-detection.py ├── feature-attribution-dl.py ├── utils.py └── utils_imagenet.py ├── fix-leanpub.R ├── imagenet_classifier.R ├── lime.ipynb ├── mmd └── MMD-critic │ ├── .gitignore │ ├── Helper.py │ ├── LICENSE │ ├── README │ ├── classify.py │ ├── data.py │ ├── mmd.py │ └── run_digits.py ├── prepare_data.R ├── process-rmd-leanpub.R ├── references.R ├── setup_book_manjaro.R └── shap ├── .gitignore ├── requirements.txt └── shap-notebook.ipynb /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^packrat/ 4 | ^\.Rprofile$ 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | .Rproj.user 35 | 36 | # The book files 37 | manuscript/_book/ 38 | manuscript/_bookdown_files/ 39 | # temporary ones 40 | manuscript/interpretable-ml* 41 | manuscript/images/*.pdf 42 | 43 | # Vim swap files 44 | *.swp 45 | 46 | # LaTeX files 47 | *.aux 48 | *.tex 49 | *.log 50 | 51 | xgboost.model 52 | .DS_Store 53 | 54 | # leanpub files 55 | manuscript/*.md 56 | manuscript/cache 57 | 58 | kindlegen 59 | manuscript/images/*.png 60 | 61 | # From SBRL package usage 62 | *.out 63 | *.label 64 | packrat/lib*/ 65 | 66 | # Gets build automatically, so ignore for repo 67 | manuscript/11-references.Rmd 68 | 69 | # From Deep Learning scripts 70 | scripts/dl-feature-attribution/dl/ 71 | scripts/dl-feature-attribution/*.png 72 | 73 | .ipynb_checkpoints 74 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | 
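# Travis CI config: every push builds the book; _build_and_deploy.sh renders the HTML
# version and, for non-pull-request builds of the master branch, deploys it to gh-pages.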
language: r 2 | pandoc_version: 1.19.2.1 3 | sudo: required 4 | cache: 5 | packages: true 6 | directories: 7 | - manuscript/cache 8 | 9 | apt_pacakges: 10 | - r-cran-java 11 | - libgsl-dev 12 | - libproj-dev 13 | - libgdal-dev 14 | - open-cobol 15 | 16 | before_install: 17 | - sudo $(which R) CMD javareconf 18 | - Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager"); BiocManager::install("Biobase")}' 19 | 20 | 21 | before_script: 22 | - chmod +x ./_build_and_deploy.sh 23 | - git config --global user.email "christoph.molnar@gmail.com" 24 | - git config --global user.name "Christoph Molnar" 25 | script: ./_build_and_deploy.sh 26 | env: 27 | global: 28 | - secure: jmo/WiDp6XfW5zU7e9yZuktt74+5WlzqKtXv3Tq14LUdDNuQfopPAvobjh6/ZVIcxA+lsfGt6G2lDMsKBsKuVOK0rrk99WmAt0UWEvS16ftoldFs3U3xHZI9YXmd1iBT5h4b5GNL25woGHfxyHw87dQkacatAbXH7b8D24ALRNvx1UAGVW4Dz/D52xvgL6Lncu296SrGA9PqXmiHuq679p7j7V90+z5KK9XxI1PcYNRKhMdtQkswa4132GM42tBQ8OoU4v2aWBgBEtSO8J6KXO/B1j184c0aSU2qoooxFCN0ZeR+ECs418wDj7wj98VfQDFCUOCteVmLSvLHgct2t70F5bNPbn+7PD+cwz4HPEy7zJffHxjKepsiLZEWUkGoQ6cSYEUUOicAOL0lrQtLxA14Yz/Y569Iq6/TrxL+FDYXBmJkwZ5gBffEbmGfmubkUZLNBfh+n65VYl71tGWM9e8tYBPLq1G0c+2cbN4E9H/A3wo7sP4JjKHmUVvZjqAOfxuuFpXua6wPSa6AyuuvjKfDzJOYjro6CWO4KYjUZ6qZ9DEt+xVrv39KMUviVTZPnUzc8kmz0zFDFnvJeOo/AGZ8ackDK6C3UJmpCAUxkdqFywHcAdl5vhoF4o5kk5Ucsa9Yn+SfNAe3jOicEV1M5U8yZ4vr/xUJTS0E/opkQMc= 29 | 30 | 31 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Any contribution is very welcome. 2 | 3 | For example, if you find a typo, a better way to formulate a sentence or if you have stylistic improvements, the first step is to fork the repository. 4 | Then you can make some changes and create a pull request. 5 | 6 | If you are interested in a bigger contribution, like writing a chapter or providing examples: that's great! Please open an issue on Github and we can discuss your ideas. 7 | 8 | Rules for contributing text or code: 9 | 10 | - Make sure each text line only contains one sentence. Exception: for item lists it is one item (with possibly multiple sentences) per line. 11 | - Fork the repository and open a pull request (PR) for requesting to include the changes. 
You can use "[WIP]" in the title of the PR, to indicate that you are still working on it 12 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: iml.book 2 | Title: Interpretable machine learning 3 | Version: 0.0.1 4 | Depends: 5 | knitr 6 | Imports: 7 | bookdown, 8 | ggplot2, 9 | mlbench, 10 | dplyr, 11 | tidyr, 12 | data.table, 13 | mlr, 14 | rpart, 15 | partykit, 16 | randomForest, 17 | gridExtra, 18 | grid, 19 | jpeg, 20 | caret, 21 | e1071, 22 | tm, 23 | svglite, 24 | mmpf, 25 | numDeriv, 26 | DT, 27 | xgboost, 28 | lubridate, 29 | pre, 30 | shiny, 31 | roxygen2, 32 | memoise, 33 | gridExtra, 34 | OneR, 35 | RWeka, 36 | iml, 37 | jtools, 38 | mgcv, 39 | devtools, 40 | readr, 41 | Cairo, 42 | viridis, 43 | interactions, 44 | rjson, 45 | png, 46 | R.utils, 47 | yaImpute, 48 | arules 49 | Suggests: 50 | party 51 | Remotes: christophM/interpretable-ml-book 52 | 53 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The Interpretable Machine Learning book project (c) by Christoph Molnar use the following two licenses: 2 | 3 | * Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) for the rendered book. The full terms are in the file CC-BY-NC-SA-4.0.txt in the root of this repository. 4 | * MIT License for the code that produces the book. The full terms are in file MIT-License.txt in the root folder of this repository. 5 | -------------------------------------------------------------------------------- /MIT-License.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Christoph Molnar 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /R/coef-plot.R: -------------------------------------------------------------------------------- 1 | 2 | #' Plot coefficients of a linear model 3 | coef_plot = function(mod, alpha = 0.05, remove_intercept = TRUE){ 4 | lm_summary = summary(mod)$coefficients 5 | rownames(lm_summary) = pretty_rownames(rownames(lm_summary)) 6 | 7 | df = data.frame(Features = rownames(lm_summary), 8 | Estimate = lm_summary[,'Estimate'], 9 | std_error = lm_summary[,'Std. 
Error']) 10 | df$lower = df$Estimate - qnorm(alpha/2) * df$std_error 11 | df$upper = df$Estimate + qnorm(alpha/2) * df$std_error 12 | 13 | 14 | if(remove_intercept){ 15 | df = df[!(df$Features == '(Intercept)'),] 16 | } 17 | require("ggplot2") 18 | ggplot(df) + 19 | geom_vline(xintercept=0, linetype=4) + 20 | geom_point(aes(x=Estimate, y=Features)) + 21 | geom_segment(aes(y=Features, yend=Features, x=lower, xend=upper), arrow = arrow(angle=90, ends='both', length = unit(0.1, 'cm'))) + 22 | scale_x_continuous('Weight estimate') + 23 | my_theme() 24 | } 25 | -------------------------------------------------------------------------------- /R/create_text_classifier.R: -------------------------------------------------------------------------------- 1 | # Text data is not yet implemented in LIME 2 | # Doing it myself now 3 | 4 | 5 | #' Tokenize sentence into words 6 | #' 7 | #' @param x string with sentence 8 | #' @return list of words 9 | tokenize = function(x){ 10 | unlist(strsplit(x, "\\s+")) 11 | } 12 | 13 | 14 | #' Get a subset from a text 15 | #' 16 | #' @param words List of words 17 | #' @param prob Probability with which to keep a word 18 | #' @return List with two objects. First object is the new text. Second object is a vector 19 | #' of length number of words with 0s and 1s, indicating whether a word is in the new 20 | #' sentence (1) or not (0) 21 | draw_combination = function(words, prob=0.5){ 22 | # Create combination 23 | combi = rbinom(n = length(words), size = 1, prob = prob) 24 | names(combi) = words 25 | df = data.frame(t(combi)) 26 | # Create text 27 | new_text = paste(words[which(combi==1)], collapse = ' ') 28 | list(text = new_text, 29 | combi = df) 30 | } 31 | 32 | 33 | #'Create variations of a text 34 | #' 35 | #'@param text The text 36 | #'@param pred_fun The prediction function from the machine learning model. 37 | #' It should contain the complete pipeline: take the raw text, do all the pre-processing 38 | #' and do the prediction. Returned prediction should be a data.frame with one column per class 39 | #'@param prob Probability with which to keep a word 40 | #'@param n_variations Number of variations to create 41 | #'@param class The class for which to create the predictions 42 | #'@return data.frame for a local linear model, containing binary features for word occurence 43 | #'weights for distance to original sentence and the predictions for the chosen class. 44 | create_variations = function(text, pred_fun, prob=0.5, n_variations = 100, class){ 45 | tokenized = tokenize(text) 46 | df = data.frame(lapply(tokenized, function(x) 1)) 47 | names(df) = tokenized 48 | 49 | combinations = lapply(1:n_variations, function(x){ 50 | draw_combination(tokenized, prob=prob) 51 | }) 52 | 53 | texts = as.vector(sapply(combinations, function(x) x['text'])) 54 | 55 | features = data.frame(data.table::rbindlist(sapply(combinations, function(x) x['combi']))) 56 | weights = rowSums(features) / ncol(features) 57 | predictions = pred_fun(texts)[,class] 58 | 59 | cbind(features, pred=predictions, weights = weights) 60 | } 61 | 62 | 63 | #' Explain the classification of a text 64 | #' 65 | #'@param text The text for which to explain the classification 66 | #'@param pred_fun The prediction function from the machine learning model. 67 | #' It should contain the complete pipeline: take the raw text, do all the pre-processing 68 | #' and do the prediction. 
Returned prediction should be a data.frame with one column per class 69 | #'@param prob The probability to keep a word in the variations 70 | #'@param n_variations The number of text variations to create 71 | #'@param K The number of features to use for the explanation 72 | #'@param case The ID of the observation 73 | #'@param class The class for which to create the explanations 74 | explain_text = function(text, pred_fun, prob=0.9, n_variations=500, K = 3, case=1, class){ 75 | stopifnot(K >= 1) 76 | df = create_variations(text, pred_fun = predict_fun, prob = prob, n_variations = n_variations, class=class) 77 | mod = glm(pred ~ . - weights, data =df , weights=df$weights, family = 'binomial') 78 | 79 | coefs = coef(mod) 80 | coefs = coefs[names(coefs) != '(Intercept)'] 81 | coefs = coefs[base::order(abs(coefs), decreasing = TRUE)] 82 | names(coefs) = tokenize(text) 83 | coefs = coefs[1:K] 84 | # Create explanation compatible to R-LIME format 85 | tibble(case = case, 86 | label = class, 87 | label_prob = pred_fun(text)[, class], 88 | model_intercept = coef(mod)['(Intercept)'], 89 | feature = names(coefs), 90 | feature_value = names(coefs), 91 | feature_weight = coefs, 92 | feature_desc = names(coefs), 93 | data = text, 94 | prediction = list(pred_fun(text))) 95 | } 96 | -------------------------------------------------------------------------------- /R/effect-plot.R: -------------------------------------------------------------------------------- 1 | 2 | #' Plot effects of linear model 3 | effect_plot = function(mod, dat, feature_names=NULL){ 4 | X = get_effects(mod, dat) 5 | if(!missing(feature_names)){ 6 | rownames(X) = feature_names 7 | } 8 | X = tidyr::gather(X) 9 | require("ggplot2") 10 | ggplot(X) + 11 | geom_hline(yintercept=0, linetype=4) + 12 | geom_boxplot(aes(x=key, y=value, group=key)) + 13 | coord_flip() + 14 | scale_y_continuous('Feature effect') + 15 | my_theme() 16 | } 17 | 18 | get_reference_dataset = function(dat){ 19 | df = lapply(dat, function(feature){ 20 | if(class(feature) == 'factor'){ 21 | factor(levels(feature)[1], levels = levels(feature)) 22 | } else { 23 | 0 24 | } 25 | }) 26 | data.frame(df) 27 | } 28 | 29 | get_effects = function(mod, dat){ 30 | 31 | X = data.frame(predict(mod, type = 'terms')) 32 | 33 | # Nicer colnames 34 | colnames(X) = gsub('^X\\.', '', colnames(X)) 35 | colnames(X) = gsub('\\.', ' ', colnames(X)) 36 | 37 | # predict with type='terms' centers the results, so we have to add the mean again 38 | reference_X = predict(mod, newdata=get_reference_dataset(dat), type='terms') 39 | X_star = data.frame(t(apply(X, 1, function(x){ x - reference_X[1,names(X)]}))) 40 | X_star 41 | } 42 | -------------------------------------------------------------------------------- /R/get-SpamTube-dataset.R: -------------------------------------------------------------------------------- 1 | get.ycomments.data = function(data_dir){ 2 | ycomments.file = sprintf('%s/TubeSpam.csv', data_dir) 3 | if (!file.exists(ycomments.file)) { 4 | download.spam.data() 5 | } 6 | read.csv(ycomments.file, stringsAsFactors = FALSE) 7 | } 8 | 9 | 10 | 11 | # Download the youtube datasets 12 | download.spam.data = function(){ 13 | urls = sprintf('http://lasid.sor.ufscar.br/labeling/datasets/%i/download/', 9:13) 14 | ycomments = lapply(urls, read.csv, stringsAsFactors=FALSE) 15 | ycomments = do.call('rbind', ycomments) 16 | cleanFun <- function(htmlString) { 17 | return(gsub("<.*?>", "", htmlString)) 18 | } 19 | ycomments$CONTENT = cleanFun(ycomments$CONTENT) 20 | # Convert to ASCII 21 | 
ycomments$CONTENT = iconv(ycomments$CONTENT, "UTF-8", "ASCII", sub="") 22 | write.csv( x = ycomments, file = sprintf('%s/TubeSpam.csv', data_dir),row.names=FALSE) 23 | } 24 | -------------------------------------------------------------------------------- /R/get-bike-sharing-dataset.R: -------------------------------------------------------------------------------- 1 | get.bike.data = function(data_dir){ 2 | bike = read.csv(sprintf('%s/bike-sharing-daily.csv', data_dir), stringsAsFactors = FALSE) 3 | # See http://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset 4 | 5 | bike$weekday = factor(bike$weekday, levels=0:6, labels = c('SUN', 'MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT')) 6 | bike$holiday = factor(bike$holiday, levels = c(0,1), labels = c('NO HOLIDAY', 'HOLIDAY')) 7 | bike$workingday = factor(bike$workingday, levels = c(0,1), labels = c('NO WORKING DAY', 'WORKING DAY')) 8 | bike$season = factor(bike$season, levels = 1:4, labels = c('SPRING', 'SUMMER', 'FALL', 'WINTER')) 9 | bike$weathersit = factor(bike$weathersit, levels = 1:3, labels = c('GOOD', 'MISTY', 'RAIN/SNOW/STORM')) 10 | bike$mnth = factor(bike$mnth, levels = 1:12, labels = c('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OKT', 'NOV', 'DEZ')) 11 | bike$yr[bike$yr == 0] = 2011 12 | bike$yr[bike$yr == 1] = 2012 13 | bike$yr = factor(bike$yr) 14 | bike$days_since_2011 = day_diff(bike$dteday, min(as.Date(bike$dteday))) 15 | 16 | # denormalize weather features: 17 | # temp : Normalized temperature in Celsius. The values are derived via (t-t_min)/(t_max-t_min), t_min=-8, t_max=+39 (only in hourly scale) 18 | bike$temp = bike$temp * (39 - (-8)) + (-8) 19 | # atemp: Normalized feeling temperature in Celsius. The values are derived via (t-t_min)/(t_max-t_min), t_min=-16, t_max=+50 (only in hourly scale) 20 | bike$atemp = bike$atemp * (50 - (16)) + (16) 21 | 22 | #windspeed: Normalized wind speed. The values are divided to 67 (max) 23 | bike$windspeed = 67 * bike$windspeed 24 | #hum: Normalized humidity. 
The values are divided to 100 (max) 25 | bike$hum = 100 * bike$hum 26 | 27 | 28 | dplyr::select(bike, -instant, -dteday, -registered, -casual, -atemp) 29 | } 30 | 31 | 32 | get.bike.task = function(data_dir){ 33 | mlr::makeRegrTask(id='bike', data=get.bike.data(data_dir), target = 'cnt') 34 | } 35 | 36 | 37 | bike.features.of.interest = c('season','holiday', 'workingday', 'weathersit', 'temp', 'hum', 'windspeed', 'days_since_2011') 38 | -------------------------------------------------------------------------------- /R/get-cervical-cancer-dataset.R: -------------------------------------------------------------------------------- 1 | #============================================================================== 2 | # Preparing dataset for cervical cancer classification 3 | #============================================================================== 4 | # Source: 5 | # http://archive.ics.uci.edu/ml/datasets/Cervical+cancer+%28Risk+Factors%29 6 | # Paper: http://www.inescporto.pt/~jsc/publications/conferences/2017KelwinIBPRIA.pdf 7 | 8 | get.cervical.data = function(data_dir){ 9 | cervical = read.csv(sprintf('%s/risk_factors_cervical_cancer.csv', data_dir), na.strings = c('?'), stringsAsFactors = FALSE) 10 | cervical = select(cervical, -Citology, -Schiller, -Hinselmann) 11 | cervical$Biopsy = factor(cervical$Biopsy, levels = c(0, 1), labels=c('Healthy', 'Cancer')) 12 | 13 | ## subset variables to the ones that should be used in the book 14 | cervical = dplyr::select(cervical, Age, Number.of.sexual.partners, First.sexual.intercourse, 15 | Num.of.pregnancies, Smokes, Smokes..years., Hormonal.Contraceptives, Hormonal.Contraceptives..years., 16 | IUD, IUD..years., STDs, STDs..number., STDs..Number.of.diagnosis, STDs..Time.since.first.diagnosis, 17 | STDs..Time.since.last.diagnosis, Biopsy) 18 | 19 | # NA imputation 20 | imputer = mlr::imputeMode() 21 | 22 | 23 | cervical_impute = mlr::impute(cervical, classes = list(numeric = imputeMode())) 24 | cervical = cervical_impute$data 25 | #cervical = relevel(cervical, "Healthy") 26 | cervical 27 | } 28 | 29 | get.cervical.task = function(data_dir){ 30 | cervical = get.cervical.data(data_dir) 31 | mlr::makeClassifTask(id='cervical', data = cervical, target = 'Biopsy') 32 | } 33 | -------------------------------------------------------------------------------- /R/get-text-classifier.R: -------------------------------------------------------------------------------- 1 | 2 | get.ycomments.classifier = function(ycomments){ 3 | labeledTerms = prepare_data(ycomments$CONTENT) 4 | labeledTerms$class = factor(ycomments$CLASS, levels = c(0,1), labels = c('no spam', 'spam')) 5 | rp = rpart::rpart(class ~ ., data = labeledTerms) 6 | get_predict_fun(rp, labeledTerms) 7 | } 8 | 9 | prepare_data = function(comments, trained_corpus = NULL){ 10 | 11 | corpus = Corpus(VectorSource(comments)) 12 | dtm = DocumentTermMatrix(corpus, control = list(removePunctuation = TRUE, 13 | stopwords=TRUE, 14 | stemming = FALSE, 15 | removeNumbers = TRUE 16 | )) 17 | 18 | labeledTerms = as.data.frame(as.matrix(dtm)) 19 | 20 | # Seems that columns called break or next cause trouble 21 | names(labeledTerms)[names(labeledTerms) %in% c('break')] <- 'break.' 22 | names(labeledTerms)[names(labeledTerms) %in% c('next')] <- 'next.' 23 | names(labeledTerms)[names(labeledTerms) %in% c('else')] <- 'else.' 
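  # 'break', 'next' and 'else' are reserved words in R; as column names they seem to trip up
  # downstream handling (e.g. the class ~ . formula used with rpart), hence the trailing dot.
  # The exact failure mode is not documented here, so treat this rationale as an assumption.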
24 | 25 | 26 | if(!is.null(trained_corpus)){ 27 | # Make sure only overlapping features are used 28 | labeledTerms = labeledTerms[intersect(colnames(labeledTerms), colnames(trained_corpus))] 29 | 30 | empty_corpus = trained_corpus[1, ] 31 | labeledTerms = data.frame(data.table::rbindlist(list(empty_corpus, labeledTerms), fill=TRUE)) 32 | labeledTerms = labeledTerms[2:nrow(labeledTerms),] 33 | } 34 | labeledTerms 35 | } 36 | 37 | 38 | get_predict_fun = function(model, train_corpus){ 39 | function(comments){ 40 | terms = prepare_data(comments, train_corpus) 41 | predict(model, newdata = terms, type='prob') 42 | } 43 | } 44 | 45 | 46 | # Text data is not yet implemented in LIME 47 | # Doing it myself here 48 | 49 | 50 | #' Tokenize sentence into words 51 | #' 52 | #' @param x string with sentence 53 | #' @return list of words 54 | tokenize = function(x){ 55 | unlist(strsplit(x, "\\s+")) 56 | } 57 | 58 | 59 | #' Get a subset from a text 60 | #' 61 | #' @param words List of words 62 | #' @param prob Probability with which to keep a word 63 | #' @return List with two objects. First object is the new text. Second object is a vector 64 | #' of length number of words with 0s and 1s, indicating whether a word is in the new 65 | #' sentence (1) or not (0) 66 | draw_combination = function(words, prob=0.5){ 67 | # Create combination 68 | combi = rbinom(n = length(words), size = 1, prob = prob) 69 | names(combi) = words 70 | df = data.frame(t(combi)) 71 | # Create text 72 | new_text = paste(words[which(combi==1)], collapse = ' ') 73 | list(text = new_text, 74 | combi = df) 75 | } 76 | 77 | 78 | #'Create variations of a text 79 | #' 80 | #'@param text The text 81 | #'@param pred_fun The prediction function from the machine learning model. 82 | #' It should contain the complete pipeline: take the raw text, do all the pre-processing 83 | #' and do the prediction. Returned prediction should be a data.frame with one column per class 84 | #'@param prob Probability with which to keep a word 85 | #'@param n_variations Number of variations to create 86 | #'@param class The class for which to create the predictions 87 | #'@return data.frame for a local linear model, containing binary features for word occurence 88 | #'weights for distance to original sentence and the predictions for the chosen class. 89 | create_variations = function(text, pred_fun, prob=0.5, n_variations = 100, class, round.to = 2){ 90 | tokenized = tokenize(text) 91 | df = data.frame(lapply(tokenized, function(x) 1)) 92 | names(df) = tokenized 93 | 94 | combinations = lapply(1:n_variations, function(x){ 95 | draw_combination(tokenized, prob=prob) 96 | }) 97 | 98 | texts = as.vector(sapply(combinations, function(x) x['text'])) 99 | 100 | features = data.frame(data.table::rbindlist(sapply(combinations, function(x) x['combi']))) 101 | weights = round(rowSums(features) / ncol(features), round.to) 102 | predictions = round(pred_fun(texts)[,class], round.to) 103 | 104 | cbind(features, pred=predictions, weights = weights) 105 | } 106 | 107 | 108 | #' Explain the classification of a text 109 | #' 110 | #'@param text The text for which to explain the classification 111 | #'@param pred_fun The prediction function from the machine learning model. 112 | #' It should contain the complete pipeline: take the raw text, do all the pre-processing 113 | #' and do the prediction. 
Returned prediction should be a data.frame with one column per class 114 | #'@param prob The probability to keep a word in the variations 115 | #'@param n_variations The number of text variations to create 116 | #'@param K The number of features to use for the explanation 117 | #'@param case The ID of the observation 118 | #'@param class The class for which to create the explanations 119 | explain_text = function(text, pred_fun, prob=0.9, n_variations=500, K = 3, case=1, class){ 120 | stopifnot(K >= 1) 121 | df = create_variations(text, pred_fun = pred_fun, prob = prob, n_variations = n_variations, class=class) 122 | mod = glm(pred ~ . - weights, data =df , weights=df$weights, family = 'binomial') 123 | coefs = coef(mod) 124 | coefs = coefs[names(coefs) != '(Intercept)'] 125 | names(coefs) = tokenize(text) 126 | coefs = coefs[base::order(abs(coefs), decreasing = TRUE)] 127 | coefs = coefs[1:K] 128 | # Create explanation compatible to R-LIME format 129 | tibble(case = case, 130 | label = class, 131 | label_prob = pred_fun(text)[, class], 132 | model_intercept = coef(mod)['(Intercept)'], 133 | feature = names(coefs), 134 | feature_value = names(coefs), 135 | feature_weight = coefs, 136 | feature_desc = names(coefs), 137 | data = text, 138 | prediction = list(pred_fun(text))) 139 | } 140 | -------------------------------------------------------------------------------- /R/ggplot-theme.R: -------------------------------------------------------------------------------- 1 | # load libraries 2 | library("ggplot2") 3 | library("viridis") 4 | 5 | # define graphics theme 6 | my_theme = function(legend.position='right'){ 7 | theme_bw() %+replace% 8 | theme(legend.position=legend.position) 9 | } 10 | 11 | theme_set(my_theme()) 12 | 13 | 14 | default_color = "azure4" 15 | -------------------------------------------------------------------------------- /R/knitr_options.R: -------------------------------------------------------------------------------- 1 | library("knitr") 2 | library('tm') 3 | library('rpart') 4 | library('mlr') 5 | library('dplyr') 6 | library('ggplot2') 7 | library('tidyr') 8 | library('partykit') 9 | library('memoise') 10 | library('pre') 11 | library('iml') 12 | 13 | 14 | opts_chunk$set( 15 | echo = FALSE, 16 | message = FALSE, 17 | warning = FALSE, 18 | fig.path = "images/", 19 | collapse = TRUE, 20 | dev = "CairoPNG", 21 | dpi = 150, 22 | fig.height = 5, 23 | fig.width = 7, 24 | dev.args = list(pointsize = 20) 25 | ) 26 | 27 | output <- opts_knit$get("rmarkdown.pandoc.to") 28 | -------------------------------------------------------------------------------- /R/lime.R: -------------------------------------------------------------------------------- 1 | #' Kernel function 2 | #' 3 | #' @param d Distance between center and point 4 | #' @param kernel_width Width of kernel 5 | kernel = function(d, kernel_width){ 6 | sqrt(exp(-(d^2) / kernel_width^2)) 7 | } 8 | 9 | #' Get euclidean distances of samples to instances to be explained 10 | #' @param point_explain Vector of scaled features 11 | #' @param points_sample data.frame of scaled features for the sample points 12 | #' @return Vector with distances of samples to instance to be explained 13 | get_distances = function(point_explain, points_sample){ 14 | # euclidean distance 15 | apply(points_sample, 1, function(x){ 16 | sum((point_explain - x)^2) 17 | }) 18 | } 19 | 20 | # Function for creating y values 21 | get_y = function(x1, x2, noise_prob = 0){ 22 | y = sign(sign(x2-1+abs(x1*2))/3 - sign(x2-.5+abs(x1*3))/3) + 1 23 | y = y * (1 - 
rbinom(length(x1), 1, prob = noise_prob)) 24 | # flip classes 25 | y = 1 - y 26 | y 27 | } 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' Make character vector pretty 2 | pretty_rownames = function(rnames){ 3 | rnames = gsub('^`', '', rnames) 4 | rnames = gsub('`$', '', rnames) 5 | rnames = gsub('`', ':', rnames) 6 | rnames 7 | } 8 | 9 | 10 | year_diff = function(date1, date2){ 11 | day_diff(date1, date2) / 365.25 12 | } 13 | 14 | day_diff = function(date1, date2){ 15 | as.numeric(difftime(as.Date(date1), as.Date(date2), units = 'days')) 16 | } 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Interpretable machine learning 2 | 3 | Explaining the decisions and behaviour of machine learning models. 4 | 5 | [![Build Status](https://travis-ci.org/christophM/interpretable-ml-book.svg?branch=master)](https://travis-ci.org/christophM/interpretable-ml-book) 6 | 7 | ## Summary 8 | You can find the current version of the book here: https://christophm.github.io/interpretable-ml-book/ 9 | 10 | This book is about interpretable machine learning. Machine learning is being built into many products and processes of our daily lives, yet decisions made by machines don't automatically come with an explanation. An explanation increases trust in the decision and in the machine learning model. As the programmer of an algorithm, you want to know whether you can trust the learned model. Did it learn generalizable features? Or are there some odd artifacts in the training data which the algorithm picked up? This book gives an overview of techniques that can be used to make black boxes as transparent as possible and explain decisions. In the first chapter, algorithms that produce simple, interpretable models are introduced, together with instructions on how to interpret the output. The later chapters focus on analyzing complex models and their decisions. 11 | In an ideal future, machines will be able to explain their decisions and make the transition into an algorithmic age more humane. This book is recommended for machine learning practitioners, data scientists, statisticians and also for stakeholders deciding on the use of machine learning and intelligent algorithms. 12 | 13 | 14 | The book is automatically built from the master branch and pushed to gh-pages by Travis CI. 15 | 16 | ## Contributing 17 | 18 | See [how to contribute](CONTRIBUTING.md) 19 | 20 | ## Rendering the book 21 | Clone the repository. 22 | ```{shell} 23 | git clone git@github.com:christophM/interpretable-ml-book.git 24 | ``` 25 | Make sure all dependencies for the book are installed. This book has the structure of an R package, so dependencies can be installed easily; only R and the devtools package are required. 26 | Start an R session in the folder of the book repository and type: 27 | ```{r} 28 | devtools::install_deps() 29 | ``` 30 | 31 | For rendering the book, start an R session and type: 32 | ```{r} 33 | setwd("manuscript") 34 | # first, generate the references 35 | source("../scripts/references.R") 36 | bookdown::render_book('', 'bookdown::gitbook') 37 | ``` 38 | 39 | After rendering, the HTML files of the book will be in the "_book" folder.
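A quick, optional sanity check from the same R session (purely illustrative, not part of the official build steps) is to list some of the generated pages:
```{r}
# working directory is still "manuscript" at this point
head(list.files("_book", pattern = "\\.html$"))
```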
You can either double-click index.html directly or, of course, do it in R: 40 | ```{r} 41 | browseURL('_book/index.html') 42 | ``` 43 | ## Notes on Printing with lulu.com 44 | 45 | - Export from Leanpub in 7.44" x 9.68" (18.9cm x 24.6cm) 46 | - For cover: 7.565" x 9.925" (19.226cm x 25.224cm), see [recommended sizes](https://connect.lulu.com/en/discussion/33279/recommended-book-cover-image-dimensions) 47 | - Font for front cover: Francois One 48 | 49 | ## Writing 50 | 51 | Stuff that works for both leanpub and bookdown: 52 | 53 | - Titles start with #, subtitles with ## and so on. 54 | - Titles can be tagged using {#tag-of-the-title} 55 | - Chapters can be referenced by using `[text of the link](#tag-of-the-title)` 56 | - Figures can be referenced by using `[text of the link](#fig:tag-of-r-chunk-that-produced-figure)` 57 | - Start and end mathematical expressions with `$` (inline) or with `$$` (extra line). They are automatically converted for leanpub with a regular expression. The conversion script only works if there are no spaces in the formula. 58 | - Leave empty lines between formulas and text (if the formula is not inline). Formulas (with $$ ... $$) should be on one line and not span multiple lines (due to the parser). 59 | - References have to be written like this: `[^ref-tag]` and must be at the end of the respective file with `[^ref]: Details of the reference ...`. Make sure the space is included. References are collected in 11-references.Rmd with the script references.R. Make sure not to use `[^ref-tag]: ` anywhere in the text, only at the bottom for the actual reference. 60 | 61 | Printing for proofreading with extra line spacing: 62 | Build the HTML book, go to manuscript/_book/libs/gitbook*/css/style.css, change line-height:1.7 to line-height:2.5, open the local HTML with Chrome, print to PDF with custom margins. 63 | ## Changelog 64 | All notable changes to the book will be documented here. 65 | 66 | ### v1.2 (IN PROGRESS) [html version] 67 | - Added "Preface by the Author" chapter 68 | - Started section on neural network interpretation 69 | - Added chapter on feature visualization 70 | - Added SHAP chapter 71 | - Added Anchors chapter 72 | - Fixed error in logistic regression chapter: Logistic regression was predicting class "Healthy", but the interpretation in the text was for class "Cancer". Now the regression weights have the correct sign. 73 | - Renamed Feature Importance chapter to "Permutation Feature Importance" 74 | - Errata: 75 | - Chapter 4.3 GLM, GAM and more: Logistic regression uses the logit, not the logistic function, as link function. 76 | - Chapter Linear models: Formula for adjusted R-squared was corrected (twice) 77 | - Chapter Decision Rules: Newly introduced mix-up between Healthy and Cancer in the OneR chapter was fixed. 78 | - Chapter RuleFit: The importance of the linear term in the total importance formula was indexed with an $l$ instead of $j$.
79 | - Updated images 80 | 81 | ### v1.1 (2019-03-23) [Print version, ebook version] 82 | - Fixed wrong index in Cook's Distance summation (i -> j) 83 | - Fixed boxplot formula (1.5 instead of 1.58) 84 | - Changed to colorblind-friendly color palettes (viridis) 85 | - Make sure plots work in black and white as well 86 | - Extended counterfactual chapter with MOC (by Susanne Dandl) 87 | 88 | ### v1.0 (2019-02-21) 89 | - Extensive proofreading and polishing 90 | 91 | ### v0.7 (2018-11-21) 92 | - Renamed Definitions chapter to Terminology 93 | - Added mathematical notation to Terminology (former Definitions) chapter 94 | - Added LASSO example 95 | - Restructured lm chapter and added pros/cons 96 | - Renamed "Criteria of Interpretability Methods" to "Taxonomy of Interpretability Methods" 97 | - Added advantages and disadvantages of logistic regression 98 | - Added list of references at the end of book 99 | - Added images to the short stories 100 | - Added drawback of Shapley values: features have to be independent 101 | - Added tree decomposition and feature importance to tree chapter 102 | - Improved explanation of individual prediction in lm 103 | - Added "What's Wrong With my Dog" example to Adversarial Examples 104 | - Added links to data files and pre-processing R scripts 105 | 106 | ### v0.6 (2018-11-02) 107 | - Added chapter on accumulated local effects plots 108 | - Added some advantages and disadvantages to pdps 109 | - Added chapter on extending linear models 110 | - Fixed missing square in the Friedman H-statistic 111 | - Added discussion about training vs. test data in feature importance chapter 112 | - Improved the definitions, also added some graphics 113 | - Added an example with a categorical feature to PDP 114 | 115 | ### v0.5 (2018-08-14) 116 | - Added chapter on influential instances 117 | - Added chapter on Decision Rules 118 | - Added chapter on adversarial examples 119 | - Added chapter on prototypes and criticisms 120 | - Added chapter on counterfactual explanations 121 | - Added section on LIME images (by Verena Haunschmid) 122 | - Added section on when we don't need interpretability 123 | - Renamed chapter: Human-style Explanations -> Human-friendly Explanations 124 | 125 | ### v0.4 (2018-05-23) 126 | - Added chapter on global surrogate models 127 | - Added improved Shapley pictograms 128 | - Added acknowledgements chapter 129 | - Added feature interaction chapter 130 | - Improved example in partial dependence plot chapter 131 | - The weights in the LIME text chapter were shown with the wrong words. This has been fixed. 132 | - Improved introduction text 133 | - Added chapter about the future of interpretability 134 | - Added Criteria for Interpretability Methods 135 | 136 | ### v0.3 (2018-04-24) 137 | - Reworked the Feature Importance Chapter 138 | - Added third short story 139 | - Removed xkcd comic 140 | - Merged introduction and about the book chapters 141 | - Added pros & cons to pdp and ice chapters 142 | - Started using the iml package for plots in ice and pdp 143 | - Restructured the book files for Leanpub 144 | - Added a cover 145 | - Added some CSS for nicer formatting 146 | 147 | ### v0.2 (2018-02-13) 148 | - Added chapter about Shapley value explanations 149 | - Added short story chapters 150 | - Added donation links in Preface 151 | - Reworked RuleFit with examples and theory.
152 | - Interpretability chapter extended 153 | - Add chapter on human-style explanations 154 | - Making it easier to collaborate: Travis checks if book can be rendered for pull requests 155 | 156 | ### v0.1 (2017-12-03) 157 | - First release of the Interpretable Machine Learning book 158 | -------------------------------------------------------------------------------- /_build_and_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e # Exit with nonzero exit code if anything fails 3 | 4 | 5 | # Copied from here: https://gist.github.com/domenic/ec8b0fc8ab45f39403dd 6 | 7 | SOURCE_BRANCH="master" 8 | TARGET_BRANCH="gh-pages" 9 | LEANPUB_BRANCH="leanpub" 10 | 11 | BUILD_COMMIT_MSG="Update book (travis build ${TRAVIS_BUILD_NUMBER})" 12 | 13 | BRANCH=$(if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then echo $TRAVIS_BRANCH; else echo $TRAVIS_PULL_REQUEST_BRANCH; fi) 14 | 15 | # Create datasets 16 | Rscript scripts/prepare_data.R 17 | 18 | cd manuscript 19 | # Create references 20 | make -B 11-references.Rmd 21 | # Compile html version of book for gh-pages 22 | make -B html 23 | # Compile md version of book for leanpub 24 | # make -B leanpub 25 | cd .. 26 | 27 | ## Only deploy when on master branch of main repository 28 | if [ "$BRANCH" = "master" -a "$TRAVIS_PULL_REQUEST" = "false" ] ; then 29 | 30 | echo "Deploying master to gh-pages." 31 | # Clone the existing gh-pages for this repo into out/ 32 | # Create a new empty branch if gh-pages doesn't exist yet (should only happen on first deply) 33 | git clone -b $TARGET_BRANCH https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git out 34 | cd out 35 | git rm -rf ./* 36 | cp -r ../manuscript/_book/* ./ 37 | touch .nojekyll 38 | git add .nojekyll 39 | 40 | git add --all ./* 41 | 42 | # Get the deploy key by using Travis's stored variables to decrypt deploy_key.enc 43 | git config credential.helper "store --file=.git/credentials" 44 | echo "https://${GH_TOKEN}:@github.com" > .git/credentials 45 | git commit -m "${BUILD_COMMIT_MSG}" --allow-empty 46 | 47 | # Now that we're all set up, we can push. 48 | git push origin $TARGET_BRANCH 49 | 50 | 51 | # echo "Deploying master to leanpub branch." 52 | # cd ../ 53 | # rm -r out 54 | # git add -f manuscript/*.md 55 | # git add -f images/* 56 | # git commit -m "${BUILD_COMMIT_MSG}" 57 | # git push origin $LEANPUB_BRANCH 58 | 59 | else 60 | echo "Changes are not being deployed, since this is a fork / branch." 
61 | fi 62 | -------------------------------------------------------------------------------- /cover/cover-amazon.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/cover/cover-amazon.xcf -------------------------------------------------------------------------------- /cover/cover-leanpub.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/cover/cover-leanpub.xcf -------------------------------------------------------------------------------- /cover/cover-lulu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/cover/cover-lulu.jpg -------------------------------------------------------------------------------- /cover/cover-lulu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/cover/cover-lulu.png -------------------------------------------------------------------------------- /cover/cover-lulu.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/cover/cover-lulu.xcf -------------------------------------------------------------------------------- /cover/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/cover/cover.png -------------------------------------------------------------------------------- /cover/isbn_barcode.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/cover/isbn_barcode.pdf -------------------------------------------------------------------------------- /data/bike.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/bike.RData -------------------------------------------------------------------------------- /data/cached-anchors-cervical-balanced.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/cached-anchors-cervical-balanced.RDS -------------------------------------------------------------------------------- /data/cached-anchors-cervical.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/cached-anchors-cervical.RDS -------------------------------------------------------------------------------- /data/cached-anchors-edge.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/cached-anchors-edge.RDS 
-------------------------------------------------------------------------------- /data/cached-anchors.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/cached-anchors.RDS -------------------------------------------------------------------------------- /data/cached-sbrl-bike.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/cached-sbrl-bike.RDS -------------------------------------------------------------------------------- /data/cached-sbrl-cervical.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/cached-sbrl-cervical.RDS -------------------------------------------------------------------------------- /data/cervical.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/cervical.RData -------------------------------------------------------------------------------- /data/influence-df.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/influence-df.RData -------------------------------------------------------------------------------- /data/speed_dating_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/speed_dating_data.csv -------------------------------------------------------------------------------- /data/ycomments.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/data/ycomments.RData -------------------------------------------------------------------------------- /iml-book.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Website 16 | -------------------------------------------------------------------------------- /initialize-gh-pages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | git checkout --orphan gh-pages 4 | git rm -rf . 5 | 6 | # create a hidden file .nojekyll 7 | touch .nojekyll 8 | git add .nojekyll 9 | 10 | git commit -m"Initial commit" 11 | git push origin gh-pages -------------------------------------------------------------------------------- /manuscript/00.0-preface.Rmd: -------------------------------------------------------------------------------- 1 | ```{r setup, cache=FALSE, include=FALSE} 2 | devtools::load_all() 3 | is.html = !is.null(output) && output == "html" 4 | only.in.html = "*This chapter is currently only available in this web version. 
ebook and print will follow.*" 5 | 6 | devtools::install_github("viadee/anchorsOnR") 7 | install.packages("../pkg/sbrl_1.2.tar.gz", repos = NULL, type = "source") 8 | ``` 9 | 10 | 11 | # Summary {-} 12 | ```{r cover, cache=FALSE, eval = is.html, out.width=500, fig.align="center"} 13 | knitr::include_graphics('images/title_page.jpg', dpi = NA) 14 | ``` 15 | 16 | Machine learning has great potential for improving products, processes and research. 17 | But **computers usually do not explain their predictions** which is a barrier to the adoption of machine learning. 18 | This book is about making machine learning models and their decisions interpretable. 19 | 20 | After exploring the concepts of interpretability, you will learn about simple, **interpretable models** such as decision trees, decision rules and linear regression. 21 | Later chapters focus on general model-agnostic methods for **interpreting black box models** like feature importance and accumulated local effects and explaining individual predictions with Shapley values and LIME. 22 | 23 | All interpretation methods are explained in depth and discussed critically. 24 | How do they work under the hood? 25 | What are their strengths and weaknesses? 26 | How can their outputs be interpreted? 27 | This book will enable you to select and correctly apply the interpretation method that is most suitable for your machine learning project. 28 | 29 | The book focuses on machine learning models for tabular data (also called relational or structured data) and less on computer vision and natural language processing tasks. 30 | Reading the book is recommended for machine learning practitioners, data scientists, statisticians, and anyone else interested in making machine learning models interpretable. 31 | 32 | 33 | `r if(is.html){"You can buy the PDF and e-book version (epub, mobi) [on leanpub.com](https://leanpub.com/interpretable-machine-learning)."}` 34 | 35 | `r if(is.html){"You can buy the print version [on lulu.com](http://www.lulu.com/shop/christoph-molnar/interpretable-machine-learning/paperback/product-24036234.html)."}` 36 | 37 | **About me:** My name is Christoph Molnar, I'm a statistician and a machine learner. 38 | My goal is to make machine learning interpretable. 39 | 40 | Mail: christoph.molnar.ai@gmail.com 41 | 42 | Website: [https://christophm.github.io/](https://christophm.github.io/) 43 | 44 | Follow me on Twitter! [\@ChristophMolnar](https://twitter.com/ChristophMolnar) 45 | 46 | Cover by [\@YvonneDoinel](https://twitter.com/YvonneDoinel) 47 | 48 | 49 | `r if(is.html){"![Creative Commons License](images/by-nc-sa.png)"}` 50 | 51 | `r if(is.html){"This book is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-nc-sa/4.0/)."}` 52 | 53 | 54 | 55 | # Preface by the Author {-} 56 | 57 | This book started as a side project when I was working as a statistician in clinical research. 58 | I worked 4 days a week, and on my "day off" I worked on side projects. 59 | Eventually, interpretable machine learning became one of my side projects. 60 | At first I had no intention of writing a book. 61 | Instead, I was simply interested in finding out more about interpretable machine learning and was looking for good resources to learn from. 62 | Given the success of machine learning and the importance of interpretability, I expected that there would be tons of books and tutorials on this topic. 
63 | But I only found the relevant research papers and a few blog posts scattered around the internet, but nothing with a good overview. 64 | No books, no tutorials, no overview papers, nothing. 65 | This gap inspired me to start this book. 66 | I ended up writing the book I wished was available when I began my study of interpretable machine learning. 67 | My intention with this book was twofold: to learn for myself and to share this new knowledge with others. 68 | 69 | 70 | I received my bachelor's and master's degree in statistics at the LMU Munich, Germany. 71 | Most of my knowledge about machine learning was self-taught through online courses, competitions, side projects and professional activities. 72 | My statistical background was an excellent basis for getting into machine learning, and especially for interpretability. 73 | In statistics, a major focus is on building interpretable regression models. 74 | After I finished my master's degree in statistics, I decided not to pursue a PhD, because I did not enjoy writing my master's thesis. 75 | Writing just stressed me out too much. 76 | So I took jobs as data scientist in a Fintech start-up and as statistician in clinical research. 77 | After these three years in industry I started writing this book and a few months later I started a PhD in interpretable machine learning. 78 | By starting this book, I regained the joy of writing and it helped me to develop a passion for research. 79 | 80 | This book covers many techniques of interpretable machine learning. 81 | In the first chapters I introduce the concept of interpretability and motivate why interpretability is necessary. 82 | There are even some short stories! 83 | The book discusses the different properties of explanations and what humans think is a good explanation. 84 | Then we will discuss machine learning models that are inherently interpretable, for example regression models and decision trees. 85 | The main focus of this book is on model-agnostic interpretability methods. 86 | Model-agnostic means that these methods can be applied to any machine learning model and are applied after the model has been trained. 87 | The independence of the model makes model-agnostic methods very flexible and powerful. 88 | Some techniques explain how individual predictions were made, like local interpretable model-agnostic explanations (LIME) and Shapley values. 89 | Other techniques describe the average behavior of the model across a dataset. 90 | Here we learn about the partial dependence plot, accumulated local effects, permutation feature importance and many other methods. 91 | A special category are example-based methods that produce data points as explanations. 92 | Counterfactual explanations, prototypes, influential instances and adversarial examples are example-based methods that are discussed in this book. 93 | The book concludes with some reflections on what the future of interpretable machine learning might look like. 94 | 95 | You do not have to read the book from cover to cover, you can jump back and forth and concentrate on the techniques that interest you most. 96 | I only recommend that you start with the introduction and the chapter on interpretability. 97 | Most chapters follow a similar structure and focus on one interpretation method. 98 | The first paragraph summarizes the method. 99 | Then I try to explain the method intuitively without relying on mathematical formulas. 100 | Then we look at the theory of the method to get a deep understanding of how it works. 
101 | You will not be spared here, because the theory will contain formulas. 102 | I believe that a new method is best understood using examples. 103 | Therefore, each method is applied to real data. 104 | Some people say that statisticians are very critical people. 105 | For me, this is true, because each chapter contains critical discussions about advantages and disadvantages of the respective interpretation method. 106 | This book is not an advertisement for the methods, but it should help you decide whether a method works well for your application or not. 107 | In the last section of each chapter, available software implementations are discussed. 108 | 109 | Machine learning has received great attention from many people in research and industry. 110 | Sometimes machine learning is overhyped in the media, but there are many real and impactful applications. 111 | Machine learning is a powerful technology for products, research and automation. 112 | Today machine learning is used, for example, to detect fraudulent financial transactions, recommend movies to watch and classify images. 113 | It is often crucial that the machine learning models are interpretable. 114 | Interpretability helps the developer to debug and improve the model, build trust in the model, justify model predictions and gain insights. 115 | The increased need for machine learning interpretability is a natural consequence of the increased use of machine learning. 116 | This book has become a valuable resource for many people. 117 | Teaching instructors use the book to introduce their students to the concepts of interpretable machine learning. 118 | I received e-mails from various master's and doctoral students who told me that this book was the starting point and most important reference for their theses. 119 | The book has helped applied researchers in fields such as ecology, finance and psychology who use machine learning to understand their data. 120 | Data scientists from industry told me that they use the "Interpretable Machine Learning" book for their work and recommend it to their colleagues. 121 | I am happy that many people can benefit from this book and become experts in model interpretation. 122 | 123 | I would recommend this book to practitioners who want an overview of techniques to make their machine learning models more interpretable. 124 | It is also recommended to students and researchers (and anyone else) who are interested in the topic. 125 | To benefit from this book, you should already have a basic understanding of machine learning. 126 | You should also have a mathematical understanding at university entry level to be able to follow the theory and formulas in this book. 127 | It should also be possible, however, to understand the intuitive description of the method at the beginning of each chapter without mathematics. 128 | 129 | I hope you enjoy the book! 130 | 131 | -------------------------------------------------------------------------------- /manuscript/01-introduction.Rmd: -------------------------------------------------------------------------------- 1 | # Introduction {#intro} 2 | 3 | This book explains to you how to make (supervised) machine learning models interpretable. 4 | The chapters contain some mathematical formulas, but you should be able to understand the ideas behind the methods even without the formulas. 5 | This book is not for people trying to learn machine learning from scratch.
6 | If you are new to machine learning, there are a lot of books and other resources to learn the basics. 7 | I recommend the book "The Elements of Statistical Learning" by Hastie, Tibshirani, and Friedman (2009) [^Hastie] and [Andrew Ng's "Machine Learning" online course](https://www.coursera.org/learn/machine-learning) on the online learning platform coursera.com to start with machine learning. 8 | Both the book and the course are available free of charge! 9 | 10 | New methods for the interpretation of machine learning models are published at breakneck speed. 11 | To keep up with everything that is published would be madness and simply impossible. 12 | That is why you will not find the most novel and fancy methods in this book, but established methods and basic concepts of machine learning interpretability. 13 | These basics prepare you for making machine learning models interpretable. 14 | Internalizing the basic concepts also empowers you to better understand and evaluate any new paper on interpretability published on [arxiv.org](https://arxiv.org/) in the last 5 minutes since you began reading this book (I might be exaggerating the publication rate). 15 | 16 | This book starts with some (dystopian) [short stories](#storytime) that are not needed to understand the book, but hopefully will entertain and make you think. 17 | Then the book explores the concepts of [machine learning interpretability](#interpretability). 18 | We will discuss when interpretability is important and what different types of explanations there are. 19 | Terms used throughout the book can be looked up in the [Terminology chapter](#terminology). 20 | Most of the models and methods explained are presented using real data examples which are described in the [Data chapter](#data). 21 | One way to make machine learning interpretable is to use [interpretable models](#simple), such as linear models or decision trees. 22 | The other option is the use of [model-agnostic interpretation tools](#agnostic) that can be applied to any supervised machine learning model. 23 | The Model-Agnostic Methods chapter deals with methods such as partial dependence plots and permutation feature importance. 24 | Model-agnostic methods work by changing the input of the machine learning model and measuring changes in the prediction output. 25 | Model-agnostic methods that return data instances as explanations are discussed in the chapter [Example Based Explanations](#example-based). 26 | All model-agnostic methods can be further differentiated based on whether they explain global model behavior across all data instances or individual predictions. 27 | The following methods explain the overall behavior of the model: [Partial Dependence Plots](#pdp), [Accumulated Local Effects](#ale), [Feature Interaction](#interaction), [Feature Importance](#feature-importance), [Global Surrogate Models](#global) and [Prototypes and Criticisms](#proto). 28 | To explain individual predictions we have [Local Surrogate Models](#lime), [Shapley Value Explanations](#shapley), [Counterfactual Explanations](#counterfactual) (and closely related: [Adversarial Examples](#adversarial)). 29 | Some methods can be used to explain both aspects of global model behavior and individual predictions: [Individual Conditional Expectation](#ice) and [Influential Instances](#influential). 30 | 31 | The book ends with an optimistic outlook on what [the future of interpretable machine learning](#future) might look like. 
32 | 33 | You can either read the book from beginning to end or jump directly to the methods that interest you. 34 | 35 | I hope you will enjoy the read! 36 | 37 | [^Hastie]: Friedman, Jerome, Trevor Hastie, and Robert Tibshirani. "The elements of statistical learning". www.web.stanford.edu/~hastie/ElemStatLearn/ (2009). 38 | -------------------------------------------------------------------------------- /manuscript/01.3-ml-definitions.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## What Is Machine Learning? 5 | 6 | Machine learning is a set of methods that computers use to make and improve predictions or behaviors based on data. 7 | 8 | For example, to predict the value of a house, the computer would learn patterns from past house sales. 9 | The book focuses on supervised machine learning, which covers all prediction problems where we have a dataset for which we already know the outcome of interest (e.g. past house prices) and want to learn to predict the outcome for new data. 10 | Excluded from supervised learning are, for example, clustering tasks (= unsupervised learning) where we do not have a specific outcome of interest, but want to find clusters of data points. 11 | Also excluded are things like reinforcement learning, where an agent learns to optimize a certain reward by acting in an environment (e.g. a computer playing Tetris). 12 | The goal of supervised learning is to learn a predictive model that maps features of the data (e.g. house size, location, floor type, ...) to an output (e.g. house price). 13 | If the output is categorical, the task is called classification, and if it is numerical, it is called regression. 14 | The machine learning algorithm learns a model by estimating parameters (like weights) or learning structures (like trees). 15 | The algorithm is guided by a score or loss function that is minimized. 16 | In the house value example, the machine minimizes the difference between the predicted house price and the actual sale price. 17 | A fully trained machine learning model can then be used to make predictions for new instances. 18 | 19 | Estimation of house prices, product recommendations, street sign detection, credit default prediction and fraud detection: 20 | All these examples have in common that they can be solved by machine learning. 21 | The tasks are different, but the approach is the same: 22 | Step 1: Data collection. 23 | The more, the better. 24 | The data must contain the outcome you want to predict and additional information from which to make the prediction. 25 | For a street sign detector ("Is there a street sign in the image?"), you would collect street images and label whether a street sign is visible or not. 26 | For a credit default predictor, you need past data on actual loans, information on whether the customers were in default with their loans, and data that will help you make predictions, such as income, past credit defaults, and so on. 27 | For an automatic house value estimator program, you could collect data from past house sales and information about the real estate such as size, location, and so on. 28 | Step 2: Enter this information into a machine learning algorithm that generates a sign detector model, a credit rating model or a house value estimator. 29 | Step 3: Use the model with new data. 30 | Integrate the model into a product or process, such as a self-driving car, a credit application process or a real estate marketplace website.
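
To make these three steps more concrete, here is a minimal sketch in R with a small, made-up house price dataset (the numbers and feature names are invented for illustration and are not one of the book's datasets):

```{r ml-workflow-sketch, eval = FALSE}
# Step 1: Collect data (invented values, for illustration only).
house_data = data.frame(
  size = c(60, 85, 120, 45, 200, 95),     # living area in square meters
  location = c(2, 3, 3, 1, 3, 2),         # location rating from 1 (bad) to 3 (good)
  price = c(220, 330, 450, 150, 780, 360) # sale price in thousand Euros
)
# Step 2: Feed the data to a learning algorithm, here a simple linear regression.
house_model = lm(price ~ size + location, data = house_data)
# Step 3: Use the trained model to predict the price of a new house.
new_house = data.frame(size = 100, location = 2)
predict(house_model, newdata = new_house)
```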
31 | 32 | Machines surpass humans in many tasks, such as playing chess (or more recently Go) or predicting the weather. 33 | Even if the machine is as good as a human or a bit worse at a task, there remain great advantages in terms of speed, reproducibility and scaling. 34 | Once implemented, a machine learning model can complete a task much faster than humans, reliably delivers consistent results and can be copied infinitely. 35 | Replicating a machine learning model on another machine is fast and cheap. 36 | The training of a human for a task can take decades (especially when they are young) and is very costly. 37 | A major disadvantage of using machine learning is that insights about the data and the task the machine solves are hidden in increasingly complex models. 38 | You need millions of numbers to describe a deep neural network, and there is no way to understand the model in its entirety. 39 | Other models, such as the random forest, consist of hundreds of decision trees that "vote" for predictions. 40 | To understand how the decision was made, you would have to look into the votes and structures of each of the hundreds of trees. 41 | That just does not work no matter how clever you are or how good your working memory is. 42 | The best performing models are often blends of several models (also called ensembles) that cannot be interpreted, even if each single model could be interpreted. 43 | If you focus only on performance, you will automatically get more and more opaque models. 44 | Just take a look at [interviews with winners on the kaggle.com machine learning competition platform](http://blog.kaggle.com/): 45 | The winning models were mostly ensembles of models or very complex models such as boosted trees or deep neural networks. 46 | 47 | 48 | 49 | ## Terminology {#terminology} 50 | 51 | To avoid confusion due to ambiguity, here are some definitions of terms used in this book: 52 | 53 | An **Algorithm** is a set of rules that a machine follows to achieve a particular goal[^algorithm]. 54 | An algorithm can be considered as a recipe that defines the inputs, the output and all the steps needed to get from the inputs to the output. 55 | Cooking recipes are algorithms where the ingredients are the inputs, the cooked food is the output, and the preparation and cooking steps are the algorithm instructions. 56 | 57 | 58 | **Machine Learning** is a set of methods that allow computers to learn from data to make and improve predictions (for example cancer, weekly sales, credit default). 59 | Machine learning is a paradigm shift from "normal programming" where all instructions must be explicitly given to the computer to "indirect programming" that takes place through providing data. 60 | 61 | ```{r programing-vs-ml, echo = FALSE, fig.cap = "", width = 400} 62 | knitr::include_graphics("images/programing-ml.png") 63 | ``` 64 | 65 | A **Learner** or **Machine Learning Algorithm** is the program used to learn a machine learning model from data. 66 | Another name is "inducer" (e.g. "tree inducer"). 67 | 68 | 69 | A **Machine Learning Model** is the learned program that maps inputs to predictions. 70 | This can be a set of weights for a linear model or for a neural network. 71 | Other names for the rather unspecific word "model" are "predictor" or - depending on the task - "classifier" or "regression model". 72 | In formulas, the trained machine learning model is called $\hat{f}$ or $\hat{f}(x)$. 73 | 74 | ```{r learner-definition, fig.cap = "A learner learns a model from labeled training data.
The model is used to make predictions.", echo = FALSE, width = 500} 75 | knitr::include_graphics("images/learner.png") 76 | ``` 77 | 78 | 79 | A **Black Box Model** is a system that does not reveal its internal mechanisms. 80 | In machine learning, "black box" describes models that cannot be understood by looking at their parameters (e.g. a neural network). 81 | The opposite of a black box is sometimes referred to as **White Box**, and is referred to in this book as [interpretable model](#simple). 82 | [Model-agnostic methods](#agnostic) for interpretability treat machine learning models as black boxes, even if they are not. 83 | 84 | ```{r black-box, echo = FALSE, fig.cap = "", width = 1500} 85 | knitr::include_graphics("images/iml.png") 86 | ``` 87 | 88 | 89 | **Interpretable Machine Learning** refers to methods and models that make the behavior and predictions of machine learning systems understandable to humans. 90 | 91 | 92 | A **Dataset** is a table with the data from which the machine learns. 93 | The dataset contains the features and the target to predict. 94 | When used to induce a model, the dataset is called training data. 95 | 96 | An **Instance** is a row in the dataset. 97 | Other names for 'instance' are: (data) point, example, observation. 98 | An instance consists of the feature values $x^{(i)}$ and, if known, the target outcome $y_i$. 99 | 100 | The **Features** are the inputs used for prediction or classification. 101 | A feature is a column in the dataset. 102 | Throughout the book, features are assumed to be interpretable, meaning it is easy to understand what they mean, like the temperature on a given day or the height of a person. 103 | The interpretability of the features is a big assumption. 104 | But if it is hard to understand the input features, it is even harder to understand what the model does. 105 | The matrix with all features is called X and $x^{(i)}$ for a single instance. 106 | The vector of a single feature for all instances is $x_j$ and the value for the feature j and instance i is $x^{(i)}_j$. 107 | 108 | The **Target** is the information the machine learns to predict. 109 | In mathematical formulas, the target is usually called y or $y_i$ for a single instance. 110 | 111 | A **Machine Learning Task** is the combination of a dataset with features and a target. 112 | Depending on the type of the target, the task can be for example classification, regression, survival analysis, clustering, or outlier detection. 113 | 114 | The **Prediction** is what the machine learning model "guesses" what the target value should be based on the given features. 115 | In this book, the model prediction is denoted by $\hat{f}(x^{(i)})$ or $\hat{y}$. 116 | 117 | 118 | 119 | [^algorithm]: "Definition of Algorithm." https://www.merriam-webster.com/dictionary/algorithm. (2017). 120 | -------------------------------------------------------------------------------- /manuscript/03-datasets.Rmd: -------------------------------------------------------------------------------- 1 | ```{r, message = FALSE, warning = FALSE, echo = FALSE} 2 | devtools::load_all() 3 | ``` 4 | 5 | 6 | # Datasets {#data} 7 | 8 | Throughout the book, all models and techniques are applied to real datasets that are freely available online. 9 | We will use different datasets for different tasks: 10 | Classification, regression and text classification. 
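
If you have the book's repository set up, the processed datasets can be loaded directly, since the setup chunk above loads the book's R package with `devtools::load_all()`. A small sketch:

```{r load-datasets-sketch, eval = FALSE}
# Load the processed datasets that ship with the book's repository.
data(bike)      # bike rentals (regression)
data(ycomments) # YouTube comments (text classification)
data(cervical)  # cervical cancer risk factors (classification)
head(bike)
```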
11 | 12 | ## Bike Rentals (Regression) {#bike-data} 13 | This dataset contains daily counts of rented bicycles from the bicycle rental company [Capital-Bikeshare](https://www.capitalbikeshare.com/) in Washington D.C., along with weather and seasonal information. 14 | The data was kindly made openly available by Capital-Bikeshare. 15 | Fanaee-T and Gama (2013)[^Fanaee] added weather data and season information. 16 | The goal is to predict how many bikes will be rented depending on the weather and the day. 17 | The data can be downloaded from the [UCI Machine Learning Repository](http://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset). 18 | 19 | 20 | New features were added to the dataset and not all original features were used for the examples in this book. 21 | Here is the list of features that were used: 22 | 23 | - Count of bicycles including both casual and registered users. 24 | The count is used as the target in the regression task. 25 | - The season, either spring, summer, fall or winter. 26 | - Indicator whether the day was a holiday or not. 27 | - The year, either 2011 or 2012. 28 | - Number of days since the 01.01.2011 (the first day in the dataset). 29 | This feature was introduced to take account of the trend over time. 30 | - Indicator whether the day was a working day or weekend. 31 | - The weather situation on that day. One of: 32 | - clear, few clouds, partly cloudy, cloudy 33 | - mist + clouds, mist + broken clouds, mist + few clouds, mist 34 | - light snow, light rain + thunderstorm + scattered clouds, light rain + scattered clouds 35 | - heavy rain + ice pallets + thunderstorm + mist, snow + mist 36 | - Temperature in degrees Celsius. 37 | - Relative humidity in percent (0 to 100). 38 | - Wind speed in km per hour. 39 | 40 | 41 | For the examples in this book, the data has been slightly processed. 42 | You can find the processing R-script in the book's [Github repository](https://github.com/christophM/interpretable-ml-book/blob/master/R/get-bike-sharing-dataset.R) together with the [final RData file](https://github.com/christophM/interpretable-ml-book/blob/master/data/bike.RData). 43 | 44 | 45 | 46 | 47 | ## YouTube Spam Comments (Text Classification) {#spam-data} 48 | As an example for text classification we work with 1956 comments from 5 different YouTube videos. 49 | Thankfully, the authors who used this dataset in an article on spam classification made the data [freely available](https://archive.ics.uci.edu/ml/datasets/YouTube+Spam+Collection) (Alberto, Lochter, and Almeida (2015)[^Alberto]). 50 | 51 | The comments were collected via the YouTube API from five of the ten most viewed videos on YouTube in the first half of 2015. 52 | All 5 are music videos. 53 | One of them is "Gangnam Style" by Korean artist Psy. 54 | The other artists were Katy Perry, LMFAO, Eminem, and Shakira. 55 | 56 | 57 | Checkout some of the comments. 58 | The comments were manually labeled as spam or legitimate. 59 | Spam was coded with a "1" and legitimate comments with a "0". 60 | 61 | ```{r show-dating-data-TubeSpam} 62 | data(ycomments) 63 | knitr::kable(ycomments[1:10, c('CONTENT', 'CLASS')]) 64 | ``` 65 | 66 | You can also go to YouTube and take a look at the comment section. 67 | But please do not get caught in YouTube hell and end up watching videos of monkeys stealing and drinking cocktails from tourists on the beach. 68 | The Google Spam detector has also probably changed a lot since 2015. 
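
To get a quick impression of the class balance, you can tabulate the labels (a small sketch, assuming the data is loaded as in the chunk above):

```{r ycomments-class-balance, eval = FALSE}
# How many comments are labeled as spam (1) and as legitimate (0)?
table(ycomments$CLASS)
# The same as proportions.
prop.table(table(ycomments$CLASS))
```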
69 | 70 | [Watch the view-record breaking video "Gangnam Style" here](https://www.youtube.com/watch?v=9bZkp7q19f0&feature=player_embedded). 71 | 72 | If you want to play around with the data, you can find the [RData file](https://github.com/christophM/interpretable-ml-book/blob/master/data/ycomments.RData) along with the [R-script](https://github.com/christophM/interpretable-ml-book/blob/master/R/get-SpamTube-dataset.R) with some convenience functions in the book's Github repository. 73 | 74 | 75 | 76 | ## Risk Factors for Cervical Cancer (Classification) {#cervical} 77 | 78 | The cervical cancer dataset contains indicators and risk factors for predicting whether a woman will get cervical cancer. 79 | The features include demographic data (such as age), lifestyle, and medical history. 80 | The data can be downloaded from the [UCI Machine Learning repository](https://archive.ics.uci.edu/ml/datasets/Cervical+cancer+%28Risk+Factors%29) and is described by Fernandes, Cardoso, and Fernandes (2017)[^Fernandes]. 81 | 82 | The subset of data features used in the book's examples are: 83 | 84 | - Age in years 85 | - Number of sexual partners 86 | - First sexual intercourse (age in years) 87 | - Number of pregnancies 88 | - Smoking yes or no 89 | - Smoking (in years) 90 | - Hormonal contraceptives yes or no 91 | - Hormonal contraceptives (in years) 92 | - Intrauterine device yes or no (IUD) 93 | - Number of years with an intrauterine device (IUD) 94 | - Has patient ever had a sexually transmitted disease (STD) yes or no 95 | - Number of STD diagnoses 96 | - Time since first STD diagnosis 97 | - Time since last STD diagnosis 98 | - The biopsy results "Healthy" or "Cancer". Target outcome. 99 | 100 | The biopsy serves as the gold standard for diagnosing cervical cancer. 101 | For the examples in this book, the biopsy outcome was used as the target. 102 | Missing values for each column were imputed by the mode (most frequent value), which is probably a bad solution, since the true answer could be correlated with the probability that a value is missing. 103 | There is probably a bias because the questions are of a very private nature. 104 | But this is not a book about missing data imputation, so the mode imputation will have to suffice for the examples. 105 | 106 | To reproduce the examples of this book with this dataset, find the 107 | [preprocessing R-script](https://github.com/christophM/interpretable-ml-book/blob/master/R/get-cervical-cancer-dataset.R) and the 108 | [final RData file](https://github.com/christophM/interpretable-ml-book/blob/master/data/cervical.RData) in the book's Github repository. 109 | 110 | [^Fanaee]: Fanaee-T, Hadi, and Joao Gama. "Event labeling combining ensemble detectors and background knowledge." Progress in Artificial Intelligence. Springer Berlin Heidelberg, 1–15. doi:10.1007/s13748-013-0040-3. (2013). 111 | 112 | [^Alberto]: Alberto, Túlio C, Johannes V Lochter, and Tiago A Almeida. "Tubespam: comment spam filtering on YouTube." In Machine Learning and Applications (Icmla), Ieee 14th International Conference on, 138–43. IEEE. (2015). 113 | 114 | [^Fernandes]: Fernandes, Kelwin, Jaime S Cardoso, and Jessica Fernandes. "Transfer learning with partial observability applied to cervical cancer screening." In Iberian Conference on Pattern Recognition and Image Analysis, 243–50. Springer. (2017). 
115 | -------------------------------------------------------------------------------- /manuscript/04.1-interpretable-models.Rmd: -------------------------------------------------------------------------------- 1 | # Interpretable Models {#simple} 2 | 3 | The easiest way to achieve interpretability is to use only a subset of algorithms that create interpretable models. 4 | Linear regression, logistic regression and the decision tree are commonly used interpretable models. 5 | 6 | In the following chapters we will talk about these models. 7 | Not in detail, only the basics, because there is already a ton of books, videos, tutorials, papers and more material available. 8 | We will focus on how to interpret the models. 9 | The book discusses [linear regression](#limo), [logistic regression](#logistic), [other linear regression extensions](#extend-lm), [decision trees](#tree), [decision rules](#rules) and [the RuleFit algorithm](#rulefit) in more detail. 10 | It also lists [other interpretable models](#other-interpretable). 11 | 12 | All interpretable models explained in this book are interpretable on a modular level, with the exception of the k-nearest neighbors method. 13 | The following table gives an overview of the interpretable model types and their properties. 14 | A model is linear if the association between features and target is modelled linearly. 15 | A model with monotonicity constraints ensures that the relationship between a feature and the target outcome always goes in the same direction over the entire range of the feature: 16 | An increase in the feature value either always leads to an increase or always to a decrease in the target outcome. 17 | Monotonicity is useful for the interpretation of a model because it makes it easier to understand a relationship. 18 | Some models can automatically include interactions between features to predict the target outcome. 19 | You can include interactions in any type of model by manually creating interaction features. 20 | Interactions can improve predictive performance, but too many or too complex interactions can hurt interpretability. 21 | Some models handle only regression, some only classification, and still others both. 22 | 23 | From this table, you can select a suitable interpretable model for your task, either regression (regr) or classification (class): 24 | 25 | | Algorithm |Linear |Monotone|Interaction|Task| 26 | |:--------------|:----|:----|:------|:--------| 27 | | Linear regression | Yes | Yes | No | regr | 28 | | Logistic regression | No | Yes | No | class| 29 | | Decision trees | No | Some | Yes | class,regr| 30 | | RuleFit| Yes | No | Yes| class,regr | 31 | | Naive Bayes | No | Yes | No | class | 32 | | k-nearest neighbors | No | No | No | class,regr| 33 | 34 | -------------------------------------------------------------------------------- /manuscript/04.8-interpretable-other.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## Other Interpretable Models {#other-interpretable} 5 | 6 | The list of interpretable models is constantly growing and of unknown size. 7 | It includes simple models such as linear models, decision trees and naive Bayes, but also more complex ones that combine or modify non-interpretable machine learning models to make them more interpretable. 8 | Especially publications on the latter type of models are currently being produced at high frequency and it is hard to keep up with developments. 
9 | The book teases only the Naive Bayes classifier and k-nearest neighbors in this chapter. 10 | 11 | ### Naive Bayes Classifier 12 | 13 | The Naive Bayes classifier uses the Bayes' theorem of conditional probabilities. 14 | For each feature, it calculates the probability for a class depending on the value of the feature. 15 | The Naive Bayes classifier calculates the class probabilities for each feature independently, which is equivalent to a strong (= naive) assumption of independence of the features. 16 | Naive Bayes is a conditional probability model and models the probability of a class $C_k$ as follows: 17 | 18 | $$P(C_k|x)=\frac{1}{Z}P(C_k)\prod_{i=1}^n{}P(x_i|C_k)$$ 19 | 20 | The term Z is a scaling parameter that ensures that the sum of probabilities for all classes is 1 (otherwise they would not be probabilities). 21 | The conditional probability of a class is the class probability times the probability of each feature given the class, normalized by Z. 22 | This formula can be derived by using the Bayes' theorem. 23 | 24 | Naive Bayes is an interpretable model because of the independence assumption. 25 | It can be interpreted on the modular level. 26 | It is very clear for each feature how much it contributes towards a certain class prediction, since we can interpret the conditional probability. 27 | 28 | ### K-Nearest Neighbors 29 | 30 | The k-nearest neighbor method can be used for regression and classification and uses the nearest neighbors of a data point for prediction. 31 | For classification, the k-nearest neighbor method assigns the most common class of the nearest neighbors of an instance. 32 | For regression, it takes the average of the outcome of the neighbors. 33 | The tricky parts are finding the right k and deciding how to measure the distance between instances, which ultimately defines the neighborhood. 34 | 35 | 36 | The k-nearest neighbor model differs from the other interpretable models presented in this book because it is an instance-based learning algorithm. 37 | How can k-nearest neighbors be interpreted? 38 | First of all, there are no parameters to learn, so there is no interpretability on a modular level. 39 | Furthermore, there is a lack of global model interpretability because the model is inherently local and there are no global weights or structures explicitly learned. 40 | Maybe it is interpretable at the local level? 41 | To explain a prediction, you can always retrieve the k neighbors that were used for the prediction. 42 | Whether the model is interpretable depends solely on the question whether you can 'interpret' a single instance in the dataset. 43 | If an instance consists of hundreds or thousands of features, then it is not interpretable, I would argue. 44 | But if you have few features or a way to reduce your instance to the most important features, presenting the k-nearest neighbors can give you good explanations. 45 | -------------------------------------------------------------------------------- /manuscript/05.1-agnostic.Rmd: -------------------------------------------------------------------------------- 1 | ```{r, message = FALSE, warning = FALSE, echo = FALSE} 2 | devtools::load_all() 3 | ``` 4 | 5 | # Model-Agnostic Methods {#agnostic} 6 | 7 | Separating the explanations from the machine learning model (= model-agnostic interpretation methods) has some advantages (Ribeiro, Singh, and Guestrin 2016[^Ribeiro2016]). 8 | The great advantage of model-agnostic interpretation methods over model-specific ones is their flexibility. 
9 | Machine learning developers are free to use any machine learning model they like, because the interpretation methods can be applied to any model. 10 | Anything that builds on an interpretation of a machine learning model, such as a graphic or user interface, also becomes independent of the underlying machine learning model. 11 | Typically, not just one, but many types of machine learning models are evaluated to solve a task, and when comparing models in terms of interpretability, it is easier to work with model-agnostic explanations, because the same method can be used for any type of model. 12 | 13 | An alternative to model-agnostic interpretation methods is to use only [interpretable models](#simple), which often has the big disadvantage that predictive performance is lost compared to other machine learning models and you limit yourself to one type of model. 14 | The other alternative is to use model-specific interpretation methods. 15 | The disadvantage of this is that it also binds you to one model type and it will be difficult to switch to something else. 16 | 17 | Desirable aspects of a model-agnostic explanation system are (Ribeiro, Singh, and Guestrin 2016): 18 | 19 | - **Model flexibility:** 20 | The interpretation method can work with any machine learning model, such as random forests and deep neural networks. 21 | - **Explanation flexibility:** 22 | You are not limited to a certain form of explanation. 23 | In some cases it might be useful to have a linear formula, in other cases a graphic with feature importances. 24 | - **Representation flexibility:** 25 | The explanation system should be able to use a different feature representation than the model being explained. 26 | For a text classifier that uses abstract word embedding vectors, it might be preferable to use the presence of individual words for the explanation. 27 | 28 | 29 | **The bigger picture** 30 | 31 | Let us take a high level look at model-agnostic interpretability. 32 | We capture the world by collecting data, and abstract it further by learning to predict the data (for the task) with a machine learning model. 33 | Interpretability is just another layer on top that helps humans understand. 34 | 35 | ```{r bigpicture, fig.cap="The big picture of explainable machine learning. The real world goes through many layers before it reaches the human in the form of explanations.", out.width=700} 36 | knitr::include_graphics("images/big-picture.png") 37 | ``` 38 | 39 | The lowest layer is the **World**. 40 | This could literally be nature itself, like the biology of the human body and how it reacts to medication, but also more abstract things like the real estate market. 41 | The World layer contains everything that can be observed and is of interest. 42 | Ultimately, we want to learn something about the World and interact with it. 43 | 44 | The second layer is the **Data** layer. 45 | We have to digitize the World in order to make it processable for computers and also to store information. 46 | The Data layer contains anything from images, texts, tabular data and so on. 47 | 48 | By fitting machine learning models based on the Data layer, we get the **Black Box Model** layer. 49 | Machine learning algorithms learn with data from the real world to make predictions or find structures. 50 | 51 | Above the Black Box Model layer is the **Interpretability Methods** layer, which helps us deal with the opacity of machine learning models. 52 | What were the most important features for a particular diagnosis?
53 | Why was a financial transaction classified as fraud? 54 | 55 | The last layer is occupied by a **Human**. 56 | Look! This one waves to you because you are reading this book and helping to provide better explanations for black box models! 57 | Humans are ultimately the consumers of the explanations. 58 | 59 | This multi-layered abstraction also helps to understand the differences in approaches between statisticians and machine learning practitioners. 60 | Statisticians deal with the Data layer, such as planning clinical trials or designing surveys. 61 | They skip the Black Box Model layer and go right to the Interpretability Methods layer. 62 | Machine learning specialists also deal with the Data layer, such as collecting labeled samples of skin cancer images or crawling Wikipedia. 63 | Then they train a black box machine learning model. 64 | The Interpretability Methods layer is skipped and humans directly deal with the black box model predictions. 65 | It's great that interpretable machine learning fuses the work of statisticians and machine learning specialists. 66 | 67 | Of course this graphic does not capture everything: 68 | Data could come from simulations. 69 | Black box models also output predictions that might not even reach humans, but only supply other machines, and so on. 70 | But overall it is a useful abstraction to understand how interpretability becomes this new layer on top of machine learning models. 71 | 72 | 73 | [^Ribeiro2016]: Ribeiro, Marco Tulio, Sameer Singh, and Carlos Guestrin. "Model-agnostic interpretability of machine learning." ICML Workshop on Human Interpretability in Machine Learning. (2016). 74 | -------------------------------------------------------------------------------- /manuscript/05.3-agnostic-ice.Rmd: -------------------------------------------------------------------------------- 1 | ```{r, message = FALSE, warning = FALSE, echo = FALSE} 2 | devtools::load_all() 3 | set.seed(42) 4 | ``` 5 | 6 | 7 | 8 | ## Individual Conditional Expectation (ICE) {#ice} 9 | 10 | Individual Conditional Expectation (ICE) plots display one line per instance that shows how the instance's prediction changes when a feature changes. 11 | 12 | The partial dependence plot for the average effect of a feature is a global method because it does not focus on specific instances, but on an overall average. 13 | The equivalent to a PDP for individual data instances is called individual conditional expectation (ICE) plot (Goldstein et al. 2017[^Goldstein2017]). 14 | An ICE plot visualizes the dependence of the prediction on a feature for *each* instance separately, resulting in one line per instance, compared to one line overall in partial dependence plots. 15 | A PDP is the average of the lines of an ICE plot. 16 | The values for a line (and one instance) can be computed by keeping all other features the same, creating variants of this instance by replacing the feature's value with values from a grid and making predictions with the black box model for these newly created instances. 17 | The result is a set of points for an instance with the feature value from the grid and the respective predictions. 18 | 19 | What is the point of looking at individual expectations instead of partial dependencies? 20 | Partial dependence plots can obscure a heterogeneous relationship created by interactions. 21 | PDPs can show you what the average relationship between a feature and the prediction looks like. 
22 | This only works well if the interactions between the features for which the PDP is calculated and the other features are weak. 23 | In case of interactions, the ICE plot will provide much more insight. 24 | 25 | A more formal definition: 26 | In ICE plots, for each instance in $\{(x_{S}^{(i)},x_{C}^{(i)})\}_{i=1}^N$ the curve $\hat{f}_S^{(i)}$ is plotted against $x^{(i)}_{S}$, while $x^{(i)}_{C}$ remains fixed. 27 | 28 | ### Examples 29 | 30 | Let's go back to the [cervical cancer dataset](#cervical) and see how the prediction of each instance is associated with the feature "Age". 31 | We will analyze a random forest that predicts the probability of cancer for a woman given risk factors. 32 | In the [partial dependence plot](#pdp) we have seen that the cancer probability increases around the age of 50, but is this true for every woman in the dataset? 33 | The ICE plot reveals that for most women the age effect follows the average pattern of an increase at age 50, but there are some exceptions: 34 | For the few women that have a high predicted probability at a young age, the predicted cancer probability does not change much with age. 35 | 36 | ```{r ice-cervical, fig.cap="ICE plot of cervical cancer probability by age. Each line represents one woman. For most women there is an increase in predicted cancer probability with increasing age. For some women with a predicted cancer probability above 0.4, the prediction does not change much at higher age."} 37 | library("mlr") 38 | library("ggplot2") 39 | data(cervical) 40 | set.seed(43) 41 | cervical_subset_index = sample(1:nrow(cervical), size = 300) 42 | cervical_subset = cervical[cervical_subset_index, ] 43 | cervical.task = makeClassifTask(data = cervical, target = "Biopsy") 44 | mod = mlr::train(mlr::makeLearner(cl = 'classif.randomForest', id = 'cervical-rf', predict.type = 'prob'), cervical.task) 45 | pred.cervical = Predictor$new(mod, cervical_subset, class = "Cancer") 46 | ice = FeatureEffect$new(pred.cervical, "Age", method = "ice")$plot() + 47 | scale_color_discrete(guide='none') + 48 | scale_y_continuous('Predicted cancer probability') 49 | ice 50 | ``` 51 | 52 | The next figure shows ICE plots for the [bike rental prediction](#bike-data). 53 | The underlying prediction model is a random forest. 54 | 55 | ```{r ice-bike, fig.cap='ICE plots of predicted bicycle rentals by weather conditions. The same effects can be observed as in the partial dependence plots.'} 56 | set.seed(42) 57 | data("bike") 58 | bike.subset.index = sample(1:nrow(bike), size = 300) 59 | bike.subset = bike[bike.subset.index,] 60 | bike.task = makeRegrTask(data = bike, target = "cnt") 61 | mod.bike = mlr::train(mlr::makeLearner(cl = 'regr.randomForest', id = 'bike-rf'), bike.task) 62 | pred.bike = Predictor$new(mod.bike, bike.subset) 63 | 64 | p1 = FeatureEffect$new(pred.bike, "temp", method = "ice")$plot() + scale_x_continuous("Temperature") + 65 | scale_y_continuous("Predicted bicycle rentals") 66 | p2 = FeatureEffect$new(pred.bike, "hum", method = "ice")$plot() + scale_x_continuous("Humidity") + scale_y_continuous("") 67 | p3 = FeatureEffect$new(pred.bike, "windspeed", method = "ice")$plot() + scale_x_continuous("Windspeed")+ scale_y_continuous("") 68 | gridExtra::grid.arrange(p1, p2, p3, ncol = 3) 69 | ``` 70 | 71 | All curves seem to follow the same course, so there are no obvious interactions. 
72 | That means that the PDP is already a good summary of the relationships between the displayed features and the predicted number of bicycles 73 | 74 | #### Centered ICE Plot 75 | 76 | There is a problem with ICE plots: 77 | Sometimes it can be hard to tell whether the ICE curves differ between individuals because they start at different predictions. 78 | A simple solution is to center the curves at a certain point in the feature and display only the difference in the prediction to this point. 79 | The resulting plot is called centered ICE plot (c-ICE). 80 | Anchoring the curves at the lower end of the feature is a good choice. 81 | The new curves are defined as: 82 | 83 | $$\hat{f}_{cent}^{(i)}=\hat{f}^{(i)}-\mathbf{1}\hat{f}(x^{a},x^{(i)}_{C})$$ 84 | 85 | where $\mathbf{1}$ is a vector of 1's with the appropriate number of dimensions (usually one or two), $\hat{f}$ is the fitted model and x^a^ is the anchor point. 86 | 87 | #### Example 88 | 89 | For example, take the cervical cancer ICE plot for age and center the lines on the youngest observed age: 90 | 91 | ```{r ice-cervical-centered, fig.cap=sprintf("Centered ICE plot for predicted cancer probability by age. Lines are fixed to 0 at age %i. Compared to age %i, the predictions for most women remain unchanged until the age of 45 where the predicted probability increases.", min(cervical_subset$Age), min(cervical_subset$Age))} 92 | library("iml") 93 | predictor = Predictor$new(mod, data = cervical_subset, class = "Cancer") 94 | ice = FeatureEffect$new(predictor, feature = "Age", center.at = min(cervical_subset$Age), method = "pdp+ice") 95 | ice$plot() + scale_color_discrete(guide='none') + 96 | scale_y_continuous('Cancer probability difference to age 13') 97 | ``` 98 | 99 | The centered ICE plots make it easier to compare the curves of individual instances. 100 | This can be useful if we do not want to see the absolute change of a predicted value, but the difference in the prediction compared to a fixed point of the feature range. 101 | 102 | Let's have a look at centered ICE plots for the bicycle rental prediction: 103 | 104 | ```{r ice-bike-centered, fig.cap='Centered ICE plots of predicted number of bikes by weather condition. The lines show the difference in prediction compared to the prediction with the respective feature value at its observed minimum.'} 105 | data(bike) 106 | set.seed(43) 107 | bike.subset.index = sample(1:nrow(bike), size = 100) 108 | bike.subset = bike[bike.subset.index,] 109 | 110 | predictor = Predictor$new(mod.bike, data = bike.subset) 111 | ice1 = FeatureEffect$new(predictor, feature = "temp", center.at = min(bike$temp), method = "pdp+ice")$plot() 112 | ice2 = FeatureEffect$new(predictor, feature = "hum", center.at = min(bike$hum), method = "pdp+ice")$plot() 113 | ice3 = FeatureEffect$new(predictor, feature = "windspeed", center.at = min(bike$windspeed), method = "pdp+ice")$plot() 114 | gridExtra::grid.arrange(ice1, ice2, ice3, nrow = 1) 115 | ``` 116 | 117 | #### Derivative ICE Plot 118 | 119 | Another way to make it visually easier to spot heterogeneity is to look at the individual derivatives of the prediction function with respect to a feature. 120 | The resulting plot is called the derivative ICE plot (d-ICE). 121 | The derivatives of a function (or curve) tell you whether changes occur and in which direction they occur. 122 | With the derivative ICE plot, it is easy to spot ranges of feature values where the black box predictions change for (at least some) instances. 
123 | If there is no interaction between the analyzed feature $x_S$ and the other features $x_C$, then the prediction function can be expressed as: 124 | 125 | $$\hat{f}(x)=\hat{f}(x_S,x_C)=g(x_S)+h(x_C),\quad\text{with}\quad\frac{\delta\hat{f}(x)}{\delta{}x_S}=g'(x_S)$$ 126 | 127 | Without interactions, the individual partial derivatives should be the same for all instances. 128 | If they differ, it is due to interactions and it becomes visible in the d-ICE plot. 129 | In addition to displaying the individual curves for the derivative of the prediction function with respect to the feature in S, showing the standard deviation of the derivative helps to highlight regions in feature in S with heterogeneity in the estimated derivatives. 130 | The derivative ICE plot takes a long time to compute and is rather impractical. 131 | 132 | 133 | ### Advantages 134 | 135 | Individual conditional expectation curves are **even more intuitive to understand** than partial dependence plots. 136 | One line represents the predictions for one instance if we vary the feature of interest. 137 | 138 | Unlike partial dependence plots, ICE curves can **uncover heterogeneous relationships**. 139 | 140 | ### Disadvantages 141 | 142 | ICE curves **can only display one feature** meaningfully, because two features would require the drawing of several overlaying surfaces and you would not see anything in the plot. 143 | 144 | ICE curves suffer from the same problem as PDPs: 145 | If the feature of interest is correlated with the other features, then **some points in the lines might be invalid data points** according to the joint feature distribution. 146 | 147 | If many ICE curves are drawn, the **plot can become overcrowded** and you will not see anything. 148 | The solution: Either add some transparency to the lines or draw only a sample of the lines. 149 | 150 | In ICE plots it might not be easy to **see the average**. 151 | This has a simple solution: 152 | Combine individual conditional expectation curves with the partial dependence plot. 153 | 154 | ### Software and Alternatives 155 | 156 | ICE plots are implemented in the R packages `iml` (used for these examples), `ICEbox`[^ICEbox], and `pdp`. 157 | Another R package that does something very similar to ICE is `condvis`. 158 | 159 | 160 | 161 | [^ICEbox]: Goldstein, Alex, et al. "Package ‘ICEbox’." (2017). 162 | 163 | [^Goldstein2017]: Goldstein, Alex, et al. "Peeking inside the black box: Visualizing statistical learning with plots of individual conditional expectation." Journal of Computational and Graphical Statistics 24.1 (2015): 44-65. 164 | 165 | -------------------------------------------------------------------------------- /manuscript/05.7-agnostic-global-surrogate.Rmd: -------------------------------------------------------------------------------- 1 | ```{r, message = FALSE, warning = FALSE, echo = FALSE} 2 | devtools::load_all() 3 | set.seed(42) 4 | ``` 5 | 6 | 7 | 8 | ## Global Surrogate {#global} 9 | 10 | A global surrogate model is an interpretable model that is trained to approximate the predictions of a black box model. 11 | We can draw conclusions about the black box model by interpreting the surrogate model. 12 | Solving machine learning interpretability by using more machine learning! 13 | 14 | 15 | ### Theory 16 | 17 | Surrogate models are also used in engineering: 18 | If an outcome of interest is expensive, time-consuming or otherwise difficult to measure (e.g. 
because it comes from a complex computer simulation), a cheap and fast surrogate model of the outcome can be used instead. 19 | The difference between the surrogate models used in engineering and in interpretable machine learning is that the underlying model is a machine learning model (not a simulation) and that the surrogate model must be interpretable. 20 | The purpose of (interpretable) surrogate models is to approximate the predictions of the underlying model as accurately as possible and to be interpretable at the same time. 21 | The idea of surrogate models can be found under different names: 22 | Approximation model, metamodel, response surface model, emulator, ... 23 | 24 | About the theory: 25 | There is actually not much theory needed to understand surrogate models. 26 | We want to approximate our black box prediction function f as closely as possible with the surrogate model prediction function g, under the constraint that g is interpretable. 27 | For the function g any interpretable model -- for example from the [interpretable models chapter](#simple) -- can be used. 28 | 29 | For example a linear model: 30 | 31 | $$g(x)=\beta_0+\beta_1{}x_1{}+\ldots+\beta_p{}x_p$$ 32 | 33 | Or a decision tree: 34 | 35 | $$g(x)=\sum_{m=1}^Mc_m{}I\{x\in{}R_m\}$$ 36 | 37 | Training a surrogate model is a model-agnostic method, since it does not require any information about the inner workings of the black box model, only access to data and the prediction function is necessary. 38 | If the underlying machine learning model was replaced with another, you could still use the surrogate method. 39 | The choice of the black box model type and of the surrogate model type is decoupled. 40 | 41 | Perform the following steps to obtain a surrogate model: 42 | 43 | 1. Select a dataset X. 44 | This can be the same dataset that was used for training the black box model or a new dataset from the same distribution. 45 | You could even select a subset of the data or a grid of points, depending on your application. 46 | 1. For the selected dataset X, get the predictions of the black box model. 47 | 1. Select an interpretable model type (linear model, decision tree, ...). 48 | 1. Train the interpretable model on the dataset X and its predictions. 49 | 1. Congratulations! You now have a surrogate model. 50 | 1. Measure how well the surrogate model replicates the predictions of the black box model. 51 | 1. Interpret the surrogate model. 52 | 53 | You may find approaches for surrogate models that have some extra steps or differ a little, but the general idea is usually as described here. 54 | 55 | One way to measure how well the surrogate replicates the black box model is the R-squared measure: 56 | 57 | $$R^2=1-\frac{SSE}{SST}=1-\frac{\sum_{i=1}^n(\hat{y}_*^{(i)}-\hat{y}^{(i)})^2}{\sum_{i=1}^n(\hat{y}^{(i)}-\bar{\hat{y}})^2}$$ 58 | 59 | where $\hat{y}_*^{(i)}$ is the prediction for the i-th instance of the surrogate model, $\hat{y}^{(i)}$ the prediction of the black box model and $\bar{\hat{y}}$ the mean of the black box model predictions. 60 | SSE stands for sum of squares error and SST for sum of squares total. 61 | The R-squared measure can be interpreted as the percentage of variance that is captured by the surrogate model. 62 | If R-squared is close to 1 (= low SSE), then the interpretable model approximates the behavior of the black box model very well. 63 | If the interpretable model is very close, you might want to replace the complex model with the interpretable model. 
64 | If the R-squared is close to 0 (= high SSE), then the interpretable model fails to explain the black box model. 65 | 66 | Note that we have not talked about the model performance of the underlying black box model, i.e. how good or bad it performs in predicting the actual outcome. 67 | The performance of the black box model does not play a role in training the surrogate model. 68 | The interpretation of the surrogate model is still valid because it makes statements about the model and not about the real world. 69 | But of course the interpretation of the surrogate model becomes irrelevant if the black box model is bad, because then the black box model itself is irrelevant. 70 | 71 | 72 | We could also build a surrogate model based on a subset of the original data or reweight the instances. 73 | In this way, we change the distribution of the surrogate model's input, which changes the focus of the interpretation (then it is no longer really global). 74 | If we weight the data locally by a specific instance of the data (the closer the instances to the selected instance, the higher their weight), we get a local surrogate model that can explain the individual prediction of the instance. 75 | Read more about local models in the [following chapter](#lime). 76 | 77 | ### Example 78 | 79 | To demonstrate the surrogate models, we consider a regression and a classification example. 80 | 81 | First, we train a support vector machine to predict the [daily number of rented bikes](#bike-data) given weather and calendar information. 82 | The support vector machine is not very interpretable, so we train a surrogate with a CART decision tree as interpretable model to approximate the behavior of the support vector machine. 83 | 84 | ```{r surrogate-bike, message = FALSE, warning = FALSE, echo = FALSE, fig.cap = "The terminal nodes of a surrogate tree that approximates the predictions of a support vector machine trained on the bike rental dataset. The distributions in the nodes show that the surrogate tree predicts a higher number of rented bikes when temperature is above 13 degrees Celsius and when the day was later in the 2 year period (cut point at 435 days)."} 85 | library("iml") 86 | data(bike) 87 | bike.task = makeRegrTask(data = bike, target = "cnt") 88 | mod.bike = mlr::train(mlr::makeLearner(cl = 'regr.svm'), bike.task) 89 | 90 | pred.bike = Predictor$new(mod.bike, data = bike[, names(bike) != "cnt"]) 91 | tree = TreeSurrogate$new(pred.bike) 92 | plot(tree) 93 | 94 | 95 | pred.tree = predict(tree, bike) 96 | pred.svm = getPredictionResponse(predict(mod.bike, bike.task)) 97 | ``` 98 | 99 | The surrogate model has a R-squared (variance explained) of `r round(tree$r.squared, 2)` which means it approximates the underlying black box behavior quite well, but not perfectly. 100 | If the fit were perfect, we could throw away the support vector machine and use the tree instead. 101 | 102 | In our second example, we predict the probability of [cervical cancer](#cervical) with a random forest. 103 | Again we train a decision tree with the original dataset, but with the prediction of the random forest as outcome, instead of the real classes (healthy vs. cancer) from the data. 104 | 105 | ```{r surrogate-cervical, message = FALSE, warning = FALSE, echo = FALSE, fig.cap = "The terminal nodes of a surrogate tree that approximates the predictions of a random forest trained on the cervical cancer dataset. 
The counts in the nodes show the frequency of the black box models classifications in the nodes."} 106 | data(cervical) 107 | cervical.task = makeClassifTask(data = cervical, target = "Biopsy") 108 | mod.cervical = mlr::train(mlr::makeLearner(cl = 'classif.randomForest', predict.type = "prob"), cervical.task) 109 | 110 | pred.cervical = Predictor$new(mod.cervical, data = cervical[names(cervical) != "Biopsy"], type = "prob") 111 | tree.cervical = TreeSurrogate$new(pred.cervical, maxdepth = 2) 112 | plot(tree.cervical) + 113 | theme(strip.text.x = element_text(size = 8)) 114 | pred.tree.cervical = predict(tree.cervical, cervical)["Cancer"] 115 | pred.cervical = getPredictionProbabilities(predict(mod.cervical, cervical.task)) 116 | ``` 117 | 118 | The surrogate model has an R-squared (variance explained) of `r round(tree.cervical$r.squared[1], 2)`, which means it does not approximate the random forest well and we should not overinterpret the tree when drawing conclusions about the complex model. 119 | 120 | ### Advantages 121 | 122 | The surrogate model method is **flexible**: 123 | Any model from the [interpretable models chapter](#simple) can be used. 124 | This also means that you can exchange not only the interpretable model, but also the underlying black box model. 125 | Suppose you create some complex model and explain it to different teams in your company. 126 | One team is familiar with linear models, the other team can understand decision trees. 127 | You can train two surrogate models (linear model and decision tree) for the original black box model and offer two kinds of explanations. 128 | If you find a better performing black box model, you do not have to change your method of interpretation, because you can use the same class of surrogate models. 129 | 130 | I would argue that the approach is very **intuitive** and straightforward. 131 | This means it is easy to implement, but also easy to explain to people not familiar with data science or machine learning. 132 | 133 | With the **R-squared measure**, we can easily measure how good our surrogate models are in approximating the black box predictions. 134 | 135 | ### Disadvantages 136 | 137 | You have to be aware that you draw **conclusions about the model and not about the data**, since the surrogate model never sees the real outcome. 138 | 139 | It is not clear what the best **cut-off for R-squared** is in order to be confident that the surrogate model is close enough to the black box model. 140 | 80% of variance explained? 50%? 99%? 141 | 142 | We can measure how close the surrogate model is to the black box model. 143 | Let us assume we are not very close, but close enough. 144 | It could happen that the interpretable model is very **close for one subset of the dataset, but widely divergent for another subset**. 145 | In this case the interpretation for the simple model would not be equally good for all data points. 146 | 147 | The interpretable model you choose as a surrogate **comes with all its advantages and disadvantages**. 148 | 149 | Some people argue that there are, in general, **no intrinsically interpretable models** (including even linear models and decision trees) and that it would even be dangerous to have an illusion of interpretability. 150 | If you share this opinion, then of course this method is not for you. 151 | 152 | ### Software 153 | 154 | I used the `iml` R package for the examples. 155 | If you can train a machine learning model, then you should be able to implement surrogate models yourself. 
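
As a rough sketch of this do-it-yourself approach, reusing the bike data and the support vector machine predictions (`pred.svm`) from the example above, and using the `rpart` package as the interpretable model (an arbitrary choice for this sketch, not the package used elsewhere in this chapter):

```{r surrogate-diy-sketch, eval = FALSE}
# Train a decision tree on the black box predictions instead of the real outcome.
surrogate_data = bike[, names(bike) != "cnt"]
surrogate_data$svm_prediction = pred.svm
surrogate_tree = rpart::rpart(svm_prediction ~ ., data = surrogate_data)
# R-squared: share of the variance of the black box predictions captured by the tree.
tree_prediction = predict(surrogate_tree, surrogate_data)
1 - sum((pred.svm - tree_prediction)^2) / sum((pred.svm - mean(pred.svm))^2)
```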
156 | Simply train an interpretable model to predict the predictions of the black box model. 157 | 158 | -------------------------------------------------------------------------------- /manuscript/06.0-example.Rmd: -------------------------------------------------------------------------------- 1 | # Example-Based Explanations {#example-based} 2 | 3 | Example-based explanation methods select particular instances of the dataset to explain the behavior of machine learning models or to explain the underlying data distribution. 4 | 5 | 6 | 7 | Example-based explanations are mostly model-agnostic, because they make any machine learning model more interpretable. 8 | The difference to model-agnostic methods is that the example-based methods explain a model by selecting instances of the dataset and not by creating summaries of features (such as [feature importance](#feature-importance) or [partial dependence](#pdp)). 9 | Example-based explanations only make sense if we can represent an instance of the data in a humanly understandable way. 10 | This works well for images, because we can view them directly. 11 | In general, example-based methods work well if the feature values of an instance carry more context, meaning the data has a structure, like images or texts do. 12 | It is more challenging to represent tabular data in a meaningful way, because an instance can consist of hundreds or thousands of (less structured) features. 13 | Listing all feature values to describe an instance is usually not useful. 14 | It works well if there are only a handful of features or if we have a way to summarize an instance. 15 | 16 | 17 | Example-based explanations help humans construct mental models of the machine learning model and the data the machine learning model has been trained on. 18 | It especially helps to understand complex data distributions. 19 | But what do I mean by example-based explanations? 20 | We often use them in our jobs and daily lives. 21 | Let us start with some examples[^cbr]. 22 | 23 | A physician sees a patient with an unusual cough and a mild fever. 24 | The patient's symptoms remind her of another patient she had years ago with similar symptoms. 25 | She suspects that her current patient could have the same disease and she takes a blood sample to test for this specific disease. 26 | 27 | A data scientist works on a new project for one of his clients: 28 | Analysis of the risk factors that lead to the failure of production machines for keyboards. 29 | The data scientist remembers a similar project he worked on and reuses parts of the code from the old project because he thinks the client wants the same analysis. 30 | 31 | A kitten sits on the window ledge of a burning and uninhabited house. 32 | The fire department has already arrived and one of the firefighters ponders for a second whether he can risk going into the building to save the kitten. 33 | He remembers similar cases in his life as a firefighter: 34 | Old wooden houses that have been burning slowly for some time were often unstable and eventually collapsed. 35 | Because of the similarity of this case, he decides not to enter, because the risk of the house collapsing is too great. 36 | Fortunately, the kitty jumps out of the window, lands safely and nobody is harmed in the fire. Happy end. 37 | 38 | These stories illustrate how we humans think in examples or analogies. 39 | The blueprint of example-based explanations is: 40 | Thing B is similar to thing A and A caused Y, so I predict that B will cause Y as well. 
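Expressed in code, this blueprint is little more than a nearest neighbor lookup. The following sketch on the [bike data](#bike-data) is only an illustration and not one of the book's examples; restricting the search to standardized numerical features and using the Euclidean distance are simplifying assumptions:

```{r example-based-blueprint, eval = FALSE}
# "B is similar to A, and A had outcome Y, so I predict Y for B" as a
# 1-nearest-neighbor lookup on the bike data.
data(bike)
num_features = sapply(bike, is.numeric) & names(bike) != "cnt"
X = scale(bike[num_features])      # standardized numerical features

new_day = X[nrow(X), ]             # the "new" instance B (last day, for illustration)
train = X[-nrow(X), ]              # all other days are candidates for A

distances = sqrt(rowSums(sweep(train, 2, new_day)^2))
nearest = which.min(distances)     # the most similar day A

bike$cnt[nearest]                  # the predicted outcome Y for B ...
bike[nearest, ]                    # ... and the neighbor A itself as the explanation
```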
41 | Implicitly, some machine learning approaches work example-based. 42 | [Decision trees](#tree) partition the data into nodes based on the similarities of the data points in the features that are important for predicting the target. 43 | A decision tree gets the prediction for a new data instance by finding the instances that are similar (= in the same terminal node) and returning the average of the outcomes of those instances as the prediction. 44 | The k-nearest neighbors (knn) method works explicitly with example-based predictions. 45 | For a new instance, a knn model locates the k-nearest neighbors (e.g. the k=3 closest instances) and returns the average of the outcomes of those neighbors as a prediction. 46 | The prediction of a knn can be explained by returning the k neighbors, which -- again -- is only meaningful if we have a good way to represent a single instance. 47 | 48 | The chapters in this part cover the following example-based interpretation methods: 49 | 50 | - [Counterfactual explanations](#counterfactual) tell us how an instance has to change to significantly change its prediction. 51 | By creating counterfactual instances, we learn about how the model makes its predictions and can explain individual predictions. 52 | - [Adversarial examples](#adversarial) are counterfactuals used to fool machine learning models. 53 | The emphasis is on flipping the prediction and not explaining it. 54 | - [Prototypes](#proto) are a selection of representative instances from the data and criticisms are instances that are not well represented by those prototypes. [^critique] 55 | - [Influential instances](#influential) are the training data points that were the most influential for the parameters of a prediction model or the predictions themselves. 56 | Identifying and analysing influential instances helps to find problems with the data, debug the model and understand the model's behavior better. 57 | - [k-nearest neighbors model](#other-interpretable): An (interpretable) machine learning model based on examples. 58 | 59 | 60 | [^cbr]: Aamodt, Agnar, and Enric Plaza. "Case-based reasoning: Foundational issues, methodological variations, and system approaches." AI communications 7.1 (1994): 39-59. 61 | 62 | -------------------------------------------------------------------------------- /manuscript/06.4-example-based-archetypes.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ```{r archetypes, eval=FALSE} 5 | 6 | library("archetypes") 7 | devtools::load_all() 8 | data(cervical) 9 | 10 | mm = model.matrix(~ . 
- 1, data = cervical) 11 | 12 | cervical.x = cervical[setdiff(names(cervical), "Biopsy")] 13 | aa = archetypes(cervical.x, 5) 14 | 15 | 16 | simplexplot(aa) 17 | 18 | summary(aa) 19 | 20 | round(aa$archetypes, 1) 21 | 22 | 23 | cervical.task = makeClassifTask(data = cervical, target = "Biopsy") 24 | mod = mlr::train(mlr::makeLearner(cl = 'classif.randomForest', id = 'cervical-rf', predict.type = 'prob'), cervical.task) 25 | 26 | predict(mod, newdata = data.frame(aa$archetypes)) 27 | 28 | 29 | pred.cervical = Predictor$new(mod, data = cervical.x, class = "Cancer") 30 | pdp = Shapley$new(pred.cervical, data.frame(aa$archetypes)[1,]) 31 | pdp$plot() 32 | 33 | pdp = Shapley$new(pred.cervical, data.frame(aa$archetypes)[2,]) 34 | pdp$plot() 35 | 36 | 37 | pdp = Shapley$new(pred.cervical, round(data.frame(aa$archetypes)[3,])) 38 | pdp$plot() 39 | ``` 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /manuscript/07.0-neuralnet.Rmd: -------------------------------------------------------------------------------- 1 | ```{r, message = FALSE, warning = FALSE, echo = FALSE} 2 | devtools::load_all() 3 | ``` 4 | 5 | # Neural Network Interpretation {#neural-networks} 6 | 7 | `r if(is.html){only.in.html}` 8 | 9 | 10 | The following chapters focus on interpretation methods for neural networks. 11 | The methods visualize features and concepts learned by a neural network, explain individual predictions and simplify neural networks. 12 | 13 | Deep learning has been very successful, especially in tasks that involve images and texts such as image classification and language translation. 14 | The success story of deep neural networks began in 2012, when the ImageNet image classification challenge [^imagenet] was won by a deep learning approach. 15 | Since then, we have witnessed a Cambrian explosion of deep neural network architectures, with a trend towards deeper networks with more and more weight parameters. 16 | 17 | 18 | To make predictions with a neural network, the data input is passed through many layers of multiplication with the learned weights and through non-linear transformations. 19 | A single prediction can involve millions of mathematical operations depending on the architecture of the neural network. 20 | There is no chance that we humans can follow the exact mapping from data input to prediction. 21 | We would have to consider millions of weights that interact in a complex way to understand a prediction by a neural network. 22 | To interpret the behavior and predictions of neural networks, we need specific interpretation methods. 23 | The chapters assume that you are familiar with deep learning, including convolutional neural networks. 24 | 25 | 26 | We can certainly use [model-agnostic methods](#agnostic), such as [local models](#lime) or [partial dependence plots](#pdp), but there are two reasons why it makes sense to consider interpretation methods developed specifically for neural networks: 27 | First, neural networks learn features and concepts in their hidden layers and we need special tools to uncover them. 28 | Second, the gradient can be utilized to implement interpretation methods that are more computationally efficient than model-agnostic methods that look at the model "from the outside". 29 | Also most other methods in this book are intended for the interpretation of models for tabular data. 30 | Image and text data require different methods. 
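To give a flavor of what using the gradient means in practice, here is a minimal sketch of a vanilla gradient (saliency) map with the R `keras` and `tensorflow` packages. The objects `model` (a trained image classifier) and `img` (a preprocessed array of shape 1 x height x width x channels) are assumptions and are not defined anywhere in this book:

```{r saliency-sketch, eval = FALSE}
# Vanilla gradient (saliency) sketch: one forward and one backward pass.
# `model` and `img` are assumed to exist (trained classifier, preprocessed image).
library("keras")
library("tensorflow")

x = tf$constant(img)
with(tf$GradientTape() %as% tape, {
  tape$watch(x)                   # track gradients with respect to the input pixels
  preds = model(x)                # forward pass
  score = tf$reduce_max(preds)    # score of the top class (gradient flows only to the max)
})
grads = tape$gradient(score, x)   # d score / d input

# Collapse the color channels: one importance value per pixel.
saliency = apply(abs(as.array(grads))[1, , , ], c(1, 2), max)
```

Model-agnostic alternatives would need many forward passes to approximate the same information.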
31 | 32 | The next chapters cover the following topics: 33 | 34 | - [Feature Visualization](#feature-visualization): What features has the neural network learned? 35 | [Adversarial Examples](#adversarial) from the [Example-Based Explanations chapter](#example-based) are closely related to feature visualization: How can we manipulate the inputs to get a wrong classification? 36 | - [Concepts](#neural-concepts) (IN PROGRESS): Which more abstract concepts has the neural network learned? 37 | - [Feature Attribution](#feature-attribution) (IN PROGRESS): How did each input contribute to a particular prediction? 38 | - [Model Distillation](#neural-distillation) (IN PROGRESS): How can we explain a neural network with a simpler model? 39 | 40 | 41 | -------------------------------------------------------------------------------- /manuscript/07.2-concepts.Rmd: -------------------------------------------------------------------------------- 1 | ## Detecting Concepts 2 | 3 | `r if(is.html){only.in.html}` 4 | 5 | 6 | This chapter presents techniques for analyzing which concepts a neural network has learned. 7 | A concept here means an abstract idea that is pre-defined by a human. 8 | While [feature visualization](#feature-visualization) tries to detect features from neural network units, which might match a concept (e.g. dog snouts) but do not have to, concept detection starts with a concept and analyzes how the neural network handles this concept. 9 | 10 | 11 | 12 | Feature visualization is more exploratory: 13 | What does the neural network detect? 14 | But it does not help when we have more concrete questions, such as: How important was the concept of dog snouts for the classification? 15 | 16 | 17 | 18 | We will look at two approaches: Network Dissection and Concept Activation Vectors. 19 | Both approaches require additional labeling of data, but in different ways. 20 | 21 | ### TCAV: Testing with Concept Activation Vectors 22 | 23 | But what about more implicit concepts? 24 | Concepts for which we have no prior labeled data? 25 | 26 | TCAV by Kim et al. (2018)[^tcav] explains a prediction by showing the importance of higher-level concepts (e.g. texture, gender, color) for the prediction or classification. 27 | 28 | You have to learn the concepts from data. 29 | That means if you want to understand whether the network uses the concept of "female" for the classification of e.g. images, you have to provide some examples of "female" (could be images with women in them) and of "non-female" (images without women in them). 30 | 31 | You send all those images through the network. 32 | 33 | A good thing is that TCAV does not require you to change the network; you can use the network that you already have. 34 | 35 | 36 | 37 | 38 | TCAV uses directional derivatives to quantify the importance of a concept for the classification or prediction. 39 | The concept is defined by the user and must be defined via some positive and negative data examples. 40 | For example, for the image classification of a zebra, the concept might be stripes. 41 | The concept is defined by selecting images of stripes and some randomly sampled images without stripes. 42 | 43 | ```{r tcav, fig.cap="Figure from the TCAV paper, Been Kim et al. (2018)", out.width=800} 44 | knitr::include_graphics("images/tcav.png") 45 | ``` 46 | 47 | 48 | Code for TCAV: https://github.com/tensorflow/tcav 49 | 50 | TODO: CONTINUE DESCRIBING TCAV 51 | 52 | Good things about TCAV: 53 | The concepts are not required to be known at training time, and really any concept can be analyzed, as long as you find some positive and negative examples.
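To make the computation described above more concrete, here is a rough sketch in R for the zebra/stripes example. The helper functions `get_activations()` and `get_gradients()` are hypothetical placeholders for the network-specific code (they are not part of any package), and a plain logistic regression stands in for the linear classifier of the original paper:

```{r tcav-sketch, eval = FALSE}
# Rough TCAV sketch. `get_activations()` and `get_gradients()` are hypothetical:
# activations of a chosen layer per image, and the gradient of the "zebra" logit
# with respect to that layer per image (one row per image).
act_concept = get_activations(stripe_images)   # positive concept examples
act_random  = get_activations(random_images)   # random counterexamples

# 1) Linear classifier that separates concept activations from random ones.
X = rbind(act_concept, act_random)
y = c(rep(1, nrow(act_concept)), rep(0, nrow(act_random)))
cav_model = glm(y ~ X, family = binomial())

# 2) The concept activation vector (CAV) is the normal of the separating
#    hyperplane, i.e. the coefficient vector without the intercept.
cav = coef(cav_model)[-1]
cav = cav / sqrt(sum(cav^2))

# 3) Directional derivative: how much does the zebra logit change when the
#    activations move in the direction of the concept?
dir_deriv = get_gradients(zebra_images) %*% cav

# 4) TCAV score: share of zebra images with a positive directional derivative.
mean(dir_deriv > 0)
```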
54 | 55 | 56 | 57 | For RNNs: https://medium.com/@plusepsilon/visualizations-of-recurrent-neural-networks-c18f07779d56 58 | https://distill.pub/2019/memorization-in-rnns/ 59 | http://lstm.seas.harvard.edu/ 60 | 61 | TODO: Check out RNNVis and LSTMVis 62 | 63 | List of notebooks: 64 | https://github.com/tensorflow/lucid 65 | More a tool for getting a general, better understanding of CNNs, but not for the daily job. 66 | 67 | ### Word Embeddings 68 | 69 | 70 | Word embeddings represent words as vectors, which can be used to compute the similarity between words. 71 | They are another way to visualize the concepts a network has learned. 72 | An embedding maps a discrete feature (e.g. a word) to an m-dimensional vector. 73 | A word embedding is the vector in the embedding space that a word is mapped onto. 74 | The embedding space is learned by the neural network. 75 | The directions in that space often correspond to concepts. 76 | This means that words with similar vectors have some similarity, e.g. cat and dog. 77 | This also has the nice effect that we can do arithmetic in that space. 78 | For example: 79 | 80 | $$embedding(king)-embedding(queen)=embedding(man)-embedding(woman)$$ 81 | 82 | The embeddings are high-dimensional vectors. 83 | For visualization, they are often mapped to two dimensions (e.g. with t-SNE). TODO: CITE 84 | 85 | What can you do with embeddings? 86 | You can visualize the concepts that were learned. 87 | Embeddings let us analyze what the neural network learned. 88 | For example, did it learn some kind of bias? 89 | How do we get word embeddings? 90 | Other use cases include using these embeddings as feature transformations before the text is used in a machine learning model. 91 | 92 | How are they created? 93 | It's a mapping from categorical features (e.g. words) to some vectors. 94 | They can be initialized with random weights and the embeddings are learned along with the thing you are trying to predict, e.g. with a recurrent neural network. 95 | An alternative is to use a pre-trained embedding like word2vec, GloVe or fastText. 96 | Those are trained over huge corpora of text to predict words from their neighboring words. 97 | 98 | 99 | - concepts can transform when learning, e.g. dog into waterfall 100 | 101 | **Detecting Concepts During Training Time** 102 | 103 | Towards Robust Interpretability with Self-Explaining Neural Networks 104 | 105 | 106 | **Software** 107 | 108 | - Caffe and GANs: https://github.com/Evolving-AI-Lab/synthesizing 109 | 110 | 111 | ### Other approaches for concepts 112 | 113 | - Word embeddings https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf 114 | 115 | 116 | [^tcav]: Kim, Been, et al. "Interpretability beyond feature attribution: Quantitative testing with concept activation vectors (TCAV)." arXiv preprint arXiv:1711.11279 (2017). 117 | 118 | [^dissect]: Bau, David, et al. "Network dissection: Quantifying interpretability of deep visual representations." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017. 119 | -------------------------------------------------------------------------------- /manuscript/07.4-distillation.Rmd: -------------------------------------------------------------------------------- 1 | ### Rule Extraction and Model Extraction 2 | 3 | Some methods, all of which have only been tested on networks with a single hidden layer:
4 | - KT method: Extract for each neuron a rule 5 | - Extract decision tree (Hinton) 6 | - CRED (2001, Sato and Tsukimoto) 7 | - Trepan 8 | 9 | - Model tree distillation (for CNNs) 10 | 11 | DeepRed 12 | 13 | 14 | LIME 15 | 16 | Types of rule extraction: Decompositional, Pedagogical, Eclectic 17 | Pedagogical is simply model-agnostic. 18 | 19 | You can also look at different levels: 20 | Approximate the rules of a single neuron 21 | Approximate the whole network classification. 22 | 23 | TODO: Link overview paper. 24 | 25 | 26 | -------------------------------------------------------------------------------- /manuscript/09-contribute.Rmd: -------------------------------------------------------------------------------- 1 | # Contribute to the Book {#contribute} 2 | 3 | Thank you for reading my book about Interpretable Machine Learning. 4 | The book is under continuous development. 5 | It will be improved over time and more chapters will be added. 6 | Very similar to how software is developed. 7 | 8 | All text and code for the book is open source and [available at github.com](https://github.com/christophM/interpretable-ml-book). 9 | On the Github page you can suggest fixes and [open issues](https://github.com/christophM/interpretable-ml-book/issues) if you find a mistake or if something is missing. 10 | 11 | # Citing this Book {#cite} 12 | 13 | If you found this book useful for your blog post, research article or product, I would be grateful if you would cite this book. 14 | You can cite the book like this: 15 | 16 | 17 | Molnar, Christoph. "Interpretable machine learning. A Guide for Making Black Box Models Explainable", 2019. https://christophm.github.io/interpretable-ml-book/. 18 | 19 | Or use the following bibtex entry: 20 | 21 | 22 | @book{molnar2019, 23 | title = {Interpretable Machine Learning}, 24 | author = {Christoph Molnar}, 25 | note = {\url{https://christophm.github.io/interpretable-ml-book/}}, 26 | year = {2019}, 27 | subtitle = {A Guide for Making Black Box Models Explainable} 28 | } 29 | 30 | 31 | I am always curious about where and how interpretation methods are used in industry and research. 32 | If you use the book as a reference, it would be great if you wrote me a line and told me what for. 33 | This is of course optional and only serves to satisfy my own curiosity and to stimulate interesting exchanges. 34 | My mail is christoph.molnar.ai@gmail.com . 35 | -------------------------------------------------------------------------------- /manuscript/09b-translations.Rmd: -------------------------------------------------------------------------------- 1 | # Translations {#translations} 2 | 3 | **Interested in translating the book?** 4 | 5 | This book is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-nc-sa/4.0/). 6 | This means that you are allowed to translate it and put it online. 7 | You have to mention me as original author and you are not allowed to sell the book. 8 | 9 | If you are interested in translating the book, you can write a message and I can link your translation here. 10 | My address is christoph.molnar.ai@gmail.com . 11 | 12 | **List of translations** 13 | 14 | **Chinese**: 15 | 16 | - https://github.com/MingchaoZhu/InterpretableMLBook Complete translations by [Mingchao Zhu](https://github.com/MingchaoZhu). 17 | - https://blog.csdn.net/wizardforcel/article/details/98992150 Translation of most chapters, by CSDN, an online community of programmers. 
18 | - https://zhuanlan.zhihu.com/p/63408696 Translation of some chapters by 知乎. The website also includes questions and answers from various users. 19 | 20 | 21 | **Korean**: 22 | 23 | - https://tootouch.github.io/IML/taxonomy_of_interpretability_methods/ Complete Korean translation by [TooTouch](https://tootouch.github.io/) 24 | 25 | - https://subinium.github.io/IML/ Partial Korean translation by [An Subin](https://subinium.github.io/) 26 | 27 | 28 | **Spanish** 29 | 30 | - https://fedefliguer.github.io/AAI/ First chapters translated by [Federico Fliguer](https://fedefliguer.github.io/) 31 | 32 | If you know of any other translation of the book or of individual chapters, I would be grateful to hear about it and list it here. 33 | You can reach me via email: christoph.molnar.ai@gmail.com . 34 | -------------------------------------------------------------------------------- /manuscript/10-acknowledgements.Rmd: -------------------------------------------------------------------------------- 1 | # Acknowledgements 2 | 3 | Writing this book was (and still is) a lot of fun. 4 | But it is also a lot of work and I am very happy about the support I received. 5 | 6 | My biggest thank-you goes to Katrin who had the hardest job in terms of hours and effort: 7 | she proofread the book from beginning to end and discovered many spelling mistakes and inconsistencies that I would never have found. 8 | I am very grateful for her support. 9 | 10 | A big thanks goes to Verena Haunschmid for writing the section about [LIME explanations for images](#images-lime). 11 | She works in data science and I recommend following her on Twitter: [\@ExpectAPatronum](https://twitter.com/ExpectAPatronum). 12 | I also want to thank all the [early readers who contributed corrections](https://github.com/christophM/interpretable-ml-book/graphs/contributors) on Github! 13 | 14 | Furthermore, I want to thank everyone who created illustrations: 15 | The cover was designed by my friend [\@YvonneDoinel](https://twitter.com/YvonneDoinel). 16 | The graphics in the [Shapley Value chapter](#shapley) were created by [Heidi Seibold](https://twitter.com/HeidiBaya), as well as the turtle example in the [adversarial examples chapter](#adversarial). 17 | Verena Haunschmid created the graphic in the [RuleFit chapter](#rulefit). 18 | 19 | 20 | In at least three aspects, the way I published this book is unconventional. 21 | First, it is available both as website and as ebook/pdf. 22 | The software I used to create this book is called `bookdown`, written by [Yihui Xie](https://yihui.name/), who created many R packages that make it easy to combine R code and text. 23 | Thanks a lot! 24 | Secondly, I self-publish the book on the platform [Leanpub](https://leanpub.com/), instead of working with a traditional publisher. 25 | And third, I published the book as in-progress book, which has helped me enormously to get feedback and to monetize it along the way. 26 | Many thanks to leanpub for making this possible and handling the royalties fairly. 27 | I would also like to thank you, dear reader, for reading this book without a big publisher name behind it. 28 | 29 | I am grateful for the funding of my research on interpretable machine learning by the Bavarian State Ministry of Science and the Arts in the framework of the Centre Digitisation.Bavaria (ZD.B). 
30 | -------------------------------------------------------------------------------- /manuscript/Book.txt: -------------------------------------------------------------------------------- 1 | 00.0-preface.md 2 | 01-introduction.md 3 | 01.2-short-stories.md 4 | 01.3-ml-definitions.md 5 | 02-interpretability.md 6 | 03-datasets.md 7 | 04.1-interpretable-models.md 8 | 04.2-interpretable-linear.md 9 | 04.3-interpretable-logistic.md 10 | 04.4-interpretable-lm-extensions.md 11 | 04.5-interpretable-tree.md 12 | 04.6-interpretable-rules.md 13 | 04.7-interpretable-rulefit.md 14 | 04.8-interpretable-other.md 15 | 05.1-agnostic.md 16 | 05.2-agnostic-pdp.md 17 | 05.3-agnostic-ice.md 18 | 05.4-agnostic-ale.md 19 | 05.5-agnostic-interaction.md 20 | 05.6-agnostic-permfeatimp.md 21 | 05.7-agnostic-global-surrogate.md 22 | 05.8-agnostic-lime.md 23 | 05.8.1-agnostic-Anchors.Rmd 24 | 05.9-agnostic-shapley.md 25 | 06.0-example.md 26 | 06.1-example-based-counterfactual.md 27 | 06.2-example-based-adversarial.md 28 | 06.3-example-based-proto.md 29 | 06.5-example-based-influence-fct.md 30 | 07.0-neuralnet.md 31 | 07.1-deep-learning.md 32 | 08-future.md 33 | 09-contribute.md 34 | 10-acknowledgements.md 35 | 11-references.md 36 | -------------------------------------------------------------------------------- /manuscript/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY : sbrl 2 | 3 | all : html 4 | 5 | book.file := Book.txt 6 | md.files := $(shell cat ${book.file}) 7 | 8 | package: ../DESCRIPTION ../R/*.R 9 | Rscript -e "devtools::install('../')" 10 | 11 | html : *.Rmd 12 | # When build fails interpretable-ml.Rmd is created and not removed. Next build will fail when file exists. 13 | rm -f interpretable-ml.Rmd 14 | Rscript -e "bookdown::render_book('./', 'bookdown::gitbook')" 15 | 16 | leanpub : $(md.files) 17 | 18 | %.md : %.Rmd 19 | R --no-save --args $< < ../scripts/process-rmd-leanpub.R 20 | R --no-save --args $@ < ../scripts/fix-leanpub.R 21 | 22 | 23 | pdf : interpretable-ml.pdf 24 | 25 | interpretable-ml.pdf : *.Rmd 26 | # When build fails interpretable-ml.Rmd is created and not removed. Next build will fail when file exists. 27 | rm -f interpretable-ml.Rmd 28 | Rscript -e "bookdown::render_book('./', 'bookdown::pdf_book')" 29 | pdflatex $< 30 | 31 | epub : _book/interpretable-ml.epub 32 | 33 | _book/interpretable-ml.epub : *.Rmd 34 | # When build fails interpretable-ml.Rmd is created and not removed. Next build will fail when file exists. 35 | rm -f interpretable-ml.Rmd 36 | Rscript -e "bookdown::render_book('./', 'bookdown::epub_book')" 37 | 38 | 39 | mobi : _book/interpretable-ml.epub 40 | # When build fails interpretable-ml.Rmd is created and not removed. Next build will fail when file exists. 
41 | rm -f interpretable-ml.Rmd 42 | Rscript -e "bookdown::kindlegen('./_book/interpretable-ml.epub', exec = '../kindlegen')" 43 | 44 | 45 | 11-references.Rmd : *.Rmd 46 | Rscript ../scripts/references.R 47 | 48 | 49 | sbrl: 50 | sudo apt install -y libgsl10-dev open-cobol 51 | -------------------------------------------------------------------------------- /manuscript/Sample.txt: -------------------------------------------------------------------------------- 1 | 06.1-example-based-counterfactual.md 2 | -------------------------------------------------------------------------------- /manuscript/_bookdown.yml: -------------------------------------------------------------------------------- 1 | book_filename: interpretable-ml 2 | repo: https://github.com/christophM/interpretable-ml-book 3 | output_dir: "_book" 4 | rmd_subdir: FALSE 5 | language: 6 | label: 7 | fig: "FIGURE " 8 | tab: "TABLE " 9 | ui: 10 | edit: "Edit" 11 | chapter_name: "Chapter " 12 | rmd_files: ["index.Rmd", 13 | "01-introduction.Rmd", 14 | "01.2-short-stories.Rmd", 15 | "01.3-ml-definitions.Rmd", 16 | "02-interpretability.Rmd", 17 | "03-datasets.Rmd", 18 | "04.1-interpretable-models.Rmd", 19 | "04.2-interpretable-linear.Rmd", 20 | "04.3-interpretable-logistic.Rmd", 21 | "04.4-interpretable-lm-extensions.Rmd", 22 | "04.5-interpretable-tree.Rmd", 23 | "04.6-interpretable-rules.Rmd", 24 | "04.7-interpretable-rulefit.Rmd", 25 | "04.8-interpretable-other.Rmd", 26 | "05.1-agnostic.Rmd", 27 | "05.2-agnostic-pdp.Rmd", 28 | "05.3-agnostic-ice.Rmd", 29 | "05.4-agnostic-ale.Rmd", 30 | "05.5-agnostic-interaction.Rmd", 31 | "05.6-agnostic-permfeatimp.Rmd", 32 | "05.7-agnostic-global-surrogate.Rmd", 33 | "05.8-agnostic-lime.Rmd", 34 | "05.8.1-agnostic-Anchors.Rmd", 35 | "05.9-agnostic-shapley.Rmd", 36 | "05.9b-agnostic-shap.Rmd", 37 | "06.0-example.Rmd", 38 | "06.1-example-based-counterfactual.Rmd", 39 | "06.2-example-based-adversarial.Rmd", 40 | "06.3-example-based-proto.Rmd", 41 | "06.4-example-based-archetypes.Rmd", 42 | "06.5-example-based-influence-fct.Rmd", 43 | "07.0-neuralnet.Rmd", 44 | "07.1-feature-visualization.Rmd", 45 | #"07.2-concepts.Rmd", 46 | #"07.3-attribution.Rmd", 47 | #"07.4-distillation.Rmd", 48 | "08-future.Rmd", 49 | "09-contribute.Rmd", 50 | "09b-translations.Rmd", 51 | "10-acknowledgements.Rmd", 52 | "11-references.Rmd"] 53 | -------------------------------------------------------------------------------- /manuscript/_output.yml: -------------------------------------------------------------------------------- 1 | bookdown::gitbook: 2 | dev: svglite 3 | split_by: section 4 | css: css/style.css 5 | includes: 6 | in_header: html/header.html 7 | config: 8 | edit: https://github.com/christophM/interpretable-ml-book/edit/master/%s 9 | sharing: 10 | github: yes 11 | facebook: yes 12 | toc: 13 | collapse: subsection 14 | before: | 15 |
15 |         Interpretable machine learning 16 |       after: | 17 |         Published with bookdown 18 |
.cc-floating.cc-theme-classic{padding:1.2em;border-radius:5px}.cc-floating.cc-type-info.cc-theme-classic .cc-compliance{text-align:center;display:inline;-ms-flex:none;flex:none}.cc-theme-classic .cc-btn{border-radius:5px}.cc-theme-classic .cc-btn:last-child{min-width:140px}.cc-floating.cc-type-info.cc-theme-classic .cc-btn{display:inline-block} 6 | .cc-theme-edgeless.cc-window{padding:0}.cc-floating.cc-theme-edgeless .cc-message{margin:2em 2em 1.5em}.cc-banner.cc-theme-edgeless .cc-btn{margin:0;padding:.8em 1.8em;height:100%}.cc-banner.cc-theme-edgeless .cc-message{margin-left:1em}.cc-floating.cc-theme-edgeless .cc-btn+.cc-btn{margin-left:0} -------------------------------------------------------------------------------- /manuscript/css/style.css: -------------------------------------------------------------------------------- 1 | /* Copied from https://github.com/rstudio/bookdown/blob/master/inst/examples/css/style.css */ 2 | p.caption { 3 | color: #777; 4 | margin-top: 10px; 5 | } 6 | p code { 7 | white-space: inherit; 8 | } 9 | pre { 10 | word-break: normal; 11 | word-wrap: normal; 12 | } 13 | pre code { 14 | white-space: inherit; 15 | } 16 | p.flushright { 17 | text-align: right; 18 | } 19 | blockquote > p:last-child { 20 | text-align: right; 21 | } 22 | blockquote > p:first-child { 23 | text-align: inherit; 24 | } 25 | .header-section-number { 26 | padding-right: .2em; 27 | font-weight: 500; 28 | } 29 | .level1 .header-section-number { 30 | display: inline-block; 31 | border-bottom: 3px solid; 32 | } 33 | .level1 h1 { 34 | border-bottom: 1px solid; 35 | } 36 | h1, h2, h3, h4, h5, h6 { 37 | font-weight: normal; 38 | } 39 | h1.title { 40 | font-weight: 700; 41 | } 42 | .book .book-body .page-wrapper .page-inner section.normal { 43 | font-weight: 500; 44 | } -------------------------------------------------------------------------------- /manuscript/html/header.html: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /manuscript/images/a484.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/a484.png -------------------------------------------------------------------------------- /manuscript/images/access-denied.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/access-denied.jpg -------------------------------------------------------------------------------- /manuscript/images/access-denied.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/access-denied.xcf -------------------------------------------------------------------------------- /manuscript/images/activation-optim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/activation-optim.png -------------------------------------------------------------------------------- /manuscript/images/adversarial-1pixel.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/adversarial-1pixel.png -------------------------------------------------------------------------------- /manuscript/images/adversarial-ostrich.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/adversarial-ostrich.jpg -------------------------------------------------------------------------------- /manuscript/images/adversarial-panda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/adversarial-panda.jpg -------------------------------------------------------------------------------- /manuscript/images/adversarial-toaster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/adversarial-toaster.png -------------------------------------------------------------------------------- /manuscript/images/adversarial-turtle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/adversarial-turtle.png -------------------------------------------------------------------------------- /manuscript/images/agnostic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/agnostic.png -------------------------------------------------------------------------------- /manuscript/images/amazon-freq-bought-together.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/amazon-freq-bought-together.png -------------------------------------------------------------------------------- /manuscript/images/analyze.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/analyze.png -------------------------------------------------------------------------------- /manuscript/images/anchors-process.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/anchors-process.jpg -------------------------------------------------------------------------------- /manuscript/images/anchors-visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/anchors-visualization.png -------------------------------------------------------------------------------- /manuscript/images/anchors.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/anchors.jpg -------------------------------------------------------------------------------- /manuscript/images/arch-compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/arch-compare.png -------------------------------------------------------------------------------- /manuscript/images/big-picture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/big-picture.png -------------------------------------------------------------------------------- /manuscript/images/big-picture.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/big-picture.xcf -------------------------------------------------------------------------------- /manuscript/images/broden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/broden.png -------------------------------------------------------------------------------- /manuscript/images/burnt-earth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/burnt-earth.jpg -------------------------------------------------------------------------------- /manuscript/images/burnt-earth.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/burnt-earth.xcf -------------------------------------------------------------------------------- /manuscript/images/by-nc-sa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/by-nc-sa.png -------------------------------------------------------------------------------- /manuscript/images/cfexp-nsgaII.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/cfexp-nsgaII.jpg -------------------------------------------------------------------------------- /manuscript/images/cnn features-1.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/cnn features-1.xcf -------------------------------------------------------------------------------- /manuscript/images/cnn-features.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/cnn-features.png -------------------------------------------------------------------------------- /manuscript/images/cooks-analyzed-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/cooks-analyzed-1.png -------------------------------------------------------------------------------- /manuscript/images/cover-amazon-bordered.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/cover-amazon-bordered.jpg -------------------------------------------------------------------------------- /manuscript/images/detective.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/detective.png -------------------------------------------------------------------------------- /manuscript/images/dissection-dog-exemplary.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/dissection-dog-exemplary.jpg -------------------------------------------------------------------------------- /manuscript/images/dissection-dogs.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/dissection-dogs.jpeg -------------------------------------------------------------------------------- /manuscript/images/dissection-network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/dissection-network.png -------------------------------------------------------------------------------- /manuscript/images/doctor-840127_1280.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/doctor-840127_1280.xcf -------------------------------------------------------------------------------- /manuscript/images/dog_and_book.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/dog_and_book.jpeg -------------------------------------------------------------------------------- /manuscript/images/doge-stuck.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/doge-stuck.jpg -------------------------------------------------------------------------------- /manuscript/images/doge-stuck.xcf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/doge-stuck.xcf -------------------------------------------------------------------------------- /manuscript/images/enrollment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/enrollment.png -------------------------------------------------------------------------------- /manuscript/images/eureka.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/eureka.png -------------------------------------------------------------------------------- /manuscript/images/explain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/explain.png -------------------------------------------------------------------------------- /manuscript/images/feature-visualization-units.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/feature-visualization-units.png -------------------------------------------------------------------------------- /manuscript/images/graph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/graph.jpg -------------------------------------------------------------------------------- /manuscript/images/hospital.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/hospital.png -------------------------------------------------------------------------------- /manuscript/images/ice-bike-derivative-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/ice-bike-derivative-1.png -------------------------------------------------------------------------------- /manuscript/images/ice-cervical-derivative-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/ice-cervical-derivative-1.png -------------------------------------------------------------------------------- /manuscript/images/iml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/iml.png -------------------------------------------------------------------------------- /manuscript/images/influence-single-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/influence-single-1.png -------------------------------------------------------------------------------- /manuscript/images/interaction-cervical-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/interaction-cervical-1.png -------------------------------------------------------------------------------- /manuscript/images/interaction2-cervical-age-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/interaction2-cervical-age-1.png -------------------------------------------------------------------------------- /manuscript/images/learn-one-rule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/learn-one-rule.png -------------------------------------------------------------------------------- /manuscript/images/learner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/learner.png -------------------------------------------------------------------------------- /manuscript/images/lime-images-package-example-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/lime-images-package-example-1.png -------------------------------------------------------------------------------- /manuscript/images/lime-tabular-example-explain-plot-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/lime-tabular-example-explain-plot-2-1.png -------------------------------------------------------------------------------- /manuscript/images/lime-text-explanations-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/lime-text-explanations-1.png -------------------------------------------------------------------------------- /manuscript/images/machine-learning-xkcd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/machine-learning-xkcd.png -------------------------------------------------------------------------------- /manuscript/images/mri.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/mri.png -------------------------------------------------------------------------------- /manuscript/images/pen.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/pen.jpg -------------------------------------------------------------------------------- /manuscript/images/potato-chips.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/potato-chips.jpg -------------------------------------------------------------------------------- /manuscript/images/programing-ml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/programing-ml.png -------------------------------------------------------------------------------- /manuscript/images/proto-critique2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/proto-critique2.jpg -------------------------------------------------------------------------------- /manuscript/images/rotation-dissect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/rotation-dissect.png -------------------------------------------------------------------------------- /manuscript/images/rulefit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/rulefit.jpg -------------------------------------------------------------------------------- /manuscript/images/scientist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/scientist.png -------------------------------------------------------------------------------- /manuscript/images/shap-clustering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shap-clustering.png -------------------------------------------------------------------------------- /manuscript/images/shap-dependence-interaction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shap-dependence-interaction.png -------------------------------------------------------------------------------- /manuscript/images/shap-dependence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shap-dependence.png -------------------------------------------------------------------------------- /manuscript/images/shap-explain-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shap-explain-1.png -------------------------------------------------------------------------------- /manuscript/images/shap-explain-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shap-explain-2.png -------------------------------------------------------------------------------- /manuscript/images/shap-importance-extended.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shap-importance-extended.png -------------------------------------------------------------------------------- /manuscript/images/shap-importance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shap-importance.png -------------------------------------------------------------------------------- /manuscript/images/shap-simplified-features.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shap-simplified-features.jpg -------------------------------------------------------------------------------- /manuscript/images/shap-superpixel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shap-superpixel.jpg -------------------------------------------------------------------------------- /manuscript/images/shapley-bike-plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shapley-bike-plot-1.png -------------------------------------------------------------------------------- /manuscript/images/shapley-cervical-plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shapley-cervical-plot-1.png -------------------------------------------------------------------------------- /manuscript/images/shapley-coalitions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shapley-coalitions.png -------------------------------------------------------------------------------- /manuscript/images/shapley-instance-intervention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shapley-instance-intervention.png -------------------------------------------------------------------------------- /manuscript/images/shapley-instance.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/shapley-instance.png -------------------------------------------------------------------------------- /manuscript/images/spheres.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/spheres.jpg -------------------------------------------------------------------------------- /manuscript/images/strong.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/strong.png -------------------------------------------------------------------------------- /manuscript/images/tcav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/tcav.png -------------------------------------------------------------------------------- /manuscript/images/title_page.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/title_page.jpg -------------------------------------------------------------------------------- /manuscript/images/trippy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/trippy.png -------------------------------------------------------------------------------- /manuscript/images/units.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/units.jpg -------------------------------------------------------------------------------- /manuscript/images/wise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/images/wise.png -------------------------------------------------------------------------------- /manuscript/index.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Interpretable Machine Learning" 3 | subtitle: "A Guide for Making Black Box Models Explainable." 4 | author: "Christoph Molnar" 5 | date: "`r Sys.Date()`" 6 | knit: "bookdown::render_book" 7 | documentclass: krantz 8 | link-citations: yes 9 | colorlinks: yes 10 | lot: no 11 | lof: no 12 | fontsize: 12pt 13 | monofont: "Source Code Pro" 14 | monofontoptions: "Scale=0.7" 15 | site: bookdown::bookdown_site 16 | description: "Machine learning algorithms usually operate as black boxes and it is unclear how they derived a certain decision. This book is a guide for practitioners to make machine learning decisions interpretable." 
17 | #url: 18 | github-repo: christophM/interpretable-ml-book 19 | always_allow_html: yes 20 | --- 21 | 22 | ```{r child = '00.0-preface.Rmd'} 23 | ``` 24 | -------------------------------------------------------------------------------- /manuscript/xgboost.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/manuscript/xgboost.model -------------------------------------------------------------------------------- /pkg/sbrl_1.2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hacarus/interpretable-ml-book-ja/5311c92f8805d006ac3b2c0dd8f02cdcefe19c79/pkg/sbrl_1.2.tar.gz -------------------------------------------------------------------------------- /review.md: -------------------------------------------------------------------------------- 1 | # Review Guidelines 2 | 3 | 4 | Your review will help improve the book. 5 | This is an open peer review. 6 | This means anyone can comment here in this issue. 7 | 8 | 9 | Link to the chapter: LINK 10 | Just in case: link to the Rmd of the chapter: LINK 11 | 12 | Please check the chapter for the following points: 13 | 14 | - Does the title of the chapter describe the chapter? 15 | - Does the first section make clear what to expect from the rest of the chapter? 16 | - Organization: Does the order of the sections make sense? 17 | - Are the tables, figures and other images good? Is it easy to understand what they mean? Is the caption self-explanatory? 18 | - Are the main claims of the chapter backed up with good arguments, proofs, examples, references? 19 | - Are all the statements correct? 20 | - Anything else you think might improve this chapter? 21 | - Is some graphic missing that could explain some concept better? 22 | 23 | For methods chapters: 24 | - Any Disadvantages or Advantages missing?
25 | - 26 | 27 | 28 | With your permission, your name will appear -------------------------------------------------------------------------------- /scripts/dl-feature-attribution/activation-maximization.py: -------------------------------------------------------------------------------- 1 | # code from here: https://gist.github.com/saurabhpal97/158988f112e2e3b6067d25c5f6499ef3#file-activation_max-py 2 | 3 | #importing the required modules 4 | from vis.visualization import visualize_activation 5 | from vis.utils import utils 6 | from keras import activations 7 | from keras import applications 8 | import matplotlib.pyplot as plt 9 | from scipy.misc import imread 10 | 11 | plt.rcParams['figure.figsize'] = (18,6) 12 | #creating a VGG16 model using fully connected layers also because then we can 13 | #visualize the patterns for individual category 14 | from keras.applications import VGG16 15 | model = VGG16(weights='imagenet',include_top=True) 16 | 17 | #finding out the layer index using layer name 18 | #the find_layer_idx function accepts the model and name of layer as parameters and return the index of respective layer 19 | layer_idx = utils.find_layer_idx(model,'predictions') 20 | #changing the activation of the layer to linear 21 | model.layers[layer_idx].activation = activations.linear 22 | #applying modifications to the model 23 | model = utils.apply_modifications(model) 24 | #Indian elephant 25 | img3 = visualize_activation(model,layer_idx,filter_indices=385,max_iter=5000,verbose=True) 26 | plt.imshow(img3) 27 | -------------------------------------------------------------------------------- /scripts/dl-feature-attribution/edge-detection.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import numpy as np 4 | from matplotlib import pyplot as plt 5 | import imp 6 | 7 | base_dir = os.path.dirname(__file__) 8 | utils = imp.load_source("utils", os.path.join(base_dir, "utils.py")) 9 | base_dir = os.path.dirname(__file__) 10 | img = utils.load_image( 11 | os.path.join(base_dir, "..", "..", "manuscript", "images", "dog_and_book.jpeg"), 224) 12 | img = np.uint8(img) 13 | edges = cv2.Canny(img, 200, 400) 14 | edges = np.max(edges) - edges 15 | plt.imshow(edges,cmap = 'gray') 16 | plt.title('Edge Image'), plt.xticks([]), plt.yticks([]) 17 | plt.axis('off') 18 | plt.title("Canny Edge Detector") 19 | plt.savefig("dog_and_book_edge.png", bbox_inches = "tight") 20 | 21 | -------------------------------------------------------------------------------- /scripts/dl-feature-attribution/feature-attribution-dl.py: -------------------------------------------------------------------------------- 1 | # Code from here: ttps://github.com/albermax/innvestigate/blob/master/examples/notebooks/imagenet_compare_methods.ipynb 2 | import keras 3 | import keras.backend 4 | import imp 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import os 8 | import innvestigate 9 | import innvestigate.utils 10 | import keras.applications.vgg16 as vgg16 11 | from keras.applications.vgg16 import decode_predictions 12 | 13 | model, preprocess = vgg16.VGG16(), vgg16.preprocess_input 14 | base_dir = os.path.dirname(__file__) 15 | utils = imp.load_source("utils", os.path.join(base_dir, "utils.py")) 16 | imgnetutils = imp.load_source("utils_imagenet", "utils_imagenet.py") 17 | 18 | def inverse_graymap(X): 19 | return imgnetutils.graymap(np.max(X) - X) 20 | 21 | 22 | # Methods we use and some properties. 
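# Each entry in the list below is a 4-tuple that the __main__ loop further down unpacks as:
#   (analyzer name passed to innvestigate.create_analyzer, optional analyzer parameters,
#    post-processing function applied to the attribution map, plot title).
# A rough single-method sketch, reusing the same calls as that loop (no API beyond what
# this script already uses; "lrp.epsilon" and epsilon=1 are taken from the entries below):
#   analyzer = innvestigate.create_analyzer("lrp.epsilon", model, epsilon=1)
#   a = analyzer.analyze(preprocess(image[None]))
#   a = imgnetutils.heatmap(imgnetutils.postprocess(a, "BGRtoRGB", False))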
23 | methods = [ 24 | # NAME OPT.PARAMS POSTPROC FXN TITLE 25 | # Show input. 26 | ("input", {}, imgnetutils.image, "Input"), 27 | # Function 28 | ("gradient", {"postprocess": "abs"}, inverse_graymap, "Gradient"), 29 | ("smoothgrad", {"augment_by_n": 64, "postprocess": "square"}, inverse_graymap, "SmoothGrad"), 30 | # Signal 31 | ("deconvnet", {}, imgnetutils.bk_proj, "Deconvnet"), 32 | ("guided_backprop", {}, imgnetutils.bk_proj, "Guided Backprop"), 33 | #("pattern.net", {}, imgnetutils.bk_proj, "PatternNet"), 34 | # Interaction 35 | ("deep_taylor", {}, imgnetutils.heatmap, "Deep Taylor"), 36 | #("pattern.attribution", {}, imgnetutils.heatmap, "PatternAttribution"), 37 | ("input_t_gradient", {}, imgnetutils.heatmap, "Input * Gradient"), 38 | ("integrated_gradients", {"steps": 64}, imgnetutils.heatmap, "Integrated Gradients"), 39 | ("lrp.z", {}, imgnetutils.heatmap, "LRP-Z"), 40 | ("lrp.epsilon", {"epsilon": 1}, imgnetutils.heatmap, "LRP-Epsilon"), 41 | ("lrp.sequential_preset_a_flat",{"epsilon": 1}, imgnetutils.heatmap, "LRP-PresetAFlat"), 42 | ("lrp.sequential_preset_b_flat",{"epsilon": 1}, imgnetutils.heatmap, "LRP-PresetBFlat"), 43 | ] 44 | 45 | if __name__ == "__main__": 46 | # Load an image. 47 | image = utils.load_image( 48 | os.path.join(base_dir, "..", "..", "manuscript", "images", "dog_and_book.jpeg"), 224) 49 | 50 | # Get model 51 | yhat = model.predict(preprocess(image[None])) 52 | label = decode_predictions(yhat) 53 | label = label[0][0] 54 | print('%s (%.2f%%)' % (label[1], label[2]*100)) 55 | # Strip softmax layer 56 | model = innvestigate.utils.model_wo_softmax(model) 57 | for method in methods: 58 | print(method[0]) 59 | analyzer = innvestigate.create_analyzer(method[0], 60 | model, 61 | **method[1]) 62 | if method[0] == "input": 63 | a = image[None]/255 64 | else: 65 | x = preprocess(image[None]) 66 | # use preprocessing from other script 67 | a = analyzer.analyze(x) 68 | a = imgnetutils.postprocess(a, "BGRtoRGB", False) 69 | a = method[2](a) 70 | plt.imshow(a[0], cmap="seismic", clim=(-1, 1)) 71 | plt.axis('off') 72 | plt.title(method[3]) 73 | plt.savefig("dog_and_book_" + method[0] + ".png", bbox_inches = "tight") 74 | 75 | -------------------------------------------------------------------------------- /scripts/dl-feature-attribution/utils.py: -------------------------------------------------------------------------------- 1 | # Begin: Python 2/3 compatibility header small 2 | # Get Python 3 functionality: 3 | from __future__ import\ 4 | absolute_import, print_function, division, unicode_literals 5 | from future.utils import raise_with_traceback, raise_from 6 | # catch exception with: except Exception as e 7 | from builtins import range, map, zip, filter 8 | from io import open 9 | import six 10 | # End: Python 2/3 compatability header small 11 | 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import os 15 | import PIL.Image 16 | import shutil 17 | 18 | 19 | ############################################################################### 20 | # Download utilities 21 | ############################################################################### 22 | 23 | 24 | def download(url, filename): 25 | if not os.path.exists(filename): 26 | print("Download: %s ---> %s" % (url, filename)) 27 | response = six.moves.urllib.request.urlopen(url) 28 | with open(filename, 'wb') as out_file: 29 | shutil.copyfileobj(response, out_file) 30 | 31 | 32 | ############################################################################### 33 | # Plot utility 34 | 
############################################################################### 35 | 36 | 37 | def load_image(path, size): 38 | ret = PIL.Image.open(path) 39 | ret = ret.resize((size, size)) 40 | ret = np.asarray(ret, dtype=np.uint8).astype(np.float32) 41 | if ret.ndim == 2: 42 | ret.resize((size, size, 1)) 43 | ret = np.repeat(ret, 3, axis=-1) 44 | return ret 45 | 46 | 47 | def get_imagenet_data(size=224): 48 | base_dir = os.path.dirname(__file__) 49 | 50 | # ImageNet 2012 validation set images? 51 | with open(os.path.join(base_dir, "images", "ground_truth_val2012")) as f: 52 | ground_truth_val2012 = {x.split()[0]: int(x.split()[1]) 53 | for x in f.readlines() if len(x.strip()) > 0} 54 | with open(os.path.join(base_dir, "images", "synset_id_to_class")) as f: 55 | synset_to_class = {x.split()[1]: int(x.split()[0]) 56 | for x in f.readlines() if len(x.strip()) > 0} 57 | with open(os.path.join(base_dir, "images", "imagenet_label_mapping")) as f: 58 | image_label_mapping = {int(x.split(":")[0]): x.split(":")[1].strip() 59 | for x in f.readlines() if len(x.strip()) > 0} 60 | 61 | def get_class(f): 62 | # File from ImageNet 2012 validation set 63 | ret = ground_truth_val2012.get(f, None) 64 | if ret is None: 65 | # File from ImageNet training sets 66 | ret = synset_to_class.get(f.split("_")[0], None) 67 | if ret is None: 68 | # Random JPEG file 69 | ret = "--" 70 | return ret 71 | 72 | images = [(load_image(os.path.join(base_dir, "images", f), size), 73 | get_class(f)) 74 | for f in os.listdir(os.path.join(base_dir, "images")) 75 | if f.lower().endswith(".jpg") or f.lower().endswith(".jpeg")] 76 | return images, image_label_mapping 77 | 78 | 79 | def plot_image_grid(grid, 80 | row_labels_left, 81 | row_labels_right, 82 | col_labels, 83 | file_name=None, 84 | figsize=None, 85 | dpi=224): 86 | n_rows = len(grid) 87 | n_cols = len(grid[0]) 88 | if figsize is None: 89 | figsize = (n_cols, n_rows+1) 90 | 91 | plt.clf() 92 | plt.rc("font", family="sans-serif") 93 | 94 | plt.figure(figsize=figsize) 95 | for r in range(n_rows): 96 | for c in range(n_cols): 97 | ax = plt.subplot2grid(shape=[n_rows+1, n_cols], loc=[r+1, c]) 98 | # TODO controlled color mapping wrt all grid entries, 99 | # or individually. 
make input param 100 | if grid[r][c] is not None: 101 | ax.imshow(grid[r][c], interpolation='none') 102 | else: 103 | for spine in plt.gca().spines.values(): 104 | spine.set_visible(False) 105 | ax.set_xticks([]) 106 | ax.set_yticks([]) 107 | 108 | # column labels 109 | if not r: 110 | if col_labels != []: 111 | ax.set_title(col_labels[c], 112 | rotation=22.5, 113 | horizontalalignment='left', 114 | verticalalignment='bottom') 115 | 116 | # row labels 117 | if not c: 118 | if row_labels_left != []: 119 | txt_left = [l+'\n' for l in row_labels_left[r]] 120 | ax.set_ylabel( 121 | ''.join(txt_left), 122 | rotation=0, 123 | verticalalignment='center', 124 | horizontalalignment='right', 125 | ) 126 | 127 | if c == n_cols-1: 128 | if row_labels_right != []: 129 | txt_right = [l+'\n' for l in row_labels_right[r]] 130 | ax2 = ax.twinx() 131 | ax2.set_xticks([]) 132 | ax2.set_yticks([]) 133 | ax2.set_ylabel( 134 | ''.join(txt_right), 135 | rotation=0, 136 | verticalalignment='center', 137 | horizontalalignment='left' 138 | ) 139 | 140 | if file_name is None: 141 | plt.show() 142 | else: 143 | print('Saving figure to {}'.format(file_name)) 144 | plt.savefig(file_name, orientation='landscape', dpi=dpi) 145 | -------------------------------------------------------------------------------- /scripts/dl-feature-attribution/utils_imagenet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import innvestigate 4 | import innvestigate.utils as iutils 5 | import innvestigate.utils.visualizations as ivis 6 | 7 | 8 | def preprocess(X, net): 9 | X = X.copy() 10 | X = net["preprocess_f"](X) 11 | return X 12 | 13 | 14 | def postprocess(X, color_conversion, channels_first): 15 | X = X.copy() 16 | X = iutils.postprocess_images( 17 | X, color_coding=color_conversion, channels_first=channels_first) 18 | return X 19 | 20 | 21 | def image(X): 22 | X = X.copy() 23 | return ivis.project(X, absmax=255.0, input_is_postive_only=True) 24 | 25 | 26 | def bk_proj(X): 27 | X = ivis.clip_quantile(X, 1) 28 | return ivis.project(X) 29 | 30 | 31 | def heatmap(X): 32 | #X = ivis.gamma(X, minamp=0, gamma=0.95) 33 | return ivis.heatmap(X) 34 | 35 | 36 | def graymap(X): 37 | return ivis.graymap(np.abs(X), input_is_positive_only=True) 38 | -------------------------------------------------------------------------------- /scripts/fix-leanpub.R: -------------------------------------------------------------------------------- 1 | # adapted from: https://github.com/rdpeng/rprogdatascience/blob/leanpub/manuscript/fixmath.R 2 | cargs <- commandArgs(TRUE) 3 | infile <- cargs[1] 4 | 5 | fixmath = function(doc0) { 6 | doc <- sub("^\\\\\\[$", "{\\$\\$}", doc0, perl = TRUE) 7 | doc <- sub("^\\\\\\]$", "{\\/\\$\\$}", doc, perl = TRUE) 8 | doc <- gsub("\\$\\$(\\S+)\\$\\$", "\\$\\1\\$", doc, perl = TRUE) 9 | doc <- gsub("^\\$(.+)\\$$", "{\\$\\$}\\1{\\/\\$\\$}", doc, perl = TRUE) 10 | # Adds pagebreaks for leanpub 11 | #doc = c("", "{pagebreak}","", doc) 12 | doc 13 | } 14 | 15 | fix_chapter_enum = function(doc0) { 16 | gsub("{-}", "", doc0, fixed = TRUE) 17 | } 18 | 19 | add_pagebreak = function(doc) { 20 | gsub("", "{pagebreak}", doc, fixed = TRUE) 21 | } 22 | 23 | process_file = function(infile) { 24 | doc0 = readLines(infile) 25 | doc = fixmath(doc0) 26 | doc = fix_chapter_enum(doc) 27 | doc = add_pagebreak(doc) 28 | writeLines(doc, infile) 29 | } 30 | 31 | process_file(infile) 32 | -------------------------------------------------------------------------------- 
/scripts/imagenet_classifier.R: -------------------------------------------------------------------------------- 1 | # https://cran.rstudio.com/web/packages/keras/vignettes/applications.html 2 | 3 | library(keras) 4 | 5 | # instantiate the model 6 | model <- application_mobilenet(weights = 'imagenet') 7 | #model <- application_resnet50(weights = 'imagenet') 8 | 9 | # load the image 10 | # img_path <- "F:/Data/pets_data/cats/20160211_200107000_iOS.jpg" 11 | #img_path <- "F:/Data/pets_data/dogs/IMG_20170920_200039_286.jpg" 12 | img_path <- "/run/media/verena/SAMSUNG/Data/pets_data/dogs/IMG_20170920_200039_286.jpg" 13 | img <- image_load(img_path, target_size = c(224,224)) 14 | x <- image_to_array(img) 15 | 16 | # ensure we have a 4d tensor with single element in the batch dimension, 17 | # the preprocess the input for prediction using resnet50 18 | x <- array_reshape(x, c(1, dim(x))) 19 | x <- imagenet_preprocess_input(x) 20 | 21 | # make predictions then decode and print them 22 | preds <- model %>% predict(x) 23 | imagenet_decode_predictions(preds, top = 3)[[1]] 24 | 25 | library(lime) 26 | library(abind) 27 | 28 | img_preprocess <- function(x) { 29 | arrays <- lapply(x, function(path) { 30 | img <- image_load(path, target_size = c(224,224)) 31 | x <- image_to_array(img) 32 | x <- array_reshape(x, c(1, dim(x))) 33 | x <- imagenet_preprocess_input(x) 34 | }) 35 | do.call(abind, c(arrays, list(along = 1))) 36 | } 37 | 38 | # Create an explainer (lime recognise the path as an image) 39 | explainer <- lime(img_path, as_classifier(model), img_preprocess) 40 | 41 | 42 | # Explain the model (can take a long time depending on your system) 43 | explanation <- explain(img_path, explainer, n_labels = 2, n_features = 3, n_superpixels = 10) 44 | 45 | library(microbenchmark) 46 | mb <- microbenchmark( 47 | explanation1 <- explain(img_path, explainer, n_labels = 2, n_features = 3, n_superpixels = 10), 48 | explanation2 <- explain(img_path, explainer, n_labels = 1, n_features = 3, n_superpixels = 10), 49 | explanation3 <- explain(img_path, explainer, n_labels = 2, n_features = 10, n_superpixels = 10), 50 | explanation4 <- explain(img_path, explainer, n_labels = 2, n_features = 3, n_superpixels = 20) 51 | ) 52 | # explanation$label <- imagenet_decode_predictions(explanation$label_prob) 53 | 54 | plot_explanations(explanation1) 55 | -------------------------------------------------------------------------------- /scripts/mmd/MMD-critic/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Data generated by this tool. 7 | data/ 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | -------------------------------------------------------------------------------- /scripts/mmd/MMD-critic/Helper.py: -------------------------------------------------------------------------------- 1 | # maintained by rajivak@utexas.edu 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | import os 6 | 7 | import numpy as np 8 | 9 | def format_numsel(numsel): 10 | ss = '' 11 | for i,j in enumerate(numsel): 12 | ss = ss + " %d:%d " %(i,j) 13 | return ss 14 | 15 | def get_train_testindices(n, ntest, seed): 16 | np.random.seed(seed) 17 | testindices = np.random.choice(n,ntest,replace=False) 18 | trainindices = np.setdiff1d( range(n), testindices) 19 | return trainindices, testindices 20 | 21 | def exit(str): 22 | print(str) 23 | exit(1) 24 | 25 | 26 | def dir_exists(filename): 27 | """Creates the directory of a file if the directory does not exist. 28 | 29 | Raises: 30 | IOError: If the directory could not be created (and the directory does not 31 | exist). This may be due to for instance permissions issues or a race 32 | condition in which the directory is created right before makdirs runs. 33 | """ 34 | dir = os.path.dirname(filename) 35 | if not os.path.exists(dir): 36 | os.makedirs(dir) 37 | -------------------------------------------------------------------------------- /scripts/mmd/MMD-critic/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Been Kim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/mmd/MMD-critic/README: -------------------------------------------------------------------------------- 1 | run run_digits.py to reproduce numbers in the paper: 2 | 3 | =========================================================================== 4 | Examples are not Enough, Learn to Criticize! Criticism for Interpretability. 5 | =========================================================================== 6 | Been Kim*, Rajiv Khanna*, Sanmi Koyejo*. NIPS 2016 7 | 8 | paper: 9 | 10 | http://people.csail.mit.edu/beenkim/papers/KIM2016NIPS_MMD.pdf 11 | 12 | 13 | @inproceedings{kim2016MMD, 14 | title={Examples are not Enough, Learn to Criticize! Criticism for Interpretability}, 15 | author={Been Kim, Rajiv Khanna and Sanmi Koyejo }, 16 | booktitle={Advances in Neural Information Processing Systems}, 17 | year={2016} 18 | } 19 | -------------------------------------------------------------------------------- /scripts/mmd/MMD-critic/classify.py: -------------------------------------------------------------------------------- 1 | # maintained by rajivak@utexas.edu 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | import numpy as np 6 | from sklearn.neighbors import KNeighborsClassifier 7 | 8 | # simple class to build 1NN classifier and classify using it 9 | class Classifier: 10 | model=None 11 | 12 | def __init__(self): 13 | pass 14 | 15 | def build_model(self, trainX, trainy): 16 | print("building model using %d points " %len(trainy)) 17 | self.model = KNeighborsClassifier(n_neighbors=1) 18 | self.model.fit(trainX, trainy) 19 | 20 | def classify(self, testX, testy): 21 | 22 | print("classifying %d points " %len(testy)) 23 | predy = self.model.predict(testX) 24 | 25 | ncorrect = np.sum(predy == testy) 26 | return 1.0 - ncorrect/(len(predy) + 0.0) 27 | -------------------------------------------------------------------------------- /scripts/mmd/MMD-critic/data.py: -------------------------------------------------------------------------------- 1 | # maintained by rajivak@utexas.edu 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | from sklearn.datasets import load_svmlight_file 6 | import numpy as np 7 | from sklearn.datasets import load_svmlight_file 8 | from sklearn.metrics.pairwise import rbf_kernel 9 | 10 | 11 | # class to load and handle data 12 | class Data: 13 | X = None # n * d 14 | y = None # n 15 | gamma = None 16 | kernel = None # n* n 17 | 18 | def __init__(self): 19 | pass 20 | 21 | # only to perform cross validation for picking gamma 22 | def splittraintest(self, testpercent): 23 | ntest = int(np.shape(self.X)[0] * testpercent/100.0) 24 | testindices = np.random.choice(np.shape(self.X)[0], ntest, replace=False) 25 | self.testX = self.X[testindices, :] 26 | self.testy = self.y[testindices] 27 | trainindices = np.setdiff1d(np.arange(np.shape(self.X)[0]), testindices) 28 | self.X = self.X[trainindices,:] 29 | self.y = self.y[trainindices] 30 | 31 | 32 | def subset(self, i): 33 | return np.where(y==i)[0] 34 | 35 | def load_data(self, X, y, gamma=None, docalkernel=False, 
savefile=None, testfile=None, dobin=False): 36 | self.X = X 37 | if dobin: 38 | bins = [-1.0, -0.67, -0.33, 0, 0.33, 0.67, 1.0] 39 | # bins = [-1.0, 0, 1.0] 40 | binned = np.digitize(self.X, bins ) 41 | self.X=np.array([bins[binned[i, j] - 1] for i in range(np.shape(self.X)[0]) for j in range(np.shape(self.X)[1])]).reshape(np.shape(self.X)) 42 | 43 | self.y = y 44 | if testfile is not None: 45 | dat2 = load_svmlight_file(testfile) 46 | self.testX = dat2[0].todense() 47 | if dobin: 48 | bins = [-1.0, -0.67, -0.33, 0, 0.33, 0.67, 1.0] 49 | binned = np.digitize(self.testX, bins) 50 | self.testX = np.array([bins[binned[i, j] - 1] for i in range(np.shape(self.testX)[0]) for j in range(np.shape(self.testX)[1])]).reshape(np.shape(self.testX)) 51 | 52 | self.testy = dat2[1] 53 | # print(np.shape(self.X)) 54 | 55 | self.gamma = gamma 56 | self.kernel = rbf_kernel(self.X, gamma=gamma) 57 | 58 | def load_svmlight(self, filename, gamma=None, docalkernel=False, savefile=None, testfile=None, dobin=False): 59 | data = load_svmlight_file(filename) 60 | self.load_data(data[0].todense(), data[1], gamma, docalkernel, savefile, testfile, dobin) 61 | 62 | def calculate_kernel(self, g=None): 63 | if g is None: 64 | if self.gamma is None: 65 | print("gamma not provided!") 66 | exit(1) 67 | else: 68 | self.kernel = rbf_kernel(self.X, gamma=self.gamma) 69 | else: 70 | self.kernel = rbf_kernel(self.X, gamma=g) 71 | 72 | # only calculate distance within class. across class, distance = 0 73 | def calculate_kernel_individual(self, g=None): 74 | touseg = g 75 | if touseg is None: 76 | touseg = self.gamma 77 | if touseg is None: 78 | print("gamma not provided!") 79 | exit(1) 80 | self.kernel = np.zeros((np.shape(self.X)[0], np.shape(self.X)[0]) ) 81 | sortind = np.argsort(self.y) 82 | self.X = self.X[sortind, :] 83 | self.y = self.y[sortind] 84 | 85 | for i in np.arange(10): 86 | j = i+1 87 | ind = np.where(self.y == j)[0] 88 | startind = np.min(ind) 89 | endind = np.max(ind)+1 90 | self.kernel[startind:endind, startind:endind ] = rbf_kernel(self.X[startind:endind, :], gamma=self.gamma) 91 | 92 | 93 | def loadstate(self,filename): 94 | temp = np.load(filename) 95 | self.X = temp['X'] 96 | self.y = temp['y'] 97 | self.gamma = temp['gamma'] 98 | self.kernel = temp['kernel'] 99 | 100 | def setgamma(self, newgamma): 101 | if self.kernel is not None: 102 | temp = np.log(self.kernel) 103 | temp = temp * newgamma/self.gamma 104 | self.kernel = np.exp(temp) 105 | self.gamma = newgamma 106 | if self.kernel is None: 107 | self.calculate_kernel() 108 | 109 | def savestate(self, outpfile): 110 | np.savez(file=outpfile, X=self.X, y=self.y, gamma=self.gamma, kernel=self.kernel) 111 | 112 | def rbf(v1,v2): 113 | dd = v1 - v2 114 | res = - self.gamma * np.dot(dd,dd) 115 | return math.exp(res) 116 | 117 | def getsim(self, i, j): 118 | if kernel is not None: 119 | return self.kernel[i,j] 120 | else: 121 | return self.rbf(X[i,:], X[j,:]) 122 | 123 | 124 | if __name__ == "__main__": 125 | import matplotlib.pyplot as plt 126 | file = 'data/usps' 127 | data=load_svmlight_file(file) 128 | X = data[0].todense() 129 | print(data[1]) 130 | plt.imshow(X[2,:].reshape((16,16))) 131 | plt.show() 132 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /scripts/mmd/MMD-critic/mmd.py: -------------------------------------------------------------------------------- 1 | # maintained by rajivak@utexas.edu 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from 
__future__ import print_function 5 | import numpy as np 6 | # from mpi4py import MPI 7 | import sys 8 | import math 9 | 10 | 11 | ############################################################################################################################## 12 | # function to select criticisms 13 | # ARGS: 14 | # K: Kernel matrix 15 | # selectedprotos: prototypes already selected 16 | # m : number of criticisms to be selected 17 | # reg: regularizer type. 18 | # is_K_sparse: True means K is the pre-computed csc sparse matrix? False means it is a dense matrix. 19 | # RETURNS: indices selected as criticisms 20 | ############################################################################################################################## 21 | def select_criticism_regularized(K, selectedprotos, m, reg='logdet', is_K_sparse=True): 22 | 23 | n = np.shape(K)[0] 24 | if reg in ['None','logdet','iterative']: 25 | pass 26 | else: 27 | print("wrong regularizer :" + reg) 28 | exit(1) 29 | options = dict() 30 | 31 | selected = np.array([], dtype=int) 32 | candidates2 = np.setdiff1d(range(n), selectedprotos) 33 | inverse_of_prev_selected = None # should be a matrix 34 | 35 | if is_K_sparse: 36 | colsum = np.array(K.sum(0)).ravel()/n 37 | else: 38 | colsum = np.sum(K, axis=0)/n 39 | 40 | for i in range(m): 41 | maxx = -sys.float_info.max 42 | argmax = -1 43 | candidates = np.setdiff1d(candidates2, selected) 44 | 45 | s1array = colsum[candidates] 46 | 47 | temp = K[selectedprotos, :][:, candidates] 48 | if is_K_sparse: 49 | s2array = temp.sum(0) 50 | else: 51 | s2array = np.sum(temp, axis=0) 52 | 53 | s2array = s2array / (len(selectedprotos)) 54 | 55 | s1array = np.abs(s1array - s2array) 56 | if reg == 'logdet': 57 | if inverse_of_prev_selected is not None: # first call has been made already 58 | temp = K[selected, :][:, candidates] 59 | if is_K_sparse: 60 | temp2 = temp.transpose().dot(inverse_of_prev_selected) 61 | regularizer = temp.transpose().multiply(temp2) 62 | regcolsum = regularizer.sum(1).ravel()# np.sum(regularizer, axis=0) 63 | regularizer = np.abs(K.diagonal()[candidates] - regcolsum) 64 | 65 | else: 66 | # hadamard product 67 | temp2 = np.array(np.dot(inverse_of_prev_selected, temp)) 68 | regularizer = temp2 * temp 69 | regcolsum = np.sum(regularizer, axis=0) 70 | regularizer = np.log(np.abs(np.diagonal(K)[candidates] - regcolsum)) 71 | s1array = s1array + regularizer 72 | else: 73 | if is_K_sparse: 74 | s1array = s1array - np.log(np.abs(K.diagonal()[candidates])) 75 | else: 76 | s1array = s1array - np.log(np.abs(np.diagonal(K)[candidates])) 77 | argmax = candidates[np.argmax(s1array)] 78 | maxx = np.max(s1array) 79 | 80 | selected = np.append(selected, argmax) 81 | if reg == 'logdet': 82 | KK = K[selected,:][:,selected] 83 | if is_K_sparse: 84 | KK = KK.todense() 85 | 86 | inverse_of_prev_selected = np.linalg.inv(KK) # shortcut 87 | if reg == 'iterative': 88 | selectedprotos = np.append(selectedprotos, argmax) 89 | 90 | return selected 91 | 92 | ############################################################################################################################## 93 | # Function choose m of all rows by MMD as per kernelfunc 94 | # ARGS: 95 | # K : kernel matrix 96 | # candidate_indices : array of potential choices for selections, returned values are chosen from these indices 97 | # m: number of selections to be made 98 | # is_K_sparse: True means K is the pre-computed csc sparse matrix? False means it is a dense matrix. 
99 | # RETURNS: subset of candidate_indices which are selected as prototypes 100 | ############################################################################################################################## 101 | 102 | def greedy_select_protos(K, candidate_indices, m, is_K_sparse=False): 103 | 104 | if len(candidate_indices) != np.shape(K)[0]: 105 | K = K[:,candidate_indices][candidate_indices,:] 106 | 107 | n = len(candidate_indices) 108 | 109 | # colsum = np.array(K.sum(0)).ravel() # same as rowsum 110 | if is_K_sparse: 111 | colsum = 2*np.array(K.sum(0)).ravel() / n 112 | else: 113 | colsum = 2*np.sum(K, axis=0) / n 114 | 115 | selected = np.array([], dtype=int) 116 | value = np.array([]) 117 | for i in range(m): 118 | maxx = -sys.float_info.max 119 | argmax = -1 120 | candidates = np.setdiff1d(range(n), selected) 121 | 122 | s1array = colsum[candidates] 123 | if len(selected) > 0: 124 | temp = K[selected, :][:, candidates] 125 | if is_K_sparse: 126 | # s2array = temp.sum(0) *2 127 | s2array = temp.sum(0) * 2 + K.diagonal()[candidates] 128 | 129 | else: 130 | s2array = np.sum(temp, axis=0) *2 + np.diagonal(K)[candidates] 131 | 132 | s2array = s2array/(len(selected) + 1) 133 | 134 | s1array = s1array - s2array 135 | 136 | else: 137 | if is_K_sparse: 138 | s1array = s1array - (np.abs(K.diagonal()[candidates])) 139 | else: 140 | s1array = s1array - (np.abs(np.diagonal(K)[candidates])) 141 | 142 | argmax = candidates[np.argmax(s1array)] 143 | # print("max %f" %np.max(s1array)) 144 | 145 | selected = np.append(selected, argmax) 146 | # value = np.append(value,maxx) 147 | KK = K[selected, :][:, selected] 148 | if is_K_sparse: 149 | KK = KK.todense() 150 | 151 | inverse_of_prev_selected = np.linalg.inv(KK) # shortcut 152 | 153 | return candidate_indices[selected] 154 | -------------------------------------------------------------------------------- /scripts/mmd/MMD-critic/run_digits.py: -------------------------------------------------------------------------------- 1 | # maintained by rajivak@utexas.edu 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | import argparse 6 | import os 7 | from data import Data 8 | from mmd import select_criticism_regularized, greedy_select_protos 9 | import matplotlib.pyplot as plt 10 | from pylab import * 11 | from matplotlib import gridspec 12 | from classify import Classifier 13 | #from mpi4py import MPI 14 | import Helper 15 | 16 | 17 | DATA_DIRECTORY = os.path.join(os.getcwd(), 'data') 18 | 19 | 20 | ############################################################################################################################## 21 | # plotter function to draw the selected prototypes/criticisms 22 | # ARGS : 23 | # xx : the matrix of selected pictures, each row is the representation of the digit picture 24 | # y : true classification of the picture, only used to print in order 25 | # fileprefix: path prefix 26 | # printselectionnumbers : if True, number of selected digits of each type are also outputted in the output file. 
27 | # RETURNS: nothing 28 | ############################################################################################################################## 29 | def plotfigs2(xx, selectedy, fileprefix=None, printselectionnumbers = False): 30 | num_selected = np.array([0] * 10) 31 | for ii in range(10): 32 | num_selected[ii] = len(np.where(selectedy == (ii + 1))[0]) 33 | print(ii, num_selected[ii]) 34 | 35 | totm = np.shape(xx)[0] 36 | print("number of images being printed %d" %totm) 37 | perpic_m = 60 38 | begin_at = 0 39 | counter = 0 40 | perrow = 10 41 | 42 | while counter < int(totm/perpic_m) + 1: 43 | 44 | counter += 1 45 | print("counter %d " % counter) 46 | 47 | offset = 0 48 | if begin_at == 0: 49 | offset = 5 # for text about number of protos/crits of each type 50 | if not printselectionnumbers: 51 | offset = 0 52 | 53 | # m=m+offset # for num_selected 54 | gs = gridspec.GridSpec(int(perpic_m/perrow)+int(offset/perrow), 55 | int(perrow), wspace=0.0, hspace=0.0) 56 | fig = plt.figure() 57 | 58 | if begin_at == 0 and printselectionnumbers: 59 | ax=fig.add_subplot(gs[0,:]) 60 | ax.text(0.1,0.5,Helper.format_numsel(num_selected)) 61 | ax.axis('off') 62 | 63 | endd = begin_at + offset+ perpic_m 64 | if endd-offset > totm: 65 | endd = totm +offset 66 | print(" begin %d, end %d" %(begin_at + offset, endd)) 67 | for i in np.array(range(begin_at + offset, endd)): 68 | ax = fig.add_subplot(gs[int(i - begin_at)]) 69 | #ax.imshow(xx[i - offset, :].reshape((16, 16)), cmap="Greys_r") 70 | ax.imshow(xx[int(i - offset), :].reshape((16, 16))) 71 | ax.axis('off') 72 | 73 | file = fileprefix+str(counter) + '.png' 74 | if file is not None: 75 | # print("saving file") 76 | plt.savefig(file , dpi=2000) 77 | 78 | begin_at += perpic_m 79 | 80 | 81 | 82 | ############################################################################################################################## 83 | # this function makes selects prototypes/criticisms and outputs the respective pictures. 
Also does 1-NN classification test 84 | # ARGS: 85 | # filename: the path to usps file 86 | # gamma: parameter for the kernel exp( - gamma * \| x1 - x2 \|_2 ) 87 | # ktype: kernel type, 0 for global, 1 for local 88 | # outfig: path where selected prototype pictures are outputted, can be None when outputting of pictures is skipped 89 | # critoutfig: path where selected criticism pictures are outputted, can be None 90 | # testfile : path to the test usps.t 91 | # RETURNS: returns indices of selected prototypes, criticisms and the built data structure that contains the loaded usps dataset 92 | ############################################################################################################################## 93 | def run(filename, gamma, m, k, ktype, outfig, critoutfig,testfile): 94 | 95 | digitsdat = Data() 96 | digitsdat.load_svmlight(filename, gamma=gamma, docalkernel=False, savefile=None, testfile=testfile, dobin=False) 97 | 98 | if ktype == 0: 99 | digitsdat.calculate_kernel() 100 | print("Running Kernel type : global ") 101 | else: 102 | digitsdat.calculate_kernel_individual() 103 | print("Running Kernel type : local ") 104 | 105 | 106 | 107 | # selected = greedy_parallel(digitsdat.kernel, m) 108 | # print(np.sort(selected)) 109 | selected = greedy_select_protos(digitsdat.kernel, np.array(range(np.shape(digitsdat.kernel)[0])), m) 110 | # print(np.sort(selected)) 111 | # critselected = select_criticism(digitsdat.kernel, selected, k) 112 | selectedy = digitsdat.y[selected] 113 | sortedindx = np.argsort(selectedy) 114 | critselected= None 115 | 116 | if outfig is not None: 117 | plotfigs2(digitsdat.X[selected[sortedindx], :], selectedy[sortedindx], outfig) 118 | 119 | 120 | if k > 0: 121 | critselected = select_criticism_regularized(digitsdat.kernel, selected, k, is_K_sparse=False, reg='logdet') 122 | 123 | critselectedy = digitsdat.y[critselected] 124 | critsortedindx = np.argsort(critselectedy) 125 | 126 | if critoutfig is not None: 127 | plotfigs2(digitsdat.X[critselected[critsortedindx], :], critselectedy[critsortedindx], critoutfig+reg) 128 | 129 | return selected, critselected, digitsdat 130 | 131 | ######################################################################################################################### 132 | # build a 1 NN classifier based on selected prototypes, test it against testfile 133 | # ARGS: 134 | # digitsdat : Data() structure already built. should also have built the kernels and loaded the test file as well. 135 | # selected : the indices of selected prototypes, in order of their selection (the order is important for all_test_k to be viable. 136 | # all_test_m : array of number of prototypes to be used to build classifier. Since the selections are greedy, one can select for 5000 prototypes, 137 | # and test for num_prototypes = 10, 100, 1000, 4000, etc. 138 | ############################################################################################################################## 139 | def test_1NN(digitsdat, selected, all_test_m): 140 | 141 | for testm in all_test_m: 142 | 143 | classifier = Classifier() 144 | classifier.build_model(digitsdat.X[selected[0:testm], :], digitsdat.y[ selected[0:testm]]) 145 | print("m=%d error=%f" % ( testm, classifier.classify(digitsdat.testX, digitsdat.testy))) 146 | 147 | # uncomment for stats on how many protos were selected for each type of digit. 
148 | #num_selected = np.array([0] * 10) 149 | 150 | #for ii in range(10): 151 | # num_selected[ii] = len(np.where(selectedy == (ii + 1))[0]) 152 | # print(ii, num_selected[ii]) 153 | 154 | 155 | ######################################################################################################################### 156 | ######################################################################################################################### 157 | ######################################################################################################################### 158 | # start here 159 | def main( 160 | data_prefix, 161 | output_prefix, 162 | gamma, 163 | m, 164 | alltestm, 165 | kerneltype, 166 | do_output_pics): 167 | ioff() 168 | 169 | outfig = None 170 | critoutfig = None 171 | 172 | k = 0 # number of criticisms 173 | 174 | if do_output_pics == 1: 175 | outfig = os.path.join(output_prefix, 'images/%d/protos' % m) 176 | critoutfig = os.path.join(output_prefix, 'images/%d/crit' % m) 177 | 178 | Helper.dir_exists(outfig) 179 | 180 | selected, critselected, digitsdat = run( 181 | os.path.join(data_prefix, 'usps'), 182 | gamma, 183 | m, 184 | k, 185 | kerneltype, 186 | outfig, 187 | critoutfig, 188 | os.path.join(data_prefix, 'usps.t')) 189 | 190 | test_1NN(digitsdat, selected, alltestm) 191 | 192 | print("...done") 193 | 194 | 195 | if __name__ == '__main__': 196 | parser = argparse.ArgumentParser() 197 | parser.add_argument( 198 | "--data_directory", 199 | type=str, 200 | default=DATA_DIRECTORY, 201 | help="The directory that contains data such as the usps file.") 202 | parser.add_argument( 203 | "--output_directory", 204 | type=str, 205 | default="./figs/", 206 | help="The directory in which to output data.") 207 | FLAGS, unparsed = parser.parse_known_args() 208 | 209 | data_prefix = FLAGS.data_directory 210 | output_prefix = os.path.join(FLAGS.output_directory, "data") 211 | gamma = 0.026 # kernel parameter, obtained after cross validation 212 | 213 | #m= 4433 # total number of prototypes to select 214 | #alltestm = np.array([4433, 3772, 3135, 2493, 1930, 1484, 1145, 960, 828, 715, 643, 584, 492, 410, 329, 286, 219, 185, 130, 110]) # test using these number of prototypes 215 | 216 | m = 50 # total number of prototypes to select 217 | alltestm = np.array( 218 | [410, 329, 286, 219, 185, 130, 219 | 110]) # test using these number of prototypes 220 | 221 | do_output_pics = 1 222 | kernel_type = 1 # 1 for local, 0 for global 223 | 224 | main(data_prefix, output_prefix, gamma, m, alltestm, kernel_type, do_output_pics) 225 | 226 | 227 | 228 | 229 | 230 | -------------------------------------------------------------------------------- /scripts/prepare_data.R: -------------------------------------------------------------------------------- 1 | library(R.utils) 2 | library(tm) 3 | library(dplyr) 4 | library(mlr) 5 | 6 | devtools::load_all("R/") 7 | data_dir = "data/" 8 | 9 | 10 | 11 | bike = get.bike.data(data_dir) 12 | save(bike, file = sprintf('%sbike.RData', data_dir)) 13 | write.csv(bike,file = sprintf('%sbike.csv', data_dir), row.names = FALSE) 14 | 15 | cervical = get.cervical.data(data_dir) 16 | save(cervical, file = sprintf('%scervical.RData', data_dir)) 17 | write.csv(cervical, file = sprintf("%scervical.csv", data_dir), row.names = FALSE) 18 | 19 | ycomments = get.ycomments.data(data_dir) 20 | save(ycomments, file = sprintf('%sycomments.RData', data_dir)) 21 | 22 | -------------------------------------------------------------------------------- /scripts/process-rmd-leanpub.R: 
-------------------------------------------------------------------------------- 1 | # adapted from: https://github.com/rdpeng/rprogdatascience/blob/leanpub/manuscript/fixmath.R 2 | cargs <- commandArgs(TRUE) 3 | infile <- cargs[1] 4 | 5 | 6 | # Removes out.width, which is needed in HTML version, but 7 | # when converting to .md, leads to html image input, instead of ![]() notation 8 | # which causes problems with leanpub 9 | fix_image_params = function(doc){ 10 | gsub("\\s*,\\s*out.width\\s*=\\s*\\d+", "", doc) 11 | } 12 | 13 | process_doc = function(doc0) { 14 | doc = fix_image_params(doc0) 15 | } 16 | 17 | # Make changes and write to disc 18 | doc0 = readLines(infile) 19 | doc = process_doc(doc0) 20 | writeLines(doc, infile) 21 | 22 | # knit stuff 23 | knitr::knit(infile, envir = new.env()) 24 | 25 | # Write again the original doc 26 | writeLines(doc0, infile) -------------------------------------------------------------------------------- /scripts/references.R: -------------------------------------------------------------------------------- 1 | library("data.table") 2 | library("readr") 3 | # load r packages so they are cited 4 | devtools::load_all() 5 | # relative project path to the .Rmd files 6 | path = "./" 7 | # name of the file where to write the references to 8 | out.file = "11-references.Rmd" 9 | 10 | # Function to collect list of .Rmd files 11 | get_file_list = function(folder){ 12 | paste(folder, list.files(folder, pattern = ".Rmd"), sep = "/") 13 | } 14 | 15 | # Function to create a list of references from list of .Rmd 16 | grep_references = function(file){ 17 | # read in file as string 18 | lines = readr::read_lines(file) 19 | # grep lines starting with[^XXX]: 20 | lines = lines[grep("\\[\\^[a-z;A-Z;0-9]*\\]\\:", lines)] 21 | # split by first : 22 | splitted = strsplit(lines, "]: ") 23 | # store in data.frame with key and ref 24 | res = data.table::rbindlist(lapply(splitted, 25 | function(x) data.frame(t(x), stringsAsFactors = FALSE))) 26 | if ( nrow(res) > 0) { 27 | colnames(res) = c("key", "reference") 28 | res$key = gsub("\\[\\^", "", res$key) 29 | } 30 | res 31 | } 32 | 33 | 34 | # Adapted from knitr::write_bib 35 | # For the citation of the R packages 36 | get_R_bib = function (x = .packages(), tweak = TRUE, width = NULL, 37 | prefix = getOption("knitr.bib.prefix", "R-")) { 38 | # iml.book is only the pseudo name of the book DESCRIPTION file 39 | x = setdiff(x, "iml.book") 40 | idx = mapply(system.file, package = x) == "" 41 | if (any(idx)) { 42 | warning("package(s) ", paste(x[idx], collapse = ", "), 43 | " not found") 44 | x = x[!idx] 45 | } 46 | x = setdiff(x, knitr:::.base.pkgs) 47 | bib = sapply(x, function(pkg) { 48 | cite = citation(pkg, auto = if (pkg == "base") 49 | NULL 50 | else TRUE) 51 | if (tweak) { 52 | cite$title = gsub(sprintf("^(%s: )(\\1)", pkg), "\\1", 53 | cite$title) 54 | cite$title = gsub(" & ", " \\\\& ", cite$title) 55 | } 56 | cite 57 | }, 58 | simplify = FALSE) 59 | bib = bib[sort(x)] 60 | invisible(bib) 61 | } 62 | 63 | 64 | 65 | 66 | file_list = get_file_list(path) 67 | file_list = setdiff(file_list, paste(path, "interpretable-ml.Rmd", sep = "/")) 68 | reference_list = data.table::rbindlist(lapply(file_list, grep_references), 69 | fill = TRUE) 70 | 71 | reference_list = unique(reference_list) 72 | reference_list = reference_list[order(reference_list$reference), ] 73 | 74 | r_reference_list = get_R_bib() 75 | 76 | fileConn <- file(paste(path, out.file, sep = "/")) 77 | write_string = c("# References {-}", 78 | "") 79 | for ( i in 
1:nrow(reference_list)) { 80 | write_string = c(write_string, "", reference_list$reference[i]) 81 | } 82 | write_string = c(write_string, "", "## R Packages Used for Examples {-}", "") 83 | for ( i in 1:length(r_reference_list)) { 84 | r_package_citation = paste0("**", names(r_reference_list[i]), "**. ", 85 | format(r_reference_list[[i]], "textVersion", collapse = TRUE)[[1]]) 86 | write_string = c(write_string, "", r_package_citation) 87 | } 88 | write_lines(write_string, fileConn) 89 | -------------------------------------------------------------------------------- /scripts/setup_book_manjaro.R: -------------------------------------------------------------------------------- 1 | install.packages('ggplot2') 2 | install.packages('dplyr') 3 | install.packages('tidyr') 4 | install.packages('mlr') 5 | 6 | # install gfortran (package manager) --> needed for partykit 7 | # ln -s /usr/bin/gfortran-5 /usr/bin/gfortran 8 | 9 | install.packages("randomForest") 10 | install.packages("caret") 11 | install.packages("pre") 12 | 13 | devtools::install_github("christophM/iml") 14 | 15 | install.packages('partykit') 16 | 17 | setwd("manuscript") 18 | file.remove("interpretable-ml.Rmd") 19 | bookdown::render_book('', 'bookdown::gitbook') 20 | 21 | browseURL('_book/index.html') 22 | -------------------------------------------------------------------------------- /scripts/shap/.gitignore: -------------------------------------------------------------------------------- 1 | shap 2 | -------------------------------------------------------------------------------- /scripts/shap/requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==19.1.0 2 | backcall==0.1.0 3 | bleach==3.1.4 4 | cycler==0.10.0 5 | decorator==4.4.0 6 | defusedxml==0.6.0 7 | entrypoints==0.3 8 | imageio==2.5.0 9 | ipykernel==5.1.2 10 | ipython==7.7.0 11 | ipython-genutils==0.2.0 12 | ipywidgets==7.5.1 13 | jedi==0.14.1 14 | Jinja2==2.10.1 15 | joblib==0.13.2 16 | jsonschema==3.0.2 17 | jupyter-client==5.3.1 18 | jupyter-core==4.5.0 19 | kiwisolver==1.1.0 20 | MarkupSafe==1.1.1 21 | mistune==0.8.4 22 | nbconvert==5.5.0 23 | nbformat==4.4.0 24 | networkx==2.3 25 | notebook==6.0.0 26 | numpy==1.17.0 27 | pandas==0.25.0 28 | pandocfilters==1.4.2 29 | parso==0.5.1 30 | pexpect==4.7.0 31 | pickleshare==0.7.5 32 | Pillow==6.2.0 33 | pkg-resources==0.0.0 34 | prometheus-client==0.7.1 35 | prompt-toolkit==2.0.9 36 | ptyprocess==0.6.0 37 | Pygments==2.4.2 38 | pyparsing==2.4.2 39 | pyrsistent==0.15.4 40 | python-dateutil==2.8.0 41 | pytz==2019.2 42 | PyWavelets==1.0.3 43 | pyzmq==18.0.2 44 | scikit-image==0.15.0 45 | scikit-learn==0.21.3 46 | scipy==1.3.0 47 | Send2Trash==1.5.0 48 | shap==0.29.3 49 | six==1.12.0 50 | terminado==0.8.2 51 | testpath==0.4.2 52 | tornado==6.0.3 53 | tqdm==4.32.2 54 | traitlets==4.3.2 55 | wcwidth==0.1.7 56 | webencodings==0.5.1 57 | widgetsnbextension==3.5.1 58 | --------------------------------------------------------------------------------