├── .gitignore
├── Introduction to Model Interpretability.ipynb
├── README.md
├── data
    ├── bank-additional
    │   └── bank-additional-full.csv
    └── toucan.jpg
├── helpers.py
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
 1 | .ipynb_checkpoints
 2 | __pycache__
 3 | .vscode/
 4 | venv
 5 | explanation.html
 6 | *Copy*ipynb
 7 | Untitled.ipynb
 8 | data/
 9 | !data/bank-additional/bank-additional-full.csv
10 | !data/toucan.jpg


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PyData NYC 2018: Introduction to Model Interpretability
 2 | 
 3 | ## Slides
 4 | 
 5 | Available [here](https://speakerdeck.com/klemag/pydata-nyc-2018-open-the-black-box-an-introduction-to-model-interpretability-with-lime-and-shap)
 6 | 
 7 | 
 8 | ## Setup instructions
 9 | 
10 | Make sure you are using Python 3.6+
11 | 
12 | with Anaconda:
13 | 
14 | ```
15 | conda create -n interpretability --file requirements.txt -c conda-forge
 16 | conda activate interpretability
17 | ```
18 | 
19 | with pip:
20 | 
21 | ```
22 | pip install -r requirements.txt
23 | ```


--------------------------------------------------------------------------------
/data/toucan.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klemag/pydata_nyc2018-intro-to-model-interpretability/565d24b1ee8aec10fe1d4daf3bfec99847374605/data/toucan.jpg


--------------------------------------------------------------------------------
/helpers.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | 
 4 | def convert_to_lime_format(X, categorical_names, col_names=None, invert=False):
 5 |     """Converts data with categorical values as string into the right format 
 6 |     for LIME, with categorical values as integers labels.
 7 | 
 8 |     It takes categorical_names, the same dictionary that has to be passed
 9 |     to LIME to ensure consistency. 
10 | 
11 |     col_names and invert allow to rebuild the original dataFrame from
12 |     a numpy array in LIME format to be passed to a Pipeline or sklearn
13 |     OneHotEncoder
14 |     """
15 | 
16 |     # If the data isn't a dataframe, we need to be able to build it
17 |     if not isinstance(X, pd.DataFrame):
18 |         X_lime = pd.DataFrame(X, columns=col_names)
19 |     else:
20 |         X_lime = X.copy()
21 | 
22 |     for k, v in categorical_names.items():
23 |         if not invert:
24 |             label_map = {
25 |                 str_label: int_label for int_label, str_label in enumerate(v)
26 |             }
27 |         else:
28 |             label_map = {
29 |                 int_label: str_label for int_label, str_label in enumerate(v)
30 |             }
31 | 
32 |         X_lime.iloc[:, k] = X_lime.iloc[:, k].map(label_map)
33 | 
34 |     return X_lime
35 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | lime
 2 | shap==0.24
 3 | scikit-image
 4 | keras
 5 | scikit-learn==0.20
 6 | pandas
 7 | eli5
 8 | xgboost
 9 | jupyter
10 | tensorflow


--------------------------------------------------------------------------------