├── .gitignore
├── LICENSE
├── README.md
├── ch02_theoretical_foundations_of_explainable_ai
│   ├── Images
│   │   └── Interpretability vs. Visualization.png
│   └── interpretability_visualization.py
├── ch03_interpretability_of_traditional_machine_learning_models
│   ├── Images
│   │   ├── Bayesian.png
│   │   ├── decision_tree_feature_importance.png
│   │   ├── gam.png
│   │   ├── linear_regression_example.png
│   │   ├── logistic_regression_churn_prediction.png
│   │   └── svm_decision_boundary_visualization.png
│   ├── bayesian.py
│   ├── decision_tree_feature_importance.py
│   ├── gam.py
│   ├── linear_regression_example.py
│   ├── logistic_regression_churn_prediction.py
│   ├── rule_based_cold_diagnosis.py
│   └── svm_decision_boundary_visualization.py
├── ch04_interpretability_of_deep_learning_models
│   ├── Images
│   │   ├── HiddenStates.png
│   │   ├── OriginalandPredictedSineWave.png
│   │   ├── SampleAttentionHeatmap.png
│   │   ├── cat.jpg
│   │   └── vgg16_feature_map_visualization.png
│   ├── attention_heatmap_visualization.py
│   ├── rnn_hidden_states_visualization.py
│   └── vgg16_feature_map_visualization.py
├── ch05_interpretability_of_large_language_models
│   ├── Images
│   │   ├── MeanActivations.png
│   │   ├── PCAVisualization.png
│   │   └── embedding_probing_task.png
│   ├── embedding_probing_task.py
│   ├── embedding_visualization.py
│   └── layer_activation_analysis.py
├── ch06_techniques_for_explainable_ai
│   ├── 610_Post-hoc
│   │   ├── Images
│   │   │   ├── Attention-based Explanation for Time Series.png
│   │   │   ├── DTW.png
│   │   │   ├── Decision Plot.png
│   │   │   ├── DeepLIFT.png
│   │   │   ├── Feature Importance Analysis.png
│   │   │   ├── Feature Interaction Detection.png
│   │   │   ├── Feature Interaction Heatmap.png
│   │   │   ├── Grad-CAM.png
│   │   │   ├── ICE.png
│   │   │   ├── Integrated Gradients 1.png
│   │   │   ├── Integrated Gradients.png
│   │   │   ├── Local Interpretable Model-agnostic Explanations (LIME).png
│   │   │   ├── PDPs.png
│   │   │   ├── SHAP Dependence.png
│   │   │   ├── Saliency Maps for Recurrent Neural Networks.png
│   │   │   ├── Saliency Maps.png
│   │   │   ├── Shapley Additive Explanations (SHAP.png
│   │   │   ├── SmoothGrad.png
│   │   │   ├── TimeSHAP.png
│   │   │   └── shap_force_plot.png
│   │   ├── attention_based_explanation_time_series.py
│   │   ├── decision_plot.py
│   │   ├── deep_lift.py
│   │   ├── dtw.py
│   │   ├── feature_importance_analysis.py
│   │   ├── feature_interaction_detection.py
│   │   ├── feature_interaction_heatmap.py
│   │   ├── grad_cam.py
│   │   ├── ice.py
│   │   ├── integrated_gradients.py
│   │   ├── lime_explanation.html
│   │   ├── local_interpretable_model_agnostic_explanations.py
│   │   ├── lrp.py
│   │   ├── pdps.py
│   │   ├── saliency_Maps_rnn.py
│   │   ├── saliency_maps.py
│   │   ├── shap_dependence.py
│   │   ├── shap_force_plot.html
│   │   ├── shap_force_plot.ipynb
│   │   ├── shap_force_plot.png
│   │   ├── shapley_additive_explanations.py
│   │   ├── smoothgrad.py
│   │   └── timeshap.py
│   ├── 620_Causal-Inference
│   │   ├── Images
│   │   │   └── Causal Discovery.png
│   │   ├── causal_discovery.py
│   │   ├── causal_mediation.py
│   │   ├── irm.py
│   │   └── scm.py
│   ├── 630_Counterfactual
│   │   ├── Images
│   │   │   ├── (GAN)-based Counterfactuals.png
│   │   │   ├── Diverse Counterfactual.png
│   │   │   └── Optimization-based Counterfactuals.png
│   │   ├── actionable_recourse_method.py
│   │   ├── counterfactuals_RL.py
│   │   ├── counterfactuals_structured_data.py
│   │   ├── counterfactuals_time_series_data.py
│   │   ├── diverse_counterfactual.py
│   │   ├── gan_based_counterfactuals.py
│   │   ├── minimal_change_counterfactuals.py
│   │   ├── neighbor_counterfactuals.py
│   │   ├── optimization_based_counterfactuals.py
│   │   └── prototype_based_counterfactuals.py
│   ├── 640_Graph-based
│   │   ├── Images
│   │   │   ├── feature_importance.png
│   │   │   └── subgraph.pdf
│   │   ├── gnn_explainer.py
│   │   └── node_importance_attribution.py
│   ├── 650_Multimodal
│   │   ├── Images
│   │   │   ├── Cross-modal Attention.png
│   │   │   ├── Joint Feature Attribution for Multimodal Models.png
│   │   │   ├── model_plot.png
│   │   │   └── model_summary.png
│   │   ├── cross_modal_attention.py
│   │   ├── joint_feature_attribution_multimodal_models.py
│   │   └── multimodal_explanations_attention.py
│   └── 660_Robustness
│       ├── Images
│       │   ├── Adversarial Robustness Testing.png
│       │   ├── Fairness-aware Explanation.png
│       │   └── Robustness Testing for Explanations.png
│       ├── adversarial_robustness_testing.py
│       ├── explanation_consistency_score.py
│       ├── fairness_aware_explanation.py
│       ├── invariant_explanation_testing.py
│       ├── invariant_testing_llms.py
│       └── robustness_testing_explanations.py
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
aclImdb/
*.gz
data/
*.h5

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Wizard

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# XAI_From_Classical_Models_to_LLMs

![License](https://img.shields.io/badge/License-MIT-green)
[![arXiv](https://img.shields.io/badge/arXiv-2412.00800-B31B1B.svg)](https://arxiv.org/abs/2412.00800)
[![DOI](https://img.shields.io/badge/DOI-10.48550/arXiv.2412.00800-blue)](https://doi.org/10.48550/arXiv.2412.00800)

This repository contains the accompanying code and resources for the book **"A Comprehensive Guide to Explainable AI: From Classical Models to LLMs"**.

## About the Book

**A Comprehensive Guide to Explainable AI** addresses the critical need for transparency and interpretability in AI systems. This book bridges foundational concepts with advanced methodologies, offering a deep dive into the following topics:

- **Traditional Models**: Interpretability in Decision Trees, Linear Regression, and Support Vector Machines (SVMs).
- **Deep Learning Models**: Explainability for CNNs, RNNs, and Large Language Models (LLMs) like BERT, GPT, and T5.
- **Practical Techniques**: SHAP, LIME, Grad-CAM, counterfactual explanations, and causal inference.
- **Case Studies**: Applications in healthcare, finance, and policymaking.
- **Evaluation Metrics**: Assessing explanation quality.
- **Emerging Directions**: Interpretability in federated learning and ethical AI.

Hands-on Python examples and additional resources are available in the companion [GitHub repository](#).

---

## Features

- **Practical Techniques**: Explore actionable explainability techniques with Python code.
- **Real-World Applications**: Learn through case studies across diverse domains.
- **Emerging Research**: Gain insights into the latest trends, including interpretability for federated learning and ethical considerations.
- **Resources**: Complementary materials provided for further learning and development.

---

## Authors

The book is co-authored by experts in AI and machine learning:
Weiche Hsieh, Ziqian Bi, Chuanqi Jiang, Junyu Liu, Benji Peng, Sen Zhang, Xuanhe Pan, Jiawei Xu, Jinlang Wang, Keyu Chen, Pohsun Feng, Yizhu Wen, Xinyuan Song, Tianyang Wang, Ming Liu, Junjie Yang, Ming Li, Bowen Jing, Jintao Ren, Junhao Song, Hong-Ming Tseng, Yichao Zhang, Lawrence K. Q. Yan, Qian Niu, Silin Chen, Yunze Wang, Chia Xin Liang.

---

## Citation

If you use this work in your research, please cite it as follows:

```bibtex
@book{hsieh2024comprehensiveguideexplainableai,
  title={A Comprehensive Guide to Explainable AI: From Classical Models to LLMs},
  author={Weiche Hsieh and Ziqian Bi and Chuanqi Jiang and Junyu Liu and Benji Peng and Sen Zhang and Xuanhe Pan and Jiawei Xu and Jinlang Wang and Keyu Chen and Pohsun Feng and Yizhu Wen and Xinyuan Song and Tianyang Wang and Ming Liu and Junjie Yang and Ming Li and Bowen Jing and Jintao Ren and Junhao Song and Hong-Ming Tseng and Yichao Zhang and Lawrence K. Q. Yan and Qian Niu and Silin Chen and Yunze Wang and Chia Xin Liang},
  year={2024},
  publisher={arXiv},
  eprint={2412.00800},
  archivePrefix={arXiv},
  primaryClass={cs.LG},
  url={https://arxiv.org/abs/2412.00800}
}
```
--------------------------------------------------------------------------------
/ch02_theoretical_foundations_of_explainable_ai/Images/Interpretability vs. Visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch02_theoretical_foundations_of_explainable_ai/Images/Interpretability vs. Visualization.png
--------------------------------------------------------------------------------
/ch02_theoretical_foundations_of_explainable_ai/interpretability_visualization.py:
--------------------------------------------------------------------------------
import shap
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the dataset
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a RandomForest model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Initialize the SHAP Explainer
explainer = shap.Explainer(model.predict, X_test)

# Compute SHAP values
shap_values = explainer(X_test)

# Plot SHAP Summary Plot
shap.summary_plot(shap_values, X_test)
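
# Optional follow-up sketch (an illustrative addition, not required by the
# summary plot above): a bar plot of mean |SHAP value| per feature gives a
# single global importance ranking from the same Explanation object.
shap.plots.bar(shap_values)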
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/Bayesian.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/Bayesian.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/decision_tree_feature_importance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/decision_tree_feature_importance.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/gam.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/gam.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/linear_regression_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/linear_regression_example.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/logistic_regression_churn_prediction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/logistic_regression_churn_prediction.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/svm_decision_boundary_visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/svm_decision_boundary_visualization.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/bayesian.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

# Define the dataset
np.random.seed(42)
X = np.linspace(-5, 5, 100)
true_slope = 0.7
true_intercept = 1.5
y = true_slope * X + true_intercept + np.random.normal(0, 1, size=X.shape)

# Define the Bayesian linear regression model
tfd = tfp.distributions

# Define priors
prior_slope = tfd.Normal(loc=0., scale=1.)
prior_intercept = tfd.Normal(loc=0., scale=1.)
prior_sigma = tfd.HalfNormal(scale=1.)

# Define likelihood function
def likelihood(slope, intercept, sigma, X):
    mean = slope * X + intercept
    return tfd.Normal(loc=mean, scale=sigma)

# Sample from the posterior using Markov Chain Monte Carlo (MCMC)
@tf.function
def joint_log_prob(slope, intercept, sigma):
    lp = prior_slope.log_prob(slope) + prior_intercept.log_prob(intercept) + prior_sigma.log_prob(sigma)
    lp += tf.reduce_sum(likelihood(slope, intercept, sigma, X).log_prob(y))
    return lp

# Initialize MCMC sampler
initial_state = [0., 0., 1.]
num_results = 1000
kernel = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=joint_log_prob,
    step_size=0.1,
    num_leapfrog_steps=3)

# Run MCMC
states, kernel_results = tfp.mcmc.sample_chain(
    num_results=num_results,
    current_state=initial_state,
    kernel=kernel,
    trace_fn=lambda _, pkr: pkr.is_accepted)

# Extract sampled parameters
slope_samples, intercept_samples, sigma_samples = states

# Plot the posterior distributions
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
axs[0].hist(slope_samples, bins=30, color='skyblue', edgecolor='black')
axs[0].set_title('Posterior of Slope')
axs[1].hist(intercept_samples, bins=30, color='skyblue', edgecolor='black')
axs[1].set_title('Posterior of Intercept')
axs[2].hist(sigma_samples, bins=30, color='skyblue', edgecolor='black')
axs[2].set_title('Posterior of Sigma')

plt.show()
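
# Optional posterior summary (an illustrative sketch computed from the MCMC
# samples above): report the posterior mean and a 95% credible interval for
# each parameter, which complements the histograms.
for name, samples in [("slope", slope_samples),
                      ("intercept", intercept_samples),
                      ("sigma", sigma_samples)]:
    samples = np.asarray(samples)
    lo, hi = np.percentile(samples, [2.5, 97.5])
    print(f"{name}: mean={samples.mean():.3f}, 95% CI=({lo:.3f}, {hi:.3f})")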
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/decision_tree_feature_importance.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# 1. Generate synthetic data with 3 features for binary classification
X, y = make_classification(
    n_samples=100,      # Number of samples
    n_features=3,       # Number of features
    n_informative=3,    # Number of informative features
    n_redundant=0,      # No redundant features
    n_classes=2,        # Binary classification
    random_state=42     # Random seed for reproducibility
)

# 2. Split the data into training and testing sets (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 3. Train a Decision Tree Classifier with a maximum depth of 4
clf = DecisionTreeClassifier(max_depth=4, random_state=42)
clf.fit(X_train, y_train)

# 4. Extract the feature importance scores from the trained classifier
feature_importance = clf.feature_importances_

# 5. Define feature names for the plot (e.g., Feature 1, Feature 2, Feature 3)
features = np.array(['Feature 1', 'Feature 2', 'Feature 3'])

# 6. Plot a horizontal bar chart to visualize feature importance
plt.barh(features, feature_importance)
plt.xlabel('Importance Score')  # Label for the x-axis
plt.title('Feature Importance in Decision Tree')  # Title of the plot
plt.show()  # Display the plot
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/gam.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from pygam import LinearGAM, s

# Generate synthetic data
np.random.seed(42)
x1 = np.random.uniform(-3, 3, 200)
x2 = np.random.uniform(-3, 3, 200)
y = np.sin(x1) + 0.5 * np.cos(x2) + np.random.normal(0, 0.2, 200)

# Combine features into a matrix
X = np.column_stack((x1, x2))

# Define and fit the GAM model
gam = LinearGAM(s(0) + s(1))
gam.fit(X, y)

# Plot the partial dependence for each feature
fig, axs = plt.subplots(1, 2, figsize=(12, 5))
for i, ax in enumerate(axs):
    XX = gam.generate_X_grid(term=i)
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    ax.set_title(f'Partial Dependence of Feature x{i+1}')
    ax.set_xlabel(f'x{i+1}')
    ax.set_ylabel('Predicted y')

plt.tight_layout()
plt.show()
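
# Optional diagnostic (illustrative addition): pygam can print per-term
# statistics such as effective degrees of freedom and p-values, which
# complement the partial-dependence plots above.
gam.summary()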
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/linear_regression_example.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Example data: [Square Footage, Number of Bedrooms]
X = np.array([[1500, 3], [2000, 4], [2500, 4], [3000, 5]])
y = np.array([300000, 400000, 500000, 600000])

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X, y)

# Output the intercept and coefficients
print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)

# Predict house prices using the trained model
y_pred = model.predict(X)
print("Predicted Prices:", y_pred)

# Plot the true vs predicted prices
plt.scatter(range(len(y)), y, color='blue', label='True Prices')
plt.scatter(range(len(y_pred)), y_pred, color='red', marker='x', label='Predicted Prices')
plt.xlabel('Sample Index')
plt.ylabel('House Price ($)')
plt.title('True vs Predicted House Prices')
plt.legend()
plt.show()
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/logistic_regression_churn_prediction.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Example data: [Monthly Charges, Tenure]
X = np.array([[30, 1], [40, 3], [50, 5], [60, 7]])
y = np.array([0, 0, 1, 1])  # 0 = No churn, 1 = Churn

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X, y)

# Output the intercept and coefficients
print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)

# Make predictions and predict probabilities
y_pred = model.predict(X)
y_prob = model.predict_proba(X)[:, 1]

print("Predicted Labels:", y_pred)
print("Predicted Probabilities (Churn):", y_prob)

# Visualization of the predicted probabilities
plt.scatter(range(len(y)), y, color='blue', label='True Labels (0=No churn, 1=Churn)')
plt.plot(range(len(y_prob)), y_prob, color='red', marker='x', linestyle='--', label='Predicted Probabilities')
plt.xlabel('Sample Index')
plt.ylabel('Probability of Churn')
plt.title('Logistic Regression: Churn Prediction')
plt.legend()
plt.show()
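
# Illustrative extension (a sketch with made-up input values): score a
# hypothetical new customer with monthly charges of 55 and a tenure of 6
# months using the fitted model.
new_customer = np.array([[55, 6]])
print("P(churn) for a new customer [55, 6]:", model.predict_proba(new_customer)[0, 1])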
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/rule_based_cold_diagnosis.py:
--------------------------------------------------------------------------------
# Filename: rule_based_cold_diagnosis.py

def diagnose(symptoms):
    """
    Diagnoses a condition based on the provided symptoms using a simple rule-based system.

    Args:
        symptoms (dict): A dictionary where keys are symptom names (e.g., 'fever', 'cough')
                         and values are booleans indicating whether the symptom is present.

    Returns:
        str: The diagnosis based on the provided symptoms.
    """
    if symptoms.get('fever') and symptoms.get('cough'):
        return "Common Cold"
    elif symptoms.get('fever'):
        return "Fever of unknown origin"
    elif symptoms.get('cough'):
        return "Possible respiratory infection"
    else:
        return "No specific diagnosis"

if __name__ == "__main__":
    # Collect symptoms from the user
    fever = input("Do you have a fever? (yes/no): ").strip().lower() == 'yes'
    cough = input("Do you have a cough? (yes/no): ").strip().lower() == 'yes'

    # Create a dictionary of symptoms
    symptoms = {'fever': fever, 'cough': cough}

    # Get the diagnosis
    diagnosis = diagnose(symptoms)

    # Display the diagnosis
    print(f"Diagnosis: {diagnosis}")
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/svm_decision_boundary_visualization.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn import datasets
from sklearn.svm import SVC
import matplotlib.pyplot as plt

# Load a sample dataset with two features
X, y = datasets.make_classification(n_samples=100, n_features=2,
                                    n_informative=2, n_redundant=0, n_repeated=0,
                                    n_classes=2, n_clusters_per_class=1,
                                    random_state=42)

# Initialize and train a linear SVM classifier
clf = SVC(kernel='linear')
clf.fit(X, y)

# Extract the weight vector and bias term
w = clf.coef_[0]
b = clf.intercept_[0]

# Define the decision boundary
x_points = np.linspace(min(X[:, 0]), max(X[:, 0]), 100)
y_points = -(w[0] / w[1]) * x_points - b / w[1]

# Plot the data points and decision boundary
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolors='k', label='Data Points')
plt.plot(x_points, y_points, color='red', label='Decision Boundary')

# Highlight the support vectors
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=100, facecolors='none', edgecolors='k', linewidths=1.5,
            label='Support Vectors')

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('SVM Decision Boundary with Support Vectors')
plt.legend()
plt.show()
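
# Optional check (illustrative sketch): the geometric margin of a linear SVM
# is 2 / ||w||, which quantifies how widely the two classes are separated by
# the learned hyperplane.
margin = 2 / np.linalg.norm(w)
print("Margin width:", margin)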
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/HiddenStates.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/HiddenStates.png
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/OriginalandPredictedSineWave.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/OriginalandPredictedSineWave.png
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/SampleAttentionHeatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/SampleAttentionHeatmap.png
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/cat.jpg
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/vgg16_feature_map_visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/vgg16_feature_map_visualization.png
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/attention_heatmap_visualization.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Define a sample attention weights matrix (3x3 for simplicity)
attention_weights = np.array([[0.1, 0.2, 0.7],
                              [0.5, 0.3, 0.2],
                              [0.3, 0.4, 0.3]])

# Create the heatmap plot
plt.figure(figsize=(6, 5))
sns.heatmap(attention_weights, annot=True, fmt=".2f", cmap='Blues', cbar=False, linewidths=0.5)

# Add titles and labels
plt.title("Sample Attention Heatmap", fontsize=14)
plt.xlabel("Input Tokens", fontsize=12)
plt.ylabel("Output Tokens", fontsize=12)
plt.xticks(ticks=[0.5, 1.5, 2.5], labels=["Token 1", "Token 2", "Token 3"])
plt.yticks(ticks=[0.5, 1.5, 2.5], labels=["Output 1", "Output 2", "Output 3"])

# Display the plot
plt.tight_layout()
plt.show()
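
# Sanity check (illustrative addition): attention weights produced by a
# softmax should sum to 1 across the input tokens for every output token.
print("Rows sum to 1:", np.allclose(attention_weights.sum(axis=1), 1.0))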
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/rnn_hidden_states_visualization.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Check TensorFlow version and GPU availability
print("TensorFlow version:", tf.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "not available")

# Generate a synthetic sine wave dataset
time_steps = 100
X = np.sin(np.linspace(0, 20, time_steps))
X = X.reshape((1, time_steps, 1))  # Reshape for RNN input (batch_size, time_steps, features)

# Build a simple RNN model with 10 hidden units
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(units=10, return_sequences=True, input_shape=(time_steps, 1)),
    tf.keras.layers.Dense(1)  # Add a Dense layer for prediction
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Run inference using the sine wave data (the model is untrained, so the
# prediction reflects its random initial weights)
y_pred = model.predict(X)

# Extract the hidden states of the SimpleRNN layer via an intermediate model;
# the Dense output above has only one unit, so the hidden states must be read
# from the recurrent layer itself
hidden_state_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[0].output)
hidden_states = hidden_state_model.predict(X, verbose=0)

# Plot the original sine wave data
plt.figure(figsize=(12, 6))
plt.plot(X[0, :, 0], label='Original Sine Wave', color='gray', linestyle='--', linewidth=2)
plt.title("Original Sine Wave Data")
plt.xlabel("Time Step")
plt.ylabel("Value")
plt.grid()
plt.legend()
plt.show()

# Plot the predicted sine wave vs the original sine wave
plt.figure(figsize=(12, 6))
plt.plot(X[0, :, 0], label='Original Sine Wave', color='lightgray', linestyle='--', linewidth=2)
plt.plot(y_pred[0, :, 0], label='Predicted Sine Wave', color='blue', linewidth=2)
plt.title("Comparison of Original and Predicted Sine Wave")
plt.xlabel("Time Step")
plt.ylabel("Value")
plt.legend()
plt.grid()
plt.show()

# Plot the activations of all 10 hidden units over time
plt.figure(figsize=(14, 8))
for i in range(hidden_states.shape[-1]):
    plt.plot(hidden_states[0, :, i], label=f'Hidden Unit {i+1}', alpha=0.8)

plt.title("Activations of All Hidden Units Over Time")
plt.xlabel("Time Step")
plt.ylabel("Hidden State Activation")
plt.legend(loc='upper right', ncol=2, fontsize=10)
plt.grid()
plt.show()
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/vgg16_feature_map_visualization.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Check TensorFlow version and GPU availability
print("TensorFlow version:", tf.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "not available")

# Load a pre-trained VGG16 model (without the fully connected layers)
model = tf.keras.applications.VGG16(weights='imagenet', include_top=False)

# Load and preprocess the input image (path matches this repository's layout)
image_path = 'ch04_interpretability_of_deep_learning_models/Images/cat.jpg'
try:
    image = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
except FileNotFoundError:
    print(f"Error: Image file '{image_path}' not found.")
    exit()

image_array = tf.keras.preprocessing.image.img_to_array(image)
image_array = np.expand_dims(image_array, axis=0)
image_array = tf.keras.applications.vgg16.preprocess_input(image_array)

# Define a model that outputs the feature maps of the first convolutional layer
layer_name = 'block1_conv1'
feature_map_model = tf.keras.Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Generate the feature maps for the input image
feature_maps = feature_map_model.predict(image_array)

# Check the shape of the feature maps
print("Feature map shape:", feature_maps.shape)

# Visualize the first 16 feature maps
fig, axes = plt.subplots(4, 4, figsize=(10, 10))
for i, ax in enumerate(axes.flat):
    if i < feature_maps.shape[-1]:
        ax.imshow(feature_maps[0, :, :, i], cmap='viridis')
    ax.axis('off')
plt.tight_layout()
plt.show()
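
# Optional exploration (illustrative sketch): list the other convolutional
# layers; re-running the visualization with a deeper layer name (e.g.,
# 'block5_conv3') shows progressively more abstract features.
conv_layers = [layer.name for layer in model.layers if 'conv' in layer.name]
print("Available convolutional layers:", conv_layers)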
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/Images/MeanActivations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch05_interpretability_of_large_language_models/Images/MeanActivations.png
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/Images/PCAVisualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch05_interpretability_of_large_language_models/Images/PCAVisualization.png
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/Images/embedding_probing_task.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch05_interpretability_of_large_language_models/Images/embedding_probing_task.png
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/embedding_probing_task.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from transformers import BertTokenizer, TFBertModel
import tensorflow as tf

# Load pretrained BERT model and tokenizer
model = TFBertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define a list of words and their corresponding part-of-speech labels
words = ["cat", "run", "dog", "jump"]  # Example words
labels = [0, 1, 0, 1]  # Labels: 0 for noun, 1 for verb

# Tokenize the words and obtain embeddings
inputs = tokenizer(words, return_tensors='tf', padding=True, truncation=True)
outputs = model(inputs['input_ids'])[0].numpy()

# Use mean embeddings as features for the classifier
features = outputs.mean(axis=1)

# Train a logistic regression classifier
classifier = LogisticRegression()
classifier.fit(features, labels)

# Make predictions
predictions = classifier.predict(features)

# Evaluate the classifier (on the training words themselves, so this measures
# fit rather than generalization)
accuracy = accuracy_score(labels, predictions)
print(f"Probing Task Accuracy: {accuracy:.2f}")
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/embedding_visualization.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from transformers import BertTokenizer, TFBertModel
import tensorflow as tf

# Load pretrained BERT model and tokenizer
model = TFBertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define a list of words to encode
words = ["king", "queen", "man", "woman"]

# Tokenize words and obtain embeddings
inputs = tokenizer(words, return_tensors='tf', padding=True, truncation=True)
outputs = model(inputs['input_ids'])[0].numpy()

# Compute the mean embeddings for each word
mean_embeddings = outputs.mean(axis=1)

# Perform PCA to reduce embeddings to 2D
pca = PCA(n_components=2)
reduced_embeddings = pca.fit_transform(mean_embeddings)

# Plot the 2D embeddings
plt.figure(figsize=(8, 6))
plt.scatter(reduced_embeddings[:, 0], reduced_embeddings[:, 1], color='blue')

# Annotate the plot with word labels
for i, word in enumerate(words):
    plt.annotate(word, (reduced_embeddings[i, 0], reduced_embeddings[i, 1]), fontsize=12)

plt.title("PCA Visualization of BERT Word Embeddings")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.grid(True)
plt.show()
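
# Optional diagnostic (illustrative addition): report how much variance the
# two principal components retain, to gauge how faithful the 2D view is.
print("Explained variance ratio:", pca.explained_variance_ratio_)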
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/layer_activation_analysis.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
import matplotlib.pyplot as plt

# Load pretrained BERT model and tokenizer with hidden states output enabled
model = TFBertModel.from_pretrained('bert-base-uncased', output_hidden_states=True)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define a function to extract and analyze layer-wise activations
def extract_and_analyze_activations(model, inputs):
    outputs = model(inputs)
    hidden_states = outputs.hidden_states  # Extract all hidden states
    layer_means = [tf.reduce_mean(state).numpy() for state in hidden_states]  # Compute mean activation
    return hidden_states, layer_means

# Example usage with a sample input sentence
input_data = tokenizer("The cat sat on the mat.", return_tensors='tf', padding=True, truncation=True)
input_ids = input_data['input_ids']

# Extract activations and compute mean activations for each layer
layer_outputs, layer_means = extract_and_analyze_activations(model, input_ids)

# Print the number of layers and the shape of activations from the first layer
print("Number of layers analyzed:", len(layer_outputs))
print("Shape of activations from the first layer:", layer_outputs[0].shape)

# Plot mean activations across layers
plt.figure(figsize=(10, 6))
plt.plot(range(len(layer_means)), layer_means, marker='o', color='blue')
plt.title("Mean Activations Across BERT Layers")
plt.xlabel("Layer")
plt.ylabel("Mean Activation Value")
plt.grid(True)
plt.show()
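
# Optional extension (illustrative sketch): the spread of activations per
# layer can be as informative as the mean; compute it from the same states.
layer_stds = [tf.math.reduce_std(state).numpy() for state in layer_outputs]
print("Per-layer activation std:", [f"{s:.3f}" for s in layer_stds])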
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Attention-based Explanation for Time Series.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Attention-based Explanation for Time Series.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/DTW.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/DTW.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Decision Plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Decision Plot.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/DeepLIFT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/DeepLIFT.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Importance Analysis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Importance Analysis.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Interaction Detection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Interaction Detection.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Interaction Heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Interaction Heatmap.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Grad-CAM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Grad-CAM.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/ICE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/ICE.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Integrated Gradients 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Integrated Gradients 1.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Integrated Gradients.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Integrated Gradients.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Local Interpretable Model-agnostic Explanations (LIME).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Local Interpretable Model-agnostic Explanations (LIME).png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/PDPs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/PDPs.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/SHAP Dependence.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/SHAP Dependence.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Saliency Maps for Recurrent Neural Networks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Saliency Maps for Recurrent Neural Networks.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Saliency Maps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Saliency Maps.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Shapley Additive Explanations (SHAP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Shapley Additive Explanations (SHAP.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/SmoothGrad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/SmoothGrad.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/TimeSHAP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/TimeSHAP.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/shap_force_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/shap_force_plot.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/attention_based_explanation_time_series.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Generate synthetic time series data
np.random.seed(0)
time_steps = 100
X = np.sin(np.linspace(0, 2 * np.pi, time_steps)) + np.random.normal(0, 0.1, time_steps)
y = np.roll(X, -1)

# Reshape data to fit LSTM input
X_input = X.reshape((1, time_steps, 1))
y_input = y.reshape((1, time_steps, 1))

# Define the Attention Layer
class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(AttentionLayer, self).__init__()
        self.score_layer = tf.keras.layers.Dense(1, activation='tanh')

    def call(self, x):
        score = self.score_layer(x)
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * x
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

# Build the model
inputs = tf.keras.Input(shape=(time_steps, 1))
lstm_output = tf.keras.layers.LSTM(50, return_sequences=True)(inputs)
context_vector, attention_weights = AttentionLayer()(lstm_output)
outputs = tf.keras.layers.Dense(1)(context_vector)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Compile and train the model
model.compile(optimizer='adam', loss='mse')
model.fit(X_input, y_input[:, -1, :], epochs=10, verbose=0)  # Adjust target values to match output shape

# Create a model to output predictions and attention weights
attention_model = tf.keras.Model(inputs=inputs, outputs=[outputs, attention_weights])

# Get predictions and attention weights
prediction, att_weights = attention_model.predict(X_input)

# Plot the original data, predicted values, and attention weights
plt.figure(figsize=(12, 6))

# Plot original data and predicted values on the left y-axis
ax1 = plt.gca()
ax1.plot(np.arange(time_steps), X, label='Original Data', color='blue')
ax1.plot(time_steps - 1, prediction[0], 'ro', label='Predicted Value')
ax1.set_xlabel('Time Step')
ax1.set_ylabel('Data Value')
ax1.legend(loc='upper left')

# Plot attention weights on the right y-axis
ax2 = ax1.twinx()
ax2.plot(np.arange(time_steps), att_weights[0, :, 0], label='Attention Weights', color='green', alpha=0.5)
ax2.set_ylabel('Attention Weights')
ax2.legend(loc='upper right')

plt.title('Original Data, Predicted Values, and Attention Weights')
plt.show()
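
# Optional interpretation aid (illustrative sketch): list the time steps the
# model attends to most, which often align with the most predictive inputs.
top_k = np.argsort(att_weights[0, :, 0])[-5:][::-1]
print("Top-5 attended time steps:", top_k)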
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/decision_plot.py:
--------------------------------------------------------------------------------
import shap
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor

# Load the California housing dataset
data = fetch_california_housing()
X, y = data.data, data.target

# Train the model
model = GradientBoostingRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

# Create a SHAP explainer
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# Subsample the data (e.g., 1000 observations)
subset_size = 1000
random_indices = np.random.choice(X.shape[0], subset_size, replace=False)
X_subset = X[random_indices]
shap_values_subset = shap_values[random_indices]

# Generate the decision plot with subsampled data
shap.decision_plot(explainer.expected_value, shap_values_subset, X_subset, feature_names=data.feature_names)
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/deep_lift.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.optim as optim
from captum.attr import DeepLift
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define torchvision transforms for the MNIST images
train_transform = transforms.Compose([transforms.ToTensor()])
test_transform = transforms.Compose([transforms.ToTensor()])

# Load the MNIST dataset using torchvision
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=train_transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)

# Define a simple CNN model in PyTorch
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 14 * 14, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.pool(x)
        x = x.view(-1, 32 * 14 * 14)
        x = torch.relu(self.fc1(x))
        # Return raw logits; nn.CrossEntropyLoss applies log-softmax internally,
        # so an explicit softmax here would be redundant and hurt training
        x = self.fc2(x)
        return x

# Initialize the model, loss function, and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model for one epoch (for simplicity)
for images, labels in train_loader:
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    break  # Train on one batch for demonstration

# Select a sample image and its baseline
sample_image, sample_label = next(iter(test_loader))
baseline = torch.zeros_like(sample_image)

# Compute DeepLIFT attributions with the target label
dl = DeepLift(model)
attributions = dl.attribute(sample_image, baseline, target=sample_label.item())

# Visualize the attributions
attributions = attributions.detach().numpy().squeeze()
plt.imshow(attributions, cmap='hot', interpolation='nearest')
plt.colorbar()
plt.title("DeepLIFT Attribution for MNIST Prediction")
plt.show()
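
# Optional context (illustrative addition): show the model's prediction for
# the attributed sample so the heatmap can be read against it.
with torch.no_grad():
    predicted = model(sample_image).argmax(dim=1).item()
print(f"True label: {sample_label.item()}, predicted label: {predicted}")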
-------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/dtw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from fastdtw import fastdtw 4 | 5 | # Generate synthetic time-series data 6 | time_series = np.sin(np.linspace(0, 2 * np.pi, 100)).flatten() # Flatten to 1-D 7 | reference_series = np.sin(np.linspace(0, 2 * np.pi, 120) + 0.5).flatten() # Flatten to 1-D 8 | 9 | # Ensure both time_series and reference_series are 1-D 10 | print(f"Time series shape: {time_series.shape}, Reference series shape: {reference_series.shape}") 11 | 12 | # Define custom distance function for scalar values 13 | def scalar_distance(u, v): 14 | return abs(u - v) 15 | 16 | # Apply DTW to align the sequences using the custom distance function 17 | distance, path = fastdtw(time_series, reference_series, dist=scalar_distance) 18 | 19 | # Plot the aligned sequences and highlight the warping path 20 | plt.figure(figsize=(10, 5)) 21 | plt.plot(time_series, label="Time Series") 22 | plt.plot( 23 | np.arange(120) * 100 / 120, reference_series, # Rescale the reference index to the time-series axis, matching the warping-path mapping below 24 | label="Reference Series", 25 | alpha=0.7 26 | ) 27 | 28 | # Highlight the warping path 29 | for (i, j) in path: 30 | plt.plot([i, j * 100 / 120], [time_series[i], reference_series[j]], color='gray', alpha=0.5) 31 | 32 | plt.title(f"DTW Alignment (Distance: {distance:.2f})") 33 | plt.xlabel("Time Index") 34 | plt.ylabel("Value") 35 | plt.legend() 36 | plt.grid() 37 | plt.show() 38 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/feature_importance_analysis.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_iris 2 | from sklearn.tree import DecisionTreeClassifier 3 | import matplotlib.pyplot as plt 4 | 5 | # Load the Iris dataset 6 | data = load_iris() 7 | X, y = data.data, data.target 8 | 9 | # Train a Decision Tree Classifier 10 | model = DecisionTreeClassifier(random_state=42) 11 | model.fit(X, y) 12 | 13 | # Extract feature importance 14 | importances = model.feature_importances_ 15 | feature_names = data.feature_names 16 | 17 | # Plot feature importance 18 | plt.figure(figsize=(8, 6)) 19 | plt.barh(feature_names, importances, color='skyblue') 20 | plt.xlabel("Feature Importance Score") 21 | plt.ylabel("Features") 22 | plt.title("Feature Importance Analysis for Decision Tree Classifier") 23 | plt.show() -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/feature_interaction_detection.py: -------------------------------------------------------------------------------- 1 | import shap 2 | import numpy as np 3 | from sklearn.datasets import fetch_california_housing 4 | from sklearn.ensemble import GradientBoostingRegressor 5 | import matplotlib.pyplot as plt 6 | 7 | # Load the California housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | feature_names = data.feature_names 11 | 12 | # Train the model 13 | model = GradientBoostingRegressor(n_estimators=100, random_state=42) 14 | model.fit(X, y) 15 | 16 | # Create SHAP explainer and compute SHAP values 17 | explainer = shap.TreeExplainer(model) 18 | shap_values = explainer.shap_values(X) 19 | 20 | # Compute SHAP interaction values 21 | shap_interaction_values = 
explainer.shap_interaction_values(X) 22 | 23 | # Plot the SHAP interaction values (summary plot) 24 | shap.summary_plot(shap_interaction_values, X, feature_names=feature_names, plot_type="compact_dot") 25 | 26 | # Visualize a specific feature pair interaction 27 | shap.dependence_plot(("MedInc", "AveRooms"), shap_interaction_values, X, feature_names=feature_names, interaction_index="AveRooms") 28 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/feature_interaction_heatmap.py: -------------------------------------------------------------------------------- 1 | import shap 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.datasets import fetch_california_housing 5 | from sklearn.ensemble import GradientBoostingRegressor 6 | 7 | # Load the California Housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | feature_names = data.feature_names 11 | 12 | # Train a Gradient Boosting Regressor 13 | model = GradientBoostingRegressor(n_estimators=100, random_state=42) 14 | model.fit(X, y) 15 | 16 | # Compute SHAP interaction values 17 | explainer = shap.TreeExplainer(model) 18 | shap_interaction_values = explainer.shap_interaction_values(X) 19 | 20 | # Extract the main SHAP values from the interaction matrix (diagonal elements only) 21 | shap_values_main = np.array([shap_interaction_values[i][:, i] for i in range(X.shape[1])]).T 22 | 23 | # Plot the Feature Interaction Heatmap 24 | shap.summary_plot(shap_values_main, X, feature_names=feature_names, plot_type="bar") 25 | plt.title("Feature Importance for California Housing Dataset") 26 | plt.show() 27 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/grad_cam.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from tensorflow.keras.applications import VGG16 5 | from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions 6 | from tensorflow.keras.preprocessing.image import img_to_array, load_img 7 | import cv2 # OpenCV is used below to resize and colorize the heatmap 8 | 9 | # Load a pre-trained VGG16 model 10 | model = VGG16(weights='imagenet') 11 | 12 | # Load and preprocess the input image 13 | image_path = 'ch04_interpretability_of_deep_learning_models/Images/cat.jpg' 14 | image = load_img(image_path, target_size=(224, 224)) 15 | 16 | # Convert the image to an array and preprocess it 17 | image_array = img_to_array(image) 18 | image_array = np.expand_dims(image_array, axis=0) 19 | image_array = preprocess_input(image_array) 20 | 21 | # Get the model prediction 22 | predictions = model.predict(image_array) 23 | predicted_class = np.argmax(predictions[0]) 24 | 25 | # Function to compute Grad-CAM heatmap 26 | def compute_gradcam(model, image_array, class_idx, layer_name='block5_conv3'): 27 | # Create a model that maps the input image to the activations of the last convolutional layer 28 | # and the model's output 29 | grad_model = tf.keras.models.Model( 30 | [model.inputs], [model.get_layer(layer_name).output, model.output] 31 | ) 32 | 33 | # Record operations for automatic differentiation 34 | with tf.GradientTape() as tape: 35 | conv_output, predictions = grad_model(image_array) 36 | loss = predictions[:, class_idx] 37 | 38 | # Compute gradients with respect to the convolutional output 39 | grads = tape.gradient(loss, conv_output) 40 | 41 | # Compute the mean intensity of the gradients for each channel 42 | pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)) 43 | 44 | # Extract the feature maps from the convolutional layer output 45 | conv_output = conv_output[0] 46 | 47 | # Compute the weighted sum of the feature maps 48 | heatmap = tf.reduce_sum(tf.multiply(pooled_grads, conv_output), axis=-1) 49 | 50 | # Apply ReLU and normalize the heatmap 51 | heatmap = np.maximum(heatmap, 0) 52 | heatmap /= np.max(heatmap) 53 | 54 | return heatmap 55 | 56 | # Generate the Grad-CAM heatmap 57 | heatmap = compute_gradcam(model, image_array, predicted_class) 58 | 59 | # Resize heatmap to match the input image size 60 | heatmap = cv2.resize(heatmap, (224, 224)) # Resize with OpenCV 61 | heatmap = np.uint8(255 * heatmap) # Scale the heatmap to the 0-255 range 62 | 63 | # Convert the heatmap to a color map 64 | heatmap_color = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 65 | 66 | # Overlay the heatmap on the original image 67 | superimposed_img = cv2.addWeighted(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR), 0.6, heatmap_color, 0.4, 0) 68 | 69 | # Display the original image, heatmap, and overlay 70 | fig, ax = plt.subplots(1, 3, figsize=(18, 6)) 71 | 72 | # Display original image 73 | ax[0].imshow(image) 74 | ax[0].axis('off') 75 | ax[0].set_title("Original Image") 76 | 77 | # Display the heatmap only 78 | ax[1].imshow(heatmap, cmap='jet') 79 | ax[1].axis('off') 80 | ax[1].set_title("Grad-CAM Heatmap") 81 | 82 | # Display the overlay image 83 | ax[2].imshow(cv2.cvtColor(superimposed_img, cv2.COLOR_BGR2RGB)) 84 | ax[2].axis('off') 85 | ax[2].set_title("Overlay Image") 86 | 87 | # Show the plot 88 | plt.tight_layout() 89 | plt.show() 90 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/ice.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.ensemble import RandomForestRegressor 4 | from sklearn.datasets import fetch_california_housing 5 | from sklearn.inspection import PartialDependenceDisplay 6 | 7 | # Load the California Housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | 11 | # Train a Random Forest model 12 | model = RandomForestRegressor(n_estimators=100, random_state=42) 13 | model.fit(X, y) 14 | 15 | # Create ICE Plot for the feature "MedInc" (Median Income) 16 | fig, ax = plt.subplots(figsize=(10, 6)) 17 | display = PartialDependenceDisplay.from_estimator( 18 | model, X, features=[0], kind="individual", ax=ax, feature_names=data.feature_names 19 | ) 20 | ax.set_title("ICE Plot for MedInc (Median Income)") 21 | ax.set_xlabel("MedInc (Median Income)") 22 | ax.set_ylabel("Predicted House Price") 23 | plt.show() 24 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/integrated_gradients.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Define a simple CNN model in TensorFlow/Keras 6 | class SimpleCNN(tf.keras.Model): 7 | def __init__(self): 8 | super(SimpleCNN, self).__init__() 9 | self.conv1 = tf.keras.layers.Conv2D(32, (3, 3), padding="same", activation="relu") 10 | self.conv2 = tf.keras.layers.Conv2D(64, (3, 3), padding="same", activation="relu") 11 | self.flatten = tf.keras.layers.Flatten() 12 | self.fc1 = tf.keras.layers.Dense(128, activation="relu") 13 |
self.fc2 = tf.keras.layers.Dense(10, activation="softmax") 14 | 15 | def call(self, x): 16 | x = self.conv1(x) 17 | x = self.conv2(x) 18 | x = self.flatten(x) 19 | x = self.fc1(x) 20 | return self.fc2(x) 21 | 22 | # Load the MNIST dataset 23 | (train_images, train_labels), _ = tf.keras.datasets.mnist.load_data() 24 | train_images = train_images[..., tf.newaxis] / 255.0 # Rescale to [0, 1] 25 | 26 | # Initialize the model, loss function, and optimizer 27 | model = SimpleCNN() 28 | model.compile(optimizer=tf.keras.optimizers.Adam(0.001), 29 | loss=tf.keras.losses.SparseCategoricalCrossentropy(), 30 | metrics=["accuracy"]) 31 | 32 | # Train the model (one epoch for simplicity) 33 | model.fit(train_images, train_labels, batch_size=64, epochs=1) 34 | 35 | # Select a sample image and baseline for Integrated Gradients 36 | sample_image = train_images[0:1] # Shape (1, 28, 28, 1) 37 | baseline = tf.zeros_like(sample_image) 38 | 39 | # Function to calculate Integrated Gradients 40 | def compute_integrated_gradients(model, input_image, baseline, target_class_idx, m_steps=50): 41 | # Generate interpolated images between baseline and input 42 | interpolated_images = [ 43 | baseline + (float(i) / m_steps) * (input_image - baseline) 44 | for i in range(m_steps + 1) 45 | ] 46 | interpolated_images = tf.concat(interpolated_images, axis=0) 47 | 48 | with tf.GradientTape() as tape: 49 | tape.watch(interpolated_images) 50 | # Get model predictions for interpolated images 51 | predictions = model(interpolated_images) 52 | target_predictions = predictions[:, target_class_idx] 53 | 54 | # Compute gradients between predictions and interpolated images 55 | grads = tape.gradient(target_predictions, interpolated_images) 56 | 57 | # Average gradients and compute attributions 58 | avg_grads = tf.reduce_mean(grads, axis=0) 59 | integrated_grads = (input_image - baseline) * avg_grads 60 | return integrated_grads 61 | 62 | # Compute attributions using Integrated Gradients 63 | target_class = train_labels[0] # The true class for the sample image 64 | attributions = compute_integrated_gradients(model, sample_image, baseline, target_class) 65 | 66 | # Visualize the attributions 67 | attributions = attributions.numpy().squeeze() 68 | plt.imshow(attributions, cmap="hot", interpolation="nearest") 69 | plt.colorbar() 70 | plt.title("Integrated Gradients Attribution for MNIST Prediction") 71 | plt.show() 72 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/local_interpretable_model_agnostic_explanations.py: -------------------------------------------------------------------------------- 1 | import lime 2 | import lime.lime_text 3 | from sklearn.pipeline import make_pipeline 4 | from sklearn.feature_extraction.text import TfidfVectorizer 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.datasets import load_files 7 | from sklearn.model_selection import train_test_split 8 | import numpy as np 9 | 10 | # Specify the path to the IMDB dataset. Ensure the dataset is downloaded and located in this path. 11 | # The IMDB dataset can be downloaded from: 12 | # https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz 13 | 14 | # 1. Load the IMDB dataset 15 | data = load_files("aclImdb/train", categories=["pos", "neg"], encoding="utf-8", decode_error="replace") 16 | X = np.array(data.data) 17 | y = data.target # Directly use data.target without further transformation 18 | 19 | # 2. 
Split the dataset 20 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) 21 | 22 | # 3. Create the text classification pipeline 23 | vectorizer = TfidfVectorizer() 24 | classifier = LogisticRegression(random_state=42, max_iter=1000) 25 | pipeline = make_pipeline(vectorizer, classifier) 26 | 27 | # 4. Train the model 28 | pipeline.fit(X_train, y_train) 29 | 30 | # 5. Initialize the LIME explainer 31 | explainer = lime.lime_text.LimeTextExplainer(class_names=["NEGATIVE", "POSITIVE"]) 32 | 33 | # 6. Choose an instance to explain 34 | text_instance = X_test[0] 35 | exp = explainer.explain_instance(text_instance, pipeline.predict_proba, num_features=10) 36 | 37 | # 7. Save the explanation as an HTML file 38 | exp.save_to_file('lime_explanation.html') 39 | print("LIME explanation saved as 'lime_explanation.html'") 40 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/lrp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | # Create XOR dataset 7 | X = torch.Tensor([[0,0], [0,1], [1,0], [1,1]]) 8 | y = torch.Tensor([[0], [1], [1], [0]]) 9 | 10 | # Define a simple neural network 11 | class SimpleNN(nn.Module): 12 | def __init__(self): 13 | super(SimpleNN, self).__init__() 14 | self.fc1 = nn.Linear(2, 4) # First layer with 4 neurons 15 | self.fc2 = nn.Linear(4, 1) # Output layer 16 | 17 | def forward(self, x): 18 | x = F.relu(self.fc1(x)) 19 | x = torch.sigmoid(self.fc2(x)) 20 | return x 21 | 22 | # Initialize the model, loss function, and optimizer 23 | model = SimpleNN() 24 | criterion = nn.BCELoss() 25 | optimizer = torch.optim.SGD(model.parameters(), lr=0.1) 26 | 27 | # Train the model 28 | for epoch in range(1000): 29 | optimizer.zero_grad() 30 | outputs = model(X) 31 | loss = criterion(outputs, y) 32 | loss.backward() 33 | optimizer.step() 34 | if (epoch + 1) % 200 == 0: 35 | print(f'Epoch [{epoch + 1}/1000], Loss: {loss.item():.4f}') 36 | 37 | # Select an input for explanation 38 | x_input = torch.Tensor([[1.0, 1.0]]) 39 | output = model(x_input) 40 | print(f'\nPrediction for input {x_input.numpy()}: {output.item():.4f}') 41 | 42 | # Forward pass, recording intermediate activations 43 | x0 = x_input.detach() 44 | z1 = model.fc1(x0) 45 | a1 = F.relu(z1) 46 | z2 = model.fc2(a1) 47 | a2 = torch.sigmoid(z2) 48 | 49 | # LRP parameters 50 | epsilon = 1e-6 51 | 52 | # Calculate relevance R2 at the output layer 53 | R2 = a2.item() # Get scalar value 54 | 55 | # Propagate relevance from the output layer to the hidden layer 56 | w2 = model.fc2.weight.data.squeeze() # Shape becomes [4] 57 | a1 = a1.detach().squeeze() # Shape becomes [4] 58 | z = a1 * w2 # Element-wise multiplication, shape [4] 59 | s = z.sum() 60 | denominator = s + epsilon * s.sign() 61 | R1 = (z / denominator) * R2 # Shape is [4] 62 | 63 | # Propagate relevance from the hidden layer to the input layer 64 | w1 = model.fc1.weight.data # Shape is [4, 2] 65 | x0 = x0.detach().squeeze() # Shape is [2] 66 | R0 = torch.zeros_like(x0) # Shape is [2] 67 | 68 | # Iterate over each neuron in the hidden layer 69 | for i in range(w1.shape[0]): 70 | w = w1[i] # Shape is [2] 71 | z = x0 * w # Element-wise multiplication, shape [2] 72 | s = z.sum() 73 | denominator = s + epsilon * s.sign() 74 | R0 += (z / denominator) * R1[i].item() # Convert R1[i] to scalar 75 | 76 | # Output 
the relevance scores 77 | print(f'\nInput relevance scores: {R0}') 78 | print(f'Sum of input relevances: {R0.sum().item():.4f}') 79 | print(f'Output relevance: {R2:.4f}') 80 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/pdps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.ensemble import RandomForestRegressor 4 | from sklearn.datasets import fetch_california_housing 5 | from sklearn.inspection import PartialDependenceDisplay 6 | 7 | # Load the California Housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | 11 | # Train a Random Forest model 12 | model = RandomForestRegressor(n_estimators=100, random_state=42) 13 | model.fit(X, y) 14 | 15 | # Plot Partial Dependence for the feature "MedInc" (Median Income) 16 | fig, ax = plt.subplots(figsize=(8, 6)) 17 | PartialDependenceDisplay.from_estimator(model, X, [0], feature_names=data.feature_names, ax=ax) 18 | ax.set_title("Partial Dependence Plot for MedInc (Median Income)") 19 | ax.set_xlabel("MedInc (Median Income)") 20 | ax.set_ylabel("Predicted House Price") 21 | plt.show() 22 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/saliency_Maps_rnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import matplotlib.pyplot as plt 4 | 5 | # Generate synthetic time series data 6 | np.random.seed(42) 7 | time_steps = 100 8 | X = np.sin(np.linspace(0, 4 * np.pi, time_steps)) + np.random.normal(0, 0.1, time_steps) 9 | y = np.roll(X, -1) 10 | 11 | # Reshape data to fit LSTM input 12 | X_input = X.reshape((1, time_steps, 1)) 13 | y_input = y.reshape((1, time_steps, 1)) 14 | 15 | # Define LSTM model to predict the entire sequence 16 | inputs = tf.keras.Input(shape=(time_steps, 1)) 17 | lstm_output = tf.keras.layers.LSTM(50, return_sequences=True)(inputs) 18 | outputs = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))(lstm_output) 19 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 20 | 21 | # Compile and train the model 22 | model.compile(optimizer='adam', loss='mse') 23 | model.fit(X_input, y_input, epochs=10, verbose=0) 24 | 25 | # Convert X and y to TensorFlow tensors 26 | X_tensor = tf.convert_to_tensor(X_input, dtype=tf.float32) 27 | y_tensor = tf.convert_to_tensor(y_input, dtype=tf.float32) 28 | 29 | # Compute the saliency map 30 | with tf.GradientTape() as tape: 31 | tape.watch(X_tensor) 32 | predictions = model(X_tensor) 33 | # Use MeanSquaredError loss object 34 | loss_object = tf.keras.losses.MeanSquaredError() 35 | loss = tf.reduce_mean(loss_object(y_tensor, predictions)) 36 | 37 | # Compute the gradients 38 | grads = tape.gradient(loss, X_tensor) 39 | grads = grads.numpy()[0, :, 0] # Extract gradients for each time step 40 | 41 | # Plot the original data, predicted data, and saliency map 42 | fig, ax1 = plt.subplots(figsize=(12, 6)) 43 | 44 | # Plot the original data and predicted data on the left y-axis 45 | color = 'tab:blue' 46 | ax1.set_xlabel('Time Step') 47 | ax1.set_ylabel('Data Value', color=color) 48 | ax1.plot(X_input[0, :, 0], color='blue', label='Original Data') 49 | ax1.plot(predictions.numpy()[0, :, 0], color='orange', label='Predicted Data') 50 | ax1.tick_params(axis='y', labelcolor=color) 51 | 
ax1.legend(loc='upper left') 52 | 53 | # Plot the saliency map on the right y-axis 54 | ax2 = ax1.twinx() 55 | color = 'tab:red' 56 | ax2.set_ylabel('Gradient Magnitude', color=color) 57 | ax2.plot(np.abs(grads), color=color, label='Saliency Map') 58 | ax2.tick_params(axis='y', labelcolor=color) 59 | ax2.legend(loc='upper right') 60 | 61 | plt.title('Original Data, Predicted Data, and Saliency Map') 62 | plt.show() 63 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/saliency_maps.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load the MNIST dataset 6 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 7 | train_images = train_images[..., np.newaxis] / 255.0 8 | test_images = test_images[..., np.newaxis] / 255.0 9 | 10 | # Define a simple CNN model 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 13 | tf.keras.layers.MaxPooling2D((2, 2)), 14 | tf.keras.layers.Flatten(), 15 | tf.keras.layers.Dense(128, activation='relu'), 16 | tf.keras.layers.Dense(10, activation='softmax') 17 | ]) 18 | 19 | # Compile and train the model 20 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 21 | model.fit(train_images, train_labels, epochs=1, batch_size=64) 22 | 23 | # Function to compute the saliency map 24 | def compute_saliency_map(model, image, target_class): 25 | image = tf.convert_to_tensor(image[np.newaxis, ...], dtype=tf.float32) 26 | 27 | with tf.GradientTape() as tape: 28 | tape.watch(image) 29 | predictions = model(image) 30 | loss = predictions[0, target_class] 31 | 32 | # Compute the gradient of the loss with respect to the input image 33 | gradient = tape.gradient(loss, image) 34 | saliency = tf.abs(gradient)[0] 35 | 36 | # Normalize the saliency map 37 | saliency = saliency.numpy().squeeze() 38 | saliency = (saliency - saliency.min()) / (saliency.max() - saliency.min()) 39 | 40 | return saliency 41 | 42 | # Select a sample image and compute the saliency map 43 | sample_image = test_images[0] 44 | target_class = np.argmax(model.predict(sample_image[np.newaxis, ...])) 45 | saliency_map = compute_saliency_map(model, sample_image, target_class) 46 | 47 | # Visualize the original image and its saliency map 48 | plt.subplot(1, 2, 1) 49 | plt.imshow(sample_image.squeeze(), cmap='gray') 50 | plt.title("Original Image") 51 | 52 | plt.subplot(1, 2, 2) 53 | plt.imshow(saliency_map, cmap='hot') 54 | plt.title("Saliency Map") 55 | plt.colorbar() 56 | plt.show() -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/shap_dependence.py: -------------------------------------------------------------------------------- 1 | import shap 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.ensemble import GradientBoostingRegressor 5 | from sklearn.datasets import fetch_california_housing 6 | 7 | # Load the California Housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | feature_names = data.feature_names 11 | 12 | # Train a Gradient Boosting model 13 | model = GradientBoostingRegressor(n_estimators=100, random_state=42) 14 | model.fit(X, y) 15 | 16 | # Compute SHAP values 17 | explainer = 
shap.Explainer(model, X) 18 | shap_values = explainer(X) 19 | 20 | # Extract SHAP values for dependence plot 21 | # Using `shap_values.values` to get the actual SHAP values as an array 22 | shap.dependence_plot("AveRooms", shap_values.values, X, feature_names=feature_names) 23 | plt.title("SHAP Dependence Plot for AveRooms") 24 | plt.xlabel("Average Number of Rooms (AveRooms)") 25 | plt.ylabel("SHAP Value (Impact on Model Output)") 26 | plt.show() 27 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/shap_force_plot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import shap\n", 10 | "import numpy as np\n", 11 | "from sklearn.datasets import fetch_california_housing\n", 12 | "from sklearn.ensemble import GradientBoostingRegressor\n", 13 | "\n", 14 | "# Load the California housing dataset\n", 15 | "data = fetch_california_housing()\n", 16 | "X, y = data.data, data.target\n", 17 | "\n", 18 | "# Train the model\n", 19 | "model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n", 20 | "model.fit(X, y)\n", 21 | "\n", 22 | "# Create a SHAP explainer\n", 23 | "explainer = shap.TreeExplainer(model)\n", 24 | "shap_values = explainer.shap_values(X)\n", 25 | "\n", 26 | "# Select an instance to explain\n", 27 | "instance_index = 0\n", 28 | "\n", 29 | "# Create the force plot\n", 30 | "force_plot = shap.force_plot(explainer.expected_value, shap_values[instance_index], X[instance_index], feature_names=data.feature_names)\n", 31 | "\n", 32 | "# Save the force plot as an HTML file\n", 33 | "shap.save_html(\"shap_force_plot.html\", force_plot)" 34 | ] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": ".venv", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.9.9" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 2 58 | } 59 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/shap_force_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/shap_force_plot.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/shapley_additive_explanations.py: -------------------------------------------------------------------------------- 1 | import shap 2 | import xgboost as xgb 3 | import pandas as pd 4 | from sklearn.datasets import load_breast_cancer 5 | from sklearn.model_selection import train_test_split 6 | 7 | # Load the Breast Cancer dataset 8 | data = load_breast_cancer() 9 | X = pd.DataFrame(data.data, columns=data.feature_names) 10 | y = data.target 11 | 12 | # Split the dataset 13 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 14 | 15 | # Train a gradient boosting model 16 | model = xgb.XGBClassifier(random_state=42) 17 | 
model.fit(X_train, y_train) 18 | 19 | # Initialize the SHAP explainer 20 | explainer = shap.Explainer(model, X_train) 21 | shap_values = explainer(X_test) 22 | 23 | # Plot the SHAP summary plot 24 | shap.summary_plot(shap_values, X_test) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/smoothgrad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load the MNIST dataset 6 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 7 | train_images = train_images[..., np.newaxis] / 255.0 8 | test_images = test_images[..., np.newaxis] / 255.0 9 | 10 | # Define a simple CNN model 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 13 | tf.keras.layers.MaxPooling2D((2, 2)), 14 | tf.keras.layers.Flatten(), 15 | tf.keras.layers.Dense(128, activation='relu'), 16 | tf.keras.layers.Dense(10, activation='softmax') 17 | ]) 18 | 19 | # Compile and train the model 20 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 21 | model.fit(train_images, train_labels, epochs=1, batch_size=64) 22 | 23 | # Function to compute SmoothGrad 24 | def smoothgrad(image, model, target_class, num_samples=50, noise_level=0.1): 25 | grads = [] 26 | for _ in range(num_samples): 27 | noise = np.random.normal(0, noise_level, image.shape) 28 | noisy_image = image + noise 29 | noisy_image = tf.convert_to_tensor(noisy_image[np.newaxis, ...], dtype=tf.float32) 30 | 31 | with tf.GradientTape() as tape: 32 | tape.watch(noisy_image) 33 | prediction = model(noisy_image) 34 | loss = prediction[0, target_class] 35 | 36 | gradient = tape.gradient(loss, noisy_image) 37 | grads.append(gradient.numpy().squeeze()) 38 | 39 | # Average the gradients 40 | smooth_grad = np.mean(grads, axis=0) 41 | return smooth_grad 42 | 43 | # Select a sample image and compute SmoothGrad attributions 44 | sample_image = test_images[0] 45 | target_class = np.argmax(model.predict(sample_image[np.newaxis, ...])) 46 | attributions = smoothgrad(sample_image, model, target_class) 47 | 48 | # Visualize the attributions 49 | plt.imshow(attributions, cmap='hot', interpolation='nearest') 50 | plt.colorbar() 51 | plt.title("SmoothGrad Attribution for MNIST Prediction") 52 | plt.show() -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/timeshap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import shap 4 | 5 | # Generate synthetic time-series data 6 | time_series_data = np.random.rand(100, 10, 1) # 100 samples, 10 time steps, 1 feature 7 | labels = np.random.randint(0, 2, size=(100,)) 8 | 9 | # Define a simple LSTM model 10 | model = tf.keras.Sequential([ 11 | tf.keras.layers.Input(shape=(10, 1)), 12 | tf.keras.layers.LSTM(50), 13 | tf.keras.layers.Dense(1, activation='sigmoid') 14 | ]) 15 | model.compile(optimizer='adam', loss='binary_crossentropy') 16 | model.fit(time_series_data, labels, epochs=5, verbose=0) 17 | 18 | # Define the prediction function for SHAP 19 | def predict_fn(data): 20 | # Reshape input data back to 3D for the LSTM model 21 | return model.predict(data.reshape(-1, 10, 1)).flatten() 22 | 23 | # Select a background dataset (subset 
of training data) 24 | background_data = time_series_data[:50].reshape(50, -1) # Flatten to 2D 25 | 26 | # Initialize SHAP KernelExplainer 27 | explainer = shap.KernelExplainer(predict_fn, background_data) 28 | 29 | # Select an instance to explain and flatten it 30 | instance = time_series_data[0:1].reshape(1, -1) # Flatten to 2D 31 | 32 | # Compute SHAP values 33 | shap_values = explainer.shap_values(instance, nsamples=100) 34 | 35 | # Display SHAP values 36 | print("SHAP values for each flattened feature:", shap_values) 37 | 38 | # Visualization of SHAP values 39 | import matplotlib.pyplot as plt 40 | 41 | # Plot SHAP values for the first instance 42 | plt.bar(range(len(shap_values[0])), shap_values[0]) 43 | plt.xlabel('Flattened Feature Index') 44 | plt.ylabel('SHAP Value') 45 | plt.title('SHAP Values for Time-Series Instance') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/Images/Causal Discovery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/620_Causal-Inference/Images/Causal Discovery.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/causal_discovery.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from causallearn.search.ConstraintBased.PC import pc 3 | from causallearn.utils.GraphUtils import GraphUtils 4 | import matplotlib.pyplot as plt 5 | import networkx as nx 6 | 7 | # Generate synthetic data 8 | np.random.seed(42) 9 | n_samples = 1000 10 | X = np.random.randn(n_samples) 11 | Y = 0.5 * X + np.random.randn(n_samples) * 0.1 12 | Z = 0.3 * X + 0.4 * Y + np.random.randn(n_samples) * 0.1 13 | data = np.column_stack((X, Y, Z)) 14 | 15 | # Define variable names 16 | variable_names = ['X', 'Y', 'Z'] 17 | 18 | # Apply PC algorithm for causal discovery 19 | causal_graph = pc(data, alpha=0.05) 20 | # Print the learned graph 21 | print(causal_graph.G) 22 | 23 | # Build a NetworkX graph from the adjacency matrix 24 | def build_nx_graph(causal_graph, labels): 25 | import networkx as nx 26 | G = nx.DiGraph() 27 | num_nodes = len(labels) 28 | G.add_nodes_from(range(num_nodes)) 29 | # Add edges 30 | for i in range(num_nodes): 31 | for j in range(num_nodes): 32 | edge_type = causal_graph.G.graph[i][j] 33 | if edge_type == 1: # Directed edge from i to j 34 | G.add_edge(i, j) 35 | elif edge_type == -1: # Directed edge from j to i 36 | G.add_edge(j, i) 37 | elif edge_type == 2: # Undirected edge 38 | G.add_edge(i, j) 39 | G.add_edge(j, i) 40 | # Relabel nodes 41 | mapping = {i: label for i, label in enumerate(labels)} 42 | G = nx.relabel_nodes(G, mapping) 43 | return G 44 | 45 | # Build the graph 46 | G = build_nx_graph(causal_graph, variable_names) 47 | 48 | # Plot the causal graph 49 | pos = nx.spring_layout(G) 50 | nx.draw( 51 | G, 52 | pos, 53 | with_labels=True, 54 | node_size=1500, 55 | node_color='lightblue', 56 | arrowsize=20, 57 | font_size=12, 58 | font_weight='bold' 59 | ) 60 | plt.title('Causal Graph') 61 | plt.show() 62 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/causal_mediation.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import statsmodels.api as sm 4 | from statsmodels.formula.api import ols 5 | 6 | # Generate synthetic data 7 | np.random.seed(42) 8 | n = 100 9 | X = np.random.randn(n) 10 | M = 0.5 * X + np.random.randn(n) * 0.5 # Mediator influenced by X 11 | Y = 0.3 * X + 0.7 * M + np.random.randn(n) * 0.5 # Outcome influenced by X and M 12 | 13 | # Create a DataFrame 14 | data = pd.DataFrame({'X': X, 'M': M, 'Y': Y}) 15 | 16 | # Step 1: Fit the mediator model (M ~ X) 17 | mediator_model = ols('M ~ X', data=data).fit() 18 | 19 | # Step 2: Fit the outcome model (Y ~ X + M) 20 | outcome_model = ols('Y ~ X + M', data=data).fit() 21 | 22 | # Extract coefficients 23 | alpha_1 = mediator_model.params['X'] 24 | beta_1 = outcome_model.params['X'] 25 | beta_2 = outcome_model.params['M'] 26 | 27 | # Calculate direct, indirect, and total effects 28 | indirect_effect = alpha_1 * beta_2 29 | direct_effect = beta_1 30 | total_effect = direct_effect + indirect_effect 31 | proportion_mediated = indirect_effect / total_effect 32 | 33 | print(f"Indirect Effect (IE): {indirect_effect:.4f}") 34 | print(f"Direct Effect (DE): {direct_effect:.4f}") 35 | print(f"Total Effect (TE): {total_effect:.4f}") 36 | print(f"Proportion Mediated: {proportion_mediated:.2%}") -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/irm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tensorflow.keras.layers import Dense 4 | import matplotlib.pyplot as plt 5 | 6 | # Generate synthetic data for two environments 7 | def generate_data(n, env_factor): 8 | X = np.random.randn(n, 2) 9 | Y = (X[:, 0] * X[:, 1] > 0).astype(int) # Basic correlation 10 | Y_spurious = Y.copy() 11 | flip_idx = np.random.rand(n) < env_factor # Add spurious correlation 12 | Y_spurious[flip_idx] = 1 - Y_spurious[flip_idx] 13 | return X, Y_spurious 14 | 15 | # Environment 1 with spurious correlation factor of 0.2 16 | X_env1, Y_env1 = generate_data(1000, env_factor=0.2) 17 | # Environment 2 with spurious correlation factor of 0.8 18 | X_env2, Y_env2 = generate_data(1000, env_factor=0.8) 19 | 20 | # Reshape labels to shape (n, 1) 21 | Y_env1 = Y_env1.reshape(-1, 1) 22 | Y_env2 = Y_env2.reshape(-1, 1) 23 | 24 | # Define the IRM model 25 | class IRMModel(tf.keras.Model): 26 | def __init__(self): 27 | super(IRMModel, self).__init__() 28 | self.feature_extractor = Dense(10, activation='relu') 29 | self.classifier = Dense(1, activation='sigmoid') 30 | 31 | def call(self, inputs): 32 | features = self.feature_extractor(inputs) 33 | output = self.classifier(features) 34 | return output, features 35 | 36 | # Define IRM penalty function, using GradientTape to compute gradients 37 | def irm_penalty(loss, features, tape): 38 | grad = tape.gradient(loss, features) 39 | penalty = tf.reduce_mean(tf.square(grad)) 40 | return penalty 41 | 42 | # Compile the model 43 | model = IRMModel() 44 | optimizer = tf.keras.optimizers.Adam() 45 | 46 | # Training loop 47 | for epoch in range(1000): 48 | with tf.GradientTape(persistent=True) as tape: 49 | # Process Environment 1 50 | pred_env1, features_env1 = model(X_env1) 51 | loss_env1 = tf.keras.losses.binary_crossentropy(Y_env1, pred_env1) 52 | loss_env1_mean = tf.reduce_mean(loss_env1) 53 | penalty_env1 = irm_penalty(loss_env1_mean, 
features_env1, tape) 54 | 55 | # Process Environment 2 56 | pred_env2, features_env2 = model(X_env2) 57 | loss_env2 = tf.keras.losses.binary_crossentropy(Y_env2, pred_env2) 58 | loss_env2_mean = tf.reduce_mean(loss_env2) 59 | penalty_env2 = irm_penalty(loss_env2_mean, features_env2, tape) 60 | 61 | # Total loss 62 | total_loss = loss_env1_mean + loss_env2_mean + 1.0 * (penalty_env1 + penalty_env2) 63 | 64 | # Compute gradients and update parameters 65 | gradients = tape.gradient(total_loss, model.trainable_variables) 66 | optimizer.apply_gradients(zip(gradients, model.trainable_variables)) 67 | del tape # Free resources 68 | 69 | # Print loss every 100 epochs 70 | if epoch % 100 == 0: 71 | print(f"Epoch {epoch}, Loss: {total_loss.numpy()}") 72 | 73 | print("Training complete.") 74 | 75 | # Evaluate the model on different environments 76 | def evaluate_model(X, Y): 77 | pred, _ = model(X) 78 | pred_labels = (pred.numpy() > 0.5).astype(int) 79 | accuracy = np.mean(pred_labels == Y) 80 | return accuracy 81 | 82 | acc_env1 = evaluate_model(X_env1, Y_env1) 83 | acc_env2 = evaluate_model(X_env2, Y_env2) 84 | print(f"Accuracy in Environment 1: {acc_env1 * 100:.2f}%") 85 | print(f"Accuracy in Environment 2: {acc_env2 * 100:.2f}%") 86 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/scm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import statsmodels.api as sm 4 | 5 | # Define the structural equations of the SCM: X -> M and (X, M) -> Y 6 | np.random.seed(42) 7 | n_samples = 1000 8 | X = np.random.normal(size=n_samples) 9 | U_M = np.random.normal(size=n_samples) 10 | M = 0.5 * X + U_M 11 | U_Y = np.random.normal(size=n_samples) 12 | Y = 0.3 * M + 0.2 * X + U_Y 13 | 14 | # Create a DataFrame 15 | data = pd.DataFrame({'X': X, 'M': M, 'Y': Y}) 16 | 17 | # X is exogenous in this SCM, so the total causal effect of X on Y is identified 18 | # by regressing Y on X alone; adjusting for the mediator M would block the 19 | # indirect path X -> M -> Y and recover only the direct effect 20 | ols_model = sm.OLS(data['Y'], sm.add_constant(data['X'])).fit() 21 | effect = ols_model.params['X'] 22 | 23 | # True total effect: 0.2 (direct) + 0.5 * 0.3 (via M) = 0.35 24 | print(f"Estimated causal effect of X on Y: {effect:.4f}") -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/Images/(GAN)-based Counterfactuals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/630_Counterfactual/Images/(GAN)-based Counterfactuals.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/Images/Diverse Counterfactual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/630_Counterfactual/Images/Diverse Counterfactual.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/Images/Optimization-based Counterfactuals.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/630_Counterfactual/Images/Optimization-based Counterfactuals.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/actionable_recourse_method.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.optimize import minimize 3 | 4 | # Define the prediction function of the model (simplified example) 5 | def predict(model, x): 6 | # x is passed through as a 1-D array 7 | return model.predict(x) 8 | 9 | # Action cost function: Assign higher costs to harder-to-change features 10 | def action_cost(x, x_prime, weights): 11 | return np.sum(weights * np.abs(x - x_prime)) 12 | 13 | # Objective function for actionable recourse 14 | def objective_function(x_prime, x, model, target_class, lambda_param, weights): 15 | distance = np.linalg.norm(x - x_prime) 16 | action_cost_value = action_cost(x, x_prime, weights) 17 | prediction_loss = 0 if predict(model, x_prime) == target_class else 1 18 | return distance + lambda_param * action_cost_value + prediction_loss 19 | 20 | # Generate actionable recourse 21 | def generate_actionable_recourse(model, x, target_class, weights, lambda_param=0.1): 22 | # Initialize the counterfactual with the original input 23 | x_prime = np.copy(x) 24 | 25 | # Optimize to find actionable recourse 26 | result = minimize( 27 | objective_function, 28 | x_prime, 29 | args=(x, model, target_class, lambda_param, weights), 30 | method='L-BFGS-B' 31 | ) 32 | 33 | return result.x 34 | 35 | # Example usage 36 | x = np.array([30, 50000, 0.4]) # Example input features: age, income, debt-to-income ratio 37 | weights = np.array([10.0, 0.5, 1.0]) # Higher cost for changing age (hard to act on), lower for financial habits 38 | target_class = 1 # Desired outcome: Loan approval 39 | 40 | # Assume we have a pre-trained model (pseudo-model for illustration) 41 | class SimpleModel: 42 | def predict(self, x): 43 | # x is expected to be a 1D array 44 | return int(x[1] > 40000 and x[2] < 0.5) # Simplified decision rule 45 | 46 | model = SimpleModel() 47 | 48 | # Generate actionable recourse 49 | counterfactual = generate_actionable_recourse(model, x, target_class, weights) 50 | 51 | print("Original input:", x) 52 | print("Actionable recourse:", counterfactual) 53 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/counterfactuals_RL.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | # Define a simple Deep Q-Network (DQN) 5 | class DQN(tf.keras.Model): 6 | def __init__(self, state_dim, action_dim): 7 | super(DQN, self).__init__() 8 | self.dense1 = tf.keras.layers.Dense(24, activation='relu') 9 | self.dense2 = tf.keras.layers.Dense(24, activation='relu') 10 | self.q_values = tf.keras.layers.Dense(action_dim, activation=None) 11 | 12 | def call(self, state): 13 | x = self.dense1(state) 14 | x = self.dense2(x) 15 | return self.q_values(x) 16 | 17 | # Initialize the environment, DQN model, and sample state 18 | state_dim = 4 19 | action_dim = 2 20 | model = DQN(state_dim, action_dim) 21 | sample_state = np.random.rand(1, state_dim).astype(np.float32) 22 | 23 | # Predict Q-values for the current state 24 | q_values = 
model(sample_state).numpy().squeeze() 25 | 26 | # Define the counterfactual analysis function 27 | def counterfactual_analysis(model, state, actual_action): 28 | q_values = model(state).numpy().squeeze() 29 | counterfactual_actions = [a for a in range(len(q_values)) if a != actual_action] 30 | 31 | counterfactual_results = {} 32 | for action in counterfactual_actions: 33 | counterfactual_q_value = q_values[action] 34 | counterfactual_results[action] = counterfactual_q_value 35 | 36 | return counterfactual_results 37 | 38 | # Assume the agent took action 0, analyze the counterfactual for action 1 39 | actual_action = 0 40 | counterfactuals = counterfactual_analysis(model, sample_state, actual_action) 41 | 42 | print("Q-values for the current state:", q_values) 43 | print("Counterfactual Q-values for alternative actions:", counterfactuals) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/counterfactuals_structured_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.neural_network import MLPClassifier 3 | from scipy.optimize import minimize 4 | 5 | # Synthetic dataset: Features are age, income, credit score, and debt-to-income ratio 6 | X = np.array([[25, 40000, 650, 0.3], [45, 80000, 720, 0.2], [35, 60000, 690, 0.25], [50, 120000, 750, 0.15]]) 7 | y = np.array([0, 1, 0, 1]) # 0 = Loan Denied, 1 = Loan Approved 8 | 9 | # Train a neural network classifier 10 | model = MLPClassifier(hidden_layer_sizes=(10,), max_iter=1000) 11 | model.fit(X, y) 12 | 13 | # Define the objective function for generating counterfactuals 14 | def objective_function(x_prime, x, model, target_class, lambda_param): 15 | distance = np.linalg.norm(x - x_prime) 16 | prediction = model.predict([x_prime])[0] 17 | classification_loss = 0 if prediction == target_class else 1 18 | return distance + lambda_param * classification_loss 19 | 20 | # Generate a counterfactual explanation 21 | def generate_counterfactual(model, x, target_class, lambda_param=0.1): 22 | x_prime = np.copy(x) 23 | result = minimize( 24 | objective_function, 25 | x_prime, 26 | args=(x, model, target_class, lambda_param), 27 | method='L-BFGS-B' 28 | ) 29 | return result.x 30 | 31 | # Example input: Applicant profile [age, income, credit score, debt-to-income ratio] 32 | x = np.array([30, 50000, 670, 0.28]) 33 | target_class = 1 # Desired outcome: Loan approval 34 | 35 | # Generate the counterfactual example 36 | counterfactual = generate_counterfactual(model, x, target_class) 37 | 38 | print("Original input:", x) 39 | print("Counterfactual example:", counterfactual) 40 | print("Original prediction:", model.predict([x])[0]) 41 | print("Counterfactual prediction:", model.predict([counterfactual])[0]) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/counterfactuals_time_series_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from scipy.optimize import minimize 4 | 5 | # Define a simple LSTM model for time series prediction 6 | model = tf.keras.Sequential([ 7 | tf.keras.layers.LSTM(50, activation='relu', input_shape=(10, 1)), 8 | tf.keras.layers.Dense(1) 9 | ]) 10 | 11 | # Generate a synthetic time series dataset 12 | np.random.seed(0) 13 | X = np.random.rand(100, 10, 1) # 100 sequences of length 10 14 | y = 
(X.mean(axis=1) > 0.5).astype(int) # Binary target based on mean value 15 | 16 | # Train the model 17 | model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 18 | model.fit(X, y, epochs=10, batch_size=16) 19 | 20 | # Define the objective function for generating counterfactuals 21 | def objective_function(x_prime, x, model, target_class, lambda_param): 22 | x_prime = x_prime.reshape(1, -1, 1) 23 | distance = np.linalg.norm(x - x_prime) 24 | smoothness = np.sum(np.abs(np.diff(x_prime.squeeze()))) 25 | prediction = model.predict(x_prime)[0][0] 26 | classification_loss = 0 if (prediction > 0.5) == target_class else 1 27 | return distance + lambda_param * smoothness + 10 * classification_loss 28 | 29 | # Generate a counterfactual for a sample sequence 30 | def generate_counterfactual(model, x, target_class, lambda_param=0.1): 31 | x_prime = np.copy(x) 32 | result = minimize( 33 | objective_function, 34 | x_prime.flatten(), 35 | args=(x, model, target_class, lambda_param), 36 | method='L-BFGS-B' 37 | ) 38 | return result.x.reshape(-1, 1) 39 | 40 | # Example input sequence 41 | x_sample = X[0] 42 | target_class = 1 # Desired outcome: Change prediction to class 1 43 | 44 | # Generate the counterfactual sequence 45 | counterfactual = generate_counterfactual(model, x_sample, target_class) 46 | 47 | print("Original sequence:", x_sample.flatten()) 48 | print("Counterfactual sequence:", counterfactual.flatten()) 49 | print("Original prediction:", model.predict(x_sample.reshape(1, -1, 1))[0][0]) 50 | print("Counterfactual prediction:", model.predict(counterfactual.reshape(1, -1, 1))[0][0]) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/diverse_counterfactual.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load and preprocess the MNIST dataset 6 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 7 | train_images = train_images[..., np.newaxis] / 255.0 8 | test_images = test_images[..., np.newaxis] / 255.0 9 | 10 | # Define and train the classifier model (if not already trained) 11 | inputs = tf.keras.Input(shape=(28, 28, 1)) 12 | x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(inputs) 13 | x = tf.keras.layers.MaxPooling2D((2, 2))(x) 14 | x = tf.keras.layers.Flatten()(x) 15 | x = tf.keras.layers.Dense(128, activation='relu')(x) 16 | outputs = tf.keras.layers.Dense(10, activation='softmax')(x) 17 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 18 | 19 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 20 | model.fit(train_images, train_labels, epochs=5, batch_size=64, validation_data=(test_images, test_labels)) 21 | 22 | # Save the trained model 23 | model.save('mnist_classifier.h5') 24 | 25 | # Load the model (optional if already in memory) 26 | model = tf.keras.models.load_model('mnist_classifier.h5') 27 | 28 | # Function to generate diverse counterfactuals 29 | def generate_diverse_counterfactuals(model, image, target_class, num_counterfactuals=3, num_steps=100, learning_rate=0.01, lambda_1=0.1, lambda_2=0.05): 30 | counterfactuals = [] 31 | 32 | for _ in range(num_counterfactuals): 33 | # Initialize the counterfactual image as a copy of the original 34 | counterfactual = tf.Variable(image, dtype=tf.float32) 35 | optimizer = 
tf.optimizers.Adam(learning_rate) 36 | 37 | for step in range(num_steps): 38 | with tf.GradientTape() as tape: 39 | # Compute the prediction loss 40 | prediction = model(counterfactual) 41 | target_class_tensor = tf.convert_to_tensor([target_class], dtype=tf.int32) 42 | classification_loss = tf.keras.losses.sparse_categorical_crossentropy(target_class_tensor, prediction) 43 | 44 | # Compute the similarity loss 45 | distance_loss = tf.reduce_mean(tf.abs(counterfactual - image)) 46 | 47 | # Compute the diversity loss (based on difference from previous counterfactuals) 48 | diversity_loss = 0 49 | if counterfactuals: 50 | for prev_cf in counterfactuals: 51 | diversity_loss += tf.reduce_mean(tf.abs(counterfactual - prev_cf)) 52 | diversity_loss /= len(counterfactuals) # Normalize by the number of counterfactuals 53 | 54 | # Total loss function 55 | total_loss = distance_loss + lambda_1 * classification_loss + lambda_2 * diversity_loss 56 | 57 | # Update the counterfactual image 58 | gradients = tape.gradient(total_loss, counterfactual) 59 | optimizer.apply_gradients([(gradients, counterfactual)]) 60 | counterfactual.assign(tf.clip_by_value(counterfactual, 0.0, 1.0)) 61 | 62 | # Add the optimized counterfactual to the list 63 | counterfactuals.append(counterfactual.numpy()) 64 | 65 | return counterfactuals 66 | 67 | # Select a sample image and generate counterfactuals 68 | sample_image = test_images[0:1] 69 | original_prediction = model.predict(sample_image) 70 | original_label = np.argmax(original_prediction, axis=1)[0] 71 | target_label = (original_label + 1) % 10 # Set the desired target class 72 | 73 | print(f"Original label: {original_label}, Target label: {target_label}") 74 | 75 | # Generate diverse counterfactuals 76 | counterfactuals = generate_diverse_counterfactuals(model, sample_image, target_label) 77 | 78 | # Display the generated counterfactuals 79 | plt.figure(figsize=(12, 4)) 80 | for i, cf_image in enumerate(counterfactuals): 81 | plt.subplot(1, len(counterfactuals), i + 1) 82 | plt.imshow(cf_image.squeeze(), cmap='gray') 83 | plt.title(f"Counterfactual {i+1}") 84 | plt.axis('off') 85 | plt.show() 86 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/gan_based_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from tensorflow.keras.losses import SparseCategoricalCrossentropy 5 | from scipy.optimize import minimize 6 | 7 | # Load and preprocess the MNIST dataset 8 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 9 | train_images = (train_images / 255.0).astype(np.float32) 10 | test_images = (test_images / 255.0).astype(np.float32) 11 | train_images = train_images[..., np.newaxis] 12 | test_images = test_images[..., np.newaxis] 13 | 14 | # Define a simple classifier model 15 | def create_classifier(): 16 | inputs = tf.keras.Input(shape=(28, 28, 1)) 17 | x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(inputs) 18 | x = tf.keras.layers.Flatten()(x) 19 | x = tf.keras.layers.Dense(128, activation='relu')(x) 20 | outputs = tf.keras.layers.Dense(10)(x) # Logits output 21 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 22 | return model 23 | 24 | classifier = create_classifier() 25 | classifier.compile( 26 | optimizer='adam', 27 | 
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 28 | metrics=['accuracy'] 29 | ) 30 | 31 | # Train the classifier 32 | classifier.fit( 33 | train_images, 34 | train_labels, 35 | epochs=5, 36 | validation_data=(test_images, test_labels), 37 | batch_size=128 38 | ) 39 | 40 | # Define a simple generator model 41 | def create_generator(latent_dim): 42 | inputs = tf.keras.Input(shape=(latent_dim,)) 43 | x = tf.keras.layers.Dense(7 * 7 * 128, activation='relu')(inputs) 44 | x = tf.keras.layers.Reshape((7, 7, 128))(x) 45 | x = tf.keras.layers.UpSampling2D()(x) 46 | x = tf.keras.layers.Conv2D(64, kernel_size=3, padding='same', activation='relu')(x) 47 | x = tf.keras.layers.UpSampling2D()(x) 48 | outputs = tf.keras.layers.Conv2D(1, kernel_size=3, padding='same', activation='sigmoid')(x) 49 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 50 | return model 51 | 52 | latent_dim = 100 53 | generator = create_generator(latent_dim) 54 | 55 | # Note: In practice, you should train the generator as part of a GAN. 56 | # For this example, we'll assume the generator is already trained. 57 | 58 | # Select a sample image from the test set 59 | sample_image = test_images[0:1] 60 | original_prediction = classifier.predict(sample_image) 61 | original_label = np.argmax(original_prediction, axis=1)[0] 62 | 63 | # Define the target class (different from the original prediction) 64 | target_label = (original_label + 1) % 10 65 | 66 | # Define a loss function 67 | loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) 68 | 69 | # Define a function to generate counterfactuals 70 | def generate_counterfactual(z): 71 | z = tf.convert_to_tensor(z.reshape(1, -1), dtype=tf.float32) 72 | # No GradientTape is needed here: L-BFGS-B approximates gradients numerically 73 | generated_image = generator(z) 74 | prediction = classifier(generated_image) 75 | # Convert target_label to tensor 76 | target_label_tensor = tf.convert_to_tensor([target_label], dtype=tf.int32) 77 | similarity_loss = tf.norm(generated_image - sample_image) 78 | classification_loss = loss_fn(target_label_tensor, prediction) 79 | total_loss = similarity_loss + 0.1 * classification_loss 80 | return total_loss.numpy().astype(np.float64) 81 | 82 | # Initialize the latent vector and optimize it 83 | z_initial = np.random.normal(size=(latent_dim,)) 84 | result = minimize( 85 | generate_counterfactual, 86 | z_initial, 87 | method='L-BFGS-B', 88 | options={'maxiter': 100} 89 | ) 90 | 91 | # Generate the counterfactual image 92 | z_optimized = result.x 93 | counterfactual_image = generator.predict(z_optimized.reshape(1, -1)) 94 | 95 | # Display the original and counterfactual images 96 | plt.figure(figsize=(8, 4)) 97 | 98 | plt.subplot(1, 2, 1) 99 | plt.imshow(sample_image.squeeze(), cmap='gray') 100 | plt.title(f"Original: {original_label}") 101 | plt.axis('off') 102 | 103 | plt.subplot(1, 2, 2) 104 | # counterfactual_image is already a NumPy array 105 | plt.imshow(counterfactual_image.squeeze(), cmap='gray') 106 | plt.title(f"Counterfactual: {target_label}") 107 | plt.axis('off') 108 | 109 | plt.show() 110 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/minimal_change_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import LogisticRegression 3 | from scipy.optimize import minimize 4 | 5 | # Create a synthetic dataset 6 | X = np.array([[0.1, 0.5], [0.4, 0.8], 
-------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/minimal_change_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import LogisticRegression 3 | from scipy.optimize import minimize 4 | 5 | # Create a synthetic dataset 6 | X = np.array([[0.1, 0.5], [0.4, 0.8], [0.5, 0.3], [0.9, 0.6], [0.7, 0.9]]) 7 | y = np.array([0, 0, 0, 1, 1]) 8 | 9 | # Train a logistic regression classifier 10 | model = LogisticRegression() 11 | model.fit(X, y) 12 | 13 | # Define the objective function for minimal change counterfactual 14 | def objective_function(x_prime, x, model, target_class): 15 | distance = np.linalg.norm(x - x_prime) 16 | target_prob = model.predict_proba([x_prime])[0][target_class] 17 | classification_loss = 1 - target_prob # Smooth penalty: a hard 0/1 loss is flat almost everywhere, so L-BFGS-B's numerical gradients would vanish and the search would stall 18 | return distance + 10 * classification_loss # Weighted sum of proximity and target-class fit 19 | 20 | # Generate a minimal change counterfactual 21 | def generate_minimal_counterfactual(model, x, target_class): 22 | x_prime = np.copy(x) 23 | result = minimize( 24 | objective_function, 25 | x_prime, 26 | args=(x, model, target_class), 27 | method='L-BFGS-B' 28 | ) 29 | return result.x 30 | 31 | # Example input and target class 32 | x = np.array([0.3, 0.7]) 33 | target_class = 1 # Desired outcome different from the model's original prediction 34 | 35 | # Generate the counterfactual 36 | counterfactual = generate_minimal_counterfactual(model, x, target_class) 37 | 38 | print("Original input:", x) 39 | print("Minimal change counterfactual:", counterfactual) 40 | print("Original prediction:", model.predict([x])[0]) 41 | print("Counterfactual prediction:", model.predict([counterfactual])[0]) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/neighbor_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.datasets import load_iris 4 | from sklearn.neighbors import KNeighborsClassifier 5 | from sklearn.preprocessing import StandardScaler 6 | from sklearn.metrics import pairwise_distances_argmin_min 7 | 8 | # Load the Iris dataset 9 | data = load_iris() 10 | X = pd.DataFrame(data.data, columns=data.feature_names) 11 | y = pd.Series(data.target) 12 | 13 | # Standardize the features 14 | scaler = StandardScaler() 15 | X_scaled = scaler.fit_transform(X) 16 | 17 | # Train a k-NN classifier 18 | knn = KNeighborsClassifier(n_neighbors=5) 19 | knn.fit(X_scaled, y) 20 | 21 | # Function to find nearest neighbor counterfactual 22 | def find_counterfactual(instance, model, X, y): 23 | original_class = model.predict([instance])[0] 24 | # Find the nearest neighbor from a different class 25 | mask = y != original_class 26 | candidates = X[mask] 27 | indices, distances = pairwise_distances_argmin_min([instance], candidates) 28 | counterfactual = candidates[indices[0]] 29 | return counterfactual 30 | 31 | # Select a sample instance and find its counterfactual 32 | sample_index = 0 33 | sample_instance = X_scaled[sample_index] 34 | counterfactual = find_counterfactual(sample_instance, knn, X_scaled, y) 35 | 36 | print("Original instance:", sample_instance) 37 | print("Counterfactual instance:", counterfactual) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/optimization_based_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load the MNIST dataset 6 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 7 | train_images = train_images[..., np.newaxis] / 255.0 8 | test_images = 
test_images[..., np.newaxis] / 255.0 9 | 10 | # Define a simple neural network model using Functional API to avoid warnings 11 | inputs = tf.keras.Input(shape=(28, 28, 1)) 12 | x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(inputs) 13 | x = tf.keras.layers.MaxPooling2D((2, 2))(x) 14 | x = tf.keras.layers.Flatten()(x) 15 | x = tf.keras.layers.Dense(128, activation='relu')(x) 16 | outputs = tf.keras.layers.Dense(10, activation='softmax')(x) 17 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 18 | 19 | # Compile and train the model 20 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 21 | model.fit(train_images, train_labels, epochs=1, batch_size=64) 22 | 23 | # Function to generate an optimization-based counterfactual 24 | def generate_counterfactual(model, image, target_class, num_steps=100, learning_rate=0.01, lambda_param=0.1): 25 | # Create a variable for the counterfactual image 26 | counterfactual = tf.Variable(image, dtype=tf.float32) 27 | 28 | # Define the optimizer 29 | optimizer = tf.optimizers.Adam(learning_rate) 30 | 31 | for step in range(num_steps): 32 | with tf.GradientTape() as tape: 33 | # Compute the loss: distance loss + prediction loss 34 | distance_loss = tf.reduce_mean(tf.abs(counterfactual - image)) 35 | prediction = model(counterfactual) 36 | # Convert target_class to tensor 37 | target_class_tensor = tf.convert_to_tensor([target_class], dtype=tf.int32) 38 | classification_loss = tf.keras.losses.sparse_categorical_crossentropy(target_class_tensor, prediction) 39 | total_loss = distance_loss + lambda_param * classification_loss 40 | 41 | # Compute gradients and update the counterfactual image 42 | gradients = tape.gradient(total_loss, counterfactual) 43 | optimizer.apply_gradients([(gradients, counterfactual)]) 44 | 45 | # Clip the pixel values to maintain valid image range 46 | counterfactual.assign(tf.clip_by_value(counterfactual, 0.0, 1.0)) 47 | 48 | return counterfactual.numpy() 49 | 50 | # Select a sample image and generate a counterfactual 51 | sample_image = test_images[0:1] 52 | original_prediction = model.predict(sample_image) 53 | original_label = np.argmax(original_prediction, axis=1)[0] 54 | target_label = (original_label + 1) % 10 55 | 56 | counterfactual_image = generate_counterfactual(model, sample_image, target_label) 57 | 58 | # Display the original and counterfactual images 59 | plt.figure(figsize=(8, 4)) 60 | 61 | plt.subplot(1, 2, 1) 62 | plt.imshow(sample_image.squeeze(), cmap='gray') 63 | plt.title(f"Original: {original_label}") 64 | plt.axis('off') 65 | 66 | plt.subplot(1, 2, 2) 67 | plt.imshow(counterfactual_image.squeeze(), cmap='gray') 68 | plt.title(f"Counterfactual: {target_label}") 69 | plt.axis('off') 70 | 71 | plt.show() 72 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/prototype_based_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.datasets import load_iris 4 | from sklearn.neighbors import KNeighborsClassifier 5 | from sklearn.preprocessing import StandardScaler 6 | from sklearn.metrics import pairwise_distances_argmin_min 7 | 8 | # Load the Iris dataset 9 | data = load_iris() 10 | X = pd.DataFrame(data.data, columns=data.feature_names) 11 | y = pd.Series(data.target) 12 | 13 | # Standardize the features 14 | scaler = StandardScaler() 15 | X_scaled = 
scaler.fit_transform(X) 16 | 17 | # Train a k-NN classifier 18 | knn = KNeighborsClassifier(n_neighbors=5) 19 | knn.fit(X_scaled, y) 20 | 21 | # Function to find prototype-based counterfactual 22 | def find_prototype_counterfactual(instance, model, X, y): 23 | original_class = model.predict([instance])[0] 24 | # Find the nearest prototype from a different class 25 | mask = y != original_class 26 | prototypes = X[mask] 27 | indices, distances = pairwise_distances_argmin_min([instance], prototypes) 28 | counterfactual = prototypes[indices[0]] 29 | return counterfactual 30 | 31 | # Select a sample instance and find its counterfactual 32 | sample_index = 0 33 | sample_instance = X_scaled[sample_index] 34 | counterfactual = find_prototype_counterfactual(sample_instance, knn, X_scaled, y) 35 | 36 | print("Original instance:", sample_instance) 37 | print("Counterfactual prototype:", counterfactual) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/640_Graph-based/Images/feature_importance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/640_Graph-based/Images/feature_importance.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/640_Graph-based/Images/subgraph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/640_Graph-based/Images/subgraph.pdf -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/640_Graph-based/gnn_explainer.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | from torch_geometric.datasets import Planetoid 7 | from torch_geometric.explain import Explainer, GNNExplainer 8 | from torch_geometric.nn import GCNConv 9 | 10 | dataset = 'Cora' 11 | path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid') 12 | dataset = Planetoid(path, dataset) 13 | data = dataset[0] 14 | 15 | 16 | class GCN(torch.nn.Module): 17 | def __init__(self): 18 | super().__init__() 19 | self.conv1 = GCNConv(dataset.num_features, 16) 20 | self.conv2 = GCNConv(16, dataset.num_classes) 21 | 22 | def forward(self, x, edge_index): 23 | x = F.relu(self.conv1(x, edge_index)) 24 | x = F.dropout(x, training=self.training) 25 | x = self.conv2(x, edge_index) 26 | return F.log_softmax(x, dim=1) 27 | 28 | 29 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 30 | model = GCN().to(device) 31 | data = data.to(device) 32 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4) 33 | 34 | for epoch in range(1, 201): 35 | model.train() 36 | optimizer.zero_grad() 37 | out = model(data.x, data.edge_index) 38 | loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) 39 | loss.backward() 40 | optimizer.step() 41 | 42 | explainer = Explainer( 43 | model=model, 44 | algorithm=GNNExplainer(epochs=200), 45 | explanation_type='model', 46 | node_mask_type='attributes', 47 | edge_mask_type='object', 48 | model_config=dict( 49 | mode='multiclass_classification', 50 | 
task_level='node', 51 | return_type='log_probs', 52 | ), 53 | ) 54 | node_index = 10 55 | explanation = explainer(data.x, data.edge_index, index=node_index) 56 | print(f'Generated explanations in {explanation.available_explanations}') 57 | 58 | path = 'feature_importance.png' 59 | explanation.visualize_feature_importance(path, top_k=10) 60 | print(f"Feature importance plot has been saved to '{path}'") 61 | 62 | path = 'subgraph.pdf' 63 | explanation.visualize_graph(path) 64 | print(f"Subgraph visualization plot has been saved to '{path}'") -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/640_Graph-based/node_importance_attribution.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch_geometric.nn import GCNConv 4 | from torch_geometric.datasets import Planetoid 5 | 6 | # Load the Cora dataset 7 | dataset = Planetoid(root='/tmp/Cora', name='Cora') 8 | data = dataset[0] 9 | 10 | # Define a simple GCN model 11 | class GCN(torch.nn.Module): 12 | def __init__(self): 13 | super(GCN, self).__init__() 14 | self.conv1 = GCNConv(dataset.num_node_features, 16) 15 | self.conv2 = GCNConv(16, dataset.num_classes) 16 | 17 | def forward(self, x, edge_index): 18 | x = F.relu(self.conv1(x, edge_index)) 19 | x = self.conv2(x, edge_index) 20 | return F.log_softmax(x, dim=1) 21 | 22 | model = GCN() 23 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01) 24 | criterion = torch.nn.CrossEntropyLoss() 25 | 26 | # Train the GCN model 27 | model.train() 28 | for epoch in range(200): 29 | optimizer.zero_grad() 30 | out = model(data.x, data.edge_index) 31 | loss = criterion(out[data.train_mask], data.y[data.train_mask]) 32 | loss.backward() 33 | optimizer.step() 34 | 35 | # Calculate node importance 36 | model.eval() 37 | data.x.requires_grad = True # Enable gradient calculation for node features 38 | 39 | target_node = 10 40 | output = model(data.x, data.edge_index) 41 | 42 | # Perform backward pass for the predicted class of the target node 43 | predicted_class = output[target_node].argmax() 44 | output[target_node, predicted_class].backward() 45 | 46 | # Calculate the L2 norm of the gradient for the target node's features as the importance score 47 | node_importance = torch.norm(data.x.grad[target_node], p=2).item() 48 | print(f"Importance score for node {target_node}: {node_importance:.4f}") 49 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/Images/Cross-modal Attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/650_Multimodal/Images/Cross-modal Attention.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/Images/Joint Feature Attribution for Multimodal Models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/650_Multimodal/Images/Joint Feature Attribution for Multimodal Models.png -------------------------------------------------------------------------------- 
/ch06_techniques_for_explainable_ai/650_Multimodal/Images/model_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/650_Multimodal/Images/model_plot.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/Images/model_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/650_Multimodal/Images/model_summary.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/cross_modal_attention.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Dense, MultiHeadAttention, Concatenate, GlobalAveragePooling1D, Lambda 3 | from tensorflow.keras.models import Model 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | # Set the model dimension 8 | d_model = 64 9 | 10 | # Define text and image input layers 11 | text_input = tf.keras.Input(shape=(100, 300), name='text_input') # 100 tokens, 300-d embeddings 12 | image_input = tf.keras.Input(shape=(49, 512), name='image_input') # 7x7 image patches, 512-d features 13 | 14 | # Project inputs to a common dimension 15 | text_features = Dense(d_model)(text_input) # Shape: (batch_size, 100, 64) 16 | image_features = Dense(d_model)(image_input) # Shape: (batch_size, 49, 64) 17 | 18 | # Self-attention on text features 19 | text_self_attention = MultiHeadAttention(num_heads=8, key_dim=d_model) 20 | text_attention_output = text_self_attention( 21 | query=text_features, value=text_features, key=text_features 22 | ) # Shape: (batch_size, 100, 64) 23 | 24 | # Self-attention on image features 25 | image_self_attention = MultiHeadAttention(num_heads=8, key_dim=d_model) 26 | image_attention_output = image_self_attention( 27 | query=image_features, value=image_features, key=image_features 28 | ) # Shape: (batch_size, 49, 64) 29 | 30 | # Cross-modal attention from text to image features 31 | cross_modal_attention = MultiHeadAttention(num_heads=8, key_dim=d_model) 32 | cross_attention_output, cross_attention_scores = cross_modal_attention( 33 | query=text_attention_output, 34 | value=image_attention_output, 35 | key=image_attention_output, 36 | return_attention_scores=True 37 | ) # cross_attention_output shape: (batch_size, 100, 64) 38 | # cross_attention_scores shape: (batch_size, 8, 100, 49) 39 | 40 | # Average the attention scores over the heads using a Lambda layer 41 | average_attention_scores = Lambda(lambda x: tf.reduce_mean(x, axis=1))(cross_attention_scores) 42 | # Shape: (batch_size, 100, 49) 43 | 44 | # Pooling over the sequence length to get fixed-size representations 45 | text_representation = GlobalAveragePooling1D()(text_attention_output) # Shape: (batch_size, 64) 46 | image_representation = GlobalAveragePooling1D()(image_attention_output) # Shape: (batch_size, 64) 47 | cross_attention_representation = GlobalAveragePooling1D()(cross_attention_output) # Shape: (batch_size, 64) 48 | 49 | # Combine the representations 50 | combined_representation = Concatenate()([ 51 | text_representation, 52 | image_representation, 
53 | cross_attention_representation 54 | ]) # Shape: (batch_size, 192) 55 | 56 | # Output layer 57 | output = Dense(1, activation='sigmoid')(combined_representation) 58 | 59 | # Build and compile the model 60 | model = Model(inputs=[text_input, image_input], outputs=output) 61 | model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 62 | 63 | # Create sample input data 64 | text_sample = np.random.rand(1, 100, 300).astype(np.float32) 65 | image_sample = np.random.rand(1, 49, 512).astype(np.float32) 66 | 67 | # Perform model prediction 68 | prediction = model.predict([text_sample, image_sample]) 69 | print("Prediction:", prediction) 70 | 71 | # Build a model to output the attention scores for visualization 72 | attention_model = Model(inputs=[text_input, image_input], outputs=average_attention_scores) 73 | 74 | # Get the attention scores 75 | attention_scores = attention_model.predict([text_sample, image_sample]) # Shape: (1, 100, 49) 76 | 77 | # Visualize the cross-modal attention weights 78 | plt.figure(figsize=(12, 8)) 79 | plt.imshow(attention_scores[0], cmap='viridis', aspect='auto') 80 | plt.colorbar() 81 | plt.title("Cross-modal Attention Weights (Text to Image Features)") 82 | plt.xlabel("Image Feature Index (49 patches)") 83 | plt.ylabel("Text Token Index (100 tokens)") 84 | plt.show() 85 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/joint_feature_attribution_multimodal_models.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Dense, Concatenate 3 | from tensorflow.keras.models import Model 4 | import numpy as np 5 | 6 | # Define text and image inputs 7 | text_input = tf.keras.Input(shape=(300,), name='text_input') # 300-d text embeddings 8 | image_input = tf.keras.Input(shape=(512,), name='image_input') # 512-d image features 9 | 10 | # Define simple Dense layers for text and image features 11 | text_features = Dense(128, activation='relu')(text_input) 12 | image_features = Dense(128, activation='relu')(image_input) 13 | 14 | # Concatenate text and image features 15 | combined_features = Concatenate()([text_features, image_features]) 16 | output = Dense(1, activation='sigmoid')(combined_features) 17 | 18 | # Build and compile the model 19 | model = Model(inputs=[text_input, image_input], outputs=output) 20 | model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 21 | 22 | # Define Integrated Gradients function 23 | def integrated_gradients(model, inputs, baseline, steps=50): 24 | alphas = np.linspace(0, 1, steps) 25 | 26 | # Interpolate for each input 27 | input_scaled_text = np.array([baseline[0] + alpha * (inputs[0] - baseline[0]) for alpha in alphas]) 28 | input_scaled_image = np.array([baseline[1] + alpha * (inputs[1] - baseline[1]) for alpha in alphas]) 29 | 30 | # Convert NumPy arrays to TensorFlow tensors 31 | input_scaled_text = tf.convert_to_tensor(input_scaled_text, dtype=tf.float32) 32 | input_scaled_image = tf.convert_to_tensor(input_scaled_image, dtype=tf.float32) 33 | 34 | # Compute gradients using GradientTape 35 | with tf.GradientTape() as tape: 36 | tape.watch([input_scaled_text, input_scaled_image]) 37 | predictions = model([input_scaled_text, input_scaled_image]) 38 | gradients = tape.gradient(predictions, [input_scaled_text, input_scaled_image]) 39 | 40 | # Calculate average gradients and Integrated Gradients 41 | 
avg_gradients_text = tf.reduce_mean(gradients[0], axis=0).numpy() 42 | avg_gradients_image = tf.reduce_mean(gradients[1], axis=0).numpy() 43 | 44 | integrated_gradients_text = (inputs[0] - baseline[0]) * avg_gradients_text 45 | integrated_gradients_image = (inputs[1] - baseline[1]) * avg_gradients_image 46 | 47 | return integrated_gradients_text, integrated_gradients_image 48 | 49 | # Example usage 50 | text_sample = np.random.rand(300) 51 | image_sample = np.random.rand(512) 52 | baseline_text = np.zeros(300) 53 | baseline_image = np.zeros(512) 54 | 55 | inputs = [text_sample, image_sample] 56 | baseline = [baseline_text, baseline_image] 57 | 58 | # Compute Integrated Gradients 59 | attributions_text, attributions_image = integrated_gradients(model, inputs, baseline) 60 | print("Integrated Gradients for text features:", attributions_text) 61 | print("Integrated Gradients for image features:", attributions_image) 62 | 63 | # Visualize Integrated Gradients 64 | import matplotlib.pyplot as plt 65 | 66 | plt.figure(figsize=(12, 6)) 67 | 68 | # Plot for text features 69 | plt.subplot(1, 2, 1) 70 | plt.plot(attributions_text) 71 | plt.title("Integrated Gradients for Text Features") 72 | plt.xlabel("Feature Index") 73 | plt.ylabel("Attribution") 74 | 75 | # Plot for image features 76 | plt.subplot(1, 2, 2) 77 | plt.plot(attributions_image) 78 | plt.title("Integrated Gradients for Image Features") 79 | plt.xlabel("Feature Index") 80 | plt.ylabel("Attribution") 81 | 82 | plt.tight_layout() 83 | plt.show() 84 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/multimodal_explanations_attention.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Dense, Attention, Concatenate, Lambda 3 | from tensorflow.keras.models import Model 4 | from tensorflow.keras.utils import plot_model 5 | 6 | # Define text and image inputs 7 | text_input = tf.keras.Input(shape=(100, 300), name='text_input') # 100 tokens, 300-d embeddings 8 | image_input = tf.keras.Input(shape=(49, 512), name='image_input') # 7x7 image patches, 512-d features 9 | 10 | # Text attention mechanism 11 | text_query = Dense(64, activation='tanh')(text_input) 12 | text_key = Dense(64, activation='tanh')(text_input) 13 | text_value = Dense(64, activation='tanh')(text_input) 14 | text_attention = Attention()([text_query, text_value, text_key]) 15 | 16 | # Reduce mean for text attention 17 | text_representation = Lambda(lambda x: tf.reduce_mean(x, axis=1))(text_attention) 18 | 19 | # Image attention mechanism 20 | image_query = Dense(64, activation='tanh')(image_input) 21 | image_key = Dense(64, activation='tanh')(image_input) 22 | image_value = Dense(64, activation='tanh')(image_input) 23 | image_attention = Attention()([image_query, image_value, image_key]) 24 | 25 | # Reduce mean for image attention 26 | image_representation = Lambda(lambda x: tf.reduce_mean(x, axis=1))(image_attention) 27 | 28 | # Concatenate text and image representations 29 | combined_representation = Concatenate()([text_representation, image_representation]) 30 | output = Dense(1, activation='sigmoid')(combined_representation) 31 | 32 | # Build and compile the model 33 | model = Model(inputs=[text_input, image_input], outputs=output) 34 | model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 35 | 36 | # Print model summary 37 | model.summary() 38 | 39 | # Save the model 
plot as a PNG image 40 | plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True) 41 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/Images/Adversarial Robustness Testing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/660_Robustness/Images/Adversarial Robustness Testing.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/Images/Fairness-aware Explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/660_Robustness/Images/Fairness-aware Explanation.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/Images/Robustness Testing for Explanations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/660_Robustness/Images/Robustness Testing for Explanations.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/adversarial_robustness_testing.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load MNIST dataset 6 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 7 | x_test = x_test.astype('float32') / 255.0 8 | x_test = np.expand_dims(x_test, axis=-1) 9 | 10 | # Define a simple CNN model 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 13 | tf.keras.layers.MaxPooling2D((2, 2)), 14 | tf.keras.layers.Flatten(), 15 | tf.keras.layers.Dense(10, activation='softmax') 16 | ]) 17 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy') 18 | 19 | # Train the model 20 | model.fit(x_train, y_train, epochs=5, batch_size=64) 21 | 22 | # Function to compute saliency map 23 | def compute_saliency_map(model, input_image, label): 24 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 25 | with tf.GradientTape() as tape: 26 | tape.watch(input_image) 27 | predictions = model(input_image) 28 | loss = predictions[0, label] 29 | gradient = tape.gradient(loss, input_image).numpy()[0] 30 | saliency_map = np.max(np.abs(gradient), axis=-1) 31 | return saliency_map 32 | 33 | # Generate an adversarial example using FGSM (Fast Gradient Sign Method) 34 | def generate_adversarial_example(model, input_image, label, epsilon=0.1): 35 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 36 | with tf.GradientTape() as tape: 37 | tape.watch(input_image) 38 | predictions = model(input_image) 39 | loss = predictions[0, label] # Probability assigned to the true class 40 | gradient = tape.gradient(loss, input_image) 41 | perturbation = epsilon * tf.sign(gradient) 42 | adversarial_image = tf.clip_by_value(input_image - perturbation, 0.0, 1.0) # Subtract: stepping down the true-class probability is equivalent to FGSM's ascent on the cross-entropy loss; adding it would strengthen the correct prediction instead. Clip to the valid pixel range. 43 | return adversarial_image.numpy()[0] 44 | 
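# Quick sanity check, a minimal sketch assuming only the helper above: a genuinely
# adversarial step should reduce the model's confidence in the true class as the
# step size epsilon grows.
for eps in [0.0, 0.05, 0.1, 0.2]:
    adv = generate_adversarial_example(model, x_test[0], y_test[0], epsilon=eps)
    p_true = model.predict(adv[np.newaxis, ...], verbose=0)[0, y_test[0]]
    print(f"epsilon={eps:.2f} -> p(true class)={p_true:.4f}")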
45 | # Original and adversarial saliency maps 46 | original_image = x_test[0] 47 | adversarial_image = generate_adversarial_example(model, original_image, y_test[0]) 48 | saliency_original = compute_saliency_map(model, original_image, y_test[0]) 49 | saliency_adversarial = compute_saliency_map(model, adversarial_image, y_test[0]) 50 | 51 | # Compute Adversarial Robustness Score (ARS) 52 | delta = adversarial_image - original_image 53 | ars = 1 - np.linalg.norm(saliency_original - saliency_adversarial) / np.linalg.norm(delta) 54 | print(f"Adversarial Robustness Score (ARS): {ars:.4f}") 55 | 56 | # Display the saliency maps 57 | plt.figure(figsize=(12, 5)) 58 | plt.subplot(1, 2, 1) 59 | plt.title("Original Saliency Map") 60 | plt.imshow(saliency_original, cmap='hot') 61 | plt.axis('off') 62 | 63 | plt.subplot(1, 2, 2) 64 | plt.title("Adversarial Saliency Map") 65 | plt.imshow(saliency_adversarial, cmap='hot') 66 | plt.axis('off') 67 | plt.show() -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/explanation_consistency_score.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | # Load MNIST dataset 5 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 6 | x_test = x_test.astype('float32') / 255.0 7 | x_test = np.expand_dims(x_test, axis=-1) 8 | 9 | # Define a simple CNN model 10 | model = tf.keras.Sequential([ 11 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 12 | tf.keras.layers.MaxPooling2D((2, 2)), 13 | tf.keras.layers.Flatten(), 14 | tf.keras.layers.Dense(10, activation='softmax') 15 | ]) 16 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy') 17 | 18 | # Train the model 19 | model.fit(x_train, y_train, epochs=5, batch_size=64) 20 | 21 | # Function to compute gradient-based explanations 22 | def compute_gradients(model, input_image, label): 23 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 24 | with tf.GradientTape() as tape: 25 | tape.watch(input_image) 26 | predictions = model(input_image) 27 | loss = predictions[0, label] 28 | gradient = tape.gradient(loss, input_image).numpy()[0] 29 | return gradient 30 | 31 | # Consistency Analysis 32 | image1 = x_test[0] 33 | image2 = x_test[1] # A second test image used for comparison (not necessarily the same digit) 34 | grad1 = compute_gradients(model, image1, y_test[0]) 35 | grad2 = compute_gradients(model, image2, y_test[1]) 36 | ecs = np.dot(grad1.flatten(), grad2.flatten()) / (np.linalg.norm(grad1) * np.linalg.norm(grad2)) 37 | print(f"Explanation Consistency Score (ECS): {ecs:.4f}") 38 | 39 | # Stability Analysis 40 | perturbed_image = image1 + 0.1 * np.random.normal(size=image1.shape) 41 | grad_perturbed = compute_gradients(model, perturbed_image, y_test[0]) 42 | ess = 1 - np.linalg.norm(grad1 - grad_perturbed) / np.linalg.norm(image1 - perturbed_image) 43 | print(f"Explanation Stability Score (ESS): {ess:.4f}")
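# As implemented above, the ECS is exactly the cosine similarity between the two
# flattened gradient maps. A minimal cross-check, assuming SciPy is available
# (it ships as a scikit-learn dependency):
from scipy.spatial.distance import cosine
assert np.isclose(ecs, 1 - cosine(grad1.flatten(), grad2.flatten()))
# Note that the ESS is not bounded below by zero: it turns negative whenever the
# explanation shifts more than the input perturbation that caused it.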
-------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/fairness_aware_explanation.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | # Define a simple neural network model 5 | model = tf.keras.Sequential([ 6 | tf.keras.layers.Dense(16, activation='relu', input_shape=(10,)), 7 | tf.keras.layers.Dense(1, activation='sigmoid') 8 | ]) 9 | model.compile(optimizer='adam', loss='binary_crossentropy') 10 | 11 | # Generate synthetic data 12 | np.random.seed(0) 13 | X = np.random.rand(1000, 10) 14 | y = (X[:, 0] + X[:, 1] > 1).astype(int) # Binary target 15 | sensitive_attribute = np.random.choice([0, 1], size=(1000,)) # Gender (0 = male, 1 = female) 16 | 17 | # Train the model 18 | model.fit(X, y, epochs=10, batch_size=32) 19 | 20 | # Function to compute fairness-aware attributions 21 | def compute_attributions(model, X, sensitive_attr): 22 | gradients_male = [] 23 | gradients_female = [] 24 | 25 | for i, x in enumerate(X): 26 | # Convert input to tf.Tensor 27 | x_tensor = tf.convert_to_tensor(x, dtype=tf.float32) 28 | 29 | with tf.GradientTape() as tape: 30 | tape.watch(x_tensor) 31 | prediction = model(tf.expand_dims(x_tensor, axis=0)) 32 | gradient = tape.gradient(prediction, x_tensor).numpy() 33 | 34 | if sensitive_attr[i] == 0: # Male 35 | gradients_male.append(gradient) 36 | else: # Female 37 | gradients_female.append(gradient) 38 | 39 | avg_grad_male = np.mean(gradients_male, axis=0) 40 | avg_grad_female = np.mean(gradients_female, axis=0) 41 | attribution_disparity = np.abs(avg_grad_male - avg_grad_female) 42 | 43 | return avg_grad_male, avg_grad_female, attribution_disparity 44 | 45 | # Compute attributions and disparity 46 | avg_grad_male, avg_grad_female, attribution_disparity = compute_attributions(model, X, sensitive_attribute) 47 | 48 | print("Average gradient attributions (male):", avg_grad_male) 49 | print("Average gradient attributions (female):", avg_grad_female) 50 | print("Attribution disparity:", attribution_disparity) 51 | 52 | import matplotlib.pyplot as plt 53 | 54 | # Visualization of attribution disparity 55 | plt.bar(range(10), attribution_disparity) 56 | plt.xlabel('Feature Index') 57 | plt.ylabel('Attribution Disparity') 58 | plt.title('Feature Attribution Disparity between Male and Female') 59 | plt.show() 60 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/invariant_explanation_testing.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | # Load MNIST dataset 5 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 6 | x_test = x_test.astype('float32') / 255.0 7 | x_test = np.expand_dims(x_test, axis=-1) 8 | 9 | # Define a simple CNN model 10 | model = tf.keras.Sequential([ 11 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 12 | tf.keras.layers.MaxPooling2D((2, 2)), 13 | tf.keras.layers.Flatten(), 14 | tf.keras.layers.Dense(10, activation='softmax') 15 | ]) 16 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy') 17 | 18 | # Train the model 19 | model.fit(x_train, y_train, epochs=5, batch_size=64) 20 | 21 | # Function to compute gradient-based explanations 22 | def compute_gradients(model, input_image, label): 23 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 24 | with tf.GradientTape() as tape: 25 | tape.watch(input_image) 26 | predictions = model(input_image) 27 | loss = predictions[0, label] 28 | gradient = tape.gradient(loss, input_image).numpy()[0] 29 | return gradient 30 | 31 | # Split test data into two environments (even and odd digits), keeping the matching labels 32 | even_digits, even_labels = x_test[y_test % 2 == 0], y_test[y_test % 2 == 0] 33 | odd_digits, odd_labels = x_test[y_test % 2 == 1], y_test[y_test % 2 == 1] 34 | 35 | # Compute gradient explanations for both environments, using each instance's own label 36 | grad_even = compute_gradients(model, even_digits[0], even_labels[0])
37 | grad_odd = compute_gradients(model, odd_digits[0], odd_labels[0]) 38 | 39 | # Calculate Explanation Invariance Score (EIS) 40 | eis = 1 - np.linalg.norm(grad_even - grad_odd) / (np.linalg.norm(grad_even) + np.linalg.norm(grad_odd)) 41 | print(f"Explanation Invariance Score (EIS): {eis:.4f}") -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/invariant_testing_llms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import dowhy 4 | from dowhy import CausalModel 5 | from sklearn.tree import DecisionTreeClassifier 6 | from sklearn.model_selection import train_test_split 7 | 8 | # Generate synthetic data 9 | np.random.seed(0) 10 | data_size = 1000 11 | X = np.random.rand(data_size, 3) 12 | gender = np.random.choice([0, 1], size=data_size) # Sensitive attribute (0 = male, 1 = female) 13 | income = X[:, 0] + 0.5 * gender + np.random.normal(size=data_size) # Outcome influenced by gender 14 | data = pd.DataFrame({'income': income, 'gender': gender, 'feature1': X[:, 1], 'feature2': X[:, 2]}) 15 | 16 | # Split data into training and testing sets 17 | train_data, test_data = train_test_split(data, test_size=0.2, random_state=0) 18 | 19 | # Train a decision tree model (illustrative only; the causal estimate below does not use it) 20 | model = DecisionTreeClassifier() 21 | model.fit(train_data[['gender', 'feature1', 'feature2']], (train_data['income'] > 0.5).astype(int)) 22 | 23 | # Define causal model using DoWhy 24 | causal_model = CausalModel( 25 | data=train_data, 26 | treatment='gender', 27 | outcome='income', 28 | common_causes=['feature1', 'feature2'] 29 | ) 30 | 31 | # Identify and estimate the Average Causal Effect (ACE) 32 | identified_estimand = causal_model.identify_effect() 33 | estimate = causal_model.estimate_effect(identified_estimand, method_name="backdoor.linear_regression") 34 | print(f"Estimated Average Causal Effect (ACE): {estimate.value:.4f}")
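# A backdoor estimate is only as credible as its assumptions, so a natural
# follow-up is DoWhy's built-in refutation tests. A minimal sketch using two
# standard refuter names from the DoWhy API:
refute_random = causal_model.refute_estimate(identified_estimand, estimate, method_name="random_common_cause")
print(refute_random)  # The ACE should barely move when a random common cause is added
refute_placebo = causal_model.refute_estimate(identified_estimand, estimate, method_name="placebo_treatment_refuter")
print(refute_placebo)  # The ACE should collapse toward zero under a placebo treatment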
-------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/robustness_testing_explanations.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load MNIST dataset 6 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 7 | x_test = x_test.astype('float32') / 255.0 8 | x_test = np.expand_dims(x_test, axis=-1) 9 | 10 | # Define a simple CNN model 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 13 | tf.keras.layers.MaxPooling2D((2, 2)), 14 | tf.keras.layers.Flatten(), 15 | tf.keras.layers.Dense(10, activation='softmax') 16 | ]) 17 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 18 | 19 | # Train the model 20 | model.fit(x_train, y_train, epochs=5, batch_size=64) 21 | 22 | # Function to compute saliency map 23 | def compute_saliency_map(model, input_image, label): 24 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 25 | with tf.GradientTape() as tape: 26 | tape.watch(input_image) 27 | predictions = model(input_image) 28 | loss = predictions[0, label] 29 | gradient = tape.gradient(loss, input_image) 30 | saliency_map = tf.reduce_max(tf.abs(gradient), axis=-1).numpy()[0] 31 | return saliency_map 32 | 33 | # Test robustness of the saliency map 34 | original_image = x_test[0] 35 | perturbed_image = np.clip(original_image + 0.1 * np.random.normal(size=original_image.shape), 0.0, 1.0) # Clip so the perturbed input stays in the valid pixel range 36 | 37 | # Compute saliency maps 38 | saliency_original = compute_saliency_map(model, original_image, y_test[0]) 39 | saliency_perturbed = compute_saliency_map(model, perturbed_image, y_test[0]) 40 | 41 | # Compute robustness score 42 | robustness_score = 1 - np.linalg.norm(saliency_original - saliency_perturbed) / np.linalg.norm(original_image - perturbed_image) 43 | print(f"Robustness Score: {robustness_score:.4f}") 44 | 45 | # Display the saliency maps 46 | plt.figure(figsize=(10, 5)) 47 | plt.subplot(1, 2, 1) 48 | plt.title("Original Saliency Map") 49 | plt.imshow(saliency_original, cmap='hot') 50 | plt.axis('off') 51 | 52 | plt.subplot(1, 2, 2) 53 | plt.title("Perturbed Saliency Map") 54 | plt.imshow(saliency_perturbed, cmap='hot') 55 | plt.axis('off') 56 | plt.show() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | shap==0.46.0 2 | matplotlib==3.9.2 3 | tensorflow==2.18.0 4 | seaborn==0.13.2 5 | transformers==4.46.2 6 | # tf-keras==2.18.0 7 | pygam==0.9.1 8 | tensorflow-probability==0.25.0 9 | xgboost==2.1.2 10 | lime==0.2.0.1 11 | captum==0.7.0 12 | torch==2.5.1 13 | torchvision==0.20.1 14 | opencv-python==4.10.0.84 15 | ipython==8.18 16 | fastdtw==0.3.4 17 | statsmodels==0.14.4 18 | pytest==8.3.3 19 | causal-learn==0.1.3.8 20 | causality==0.0.10 21 | torch-geometric==2.6.1 22 | dowhy==0.11.1 --------------------------------------------------------------------------------