├── .gitignore
├── LICENSE
├── README.md
├── ch02_theoretical_foundations_of_explainable_ai
│   ├── Images
│   │   └── Interpretability vs. Visualization.png
│   └── interpretability_visualization.py
├── ch03_interpretability_of_traditional_machine_learning_models
│   ├── Images
│   │   ├── Bayesian.png
│   │   ├── decision_tree_feature_importance.png
│   │   ├── gam.png
│   │   ├── linear_regression_example.png
│   │   ├── logistic_regression_churn_prediction.png
│   │   └── svm_decision_boundary_visualization.png
│   ├── bayesian.py
│   ├── decision_tree_feature_importance.py
│   ├── gam.py
│   ├── linear_regression_example.py
│   ├── logistic_regression_churn_prediction.py
│   ├── rule_based_cold_diagnosis.py
│   └── svm_decision_boundary_visualization.py
├── ch04_interpretability_of_deep_learning_models
│   ├── Images
│   │   ├── HiddenStates.png
│   │   ├── OriginalandPredictedSineWave.png
│   │   ├── SampleAttentionHeatmap.png
│   │   ├── cat.jpg
│   │   └── vgg16_feature_map_visualization.png
│   ├── attention_heatmap_visualization.py
│   ├── rnn_hidden_states_visualization.py
│   └── vgg16_feature_map_visualization.py
├── ch05_interpretability_of_large_language_models
│   ├── Images
│   │   ├── MeanActivations.png
│   │   ├── PCAVisualization.png
│   │   └── embedding_probing_task.png
│   ├── embedding_probing_task.py
│   ├── embedding_visualization.py
│   └── layer_activation_analysis.py
├── ch06_techniques_for_explainable_ai
│   ├── 610_Post-hoc
│   │   ├── Images
│   │   │   ├── Attention-based Explanation for Time Series.png
│   │   │   ├── DTW.png
│   │   │   ├── Decision Plot.png
│   │   │   ├── DeepLIFT.png
│   │   │   ├── Feature Importance Analysis.png
│   │   │   ├── Feature Interaction Detection.png
│   │   │   ├── Feature Interaction Heatmap.png
│   │   │   ├── Grad-CAM.png
│   │   │   ├── ICE.png
│   │   │   ├── Integrated Gradients 1.png
│   │   │   ├── Integrated Gradients.png
│   │   │   ├── Local Interpretable Model-agnostic Explanations (LIME).png
│   │   │   ├── PDPs.png
│   │   │   ├── SHAP Dependence.png
│   │   │   ├── Saliency Maps for Recurrent Neural Networks.png
│   │   │   ├── Saliency Maps.png
│   │   │   ├── Shapley Additive Explanations (SHAP.png
│   │   │   ├── SmoothGrad.png
│   │   │   ├── TimeSHAP.png
│   │   │   └── shap_force_plot.png
│   │   ├── attention_based_explanation_time_series.py
│   │   ├── decision_plot.py
│   │   ├── deep_lift.py
│   │   ├── dtw.py
│   │   ├── feature_importance_analysis.py
│   │   ├── feature_interaction_detection.py
│   │   ├── feature_interaction_heatmap.py
│   │   ├── grad_cam.py
│   │   ├── ice.py
│   │   ├── integrated_gradients.py
│   │   ├── lime_explanation.html
│   │   ├── local_interpretable_model_agnostic_explanations.py
│   │   ├── lrp.py
│   │   ├── pdps.py
│   │   ├── saliency_Maps_rnn.py
│   │   ├── saliency_maps.py
│   │   ├── shap_dependence.py
│   │   ├── shap_force_plot.html
│   │   ├── shap_force_plot.ipynb
│   │   ├── shap_force_plot.png
│   │   ├── shapley_additive_explanations.py
│   │   ├── smoothgrad.py
│   │   └── timeshap.py
│   ├── 620_Causal-Inference
│   │   ├── Images
│   │   │   └── Causal Discovery.png
│   │   ├── causal_discovery.py
│   │   ├── causal_mediation.py
│   │   ├── irm.py
│   │   └── scm.py
│   ├── 630_Counterfactual
│   │   ├── Images
│   │   │   ├── (GAN)-based Counterfactuals.png
│   │   │   ├── Diverse Counterfactual.png
│   │   │   └── Optimization-based Counterfactuals.png
│   │   ├── actionable_recourse_method.py
│   │   ├── counterfactuals_RL.py
│   │   ├── counterfactuals_structured_data.py
│   │   ├── counterfactuals_time_series_data.py
│   │   ├── diverse_counterfactual.py
│   │   ├── gan_based_counterfactuals.py
│   │   ├── minimal_change_counterfactuals.py
│   │   ├── neighbor_counterfactuals.py
│   │   ├── optimization_based_counterfactuals.py
│   │   └── prototype_based_counterfactuals.py
│   ├── 640_Graph-based
│   │   ├── Images
│   │   │   ├── feature_importance.png
│   │   │   └── subgraph.pdf
│   │   ├── gnn_explainer.py
│   │   └── node_importance_attribution.py
│   ├── 650_Multimodal
│   │   ├── Images
│   │   │   ├── Cross-modal Attention.png
│   │   │   ├── Joint Feature Attribution for Multimodal Models.png
│   │   │   ├── model_plot.png
│   │   │   └── model_summary.png
│   │   ├── cross_modal_attention.py
│   │   ├── joint_feature_attribution_multimodal_models.py
│   │   └── multimodal_explanations_attention.py
│   └── 660_Robustness
│       ├── Images
│       │   ├── Adversarial Robustness Testing.png
│       │   ├── Fairness-aware Explanation.png
│       │   └── Robustness Testing for Explanations.png
│       ├── adversarial_robustness_testing.py
│       ├── explanation_consistency_score.py
│       ├── fairness_aware_explanation.py
│       ├── invariant_explanation_testing.py
│       ├── invariant_testing_llms.py
│       └── robustness_testing_explanations.py
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
aclImdb/
*.gz
data/
*.h5

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Wizard

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# XAI_From_Classical_Models_to_LLMs

![License](https://img.shields.io/badge/License-MIT-green)
[![arXiv](https://img.shields.io/badge/arXiv-2412.00800-B31B1B.svg)](https://arxiv.org/abs/2412.00800)
[![DOI](https://img.shields.io/badge/DOI-10.48550/arXiv.2412.00800-blue)](https://doi.org/10.48550/arXiv.2412.00800)

This repository contains the accompanying code and resources for the book **"A Comprehensive Guide to Explainable AI: From Classical Models to LLMs"**.

## About the Book

**A Comprehensive Guide to Explainable AI** addresses the critical need for transparency and interpretability in AI systems. This book bridges foundational concepts with advanced methodologies, offering a deep dive into the following topics:

- **Traditional Models**: Interpretability in Decision Trees, Linear Regression, and Support Vector Machines (SVMs).
- **Deep Learning Models**: Explainability for CNNs, RNNs, and Large Language Models (LLMs) like BERT, GPT, and T5.
- **Practical Techniques**: SHAP, LIME, Grad-CAM, counterfactual explanations, and causal inference.
- **Case Studies**: Applications in healthcare, finance, and policymaking.
- **Evaluation Metrics**: Assessing explanation quality.
- **Emerging Directions**: Interpretability in federated learning and ethical AI.

Hands-on Python examples and additional resources are available in the companion [GitHub repository](#).

---

## Features

- **Practical Techniques**: Explore actionable explainability techniques with Python code.
- **Real-World Applications**: Learn through case studies across diverse domains.
- **Emerging Research**: Gain insights into the latest trends, including interpretability for federated learning and ethical considerations.
- **Resources**: Complementary materials provided for further learning and development.

---

## Authors

The book is co-authored by experts in AI and machine learning:
Weiche Hsieh, Ziqian Bi, Chuanqi Jiang, Junyu Liu, Benji Peng, Sen Zhang, Xuanhe Pan, Jiawei Xu, Jinlang Wang, Keyu Chen, Pohsun Feng, Yizhu Wen, Xinyuan Song, Tianyang Wang, Ming Liu, Junjie Yang, Ming Li, Bowen Jing, Jintao Ren, Junhao Song, Hong-Ming Tseng, Yichao Zhang, Lawrence K. Q. Yan, Qian Niu, Silin Chen, Yunze Wang, Chia Xin Liang.

---

## Citation

If you use this work in your research, please cite it as follows:

```bibtex
@book{hsieh2024comprehensiveguideexplainableai,
  title={A Comprehensive Guide to Explainable AI: From Classical Models to LLMs},
  author={Weiche Hsieh and Ziqian Bi and Chuanqi Jiang and Junyu Liu and Benji Peng and Sen Zhang and Xuanhe Pan and Jiawei Xu and Jinlang Wang and Keyu Chen and Pohsun Feng and Yizhu Wen and Xinyuan Song and Tianyang Wang and Ming Liu and Junjie Yang and Ming Li and Bowen Jing and Jintao Ren and Junhao Song and Hong-Ming Tseng and Yichao Zhang and Lawrence K. Q. Yan and Qian Niu and Silin Chen and Yunze Wang and Chia Xin Liang},
  year={2024},
  publisher={arXiv},
  eprint={2412.00800},
  archivePrefix={arXiv},
  primaryClass={cs.LG},
  url={https://arxiv.org/abs/2412.00800}
}
```
--------------------------------------------------------------------------------
/ch02_theoretical_foundations_of_explainable_ai/Images/Interpretability vs. Visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch02_theoretical_foundations_of_explainable_ai/Images/Interpretability vs. Visualization.png
--------------------------------------------------------------------------------
/ch02_theoretical_foundations_of_explainable_ai/interpretability_visualization.py:
--------------------------------------------------------------------------------
import shap
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the dataset
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a RandomForest model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Initialize the SHAP Explainer
explainer = shap.Explainer(model.predict, X_test)

# Compute SHAP values
shap_values = explainer(X_test)

# Plot SHAP Summary Plot
shap.summary_plot(shap_values, X_test)
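
# Optional follow-up sketch (an illustrative addition, not required by the
# summary plot above): a bar plot of mean |SHAP value| per feature gives a
# single global importance ranking from the same Explanation object.
shap.plots.bar(shap_values)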
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/Bayesian.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/Bayesian.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/decision_tree_feature_importance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/decision_tree_feature_importance.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/gam.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/gam.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/linear_regression_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/linear_regression_example.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/logistic_regression_churn_prediction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/logistic_regression_churn_prediction.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/Images/svm_decision_boundary_visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch03_interpretability_of_traditional_machine_learning_models/Images/svm_decision_boundary_visualization.png
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/bayesian.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

# Define the dataset
np.random.seed(42)
X = np.linspace(-5, 5, 100)
true_slope = 0.7
true_intercept = 1.5
y = true_slope * X + true_intercept + np.random.normal(0, 1, size=X.shape)

# Define the Bayesian linear regression model
tfd = tfp.distributions

# Define priors
prior_slope = tfd.Normal(loc=0., scale=1.)
prior_intercept = tfd.Normal(loc=0., scale=1.)
prior_sigma = tfd.HalfNormal(scale=1.)

# Define likelihood function
def likelihood(slope, intercept, sigma, X):
    mean = slope * X + intercept
    return tfd.Normal(loc=mean, scale=sigma)

# Sample from the posterior using Markov Chain Monte Carlo (MCMC)
@tf.function
def joint_log_prob(slope, intercept, sigma):
    lp = prior_slope.log_prob(slope) + prior_intercept.log_prob(intercept) + prior_sigma.log_prob(sigma)
    lp += tf.reduce_sum(likelihood(slope, intercept, sigma, X).log_prob(y))
    return lp

# Initialize MCMC sampler
initial_state = [0., 0., 1.]
num_results = 1000
kernel = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=joint_log_prob,
    step_size=0.1,
    num_leapfrog_steps=3)

# Run MCMC
states, kernel_results = tfp.mcmc.sample_chain(
    num_results=num_results,
    current_state=initial_state,
    kernel=kernel,
    trace_fn=lambda _, pkr: pkr.is_accepted)

# Extract sampled parameters
slope_samples, intercept_samples, sigma_samples = states

# Plot the posterior distributions
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
axs[0].hist(slope_samples, bins=30, color='skyblue', edgecolor='black')
axs[0].set_title('Posterior of Slope')
axs[1].hist(intercept_samples, bins=30, color='skyblue', edgecolor='black')
axs[1].set_title('Posterior of Intercept')
axs[2].hist(sigma_samples, bins=30, color='skyblue', edgecolor='black')
axs[2].set_title('Posterior of Sigma')

plt.show()
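
# Optional posterior summary (an illustrative sketch computed from the MCMC
# samples above): report the posterior mean and a 95% credible interval for
# each parameter, which complements the histograms.
for name, samples in [("slope", slope_samples),
                      ("intercept", intercept_samples),
                      ("sigma", sigma_samples)]:
    samples = np.asarray(samples)
    lo, hi = np.percentile(samples, [2.5, 97.5])
    print(f"{name}: mean={samples.mean():.3f}, 95% CI=({lo:.3f}, {hi:.3f})")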
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/decision_tree_feature_importance.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# 1. Generate synthetic data with 3 features for binary classification
X, y = make_classification(
    n_samples=100,      # Number of samples
    n_features=3,       # Number of features
    n_informative=3,    # Number of informative features
    n_redundant=0,      # No redundant features
    n_classes=2,        # Binary classification
    random_state=42     # Random seed for reproducibility
)

# 2. Split the data into training and testing sets (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 3. Train a Decision Tree Classifier with a maximum depth of 4
clf = DecisionTreeClassifier(max_depth=4, random_state=42)
clf.fit(X_train, y_train)

# 4. Extract the feature importance scores from the trained classifier
feature_importance = clf.feature_importances_

# 5. Define feature names for the plot (e.g., Feature 1, Feature 2, Feature 3)
features = np.array(['Feature 1', 'Feature 2', 'Feature 3'])

# 6. Plot a horizontal bar chart to visualize feature importance
plt.barh(features, feature_importance)
plt.xlabel('Importance Score')  # Label for the x-axis
plt.title('Feature Importance in Decision Tree')  # Title of the plot
plt.show()  # Display the plot
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/gam.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from pygam import LinearGAM, s

# Generate synthetic data
np.random.seed(42)
x1 = np.random.uniform(-3, 3, 200)
x2 = np.random.uniform(-3, 3, 200)
y = np.sin(x1) + 0.5 * np.cos(x2) + np.random.normal(0, 0.2, 200)

# Combine features into a matrix
X = np.column_stack((x1, x2))

# Define and fit the GAM model
gam = LinearGAM(s(0) + s(1))
gam.fit(X, y)

# Plot the partial dependence for each feature
fig, axs = plt.subplots(1, 2, figsize=(12, 5))
for i, ax in enumerate(axs):
    XX = gam.generate_X_grid(term=i)
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    ax.set_title(f'Partial Dependence of Feature x{i+1}')
    ax.set_xlabel(f'x{i+1}')
    ax.set_ylabel('Predicted y')

plt.tight_layout()
plt.show()
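
# Optional diagnostic (illustrative addition): pygam can print per-term
# statistics such as effective degrees of freedom and p-values, which
# complement the partial-dependence plots above.
gam.summary()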
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/linear_regression_example.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Example data: [Square Footage, Number of Bedrooms]
X = np.array([[1500, 3], [2000, 4], [2500, 4], [3000, 5]])
y = np.array([300000, 400000, 500000, 600000])

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X, y)

# Output the intercept and coefficients
print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)

# Predict house prices using the trained model
y_pred = model.predict(X)
print("Predicted Prices:", y_pred)

# Plot the true vs predicted prices
plt.scatter(range(len(y)), y, color='blue', label='True Prices')
plt.scatter(range(len(y_pred)), y_pred, color='red', marker='x', label='Predicted Prices')
plt.xlabel('Sample Index')
plt.ylabel('House Price ($)')
plt.title('True vs Predicted House Prices')
plt.legend()
plt.show()
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/logistic_regression_churn_prediction.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Example data: [Monthly Charges, Tenure]
X = np.array([[30, 1], [40, 3], [50, 5], [60, 7]])
y = np.array([0, 0, 1, 1])  # 0 = No churn, 1 = Churn

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X, y)

# Output the intercept and coefficients
print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)

# Make predictions and predict probabilities
y_pred = model.predict(X)
y_prob = model.predict_proba(X)[:, 1]

print("Predicted Labels:", y_pred)
print("Predicted Probabilities (Churn):", y_prob)

# Visualization of the predicted probabilities
plt.scatter(range(len(y)), y, color='blue', label='True Labels (0=No churn, 1=Churn)')
plt.plot(range(len(y_prob)), y_prob, color='red', marker='x', linestyle='--', label='Predicted Probabilities')
plt.xlabel('Sample Index')
plt.ylabel('Probability of Churn')
plt.title('Logistic Regression: Churn Prediction')
plt.legend()
plt.show()
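
# Illustrative extension (a sketch with made-up input values): score a
# hypothetical new customer with monthly charges of 55 and a tenure of 6
# months using the fitted model.
new_customer = np.array([[55, 6]])
print("P(churn) for a new customer [55, 6]:", model.predict_proba(new_customer)[0, 1])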
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/rule_based_cold_diagnosis.py:
--------------------------------------------------------------------------------
# Filename: rule_based_cold_diagnosis.py

def diagnose(symptoms):
    """
    Diagnoses a condition based on the provided symptoms using a simple rule-based system.

    Args:
        symptoms (dict): A dictionary where keys are symptom names (e.g., 'fever', 'cough')
                         and values are booleans indicating whether the symptom is present.

    Returns:
        str: The diagnosis based on the provided symptoms.
    """
    if symptoms.get('fever') and symptoms.get('cough'):
        return "Common Cold"
    elif symptoms.get('fever'):
        return "Fever of unknown origin"
    elif symptoms.get('cough'):
        return "Possible respiratory infection"
    else:
        return "No specific diagnosis"

if __name__ == "__main__":
    # Collect symptoms from the user
    fever = input("Do you have a fever? (yes/no): ").strip().lower() == 'yes'
    cough = input("Do you have a cough? (yes/no): ").strip().lower() == 'yes'

    # Create a dictionary of symptoms
    symptoms = {'fever': fever, 'cough': cough}

    # Get the diagnosis
    diagnosis = diagnose(symptoms)

    # Display the diagnosis
    print(f"Diagnosis: {diagnosis}")
--------------------------------------------------------------------------------
/ch03_interpretability_of_traditional_machine_learning_models/svm_decision_boundary_visualization.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn import datasets
from sklearn.svm import SVC
import matplotlib.pyplot as plt

# Load a sample dataset with two features
X, y = datasets.make_classification(n_samples=100, n_features=2,
                                    n_informative=2, n_redundant=0, n_repeated=0,
                                    n_classes=2, n_clusters_per_class=1,
                                    random_state=42)

# Initialize and train a linear SVM classifier
clf = SVC(kernel='linear')
clf.fit(X, y)

# Extract the weight vector and bias term
w = clf.coef_[0]
b = clf.intercept_[0]

# Define the decision boundary
x_points = np.linspace(min(X[:, 0]), max(X[:, 0]), 100)
y_points = -(w[0] / w[1]) * x_points - b / w[1]

# Plot the data points and decision boundary
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolors='k', label='Data Points')
plt.plot(x_points, y_points, color='red', label='Decision Boundary')

# Highlight the support vectors
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=100, facecolors='none', edgecolors='k', linewidths=1.5,
            label='Support Vectors')

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('SVM Decision Boundary with Support Vectors')
plt.legend()
plt.show()
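
# Optional check (illustrative sketch): the geometric margin of a linear SVM
# is 2 / ||w||, which quantifies how widely the two classes are separated by
# the learned hyperplane.
margin = 2 / np.linalg.norm(w)
print("Margin width:", margin)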
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/HiddenStates.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/HiddenStates.png
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/OriginalandPredictedSineWave.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/OriginalandPredictedSineWave.png
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/SampleAttentionHeatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/SampleAttentionHeatmap.png
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/cat.jpg
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/Images/vgg16_feature_map_visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch04_interpretability_of_deep_learning_models/Images/vgg16_feature_map_visualization.png
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/attention_heatmap_visualization.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Define a sample attention weights matrix (3x3 for simplicity)
attention_weights = np.array([[0.1, 0.2, 0.7],
                              [0.5, 0.3, 0.2],
                              [0.3, 0.4, 0.3]])

# Create the heatmap plot
plt.figure(figsize=(6, 5))
sns.heatmap(attention_weights, annot=True, fmt=".2f", cmap='Blues', cbar=False, linewidths=0.5)

# Add titles and labels
plt.title("Sample Attention Heatmap", fontsize=14)
plt.xlabel("Input Tokens", fontsize=12)
plt.ylabel("Output Tokens", fontsize=12)
plt.xticks(ticks=[0.5, 1.5, 2.5], labels=["Token 1", "Token 2", "Token 3"])
plt.yticks(ticks=[0.5, 1.5, 2.5], labels=["Output 1", "Output 2", "Output 3"])

# Display the plot
plt.tight_layout()
plt.show()
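
# Sanity check (illustrative addition): attention weights produced by a
# softmax should sum to 1 across the input tokens for every output token.
print("Rows sum to 1:", np.allclose(attention_weights.sum(axis=1), 1.0))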
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/rnn_hidden_states_visualization.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Check TensorFlow version and GPU availability
print("TensorFlow version:", tf.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "not available")

# Generate a synthetic sine wave dataset
time_steps = 100
X = np.sin(np.linspace(0, 20, time_steps))
X = X.reshape((1, time_steps, 1))  # Reshape for RNN input (batch_size, time_steps, features)

# Build a simple RNN model with 10 hidden units
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(units=10, return_sequences=True, input_shape=(time_steps, 1)),
    tf.keras.layers.Dense(1)  # Add a Dense layer for prediction
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Run inference using the sine wave data (the model is untrained, so the
# prediction reflects its random initial weights)
y_pred = model.predict(X)

# Extract the hidden states of the SimpleRNN layer via an intermediate model;
# the Dense output above has only one unit, so the hidden states must be read
# from the recurrent layer itself
hidden_state_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[0].output)
hidden_states = hidden_state_model.predict(X, verbose=0)

# Plot the original sine wave data
plt.figure(figsize=(12, 6))
plt.plot(X[0, :, 0], label='Original Sine Wave', color='gray', linestyle='--', linewidth=2)
plt.title("Original Sine Wave Data")
plt.xlabel("Time Step")
plt.ylabel("Value")
plt.grid()
plt.legend()
plt.show()

# Plot the predicted sine wave vs the original sine wave
plt.figure(figsize=(12, 6))
plt.plot(X[0, :, 0], label='Original Sine Wave', color='lightgray', linestyle='--', linewidth=2)
plt.plot(y_pred[0, :, 0], label='Predicted Sine Wave', color='blue', linewidth=2)
plt.title("Comparison of Original and Predicted Sine Wave")
plt.xlabel("Time Step")
plt.ylabel("Value")
plt.legend()
plt.grid()
plt.show()

# Plot the activations of all 10 hidden units over time
plt.figure(figsize=(14, 8))
for i in range(hidden_states.shape[-1]):
    plt.plot(hidden_states[0, :, i], label=f'Hidden Unit {i+1}', alpha=0.8)

plt.title("Activations of All Hidden Units Over Time")
plt.xlabel("Time Step")
plt.ylabel("Hidden State Activation")
plt.legend(loc='upper right', ncol=2, fontsize=10)
plt.grid()
plt.show()
--------------------------------------------------------------------------------
/ch04_interpretability_of_deep_learning_models/vgg16_feature_map_visualization.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Check TensorFlow version and GPU availability
print("TensorFlow version:", tf.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "not available")

# Load a pre-trained VGG16 model (without the fully connected layers)
model = tf.keras.applications.VGG16(weights='imagenet', include_top=False)

# Load and preprocess the input image (path matches this repository's layout)
image_path = 'ch04_interpretability_of_deep_learning_models/Images/cat.jpg'
try:
    image = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
except FileNotFoundError:
    print(f"Error: Image file '{image_path}' not found.")
    exit()

image_array = tf.keras.preprocessing.image.img_to_array(image)
image_array = np.expand_dims(image_array, axis=0)
image_array = tf.keras.applications.vgg16.preprocess_input(image_array)

# Define a model that outputs the feature maps of the first convolutional layer
layer_name = 'block1_conv1'
feature_map_model = tf.keras.Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Generate the feature maps for the input image
feature_maps = feature_map_model.predict(image_array)

# Check the shape of the feature maps
print("Feature map shape:", feature_maps.shape)

# Visualize the first 16 feature maps
fig, axes = plt.subplots(4, 4, figsize=(10, 10))
for i, ax in enumerate(axes.flat):
    if i < feature_maps.shape[-1]:
        ax.imshow(feature_maps[0, :, :, i], cmap='viridis')
    ax.axis('off')
plt.tight_layout()
plt.show()
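
# Optional exploration (illustrative sketch): list the other convolutional
# layers; re-running the visualization with a deeper layer name (e.g.,
# 'block5_conv3') shows progressively more abstract features.
conv_layers = [layer.name for layer in model.layers if 'conv' in layer.name]
print("Available convolutional layers:", conv_layers)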
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/Images/MeanActivations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch05_interpretability_of_large_language_models/Images/MeanActivations.png
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/Images/PCAVisualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch05_interpretability_of_large_language_models/Images/PCAVisualization.png
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/Images/embedding_probing_task.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch05_interpretability_of_large_language_models/Images/embedding_probing_task.png
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/embedding_probing_task.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from transformers import BertTokenizer, TFBertModel
import tensorflow as tf

# Load pretrained BERT model and tokenizer
model = TFBertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define a list of words and their corresponding part-of-speech labels
words = ["cat", "run", "dog", "jump"]  # Example words
labels = [0, 1, 0, 1]  # Labels: 0 for noun, 1 for verb

# Tokenize the words and obtain embeddings
inputs = tokenizer(words, return_tensors='tf', padding=True, truncation=True)
outputs = model(inputs['input_ids'])[0].numpy()

# Use mean embeddings as features for the classifier
features = outputs.mean(axis=1)

# Train a logistic regression classifier
classifier = LogisticRegression()
classifier.fit(features, labels)

# Make predictions
predictions = classifier.predict(features)

# Evaluate the classifier (on the training words themselves, so this measures
# fit rather than generalization)
accuracy = accuracy_score(labels, predictions)
print(f"Probing Task Accuracy: {accuracy:.2f}")
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/embedding_visualization.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from transformers import BertTokenizer, TFBertModel
import tensorflow as tf

# Load pretrained BERT model and tokenizer
model = TFBertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define a list of words to encode
words = ["king", "queen", "man", "woman"]

# Tokenize words and obtain embeddings
inputs = tokenizer(words, return_tensors='tf', padding=True, truncation=True)
outputs = model(inputs['input_ids'])[0].numpy()

# Compute the mean embeddings for each word
mean_embeddings = outputs.mean(axis=1)

# Perform PCA to reduce embeddings to 2D
pca = PCA(n_components=2)
reduced_embeddings = pca.fit_transform(mean_embeddings)

# Plot the 2D embeddings
plt.figure(figsize=(8, 6))
plt.scatter(reduced_embeddings[:, 0], reduced_embeddings[:, 1], color='blue')

# Annotate the plot with word labels
for i, word in enumerate(words):
    plt.annotate(word, (reduced_embeddings[i, 0], reduced_embeddings[i, 1]), fontsize=12)

plt.title("PCA Visualization of BERT Word Embeddings")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.grid(True)
plt.show()
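
# Optional diagnostic (illustrative addition): report how much variance the
# two principal components retain, to gauge how faithful the 2D view is.
print("Explained variance ratio:", pca.explained_variance_ratio_)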
--------------------------------------------------------------------------------
/ch05_interpretability_of_large_language_models/layer_activation_analysis.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
import matplotlib.pyplot as plt

# Load pretrained BERT model and tokenizer with hidden states output enabled
model = TFBertModel.from_pretrained('bert-base-uncased', output_hidden_states=True)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define a function to extract and analyze layer-wise activations
def extract_and_analyze_activations(model, inputs):
    outputs = model(inputs)
    hidden_states = outputs.hidden_states  # Extract all hidden states
    layer_means = [tf.reduce_mean(state).numpy() for state in hidden_states]  # Compute mean activation
    return hidden_states, layer_means

# Example usage with a sample input sentence
input_data = tokenizer("The cat sat on the mat.", return_tensors='tf', padding=True, truncation=True)
input_ids = input_data['input_ids']

# Extract activations and compute mean activations for each layer
layer_outputs, layer_means = extract_and_analyze_activations(model, input_ids)

# Print the number of layers and the shape of activations from the first layer
print("Number of layers analyzed:", len(layer_outputs))
print("Shape of activations from the first layer:", layer_outputs[0].shape)

# Plot mean activations across layers
plt.figure(figsize=(10, 6))
plt.plot(range(len(layer_means)), layer_means, marker='o', color='blue')
plt.title("Mean Activations Across BERT Layers")
plt.xlabel("Layer")
plt.ylabel("Mean Activation Value")
plt.grid(True)
plt.show()
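
# Optional extension (illustrative sketch): the spread of activations per
# layer can be as informative as the mean; compute it from the same states.
layer_stds = [tf.math.reduce_std(state).numpy() for state in layer_outputs]
print("Per-layer activation std:", [f"{s:.3f}" for s in layer_stds])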
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Attention-based Explanation for Time Series.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Attention-based Explanation for Time Series.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/DTW.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/DTW.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Decision Plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Decision Plot.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/DeepLIFT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/DeepLIFT.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Importance Analysis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Importance Analysis.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Interaction Detection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Interaction Detection.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Interaction Heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Feature Interaction Heatmap.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Grad-CAM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Grad-CAM.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/ICE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/ICE.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Integrated Gradients 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Integrated Gradients 1.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Integrated Gradients.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Integrated Gradients.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Local Interpretable Model-agnostic Explanations (LIME).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Local Interpretable Model-agnostic Explanations (LIME).png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/PDPs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/PDPs.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/SHAP Dependence.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/SHAP Dependence.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Saliency Maps for Recurrent Neural Networks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Saliency Maps for Recurrent Neural Networks.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Saliency Maps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Saliency Maps.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Shapley Additive Explanations (SHAP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/Shapley Additive Explanations (SHAP.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/SmoothGrad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/SmoothGrad.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/TimeSHAP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/TimeSHAP.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/shap_force_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/Images/shap_force_plot.png
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/attention_based_explanation_time_series.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Generate synthetic time series data
np.random.seed(0)
time_steps = 100
X = np.sin(np.linspace(0, 2 * np.pi, time_steps)) + np.random.normal(0, 0.1, time_steps)
y = np.roll(X, -1)

# Reshape data to fit LSTM input
X_input = X.reshape((1, time_steps, 1))
y_input = y.reshape((1, time_steps, 1))

# Define the Attention Layer
class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(AttentionLayer, self).__init__()
        self.score_layer = tf.keras.layers.Dense(1, activation='tanh')

    def call(self, x):
        score = self.score_layer(x)
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * x
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

# Build the model
inputs = tf.keras.Input(shape=(time_steps, 1))
lstm_output = tf.keras.layers.LSTM(50, return_sequences=True)(inputs)
context_vector, attention_weights = AttentionLayer()(lstm_output)
outputs = tf.keras.layers.Dense(1)(context_vector)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Compile and train the model
model.compile(optimizer='adam', loss='mse')
model.fit(X_input, y_input[:, -1, :], epochs=10, verbose=0)  # Adjust target values to match output shape

# Create a model to output predictions and attention weights
attention_model = tf.keras.Model(inputs=inputs, outputs=[outputs, attention_weights])

# Get predictions and attention weights
prediction, att_weights = attention_model.predict(X_input)

# Plot the original data, predicted values, and attention weights
plt.figure(figsize=(12, 6))

# Plot original data and predicted values on the left y-axis
ax1 = plt.gca()
ax1.plot(np.arange(time_steps), X, label='Original Data', color='blue')
ax1.plot(time_steps - 1, prediction[0], 'ro', label='Predicted Value')
ax1.set_xlabel('Time Step')
ax1.set_ylabel('Data Value')
ax1.legend(loc='upper left')

# Plot attention weights on the right y-axis
ax2 = ax1.twinx()
ax2.plot(np.arange(time_steps), att_weights[0, :, 0], label='Attention Weights', color='green', alpha=0.5)
ax2.set_ylabel('Attention Weights')
ax2.legend(loc='upper right')

plt.title('Original Data, Predicted Values, and Attention Weights')
plt.show()
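
# Optional interpretation aid (illustrative sketch): list the time steps the
# model attends to most, which often align with the most predictive inputs.
top_k = np.argsort(att_weights[0, :, 0])[-5:][::-1]
print("Top-5 attended time steps:", top_k)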
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/decision_plot.py:
--------------------------------------------------------------------------------
import shap
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor

# Load the California housing dataset
data = fetch_california_housing()
X, y = data.data, data.target

# Train the model
model = GradientBoostingRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

# Create a SHAP explainer
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# Subsample the data (e.g., 1000 observations)
subset_size = 1000
random_indices = np.random.choice(X.shape[0], subset_size, replace=False)
X_subset = X[random_indices]
shap_values_subset = shap_values[random_indices]

# Generate the decision plot with subsampled data
shap.decision_plot(explainer.expected_value, shap_values_subset, X_subset, feature_names=data.feature_names)
--------------------------------------------------------------------------------
/ch06_techniques_for_explainable_ai/610_Post-hoc/deep_lift.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.optim as optim
from captum.attr import DeepLift
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define torchvision transforms for the MNIST images
train_transform = transforms.Compose([transforms.ToTensor()])
test_transform = transforms.Compose([transforms.ToTensor()])

# Load the MNIST dataset using torchvision
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=train_transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)

# Define a simple CNN model in PyTorch
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 14 * 14, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.pool(x)
        x = x.view(-1, 32 * 14 * 14)
        x = torch.relu(self.fc1(x))
        # Return raw logits; nn.CrossEntropyLoss applies log-softmax internally,
        # so an explicit softmax here would be redundant and hurt training
        x = self.fc2(x)
        return x

# Initialize the model, loss function, and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model for one epoch (for simplicity)
for images, labels in train_loader:
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    break  # Train on one batch for demonstration

# Select a sample image and its baseline
sample_image, sample_label = next(iter(test_loader))
baseline = torch.zeros_like(sample_image)

# Compute DeepLIFT attributions with the target label
dl = DeepLift(model)
attributions = dl.attribute(sample_image, baseline, target=sample_label.item())

# Visualize the attributions
attributions = attributions.detach().numpy().squeeze()
plt.imshow(attributions, cmap='hot', interpolation='nearest')
plt.colorbar()
plt.title("DeepLIFT Attribution for MNIST Prediction")
plt.show()
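
# Optional context (illustrative addition): show the model's prediction for
# the attributed sample so the heatmap can be read against it.
with torch.no_grad():
    predicted = model(sample_image).argmax(dim=1).item()
print(f"True label: {sample_label.item()}, predicted label: {predicted}")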
-------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/dtw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from fastdtw import fastdtw 4 | 5 | # Generate synthetic time-series data 6 | time_series = np.sin(np.linspace(0, 2 * np.pi, 100)).flatten() # Flatten to 1-D 7 | reference_series = np.sin(np.linspace(0, 2 * np.pi, 120) + 0.5).flatten() # Flatten to 1-D 8 | 9 | # Ensure both time_series and reference_series are 1-D 10 | print(f"Time series shape: {time_series.shape}, Reference series shape: {reference_series.shape}") 11 | 12 | # Define custom distance function for scalar values 13 | def scalar_distance(u, v): 14 | return abs(u - v) 15 | 16 | # Apply DTW to align the sequences using the custom distance function 17 | distance, path = fastdtw(time_series, reference_series, dist=scalar_distance) 18 | 19 | # Plot the aligned sequences and highlight the warping path 20 | plt.figure(figsize=(10, 5)) 21 | plt.plot(time_series, label="Time Series") 22 | plt.plot( 23 | np.arange(120) * 100 / 120, reference_series, # Rescale the reference index to the time-series axis, matching the warping-path mapping below 24 | label="Reference Series", 25 | alpha=0.7 26 | ) 27 | 28 | # Highlight the warping path 29 | for (i, j) in path: 30 | plt.plot([i, j * 100 / 120], [time_series[i], reference_series[j]], color='gray', alpha=0.5) 31 | 32 | plt.title(f"DTW Alignment (Distance: {distance:.2f})") 33 | plt.xlabel("Time Index") 34 | plt.ylabel("Value") 35 | plt.legend() 36 | plt.grid() 37 | plt.show() 38 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/feature_importance_analysis.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_iris 2 | from sklearn.tree import DecisionTreeClassifier 3 | import matplotlib.pyplot as plt 4 | 5 | # Load the Iris dataset 6 | data = load_iris() 7 | X, y = data.data, data.target 8 | 9 | # Train a Decision Tree Classifier 10 | model = DecisionTreeClassifier(random_state=42) 11 | model.fit(X, y) 12 | 13 | # Extract feature importance 14 | importances = model.feature_importances_ 15 | feature_names = data.feature_names 16 | 17 | # Plot feature importance 18 | plt.figure(figsize=(8, 6)) 19 | plt.barh(feature_names, importances, color='skyblue') 20 | plt.xlabel("Feature Importance Score") 21 | plt.ylabel("Features") 22 | plt.title("Feature Importance Analysis for Decision Tree Classifier") 23 | plt.show() -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/feature_interaction_detection.py: -------------------------------------------------------------------------------- 1 | import shap 2 | import numpy as np 3 | from sklearn.datasets import fetch_california_housing 4 | from sklearn.ensemble import GradientBoostingRegressor 5 | import matplotlib.pyplot as plt 6 | 7 | # Load the California housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | feature_names = data.feature_names 11 | 12 | # Train the model 13 | model = GradientBoostingRegressor(n_estimators=100, random_state=42) 14 | model.fit(X, y) 15 | 16 | # Create SHAP explainer and compute SHAP values 17 | explainer = shap.TreeExplainer(model) 18 | shap_values = explainer.shap_values(X) 19 | 20 | # Compute SHAP interaction values 21 | shap_interaction_values = 
explainer.shap_interaction_values(X) 22 | 23 | # Plot the SHAP interaction values (summary plot) 24 | shap.summary_plot(shap_interaction_values, X, feature_names=feature_names, plot_type="compact_dot") 25 | 26 | # Visualize a specific feature pair interaction 27 | shap.dependence_plot(("MedInc", "AveRooms"), shap_interaction_values, X, feature_names=feature_names, interaction_index="AveRooms") 28 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/feature_interaction_heatmap.py: -------------------------------------------------------------------------------- 1 | import shap 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.datasets import fetch_california_housing 5 | from sklearn.ensemble import GradientBoostingRegressor 6 | 7 | # Load the California Housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | feature_names = data.feature_names 11 | 12 | # Train a Gradient Boosting Regressor 13 | model = GradientBoostingRegressor(n_estimators=100, random_state=42) 14 | model.fit(X, y) 15 | 16 | # Compute SHAP interaction values 17 | explainer = shap.TreeExplainer(model) 18 | shap_interaction_values = explainer.shap_interaction_values(X) 19 | 20 | # Extract the main SHAP values from the interaction matrix (diagonal elements only) 21 | shap_values_main = np.array([shap_interaction_values[i][:, i] for i in range(X.shape[1])]).T 22 | 23 | # Plot the Feature Interaction Heatmap 24 | shap.summary_plot(shap_values_main, X, feature_names=feature_names, plot_type="bar") 25 | plt.title("Feature Importance for California Housing Dataset") 26 | plt.show() 27 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/grad_cam.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from tensorflow.keras.applications import VGG16 5 | from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions 6 | from tensorflow.keras.preprocessing.image import img_to_array, load_img 7 | import cv2 # OpenCV is used below to resize and colorize the heatmap 8 | 9 | # Load a pre-trained VGG16 model 10 | model = VGG16(weights='imagenet') 11 | 12 | # Load and preprocess the input image 13 | image_path = 'ch04_interpretability_of_deep_learning_models/Images/cat.jpg' 14 | image = load_img(image_path, target_size=(224, 224)) 15 | 16 | # Convert the image to an array and preprocess it 17 | image_array = img_to_array(image) 18 | image_array = np.expand_dims(image_array, axis=0) 19 | image_array = preprocess_input(image_array) 20 | 21 | # Get the model prediction 22 | predictions = model.predict(image_array) 23 | predicted_class = np.argmax(predictions[0]) 24 | 25 | # Function to compute Grad-CAM heatmap 26 | def compute_gradcam(model, image_array, class_idx, layer_name='block5_conv3'): 27 | # Create a model that maps the input image to the activations of the last convolutional layer 28 | # and the model's output 29 | grad_model = tf.keras.models.Model( 30 | [model.inputs], [model.get_layer(layer_name).output, model.output] 31 | ) 32 | 33 | # Record operations for automatic differentiation 34 | with tf.GradientTape() as tape: 35 | conv_output, predictions = grad_model(image_array) 36 | loss = predictions[:, class_idx] 37 | 38 | # Compute gradients with respect to the convolutional output 39 | grads = tape.gradient(loss, conv_output) 40 | 41 | # Compute the mean intensity of the gradients for each channel 42 | pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)) 43 | 44 | # Extract the feature maps from the convolutional layer output 45 | conv_output = conv_output[0] 46 | 47 | # Compute the weighted sum of the feature maps 48 | heatmap = tf.reduce_sum(tf.multiply(pooled_grads, conv_output), axis=-1) 49 | 50 | # Apply ReLU and normalize the heatmap 51 | heatmap = np.maximum(heatmap, 0) 52 | heatmap /= np.max(heatmap) 53 | 54 | return heatmap 55 | 56 | # Generate the Grad-CAM heatmap 57 | heatmap = compute_gradcam(model, image_array, predicted_class) 58 | 59 | # Resize heatmap to match the input image size 60 | heatmap = cv2.resize(heatmap, (224, 224)) # Resize with OpenCV 61 | heatmap = np.uint8(255 * heatmap) # Scale the heatmap to the 0-255 range 62 | 63 | # Convert the heatmap to a color map 64 | heatmap_color = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 65 | 66 | # Overlay the heatmap on the original image 67 | superimposed_img = cv2.addWeighted(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR), 0.6, heatmap_color, 0.4, 0) 68 | 69 | # Display the original image, heatmap, and overlay 70 | fig, ax = plt.subplots(1, 3, figsize=(18, 6)) 71 | 72 | # Display original image 73 | ax[0].imshow(image) 74 | ax[0].axis('off') 75 | ax[0].set_title("Original Image") 76 | 77 | # Display the heatmap only 78 | ax[1].imshow(heatmap, cmap='jet') 79 | ax[1].axis('off') 80 | ax[1].set_title("Grad-CAM Heatmap") 81 | 82 | # Display the overlay image 83 | ax[2].imshow(cv2.cvtColor(superimposed_img, cv2.COLOR_BGR2RGB)) 84 | ax[2].axis('off') 85 | ax[2].set_title("Overlay Image") 86 | 87 | # Show the plot 88 | plt.tight_layout() 89 | plt.show() 90 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/ice.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.ensemble import RandomForestRegressor 4 | from sklearn.datasets import fetch_california_housing 5 | from sklearn.inspection import PartialDependenceDisplay 6 | 7 | # Load the California Housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | 11 | # Train a Random Forest model 12 | model = RandomForestRegressor(n_estimators=100, random_state=42) 13 | model.fit(X, y) 14 | 15 | # Create ICE Plot for the feature "MedInc" (Median Income) 16 | fig, ax = plt.subplots(figsize=(10, 6)) 17 | display = PartialDependenceDisplay.from_estimator( 18 | model, X, features=[0], kind="individual", ax=ax, feature_names=data.feature_names 19 | ) 20 | ax.set_title("ICE Plot for MedInc (Median Income)") 21 | ax.set_xlabel("MedInc (Median Income)") 22 | ax.set_ylabel("Predicted House Price") 23 | plt.show() 24 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/integrated_gradients.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Define a simple CNN model in TensorFlow/Keras 6 | class SimpleCNN(tf.keras.Model): 7 | def __init__(self): 8 | super(SimpleCNN, self).__init__() 9 | self.conv1 = tf.keras.layers.Conv2D(32, (3, 3), padding="same", activation="relu") 10 | self.conv2 = tf.keras.layers.Conv2D(64, (3, 3), padding="same", activation="relu") 11 | self.flatten = tf.keras.layers.Flatten() 12 | self.fc1 = tf.keras.layers.Dense(128, activation="relu") 13 |
self.fc2 = tf.keras.layers.Dense(10, activation="softmax") 14 | 15 | def call(self, x): 16 | x = self.conv1(x) 17 | x = self.conv2(x) 18 | x = self.flatten(x) 19 | x = self.fc1(x) 20 | return self.fc2(x) 21 | 22 | # Load the MNIST dataset 23 | (train_images, train_labels), _ = tf.keras.datasets.mnist.load_data() 24 | train_images = train_images[..., tf.newaxis] / 255.0 # Rescale to [0, 1] 25 | 26 | # Initialize the model, loss function, and optimizer 27 | model = SimpleCNN() 28 | model.compile(optimizer=tf.keras.optimizers.Adam(0.001), 29 | loss=tf.keras.losses.SparseCategoricalCrossentropy(), 30 | metrics=["accuracy"]) 31 | 32 | # Train the model (one epoch for simplicity) 33 | model.fit(train_images, train_labels, batch_size=64, epochs=1) 34 | 35 | # Select a sample image and baseline for Integrated Gradients 36 | sample_image = train_images[0:1] # Shape (1, 28, 28, 1) 37 | baseline = tf.zeros_like(sample_image) 38 | 39 | # Function to calculate Integrated Gradients 40 | def compute_integrated_gradients(model, input_image, baseline, target_class_idx, m_steps=50): 41 | # Generate interpolated images between baseline and input 42 | interpolated_images = [ 43 | baseline + (float(i) / m_steps) * (input_image - baseline) 44 | for i in range(m_steps + 1) 45 | ] 46 | interpolated_images = tf.concat(interpolated_images, axis=0) 47 | 48 | with tf.GradientTape() as tape: 49 | tape.watch(interpolated_images) 50 | # Get model predictions for interpolated images 51 | predictions = model(interpolated_images) 52 | target_predictions = predictions[:, target_class_idx] 53 | 54 | # Compute gradients between predictions and interpolated images 55 | grads = tape.gradient(target_predictions, interpolated_images) 56 | 57 | # Average gradients and compute attributions 58 | avg_grads = tf.reduce_mean(grads, axis=0) 59 | integrated_grads = (input_image - baseline) * avg_grads 60 | return integrated_grads 61 | 62 | # Compute attributions using Integrated Gradients 63 | target_class = train_labels[0] # The true class for the sample image 64 | attributions = compute_integrated_gradients(model, sample_image, baseline, target_class) 65 | 66 | # Visualize the attributions 67 | attributions = attributions.numpy().squeeze() 68 | plt.imshow(attributions, cmap="hot", interpolation="nearest") 69 | plt.colorbar() 70 | plt.title("Integrated Gradients Attribution for MNIST Prediction") 71 | plt.show() 72 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/local_interpretable_model_agnostic_explanations.py: -------------------------------------------------------------------------------- 1 | import lime 2 | import lime.lime_text 3 | from sklearn.pipeline import make_pipeline 4 | from sklearn.feature_extraction.text import TfidfVectorizer 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.datasets import load_files 7 | from sklearn.model_selection import train_test_split 8 | import numpy as np 9 | 10 | # Specify the path to the IMDB dataset. Ensure the dataset is downloaded and located in this path. 11 | # The IMDB dataset can be downloaded from: 12 | # https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz 13 | 14 | # 1. Load the IMDB dataset 15 | data = load_files("aclImdb/train", categories=["pos", "neg"], encoding="utf-8", decode_error="replace") 16 | X = np.array(data.data) 17 | y = data.target # Directly use data.target without further transformation 18 | 19 | # 2. 
Split the dataset 20 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) 21 | 22 | # 3. Create the text classification pipeline 23 | vectorizer = TfidfVectorizer() 24 | classifier = LogisticRegression(random_state=42, max_iter=1000) 25 | pipeline = make_pipeline(vectorizer, classifier) 26 | 27 | # 4. Train the model 28 | pipeline.fit(X_train, y_train) 29 | 30 | # 5. Initialize the LIME explainer 31 | explainer = lime.lime_text.LimeTextExplainer(class_names=["NEGATIVE", "POSITIVE"]) 32 | 33 | # 6. Choose an instance to explain 34 | text_instance = X_test[0] 35 | exp = explainer.explain_instance(text_instance, pipeline.predict_proba, num_features=10) 36 | 37 | # 7. Save the explanation as an HTML file 38 | exp.save_to_file('lime_explanation.html') 39 | print("LIME explanation saved as 'lime_explanation.html'") 40 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/lrp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | # Create XOR dataset 7 | X = torch.Tensor([[0,0], [0,1], [1,0], [1,1]]) 8 | y = torch.Tensor([[0], [1], [1], [0]]) 9 | 10 | # Define a simple neural network 11 | class SimpleNN(nn.Module): 12 | def __init__(self): 13 | super(SimpleNN, self).__init__() 14 | self.fc1 = nn.Linear(2, 4) # First layer with 4 neurons 15 | self.fc2 = nn.Linear(4, 1) # Output layer 16 | 17 | def forward(self, x): 18 | x = F.relu(self.fc1(x)) 19 | x = torch.sigmoid(self.fc2(x)) 20 | return x 21 | 22 | # Initialize the model, loss function, and optimizer 23 | model = SimpleNN() 24 | criterion = nn.BCELoss() 25 | optimizer = torch.optim.SGD(model.parameters(), lr=0.1) 26 | 27 | # Train the model 28 | for epoch in range(1000): 29 | optimizer.zero_grad() 30 | outputs = model(X) 31 | loss = criterion(outputs, y) 32 | loss.backward() 33 | optimizer.step() 34 | if (epoch + 1) % 200 == 0: 35 | print(f'Epoch [{epoch + 1}/1000], Loss: {loss.item():.4f}') 36 | 37 | # Select an input for explanation 38 | x_input = torch.Tensor([[1.0, 1.0]]) 39 | output = model(x_input) 40 | print(f'\nPrediction for input {x_input.numpy()}: {output.item():.4f}') 41 | 42 | # Forward pass, recording intermediate activations 43 | x0 = x_input.detach() 44 | z1 = model.fc1(x0) 45 | a1 = F.relu(z1) 46 | z2 = model.fc2(a1) 47 | a2 = torch.sigmoid(z2) 48 | 49 | # LRP parameters 50 | epsilon = 1e-6 51 | 52 | # Calculate relevance R2 at the output layer 53 | R2 = a2.item() # Get scalar value 54 | 55 | # Propagate relevance from the output layer to the hidden layer 56 | w2 = model.fc2.weight.data.squeeze() # Shape becomes [4] 57 | a1 = a1.detach().squeeze() # Shape becomes [4] 58 | z = a1 * w2 # Element-wise multiplication, shape [4] 59 | s = z.sum() 60 | denominator = s + epsilon * s.sign() 61 | R1 = (z / denominator) * R2 # Shape is [4] 62 | 63 | # Propagate relevance from the hidden layer to the input layer 64 | w1 = model.fc1.weight.data # Shape is [4, 2] 65 | x0 = x0.detach().squeeze() # Shape is [2] 66 | R0 = torch.zeros_like(x0) # Shape is [2] 67 | 68 | # Iterate over each neuron in the hidden layer 69 | for i in range(w1.shape[0]): 70 | w = w1[i] # Shape is [2] 71 | z = x0 * w # Element-wise multiplication, shape [2] 72 | s = z.sum() 73 | denominator = s + epsilon * s.sign() 74 | R0 += (z / denominator) * R1[i].item() # Convert R1[i] to scalar 75 | 76 | # Output 
the relevance scores 77 | print(f'\nInput relevance scores: {R0}') 78 | print(f'Sum of input relevances: {R0.sum().item():.4f}') 79 | print(f'Output relevance: {R2:.4f}') 80 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/pdps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.ensemble import RandomForestRegressor 4 | from sklearn.datasets import fetch_california_housing 5 | from sklearn.inspection import PartialDependenceDisplay 6 | 7 | # Load the California Housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | 11 | # Train a Random Forest model 12 | model = RandomForestRegressor(n_estimators=100, random_state=42) 13 | model.fit(X, y) 14 | 15 | # Plot Partial Dependence for the feature "MedInc" (Median Income) 16 | fig, ax = plt.subplots(figsize=(8, 6)) 17 | PartialDependenceDisplay.from_estimator(model, X, [0], feature_names=data.feature_names, ax=ax) 18 | ax.set_title("Partial Dependence Plot for MedInc (Median Income)") 19 | ax.set_xlabel("MedInc (Median Income)") 20 | ax.set_ylabel("Predicted House Price") 21 | plt.show() 22 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/saliency_Maps_rnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import matplotlib.pyplot as plt 4 | 5 | # Generate synthetic time series data 6 | np.random.seed(42) 7 | time_steps = 100 8 | X = np.sin(np.linspace(0, 4 * np.pi, time_steps)) + np.random.normal(0, 0.1, time_steps) 9 | y = np.roll(X, -1) 10 | 11 | # Reshape data to fit LSTM input 12 | X_input = X.reshape((1, time_steps, 1)) 13 | y_input = y.reshape((1, time_steps, 1)) 14 | 15 | # Define LSTM model to predict the entire sequence 16 | inputs = tf.keras.Input(shape=(time_steps, 1)) 17 | lstm_output = tf.keras.layers.LSTM(50, return_sequences=True)(inputs) 18 | outputs = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))(lstm_output) 19 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 20 | 21 | # Compile and train the model 22 | model.compile(optimizer='adam', loss='mse') 23 | model.fit(X_input, y_input, epochs=10, verbose=0) 24 | 25 | # Convert X and y to TensorFlow tensors 26 | X_tensor = tf.convert_to_tensor(X_input, dtype=tf.float32) 27 | y_tensor = tf.convert_to_tensor(y_input, dtype=tf.float32) 28 | 29 | # Compute the saliency map 30 | with tf.GradientTape() as tape: 31 | tape.watch(X_tensor) 32 | predictions = model(X_tensor) 33 | # Use MeanSquaredError loss object 34 | loss_object = tf.keras.losses.MeanSquaredError() 35 | loss = tf.reduce_mean(loss_object(y_tensor, predictions)) 36 | 37 | # Compute the gradients 38 | grads = tape.gradient(loss, X_tensor) 39 | grads = grads.numpy()[0, :, 0] # Extract gradients for each time step 40 | 41 | # Plot the original data, predicted data, and saliency map 42 | fig, ax1 = plt.subplots(figsize=(12, 6)) 43 | 44 | # Plot the original data and predicted data on the left y-axis 45 | color = 'tab:blue' 46 | ax1.set_xlabel('Time Step') 47 | ax1.set_ylabel('Data Value', color=color) 48 | ax1.plot(X_input[0, :, 0], color='blue', label='Original Data') 49 | ax1.plot(predictions.numpy()[0, :, 0], color='orange', label='Predicted Data') 50 | ax1.tick_params(axis='y', labelcolor=color) 51 | 
ax1.legend(loc='upper left') 52 | 53 | # Plot the saliency map on the right y-axis 54 | ax2 = ax1.twinx() 55 | color = 'tab:red' 56 | ax2.set_ylabel('Gradient Magnitude', color=color) 57 | ax2.plot(np.abs(grads), color=color, label='Saliency Map') 58 | ax2.tick_params(axis='y', labelcolor=color) 59 | ax2.legend(loc='upper right') 60 | 61 | plt.title('Original Data, Predicted Data, and Saliency Map') 62 | plt.show() 63 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/saliency_maps.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load the MNIST dataset 6 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 7 | train_images = train_images[..., np.newaxis] / 255.0 8 | test_images = test_images[..., np.newaxis] / 255.0 9 | 10 | # Define a simple CNN model 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 13 | tf.keras.layers.MaxPooling2D((2, 2)), 14 | tf.keras.layers.Flatten(), 15 | tf.keras.layers.Dense(128, activation='relu'), 16 | tf.keras.layers.Dense(10, activation='softmax') 17 | ]) 18 | 19 | # Compile and train the model 20 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 21 | model.fit(train_images, train_labels, epochs=1, batch_size=64) 22 | 23 | # Function to compute the saliency map 24 | def compute_saliency_map(model, image, target_class): 25 | image = tf.convert_to_tensor(image[np.newaxis, ...], dtype=tf.float32) 26 | 27 | with tf.GradientTape() as tape: 28 | tape.watch(image) 29 | predictions = model(image) 30 | loss = predictions[0, target_class] 31 | 32 | # Compute the gradient of the loss with respect to the input image 33 | gradient = tape.gradient(loss, image) 34 | saliency = tf.abs(gradient)[0] 35 | 36 | # Normalize the saliency map 37 | saliency = saliency.numpy().squeeze() 38 | saliency = (saliency - saliency.min()) / (saliency.max() - saliency.min()) 39 | 40 | return saliency 41 | 42 | # Select a sample image and compute the saliency map 43 | sample_image = test_images[0] 44 | target_class = np.argmax(model.predict(sample_image[np.newaxis, ...])) 45 | saliency_map = compute_saliency_map(model, sample_image, target_class) 46 | 47 | # Visualize the original image and its saliency map 48 | plt.subplot(1, 2, 1) 49 | plt.imshow(sample_image.squeeze(), cmap='gray') 50 | plt.title("Original Image") 51 | 52 | plt.subplot(1, 2, 2) 53 | plt.imshow(saliency_map, cmap='hot') 54 | plt.title("Saliency Map") 55 | plt.colorbar() 56 | plt.show() -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/shap_dependence.py: -------------------------------------------------------------------------------- 1 | import shap 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.ensemble import GradientBoostingRegressor 5 | from sklearn.datasets import fetch_california_housing 6 | 7 | # Load the California Housing dataset 8 | data = fetch_california_housing() 9 | X, y = data.data, data.target 10 | feature_names = data.feature_names 11 | 12 | # Train a Gradient Boosting model 13 | model = GradientBoostingRegressor(n_estimators=100, random_state=42) 14 | model.fit(X, y) 15 | 16 | # Compute SHAP values 17 | explainer = 
shap.Explainer(model, X) 18 | shap_values = explainer(X) 19 | 20 | # Extract SHAP values for dependence plot 21 | # Using `shap_values.values` to get the actual SHAP values as an array 22 | shap.dependence_plot("AveRooms", shap_values.values, X, feature_names=feature_names) 23 | plt.title("SHAP Dependence Plot for AveRooms") 24 | plt.xlabel("Average Number of Rooms (AveRooms)") 25 | plt.ylabel("SHAP Value (Impact on Model Output)") 26 | plt.show() 27 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/shap_force_plot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import shap\n", 10 | "import numpy as np\n", 11 | "from sklearn.datasets import fetch_california_housing\n", 12 | "from sklearn.ensemble import GradientBoostingRegressor\n", 13 | "\n", 14 | "# Load the California housing dataset\n", 15 | "data = fetch_california_housing()\n", 16 | "X, y = data.data, data.target\n", 17 | "\n", 18 | "# Train the model\n", 19 | "model = GradientBoostingRegressor(n_estimators=100, random_state=42)\n", 20 | "model.fit(X, y)\n", 21 | "\n", 22 | "# Create a SHAP explainer\n", 23 | "explainer = shap.TreeExplainer(model)\n", 24 | "shap_values = explainer.shap_values(X)\n", 25 | "\n", 26 | "# Select an instance to explain\n", 27 | "instance_index = 0\n", 28 | "\n", 29 | "# Create the force plot\n", 30 | "force_plot = shap.force_plot(explainer.expected_value, shap_values[instance_index], X[instance_index], feature_names=data.feature_names)\n", 31 | "\n", 32 | "# Save the force plot as an HTML file\n", 33 | "shap.save_html(\"shap_force_plot.html\", force_plot)" 34 | ] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": ".venv", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.9.9" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 2 58 | } 59 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/shap_force_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/610_Post-hoc/shap_force_plot.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/shapley_additive_explanations.py: -------------------------------------------------------------------------------- 1 | import shap 2 | import xgboost as xgb 3 | import pandas as pd 4 | from sklearn.datasets import load_breast_cancer 5 | from sklearn.model_selection import train_test_split 6 | 7 | # Load the Breast Cancer dataset 8 | data = load_breast_cancer() 9 | X = pd.DataFrame(data.data, columns=data.feature_names) 10 | y = data.target 11 | 12 | # Split the dataset 13 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 14 | 15 | # Train a gradient boosting model 16 | model = xgb.XGBClassifier(random_state=42) 17 | 
model.fit(X_train, y_train) 18 | 19 | # Initialize the SHAP explainer 20 | explainer = shap.Explainer(model, X_train) 21 | shap_values = explainer(X_test) 22 | 23 | # Plot the SHAP summary plot 24 | shap.summary_plot(shap_values, X_test) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/smoothgrad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load the MNIST dataset 6 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 7 | train_images = train_images[..., np.newaxis] / 255.0 8 | test_images = test_images[..., np.newaxis] / 255.0 9 | 10 | # Define a simple CNN model 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 13 | tf.keras.layers.MaxPooling2D((2, 2)), 14 | tf.keras.layers.Flatten(), 15 | tf.keras.layers.Dense(128, activation='relu'), 16 | tf.keras.layers.Dense(10, activation='softmax') 17 | ]) 18 | 19 | # Compile and train the model 20 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 21 | model.fit(train_images, train_labels, epochs=1, batch_size=64) 22 | 23 | # Function to compute SmoothGrad 24 | def smoothgrad(image, model, target_class, num_samples=50, noise_level=0.1): 25 | grads = [] 26 | for _ in range(num_samples): 27 | noise = np.random.normal(0, noise_level, image.shape) 28 | noisy_image = image + noise 29 | noisy_image = tf.convert_to_tensor(noisy_image[np.newaxis, ...], dtype=tf.float32) 30 | 31 | with tf.GradientTape() as tape: 32 | tape.watch(noisy_image) 33 | prediction = model(noisy_image) 34 | loss = prediction[0, target_class] 35 | 36 | gradient = tape.gradient(loss, noisy_image) 37 | grads.append(gradient.numpy().squeeze()) 38 | 39 | # Average the gradients 40 | smooth_grad = np.mean(grads, axis=0) 41 | return smooth_grad 42 | 43 | # Select a sample image and compute SmoothGrad attributions 44 | sample_image = test_images[0] 45 | target_class = np.argmax(model.predict(sample_image[np.newaxis, ...])) 46 | attributions = smoothgrad(sample_image, model, target_class) 47 | 48 | # Visualize the attributions 49 | plt.imshow(attributions, cmap='hot', interpolation='nearest') 50 | plt.colorbar() 51 | plt.title("SmoothGrad Attribution for MNIST Prediction") 52 | plt.show() -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/610_Post-hoc/timeshap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import shap 4 | 5 | # Generate synthetic time-series data 6 | time_series_data = np.random.rand(100, 10, 1) # 100 samples, 10 time steps, 1 feature 7 | labels = np.random.randint(0, 2, size=(100,)) 8 | 9 | # Define a simple LSTM model 10 | model = tf.keras.Sequential([ 11 | tf.keras.layers.Input(shape=(10, 1)), 12 | tf.keras.layers.LSTM(50), 13 | tf.keras.layers.Dense(1, activation='sigmoid') 14 | ]) 15 | model.compile(optimizer='adam', loss='binary_crossentropy') 16 | model.fit(time_series_data, labels, epochs=5, verbose=0) 17 | 18 | # Define the prediction function for SHAP 19 | def predict_fn(data): 20 | # Reshape input data back to 3D for the LSTM model 21 | return model.predict(data.reshape(-1, 10, 1)).flatten() 22 | 23 | # Select a background dataset (subset 
of training data) 24 | background_data = time_series_data[:50].reshape(50, -1) # Flatten to 2D 25 | 26 | # Initialize SHAP KernelExplainer 27 | explainer = shap.KernelExplainer(predict_fn, background_data) 28 | 29 | # Select an instance to explain and flatten it 30 | instance = time_series_data[0:1].reshape(1, -1) # Flatten to 2D 31 | 32 | # Compute SHAP values 33 | shap_values = explainer.shap_values(instance, nsamples=100) 34 | 35 | # Display SHAP values 36 | print("SHAP values for each flattened feature:", shap_values) 37 | 38 | # Visualization of SHAP values 39 | import matplotlib.pyplot as plt 40 | 41 | # Plot SHAP values for the first instance 42 | plt.bar(range(len(shap_values[0])), shap_values[0]) 43 | plt.xlabel('Flattened Feature Index') 44 | plt.ylabel('SHAP Value') 45 | plt.title('SHAP Values for Time-Series Instance') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/Images/Causal Discovery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/620_Causal-Inference/Images/Causal Discovery.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/causal_discovery.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from causallearn.search.ConstraintBased.PC import pc 3 | from causallearn.utils.GraphUtils import GraphUtils 4 | import matplotlib.pyplot as plt 5 | import networkx as nx 6 | 7 | # Generate synthetic data 8 | np.random.seed(42) 9 | n_samples = 1000 10 | X = np.random.randn(n_samples) 11 | Y = 0.5 * X + np.random.randn(n_samples) * 0.1 12 | Z = 0.3 * X + 0.4 * Y + np.random.randn(n_samples) * 0.1 13 | data = np.column_stack((X, Y, Z)) 14 | 15 | # Define variable names 16 | variable_names = ['X', 'Y', 'Z'] 17 | 18 | # Apply PC algorithm for causal discovery 19 | causal_graph = pc(data, alpha=0.05) 20 | # Print the learned graph 21 | print(causal_graph.G) 22 | 23 | # Build a NetworkX graph from the adjacency matrix 24 | def build_nx_graph(causal_graph, labels): 25 | import networkx as nx 26 | G = nx.DiGraph() 27 | num_nodes = len(labels) 28 | G.add_nodes_from(range(num_nodes)) 29 | # Add edges 30 | for i in range(num_nodes): 31 | for j in range(num_nodes): 32 | edge_type = causal_graph.G.graph[i][j] 33 | if edge_type == 1: # Directed edge from i to j 34 | G.add_edge(i, j) 35 | elif edge_type == -1: # Directed edge from j to i 36 | G.add_edge(j, i) 37 | elif edge_type == 2: # Undirected edge 38 | G.add_edge(i, j) 39 | G.add_edge(j, i) 40 | # Relabel nodes 41 | mapping = {i: label for i, label in enumerate(labels)} 42 | G = nx.relabel_nodes(G, mapping) 43 | return G 44 | 45 | # Build the graph 46 | G = build_nx_graph(causal_graph, variable_names) 47 | 48 | # Plot the causal graph 49 | pos = nx.spring_layout(G) 50 | nx.draw( 51 | G, 52 | pos, 53 | with_labels=True, 54 | node_size=1500, 55 | node_color='lightblue', 56 | arrowsize=20, 57 | font_size=12, 58 | font_weight='bold' 59 | ) 60 | plt.title('Causal Graph') 61 | plt.show() 62 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/causal_mediation.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import statsmodels.api as sm 4 | from statsmodels.formula.api import ols 5 | 6 | # Generate synthetic data 7 | np.random.seed(42) 8 | n = 100 9 | X = np.random.randn(n) 10 | M = 0.5 * X + np.random.randn(n) * 0.5 # Mediator influenced by X 11 | Y = 0.3 * X + 0.7 * M + np.random.randn(n) * 0.5 # Outcome influenced by X and M 12 | 13 | # Create a DataFrame 14 | data = pd.DataFrame({'X': X, 'M': M, 'Y': Y}) 15 | 16 | # Step 1: Fit the mediator model (M ~ X) 17 | mediator_model = ols('M ~ X', data=data).fit() 18 | 19 | # Step 2: Fit the outcome model (Y ~ X + M) 20 | outcome_model = ols('Y ~ X + M', data=data).fit() 21 | 22 | # Extract coefficients 23 | alpha_1 = mediator_model.params['X'] 24 | beta_1 = outcome_model.params['X'] 25 | beta_2 = outcome_model.params['M'] 26 | 27 | # Calculate direct, indirect, and total effects 28 | indirect_effect = alpha_1 * beta_2 29 | direct_effect = beta_1 30 | total_effect = direct_effect + indirect_effect 31 | proportion_mediated = indirect_effect / total_effect 32 | 33 | print(f"Indirect Effect (IE): {indirect_effect:.4f}") 34 | print(f"Direct Effect (DE): {direct_effect:.4f}") 35 | print(f"Total Effect (TE): {total_effect:.4f}") 36 | print(f"Proportion Mediated: {proportion_mediated:.2%}") -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/irm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tensorflow.keras.layers import Dense 4 | import matplotlib.pyplot as plt 5 | 6 | # Generate synthetic data for two environments 7 | def generate_data(n, env_factor): 8 | X = np.random.randn(n, 2) 9 | Y = (X[:, 0] * X[:, 1] > 0).astype(int) # Basic correlation 10 | Y_spurious = Y.copy() 11 | flip_idx = np.random.rand(n) < env_factor # Add spurious correlation 12 | Y_spurious[flip_idx] = 1 - Y_spurious[flip_idx] 13 | return X, Y_spurious 14 | 15 | # Environment 1 with spurious correlation factor of 0.2 16 | X_env1, Y_env1 = generate_data(1000, env_factor=0.2) 17 | # Environment 2 with spurious correlation factor of 0.8 18 | X_env2, Y_env2 = generate_data(1000, env_factor=0.8) 19 | 20 | # Reshape labels to shape (n, 1) 21 | Y_env1 = Y_env1.reshape(-1, 1) 22 | Y_env2 = Y_env2.reshape(-1, 1) 23 | 24 | # Define the IRM model 25 | class IRMModel(tf.keras.Model): 26 | def __init__(self): 27 | super(IRMModel, self).__init__() 28 | self.feature_extractor = Dense(10, activation='relu') 29 | self.classifier = Dense(1, activation='sigmoid') 30 | 31 | def call(self, inputs): 32 | features = self.feature_extractor(inputs) 33 | output = self.classifier(features) 34 | return output, features 35 | 36 | # Define IRM penalty function, using GradientTape to compute gradients 37 | def irm_penalty(loss, features, tape): 38 | grad = tape.gradient(loss, features) 39 | penalty = tf.reduce_mean(tf.square(grad)) 40 | return penalty 41 | 42 | # Compile the model 43 | model = IRMModel() 44 | optimizer = tf.keras.optimizers.Adam() 45 | 46 | # Training loop 47 | for epoch in range(1000): 48 | with tf.GradientTape(persistent=True) as tape: 49 | # Process Environment 1 50 | pred_env1, features_env1 = model(X_env1) 51 | loss_env1 = tf.keras.losses.binary_crossentropy(Y_env1, pred_env1) 52 | loss_env1_mean = tf.reduce_mean(loss_env1) 53 | penalty_env1 = irm_penalty(loss_env1_mean, 
features_env1, tape) 54 | 55 | # Process Environment 2 56 | pred_env2, features_env2 = model(X_env2) 57 | loss_env2 = tf.keras.losses.binary_crossentropy(Y_env2, pred_env2) 58 | loss_env2_mean = tf.reduce_mean(loss_env2) 59 | penalty_env2 = irm_penalty(loss_env2_mean, features_env2, tape) 60 | 61 | # Total loss 62 | total_loss = loss_env1_mean + loss_env2_mean + 1.0 * (penalty_env1 + penalty_env2) 63 | 64 | # Compute gradients and update parameters 65 | gradients = tape.gradient(total_loss, model.trainable_variables) 66 | optimizer.apply_gradients(zip(gradients, model.trainable_variables)) 67 | del tape # Free resources 68 | 69 | # Print loss every 100 epochs 70 | if epoch % 100 == 0: 71 | print(f"Epoch {epoch}, Loss: {total_loss.numpy()}") 72 | 73 | print("Training complete.") 74 | 75 | # Evaluate the model on different environments 76 | def evaluate_model(X, Y): 77 | pred, _ = model(X) 78 | pred_labels = (pred.numpy() > 0.5).astype(int) 79 | accuracy = np.mean(pred_labels == Y) 80 | return accuracy 81 | 82 | acc_env1 = evaluate_model(X_env1, Y_env1) 83 | acc_env2 = evaluate_model(X_env2, Y_env2) 84 | print(f"Accuracy in Environment 1: {acc_env1 * 100:.2f}%") 85 | print(f"Accuracy in Environment 2: {acc_env2 * 100:.2f}%") 86 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/620_Causal-Inference/scm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import statsmodels.api as sm 4 | 5 | # Define the structural equations of the SCM: X -> M and (X, M) -> Y 6 | np.random.seed(42) 7 | n_samples = 1000 8 | X = np.random.normal(size=n_samples) 9 | U_M = np.random.normal(size=n_samples) 10 | M = 0.5 * X + U_M 11 | U_Y = np.random.normal(size=n_samples) 12 | Y = 0.3 * M + 0.2 * X + U_Y 13 | 14 | # Create a DataFrame 15 | data = pd.DataFrame({'X': X, 'M': M, 'Y': Y}) 16 | 17 | # X is exogenous in this SCM, so the total causal effect of X on Y is identified 18 | # by regressing Y on X alone; adjusting for the mediator M would block the 19 | # indirect path X -> M -> Y and recover only the direct effect 20 | ols_model = sm.OLS(data['Y'], sm.add_constant(data['X'])).fit() 21 | effect = ols_model.params['X'] 22 | 23 | # True total effect: 0.2 (direct) + 0.5 * 0.3 (via M) = 0.35 24 | print(f"Estimated causal effect of X on Y: {effect:.4f}") -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/Images/(GAN)-based Counterfactuals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/630_Counterfactual/Images/(GAN)-based Counterfactuals.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/Images/Diverse Counterfactual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/630_Counterfactual/Images/Diverse Counterfactual.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/Images/Optimization-based Counterfactuals.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/630_Counterfactual/Images/Optimization-based Counterfactuals.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/actionable_recourse_method.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.optimize import minimize 3 | 4 | # Define the prediction function of the model (simplified example) 5 | def predict(model, x): 6 | # x is passed through as a 1-D array 7 | return model.predict(x) 8 | 9 | # Action cost function: Assign higher costs to harder-to-change features 10 | def action_cost(x, x_prime, weights): 11 | return np.sum(weights * np.abs(x - x_prime)) 12 | 13 | # Objective function for actionable recourse 14 | def objective_function(x_prime, x, model, target_class, lambda_param, weights): 15 | distance = np.linalg.norm(x - x_prime) 16 | action_cost_value = action_cost(x, x_prime, weights) 17 | prediction_loss = 0 if predict(model, x_prime) == target_class else 1 18 | return distance + lambda_param * action_cost_value + prediction_loss 19 | 20 | # Generate actionable recourse 21 | def generate_actionable_recourse(model, x, target_class, weights, lambda_param=0.1): 22 | # Initialize the counterfactual with the original input 23 | x_prime = np.copy(x) 24 | 25 | # Optimize to find actionable recourse 26 | result = minimize( 27 | objective_function, 28 | x_prime, 29 | args=(x, model, target_class, lambda_param, weights), 30 | method='L-BFGS-B' 31 | ) 32 | 33 | return result.x 34 | 35 | # Example usage 36 | x = np.array([30, 50000, 0.4]) # Example input features: age, income, debt-to-income ratio 37 | weights = np.array([10.0, 0.5, 1.0]) # Higher cost for changing age (hard to act on), lower for financial habits 38 | target_class = 1 # Desired outcome: Loan approval 39 | 40 | # Assume we have a pre-trained model (pseudo-model for illustration) 41 | class SimpleModel: 42 | def predict(self, x): 43 | # x is expected to be a 1D array 44 | return int(x[1] > 40000 and x[2] < 0.5) # Simplified decision rule 45 | 46 | model = SimpleModel() 47 | 48 | # Generate actionable recourse 49 | counterfactual = generate_actionable_recourse(model, x, target_class, weights) 50 | 51 | print("Original input:", x) 52 | print("Actionable recourse:", counterfactual) 53 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/counterfactuals_RL.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | # Define a simple Deep Q-Network (DQN) 5 | class DQN(tf.keras.Model): 6 | def __init__(self, state_dim, action_dim): 7 | super(DQN, self).__init__() 8 | self.dense1 = tf.keras.layers.Dense(24, activation='relu') 9 | self.dense2 = tf.keras.layers.Dense(24, activation='relu') 10 | self.q_values = tf.keras.layers.Dense(action_dim, activation=None) 11 | 12 | def call(self, state): 13 | x = self.dense1(state) 14 | x = self.dense2(x) 15 | return self.q_values(x) 16 | 17 | # Initialize the environment, DQN model, and sample state 18 | state_dim = 4 19 | action_dim = 2 20 | model = DQN(state_dim, action_dim) 21 | sample_state = np.random.rand(1, state_dim).astype(np.float32) 22 | 23 | # Predict Q-values for the current state 24 | q_values = 
model(sample_state).numpy().squeeze() 25 | 26 | # Define the counterfactual analysis function 27 | def counterfactual_analysis(model, state, actual_action): 28 | q_values = model(state).numpy().squeeze() 29 | counterfactual_actions = [a for a in range(len(q_values)) if a != actual_action] 30 | 31 | counterfactual_results = {} 32 | for action in counterfactual_actions: 33 | counterfactual_q_value = q_values[action] 34 | counterfactual_results[action] = counterfactual_q_value 35 | 36 | return counterfactual_results 37 | 38 | # Assume the agent took action 0, analyze the counterfactual for action 1 39 | actual_action = 0 40 | counterfactuals = counterfactual_analysis(model, sample_state, actual_action) 41 | 42 | print("Q-values for the current state:", q_values) 43 | print("Counterfactual Q-values for alternative actions:", counterfactuals) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/counterfactuals_structured_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.neural_network import MLPClassifier 3 | from scipy.optimize import minimize 4 | 5 | # Synthetic dataset: Features are age, income, credit score, and debt-to-income ratio 6 | X = np.array([[25, 40000, 650, 0.3], [45, 80000, 720, 0.2], [35, 60000, 690, 0.25], [50, 120000, 750, 0.15]]) 7 | y = np.array([0, 1, 0, 1]) # 0 = Loan Denied, 1 = Loan Approved 8 | 9 | # Train a neural network classifier 10 | model = MLPClassifier(hidden_layer_sizes=(10,), max_iter=1000) 11 | model.fit(X, y) 12 | 13 | # Define the objective function for generating counterfactuals 14 | def objective_function(x_prime, x, model, target_class, lambda_param): 15 | distance = np.linalg.norm(x - x_prime) 16 | prediction = model.predict([x_prime])[0] 17 | classification_loss = 0 if prediction == target_class else 1 18 | return distance + lambda_param * classification_loss 19 | 20 | # Generate a counterfactual explanation 21 | def generate_counterfactual(model, x, target_class, lambda_param=0.1): 22 | x_prime = np.copy(x) 23 | result = minimize( 24 | objective_function, 25 | x_prime, 26 | args=(x, model, target_class, lambda_param), 27 | method='L-BFGS-B' 28 | ) 29 | return result.x 30 | 31 | # Example input: Applicant profile [age, income, credit score, debt-to-income ratio] 32 | x = np.array([30, 50000, 670, 0.28]) 33 | target_class = 1 # Desired outcome: Loan approval 34 | 35 | # Generate the counterfactual example 36 | counterfactual = generate_counterfactual(model, x, target_class) 37 | 38 | print("Original input:", x) 39 | print("Counterfactual example:", counterfactual) 40 | print("Original prediction:", model.predict([x])[0]) 41 | print("Counterfactual prediction:", model.predict([counterfactual])[0]) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/counterfactuals_time_series_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from scipy.optimize import minimize 4 | 5 | # Define a simple LSTM model for time series prediction 6 | model = tf.keras.Sequential([ 7 | tf.keras.layers.LSTM(50, activation='relu', input_shape=(10, 1)), 8 | tf.keras.layers.Dense(1) 9 | ]) 10 | 11 | # Generate a synthetic time series dataset 12 | np.random.seed(0) 13 | X = np.random.rand(100, 10, 1) # 100 sequences of length 10 14 | y = 
(X.mean(axis=1) > 0.5).astype(int) # Binary target based on mean value 15 | 16 | # Train the model 17 | model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 18 | model.fit(X, y, epochs=10, batch_size=16) 19 | 20 | # Define the objective function for generating counterfactuals 21 | def objective_function(x_prime, x, model, target_class, lambda_param): 22 | x_prime = x_prime.reshape(1, -1, 1) 23 | distance = np.linalg.norm(x - x_prime) 24 | smoothness = np.sum(np.abs(np.diff(x_prime.squeeze()))) 25 | prediction = model.predict(x_prime)[0][0] 26 | classification_loss = 0 if (prediction > 0.5) == target_class else 1 27 | return distance + lambda_param * smoothness + 10 * classification_loss 28 | 29 | # Generate a counterfactual for a sample sequence 30 | def generate_counterfactual(model, x, target_class, lambda_param=0.1): 31 | x_prime = np.copy(x) 32 | result = minimize( 33 | objective_function, 34 | x_prime.flatten(), 35 | args=(x, model, target_class, lambda_param), 36 | method='L-BFGS-B' 37 | ) 38 | return result.x.reshape(-1, 1) 39 | 40 | # Example input sequence 41 | x_sample = X[0] 42 | target_class = 1 # Desired outcome: Change prediction to class 1 43 | 44 | # Generate the counterfactual sequence 45 | counterfactual = generate_counterfactual(model, x_sample, target_class) 46 | 47 | print("Original sequence:", x_sample.flatten()) 48 | print("Counterfactual sequence:", counterfactual.flatten()) 49 | print("Original prediction:", model.predict(x_sample.reshape(1, -1, 1))[0][0]) 50 | print("Counterfactual prediction:", model.predict(counterfactual.reshape(1, -1, 1))[0][0]) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/diverse_counterfactual.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load and preprocess the MNIST dataset 6 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 7 | train_images = train_images[..., np.newaxis] / 255.0 8 | test_images = test_images[..., np.newaxis] / 255.0 9 | 10 | # Define and train the classifier model (if not already trained) 11 | inputs = tf.keras.Input(shape=(28, 28, 1)) 12 | x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(inputs) 13 | x = tf.keras.layers.MaxPooling2D((2, 2))(x) 14 | x = tf.keras.layers.Flatten()(x) 15 | x = tf.keras.layers.Dense(128, activation='relu')(x) 16 | outputs = tf.keras.layers.Dense(10, activation='softmax')(x) 17 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 18 | 19 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 20 | model.fit(train_images, train_labels, epochs=5, batch_size=64, validation_data=(test_images, test_labels)) 21 | 22 | # Save the trained model 23 | model.save('mnist_classifier.h5') 24 | 25 | # Load the model (optional if already in memory) 26 | model = tf.keras.models.load_model('mnist_classifier.h5') 27 | 28 | # Function to generate diverse counterfactuals 29 | def generate_diverse_counterfactuals(model, image, target_class, num_counterfactuals=3, num_steps=100, learning_rate=0.01, lambda_1=0.1, lambda_2=0.05): 30 | counterfactuals = [] 31 | 32 | for _ in range(num_counterfactuals): 33 | # Initialize the counterfactual image as a copy of the original 34 | counterfactual = tf.Variable(image, dtype=tf.float32) 35 | optimizer = 
tf.optimizers.Adam(learning_rate) 36 | 37 | for step in range(num_steps): 38 | with tf.GradientTape() as tape: 39 | # Compute the prediction loss 40 | prediction = model(counterfactual) 41 | target_class_tensor = tf.convert_to_tensor([target_class], dtype=tf.int32) 42 | classification_loss = tf.keras.losses.sparse_categorical_crossentropy(target_class_tensor, prediction) 43 | 44 | # Compute the similarity loss 45 | distance_loss = tf.reduce_mean(tf.abs(counterfactual - image)) 46 | 47 | # Compute the diversity loss (based on difference from previous counterfactuals) 48 | diversity_loss = 0 49 | if counterfactuals: 50 | for prev_cf in counterfactuals: 51 | diversity_loss += tf.reduce_mean(tf.abs(counterfactual - prev_cf)) 52 | diversity_loss /= len(counterfactuals) # Normalize by the number of counterfactuals 53 | 54 | # Total loss function 55 | total_loss = distance_loss + lambda_1 * classification_loss + lambda_2 * diversity_loss 56 | 57 | # Update the counterfactual image 58 | gradients = tape.gradient(total_loss, counterfactual) 59 | optimizer.apply_gradients([(gradients, counterfactual)]) 60 | counterfactual.assign(tf.clip_by_value(counterfactual, 0.0, 1.0)) 61 | 62 | # Add the optimized counterfactual to the list 63 | counterfactuals.append(counterfactual.numpy()) 64 | 65 | return counterfactuals 66 | 67 | # Select a sample image and generate counterfactuals 68 | sample_image = test_images[0:1] 69 | original_prediction = model.predict(sample_image) 70 | original_label = np.argmax(original_prediction, axis=1)[0] 71 | target_label = (original_label + 1) % 10 # Set the desired target class 72 | 73 | print(f"Original label: {original_label}, Target label: {target_label}") 74 | 75 | # Generate diverse counterfactuals 76 | counterfactuals = generate_diverse_counterfactuals(model, sample_image, target_label) 77 | 78 | # Display the generated counterfactuals 79 | plt.figure(figsize=(12, 4)) 80 | for i, cf_image in enumerate(counterfactuals): 81 | plt.subplot(1, len(counterfactuals), i + 1) 82 | plt.imshow(cf_image.squeeze(), cmap='gray') 83 | plt.title(f"Counterfactual {i+1}") 84 | plt.axis('off') 85 | plt.show() 86 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/gan_based_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from tensorflow.keras.losses import SparseCategoricalCrossentropy 5 | from scipy.optimize import minimize 6 | 7 | # Load and preprocess the MNIST dataset 8 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 9 | train_images = (train_images / 255.0).astype(np.float32) 10 | test_images = (test_images / 255.0).astype(np.float32) 11 | train_images = train_images[..., np.newaxis] 12 | test_images = test_images[..., np.newaxis] 13 | 14 | # Define a simple classifier model 15 | def create_classifier(): 16 | inputs = tf.keras.Input(shape=(28, 28, 1)) 17 | x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(inputs) 18 | x = tf.keras.layers.Flatten()(x) 19 | x = tf.keras.layers.Dense(128, activation='relu')(x) 20 | outputs = tf.keras.layers.Dense(10)(x) # Logits output 21 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 22 | return model 23 | 24 | classifier = create_classifier() 25 | classifier.compile( 26 | optimizer='adam', 27 | 
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 28 | metrics=['accuracy'] 29 | ) 30 | 31 | # Train the classifier 32 | classifier.fit( 33 | train_images, 34 | train_labels, 35 | epochs=5, 36 | validation_data=(test_images, test_labels), 37 | batch_size=128 38 | ) 39 | 40 | # Define a simple generator model 41 | def create_generator(latent_dim): 42 | inputs = tf.keras.Input(shape=(latent_dim,)) 43 | x = tf.keras.layers.Dense(7 * 7 * 128, activation='relu')(inputs) 44 | x = tf.keras.layers.Reshape((7, 7, 128))(x) 45 | x = tf.keras.layers.UpSampling2D()(x) 46 | x = tf.keras.layers.Conv2D(64, kernel_size=3, padding='same', activation='relu')(x) 47 | x = tf.keras.layers.UpSampling2D()(x) 48 | outputs = tf.keras.layers.Conv2D(1, kernel_size=3, padding='same', activation='sigmoid')(x) 49 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 50 | return model 51 | 52 | latent_dim = 100 53 | generator = create_generator(latent_dim) 54 | 55 | # Note: In practice, you should train the generator as part of a GAN. 56 | # For this example, we'll assume the generator is already trained. 57 | 58 | # Select a sample image from the test set 59 | sample_image = test_images[0:1] 60 | original_prediction = classifier.predict(sample_image) 61 | original_label = np.argmax(original_prediction, axis=1)[0] 62 | 63 | # Define the target class (different from the original prediction) 64 | target_label = (original_label + 1) % 10 65 | 66 | # Define a loss function 67 | loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) 68 | 69 | # Define a function to generate counterfactuals 70 | def generate_counterfactual(z): 71 | z = tf.convert_to_tensor(z.reshape(1, -1), dtype=tf.float32) 72 | # No GradientTape is needed here: L-BFGS-B approximates gradients numerically 73 | generated_image = generator(z) 74 | prediction = classifier(generated_image) 75 | # Convert target_label to tensor 76 | target_label_tensor = tf.convert_to_tensor([target_label], dtype=tf.int32) 77 | similarity_loss = tf.norm(generated_image - sample_image) 78 | classification_loss = loss_fn(target_label_tensor, prediction) 79 | total_loss = similarity_loss + 0.1 * classification_loss 80 | return total_loss.numpy().astype(np.float64) 81 | 82 | # Initialize the latent vector and optimize it 83 | z_initial = np.random.normal(size=(latent_dim,)) 84 | result = minimize( 85 | generate_counterfactual, 86 | z_initial, 87 | method='L-BFGS-B', 88 | options={'maxiter': 100} 89 | ) 90 | 91 | # Generate the counterfactual image 92 | z_optimized = result.x 93 | counterfactual_image = generator.predict(z_optimized.reshape(1, -1)) 94 | 95 | # Display the original and counterfactual images 96 | plt.figure(figsize=(8, 4)) 97 | 98 | plt.subplot(1, 2, 1) 99 | plt.imshow(sample_image.squeeze(), cmap='gray') 100 | plt.title(f"Original: {original_label}") 101 | plt.axis('off') 102 | 103 | plt.subplot(1, 2, 2) 104 | # counterfactual_image is already a NumPy array 105 | plt.imshow(counterfactual_image.squeeze(), cmap='gray') 106 | plt.title(f"Counterfactual: {target_label}") 107 | plt.axis('off') 108 | 109 | plt.show() 110 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/minimal_change_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import LogisticRegression 3 | from scipy.optimize import minimize 4 | 5 | # Create a synthetic dataset 6 | X = np.array([[0.1, 0.5], [0.4, 0.8], 
-------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/minimal_change_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import LogisticRegression 3 | from scipy.optimize import minimize 4 | 5 | # Create a synthetic dataset 6 | X = np.array([[0.1, 0.5], [0.4, 0.8], [0.5, 0.3], [0.9, 0.6], [0.7, 0.9]]) 7 | y = np.array([0, 0, 0, 1, 1]) 8 | 9 | # Train a logistic regression classifier 10 | model = LogisticRegression() 11 | model.fit(X, y) 12 | 13 | # Define the objective function for minimal change counterfactual 14 | def objective_function(x_prime, x, model, target_class): 15 | distance = np.linalg.norm(x - x_prime) 16 | target_prob = model.predict_proba([x_prime])[0][target_class] 17 | classification_loss = 1 - target_prob # Smooth penalty: a hard 0/1 loss is flat almost everywhere, so L-BFGS-B's numerical gradients would vanish and the search would stall 18 | return distance + 10 * classification_loss # Weighted sum of proximity and target-class fit 19 | 20 | # Generate a minimal change counterfactual 21 | def generate_minimal_counterfactual(model, x, target_class): 22 | x_prime = np.copy(x) 23 | result = minimize( 24 | objective_function, 25 | x_prime, 26 | args=(x, model, target_class), 27 | method='L-BFGS-B' 28 | ) 29 | return result.x 30 | 31 | # Example input and target class 32 | x = np.array([0.3, 0.7]) 33 | target_class = 1 # Desired outcome different from the model's original prediction 34 | 35 | # Generate the counterfactual 36 | counterfactual = generate_minimal_counterfactual(model, x, target_class) 37 | 38 | print("Original input:", x) 39 | print("Minimal change counterfactual:", counterfactual) 40 | print("Original prediction:", model.predict([x])[0]) 41 | print("Counterfactual prediction:", model.predict([counterfactual])[0]) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/neighbor_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.datasets import load_iris 4 | from sklearn.neighbors import KNeighborsClassifier 5 | from sklearn.preprocessing import StandardScaler 6 | from sklearn.metrics import pairwise_distances_argmin_min 7 | 8 | # Load the Iris dataset 9 | data = load_iris() 10 | X = pd.DataFrame(data.data, columns=data.feature_names) 11 | y = pd.Series(data.target) 12 | 13 | # Standardize the features 14 | scaler = StandardScaler() 15 | X_scaled = scaler.fit_transform(X) 16 | 17 | # Train a k-NN classifier 18 | knn = KNeighborsClassifier(n_neighbors=5) 19 | knn.fit(X_scaled, y) 20 | 21 | # Function to find nearest neighbor counterfactual 22 | def find_counterfactual(instance, model, X, y): 23 | original_class = model.predict([instance])[0] 24 | # Find the nearest neighbor from a different class 25 | mask = y != original_class 26 | candidates = X[mask] 27 | indices, distances = pairwise_distances_argmin_min([instance], candidates) 28 | counterfactual = candidates[indices[0]] 29 | return counterfactual 30 | 31 | # Select a sample instance and find its counterfactual 32 | sample_index = 0 33 | sample_instance = X_scaled[sample_index] 34 | counterfactual = find_counterfactual(sample_instance, knn, X_scaled, y) 35 | 36 | print("Original instance:", sample_instance) 37 | print("Counterfactual instance:", counterfactual) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/optimization_based_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load the MNIST dataset 6 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data() 7 | train_images = train_images[..., np.newaxis] / 255.0 8 | test_images = 
test_images[..., np.newaxis] / 255.0 9 | 10 | # Define a simple neural network model using Functional API to avoid warnings 11 | inputs = tf.keras.Input(shape=(28, 28, 1)) 12 | x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(inputs) 13 | x = tf.keras.layers.MaxPooling2D((2, 2))(x) 14 | x = tf.keras.layers.Flatten()(x) 15 | x = tf.keras.layers.Dense(128, activation='relu')(x) 16 | outputs = tf.keras.layers.Dense(10, activation='softmax')(x) 17 | model = tf.keras.Model(inputs=inputs, outputs=outputs) 18 | 19 | # Compile and train the model 20 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 21 | model.fit(train_images, train_labels, epochs=1, batch_size=64) 22 | 23 | # Function to generate an optimization-based counterfactual 24 | def generate_counterfactual(model, image, target_class, num_steps=100, learning_rate=0.01, lambda_param=0.1): 25 | # Create a variable for the counterfactual image 26 | counterfactual = tf.Variable(image, dtype=tf.float32) 27 | 28 | # Define the optimizer 29 | optimizer = tf.optimizers.Adam(learning_rate) 30 | 31 | for step in range(num_steps): 32 | with tf.GradientTape() as tape: 33 | # Compute the loss: distance loss + prediction loss 34 | distance_loss = tf.reduce_mean(tf.abs(counterfactual - image)) 35 | prediction = model(counterfactual) 36 | # Convert target_class to tensor 37 | target_class_tensor = tf.convert_to_tensor([target_class], dtype=tf.int32) 38 | classification_loss = tf.keras.losses.sparse_categorical_crossentropy(target_class_tensor, prediction) 39 | total_loss = distance_loss + lambda_param * classification_loss 40 | 41 | # Compute gradients and update the counterfactual image 42 | gradients = tape.gradient(total_loss, counterfactual) 43 | optimizer.apply_gradients([(gradients, counterfactual)]) 44 | 45 | # Clip the pixel values to maintain valid image range 46 | counterfactual.assign(tf.clip_by_value(counterfactual, 0.0, 1.0)) 47 | 48 | return counterfactual.numpy() 49 | 50 | # Select a sample image and generate a counterfactual 51 | sample_image = test_images[0:1] 52 | original_prediction = model.predict(sample_image) 53 | original_label = np.argmax(original_prediction, axis=1)[0] 54 | target_label = (original_label + 1) % 10 55 | 56 | counterfactual_image = generate_counterfactual(model, sample_image, target_label) 57 | 58 | # Display the original and counterfactual images 59 | plt.figure(figsize=(8, 4)) 60 | 61 | plt.subplot(1, 2, 1) 62 | plt.imshow(sample_image.squeeze(), cmap='gray') 63 | plt.title(f"Original: {original_label}") 64 | plt.axis('off') 65 | 66 | plt.subplot(1, 2, 2) 67 | plt.imshow(counterfactual_image.squeeze(), cmap='gray') 68 | plt.title(f"Counterfactual: {target_label}") 69 | plt.axis('off') 70 | 71 | plt.show() 72 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/630_Counterfactual/prototype_based_counterfactuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.datasets import load_iris 4 | from sklearn.neighbors import KNeighborsClassifier 5 | from sklearn.preprocessing import StandardScaler 6 | from sklearn.metrics import pairwise_distances_argmin_min 7 | 8 | # Load the Iris dataset 9 | data = load_iris() 10 | X = pd.DataFrame(data.data, columns=data.feature_names) 11 | y = pd.Series(data.target) 12 | 13 | # Standardize the features 14 | scaler = StandardScaler() 15 | X_scaled = 
scaler.fit_transform(X) 16 | 17 | # Train a k-NN classifier 18 | knn = KNeighborsClassifier(n_neighbors=5) 19 | knn.fit(X_scaled, y) 20 | 21 | # Function to find prototype-based counterfactual 22 | def find_prototype_counterfactual(instance, model, X, y): 23 | original_class = model.predict([instance])[0] 24 | # Find the nearest prototype from a different class 25 | mask = y != original_class 26 | prototypes = X[mask] 27 | indices, distances = pairwise_distances_argmin_min([instance], prototypes) 28 | counterfactual = prototypes[indices[0]] 29 | return counterfactual 30 | 31 | # Select a sample instance and find its counterfactual 32 | sample_index = 0 33 | sample_instance = X_scaled[sample_index] 34 | counterfactual = find_prototype_counterfactual(sample_instance, knn, X_scaled, y) 35 | 36 | print("Original instance:", sample_instance) 37 | print("Counterfactual prototype:", counterfactual) -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/640_Graph-based/Images/feature_importance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/640_Graph-based/Images/feature_importance.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/640_Graph-based/Images/subgraph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/640_Graph-based/Images/subgraph.pdf -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/640_Graph-based/gnn_explainer.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | from torch_geometric.datasets import Planetoid 7 | from torch_geometric.explain import Explainer, GNNExplainer 8 | from torch_geometric.nn import GCNConv 9 | 10 | dataset = 'Cora' 11 | path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid') 12 | dataset = Planetoid(path, dataset) 13 | data = dataset[0] 14 | 15 | 16 | class GCN(torch.nn.Module): 17 | def __init__(self): 18 | super().__init__() 19 | self.conv1 = GCNConv(dataset.num_features, 16) 20 | self.conv2 = GCNConv(16, dataset.num_classes) 21 | 22 | def forward(self, x, edge_index): 23 | x = F.relu(self.conv1(x, edge_index)) 24 | x = F.dropout(x, training=self.training) 25 | x = self.conv2(x, edge_index) 26 | return F.log_softmax(x, dim=1) 27 | 28 | 29 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 30 | model = GCN().to(device) 31 | data = data.to(device) 32 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4) 33 | 34 | for epoch in range(1, 201): 35 | model.train() 36 | optimizer.zero_grad() 37 | out = model(data.x, data.edge_index) 38 | loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) 39 | loss.backward() 40 | optimizer.step() 41 | 42 | explainer = Explainer( 43 | model=model, 44 | algorithm=GNNExplainer(epochs=200), 45 | explanation_type='model', 46 | node_mask_type='attributes', 47 | edge_mask_type='object', 48 | model_config=dict( 49 | mode='multiclass_classification', 50 | 
task_level='node', 51 | return_type='log_probs', 52 | ), 53 | ) 54 | node_index = 10 55 | explanation = explainer(data.x, data.edge_index, index=node_index) 56 | print(f'Generated explanations in {explanation.available_explanations}') 57 | 58 | path = 'feature_importance.png' 59 | explanation.visualize_feature_importance(path, top_k=10) 60 | print(f"Feature importance plot has been saved to '{path}'") 61 | 62 | path = 'subgraph.pdf' 63 | explanation.visualize_graph(path) 64 | print(f"Subgraph visualization plot has been saved to '{path}'") -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/640_Graph-based/node_importance_attribution.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch_geometric.nn import GCNConv 4 | from torch_geometric.datasets import Planetoid 5 | 6 | # Load the Cora dataset 7 | dataset = Planetoid(root='/tmp/Cora', name='Cora') 8 | data = dataset[0] 9 | 10 | # Define a simple GCN model 11 | class GCN(torch.nn.Module): 12 | def __init__(self): 13 | super(GCN, self).__init__() 14 | self.conv1 = GCNConv(dataset.num_node_features, 16) 15 | self.conv2 = GCNConv(16, dataset.num_classes) 16 | 17 | def forward(self, x, edge_index): 18 | x = F.relu(self.conv1(x, edge_index)) 19 | x = self.conv2(x, edge_index) 20 | return F.log_softmax(x, dim=1) 21 | 22 | model = GCN() 23 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01) 24 | criterion = torch.nn.CrossEntropyLoss() 25 | 26 | # Train the GCN model 27 | model.train() 28 | for epoch in range(200): 29 | optimizer.zero_grad() 30 | out = model(data.x, data.edge_index) 31 | loss = criterion(out[data.train_mask], data.y[data.train_mask]) 32 | loss.backward() 33 | optimizer.step() 34 | 35 | # Calculate node importance 36 | model.eval() 37 | data.x.requires_grad = True # Enable gradient calculation for node features 38 | 39 | target_node = 10 40 | output = model(data.x, data.edge_index) 41 | 42 | # Perform backward pass for the predicted class of the target node 43 | predicted_class = output[target_node].argmax() 44 | output[target_node, predicted_class].backward() 45 | 46 | # Calculate the L2 norm of the gradient for the target node's features as the importance score 47 | node_importance = torch.norm(data.x.grad[target_node], p=2).item() 48 | print(f"Importance score for node {target_node}: {node_importance:.4f}") 49 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/Images/Cross-modal Attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/650_Multimodal/Images/Cross-modal Attention.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/Images/Joint Feature Attribution for Multimodal Models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/650_Multimodal/Images/Joint Feature Attribution for Multimodal Models.png -------------------------------------------------------------------------------- 
/ch06_techniques_for_explainable_ai/650_Multimodal/Images/model_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/650_Multimodal/Images/model_plot.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/Images/model_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/650_Multimodal/Images/model_summary.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/cross_modal_attention.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Dense, MultiHeadAttention, Concatenate, GlobalAveragePooling1D, Lambda 3 | from tensorflow.keras.models import Model 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | # Set the model dimension 8 | d_model = 64 9 | 10 | # Define text and image input layers 11 | text_input = tf.keras.Input(shape=(100, 300), name='text_input') # 100 tokens, 300-d embeddings 12 | image_input = tf.keras.Input(shape=(49, 512), name='image_input') # 7x7 image patches, 512-d features 13 | 14 | # Project inputs to a common dimension 15 | text_features = Dense(d_model)(text_input) # Shape: (batch_size, 100, 64) 16 | image_features = Dense(d_model)(image_input) # Shape: (batch_size, 49, 64) 17 | 18 | # Self-attention on text features 19 | text_self_attention = MultiHeadAttention(num_heads=8, key_dim=d_model) 20 | text_attention_output = text_self_attention( 21 | query=text_features, value=text_features, key=text_features 22 | ) # Shape: (batch_size, 100, 64) 23 | 24 | # Self-attention on image features 25 | image_self_attention = MultiHeadAttention(num_heads=8, key_dim=d_model) 26 | image_attention_output = image_self_attention( 27 | query=image_features, value=image_features, key=image_features 28 | ) # Shape: (batch_size, 49, 64) 29 | 30 | # Cross-modal attention from text to image features 31 | cross_modal_attention = MultiHeadAttention(num_heads=8, key_dim=d_model) 32 | cross_attention_output, cross_attention_scores = cross_modal_attention( 33 | query=text_attention_output, 34 | value=image_attention_output, 35 | key=image_attention_output, 36 | return_attention_scores=True 37 | ) # cross_attention_output shape: (batch_size, 100, 64) 38 | # cross_attention_scores shape: (batch_size, 8, 100, 49) 39 | 40 | # Average the attention scores over the heads using a Lambda layer 41 | average_attention_scores = Lambda(lambda x: tf.reduce_mean(x, axis=1))(cross_attention_scores) 42 | # Shape: (batch_size, 100, 49) 43 | 44 | # Pooling over the sequence length to get fixed-size representations 45 | text_representation = GlobalAveragePooling1D()(text_attention_output) # Shape: (batch_size, 64) 46 | image_representation = GlobalAveragePooling1D()(image_attention_output) # Shape: (batch_size, 64) 47 | cross_attention_representation = GlobalAveragePooling1D()(cross_attention_output) # Shape: (batch_size, 64) 48 | 49 | # Combine the representations 50 | combined_representation = Concatenate()([ 51 | text_representation, 52 | image_representation, 
53 | cross_attention_representation 54 | ]) # Shape: (batch_size, 192) 55 | 56 | # Output layer 57 | output = Dense(1, activation='sigmoid')(combined_representation) 58 | 59 | # Build and compile the model 60 | model = Model(inputs=[text_input, image_input], outputs=output) 61 | model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 62 | 63 | # Create sample input data 64 | text_sample = np.random.rand(1, 100, 300).astype(np.float32) 65 | image_sample = np.random.rand(1, 49, 512).astype(np.float32) 66 | 67 | # Perform model prediction 68 | prediction = model.predict([text_sample, image_sample]) 69 | print("Prediction:", prediction) 70 | 71 | # Build a model to output the attention scores for visualization 72 | attention_model = Model(inputs=[text_input, image_input], outputs=average_attention_scores) 73 | 74 | # Get the attention scores 75 | attention_scores = attention_model.predict([text_sample, image_sample]) # Shape: (1, 100, 49) 76 | 77 | # Visualize the cross-modal attention weights 78 | plt.figure(figsize=(12, 8)) 79 | plt.imshow(attention_scores[0], cmap='viridis', aspect='auto') 80 | plt.colorbar() 81 | plt.title("Cross-modal Attention Weights (Text to Image Features)") 82 | plt.xlabel("Image Feature Index (49 patches)") 83 | plt.ylabel("Text Token Index (100 tokens)") 84 | plt.show() 85 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/joint_feature_attribution_multimodal_models.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Dense, Concatenate 3 | from tensorflow.keras.models import Model 4 | import numpy as np 5 | 6 | # Define text and image inputs 7 | text_input = tf.keras.Input(shape=(300,), name='text_input') # 300-d text embeddings 8 | image_input = tf.keras.Input(shape=(512,), name='image_input') # 512-d image features 9 | 10 | # Define simple Dense layers for text and image features 11 | text_features = Dense(128, activation='relu')(text_input) 12 | image_features = Dense(128, activation='relu')(image_input) 13 | 14 | # Concatenate text and image features 15 | combined_features = Concatenate()([text_features, image_features]) 16 | output = Dense(1, activation='sigmoid')(combined_features) 17 | 18 | # Build and compile the model 19 | model = Model(inputs=[text_input, image_input], outputs=output) 20 | model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 21 | 22 | # Define Integrated Gradients function 23 | def integrated_gradients(model, inputs, baseline, steps=50): 24 | alphas = np.linspace(0, 1, steps) 25 | 26 | # Interpolate for each input 27 | input_scaled_text = np.array([baseline[0] + alpha * (inputs[0] - baseline[0]) for alpha in alphas]) 28 | input_scaled_image = np.array([baseline[1] + alpha * (inputs[1] - baseline[1]) for alpha in alphas]) 29 | 30 | # Convert NumPy arrays to TensorFlow tensors 31 | input_scaled_text = tf.convert_to_tensor(input_scaled_text, dtype=tf.float32) 32 | input_scaled_image = tf.convert_to_tensor(input_scaled_image, dtype=tf.float32) 33 | 34 | # Compute gradients using GradientTape 35 | with tf.GradientTape() as tape: 36 | tape.watch([input_scaled_text, input_scaled_image]) 37 | predictions = model([input_scaled_text, input_scaled_image]) 38 | gradients = tape.gradient(predictions, [input_scaled_text, input_scaled_image]) 39 | 40 | # Calculate average gradients and Integrated Gradients 41 | 
avg_gradients_text = tf.reduce_mean(gradients[0], axis=0).numpy() 42 | avg_gradients_image = tf.reduce_mean(gradients[1], axis=0).numpy() 43 | 44 | integrated_gradients_text = (inputs[0] - baseline[0]) * avg_gradients_text 45 | integrated_gradients_image = (inputs[1] - baseline[1]) * avg_gradients_image 46 | 47 | return integrated_gradients_text, integrated_gradients_image 48 | 49 | # Example usage 50 | text_sample = np.random.rand(300) 51 | image_sample = np.random.rand(512) 52 | baseline_text = np.zeros(300) 53 | baseline_image = np.zeros(512) 54 | 55 | inputs = [text_sample, image_sample] 56 | baseline = [baseline_text, baseline_image] 57 | 58 | # Compute Integrated Gradients 59 | attributions_text, attributions_image = integrated_gradients(model, inputs, baseline) 60 | print("Integrated Gradients for text features:", attributions_text) 61 | print("Integrated Gradients for image features:", attributions_image) 62 | 63 | # Visualize Integrated Gradients 64 | import matplotlib.pyplot as plt 65 | 66 | plt.figure(figsize=(12, 6)) 67 | 68 | # Plot for text features 69 | plt.subplot(1, 2, 1) 70 | plt.plot(attributions_text) 71 | plt.title("Integrated Gradients for Text Features") 72 | plt.xlabel("Feature Index") 73 | plt.ylabel("Attribution") 74 | 75 | # Plot for image features 76 | plt.subplot(1, 2, 2) 77 | plt.plot(attributions_image) 78 | plt.title("Integrated Gradients for Image Features") 79 | plt.xlabel("Feature Index") 80 | plt.ylabel("Attribution") 81 | 82 | plt.tight_layout() 83 | plt.show() 84 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/650_Multimodal/multimodal_explanations_attention.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Dense, Attention, Concatenate, Lambda 3 | from tensorflow.keras.models import Model 4 | from tensorflow.keras.utils import plot_model 5 | 6 | # Define text and image inputs 7 | text_input = tf.keras.Input(shape=(100, 300), name='text_input') # 100 tokens, 300-d embeddings 8 | image_input = tf.keras.Input(shape=(49, 512), name='image_input') # 7x7 image patches, 512-d features 9 | 10 | # Text attention mechanism 11 | text_query = Dense(64, activation='tanh')(text_input) 12 | text_key = Dense(64, activation='tanh')(text_input) 13 | text_value = Dense(64, activation='tanh')(text_input) 14 | text_attention = Attention()([text_query, text_value, text_key]) 15 | 16 | # Reduce mean for text attention 17 | text_representation = Lambda(lambda x: tf.reduce_mean(x, axis=1))(text_attention) 18 | 19 | # Image attention mechanism 20 | image_query = Dense(64, activation='tanh')(image_input) 21 | image_key = Dense(64, activation='tanh')(image_input) 22 | image_value = Dense(64, activation='tanh')(image_input) 23 | image_attention = Attention()([image_query, image_value, image_key]) 24 | 25 | # Reduce mean for image attention 26 | image_representation = Lambda(lambda x: tf.reduce_mean(x, axis=1))(image_attention) 27 | 28 | # Concatenate text and image representations 29 | combined_representation = Concatenate()([text_representation, image_representation]) 30 | output = Dense(1, activation='sigmoid')(combined_representation) 31 | 32 | # Build and compile the model 33 | model = Model(inputs=[text_input, image_input], outputs=output) 34 | model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 35 | 36 | # Print model summary 37 | model.summary() 38 | 39 | # Save the model 
plot as a PNG image 40 | plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True) 41 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/Images/Adversarial Robustness Testing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/660_Robustness/Images/Adversarial Robustness Testing.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/Images/Fairness-aware Explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/660_Robustness/Images/Fairness-aware Explanation.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/Images/Robustness Testing for Explanations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Echoslayer/XAI_From_Classical_Models_to_LLMs/50f1a068648dabdadab5c2e0a4029e36577593e5/ch06_techniques_for_explainable_ai/660_Robustness/Images/Robustness Testing for Explanations.png -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/adversarial_robustness_testing.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load MNIST dataset 6 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 7 | x_test = x_test.astype('float32') / 255.0 8 | x_test = np.expand_dims(x_test, axis=-1) 9 | 10 | # Define a simple CNN model 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 13 | tf.keras.layers.MaxPooling2D((2, 2)), 14 | tf.keras.layers.Flatten(), 15 | tf.keras.layers.Dense(10, activation='softmax') 16 | ]) 17 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy') 18 | 19 | # Train the model 20 | model.fit(x_train, y_train, epochs=5, batch_size=64) 21 | 22 | # Function to compute saliency map 23 | def compute_saliency_map(model, input_image, label): 24 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 25 | with tf.GradientTape() as tape: 26 | tape.watch(input_image) 27 | predictions = model(input_image) 28 | loss = predictions[0, label] 29 | gradient = tape.gradient(loss, input_image).numpy()[0] 30 | saliency_map = np.max(np.abs(gradient), axis=-1) 31 | return saliency_map 32 | 33 | # Generate an adversarial example using FGSM (Fast Gradient Sign Method) 34 | def generate_adversarial_example(model, input_image, label, epsilon=0.1): 35 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 36 | with tf.GradientTape() as tape: 37 | tape.watch(input_image) 38 | predictions = model(input_image) 39 | loss = predictions[0, label] # Probability assigned to the true class 40 | gradient = tape.gradient(loss, input_image) 41 | perturbation = epsilon * tf.sign(gradient) 42 | adversarial_image = tf.clip_by_value(input_image - perturbation, 0.0, 1.0) # Subtract: stepping down the true-class probability is equivalent to FGSM's ascent on the cross-entropy loss; adding it would strengthen the correct prediction instead. Clip to the valid pixel range. 43 | return adversarial_image.numpy()[0] 44 | 
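# Quick sanity check, a minimal sketch assuming only the helper above: a genuinely
# adversarial step should reduce the model's confidence in the true class as the
# step size epsilon grows.
for eps in [0.0, 0.05, 0.1, 0.2]:
    adv = generate_adversarial_example(model, x_test[0], y_test[0], epsilon=eps)
    p_true = model.predict(adv[np.newaxis, ...], verbose=0)[0, y_test[0]]
    print(f"epsilon={eps:.2f} -> p(true class)={p_true:.4f}")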
45 | # Original and adversarial saliency maps 46 | original_image = x_test[0] 47 | adversarial_image = generate_adversarial_example(model, original_image, y_test[0]) 48 | saliency_original = compute_saliency_map(model, original_image, y_test[0]) 49 | saliency_adversarial = compute_saliency_map(model, adversarial_image, y_test[0]) 50 | 51 | # Compute Adversarial Robustness Score (ARS) 52 | delta = adversarial_image - original_image 53 | ars = 1 - np.linalg.norm(saliency_original - saliency_adversarial) / np.linalg.norm(delta) 54 | print(f"Adversarial Robustness Score (ARS): {ars:.4f}") 55 | 56 | # Display the saliency maps 57 | plt.figure(figsize=(12, 5)) 58 | plt.subplot(1, 2, 1) 59 | plt.title("Original Saliency Map") 60 | plt.imshow(saliency_original, cmap='hot') 61 | plt.axis('off') 62 | 63 | plt.subplot(1, 2, 2) 64 | plt.title("Adversarial Saliency Map") 65 | plt.imshow(saliency_adversarial, cmap='hot') 66 | plt.axis('off') 67 | plt.show() -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/explanation_consistency_score.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | # Load MNIST dataset 5 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 6 | x_test = x_test.astype('float32') / 255.0 7 | x_test = np.expand_dims(x_test, axis=-1) 8 | 9 | # Define a simple CNN model 10 | model = tf.keras.Sequential([ 11 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 12 | tf.keras.layers.MaxPooling2D((2, 2)), 13 | tf.keras.layers.Flatten(), 14 | tf.keras.layers.Dense(10, activation='softmax') 15 | ]) 16 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy') 17 | 18 | # Train the model 19 | model.fit(x_train, y_train, epochs=5, batch_size=64) 20 | 21 | # Function to compute gradient-based explanations 22 | def compute_gradients(model, input_image, label): 23 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 24 | with tf.GradientTape() as tape: 25 | tape.watch(input_image) 26 | predictions = model(input_image) 27 | loss = predictions[0, label] 28 | gradient = tape.gradient(loss, input_image).numpy()[0] 29 | return gradient 30 | 31 | # Consistency Analysis 32 | image1 = x_test[0] 33 | image2 = x_test[1] # A second test image used for comparison (not necessarily the same digit) 34 | grad1 = compute_gradients(model, image1, y_test[0]) 35 | grad2 = compute_gradients(model, image2, y_test[1]) 36 | ecs = np.dot(grad1.flatten(), grad2.flatten()) / (np.linalg.norm(grad1) * np.linalg.norm(grad2)) 37 | print(f"Explanation Consistency Score (ECS): {ecs:.4f}") 38 | 39 | # Stability Analysis 40 | perturbed_image = image1 + 0.1 * np.random.normal(size=image1.shape) 41 | grad_perturbed = compute_gradients(model, perturbed_image, y_test[0]) 42 | ess = 1 - np.linalg.norm(grad1 - grad_perturbed) / np.linalg.norm(image1 - perturbed_image) 43 | print(f"Explanation Stability Score (ESS): {ess:.4f}")
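# As implemented above, the ECS is exactly the cosine similarity between the two
# flattened gradient maps. A minimal cross-check, assuming SciPy is available
# (it ships as a scikit-learn dependency):
from scipy.spatial.distance import cosine
assert np.isclose(ecs, 1 - cosine(grad1.flatten(), grad2.flatten()))
# Note that the ESS is not bounded below by zero: it turns negative whenever the
# explanation shifts more than the input perturbation that caused it.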
-------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/fairness_aware_explanation.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | # Define a simple neural network model 5 | model = tf.keras.Sequential([ 6 | tf.keras.layers.Dense(16, activation='relu', input_shape=(10,)), 7 | tf.keras.layers.Dense(1, activation='sigmoid') 8 | ]) 9 | model.compile(optimizer='adam', loss='binary_crossentropy') 10 | 11 | # Generate synthetic data 12 | np.random.seed(0) 13 | X = np.random.rand(1000, 10) 14 | y = (X[:, 0] + X[:, 1] > 1).astype(int) # Binary target 15 | sensitive_attribute = np.random.choice([0, 1], size=(1000,)) # Gender (0 = male, 1 = female) 16 | 17 | # Train the model 18 | model.fit(X, y, epochs=10, batch_size=32) 19 | 20 | # Function to compute fairness-aware attributions 21 | def compute_attributions(model, X, sensitive_attr): 22 | gradients_male = [] 23 | gradients_female = [] 24 | 25 | for i, x in enumerate(X): 26 | # Convert input to tf.Tensor 27 | x_tensor = tf.convert_to_tensor(x, dtype=tf.float32) 28 | 29 | with tf.GradientTape() as tape: 30 | tape.watch(x_tensor) 31 | prediction = model(tf.expand_dims(x_tensor, axis=0)) 32 | gradient = tape.gradient(prediction, x_tensor).numpy() 33 | 34 | if sensitive_attr[i] == 0: # Male 35 | gradients_male.append(gradient) 36 | else: # Female 37 | gradients_female.append(gradient) 38 | 39 | avg_grad_male = np.mean(gradients_male, axis=0) 40 | avg_grad_female = np.mean(gradients_female, axis=0) 41 | attribution_disparity = np.abs(avg_grad_male - avg_grad_female) 42 | 43 | return avg_grad_male, avg_grad_female, attribution_disparity 44 | 45 | # Compute attributions and disparity 46 | avg_grad_male, avg_grad_female, attribution_disparity = compute_attributions(model, X, sensitive_attribute) 47 | 48 | print("Average gradient attributions (male):", avg_grad_male) 49 | print("Average gradient attributions (female):", avg_grad_female) 50 | print("Attribution disparity:", attribution_disparity) 51 | 52 | import matplotlib.pyplot as plt 53 | 54 | # Visualization of attribution disparity 55 | plt.bar(range(10), attribution_disparity) 56 | plt.xlabel('Feature Index') 57 | plt.ylabel('Attribution Disparity') 58 | plt.title('Feature Attribution Disparity between Male and Female') 59 | plt.show() 60 | -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/invariant_explanation_testing.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | # Load MNIST dataset 5 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 6 | x_test = x_test.astype('float32') / 255.0 7 | x_test = np.expand_dims(x_test, axis=-1) 8 | 9 | # Define a simple CNN model 10 | model = tf.keras.Sequential([ 11 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 12 | tf.keras.layers.MaxPooling2D((2, 2)), 13 | tf.keras.layers.Flatten(), 14 | tf.keras.layers.Dense(10, activation='softmax') 15 | ]) 16 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy') 17 | 18 | # Train the model 19 | model.fit(x_train, y_train, epochs=5, batch_size=64) 20 | 21 | # Function to compute gradient-based explanations 22 | def compute_gradients(model, input_image, label): 23 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 24 | with tf.GradientTape() as tape: 25 | tape.watch(input_image) 26 | predictions = model(input_image) 27 | loss = predictions[0, label] 28 | gradient = tape.gradient(loss, input_image).numpy()[0] 29 | return gradient 30 | 31 | # Split test data into two environments (even and odd digits), keeping the matching labels 32 | even_digits, even_labels = x_test[y_test % 2 == 0], y_test[y_test % 2 == 0] 33 | odd_digits, odd_labels = x_test[y_test % 2 == 1], y_test[y_test % 2 == 1] 34 | 35 | # Compute gradient explanations for both environments, using each instance's own label 36 | grad_even = compute_gradients(model, even_digits[0], even_labels[0])
37 | grad_odd = compute_gradients(model, odd_digits[0], odd_labels[0]) 38 | 39 | # Calculate Explanation Invariance Score (EIS) 40 | eis = 1 - np.linalg.norm(grad_even - grad_odd) / (np.linalg.norm(grad_even) + np.linalg.norm(grad_odd)) 41 | print(f"Explanation Invariance Score (EIS): {eis:.4f}") -------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/invariant_testing_llms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import dowhy 4 | from dowhy import CausalModel 5 | from sklearn.tree import DecisionTreeClassifier 6 | from sklearn.model_selection import train_test_split 7 | 8 | # Generate synthetic data 9 | np.random.seed(0) 10 | data_size = 1000 11 | X = np.random.rand(data_size, 3) 12 | gender = np.random.choice([0, 1], size=data_size) # Sensitive attribute (0 = male, 1 = female) 13 | income = X[:, 0] + 0.5 * gender + np.random.normal(size=data_size) # Outcome influenced by gender 14 | data = pd.DataFrame({'income': income, 'gender': gender, 'feature1': X[:, 1], 'feature2': X[:, 2]}) 15 | 16 | # Split data into training and testing sets 17 | train_data, test_data = train_test_split(data, test_size=0.2, random_state=0) 18 | 19 | # Train a decision tree model (illustrative only; the causal estimate below does not use it) 20 | model = DecisionTreeClassifier() 21 | model.fit(train_data[['gender', 'feature1', 'feature2']], (train_data['income'] > 0.5).astype(int)) 22 | 23 | # Define causal model using DoWhy 24 | causal_model = CausalModel( 25 | data=train_data, 26 | treatment='gender', 27 | outcome='income', 28 | common_causes=['feature1', 'feature2'] 29 | ) 30 | 31 | # Identify and estimate the Average Causal Effect (ACE) 32 | identified_estimand = causal_model.identify_effect() 33 | estimate = causal_model.estimate_effect(identified_estimand, method_name="backdoor.linear_regression") 34 | print(f"Estimated Average Causal Effect (ACE): {estimate.value:.4f}")
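# A backdoor estimate is only as credible as its assumptions, so a natural
# follow-up is DoWhy's built-in refutation tests. A minimal sketch using two
# standard refuter names from the DoWhy API:
refute_random = causal_model.refute_estimate(identified_estimand, estimate, method_name="random_common_cause")
print(refute_random)  # The ACE should barely move when a random common cause is added
refute_placebo = causal_model.refute_estimate(identified_estimand, estimate, method_name="placebo_treatment_refuter")
print(refute_placebo)  # The ACE should collapse toward zero under a placebo treatment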
-------------------------------------------------------------------------------- /ch06_techniques_for_explainable_ai/660_Robustness/robustness_testing_explanations.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Load MNIST dataset 6 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() 7 | x_test = x_test.astype('float32') / 255.0 8 | x_test = np.expand_dims(x_test, axis=-1) 9 | 10 | # Define a simple CNN model 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), 13 | tf.keras.layers.MaxPooling2D((2, 2)), 14 | tf.keras.layers.Flatten(), 15 | tf.keras.layers.Dense(10, activation='softmax') 16 | ]) 17 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 18 | 19 | # Train the model 20 | model.fit(x_train, y_train, epochs=5, batch_size=64) 21 | 22 | # Function to compute saliency map 23 | def compute_saliency_map(model, input_image, label): 24 | input_image = tf.convert_to_tensor(input_image[np.newaxis, ...]) 25 | with tf.GradientTape() as tape: 26 | tape.watch(input_image) 27 | predictions = model(input_image) 28 | loss = predictions[0, label] 29 | gradient = tape.gradient(loss, input_image) 30 | saliency_map = tf.reduce_max(tf.abs(gradient), axis=-1).numpy()[0] 31 | return saliency_map 32 | 33 | # Test robustness of the saliency map 34 | original_image = x_test[0] 35 | perturbed_image = np.clip(original_image + 0.1 * np.random.normal(size=original_image.shape), 0.0, 1.0) # Clip so the perturbed input stays in the valid pixel range 36 | 37 | # Compute saliency maps 38 | saliency_original = compute_saliency_map(model, original_image, y_test[0]) 39 | saliency_perturbed = compute_saliency_map(model, perturbed_image, y_test[0]) 40 | 41 | # Compute robustness score 42 | robustness_score = 1 - np.linalg.norm(saliency_original - saliency_perturbed) / np.linalg.norm(original_image - perturbed_image) 43 | print(f"Robustness Score: {robustness_score:.4f}") 44 | 45 | # Display the saliency maps 46 | plt.figure(figsize=(10, 5)) 47 | plt.subplot(1, 2, 1) 48 | plt.title("Original Saliency Map") 49 | plt.imshow(saliency_original, cmap='hot') 50 | plt.axis('off') 51 | 52 | plt.subplot(1, 2, 2) 53 | plt.title("Perturbed Saliency Map") 54 | plt.imshow(saliency_perturbed, cmap='hot') 55 | plt.axis('off') 56 | plt.show() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | shap==0.46.0 2 | matplotlib==3.9.2 3 | tensorflow==2.18.0 4 | seaborn==0.13.2 5 | transformers==4.46.2 6 | # tf-keras==2.18.0 7 | pygam==0.9.1 8 | tensorflow-probability==0.25.0 9 | xgboost==2.1.2 10 | lime==0.2.0.1 11 | captum==0.7.0 12 | torch==2.5.1 13 | torchvision==0.20.1 14 | opencv-python==4.10.0.84 15 | ipython==8.18 16 | fastdtw==0.3.4 17 | statsmodels==0.14.4 18 | pytest==8.3.3 19 | causal-learn==0.1.3.8 20 | causality==0.0.10 21 | torch-geometric==2.6.1 22 | dowhy==0.11.1 --------------------------------------------------------------------------------