├── concept_fig.png ├── images ├── conceptFig.jpg ├── top_genes_tcr.png └── top_pathways_img.png ├── utils.py ├── datasets.py ├── LICENSE ├── top_pathways.py ├── figures ├── make_fig2.py └── supplementary_figures │ ├── mcfarland_cond_top_pathways.py │ ├── .ipynb_checkpoints │ ├── mcfarland_cond_top_pathways-checkpoint.py │ ├── drop_g-checkpoint.ipynb │ └── g_enrichments-checkpoint.ipynb │ ├── drop_g.ipynb │ └── g_enrichments.ipynb ├── README.md ├── benchmark_intercode.py ├── summary.py ├── get_top_pathways.py ├── pathexplainer.py ├── standard_VAE_impute_benchmark.py └── benchmark_pmvae.py /concept_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suinleelab/PAUSE/HEAD/concept_fig.png -------------------------------------------------------------------------------- /images/conceptFig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suinleelab/PAUSE/HEAD/images/conceptFig.jpg -------------------------------------------------------------------------------- /images/top_genes_tcr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suinleelab/PAUSE/HEAD/images/top_genes_tcr.png -------------------------------------------------------------------------------- /images/top_pathways_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suinleelab/PAUSE/HEAD/images/top_pathways_img.png -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def parse_gmt(path, symbols=None, min_genes=10): 4 | lut = dict() 5 | for line in open(path, 'r'): 6 | key, _, *genes = line.strip().split() 7 | if symbols is not None: 8 | genes = symbols.intersection(genes).tolist() 9 | if len(genes) < min_genes: 10 | continue 11 | lut[key] = genes 12 | 13 | return lut 14 | 15 | def load_annotations(gmt, genes, min_genes=10): 16 | genesets = parse_gmt(gmt, genes, min_genes) 17 | annotations = pd.DataFrame(False, index=genes, columns=genesets.keys()) 18 | for key, genes in genesets.items(): 19 | annotations.loc[genes, key] = True 20 | 21 | return annotations -------------------------------------------------------------------------------- /datasets.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset, DataLoader 2 | import torch 3 | import numpy as np 4 | import pandas as pd 5 | 6 | class RNASeqData(Dataset): 7 | 8 | def __init__(self, X, c=None, y=None, transform=None): 9 | self.X = X 10 | self.y = y 11 | self.c = c 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return self.X.shape[0] 16 | 17 | def __getitem__(self, index): 18 | sample = self.X[index,:] 19 | 20 | if self.transform is not None: 21 | sample = self.transform(sample) 22 | 23 | if self.y is not None and self.c is not None: 24 | return sample, self.y[index], self.c[index] 25 | if self.y is None and self.c is not None: 26 | return sample, self.c[index] 27 | else: 28 | return sample -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Lee Lab @ UW Allen School 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /top_pathways.py: -------------------------------------------------------------------------------- 1 | import anndata 2 | import numpy as np 3 | import pandas as pd 4 | import torch 5 | import torch.nn.functional as F 6 | import os 7 | 8 | from utils import load_annotations 9 | from sklearn.model_selection import train_test_split 10 | 11 | from torch.utils.data import Dataset, DataLoader 12 | from datasets import RNASeqData 13 | 14 | from pathexplainer import PathExplainerTorch 15 | from sklearn.linear_model import LogisticRegression 16 | 17 | from models import pmVAEModel 18 | import os 19 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 20 | 21 | 22 | 23 | def main(): 24 | 25 | # load data 26 | 27 | # kang dataset 28 | data = anndata.read('data/kang_count.h5ad') 29 | 30 | # haber dataset 31 | #data = anndata.read('/projects/leelab/data/single-cell/haber_2017/preprocessed/adata_top_2000_genes.h5ad') 32 | #data = data[data.obs['condition'] != 'Salmonella'].copy() 33 | 34 | symbols = data.var_names 35 | 36 | number_of_replicates = 10 37 | 38 | first_run = True 39 | 40 | # for 10 experimental replicates 41 | for rand_seed in range(number_of_replicates): 42 | 43 | print("replicate number " + str(rand_seed)) 44 | 45 | # split data 46 | 47 | train_data, test_data = train_test_split(data, 48 | test_size=0.25, 49 | shuffle=True, 50 | random_state=rand_seed) 51 | tr_data, val_data = train_test_split(train_data, 52 | test_size=0.25, 53 | shuffle=True, 54 | random_state=rand_seed) 55 | 56 | tr_ds = RNASeqData(np.array(tr_data.X)) 57 | val_ds = RNASeqData(np.array(val_data.X)) 58 | 59 | # load annotations 60 | membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt', 61 | symbols, 62 | min_genes=13 63 | ).astype(bool).T 64 | 65 | ## 66 | ## train base model 67 | ## 68 | 69 | # initialize base model 70 | basePMVAE = pmVAEModel(membership_mask.values, 71 | [12], 72 | 1, 73 | beta=1e-05, 74 | terms=membership_mask.index, 75 | add_auxiliary_module=True 76 | ) 77 | 78 | 79 | if first_run: # first run 80 | top_ig = pd.DataFrame(index=basePMVAE.latent_space_names()) 81 | top_lr = pd.DataFrame(index=basePMVAE.latent_space_names()) 82 | first_run = False 83 | 84 | 85 | # train 86 | basePMVAE.train(tr_ds, val_ds, 87 | checkpoint_path='top_kang.pkl', 88 | max_epochs=100) 89 | 90 | basePMVAE.set_gpu(False) 91 | 92 | 93 | # IG pathway rankings 94 | print("Calc IG score") 95 | def model_loss_wrapper(z): 96 | module_outputs = basePMVAE.model.decoder_net(z) 97 | global_recon = basePMVAE.model.merge(module_outputs) 98 | return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1) 99 | 100 | ground_truth = torch.tensor(np.array(val_data.X)).float() 101 | outs = basePMVAE.model(ground_truth) 102 | 103 | input_data = outs.z 104 | baseline_data = torch.zeros(outs.z.shape[1]) 105 | baseline_data.requires_grad = True 106 | 107 | explainer = PathExplainerTorch(model_loss_wrapper) 108 | attributions = explainer.attributions(input_data, 109 | baseline=baseline_data, 110 | num_samples=200, 111 | use_expectation=False) 112 | 113 | np_attribs = attributions.detach().numpy() 114 | top_ig[rand_seed] = np_attribs.mean(0) 115 | 116 | # so far! 117 | top_ig.to_csv('kang_ig.csv', index=False) 118 | 119 | 120 | 121 | 122 | # LR pathway rankings 123 | print("Calc LR score") 124 | 125 | y_tr = tr_data.obs['condition'] 126 | y_val = val_data.obs['condition'] 127 | 128 | train_labels = (y_tr == 'stimulated').values 129 | val_labels = (y_val == 'stimulated').values 130 | 131 | 132 | train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float()).z.detach().numpy() 133 | val_embedding = basePMVAE.model(torch.tensor(val_data.X).float()).z.detach().numpy() 134 | 135 | lr_scores = [] 136 | for pathway in range(train_embedding.shape[1]): 137 | clf = LogisticRegression(random_state=0).fit(train_embedding[:,pathway].reshape(-1,1), train_labels) 138 | lr_scores.append(clf.score(val_embedding[:,pathway].reshape(-1,1), val_labels)) 139 | 140 | 141 | top_lr[rand_seed] = lr_scores 142 | top_lr[rand_seed] = -1.*top_lr[rand_seed] 143 | 144 | 145 | # so far! 146 | top_lr.to_csv('kang_lr.csv', index=False) 147 | 148 | 149 | 150 | top_ig.to_csv('kang_ig.csv', index=False) 151 | top_lr.to_csv('kang_lr.csv', index=False) 152 | 153 | 154 | 155 | if __name__ == '__main__': 156 | main() -------------------------------------------------------------------------------- /figures/make_fig2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from scipy import stats 4 | import matplotlib.pyplot as plt 5 | import seaborn as sb 6 | from statannotations.Annotator import Annotator 7 | 8 | DATASETS = ['kang', 'haber', 'datlinger'] 9 | 10 | METHODS = ['impute', 'retrain'] 11 | PAL = 'colorblind' 12 | 13 | LABEL_SIZE = 18 14 | TITLE_SIZE = 18 15 | AXES_SIZE = 18 16 | LEG_SIZE = 14 17 | 18 | 19 | def get_arrays(dataset, method): 20 | ig_results = np.load('complete_results/'+ dataset + '_' + method + '_ig.npy') 21 | logvar_results = np.load('complete_results/'+ dataset + '_' + method + '_logvar.npy') 22 | lr_results = np.load('complete_results/'+ dataset + '_' + method + '_lr.npy') 23 | kld_results = np.load('complete_results/'+ dataset + '_' + method + '_kld.npy') 24 | rand_results = np.load('complete_results/'+ dataset + '_' + method + '_rand.npy') 25 | 26 | return ig_results, logvar_results, lr_results, kld_results, rand_results 27 | 28 | # load results for single dataset and benchmark method 29 | def load_res(dataset, method): 30 | 31 | ig_results, logvar_results, lr_results, kld_results, rand_results = get_arrays(dataset, method) 32 | 33 | # get AUCs 34 | ig_aucs = np.trapz(ig_results, axis=1) 35 | lr_aucs = np.trapz(lr_results, axis=1) 36 | kld_aucs = np.trapz(kld_results, axis=1) 37 | rand_aucs = np.trapz(rand_results, axis=1) 38 | logvar_aucs = np.trapz(logvar_results, axis=1) 39 | 40 | auc_stack = np.concatenate((ig_aucs, lr_aucs, kld_aucs, rand_aucs, logvar_aucs)) 41 | 42 | num_trials = 10 43 | #rankings_methods = np.concatenate((['Loss Attribution']*num_trials, ['LR Score']*num_trials, ['KL Divergence']*num_trials, ['Random']*num_trials, ['LS Variance']*num_trials)) 44 | 45 | rankings_methods = np.concatenate((['PAUSE']*num_trials, ['LR']*num_trials, ['KLD']*num_trials, ['Random']*num_trials, ['LSV']*num_trials)) 46 | 47 | 48 | 49 | results = pd.DataFrame(index=list(range(0,50))) 50 | results['methods'] = rankings_methods 51 | results['aucs'] = auc_stack 52 | 53 | return results 54 | 55 | 56 | def get_subplot(dataset, method): 57 | 58 | plt.rc('axes', titlesize=TITLE_SIZE) # fontsize title 59 | plt.rc('axes', labelsize=AXES_SIZE) # fontsize of the x and y axis labels 60 | plt.rc('xtick', labelsize=LABEL_SIZE) # fontsize of the (method) tick labels 61 | 62 | results = load_res(dataset, method) 63 | 64 | plt.style.use('seaborn-colorblind') 65 | 66 | fig, ax = plt.subplots(figsize=(6,4)) 67 | 68 | bp = sb.boxplot(ax=ax, 69 | data=results,x='methods',y='aucs',dodge=True, 70 | color='white', fliersize=0, 71 | ) 72 | 73 | sb.stripplot(ax=ax, 74 | data=results,x='methods',y='aucs', 75 | dodge=True, 76 | s=4) 77 | 78 | """ 79 | pairs=[("Loss Attribution", "LR Score")] 80 | annotator = Annotator(ax, pairs, data=results, x='methods',y='aucs') 81 | annotator.set_custom_annotations(['**']) 82 | annotator.annotate() 83 | """ 84 | 85 | # for ** position 86 | top = [results[results['methods'] == "LR"].max()['aucs'], 87 | results[results['methods'] == "KLD"].max()['aucs'], 88 | results[results['methods'] == "Random"].max()['aucs'], 89 | results[results['methods'] == "LSV"].max()['aucs']] 90 | 91 | 92 | for i in range(4): 93 | plt.text(x=bp.get_xticks()[i+1] - 0.07, y=top[i] + 0.001, s='**', fontdict={'size':12, 'color':'black'}) 94 | 95 | 96 | ax.set_ylabel('AUC') 97 | 98 | if method == "retrain": # not for bottom row 99 | ax.set_xlabel('Pathway Ranking Method') 100 | else: 101 | ax.set_xlabel('') 102 | 103 | #plt.title(get_title(dataset) + ' ' + method.capitalize() + ' Benchmark') 104 | plt.title(method.capitalize()) 105 | 106 | 107 | plt.savefig('figs/dataset=%s-method=%s.pdf' % (dataset, method), bbox_inches='tight') 108 | 109 | plt.show() 110 | 111 | 112 | 113 | def get_title(dataset): 114 | dataset_title = '' 115 | if dataset == 'kang': 116 | dataset_title = 'PBMC' 117 | if dataset == 'haber': 118 | dataset_title = 'Intestinal' 119 | if dataset == 'datlinger': 120 | dataset_title = 'Jurkat' 121 | if dataset == 'grubman': 122 | dataset_title = 'Entorhinal' 123 | return dataset_title 124 | 125 | 126 | 127 | # get single line graph 128 | def get_lines(dataset, method): 129 | ig_results, logvar_results, lr_results, kld_results, rand_results = get_arrays(dataset, method) 130 | 131 | plt.style.use('seaborn-colorblind') 132 | 133 | fig, ax = plt.subplots(figsize=(6,4)) 134 | 135 | sb.lineplot(data=ig_results.mean(0), label='PAUSE') 136 | sb.lineplot(data=lr_results.mean(0), label='LR') 137 | sb.lineplot(data=kld_results.mean(0), label='KLD') 138 | sb.lineplot(data=rand_results.mean(0), label='Random') 139 | sb.lineplot(data=logvar_results.mean(0), label='LSV') 140 | 141 | 142 | if method == 'impute': 143 | ax.set_xlabel('Number of Top Pathways Ablated') 144 | 145 | if method == 'retrain': 146 | ax.set_xlabel('Number of Top Pathways Included') 147 | 148 | plt.legend(fontsize=LEG_SIZE) 149 | 150 | ax.set_ylabel('Reconstruction Error') 151 | 152 | #plt.title(get_title(dataset) + ' ' + method.capitalize() + ' Benchmark') 153 | plt.title(method.capitalize()) 154 | 155 | 156 | plt.savefig('figs/lines-dataset=%s-method=%s.pdf' % (dataset, method),bbox_inches='tight') 157 | 158 | plt.show() 159 | 160 | 161 | if __name__ == '__main__': 162 | 163 | for dataset in DATASETS: 164 | for method in METHODS: 165 | get_subplot(dataset, method) 166 | 167 | get_lines('haber', 'impute') 168 | get_lines('haber', 'retrain') 169 | 170 | -------------------------------------------------------------------------------- /figures/supplementary_figures/mcfarland_cond_top_pathways.py: -------------------------------------------------------------------------------- 1 | # get Mcfarland top pathways, condition on cell lines 2 | 3 | import anndata 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | import torch.nn.functional as F 8 | import os 9 | 10 | from utils import load_annotations 11 | from sklearn.model_selection import train_test_split 12 | 13 | from torch.utils.data import Dataset, DataLoader 14 | from datasets import RNASeqData 15 | 16 | from pathexplainer import PathExplainerTorch 17 | from sklearn.linear_model import LogisticRegression 18 | from sklearn.preprocessing import OneHotEncoder 19 | import argparse 20 | 21 | 22 | from models import pmVAEModel 23 | import os 24 | import time 25 | 26 | save_path = 'new_for_revision/new_res/' 27 | 28 | 29 | def main(): 30 | 31 | ig_times = [] 32 | lr_times = [] 33 | train_times = [] 34 | 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument('dataset', action="store", default='kang') 37 | parser.add_argument('which_gpu', action="store", default='0') 38 | 39 | args = parser.parse_args() 40 | 41 | os.environ["CUDA_VISIBLE_DEVICES"]=args.which_gpu 42 | dataset =args.dataset 43 | 44 | # load data 45 | 46 | # load mcfarland data 47 | data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad') 48 | 49 | data = data[data.obs['condition'] == 'Idasanutlin'].copy() 50 | symbols = data.var_names 51 | 52 | conditions = np.array(data.obs['cell_line']).reshape(-1,1) 53 | enc = OneHotEncoder() 54 | enc.fit(conditions) 55 | pre_processed_conditions = enc.transform(conditions).toarray() 56 | 57 | number_of_replicates = 10 58 | 59 | first_run = True 60 | 61 | # for 10 experimental replicates 62 | for rand_seed in range(number_of_replicates): 63 | 64 | print("replicate number " + str(rand_seed)) 65 | 66 | # split data 67 | 68 | train_data, test_data, train_c, test_c = train_test_split(data,pre_processed_conditions, 69 | test_size=0.25, 70 | shuffle=True, 71 | random_state=rand_seed) 72 | tr_data, val_data, tr_c, val_c = train_test_split(train_data,train_c, 73 | test_size=0.25, 74 | shuffle=True, 75 | random_state=rand_seed) 76 | 77 | tr_ds = RNASeqData(np.array(tr_data.X), c=tr_c) 78 | val_ds = RNASeqData(np.array(val_data.X), c=val_c) 79 | 80 | # load annotations 81 | membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt', 82 | symbols, 83 | min_genes=13 84 | ).astype(bool).T 85 | 86 | ## 87 | ## train model 88 | ## 89 | 90 | # initialize base model 91 | basePMVAE = pmVAEModel(membership_mask.values, 92 | [12], 93 | 1, 94 | cdim = tr_c.shape[1], 95 | beta=1e-05, 96 | terms=membership_mask.index, 97 | add_auxiliary_module=True 98 | ) 99 | 100 | 101 | if first_run: # first run 102 | top_ig = pd.DataFrame(index=basePMVAE.latent_space_names()) 103 | top_lr = pd.DataFrame(index=basePMVAE.latent_space_names()) 104 | first_run = False 105 | 106 | 107 | # train 108 | 109 | start_train = time.time() 110 | basePMVAE.train(tr_ds, val_ds, 111 | checkpoint_path='saved_models/seed_' + str(rand_seed) + 'cell_lines_cond_top_' + dataset + '.pkl', 112 | max_epochs=100) 113 | 114 | end_train = time.time() 115 | train_times.append(end_train - start_train) 116 | 117 | 118 | basePMVAE.set_gpu(False) 119 | 120 | 121 | # IG pathway rankings 122 | print("Calc IG score") 123 | 124 | start_ig = time.time() 125 | 126 | def model_loss_wrapper(z): 127 | latent_input = torch.cat([z, c_full], 1) 128 | module_outputs = basePMVAE.model.decoder_net(latent_input) 129 | global_recon = basePMVAE.model.merge(module_outputs) 130 | return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1) 131 | 132 | ground_truth = torch.tensor(data.X).float() 133 | c_full = torch.tensor(pre_processed_conditions).float() 134 | outs = basePMVAE.model(ground_truth,c_full) 135 | 136 | input_data = outs.z 137 | baseline_data = torch.zeros(outs.z.shape[1]) 138 | baseline_data.requires_grad = True 139 | 140 | explainer = PathExplainerTorch(model_loss_wrapper) 141 | attributions = explainer.attributions(input_data, 142 | baseline=baseline_data, 143 | num_samples=200, 144 | use_expectation=False) 145 | 146 | np_attribs = attributions.detach().numpy() 147 | top_ig[rand_seed] = np_attribs.mean(0) 148 | 149 | end_ig = time.time() 150 | ig_times.append(end_ig - start_ig) 151 | 152 | 153 | # so far! 154 | top_ig.to_csv(save_path + dataset + '_cell_lines_cond_ig.csv', index=False) 155 | 156 | 157 | # LR pathway rankings 158 | print("Calc LR score") 159 | start_lr = time.time() 160 | 161 | if args.dataset == 'mcfarland': 162 | 163 | y_tr = tr_data.obs['TP53_mutation_status'] 164 | y_val = val_data.obs['TP53_mutation_status'] 165 | 166 | train_labels = (y_tr == 'Wild Type').values 167 | val_labels = (y_val == 'Wild Type').values 168 | 169 | 170 | train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float(), torch.tensor(tr_c).float()).z.detach().numpy() 171 | val_embedding = basePMVAE.model(torch.tensor(val_data.X).float(), torch.tensor(val_c).float()).z.detach().numpy() 172 | 173 | 174 | lr_scores = [] 175 | for pathway in range(train_embedding.shape[1]): 176 | clf = LogisticRegression(random_state=0).fit(train_embedding[:,pathway].reshape(-1,1), train_labels) 177 | lr_scores.append(clf.score(val_embedding[:,pathway].reshape(-1,1), val_labels)) 178 | 179 | 180 | top_lr[rand_seed] = lr_scores 181 | top_lr[rand_seed] = -1.*top_lr[rand_seed] 182 | 183 | end_lr = time.time() 184 | lr_times.append(end_lr - start_lr) 185 | 186 | 187 | # so far! 188 | top_lr.to_csv(save_path + dataset + '_cell_lines_cond_lr.csv', index=False) 189 | 190 | times = pd.DataFrame() 191 | times['ig_times'] = ig_times 192 | times['lr_times'] = lr_times 193 | times['train_times'] = train_times 194 | 195 | times.to_csv(save_path + args.dataset + '_cell_lines_cond_times.csv') 196 | 197 | 198 | if __name__ == '__main__': 199 | main() -------------------------------------------------------------------------------- /figures/supplementary_figures/.ipynb_checkpoints/mcfarland_cond_top_pathways-checkpoint.py: -------------------------------------------------------------------------------- 1 | # get Mcfarland top pathways, condition on cell lines 2 | 3 | import anndata 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | import torch.nn.functional as F 8 | import os 9 | 10 | from utils import load_annotations 11 | from sklearn.model_selection import train_test_split 12 | 13 | from torch.utils.data import Dataset, DataLoader 14 | from datasets import RNASeqData 15 | 16 | from pathexplainer import PathExplainerTorch 17 | from sklearn.linear_model import LogisticRegression 18 | from sklearn.preprocessing import OneHotEncoder 19 | import argparse 20 | 21 | 22 | from models import pmVAEModel 23 | import os 24 | import time 25 | 26 | save_path = 'new_for_revision/new_res/' 27 | 28 | 29 | def main(): 30 | 31 | ig_times = [] 32 | lr_times = [] 33 | train_times = [] 34 | 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument('dataset', action="store", default='kang') 37 | parser.add_argument('which_gpu', action="store", default='0') 38 | 39 | args = parser.parse_args() 40 | 41 | os.environ["CUDA_VISIBLE_DEVICES"]=args.which_gpu 42 | dataset =args.dataset 43 | 44 | # load data 45 | 46 | # load mcfarland data 47 | data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad') 48 | 49 | data = data[data.obs['condition'] == 'Idasanutlin'].copy() 50 | symbols = data.var_names 51 | 52 | conditions = np.array(data.obs['cell_line']).reshape(-1,1) 53 | enc = OneHotEncoder() 54 | enc.fit(conditions) 55 | pre_processed_conditions = enc.transform(conditions).toarray() 56 | 57 | number_of_replicates = 10 58 | 59 | first_run = True 60 | 61 | # for 10 experimental replicates 62 | for rand_seed in range(number_of_replicates): 63 | 64 | print("replicate number " + str(rand_seed)) 65 | 66 | # split data 67 | 68 | train_data, test_data, train_c, test_c = train_test_split(data,pre_processed_conditions, 69 | test_size=0.25, 70 | shuffle=True, 71 | random_state=rand_seed) 72 | tr_data, val_data, tr_c, val_c = train_test_split(train_data,train_c, 73 | test_size=0.25, 74 | shuffle=True, 75 | random_state=rand_seed) 76 | 77 | tr_ds = RNASeqData(np.array(tr_data.X), c=tr_c) 78 | val_ds = RNASeqData(np.array(val_data.X), c=val_c) 79 | 80 | # load annotations 81 | membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt', 82 | symbols, 83 | min_genes=13 84 | ).astype(bool).T 85 | 86 | ## 87 | ## train model 88 | ## 89 | 90 | # initialize base model 91 | basePMVAE = pmVAEModel(membership_mask.values, 92 | [12], 93 | 1, 94 | cdim = tr_c.shape[1], 95 | beta=1e-05, 96 | terms=membership_mask.index, 97 | add_auxiliary_module=True 98 | ) 99 | 100 | 101 | if first_run: # first run 102 | top_ig = pd.DataFrame(index=basePMVAE.latent_space_names()) 103 | top_lr = pd.DataFrame(index=basePMVAE.latent_space_names()) 104 | first_run = False 105 | 106 | 107 | # train 108 | 109 | start_train = time.time() 110 | basePMVAE.train(tr_ds, val_ds, 111 | checkpoint_path='saved_models/seed_' + str(rand_seed) + 'cell_lines_cond_top_' + dataset + '.pkl', 112 | max_epochs=100) 113 | 114 | end_train = time.time() 115 | train_times.append(end_train - start_train) 116 | 117 | 118 | basePMVAE.set_gpu(False) 119 | 120 | 121 | # IG pathway rankings 122 | print("Calc IG score") 123 | 124 | start_ig = time.time() 125 | 126 | def model_loss_wrapper(z): 127 | latent_input = torch.cat([z, c_full], 1) 128 | module_outputs = basePMVAE.model.decoder_net(latent_input) 129 | global_recon = basePMVAE.model.merge(module_outputs) 130 | return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1) 131 | 132 | ground_truth = torch.tensor(data.X).float() 133 | c_full = torch.tensor(pre_processed_conditions).float() 134 | outs = basePMVAE.model(ground_truth,c_full) 135 | 136 | input_data = outs.z 137 | baseline_data = torch.zeros(outs.z.shape[1]) 138 | baseline_data.requires_grad = True 139 | 140 | explainer = PathExplainerTorch(model_loss_wrapper) 141 | attributions = explainer.attributions(input_data, 142 | baseline=baseline_data, 143 | num_samples=200, 144 | use_expectation=False) 145 | 146 | np_attribs = attributions.detach().numpy() 147 | top_ig[rand_seed] = np_attribs.mean(0) 148 | 149 | end_ig = time.time() 150 | ig_times.append(end_ig - start_ig) 151 | 152 | 153 | # so far! 154 | top_ig.to_csv(save_path + dataset + '_cell_lines_cond_ig.csv', index=False) 155 | 156 | 157 | # LR pathway rankings 158 | print("Calc LR score") 159 | start_lr = time.time() 160 | 161 | if args.dataset == 'mcfarland': 162 | 163 | y_tr = tr_data.obs['TP53_mutation_status'] 164 | y_val = val_data.obs['TP53_mutation_status'] 165 | 166 | train_labels = (y_tr == 'Wild Type').values 167 | val_labels = (y_val == 'Wild Type').values 168 | 169 | 170 | train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float(), torch.tensor(tr_c).float()).z.detach().numpy() 171 | val_embedding = basePMVAE.model(torch.tensor(val_data.X).float(), torch.tensor(val_c).float()).z.detach().numpy() 172 | 173 | 174 | lr_scores = [] 175 | for pathway in range(train_embedding.shape[1]): 176 | clf = LogisticRegression(random_state=0).fit(train_embedding[:,pathway].reshape(-1,1), train_labels) 177 | lr_scores.append(clf.score(val_embedding[:,pathway].reshape(-1,1), val_labels)) 178 | 179 | 180 | top_lr[rand_seed] = lr_scores 181 | top_lr[rand_seed] = -1.*top_lr[rand_seed] 182 | 183 | end_lr = time.time() 184 | lr_times.append(end_lr - start_lr) 185 | 186 | 187 | # so far! 188 | top_lr.to_csv(save_path + dataset + '_cell_lines_cond_lr.csv', index=False) 189 | 190 | times = pd.DataFrame() 191 | times['ig_times'] = ig_times 192 | times['lr_times'] = lr_times 193 | times['train_times'] = train_times 194 | 195 | times.to_csv(save_path + args.dataset + '_cell_lines_cond_times.csv') 196 | 197 | 198 | if __name__ == '__main__': 199 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PAUSE 2 | 3 | ![Main Concept Fig](/images/conceptFig.jpg) 4 | 5 | Code for the paper "Principled feature attribution for unsupervised gene expression analysis" (PAUSE). 6 | For more information, see our preprint: https://www.biorxiv.org/content/10.1101/2022.05.03.490535v1. 7 | 8 | ## Examples 9 | 10 | ### Identify most important pathways from an interpretable autoencoder 11 | This first example demonstrates how the PAUSE framework can be used to identify the most important pathways for an interpretable autoencoder. 12 | 13 | ```python 14 | import anndata 15 | # and other import statements... 16 | 17 | ## load a single cell dataset 18 | data = anndata.read('data/kang_count.h5ad') 19 | 20 | ## load a pathway gene set file 21 | ## more examples can be found here (http://www.gsea-msigdb.org/gsea/msigdb/collections.jsp) 22 | data.varm['annotations'] = load_annotations( 23 | 'data/c2.cp.reactome.v7.4.symbols.gmt', 24 | data.var_names, 25 | min_genes=13 26 | ) 27 | # binary matrix mapping from genes to pathways 28 | membership_mask = data.varm['annotations'].astype(bool).T.values 29 | ``` 30 | 31 | After loading the RNA-seq dataset you want to analyze, you can then initialize and train a model on the dataset. In this case, we use our PyTorch implementation of the [pmVAE architecture](https://www.biorxiv.org/content/10.1101/2021.01.28.428664v1), which is a variational autoencoder composed of a set of subnetworks (pathway modules) that are factorized according to the gene sets defined above. In this model, each latent node in the bottleneck layer only contains information about the genes belonging to its corresponding pathway. 32 | 33 | ```python 34 | from models import pmVAEModel 35 | 36 | # initialize pmVAE model. 37 | # positional arguments are 1) the binary gene set membership matrix, 38 | # 2) a list containing the number of nodes in each hidden layer, and 39 | # 3) an integer indicating the number of nodes in each module's bottleneck. 40 | pmvae = pmVAEModel( 41 | membership_mask, 42 | [12], # This indicates that there will be one intermediate layer before the bottleneck with 12 nodes in each module. To have 2 intermediate layers of 6 nodes, you could write [6, 6] 43 | 4, # number of nodes in each module bottleneck 44 | terms=membership_mask.index, # a list of the names of the pathway modules 45 | add_auxiliary_module=True # whether or not to include a densely connected auxiliary module 46 | ) 47 | 48 | # train pmVAE model 49 | pmvae.train(train_dataset, # a PyTorch dataset object containing the training expression samples 50 | val_dataset, # a PyTorch dataset object containing the val expression samples 51 | max_epochs=200, # Maximum number of epochs to train 52 | lr=0.001, # learning rate of the adam optimizer used to train the model 53 | beta=1e-5, # weight multiplier of KL loss term 54 | batch_size=256, # samples per batch 55 | pathway_dropout=True, # whether or not to train with pathway dropout scheme as defined in pmVAE paper 56 | checkpoint_path='pmvae_checkpoint.pkl' # path of model checkpoint 57 | ) 58 | ``` 59 | 60 | Once the model is trained, we can use the [Path Explain software](https://github.com/suinleelab/path_explain) (also provided in this repository in the `pathexplainer.py` file) to *identify the top pathways* in the dataset by explaining the trained models reconstruction error with respect to the learned latent pathways. 61 | 62 | ```python 63 | from pathexplainer import PathExplainerTorch 64 | import torch 65 | import torch.nn.functional as F 66 | 67 | # define a wrapper function that outputs the reconstruction error of the model given the latent codes 68 | def model_loss_wrapper(z): 69 | module_outputs = pmvae.model.decoder_net(z) 70 | global_recon = pmvae.model.merge(module_outputs) 71 | return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1) 72 | 73 | # define a tensor to hold the original data, which gets used as an argument in the reconstruction error in the wrapper above 74 | ground_truth = torch.tensor(data.X).float() 75 | 76 | # get the latent codes to use as input to the model loss wrapper 77 | outs = pmvae.model(ground_truth) 78 | input_data = outs.z 79 | baseline_data = torch.zeros(outs.z.shape[1]) # define a baseline, in this case the zeros vector 80 | baseline_data.requires_grad = True 81 | 82 | # calculate the pathway attributions 83 | explainer = PathExplainerTorch(model_loss_wrapper) 84 | attributions = explainer.attributions(input_data, 85 | baseline=baseline_data, 86 | num_samples=200, # number of samples to use when calculating the path integral 87 | use_expectation=False) 88 | 89 | ``` 90 | 91 | Once you have calculated the pathway attributions, you can average them over all samples in the dataset to identify and plot the most important pathways. 92 | 93 | ```python 94 | # move attributions to numpy, make a df w/ index as latent space names 95 | np_attribs = attributions.detach().numpy() 96 | top_features = pd.DataFrame(index=pmvae.latent_space_names()) 97 | top_features['global_attribs'] = np_attribs.mean(0) # in this case, global attributions are the mean over the dataset 98 | 99 | # Loss explanation 100 | top_features.sort_values('global_attribs',ascending=True).iloc[:30,0].plot.bar() 101 | ``` 102 | 103 | ![Showing pathway attributions](/images/top_pathways_img.png) 104 | 105 | ### Identify most important genes contributing to a particular latent pathway 106 | This first example demonstrates how the PAUSE framework can be used to identify the most important pathways for an interpretable autoencoder. However, as you see above, these interpretable autoencoders often have multiple bottleneck nodes for each pathway, raising the question of what the difference between these bottleneck nodes is. Additionally, sometimes the most important pathways are the "uninterpretable" densely-connected auxiliary pathways. How can we identify the most important genes contributing to these latent pathways, and interpret their biological meaning? By using gene level attributions. This example uses another pmVAE model, as in the above example. This time, however, instead of getting attributions of the loss to the latent pathways, we can pick a latent pathway and explain it in terms of its input genes. 107 | 108 | ```python 109 | from summary import summary_plot 110 | 111 | # explain tcr in terms of genes 112 | def model_latent_wrapper(x): 113 | outs = pmvae.model(x) 114 | z = outs.mu 115 | return z[:,316].reshape(-1,1) # 316 is the latent node number corresponding to the pathway of interest here 116 | 117 | input_data = torch.tensor(data.X).float() 118 | input_data.requires_grad = True 119 | baseline_data = torch.zeros(data.X.shape[1]) 120 | baseline_data.requires_grad = True 121 | 122 | explainer = PathExplainerTorch(model_latent_wrapper) # this time, use explanation software with latent output wrapper 123 | attributions = explainer.attributions(input_data, 124 | baseline=baseline_data, 125 | num_samples=200, # again use 200 interpolation points to numerically approximate the path integral 126 | use_expectation=False) 127 | 128 | np_attribs = attributions.detach().numpy() 129 | top_features = pd.DataFrame(index=membership_mask.columns) 130 | top_features['global_attribs'] = np.abs(np_attribs).mean(0) # to find top genes, we take the average MAGNITUDE of attribs across all samples 131 | 132 | summary_plot(np_attribs, 133 | data.X, 134 | feature_names=membership_mask.columns, 135 | plot_top_k=10, 136 | standardize_features=False, 137 | scale_x_ind=False, 138 | scale_y_ind=False, 139 | figsize=(4, 4), 140 | dpi=300, 141 | cmap=coolwarm) 142 | ``` 143 | 144 | ![Showing genee attributions](/images/top_genes_tcr.png) 145 | 146 | ## Reproducing experiments and figures from paper 147 | 148 | For code to generate the models used, see "models.py". Pathway attributions and gene attributions are generated using code from "pathexplainer.py". Benchmarking pathways attributions against other methods for ranking pathway importance is done using the files "benchmark_pmvae.py", "benchmark_intercode.py", and "top_pathways.py". For code to generate the figures in the paper, see the folder `figures`. 149 | 150 | -------------------------------------------------------------------------------- /benchmark_intercode.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # benchmark_pmvae.py 3 | 4 | import anndata 5 | import numpy as np 6 | import pandas as pd 7 | import torch 8 | import torch.nn.functional as F 9 | import os 10 | 11 | from utils import load_annotations 12 | from sklearn.model_selection import train_test_split 13 | 14 | from torch.utils.data import Dataset, DataLoader 15 | from datasets import RNASeqData 16 | 17 | import argparse 18 | 19 | from pathexplainer import PathExplainerTorch 20 | from sklearn.linear_model import LogisticRegression 21 | 22 | from models import pmVAEModel 23 | 24 | from intercode import AutoencoderLinearDecoder, train_autoencoder 25 | 26 | import os 27 | os.environ["CUDA_VISIBLE_DEVICES"]="4" 28 | 29 | def main(): 30 | 31 | # get dataset, removal method 32 | parser = argparse.ArgumentParser() 33 | # parser.add_argument('split', action="store", default='0') 34 | parser.add_argument('dataset', action="store", default='kang') 35 | parser.add_argument('removal', action="store", default='impute') 36 | 37 | args = parser.parse_args() 38 | 39 | # load data 40 | if args.dataset == 'kang': 41 | 42 | data = anndata.read('data/kang_count.h5ad') 43 | data.varm['I'] = load_annotations( 44 | 'data/c2.cp.reactome.v7.4.symbols.gmt', 45 | data.var_names, 46 | min_genes=13 47 | ).values 48 | data.uns['terms'] = list(load_annotations( 49 | 'data/c2.cp.reactome.v7.4.symbols.gmt', 50 | data.var_names, 51 | min_genes=13 52 | ).columns) 53 | 54 | number_of_pathways = 20 55 | number_of_replicates = 10 56 | 57 | l2_results = np.zeros((number_of_replicates,number_of_pathways)) 58 | ig_results = np.zeros((number_of_replicates,number_of_pathways)) 59 | # lr_results = np.zeros((number_of_replicates,number_of_pathways)) 60 | # kld_results = np.zeros((number_of_replicates,number_of_pathways)) 61 | rand_results = np.zeros((number_of_replicates,number_of_pathways)) 62 | 63 | # for 10 experimental replicates 64 | for rand_seed in range(number_of_replicates): 65 | 66 | print("replicate number " + str(rand_seed)) 67 | 68 | # split data 69 | 70 | train_data, test_data = train_test_split(data, 71 | test_size=0.25, 72 | shuffle=True, 73 | random_state=rand_seed) 74 | tr_data, val_data = train_test_split(train_data, 75 | test_size=0.25, 76 | shuffle=True, 77 | random_state=rand_seed) 78 | 79 | tr_ds = RNASeqData(np.array(tr_data.X)) 80 | val_ds = RNASeqData(np.array(val_data.X)) 81 | 82 | # load annotations 83 | membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt', 84 | data.var_names, 85 | min_genes=13 86 | ).astype(bool).T 87 | 88 | ## 89 | ## train base model 90 | ## 91 | 92 | LR = 0.001 93 | BATCH_SIZE = 62 94 | N_EPOCHS = 30 95 | 96 | # regularization hyperparameters 97 | # lambda0 - page 19 of presentation 98 | # lambdas 1-3 - last term on page 20 99 | 100 | LAMBDA0 = 0.1 101 | 102 | LAMBDA1 = 0.93*LR 103 | LAMBDA2 = 0.43*LR 104 | LAMBDA3 = 0.57*LR 105 | 106 | # initialize base model 107 | autoencoder = AutoencoderLinearDecoder(tr_data.n_vars, n_ann=len(tr_data.uns['terms'])) 108 | autoencoder.cuda() 109 | 110 | # train 111 | train_autoencoder(tr_data, autoencoder, LR, BATCH_SIZE, N_EPOCHS, 112 | l2_reg_lambda0=LAMBDA0, lambda1=LAMBDA1, lambda2=LAMBDA2, lambda3=LAMBDA3) 113 | 114 | ## 115 | ## get pathway rankings 116 | ## 117 | top_features = pd.DataFrame(index=data.uns['terms']) 118 | 119 | ## get L2 120 | top_features['l2'] = -1.*autoencoder.decoder.weight_dict['annotated'].data.norm(p=2, dim=0).detach().cpu().numpy() 121 | 122 | print("Calc IG score") 123 | # IG pathway rankings 124 | ground_truth = torch.tensor(val_data.X).float() 125 | autoencoder.cpu() 126 | 127 | def intercode_loss_wrapper(z): 128 | global_recon = autoencoder.decoder(z) 129 | return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1) 130 | 131 | 132 | input_data = autoencoder.encoder(torch.tensor(val_data.X).float()) 133 | baseline_data = torch.zeros(input_data.shape[1]) 134 | baseline_data.requires_grad = True 135 | 136 | explainer = PathExplainerTorch(intercode_loss_wrapper) 137 | attributions = explainer.attributions(input_data, 138 | baseline=baseline_data, 139 | num_samples=200, 140 | use_expectation=False) 141 | 142 | top_features['IG'] = attributions.detach().numpy().mean(0) 143 | 144 | # # LR pathway rankings 145 | # print("Calc LR score") 146 | # y_tr = tr_data.obs['condition'] 147 | # y_val = val_data.obs['condition'] 148 | 149 | # train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float()).z.detach().numpy() 150 | # val_embedding = basePMVAE.model(torch.tensor(val_data.X).float()).z.detach().numpy() 151 | 152 | # lr_scores = [] 153 | # for pathway in range(train_embedding.shape[1]): 154 | # train_labels = (y_tr == 'stimulated').values 155 | # val_labels = (y_val == 'stimulated').values 156 | # clf = LogisticRegression(random_state=0).fit(train_embedding[:,pathway].reshape(-1,1), train_labels) 157 | # lr_scores.append(clf.score(val_embedding[:,pathway].reshape(-1,1), val_labels)) 158 | 159 | # top_features['lr_score'] = lr_scores 160 | # top_features['lr_score'] = -1.*top_features['lr_score'] 161 | 162 | # # KLD pathway rankings 163 | # print("Calc KLD") 164 | # pathway_kld = (-0.5 * (1 + outs.logvar - outs.mu.pow(2) - outs.logvar.exp()).mean(0)).detach().numpy() 165 | # top_features['kld'] = -1.*pathway_kld 166 | 167 | # Random pathway rankings 168 | print("Calc Random") 169 | np.random.seed(rand_seed) 170 | top_features['rand'] = np.random.randn(top_features.shape[0]) 171 | 172 | # impute or retrain 173 | def impute_benchmark(method,n_pathways=20): 174 | method_recons_errors = [] 175 | 176 | # for top 10 pathways 177 | for i in range(1,1+n_pathways): 178 | 179 | # set pathways = 0. 180 | test_matrix = torch.tensor(test_data.X).float() 181 | test_matrix_embedded = autoencoder.encoder(test_matrix) 182 | for x in top_features.sort_values(method).index[:i]: 183 | index_to_zero = list(top_features.index).index(x) 184 | test_matrix_embedded[:,index_to_zero] = 0. 185 | 186 | global_recon = autoencoder.decoder(test_matrix_embedded) 187 | recons_error = F.mse_loss(global_recon, test_matrix).detach().item() 188 | method_recons_errors.append(recons_error) 189 | return method_recons_errors 190 | 191 | print("Impute L2") 192 | l2_results[rand_seed,:] = impute_benchmark('l2') 193 | print("Impute IG") 194 | ig_results[rand_seed,:] = impute_benchmark('IG') 195 | # print("Impute LR") 196 | # lr_results[rand_seed,:] = impute_benchmark('lr_score') 197 | # print("Impute KLD") 198 | # kld_results[rand_seed,:] = impute_benchmark('kld') 199 | print("Impute RAND") 200 | rand_results[rand_seed,:] = impute_benchmark('rand') 201 | 202 | # save results 203 | with open('results/intercode_kang_impute_l2.npy', 'wb') as f: 204 | np.save(f, l2_results) 205 | with open('results/intercode_kang_impute_ig.npy', 'wb') as f: 206 | np.save(f, ig_results) 207 | # with open('results/intercode_kang_impute_lr.npy', 'wb') as f: 208 | # np.save(f, lr_results) 209 | # with open('results/intercode_kang_impute_kld.npy', 'wb') as f: 210 | # np.save(f, kld_results) 211 | with open('results/intercode_kang_impute_rand.npy', 'wb') as f: 212 | np.save(f, rand_results) 213 | 214 | if __name__ == '__main__': 215 | main() -------------------------------------------------------------------------------- /summary.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines a function to plot individual feature-level importances 3 | in a summary plot. 4 | """ 5 | import pandas as pd 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | import matplotlib as mpl 9 | from scatter import _get_bounds, _color_bar, _get_shared_limits, _set_axis_config 10 | import colors 11 | 12 | def _get_jitter_array(feature_values, 13 | select_attributions): 14 | """ 15 | Helper function to get jitter in a summary plot. 16 | Args: 17 | feature_values: see summary_plot 18 | select_attributions: see summary_plot 19 | """ 20 | jitter_array = np.zeros(feature_values.shape) 21 | for i in range(select_attributions.shape[1]): 22 | feature_attr = select_attributions[:, i] 23 | num_samples = feature_attr.shape[0] 24 | nbins = 100 25 | quant = np.round(nbins * (feature_attr - np.min(feature_attr)) / \ 26 | (np.max(feature_attr) - \ 27 | np.min(feature_attr) + 1e-8)) 28 | inds = np.argsort(quant + np.random.randn(num_samples) * 1e-6) 29 | layer = 0 30 | last_bin = -1 31 | jitter_values = np.zeros(num_samples) 32 | for ind in inds: 33 | if quant[ind] != last_bin: 34 | layer = 0 35 | jitter_values[ind] = np.ceil(layer / 2) * ((layer % 2) * 2 - 1) 36 | layer += 1 37 | last_bin = quant[ind] 38 | jitter_values *= 0.9 * (1.0 / np.max(jitter_values + 1)) 39 | jitter_array[:, i] = jitter_values 40 | return jitter_array 41 | 42 | def _get_jitter_df(interactions, feature_values, 43 | select_attributions, attributions, 44 | interaction_feature, feature_order): 45 | """ 46 | Helper function to call the jitter matrix function. 47 | """ 48 | if interactions is None: 49 | jitter_array = _get_jitter_array(feature_values, select_attributions) 50 | jitter_df = pd.DataFrame(jitter_array) 51 | else: 52 | if interactions.shape == attributions.shape: 53 | select_interactions = interactions[:, feature_order] 54 | else: 55 | if interaction_feature is None: 56 | raise ValueError('Argument interaction was specified ' + \ 57 | 'but argument interaction_feature was not.') 58 | select_interactions = interactions[:, feature_order, interaction_feature] 59 | jitter_df = pd.DataFrame(select_interactions) 60 | return jitter_df 61 | 62 | def summary_plot(attributions, 63 | feature_values, 64 | interactions=None, 65 | interaction_feature=None, 66 | feature_names=None, 67 | plot_top_k=None, 68 | standardize_features=True, 69 | scale_x_ind=False, 70 | scale_y_ind=False, 71 | figsize=(8, 4), 72 | dpi=150, 73 | **kwargs): 74 | """ 75 | Function to draw an interactive scatter plot of 76 | attribution values. Since this is built on top 77 | of altair, this function works best when the 78 | number of points is small (< 5000). 79 | Args: 80 | attributions: A matrix of attributions. 81 | Should be of shape [batch_size, feature_dims]. 82 | feature_values: A matrix of feature values. 83 | Should the same shape as the attributions. 84 | interactions: Either a matrix of the same shape as attributions representing 85 | the interaction between interaction_feature and all other features, 86 | or a matrix that can be indexed as 87 | interactions[:, :, interaction_feature]. 88 | interaction_feature: A feature to use for interactions if interactions 89 | are provided as all pairwise interactions. 90 | feature_names: An optional list of length attributions.shape[1]. Each 91 | entry should be a string representing the name of a feature. 92 | plot_top_k: The number of features to plot. If none, will plot all features. 93 | This might take a while, depending on how many features you have. 94 | scale_x_ind: Set to True to scale the x axes of each plot independently. 95 | Defaults to False. 96 | scale_y_ind: Set to True to scale the y axes of each plot independently. 97 | Defaults to False. 98 | figsize: Figure size in matplotlib units. Each figure will be square. 99 | dpi: Resolution of each plot. 100 | kwargs: Passed to plt.scatter 101 | """ 102 | if plot_top_k is None: 103 | plot_top_k = attributions.shape[1] 104 | mean_abs_attr = np.mean(np.abs(attributions), axis=0) 105 | max_order = np.argsort(mean_abs_attr) 106 | feature_order = max_order[::-1][:plot_top_k] 107 | 108 | if feature_names is None: 109 | feature_names = ['Feature {}'.format(i) for i in range(feature_values.shape[1])] 110 | 111 | feature_values = feature_values[:, feature_order] 112 | select_attributions = attributions[:, feature_order] 113 | feature_names = [feature_names[i] for i in feature_order] 114 | 115 | if standardize_features: 116 | standardized_feature_values = (feature_values - np.mean(feature_values, 117 | axis=0, 118 | keepdims=True)) 119 | standardized_feature_values = standardized_feature_values / \ 120 | (np.std(standardized_feature_values, 121 | axis=0, 122 | keepdims=True) + 1e7) 123 | else: 124 | standardized_feature_values = feature_values 125 | 126 | vmin, vmax = _get_bounds(standardized_feature_values) 127 | standardized_feature_values = np.clip(standardized_feature_values, vmin, vmax) 128 | 129 | attribution_names = ['Attribution to {}'.format(feature_names[i]) for \ 130 | i in range(len(feature_names))] 131 | feature_df = pd.DataFrame(standardized_feature_values) 132 | attribution_df = pd.DataFrame(select_attributions) 133 | feature_df.columns = feature_names 134 | attribution_df.columns = attribution_names 135 | 136 | feature_df = pd.melt(feature_df, var_name='Feature', value_name='Normalized Feature Value') 137 | attribution_df = pd.melt(attribution_df, var_name='Attribution', value_name='Attribution Value') 138 | attribution_df = attribution_df.drop(columns=['Attribution']) 139 | 140 | jitter_df = _get_jitter_df(interactions, feature_values, 141 | select_attributions, attributions, 142 | interaction_feature, feature_order) 143 | jitter_df = pd.melt(jitter_df, var_name='Variable', value_name='Jitter') 144 | jitter_df = jitter_df.drop(columns=['Variable']) 145 | melted_df = pd.concat([feature_df, attribution_df, jitter_df], axis=1) 146 | 147 | if 's' not in kwargs: 148 | kwargs['s'] = 4 149 | if 'cmap' not in kwargs: 150 | kwargs['cmap'] = colors.green_gold() 151 | 152 | x_limits, y_limits = _get_shared_limits(melted_df['Attribution Value'], 153 | melted_df['Jitter'], 154 | scale_x_ind, 155 | scale_y_ind) 156 | 157 | fig, axs = plt.subplots(plot_top_k, 1, figsize=figsize, dpi=dpi) 158 | fig.subplots_adjust(left=0.2, hspace=0) 159 | for i in range(plot_top_k - 1): 160 | axis = axs[i] 161 | _set_axis_config(axis, 162 | clear_x_ticks=True, 163 | clear_y_ticks=True) 164 | trans = mpl.transforms.blended_transform_factory(axis.transData, axis.transAxes) 165 | axis.plot([0.0, 1.0], [0.5, 0.5], transform=axis.transAxes, 166 | linewidth=0.5, color='black', alpha=0.3, zorder=1) 167 | axis.plot([0.0, 0.0], [-1.0, 1.0], transform=trans, clip_on=False, 168 | linewidth=0.5, color='black', alpha=0.3, zorder=1) 169 | 170 | axis = axs[-1] 171 | _set_axis_config(axis, 172 | [0.0, 0.0, 0.0, 0.5], 173 | clear_x_ticks=False, 174 | clear_y_ticks=True) 175 | trans = mpl.transforms.blended_transform_factory(axis.transData, axis.transAxes) 176 | axis.plot([0.0, 1.0], [0.5, 0.5], transform=axis.transAxes, 177 | linewidth=0.5, color='black', alpha=0.3, zorder=1) 178 | axis.plot([0.0, 0.0], [0.0, 1.0], transform=trans, 179 | linewidth=0.5, color='black', alpha=0.3, zorder=1) 180 | axis.tick_params(length=4, labelsize=8) 181 | axis.set_xlabel('Attribution Value') 182 | 183 | for i in range(plot_top_k): 184 | axis = axs[i] 185 | selected_df = melted_df.loc[melted_df['Feature'] == feature_names[i]] 186 | trans = mpl.transforms.blended_transform_factory(axis.transAxes, axis.transAxes) 187 | axis.text(-0.02, 0.5, feature_names[i], 188 | horizontalalignment='right', 189 | verticalalignment='center', 190 | fontsize=8, 191 | transform=trans) 192 | axis.scatter(x=selected_df['Attribution Value'], 193 | y=selected_df['Jitter'], 194 | c=selected_df['Normalized Feature Value'], 195 | zorder=2, 196 | **kwargs) 197 | if x_limits is not None: 198 | axis.set_xlim(x_limits) 199 | if y_limits is not None: 200 | axis.set_ylim(y_limits) 201 | 202 | _color_bar(fig, vmin, vmax, 'Feature Value', ticks=False, label_size=8, **kwargs) -------------------------------------------------------------------------------- /get_top_pathways.py: -------------------------------------------------------------------------------- 1 | import anndata 2 | import numpy as np 3 | import pandas as pd 4 | import torch 5 | import torch.nn.functional as F 6 | import os 7 | 8 | from utils import load_annotations 9 | from sklearn.model_selection import train_test_split 10 | 11 | from torch.utils.data import Dataset, DataLoader 12 | from datasets import RNASeqData 13 | 14 | from pathexplainer import PathExplainerTorch 15 | from sklearn.linear_model import LogisticRegression 16 | import argparse 17 | 18 | 19 | from models import pmVAEModel 20 | import mygene 21 | import os 22 | import time 23 | 24 | save_path = 'new_for_revision/new_res/' 25 | 26 | def main(): 27 | 28 | ig_times = [] 29 | lr_times = [] 30 | train_times = [] 31 | 32 | # get dataset, removal method 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('dataset', action="store", default='kang') 35 | parser.add_argument('which_gpu', action="store", default='0') 36 | parser.add_argument('gene_prog', action="store", default='Ctrl') 37 | 38 | args = parser.parse_args() 39 | 40 | os.environ["CUDA_VISIBLE_DEVICES"]=args.which_gpu 41 | dataset =args.dataset 42 | 43 | 44 | # load data 45 | 46 | # load datlinger data 47 | if args.dataset == 'datlinger': 48 | 49 | data = anndata.read('data/datlinger_pp.h5ad') 50 | symbols = data.var_names 51 | 52 | 53 | # load kang data 54 | if args.dataset == 'kang': 55 | 56 | data = anndata.read('data/kang_count.h5ad') 57 | symbols = data.var_names 58 | 59 | 60 | # load mcfarland data 61 | if args.dataset == 'mcfarland': 62 | 63 | data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad') 64 | data = data[data.obs['condition'] == 'Idasanutlin'].copy() 65 | symbols = data.var_names 66 | 67 | 68 | # load zheng data 69 | if args.dataset == 'zheng': 70 | data = anndata.read('/projects/leelab/data/single-cell/zheng_2017/preprocessed/adata_top_2000_genes.h5ad') 71 | 72 | # convert ENSG IDs to gene symbols: 73 | 74 | mg = mygene.MyGeneInfo() 75 | geneList = data.var_names 76 | geneSyms = mg.querymany(geneList , scopes='ensembl.gene', fields='symbol', species='human', returnall=True) 77 | 78 | symbols = [] 79 | not_in = [] 80 | is_in = [] 81 | for k in range(2000): 82 | if ('symbol' in geneSyms['out'][k]): 83 | symbols += [geneSyms['out'][k]['symbol']] 84 | is_in += [geneSyms['out'][k]['query']] 85 | else: 86 | not_in += [geneSyms['out'][k]['query']] 87 | symbols = pd.Index(symbols) 88 | 89 | symbols = pd.Index(set(symbols.to_numpy())) 90 | 91 | # filter out post transplant 92 | data = data[data.obs['condition'] != 'post_transplant'][:,is_in].copy() 93 | 94 | 95 | # load haber data 96 | if args.dataset == 'haber': 97 | 98 | data = anndata.read('/projects/leelab/data/single-cell/haber_2017/preprocessed/adata_top_2000_genes.h5ad') 99 | 100 | # filter out H poly 101 | data = data[data.obs['condition'] != 'Salmonella'].copy() 102 | 103 | symbols = data.var_names 104 | 105 | 106 | # load grubman data 107 | if args.dataset == 'grubman': 108 | 109 | data = anndata.read('/projects/leelab/data/single-cell/grubman_2019/preprocessed/adata_top_2000_genes.h5ad') 110 | 111 | symbols = data.var_names 112 | 113 | 114 | if args.dataset == 'norman': 115 | 116 | data = anndata.read('/projects/leelab/data/single-cell/norman_2019/preprocessed/adata_top_2000_genes_tc.h5ad') 117 | 118 | if args.gene_prog == 'erythroid': 119 | data = data[(data.obs['gene_program'] == 'Ctrl') | (data.obs['gene_program'] == 'Erythroid')].copy() 120 | 121 | if args.gene_prog == 'granulocyte-apoptosis': 122 | data = data[(data.obs['gene_program'] == 'Ctrl') | (data.obs['gene_program'] == 'Granulocyte/apoptosis')].copy() 123 | 124 | if args.gene_prog == 'megakaryocyte': 125 | data = data[(data.obs['gene_program'] == 'Ctrl') | (data.obs['gene_program'] == 'Megakaryocyte')].copy() 126 | 127 | if args.gene_prog == 'pro-growth': 128 | data = data[(data.obs['gene_program'] == 'Ctrl') | (data.obs['gene_program'] == 'Pro-growth')].copy() 129 | 130 | test_df = pd.DataFrame(index=data.var['gene_name']) 131 | symbols = test_df.index 132 | 133 | 134 | 135 | number_of_replicates = 10 136 | first_run = True 137 | 138 | # for 10 experimental replicates 139 | for rand_seed in range(number_of_replicates): 140 | 141 | print("replicate number " + str(rand_seed)) 142 | 143 | # split data 144 | 145 | train_data, test_data = train_test_split(data, 146 | test_size=0.25, 147 | shuffle=True, 148 | random_state=rand_seed) 149 | tr_data, val_data = train_test_split(train_data, 150 | test_size=0.25, 151 | shuffle=True, 152 | random_state=rand_seed) 153 | 154 | tr_ds = RNASeqData(np.array(tr_data.X)) 155 | val_ds = RNASeqData(np.array(val_data.X)) 156 | 157 | # load annotations 158 | membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt', 159 | symbols, 160 | min_genes=13 161 | ).astype(bool).T 162 | 163 | ## 164 | ## train model 165 | ## 166 | 167 | # initialize base model 168 | basePMVAE = pmVAEModel(membership_mask.values, 169 | [12], 170 | 1, 171 | beta=1e-05, 172 | terms=membership_mask.index, 173 | add_auxiliary_module=True 174 | ) 175 | 176 | 177 | if first_run: # first run 178 | 179 | top_ig = pd.DataFrame(index=basePMVAE.latent_space_names()) 180 | top_lr = pd.DataFrame(index=basePMVAE.latent_space_names()) 181 | first_run = False 182 | 183 | # train 184 | 185 | start_train = time.time() 186 | basePMVAE.train(tr_ds, val_ds, 187 | checkpoint_path='saved_models/' + dataset + '_' + args.gene_prog + '.pkl', 188 | max_epochs=100) 189 | 190 | end_train = time.time() 191 | train_times.append(end_train - start_train) 192 | 193 | basePMVAE.set_gpu(False) 194 | 195 | 196 | # IG pathway rankings 197 | print("Calc IG score") 198 | 199 | start_ig = time.time() 200 | 201 | def model_loss_wrapper(z): 202 | module_outputs = basePMVAE.model.decoder_net(z) 203 | global_recon = basePMVAE.model.merge(module_outputs) 204 | return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1) 205 | 206 | ground_truth = torch.tensor(np.array(val_data.X)).float() 207 | outs = basePMVAE.model(ground_truth) 208 | 209 | input_data = outs.z 210 | baseline_data = torch.zeros(outs.z.shape[1]) 211 | baseline_data.requires_grad = True 212 | 213 | explainer = PathExplainerTorch(model_loss_wrapper) 214 | attributions = explainer.attributions(input_data, 215 | baseline=baseline_data, 216 | num_samples=200, 217 | use_expectation=False) 218 | 219 | np_attribs = attributions.detach().numpy() 220 | top_ig[rand_seed] = np_attribs.mean(0) 221 | 222 | end_ig = time.time() 223 | ig_times.append(end_ig - start_ig) 224 | 225 | # so far! 226 | top_ig.to_csv(save_path + dataset + '_ig.csv', index=False) 227 | 228 | 229 | # LR pathway rankings 230 | print("Calc LR score") 231 | start_lr = time.time() 232 | 233 | 234 | if args.dataset == 'kang' or args.dataset == 'datlinger': 235 | y_tr = tr_data.obs['condition'] 236 | y_val = val_data.obs['condition'] 237 | 238 | train_labels = (y_tr == 'stimulated').values 239 | val_labels = (y_val == 'stimulated').values 240 | 241 | 242 | if args.dataset == 'mcfarland': 243 | 244 | y_tr = tr_data.obs['TP53_mutation_status'] 245 | y_val = val_data.obs['TP53_mutation_status'] 246 | 247 | train_labels = (y_tr == 'Wild Type').values 248 | val_labels = (y_val == 'Wild Type').values 249 | 250 | 251 | if args.dataset == 'haber': 252 | y_tr = tr_data.obs['condition'] 253 | y_val = val_data.obs['condition'] 254 | 255 | train_labels = (y_tr == 'Control').values 256 | val_labels = (y_val == 'Control').values 257 | 258 | if args.dataset == 'grubman': 259 | y_tr = tr_data.obs['batchCond'] 260 | y_val = val_data.obs['batchCond'] 261 | 262 | train_labels = (y_tr == 'ct').values 263 | val_labels = (y_val == 'ct').values 264 | 265 | 266 | if args.dataset == 'zheng': 267 | y_tr = tr_data.obs['condition'] 268 | y_val = val_data.obs['condition'] 269 | 270 | train_labels = (y_tr == 'healthy').values 271 | val_labels = (y_val == 'healthy').values 272 | 273 | 274 | if args.dataset == 'norman': 275 | y_tr = tr_data.obs['gene_program'] 276 | y_val = val_data.obs['gene_program'] 277 | 278 | train_labels = (y_tr == 'Ctrl').values 279 | val_labels = (y_val == 'Ctrl').values 280 | 281 | train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float()).z.detach().numpy() 282 | val_embedding = basePMVAE.model(torch.tensor(val_data.X).float()).z.detach().numpy() 283 | 284 | lr_scores = [] 285 | for pathway in range(train_embedding.shape[1]): 286 | clf = LogisticRegression(random_state=0).fit(train_embedding[:,pathway].reshape(-1,1), train_labels) 287 | lr_scores.append(clf.score(val_embedding[:,pathway].reshape(-1,1), val_labels)) 288 | 289 | 290 | top_lr[rand_seed] = lr_scores 291 | top_lr[rand_seed] = -1.*top_lr[rand_seed] 292 | 293 | end_lr = time.time() 294 | lr_times.append(end_lr - start_lr) 295 | 296 | 297 | # so far! 298 | top_lr.to_csv(save_path + dataset + '_lr.csv', index=False) 299 | 300 | times = pd.DataFrame() 301 | times['ig_times'] = ig_times 302 | times['lr_times'] = lr_times 303 | times['train_times'] = train_times 304 | 305 | times.to_csv(save_path + args.dataset + '_times.csv') 306 | 307 | if __name__ == '__main__': 308 | main() -------------------------------------------------------------------------------- /pathexplainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import functools 3 | import operator 4 | import torch 5 | from torch.autograd import grad 6 | import numpy as np 7 | from tqdm import * 8 | 9 | def gather_nd(params, indices): 10 | """ 11 | Args: 12 | params: Tensor to index 13 | indices: k-dimension tensor of integers. 14 | Returns: 15 | output: 1-dimensional tensor of elements of ``params``, where 16 | output[i] = params[i][indices[i]] 17 | 18 | params indices output 19 | 1 2 1 1 4 20 | 3 4 2 0 ----> 5 21 | 5 6 0 0 1 22 | """ 23 | max_value = functools.reduce(operator.mul, list(params.size())) - 1 24 | indices = indices.t().long() 25 | ndim = indices.size(0) 26 | idx = torch.zeros_like(indices[0]).long() 27 | m = 1 28 | 29 | for i in range(ndim)[::-1]: 30 | idx += indices[i]*m 31 | m *= params.size(i) 32 | 33 | idx[idx < 0] = 0 34 | idx[idx > max_value] = 0 35 | return torch.take(params, idx) 36 | 37 | class PathExplainerTorch(object): 38 | def __init__(self, model): 39 | self.model = model 40 | return 41 | 42 | def _get_ref_tensor(self,baseline,batch_size,num_samples): 43 | number_to_draw = num_samples * batch_size 44 | replace = baseline.shape[0] < number_to_draw 45 | sample_indices = np.random.choice(baseline.shape[0], 46 | size=number_to_draw, 47 | replace=replace) 48 | ref_tensor = baseline[sample_indices,:] 49 | 50 | return ref_tensor 51 | 52 | def _get_samples_input(self, input_tensor, baseline, 53 | num_samples, use_expectation): 54 | ''' 55 | calculate interpolation points 56 | Args: 57 | input_tensor: Tensor of shape (batch, ...), where ... indicates 58 | the input dimensions. 59 | reference_tensor: A tensor of shape (batch, k, ...) where ... 60 | indicates dimensions, and k represents the number of background 61 | reference samples to draw per input in the batch. 62 | Returns: 63 | samples_input: A tensor of shape (batch, k, ...) with the 64 | interpolated points between input and ref. 65 | samples_delta: A tensor of shape (batch, 1, ...) with the 66 | difference between input and reference for each sample 67 | ''' 68 | input_dims = list(input_tensor.size())[1:] 69 | num_input_dims = len(input_dims) 70 | batch_size = input_tensor.size()[0] 71 | 72 | if use_expectation: 73 | reference_tensor = self._get_ref_tensor(baseline,batch_size,num_samples) 74 | shape = reference_tensor.shape 75 | reference_tensor = reference_tensor.view( 76 | batch_size, 77 | num_samples, 78 | *(shape[1:])) 79 | 80 | # Grab a [batch_size, k]-sized interpolation sample 81 | t_tensor = torch.FloatTensor(batch_size, num_samples).uniform_(0,1).to(reference_tensor.device) 82 | shape = [batch_size, num_samples] + [1] * num_input_dims 83 | interp_coef = t_tensor.view(*shape) 84 | 85 | # Evaluate the end points 86 | end_point_ref = (1.0 - interp_coef) * reference_tensor 87 | 88 | input_expand_mult = input_tensor.unsqueeze(1) 89 | end_point_input = interp_coef * input_expand_mult 90 | 91 | # Affine Combine 92 | samples_input = end_point_input + end_point_ref 93 | 94 | else: 95 | batch_size = input_tensor.size()[0] 96 | input_expand = input_tensor.unsqueeze(1) 97 | reps = np.ones(len(baseline.shape)).astype(int) 98 | reps[0] = batch_size 99 | reference_tensor = baseline.repeat(list(reps)).unsqueeze(1) 100 | # reference_tensor = torch.as_tensor(sampled_baseline).unsqueeze(1).to(baseline.device) 101 | scaled_inputs = [reference_tensor + (float(i)/num_samples)*(input_expand - reference_tensor) \ 102 | for i in range(0,num_samples+1)] 103 | samples_input = torch.cat(scaled_inputs,dim=1) 104 | 105 | samples_delta = self._get_samples_delta(input_tensor, reference_tensor) 106 | samples_delta = samples_delta.to(samples_input.device) 107 | 108 | return samples_input, samples_delta 109 | 110 | def _get_samples_delta(self, input_tensor, reference_tensor): 111 | input_expand_mult = input_tensor.unsqueeze(1) 112 | sd = input_expand_mult - reference_tensor 113 | return sd 114 | 115 | def _get_grads(self, samples_input, output_indices=None): 116 | 117 | grad_tensor = torch.zeros(samples_input.shape).float().to(samples_input.device) 118 | 119 | k_ = samples_input.shape[1] 120 | 121 | for i in range(k_): 122 | particular_slice = samples_input[:,i] 123 | batch_output = self.model(particular_slice) 124 | # should check that users pass in sparse labels 125 | # Only look at the user-specified label 126 | if batch_output.size(1) > 1: 127 | sample_indices = torch.arange(0,batch_output.size(0)).to(samples_input.device) 128 | indices_tensor = torch.cat([ 129 | sample_indices.unsqueeze(1), 130 | output_indices.unsqueeze(1)], dim=1) 131 | batch_output = gather_nd(batch_output, indices_tensor) 132 | 133 | model_grads = grad( 134 | outputs=batch_output, 135 | inputs=particular_slice, 136 | grad_outputs=torch.ones_like(batch_output).to(samples_input.device), 137 | create_graph=True) 138 | grad_tensor[:,i,:] = model_grads[0] 139 | return grad_tensor 140 | 141 | def attributions(self, input_tensor, baseline, 142 | num_samples = 50, use_expectation=True, 143 | output_indices=None): 144 | """ 145 | Calculate either Expected or Integrated Gradients approximation of 146 | Aumann-Shapley values for the sample ``input_tensor``. 147 | Args: 148 | model (torch.nn.Module): Pytorch neural network model for which the 149 | output should be explained. 150 | input_tensor (torch.Tensor): Pytorch tensor representing the input 151 | to be explained. 152 | baseline (torch.Tensor): Pytorch tensor representing the baseline. 153 | If use_expectation is true, then baseline should be shape 154 | (num_refs, ...) where ... indicates the dimensionality 155 | of the input. Otherwise, baseline should be shape (1, ...). 156 | output_indices (optional, default=None): For multi-class prediction 157 | """ 158 | equal_dims = baseline.shape[1:] == input_tensor.shape[1:] 159 | almost_equal_dims = baseline.shape == input_tensor.shape[1:] 160 | 161 | dev = input_tensor.device 162 | baseline = baseline.to(dev) 163 | 164 | input_tensor.requires_grad_ = True 165 | 166 | if use_expectation and not equal_dims: 167 | raise ValueError('baseline should be shape (num_refs, ...) \ 168 | where ... indicates the dimensionality \ 169 | of the input') 170 | 171 | if not use_expectation and baseline.shape[0] != 1: 172 | if almost_equal_dims: 173 | baseline = baseline.unsqueeze(0) 174 | else: 175 | raise ValueError('baseline should be shape (...) \ 176 | where ... indicates the dimensionality \ 177 | of the input') 178 | 179 | samples_input, samples_delta = self._get_samples_input(input_tensor, baseline, 180 | num_samples, use_expectation) 181 | grad_tensor = self._get_grads(samples_input, output_indices) 182 | mult_grads = samples_delta * grad_tensor 183 | attributions = mult_grads.mean(1) 184 | 185 | return attributions 186 | 187 | def interactions(self, input_tensor, baseline, 188 | num_samples=50, use_expectation=True, 189 | output_indices=None, interaction_index=None, 190 | verbose=True): 191 | """ 192 | samples_input: A tensor of shape (batch, k, features) 193 | ig_tensor: also size (batch, k, features), but contains IG values 194 | 195 | """ 196 | 197 | if len(input_tensor.shape) != 2: 198 | raise ValueError('PyTorch Explainer only supports ' + \ 199 | 'interaction for 2D input tensors!') 200 | 201 | equal_dims = baseline.shape[1:] == input_tensor.shape[1:] 202 | almost_equal_dims = baseline.shape == input_tensor.shape[1:] 203 | 204 | if use_expectation and not equal_dims: 205 | raise ValueError('baseline should be shape (num_refs, ...) \ 206 | where ... indicates the dimensionality \ 207 | of the input') 208 | 209 | if not use_expectation and baseline.shape[0] != 1: 210 | if almost_equal_dims: 211 | baseline = baseline.unsqueeze(0) 212 | else: 213 | raise ValueError('baseline should be shape (...) \ 214 | where ... indicates the dimensionality \ 215 | of the input') 216 | 217 | inner_loop_nsamples = int(round(np.sqrt(num_samples))) 218 | 219 | samples_input, samples_delta = self._get_samples_input(input_tensor, baseline, 220 | inner_loop_nsamples, use_expectation) 221 | 222 | if interaction_index is not None: 223 | interaction_mult_tensor = torch.zeros([input_tensor.size(0), samples_input.size(1), input_tensor.size(1)]) 224 | else: 225 | interaction_mult_tensor = torch.zeros([input_tensor.size(0), samples_input.size(1), 226 | input_tensor.size(1), input_tensor.size(1)]) 227 | 228 | ig_tensor = torch.zeros(samples_input.shape).float() 229 | 230 | if use_expectation: 231 | loop_num = inner_loop_nsamples 232 | else: 233 | loop_num = inner_loop_nsamples + 1 234 | 235 | if verbose: 236 | iterable = tqdm(range(loop_num)) 237 | else: 238 | iterable = range(loop_num) 239 | 240 | for i in iterable: 241 | 242 | particular_slice = samples_input[:,i] 243 | ig_tensor[:,i,:] = self.attributions(particular_slice, baseline, 244 | num_samples=inner_loop_nsamples, use_expectation=use_expectation, 245 | output_indices=output_indices) 246 | 247 | if interaction_index is not None: 248 | second_grads = grad( 249 | outputs=ig_tensor[:,i,interaction_index], 250 | inputs=particular_slice, 251 | grad_outputs=torch.ones_like(ig_tensor[:,i,interaction_index]), 252 | create_graph=True)[0] 253 | interaction_mult_tensor[:,i,:] = second_grads 254 | 255 | else: 256 | for feature in range(input_tensor.size(1)): 257 | second_grads = grad( 258 | outputs=ig_tensor[:,i,feature], 259 | inputs=particular_slice, 260 | grad_outputs=torch.ones_like(ig_tensor[:,i,feature]), 261 | create_graph=True)[0] 262 | interaction_mult_tensor[:,i,feature,:] = second_grads 263 | 264 | interaction_mult_tensor = interaction_mult_tensor.to(samples_delta.device) 265 | if interaction_index is not None: 266 | interaction_tensor = interaction_mult_tensor * samples_delta 267 | else: 268 | interaction_tensor = interaction_mult_tensor * samples_delta.unsqueeze(2) 269 | interactions = interaction_tensor.mean(1) 270 | 271 | return interactions -------------------------------------------------------------------------------- /standard_VAE_impute_benchmark.py: -------------------------------------------------------------------------------- 1 | # impute benchmark on standard VAE 2 | 3 | import anndata 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | import torch.nn.functional as F 8 | import os 9 | import mygene 10 | 11 | from utils import load_annotations 12 | from sklearn.model_selection import train_test_split 13 | 14 | from torch.utils.data import Dataset, DataLoader 15 | from datasets import RNASeqData 16 | 17 | import argparse 18 | 19 | from pathexplainer import PathExplainerTorch 20 | from sklearn.linear_model import LogisticRegression 21 | 22 | from models import VAEModel 23 | import time 24 | 25 | import os 26 | 27 | save_path = 'new_for_revision/new_res/dense/' 28 | 29 | def main(): 30 | 31 | # get dataset, removal method 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument('dataset', action="store", default='kang') 34 | parser.add_argument('removal', action="store", default='impute') 35 | parser.add_argument('which_gpu', action="store", default='0') 36 | 37 | args = parser.parse_args() 38 | 39 | os.environ["CUDA_VISIBLE_DEVICES"]=args.which_gpu 40 | 41 | 42 | # load datlinger data 43 | if args.dataset == 'datlinger': 44 | 45 | data = anndata.read('data/datlinger_pp.h5ad') 46 | symbols = data.var_names 47 | 48 | 49 | # load kang data 50 | if args.dataset == 'kang': 51 | 52 | data = anndata.read('data/kang_count.h5ad') 53 | symbols = data.var_names 54 | 55 | 56 | # load mcfarland data 57 | if args.dataset == 'mcfarland': 58 | 59 | data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad') 60 | data = data[data.obs['condition'] == 'Idasanutlin'].copy() 61 | symbols = data.var_names 62 | 63 | 64 | # load zheng data 65 | if args.dataset == 'zheng': 66 | data = anndata.read('/projects/leelab/data/single-cell/zheng_2017/preprocessed/adata_top_2000_genes.h5ad') 67 | 68 | # convert ENSG IDs to gene symbols: 69 | 70 | mg = mygene.MyGeneInfo() 71 | geneList = data.var_names 72 | geneSyms = mg.querymany(geneList , scopes='ensembl.gene', fields='symbol', species='human', returnall=True) 73 | 74 | symbols = [] 75 | not_in = [] 76 | is_in = [] 77 | for k in range(2000): 78 | if ('symbol' in geneSyms['out'][k]): 79 | symbols += [geneSyms['out'][k]['symbol']] 80 | is_in += [geneSyms['out'][k]['query']] 81 | else: 82 | not_in += [geneSyms['out'][k]['query']] 83 | symbols = pd.Index(symbols) 84 | 85 | symbols = pd.Index(set(symbols.to_numpy())) 86 | 87 | # filter out post transplant 88 | data = data[data.obs['condition'] != 'post_transplant'][:,is_in].copy() 89 | 90 | 91 | # load haber data 92 | if args.dataset == 'haber': 93 | 94 | data = anndata.read('/projects/leelab/data/single-cell/haber_2017/preprocessed/adata_top_2000_genes.h5ad') 95 | 96 | # filter out H poly 97 | data = data[data.obs['condition'] != 'Salmonella'].copy() 98 | 99 | symbols = data.var_names 100 | 101 | 102 | 103 | # load grubman data 104 | if args.dataset == 'grubman': 105 | 106 | data = anndata.read('/projects/leelab/data/single-cell/grubman_2019/preprocessed/adata_top_2000_genes.h5ad') 107 | 108 | symbols = data.var_names 109 | 110 | # for all datasets 111 | data.varm['I'] = load_annotations( 112 | 'data/c2.cp.reactome.v7.4.symbols.gmt', 113 | symbols, 114 | min_genes=33 115 | ).values 116 | data.uns['terms'] = list(load_annotations( 117 | 'data/c2.cp.reactome.v7.4.symbols.gmt', 118 | symbols, 119 | min_genes=33 120 | ).columns) 121 | 122 | number_of_pathways = 20 123 | number_of_replicates = 10 124 | 125 | logvar_results = np.zeros((number_of_replicates,number_of_pathways)) 126 | ig_results = np.zeros((number_of_replicates,number_of_pathways)) 127 | lr_results = np.zeros((number_of_replicates,number_of_pathways)) 128 | kld_results = np.zeros((number_of_replicates,number_of_pathways)) 129 | rand_results = np.zeros((number_of_replicates,number_of_pathways)) 130 | 131 | logvar_times = [] 132 | ig_times = [] 133 | lr_times = [] 134 | kld_times = [] 135 | rand_times = [] 136 | 137 | # for 10 experimental replicates 138 | for rand_seed in range(number_of_replicates): 139 | 140 | print("replicate number " + str(rand_seed)) 141 | 142 | # split data 143 | 144 | train_data, test_data = train_test_split(data, 145 | test_size=0.25, 146 | shuffle=True, 147 | random_state=rand_seed) 148 | tr_data, val_data = train_test_split(train_data, 149 | test_size=0.25, 150 | shuffle=True, 151 | random_state=rand_seed) 152 | 153 | tr_ds = RNASeqData(np.array(tr_data.X)) 154 | val_ds = RNASeqData(np.array(val_data.X)) 155 | 156 | # load annotations 157 | 158 | 159 | membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt', 160 | symbols, 161 | min_genes=13 162 | 163 | ## 164 | ## train base model 165 | ## 166 | 167 | 168 | # initialize base model 169 | basePMVAE = VAEModel(n_features=tr_data.X.shape[1], 170 | hidden_layers=[12*n_pathways, n_pathways], 171 | beta=1e-05, 172 | add_auxiliary_module=False 173 | ) 174 | 175 | 176 | print(basePMVAE.model) 177 | 178 | # train 179 | basePMVAE.train(tr_ds, val_ds, 180 | checkpoint_path='saved_models/dense/'+args.dataset + '_' + args.removal +'_baseModel.pkl', 181 | max_epochs=100) 182 | 183 | basePMVAE.set_gpu(False) 184 | 185 | ## 186 | ## get pathway rankings 187 | ## 188 | top_features = pd.DataFrame(index=data.uns['terms']) 189 | 190 | ## get max val logvar 191 | 192 | print("Calc max val score") 193 | 194 | ground_truth = torch.tensor(np.array(val_data.X)).float() 195 | outs = basePMVAE.model(ground_truth) 196 | 197 | start_logvar= time.time() 198 | 199 | top_features['logvar'] = -1.*outs.logvar.mean(0).detach().numpy() 200 | 201 | end_logvar= time.time() 202 | logvar_times.append(end_logvar-start_logvar) 203 | 204 | # IG pathway rankings 205 | print("Calc IG score") 206 | start_ig = time.time() 207 | 208 | def model_loss_wrapper(z): 209 | module_outputs = basePMVAE.model.decoder_net(z) 210 | 211 | global_recon = module_outputs 212 | #global_recon = basePMVAE.model.merge(module_outputs) 213 | 214 | return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1) 215 | 216 | input_data = outs.z 217 | baseline_data = torch.zeros(outs.z.shape[1]) 218 | baseline_data.requires_grad = True 219 | 220 | explainer = PathExplainerTorch(model_loss_wrapper) 221 | attributions = explainer.attributions(input_data, 222 | baseline=baseline_data, 223 | num_samples=200, #200 224 | use_expectation=False) 225 | 226 | np_attribs = attributions.detach().numpy() 227 | top_features['IG'] = np_attribs.mean(0) 228 | 229 | end_ig = time.time() 230 | ig_times.append(end_ig - start_ig) 231 | 232 | 233 | # LR pathway rankings 234 | print("Calc LR score") 235 | start_lr = time.time() 236 | 237 | if args.dataset == 'kang': 238 | print('here') 239 | y_tr = tr_data.obs['condition'] 240 | y_val = val_data.obs['condition'] 241 | 242 | train_labels = (y_tr == b'stimulated').values 243 | val_labels = (y_val == b'stimulated').values 244 | 245 | print(train_labels.shape) 246 | print(train_labels.sum()) 247 | 248 | print(val_labels.shape) 249 | print(val_labels.sum()) 250 | 251 | print(tr_data.obs['condition']) 252 | 253 | 254 | if args.dataset == 'datlinger': 255 | y_tr = tr_data.obs['condition'] 256 | y_val = val_data.obs['condition'] 257 | 258 | train_labels = (y_tr == 'stimulated').values 259 | val_labels = (y_val == 'stimulated').values 260 | 261 | 262 | if args.dataset == 'mcfarland': 263 | 264 | y_tr = tr_data.obs['TP53_mutation_status'] 265 | y_val = val_data.obs['TP53_mutation_status'] 266 | 267 | train_labels = (y_tr == 'Wild Type').values 268 | val_labels = (y_val == 'Wild Type').values 269 | 270 | 271 | if args.dataset == 'haber': 272 | y_tr = tr_data.obs['condition'] 273 | y_val = val_data.obs['condition'] 274 | 275 | train_labels = (y_tr == 'Control').values 276 | val_labels = (y_val == 'Control').values 277 | 278 | if args.dataset == 'grubman': 279 | y_tr = tr_data.obs['batchCond'] 280 | y_val = val_data.obs['batchCond'] 281 | 282 | train_labels = (y_tr == 'ct').values 283 | val_labels = (y_val == 'ct').values 284 | 285 | 286 | if args.dataset == 'zheng': 287 | y_tr = tr_data.obs['condition'] 288 | y_val = val_data.obs['condition'] 289 | 290 | train_labels = (y_tr == 'healthy').values 291 | val_labels = (y_val == 'healthy').values 292 | 293 | train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float()).z.detach().numpy() 294 | val_embedding = basePMVAE.model(torch.tensor(val_data.X).float()).z.detach().numpy() 295 | 296 | lr_scores = [] 297 | for pathway in range(train_embedding.shape[1]): 298 | clf = LogisticRegression(random_state=0).fit(train_embedding[:,pathway].reshape(-1,1), train_labels) 299 | lr_scores.append(clf.score(val_embedding[:,pathway].reshape(-1,1), val_labels)) 300 | 301 | top_features['lr_score'] = lr_scores 302 | top_features['lr_score'] = -1.*top_features['lr_score'] 303 | 304 | end_lr = time.time() 305 | lr_times.append(end_lr - start_lr) 306 | 307 | 308 | # KLD pathway rankings 309 | print("Calc KLD") 310 | start_kld = time.time() 311 | 312 | pathway_kld = (-0.5 * (1 + outs.logvar - outs.mu.pow(2) - outs.logvar.exp()).mean(0)).detach().numpy() 313 | top_features['kld'] = -1.*pathway_kld 314 | 315 | end_kld = time.time() 316 | kld_times.append(end_kld - start_kld) 317 | 318 | # Random pathway rankings 319 | print("Calc Random") 320 | np.random.seed(rand_seed) 321 | top_features['rand'] = np.random.randn(top_features.shape[0]) 322 | 323 | 324 | times = pd.DataFrame() 325 | times['logvar_times'] = logvar_times 326 | times['ig_times'] = ig_times 327 | times['lr_times'] = lr_times 328 | times['kld_times'] = kld_times 329 | 330 | times.to_csv(save_path + args.dataset + '_times.csv') 331 | 332 | 333 | # impute 334 | def impute_benchmark(method,n_pathways=20): 335 | method_recons_errors = [] 336 | 337 | # for top 20 pathways 338 | for i in range(1,1+n_pathways): 339 | 340 | # set pathways = 0. 341 | test_matrix = torch.tensor(test_data.X).float() 342 | test_matrix_embedded = basePMVAE.model(test_matrix).z 343 | for x in top_features.sort_values(method).index[:i]: 344 | index_to_zero = list(top_features.index).index(x) 345 | test_matrix_embedded[:,index_to_zero] = 0. 346 | 347 | module_outputs = basePMVAE.model.decoder_net(test_matrix_embedded) 348 | 349 | global_recon = module_outputs 350 | 351 | recons_error = F.mse_loss(global_recon, test_matrix).detach().item() 352 | method_recons_errors.append(recons_error) 353 | return method_recons_errors 354 | 355 | # run impute 356 | if args.removal == "impute": 357 | print("Impute Logvar") 358 | logvar_results[rand_seed,:] = impute_benchmark('logvar') 359 | print("Impute IG") 360 | ig_results[rand_seed,:] = impute_benchmark('IG') 361 | print("Impute LR") 362 | lr_results[rand_seed,:] = impute_benchmark('lr_score') 363 | print("Impute KLD") 364 | kld_results[rand_seed,:] = impute_benchmark('kld') 365 | print("Impute RAND") 366 | rand_results[rand_seed,:] = impute_benchmark('rand') 367 | 368 | 369 | # save results every iteration so that if it crashes 370 | # there's at least some progress 371 | with open('{}/{}_{}_logvar.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 372 | np.save(f, logvar_results) 373 | with open('{}/{}_{}_ig.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 374 | np.save(f, ig_results) 375 | with open('{}/{}_{}_lr.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 376 | np.save(f, lr_results) 377 | with open('{}/{}_{}_kld.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 378 | np.save(f, kld_results) 379 | with open('{}/{}_{}_rand.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 380 | np.save(f, rand_results) 381 | 382 | if __name__ == '__main__': 383 | main() 384 | -------------------------------------------------------------------------------- /benchmark_pmvae.py: -------------------------------------------------------------------------------- 1 | import anndata 2 | import numpy as np 3 | import pandas as pd 4 | import torch 5 | import torch.nn.functional as F 6 | import os 7 | import mygene 8 | 9 | from utils import load_annotations 10 | from sklearn.model_selection import train_test_split 11 | 12 | from torch.utils.data import Dataset, DataLoader 13 | from datasets import RNASeqData 14 | 15 | import argparse 16 | 17 | from pathexplainer import PathExplainerTorch 18 | from sklearn.linear_model import LogisticRegression 19 | 20 | from models import pmVAEModel 21 | import time 22 | 23 | import os 24 | 25 | save_path = 'new_for_revision/new_res/' 26 | 27 | def main(): 28 | 29 | # get dataset, removal method 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('dataset', action="store", default='kang') 32 | parser.add_argument('removal', action="store", default='impute') 33 | parser.add_argument('which_gpu', action="store", default='0') 34 | 35 | args = parser.parse_args() 36 | 37 | os.environ["CUDA_VISIBLE_DEVICES"]=args.which_gpu 38 | 39 | # load datlinger data 40 | if args.dataset == 'datlinger': 41 | 42 | data = anndata.read('data/datlinger_pp.h5ad') 43 | symbols = data.var_names 44 | 45 | # load norman data 46 | if args.dataset == 'norman': 47 | data = anndata.read('/projects/leelab/data/single-cell/norman_2019/preprocessed/adata_top_2000_genes_tc.h5ad') 48 | data = data[(data.obs['gene_program'] == 'Ctrl') | (data.obs['gene_program'] == 'Granulocyte/apoptosis')].copy() 49 | 50 | test_df = pd.DataFrame(index=data.var['gene_name']) 51 | symbols = test_df.index 52 | 53 | # load kang data 54 | if args.dataset == 'kang': 55 | 56 | data = anndata.read('data/kang_count.h5ad') 57 | symbols = data.var_names 58 | 59 | 60 | # load mcfarland data 61 | if args.dataset == 'mcfarland': 62 | 63 | data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad') 64 | data = data[data.obs['condition'] == 'Idasanutlin'].copy() 65 | symbols = data.var_names 66 | 67 | # load zheng data 68 | if args.dataset == 'zheng': 69 | data = anndata.read('/projects/leelab/data/single-cell/zheng_2017/preprocessed/adata_top_2000_genes.h5ad') 70 | 71 | # convert ENSG IDs to gene symbols: 72 | 73 | mg = mygene.MyGeneInfo() 74 | geneList = data.var_names 75 | geneSyms = mg.querymany(geneList , scopes='ensembl.gene', fields='symbol', species='human', returnall=True) 76 | 77 | symbols = [] 78 | not_in = [] 79 | is_in = [] 80 | for k in range(2000): 81 | if ('symbol' in geneSyms['out'][k]): 82 | symbols += [geneSyms['out'][k]['symbol']] 83 | is_in += [geneSyms['out'][k]['query']] 84 | else: 85 | not_in += [geneSyms['out'][k]['query']] 86 | symbols = pd.Index(symbols) 87 | 88 | symbols = pd.Index(set(symbols.to_numpy())) 89 | 90 | # filter out post transplant 91 | data = data[data.obs['condition'] != 'post_transplant'][:,is_in].copy() 92 | 93 | # load haber data 94 | if args.dataset == 'haber': 95 | 96 | data = anndata.read('/projects/leelab/data/single-cell/haber_2017/preprocessed/adata_top_2000_genes.h5ad') 97 | 98 | # filter out H poly 99 | data = data[data.obs['condition'] != 'Salmonella'].copy() 100 | 101 | symbols = data.var_names 102 | 103 | 104 | # load grubman data 105 | if args.dataset == 'grubman': 106 | 107 | data = anndata.read('/projects/leelab/data/single-cell/grubman_2019/preprocessed/adata_top_2000_genes.h5ad') 108 | 109 | symbols = data.var_names 110 | 111 | 112 | # for all datasets 113 | data.varm['I'] = load_annotations( 114 | 'data/c2.cp.reactome.v7.4.symbols.gmt', 115 | symbols, 116 | min_genes=13 117 | ).values 118 | data.uns['terms'] = list(load_annotations( 119 | 'data/c2.cp.reactome.v7.4.symbols.gmt', 120 | symbols, 121 | min_genes=13 122 | ).columns) 123 | 124 | top_ig = pd.DataFrame(index=data.uns['terms']) 125 | top_lr = pd.DataFrame(index=data.uns['terms']) 126 | 127 | number_of_pathways = 20 128 | number_of_replicates = 10 129 | 130 | logvar_results = np.zeros((number_of_replicates,number_of_pathways)) 131 | ig_results = np.zeros((number_of_replicates,number_of_pathways)) 132 | lr_results = np.zeros((number_of_replicates,number_of_pathways)) 133 | kld_results = np.zeros((number_of_replicates,number_of_pathways)) 134 | rand_results = np.zeros((number_of_replicates,number_of_pathways)) 135 | 136 | logvar_times = [] 137 | ig_times = [] 138 | lr_times = [] 139 | kld_times = [] 140 | rand_times = [] 141 | 142 | # for 10 experimental replicates 143 | for rand_seed in range(number_of_replicates): 144 | 145 | print("replicate number " + str(rand_seed)) 146 | 147 | # split data 148 | 149 | train_data, test_data = train_test_split(data, 150 | test_size=0.25, 151 | shuffle=True, 152 | random_state=rand_seed) 153 | tr_data, val_data = train_test_split(train_data, 154 | test_size=0.25, 155 | shuffle=True, 156 | random_state=rand_seed) 157 | 158 | tr_ds = RNASeqData(np.array(tr_data.X)) 159 | val_ds = RNASeqData(np.array(val_data.X)) 160 | 161 | # load annotations 162 | membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt', 163 | symbols, 164 | min_genes=13 165 | ).astype(bool).T 166 | 167 | ## 168 | ## train base model 169 | ## 170 | 171 | # initialize base model 172 | basePMVAE = pmVAEModel(membership_mask.values, 173 | [12], 174 | 1, 175 | beta=1e-05, 176 | terms=membership_mask.index, 177 | add_auxiliary_module=False 178 | ) 179 | 180 | # train 181 | basePMVAE.train(tr_ds, val_ds, 182 | checkpoint_path=args.dataset + '_' + args.removal +'_baseModel.pkl', 183 | max_epochs=100) 184 | 185 | basePMVAE.set_gpu(False) 186 | 187 | ## 188 | ## get pathway rankings 189 | ## 190 | top_features = pd.DataFrame(index=data.uns['terms']) 191 | 192 | ## get max val logvar 193 | 194 | print("Calc max val score") 195 | 196 | ground_truth = torch.tensor(np.array(val_data.X)).float() 197 | outs = basePMVAE.model(ground_truth) 198 | 199 | start_logvar= time.time() 200 | 201 | top_features['logvar'] = -1.*outs.logvar.mean(0).detach().numpy() 202 | 203 | end_logvar= time.time() 204 | logvar_times.append(end_logvar-start_logvar) 205 | 206 | 207 | # IG pathway rankings 208 | print("Calc IG score") 209 | start_ig = time.time() 210 | 211 | def model_loss_wrapper(z): 212 | module_outputs = basePMVAE.model.decoder_net(z) 213 | global_recon = basePMVAE.model.merge(module_outputs) 214 | return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1) 215 | 216 | input_data = outs.z 217 | baseline_data = torch.zeros(outs.z.shape[1]) 218 | baseline_data.requires_grad = True 219 | 220 | explainer = PathExplainerTorch(model_loss_wrapper) 221 | attributions = explainer.attributions(input_data, 222 | baseline=baseline_data, 223 | num_samples=200, 224 | use_expectation=False) 225 | 226 | np_attribs = attributions.detach().numpy() 227 | top_features['IG'] = np_attribs.mean(0) 228 | 229 | top_ig[rand_seed] = np_attribs.mean(0) 230 | 231 | end_ig = time.time() 232 | ig_times.append(end_ig - start_ig) 233 | 234 | 235 | 236 | # LR pathway rankings 237 | print("Calc LR score") 238 | start_lr = time.time() 239 | 240 | if args.dataset == 'kang' or args.dataset == 'datlinger': 241 | y_tr = tr_data.obs['condition'] 242 | y_val = val_data.obs['condition'] 243 | 244 | train_labels = (y_tr == 'stimulated').values 245 | val_labels = (y_val == 'stimulated').values 246 | 247 | 248 | if args.dataset == 'mcfarland': 249 | 250 | y_tr = tr_data.obs['TP53_mutation_status'] 251 | y_val = val_data.obs['TP53_mutation_status'] 252 | 253 | train_labels = (y_tr == 'Wild Type').values 254 | val_labels = (y_val == 'Wild Type').values 255 | 256 | 257 | if args.dataset == 'haber': 258 | y_tr = tr_data.obs['condition'] 259 | y_val = val_data.obs['condition'] 260 | 261 | train_labels = (y_tr == 'Control').values 262 | val_labels = (y_val == 'Control').values 263 | 264 | if args.dataset == 'grubman': 265 | y_tr = tr_data.obs['batchCond'] 266 | y_val = val_data.obs['batchCond'] 267 | 268 | train_labels = (y_tr == 'ct').values 269 | val_labels = (y_val == 'ct').values 270 | 271 | 272 | if args.dataset == 'zheng': 273 | y_tr = tr_data.obs['condition'] 274 | y_val = val_data.obs['condition'] 275 | 276 | train_labels = (y_tr == 'healthy').values 277 | val_labels = (y_val == 'healthy').values 278 | 279 | 280 | if args.dataset == 'norman': 281 | y_tr = tr_data.obs['gene_program'] 282 | y_val = val_data.obs['gene_program'] 283 | 284 | train_labels = (y_tr == 'Ctrl').values 285 | val_labels = (y_val == 'Ctrl').values 286 | 287 | 288 | train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float()).z.detach().numpy() 289 | val_embedding = basePMVAE.model(torch.tensor(val_data.X).float()).z.detach().numpy() 290 | 291 | lr_scores = [] 292 | for pathway in range(train_embedding.shape[1]): 293 | clf = LogisticRegression(random_state=0).fit(train_embedding[:,pathway].reshape(-1,1), train_labels) 294 | lr_scores.append(clf.score(val_embedding[:,pathway].reshape(-1,1), val_labels)) 295 | 296 | top_features['lr_score'] = lr_scores 297 | top_features['lr_score'] = -1.*top_features['lr_score'] 298 | 299 | end_lr = time.time() 300 | lr_times.append(end_lr - start_lr) 301 | 302 | 303 | # KLD pathway rankings 304 | print("Calc KLD") 305 | start_kld = time.time() 306 | 307 | pathway_kld = (-0.5 * (1 + outs.logvar - outs.mu.pow(2) - outs.logvar.exp()).mean(0)).detach().numpy() 308 | top_features['kld'] = -1.*pathway_kld 309 | 310 | end_kld = time.time() 311 | kld_times.append(end_kld - start_kld) 312 | 313 | # Random pathway rankings 314 | print("Calc Random") 315 | np.random.seed(rand_seed) 316 | top_features['rand'] = np.random.randn(top_features.shape[0]) 317 | 318 | times = pd.DataFrame() 319 | times['logvar_times'] = logvar_times 320 | times['ig_times'] = ig_times 321 | times['lr_times'] = lr_times 322 | times['kld_times'] = kld_times 323 | 324 | times.to_csv(save_path + args.dataset + '_times.csv') 325 | 326 | # impute or retrain 327 | def impute_benchmark(method,n_pathways=20): 328 | method_recons_errors = [] 329 | 330 | # for top 20 pathways 331 | for i in range(1,1+n_pathways): 332 | 333 | # set pathways = 0. 334 | test_matrix = torch.tensor(test_data.X).float() 335 | test_matrix_embedded = basePMVAE.model(test_matrix).z 336 | for x in top_features.sort_values(method).index[:i]: 337 | index_to_zero = list(top_features.index).index(x) 338 | test_matrix_embedded[:,index_to_zero] = 0. 339 | 340 | module_outputs = basePMVAE.model.decoder_net(test_matrix_embedded) 341 | global_recon = basePMVAE.model.merge(module_outputs) 342 | recons_error = F.mse_loss(global_recon, test_matrix).detach().item() 343 | method_recons_errors.append(recons_error) 344 | return method_recons_errors 345 | 346 | def retrain_benchmark(method,n_pathways=20): 347 | method_recons_errors = [] 348 | # for top 20 pathways 349 | for i in range(1,21): 350 | 351 | # get cumulative pathways 352 | A_new=[] 353 | for x in top_features.sort_values(method).index[:i]: 354 | A_new.append(membership_mask.loc[x,:].values.reshape(1,-1)) 355 | A_new = np.concatenate(A_new,axis=0) 356 | 357 | reducedVAE = pmVAEModel( 358 | A_new, 359 | [12], 360 | 1, 361 | beta=1e-05, 362 | terms=list(range(A_new.shape[0])), 363 | add_auxiliary_module=False 364 | ) 365 | 366 | reducedVAE.train(tr_ds, val_ds, checkpoint_path= args.dataset + '_' + args.removal +'_reducedVAE.pkl', max_epochs=50) 367 | 368 | test_matrix = torch.tensor(test_data.X).float().cuda() 369 | global_recon = reducedVAE.model(test_matrix).global_recon 370 | 371 | recons_error = F.mse_loss(global_recon, test_matrix).detach().item() 372 | method_recons_errors.append(recons_error) 373 | return method_recons_errors 374 | 375 | 376 | # run impute or retrain 377 | if args.removal == "impute": 378 | print("Impute Logvar") 379 | logvar_results[rand_seed,:] = impute_benchmark('logvar') 380 | print("Impute IG") 381 | ig_results[rand_seed,:] = impute_benchmark('IG') 382 | print("Impute LR") 383 | lr_results[rand_seed,:] = impute_benchmark('lr_score') 384 | print("Impute KLD") 385 | kld_results[rand_seed,:] = impute_benchmark('kld') 386 | print("Impute RAND") 387 | rand_results[rand_seed,:] = impute_benchmark('rand') 388 | 389 | if args.removal == "retrain": 390 | print("Retrain Logvar") 391 | logvar_results[rand_seed,:] = retrain_benchmark('logvar') 392 | print("Retrain IG") 393 | ig_results[rand_seed,:] = retrain_benchmark('IG') 394 | print("Retrain LR") 395 | lr_results[rand_seed,:] = retrain_benchmark('lr_score') 396 | print("Retrain KLD") 397 | kld_results[rand_seed,:] = retrain_benchmark('kld') 398 | print("Retrain RAND") 399 | rand_results[rand_seed,:] = retrain_benchmark('rand') 400 | 401 | 402 | # save results every iteration so that if it crashes 403 | # there's at least some progress 404 | with open('{}/{}_{}_logvar.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 405 | np.save(f, logvar_results) 406 | with open('{}/{}_{}_ig.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 407 | np.save(f, ig_results) 408 | with open('{}/{}_{}_lr.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 409 | np.save(f, lr_results) 410 | with open('{}/{}_{}_kld.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 411 | np.save(f, kld_results) 412 | with open('{}/{}_{}_rand.npy'.format(save_path, args.dataset, args.removal), 'wb') as f: 413 | np.save(f, rand_results) 414 | 415 | 416 | if __name__ == '__main__': 417 | main() 418 | -------------------------------------------------------------------------------- /figures/supplementary_figures/drop_g.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import anndata\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "\n", 13 | "import torch\n", 14 | "\n", 15 | "import os\n", 16 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "from utils import load_annotations" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from sklearn.model_selection import train_test_split" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "# load data" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "data = anndata.read('data/kang_count.h5ad')" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 5, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "pathway_ann_matrix = load_annotations(\n", 60 | " 'data/c2.cp.reactome.v7.4.symbols.gmt',\n", 61 | " data.var_names,\n", 62 | " min_genes=13\n", 63 | ")" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 6, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']" 75 | ] 76 | }, 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "[x for x in pathway_ann_matrix.columns if 'G2_M_TRANSITION' in x or 'PLK1' in x]" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/html": [ 94 | "
\n", 95 | "\n", 108 | "\n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | "
REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION
index
PPP1CBTrue
CLASP1True
TUBA4ATrue
CCNB1True
TUBBTrue
CUL1True
CDK5RAP2True
TUBB4BTrue
CDK1True
ACTR1ATrue
TUBA1ATrue
DCTN2True
CENPJTrue
HSP90AA1True
CCNB2True
AURKATrue
\n", 186 | "
" 187 | ], 188 | "text/plain": [ 189 | " REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION\n", 190 | "index \n", 191 | "PPP1CB True \n", 192 | "CLASP1 True \n", 193 | "TUBA4A True \n", 194 | "CCNB1 True \n", 195 | "TUBB True \n", 196 | "CUL1 True \n", 197 | "CDK5RAP2 True \n", 198 | "TUBB4B True \n", 199 | "CDK1 True \n", 200 | "ACTR1A True \n", 201 | "TUBA1A True \n", 202 | "DCTN2 True \n", 203 | "CENPJ True \n", 204 | "HSP90AA1 True \n", 205 | "CCNB2 True \n", 206 | "AURKA True " 207 | ] 208 | }, 209 | "execution_count": 7, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": [ 215 | "pathway_ann_matrix[pathway_ann_matrix['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']][['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']]" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 8, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "true_pathways_list = [x for x in pathway_ann_matrix.columns if 'G2_M_TRANSITION' in x or 'PLK1' in x]\n", 225 | "drop_pathway_ann_matrix = pathway_ann_matrix.loc[:,~pathway_ann_matrix.columns.isin(true_pathways_list)]" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 9, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "data.varm['annotations'] = drop_pathway_ann_matrix" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 10, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/html": [ 245 | "
\n", 246 | "\n", 259 | "\n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | "
REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEMREACTOME_INTERFERON_ALPHA_BETA_SIGNALINGREACTOME_INTERFERON_SIGNALING
index
ISG15TrueTrueTrue
MIB2FalseFalseFalse
PRKCZFalseFalseFalse
KCNAB2FalseFalseFalse
CTNNBIP1FalseFalseFalse
............
CYP19A1FalseFalseFalse
RAP1GAP2FalseFalseFalse
SSTR2FalseFalseFalse
BIRC5TrueFalseFalse
PLCB4FalseFalseFalse
\n", 343 | "

979 rows × 3 columns

\n", 344 | "
" 345 | ], 346 | "text/plain": [ 347 | " REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM \\\n", 348 | "index \n", 349 | "ISG15 True \n", 350 | "MIB2 False \n", 351 | "PRKCZ False \n", 352 | "KCNAB2 False \n", 353 | "CTNNBIP1 False \n", 354 | "... ... \n", 355 | "CYP19A1 False \n", 356 | "RAP1GAP2 False \n", 357 | "SSTR2 False \n", 358 | "BIRC5 True \n", 359 | "PLCB4 False \n", 360 | "\n", 361 | " REACTOME_INTERFERON_ALPHA_BETA_SIGNALING \\\n", 362 | "index \n", 363 | "ISG15 True \n", 364 | "MIB2 False \n", 365 | "PRKCZ False \n", 366 | "KCNAB2 False \n", 367 | "CTNNBIP1 False \n", 368 | "... ... \n", 369 | "CYP19A1 False \n", 370 | "RAP1GAP2 False \n", 371 | "SSTR2 False \n", 372 | "BIRC5 False \n", 373 | "PLCB4 False \n", 374 | "\n", 375 | " REACTOME_INTERFERON_SIGNALING \n", 376 | "index \n", 377 | "ISG15 True \n", 378 | "MIB2 False \n", 379 | "PRKCZ False \n", 380 | "KCNAB2 False \n", 381 | "CTNNBIP1 False \n", 382 | "... ... \n", 383 | "CYP19A1 False \n", 384 | "RAP1GAP2 False \n", 385 | "SSTR2 False \n", 386 | "BIRC5 False \n", 387 | "PLCB4 False \n", 388 | "\n", 389 | "[979 rows x 3 columns]" 390 | ] 391 | }, 392 | "execution_count": 10, 393 | "metadata": {}, 394 | "output_type": "execute_result" 395 | } 396 | ], 397 | "source": [ 398 | "drop_pathway_ann_matrix.iloc[:,drop_pathway_ann_matrix.loc['IFITM3',:].values == True]" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 11, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "membership_mask = data.varm['annotations'].astype(bool).T\n", 408 | "X_train, X_test = train_test_split(\n", 409 | " data.X,\n", 410 | " test_size=0.25,\n", 411 | " shuffle=True,\n", 412 | " random_state=0,\n", 413 | " \n", 414 | ")" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "# initialize model" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 12, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "from models import pmVAEModel" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 13, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "kangVAE = pmVAEModel(\n", 440 | " membership_mask.values,\n", 441 | " [12],\n", 442 | " 4,\n", 443 | " beta=1e-05,\n", 444 | " terms=membership_mask.index,\n", 445 | " add_auxiliary_module=True\n", 446 | ")" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 14, 452 | "metadata": {}, 453 | "outputs": [ 454 | { 455 | "data": { 456 | "text/plain": [ 457 | "pmVAE(\n", 458 | " (encoder_net): pmEncoder(\n", 459 | " (encoder_dense_1): CustomizedLinear(input_features=979, output_features=2400, bias=True)\n", 460 | " (encoder_norm_1): BatchNorm1d(2400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 461 | " (encoder_elu_1): ELU(alpha=1.0, inplace=True)\n", 462 | " (encoder_dense_2): CustomizedLinear(input_features=2400, output_features=1600, bias=True)\n", 463 | " (encoder_norm_2): BatchNorm1d(1600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 464 | " )\n", 465 | " (decoder_net): pmDecoder(\n", 466 | " (decoder_dense_1): CustomizedLinear(input_features=800, output_features=2400, bias=True)\n", 467 | " (decoder_norm_1): BatchNorm1d(2400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 468 | " (decoder_elu_1): ELU(alpha=1.0, inplace=True)\n", 469 | " )\n", 470 | " (merge_layer): CustomizedLinear(input_features=2400, output_features=979, bias=False)\n", 471 | ")" 472 | ] 473 | }, 474 | "execution_count": 14, 475 | "metadata": {}, 476 | "output_type": "execute_result" 477 | } 478 | ], 479 | "source": [ 480 | "kangVAE.model" 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "# train model" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [ 496 | "kangVAE.train(train_ds, test_ds, checkpoint_path='pmvae_dropG2M_checkpoint.pkl')" 497 | ] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "# explain model" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 15, 509 | "metadata": {}, 510 | "outputs": [], 511 | "source": [ 512 | "kangVAE.load_checkpoint('saved_models/pmvae_dropG2M_checkpoint.pkl.best_loss')" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 16, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "kangVAE.set_gpu(False)" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 17, 527 | "metadata": {}, 528 | "outputs": [ 529 | { 530 | "data": { 531 | "text/plain": [ 532 | "800" 533 | ] 534 | }, 535 | "execution_count": 17, 536 | "metadata": {}, 537 | "output_type": "execute_result" 538 | } 539 | ], 540 | "source": [ 541 | "len(kangVAE.latent_space_names())" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 18, 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "data": { 551 | "text/plain": [ 552 | "796" 553 | ] 554 | }, 555 | "execution_count": 18, 556 | "metadata": {}, 557 | "output_type": "execute_result" 558 | } 559 | ], 560 | "source": [ 561 | "kangVAE.latent_space_names().index('AUXILIARY-0')" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 19, 567 | "metadata": {}, 568 | "outputs": [ 569 | { 570 | "data": { 571 | "text/plain": [ 572 | "'AUXILIARY-0'" 573 | ] 574 | }, 575 | "execution_count": 19, 576 | "metadata": {}, 577 | "output_type": "execute_result" 578 | } 579 | ], 580 | "source": [ 581 | "kangVAE.latent_space_names()[-4]" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 20, 587 | "metadata": {}, 588 | "outputs": [ 589 | { 590 | "data": { 591 | "text/plain": [ 592 | "'AUXILIARY-1'" 593 | ] 594 | }, 595 | "execution_count": 20, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "kangVAE.latent_space_names()[-3]" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 21, 607 | "metadata": {}, 608 | "outputs": [ 609 | { 610 | "data": { 611 | "text/plain": [ 612 | "'AUXILIARY-2'" 613 | ] 614 | }, 615 | "execution_count": 21, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "kangVAE.latent_space_names()[-2]" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 22, 627 | "metadata": {}, 628 | "outputs": [ 629 | { 630 | "data": { 631 | "text/plain": [ 632 | "'AUXILIARY-3'" 633 | ] 634 | }, 635 | "execution_count": 22, 636 | "metadata": {}, 637 | "output_type": "execute_result" 638 | } 639 | ], 640 | "source": [ 641 | "kangVAE.latent_space_names()[-1]" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": 23, 647 | "metadata": {}, 648 | "outputs": [], 649 | "source": [ 650 | "def model_latent_wrapper(x):\n", 651 | " outs = kangVAE.model(x)\n", 652 | " z = outs.mu\n", 653 | " return z[:,-4].reshape(-1,1) # which to explain" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 24, 659 | "metadata": {}, 660 | "outputs": [], 661 | "source": [ 662 | "from pathexplainer import PathExplainerTorch" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 25, 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [ 671 | "input_data = torch.tensor(data.X)\n", 672 | "input_data.requires_grad = True\n", 673 | "baseline_data = torch.zeros(data.X.shape[1])\n", 674 | "baseline_data.requires_grad = True" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": 26, 680 | "metadata": {}, 681 | "outputs": [], 682 | "source": [ 683 | "explainer = PathExplainerTorch(model_latent_wrapper)\n", 684 | "attributions = explainer.attributions(input_data,\n", 685 | " baseline=baseline_data,\n", 686 | " num_samples=200,\n", 687 | " use_expectation=False)" 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": 27, 693 | "metadata": {}, 694 | "outputs": [], 695 | "source": [ 696 | "np_attribs = attributions.detach().numpy()" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 28, 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [ 705 | "top = pd.DataFrame(index=membership_mask.columns)\n", 706 | "top['means'] = np.abs(np_attribs).mean(0)\n", 707 | "top['stds'] = np.abs(np_attribs).std(0)\n" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": 29, 713 | "metadata": {}, 714 | "outputs": [ 715 | { 716 | "data": { 717 | "text/html": [ 718 | "
\n", 719 | "\n", 732 | "\n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | "
meansstds
index
H2AFZ1.5586210.690636
IL80.5885970.379918
PLA2G70.4336170.340465
SSB0.3980440.208317
HIST1H2AC0.2344840.173549
.........
IFNB10.0000110.000189
PELI30.0000100.000337
AURKB0.0000100.000136
SRGAP30.0000100.000202
ATP6V0A40.0000050.000120
\n", 803 | "

979 rows × 2 columns

\n", 804 | "
" 805 | ], 806 | "text/plain": [ 807 | " means stds\n", 808 | "index \n", 809 | "H2AFZ 1.558621 0.690636\n", 810 | "IL8 0.588597 0.379918\n", 811 | "PLA2G7 0.433617 0.340465\n", 812 | "SSB 0.398044 0.208317\n", 813 | "HIST1H2AC 0.234484 0.173549\n", 814 | "... ... ...\n", 815 | "IFNB1 0.000011 0.000189\n", 816 | "PELI3 0.000010 0.000337\n", 817 | "AURKB 0.000010 0.000136\n", 818 | "SRGAP3 0.000010 0.000202\n", 819 | "ATP6V0A4 0.000005 0.000120\n", 820 | "\n", 821 | "[979 rows x 2 columns]" 822 | ] 823 | }, 824 | "execution_count": 29, 825 | "metadata": {}, 826 | "output_type": "execute_result" 827 | } 828 | ], 829 | "source": [ 830 | "top.sort_values('means',ascending=False)" 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "execution_count": 30, 836 | "metadata": {}, 837 | "outputs": [], 838 | "source": [ 839 | "top.to_csv('kang_remove_g/aux_0.csv')" 840 | ] 841 | } 842 | ], 843 | "metadata": { 844 | "kernelspec": { 845 | "display_name": "newenv", 846 | "language": "python", 847 | "name": "newenv" 848 | }, 849 | "language_info": { 850 | "codemirror_mode": { 851 | "name": "ipython", 852 | "version": 3 853 | }, 854 | "file_extension": ".py", 855 | "mimetype": "text/x-python", 856 | "name": "python", 857 | "nbconvert_exporter": "python", 858 | "pygments_lexer": "ipython3", 859 | "version": "3.9.7" 860 | } 861 | }, 862 | "nbformat": 4, 863 | "nbformat_minor": 4 864 | } 865 | -------------------------------------------------------------------------------- /figures/supplementary_figures/.ipynb_checkpoints/drop_g-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import anndata\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "\n", 13 | "import torch\n", 14 | "\n", 15 | "import os\n", 16 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "from utils import load_annotations" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from sklearn.model_selection import train_test_split" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "# load data" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "data = anndata.read('data/kang_count.h5ad')" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 5, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "pathway_ann_matrix = load_annotations(\n", 60 | " 'data/c2.cp.reactome.v7.4.symbols.gmt',\n", 61 | " data.var_names,\n", 62 | " min_genes=13\n", 63 | ")" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 6, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']" 75 | ] 76 | }, 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "[x for x in pathway_ann_matrix.columns if 'G2_M_TRANSITION' in x or 'PLK1' in x]" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/html": [ 94 | "
\n", 95 | "\n", 108 | "\n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | "
REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION
index
PPP1CBTrue
CLASP1True
TUBA4ATrue
CCNB1True
TUBBTrue
CUL1True
CDK5RAP2True
TUBB4BTrue
CDK1True
ACTR1ATrue
TUBA1ATrue
DCTN2True
CENPJTrue
HSP90AA1True
CCNB2True
AURKATrue
\n", 186 | "
" 187 | ], 188 | "text/plain": [ 189 | " REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION\n", 190 | "index \n", 191 | "PPP1CB True \n", 192 | "CLASP1 True \n", 193 | "TUBA4A True \n", 194 | "CCNB1 True \n", 195 | "TUBB True \n", 196 | "CUL1 True \n", 197 | "CDK5RAP2 True \n", 198 | "TUBB4B True \n", 199 | "CDK1 True \n", 200 | "ACTR1A True \n", 201 | "TUBA1A True \n", 202 | "DCTN2 True \n", 203 | "CENPJ True \n", 204 | "HSP90AA1 True \n", 205 | "CCNB2 True \n", 206 | "AURKA True " 207 | ] 208 | }, 209 | "execution_count": 7, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": [ 215 | "pathway_ann_matrix[pathway_ann_matrix['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']][['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']]" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 8, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "true_pathways_list = [x for x in pathway_ann_matrix.columns if 'G2_M_TRANSITION' in x or 'PLK1' in x]\n", 225 | "drop_pathway_ann_matrix = pathway_ann_matrix.loc[:,~pathway_ann_matrix.columns.isin(true_pathways_list)]" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 9, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "data.varm['annotations'] = drop_pathway_ann_matrix" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 10, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/html": [ 245 | "
\n", 246 | "\n", 259 | "\n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | "
REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEMREACTOME_INTERFERON_ALPHA_BETA_SIGNALINGREACTOME_INTERFERON_SIGNALING
index
ISG15TrueTrueTrue
MIB2FalseFalseFalse
PRKCZFalseFalseFalse
KCNAB2FalseFalseFalse
CTNNBIP1FalseFalseFalse
............
CYP19A1FalseFalseFalse
RAP1GAP2FalseFalseFalse
SSTR2FalseFalseFalse
BIRC5TrueFalseFalse
PLCB4FalseFalseFalse
\n", 343 | "

979 rows × 3 columns

\n", 344 | "
" 345 | ], 346 | "text/plain": [ 347 | " REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM \\\n", 348 | "index \n", 349 | "ISG15 True \n", 350 | "MIB2 False \n", 351 | "PRKCZ False \n", 352 | "KCNAB2 False \n", 353 | "CTNNBIP1 False \n", 354 | "... ... \n", 355 | "CYP19A1 False \n", 356 | "RAP1GAP2 False \n", 357 | "SSTR2 False \n", 358 | "BIRC5 True \n", 359 | "PLCB4 False \n", 360 | "\n", 361 | " REACTOME_INTERFERON_ALPHA_BETA_SIGNALING \\\n", 362 | "index \n", 363 | "ISG15 True \n", 364 | "MIB2 False \n", 365 | "PRKCZ False \n", 366 | "KCNAB2 False \n", 367 | "CTNNBIP1 False \n", 368 | "... ... \n", 369 | "CYP19A1 False \n", 370 | "RAP1GAP2 False \n", 371 | "SSTR2 False \n", 372 | "BIRC5 False \n", 373 | "PLCB4 False \n", 374 | "\n", 375 | " REACTOME_INTERFERON_SIGNALING \n", 376 | "index \n", 377 | "ISG15 True \n", 378 | "MIB2 False \n", 379 | "PRKCZ False \n", 380 | "KCNAB2 False \n", 381 | "CTNNBIP1 False \n", 382 | "... ... \n", 383 | "CYP19A1 False \n", 384 | "RAP1GAP2 False \n", 385 | "SSTR2 False \n", 386 | "BIRC5 False \n", 387 | "PLCB4 False \n", 388 | "\n", 389 | "[979 rows x 3 columns]" 390 | ] 391 | }, 392 | "execution_count": 10, 393 | "metadata": {}, 394 | "output_type": "execute_result" 395 | } 396 | ], 397 | "source": [ 398 | "drop_pathway_ann_matrix.iloc[:,drop_pathway_ann_matrix.loc['IFITM3',:].values == True]" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 11, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "membership_mask = data.varm['annotations'].astype(bool).T\n", 408 | "X_train, X_test = train_test_split(\n", 409 | " data.X,\n", 410 | " test_size=0.25,\n", 411 | " shuffle=True,\n", 412 | " random_state=0,\n", 413 | " \n", 414 | ")" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "# initialize model" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 12, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "from models import pmVAEModel" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 13, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "kangVAE = pmVAEModel(\n", 440 | " membership_mask.values,\n", 441 | " [12],\n", 442 | " 4,\n", 443 | " beta=1e-05,\n", 444 | " terms=membership_mask.index,\n", 445 | " add_auxiliary_module=True\n", 446 | ")" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 14, 452 | "metadata": {}, 453 | "outputs": [ 454 | { 455 | "data": { 456 | "text/plain": [ 457 | "pmVAE(\n", 458 | " (encoder_net): pmEncoder(\n", 459 | " (encoder_dense_1): CustomizedLinear(input_features=979, output_features=2400, bias=True)\n", 460 | " (encoder_norm_1): BatchNorm1d(2400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 461 | " (encoder_elu_1): ELU(alpha=1.0, inplace=True)\n", 462 | " (encoder_dense_2): CustomizedLinear(input_features=2400, output_features=1600, bias=True)\n", 463 | " (encoder_norm_2): BatchNorm1d(1600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 464 | " )\n", 465 | " (decoder_net): pmDecoder(\n", 466 | " (decoder_dense_1): CustomizedLinear(input_features=800, output_features=2400, bias=True)\n", 467 | " (decoder_norm_1): BatchNorm1d(2400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 468 | " (decoder_elu_1): ELU(alpha=1.0, inplace=True)\n", 469 | " )\n", 470 | " (merge_layer): CustomizedLinear(input_features=2400, output_features=979, bias=False)\n", 471 | ")" 472 | ] 473 | }, 474 | "execution_count": 14, 475 | "metadata": {}, 476 | "output_type": "execute_result" 477 | } 478 | ], 479 | "source": [ 480 | "kangVAE.model" 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "# train model" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [ 496 | "kangVAE.train(train_ds, test_ds, checkpoint_path='pmvae_dropG2M_checkpoint.pkl')" 497 | ] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "# explain model" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 15, 509 | "metadata": {}, 510 | "outputs": [], 511 | "source": [ 512 | "kangVAE.load_checkpoint('saved_models/pmvae_dropG2M_checkpoint.pkl.best_loss')" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 16, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "kangVAE.set_gpu(False)" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 17, 527 | "metadata": {}, 528 | "outputs": [ 529 | { 530 | "data": { 531 | "text/plain": [ 532 | "800" 533 | ] 534 | }, 535 | "execution_count": 17, 536 | "metadata": {}, 537 | "output_type": "execute_result" 538 | } 539 | ], 540 | "source": [ 541 | "len(kangVAE.latent_space_names())" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 18, 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "data": { 551 | "text/plain": [ 552 | "796" 553 | ] 554 | }, 555 | "execution_count": 18, 556 | "metadata": {}, 557 | "output_type": "execute_result" 558 | } 559 | ], 560 | "source": [ 561 | "kangVAE.latent_space_names().index('AUXILIARY-0')" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 19, 567 | "metadata": {}, 568 | "outputs": [ 569 | { 570 | "data": { 571 | "text/plain": [ 572 | "'AUXILIARY-0'" 573 | ] 574 | }, 575 | "execution_count": 19, 576 | "metadata": {}, 577 | "output_type": "execute_result" 578 | } 579 | ], 580 | "source": [ 581 | "kangVAE.latent_space_names()[-4]" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 20, 587 | "metadata": {}, 588 | "outputs": [ 589 | { 590 | "data": { 591 | "text/plain": [ 592 | "'AUXILIARY-1'" 593 | ] 594 | }, 595 | "execution_count": 20, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "kangVAE.latent_space_names()[-3]" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 21, 607 | "metadata": {}, 608 | "outputs": [ 609 | { 610 | "data": { 611 | "text/plain": [ 612 | "'AUXILIARY-2'" 613 | ] 614 | }, 615 | "execution_count": 21, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "kangVAE.latent_space_names()[-2]" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 22, 627 | "metadata": {}, 628 | "outputs": [ 629 | { 630 | "data": { 631 | "text/plain": [ 632 | "'AUXILIARY-3'" 633 | ] 634 | }, 635 | "execution_count": 22, 636 | "metadata": {}, 637 | "output_type": "execute_result" 638 | } 639 | ], 640 | "source": [ 641 | "kangVAE.latent_space_names()[-1]" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": 23, 647 | "metadata": {}, 648 | "outputs": [], 649 | "source": [ 650 | "def model_latent_wrapper(x):\n", 651 | " outs = kangVAE.model(x)\n", 652 | " z = outs.mu\n", 653 | " return z[:,-4].reshape(-1,1) # which to explain" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 24, 659 | "metadata": {}, 660 | "outputs": [], 661 | "source": [ 662 | "from pathexplainer import PathExplainerTorch" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 25, 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [ 671 | "input_data = torch.tensor(data.X)\n", 672 | "input_data.requires_grad = True\n", 673 | "baseline_data = torch.zeros(data.X.shape[1])\n", 674 | "baseline_data.requires_grad = True" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": 26, 680 | "metadata": {}, 681 | "outputs": [], 682 | "source": [ 683 | "explainer = PathExplainerTorch(model_latent_wrapper)\n", 684 | "attributions = explainer.attributions(input_data,\n", 685 | " baseline=baseline_data,\n", 686 | " num_samples=200,\n", 687 | " use_expectation=False)" 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": 27, 693 | "metadata": {}, 694 | "outputs": [], 695 | "source": [ 696 | "np_attribs = attributions.detach().numpy()" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 28, 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [ 705 | "top = pd.DataFrame(index=membership_mask.columns)\n", 706 | "top['means'] = np.abs(np_attribs).mean(0)\n", 707 | "top['stds'] = np.abs(np_attribs).std(0)\n" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": 29, 713 | "metadata": {}, 714 | "outputs": [ 715 | { 716 | "data": { 717 | "text/html": [ 718 | "
\n", 719 | "\n", 732 | "\n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | "
meansstds
index
H2AFZ1.5586210.690636
IL80.5885970.379918
PLA2G70.4336170.340465
SSB0.3980440.208317
HIST1H2AC0.2344840.173549
.........
IFNB10.0000110.000189
PELI30.0000100.000337
AURKB0.0000100.000136
SRGAP30.0000100.000202
ATP6V0A40.0000050.000120
\n", 803 | "

979 rows × 2 columns

\n", 804 | "
" 805 | ], 806 | "text/plain": [ 807 | " means stds\n", 808 | "index \n", 809 | "H2AFZ 1.558621 0.690636\n", 810 | "IL8 0.588597 0.379918\n", 811 | "PLA2G7 0.433617 0.340465\n", 812 | "SSB 0.398044 0.208317\n", 813 | "HIST1H2AC 0.234484 0.173549\n", 814 | "... ... ...\n", 815 | "IFNB1 0.000011 0.000189\n", 816 | "PELI3 0.000010 0.000337\n", 817 | "AURKB 0.000010 0.000136\n", 818 | "SRGAP3 0.000010 0.000202\n", 819 | "ATP6V0A4 0.000005 0.000120\n", 820 | "\n", 821 | "[979 rows x 2 columns]" 822 | ] 823 | }, 824 | "execution_count": 29, 825 | "metadata": {}, 826 | "output_type": "execute_result" 827 | } 828 | ], 829 | "source": [ 830 | "top.sort_values('means',ascending=False)" 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "execution_count": 30, 836 | "metadata": {}, 837 | "outputs": [], 838 | "source": [ 839 | "top.to_csv('kang_remove_g/aux_0.csv')" 840 | ] 841 | } 842 | ], 843 | "metadata": { 844 | "kernelspec": { 845 | "display_name": "newenv", 846 | "language": "python", 847 | "name": "newenv" 848 | }, 849 | "language_info": { 850 | "codemirror_mode": { 851 | "name": "ipython", 852 | "version": 3 853 | }, 854 | "file_extension": ".py", 855 | "mimetype": "text/x-python", 856 | "name": "python", 857 | "nbconvert_exporter": "python", 858 | "pygments_lexer": "ipython3", 859 | "version": "3.9.7" 860 | } 861 | }, 862 | "nbformat": 4, 863 | "nbformat_minor": 4 864 | } 865 | -------------------------------------------------------------------------------- /figures/supplementary_figures/g_enrichments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 61, 6 | "id": "23720687-2ad7-4d94-8df1-39b834c5e456", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import pandas as pd \n", 12 | "import anndata \n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import shap as shap \n", 15 | "import seaborn as sns\n", 16 | "import math as math" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 62, 22 | "id": "654a8332-3a7a-4e98-9e86-9a9f44072a58", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# drop G\n", 27 | "aux_0 = pd.read_csv('kang_remove_g/aux_0.csv')\n", 28 | "aux_1 = pd.read_csv('kang_remove_g/aux_1.csv')\n", 29 | "aux_2 = pd.read_csv('kang_remove_g/aux_2.csv')\n", 30 | "aux_3 = pd.read_csv('kang_remove_g/aux_3.csv')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 63, 36 | "id": "8a1707e6-79f5-434c-b6de-d41a31658f88", 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/html": [ 42 | "
\n", 43 | "\n", 56 | "\n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | "
indexmeansstds
233IL81.4722201.072127
250H2AFZ0.6600400.297694
707SQRDL0.5417980.295089
347PLA2G70.4399080.348958
320HIST1H2AC0.3853020.294543
............
342TREM20.0000120.000478
960RRM20.0000110.000294
797PTRF0.0000100.000238
12ALDH4A10.0000090.000227
969ABCC20.0000050.000174
\n", 134 | "

979 rows × 3 columns

\n", 135 | "
" 136 | ], 137 | "text/plain": [ 138 | " index means stds\n", 139 | "233 IL8 1.472220 1.072127\n", 140 | "250 H2AFZ 0.660040 0.297694\n", 141 | "707 SQRDL 0.541798 0.295089\n", 142 | "347 PLA2G7 0.439908 0.348958\n", 143 | "320 HIST1H2AC 0.385302 0.294543\n", 144 | ".. ... ... ...\n", 145 | "342 TREM2 0.000012 0.000478\n", 146 | "960 RRM2 0.000011 0.000294\n", 147 | "797 PTRF 0.000010 0.000238\n", 148 | "12 ALDH4A1 0.000009 0.000227\n", 149 | "969 ABCC2 0.000005 0.000174\n", 150 | "\n", 151 | "[979 rows x 3 columns]" 152 | ] 153 | }, 154 | "execution_count": 63, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "aux_0.sort_values('means',ascending=False)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 64, 166 | "id": "b85cd0f4-6b0c-4a1b-82e7-24e0b207a830", 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "combined_means = pd.DataFrame(index = aux_0.index)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 65, 176 | "id": "c9a0bd21-db5e-4109-9fdd-c6c585f99f52", 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "combined_means[0] = aux_0['means']\n", 181 | "combined_means[1] = aux_1['means']\n", 182 | "combined_means[2] = aux_2['means']\n", 183 | "combined_means[3] = aux_3['means']" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 66, 189 | "id": "a69cab5f-5575-4543-8c05-e3a7dabfe983", 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "data": { 194 | "text/html": [ 195 | "
\n", 196 | "\n", 209 | "\n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | "
0123
00.0037620.0076300.0052640.014630
10.0010840.0022510.0023870.000991
20.0002170.0011490.0007990.000178
30.0004710.0005260.0014020.000415
40.0024610.0012580.0009570.000706
...............
9740.0001410.0001670.0000260.000326
9750.0000300.0000440.0000690.000031
9760.0002870.0000680.0000320.000397
9770.0001350.0001820.0000360.000130
9780.0001830.0002110.0001040.000023
\n", 299 | "

979 rows × 4 columns

\n", 300 | "
" 301 | ], 302 | "text/plain": [ 303 | " 0 1 2 3\n", 304 | "0 0.003762 0.007630 0.005264 0.014630\n", 305 | "1 0.001084 0.002251 0.002387 0.000991\n", 306 | "2 0.000217 0.001149 0.000799 0.000178\n", 307 | "3 0.000471 0.000526 0.001402 0.000415\n", 308 | "4 0.002461 0.001258 0.000957 0.000706\n", 309 | ".. ... ... ... ...\n", 310 | "974 0.000141 0.000167 0.000026 0.000326\n", 311 | "975 0.000030 0.000044 0.000069 0.000031\n", 312 | "976 0.000287 0.000068 0.000032 0.000397\n", 313 | "977 0.000135 0.000182 0.000036 0.000130\n", 314 | "978 0.000183 0.000211 0.000104 0.000023\n", 315 | "\n", 316 | "[979 rows x 4 columns]" 317 | ] 318 | }, 319 | "execution_count": 66, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "combined_means" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 67, 331 | "id": "f2040385-8056-4c9b-bdee-ee1ab665c26f", 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/html": [ 337 | "
\n", 338 | "\n", 351 | "\n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | "
0123
01.0000000.6907570.7031570.723252
10.6907571.0000000.6980950.634515
20.7031570.6980951.0000000.589254
30.7232520.6345150.5892541.000000
\n", 392 | "
" 393 | ], 394 | "text/plain": [ 395 | " 0 1 2 3\n", 396 | "0 1.000000 0.690757 0.703157 0.723252\n", 397 | "1 0.690757 1.000000 0.698095 0.634515\n", 398 | "2 0.703157 0.698095 1.000000 0.589254\n", 399 | "3 0.723252 0.634515 0.589254 1.000000" 400 | ] 401 | }, 402 | "execution_count": 67, 403 | "metadata": {}, 404 | "output_type": "execute_result" 405 | } 406 | ], 407 | "source": [ 408 | "combined_means.corr()" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 68, 414 | "id": "7aa9b78a-9769-49f2-ab16-e30ef48a91a1", 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "corr_mat = combined_means.corr().abs()\n", 419 | "mask = np.tril(np.ones_like(corr_mat, dtype=bool)) " 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 69, 425 | "id": "8c5f0225-033b-4559-bb16-6500ed35ed8f", 426 | "metadata": {}, 427 | "outputs": [ 428 | { 429 | "data": { 430 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdAAAAFpCAYAAAAsmHm9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAp80lEQVR4nO3dd5gc1Znv8d87PVGjUUJplCMggQRIQiSTk5C9lu1rewETDNgC2+wa4yRfXweW595l7V17HTAwa4PFYsDYJmhtQAQjk4wiAkko51HOk0cT3vvHtETPqCdV18T6fp6nHnVX1ak6fRjm7fecU2fM3QUAAFonraMrAABAV0QABQAgAAIoAAABEEABAAiAAAoAQAAEUAAAAiCAAgC6BDN72Mz2mtnKRo6bmf3czDaY2ftmNiXh2AwzWxs/NieM+hBAAQBdxW8lzWji+DWSxse32ZIekCQzi0m6P358oqTrzGxiqpUhgAIAugR3f13SwSZOmSXpUa/zjqQ+ZpYvabqkDe6+yd2PSnoyfm5KCKAAgO5iqKTtCe8L4/sa25+S9FQv0AKsFdjGpv1gfkdXIRJqqms7ugrdXm0Nvy7a2nv3zbS2unbmWbem9B+wavkjt6uu6/WYAncvaMUlkn02b2J/StojgAIAIsDSYimVjwfL1gTMhgolDU94P0zSTkmZjexPCV24AIBQWFospS0E8yTdFJ+Ne66kI+6+S9JiSePNbLSZZUq6Nn5uSshAAQChCCkINn59syckXSKpv5kVSvqBpAxJcvcHJT0vaaakDZLKJN0SP1ZtZndKmi8pJulhd1+Van0IoACALsHdr2vmuEv6SiPHnlddgA0NARQAEIq2zkA7GwIoACAUFiOAAgDQamkRy0CZhQsAQABkoACAUDAGCgBAAARQAAACsLRojQoSQAEAoYhaBhqtrwsAAISEDBQAEIqoZaAEUABAKAigAAAEwEpEAAAEELUMlElEAAAEQAYKAAhF1DJQAigAIBRRW0yeAAoACEXUMlDGQAEACIAMFAAQiqhloARQAEAoCKAAAARAAAUAIICoBVAmEQEAEAAZKAAgFKyFCwBAAFHrwiWAAgBCQQAFACCAqAVQJhEBABAAGSgCGz0gV9+cOUGTh/VRcUWVnl22Q/+1YINqvfmyl04YqM9fOEZjB/ZURVWNPthRpG/9frkqqmqOn3PrRWP0qanD1Dc3U5v3leqXr6zTOxsPtOEn6jrGDOipb/3DBE0e3lclFVV6ZkmhHvrr+ibb/vbLxumOy8cnPfaL+Wv18Oub2qi2nd+YgT015+MTNXlEXxVXVOmZxdv14CtNt+cdV4zXl65I3p4/e3GtHl6wUWkm3XzRGF106kCNGdhTkrR6xxH94qV1WlV4pC0+SodKS7OOrkK7IoAikLzsdP3qpmnatK9UX3/iXQ3rl6O7rj5FaSY98NcNTZadNWWovjVzgh59a4t+/tJa5WVn6Owx/RRL+J/v8xeO1hcuHquHXtugtbuKNPOMIfrp9VN0228W6oOdRW398Tq1vOx0PXjr2dq0t0Rfe2yphp/UQ3dfc6rMpF+9sr7Rcs8sKdTb6/fX23fphIG65eKxenP9vraudqeVl5Ouh74wXZv2lOiuR+va8+sfPVVmpvtfWtdouacXbddba+u322WnDdKtl4zVW2v3SpKyMmK69ZKxem5JoX6zYKPcpWvPH6nf3nGubnrg71q9o3v9LBsBFGje/zp7uLIyYvrW799VaWWNFm6ScrPSNfuScXr0rc0qraxJWq53jwzdPeNU/fiFNXp2aeHx/QvW7D3+Oj1m+vxHxmjum5s1983NkqR3Nh7Q6AG5+uIlY/W1x99t2w/XyX1m+ghlZcT09cffVWlltRZuPKDcrHTdftl4zX1js0orq5OW21tUob1FFfX2ffHSsdq0t0TrdhW3R9U7pc+cM1LZGTHd/dgylVZW650NdT/Ld1wxXr/926ZWtefsy8dp094SrY23Z2VVjWb+6DUVl394jYUb92veNy7WdeeN0vf/+H7bfbAOYBatAMoYKAI5f1x/vbNhf71AOX/lbmVnxjRlZL9Gy1152mBJ0p+X72j0nGF9e6hndroWbarfXbtw4wGdM7a/0mPR+p+0oQtOHqC/r99X7xf7/Pd3KSczpqmjG2/7hnrlZOjcsf01//1dbVHNLuMjpwzQ2+vqt+eL7+1UTmZM08a0rj3PG9dfL7638/i+Wle94ClJ1TWujXtK1LdnZuqVR4cigCKQUf1ztWV/ab19e45UqPxotUYNyG203OnDemvr/lLNmjJMf7n7Yr3z/Sv12y+eo8nD+xw/Jyu97seyqqa2XtmjNbXKTE/T0L49wvsgXdCoAbnavK9+2+8+1vb9G2/7hq44fbAy0tP04vs7mz+5GxvdVHs28bPc0JWT4u35XtNfSDJiaZo4tLc27SkJVN/OLC3NUtpawsxmmNlaM9tgZnOSHP+mmS2PbyvNrMbM+sWPbTGzFfFjS1L+vKleANHUKydDxRUndm0VlVcrLzuj0XIn9czSyP65uu2iMfrFy+v0tcffVfnRGv38hqnql1v3jbzwULlqa12nDe1dr+yx971zGr9+FOQ10fa9WtE2V0/K1wc7jmjbgbIwq9fl5OVkqLi86oT9ReVVrWrPGWfk64PCI9ra4ItlQ1+8bKx65aTr6cXbW13Xzs7SLKWt2eubxSTdL+kaSRMlXWdmExPPcfcfu/uZ7n6mpO9I+pu7H0w45dL48Wmpft5mA6iZnWpm3zazn5vZz+KvJ6R6Y3R9rhOnKNYNgTQ+dTHN6saX7n1ulV5csUt/37Bf33jyXdW667PnjJAklVZWa/7KXbrlwjGaOqqfeuVk6B/PGaFzxpwkSappyTTfbs49edsn+2+STP+8LE0d3U8vRrz79phkrWYyJWnmpOra8yS98F7T2fyFpwzQFy4dp/98YW2zgbYrausAKmm6pA3uvsndj0p6UtKsJs6/TtITIXy0pJoMoGb2bdVV0CQtkrQ4/vqJZKlzQrnZZrbEzJYUFBSEWV90EkXlVUkzzZ5Z6Umzo2OOxL/pL93y4RfC0soard5ZpDEDeh7f9x8vrNHmfSV66Jaz9dc5l+nG80fpN/HHLA6WVob1Mbqk4vIq5SXJjHpmpZ8w3taYK08fLJP00goCaHF5lfKyT5xP2TM7XcUVJ2amyVw1OV8mNTmefNqw3vrR9Wfpjwu36XdvbQlY284tzSylLTF2xLfZDW4xVFJi6l4Y33cCM+shaYakPyXsdkkvmdnSJNduteZm4d4m6TR3r/dTZGY/kbRK0n3JCrl7gaRjkZN0oRvasr/0hPG2Qb2y1SMrXVv2Nf7Nesu+UtXWuhp+1zSTahO+7h8uq9KX5i7RwF5Z6pmVrq0HynTduSO1v7hSuw5XKMq27CvV6IZt3zve9i3MamZMztfyrYe050i021KSNu8r1eiEL29SQns28bOcaMbkfL3bRHuO7J+rX35+mhZuPKD75q1Kuc7dVYPYkUyyNLWxGPMPkt5q0H17gbvvNLOBkl42szXu/nrA6jbbhVsraUiS/fnxY4iotzfs17lj+6tH5odLd115+mBVHK3Rsq0HGy33xrp9SkszTUuYLZqbla4J+b20fveJj1LsLarUpn2liqWZPn7WUM17t/CEc6LmrXX7dN74+m1/1aR8lR+t0dLNjbf9Mfl9cjR5RF+6b+PeXLtP559cvz2vnlzXnks2Nd+eQ/rm6IyRffXC8uTdt/3zsvTArWdr+4EyzXni3RYtNNJVtUMXbqGk4Qnvh0lqrN/8WjXovnX3nfF/90p6RnVdwoE1l4HeJelVM1uvD9PmEZLGSbozlRuja/vT4u269pyR+vG1Z2rum5s1tG8Pzb5krH739y31Hm155p8v1LKtB3Xvc3XfulfvLNKC1Xv0vVmn6xcvr9PhsqO66SOjVV3remrRtuPlZk7OV3osTTsOlWlw7xxdf95I1brrkTc2t/tn7Wz+sGibrj1/pP7jc1P029c3aWi/HrrjsnF67K36z4A+d/dFWrb5oO55ZmW98jMm56uqplavrNzd3lXvlP6wcKuuP3+kfnLjVD3yt40a1q+HvnTFeP33m/Xb83++cbGWbj6oH/5pRb3yTbVnVnqa7r/lbOXlZOhfn1ul8YN7HT9WVVOrNd1sUZB2WEhhsaTxZjZa0g7VBcnrT6iHWW9JF0u6IWFfrqQ0dy+Ov75K0r+kUpkmA6i7v2hmJ6suSg9VXfpcKGmxuyd/Uh6RUFxRrS/NXaxvzZygn1w/RSUV1Xr8na0qeK3+KkSxtLqxjUTfe3qFvnrVyfrajFOUnRHTe9sO647fLq43dmpmuvkjozW4d7ZKKqv1tzV7df8r61V+lB+74opq3fGbxfr2P0zUf944VcUVVfrd21v04Kv1VyFKb+TRgKsn52vRxgM6VHa0varcqRWXV2v2rxfpO7Mm6uc3T1NxeZUee3OzHmiwqlMs1kh7njFEizYc0KHSE9vzpLwsnTqkLmj+8paz6x3bcahMM/9tQXgfpBNo66X83L3azO6UNF9STNLD7r7KzO6IH38wfuonJb3k7ol98IMkPRNf7CFd0uPu/mIq9bFks/lC1o07LDqHaT+Y39FViISaakYt2lptDb8u2tp7981ssyh3xpznU/oP2JZ1awss5QcACIVFbGUBAigAIBRRWwuXAAoACAV/zgwAgACi9ufMItZjDQBAOMhAAQChiFoGSgAFAISi4TPf3R0BFAAQCjJQAAACiFoAZRIRAAABkIECAELBc6AAAATASkQAAAQQtbVwI/ZxAQAIBxkoACAUjIECABBA1B5jIYACAELBJCIAAAKIWhcuk4gAAAiADBQAEArGQAEACCBGAAUAoPUIoAAABBC1AMokIgAAAiADBQCEImoZKAEUABAKAigAAAGkRyyAMgYKAEAAZKAAgFDQhQsAQAAEUAAAAoilRWtUkAAKAAhF1DLQaH1dAAB0aWY2w8zWmtkGM5uT5PglZnbEzJbHt++3tGxrkYECAELR1hmomcUk3S/pSkmFkhab2Tx3/6DBqW+4+8cClm0xAigAIBTt0IU7XdIGd98kSWb2pKRZkloSBFMpmxQBtBtYcs/VHV2FSJj6vRc7ugrdXs8+2R1dBaQgZqkFUDObLWl2wq4Cdy9IeD9U0vaE94WSzklyqfPM7D1JOyV9w91XtaJsixFAAQChSDUDjQfLgiZOSXYDb/B+maSR7l5iZjMlPStpfAvLtgqTiAAAXUWhpOEJ74epLss8zt2L3L0k/vp5SRlm1r8lZVuLDBQAEIp2GANdLGm8mY2WtEPStZKuTzzBzAZL2uPubmbTVZcoHpB0uLmyrUUABQCEoq0Xk3f3ajO7U9J8STFJD7v7KjO7I378QUmflvQlM6uWVC7pWnd3SUnLplIfAigAIBTtsZBCvFv2+Qb7Hkx4/UtJv2xp2VQwBgoAQABkoACAUERtKT8CKAAgFARQAAACIIACABBA1AIok4gAAAiADBQAEIqoZaAEUABAKAigAAAEQAAFACCAqAVQJhEBABAAGSgAIBRRy0AJoACAUMSMAAoAQKulRSyAMgYKAEAAZKAAgFDEopWAEkABAOFIYxIRAACtxyQiAAACYBIRAABoFhkoACAUTCICACAAJhEBABBA1MZACaAAgFBErQuXSUQAAARABgoACAVduAAABMCfMwMAIAAyUACd3ugBufrWRydq8vA+Kq6o0rNLC1Xw2gbVeuNlZl86TrdfNi7psV++vE6PvL6pjWrb+Y06qYfuuvxknTakl0oqq/Xn93fpkbc3N9mex1w0vr9uOGekxvTPVUV1rdbsLtL/eW6lKqpqJUm3XjBKF40foMG9smUmbTtYpicWbddf1+5t40+FtkYABbqYvOx0PfD5s7VpX4nufnyZhvXroa/NOEVmpgdeXd9ouWeXbtfb6/fV23fphEH6/EVj9Na6fY2U6v56ZqXrp589U1sOlOo7z6zQ0D45+sol42Qm/frNzU2W/dikfN11xXg9sWi7fvW3jcrLTtfUEX3rrQmbm5muF1bu1pYDpap11yUnD9Q9Hz9Ntc+5FnSzdo/aLFwCKNDFfHr6CGVlxPTNJ95VaWWNFm48oNysdN1+6Tg9+uYmlVbWJC23t6hSe4sq6+374iVjtXlfidbtLm6PqndKnzhziLLS0/TdZ1eq7GiNlmw9pNysdN1y/ig9vmibyo4mb8/eORn6p8vG6Wevrtf/vL/r+P431u+vd94vXttQ7/3iLYc0un+urj5tcLcLoFHrwuUxFqCLOX98f/19w/56gfKlFbuUnRnTlFH9WnydXjkZOmdsf81P+OUfReeMPkmLNh+sFyhfWb1H2RkxnTm8T6PlLj1lgCTphZW7W33PI+VVyuiG6VoszVLauhoCKNDFjOqfqy37Suvt232kQuVHqzWqf26Lr3P5aYOUkZ6m+SuiHUBH9uuhrQfL6u3bW1yp8qM1GtmvR6PlJub30raDZfrY5Hz96Y7z9NrdF+uhz03V6UN6JT0/ZqaeWem6csIgnT2qr55bvjPUz9EZpFlqW0uY2QwzW2tmG8xsTpLjnzOz9+Pb22Z2RsKxLWa2wsyWm9mSVD8vXbhAF9MrJ0PFFVUn7C8qr1avnIwWX+fqSflaveOIth0oa/7kbiwvO10lldUn7C+urFJeduPteVJulkb066Gbzh2lB/62UUfKq3T99BH690+foet+/Y4OlX3432hifi89dMNUSVJ1Ta1++up6vbFhf2OXRiPMLCbpfklXSiqUtNjM5rn7BwmnbZZ0sbsfMrNrJBVIOifh+KXuHkrjE0CBrijJ7FCzpLuT6t8zS1NG9dMvXlobarW6Kk/WnjJ5sgPHjpvUIzNd33tulRZtOShJWrnziP54+3n61FnD9Ju3PpyAtGl/ib7w6BL1zE7X+WNO0tcuH6/Symq9uqZ7zcRthz+oPV3SBnffJElm9qSkWZKOB1B3fzvh/HckDWurygTuwjWzW5o4NtvMlpjZkoKCgqC3AJBEUXmVeuac+N23Z1a6istPzEyTufL0wTJJL61o/fhdd1NcUa2e2Se2Z25WLGlmekxRvBdg+fbDx/eVHa3R2j3FGtW/ftdvRVWt1u4p1tKth/SL1zZo/gd79KWLx4bzATqRNLOUtsTYEd9mN7jFUEnbE94Xxvc15jZJLyS8d0kvmdnSJNdutVQy0HskPZLsgLsXqC5tllr+pRhAC2zZX6pR/XvW2zeoV7Z6ZKVry/7SRkrVd9WkfC3fdkh7iiraoopdytaDZSeMdQ7My1KPzPQTxkbrlTtQplp3qUHSVZe5Nn3PdXuK9dFJ+YqlmWpa8rBpFxFLcVZNg9iRTLIUN2kDmtmlqgugH0nYfYG77zSzgZJeNrM17v560Po2+XETBmIbbiskDQp6UwDBvb1+v84bd5J6ZMaO77tq0mBVHK3RsnhXYlPy++Ro8og+kZ99e8zCzQc0fVQ/5WR82J6XnTpQFVU19bLLht7eeEBpZpqSMFM3NzOmkwf11Ia9JU3ec9LQ3tpTVNGtgqeUegbaAoWShie8HybphNlYZjZZ0q8lzXL3A8f2u/vO+L97JT2jui7hwJrLQAdJulrSoYb1k/T2iacDaGt/XLRN1547Qj++7izNfWOzhvbL0exLx+mxt7fUe7Tl2bsu1NIth3Tvsyvrlb960mBV19TqlVV030rSs8t36tNThun/fuJ0/W7RNg3pna1bzh+l3y/ZXu/Rlie+cI6Wbz+sf5tfN268dk+x3li/T3NmnKoHX990fBJRTa3r6Xd3SJIG9crSd2ZM0Cur92jnkXLlZMR00fgBumLCIP07489BLJY03sxGS9oh6VpJ1yeeYGYjJD0t6UZ3X5ewP1dSmrsXx19fJelfUqlMcwH0z5J6uvvyhgfMbEEqNwYQTHFFte54ZLG+/bGJ+ukNU1RSUaXH/75FD/21/gP7sbS0pJM6rpqUr0WbDuhwWcvGS7u7kspq3fXUct11+cn6t09OUklltZ5aUqhH3q6/ClGyZxXv/ctqffmSsbrz0nHKTk/Tip1H9NXfLz8+dlpSUa0DpZW66byR6pebqZLKam3ZX6Zv/vE9vbO5+d6CrqatJxG5e7WZ3SlpvqSYpIfdfZWZ3RE//qCk70s6SdKvrK4+1e4+TXUJ4TPxfemSHnf3F1OpjzU1yywk3auPApE19Xsp/b+GFujRK6ujq9DtvfHNS9ssyi3dfjil3/dTh/fpUqsp8BgLACAUqU4i6moi9nEBAAgHGSgAIBRRW0yeAAoACEXE4icBFAAQjrSk6xx0XwRQAEAoopaBMokIAIAAyEABAKHogn8TOyUEUABAKKLWhUsABQCEImqTiBgDBQAgADJQAEAo6MIFACAAJhEBABBAxOInARQAEI6orYXLJCIAAAIgAwUAhCJiCSgBFAAQjqh1aRJAAQChsIiloARQAEAoovYYS9QybgAAQkEGCgAIRcR6cAmgAIBwRK1LkwAKAAhF1CYRRe0LAwAAoSADBQCEImqzcAmgAIBQRCx+EkABAOEgAwUAIAAmEQEAgGaRgQIAQhG1LlwyUABAKCzFrUX3MJthZmvNbIOZzUly3Mzs5/Hj75vZlJaWbS0CKAAgFGlmKW3NMbOYpPslXSNpoqTrzGxig9OukTQ+vs2W9EAryrbu86ZSGACAY8xS21pguqQN7r7J3Y9KelLSrAbnzJL0qNd5R1IfM8tvYdlWIYACALqKoZK2J7wvjO9ryTktKdsqTCICWmjpvTM6ugrd3tW/equjq4AUmHtq5c1mq67b9ZgCdy9IPCVJsYY3beyclpRtFQIoACAcXpta8bpgWdDEKYWShie8HyZpZwvPyWxB2VahCxcAEArz2pS2FlgsabyZjTazTEnXSprX4Jx5km6Kz8Y9V9IRd9/VwrKtQgYKAOgS3L3azO6UNF9STNLD7r7KzO6IH39Q0vOSZkraIKlM0i1NlU2lPgRQAEA4UuzCbdEt3J9XXZBM3PdgwmuX9JWWlk0FARQAEI4UJxF1NQRQAEA42iED7UwIoACAULRwIlC3wSxcAAACIAMFAIQjYhkoARQAEA4CKAAAARBAAQAIoDZaAZRJRAAABEAGCgAIRdQeYyGAAgDCQQAFACCAiC3lxxgoAAABkIECAMJBFy4AAK3HJCIAAIIggAIAEEDEAiiTiAAACIAMFAAQjohloARQAEAomEQEAEAQEVtMngAKAAgHKxEBAIDmkIECAMLBGCgAAK3HJCIAAIKIWABlDBQAgADIQAEA4YhYBkoABQCEo7amo2vQrgigACJvRN8cffnCMZowKE+lR2v04gd79NiSbapt4rHGQXlZevTGaSfsX7B+n/715XX19l03dZhmThysPjkZ2nqoTI+8s1VLtx8O+VN0PGchBQCIjp5ZMd338dO17WCZ7nlhtfJ752j2+aNkJs1dtK3Z8gVvbdaq3UXH3xeVV9c7/o9Thur6acP134u2aeP+Ul128gDdM3OC7n5mhdbtLQn983QoMlAAiI6PnjZYmbE0/cuLa1RWVSMVHlGPjJhuOHu4/vDujrp9TSg8XK41e5IHwvQ00z9OGaY/LNuhp97dIUlauv2wRvbtoRumDdf3n18d+udB+2EWLoBIO3tEXy3dfqheoFywYZ+yM2KaNKRXStfO752t3Mx0LSs8XG//ssLDOmt4H6WnWUrX73Rqa1LbUmBm/czsZTNbH/+3b5JzhpvZa2a22sxWmdlXE4790Mx2mNny+DazuXuSgQKItOF9crR8x5F6+/aVHFVFVY2G9+2hhVsPNVn+7svGKy8rXYfLq7Rg/T79duE2Ha2pGwvMjNXlKNUNxgaramqVGUtTfq9sbT9cHuKn6Vhe06FduHMkveru95nZnPj7bzc4p1rS1919mZnlSVpqZi+7+wfx4z91939v6Q2bDaBmdqqkoZIWuntJwv4Z7v5iS28EAJ1Rz6x0lVZWn7C/uLJaeVmxRstV1dRq3opdWrr9sMqOVmvy0N767FlDNaR3tn74whpJ0q6iCtW66+SBefW6eU8ZmCdJysvuZjlMx04imiXpkvjruZIWqEEAdfddknbFXxeb2WrVxbcPFECTXbhm9s+SnpP0T5JWmtmshMP/r4lys81siZktKSgoCFIvAGg3ySbbWiP7jzlYVqX739ikd7Yc1Ps7i/TY4u0qeGuLzht9ksaclCtJKjtaowXr9+u6qcN0xpDeystK18cn5eusYb0lSTVNTfPtilLswk2MHfFtdivuPigeII8FyoFNnWxmoySdJWlhwu47zex9M3s4WRdwQ819/fmipKnuXhK/2R/NbJS7/0x1P19JuXuBpGORs5v9hADoTkoqq5WbeeKvwtysdJVUtq5L8o2N+/VPF4/VuAG52nSgVJL04Jub9L+vOkU/+sTpkqS9xZV6fGmhbpo+QofLq1L/AN1Ig9hxAjN7RdLgJIe+25r7mFlPSX+SdJe7H5tC/YCke1UXs+6V9B+Sbm3qOs0F0Nixblt332Jml6guiI5UEwEUALqK7YfLNbxvTr19A3pmKicjpu2Hylp1rWTZwpGKan173ir1z81UbmZM2w+X61NnDNGB0qPaU1yZQs07H2/jx1jc/YrGjpnZHjPLd/ddZpYvaW8j52WoLnj+zt2fTrj2noRz/kvSn5urT3OzcHeb2ZkJNyiR9DFJ/SVNau7iANDZLd52SNOG91FOxofjnReP66+Kqhqt2FnURMkTXTi2vyRp/b4TH2vZX3pUWw+VK5ZmuurUQXppzZ4TzunyamtT21IzT9LN8dc3q274sR4zM0m/kbTa3X/S4Fh+wttPSlrZ3A2by0BvUt2spePcvVrSTWb2UHMXB4DO7i+rdmvWpCH6/oxT9dS7hRrcK1s3nD1CT7+3s96jLY98bore31mkn762QZJ0w9nD1SMjplW7i1R2tEaT8nvr02cN0Zsb92vzgQ8z18tPHqBYmml3UYUG5mXpk5OHqNZdTy4tbPfP2tbaOgNtxn2SnjKz2yRtk/QZSTKzIZJ+7e4zJV0g6UZJK8xsebzc/3b35yX9KJ4wuqQtkm5v7oZNBlB3b/S/sLu/1dzFAaCzK6ms0Zx5K/WVC8fonpkTVFJZo6ff26nHFtdfhSjNTImPbW4/VK5PnzlUMyYMUmZ6mvaVVOqP7+7UE0u31ytnJn32rGEalJel0qPVenvzQT3yzlZVVEdr2bu25u4HJF2eZP9OSTPjr99UI8OP7n5ja+9p7m0+x4dJRABa5Opf8b28rc3/8gVtNn/l6FtPpfT7PvOCz3apuTXd7CEkAECHYTF5AABar4NXImp3BFAAQDgi9tdYWEweAIAAyEABAOGIWAZKAAUAhMKZRAQAQAARy0AZAwUAIAAyUABAOCKWgRJAAQChYAwUAIAgyEABAAggYgGUSUQAAARABgoACAVr4QIAEASTiAAACCBiY6AEUABAKDxiAZRJRAAABEAGCgAIBQspAAAQgNcQQAEAaLWoBVDGQAEACIAMFAAQCsZAAQAIIGpduARQAEAoCKAAAARQG7G1cJlEBABAAGSgAIBQMIkIAIAAGAMFACAAAigAAAFErQuXSUQAgC7PzPqZ2ctmtj7+b99GzttiZivMbLmZLWlt+UQEUABAKGpralPaUjRH0qvuPl7Sq/H3jbnU3c9092kBy0sigAIAQuI1tSltKZolaW789VxJn2jr8oyBAug05n/5go6uAlKQahA0s9mSZifsKnD3ghYWH+TuuyTJ3XeZ2cBGznNJL5mZS3oo4fotLX8cARQA0CnEg1mjAdPMXpE0OMmh77biNhe4+854gHzZzNa4++utrKokAigAICRtPQvX3a9o7JiZ7TGz/Hj2mC9pbyPX2Bn/d6+ZPSNpuqTXJbWofCLGQAEAoejgMdB5km6Ov75Z0nMNTzCzXDPLO/Za0lWSVra0fENkoACAUHTwQgr3SXrKzG6TtE3SZyTJzIZI+rW7z5Q0SNIzZibVxb/H3f3Fpso3hQAKAAhFbQcupODuByRdnmT/Tkkz4683STqjNeWbQhcuAAABkIECAELBWrgAAATgEfuD2gRQAEAooraYPAEUABCKqHXhMokIAIAAyEABAKGIWgZKAAUAhCKEP0nWpRBAAQChiNokIsZAAQAIgAwUABAKxkABAAjAa7yjq9CuCKAAgFAwiQgAgAC8NloZKJOIAAAIgAwUABCKWsZAAQBoPWbhAgAQALNwAQAIIGpduEwiAgAgADJQAEAoGAMFACCA2og9B0oABQCEImqTiBgDBQAgADJQAEAoWAsXAIAAotaFSwAFAISCAAoAQABR68JlEhEAAAGQgQIAQhG1vwdKAAUAhCJqa+ESQAEAoYjaUn6MgQIAQuE1ntKWCjPrZ2Yvm9n6+L99k5xzipktT9iKzOyu+LEfmtmOhGMzm7snARQA0B3MkfSqu4+X9Gr8fT3uvtbdz3T3MyVNlVQm6ZmEU3567Li7P9/cDZvtwjWz6XX39cVmNlHSDElrWnJxAEB0dPAY6CxJl8Rfz5W0QNK3mzj/ckkb3X1r0Bs2mYGa2Q8k/VzSA2b2r5J+KamnpDlm9t0mys02syVmtqSgoCBo3QAAXYjX1qa0JcaO+Da7Fbcf5O67JCn+78Bmzr9W0hMN9t1pZu+b2cPJuoAbMvfGvzGY2QpJZ0rKkrRb0jB3LzKzHEkL3X1yczeQFK1pWQDQuVlbXXj+yVNS+n1/9bplTdbNzF6RNDjJoe9KmuvufRLOPeTuSYOgmWVK2inpNHffE983SNJ+1cWseyXlu/utTdWnuS7canevkVRmZhvdvUiS3L3czKI13QoA0KHc/YrGjpnZHjPLd/ddZpYvaW8Tl7pG0rJjwTN+7eOvzey/JP25ufo0N4noqJn1iL+emnDx3pIIoACA4zpyFq6keZJujr++WdJzTZx7nRp038aD7jGflLSyuRs2l4Fe5O6VkuTuiQEzI6GiAAB09HOg90l6ysxuk7RN0mckycyGSPq1u8+Mv+8h6UpJtzco/yMzO1N1Xbhbkhw/QZNjoCFhDBQAOo82GwP9y/DJKf2+/+j299usbm2BlYgAAKGI2p8zYyEFAAACIAMFAISitu2HBDsVAigAIBQ1BFAAAFovYkOgBFAAQDiiloEyiQgAgADIQAEAoaALFwCAAKLWhUsABQCEImoZKGOgAAAEQAYKAAgFXbgAAAQQtS5cAigAIBQEUAAAAohaFy6TiAAACIAMFAAQCrpwAQAIIGpduARQAEAoopaBMgYKAEAAZKAAgFDQhQsAQABR68IlgAIAQkEGCgBAALUdXYF2xiQiAAACIAMFAISCLlwAAAJgEhEAAAGQgQIAEEDUMlAmEQEAEAAZKAAgFFHrwiUDBQCEosZT21JhZp8xs1VmVmtm05o4b4aZrTWzDWY2J2F/PzN72czWx//t29w9CaAAgFDUuKe0pWilpE9Jer2xE8wsJul+SddImijpOjObGD88R9Kr7j5e0qvx900igAIAujx3X+3ua5s5bbqkDe6+yd2PSnpS0qz4sVmS5sZfz5X0iebuyRgoACAUXWAW7lBJ2xPeF0o6J/56kLvvkiR332VmA5u7WHsEUGuHe4TKzGa7e0FH16M7o43bHm3cPmjnDz3oW1L6fW9msyXNTthVkNi2ZvaKpMFJin7X3Z9ryS2S7Asc9slAk5stif8h2hZt3PZo4/ZBO4ckHiwbbUt3vyLFWxRKGp7wfpiknfHXe8wsP5595kva29zFGAMFAETFYknjzWy0mWVKulbSvPixeZJujr++WVKzGS0BFADQ5ZnZJ82sUNJ5kv5iZvPj+4eY2fOS5O7Vku6UNF/SaklPufuq+CXuk3Slma2XdGX8fdP39Ig9+NoSjGm0Pdq47dHG7YN2ji4CKAAAAdCFCwBAAATQBI0t8YTwmNnDZrbXzFZ2dF26KzMbbmavmdnq+NJmX+3oOnU3ZpZtZovM7L14G9/T0XVC+6MLNy6+xNM61Q0eF6puttZ17v5Bh1asmzGziySVSHrU3U/v6Pp0R/Ep+PnuvszM8iQtlfQJfpbDY2YmKdfdS8wsQ9Kbkr7q7u90cNXQjshAP9TUEk8Iibu/LulgR9ejO3P3Xe6+LP66WHWzDYd2bK26F69TEn+bEd/IRiKGAPqhZEs88UsHXZqZjZJ0lqSFHVyVbsfMYma2XHUP3L/s7rRxxBBAPxTqEk9ARzOznpL+JOkudy/q6Pp0N+5e4+5nqm41m+lmxpBExBBAP9TUEk9AlxIfl/uTpN+5+9MdXZ/uzN0PS1ogaUbH1gTtjQD6oaaWeAK6jPgEl99IWu3uP+no+nRHZjbAzPrEX+dIukLSmg6tFNodATSumSWeEBIze0LS3yWdYmaFZnZbR9epG7pA0o2SLjOz5fFtZkdXqpvJl/Samb2vui/fL7v7nzu4TmhnPMYCAEAAZKAAAARAAAUAIAACKAAAARBAAQAIgAAKAEAABFAAAAIggAIAEAABFACAAP4/bXVxjNw17FEAAAAASUVORK5CYII=\n", 431 | "text/plain": [ 432 | "
" 433 | ] 434 | }, 435 | "metadata": { 436 | "needs_background": "light" 437 | }, 438 | "output_type": "display_data" 439 | } 440 | ], 441 | "source": [ 442 | "plt.figure(figsize=(8,6))\n", 443 | "sns.heatmap(combined_means.corr(),cmap='RdBu',\n", 444 | " mask=mask,vmax=1,vmin=-1,annot=True,annot_kws={\"size\": 15})\n", 445 | "plt.savefig('remove_g_corr.pdf')\n", 446 | "plt.show()" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "id": "801e7e7d-55b0-488f-a4c9-6c81d8b6ef73", 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "# no statistically enrichments found, so no plots " 457 | ] 458 | } 459 | ], 460 | "metadata": { 461 | "kernelspec": { 462 | "display_name": "plot", 463 | "language": "python", 464 | "name": "plot" 465 | }, 466 | "language_info": { 467 | "codemirror_mode": { 468 | "name": "ipython", 469 | "version": 3 470 | }, 471 | "file_extension": ".py", 472 | "mimetype": "text/x-python", 473 | "name": "python", 474 | "nbconvert_exporter": "python", 475 | "pygments_lexer": "ipython3", 476 | "version": "3.9.10" 477 | } 478 | }, 479 | "nbformat": 4, 480 | "nbformat_minor": 5 481 | } 482 | -------------------------------------------------------------------------------- /figures/supplementary_figures/.ipynb_checkpoints/g_enrichments-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 61, 6 | "id": "23720687-2ad7-4d94-8df1-39b834c5e456", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import pandas as pd \n", 12 | "import anndata \n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import shap as shap \n", 15 | "import seaborn as sns\n", 16 | "import math as math" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 62, 22 | "id": "654a8332-3a7a-4e98-9e86-9a9f44072a58", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# drop G\n", 27 | "aux_0 = pd.read_csv('kang_remove_g/aux_0.csv')\n", 28 | "aux_1 = pd.read_csv('kang_remove_g/aux_1.csv')\n", 29 | "aux_2 = pd.read_csv('kang_remove_g/aux_2.csv')\n", 30 | "aux_3 = pd.read_csv('kang_remove_g/aux_3.csv')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 63, 36 | "id": "8a1707e6-79f5-434c-b6de-d41a31658f88", 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/html": [ 42 | "
\n", 43 | "\n", 56 | "\n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | "
indexmeansstds
233IL81.4722201.072127
250H2AFZ0.6600400.297694
707SQRDL0.5417980.295089
347PLA2G70.4399080.348958
320HIST1H2AC0.3853020.294543
............
342TREM20.0000120.000478
960RRM20.0000110.000294
797PTRF0.0000100.000238
12ALDH4A10.0000090.000227
969ABCC20.0000050.000174
\n", 134 | "

979 rows × 3 columns

\n", 135 | "
" 136 | ], 137 | "text/plain": [ 138 | " index means stds\n", 139 | "233 IL8 1.472220 1.072127\n", 140 | "250 H2AFZ 0.660040 0.297694\n", 141 | "707 SQRDL 0.541798 0.295089\n", 142 | "347 PLA2G7 0.439908 0.348958\n", 143 | "320 HIST1H2AC 0.385302 0.294543\n", 144 | ".. ... ... ...\n", 145 | "342 TREM2 0.000012 0.000478\n", 146 | "960 RRM2 0.000011 0.000294\n", 147 | "797 PTRF 0.000010 0.000238\n", 148 | "12 ALDH4A1 0.000009 0.000227\n", 149 | "969 ABCC2 0.000005 0.000174\n", 150 | "\n", 151 | "[979 rows x 3 columns]" 152 | ] 153 | }, 154 | "execution_count": 63, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "aux_0.sort_values('means',ascending=False)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 64, 166 | "id": "b85cd0f4-6b0c-4a1b-82e7-24e0b207a830", 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "combined_means = pd.DataFrame(index = aux_0.index)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 65, 176 | "id": "c9a0bd21-db5e-4109-9fdd-c6c585f99f52", 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "combined_means[0] = aux_0['means']\n", 181 | "combined_means[1] = aux_1['means']\n", 182 | "combined_means[2] = aux_2['means']\n", 183 | "combined_means[3] = aux_3['means']" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 66, 189 | "id": "a69cab5f-5575-4543-8c05-e3a7dabfe983", 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "data": { 194 | "text/html": [ 195 | "
\n", 196 | "\n", 209 | "\n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | "
0123
00.0037620.0076300.0052640.014630
10.0010840.0022510.0023870.000991
20.0002170.0011490.0007990.000178
30.0004710.0005260.0014020.000415
40.0024610.0012580.0009570.000706
...............
9740.0001410.0001670.0000260.000326
9750.0000300.0000440.0000690.000031
9760.0002870.0000680.0000320.000397
9770.0001350.0001820.0000360.000130
9780.0001830.0002110.0001040.000023
\n", 299 | "

979 rows × 4 columns

\n", 300 | "
" 301 | ], 302 | "text/plain": [ 303 | " 0 1 2 3\n", 304 | "0 0.003762 0.007630 0.005264 0.014630\n", 305 | "1 0.001084 0.002251 0.002387 0.000991\n", 306 | "2 0.000217 0.001149 0.000799 0.000178\n", 307 | "3 0.000471 0.000526 0.001402 0.000415\n", 308 | "4 0.002461 0.001258 0.000957 0.000706\n", 309 | ".. ... ... ... ...\n", 310 | "974 0.000141 0.000167 0.000026 0.000326\n", 311 | "975 0.000030 0.000044 0.000069 0.000031\n", 312 | "976 0.000287 0.000068 0.000032 0.000397\n", 313 | "977 0.000135 0.000182 0.000036 0.000130\n", 314 | "978 0.000183 0.000211 0.000104 0.000023\n", 315 | "\n", 316 | "[979 rows x 4 columns]" 317 | ] 318 | }, 319 | "execution_count": 66, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "combined_means" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 67, 331 | "id": "f2040385-8056-4c9b-bdee-ee1ab665c26f", 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/html": [ 337 | "
\n", 338 | "\n", 351 | "\n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | "
0123
01.0000000.6907570.7031570.723252
10.6907571.0000000.6980950.634515
20.7031570.6980951.0000000.589254
30.7232520.6345150.5892541.000000
\n", 392 | "
" 393 | ], 394 | "text/plain": [ 395 | " 0 1 2 3\n", 396 | "0 1.000000 0.690757 0.703157 0.723252\n", 397 | "1 0.690757 1.000000 0.698095 0.634515\n", 398 | "2 0.703157 0.698095 1.000000 0.589254\n", 399 | "3 0.723252 0.634515 0.589254 1.000000" 400 | ] 401 | }, 402 | "execution_count": 67, 403 | "metadata": {}, 404 | "output_type": "execute_result" 405 | } 406 | ], 407 | "source": [ 408 | "combined_means.corr()" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 68, 414 | "id": "7aa9b78a-9769-49f2-ab16-e30ef48a91a1", 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "corr_mat = combined_means.corr().abs()\n", 419 | "mask = np.tril(np.ones_like(corr_mat, dtype=bool)) " 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 69, 425 | "id": "8c5f0225-033b-4559-bb16-6500ed35ed8f", 426 | "metadata": {}, 427 | "outputs": [ 428 | { 429 | "data": { 430 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdAAAAFpCAYAAAAsmHm9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAp80lEQVR4nO3dd5gc1Znv8d87PVGjUUJplCMggQRIQiSTk5C9lu1rewETDNgC2+wa4yRfXweW595l7V17HTAwa4PFYsDYJmhtQAQjk4wiAkko51HOk0cT3vvHtETPqCdV18T6fp6nHnVX1ak6fRjm7fecU2fM3QUAAFonraMrAABAV0QABQAgAAIoAAABEEABAAiAAAoAQAAEUAAAAiCAAgC6BDN72Mz2mtnKRo6bmf3czDaY2ftmNiXh2AwzWxs/NieM+hBAAQBdxW8lzWji+DWSxse32ZIekCQzi0m6P358oqTrzGxiqpUhgAIAugR3f13SwSZOmSXpUa/zjqQ+ZpYvabqkDe6+yd2PSnoyfm5KCKAAgO5iqKTtCe8L4/sa25+S9FQv0AKsFdjGpv1gfkdXIRJqqms7ugrdXm0Nvy7a2nv3zbS2unbmWbem9B+wavkjt6uu6/WYAncvaMUlkn02b2J/StojgAIAIsDSYimVjwfL1gTMhgolDU94P0zSTkmZjexPCV24AIBQWFospS0E8yTdFJ+Ne66kI+6+S9JiSePNbLSZZUq6Nn5uSshAAQChCCkINn59syckXSKpv5kVSvqBpAxJcvcHJT0vaaakDZLKJN0SP1ZtZndKmi8pJulhd1+Van0IoACALsHdr2vmuEv6SiPHnlddgA0NARQAEIq2zkA7GwIoACAUFiOAAgDQamkRy0CZhQsAQABkoACAUDAGCgBAAARQAAACsLRojQoSQAEAoYhaBhqtrwsAAISEDBQAEIqoZaAEUABAKAigAAAEwEpEAAAEELUMlElEAAAEQAYKAAhF1DJQAigAIBRRW0yeAAoACEXUMlDGQAEACIAMFAAQiqhloARQAEAoCKAAAARAAAUAIICoBVAmEQEAEAAZKAAgFKyFCwBAAFHrwiWAAgBCQQAFACCAqAVQJhEBABAAGSgCGz0gV9+cOUGTh/VRcUWVnl22Q/+1YINqvfmyl04YqM9fOEZjB/ZURVWNPthRpG/9frkqqmqOn3PrRWP0qanD1Dc3U5v3leqXr6zTOxsPtOEn6jrGDOipb/3DBE0e3lclFVV6ZkmhHvrr+ibb/vbLxumOy8cnPfaL+Wv18Oub2qi2nd+YgT015+MTNXlEXxVXVOmZxdv14CtNt+cdV4zXl65I3p4/e3GtHl6wUWkm3XzRGF106kCNGdhTkrR6xxH94qV1WlV4pC0+SodKS7OOrkK7IoAikLzsdP3qpmnatK9UX3/iXQ3rl6O7rj5FaSY98NcNTZadNWWovjVzgh59a4t+/tJa5WVn6Owx/RRL+J/v8xeO1hcuHquHXtugtbuKNPOMIfrp9VN0228W6oOdRW398Tq1vOx0PXjr2dq0t0Rfe2yphp/UQ3dfc6rMpF+9sr7Rcs8sKdTb6/fX23fphIG65eKxenP9vraudqeVl5Ouh74wXZv2lOiuR+va8+sfPVVmpvtfWtdouacXbddba+u322WnDdKtl4zVW2v3SpKyMmK69ZKxem5JoX6zYKPcpWvPH6nf3nGubnrg71q9o3v9LBsBFGje/zp7uLIyYvrW799VaWWNFm6ScrPSNfuScXr0rc0qraxJWq53jwzdPeNU/fiFNXp2aeHx/QvW7D3+Oj1m+vxHxmjum5s1983NkqR3Nh7Q6AG5+uIlY/W1x99t2w/XyX1m+ghlZcT09cffVWlltRZuPKDcrHTdftl4zX1js0orq5OW21tUob1FFfX2ffHSsdq0t0TrdhW3R9U7pc+cM1LZGTHd/dgylVZW650NdT/Ld1wxXr/926ZWtefsy8dp094SrY23Z2VVjWb+6DUVl394jYUb92veNy7WdeeN0vf/+H7bfbAOYBatAMoYKAI5f1x/vbNhf71AOX/lbmVnxjRlZL9Gy1152mBJ0p+X72j0nGF9e6hndroWbarfXbtw4wGdM7a/0mPR+p+0oQtOHqC/r99X7xf7/Pd3KSczpqmjG2/7hnrlZOjcsf01//1dbVHNLuMjpwzQ2+vqt+eL7+1UTmZM08a0rj3PG9dfL7638/i+Wle94ClJ1TWujXtK1LdnZuqVR4cigCKQUf1ztWV/ab19e45UqPxotUYNyG203OnDemvr/lLNmjJMf7n7Yr3z/Sv12y+eo8nD+xw/Jyu97seyqqa2XtmjNbXKTE/T0L49wvsgXdCoAbnavK9+2+8+1vb9G2/7hq44fbAy0tP04vs7mz+5GxvdVHs28bPc0JWT4u35XtNfSDJiaZo4tLc27SkJVN/OLC3NUtpawsxmmNlaM9tgZnOSHP+mmS2PbyvNrMbM+sWPbTGzFfFjS1L+vKleANHUKydDxRUndm0VlVcrLzuj0XIn9czSyP65uu2iMfrFy+v0tcffVfnRGv38hqnql1v3jbzwULlqa12nDe1dr+yx971zGr9+FOQ10fa9WtE2V0/K1wc7jmjbgbIwq9fl5OVkqLi86oT9ReVVrWrPGWfk64PCI9ra4ItlQ1+8bKx65aTr6cXbW13Xzs7SLKWt2eubxSTdL+kaSRMlXWdmExPPcfcfu/uZ7n6mpO9I+pu7H0w45dL48Wmpft5mA6iZnWpm3zazn5vZz+KvJ6R6Y3R9rhOnKNYNgTQ+dTHN6saX7n1ulV5csUt/37Bf33jyXdW667PnjJAklVZWa/7KXbrlwjGaOqqfeuVk6B/PGaFzxpwkSappyTTfbs49edsn+2+STP+8LE0d3U8vRrz79phkrWYyJWnmpOra8yS98F7T2fyFpwzQFy4dp/98YW2zgbYrausAKmm6pA3uvsndj0p6UtKsJs6/TtITIXy0pJoMoGb2bdVV0CQtkrQ4/vqJZKlzQrnZZrbEzJYUFBSEWV90EkXlVUkzzZ5Z6Umzo2OOxL/pL93y4RfC0soard5ZpDEDeh7f9x8vrNHmfSV66Jaz9dc5l+nG80fpN/HHLA6WVob1Mbqk4vIq5SXJjHpmpZ8w3taYK08fLJP00goCaHF5lfKyT5xP2TM7XcUVJ2amyVw1OV8mNTmefNqw3vrR9Wfpjwu36XdvbQlY284tzSylLTF2xLfZDW4xVFJi6l4Y33cCM+shaYakPyXsdkkvmdnSJNduteZm4d4m6TR3r/dTZGY/kbRK0n3JCrl7gaRjkZN0oRvasr/0hPG2Qb2y1SMrXVv2Nf7Nesu+UtXWuhp+1zSTahO+7h8uq9KX5i7RwF5Z6pmVrq0HynTduSO1v7hSuw5XKMq27CvV6IZt3zve9i3MamZMztfyrYe050i021KSNu8r1eiEL29SQns28bOcaMbkfL3bRHuO7J+rX35+mhZuPKD75q1Kuc7dVYPYkUyyNLWxGPMPkt5q0H17gbvvNLOBkl42szXu/nrA6jbbhVsraUiS/fnxY4iotzfs17lj+6tH5odLd115+mBVHK3Rsq0HGy33xrp9SkszTUuYLZqbla4J+b20fveJj1LsLarUpn2liqWZPn7WUM17t/CEc6LmrXX7dN74+m1/1aR8lR+t0dLNjbf9Mfl9cjR5RF+6b+PeXLtP559cvz2vnlzXnks2Nd+eQ/rm6IyRffXC8uTdt/3zsvTArWdr+4EyzXni3RYtNNJVtUMXbqGk4Qnvh0lqrN/8WjXovnX3nfF/90p6RnVdwoE1l4HeJelVM1uvD9PmEZLGSbozlRuja/vT4u269pyR+vG1Z2rum5s1tG8Pzb5krH739y31Hm155p8v1LKtB3Xvc3XfulfvLNKC1Xv0vVmn6xcvr9PhsqO66SOjVV3remrRtuPlZk7OV3osTTsOlWlw7xxdf95I1brrkTc2t/tn7Wz+sGibrj1/pP7jc1P029c3aWi/HrrjsnF67K36z4A+d/dFWrb5oO55ZmW98jMm56uqplavrNzd3lXvlP6wcKuuP3+kfnLjVD3yt40a1q+HvnTFeP33m/Xb83++cbGWbj6oH/5pRb3yTbVnVnqa7r/lbOXlZOhfn1ul8YN7HT9WVVOrNd1sUZB2WEhhsaTxZjZa0g7VBcnrT6iHWW9JF0u6IWFfrqQ0dy+Ov75K0r+kUpkmA6i7v2hmJ6suSg9VXfpcKGmxuyd/Uh6RUFxRrS/NXaxvzZygn1w/RSUV1Xr8na0qeK3+KkSxtLqxjUTfe3qFvnrVyfrajFOUnRHTe9sO647fLq43dmpmuvkjozW4d7ZKKqv1tzV7df8r61V+lB+74opq3fGbxfr2P0zUf944VcUVVfrd21v04Kv1VyFKb+TRgKsn52vRxgM6VHa0varcqRWXV2v2rxfpO7Mm6uc3T1NxeZUee3OzHmiwqlMs1kh7njFEizYc0KHSE9vzpLwsnTqkLmj+8paz6x3bcahMM/9tQXgfpBNo66X83L3azO6UNF9STNLD7r7KzO6IH38wfuonJb3k7ol98IMkPRNf7CFd0uPu/mIq9bFks/lC1o07LDqHaT+Y39FViISaakYt2lptDb8u2tp7981ssyh3xpznU/oP2JZ1awss5QcACIVFbGUBAigAIBRRWwuXAAoACAV/zgwAgACi9ufMItZjDQBAOMhAAQChiFoGSgAFAISi4TPf3R0BFAAQCjJQAAACiFoAZRIRAAABkIECAELBc6AAAATASkQAAAQQtbVwI/ZxAQAIBxkoACAUjIECABBA1B5jIYACAELBJCIAAAKIWhcuk4gAAAiADBQAEArGQAEACCBGAAUAoPUIoAAABBC1AMokIgAAAiADBQCEImoZKAEUABAKAigAAAGkRyyAMgYKAEAAZKAAgFDQhQsAQAAEUAAAAoilRWtUkAAKAAhF1DLQaH1dAAB0aWY2w8zWmtkGM5uT5PglZnbEzJbHt++3tGxrkYECAELR1hmomcUk3S/pSkmFkhab2Tx3/6DBqW+4+8cClm0xAigAIBTt0IU7XdIGd98kSWb2pKRZkloSBFMpmxQBtBtYcs/VHV2FSJj6vRc7ugrdXs8+2R1dBaQgZqkFUDObLWl2wq4Cdy9IeD9U0vaE94WSzklyqfPM7D1JOyV9w91XtaJsixFAAQChSDUDjQfLgiZOSXYDb/B+maSR7l5iZjMlPStpfAvLtgqTiAAAXUWhpOEJ74epLss8zt2L3L0k/vp5SRlm1r8lZVuLDBQAEIp2GANdLGm8mY2WtEPStZKuTzzBzAZL2uPubmbTVZcoHpB0uLmyrUUABQCEoq0Xk3f3ajO7U9J8STFJD7v7KjO7I378QUmflvQlM6uWVC7pWnd3SUnLplIfAigAIBTtsZBCvFv2+Qb7Hkx4/UtJv2xp2VQwBgoAQABkoACAUERtKT8CKAAgFARQAAACIIACABBA1AIok4gAAAiADBQAEIqoZaAEUABAKAigAAAEQAAFACCAqAVQJhEBABAAGSgAIBRRy0AJoACAUMSMAAoAQKulRSyAMgYKAEAAZKAAgFDEopWAEkABAOFIYxIRAACtxyQiAAACYBIRAABoFhkoACAUTCICACAAJhEBABBA1MZACaAAgFBErQuXSUQAAARABgoACAVduAAABMCfMwMAIAAyUACd3ugBufrWRydq8vA+Kq6o0rNLC1Xw2gbVeuNlZl86TrdfNi7psV++vE6PvL6pjWrb+Y06qYfuuvxknTakl0oqq/Xn93fpkbc3N9mex1w0vr9uOGekxvTPVUV1rdbsLtL/eW6lKqpqJUm3XjBKF40foMG9smUmbTtYpicWbddf1+5t40+FtkYABbqYvOx0PfD5s7VpX4nufnyZhvXroa/NOEVmpgdeXd9ouWeXbtfb6/fV23fphEH6/EVj9Na6fY2U6v56ZqXrp589U1sOlOo7z6zQ0D45+sol42Qm/frNzU2W/dikfN11xXg9sWi7fvW3jcrLTtfUEX3rrQmbm5muF1bu1pYDpap11yUnD9Q9Hz9Ntc+5FnSzdo/aLFwCKNDFfHr6CGVlxPTNJ95VaWWNFm48oNysdN1+6Tg9+uYmlVbWJC23t6hSe4sq6+374iVjtXlfidbtLm6PqndKnzhziLLS0/TdZ1eq7GiNlmw9pNysdN1y/ig9vmibyo4mb8/eORn6p8vG6Wevrtf/vL/r+P431u+vd94vXttQ7/3iLYc0un+urj5tcLcLoFHrwuUxFqCLOX98f/19w/56gfKlFbuUnRnTlFH9WnydXjkZOmdsf81P+OUfReeMPkmLNh+sFyhfWb1H2RkxnTm8T6PlLj1lgCTphZW7W33PI+VVyuiG6VoszVLauhoCKNDFjOqfqy37Suvt232kQuVHqzWqf26Lr3P5aYOUkZ6m+SuiHUBH9uuhrQfL6u3bW1yp8qM1GtmvR6PlJub30raDZfrY5Hz96Y7z9NrdF+uhz03V6UN6JT0/ZqaeWem6csIgnT2qr55bvjPUz9EZpFlqW0uY2QwzW2tmG8xsTpLjnzOz9+Pb22Z2RsKxLWa2wsyWm9mSVD8vXbhAF9MrJ0PFFVUn7C8qr1avnIwWX+fqSflaveOIth0oa/7kbiwvO10lldUn7C+urFJeduPteVJulkb066Gbzh2lB/62UUfKq3T99BH690+foet+/Y4OlX3432hifi89dMNUSVJ1Ta1++up6vbFhf2OXRiPMLCbpfklXSiqUtNjM5rn7BwmnbZZ0sbsfMrNrJBVIOifh+KXuHkrjE0CBrijJ7FCzpLuT6t8zS1NG9dMvXlobarW6Kk/WnjJ5sgPHjpvUIzNd33tulRZtOShJWrnziP54+3n61FnD9Ju3PpyAtGl/ib7w6BL1zE7X+WNO0tcuH6/Symq9uqZ7zcRthz+oPV3SBnffJElm9qSkWZKOB1B3fzvh/HckDWurygTuwjWzW5o4NtvMlpjZkoKCgqC3AJBEUXmVeuac+N23Z1a6istPzEyTufL0wTJJL61o/fhdd1NcUa2e2Se2Z25WLGlmekxRvBdg+fbDx/eVHa3R2j3FGtW/ftdvRVWt1u4p1tKth/SL1zZo/gd79KWLx4bzATqRNLOUtsTYEd9mN7jFUEnbE94Xxvc15jZJLyS8d0kvmdnSJNdutVQy0HskPZLsgLsXqC5tllr+pRhAC2zZX6pR/XvW2zeoV7Z6ZKVry/7SRkrVd9WkfC3fdkh7iiraoopdytaDZSeMdQ7My1KPzPQTxkbrlTtQplp3qUHSVZe5Nn3PdXuK9dFJ+YqlmWpa8rBpFxFLcVZNg9iRTLIUN2kDmtmlqgugH0nYfYG77zSzgZJeNrM17v560Po2+XETBmIbbiskDQp6UwDBvb1+v84bd5J6ZMaO77tq0mBVHK3RsnhXYlPy++Ro8og+kZ99e8zCzQc0fVQ/5WR82J6XnTpQFVU19bLLht7eeEBpZpqSMFM3NzOmkwf11Ia9JU3ec9LQ3tpTVNGtgqeUegbaAoWShie8HybphNlYZjZZ0q8lzXL3A8f2u/vO+L97JT2jui7hwJrLQAdJulrSoYb1k/T2iacDaGt/XLRN1547Qj++7izNfWOzhvbL0exLx+mxt7fUe7Tl2bsu1NIth3Tvsyvrlb960mBV19TqlVV030rSs8t36tNThun/fuJ0/W7RNg3pna1bzh+l3y/ZXu/Rlie+cI6Wbz+sf5tfN268dk+x3li/T3NmnKoHX990fBJRTa3r6Xd3SJIG9crSd2ZM0Cur92jnkXLlZMR00fgBumLCIP07489BLJY03sxGS9oh6VpJ1yeeYGYjJD0t6UZ3X5ewP1dSmrsXx19fJelfUqlMcwH0z5J6uvvyhgfMbEEqNwYQTHFFte54ZLG+/bGJ+ukNU1RSUaXH/75FD/21/gP7sbS0pJM6rpqUr0WbDuhwWcvGS7u7kspq3fXUct11+cn6t09OUklltZ5aUqhH3q6/ClGyZxXv/ctqffmSsbrz0nHKTk/Tip1H9NXfLz8+dlpSUa0DpZW66byR6pebqZLKam3ZX6Zv/vE9vbO5+d6CrqatJxG5e7WZ3SlpvqSYpIfdfZWZ3RE//qCk70s6SdKvrK4+1e4+TXUJ4TPxfemSHnf3F1OpjzU1yywk3auPApE19Xsp/b+GFujRK6ujq9DtvfHNS9ssyi3dfjil3/dTh/fpUqsp8BgLACAUqU4i6moi9nEBAAgHGSgAIBRRW0yeAAoACEXE4icBFAAQjrSk6xx0XwRQAEAoopaBMokIAIAAyEABAKHogn8TOyUEUABAKKLWhUsABQCEImqTiBgDBQAgADJQAEAo6MIFACAAJhEBABBAxOInARQAEI6orYXLJCIAAAIgAwUAhCJiCSgBFAAQjqh1aRJAAQChsIiloARQAEAoovYYS9QybgAAQkEGCgAIRcR6cAmgAIBwRK1LkwAKAAhF1CYRRe0LAwAAoSADBQCEImqzcAmgAIBQRCx+EkABAOEgAwUAIAAmEQEAgGaRgQIAQhG1LlwyUABAKCzFrUX3MJthZmvNbIOZzUly3Mzs5/Hj75vZlJaWbS0CKAAgFGlmKW3NMbOYpPslXSNpoqTrzGxig9OukTQ+vs2W9EAryrbu86ZSGACAY8xS21pguqQN7r7J3Y9KelLSrAbnzJL0qNd5R1IfM8tvYdlWIYACALqKoZK2J7wvjO9ryTktKdsqTCICWmjpvTM6ugrd3tW/equjq4AUmHtq5c1mq67b9ZgCdy9IPCVJsYY3beyclpRtFQIoACAcXpta8bpgWdDEKYWShie8HyZpZwvPyWxB2VahCxcAEArz2pS2FlgsabyZjTazTEnXSprX4Jx5km6Kz8Y9V9IRd9/VwrKtQgYKAOgS3L3azO6UNF9STNLD7r7KzO6IH39Q0vOSZkraIKlM0i1NlU2lPgRQAEA4UuzCbdEt3J9XXZBM3PdgwmuX9JWWlk0FARQAEI4UJxF1NQRQAEA42iED7UwIoACAULRwIlC3wSxcAAACIAMFAIQjYhkoARQAEA4CKAAAARBAAQAIoDZaAZRJRAAABEAGCgAIRdQeYyGAAgDCQQAFACCAiC3lxxgoAAABkIECAMJBFy4AAK3HJCIAAIIggAIAEEDEAiiTiAAACIAMFAAQjohloARQAEAomEQEAEAQEVtMngAKAAgHKxEBAIDmkIECAMLBGCgAAK3HJCIAAIKIWABlDBQAgADIQAEA4YhYBkoABQCEo7amo2vQrgigACJvRN8cffnCMZowKE+lR2v04gd79NiSbapt4rHGQXlZevTGaSfsX7B+n/715XX19l03dZhmThysPjkZ2nqoTI+8s1VLtx8O+VN0PGchBQCIjp5ZMd338dO17WCZ7nlhtfJ752j2+aNkJs1dtK3Z8gVvbdaq3UXH3xeVV9c7/o9Thur6acP134u2aeP+Ul128gDdM3OC7n5mhdbtLQn983QoMlAAiI6PnjZYmbE0/cuLa1RWVSMVHlGPjJhuOHu4/vDujrp9TSg8XK41e5IHwvQ00z9OGaY/LNuhp97dIUlauv2wRvbtoRumDdf3n18d+udB+2EWLoBIO3tEXy3dfqheoFywYZ+yM2KaNKRXStfO752t3Mx0LSs8XG//ssLDOmt4H6WnWUrX73Rqa1LbUmBm/czsZTNbH/+3b5JzhpvZa2a22sxWmdlXE4790Mx2mNny+DazuXuSgQKItOF9crR8x5F6+/aVHFVFVY2G9+2hhVsPNVn+7svGKy8rXYfLq7Rg/T79duE2Ha2pGwvMjNXlKNUNxgaramqVGUtTfq9sbT9cHuKn6Vhe06FduHMkveru95nZnPj7bzc4p1rS1919mZnlSVpqZi+7+wfx4z91939v6Q2bDaBmdqqkoZIWuntJwv4Z7v5iS28EAJ1Rz6x0lVZWn7C/uLJaeVmxRstV1dRq3opdWrr9sMqOVmvy0N767FlDNaR3tn74whpJ0q6iCtW66+SBefW6eU8ZmCdJysvuZjlMx04imiXpkvjruZIWqEEAdfddknbFXxeb2WrVxbcPFECTXbhm9s+SnpP0T5JWmtmshMP/r4lys81siZktKSgoCFIvAGg3ySbbWiP7jzlYVqX739ikd7Yc1Ps7i/TY4u0qeGuLzht9ksaclCtJKjtaowXr9+u6qcN0xpDeystK18cn5eusYb0lSTVNTfPtilLswk2MHfFtdivuPigeII8FyoFNnWxmoySdJWlhwu47zex9M3s4WRdwQ819/fmipKnuXhK/2R/NbJS7/0x1P19JuXuBpGORs5v9hADoTkoqq5WbeeKvwtysdJVUtq5L8o2N+/VPF4/VuAG52nSgVJL04Jub9L+vOkU/+sTpkqS9xZV6fGmhbpo+QofLq1L/AN1Ig9hxAjN7RdLgJIe+25r7mFlPSX+SdJe7H5tC/YCke1UXs+6V9B+Sbm3qOs0F0Nixblt332Jml6guiI5UEwEUALqK7YfLNbxvTr19A3pmKicjpu2Hylp1rWTZwpGKan173ir1z81UbmZM2w+X61NnDNGB0qPaU1yZQs07H2/jx1jc/YrGjpnZHjPLd/ddZpYvaW8j52WoLnj+zt2fTrj2noRz/kvSn5urT3OzcHeb2ZkJNyiR9DFJ/SVNau7iANDZLd52SNOG91FOxofjnReP66+Kqhqt2FnURMkTXTi2vyRp/b4TH2vZX3pUWw+VK5ZmuurUQXppzZ4TzunyamtT21IzT9LN8dc3q274sR4zM0m/kbTa3X/S4Fh+wttPSlrZ3A2by0BvUt2spePcvVrSTWb2UHMXB4DO7i+rdmvWpCH6/oxT9dS7hRrcK1s3nD1CT7+3s96jLY98bore31mkn762QZJ0w9nD1SMjplW7i1R2tEaT8nvr02cN0Zsb92vzgQ8z18tPHqBYmml3UYUG5mXpk5OHqNZdTy4tbPfP2tbaOgNtxn2SnjKz2yRtk/QZSTKzIZJ+7e4zJV0g6UZJK8xsebzc/3b35yX9KJ4wuqQtkm5v7oZNBlB3b/S/sLu/1dzFAaCzK6ms0Zx5K/WVC8fonpkTVFJZo6ff26nHFtdfhSjNTImPbW4/VK5PnzlUMyYMUmZ6mvaVVOqP7+7UE0u31ytnJn32rGEalJel0qPVenvzQT3yzlZVVEdr2bu25u4HJF2eZP9OSTPjr99UI8OP7n5ja+9p7m0+x4dJRABa5Opf8b28rc3/8gVtNn/l6FtPpfT7PvOCz3apuTXd7CEkAECHYTF5AABar4NXImp3BFAAQDgi9tdYWEweAIAAyEABAOGIWAZKAAUAhMKZRAQAQAARy0AZAwUAIAAyUABAOCKWgRJAAQChYAwUAIAgyEABAAggYgGUSUQAAARABgoACAVr4QIAEASTiAAACCBiY6AEUABAKDxiAZRJRAAABEAGCgAIBQspAAAQgNcQQAEAaLWoBVDGQAEACIAMFAAQCsZAAQAIIGpduARQAEAoCKAAAARQG7G1cJlEBABAAGSgAIBQMIkIAIAAGAMFACAAAigAAAFErQuXSUQAgC7PzPqZ2ctmtj7+b99GzttiZivMbLmZLWlt+UQEUABAKGpralPaUjRH0qvuPl7Sq/H3jbnU3c9092kBy0sigAIAQuI1tSltKZolaW789VxJn2jr8oyBAug05n/5go6uAlKQahA0s9mSZifsKnD3ghYWH+TuuyTJ3XeZ2cBGznNJL5mZS3oo4fotLX8cARQA0CnEg1mjAdPMXpE0OMmh77biNhe4+854gHzZzNa4++utrKokAigAICRtPQvX3a9o7JiZ7TGz/Hj2mC9pbyPX2Bn/d6+ZPSNpuqTXJbWofCLGQAEAoejgMdB5km6Ov75Z0nMNTzCzXDPLO/Za0lWSVra0fENkoACAUHTwQgr3SXrKzG6TtE3SZyTJzIZI+rW7z5Q0SNIzZibVxb/H3f3Fpso3hQAKAAhFbQcupODuByRdnmT/Tkkz4683STqjNeWbQhcuAAABkIECAELBWrgAAATgEfuD2gRQAEAooraYPAEUABCKqHXhMokIAIAAyEABAKGIWgZKAAUAhCKEP0nWpRBAAQChiNokIsZAAQAIgAwUABAKxkABAAjAa7yjq9CuCKAAgFAwiQgAgAC8NloZKJOIAAAIgAwUABCKWsZAAQBoPWbhAgAQALNwAQAIIGpduEwiAgAgADJQAEAoGAMFACCA2og9B0oABQCEImqTiBgDBQAgADJQAEAoWAsXAIAAotaFSwAFAISCAAoAQABR68JlEhEAAAGQgQIAQhG1vwdKAAUAhCJqa+ESQAEAoYjaUn6MgQIAQuE1ntKWCjPrZ2Yvm9n6+L99k5xzipktT9iKzOyu+LEfmtmOhGMzm7snARQA0B3MkfSqu4+X9Gr8fT3uvtbdz3T3MyVNlVQm6ZmEU3567Li7P9/cDZvtwjWz6XX39cVmNlHSDElrWnJxAEB0dPAY6CxJl8Rfz5W0QNK3mzj/ckkb3X1r0Bs2mYGa2Q8k/VzSA2b2r5J+KamnpDlm9t0mys02syVmtqSgoCBo3QAAXYjX1qa0JcaO+Da7Fbcf5O67JCn+78Bmzr9W0hMN9t1pZu+b2cPJuoAbMvfGvzGY2QpJZ0rKkrRb0jB3LzKzHEkL3X1yczeQFK1pWQDQuVlbXXj+yVNS+n1/9bplTdbNzF6RNDjJoe9KmuvufRLOPeTuSYOgmWVK2inpNHffE983SNJ+1cWseyXlu/utTdWnuS7canevkVRmZhvdvUiS3L3czKI13QoA0KHc/YrGjpnZHjPLd/ddZpYvaW8Tl7pG0rJjwTN+7eOvzey/JP25ufo0N4noqJn1iL+emnDx3pIIoACA4zpyFq6keZJujr++WdJzTZx7nRp038aD7jGflLSyuRs2l4Fe5O6VkuTuiQEzI6GiAAB09HOg90l6ysxuk7RN0mckycyGSPq1u8+Mv+8h6UpJtzco/yMzO1N1Xbhbkhw/QZNjoCFhDBQAOo82GwP9y/DJKf2+/+j299usbm2BlYgAAKGI2p8zYyEFAAACIAMFAISitu2HBDsVAigAIBQ1BFAAAFovYkOgBFAAQDiiloEyiQgAgADIQAEAoaALFwCAAKLWhUsABQCEImoZKGOgAAAEQAYKAAgFXbgAAAQQtS5cAigAIBQEUAAAAohaFy6TiAAACIAMFAAQCrpwAQAIIGpduARQAEAoopaBMgYKAEAAZKAAgFDQhQsAQABR68IlgAIAQkEGCgBAALUdXYF2xiQiAAACIAMFAISCLlwAAAJgEhEAAAGQgQIAEEDUMlAmEQEAEAAZKAAgFFHrwiUDBQCEosZT21JhZp8xs1VmVmtm05o4b4aZrTWzDWY2J2F/PzN72czWx//t29w9CaAAgFDUuKe0pWilpE9Jer2xE8wsJul+SddImijpOjObGD88R9Kr7j5e0qvx900igAIAujx3X+3ua5s5bbqkDe6+yd2PSnpS0qz4sVmS5sZfz5X0iebuyRgoACAUXWAW7lBJ2xPeF0o6J/56kLvvkiR332VmA5u7WHsEUGuHe4TKzGa7e0FH16M7o43bHm3cPmjnDz3oW1L6fW9msyXNTthVkNi2ZvaKpMFJin7X3Z9ryS2S7Asc9slAk5stif8h2hZt3PZo4/ZBO4ckHiwbbUt3vyLFWxRKGp7wfpiknfHXe8wsP5595kva29zFGAMFAETFYknjzWy0mWVKulbSvPixeZJujr++WVKzGS0BFADQ5ZnZJ82sUNJ5kv5iZvPj+4eY2fOS5O7Vku6UNF/SaklPufuq+CXuk3Slma2XdGX8fdP39Ig9+NoSjGm0Pdq47dHG7YN2ji4CKAAAAdCFCwBAAATQBI0t8YTwmNnDZrbXzFZ2dF26KzMbbmavmdnq+NJmX+3oOnU3ZpZtZovM7L14G9/T0XVC+6MLNy6+xNM61Q0eF6puttZ17v5Bh1asmzGziySVSHrU3U/v6Pp0R/Ep+PnuvszM8iQtlfQJfpbDY2YmKdfdS8wsQ9Kbkr7q7u90cNXQjshAP9TUEk8Iibu/LulgR9ejO3P3Xe6+LP66WHWzDYd2bK26F69TEn+bEd/IRiKGAPqhZEs88UsHXZqZjZJ0lqSFHVyVbsfMYma2XHUP3L/s7rRxxBBAPxTqEk9ARzOznpL+JOkudy/q6Pp0N+5e4+5nqm41m+lmxpBExBBAP9TUEk9AlxIfl/uTpN+5+9MdXZ/uzN0PS1ogaUbH1gTtjQD6oaaWeAK6jPgEl99IWu3uP+no+nRHZjbAzPrEX+dIukLSmg6tFNodATSumSWeEBIze0LS3yWdYmaFZnZbR9epG7pA0o2SLjOz5fFtZkdXqpvJl/Samb2vui/fL7v7nzu4TmhnPMYCAEAAZKAAAARAAAUAIAACKAAAARBAAQAIgAAKAEAABFAAAAIggAIAEAABFACAAP4/bXVxjNw17FEAAAAASUVORK5CYII=\n", 431 | "text/plain": [ 432 | "
" 433 | ] 434 | }, 435 | "metadata": { 436 | "needs_background": "light" 437 | }, 438 | "output_type": "display_data" 439 | } 440 | ], 441 | "source": [ 442 | "plt.figure(figsize=(8,6))\n", 443 | "sns.heatmap(combined_means.corr(),cmap='RdBu',\n", 444 | " mask=mask,vmax=1,vmin=-1,annot=True,annot_kws={\"size\": 15})\n", 445 | "plt.savefig('remove_g_corr.pdf')\n", 446 | "plt.show()" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "id": "801e7e7d-55b0-488f-a4c9-6c81d8b6ef73", 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "# no statistically enrichments found, so no plots " 457 | ] 458 | } 459 | ], 460 | "metadata": { 461 | "kernelspec": { 462 | "display_name": "plot", 463 | "language": "python", 464 | "name": "plot" 465 | }, 466 | "language_info": { 467 | "codemirror_mode": { 468 | "name": "ipython", 469 | "version": 3 470 | }, 471 | "file_extension": ".py", 472 | "mimetype": "text/x-python", 473 | "name": "python", 474 | "nbconvert_exporter": "python", 475 | "pygments_lexer": "ipython3", 476 | "version": "3.9.10" 477 | } 478 | }, 479 | "nbformat": 4, 480 | "nbformat_minor": 5 481 | } 482 | --------------------------------------------------------------------------------