├── concept_fig.png
├── images
├── conceptFig.jpg
├── top_genes_tcr.png
└── top_pathways_img.png
├── utils.py
├── datasets.py
├── LICENSE
├── top_pathways.py
├── figures
├── make_fig2.py
└── supplementary_figures
│ ├── mcfarland_cond_top_pathways.py
│ ├── .ipynb_checkpoints
│ ├── mcfarland_cond_top_pathways-checkpoint.py
│ ├── drop_g-checkpoint.ipynb
│ └── g_enrichments-checkpoint.ipynb
│ ├── drop_g.ipynb
│ └── g_enrichments.ipynb
├── README.md
├── benchmark_intercode.py
├── summary.py
├── get_top_pathways.py
├── pathexplainer.py
├── standard_VAE_impute_benchmark.py
└── benchmark_pmvae.py
/concept_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suinleelab/PAUSE/HEAD/concept_fig.png
--------------------------------------------------------------------------------
/images/conceptFig.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suinleelab/PAUSE/HEAD/images/conceptFig.jpg
--------------------------------------------------------------------------------
/images/top_genes_tcr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suinleelab/PAUSE/HEAD/images/top_genes_tcr.png
--------------------------------------------------------------------------------
/images/top_pathways_img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suinleelab/PAUSE/HEAD/images/top_pathways_img.png
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
def parse_gmt(path, symbols=None, min_genes=10):
    """Parse a GMT gene-set file into a ``{set_name: [gene, ...]}`` dict.

    Every non-blank line is expected to hold a gene-set name, a description
    field (ignored) and then the member genes.  Lines that contain a tab are
    split on tabs, so an empty description or one containing spaces does not
    shift the gene columns; lines without any tab fall back to plain
    whitespace splitting.

    Args:
        path: Path of the GMT file to read.
        symbols: Optional collection of gene symbols.  When given, each gene
            set is restricted to its intersection with ``symbols``.  A
            ``pandas.Index`` is used as is; any other iterable is converted.
        min_genes: Gene sets left with fewer than this many genes are dropped.

    Returns:
        dict mapping each retained gene-set name to the list of its genes.

    Raises:
        ValueError: If a non-blank line has fewer than two fields.
    """
    if symbols is not None and not isinstance(symbols, pd.Index):
        symbols = pd.Index(list(symbols))

    lut = {}
    # Context manager so the file handle is closed even if parsing fails.
    with open(path, 'r') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no gene set.
                continue

            if '\t' in line:
                fields = [field.strip() for field in line.split('\t')]
            else:
                fields = line.split()
            if len(fields) < 2:
                raise ValueError(
                    'malformed GMT line (expected name, description, genes): %r'
                    % raw_line
                )

            key, _, *genes = fields
            # Consecutive tabs produce empty gene fields; they are not genes.
            genes = [gene for gene in genes if gene]

            if symbols is not None:
                genes = symbols.intersection(genes).tolist()
            if len(genes) < min_genes:
                continue
            lut[key] = genes

    return lut
14 |
def load_annotations(gmt, genes, min_genes=10):
    """Build a boolean gene-by-gene-set membership table from a GMT file.

    The GMT file is parsed with ``parse_gmt`` (restricted to ``genes`` and
    filtered by ``min_genes``).  The result has one row per entry of
    ``genes`` and one column per retained gene set; a cell is True when the
    gene belongs to that set.

    Args:
        gmt: Path of the GMT file.
        genes: Gene symbols used both as the row index and as the filter
            passed to ``parse_gmt``.
        min_genes: Minimum number of genes a set must keep to be included.

    Returns:
        pandas.DataFrame of booleans indexed by ``genes``.
    """
    genesets = parse_gmt(gmt, genes, min_genes)
    annotations = pd.DataFrame(False, index=genes, columns=genesets.keys())
    # Distinct loop names keep the ``genes`` parameter from being shadowed.
    for set_name, members in genesets.items():
        annotations.loc[members, set_name] = True
    return annotations
--------------------------------------------------------------------------------
/datasets.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset, DataLoader
2 | import torch
3 | import numpy as np
4 | import pandas as pd
5 |
class RNASeqData(Dataset):
    """Dataset wrapper around an expression matrix with optional extras.

    ``X`` is indexed as ``X[index, :]`` to produce one sample.  ``c`` and
    ``y`` are optional per-sample arrays indexed by the same ``index``, and
    ``transform`` is an optional callable applied to each sample.
    """

    def __init__(self, X, c=None, y=None, transform=None):
        self.X, self.y, self.c = X, y, c
        self.transform = transform

    def __len__(self):
        """Return the number of rows of ``X``."""
        n_samples = self.X.shape[0]
        return n_samples

    def __getitem__(self, index):
        """Return the sample at ``index`` plus whichever extras apply.

        * ``c`` is None: the sample alone (``y`` is not returned in this
          case, even if it was supplied).
        * ``c`` set, ``y`` None: ``(sample, c[index])``.
        * both set: ``(sample, y[index], c[index])``.
        """
        row = self.X[index, :]
        if self.transform is not None:
            row = self.transform(row)

        if self.c is None:
            return row
        if self.y is None:
            return row, self.c[index]
        return row, self.y[index], self.c[index]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Lee Lab @ UW Allen School
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/top_pathways.py:
--------------------------------------------------------------------------------
1 | import anndata
2 | import numpy as np
3 | import pandas as pd
4 | import torch
5 | import torch.nn.functional as F
6 | import os
7 |
8 | from utils import load_annotations
9 | from sklearn.model_selection import train_test_split
10 |
11 | from torch.utils.data import Dataset, DataLoader
12 | from datasets import RNASeqData
13 |
14 | from pathexplainer import PathExplainerTorch
15 | from sklearn.linear_model import LogisticRegression
16 |
17 | from models import pmVAEModel
18 | import os
19 | os.environ["CUDA_VISIBLE_DEVICES"]="1"
20 |
21 |
22 |
def main():
    """Train pmVAE replicates on the Kang data and score the latent pathways.

    For each of ``number_of_replicates`` random seeds the data are split
    (75/25, then the training part 75/25 again into train/validation), a
    pmVAE model is trained, and two per-latent scores are computed:

    * IG: path-explainer attributions of the per-sample reconstruction MSE
      on the validation split with respect to the latent codes, averaged
      over samples.
    * LR: validation accuracy of a one-feature logistic regression that
      predicts ``condition == 'stimulated'`` from each latent dimension,
      stored negated.

    Both tables hold one column per replicate and are rewritten to
    ``kang_ig.csv`` / ``kang_lr.csv`` after every replicate, so partial
    results survive an interrupted run.
    """
    # load data

    # kang dataset
    data = anndata.read('data/kang_count.h5ad')

    # haber dataset
    #data = anndata.read('/projects/leelab/data/single-cell/haber_2017/preprocessed/adata_top_2000_genes.h5ad')
    #data = data[data.obs['condition'] != 'Salmonella'].copy()

    symbols = data.var_names

    number_of_replicates = 10

    # load annotations
    # The mask depends only on the gene symbols, so it is built once instead
    # of re-parsing the GMT file in every replicate.
    membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt',
                                       symbols,
                                       min_genes=13
                                       ).astype(bool).T

    # Result tables are created on the first replicate, once the latent
    # space names are available from a model instance.
    top_ig = None
    top_lr = None

    # for 10 experimental replicates
    for rand_seed in range(number_of_replicates):

        print("replicate number " + str(rand_seed))

        # split data (the held-out test part is not used in this script)
        train_data, _ = train_test_split(data,
                                         test_size=0.25,
                                         shuffle=True,
                                         random_state=rand_seed)
        tr_data, val_data = train_test_split(train_data,
                                             test_size=0.25,
                                             shuffle=True,
                                             random_state=rand_seed)

        # Convert once and reuse, so every consumer sees the same dense array.
        tr_x = np.array(tr_data.X)
        val_x = np.array(val_data.X)

        tr_ds = RNASeqData(tr_x)
        val_ds = RNASeqData(val_x)

        ##
        ## train base model
        ##

        # initialize base model
        # A fresh copy of the mask is passed so that each replicate starts
        # from the same array, as it did when the mask was rebuilt per loop.
        basePMVAE = pmVAEModel(membership_mask.values.copy(),
                               [12],
                               1,
                               beta=1e-05,
                               terms=membership_mask.index,
                               add_auxiliary_module=True
                               )

        if top_ig is None:  # first run
            top_ig = pd.DataFrame(index=basePMVAE.latent_space_names())
            top_lr = pd.DataFrame(index=basePMVAE.latent_space_names())

        # train
        basePMVAE.train(tr_ds, val_ds,
                        checkpoint_path='top_kang.pkl',
                        max_epochs=100)

        basePMVAE.set_gpu(False)

        # IG pathway rankings
        print("Calc IG score")

        ground_truth = torch.tensor(val_x).float()

        def model_loss_wrapper(z):
            # Per-sample reconstruction MSE as a function of the latent codes.
            module_outputs = basePMVAE.model.decoder_net(z)
            global_recon = basePMVAE.model.merge(module_outputs)
            return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1, 1)

        outs = basePMVAE.model(ground_truth)

        input_data = outs.z
        baseline_data = torch.zeros(outs.z.shape[1])
        baseline_data.requires_grad = True

        explainer = PathExplainerTorch(model_loss_wrapper)
        attributions = explainer.attributions(input_data,
                                              baseline=baseline_data,
                                              num_samples=200,
                                              use_expectation=False)

        np_attribs = attributions.detach().numpy()
        top_ig[rand_seed] = np_attribs.mean(0)

        # so far!
        top_ig.to_csv('kang_ig.csv', index=False)

        # LR pathway rankings
        print("Calc LR score")

        y_tr = tr_data.obs['condition']
        y_val = val_data.obs['condition']

        train_labels = (y_tr == 'stimulated').values
        val_labels = (y_val == 'stimulated').values

        train_embedding = basePMVAE.model(torch.tensor(tr_x).float()).z.detach().numpy()
        val_embedding = basePMVAE.model(torch.tensor(val_x).float()).z.detach().numpy()

        lr_scores = []
        for pathway in range(train_embedding.shape[1]):
            clf = LogisticRegression(random_state=0).fit(train_embedding[:, pathway].reshape(-1, 1), train_labels)
            lr_scores.append(clf.score(val_embedding[:, pathway].reshape(-1, 1), val_labels))

        # Accuracies are stored negated (presumably so that, as with the IG
        # scores, smaller means more important -- confirm against consumers).
        top_lr[rand_seed] = [-1. * score for score in lr_scores]

        # so far!
        top_lr.to_csv('kang_lr.csv', index=False)

    top_ig.to_csv('kang_ig.csv', index=False)
    top_lr.to_csv('kang_lr.csv', index=False)
152 |
153 |
154 |
155 | if __name__ == '__main__':
156 | main()
--------------------------------------------------------------------------------
/figures/make_fig2.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from scipy import stats
4 | import matplotlib.pyplot as plt
5 | import seaborn as sb
6 | from statannotations.Annotator import Annotator
7 |
8 | DATASETS = ['kang', 'haber', 'datlinger']
9 |
10 | METHODS = ['impute', 'retrain']
11 | PAL = 'colorblind'
12 |
13 | LABEL_SIZE = 18
14 | TITLE_SIZE = 18
15 | AXES_SIZE = 18
16 | LEG_SIZE = 14
17 |
18 |
def get_arrays(dataset, method):
    """Load the five saved result arrays for one dataset/benchmark pair.

    Files are read from ``complete_results/<dataset>_<method>_<kind>.npy``.

    Returns:
        Tuple ``(ig, logvar, lr, kld, rand)`` of the loaded arrays.
    """
    stem = 'complete_results/' + dataset + '_' + method + '_'
    # The order here fixes the order of the returned tuple.
    kinds = ('ig', 'logvar', 'lr', 'kld', 'rand')
    return tuple(np.load(stem + kind + '.npy') for kind in kinds)
27 |
28 | # load results for single dataset and benchmark method
def load_res(dataset, method):
    """Return a long-format table of per-trial AUCs for each ranking method.

    The result arrays from ``get_arrays`` are integrated along axis 1 with
    the trapezoidal rule, giving one AUC per row (trial).  The number of
    trials is taken from the arrays themselves rather than assumed to be 10.

    Returns:
        pandas.DataFrame with columns ``'methods'`` (one of 'PAUSE', 'LR',
        'KLD', 'Random', 'LSV') and ``'aucs'``, with rows grouped by method
        in that order.
    """
    ig_results, logvar_results, lr_results, kld_results, rand_results = get_arrays(dataset, method)

    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever exists.
    integrate = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz

    # Label/array pairs in the order the rows appear in the output table.
    labelled = (
        ('PAUSE', ig_results),
        ('LR', lr_results),
        ('KLD', kld_results),
        ('Random', rand_results),
        ('LSV', logvar_results),
    )

    # get AUCs
    aucs = [integrate(values, axis=1) for _, values in labelled]
    names = np.repeat([label for label, _ in labelled],
                      [len(auc) for auc in aucs])

    results = pd.DataFrame({'methods': names, 'aucs': np.concatenate(aucs)})

    return results
54 |
55 |
def get_subplot(dataset, method):
    """Draw and save the AUC box/strip plot for one dataset and benchmark.

    The table from ``load_res`` is shown as a white box plot with the
    individual trials overlaid, a ``**`` marker is placed above each of the
    LR, KLD, Random and LSV boxes, and the figure is written to
    ``figs/dataset=<dataset>-method=<method>.pdf`` before being shown.
    """
    import os  # local import: this module does not import os at file level

    plt.rc('axes', titlesize=TITLE_SIZE)  # fontsize title
    plt.rc('axes', labelsize=AXES_SIZE)   # fontsize of the x and y axis labels
    plt.rc('xtick', labelsize=LABEL_SIZE)  # fontsize of the (method) tick labels

    results = load_res(dataset, method)

    # matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'
    # and later removed the old names; pick whichever this install provides.
    new_style = 'seaborn-v0_8-colorblind'
    plt.style.use(new_style if new_style in plt.style.available else 'seaborn-colorblind')

    fig, ax = plt.subplots(figsize=(6, 4))

    bp = sb.boxplot(ax=ax,
                    data=results, x='methods', y='aucs', dodge=True,
                    color='white', fliersize=0,
                    )

    sb.stripplot(ax=ax,
                 data=results, x='methods', y='aucs',
                 dodge=True,
                 s=4)

    # The '**' markers are placed by hand (a statannotations-based version
    # was disabled in the original).  They go just above the highest point
    # of every method except the first one (PAUSE).
    compared = ["LR", "KLD", "Random", "LSV"]
    xticks = bp.get_xticks()
    for i, name in enumerate(compared):
        top = results.loc[results['methods'] == name, 'aucs'].max()
        plt.text(x=xticks[i + 1] - 0.07, y=top + 0.001, s='**',
                 fontdict={'size': 12, 'color': 'black'})

    ax.set_ylabel('AUC')

    # Only the 'retrain' panels carry an x-axis label.
    if method == "retrain":
        ax.set_xlabel('Pathway Ranking Method')
    else:
        ax.set_xlabel('')

    #plt.title(get_title(dataset) + ' ' + method.capitalize() + ' Benchmark')
    plt.title(method.capitalize())

    # savefig does not create missing directories.
    os.makedirs('figs', exist_ok=True)
    plt.savefig('figs/dataset=%s-method=%s.pdf' % (dataset, method), bbox_inches='tight')

    plt.show()
    # Release the figure; otherwise every call leaves one more figure open.
    plt.close(fig)
110 |
111 |
112 |
def get_title(dataset):
    """Return the display title for a dataset key, or '' if it is unknown."""
    known_titles = (
        ('kang', 'PBMC'),
        ('haber', 'Intestinal'),
        ('datlinger', 'Jurkat'),
        ('grubman', 'Entorhinal'),
    )
    dataset_title = ''
    for key, title in known_titles:
        if dataset == key:
            dataset_title = title
    return dataset_title
124 |
125 |
126 |
127 | # get single line graph
def get_lines(dataset, method):
    """Draw and save the mean result curves for one dataset and benchmark.

    Each result array from ``get_arrays`` is averaged over axis 0 and drawn
    as one line per ranking method.  The figure is written to
    ``figs/lines-dataset=<dataset>-method=<method>.pdf`` before being shown.
    """
    import os  # local import: this module does not import os at file level

    ig_results, logvar_results, lr_results, kld_results, rand_results = get_arrays(dataset, method)

    # matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'
    # and later removed the old names; pick whichever this install provides.
    new_style = 'seaborn-v0_8-colorblind'
    plt.style.use(new_style if new_style in plt.style.available else 'seaborn-colorblind')

    fig, ax = plt.subplots(figsize=(6, 4))

    # Drawing order determines both line colours and legend order.
    curves = (
        ('PAUSE', ig_results),
        ('LR', lr_results),
        ('KLD', kld_results),
        ('Random', rand_results),
        ('LSV', logvar_results),
    )
    for label, values in curves:
        sb.lineplot(data=values.mean(0), label=label)

    if method == 'impute':
        ax.set_xlabel('Number of Top Pathways Ablated')

    if method == 'retrain':
        ax.set_xlabel('Number of Top Pathways Included')

    plt.legend(fontsize=LEG_SIZE)

    ax.set_ylabel('Reconstruction Error')

    #plt.title(get_title(dataset) + ' ' + method.capitalize() + ' Benchmark')
    plt.title(method.capitalize())

    # savefig does not create missing directories.
    os.makedirs('figs', exist_ok=True)
    plt.savefig('figs/lines-dataset=%s-method=%s.pdf' % (dataset, method), bbox_inches='tight')

    plt.show()
    # Release the figure; otherwise every call leaves one more figure open.
    plt.close(fig)
159 |
160 |
if __name__ == '__main__':

    # One AUC box plot per (dataset, benchmark method) combination.
    for dataset in DATASETS:
        for method in METHODS:
            get_subplot(dataset, method)

    # Line plots are produced for the haber dataset only.
    for method in ('impute', 'retrain'):
        get_lines('haber', method)
169 |
170 |
--------------------------------------------------------------------------------
/figures/supplementary_figures/mcfarland_cond_top_pathways.py:
--------------------------------------------------------------------------------
1 | # get Mcfarland top pathways, condition on cell lines
2 |
3 | import anndata
4 | import numpy as np
5 | import pandas as pd
6 | import torch
7 | import torch.nn.functional as F
8 | import os
9 |
10 | from utils import load_annotations
11 | from sklearn.model_selection import train_test_split
12 |
13 | from torch.utils.data import Dataset, DataLoader
14 | from datasets import RNASeqData
15 |
16 | from pathexplainer import PathExplainerTorch
17 | from sklearn.linear_model import LogisticRegression
18 | from sklearn.preprocessing import OneHotEncoder
19 | import argparse
20 |
21 |
22 | from models import pmVAEModel
23 | import os
24 | import time
25 |
26 | save_path = 'new_for_revision/new_res/'
27 |
28 |
def main():
    """Train cell-line-conditioned pmVAE replicates on the McFarland data.

    Command line: ``[dataset] [which_gpu]``.  ``dataset`` is only used to
    name the output files and checkpoints; ``which_gpu`` is written to
    ``CUDA_VISIBLE_DEVICES``.  Both are optional.

    The Idasanutlin-treated cells are loaded and the cell line is one-hot
    encoded as the conditioning variable.  For each of ten random seeds the
    data are split, a conditional pmVAE is trained, and two per-latent
    scores are computed:

    * IG: path-explainer attributions of the per-sample reconstruction MSE
      (over the full dataset) with respect to the latent codes, averaged
      over samples.
    * LR: validation accuracy of a one-feature logistic regression that
      predicts ``TP53_mutation_status == 'Wild Type'`` from each latent
      dimension, stored negated.

    Score tables are rewritten under ``save_path`` after every replicate;
    wall-clock times of training, IG and LR are written once at the end.
    """
    ig_times = []
    lr_times = []
    train_times = []

    parser = argparse.ArgumentParser()
    # nargs='?' is required for a positional argument's default to take
    # effect; without it both arguments were mandatory and the defaults dead.
    # The dataset default is 'mcfarland' because that is the only data this
    # script loads and the value only labels the output files.
    parser.add_argument('dataset', action="store", nargs='?', default='mcfarland')
    parser.add_argument('which_gpu', action="store", nargs='?', default='0')

    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.which_gpu
    dataset = args.dataset

    # Create the output locations up front so a missing directory cannot
    # fail the run after a model has already been trained.
    os.makedirs(save_path, exist_ok=True)
    os.makedirs('saved_models', exist_ok=True)

    # load data

    # load mcfarland data
    data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad')

    data = data[data.obs['condition'] == 'Idasanutlin'].copy()
    symbols = data.var_names

    conditions = np.array(data.obs['cell_line']).reshape(-1, 1)
    enc = OneHotEncoder()
    enc.fit(conditions)
    pre_processed_conditions = enc.transform(conditions).toarray()

    number_of_replicates = 10

    # load annotations
    # The mask depends only on the gene symbols, so it is built once instead
    # of re-parsing the GMT file in every replicate.
    membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt',
                                       symbols,
                                       min_genes=13
                                       ).astype(bool).T

    # Result tables are created on the first replicate, once the latent
    # space names are available from a model instance.
    top_ig = None
    top_lr = None

    # for 10 experimental replicates
    for rand_seed in range(number_of_replicates):

        print("replicate number " + str(rand_seed))

        # split data (the held-out test part is not used in this script)
        train_data, _, train_c, _ = train_test_split(data, pre_processed_conditions,
                                                     test_size=0.25,
                                                     shuffle=True,
                                                     random_state=rand_seed)
        tr_data, val_data, tr_c, val_c = train_test_split(train_data, train_c,
                                                          test_size=0.25,
                                                          shuffle=True,
                                                          random_state=rand_seed)

        tr_ds = RNASeqData(np.array(tr_data.X), c=tr_c)
        val_ds = RNASeqData(np.array(val_data.X), c=val_c)

        ##
        ## train model
        ##

        # initialize base model
        # A fresh copy of the mask is passed so that each replicate starts
        # from the same array, as it did when the mask was rebuilt per loop.
        basePMVAE = pmVAEModel(membership_mask.values.copy(),
                               [12],
                               1,
                               cdim=tr_c.shape[1],
                               beta=1e-05,
                               terms=membership_mask.index,
                               add_auxiliary_module=True
                               )

        if top_ig is None:  # first run
            top_ig = pd.DataFrame(index=basePMVAE.latent_space_names())
            top_lr = pd.DataFrame(index=basePMVAE.latent_space_names())

        # train
        start_train = time.time()
        basePMVAE.train(tr_ds, val_ds,
                        checkpoint_path='saved_models/seed_' + str(rand_seed) + 'cell_lines_cond_top_' + dataset + '.pkl',
                        max_epochs=100)

        end_train = time.time()
        train_times.append(end_train - start_train)

        basePMVAE.set_gpu(False)

        # IG pathway rankings
        print("Calc IG score")

        start_ig = time.time()

        ground_truth = torch.tensor(data.X).float()
        c_full = torch.tensor(pre_processed_conditions).float()

        def model_loss_wrapper(z):
            # Per-sample reconstruction MSE as a function of the latent
            # codes, with the condition columns appended for the decoder.
            latent_input = torch.cat([z, c_full], 1)
            module_outputs = basePMVAE.model.decoder_net(latent_input)
            global_recon = basePMVAE.model.merge(module_outputs)
            return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1, 1)

        outs = basePMVAE.model(ground_truth, c_full)

        input_data = outs.z
        baseline_data = torch.zeros(outs.z.shape[1])
        baseline_data.requires_grad = True

        explainer = PathExplainerTorch(model_loss_wrapper)
        attributions = explainer.attributions(input_data,
                                              baseline=baseline_data,
                                              num_samples=200,
                                              use_expectation=False)

        np_attribs = attributions.detach().numpy()
        top_ig[rand_seed] = np_attribs.mean(0)

        end_ig = time.time()
        ig_times.append(end_ig - start_ig)

        # so far!
        top_ig.to_csv(save_path + dataset + '_cell_lines_cond_ig.csv', index=False)

        # LR pathway rankings
        print("Calc LR score")
        start_lr = time.time()

        # The loaded data are always the McFarland cells, so the TP53 labels
        # are built unconditionally.  They used to be defined only when the
        # dataset argument was 'mcfarland', which left them undefined (and
        # the run crashing after training) for any other value.
        y_tr = tr_data.obs['TP53_mutation_status']
        y_val = val_data.obs['TP53_mutation_status']

        train_labels = (y_tr == 'Wild Type').values
        val_labels = (y_val == 'Wild Type').values

        train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float(), torch.tensor(tr_c).float()).z.detach().numpy()
        val_embedding = basePMVAE.model(torch.tensor(val_data.X).float(), torch.tensor(val_c).float()).z.detach().numpy()

        lr_scores = []
        for pathway in range(train_embedding.shape[1]):
            clf = LogisticRegression(random_state=0).fit(train_embedding[:, pathway].reshape(-1, 1), train_labels)
            lr_scores.append(clf.score(val_embedding[:, pathway].reshape(-1, 1), val_labels))

        # Accuracies are stored negated (presumably so that, as with the IG
        # scores, smaller means more important -- confirm against consumers).
        top_lr[rand_seed] = [-1. * score for score in lr_scores]

        end_lr = time.time()
        lr_times.append(end_lr - start_lr)

        # so far!
        top_lr.to_csv(save_path + dataset + '_cell_lines_cond_lr.csv', index=False)

    times = pd.DataFrame()
    times['ig_times'] = ig_times
    times['lr_times'] = lr_times
    times['train_times'] = train_times

    times.to_csv(save_path + args.dataset + '_cell_lines_cond_times.csv')
197 |
198 | if __name__ == '__main__':
199 | main()
--------------------------------------------------------------------------------
/figures/supplementary_figures/.ipynb_checkpoints/mcfarland_cond_top_pathways-checkpoint.py:
--------------------------------------------------------------------------------
1 | # get Mcfarland top pathways, condition on cell lines
2 |
3 | import anndata
4 | import numpy as np
5 | import pandas as pd
6 | import torch
7 | import torch.nn.functional as F
8 | import os
9 |
10 | from utils import load_annotations
11 | from sklearn.model_selection import train_test_split
12 |
13 | from torch.utils.data import Dataset, DataLoader
14 | from datasets import RNASeqData
15 |
16 | from pathexplainer import PathExplainerTorch
17 | from sklearn.linear_model import LogisticRegression
18 | from sklearn.preprocessing import OneHotEncoder
19 | import argparse
20 |
21 |
22 | from models import pmVAEModel
23 | import os
24 | import time
25 |
26 | save_path = 'new_for_revision/new_res/'
27 |
28 |
def main():
    """Train cell-line-conditioned pmVAE replicates on the McFarland data.

    Command line: ``[dataset] [which_gpu]``.  ``dataset`` is only used to
    name the output files and checkpoints; ``which_gpu`` is written to
    ``CUDA_VISIBLE_DEVICES``.  Both are optional.

    The Idasanutlin-treated cells are loaded and the cell line is one-hot
    encoded as the conditioning variable.  For each of ten random seeds the
    data are split, a conditional pmVAE is trained, and two per-latent
    scores are computed:

    * IG: path-explainer attributions of the per-sample reconstruction MSE
      (over the full dataset) with respect to the latent codes, averaged
      over samples.
    * LR: validation accuracy of a one-feature logistic regression that
      predicts ``TP53_mutation_status == 'Wild Type'`` from each latent
      dimension, stored negated.

    Score tables are rewritten under ``save_path`` after every replicate;
    wall-clock times of training, IG and LR are written once at the end.
    """
    ig_times = []
    lr_times = []
    train_times = []

    parser = argparse.ArgumentParser()
    # nargs='?' is required for a positional argument's default to take
    # effect; without it both arguments were mandatory and the defaults dead.
    # The dataset default is 'mcfarland' because that is the only data this
    # script loads and the value only labels the output files.
    parser.add_argument('dataset', action="store", nargs='?', default='mcfarland')
    parser.add_argument('which_gpu', action="store", nargs='?', default='0')

    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.which_gpu
    dataset = args.dataset

    # Create the output locations up front so a missing directory cannot
    # fail the run after a model has already been trained.
    os.makedirs(save_path, exist_ok=True)
    os.makedirs('saved_models', exist_ok=True)

    # load data

    # load mcfarland data
    data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad')

    data = data[data.obs['condition'] == 'Idasanutlin'].copy()
    symbols = data.var_names

    conditions = np.array(data.obs['cell_line']).reshape(-1, 1)
    enc = OneHotEncoder()
    enc.fit(conditions)
    pre_processed_conditions = enc.transform(conditions).toarray()

    number_of_replicates = 10

    # load annotations
    # The mask depends only on the gene symbols, so it is built once instead
    # of re-parsing the GMT file in every replicate.
    membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt',
                                       symbols,
                                       min_genes=13
                                       ).astype(bool).T

    # Result tables are created on the first replicate, once the latent
    # space names are available from a model instance.
    top_ig = None
    top_lr = None

    # for 10 experimental replicates
    for rand_seed in range(number_of_replicates):

        print("replicate number " + str(rand_seed))

        # split data (the held-out test part is not used in this script)
        train_data, _, train_c, _ = train_test_split(data, pre_processed_conditions,
                                                     test_size=0.25,
                                                     shuffle=True,
                                                     random_state=rand_seed)
        tr_data, val_data, tr_c, val_c = train_test_split(train_data, train_c,
                                                          test_size=0.25,
                                                          shuffle=True,
                                                          random_state=rand_seed)

        tr_ds = RNASeqData(np.array(tr_data.X), c=tr_c)
        val_ds = RNASeqData(np.array(val_data.X), c=val_c)

        ##
        ## train model
        ##

        # initialize base model
        # A fresh copy of the mask is passed so that each replicate starts
        # from the same array, as it did when the mask was rebuilt per loop.
        basePMVAE = pmVAEModel(membership_mask.values.copy(),
                               [12],
                               1,
                               cdim=tr_c.shape[1],
                               beta=1e-05,
                               terms=membership_mask.index,
                               add_auxiliary_module=True
                               )

        if top_ig is None:  # first run
            top_ig = pd.DataFrame(index=basePMVAE.latent_space_names())
            top_lr = pd.DataFrame(index=basePMVAE.latent_space_names())

        # train
        start_train = time.time()
        basePMVAE.train(tr_ds, val_ds,
                        checkpoint_path='saved_models/seed_' + str(rand_seed) + 'cell_lines_cond_top_' + dataset + '.pkl',
                        max_epochs=100)

        end_train = time.time()
        train_times.append(end_train - start_train)

        basePMVAE.set_gpu(False)

        # IG pathway rankings
        print("Calc IG score")

        start_ig = time.time()

        ground_truth = torch.tensor(data.X).float()
        c_full = torch.tensor(pre_processed_conditions).float()

        def model_loss_wrapper(z):
            # Per-sample reconstruction MSE as a function of the latent
            # codes, with the condition columns appended for the decoder.
            latent_input = torch.cat([z, c_full], 1)
            module_outputs = basePMVAE.model.decoder_net(latent_input)
            global_recon = basePMVAE.model.merge(module_outputs)
            return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1, 1)

        outs = basePMVAE.model(ground_truth, c_full)

        input_data = outs.z
        baseline_data = torch.zeros(outs.z.shape[1])
        baseline_data.requires_grad = True

        explainer = PathExplainerTorch(model_loss_wrapper)
        attributions = explainer.attributions(input_data,
                                              baseline=baseline_data,
                                              num_samples=200,
                                              use_expectation=False)

        np_attribs = attributions.detach().numpy()
        top_ig[rand_seed] = np_attribs.mean(0)

        end_ig = time.time()
        ig_times.append(end_ig - start_ig)

        # so far!
        top_ig.to_csv(save_path + dataset + '_cell_lines_cond_ig.csv', index=False)

        # LR pathway rankings
        print("Calc LR score")
        start_lr = time.time()

        # The loaded data are always the McFarland cells, so the TP53 labels
        # are built unconditionally.  They used to be defined only when the
        # dataset argument was 'mcfarland', which left them undefined (and
        # the run crashing after training) for any other value.
        y_tr = tr_data.obs['TP53_mutation_status']
        y_val = val_data.obs['TP53_mutation_status']

        train_labels = (y_tr == 'Wild Type').values
        val_labels = (y_val == 'Wild Type').values

        train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float(), torch.tensor(tr_c).float()).z.detach().numpy()
        val_embedding = basePMVAE.model(torch.tensor(val_data.X).float(), torch.tensor(val_c).float()).z.detach().numpy()

        lr_scores = []
        for pathway in range(train_embedding.shape[1]):
            clf = LogisticRegression(random_state=0).fit(train_embedding[:, pathway].reshape(-1, 1), train_labels)
            lr_scores.append(clf.score(val_embedding[:, pathway].reshape(-1, 1), val_labels))

        # Accuracies are stored negated (presumably so that, as with the IG
        # scores, smaller means more important -- confirm against consumers).
        top_lr[rand_seed] = [-1. * score for score in lr_scores]

        end_lr = time.time()
        lr_times.append(end_lr - start_lr)

        # so far!
        top_lr.to_csv(save_path + dataset + '_cell_lines_cond_lr.csv', index=False)

    times = pd.DataFrame()
    times['ig_times'] = ig_times
    times['lr_times'] = lr_times
    times['train_times'] = train_times

    times.to_csv(save_path + args.dataset + '_cell_lines_cond_times.csv')
197 |
198 | if __name__ == '__main__':
199 | main()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PAUSE
2 |
3 | 
4 |
5 | Code for the paper "Principled feature attribution for unsupervised gene expression analysis" (PAUSE).
6 | For more information, see our preprint: https://www.biorxiv.org/content/10.1101/2022.05.03.490535v1.
7 |
8 | ## Examples
9 |
10 | ### Identify most important pathways from an interpretable autoencoder
11 | This first example demonstrates how the PAUSE framework can be used to identify the most important pathways for an interpretable autoencoder.
12 |
13 | ```python
14 | import anndata
15 | # and other import statements...
16 |
17 | ## load a single cell dataset
18 | data = anndata.read('data/kang_count.h5ad')
19 |
20 | ## load a pathway gene set file
21 | ## more examples can be found here (http://www.gsea-msigdb.org/gsea/msigdb/collections.jsp)
22 | data.varm['annotations'] = load_annotations(
23 | 'data/c2.cp.reactome.v7.4.symbols.gmt',
24 | data.var_names,
25 | min_genes=13
26 | )
27 | # binary matrix mapping from genes to pathways
28 | membership_mask = data.varm['annotations'].astype(bool).T.values
29 | ```
30 |
31 | After loading the RNA-seq dataset you want to analyze, you can then initialize and train a model on the dataset. In this case, we use our PyTorch implementation of the [pmVAE architecture](https://www.biorxiv.org/content/10.1101/2021.01.28.428664v1), which is a variational autoencoder composed of a set of subnetworks (pathway modules) that are factorized according to the gene sets defined above. In this model, each latent node in the bottleneck layer only contains information about the genes belonging to its corresponding pathway.
32 |
33 | ```python
34 | from models import pmVAEModel
35 |
36 | # initialize pmVAE model.
37 | # positional arguments are 1) the binary gene set membership matrix,
38 | # 2) a list containing the number of nodes in each hidden layer, and
39 | # 3) an integer indicating the number of nodes in each module's bottleneck.
40 | pmvae = pmVAEModel(
41 | membership_mask,
42 | [12], # This indicates that there will be one intermediate layer before the bottleneck with 12 nodes in each module. To have 2 intermediate layers of 6 nodes, you could write [6, 6]
43 | 4, # number of nodes in each module bottleneck
44 | terms=data.varm['annotations'].columns, # a list of the names of the pathway modules (membership_mask is a plain NumPy array here, so it has no .index)
45 | add_auxiliary_module=True # whether or not to include a densely connected auxiliary module
46 | )
47 |
48 | # train pmVAE model
49 | pmvae.train(train_dataset, # a PyTorch dataset object containing the training expression samples
50 | val_dataset, # a PyTorch dataset object containing the val expression samples
51 | max_epochs=200, # Maximum number of epochs to train
52 | lr=0.001, # learning rate of the adam optimizer used to train the model
53 | beta=1e-5, # weight multiplier of KL loss term
54 | batch_size=256, # samples per batch
55 | pathway_dropout=True, # whether or not to train with pathway dropout scheme as defined in pmVAE paper
56 | checkpoint_path='pmvae_checkpoint.pkl' # path of model checkpoint
57 | )
58 | ```
59 |
60 | Once the model is trained, we can use the [Path Explain software](https://github.com/suinleelab/path_explain) (also provided in this repository in the `pathexplainer.py` file) to *identify the top pathways* in the dataset by explaining the trained model's reconstruction error with respect to the learned latent pathways.
61 |
62 | ```python
63 | from pathexplainer import PathExplainerTorch
64 | import torch
65 | import torch.nn.functional as F
66 |
67 | # define a wrapper function that outputs the reconstruction error of the model given the latent codes
68 | def model_loss_wrapper(z):
69 | module_outputs = pmvae.model.decoder_net(z)
70 | global_recon = pmvae.model.merge(module_outputs)
71 | return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1)
72 |
73 | # define a tensor to hold the original data, which gets used as an argument in the reconstruction error in the wrapper above
74 | ground_truth = torch.tensor(data.X).float()
75 |
76 | # get the latent codes to use as input to the model loss wrapper
77 | outs = pmvae.model(ground_truth)
78 | input_data = outs.z
79 | baseline_data = torch.zeros(outs.z.shape[1]) # define a baseline, in this case the zeros vector
80 | baseline_data.requires_grad = True
81 |
82 | # calculate the pathway attributions
83 | explainer = PathExplainerTorch(model_loss_wrapper)
84 | attributions = explainer.attributions(input_data,
85 | baseline=baseline_data,
86 | num_samples=200, # number of samples to use when calculating the path integral
87 | use_expectation=False)
88 |
89 | ```
90 |
91 | Once you have calculated the pathway attributions, you can average them over all samples in the dataset to identify and plot the most important pathways.
92 |
93 | ```python
94 | # move attributions to numpy, make a df w/ index as latent space names
95 | np_attribs = attributions.detach().numpy()
96 | top_features = pd.DataFrame(index=pmvae.latent_space_names())
97 | top_features['global_attribs'] = np_attribs.mean(0) # in this case, global attributions are the mean over the dataset
98 |
99 | # Loss explanation
100 | top_features.sort_values('global_attribs',ascending=True).iloc[:30,0].plot.bar()
101 | ```
102 |
103 | 
104 |
105 | ### Identify most important genes contributing to a particular latent pathway
106 | This first example demonstrates how the PAUSE framework can be used to identify the most important pathways for an interpretable autoencoder. However, as you see above, these interpretable autoencoders often have multiple bottleneck nodes for each pathway, raising the question of what the difference between these bottleneck nodes is. Additionally, sometimes the most important pathways are the "uninterpretable" densely-connected auxiliary pathways. How can we identify the most important genes contributing to these latent pathways, and interpret their biological meaning? By using gene level attributions. This example uses another pmVAE model, as in the above example. This time, however, instead of getting attributions of the loss to the latent pathways, we can pick a latent pathway and explain it in terms of its input genes.
107 |
108 | ```python
109 | from summary import summary_plot
110 |
111 | # explain tcr in terms of genes
112 | def model_latent_wrapper(x):
113 | outs = pmvae.model(x)
114 | z = outs.mu
115 | return z[:,316].reshape(-1,1) # 316 is the latent node number corresponding to the pathway of interest here
116 |
117 | input_data = torch.tensor(data.X).float()
118 | input_data.requires_grad = True
119 | baseline_data = torch.zeros(data.X.shape[1])
120 | baseline_data.requires_grad = True
121 |
122 | explainer = PathExplainerTorch(model_latent_wrapper) # this time, use explanation software with latent output wrapper
123 | attributions = explainer.attributions(input_data,
124 | baseline=baseline_data,
125 | num_samples=200, # again use 200 interpolation points to numerically approximate the path integral
126 | use_expectation=False)
127 |
128 | np_attribs = attributions.detach().numpy()
129 | top_features = pd.DataFrame(index=membership_mask.columns)
130 | top_features['global_attribs'] = np.abs(np_attribs).mean(0) # to find top genes, we take the average MAGNITUDE of attribs across all samples
131 |
132 | summary_plot(np_attribs,
133 | data.X,
134 | feature_names=membership_mask.columns,
135 | plot_top_k=10,
136 | standardize_features=False,
137 | scale_x_ind=False,
138 | scale_y_ind=False,
139 | figsize=(4, 4),
140 | dpi=300,
141 | cmap=coolwarm)
142 | ```
143 |
144 | 
145 |
146 | ## Reproducing experiments and figures from paper
147 |
148 | For code to generate the models used, see "models.py". Pathway attributions and gene attributions are generated using code from "pathexplainer.py". Benchmarking pathway attributions against other methods for ranking pathway importance is done using the files "benchmark_pmvae.py", "benchmark_intercode.py", and "top_pathways.py". For code to generate the figures in the paper, see the folder `figures`.
149 |
150 |
--------------------------------------------------------------------------------
/benchmark_intercode.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # benchmark_intercode.py
3 |
4 | import anndata
5 | import numpy as np
6 | import pandas as pd
7 | import torch
8 | import torch.nn.functional as F
9 | import os
10 |
11 | from utils import load_annotations
12 | from sklearn.model_selection import train_test_split
13 |
14 | from torch.utils.data import Dataset, DataLoader
15 | from datasets import RNASeqData
16 |
17 | import argparse
18 |
19 | from pathexplainer import PathExplainerTorch
20 | from sklearn.linear_model import LogisticRegression
21 |
22 | from models import pmVAEModel
23 |
24 | from intercode import AutoencoderLinearDecoder, train_autoencoder
25 |
26 | import os
27 | os.environ["CUDA_VISIBLE_DEVICES"]="4"
28 |
def main():
    """
    Benchmark pathway rankings for an Intercode linear-decoder autoencoder.

    For each of 10 random train/validation/test splits of the Kang dataset,
    an autoencoder is trained and its annotated latent pathways are ranked by
    three scores: the (negated) L2 norm of the decoder weights, the mean
    Integrated-Gradients attribution of the reconstruction error, and a
    random control. The top-ranked pathways are then zeroed out one at a
    time in the test embedding and the resulting reconstruction error is
    recorded. Results are written to ``results/intercode_kang_impute_*.npy``.

    Command-line arguments (both optional, positional):
        dataset: Name of the dataset. Only 'kang' is implemented.
        removal: Parsed for interface compatibility; not used by this script.
    """

    # get dataset, removal method
    parser = argparse.ArgumentParser()
    # nargs='?' is required for the defaults of positional arguments to apply;
    # without it both arguments are mandatory and the defaults are dead code.
    parser.add_argument('dataset', action="store", nargs='?', default='kang')
    parser.add_argument('removal', action="store", nargs='?', default='impute')

    args = parser.parse_args()

    # Fail early with a readable message instead of a NameError on `data`.
    if args.dataset != 'kang':
        parser.error("unsupported dataset '{}': only 'kang' is implemented"
                     .format(args.dataset))

    # load data
    data = anndata.read('data/kang_count.h5ad')

    # Parse the annotation file once and reuse it for both AnnData fields.
    annotations = load_annotations(
        'data/c2.cp.reactome.v7.4.symbols.gmt',
        data.var_names,
        min_genes=13
    )
    data.varm['I'] = annotations.values
    data.uns['terms'] = list(annotations.columns)

    number_of_pathways = 20
    number_of_replicates = 10

    # One row per replicate, one column per number of removed pathways.
    # NOTE: the LR and KLD rankings of the pmVAE benchmark are not computed here.
    l2_results = np.zeros((number_of_replicates, number_of_pathways))
    ig_results = np.zeros((number_of_replicates, number_of_pathways))
    rand_results = np.zeros((number_of_replicates, number_of_pathways))

    # for 10 experimental replicates
    for rand_seed in range(number_of_replicates):

        print("replicate number " + str(rand_seed))

        # split data: 75/25 train/test, then 75/25 train/validation
        train_data, test_data = train_test_split(data,
                                                 test_size=0.25,
                                                 shuffle=True,
                                                 random_state=rand_seed)
        tr_data, val_data = train_test_split(train_data,
                                             test_size=0.25,
                                             shuffle=True,
                                             random_state=rand_seed)

        ##
        ## train base model
        ##

        LR = 0.001
        BATCH_SIZE = 62
        N_EPOCHS = 30

        # regularization hyperparameters
        LAMBDA0 = 0.1

        LAMBDA1 = 0.93*LR
        LAMBDA2 = 0.43*LR
        LAMBDA3 = 0.57*LR

        # initialize base model
        autoencoder = AutoencoderLinearDecoder(tr_data.n_vars,
                                               n_ann=len(tr_data.uns['terms']))
        autoencoder.cuda()

        # train
        train_autoencoder(tr_data, autoencoder, LR, BATCH_SIZE, N_EPOCHS,
                          l2_reg_lambda0=LAMBDA0, lambda1=LAMBDA1,
                          lambda2=LAMBDA2, lambda3=LAMBDA3)

        ##
        ## get pathway rankings
        ##
        # Every score is oriented so that an ascending sort lists the pathway
        # to remove first.
        top_features = pd.DataFrame(index=data.uns['terms'])

        ## get L2 (negated so that large decoder norms sort first)
        top_features['l2'] = -1.*autoencoder.decoder.weight_dict['annotated'].data.norm(p=2, dim=0).detach().cpu().numpy()

        print("Calc IG score")
        # IG pathway rankings
        ground_truth = torch.tensor(val_data.X).float()
        autoencoder.cpu()

        def intercode_loss_wrapper(z):
            # per-sample reconstruction error as a (batch, 1) column
            global_recon = autoencoder.decoder(z)
            return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1)

        input_data = autoencoder.encoder(torch.tensor(val_data.X).float())
        baseline_data = torch.zeros(input_data.shape[1])
        baseline_data.requires_grad = True

        explainer = PathExplainerTorch(intercode_loss_wrapper)
        attributions = explainer.attributions(input_data,
                                              baseline=baseline_data,
                                              num_samples=200,
                                              use_expectation=False)

        top_features['IG'] = attributions.detach().numpy().mean(0)

        # Random pathway rankings
        print("Calc Random")
        np.random.seed(rand_seed)
        top_features['rand'] = np.random.randn(top_features.shape[0])

        # impute or retrain
        def impute_benchmark(method, n_pathways=number_of_pathways):
            """Return test MSE after zeroing the top 1..n_pathways pathways."""
            method_recons_errors = []

            # Both are invariant across the loop below, so compute them once.
            ranked_terms = top_features.sort_values(method).index
            all_terms = list(top_features.index)

            # for the top 1, 2, ..., n_pathways pathways
            for i in range(1, 1+n_pathways):

                # set pathways = 0.
                test_matrix = torch.tensor(test_data.X).float()
                test_matrix_embedded = autoencoder.encoder(test_matrix)
                for x in ranked_terms[:i]:
                    index_to_zero = all_terms.index(x)
                    test_matrix_embedded[:,index_to_zero] = 0.

                global_recon = autoencoder.decoder(test_matrix_embedded)
                recons_error = F.mse_loss(global_recon, test_matrix).detach().item()
                method_recons_errors.append(recons_error)
            return method_recons_errors

        print("Impute L2")
        l2_results[rand_seed,:] = impute_benchmark('l2')
        print("Impute IG")
        ig_results[rand_seed,:] = impute_benchmark('IG')
        print("Impute RAND")
        rand_results[rand_seed,:] = impute_benchmark('rand')

    # save results
    os.makedirs('results', exist_ok=True)  # do not lose a full run to a missing folder
    with open('results/intercode_kang_impute_l2.npy', 'wb') as f:
        np.save(f, l2_results)
    with open('results/intercode_kang_impute_ig.npy', 'wb') as f:
        np.save(f, ig_results)
    with open('results/intercode_kang_impute_rand.npy', 'wb') as f:
        np.save(f, rand_results)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/summary.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines a function to plot individual feature-level importances
3 | in a summary plot.
4 | """
5 | import pandas as pd
6 | import numpy as np
7 | import matplotlib.pyplot as plt
8 | import matplotlib as mpl
9 | from scatter import _get_bounds, _color_bar, _get_shared_limits, _set_axis_config
10 | import colors
11 |
12 | def _get_jitter_array(feature_values,
13 | select_attributions):
14 | """
15 | Helper function to get jitter in a summary plot.
16 | Args:
17 | feature_values: see summary_plot
18 | select_attributions: see summary_plot
19 | """
20 | jitter_array = np.zeros(feature_values.shape)
21 | for i in range(select_attributions.shape[1]):
22 | feature_attr = select_attributions[:, i]
23 | num_samples = feature_attr.shape[0]
24 | nbins = 100
25 | quant = np.round(nbins * (feature_attr - np.min(feature_attr)) / \
26 | (np.max(feature_attr) - \
27 | np.min(feature_attr) + 1e-8))
28 | inds = np.argsort(quant + np.random.randn(num_samples) * 1e-6)
29 | layer = 0
30 | last_bin = -1
31 | jitter_values = np.zeros(num_samples)
32 | for ind in inds:
33 | if quant[ind] != last_bin:
34 | layer = 0
35 | jitter_values[ind] = np.ceil(layer / 2) * ((layer % 2) * 2 - 1)
36 | layer += 1
37 | last_bin = quant[ind]
38 | jitter_values *= 0.9 * (1.0 / np.max(jitter_values + 1))
39 | jitter_array[:, i] = jitter_values
40 | return jitter_array
41 |
42 | def _get_jitter_df(interactions, feature_values,
43 | select_attributions, attributions,
44 | interaction_feature, feature_order):
45 | """
46 | Helper function to call the jitter matrix function.
47 | """
48 | if interactions is None:
49 | jitter_array = _get_jitter_array(feature_values, select_attributions)
50 | jitter_df = pd.DataFrame(jitter_array)
51 | else:
52 | if interactions.shape == attributions.shape:
53 | select_interactions = interactions[:, feature_order]
54 | else:
55 | if interaction_feature is None:
56 | raise ValueError('Argument interaction was specified ' + \
57 | 'but argument interaction_feature was not.')
58 | select_interactions = interactions[:, feature_order, interaction_feature]
59 | jitter_df = pd.DataFrame(select_interactions)
60 | return jitter_df
61 |
def summary_plot(attributions,
                 feature_values,
                 interactions=None,
                 interaction_feature=None,
                 feature_names=None,
                 plot_top_k=None,
                 standardize_features=True,
                 scale_x_ind=False,
                 scale_y_ind=False,
                 figsize=(8, 4),
                 dpi=150,
                 **kwargs):
    """
    Function to draw a matplotlib summary plot of attribution values:
    one row per feature, ordered by mean absolute attribution, with the
    attribution on the x axis and points colored by feature value.

    Args:
        attributions: A matrix of attributions.
                      Should be of shape [batch_size, feature_dims].
        feature_values: A matrix of feature values.
                        Should the same shape as the attributions.
        interactions: Either a matrix of the same shape as attributions representing
                      the interaction between interaction_feature and all other features,
                      or a matrix that can be indexed as
                      interactions[:, :, interaction_feature]. When given, the
                      interaction values replace the jitter on the y axis.
        interaction_feature: A feature to use for interactions if interactions
                             are provided as all pairwise interactions.
        feature_names: An optional list of length attributions.shape[1]. Each
                       entry should be a string representing the name of a feature.
        plot_top_k: The number of features to plot. If none, will plot all features.
                    This might take a while, depending on how many features you have.
                    Values larger than the number of features are clamped.
        standardize_features: Set to True to z-score each plotted feature
                              before it is used for coloring. Defaults to True.
        scale_x_ind: Set to True to scale the x axes of each plot independently.
                     Defaults to False.
        scale_y_ind: Set to True to scale the y axes of each plot independently.
                     Defaults to False.
        figsize: Figure size in matplotlib units.
        dpi: Resolution of the figure.
        kwargs: Passed to plt.scatter (and forwarded to the color bar helper).
    """
    num_features = attributions.shape[1]
    if plot_top_k is None:
        plot_top_k = num_features
    # Asking for more rows than there are features would otherwise fail
    # when indexing feature_names below.
    plot_top_k = min(plot_top_k, num_features)

    mean_abs_attr = np.mean(np.abs(attributions), axis=0)
    max_order = np.argsort(mean_abs_attr)
    feature_order = max_order[::-1][:plot_top_k]

    if feature_names is None:
        feature_names = ['Feature {}'.format(i) for i in range(feature_values.shape[1])]

    feature_values = feature_values[:, feature_order]
    select_attributions = attributions[:, feature_order]
    feature_names = [feature_names[i] for i in feature_order]

    if standardize_features:
        standardized_feature_values = (feature_values - np.mean(feature_values,
                                                                axis=0,
                                                                keepdims=True))
        # 1e-7 only guards against division by zero for constant features.
        # (The previous 1e7 flattened every standardized value to ~0.)
        standardized_feature_values = standardized_feature_values / \
                                      (np.std(standardized_feature_values,
                                              axis=0,
                                              keepdims=True) + 1e-7)
    else:
        standardized_feature_values = feature_values

    vmin, vmax = _get_bounds(standardized_feature_values)
    standardized_feature_values = np.clip(standardized_feature_values, vmin, vmax)

    attribution_names = ['Attribution to {}'.format(name) for name in feature_names]
    feature_df = pd.DataFrame(standardized_feature_values)
    attribution_df = pd.DataFrame(select_attributions)
    feature_df.columns = feature_names
    attribution_df.columns = attribution_names

    # Melt to long format; the three frames share row order, so they can be
    # concatenated side by side.
    feature_df = pd.melt(feature_df, var_name='Feature', value_name='Normalized Feature Value')
    attribution_df = pd.melt(attribution_df, var_name='Attribution', value_name='Attribution Value')
    attribution_df = attribution_df.drop(columns=['Attribution'])

    jitter_df = _get_jitter_df(interactions, feature_values,
                               select_attributions, attributions,
                               interaction_feature, feature_order)
    jitter_df = pd.melt(jitter_df, var_name='Variable', value_name='Jitter')
    jitter_df = jitter_df.drop(columns=['Variable'])
    melted_df = pd.concat([feature_df, attribution_df, jitter_df], axis=1)

    if 's' not in kwargs:
        kwargs['s'] = 4
    if 'cmap' not in kwargs:
        kwargs['cmap'] = colors.green_gold()

    x_limits, y_limits = _get_shared_limits(melted_df['Attribution Value'],
                                            melted_df['Jitter'],
                                            scale_x_ind,
                                            scale_y_ind)

    # squeeze=False always yields a 2-D array of axes, so a single-row plot
    # (plot_top_k == 1) can be indexed like any other.
    fig, axs = plt.subplots(plot_top_k, 1, figsize=figsize, dpi=dpi, squeeze=False)
    axs = axs[:, 0]
    fig.subplots_adjust(left=0.2, hspace=0)

    # Every row except the last hides its x ticks.
    for i in range(plot_top_k - 1):
        axis = axs[i]
        _set_axis_config(axis,
                         clear_x_ticks=True,
                         clear_y_ticks=True)
        trans = mpl.transforms.blended_transform_factory(axis.transData, axis.transAxes)
        axis.plot([0.0, 1.0], [0.5, 0.5], transform=axis.transAxes,
                  linewidth=0.5, color='black', alpha=0.3, zorder=1)
        axis.plot([0.0, 0.0], [-1.0, 1.0], transform=trans, clip_on=False,
                  linewidth=0.5, color='black', alpha=0.3, zorder=1)

    # The bottom row carries the shared x axis and its label.
    axis = axs[-1]
    _set_axis_config(axis,
                     [0.0, 0.0, 0.0, 0.5],
                     clear_x_ticks=False,
                     clear_y_ticks=True)
    trans = mpl.transforms.blended_transform_factory(axis.transData, axis.transAxes)
    axis.plot([0.0, 1.0], [0.5, 0.5], transform=axis.transAxes,
              linewidth=0.5, color='black', alpha=0.3, zorder=1)
    axis.plot([0.0, 0.0], [0.0, 1.0], transform=trans,
              linewidth=0.5, color='black', alpha=0.3, zorder=1)
    axis.tick_params(length=4, labelsize=8)
    axis.set_xlabel('Attribution Value')

    for i in range(plot_top_k):
        axis = axs[i]
        selected_df = melted_df.loc[melted_df['Feature'] == feature_names[i]]
        trans = mpl.transforms.blended_transform_factory(axis.transAxes, axis.transAxes)
        axis.text(-0.02, 0.5, feature_names[i],
                  horizontalalignment='right',
                  verticalalignment='center',
                  fontsize=8,
                  transform=trans)
        axis.scatter(x=selected_df['Attribution Value'],
                     y=selected_df['Jitter'],
                     c=selected_df['Normalized Feature Value'],
                     zorder=2,
                     **kwargs)
        if x_limits is not None:
            axis.set_xlim(x_limits)
        if y_limits is not None:
            axis.set_ylim(y_limits)

    _color_bar(fig, vmin, vmax, 'Feature Value', ticks=False, label_size=8, **kwargs)
--------------------------------------------------------------------------------
/get_top_pathways.py:
--------------------------------------------------------------------------------
1 | import anndata
2 | import numpy as np
3 | import pandas as pd
4 | import torch
5 | import torch.nn.functional as F
6 | import os
7 |
8 | from utils import load_annotations
9 | from sklearn.model_selection import train_test_split
10 |
11 | from torch.utils.data import Dataset, DataLoader
12 | from datasets import RNASeqData
13 |
14 | from pathexplainer import PathExplainerTorch
15 | from sklearn.linear_model import LogisticRegression
16 | import argparse
17 |
18 |
19 | from models import pmVAEModel
20 | import mygene
21 | import os
22 | import time
23 |
save_path = 'new_for_revision/new_res/'


def main():
    """
    Rank the latent pathways of a pmVAE model on a chosen dataset.

    For each of 10 random splits, a pmVAE is trained and its latent
    pathways are scored in two ways: the mean Integrated-Gradients
    attribution of the validation reconstruction error, and the (negated)
    validation accuracy of a single-feature logistic regression predicting
    a dataset-specific binary label. Scores are written after every
    replicate to ``<save_path><dataset>_ig.csv`` / ``_lr.csv``; wall-clock
    timings are written at the end to ``<save_path><dataset>_times.csv``.

    Command-line arguments (all optional, positional):
        dataset: one of kang, datlinger, mcfarland, zheng, haber, grubman,
            norman.
        which_gpu: value assigned to CUDA_VISIBLE_DEVICES.
        gene_prog: for the norman dataset, the gene program kept next to
            the controls; it is also part of the checkpoint file name.
    """

    # (obs column, positive class) used to build the logistic regression
    # labels of each supported dataset.
    label_spec = {
        'kang': ('condition', 'stimulated'),
        'datlinger': ('condition', 'stimulated'),
        'mcfarland': ('TP53_mutation_status', 'Wild Type'),
        'haber': ('condition', 'Control'),
        'grubman': ('batchCond', 'ct'),
        'zheng': ('condition', 'healthy'),
        'norman': ('gene_program', 'Ctrl'),
    }

    ig_times = []
    lr_times = []
    train_times = []

    # get dataset, removal method
    parser = argparse.ArgumentParser()
    # nargs='?' is required for the defaults of positional arguments to apply.
    parser.add_argument('dataset', action="store", nargs='?', default='kang')
    parser.add_argument('which_gpu', action="store", nargs='?', default='0')
    parser.add_argument('gene_prog', action="store", nargs='?', default='Ctrl')

    args = parser.parse_args()

    # Fail early with a readable message instead of a NameError on `data`.
    if args.dataset not in label_spec:
        parser.error("unsupported dataset '{}'; choose one of: {}".format(
            args.dataset, ', '.join(sorted(label_spec))))

    os.environ["CUDA_VISIBLE_DEVICES"] = args.which_gpu
    dataset = args.dataset

    # load data
    if dataset == 'datlinger':

        data = anndata.read('data/datlinger_pp.h5ad')
        symbols = data.var_names

    elif dataset == 'kang':

        data = anndata.read('data/kang_count.h5ad')
        symbols = data.var_names

    elif dataset == 'mcfarland':

        data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad')
        # keep only the Idasanutlin-treated cells
        data = data[data.obs['condition'] == 'Idasanutlin'].copy()
        symbols = data.var_names

    elif dataset == 'zheng':
        data = anndata.read('/projects/leelab/data/single-cell/zheng_2017/preprocessed/adata_top_2000_genes.h5ad')

        # convert ENSG IDs to gene symbols:
        mg = mygene.MyGeneInfo()
        geneList = data.var_names
        geneSyms = mg.querymany(geneList, scopes='ensembl.gene', fields='symbol', species='human', returnall=True)

        symbols = []
        not_in = []
        is_in = []
        for k in range(2000):
            if ('symbol' in geneSyms['out'][k]):
                symbols += [geneSyms['out'][k]['symbol']]
                is_in += [geneSyms['out'][k]['query']]
            else:
                not_in += [geneSyms['out'][k]['query']]
        symbols = pd.Index(symbols)

        # NOTE(review): de-duplicating through a set discards the order (and
        # possibly the count) of `symbols`, so they may no longer line up
        # with the columns selected by `is_in` below -- confirm.
        symbols = pd.Index(set(symbols.to_numpy()))

        # filter out post transplant
        data = data[data.obs['condition'] != 'post_transplant'][:,is_in].copy()

    elif dataset == 'haber':

        data = anndata.read('/projects/leelab/data/single-cell/haber_2017/preprocessed/adata_top_2000_genes.h5ad')

        # filter out the Salmonella condition
        data = data[data.obs['condition'] != 'Salmonella'].copy()

        symbols = data.var_names

    elif dataset == 'grubman':

        data = anndata.read('/projects/leelab/data/single-cell/grubman_2019/preprocessed/adata_top_2000_genes.h5ad')

        symbols = data.var_names

    else:  # 'norman'

        data = anndata.read('/projects/leelab/data/single-cell/norman_2019/preprocessed/adata_top_2000_genes_tc.h5ad')

        # command-line name -> value of obs['gene_program'] kept with the controls
        program_names = {
            'erythroid': 'Erythroid',
            'granulocyte-apoptosis': 'Granulocyte/apoptosis',
            'megakaryocyte': 'Megakaryocyte',
            'pro-growth': 'Pro-growth',
        }
        if args.gene_prog in program_names:
            keep = (data.obs['gene_program'] == 'Ctrl') | \
                   (data.obs['gene_program'] == program_names[args.gene_prog])
            data = data[keep].copy()

        test_df = pd.DataFrame(index=data.var['gene_name'])
        symbols = test_df.index

    # load annotations (identical for every replicate, so parse the file once)
    membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt',
                                       symbols,
                                       min_genes=13
                                      ).astype(bool).T

    # Make sure the output folders exist before spending time on training.
    os.makedirs(save_path, exist_ok=True)
    os.makedirs('saved_models', exist_ok=True)

    number_of_replicates = 10
    first_run = True

    # for 10 experimental replicates
    for rand_seed in range(number_of_replicates):

        print("replicate number " + str(rand_seed))

        # split data (the held-out test split is not used by this script)
        train_data, _ = train_test_split(data,
                                         test_size=0.25,
                                         shuffle=True,
                                         random_state=rand_seed)
        tr_data, val_data = train_test_split(train_data,
                                             test_size=0.25,
                                             shuffle=True,
                                             random_state=rand_seed)

        tr_ds = RNASeqData(np.array(tr_data.X))
        val_ds = RNASeqData(np.array(val_data.X))

        ##
        ## train model
        ##

        # initialize base model
        basePMVAE = pmVAEModel(membership_mask.values,
                               [12],
                               1,
                               beta=1e-05,
                               terms=membership_mask.index,
                               add_auxiliary_module=True
                              )

        if first_run:
            # The result tables are indexed by latent names, which are only
            # available once a model has been built.
            top_ig = pd.DataFrame(index=basePMVAE.latent_space_names())
            top_lr = pd.DataFrame(index=basePMVAE.latent_space_names())
            first_run = False

        # train
        start_train = time.time()
        basePMVAE.train(tr_ds, val_ds,
                        checkpoint_path='saved_models/' + dataset + '_' + args.gene_prog + '.pkl',
                        max_epochs=100)

        end_train = time.time()
        train_times.append(end_train - start_train)

        basePMVAE.set_gpu(False)

        # IG pathway rankings
        print("Calc IG score")

        start_ig = time.time()

        ground_truth = torch.tensor(np.array(val_data.X)).float()

        def model_loss_wrapper(z):
            # per-sample reconstruction error as a (batch, 1) column
            module_outputs = basePMVAE.model.decoder_net(z)
            global_recon = basePMVAE.model.merge(module_outputs)
            return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1,1)

        outs = basePMVAE.model(ground_truth)

        input_data = outs.z
        baseline_data = torch.zeros(outs.z.shape[1])
        baseline_data.requires_grad = True

        explainer = PathExplainerTorch(model_loss_wrapper)
        attributions = explainer.attributions(input_data,
                                              baseline=baseline_data,
                                              num_samples=200,
                                              use_expectation=False)

        np_attribs = attributions.detach().numpy()
        top_ig[rand_seed] = np_attribs.mean(0)

        end_ig = time.time()
        ig_times.append(end_ig - start_ig)

        # checkpoint the results gathered so far
        top_ig.to_csv(save_path + dataset + '_ig.csv', index=False)

        # LR pathway rankings
        print("Calc LR score")
        start_lr = time.time()

        label_column, positive_class = label_spec[dataset]
        train_labels = (tr_data.obs[label_column] == positive_class).values
        val_labels = (val_data.obs[label_column] == positive_class).values

        train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float()).z.detach().numpy()
        val_embedding = basePMVAE.model(torch.tensor(val_data.X).float()).z.detach().numpy()

        # one single-feature classifier per latent pathway
        lr_scores = []
        for pathway in range(train_embedding.shape[1]):
            clf = LogisticRegression(random_state=0).fit(train_embedding[:,pathway].reshape(-1,1), train_labels)
            lr_scores.append(clf.score(val_embedding[:,pathway].reshape(-1,1), val_labels))

        # negate the accuracies before storing them
        top_lr[rand_seed] = lr_scores
        top_lr[rand_seed] = -1.*top_lr[rand_seed]

        end_lr = time.time()
        lr_times.append(end_lr - start_lr)

        # checkpoint the results gathered so far
        top_lr.to_csv(save_path + dataset + '_lr.csv', index=False)

    times = pd.DataFrame()
    times['ig_times'] = ig_times
    times['lr_times'] = lr_times
    times['train_times'] = train_times

    times.to_csv(save_path + args.dataset + '_times.csv')


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/pathexplainer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import functools
3 | import operator
4 | import torch
5 | from torch.autograd import grad
6 | import numpy as np
7 | from tqdm import *
8 |
def gather_nd(params, indices):
    """
    Pick one element of ``params`` per row of ``indices``.

    Args:
        params: Tensor to index
        indices: 2-dimensional tensor of integers; each row holds the
            coordinates of one element of ``params``.
    Returns:
        output: 1-dimensional tensor where, for 2-D ``params``,
            output[i] = params[indices[i][0]][indices[i][1]].
            Coordinates that map outside ``params`` select element 0.

    params   indices    output
    1 2       1 1         4
    3 4       2 0 ---->   5
    5 6       0 0         1
    """
    last_flat_index = functools.reduce(operator.mul, params.size()) - 1

    # one row per coordinate axis
    coords = indices.t().long()
    flat = torch.zeros_like(coords[0]).long()

    # Row-major flattening: walk the axes from last to first while
    # accumulating the stride of each axis.
    stride = 1
    for axis in reversed(range(coords.size(0))):
        flat += coords[axis] * stride
        stride *= params.size(axis)

    # redirect out-of-range positions to the first element
    flat[(flat < 0) | (flat > last_flat_index)] = 0
    return torch.take(params, flat)
36 |
37 | class PathExplainerTorch(object):
38 | def __init__(self, model):
39 | self.model = model
40 | return
41 |
42 | def _get_ref_tensor(self,baseline,batch_size,num_samples):
43 | number_to_draw = num_samples * batch_size
44 | replace = baseline.shape[0] < number_to_draw
45 | sample_indices = np.random.choice(baseline.shape[0],
46 | size=number_to_draw,
47 | replace=replace)
48 | ref_tensor = baseline[sample_indices,:]
49 |
50 | return ref_tensor
51 |
52 | def _get_samples_input(self, input_tensor, baseline,
53 | num_samples, use_expectation):
54 | '''
55 | calculate interpolation points
56 | Args:
57 | input_tensor: Tensor of shape (batch, ...), where ... indicates
58 | the input dimensions.
59 | reference_tensor: A tensor of shape (batch, k, ...) where ...
60 | indicates dimensions, and k represents the number of background
61 | reference samples to draw per input in the batch.
62 | Returns:
63 | samples_input: A tensor of shape (batch, k, ...) with the
64 | interpolated points between input and ref.
65 | samples_delta: A tensor of shape (batch, 1, ...) with the
66 | difference between input and reference for each sample
67 | '''
68 | input_dims = list(input_tensor.size())[1:]
69 | num_input_dims = len(input_dims)
70 | batch_size = input_tensor.size()[0]
71 |
72 | if use_expectation:
73 | reference_tensor = self._get_ref_tensor(baseline,batch_size,num_samples)
74 | shape = reference_tensor.shape
75 | reference_tensor = reference_tensor.view(
76 | batch_size,
77 | num_samples,
78 | *(shape[1:]))
79 |
80 | # Grab a [batch_size, k]-sized interpolation sample
81 | t_tensor = torch.FloatTensor(batch_size, num_samples).uniform_(0,1).to(reference_tensor.device)
82 | shape = [batch_size, num_samples] + [1] * num_input_dims
83 | interp_coef = t_tensor.view(*shape)
84 |
85 | # Evaluate the end points
86 | end_point_ref = (1.0 - interp_coef) * reference_tensor
87 |
88 | input_expand_mult = input_tensor.unsqueeze(1)
89 | end_point_input = interp_coef * input_expand_mult
90 |
91 | # Affine Combine
92 | samples_input = end_point_input + end_point_ref
93 |
94 | else:
95 | batch_size = input_tensor.size()[0]
96 | input_expand = input_tensor.unsqueeze(1)
97 | reps = np.ones(len(baseline.shape)).astype(int)
98 | reps[0] = batch_size
99 | reference_tensor = baseline.repeat(list(reps)).unsqueeze(1)
100 | # reference_tensor = torch.as_tensor(sampled_baseline).unsqueeze(1).to(baseline.device)
101 | scaled_inputs = [reference_tensor + (float(i)/num_samples)*(input_expand - reference_tensor) \
102 | for i in range(0,num_samples+1)]
103 | samples_input = torch.cat(scaled_inputs,dim=1)
104 |
105 | samples_delta = self._get_samples_delta(input_tensor, reference_tensor)
106 | samples_delta = samples_delta.to(samples_input.device)
107 |
108 | return samples_input, samples_delta
109 |
110 | def _get_samples_delta(self, input_tensor, reference_tensor):
111 | input_expand_mult = input_tensor.unsqueeze(1)
112 | sd = input_expand_mult - reference_tensor
113 | return sd
114 |
115 | def _get_grads(self, samples_input, output_indices=None):
116 |
117 | grad_tensor = torch.zeros(samples_input.shape).float().to(samples_input.device)
118 |
119 | k_ = samples_input.shape[1]
120 |
121 | for i in range(k_):
122 | particular_slice = samples_input[:,i]
123 | batch_output = self.model(particular_slice)
124 | # should check that users pass in sparse labels
125 | # Only look at the user-specified label
126 | if batch_output.size(1) > 1:
127 | sample_indices = torch.arange(0,batch_output.size(0)).to(samples_input.device)
128 | indices_tensor = torch.cat([
129 | sample_indices.unsqueeze(1),
130 | output_indices.unsqueeze(1)], dim=1)
131 | batch_output = gather_nd(batch_output, indices_tensor)
132 |
133 | model_grads = grad(
134 | outputs=batch_output,
135 | inputs=particular_slice,
136 | grad_outputs=torch.ones_like(batch_output).to(samples_input.device),
137 | create_graph=True)
138 | grad_tensor[:,i,:] = model_grads[0]
139 | return grad_tensor
140 |
def attributions(self, input_tensor, baseline,
                 num_samples=50, use_expectation=True,
                 output_indices=None):
    """
    Calculate either Expected or Integrated Gradients approximation of
    Aumann-Shapley values for the sample ``input_tensor``.

    Args:
        input_tensor (torch.Tensor): Pytorch tensor representing the input
            to be explained. Gradient tracking is switched on for it in
            place if it is not already enabled.
        baseline (torch.Tensor): Pytorch tensor representing the baseline.
            If use_expectation is true, then baseline should be shape
            (num_refs, ...) where ... indicates the dimensionality
            of the input. Otherwise, baseline should be shape (1, ...);
            a baseline of shape (...) is accepted and gets a leading
            axis added. It is moved to the device of ``input_tensor``.
        num_samples (int): Number of interpolation samples, forwarded to
            ``_get_samples_input``.
        use_expectation (bool): Forwarded to ``_get_samples_input`` to
            choose how the interpolated samples are built.
        output_indices (optional, default=None): For multi-class
            prediction; forwarded to ``_get_grads``.

    Returns:
        torch.Tensor: ``samples_delta * gradients`` averaged over axis 1.

    Raises:
        ValueError: If the shape of ``baseline`` does not match the
            requirements above.
    """
    equal_dims = baseline.shape[1:] == input_tensor.shape[1:]
    almost_equal_dims = baseline.shape == input_tensor.shape[1:]

    dev = input_tensor.device
    baseline = baseline.to(dev)

    # ``requires_grad_`` is a method; assigning ``True`` to it (as the
    # previous code did) only shadowed the method and never enabled
    # gradient tracking on the input.
    if not input_tensor.requires_grad:
        input_tensor.requires_grad_(True)

    if use_expectation and not equal_dims:
        raise ValueError('baseline should be shape (num_refs, ...) '
                         'where ... indicates the dimensionality '
                         'of the input')

    if not use_expectation and baseline.shape[0] != 1:
        if almost_equal_dims:
            baseline = baseline.unsqueeze(0)
        else:
            raise ValueError('baseline should be shape (...) '
                             'where ... indicates the dimensionality '
                             'of the input')

    samples_input, samples_delta = self._get_samples_input(
        input_tensor, baseline, num_samples, use_expectation)
    grad_tensor = self._get_grads(samples_input, output_indices)
    mult_grads = samples_delta * grad_tensor
    attributions = mult_grads.mean(1)

    return attributions
186 |
def interactions(self, input_tensor, baseline,
                 num_samples=50, use_expectation=True,
                 output_indices=None, interaction_index=None,
                 verbose=True):
    """
    Second-order counterpart of ``attributions`` for 2D inputs.

    Interpolated samples are drawn with ``round(sqrt(num_samples))``
    points. For every point the attributions are computed and then
    differentiated again w.r.t. that point, either for the single
    feature ``interaction_index`` or for every feature in turn. The
    resulting gradients are multiplied by ``samples_delta`` and averaged
    over the sample axis.

    samples_input: A tensor of shape (batch, k, features)
    ig_tensor: also size (batch, k, features), but contains IG values

    Raises:
        ValueError: If ``input_tensor`` is not 2D or ``baseline`` has an
            incompatible shape.
    """
    if len(input_tensor.shape) != 2:
        raise ValueError('PyTorch Explainer only supports '
                         'interaction for 2D input tensors!')

    feature_shape = input_tensor.shape[1:]
    equal_dims = baseline.shape[1:] == feature_shape
    almost_equal_dims = baseline.shape == feature_shape

    if use_expectation:
        if not equal_dims:
            raise ValueError('baseline should be shape (num_refs, ...) '
                             'where ... indicates the dimensionality '
                             'of the input')
    elif baseline.shape[0] != 1:
        if not almost_equal_dims:
            raise ValueError('baseline should be shape (...) '
                             'where ... indicates the dimensionality '
                             'of the input')
        baseline = baseline.unsqueeze(0)

    inner_loop_nsamples = int(round(np.sqrt(num_samples)))

    samples_input, samples_delta = self._get_samples_input(
        input_tensor, baseline, inner_loop_nsamples, use_expectation)

    n_rows = input_tensor.size(0)
    n_feats = input_tensor.size(1)
    single_index = interaction_index is not None

    # One trailing feature axis for a single interaction index, two for
    # the full feature-by-feature result.
    mult_shape = [n_rows, samples_input.size(1), n_feats]
    if not single_index:
        mult_shape.append(n_feats)
    interaction_mult_tensor = torch.zeros(mult_shape)

    ig_tensor = torch.zeros(samples_input.shape).float()

    loop_num = inner_loop_nsamples if use_expectation else inner_loop_nsamples + 1

    steps = range(loop_num)
    if verbose:
        steps = tqdm(steps)

    for step in steps:
        point = samples_input[:, step]
        ig_tensor[:, step, :] = self.attributions(
            point, baseline,
            num_samples=inner_loop_nsamples,
            use_expectation=use_expectation,
            output_indices=output_indices)

        if single_index:
            target = ig_tensor[:, step, interaction_index]
            second_grads = grad(
                outputs=target,
                inputs=point,
                grad_outputs=torch.ones_like(target),
                create_graph=True)[0]
            interaction_mult_tensor[:, step, :] = second_grads
        else:
            for feature in range(n_feats):
                target = ig_tensor[:, step, feature]
                second_grads = grad(
                    outputs=target,
                    inputs=point,
                    grad_outputs=torch.ones_like(target),
                    create_graph=True)[0]
                interaction_mult_tensor[:, step, feature, :] = second_grads

    interaction_mult_tensor = interaction_mult_tensor.to(samples_delta.device)
    if single_index:
        interaction_tensor = interaction_mult_tensor * samples_delta
    else:
        interaction_tensor = interaction_mult_tensor * samples_delta.unsqueeze(2)

    return interaction_tensor.mean(1)
--------------------------------------------------------------------------------
/standard_VAE_impute_benchmark.py:
--------------------------------------------------------------------------------
1 | # impute benchmark on standard VAE
2 |
3 | import anndata
4 | import numpy as np
5 | import pandas as pd
6 | import torch
7 | import torch.nn.functional as F
8 | import os
9 | import mygene
10 |
11 | from utils import load_annotations
12 | from sklearn.model_selection import train_test_split
13 |
14 | from torch.utils.data import Dataset, DataLoader
15 | from datasets import RNASeqData
16 |
17 | import argparse
18 |
19 | from pathexplainer import PathExplainerTorch
20 | from sklearn.linear_model import LogisticRegression
21 |
22 | from models import VAEModel
23 | import time
24 |
25 | import os
26 |
27 | save_path = 'new_for_revision/new_res/dense/'
28 |
def main():
    """Run the imputation benchmark of pathway-ranking methods on a dense VAE.

    Positional command-line arguments: ``dataset``, ``removal`` and
    ``which_gpu``. For each of 10 random train/validation/test splits a
    ``VAEModel`` is trained and its latent dimensions are ranked by five
    scores (logvar, IG, logistic-regression accuracy, KLD, random). When
    ``removal`` is ``"impute"``, the test reconstruction error is recorded
    after zeroing the top 1..20 ranked latent dimensions. Timings and
    results are written under the module-level ``save_path`` after every
    replicate.
    """
    # get dataset, removal method
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', action="store", default='kang')
    parser.add_argument('removal', action="store", default='impute')
    parser.add_argument('which_gpu', action="store", default='0')

    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.which_gpu

    # load datlinger data
    if args.dataset == 'datlinger':
        data = anndata.read('data/datlinger_pp.h5ad')
        symbols = data.var_names

    # load kang data
    if args.dataset == 'kang':
        data = anndata.read('data/kang_count.h5ad')
        symbols = data.var_names

    # load mcfarland data
    if args.dataset == 'mcfarland':
        data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad')
        data = data[data.obs['condition'] == 'Idasanutlin'].copy()
        symbols = data.var_names

    # load zheng data
    if args.dataset == 'zheng':
        data = anndata.read('/projects/leelab/data/single-cell/zheng_2017/preprocessed/adata_top_2000_genes.h5ad')

        # convert ENSG IDs to gene symbols:
        mg = mygene.MyGeneInfo()
        geneList = data.var_names
        geneSyms = mg.querymany(geneList, scopes='ensembl.gene', fields='symbol', species='human', returnall=True)

        symbols = []
        not_in = []
        is_in = []
        for k in range(2000):
            if ('symbol' in geneSyms['out'][k]):
                symbols += [geneSyms['out'][k]['symbol']]
                is_in += [geneSyms['out'][k]['query']]
            else:
                not_in += [geneSyms['out'][k]['query']]
        symbols = pd.Index(symbols)

        symbols = pd.Index(set(symbols.to_numpy()))

        # filter out post transplant
        data = data[data.obs['condition'] != 'post_transplant'][:, is_in].copy()

    # load haber data
    if args.dataset == 'haber':
        data = anndata.read('/projects/leelab/data/single-cell/haber_2017/preprocessed/adata_top_2000_genes.h5ad')

        # drop the cells whose condition is 'Salmonella'
        data = data[data.obs['condition'] != 'Salmonella'].copy()

        symbols = data.var_names

    # load grubman data
    if args.dataset == 'grubman':
        data = anndata.read('/projects/leelab/data/single-cell/grubman_2019/preprocessed/adata_top_2000_genes.h5ad')

        symbols = data.var_names

    # for all datasets: read the annotation file once and reuse it
    annotations = load_annotations(
        'data/c2.cp.reactome.v7.4.symbols.gmt',
        symbols,
        min_genes=33
    )
    data.varm['I'] = annotations.values
    data.uns['terms'] = list(annotations.columns)

    # The latent size must equal the number of terms: every ranking below
    # is stored as a column of a DataFrame indexed by data.uns['terms'].
    n_pathways = len(data.uns['terms'])

    number_of_pathways = 20
    number_of_replicates = 10

    logvar_results = np.zeros((number_of_replicates, number_of_pathways))
    ig_results = np.zeros((number_of_replicates, number_of_pathways))
    lr_results = np.zeros((number_of_replicates, number_of_pathways))
    kld_results = np.zeros((number_of_replicates, number_of_pathways))
    rand_results = np.zeros((number_of_replicates, number_of_pathways))

    logvar_times = []
    ig_times = []
    lr_times = []
    kld_times = []
    rand_times = []

    # for 10 experimental replicates
    for rand_seed in range(number_of_replicates):

        print("replicate number " + str(rand_seed))

        # split data
        train_data, test_data = train_test_split(data,
                                                 test_size=0.25,
                                                 shuffle=True,
                                                 random_state=rand_seed)
        tr_data, val_data = train_test_split(train_data,
                                             test_size=0.25,
                                             shuffle=True,
                                             random_state=rand_seed)

        tr_ds = RNASeqData(np.array(tr_data.X))
        val_ds = RNASeqData(np.array(val_data.X))

        # load annotations
        # NOTE(review): this mask is not used by the dense model below;
        # the call was previously left unclosed (a syntax error).
        membership_mask = load_annotations('data/c2.cp.reactome.v7.4.symbols.gmt',
                                           symbols,
                                           min_genes=13)

        ##
        ## train base model
        ##

        # initialize base model
        basePMVAE = VAEModel(n_features=tr_data.X.shape[1],
                             hidden_layers=[12*n_pathways, n_pathways],
                             beta=1e-05,
                             add_auxiliary_module=False
                             )

        print(basePMVAE.model)

        # train
        basePMVAE.train(tr_ds, val_ds,
                        checkpoint_path='saved_models/dense/'+args.dataset + '_' + args.removal +'_baseModel.pkl',
                        max_epochs=100)

        basePMVAE.set_gpu(False)

        ##
        ## get pathway rankings
        ##
        top_features = pd.DataFrame(index=data.uns['terms'])

        ## get max val logvar
        print("Calc max val score")

        ground_truth = torch.tensor(np.array(val_data.X)).float()
        outs = basePMVAE.model(ground_truth)

        start_logvar = time.time()

        top_features['logvar'] = -1.*outs.logvar.mean(0).detach().numpy()

        end_logvar = time.time()
        logvar_times.append(end_logvar-start_logvar)

        # IG pathway rankings
        print("Calc IG score")
        start_ig = time.time()

        def model_loss_wrapper(z):
            # Per-sample reconstruction MSE of the decoded latent code
            # against the validation data, as a column vector.
            module_outputs = basePMVAE.model.decoder_net(z)

            global_recon = module_outputs

            return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1, 1)

        input_data = outs.z
        baseline_data = torch.zeros(outs.z.shape[1])
        baseline_data.requires_grad = True

        explainer = PathExplainerTorch(model_loss_wrapper)
        attributions = explainer.attributions(input_data,
                                              baseline=baseline_data,
                                              num_samples=200,
                                              use_expectation=False)

        np_attribs = attributions.detach().numpy()
        top_features['IG'] = np_attribs.mean(0)

        end_ig = time.time()
        ig_times.append(end_ig - start_ig)

        # LR pathway rankings
        print("Calc LR score")
        start_lr = time.time()

        if args.dataset == 'kang':
            print('here')
            y_tr = tr_data.obs['condition']
            y_val = val_data.obs['condition']

            # NOTE(review): compared against a bytes literal here; confirm
            # this matches how the condition column is stored in the file.
            train_labels = (y_tr == b'stimulated').values
            val_labels = (y_val == b'stimulated').values

            print(train_labels.shape)
            print(train_labels.sum())

            print(val_labels.shape)
            print(val_labels.sum())

            print(tr_data.obs['condition'])

        if args.dataset == 'datlinger':
            y_tr = tr_data.obs['condition']
            y_val = val_data.obs['condition']

            train_labels = (y_tr == 'stimulated').values
            val_labels = (y_val == 'stimulated').values

        if args.dataset == 'mcfarland':
            y_tr = tr_data.obs['TP53_mutation_status']
            y_val = val_data.obs['TP53_mutation_status']

            train_labels = (y_tr == 'Wild Type').values
            val_labels = (y_val == 'Wild Type').values

        if args.dataset == 'haber':
            y_tr = tr_data.obs['condition']
            y_val = val_data.obs['condition']

            train_labels = (y_tr == 'Control').values
            val_labels = (y_val == 'Control').values

        if args.dataset == 'grubman':
            y_tr = tr_data.obs['batchCond']
            y_val = val_data.obs['batchCond']

            train_labels = (y_tr == 'ct').values
            val_labels = (y_val == 'ct').values

        if args.dataset == 'zheng':
            y_tr = tr_data.obs['condition']
            y_val = val_data.obs['condition']

            train_labels = (y_tr == 'healthy').values
            val_labels = (y_val == 'healthy').values

        train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float()).z.detach().numpy()
        val_embedding = basePMVAE.model(torch.tensor(val_data.X).float()).z.detach().numpy()

        # One single-feature classifier per latent dimension; its
        # validation accuracy is that dimension's score.
        lr_scores = []
        for pathway in range(train_embedding.shape[1]):
            clf = LogisticRegression(random_state=0).fit(train_embedding[:, pathway].reshape(-1, 1), train_labels)
            lr_scores.append(clf.score(val_embedding[:, pathway].reshape(-1, 1), val_labels))

        top_features['lr_score'] = lr_scores
        # negated so that an ascending sort puts the highest accuracy first
        top_features['lr_score'] = -1.*top_features['lr_score']

        end_lr = time.time()
        lr_times.append(end_lr - start_lr)

        # KLD pathway rankings
        print("Calc KLD")
        start_kld = time.time()

        pathway_kld = (-0.5 * (1 + outs.logvar - outs.mu.pow(2) - outs.logvar.exp()).mean(0)).detach().numpy()
        top_features['kld'] = -1.*pathway_kld

        end_kld = time.time()
        kld_times.append(end_kld - start_kld)

        # Random pathway rankings
        print("Calc Random")
        np.random.seed(rand_seed)
        top_features['rand'] = np.random.randn(top_features.shape[0])

        times = pd.DataFrame()
        times['logvar_times'] = logvar_times
        times['ig_times'] = ig_times
        times['lr_times'] = lr_times
        times['kld_times'] = kld_times

        times.to_csv(save_path + args.dataset + '_times.csv')

        # impute
        def impute_benchmark(method, n_pathways=20):
            # Test reconstruction MSE after zeroing the top 1..n_pathways
            # latent dimensions ranked (ascending) by column `method`.
            method_recons_errors = []

            # for top 20 pathways
            for i in range(1, 1+n_pathways):

                # set pathways = 0.
                test_matrix = torch.tensor(test_data.X).float()
                test_matrix_embedded = basePMVAE.model(test_matrix).z
                for x in top_features.sort_values(method).index[:i]:
                    index_to_zero = list(top_features.index).index(x)
                    test_matrix_embedded[:, index_to_zero] = 0.

                module_outputs = basePMVAE.model.decoder_net(test_matrix_embedded)

                global_recon = module_outputs

                recons_error = F.mse_loss(global_recon, test_matrix).detach().item()
                method_recons_errors.append(recons_error)
            return method_recons_errors

        # run impute
        if args.removal == "impute":
            print("Impute Logvar")
            logvar_results[rand_seed, :] = impute_benchmark('logvar')
            print("Impute IG")
            ig_results[rand_seed, :] = impute_benchmark('IG')
            print("Impute LR")
            lr_results[rand_seed, :] = impute_benchmark('lr_score')
            print("Impute KLD")
            kld_results[rand_seed, :] = impute_benchmark('kld')
            print("Impute RAND")
            rand_results[rand_seed, :] = impute_benchmark('rand')

        # save results every iteration so that if it crashes
        # there's at least some progress
        with open('{}/{}_{}_logvar.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, logvar_results)
        with open('{}/{}_{}_ig.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, ig_results)
        with open('{}/{}_{}_lr.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, lr_results)
        with open('{}/{}_{}_kld.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, kld_results)
        with open('{}/{}_{}_rand.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, rand_results)


if __name__ == '__main__':
    main()
384 |
--------------------------------------------------------------------------------
/benchmark_pmvae.py:
--------------------------------------------------------------------------------
1 | import anndata
2 | import numpy as np
3 | import pandas as pd
4 | import torch
5 | import torch.nn.functional as F
6 | import os
7 | import mygene
8 |
9 | from utils import load_annotations
10 | from sklearn.model_selection import train_test_split
11 |
12 | from torch.utils.data import Dataset, DataLoader
13 | from datasets import RNASeqData
14 |
15 | import argparse
16 |
17 | from pathexplainer import PathExplainerTorch
18 | from sklearn.linear_model import LogisticRegression
19 |
20 | from models import pmVAEModel
21 | import time
22 |
23 | import os
24 |
25 | save_path = 'new_for_revision/new_res/'
26 |
def main():
    """Benchmark pathway-ranking methods on a pmVAE by imputation or retraining.

    Positional command-line arguments: ``dataset``, ``removal`` and
    ``which_gpu``. For each of 10 random train/validation/test splits a
    ``pmVAEModel`` is trained and its latent dimensions are ranked by five
    scores (logvar, IG, logistic-regression accuracy, KLD, random). With
    ``removal == "impute"`` the top-ranked dimensions are zeroed before
    decoding the test set; with ``removal == "retrain"`` a reduced model
    restricted to the top-ranked pathways is trained instead. Timings and
    results are written under the module-level ``save_path`` after every
    replicate.
    """
    # get dataset, removal method
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', action="store", default='kang')
    parser.add_argument('removal', action="store", default='impute')
    parser.add_argument('which_gpu', action="store", default='0')

    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.which_gpu

    # load datlinger data
    if args.dataset == 'datlinger':
        data = anndata.read('data/datlinger_pp.h5ad')
        symbols = data.var_names

    # load norman data
    if args.dataset == 'norman':
        data = anndata.read('/projects/leelab/data/single-cell/norman_2019/preprocessed/adata_top_2000_genes_tc.h5ad')
        data = data[(data.obs['gene_program'] == 'Ctrl') | (data.obs['gene_program'] == 'Granulocyte/apoptosis')].copy()

        test_df = pd.DataFrame(index=data.var['gene_name'])
        symbols = test_df.index

    # load kang data
    if args.dataset == 'kang':
        data = anndata.read('data/kang_count.h5ad')
        symbols = data.var_names

    # load mcfarland data
    if args.dataset == 'mcfarland':
        data = anndata.read('/projects/leelab/data/single-cell/mcfarland_2020_Idasanutlin/preprocessed/adata_top_2000_genes_tc.h5ad')
        data = data[data.obs['condition'] == 'Idasanutlin'].copy()
        symbols = data.var_names

    # load zheng data
    if args.dataset == 'zheng':
        data = anndata.read('/projects/leelab/data/single-cell/zheng_2017/preprocessed/adata_top_2000_genes.h5ad')

        # convert ENSG IDs to gene symbols:
        mg = mygene.MyGeneInfo()
        geneList = data.var_names
        geneSyms = mg.querymany(geneList, scopes='ensembl.gene', fields='symbol', species='human', returnall=True)

        symbols = []
        not_in = []
        is_in = []
        for k in range(2000):
            if ('symbol' in geneSyms['out'][k]):
                symbols += [geneSyms['out'][k]['symbol']]
                is_in += [geneSyms['out'][k]['query']]
            else:
                not_in += [geneSyms['out'][k]['query']]
        symbols = pd.Index(symbols)

        symbols = pd.Index(set(symbols.to_numpy()))

        # filter out post transplant
        data = data[data.obs['condition'] != 'post_transplant'][:, is_in].copy()

    # load haber data
    if args.dataset == 'haber':
        data = anndata.read('/projects/leelab/data/single-cell/haber_2017/preprocessed/adata_top_2000_genes.h5ad')

        # drop the cells whose condition is 'Salmonella'
        data = data[data.obs['condition'] != 'Salmonella'].copy()

        symbols = data.var_names

    # load grubman data
    if args.dataset == 'grubman':
        data = anndata.read('/projects/leelab/data/single-cell/grubman_2019/preprocessed/adata_top_2000_genes.h5ad')

        symbols = data.var_names

    # for all datasets: read the annotation file once and reuse it
    annotations = load_annotations(
        'data/c2.cp.reactome.v7.4.symbols.gmt',
        symbols,
        min_genes=13
    )
    data.varm['I'] = annotations.values
    data.uns['terms'] = list(annotations.columns)

    top_ig = pd.DataFrame(index=data.uns['terms'])
    top_lr = pd.DataFrame(index=data.uns['terms'])

    number_of_pathways = 20
    number_of_replicates = 10

    logvar_results = np.zeros((number_of_replicates, number_of_pathways))
    ig_results = np.zeros((number_of_replicates, number_of_pathways))
    lr_results = np.zeros((number_of_replicates, number_of_pathways))
    kld_results = np.zeros((number_of_replicates, number_of_pathways))
    rand_results = np.zeros((number_of_replicates, number_of_pathways))

    logvar_times = []
    ig_times = []
    lr_times = []
    kld_times = []
    rand_times = []

    # for 10 experimental replicates
    for rand_seed in range(number_of_replicates):

        print("replicate number " + str(rand_seed))

        # split data
        train_data, test_data = train_test_split(data,
                                                 test_size=0.25,
                                                 shuffle=True,
                                                 random_state=rand_seed)
        tr_data, val_data = train_test_split(train_data,
                                             test_size=0.25,
                                             shuffle=True,
                                             random_state=rand_seed)

        tr_ds = RNASeqData(np.array(tr_data.X))
        val_ds = RNASeqData(np.array(val_data.X))

        # Pathway-by-gene boolean mask, derived from the annotations read
        # above (same file and arguments as the former per-replicate load).
        membership_mask = annotations.astype(bool).T

        ##
        ## train base model
        ##

        # initialize base model
        basePMVAE = pmVAEModel(membership_mask.values,
                               [12],
                               1,
                               beta=1e-05,
                               terms=membership_mask.index,
                               add_auxiliary_module=False
                               )

        # train
        basePMVAE.train(tr_ds, val_ds,
                        checkpoint_path=args.dataset + '_' + args.removal +'_baseModel.pkl',
                        max_epochs=100)

        basePMVAE.set_gpu(False)

        ##
        ## get pathway rankings
        ##
        top_features = pd.DataFrame(index=data.uns['terms'])

        ## get max val logvar
        print("Calc max val score")

        ground_truth = torch.tensor(np.array(val_data.X)).float()
        outs = basePMVAE.model(ground_truth)

        start_logvar = time.time()

        top_features['logvar'] = -1.*outs.logvar.mean(0).detach().numpy()

        end_logvar = time.time()
        logvar_times.append(end_logvar-start_logvar)

        # IG pathway rankings
        print("Calc IG score")
        start_ig = time.time()

        def model_loss_wrapper(z):
            # Per-sample reconstruction MSE of the decoded and merged
            # latent code against the validation data, as a column vector.
            module_outputs = basePMVAE.model.decoder_net(z)
            global_recon = basePMVAE.model.merge(module_outputs)
            return F.mse_loss(global_recon, ground_truth, reduction='none').mean(1).view(-1, 1)

        input_data = outs.z
        baseline_data = torch.zeros(outs.z.shape[1])
        baseline_data.requires_grad = True

        explainer = PathExplainerTorch(model_loss_wrapper)
        attributions = explainer.attributions(input_data,
                                              baseline=baseline_data,
                                              num_samples=200,
                                              use_expectation=False)

        np_attribs = attributions.detach().numpy()
        top_features['IG'] = np_attribs.mean(0)

        top_ig[rand_seed] = np_attribs.mean(0)

        end_ig = time.time()
        ig_times.append(end_ig - start_ig)

        # LR pathway rankings
        print("Calc LR score")
        start_lr = time.time()

        if args.dataset == 'kang' or args.dataset == 'datlinger':
            y_tr = tr_data.obs['condition']
            y_val = val_data.obs['condition']

            train_labels = (y_tr == 'stimulated').values
            val_labels = (y_val == 'stimulated').values

        if args.dataset == 'mcfarland':
            y_tr = tr_data.obs['TP53_mutation_status']
            y_val = val_data.obs['TP53_mutation_status']

            train_labels = (y_tr == 'Wild Type').values
            val_labels = (y_val == 'Wild Type').values

        if args.dataset == 'haber':
            y_tr = tr_data.obs['condition']
            y_val = val_data.obs['condition']

            train_labels = (y_tr == 'Control').values
            val_labels = (y_val == 'Control').values

        if args.dataset == 'grubman':
            y_tr = tr_data.obs['batchCond']
            y_val = val_data.obs['batchCond']

            train_labels = (y_tr == 'ct').values
            val_labels = (y_val == 'ct').values

        if args.dataset == 'zheng':
            y_tr = tr_data.obs['condition']
            y_val = val_data.obs['condition']

            train_labels = (y_tr == 'healthy').values
            val_labels = (y_val == 'healthy').values

        if args.dataset == 'norman':
            y_tr = tr_data.obs['gene_program']
            y_val = val_data.obs['gene_program']

            train_labels = (y_tr == 'Ctrl').values
            val_labels = (y_val == 'Ctrl').values

        train_embedding = basePMVAE.model(torch.tensor(tr_data.X).float()).z.detach().numpy()
        val_embedding = basePMVAE.model(torch.tensor(val_data.X).float()).z.detach().numpy()

        # One single-feature classifier per latent dimension; its
        # validation accuracy is that dimension's score.
        lr_scores = []
        for pathway in range(train_embedding.shape[1]):
            clf = LogisticRegression(random_state=0).fit(train_embedding[:, pathway].reshape(-1, 1), train_labels)
            lr_scores.append(clf.score(val_embedding[:, pathway].reshape(-1, 1), val_labels))

        top_features['lr_score'] = lr_scores
        # negated so that an ascending sort puts the highest accuracy first
        top_features['lr_score'] = -1.*top_features['lr_score']

        end_lr = time.time()
        lr_times.append(end_lr - start_lr)

        # KLD pathway rankings
        print("Calc KLD")
        start_kld = time.time()

        pathway_kld = (-0.5 * (1 + outs.logvar - outs.mu.pow(2) - outs.logvar.exp()).mean(0)).detach().numpy()
        top_features['kld'] = -1.*pathway_kld

        end_kld = time.time()
        kld_times.append(end_kld - start_kld)

        # Random pathway rankings
        print("Calc Random")
        np.random.seed(rand_seed)
        top_features['rand'] = np.random.randn(top_features.shape[0])

        times = pd.DataFrame()
        times['logvar_times'] = logvar_times
        times['ig_times'] = ig_times
        times['lr_times'] = lr_times
        times['kld_times'] = kld_times

        times.to_csv(save_path + args.dataset + '_times.csv')

        # impute or retrain
        def impute_benchmark(method, n_pathways=20):
            # Test reconstruction MSE after zeroing the top 1..n_pathways
            # latent dimensions ranked (ascending) by column `method`.
            method_recons_errors = []

            # for top 20 pathways
            for i in range(1, 1+n_pathways):

                # set pathways = 0.
                test_matrix = torch.tensor(test_data.X).float()
                test_matrix_embedded = basePMVAE.model(test_matrix).z
                for x in top_features.sort_values(method).index[:i]:
                    index_to_zero = list(top_features.index).index(x)
                    test_matrix_embedded[:, index_to_zero] = 0.

                module_outputs = basePMVAE.model.decoder_net(test_matrix_embedded)
                global_recon = basePMVAE.model.merge(module_outputs)
                recons_error = F.mse_loss(global_recon, test_matrix).detach().item()
                method_recons_errors.append(recons_error)
            return method_recons_errors

        def retrain_benchmark(method, n_pathways=20):
            # Test reconstruction MSE of a model retrained on only the top
            # 1..n_pathways pathways ranked (ascending) by column `method`.
            method_recons_errors = []
            # Honour n_pathways (previously hard-coded to 20), matching
            # impute_benchmark.
            for i in range(1, 1+n_pathways):

                # get cumulative pathways
                A_new = []
                for x in top_features.sort_values(method).index[:i]:
                    A_new.append(membership_mask.loc[x, :].values.reshape(1, -1))
                A_new = np.concatenate(A_new, axis=0)

                reducedVAE = pmVAEModel(
                    A_new,
                    [12],
                    1,
                    beta=1e-05,
                    terms=list(range(A_new.shape[0])),
                    add_auxiliary_module=False
                )

                reducedVAE.train(tr_ds, val_ds, checkpoint_path=args.dataset + '_' + args.removal + '_reducedVAE.pkl', max_epochs=50)

                # NOTE(review): requires a CUDA device; presumably the
                # trained model still lives on the GPU here -- confirm.
                test_matrix = torch.tensor(test_data.X).float().cuda()
                global_recon = reducedVAE.model(test_matrix).global_recon

                recons_error = F.mse_loss(global_recon, test_matrix).detach().item()
                method_recons_errors.append(recons_error)
            return method_recons_errors

        # run impute or retrain
        if args.removal == "impute":
            print("Impute Logvar")
            logvar_results[rand_seed, :] = impute_benchmark('logvar')
            print("Impute IG")
            ig_results[rand_seed, :] = impute_benchmark('IG')
            print("Impute LR")
            lr_results[rand_seed, :] = impute_benchmark('lr_score')
            print("Impute KLD")
            kld_results[rand_seed, :] = impute_benchmark('kld')
            print("Impute RAND")
            rand_results[rand_seed, :] = impute_benchmark('rand')

        if args.removal == "retrain":
            print("Retrain Logvar")
            logvar_results[rand_seed, :] = retrain_benchmark('logvar')
            print("Retrain IG")
            ig_results[rand_seed, :] = retrain_benchmark('IG')
            print("Retrain LR")
            lr_results[rand_seed, :] = retrain_benchmark('lr_score')
            print("Retrain KLD")
            kld_results[rand_seed, :] = retrain_benchmark('kld')
            print("Retrain RAND")
            rand_results[rand_seed, :] = retrain_benchmark('rand')

        # save results every iteration so that if it crashes
        # there's at least some progress
        with open('{}/{}_{}_logvar.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, logvar_results)
        with open('{}/{}_{}_ig.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, ig_results)
        with open('{}/{}_{}_lr.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, lr_results)
        with open('{}/{}_{}_kld.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, kld_results)
        with open('{}/{}_{}_rand.npy'.format(save_path, args.dataset, args.removal), 'wb') as f:
            np.save(f, rand_results)


if __name__ == '__main__':
    main()
418 |
--------------------------------------------------------------------------------
/figures/supplementary_figures/drop_g.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import anndata\n",
10 | "import numpy as np\n",
11 | "import pandas as pd\n",
12 | "\n",
13 | "import torch\n",
14 | "\n",
15 | "import os\n",
16 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\""
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "from utils import load_annotations"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "from sklearn.model_selection import train_test_split"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "# load data"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 4,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "data = anndata.read('data/kang_count.h5ad')"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 5,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "pathway_ann_matrix = load_annotations(\n",
60 | " 'data/c2.cp.reactome.v7.4.symbols.gmt',\n",
61 | " data.var_names,\n",
62 | " min_genes=13\n",
63 | ")"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 6,
69 | "metadata": {},
70 | "outputs": [
71 | {
72 | "data": {
73 | "text/plain": [
74 | "['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']"
75 | ]
76 | },
77 | "execution_count": 6,
78 | "metadata": {},
79 | "output_type": "execute_result"
80 | }
81 | ],
82 | "source": [
83 | "[x for x in pathway_ann_matrix.columns if 'G2_M_TRANSITION' in x or 'PLK1' in x]"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 7,
89 | "metadata": {},
90 | "outputs": [
91 | {
92 | "data": {
93 | "text/html": [
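94 | "<div>\n",
95 | "<style scoped>\n",
96 | "    .dataframe tbody tr th:only-of-type {\n",
97 | "        vertical-align: middle;\n",
98 | "    }\n",
99 | "\n",
100 | "    .dataframe tbody tr th {\n",
101 | "        vertical-align: top;\n",
102 | "    }\n",
103 | "\n",
104 | "    .dataframe thead th {\n",
105 | "        text-align: right;\n",
106 | "    }\n",
107 | "</style>\n",
108 | "<table border=\"1\" class=\"dataframe\">\n",
109 | "  <thead>\n",
110 | "    <tr style=\"text-align: right;\">\n",
111 | "      <th></th>\n",
112 | "      <th>REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION</th>\n",
113 | "    </tr>\n",
114 | "    <tr>\n",
115 | "      <th>index</th>\n",
116 | "      <th></th>\n",
117 | "    </tr>\n",
118 | "  </thead>\n",
119 | "  <tbody>\n",
120 | "    <tr>\n",
121 | "      <th>PPP1CB</th>\n",
122 | "      <td>True</td>\n",
123 | "    </tr>\n",
124 | "    <tr>\n",
125 | "      <th>CLASP1</th>\n",
126 | "      <td>True</td>\n",
127 | "    </tr>\n",
128 | "    <tr>\n",
129 | "      <th>TUBA4A</th>\n",
130 | "      <td>True</td>\n",
131 | "    </tr>\n",
132 | "    <tr>\n",
133 | "      <th>CCNB1</th>\n",
134 | "      <td>True</td>\n",
135 | "    </tr>\n",
136 | "    <tr>\n",
137 | "      <th>TUBB</th>\n",
138 | "      <td>True</td>\n",
139 | "    </tr>\n",
140 | "    <tr>\n",
141 | "      <th>CUL1</th>\n",
142 | "      <td>True</td>\n",
143 | "    </tr>\n",
144 | "    <tr>\n",
145 | "      <th>CDK5RAP2</th>\n",
146 | "      <td>True</td>\n",
147 | "    </tr>\n",
148 | "    <tr>\n",
149 | "      <th>TUBB4B</th>\n",
150 | "      <td>True</td>\n",
151 | "    </tr>\n",
152 | "    <tr>\n",
153 | "      <th>CDK1</th>\n",
154 | "      <td>True</td>\n",
155 | "    </tr>\n",
156 | "    <tr>\n",
157 | "      <th>ACTR1A</th>\n",
158 | "      <td>True</td>\n",
159 | "    </tr>\n",
160 | "    <tr>\n",
161 | "      <th>TUBA1A</th>\n",
162 | "      <td>True</td>\n",
163 | "    </tr>\n",
164 | "    <tr>\n",
165 | "      <th>DCTN2</th>\n",
166 | "      <td>True</td>\n",
167 | "    </tr>\n",
168 | "    <tr>\n",
169 | "      <th>CENPJ</th>\n",
170 | "      <td>True</td>\n",
171 | "    </tr>\n",
172 | "    <tr>\n",
173 | "      <th>HSP90AA1</th>\n",
174 | "      <td>True</td>\n",
175 | "    </tr>\n",
176 | "    <tr>\n",
177 | "      <th>CCNB2</th>\n",
178 | "      <td>True</td>\n",
179 | "    </tr>\n",
180 | "    <tr>\n",
181 | "      <th>AURKA</th>\n",
182 | "      <td>True</td>\n",
183 | "    </tr>\n",
184 | "  </tbody>\n",
185 | "</table>\n",
186 | "</div>"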
187 | ],
188 | "text/plain": [
189 | " REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION\n",
190 | "index \n",
191 | "PPP1CB True \n",
192 | "CLASP1 True \n",
193 | "TUBA4A True \n",
194 | "CCNB1 True \n",
195 | "TUBB True \n",
196 | "CUL1 True \n",
197 | "CDK5RAP2 True \n",
198 | "TUBB4B True \n",
199 | "CDK1 True \n",
200 | "ACTR1A True \n",
201 | "TUBA1A True \n",
202 | "DCTN2 True \n",
203 | "CENPJ True \n",
204 | "HSP90AA1 True \n",
205 | "CCNB2 True \n",
206 | "AURKA True "
207 | ]
208 | },
209 | "execution_count": 7,
210 | "metadata": {},
211 | "output_type": "execute_result"
212 | }
213 | ],
214 | "source": [
215 | "pathway_ann_matrix[pathway_ann_matrix['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']][['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']]"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 8,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "true_pathways_list = [x for x in pathway_ann_matrix.columns if 'G2_M_TRANSITION' in x or 'PLK1' in x]\n",
225 | "drop_pathway_ann_matrix = pathway_ann_matrix.loc[:,~pathway_ann_matrix.columns.isin(true_pathways_list)]"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 9,
231 | "metadata": {},
232 | "outputs": [],
233 | "source": [
234 | "data.varm['annotations'] = drop_pathway_ann_matrix"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 10,
240 | "metadata": {},
241 | "outputs": [
242 | {
243 | "data": {
244 | "text/html": [
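245 | "<div>\n",
246 | "<style scoped>\n",
247 | "    .dataframe tbody tr th:only-of-type {\n",
248 | "        vertical-align: middle;\n",
249 | "    }\n",
250 | "\n",
251 | "    .dataframe tbody tr th {\n",
252 | "        vertical-align: top;\n",
253 | "    }\n",
254 | "\n",
255 | "    .dataframe thead th {\n",
256 | "        text-align: right;\n",
257 | "    }\n",
258 | "</style>\n",
259 | "<table border=\"1\" class=\"dataframe\">\n",
260 | "  <thead>\n",
261 | "    <tr style=\"text-align: right;\">\n",
262 | "      <th></th>\n",
263 | "      <th>REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM</th>\n",
264 | "      <th>REACTOME_INTERFERON_ALPHA_BETA_SIGNALING</th>\n",
265 | "      <th>REACTOME_INTERFERON_SIGNALING</th>\n",
266 | "    </tr>\n",
267 | "    <tr>\n",
268 | "      <th>index</th>\n",
269 | "      <th></th>\n",
270 | "      <th></th>\n",
271 | "      <th></th>\n",
272 | "    </tr>\n",
273 | "  </thead>\n",
274 | "  <tbody>\n",
275 | "    <tr>\n",
276 | "      <th>ISG15</th>\n",
277 | "      <td>True</td>\n",
278 | "      <td>True</td>\n",
279 | "      <td>True</td>\n",
280 | "    </tr>\n",
281 | "    <tr>\n",
282 | "      <th>MIB2</th>\n",
283 | "      <td>False</td>\n",
284 | "      <td>False</td>\n",
285 | "      <td>False</td>\n",
286 | "    </tr>\n",
287 | "    <tr>\n",
288 | "      <th>PRKCZ</th>\n",
289 | "      <td>False</td>\n",
290 | "      <td>False</td>\n",
291 | "      <td>False</td>\n",
292 | "    </tr>\n",
293 | "    <tr>\n",
294 | "      <th>KCNAB2</th>\n",
295 | "      <td>False</td>\n",
296 | "      <td>False</td>\n",
297 | "      <td>False</td>\n",
298 | "    </tr>\n",
299 | "    <tr>\n",
300 | "      <th>CTNNBIP1</th>\n",
301 | "      <td>False</td>\n",
302 | "      <td>False</td>\n",
303 | "      <td>False</td>\n",
304 | "    </tr>\n",
305 | "    <tr>\n",
306 | "      <th>...</th>\n",
307 | "      <td>...</td>\n",
308 | "      <td>...</td>\n",
309 | "      <td>...</td>\n",
310 | "    </tr>\n",
311 | "    <tr>\n",
312 | "      <th>CYP19A1</th>\n",
313 | "      <td>False</td>\n",
314 | "      <td>False</td>\n",
315 | "      <td>False</td>\n",
316 | "    </tr>\n",
317 | "    <tr>\n",
318 | "      <th>RAP1GAP2</th>\n",
319 | "      <td>False</td>\n",
320 | "      <td>False</td>\n",
321 | "      <td>False</td>\n",
322 | "    </tr>\n",
323 | "    <tr>\n",
324 | "      <th>SSTR2</th>\n",
325 | "      <td>False</td>\n",
326 | "      <td>False</td>\n",
327 | "      <td>False</td>\n",
328 | "    </tr>\n",
329 | "    <tr>\n",
330 | "      <th>BIRC5</th>\n",
331 | "      <td>True</td>\n",
332 | "      <td>False</td>\n",
333 | "      <td>False</td>\n",
334 | "    </tr>\n",
335 | "    <tr>\n",
336 | "      <th>PLCB4</th>\n",
337 | "      <td>False</td>\n",
338 | "      <td>False</td>\n",
339 | "      <td>False</td>\n",
340 | "    </tr>\n",
341 | "  </tbody>\n",
342 | "</table>\n",
343 | "<p>979 rows × 3 columns</p>\n",
344 | "</div>"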
345 | ],
346 | "text/plain": [
347 | " REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM \\\n",
348 | "index \n",
349 | "ISG15 True \n",
350 | "MIB2 False \n",
351 | "PRKCZ False \n",
352 | "KCNAB2 False \n",
353 | "CTNNBIP1 False \n",
354 | "... ... \n",
355 | "CYP19A1 False \n",
356 | "RAP1GAP2 False \n",
357 | "SSTR2 False \n",
358 | "BIRC5 True \n",
359 | "PLCB4 False \n",
360 | "\n",
361 | " REACTOME_INTERFERON_ALPHA_BETA_SIGNALING \\\n",
362 | "index \n",
363 | "ISG15 True \n",
364 | "MIB2 False \n",
365 | "PRKCZ False \n",
366 | "KCNAB2 False \n",
367 | "CTNNBIP1 False \n",
368 | "... ... \n",
369 | "CYP19A1 False \n",
370 | "RAP1GAP2 False \n",
371 | "SSTR2 False \n",
372 | "BIRC5 False \n",
373 | "PLCB4 False \n",
374 | "\n",
375 | " REACTOME_INTERFERON_SIGNALING \n",
376 | "index \n",
377 | "ISG15 True \n",
378 | "MIB2 False \n",
379 | "PRKCZ False \n",
380 | "KCNAB2 False \n",
381 | "CTNNBIP1 False \n",
382 | "... ... \n",
383 | "CYP19A1 False \n",
384 | "RAP1GAP2 False \n",
385 | "SSTR2 False \n",
386 | "BIRC5 False \n",
387 | "PLCB4 False \n",
388 | "\n",
389 | "[979 rows x 3 columns]"
390 | ]
391 | },
392 | "execution_count": 10,
393 | "metadata": {},
394 | "output_type": "execute_result"
395 | }
396 | ],
397 | "source": [
398 | "drop_pathway_ann_matrix.iloc[:,drop_pathway_ann_matrix.loc['IFITM3',:].values == True]"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": 11,
404 | "metadata": {},
405 | "outputs": [],
406 | "source": [
407 | "membership_mask = data.varm['annotations'].astype(bool).T\n",
408 | "X_train, X_test = train_test_split(\n",
409 | " data.X,\n",
410 | " test_size=0.25,\n",
411 | " shuffle=True,\n",
412 | " random_state=0,\n",
413 | " \n",
414 | ")"
415 | ]
416 | },
417 | {
418 | "cell_type": "markdown",
419 | "metadata": {},
420 | "source": [
421 | "# initialize model"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": 12,
427 | "metadata": {},
428 | "outputs": [],
429 | "source": [
430 | "from models import pmVAEModel"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 13,
436 | "metadata": {},
437 | "outputs": [],
438 | "source": [
439 | "kangVAE = pmVAEModel(\n",
440 | " membership_mask.values,\n",
441 | " [12],\n",
442 | " 4,\n",
443 | " beta=1e-05,\n",
444 | " terms=membership_mask.index,\n",
445 | " add_auxiliary_module=True\n",
446 | ")"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": 14,
452 | "metadata": {},
453 | "outputs": [
454 | {
455 | "data": {
456 | "text/plain": [
457 | "pmVAE(\n",
458 | " (encoder_net): pmEncoder(\n",
459 | " (encoder_dense_1): CustomizedLinear(input_features=979, output_features=2400, bias=True)\n",
460 | " (encoder_norm_1): BatchNorm1d(2400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
461 | " (encoder_elu_1): ELU(alpha=1.0, inplace=True)\n",
462 | " (encoder_dense_2): CustomizedLinear(input_features=2400, output_features=1600, bias=True)\n",
463 | " (encoder_norm_2): BatchNorm1d(1600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
464 | " )\n",
465 | " (decoder_net): pmDecoder(\n",
466 | " (decoder_dense_1): CustomizedLinear(input_features=800, output_features=2400, bias=True)\n",
467 | " (decoder_norm_1): BatchNorm1d(2400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
468 | " (decoder_elu_1): ELU(alpha=1.0, inplace=True)\n",
469 | " )\n",
470 | " (merge_layer): CustomizedLinear(input_features=2400, output_features=979, bias=False)\n",
471 | ")"
472 | ]
473 | },
474 | "execution_count": 14,
475 | "metadata": {},
476 | "output_type": "execute_result"
477 | }
478 | ],
479 | "source": [
480 | "kangVAE.model"
481 | ]
482 | },
483 | {
484 | "cell_type": "markdown",
485 | "metadata": {},
486 | "source": [
487 | "# train model"
488 | ]
489 | },
490 | {
491 | "cell_type": "code",
492 | "execution_count": null,
493 | "metadata": {},
494 | "outputs": [],
495 | "source": [
496 | "kangVAE.train(train_ds, test_ds, checkpoint_path='pmvae_dropG2M_checkpoint.pkl')"
497 | ]
498 | },
499 | {
500 | "cell_type": "markdown",
501 | "metadata": {},
502 | "source": [
503 | "# explain model"
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "execution_count": 15,
509 | "metadata": {},
510 | "outputs": [],
511 | "source": [
512 | "kangVAE.load_checkpoint('saved_models/pmvae_dropG2M_checkpoint.pkl.best_loss')"
513 | ]
514 | },
515 | {
516 | "cell_type": "code",
517 | "execution_count": 16,
518 | "metadata": {},
519 | "outputs": [],
520 | "source": [
521 | "kangVAE.set_gpu(False)"
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": 17,
527 | "metadata": {},
528 | "outputs": [
529 | {
530 | "data": {
531 | "text/plain": [
532 | "800"
533 | ]
534 | },
535 | "execution_count": 17,
536 | "metadata": {},
537 | "output_type": "execute_result"
538 | }
539 | ],
540 | "source": [
541 | "len(kangVAE.latent_space_names())"
542 | ]
543 | },
544 | {
545 | "cell_type": "code",
546 | "execution_count": 18,
547 | "metadata": {},
548 | "outputs": [
549 | {
550 | "data": {
551 | "text/plain": [
552 | "796"
553 | ]
554 | },
555 | "execution_count": 18,
556 | "metadata": {},
557 | "output_type": "execute_result"
558 | }
559 | ],
560 | "source": [
561 | "kangVAE.latent_space_names().index('AUXILIARY-0')"
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": 19,
567 | "metadata": {},
568 | "outputs": [
569 | {
570 | "data": {
571 | "text/plain": [
572 | "'AUXILIARY-0'"
573 | ]
574 | },
575 | "execution_count": 19,
576 | "metadata": {},
577 | "output_type": "execute_result"
578 | }
579 | ],
580 | "source": [
581 | "kangVAE.latent_space_names()[-4]"
582 | ]
583 | },
584 | {
585 | "cell_type": "code",
586 | "execution_count": 20,
587 | "metadata": {},
588 | "outputs": [
589 | {
590 | "data": {
591 | "text/plain": [
592 | "'AUXILIARY-1'"
593 | ]
594 | },
595 | "execution_count": 20,
596 | "metadata": {},
597 | "output_type": "execute_result"
598 | }
599 | ],
600 | "source": [
601 | "kangVAE.latent_space_names()[-3]"
602 | ]
603 | },
604 | {
605 | "cell_type": "code",
606 | "execution_count": 21,
607 | "metadata": {},
608 | "outputs": [
609 | {
610 | "data": {
611 | "text/plain": [
612 | "'AUXILIARY-2'"
613 | ]
614 | },
615 | "execution_count": 21,
616 | "metadata": {},
617 | "output_type": "execute_result"
618 | }
619 | ],
620 | "source": [
621 | "kangVAE.latent_space_names()[-2]"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": 22,
627 | "metadata": {},
628 | "outputs": [
629 | {
630 | "data": {
631 | "text/plain": [
632 | "'AUXILIARY-3'"
633 | ]
634 | },
635 | "execution_count": 22,
636 | "metadata": {},
637 | "output_type": "execute_result"
638 | }
639 | ],
640 | "source": [
641 | "kangVAE.latent_space_names()[-1]"
642 | ]
643 | },
644 | {
645 | "cell_type": "code",
646 | "execution_count": 23,
647 | "metadata": {},
648 | "outputs": [],
649 | "source": [
650 | "def model_latent_wrapper(x):\n",
651 | " outs = kangVAE.model(x)\n",
652 | " z = outs.mu\n",
653 | " return z[:,-4].reshape(-1,1) # which to explain"
654 | ]
655 | },
656 | {
657 | "cell_type": "code",
658 | "execution_count": 24,
659 | "metadata": {},
660 | "outputs": [],
661 | "source": [
662 | "from pathexplainer import PathExplainerTorch"
663 | ]
664 | },
665 | {
666 | "cell_type": "code",
667 | "execution_count": 25,
668 | "metadata": {},
669 | "outputs": [],
670 | "source": [
671 | "input_data = torch.tensor(data.X)\n",
672 | "input_data.requires_grad = True\n",
673 | "baseline_data = torch.zeros(data.X.shape[1])\n",
674 | "baseline_data.requires_grad = True"
675 | ]
676 | },
677 | {
678 | "cell_type": "code",
679 | "execution_count": 26,
680 | "metadata": {},
681 | "outputs": [],
682 | "source": [
683 | "explainer = PathExplainerTorch(model_latent_wrapper)\n",
684 | "attributions = explainer.attributions(input_data,\n",
685 | " baseline=baseline_data,\n",
686 | " num_samples=200,\n",
687 | " use_expectation=False)"
688 | ]
689 | },
690 | {
691 | "cell_type": "code",
692 | "execution_count": 27,
693 | "metadata": {},
694 | "outputs": [],
695 | "source": [
696 | "np_attribs = attributions.detach().numpy()"
697 | ]
698 | },
699 | {
700 | "cell_type": "code",
701 | "execution_count": 28,
702 | "metadata": {},
703 | "outputs": [],
704 | "source": [
705 | "top = pd.DataFrame(index=membership_mask.columns)\n",
706 | "top['means'] = np.abs(np_attribs).mean(0)\n",
707 | "top['stds'] = np.abs(np_attribs).std(0)\n"
708 | ]
709 | },
710 | {
711 | "cell_type": "code",
712 | "execution_count": 29,
713 | "metadata": {},
714 | "outputs": [
715 | {
716 | "data": {
717 | "text/html": [
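718 | "<div>\n",
719 | "<style scoped>\n",
720 | "    .dataframe tbody tr th:only-of-type {\n",
721 | "        vertical-align: middle;\n",
722 | "    }\n",
723 | "\n",
724 | "    .dataframe tbody tr th {\n",
725 | "        vertical-align: top;\n",
726 | "    }\n",
727 | "\n",
728 | "    .dataframe thead th {\n",
729 | "        text-align: right;\n",
730 | "    }\n",
731 | "</style>\n",
732 | "<table border=\"1\" class=\"dataframe\">\n",
733 | "  <thead>\n",
734 | "    <tr style=\"text-align: right;\">\n",
735 | "      <th></th>\n",
736 | "      <th>means</th>\n",
737 | "      <th>stds</th>\n",
738 | "    </tr>\n",
739 | "    <tr>\n",
740 | "      <th>index</th>\n",
741 | "      <th></th>\n",
742 | "      <th></th>\n",
743 | "    </tr>\n",
744 | "  </thead>\n",
745 | "  <tbody>\n",
746 | "    <tr>\n",
747 | "      <th>H2AFZ</th>\n",
748 | "      <td>1.558621</td>\n",
749 | "      <td>0.690636</td>\n",
750 | "    </tr>\n",
751 | "    <tr>\n",
752 | "      <th>IL8</th>\n",
753 | "      <td>0.588597</td>\n",
754 | "      <td>0.379918</td>\n",
755 | "    </tr>\n",
756 | "    <tr>\n",
757 | "      <th>PLA2G7</th>\n",
758 | "      <td>0.433617</td>\n",
759 | "      <td>0.340465</td>\n",
760 | "    </tr>\n",
761 | "    <tr>\n",
762 | "      <th>SSB</th>\n",
763 | "      <td>0.398044</td>\n",
764 | "      <td>0.208317</td>\n",
765 | "    </tr>\n",
766 | "    <tr>\n",
767 | "      <th>HIST1H2AC</th>\n",
768 | "      <td>0.234484</td>\n",
769 | "      <td>0.173549</td>\n",
770 | "    </tr>\n",
771 | "    <tr>\n",
772 | "      <th>...</th>\n",
773 | "      <td>...</td>\n",
774 | "      <td>...</td>\n",
775 | "    </tr>\n",
776 | "    <tr>\n",
777 | "      <th>IFNB1</th>\n",
778 | "      <td>0.000011</td>\n",
779 | "      <td>0.000189</td>\n",
780 | "    </tr>\n",
781 | "    <tr>\n",
782 | "      <th>PELI3</th>\n",
783 | "      <td>0.000010</td>\n",
784 | "      <td>0.000337</td>\n",
785 | "    </tr>\n",
786 | "    <tr>\n",
787 | "      <th>AURKB</th>\n",
788 | "      <td>0.000010</td>\n",
789 | "      <td>0.000136</td>\n",
790 | "    </tr>\n",
791 | "    <tr>\n",
792 | "      <th>SRGAP3</th>\n",
793 | "      <td>0.000010</td>\n",
794 | "      <td>0.000202</td>\n",
795 | "    </tr>\n",
796 | "    <tr>\n",
797 | "      <th>ATP6V0A4</th>\n",
798 | "      <td>0.000005</td>\n",
799 | "      <td>0.000120</td>\n",
800 | "    </tr>\n",
801 | "  </tbody>\n",
802 | "</table>\n",
803 | "<p>979 rows × 2 columns</p>\n",
804 | "</div>"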
805 | ],
806 | "text/plain": [
807 | " means stds\n",
808 | "index \n",
809 | "H2AFZ 1.558621 0.690636\n",
810 | "IL8 0.588597 0.379918\n",
811 | "PLA2G7 0.433617 0.340465\n",
812 | "SSB 0.398044 0.208317\n",
813 | "HIST1H2AC 0.234484 0.173549\n",
814 | "... ... ...\n",
815 | "IFNB1 0.000011 0.000189\n",
816 | "PELI3 0.000010 0.000337\n",
817 | "AURKB 0.000010 0.000136\n",
818 | "SRGAP3 0.000010 0.000202\n",
819 | "ATP6V0A4 0.000005 0.000120\n",
820 | "\n",
821 | "[979 rows x 2 columns]"
822 | ]
823 | },
824 | "execution_count": 29,
825 | "metadata": {},
826 | "output_type": "execute_result"
827 | }
828 | ],
829 | "source": [
830 | "top.sort_values('means',ascending=False)"
831 | ]
832 | },
833 | {
834 | "cell_type": "code",
835 | "execution_count": 30,
836 | "metadata": {},
837 | "outputs": [],
838 | "source": [
839 | "top.to_csv('kang_remove_g/aux_0.csv')"
840 | ]
841 | }
842 | ],
843 | "metadata": {
844 | "kernelspec": {
845 | "display_name": "newenv",
846 | "language": "python",
847 | "name": "newenv"
848 | },
849 | "language_info": {
850 | "codemirror_mode": {
851 | "name": "ipython",
852 | "version": 3
853 | },
854 | "file_extension": ".py",
855 | "mimetype": "text/x-python",
856 | "name": "python",
857 | "nbconvert_exporter": "python",
858 | "pygments_lexer": "ipython3",
859 | "version": "3.9.7"
860 | }
861 | },
862 | "nbformat": 4,
863 | "nbformat_minor": 4
864 | }
865 |
--------------------------------------------------------------------------------
/figures/supplementary_figures/.ipynb_checkpoints/drop_g-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import anndata\n",
10 | "import numpy as np\n",
11 | "import pandas as pd\n",
12 | "\n",
13 | "import torch\n",
14 | "\n",
15 | "import os\n",
16 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\""
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "from utils import load_annotations"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "from sklearn.model_selection import train_test_split"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "# load data"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 4,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "data = anndata.read('data/kang_count.h5ad')"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 5,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "pathway_ann_matrix = load_annotations(\n",
60 | " 'data/c2.cp.reactome.v7.4.symbols.gmt',\n",
61 | " data.var_names,\n",
62 | " min_genes=13\n",
63 | ")"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 6,
69 | "metadata": {},
70 | "outputs": [
71 | {
72 | "data": {
73 | "text/plain": [
74 | "['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']"
75 | ]
76 | },
77 | "execution_count": 6,
78 | "metadata": {},
79 | "output_type": "execute_result"
80 | }
81 | ],
82 | "source": [
83 | "[x for x in pathway_ann_matrix.columns if 'G2_M_TRANSITION' in x or 'PLK1' in x]"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 7,
89 | "metadata": {},
90 | "outputs": [
91 | {
92 | "data": {
93 | "text/html": [
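94 | "<div>\n",
95 | "<style scoped>\n",
96 | "    .dataframe tbody tr th:only-of-type {\n",
97 | "        vertical-align: middle;\n",
98 | "    }\n",
99 | "\n",
100 | "    .dataframe tbody tr th {\n",
101 | "        vertical-align: top;\n",
102 | "    }\n",
103 | "\n",
104 | "    .dataframe thead th {\n",
105 | "        text-align: right;\n",
106 | "    }\n",
107 | "</style>\n",
108 | "<table border=\"1\" class=\"dataframe\">\n",
109 | "  <thead>\n",
110 | "    <tr style=\"text-align: right;\">\n",
111 | "      <th></th>\n",
112 | "      <th>REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION</th>\n",
113 | "    </tr>\n",
114 | "    <tr>\n",
115 | "      <th>index</th>\n",
116 | "      <th></th>\n",
117 | "    </tr>\n",
118 | "  </thead>\n",
119 | "  <tbody>\n",
120 | "    <tr>\n",
121 | "      <th>PPP1CB</th>\n",
122 | "      <td>True</td>\n",
123 | "    </tr>\n",
124 | "    <tr>\n",
125 | "      <th>CLASP1</th>\n",
126 | "      <td>True</td>\n",
127 | "    </tr>\n",
128 | "    <tr>\n",
129 | "      <th>TUBA4A</th>\n",
130 | "      <td>True</td>\n",
131 | "    </tr>\n",
132 | "    <tr>\n",
133 | "      <th>CCNB1</th>\n",
134 | "      <td>True</td>\n",
135 | "    </tr>\n",
136 | "    <tr>\n",
137 | "      <th>TUBB</th>\n",
138 | "      <td>True</td>\n",
139 | "    </tr>\n",
140 | "    <tr>\n",
141 | "      <th>CUL1</th>\n",
142 | "      <td>True</td>\n",
143 | "    </tr>\n",
144 | "    <tr>\n",
145 | "      <th>CDK5RAP2</th>\n",
146 | "      <td>True</td>\n",
147 | "    </tr>\n",
148 | "    <tr>\n",
149 | "      <th>TUBB4B</th>\n",
150 | "      <td>True</td>\n",
151 | "    </tr>\n",
152 | "    <tr>\n",
153 | "      <th>CDK1</th>\n",
154 | "      <td>True</td>\n",
155 | "    </tr>\n",
156 | "    <tr>\n",
157 | "      <th>ACTR1A</th>\n",
158 | "      <td>True</td>\n",
159 | "    </tr>\n",
160 | "    <tr>\n",
161 | "      <th>TUBA1A</th>\n",
162 | "      <td>True</td>\n",
163 | "    </tr>\n",
164 | "    <tr>\n",
165 | "      <th>DCTN2</th>\n",
166 | "      <td>True</td>\n",
167 | "    </tr>\n",
168 | "    <tr>\n",
169 | "      <th>CENPJ</th>\n",
170 | "      <td>True</td>\n",
171 | "    </tr>\n",
172 | "    <tr>\n",
173 | "      <th>HSP90AA1</th>\n",
174 | "      <td>True</td>\n",
175 | "    </tr>\n",
176 | "    <tr>\n",
177 | "      <th>CCNB2</th>\n",
178 | "      <td>True</td>\n",
179 | "    </tr>\n",
180 | "    <tr>\n",
181 | "      <th>AURKA</th>\n",
182 | "      <td>True</td>\n",
183 | "    </tr>\n",
184 | "  </tbody>\n",
185 | "</table>\n",
186 | "</div>"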
187 | ],
188 | "text/plain": [
189 | " REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION\n",
190 | "index \n",
191 | "PPP1CB True \n",
192 | "CLASP1 True \n",
193 | "TUBA4A True \n",
194 | "CCNB1 True \n",
195 | "TUBB True \n",
196 | "CUL1 True \n",
197 | "CDK5RAP2 True \n",
198 | "TUBB4B True \n",
199 | "CDK1 True \n",
200 | "ACTR1A True \n",
201 | "TUBA1A True \n",
202 | "DCTN2 True \n",
203 | "CENPJ True \n",
204 | "HSP90AA1 True \n",
205 | "CCNB2 True \n",
206 | "AURKA True "
207 | ]
208 | },
209 | "execution_count": 7,
210 | "metadata": {},
211 | "output_type": "execute_result"
212 | }
213 | ],
214 | "source": [
215 | "pathway_ann_matrix[pathway_ann_matrix['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']][['REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION']]"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 8,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "true_pathways_list = [x for x in pathway_ann_matrix.columns if 'G2_M_TRANSITION' in x or 'PLK1' in x]\n",
225 | "drop_pathway_ann_matrix = pathway_ann_matrix.loc[:,~pathway_ann_matrix.columns.isin(true_pathways_list)]"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 9,
231 | "metadata": {},
232 | "outputs": [],
233 | "source": [
234 | "data.varm['annotations'] = drop_pathway_ann_matrix"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 10,
240 | "metadata": {},
241 | "outputs": [
242 | {
243 | "data": {
244 | "text/html": [
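245 | "<div>\n",
246 | "<style scoped>\n",
247 | "    .dataframe tbody tr th:only-of-type {\n",
248 | "        vertical-align: middle;\n",
249 | "    }\n",
250 | "\n",
251 | "    .dataframe tbody tr th {\n",
252 | "        vertical-align: top;\n",
253 | "    }\n",
254 | "\n",
255 | "    .dataframe thead th {\n",
256 | "        text-align: right;\n",
257 | "    }\n",
258 | "</style>\n",
259 | "<table border=\"1\" class=\"dataframe\">\n",
260 | "  <thead>\n",
261 | "    <tr style=\"text-align: right;\">\n",
262 | "      <th></th>\n",
263 | "      <th>REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM</th>\n",
264 | "      <th>REACTOME_INTERFERON_ALPHA_BETA_SIGNALING</th>\n",
265 | "      <th>REACTOME_INTERFERON_SIGNALING</th>\n",
266 | "    </tr>\n",
267 | "    <tr>\n",
268 | "      <th>index</th>\n",
269 | "      <th></th>\n",
270 | "      <th></th>\n",
271 | "      <th></th>\n",
272 | "    </tr>\n",
273 | "  </thead>\n",
274 | "  <tbody>\n",
275 | "    <tr>\n",
276 | "      <th>ISG15</th>\n",
277 | "      <td>True</td>\n",
278 | "      <td>True</td>\n",
279 | "      <td>True</td>\n",
280 | "    </tr>\n",
281 | "    <tr>\n",
282 | "      <th>MIB2</th>\n",
283 | "      <td>False</td>\n",
284 | "      <td>False</td>\n",
285 | "      <td>False</td>\n",
286 | "    </tr>\n",
287 | "    <tr>\n",
288 | "      <th>PRKCZ</th>\n",
289 | "      <td>False</td>\n",
290 | "      <td>False</td>\n",
291 | "      <td>False</td>\n",
292 | "    </tr>\n",
293 | "    <tr>\n",
294 | "      <th>KCNAB2</th>\n",
295 | "      <td>False</td>\n",
296 | "      <td>False</td>\n",
297 | "      <td>False</td>\n",
298 | "    </tr>\n",
299 | "    <tr>\n",
300 | "      <th>CTNNBIP1</th>\n",
301 | "      <td>False</td>\n",
302 | "      <td>False</td>\n",
303 | "      <td>False</td>\n",
304 | "    </tr>\n",
305 | "    <tr>\n",
306 | "      <th>...</th>\n",
307 | "      <td>...</td>\n",
308 | "      <td>...</td>\n",
309 | "      <td>...</td>\n",
310 | "    </tr>\n",
311 | "    <tr>\n",
312 | "      <th>CYP19A1</th>\n",
313 | "      <td>False</td>\n",
314 | "      <td>False</td>\n",
315 | "      <td>False</td>\n",
316 | "    </tr>\n",
317 | "    <tr>\n",
318 | "      <th>RAP1GAP2</th>\n",
319 | "      <td>False</td>\n",
320 | "      <td>False</td>\n",
321 | "      <td>False</td>\n",
322 | "    </tr>\n",
323 | "    <tr>\n",
324 | "      <th>SSTR2</th>\n",
325 | "      <td>False</td>\n",
326 | "      <td>False</td>\n",
327 | "      <td>False</td>\n",
328 | "    </tr>\n",
329 | "    <tr>\n",
330 | "      <th>BIRC5</th>\n",
331 | "      <td>True</td>\n",
332 | "      <td>False</td>\n",
333 | "      <td>False</td>\n",
334 | "    </tr>\n",
335 | "    <tr>\n",
336 | "      <th>PLCB4</th>\n",
337 | "      <td>False</td>\n",
338 | "      <td>False</td>\n",
339 | "      <td>False</td>\n",
340 | "    </tr>\n",
341 | "  </tbody>\n",
342 | "</table>\n",
343 | "<p>979 rows × 3 columns</p>\n",
344 | "</div>"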
345 | ],
346 | "text/plain": [
347 | " REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM \\\n",
348 | "index \n",
349 | "ISG15 True \n",
350 | "MIB2 False \n",
351 | "PRKCZ False \n",
352 | "KCNAB2 False \n",
353 | "CTNNBIP1 False \n",
354 | "... ... \n",
355 | "CYP19A1 False \n",
356 | "RAP1GAP2 False \n",
357 | "SSTR2 False \n",
358 | "BIRC5 True \n",
359 | "PLCB4 False \n",
360 | "\n",
361 | " REACTOME_INTERFERON_ALPHA_BETA_SIGNALING \\\n",
362 | "index \n",
363 | "ISG15 True \n",
364 | "MIB2 False \n",
365 | "PRKCZ False \n",
366 | "KCNAB2 False \n",
367 | "CTNNBIP1 False \n",
368 | "... ... \n",
369 | "CYP19A1 False \n",
370 | "RAP1GAP2 False \n",
371 | "SSTR2 False \n",
372 | "BIRC5 False \n",
373 | "PLCB4 False \n",
374 | "\n",
375 | " REACTOME_INTERFERON_SIGNALING \n",
376 | "index \n",
377 | "ISG15 True \n",
378 | "MIB2 False \n",
379 | "PRKCZ False \n",
380 | "KCNAB2 False \n",
381 | "CTNNBIP1 False \n",
382 | "... ... \n",
383 | "CYP19A1 False \n",
384 | "RAP1GAP2 False \n",
385 | "SSTR2 False \n",
386 | "BIRC5 False \n",
387 | "PLCB4 False \n",
388 | "\n",
389 | "[979 rows x 3 columns]"
390 | ]
391 | },
392 | "execution_count": 10,
393 | "metadata": {},
394 | "output_type": "execute_result"
395 | }
396 | ],
397 | "source": [
398 | "drop_pathway_ann_matrix.iloc[:,drop_pathway_ann_matrix.loc['IFITM3',:].values == True]"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": 11,
404 | "metadata": {},
405 | "outputs": [],
406 | "source": [
407 | "membership_mask = data.varm['annotations'].astype(bool).T\n",
408 | "X_train, X_test = train_test_split(\n",
409 | " data.X,\n",
410 | " test_size=0.25,\n",
411 | " shuffle=True,\n",
412 | " random_state=0,\n",
413 | " \n",
414 | ")"
415 | ]
416 | },
417 | {
418 | "cell_type": "markdown",
419 | "metadata": {},
420 | "source": [
421 | "# initialize model"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": 12,
427 | "metadata": {},
428 | "outputs": [],
429 | "source": [
430 | "from models import pmVAEModel"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 13,
436 | "metadata": {},
437 | "outputs": [],
438 | "source": [
439 | "kangVAE = pmVAEModel(\n",
440 | " membership_mask.values,\n",
441 | " [12],\n",
442 | " 4,\n",
443 | " beta=1e-05,\n",
444 | " terms=membership_mask.index,\n",
445 | " add_auxiliary_module=True\n",
446 | ")"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": 14,
452 | "metadata": {},
453 | "outputs": [
454 | {
455 | "data": {
456 | "text/plain": [
457 | "pmVAE(\n",
458 | " (encoder_net): pmEncoder(\n",
459 | " (encoder_dense_1): CustomizedLinear(input_features=979, output_features=2400, bias=True)\n",
460 | " (encoder_norm_1): BatchNorm1d(2400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
461 | " (encoder_elu_1): ELU(alpha=1.0, inplace=True)\n",
462 | " (encoder_dense_2): CustomizedLinear(input_features=2400, output_features=1600, bias=True)\n",
463 | " (encoder_norm_2): BatchNorm1d(1600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
464 | " )\n",
465 | " (decoder_net): pmDecoder(\n",
466 | " (decoder_dense_1): CustomizedLinear(input_features=800, output_features=2400, bias=True)\n",
467 | " (decoder_norm_1): BatchNorm1d(2400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
468 | " (decoder_elu_1): ELU(alpha=1.0, inplace=True)\n",
469 | " )\n",
470 | " (merge_layer): CustomizedLinear(input_features=2400, output_features=979, bias=False)\n",
471 | ")"
472 | ]
473 | },
474 | "execution_count": 14,
475 | "metadata": {},
476 | "output_type": "execute_result"
477 | }
478 | ],
479 | "source": [
480 | "kangVAE.model"
481 | ]
482 | },
483 | {
484 | "cell_type": "markdown",
485 | "metadata": {},
486 | "source": [
487 | "# train model"
488 | ]
489 | },
490 | {
491 | "cell_type": "code",
492 | "execution_count": null,
493 | "metadata": {},
494 | "outputs": [],
495 | "source": [
496 | "kangVAE.train(train_ds, test_ds, checkpoint_path='pmvae_dropG2M_checkpoint.pkl')"
497 | ]
498 | },
499 | {
500 | "cell_type": "markdown",
501 | "metadata": {},
502 | "source": [
503 | "# explain model"
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "execution_count": 15,
509 | "metadata": {},
510 | "outputs": [],
511 | "source": [
512 | "kangVAE.load_checkpoint('saved_models/pmvae_dropG2M_checkpoint.pkl.best_loss')"
513 | ]
514 | },
515 | {
516 | "cell_type": "code",
517 | "execution_count": 16,
518 | "metadata": {},
519 | "outputs": [],
520 | "source": [
521 | "kangVAE.set_gpu(False)"
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": 17,
527 | "metadata": {},
528 | "outputs": [
529 | {
530 | "data": {
531 | "text/plain": [
532 | "800"
533 | ]
534 | },
535 | "execution_count": 17,
536 | "metadata": {},
537 | "output_type": "execute_result"
538 | }
539 | ],
540 | "source": [
541 | "len(kangVAE.latent_space_names())"
542 | ]
543 | },
544 | {
545 | "cell_type": "code",
546 | "execution_count": 18,
547 | "metadata": {},
548 | "outputs": [
549 | {
550 | "data": {
551 | "text/plain": [
552 | "796"
553 | ]
554 | },
555 | "execution_count": 18,
556 | "metadata": {},
557 | "output_type": "execute_result"
558 | }
559 | ],
560 | "source": [
561 | "kangVAE.latent_space_names().index('AUXILIARY-0')"
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": 19,
567 | "metadata": {},
568 | "outputs": [
569 | {
570 | "data": {
571 | "text/plain": [
572 | "'AUXILIARY-0'"
573 | ]
574 | },
575 | "execution_count": 19,
576 | "metadata": {},
577 | "output_type": "execute_result"
578 | }
579 | ],
580 | "source": [
581 | "kangVAE.latent_space_names()[-4]"
582 | ]
583 | },
584 | {
585 | "cell_type": "code",
586 | "execution_count": 20,
587 | "metadata": {},
588 | "outputs": [
589 | {
590 | "data": {
591 | "text/plain": [
592 | "'AUXILIARY-1'"
593 | ]
594 | },
595 | "execution_count": 20,
596 | "metadata": {},
597 | "output_type": "execute_result"
598 | }
599 | ],
600 | "source": [
601 | "kangVAE.latent_space_names()[-3]"
602 | ]
603 | },
604 | {
605 | "cell_type": "code",
606 | "execution_count": 21,
607 | "metadata": {},
608 | "outputs": [
609 | {
610 | "data": {
611 | "text/plain": [
612 | "'AUXILIARY-2'"
613 | ]
614 | },
615 | "execution_count": 21,
616 | "metadata": {},
617 | "output_type": "execute_result"
618 | }
619 | ],
620 | "source": [
621 | "kangVAE.latent_space_names()[-2]"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": 22,
627 | "metadata": {},
628 | "outputs": [
629 | {
630 | "data": {
631 | "text/plain": [
632 | "'AUXILIARY-3'"
633 | ]
634 | },
635 | "execution_count": 22,
636 | "metadata": {},
637 | "output_type": "execute_result"
638 | }
639 | ],
640 | "source": [
641 | "kangVAE.latent_space_names()[-1]"
642 | ]
643 | },
644 | {
645 | "cell_type": "code",
646 | "execution_count": 23,
647 | "metadata": {},
648 | "outputs": [],
649 | "source": [
650 | "def model_latent_wrapper(x):\n",
651 | " outs = kangVAE.model(x)\n",
652 | " z = outs.mu\n",
653 | " return z[:,-4].reshape(-1,1) # which to explain"
654 | ]
655 | },
656 | {
657 | "cell_type": "code",
658 | "execution_count": 24,
659 | "metadata": {},
660 | "outputs": [],
661 | "source": [
662 | "from pathexplainer import PathExplainerTorch"
663 | ]
664 | },
665 | {
666 | "cell_type": "code",
667 | "execution_count": 25,
668 | "metadata": {},
669 | "outputs": [],
670 | "source": [
671 | "input_data = torch.tensor(data.X)\n",
672 | "input_data.requires_grad = True\n",
673 | "baseline_data = torch.zeros(data.X.shape[1])\n",
674 | "baseline_data.requires_grad = True"
675 | ]
676 | },
677 | {
678 | "cell_type": "code",
679 | "execution_count": 26,
680 | "metadata": {},
681 | "outputs": [],
682 | "source": [
683 | "explainer = PathExplainerTorch(model_latent_wrapper)\n",
684 | "attributions = explainer.attributions(input_data,\n",
685 | " baseline=baseline_data,\n",
686 | " num_samples=200,\n",
687 | " use_expectation=False)"
688 | ]
689 | },
690 | {
691 | "cell_type": "code",
692 | "execution_count": 27,
693 | "metadata": {},
694 | "outputs": [],
695 | "source": [
696 | "np_attribs = attributions.detach().numpy()"
697 | ]
698 | },
699 | {
700 | "cell_type": "code",
701 | "execution_count": 28,
702 | "metadata": {},
703 | "outputs": [],
704 | "source": [
705 | "top = pd.DataFrame(index=membership_mask.columns)\n",
706 | "top['means'] = np.abs(np_attribs).mean(0)\n",
707 | "top['stds'] = np.abs(np_attribs).std(0)\n"
708 | ]
709 | },
710 | {
711 | "cell_type": "code",
712 | "execution_count": 29,
713 | "metadata": {},
714 | "outputs": [
715 | {
716 | "data": {
717 | "text/html": [
718 | "\n",
719 | "\n",
732 | "
\n",
733 | " \n",
734 | " \n",
735 | " | \n",
736 | " means | \n",
737 | " stds | \n",
738 | "
\n",
739 | " \n",
740 | " | index | \n",
741 | " | \n",
742 | " | \n",
743 | "
\n",
744 | " \n",
745 | " \n",
746 | " \n",
747 | " | H2AFZ | \n",
748 | " 1.558621 | \n",
749 | " 0.690636 | \n",
750 | "
\n",
751 | " \n",
752 | " | IL8 | \n",
753 | " 0.588597 | \n",
754 | " 0.379918 | \n",
755 | "
\n",
756 | " \n",
757 | " | PLA2G7 | \n",
758 | " 0.433617 | \n",
759 | " 0.340465 | \n",
760 | "
\n",
761 | " \n",
762 | " | SSB | \n",
763 | " 0.398044 | \n",
764 | " 0.208317 | \n",
765 | "
\n",
766 | " \n",
767 | " | HIST1H2AC | \n",
768 | " 0.234484 | \n",
769 | " 0.173549 | \n",
770 | "
\n",
771 | " \n",
772 | " | ... | \n",
773 | " ... | \n",
774 | " ... | \n",
775 | "
\n",
776 | " \n",
777 | " | IFNB1 | \n",
778 | " 0.000011 | \n",
779 | " 0.000189 | \n",
780 | "
\n",
781 | " \n",
782 | " | PELI3 | \n",
783 | " 0.000010 | \n",
784 | " 0.000337 | \n",
785 | "
\n",
786 | " \n",
787 | " | AURKB | \n",
788 | " 0.000010 | \n",
789 | " 0.000136 | \n",
790 | "
\n",
791 | " \n",
792 | " | SRGAP3 | \n",
793 | " 0.000010 | \n",
794 | " 0.000202 | \n",
795 | "
\n",
796 | " \n",
797 | " | ATP6V0A4 | \n",
798 | " 0.000005 | \n",
799 | " 0.000120 | \n",
800 | "
\n",
801 | " \n",
802 | "
\n",
803 | "
979 rows × 2 columns
\n",
804 | "
"
805 | ],
806 | "text/plain": [
807 | " means stds\n",
808 | "index \n",
809 | "H2AFZ 1.558621 0.690636\n",
810 | "IL8 0.588597 0.379918\n",
811 | "PLA2G7 0.433617 0.340465\n",
812 | "SSB 0.398044 0.208317\n",
813 | "HIST1H2AC 0.234484 0.173549\n",
814 | "... ... ...\n",
815 | "IFNB1 0.000011 0.000189\n",
816 | "PELI3 0.000010 0.000337\n",
817 | "AURKB 0.000010 0.000136\n",
818 | "SRGAP3 0.000010 0.000202\n",
819 | "ATP6V0A4 0.000005 0.000120\n",
820 | "\n",
821 | "[979 rows x 2 columns]"
822 | ]
823 | },
824 | "execution_count": 29,
825 | "metadata": {},
826 | "output_type": "execute_result"
827 | }
828 | ],
829 | "source": [
830 | "top.sort_values('means',ascending=False)"
831 | ]
832 | },
833 | {
834 | "cell_type": "code",
835 | "execution_count": 30,
836 | "metadata": {},
837 | "outputs": [],
838 | "source": [
839 | "top.to_csv('kang_remove_g/aux_0.csv')"
840 | ]
841 | }
842 | ],
843 | "metadata": {
844 | "kernelspec": {
845 | "display_name": "newenv",
846 | "language": "python",
847 | "name": "newenv"
848 | },
849 | "language_info": {
850 | "codemirror_mode": {
851 | "name": "ipython",
852 | "version": 3
853 | },
854 | "file_extension": ".py",
855 | "mimetype": "text/x-python",
856 | "name": "python",
857 | "nbconvert_exporter": "python",
858 | "pygments_lexer": "ipython3",
859 | "version": "3.9.7"
860 | }
861 | },
862 | "nbformat": 4,
863 | "nbformat_minor": 4
864 | }
865 |
--------------------------------------------------------------------------------
/figures/supplementary_figures/g_enrichments.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 61,
6 | "id": "23720687-2ad7-4d94-8df1-39b834c5e456",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import numpy as np\n",
11 | "import pandas as pd \n",
12 | "import anndata \n",
13 | "import matplotlib.pyplot as plt\n",
14 | "import shap as shap \n",
15 | "import seaborn as sns\n",
16 | "import math as math"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 62,
22 | "id": "654a8332-3a7a-4e98-9e86-9a9f44072a58",
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "# drop G\n",
27 | "aux_0 = pd.read_csv('kang_remove_g/aux_0.csv')\n",
28 | "aux_1 = pd.read_csv('kang_remove_g/aux_1.csv')\n",
29 | "aux_2 = pd.read_csv('kang_remove_g/aux_2.csv')\n",
30 | "aux_3 = pd.read_csv('kang_remove_g/aux_3.csv')"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 63,
36 | "id": "8a1707e6-79f5-434c-b6de-d41a31658f88",
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/html": [
42 | "\n",
43 | "\n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " | \n",
60 | " index | \n",
61 | " means | \n",
62 | " stds | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " | 233 | \n",
68 | " IL8 | \n",
69 | " 1.472220 | \n",
70 | " 1.072127 | \n",
71 | "
\n",
72 | " \n",
73 | " | 250 | \n",
74 | " H2AFZ | \n",
75 | " 0.660040 | \n",
76 | " 0.297694 | \n",
77 | "
\n",
78 | " \n",
79 | " | 707 | \n",
80 | " SQRDL | \n",
81 | " 0.541798 | \n",
82 | " 0.295089 | \n",
83 | "
\n",
84 | " \n",
85 | " | 347 | \n",
86 | " PLA2G7 | \n",
87 | " 0.439908 | \n",
88 | " 0.348958 | \n",
89 | "
\n",
90 | " \n",
91 | " | 320 | \n",
92 | " HIST1H2AC | \n",
93 | " 0.385302 | \n",
94 | " 0.294543 | \n",
95 | "
\n",
96 | " \n",
97 | " | ... | \n",
98 | " ... | \n",
99 | " ... | \n",
100 | " ... | \n",
101 | "
\n",
102 | " \n",
103 | " | 342 | \n",
104 | " TREM2 | \n",
105 | " 0.000012 | \n",
106 | " 0.000478 | \n",
107 | "
\n",
108 | " \n",
109 | " | 960 | \n",
110 | " RRM2 | \n",
111 | " 0.000011 | \n",
112 | " 0.000294 | \n",
113 | "
\n",
114 | " \n",
115 | " | 797 | \n",
116 | " PTRF | \n",
117 | " 0.000010 | \n",
118 | " 0.000238 | \n",
119 | "
\n",
120 | " \n",
121 | " | 12 | \n",
122 | " ALDH4A1 | \n",
123 | " 0.000009 | \n",
124 | " 0.000227 | \n",
125 | "
\n",
126 | " \n",
127 | " | 969 | \n",
128 | " ABCC2 | \n",
129 | " 0.000005 | \n",
130 | " 0.000174 | \n",
131 | "
\n",
132 | " \n",
133 | "
\n",
134 | "
979 rows × 3 columns
\n",
135 | "
"
136 | ],
137 | "text/plain": [
138 | " index means stds\n",
139 | "233 IL8 1.472220 1.072127\n",
140 | "250 H2AFZ 0.660040 0.297694\n",
141 | "707 SQRDL 0.541798 0.295089\n",
142 | "347 PLA2G7 0.439908 0.348958\n",
143 | "320 HIST1H2AC 0.385302 0.294543\n",
144 | ".. ... ... ...\n",
145 | "342 TREM2 0.000012 0.000478\n",
146 | "960 RRM2 0.000011 0.000294\n",
147 | "797 PTRF 0.000010 0.000238\n",
148 | "12 ALDH4A1 0.000009 0.000227\n",
149 | "969 ABCC2 0.000005 0.000174\n",
150 | "\n",
151 | "[979 rows x 3 columns]"
152 | ]
153 | },
154 | "execution_count": 63,
155 | "metadata": {},
156 | "output_type": "execute_result"
157 | }
158 | ],
159 | "source": [
160 | "aux_0.sort_values('means',ascending=False)"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": 64,
166 | "id": "b85cd0f4-6b0c-4a1b-82e7-24e0b207a830",
167 | "metadata": {},
168 | "outputs": [],
169 | "source": [
170 | "combined_means = pd.DataFrame(index = aux_0.index)"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 65,
176 | "id": "c9a0bd21-db5e-4109-9fdd-c6c585f99f52",
177 | "metadata": {},
178 | "outputs": [],
179 | "source": [
180 | "combined_means[0] = aux_0['means']\n",
181 | "combined_means[1] = aux_1['means']\n",
182 | "combined_means[2] = aux_2['means']\n",
183 | "combined_means[3] = aux_3['means']"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 66,
189 | "id": "a69cab5f-5575-4543-8c05-e3a7dabfe983",
190 | "metadata": {},
191 | "outputs": [
192 | {
193 | "data": {
194 | "text/html": [
195 | "\n",
196 | "\n",
209 | "
\n",
210 | " \n",
211 | " \n",
212 | " | \n",
213 | " 0 | \n",
214 | " 1 | \n",
215 | " 2 | \n",
216 | " 3 | \n",
217 | "
\n",
218 | " \n",
219 | " \n",
220 | " \n",
221 | " | 0 | \n",
222 | " 0.003762 | \n",
223 | " 0.007630 | \n",
224 | " 0.005264 | \n",
225 | " 0.014630 | \n",
226 | "
\n",
227 | " \n",
228 | " | 1 | \n",
229 | " 0.001084 | \n",
230 | " 0.002251 | \n",
231 | " 0.002387 | \n",
232 | " 0.000991 | \n",
233 | "
\n",
234 | " \n",
235 | " | 2 | \n",
236 | " 0.000217 | \n",
237 | " 0.001149 | \n",
238 | " 0.000799 | \n",
239 | " 0.000178 | \n",
240 | "
\n",
241 | " \n",
242 | " | 3 | \n",
243 | " 0.000471 | \n",
244 | " 0.000526 | \n",
245 | " 0.001402 | \n",
246 | " 0.000415 | \n",
247 | "
\n",
248 | " \n",
249 | " | 4 | \n",
250 | " 0.002461 | \n",
251 | " 0.001258 | \n",
252 | " 0.000957 | \n",
253 | " 0.000706 | \n",
254 | "
\n",
255 | " \n",
256 | " | ... | \n",
257 | " ... | \n",
258 | " ... | \n",
259 | " ... | \n",
260 | " ... | \n",
261 | "
\n",
262 | " \n",
263 | " | 974 | \n",
264 | " 0.000141 | \n",
265 | " 0.000167 | \n",
266 | " 0.000026 | \n",
267 | " 0.000326 | \n",
268 | "
\n",
269 | " \n",
270 | " | 975 | \n",
271 | " 0.000030 | \n",
272 | " 0.000044 | \n",
273 | " 0.000069 | \n",
274 | " 0.000031 | \n",
275 | "
\n",
276 | " \n",
277 | " | 976 | \n",
278 | " 0.000287 | \n",
279 | " 0.000068 | \n",
280 | " 0.000032 | \n",
281 | " 0.000397 | \n",
282 | "
\n",
283 | " \n",
284 | " | 977 | \n",
285 | " 0.000135 | \n",
286 | " 0.000182 | \n",
287 | " 0.000036 | \n",
288 | " 0.000130 | \n",
289 | "
\n",
290 | " \n",
291 | " | 978 | \n",
292 | " 0.000183 | \n",
293 | " 0.000211 | \n",
294 | " 0.000104 | \n",
295 | " 0.000023 | \n",
296 | "
\n",
297 | " \n",
298 | "
\n",
299 | "
979 rows × 4 columns
\n",
300 | "
"
301 | ],
302 | "text/plain": [
303 | " 0 1 2 3\n",
304 | "0 0.003762 0.007630 0.005264 0.014630\n",
305 | "1 0.001084 0.002251 0.002387 0.000991\n",
306 | "2 0.000217 0.001149 0.000799 0.000178\n",
307 | "3 0.000471 0.000526 0.001402 0.000415\n",
308 | "4 0.002461 0.001258 0.000957 0.000706\n",
309 | ".. ... ... ... ...\n",
310 | "974 0.000141 0.000167 0.000026 0.000326\n",
311 | "975 0.000030 0.000044 0.000069 0.000031\n",
312 | "976 0.000287 0.000068 0.000032 0.000397\n",
313 | "977 0.000135 0.000182 0.000036 0.000130\n",
314 | "978 0.000183 0.000211 0.000104 0.000023\n",
315 | "\n",
316 | "[979 rows x 4 columns]"
317 | ]
318 | },
319 | "execution_count": 66,
320 | "metadata": {},
321 | "output_type": "execute_result"
322 | }
323 | ],
324 | "source": [
325 | "combined_means"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": 67,
331 | "id": "f2040385-8056-4c9b-bdee-ee1ab665c26f",
332 | "metadata": {},
333 | "outputs": [
334 | {
335 | "data": {
336 | "text/html": [
337 | "\n",
338 | "\n",
351 | "
\n",
352 | " \n",
353 | " \n",
354 | " | \n",
355 | " 0 | \n",
356 | " 1 | \n",
357 | " 2 | \n",
358 | " 3 | \n",
359 | "
\n",
360 | " \n",
361 | " \n",
362 | " \n",
363 | " | 0 | \n",
364 | " 1.000000 | \n",
365 | " 0.690757 | \n",
366 | " 0.703157 | \n",
367 | " 0.723252 | \n",
368 | "
\n",
369 | " \n",
370 | " | 1 | \n",
371 | " 0.690757 | \n",
372 | " 1.000000 | \n",
373 | " 0.698095 | \n",
374 | " 0.634515 | \n",
375 | "
\n",
376 | " \n",
377 | " | 2 | \n",
378 | " 0.703157 | \n",
379 | " 0.698095 | \n",
380 | " 1.000000 | \n",
381 | " 0.589254 | \n",
382 | "
\n",
383 | " \n",
384 | " | 3 | \n",
385 | " 0.723252 | \n",
386 | " 0.634515 | \n",
387 | " 0.589254 | \n",
388 | " 1.000000 | \n",
389 | "
\n",
390 | " \n",
391 | "
\n",
392 | "
"
393 | ],
394 | "text/plain": [
395 | " 0 1 2 3\n",
396 | "0 1.000000 0.690757 0.703157 0.723252\n",
397 | "1 0.690757 1.000000 0.698095 0.634515\n",
398 | "2 0.703157 0.698095 1.000000 0.589254\n",
399 | "3 0.723252 0.634515 0.589254 1.000000"
400 | ]
401 | },
402 | "execution_count": 67,
403 | "metadata": {},
404 | "output_type": "execute_result"
405 | }
406 | ],
407 | "source": [
408 | "combined_means.corr()"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 68,
414 | "id": "7aa9b78a-9769-49f2-ab16-e30ef48a91a1",
415 | "metadata": {},
416 | "outputs": [],
417 | "source": [
418 | "corr_mat = combined_means.corr().abs()\n",
419 | "mask = np.tril(np.ones_like(corr_mat, dtype=bool)) "
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": 69,
425 | "id": "8c5f0225-033b-4559-bb16-6500ed35ed8f",
426 | "metadata": {},
427 | "outputs": [
428 | {
429 | "data": {
430 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdAAAAFpCAYAAAAsmHm9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAp80lEQVR4nO3dd5gc1Znv8d87PVGjUUJplCMggQRIQiSTk5C9lu1rewETDNgC2+wa4yRfXweW595l7V17HTAwa4PFYsDYJmhtQAQjk4wiAkko51HOk0cT3vvHtETPqCdV18T6fp6nHnVX1ak6fRjm7fecU2fM3QUAAFonraMrAABAV0QABQAgAAIoAAABEEABAAiAAAoAQAAEUAAAAiCAAgC6BDN72Mz2mtnKRo6bmf3czDaY2ftmNiXh2AwzWxs/NieM+hBAAQBdxW8lzWji+DWSxse32ZIekCQzi0m6P358oqTrzGxiqpUhgAIAugR3f13SwSZOmSXpUa/zjqQ+ZpYvabqkDe6+yd2PSnoyfm5KCKAAgO5iqKTtCe8L4/sa25+S9FQv0AKsFdjGpv1gfkdXIRJqqms7ugrdXm0Nvy7a2nv3zbS2unbmWbem9B+wavkjt6uu6/WYAncvaMUlkn02b2J/StojgAIAIsDSYimVjwfL1gTMhgolDU94P0zSTkmZjexPCV24AIBQWFospS0E8yTdFJ+Ne66kI+6+S9JiSePNbLSZZUq6Nn5uSshAAQChCCkINn59syckXSKpv5kVSvqBpAxJcvcHJT0vaaakDZLKJN0SP1ZtZndKmi8pJulhd1+Van0IoACALsHdr2vmuEv6SiPHnlddgA0NARQAEIq2zkA7GwIoACAUFiOAAgDQamkRy0CZhQsAQABkoACAUDAGCgBAAARQAAACsLRojQoSQAEAoYhaBhqtrwsAAISEDBQAEIqoZaAEUABAKAigAAAEwEpEAAAEELUMlElEAAAEQAYKAAhF1DJQAigAIBRRW0yeAAoACEXUMlDGQAEACIAMFAAQiqhloARQAEAoCKAAAARAAAUAIICoBVAmEQEAEAAZKAAgFKyFCwBAAFHrwiWAAgBCQQAFACCAqAVQJhEBABAAGSgCGz0gV9+cOUGTh/VRcUWVnl22Q/+1YINqvfmyl04YqM9fOEZjB/ZURVWNPthRpG/9frkqqmqOn3PrRWP0qanD1Dc3U5v3leqXr6zTOxsPtOEn6jrGDOipb/3DBE0e3lclFVV6ZkmhHvrr+ibb/vbLxumOy8cnPfaL+Wv18Oub2qi2nd+YgT015+MTNXlEXxVXVOmZxdv14CtNt+cdV4zXl65I3p4/e3GtHl6wUWkm3XzRGF106kCNGdhTkrR6xxH94qV1WlV4pC0+SodKS7OOrkK7IoAikLzsdP3qpmnatK9UX3/iXQ3rl6O7rj5FaSY98NcNTZadNWWovjVzgh59a4t+/tJa5WVn6Owx/RRL+J/v8xeO1hcuHquHXtugtbuKNPOMIfrp9VN0228W6oOdRW398Tq1vOx0PXjr2dq0t0Rfe2yphp/UQ3dfc6rMpF+9sr7Rcs8sKdTb6/fX23fphIG65eKxenP9vraudqeVl5Ouh74wXZv2lOiuR+va8+sfPVVmpvtfWtdouacXbddba+u322WnDdKtl4zVW2v3SpKyMmK69ZKxem5JoX6zYKPcpWvPH6nf3nGubnrg71q9o3v9LBsBFGje/zp7uLIyYvrW799VaWWNFm6ScrPSNfuScXr0rc0qraxJWq53jwzdPeNU/fiFNXp2aeHx/QvW7D3+Oj1m+vxHxmjum5s1983NkqR3Nh7Q6AG5+uIlY/W1x99t2w/XyX1m+ghlZcT09cffVWlltRZuPKDcrHTdftl4zX1js0orq5OW21tUob1FFfX2ffHSsdq0t0TrdhW3R9U7pc+cM1LZGTHd/dgylVZW650NdT/Ld1wxXr/926ZWtefsy8dp
094SrY23Z2VVjWb+6DUVl394jYUb92veNy7WdeeN0vf/+H7bfbAOYBatAMoYKAI5f1x/vbNhf71AOX/lbmVnxjRlZL9Gy1152mBJ0p+X72j0nGF9e6hndroWbarfXbtw4wGdM7a/0mPR+p+0oQtOHqC/r99X7xf7/Pd3KSczpqmjG2/7hnrlZOjcsf01//1dbVHNLuMjpwzQ2+vqt+eL7+1UTmZM08a0rj3PG9dfL7638/i+Wle94ClJ1TWujXtK1LdnZuqVR4cigCKQUf1ztWV/ab19e45UqPxotUYNyG203OnDemvr/lLNmjJMf7n7Yr3z/Sv12y+eo8nD+xw/Jyu97seyqqa2XtmjNbXKTE/T0L49wvsgXdCoAbnavK9+2+8+1vb9G2/7hq44fbAy0tP04vs7mz+5GxvdVHs28bPc0JWT4u35XtNfSDJiaZo4tLc27SkJVN/OLC3NUtpawsxmmNlaM9tgZnOSHP+mmS2PbyvNrMbM+sWPbTGzFfFjS1L+vKleANHUKydDxRUndm0VlVcrLzuj0XIn9czSyP65uu2iMfrFy+v0tcffVfnRGv38hqnql1v3jbzwULlqa12nDe1dr+yx971zGr9+FOQ10fa9WtE2V0/K1wc7jmjbgbIwq9fl5OVkqLi86oT9ReVVrWrPGWfk64PCI9ra4ItlQ1+8bKx65aTr6cXbW13Xzs7SLKWt2eubxSTdL+kaSRMlXWdmExPPcfcfu/uZ7n6mpO9I+pu7H0w45dL48Wmpft5mA6iZnWpm3zazn5vZz+KvJ6R6Y3R9rhOnKNYNgTQ+dTHN6saX7n1ulV5csUt/37Bf33jyXdW667PnjJAklVZWa/7KXbrlwjGaOqqfeuVk6B/PGaFzxpwkSappyTTfbs49edsn+2+STP+8LE0d3U8vRrz79phkrWYyJWnmpOra8yS98F7T2fyFpwzQFy4dp/98YW2zgbYrausAKmm6pA3uvsndj0p6UtKsJs6/TtITIXy0pJoMoGb2bdVV0CQtkrQ4/vqJZKlzQrnZZrbEzJYUFBSEWV90EkXlVUkzzZ5Z6Umzo2OOxL/pL93y4RfC0soard5ZpDEDeh7f9x8vrNHmfSV66Jaz9dc5l+nG80fpN/HHLA6WVob1Mbqk4vIq5SXJjHpmpZ8w3taYK08fLJP00goCaHF5lfKyT5xP2TM7XcUVJ2amyVw1OV8mNTmefNqw3vrR9Wfpjwu36XdvbQlY284tzSylLTF2xLfZDW4xVFJi6l4Y33cCM+shaYakPyXsdkkvmdnSJNduteZm4d4m6TR3r/dTZGY/kbRK0n3JCrl7gaRjkZN0oRvasr/0hPG2Qb2y1SMrXVv2Nf7Nesu+UtXWuhp+1zSTahO+7h8uq9KX5i7RwF5Z6pmVrq0HynTduSO1v7hSuw5XKMq27CvV6IZt3zve9i3MamZMztfyrYe050i021KSNu8r1eiEL29SQns28bOcaMbkfL3bRHuO7J+rX35+mhZuPKD75q1Kuc7dVYPYkUyyNLWxGPMPkt5q0H17gbvvNLOBkl42szXu/nrA6jbbhVsraUiS/fnxY4iotzfs17lj+6tH5odLd115+mBVHK3Rsq0HGy33xrp9SkszTUuYLZqbla4J+b20fveJj1LsLarUpn2liqWZPn7WUM17t/CEc6LmrXX7dN74+m1/1aR8lR+t0dLNjbf9Mfl9cjR5RF+6b+PeXLtP559cvz2vnlzXnks2Nd+eQ/rm6IyRffXC8uTdt/3zsvTArWdr+4EyzXni3RYtNNJVtUMXbqGk4Qnvh0lqrN/8WjXovnX3nfF/90p6RnVdwoE1l4HeJelVM1uvD9PmEZLGSbozlRuja/vT4u269pyR+vG1Z2rum5s1tG8Pzb5krH739y31Hm155p8v1LKtB3Xvc3XfulfvLNKC1Xv0vVmn6xcvr9PhsqO66SOjVV3remrRtuPlZk7OV3osTTsOlWlw7xxdf95I1brrkTc2
t/tn7Wz+sGibrj1/pP7jc1P029c3aWi/HrrjsnF67K36z4A+d/dFWrb5oO55ZmW98jMm56uqplavrNzd3lXvlP6wcKuuP3+kfnLjVD3yt40a1q+HvnTFeP33m/Xb83++cbGWbj6oH/5pRb3yTbVnVnqa7r/lbOXlZOhfn1ul8YN7HT9WVVOrNd1sUZB2WEhhsaTxZjZa0g7VBcnrT6iHWW9JF0u6IWFfrqQ0dy+Ov75K0r+kUpkmA6i7v2hmJ6suSg9VXfpcKGmxuyd/Uh6RUFxRrS/NXaxvzZygn1w/RSUV1Xr8na0qeK3+KkSxtLqxjUTfe3qFvnrVyfrajFOUnRHTe9sO647fLq43dmpmuvkjozW4d7ZKKqv1tzV7df8r61V+lB+74opq3fGbxfr2P0zUf944VcUVVfrd21v04Kv1VyFKb+TRgKsn52vRxgM6VHa0varcqRWXV2v2rxfpO7Mm6uc3T1NxeZUee3OzHmiwqlMs1kh7njFEizYc0KHSE9vzpLwsnTqkLmj+8paz6x3bcahMM/9tQXgfpBNo66X83L3azO6UNF9STNLD7r7KzO6IH38wfuonJb3k7ol98IMkPRNf7CFd0uPu/mIq9bFks/lC1o07LDqHaT+Y39FViISaakYt2lptDb8u2tp7981ssyh3xpznU/oP2JZ1awss5QcACIVFbGUBAigAIBRRWwuXAAoACAV/zgwAgACi9ufMItZjDQBAOMhAAQChiFoGSgAFAISi4TPf3R0BFAAQCjJQAAACiFoAZRIRAAABkIECAELBc6AAAATASkQAAAQQtbVwI/ZxAQAIBxkoACAUjIECABBA1B5jIYACAELBJCIAAAKIWhcuk4gAAAiADBQAEArGQAEACCBGAAUAoPUIoAAABBC1AMokIgAAAiADBQCEImoZKAEUABAKAigAAAGkRyyAMgYKAEAAZKAAgFDQhQsAQAAEUAAAAoilRWtUkAAKAAhF1DLQaH1dAAB0aWY2w8zWmtkGM5uT5PglZnbEzJbHt++3tGxrkYECAELR1hmomcUk3S/pSkmFkhab2Tx3/6DBqW+4+8cClm0xAigAIBTt0IU7XdIGd98kSWb2pKRZkloSBFMpmxQBtBtYcs/VHV2FSJj6vRc7ugrdXs8+2R1dBaQgZqkFUDObLWl2wq4Cdy9IeD9U0vaE94WSzklyqfPM7D1JOyV9w91XtaJsixFAAQChSDUDjQfLgiZOSXYDb/B+maSR7l5iZjMlPStpfAvLtgqTiAAAXUWhpOEJ74epLss8zt2L3L0k/vp5SRlm1r8lZVuLDBQAEIp2GANdLGm8mY2WtEPStZKuTzzBzAZL2uPubmbTVZcoHpB0uLmyrUUABQCEoq0Xk3f3ajO7U9J8STFJD7v7KjO7I378QUmflvQlM6uWVC7pWnd3SUnLplIfAigAIBTtsZBCvFv2+Qb7Hkx4/UtJv2xp2VQwBgoAQABkoACAUERtKT8CKAAgFARQAAACIIACABBA1AIok4gAAAiADBQAEIqoZaAEUABAKAigAAAEQAAFACCAqAVQJhEBABAAGSgAIBRRy0AJoACAUMSMAAoAQKulRSyAMgYKAEAAZKAAgFDEopWAEkABAOFIYxIRAACtxyQiAAACYBIRAABoFhkoACAUTCICACAAJhEBABBA1MZACaAAgFBErQuXSUQAAARABgoACAVduAAABMCfMwMAIAAyUACd3ugBufrWRydq8vA+Kq6o0rNLC1Xw2gbVeuNlZl86TrdfNi7psV++vE6PvL6pjWrb+Y06qYfuuvxknTakl0oqq/Xn93fpkbc3N9mex1w0vr9uOGekxvTPVUV1rdbsLtL/eW6lKqpqJUm3XjBKF40foMG9smUmbTtYpicWbddf1+5t40+FtkYABbqYvOx0PfD5s7VpX4nufnyZhvXroa/NOEVmpgdeXd9ouWeXbtfb6/fV23fphEH6/EVj9Na6fY2U6v56ZqXrp589U1sO
lOo7z6zQ0D45+sol42Qm/frNzU2W/dikfN11xXg9sWi7fvW3jcrLTtfUEX3rrQmbm5muF1bu1pYDpap11yUnD9Q9Hz9Ntc+5FnSzdo/aLFwCKNDFfHr6CGVlxPTNJ95VaWWNFm48oNysdN1+6Tg9+uYmlVbWJC23t6hSe4sq6+374iVjtXlfidbtLm6PqndKnzhziLLS0/TdZ1eq7GiNlmw9pNysdN1y/ig9vmibyo4mb8/eORn6p8vG6Wevrtf/vL/r+P431u+vd94vXttQ7/3iLYc0un+urj5tcLcLoFHrwuUxFqCLOX98f/19w/56gfKlFbuUnRnTlFH9WnydXjkZOmdsf81P+OUfReeMPkmLNh+sFyhfWb1H2RkxnTm8T6PlLj1lgCTphZW7W33PI+VVyuiG6VoszVLauhoCKNDFjOqfqy37Suvt232kQuVHqzWqf26Lr3P5aYOUkZ6m+SuiHUBH9uuhrQfL6u3bW1yp8qM1GtmvR6PlJub30raDZfrY5Hz96Y7z9NrdF+uhz03V6UN6JT0/ZqaeWem6csIgnT2qr55bvjPUz9EZpFlqW0uY2QwzW2tmG8xsTpLjnzOz9+Pb22Z2RsKxLWa2wsyWm9mSVD8vXbhAF9MrJ0PFFVUn7C8qr1avnIwWX+fqSflaveOIth0oa/7kbiwvO10lldUn7C+urFJeduPteVJulkb066Gbzh2lB/62UUfKq3T99BH690+foet+/Y4OlX3432hifi89dMNUSVJ1Ta1++up6vbFhf2OXRiPMLCbpfklXSiqUtNjM5rn7BwmnbZZ0sbsfMrNrJBVIOifh+KXuHkrjE0CBrijJ7FCzpLuT6t8zS1NG9dMvXlobarW6Kk/WnjJ5sgPHjpvUIzNd33tulRZtOShJWrnziP54+3n61FnD9Ju3PpyAtGl/ib7w6BL1zE7X+WNO0tcuH6/Symq9uqZ7zcRthz+oPV3SBnffJElm9qSkWZKOB1B3fzvh/HckDWurygTuwjWzW5o4NtvMlpjZkoKCgqC3AJBEUXmVeuac+N23Z1a6istPzEyTufL0wTJJL61o/fhdd1NcUa2e2Se2Z25WLGlmekxRvBdg+fbDx/eVHa3R2j3FGtW/ftdvRVWt1u4p1tKth/SL1zZo/gd79KWLx4bzATqRNLOUtsTYEd9mN7jFUEnbE94Xxvc15jZJLyS8d0kvmdnSJNdutVQy0HskPZLsgLsXqC5tllr+pRhAC2zZX6pR/XvW2zeoV7Z6ZKVry/7SRkrVd9WkfC3fdkh7iiraoopdytaDZSeMdQ7My1KPzPQTxkbrlTtQplp3qUHSVZe5Nn3PdXuK9dFJ+YqlmWpa8rBpFxFLcVZNg9iRTLIUN2kDmtmlqgugH0nYfYG77zSzgZJeNrM17v560Po2+XETBmIbbiskDQp6UwDBvb1+v84bd5J6ZMaO77tq0mBVHK3RsnhXYlPy++Ro8og+kZ99e8zCzQc0fVQ/5WR82J6XnTpQFVU19bLLht7eeEBpZpqSMFM3NzOmkwf11Ia9JU3ec9LQ3tpTVNGtgqeUegbaAoWShie8HybphNlYZjZZ0q8lzXL3A8f2u/vO+L97JT2jui7hwJrLQAdJulrSoYb1k/T2iacDaGt/XLRN1547Qj++7izNfWOzhvbL0exLx+mxt7fUe7Tl2bsu1NIth3Tvsyvrlb960mBV19TqlVV030rSs8t36tNThun/fuJ0/W7RNg3pna1bzh+l3y/ZXu/Rlie+cI6Wbz+sf5tfN268dk+x3li/T3NmnKoHX990fBJRTa3r6Xd3SJIG9crSd2ZM0Cur92jnkXLlZMR00fgBumLCIP07489BLJY03sxGS9oh6VpJ1yeeYGYjJD0t6UZ3X5ewP1dSmrsXx19fJelfUqlMcwH0z5J6uvvyhgfMbEEqNwYQTHFFte54ZLG+/bGJ+ukNU1RSUaXH/75FD/21/gP7sbS0pJM6rpqUr0WbDuhwWcvG
S7u7kspq3fXUct11+cn6t09OUklltZ5aUqhH3q6/ClGyZxXv/ctqffmSsbrz0nHKTk/Tip1H9NXfLz8+dlpSUa0DpZW66byR6pebqZLKam3ZX6Zv/vE9vbO5+d6CrqatJxG5e7WZ3SlpvqSYpIfdfZWZ3RE//qCk70s6SdKvrK4+1e4+TXUJ4TPxfemSHnf3F1OpjzU1yywk3auPApE19Xsp/b+GFujRK6ujq9DtvfHNS9ssyi3dfjil3/dTh/fpUqsp8BgLACAUqU4i6moi9nEBAAgHGSgAIBRRW0yeAAoACEXE4icBFAAQjrSk6xx0XwRQAEAoopaBMokIAIAAyEABAKHogn8TOyUEUABAKKLWhUsABQCEImqTiBgDBQAgADJQAEAo6MIFACAAJhEBABBAxOInARQAEI6orYXLJCIAAAIgAwUAhCJiCSgBFAAQjqh1aRJAAQChsIiloARQAEAoovYYS9QybgAAQkEGCgAIRcR6cAmgAIBwRK1LkwAKAAhF1CYRRe0LAwAAoSADBQCEImqzcAmgAIBQRCx+EkABAOEgAwUAIAAmEQEAgGaRgQIAQhG1LlwyUABAKCzFrUX3MJthZmvNbIOZzUly3Mzs5/Hj75vZlJaWbS0CKAAgFGlmKW3NMbOYpPslXSNpoqTrzGxig9OukTQ+vs2W9EAryrbu86ZSGACAY8xS21pguqQN7r7J3Y9KelLSrAbnzJL0qNd5R1IfM8tvYdlWIYACALqKoZK2J7wvjO9ryTktKdsqTCICWmjpvTM6ugrd3tW/equjq4AUmHtq5c1mq67b9ZgCdy9IPCVJsYY3beyclpRtFQIoACAcXpta8bpgWdDEKYWShie8HyZpZwvPyWxB2VahCxcAEArz2pS2FlgsabyZjTazTEnXSprX4Jx5km6Kz8Y9V9IRd9/VwrKtQgYKAOgS3L3azO6UNF9STNLD7r7KzO6IH39Q0vOSZkraIKlM0i1NlU2lPgRQAEA4UuzCbdEt3J9XXZBM3PdgwmuX9JWWlk0FARQAEI4UJxF1NQRQAEA42iED7UwIoACAULRwIlC3wSxcAAACIAMFAIQjYhkoARQAEA4CKAAAARBAAQAIoDZaAZRJRAAABEAGCgAIRdQeYyGAAgDCQQAFACCAiC3lxxgoAAABkIECAMJBFy4AAK3HJCIAAIIggAIAEEDEAiiTiAAACIAMFAAQjohloARQAEAomEQEAEAQEVtMngAKAAgHKxEBAIDmkIECAMLBGCgAAK3HJCIAAIKIWABlDBQAgADIQAEA4YhYBkoABQCEo7amo2vQrgigACJvRN8cffnCMZowKE+lR2v04gd79NiSbapt4rHGQXlZevTGaSfsX7B+n/715XX19l03dZhmThysPjkZ2nqoTI+8s1VLtx8O+VN0PGchBQCIjp5ZMd338dO17WCZ7nlhtfJ752j2+aNkJs1dtK3Z8gVvbdaq3UXH3xeVV9c7/o9Thur6acP134u2aeP+Ul128gDdM3OC7n5mhdbtLQn983QoMlAAiI6PnjZYmbE0/cuLa1RWVSMVHlGPjJhuOHu4/vDujrp9TSg8XK41e5IHwvQ00z9OGaY/LNuhp97dIUlauv2wRvbtoRumDdf3n18d+udB+2EWLoBIO3tEXy3dfqheoFywYZ+yM2KaNKRXStfO752t3Mx0LSs8XG//ssLDOmt4H6WnWUrX73Rqa1LbUmBm/czsZTNbH/+3b5JzhpvZa2a22sxWmdlXE4790Mx2mNny+DazuXuSgQKItOF9crR8x5F6+/aVHFVFVY2G9+2hhVsPNVn+7svGKy8rXYfLq7Rg/T79duE2Ha2pGwvMjNXlKNUNxgaramqVGUtTfq9sbT9cHuKn6Vhe06FduHMkveru95nZnPj7bzc4p1rS1919mZnlSVpqZi+7+wfx4z91939v6Q2bDaBmdqqkoZIWuntJwv4Z7v5iS28EAJ1Rz6x0lVZWn7C/uLJaeVmxRstV
1dRq3opdWrr9sMqOVmvy0N767FlDNaR3tn74whpJ0q6iCtW66+SBefW6eU8ZmCdJysvuZjlMx04imiXpkvjruZIWqEEAdfddknbFXxeb2WrVxbcPFECTXbhm9s+SnpP0T5JWmtmshMP/r4lys81siZktKSgoCFIvAGg3ySbbWiP7jzlYVqX739ikd7Yc1Ps7i/TY4u0qeGuLzht9ksaclCtJKjtaowXr9+u6qcN0xpDeystK18cn5eusYb0lSTVNTfPtilLswk2MHfFtdivuPigeII8FyoFNnWxmoySdJWlhwu47zex9M3s4WRdwQ819/fmipKnuXhK/2R/NbJS7/0x1P19JuXuBpGORs5v9hADoTkoqq5WbeeKvwtysdJVUtq5L8o2N+/VPF4/VuAG52nSgVJL04Jub9L+vOkU/+sTpkqS9xZV6fGmhbpo+QofLq1L/AN1Ig9hxAjN7RdLgJIe+25r7mFlPSX+SdJe7H5tC/YCke1UXs+6V9B+Sbm3qOs0F0Nixblt332Jml6guiI5UEwEUALqK7YfLNbxvTr19A3pmKicjpu2Hylp1rWTZwpGKan173ir1z81UbmZM2w+X61NnDNGB0qPaU1yZQs07H2/jx1jc/YrGjpnZHjPLd/ddZpYvaW8j52WoLnj+zt2fTrj2noRz/kvSn5urT3OzcHeb2ZkJNyiR9DFJ/SVNau7iANDZLd52SNOG91FOxofjnReP66+Kqhqt2FnURMkTXTi2vyRp/b4TH2vZX3pUWw+VK5ZmuurUQXppzZ4TzunyamtT21IzT9LN8dc3q274sR4zM0m/kbTa3X/S4Fh+wttPSlrZ3A2by0BvUt2spePcvVrSTWb2UHMXB4DO7i+rdmvWpCH6/oxT9dS7hRrcK1s3nD1CT7+3s96jLY98bore31mkn762QZJ0w9nD1SMjplW7i1R2tEaT8nvr02cN0Zsb92vzgQ8z18tPHqBYmml3UYUG5mXpk5OHqNZdTy4tbPfP2tbaOgNtxn2SnjKz2yRtk/QZSTKzIZJ+7e4zJV0g6UZJK8xsebzc/3b35yX9KJ4wuqQtkm5v7oZNBlB3b/S/sLu/1dzFAaCzK6ms0Zx5K/WVC8fonpkTVFJZo6ff26nHFtdfhSjNTImPbW4/VK5PnzlUMyYMUmZ6mvaVVOqP7+7UE0u31ytnJn32rGEalJel0qPVenvzQT3yzlZVVEdr2bu25u4HJF2eZP9OSTPjr99UI8OP7n5ja+9p7m0+x4dJRABa5Opf8b28rc3/8gVtNn/l6FtPpfT7PvOCz3apuTXd7CEkAECHYTF5AABar4NXImp3BFAAQDgi9tdYWEweAIAAyEABAOGIWAZKAAUAhMKZRAQAQAARy0AZAwUAIAAyUABAOCKWgRJAAQChYAwUAIAgyEABAAggYgGUSUQAAARABgoACAVr4QIAEASTiAAACCBiY6AEUABAKDxiAZRJRAAABEAGCgAIBQspAAAQgNcQQAEAaLWoBVDGQAEACIAMFAAQCsZAAQAIIGpduARQAEAoCKAAAARQG7G1cJlEBABAAGSgAIBQMIkIAIAAGAMFACAAAigAAAFErQuXSUQAgC7PzPqZ2ctmtj7+b99GzttiZivMbLmZLWlt+UQEUABAKGpralPaUjRH0qvuPl7Sq/H3jbnU3c9092kBy0sigAIAQuI1tSltKZolaW789VxJn2jr8oyBAug05n/5go6uAlKQahA0s9mSZifsKnD3ghYWH+TuuyTJ3XeZ2cBGznNJL5mZS3oo4fotLX8cARQA0CnEg1mjAdPMXpE0OMmh77biNhe4+854gHzZzNa4++utrKokAigAICRtPQvX3a9o7JiZ7TGz/Hj2mC9pbyPX2Bn/d6+ZPSNpuqTXJbWofCLGQAEAoejgMdB5km6Ov75Z0nMNTzCzXDPLO/Za0lWSVra0fENkoACAUHTwQgr3SXrKzG6TtE3SZyTJzIZI+rW7z5Q0SNIzZibV
xb/H3f3Fpso3hQAKAAhFbQcupODuByRdnmT/Tkkz4683STqjNeWbQhcuAAABkIECAELBWrgAAATgEfuD2gRQAEAooraYPAEUABCKqHXhMokIAIAAyEABAKGIWgZKAAUAhCKEP0nWpRBAAQChiNokIsZAAQAIgAwUABAKxkABAAjAa7yjq9CuCKAAgFAwiQgAgAC8NloZKJOIAAAIgAwUABCKWsZAAQBoPWbhAgAQALNwAQAIIGpduEwiAgAgADJQAEAoGAMFACCA2og9B0oABQCEImqTiBgDBQAgADJQAEAoWAsXAIAAotaFSwAFAISCAAoAQABR68JlEhEAAAGQgQIAQhG1vwdKAAUAhCJqa+ESQAEAoYjaUn6MgQIAQuE1ntKWCjPrZ2Yvm9n6+L99k5xzipktT9iKzOyu+LEfmtmOhGMzm7snARQA0B3MkfSqu4+X9Gr8fT3uvtbdz3T3MyVNlVQm6ZmEU3567Li7P9/cDZvtwjWz6XX39cVmNlHSDElrWnJxAEB0dPAY6CxJl8Rfz5W0QNK3mzj/ckkb3X1r0Bs2mYGa2Q8k/VzSA2b2r5J+KamnpDlm9t0mys02syVmtqSgoCBo3QAAXYjX1qa0JcaO+Da7Fbcf5O67JCn+78Bmzr9W0hMN9t1pZu+b2cPJuoAbMvfGvzGY2QpJZ0rKkrRb0jB3LzKzHEkL3X1yczeQFK1pWQDQuVlbXXj+yVNS+n1/9bplTdbNzF6RNDjJoe9KmuvufRLOPeTuSYOgmWVK2inpNHffE983SNJ+1cWseyXlu/utTdWnuS7canevkVRmZhvdvUiS3L3czKI13QoA0KHc/YrGjpnZHjPLd/ddZpYvaW8Tl7pG0rJjwTN+7eOvzey/JP25ufo0N4noqJn1iL+emnDx3pIIoACA4zpyFq6keZJujr++WdJzTZx7nRp038aD7jGflLSyuRs2l4Fe5O6VkuTuiQEzI6GiAAB09HOg90l6ysxuk7RN0mckycyGSPq1u8+Mv+8h6UpJtzco/yMzO1N1Xbhbkhw/QZNjoCFhDBQAOo82GwP9y/DJKf2+/+j299usbm2BlYgAAKGI2p8zYyEFAAACIAMFAISitu2HBDsVAigAIBQ1BFAAAFovYkOgBFAAQDiiloEyiQgAgADIQAEAoaALFwCAAKLWhUsABQCEImoZKGOgAAAEQAYKAAgFXbgAAAQQtS5cAigAIBQEUAAAAohaFy6TiAAACIAMFAAQCrpwAQAIIGpduARQAEAoopaBMgYKAEAAZKAAgFDQhQsAQABR68IlgAIAQkEGCgBAALUdXYF2xiQiAAACIAMFAISCLlwAAAJgEhEAAAGQgQIAEEDUMlAmEQEAEAAZKAAgFFHrwiUDBQCEosZT21JhZp8xs1VmVmtm05o4b4aZrTWzDWY2J2F/PzN72czWx//t29w9CaAAgFDUuKe0pWilpE9Jer2xE8wsJul+SddImijpOjObGD88R9Kr7j5e0qvx900igAIAujx3X+3ua5s5bbqkDe6+yd2PSnpS0qz4sVmS5sZfz5X0iebuyRgoACAUXWAW7lBJ2xPeF0o6J/56kLvvkiR332VmA5u7WHsEUGuHe4TKzGa7e0FH16M7o43bHm3cPmjnDz3oW1L6fW9msyXNTthVkNi2ZvaKpMFJin7X3Z9ryS2S7Asc9slAk5stif8h2hZt3PZo4/ZBO4ckHiwbbUt3vyLFWxRKGp7wfpiknfHXe8wsP5595kva29zFGAMFAETFYknjzWy0mWVKulbSvPixeZJujr++WVKzGS0BFADQ5ZnZJ82sUNJ5kv5iZvPj+4eY2fOS5O7Vku6UNF/SaklPufuq+CXuk3Slma2XdGX8fdP39Ig9+NoSjGm0Pdq47dHG7YN2ji4CKAAAAdCFCwBAAATQBI0t8YTwmNnDZrbXzFZ2dF26KzMbbmavmdnq+NJmX+3oOnU3ZpZtZovM7L14G9/T0XVC+6MLNy6+xNM6
1Q0eF6puttZ17v5Bh1asmzGziySVSHrU3U/v6Pp0R/Ep+PnuvszM8iQtlfQJfpbDY2YmKdfdS8wsQ9Kbkr7q7u90cNXQjshAP9TUEk8Iibu/LulgR9ejO3P3Xe6+LP66WHWzDYd2bK26F69TEn+bEd/IRiKGAPqhZEs88UsHXZqZjZJ0lqSFHVyVbsfMYma2XHUP3L/s7rRxxBBAPxTqEk9ARzOznpL+JOkudy/q6Pp0N+5e4+5nqm41m+lmxpBExBBAP9TUEk9AlxIfl/uTpN+5+9MdXZ/uzN0PS1ogaUbH1gTtjQD6oaaWeAK6jPgEl99IWu3uP+no+nRHZjbAzPrEX+dIukLSmg6tFNodATSumSWeEBIze0LS3yWdYmaFZnZbR9epG7pA0o2SLjOz5fFtZkdXqpvJl/Samb2vui/fL7v7nzu4TmhnPMYCAEAAZKAAAARAAAUAIAACKAAAARBAAQAIgAAKAEAABFAAAAIggAIAEAABFACAAP4/bXVxjNw17FEAAAAASUVORK5CYII=\n",
431 | "text/plain": [
432 | ""
433 | ]
434 | },
435 | "metadata": {
436 | "needs_background": "light"
437 | },
438 | "output_type": "display_data"
439 | }
440 | ],
441 | "source": [
442 | "plt.figure(figsize=(8,6))\n",
443 | "sns.heatmap(combined_means.corr(),cmap='RdBu',\n",
444 | " mask=mask,vmax=1,vmin=-1,annot=True,annot_kws={\"size\": 15})\n",
445 | "plt.savefig('remove_g_corr.pdf')\n",
446 | "plt.show()"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": null,
452 | "id": "801e7e7d-55b0-488f-a4c9-6c81d8b6ef73",
453 | "metadata": {},
454 | "outputs": [],
455 | "source": [
456 | "# no statistically significant enrichments found, so no plots "
457 | ]
458 | }
459 | ],
460 | "metadata": {
461 | "kernelspec": {
462 | "display_name": "plot",
463 | "language": "python",
464 | "name": "plot"
465 | },
466 | "language_info": {
467 | "codemirror_mode": {
468 | "name": "ipython",
469 | "version": 3
470 | },
471 | "file_extension": ".py",
472 | "mimetype": "text/x-python",
473 | "name": "python",
474 | "nbconvert_exporter": "python",
475 | "pygments_lexer": "ipython3",
476 | "version": "3.9.10"
477 | }
478 | },
479 | "nbformat": 4,
480 | "nbformat_minor": 5
481 | }
482 |
--------------------------------------------------------------------------------
/figures/supplementary_figures/.ipynb_checkpoints/g_enrichments-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 61,
6 | "id": "23720687-2ad7-4d94-8df1-39b834c5e456",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import numpy as np\n",
11 | "import pandas as pd \n",
12 | "import anndata \n",
13 | "import matplotlib.pyplot as plt\n",
14 | "import shap as shap \n",
15 | "import seaborn as sns\n",
16 | "import math as math"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 62,
22 | "id": "654a8332-3a7a-4e98-9e86-9a9f44072a58",
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "# drop G\n",
27 | "aux_0 = pd.read_csv('kang_remove_g/aux_0.csv')\n",
28 | "aux_1 = pd.read_csv('kang_remove_g/aux_1.csv')\n",
29 | "aux_2 = pd.read_csv('kang_remove_g/aux_2.csv')\n",
30 | "aux_3 = pd.read_csv('kang_remove_g/aux_3.csv')"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 63,
36 | "id": "8a1707e6-79f5-434c-b6de-d41a31658f88",
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/html": [
42 | "\n",
43 | "\n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " | \n",
60 | " index | \n",
61 | " means | \n",
62 | " stds | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " | 233 | \n",
68 | " IL8 | \n",
69 | " 1.472220 | \n",
70 | " 1.072127 | \n",
71 | "
\n",
72 | " \n",
73 | " | 250 | \n",
74 | " H2AFZ | \n",
75 | " 0.660040 | \n",
76 | " 0.297694 | \n",
77 | "
\n",
78 | " \n",
79 | " | 707 | \n",
80 | " SQRDL | \n",
81 | " 0.541798 | \n",
82 | " 0.295089 | \n",
83 | "
\n",
84 | " \n",
85 | " | 347 | \n",
86 | " PLA2G7 | \n",
87 | " 0.439908 | \n",
88 | " 0.348958 | \n",
89 | "
\n",
90 | " \n",
91 | " | 320 | \n",
92 | " HIST1H2AC | \n",
93 | " 0.385302 | \n",
94 | " 0.294543 | \n",
95 | "
\n",
96 | " \n",
97 | " | ... | \n",
98 | " ... | \n",
99 | " ... | \n",
100 | " ... | \n",
101 | "
\n",
102 | " \n",
103 | " | 342 | \n",
104 | " TREM2 | \n",
105 | " 0.000012 | \n",
106 | " 0.000478 | \n",
107 | "
\n",
108 | " \n",
109 | " | 960 | \n",
110 | " RRM2 | \n",
111 | " 0.000011 | \n",
112 | " 0.000294 | \n",
113 | "
\n",
114 | " \n",
115 | " | 797 | \n",
116 | " PTRF | \n",
117 | " 0.000010 | \n",
118 | " 0.000238 | \n",
119 | "
\n",
120 | " \n",
121 | " | 12 | \n",
122 | " ALDH4A1 | \n",
123 | " 0.000009 | \n",
124 | " 0.000227 | \n",
125 | "
\n",
126 | " \n",
127 | " | 969 | \n",
128 | " ABCC2 | \n",
129 | " 0.000005 | \n",
130 | " 0.000174 | \n",
131 | "
\n",
132 | " \n",
133 | "
\n",
134 | "
979 rows × 3 columns
\n",
135 | "
"
136 | ],
137 | "text/plain": [
138 | " index means stds\n",
139 | "233 IL8 1.472220 1.072127\n",
140 | "250 H2AFZ 0.660040 0.297694\n",
141 | "707 SQRDL 0.541798 0.295089\n",
142 | "347 PLA2G7 0.439908 0.348958\n",
143 | "320 HIST1H2AC 0.385302 0.294543\n",
144 | ".. ... ... ...\n",
145 | "342 TREM2 0.000012 0.000478\n",
146 | "960 RRM2 0.000011 0.000294\n",
147 | "797 PTRF 0.000010 0.000238\n",
148 | "12 ALDH4A1 0.000009 0.000227\n",
149 | "969 ABCC2 0.000005 0.000174\n",
150 | "\n",
151 | "[979 rows x 3 columns]"
152 | ]
153 | },
154 | "execution_count": 63,
155 | "metadata": {},
156 | "output_type": "execute_result"
157 | }
158 | ],
159 | "source": [
160 | "aux_0.sort_values('means',ascending=False)"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": 64,
166 | "id": "b85cd0f4-6b0c-4a1b-82e7-24e0b207a830",
167 | "metadata": {},
168 | "outputs": [],
169 | "source": [
170 | "combined_means = pd.DataFrame(index = aux_0.index)"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 65,
176 | "id": "c9a0bd21-db5e-4109-9fdd-c6c585f99f52",
177 | "metadata": {},
178 | "outputs": [],
179 | "source": [
180 | "combined_means[0] = aux_0['means']\n",
181 | "combined_means[1] = aux_1['means']\n",
182 | "combined_means[2] = aux_2['means']\n",
183 | "combined_means[3] = aux_3['means']"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 66,
189 | "id": "a69cab5f-5575-4543-8c05-e3a7dabfe983",
190 | "metadata": {},
191 | "outputs": [
192 | {
193 | "data": {
194 | "text/html": [
195 | "\n",
196 | "\n",
209 | "
\n",
210 | " \n",
211 | " \n",
212 | " | \n",
213 | " 0 | \n",
214 | " 1 | \n",
215 | " 2 | \n",
216 | " 3 | \n",
217 | "
\n",
218 | " \n",
219 | " \n",
220 | " \n",
221 | " | 0 | \n",
222 | " 0.003762 | \n",
223 | " 0.007630 | \n",
224 | " 0.005264 | \n",
225 | " 0.014630 | \n",
226 | "
\n",
227 | " \n",
228 | " | 1 | \n",
229 | " 0.001084 | \n",
230 | " 0.002251 | \n",
231 | " 0.002387 | \n",
232 | " 0.000991 | \n",
233 | "
\n",
234 | " \n",
235 | " | 2 | \n",
236 | " 0.000217 | \n",
237 | " 0.001149 | \n",
238 | " 0.000799 | \n",
239 | " 0.000178 | \n",
240 | "
\n",
241 | " \n",
242 | " | 3 | \n",
243 | " 0.000471 | \n",
244 | " 0.000526 | \n",
245 | " 0.001402 | \n",
246 | " 0.000415 | \n",
247 | "
\n",
248 | " \n",
249 | " | 4 | \n",
250 | " 0.002461 | \n",
251 | " 0.001258 | \n",
252 | " 0.000957 | \n",
253 | " 0.000706 | \n",
254 | "
\n",
255 | " \n",
256 | " | ... | \n",
257 | " ... | \n",
258 | " ... | \n",
259 | " ... | \n",
260 | " ... | \n",
261 | "
\n",
262 | " \n",
263 | " | 974 | \n",
264 | " 0.000141 | \n",
265 | " 0.000167 | \n",
266 | " 0.000026 | \n",
267 | " 0.000326 | \n",
268 | "
\n",
269 | " \n",
270 | " | 975 | \n",
271 | " 0.000030 | \n",
272 | " 0.000044 | \n",
273 | " 0.000069 | \n",
274 | " 0.000031 | \n",
275 | "
\n",
276 | " \n",
277 | " | 976 | \n",
278 | " 0.000287 | \n",
279 | " 0.000068 | \n",
280 | " 0.000032 | \n",
281 | " 0.000397 | \n",
282 | "
\n",
283 | " \n",
284 | " | 977 | \n",
285 | " 0.000135 | \n",
286 | " 0.000182 | \n",
287 | " 0.000036 | \n",
288 | " 0.000130 | \n",
289 | "
\n",
290 | " \n",
291 | " | 978 | \n",
292 | " 0.000183 | \n",
293 | " 0.000211 | \n",
294 | " 0.000104 | \n",
295 | " 0.000023 | \n",
296 | "
\n",
297 | " \n",
298 | "
\n",
299 | "
979 rows × 4 columns
\n",
300 | "
"
301 | ],
302 | "text/plain": [
303 | " 0 1 2 3\n",
304 | "0 0.003762 0.007630 0.005264 0.014630\n",
305 | "1 0.001084 0.002251 0.002387 0.000991\n",
306 | "2 0.000217 0.001149 0.000799 0.000178\n",
307 | "3 0.000471 0.000526 0.001402 0.000415\n",
308 | "4 0.002461 0.001258 0.000957 0.000706\n",
309 | ".. ... ... ... ...\n",
310 | "974 0.000141 0.000167 0.000026 0.000326\n",
311 | "975 0.000030 0.000044 0.000069 0.000031\n",
312 | "976 0.000287 0.000068 0.000032 0.000397\n",
313 | "977 0.000135 0.000182 0.000036 0.000130\n",
314 | "978 0.000183 0.000211 0.000104 0.000023\n",
315 | "\n",
316 | "[979 rows x 4 columns]"
317 | ]
318 | },
319 | "execution_count": 66,
320 | "metadata": {},
321 | "output_type": "execute_result"
322 | }
323 | ],
324 | "source": [
325 | "combined_means"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": 67,
331 | "id": "f2040385-8056-4c9b-bdee-ee1ab665c26f",
332 | "metadata": {},
333 | "outputs": [
334 | {
335 | "data": {
336 | "text/html": [
337 | "\n",
338 | "\n",
351 | "
\n",
352 | " \n",
353 | " \n",
354 | " | \n",
355 | " 0 | \n",
356 | " 1 | \n",
357 | " 2 | \n",
358 | " 3 | \n",
359 | "
\n",
360 | " \n",
361 | " \n",
362 | " \n",
363 | " | 0 | \n",
364 | " 1.000000 | \n",
365 | " 0.690757 | \n",
366 | " 0.703157 | \n",
367 | " 0.723252 | \n",
368 | "
\n",
369 | " \n",
370 | " | 1 | \n",
371 | " 0.690757 | \n",
372 | " 1.000000 | \n",
373 | " 0.698095 | \n",
374 | " 0.634515 | \n",
375 | "
\n",
376 | " \n",
377 | " | 2 | \n",
378 | " 0.703157 | \n",
379 | " 0.698095 | \n",
380 | " 1.000000 | \n",
381 | " 0.589254 | \n",
382 | "
\n",
383 | " \n",
384 | " | 3 | \n",
385 | " 0.723252 | \n",
386 | " 0.634515 | \n",
387 | " 0.589254 | \n",
388 | " 1.000000 | \n",
389 | "
\n",
390 | " \n",
391 | "
\n",
392 | "
"
393 | ],
394 | "text/plain": [
395 | " 0 1 2 3\n",
396 | "0 1.000000 0.690757 0.703157 0.723252\n",
397 | "1 0.690757 1.000000 0.698095 0.634515\n",
398 | "2 0.703157 0.698095 1.000000 0.589254\n",
399 | "3 0.723252 0.634515 0.589254 1.000000"
400 | ]
401 | },
402 | "execution_count": 67,
403 | "metadata": {},
404 | "output_type": "execute_result"
405 | }
406 | ],
407 | "source": [
408 | "combined_means.corr()"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 68,
414 | "id": "7aa9b78a-9769-49f2-ab16-e30ef48a91a1",
415 | "metadata": {},
416 | "outputs": [],
417 | "source": [
418 | "corr_mat = combined_means.corr().abs()\n",
419 | "mask = np.tril(np.ones_like(corr_mat, dtype=bool)) "
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": 69,
425 | "id": "8c5f0225-033b-4559-bb16-6500ed35ed8f",
426 | "metadata": {},
427 | "outputs": [
428 | {
429 | "data": {
430 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdAAAAFpCAYAAAAsmHm9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAp80lEQVR4nO3dd5gc1Znv8d87PVGjUUJplCMggQRIQiSTk5C9lu1rewETDNgC2+wa4yRfXweW595l7V17HTAwa4PFYsDYJmhtQAQjk4wiAkko51HOk0cT3vvHtETPqCdV18T6fp6nHnVX1ak6fRjm7fecU2fM3QUAAFonraMrAABAV0QABQAgAAIoAAABEEABAAiAAAoAQAAEUAAAAiCAAgC6BDN72Mz2mtnKRo6bmf3czDaY2ftmNiXh2AwzWxs/NieM+hBAAQBdxW8lzWji+DWSxse32ZIekCQzi0m6P358oqTrzGxiqpUhgAIAugR3f13SwSZOmSXpUa/zjqQ+ZpYvabqkDe6+yd2PSnoyfm5KCKAAgO5iqKTtCe8L4/sa25+S9FQv0AKsFdjGpv1gfkdXIRJqqms7ugrdXm0Nvy7a2nv3zbS2unbmWbem9B+wavkjt6uu6/WYAncvaMUlkn02b2J/StojgAIAIsDSYimVjwfL1gTMhgolDU94P0zSTkmZjexPCV24AIBQWFospS0E8yTdFJ+Ne66kI+6+S9JiSePNbLSZZUq6Nn5uSshAAQChCCkINn59syckXSKpv5kVSvqBpAxJcvcHJT0vaaakDZLKJN0SP1ZtZndKmi8pJulhd1+Van0IoACALsHdr2vmuEv6SiPHnlddgA0NARQAEIq2zkA7GwIoACAUFiOAAgDQamkRy0CZhQsAQABkoACAUDAGCgBAAARQAAACsLRojQoSQAEAoYhaBhqtrwsAAISEDBQAEIqoZaAEUABAKAigAAAEwEpEAAAEELUMlElEAAAEQAYKAAhF1DJQAigAIBRRW0yeAAoACEXUMlDGQAEACIAMFAAQiqhloARQAEAoCKAAAARAAAUAIICoBVAmEQEAEAAZKAAgFKyFCwBAAFHrwiWAAgBCQQAFACCAqAVQJhEBABAAGSgCGz0gV9+cOUGTh/VRcUWVnl22Q/+1YINqvfmyl04YqM9fOEZjB/ZURVWNPthRpG/9frkqqmqOn3PrRWP0qanD1Dc3U5v3leqXr6zTOxsPtOEn6jrGDOipb/3DBE0e3lclFVV6ZkmhHvrr+ibb/vbLxumOy8cnPfaL+Wv18Oub2qi2nd+YgT015+MTNXlEXxVXVOmZxdv14CtNt+cdV4zXl65I3p4/e3GtHl6wUWkm3XzRGF106kCNGdhTkrR6xxH94qV1WlV4pC0+SodKS7OOrkK7IoAikLzsdP3qpmnatK9UX3/iXQ3rl6O7rj5FaSY98NcNTZadNWWovjVzgh59a4t+/tJa5WVn6Owx/RRL+J/v8xeO1hcuHquHXtugtbuKNPOMIfrp9VN0228W6oOdRW398Tq1vOx0PXjr2dq0t0Rfe2yphp/UQ3dfc6rMpF+9sr7Rcs8sKdTb6/fX23fphIG65eKxenP9vraudqeVl5Ouh74wXZv2lOiuR+va8+sfPVVmpvtfWtdouacXbddba+u322WnDdKtl4zVW2v3SpKyMmK69ZKxem5JoX6zYKPcpWvPH6nf3nGubnrg71q9o3v9LBsBFGje/zp7uLIyYvrW799VaWWNFm6ScrPSNfuScXr0rc0qraxJWq53jwzdPeNU/fiFNXp2aeHx/QvW7D3+Oj1m+vxHxmjum5s1983NkqR3Nh7Q6AG5+uIlY/W1x99t2w/XyX1m+ghlZcT09cffVWlltRZuPKDcrHTdftl4zX1js0orq5OW21tUob1FFfX2ffHSsdq0t0TrdhW3R9U7pc+cM1LZGTHd/dgylVZW650NdT/Ld1wxXr/926ZWtefsy8dp
094SrY23Z2VVjWb+6DUVl394jYUb92veNy7WdeeN0vf/+H7bfbAOYBatAMoYKAI5f1x/vbNhf71AOX/lbmVnxjRlZL9Gy1152mBJ0p+X72j0nGF9e6hndroWbarfXbtw4wGdM7a/0mPR+p+0oQtOHqC/r99X7xf7/Pd3KSczpqmjG2/7hnrlZOjcsf01//1dbVHNLuMjpwzQ2+vqt+eL7+1UTmZM08a0rj3PG9dfL7638/i+Wle94ClJ1TWujXtK1LdnZuqVR4cigCKQUf1ztWV/ab19e45UqPxotUYNyG203OnDemvr/lLNmjJMf7n7Yr3z/Sv12y+eo8nD+xw/Jyu97seyqqa2XtmjNbXKTE/T0L49wvsgXdCoAbnavK9+2+8+1vb9G2/7hq44fbAy0tP04vs7mz+5GxvdVHs28bPc0JWT4u35XtNfSDJiaZo4tLc27SkJVN/OLC3NUtpawsxmmNlaM9tgZnOSHP+mmS2PbyvNrMbM+sWPbTGzFfFjS1L+vKleANHUKydDxRUndm0VlVcrLzuj0XIn9czSyP65uu2iMfrFy+v0tcffVfnRGv38hqnql1v3jbzwULlqa12nDe1dr+yx971zGr9+FOQ10fa9WtE2V0/K1wc7jmjbgbIwq9fl5OVkqLi86oT9ReVVrWrPGWfk64PCI9ra4ItlQ1+8bKx65aTr6cXbW13Xzs7SLKWt2eubxSTdL+kaSRMlXWdmExPPcfcfu/uZ7n6mpO9I+pu7H0w45dL48Wmpft5mA6iZnWpm3zazn5vZz+KvJ6R6Y3R9rhOnKNYNgTQ+dTHN6saX7n1ulV5csUt/37Bf33jyXdW667PnjJAklVZWa/7KXbrlwjGaOqqfeuVk6B/PGaFzxpwkSappyTTfbs49edsn+2+STP+8LE0d3U8vRrz79phkrWYyJWnmpOra8yS98F7T2fyFpwzQFy4dp/98YW2zgbYrausAKmm6pA3uvsndj0p6UtKsJs6/TtITIXy0pJoMoGb2bdVV0CQtkrQ4/vqJZKlzQrnZZrbEzJYUFBSEWV90EkXlVUkzzZ5Z6Umzo2OOxL/pL93y4RfC0soard5ZpDEDeh7f9x8vrNHmfSV66Jaz9dc5l+nG80fpN/HHLA6WVob1Mbqk4vIq5SXJjHpmpZ8w3taYK08fLJP00goCaHF5lfKyT5xP2TM7XcUVJ2amyVw1OV8mNTmefNqw3vrR9Wfpjwu36XdvbQlY284tzSylLTF2xLfZDW4xVFJi6l4Y33cCM+shaYakPyXsdkkvmdnSJNduteZm4d4m6TR3r/dTZGY/kbRK0n3JCrl7gaRjkZN0oRvasr/0hPG2Qb2y1SMrXVv2Nf7Nesu+UtXWuhp+1zSTahO+7h8uq9KX5i7RwF5Z6pmVrq0HynTduSO1v7hSuw5XKMq27CvV6IZt3zve9i3MamZMztfyrYe050i021KSNu8r1eiEL29SQns28bOcaMbkfL3bRHuO7J+rX35+mhZuPKD75q1Kuc7dVYPYkUyyNLWxGPMPkt5q0H17gbvvNLOBkl42szXu/nrA6jbbhVsraUiS/fnxY4iotzfs17lj+6tH5odLd115+mBVHK3Rsq0HGy33xrp9SkszTUuYLZqbla4J+b20fveJj1LsLarUpn2liqWZPn7WUM17t/CEc6LmrXX7dN74+m1/1aR8lR+t0dLNjbf9Mfl9cjR5RF+6b+PeXLtP559cvz2vnlzXnks2Nd+eQ/rm6IyRffXC8uTdt/3zsvTArWdr+4EyzXni3RYtNNJVtUMXbqGk4Qnvh0lqrN/8WjXovnX3nfF/90p6RnVdwoE1l4HeJelVM1uvD9PmEZLGSbozlRuja/vT4u269pyR+vG1Z2rum5s1tG8Pzb5krH739y31Hm155p8v1LKtB3Xvc3XfulfvLNKC1Xv0vVmn6xcvr9PhsqO66SOjVV3remrRtuPlZk7OV3osTTsOlWlw7xxdf95I1brrkTc2
t/tn7Wz+sGibrj1/pP7jc1P029c3aWi/HrrjsnF67K36z4A+d/dFWrb5oO55ZmW98jMm56uqplavrNzd3lXvlP6wcKuuP3+kfnLjVD3yt40a1q+HvnTFeP33m/Xb83++cbGWbj6oH/5pRb3yTbVnVnqa7r/lbOXlZOhfn1ul8YN7HT9WVVOrNd1sUZB2WEhhsaTxZjZa0g7VBcnrT6iHWW9JF0u6IWFfrqQ0dy+Ov75K0r+kUpkmA6i7v2hmJ6suSg9VXfpcKGmxuyd/Uh6RUFxRrS/NXaxvzZygn1w/RSUV1Xr8na0qeK3+KkSxtLqxjUTfe3qFvnrVyfrajFOUnRHTe9sO647fLq43dmpmuvkjozW4d7ZKKqv1tzV7df8r61V+lB+74opq3fGbxfr2P0zUf944VcUVVfrd21v04Kv1VyFKb+TRgKsn52vRxgM6VHa0varcqRWXV2v2rxfpO7Mm6uc3T1NxeZUee3OzHmiwqlMs1kh7njFEizYc0KHSE9vzpLwsnTqkLmj+8paz6x3bcahMM/9tQXgfpBNo66X83L3azO6UNF9STNLD7r7KzO6IH38wfuonJb3k7ol98IMkPRNf7CFd0uPu/mIq9bFks/lC1o07LDqHaT+Y39FViISaakYt2lptDb8u2tp7981ssyh3xpznU/oP2JZ1awss5QcACIVFbGUBAigAIBRRWwuXAAoACAV/zgwAgACi9ufMItZjDQBAOMhAAQChiFoGSgAFAISi4TPf3R0BFAAQCjJQAAACiFoAZRIRAAABkIECAELBc6AAAATASkQAAAQQtbVwI/ZxAQAIBxkoACAUjIECABBA1B5jIYACAELBJCIAAAKIWhcuk4gAAAiADBQAEArGQAEACCBGAAUAoPUIoAAABBC1AMokIgAAAiADBQCEImoZKAEUABAKAigAAAGkRyyAMgYKAEAAZKAAgFDQhQsAQAAEUAAAAoilRWtUkAAKAAhF1DLQaH1dAAB0aWY2w8zWmtkGM5uT5PglZnbEzJbHt++3tGxrkYECAELR1hmomcUk3S/pSkmFkhab2Tx3/6DBqW+4+8cClm0xAigAIBTt0IU7XdIGd98kSWb2pKRZkloSBFMpmxQBtBtYcs/VHV2FSJj6vRc7ugrdXs8+2R1dBaQgZqkFUDObLWl2wq4Cdy9IeD9U0vaE94WSzklyqfPM7D1JOyV9w91XtaJsixFAAQChSDUDjQfLgiZOSXYDb/B+maSR7l5iZjMlPStpfAvLtgqTiAAAXUWhpOEJ74epLss8zt2L3L0k/vp5SRlm1r8lZVuLDBQAEIp2GANdLGm8mY2WtEPStZKuTzzBzAZL2uPubmbTVZcoHpB0uLmyrUUABQCEoq0Xk3f3ajO7U9J8STFJD7v7KjO7I378QUmflvQlM6uWVC7pWnd3SUnLplIfAigAIBTtsZBCvFv2+Qb7Hkx4/UtJv2xp2VQwBgoAQABkoACAUERtKT8CKAAgFARQAAACIIACABBA1AIok4gAAAiADBQAEIqoZaAEUABAKAigAAAEQAAFACCAqAVQJhEBABAAGSgAIBRRy0AJoACAUMSMAAoAQKulRSyAMgYKAEAAZKAAgFDEopWAEkABAOFIYxIRAACtxyQiAAACYBIRAABoFhkoACAUTCICACAAJhEBABBA1MZACaAAgFBErQuXSUQAAARABgoACAVduAAABMCfMwMAIAAyUACd3ugBufrWRydq8vA+Kq6o0rNLC1Xw2gbVeuNlZl86TrdfNi7psV++vE6PvL6pjWrb+Y06qYfuuvxknTakl0oqq/Xn93fpkbc3N9mex1w0vr9uOGekxvTPVUV1rdbsLtL/eW6lKqpqJUm3XjBKF40foMG9smUmbTtYpicWbddf1+5t40+FtkYABbqYvOx0PfD5s7VpX4nufnyZhvXroa/NOEVmpgdeXd9ouWeXbtfb6/fV23fphEH6/EVj9Na6fY2U6v56ZqXrp589U1sO
lOo7z6zQ0D45+sol42Qm/frNzU2W/dikfN11xXg9sWi7fvW3jcrLTtfUEX3rrQmbm5muF1bu1pYDpap11yUnD9Q9Hz9Ntc+5FnSzdo/aLFwCKNDFfHr6CGVlxPTNJ95VaWWNFm48oNysdN1+6Tg9+uYmlVbWJC23t6hSe4sq6+374iVjtXlfidbtLm6PqndKnzhziLLS0/TdZ1eq7GiNlmw9pNysdN1y/ig9vmibyo4mb8/eORn6p8vG6Wevrtf/vL/r+P431u+vd94vXttQ7/3iLYc0un+urj5tcLcLoFHrwuUxFqCLOX98f/19w/56gfKlFbuUnRnTlFH9WnydXjkZOmdsf81P+OUfReeMPkmLNh+sFyhfWb1H2RkxnTm8T6PlLj1lgCTphZW7W33PI+VVyuiG6VoszVLauhoCKNDFjOqfqy37Suvt232kQuVHqzWqf26Lr3P5aYOUkZ6m+SuiHUBH9uuhrQfL6u3bW1yp8qM1GtmvR6PlJub30raDZfrY5Hz96Y7z9NrdF+uhz03V6UN6JT0/ZqaeWem6csIgnT2qr55bvjPUz9EZpFlqW0uY2QwzW2tmG8xsTpLjnzOz9+Pb22Z2RsKxLWa2wsyWm9mSVD8vXbhAF9MrJ0PFFVUn7C8qr1avnIwWX+fqSflaveOIth0oa/7kbiwvO10lldUn7C+urFJeduPteVJulkb066Gbzh2lB/62UUfKq3T99BH690+foet+/Y4OlX3432hifi89dMNUSVJ1Ta1++up6vbFhf2OXRiPMLCbpfklXSiqUtNjM5rn7BwmnbZZ0sbsfMrNrJBVIOifh+KXuHkrjE0CBrijJ7FCzpLuT6t8zS1NG9dMvXlobarW6Kk/WnjJ5sgPHjpvUIzNd33tulRZtOShJWrnziP54+3n61FnD9Ju3PpyAtGl/ib7w6BL1zE7X+WNO0tcuH6/Symq9uqZ7zcRthz+oPV3SBnffJElm9qSkWZKOB1B3fzvh/HckDWurygTuwjWzW5o4NtvMlpjZkoKCgqC3AJBEUXmVeuac+N23Z1a6istPzEyTufL0wTJJL61o/fhdd1NcUa2e2Se2Z25WLGlmekxRvBdg+fbDx/eVHa3R2j3FGtW/ftdvRVWt1u4p1tKth/SL1zZo/gd79KWLx4bzATqRNLOUtsTYEd9mN7jFUEnbE94Xxvc15jZJLyS8d0kvmdnSJNdutVQy0HskPZLsgLsXqC5tllr+pRhAC2zZX6pR/XvW2zeoV7Z6ZKVry/7SRkrVd9WkfC3fdkh7iiraoopdytaDZSeMdQ7My1KPzPQTxkbrlTtQplp3qUHSVZe5Nn3PdXuK9dFJ+YqlmWpa8rBpFxFLcVZNg9iRTLIUN2kDmtmlqgugH0nYfYG77zSzgZJeNrM17v560Po2+XETBmIbbiskDQp6UwDBvb1+v84bd5J6ZMaO77tq0mBVHK3RsnhXYlPy++Ro8og+kZ99e8zCzQc0fVQ/5WR82J6XnTpQFVU19bLLht7eeEBpZpqSMFM3NzOmkwf11Ia9JU3ec9LQ3tpTVNGtgqeUegbaAoWShie8HybphNlYZjZZ0q8lzXL3A8f2u/vO+L97JT2jui7hwJrLQAdJulrSoYb1k/T2iacDaGt/XLRN1547Qj++7izNfWOzhvbL0exLx+mxt7fUe7Tl2bsu1NIth3Tvsyvrlb960mBV19TqlVV030rSs8t36tNThun/fuJ0/W7RNg3pna1bzh+l3y/ZXu/Rlie+cI6Wbz+sf5tfN268dk+x3li/T3NmnKoHX990fBJRTa3r6Xd3SJIG9crSd2ZM0Cur92jnkXLlZMR00fgBumLCIP07489BLJY03sxGS9oh6VpJ1yeeYGYjJD0t6UZ3X5ewP1dSmrsXx19fJelfUqlMcwH0z5J6uvvyhgfMbEEqNwYQTHFFte54ZLG+/bGJ+ukNU1RSUaXH/75FD/21/gP7sbS0pJM6rpqUr0WbDuhwWcvG
S7u7kspq3fXUct11+cn6t09OUklltZ5aUqhH3q6/ClGyZxXv/ctqffmSsbrz0nHKTk/Tip1H9NXfLz8+dlpSUa0DpZW66byR6pebqZLKam3ZX6Zv/vE9vbO5+d6CrqatJxG5e7WZ3SlpvqSYpIfdfZWZ3RE//qCk70s6SdKvrK4+1e4+TXUJ4TPxfemSHnf3F1OpjzU1yywk3auPApE19Xsp/b+GFujRK6ujq9DtvfHNS9ssyi3dfjil3/dTh/fpUqsp8BgLACAUqU4i6moi9nEBAAgHGSgAIBRRW0yeAAoACEXE4icBFAAQjrSk6xx0XwRQAEAoopaBMokIAIAAyEABAKHogn8TOyUEUABAKKLWhUsABQCEImqTiBgDBQAgADJQAEAo6MIFACAAJhEBABBAxOInARQAEI6orYXLJCIAAAIgAwUAhCJiCSgBFAAQjqh1aRJAAQChsIiloARQAEAoovYYS9QybgAAQkEGCgAIRcR6cAmgAIBwRK1LkwAKAAhF1CYRRe0LAwAAoSADBQCEImqzcAmgAIBQRCx+EkABAOEgAwUAIAAmEQEAgGaRgQIAQhG1LlwyUABAKCzFrUX3MJthZmvNbIOZzUly3Mzs5/Hj75vZlJaWbS0CKAAgFGlmKW3NMbOYpPslXSNpoqTrzGxig9OukTQ+vs2W9EAryrbu86ZSGACAY8xS21pguqQN7r7J3Y9KelLSrAbnzJL0qNd5R1IfM8tvYdlWIYACALqKoZK2J7wvjO9ryTktKdsqTCICWmjpvTM6ugrd3tW/equjq4AUmHtq5c1mq67b9ZgCdy9IPCVJsYY3beyclpRtFQIoACAcXpta8bpgWdDEKYWShie8HyZpZwvPyWxB2VahCxcAEArz2pS2FlgsabyZjTazTEnXSprX4Jx5km6Kz8Y9V9IRd9/VwrKtQgYKAOgS3L3azO6UNF9STNLD7r7KzO6IH39Q0vOSZkraIKlM0i1NlU2lPgRQAEA4UuzCbdEt3J9XXZBM3PdgwmuX9JWWlk0FARQAEI4UJxF1NQRQAEA42iED7UwIoACAULRwIlC3wSxcAAACIAMFAIQjYhkoARQAEA4CKAAAARBAAQAIoDZaAZRJRAAABEAGCgAIRdQeYyGAAgDCQQAFACCAiC3lxxgoAAABkIECAMJBFy4AAK3HJCIAAIIggAIAEEDEAiiTiAAACIAMFAAQjohloARQAEAomEQEAEAQEVtMngAKAAgHKxEBAIDmkIECAMLBGCgAAK3HJCIAAIKIWABlDBQAgADIQAEA4YhYBkoABQCEo7amo2vQrgigACJvRN8cffnCMZowKE+lR2v04gd79NiSbapt4rHGQXlZevTGaSfsX7B+n/715XX19l03dZhmThysPjkZ2nqoTI+8s1VLtx8O+VN0PGchBQCIjp5ZMd338dO17WCZ7nlhtfJ752j2+aNkJs1dtK3Z8gVvbdaq3UXH3xeVV9c7/o9Thur6acP134u2aeP+Ul128gDdM3OC7n5mhdbtLQn983QoMlAAiI6PnjZYmbE0/cuLa1RWVSMVHlGPjJhuOHu4/vDujrp9TSg8XK41e5IHwvQ00z9OGaY/LNuhp97dIUlauv2wRvbtoRumDdf3n18d+udB+2EWLoBIO3tEXy3dfqheoFywYZ+yM2KaNKRXStfO752t3Mx0LSs8XG//ssLDOmt4H6WnWUrX73Rqa1LbUmBm/czsZTNbH/+3b5JzhpvZa2a22sxWmdlXE4790Mx2mNny+DazuXuSgQKItOF9crR8x5F6+/aVHFVFVY2G9+2hhVsPNVn+7svGKy8rXYfLq7Rg/T79duE2Ha2pGwvMjNXlKNUNxgaramqVGUtTfq9sbT9cHuKn6Vhe06FduHMkveru95nZnPj7bzc4p1rS1919mZnlSVpqZi+7+wfx4z91939v6Q2bDaBmdqqkoZIWuntJwv4Z7v5iS28EAJ1Rz6x0lVZWn7C/uLJaeVmxRstV
1dRq3opdWrr9sMqOVmvy0N767FlDNaR3tn74whpJ0q6iCtW66+SBefW6eU8ZmCdJysvuZjlMx04imiXpkvjruZIWqEEAdfddknbFXxeb2WrVxbcPFECTXbhm9s+SnpP0T5JWmtmshMP/r4lys81siZktKSgoCFIvAGg3ySbbWiP7jzlYVqX739ikd7Yc1Ps7i/TY4u0qeGuLzht9ksaclCtJKjtaowXr9+u6qcN0xpDeystK18cn5eusYb0lSTVNTfPtilLswk2MHfFtdivuPigeII8FyoFNnWxmoySdJWlhwu47zex9M3s4WRdwQ819/fmipKnuXhK/2R/NbJS7/0x1P19JuXuBpGORs5v9hADoTkoqq5WbeeKvwtysdJVUtq5L8o2N+/VPF4/VuAG52nSgVJL04Jub9L+vOkU/+sTpkqS9xZV6fGmhbpo+QofLq1L/AN1Ig9hxAjN7RdLgJIe+25r7mFlPSX+SdJe7H5tC/YCke1UXs+6V9B+Sbm3qOs0F0Nixblt332Jml6guiI5UEwEUALqK7YfLNbxvTr19A3pmKicjpu2Hylp1rWTZwpGKan173ir1z81UbmZM2w+X61NnDNGB0qPaU1yZQs07H2/jx1jc/YrGjpnZHjPLd/ddZpYvaW8j52WoLnj+zt2fTrj2noRz/kvSn5urT3OzcHeb2ZkJNyiR9DFJ/SVNau7iANDZLd52SNOG91FOxofjnReP66+Kqhqt2FnURMkTXTi2vyRp/b4TH2vZX3pUWw+VK5ZmuurUQXppzZ4TzunyamtT21IzT9LN8dc3q274sR4zM0m/kbTa3X/S4Fh+wttPSlrZ3A2by0BvUt2spePcvVrSTWb2UHMXB4DO7i+rdmvWpCH6/oxT9dS7hRrcK1s3nD1CT7+3s96jLY98bore31mkn762QZJ0w9nD1SMjplW7i1R2tEaT8nvr02cN0Zsb92vzgQ8z18tPHqBYmml3UYUG5mXpk5OHqNZdTy4tbPfP2tbaOgNtxn2SnjKz2yRtk/QZSTKzIZJ+7e4zJV0g6UZJK8xsebzc/3b35yX9KJ4wuqQtkm5v7oZNBlB3b/S/sLu/1dzFAaCzK6ms0Zx5K/WVC8fonpkTVFJZo6ff26nHFtdfhSjNTImPbW4/VK5PnzlUMyYMUmZ6mvaVVOqP7+7UE0u31ytnJn32rGEalJel0qPVenvzQT3yzlZVVEdr2bu25u4HJF2eZP9OSTPjr99UI8OP7n5ja+9p7m0+x4dJRABa5Opf8b28rc3/8gVtNn/l6FtPpfT7PvOCz3apuTXd7CEkAECHYTF5AABar4NXImp3BFAAQDgi9tdYWEweAIAAyEABAOGIWAZKAAUAhMKZRAQAQAARy0AZAwUAIAAyUABAOCKWgRJAAQChYAwUAIAgyEABAAggYgGUSUQAAARABgoACAVr4QIAEASTiAAACCBiY6AEUABAKDxiAZRJRAAABEAGCgAIBQspAAAQgNcQQAEAaLWoBVDGQAEACIAMFAAQCsZAAQAIIGpduARQAEAoCKAAAARQG7G1cJlEBABAAGSgAIBQMIkIAIAAGAMFACAAAigAAAFErQuXSUQAgC7PzPqZ2ctmtj7+b99GzttiZivMbLmZLWlt+UQEUABAKGpralPaUjRH0qvuPl7Sq/H3jbnU3c9092kBy0sigAIAQuI1tSltKZolaW789VxJn2jr8oyBAug05n/5go6uAlKQahA0s9mSZifsKnD3ghYWH+TuuyTJ3XeZ2cBGznNJL5mZS3oo4fotLX8cARQA0CnEg1mjAdPMXpE0OMmh77biNhe4+854gHzZzNa4++utrKokAigAICRtPQvX3a9o7JiZ7TGz/Hj2mC9pbyPX2Bn/d6+ZPSNpuqTXJbWofCLGQAEAoejgMdB5km6Ov75Z0nMNTzCzXDPLO/Za0lWSVra0fENkoACAUHTwQgr3SXrKzG6TtE3SZyTJzIZI+rW7z5Q0SNIzZibV
xb/H3f3Fpso3hQAKAAhFbQcupODuByRdnmT/Tkkz4683STqjNeWbQhcuAAABkIECAELBWrgAAATgEfuD2gRQAEAooraYPAEUABCKqHXhMokIAIAAyEABAKGIWgZKAAUAhCKEP0nWpRBAAQChiNokIsZAAQAIgAwUABAKxkABAAjAa7yjq9CuCKAAgFAwiQgAgAC8NloZKJOIAAAIgAwUABCKWsZAAQBoPWbhAgAQALNwAQAIIGpduEwiAgAgADJQAEAoGAMFACCA2og9B0oABQCEImqTiBgDBQAgADJQAEAoWAsXAIAAotaFSwAFAISCAAoAQABR68JlEhEAAAGQgQIAQhG1vwdKAAUAhCJqa+ESQAEAoYjaUn6MgQIAQuE1ntKWCjPrZ2Yvm9n6+L99k5xzipktT9iKzOyu+LEfmtmOhGMzm7snARQA0B3MkfSqu4+X9Gr8fT3uvtbdz3T3MyVNlVQm6ZmEU3567Li7P9/cDZvtwjWz6XX39cVmNlHSDElrWnJxAEB0dPAY6CxJl8Rfz5W0QNK3mzj/ckkb3X1r0Bs2mYGa2Q8k/VzSA2b2r5J+KamnpDlm9t0mys02syVmtqSgoCBo3QAAXYjX1qa0JcaO+Da7Fbcf5O67JCn+78Bmzr9W0hMN9t1pZu+b2cPJuoAbMvfGvzGY2QpJZ0rKkrRb0jB3LzKzHEkL3X1yczeQFK1pWQDQuVlbXXj+yVNS+n1/9bplTdbNzF6RNDjJoe9KmuvufRLOPeTuSYOgmWVK2inpNHffE983SNJ+1cWseyXlu/utTdWnuS7canevkVRmZhvdvUiS3L3czKI13QoA0KHc/YrGjpnZHjPLd/ddZpYvaW8Tl7pG0rJjwTN+7eOvzey/JP25ufo0N4noqJn1iL+emnDx3pIIoACA4zpyFq6keZJujr++WdJzTZx7nRp038aD7jGflLSyuRs2l4Fe5O6VkuTuiQEzI6GiAAB09HOg90l6ysxuk7RN0mckycyGSPq1u8+Mv+8h6UpJtzco/yMzO1N1Xbhbkhw/QZNjoCFhDBQAOo82GwP9y/DJKf2+/+j299usbm2BlYgAAKGI2p8zYyEFAAACIAMFAISitu2HBDsVAigAIBQ1BFAAAFovYkOgBFAAQDiiloEyiQgAgADIQAEAoaALFwCAAKLWhUsABQCEImoZKGOgAAAEQAYKAAgFXbgAAAQQtS5cAigAIBQEUAAAAohaFy6TiAAACIAMFAAQCrpwAQAIIGpduARQAEAoopaBMgYKAEAAZKAAgFDQhQsAQABR68IlgAIAQkEGCgBAALUdXYF2xiQiAAACIAMFAISCLlwAAAJgEhEAAAGQgQIAEEDUMlAmEQEAEAAZKAAgFFHrwiUDBQCEosZT21JhZp8xs1VmVmtm05o4b4aZrTWzDWY2J2F/PzN72czWx//t29w9CaAAgFDUuKe0pWilpE9Jer2xE8wsJul+SddImijpOjObGD88R9Kr7j5e0qvx900igAIAujx3X+3ua5s5bbqkDe6+yd2PSnpS0qz4sVmS5sZfz5X0iebuyRgoACAUXWAW7lBJ2xPeF0o6J/56kLvvkiR332VmA5u7WHsEUGuHe4TKzGa7e0FH16M7o43bHm3cPmjnDz3oW1L6fW9msyXNTthVkNi2ZvaKpMFJin7X3Z9ryS2S7Asc9slAk5stif8h2hZt3PZo4/ZBO4ckHiwbbUt3vyLFWxRKGp7wfpiknfHXe8wsP5595kva29zFGAMFAETFYknjzWy0mWVKulbSvPixeZJujr++WVKzGS0BFADQ5ZnZJ82sUNJ5kv5iZvPj+4eY2fOS5O7Vku6UNF/SaklPufuq+CXuk3Slma2XdGX8fdP39Ig9+NoSjGm0Pdq47dHG7YN2ji4CKAAAAdCFCwBAAATQBI0t8YTwmNnDZrbXzFZ2dF26KzMbbmavmdnq+NJmX+3oOnU3ZpZtZovM7L14G9/T0XVC+6MLNy6+xNM6
1Q0eF6puttZ17v5Bh1asmzGziySVSHrU3U/v6Pp0R/Ep+PnuvszM8iQtlfQJfpbDY2YmKdfdS8wsQ9Kbkr7q7u90cNXQjshAP9TUEk8Iibu/LulgR9ejO3P3Xe6+LP66WHWzDYd2bK26F69TEn+bEd/IRiKGAPqhZEs88UsHXZqZjZJ0lqSFHVyVbsfMYma2XHUP3L/s7rRxxBBAPxTqEk9ARzOznpL+JOkudy/q6Pp0N+5e4+5nqm41m+lmxpBExBBAP9TUEk9AlxIfl/uTpN+5+9MdXZ/uzN0PS1ogaUbH1gTtjQD6oaaWeAK6jPgEl99IWu3uP+no+nRHZjbAzPrEX+dIukLSmg6tFNodATSumSWeEBIze0LS3yWdYmaFZnZbR9epG7pA0o2SLjOz5fFtZkdXqpvJl/Samb2vui/fL7v7nzu4TmhnPMYCAEAAZKAAAARAAAUAIAACKAAAARBAAQAIgAAKAEAABFAAAAIggAIAEAABFACAAP4/bXVxjNw17FEAAAAASUVORK5CYII=\n",
431 | "text/plain": [
432 | ""
433 | ]
434 | },
435 | "metadata": {
436 | "needs_background": "light"
437 | },
438 | "output_type": "display_data"
439 | }
440 | ],
441 | "source": [
442 | "plt.figure(figsize=(8,6))\n",
443 | "sns.heatmap(combined_means.corr(),cmap='RdBu',\n",
444 | " mask=mask,vmax=1,vmin=-1,annot=True,annot_kws={\"size\": 15})\n",
445 | "plt.savefig('remove_g_corr.pdf')\n",
446 | "plt.show()"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": null,
452 | "id": "801e7e7d-55b0-488f-a4c9-6c81d8b6ef73",
453 | "metadata": {},
454 | "outputs": [],
455 | "source": [
456 | "# no statistically significant enrichments found, so no plots "
457 | ]
458 | }
459 | ],
460 | "metadata": {
461 | "kernelspec": {
462 | "display_name": "plot",
463 | "language": "python",
464 | "name": "plot"
465 | },
466 | "language_info": {
467 | "codemirror_mode": {
468 | "name": "ipython",
469 | "version": 3
470 | },
471 | "file_extension": ".py",
472 | "mimetype": "text/x-python",
473 | "name": "python",
474 | "nbconvert_exporter": "python",
475 | "pygments_lexer": "ipython3",
476 | "version": "3.9.10"
477 | }
478 | },
479 | "nbformat": 4,
480 | "nbformat_minor": 5
481 | }
482 |
--------------------------------------------------------------------------------