├── .DS_Store
├── .gitignore
├── LICENSE
├── README.md
├── signal.xlsx
└── src
    ├── convert_sc_data.py
    ├── convert_seurat.R
    ├── download.py
    └── run.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data2intelligence/CytoSig_prediction/8bf4d45215a9e1869b7c3f0e71b54dfec6ddee13/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | data
 2 | build
 3 | dist
 4 | git_*
 5 | .*project
 6 | .settings
 7 | *.egg-info
 8 | __pycache__
 9 | *.pyc
10 | *~
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Users can use CytoSig under the GNU General Public License version 3: https://www.gnu.org/licenses/gpl-3.0.en.html
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Example code to run CytoSig analysis and reproduce prediction results on bulk and single-cell cohorts  
 2 | 
 3 | Please CD into the src folder for the following steps.  
 4 | 
 5 | # Stage 1: download datasets  
 6 | run "./download.py"  
 7 |   
 8 | The starting data will be available in the folder data, named with bulk, single_cell, and output  
 9 | 
10 | # Stage 2: convert single-cell data to CytoSig input format
11 | **This step is optional if you are only interested in bulk data analysis in Figure 4. Just jump to Stage 3.**     
12 |   
13 | run "./convert_sc_data.py"  
14 | **Note**: This step needs a machine with a large amount of memory (64 GB), as the dataset EGAS00001004571 contains many single cells.  
15 | 
16 | We included two single-cell datasets that are in neither CytoSig input format nor CellRanger format. GSE145926 is released as H5 files. EGAS00001004571 is released as a Seurat object.  
17 | This program will convert both datasets to python pickles of dense matrices as CytoSig input.  
18 | 
19 | # Stage 3: predict CytoSig signaling activity and generate figures  
20 | run "./run.py"  
21 | If you converted single-cell data in Stage 2, this step will need a machine with a large amount of memory (64 GB), as the dataset EGAS00001004571 contains many single cells.  
22 | The relevant figure numbers are labeled above each function.  
23 | 
24 | **Task 1**: bulk data from tumor and inflammatory disease studies  
25 | 
26 | **Task 2**: single-cell data from COVID19 studies (only triggered if you have run the optional stage 2)  
27 | 


--------------------------------------------------------------------------------
/signal.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/data2intelligence/CytoSig_prediction/8bf4d45215a9e1869b7c3f0e71b54dfec6ddee13/signal.xlsx


--------------------------------------------------------------------------------
/src/convert_sc_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import os, pathlib, tables, pandas, numpy, re
  4 | import scipy.sparse as sp_sparse
  5 | 
  6 | from scipy import io
  7 | from glob import glob
  8 | 
  9 | base_path = pathlib.Path(__file__).parent.absolute()
 10 | base_path = os.path.dirname(base_path)
 11 | 
 12 | src_path = os.path.join(base_path, 'src')
 13 | data_path = os.path.join(base_path, 'data')
 14 | 
 15 | sc_path = os.path.join(data_path, 'single_cell')
 16 | 
 17 | 
 18 | def filter_matrix(matrix):
 19 |     # jump empty barcodes
 20 |     matrix = matrix.loc[~matrix.index.isnull(), matrix.sum() > 0]
 21 |     
 22 |     # jump all zero genes
 23 |     matrix = matrix.loc[(matrix == 0).mean(axis=1) < 1]
 24 |     
 25 |     # jump ambiguous genes, if any
 26 |     cnt_map = matrix.index.value_counts()
 27 |     if cnt_map.max() > 1:
 28 |         matrix = matrix.loc[cnt_map.loc[matrix.index] == 1]
 29 |     
 30 |     return matrix
 31 | 
 32 | 
 33 | 
 34 | def get_matrix_from_h5(filename):
 35 |     fin = tables.open_file(filename, 'r')
 36 | 
 37 |     mat_group = fin.get_node(fin.root, 'matrix')
 38 |     barcodes = fin.get_node(mat_group, 'barcodes').read()
 39 |     data = getattr(mat_group, 'data').read()
 40 |     indices = getattr(mat_group, 'indices').read()
 41 |     indptr = getattr(mat_group, 'indptr').read()
 42 |     shape = getattr(mat_group, 'shape').read()
 43 |     matrix = sp_sparse.csc_matrix((data, indices, indptr), shape=shape)
 44 |         
 45 |     feature_ref = {}
 46 |     feature_group = fin.get_node(mat_group, 'features')
 47 |     feature_ids = getattr(feature_group, 'id').read()
 48 |     feature_names = getattr(feature_group, 'name').read()
 49 |     feature_types = getattr(feature_group, 'feature_type').read()
 50 |     feature_ref['id'] = feature_ids
 51 |     feature_ref['name'] = feature_names
 52 |     feature_ref['feature_type'] = feature_types
 53 |         
 54 |     tag_keys = getattr(feature_group, '_all_tag_keys').read()
 55 |         
 56 |     for key in tag_keys:
 57 |         feature_ref[key] = getattr(feature_group, key.decode("utf-8")).read()
 58 |         
 59 |     matrix = pandas.DataFrame.sparse.from_spmatrix(matrix)
 60 |     matrix.index = map(lambda v: v.decode("utf-8"), feature_ref['name'])
 61 |     matrix.columns = map(lambda v: v.decode("utf-8"), barcodes)
 62 |     
 63 |     fin.close()
 64 |     
 65 |     return filter_matrix(matrix)
 66 | 
 67 | 
 68 | 
 69 | 
 70 | def load_COVID19_GSE145926():
 71 |     """
 72 |     Load a COVID19 single-cell RNASeq data from patient lavage samples
 73 |     Final column names as cell_type.patient.barcode
 74 |     """
 75 |     
 76 |     fprefix = os.path.join(sc_path, 'GSE145926')
 77 |     
 78 |     info = pandas.read_excel(os.path.join(fprefix, 'patient_group.xlsx'), index_col=0)['Group']
 79 |     
 80 |     # the initial h5 files are downloaded from GEO
 81 |     file_list = glob(os.path.join(fprefix, '*.h5'))
 82 |     
 83 |     merge = []
 84 |     
 85 |     for f in file_list:
 86 |         title = os.path.basename(f).split('_')[1]
 87 |         print(title)
 88 |         
 89 |         data = get_matrix_from_h5(f)
 90 |        
 91 |         cat = info.loc[title]
 92 |         
 93 |         data.columns = cat + '_' + title + '.' + data.columns
 94 |         merge.append(data)
 95 |     
 96 |     data = pandas.concat(merge, axis=1, join='inner')
 97 |     data = data.loc[(data == 0).mean(axis=1) < 1]
 98 |     
 99 |     # down-sizing by 10 
100 |     ratio = 1E5/data.sum()
101 |     data *= ratio
102 |     
103 |     data = numpy.log2(data + 1)
104 |     
105 |     # add cell types
106 |     ID_map = {
107 |         'S1': 'severe_C143',
108 |         'S2': 'severe_C145',
109 |         'S3': 'severe_C146',
110 |         'S4': 'severe_C148',
111 |         'S5': 'severe_C149',
112 |         'S6': 'severe_C152',
113 |             
114 |         'M1': 'mild_C141',
115 |         'M2': 'mild_C142',
116 |         'M3': 'mild_C144',
117 |             
118 |         'H1': 'healthy_C51',
119 |         'H2': 'healthy_C52',
120 |         'H3': 'healthy_C100',
121 |     }
122 |         
123 |     # general cell map, these annotations are labeled by ourselves
124 |     info = pandas.read_csv(os.path.join(fprefix, 'cluster_results.tsv'), sep='\t', index_col=0)
125 |     info = info.loc[:, 'clusterID']
126 |         
127 |     cluster = pandas.read_csv(os.path.join(fprefix, 'cluster_annotation.txt'), sep='\t', index_col=0)
128 |     cluster = cluster.loc[:, 'cellTypeSub']
129 |         
130 |     cell_type = info.apply(lambda v: cluster.loc[v])
131 |     cell_type.index = [ID_map[v.split('_')[0]] + '.' + v.split('_')[1] for v in info.index]
132 |         
133 |     # T cell map
134 |     info = pandas.read_csv(os.path.join(fprefix, 'cluster_T_results.tsv'), sep='\t', index_col=0)
135 |     info = info.loc[:, 'clusterID']
136 |             
137 |     cluster = pandas.read_csv(os.path.join(fprefix, 'cluster_T_annotation.txt'), sep='\t', index_col=0)
138 |     cluster = cluster.loc[:, 'cellTypeSub']
139 |         
140 |     Tcell_type = info.apply(lambda v: cluster.loc[v])
141 |     Tcell_type.index = [ID_map[v.split('_')[0]] + '.' + v.split('_')[1] for v in info.index]
142 |         
143 |     cntmap = cell_type.loc[Tcell_type.index].value_counts()
144 |     assert cntmap.shape[0] == 1
145 |         
146 |     cell_type.drop(Tcell_type.index, inplace=True)    
147 |     
148 |     # put in cell type to data columns
149 |     data_1 = data.loc[:, cell_type.index]
150 |     data_1.columns = cell_type.loc[data_1.columns] + '.' + data_1.columns
151 |         
152 |     data_2 = data.loc[:, Tcell_type.index]
153 |     data_2.columns = Tcell_type.loc[data_2.columns] + '.' + data_2.columns
154 |         
155 |     data = pandas.concat([data_1, data_2], axis=1, join='inner')
156 |     
157 |     print(data.shape)
158 |     data.sparse.to_dense().to_pickle(fprefix + '.pickle.gz', compression='gzip')
159 |             
160 | 
161 | 
def load_mtx_to_dataframe(data, gene, barcode):
    """
    Read a Matrix Market file together with its row and column labels.

    data: path of the .mtx matrix file.
    gene: path of a tab-separated file without header; its first column
          labels the matrix rows.
    barcode: path of a tab-separated file without header; its first column
             labels the matrix columns.

    Returns the labelled sparse-backed DataFrame after filter_matrix.
    """
    frame = pandas.DataFrame.sparse.from_spmatrix(io.mmread(data))

    row_labels = pandas.read_csv(gene, sep='\t', header=None).iloc[:, 0]
    column_labels = pandas.read_csv(barcode, sep='\t', header=None).iloc[:, 0]

    frame.index = row_labels
    frame.columns = column_labels

    return filter_matrix(frame)
173 | 
174 | 
175 | 
def load_COVID19_EGAS00001004571():
    """
    Convert the EGAS00001004571 Seurat object into a CytoSig input pickle.

    Step 1 runs convert_seurat.R to export the Seurat object into
    <sc_path>/EGAS00001004571/convert. Step 2 loads the exported matrix,
    prefixes every column with '<cell type>.<group>_<sample>.' and writes
    a dense gzip pickle to <sc_path>/EGAS00001004571_cohort1.pickle.gz.

    Raises subprocess.CalledProcessError when the R conversion fails.
    """
    import subprocess

    fpath = os.path.join(sc_path, 'EGAS00001004571')
    convert_path = os.path.join(fpath, 'convert')
    metadata_file = os.path.join(convert_path, 'metadata')

    # step 1: Seurat object to mtx matrix with normalized data
    # The initial rds file is downloaded from https://beta.fastgenomics.org/datasets/detail-dataset-952687f71ef34322a850553c4a24e82e#Files
    #
    # The metadata table is the last file the R script writes, so its presence
    # means an earlier conversion finished and can be reused.
    # NOTE(review): write10xCounts is expected to refuse an existing output
    # folder by default, so re-running the script would fail - confirm.
    if not os.path.exists(metadata_file):
        # Argument list instead of a shell string: paths with spaces stay intact,
        # and check=True surfaces a failed conversion instead of ignoring it.
        subprocess.run(
            [
                os.path.join(src_path, 'convert_seurat.R'),
                os.path.join(fpath, 'seurat_COVID19_PBMC_cohort1_10x_jonas_FG_2020-08-15.rds'),
                convert_path,
            ],
            check=True,
        )

    # step 2: sparse matrix to python pickle
    metadata = pandas.read_csv(metadata_file, sep='\t', index_col=0, low_memory=False)

    # strip a trailing '_<digits>' from the cluster label; '.' is reserved as field separator
    postfix = re.compile('_[0-9]+$')

    cell_type = metadata['cluster_labels_res.0.4'].apply(lambda v: postfix.sub('', v).replace('.', '-'))
    info = cell_type + '.' + metadata['group_per_sample'] + '_' + metadata['sampleID'].apply(lambda v: v.replace('.', '-'))

    # these data are seurat normalized already, no need to transform again
    data = load_mtx_to_dataframe(
        os.path.join(convert_path, 'matrix.mtx'),
        os.path.join(convert_path, 'genes.tsv'),
        os.path.join(convert_path, 'barcodes.tsv')
    )

    data.columns = info.loc[data.columns] + '.' + data.columns

    # explicit compression, consistent with load_COVID19_GSE145926
    data.sparse.to_dense().to_pickle(os.path.join(sc_path, 'EGAS00001004571_cohort1.pickle.gz'), compression='gzip')
206 |     
207 | 
208 | 
209 | 
def main():
    """Run both single-cell dataset conversions in order and return 0."""
    for convert in (load_COVID19_GSE145926, load_COVID19_EGAS00001004571):
        convert()

    return 0
215 | 
216 | if __name__ == '__main__': main()
217 | 


--------------------------------------------------------------------------------
/src/convert_seurat.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | 
 3 | suppressPackageStartupMessages(library(Seurat))
 4 | suppressPackageStartupMessages(library(DropletUtils))
 5 | 
 6 | commands = commandArgs(trailingOnly=T)
 7 | 
 8 | input = commands[1]
 9 | output_path = commands[2]
10 | 
11 | data = readRDS(input)
12 | write10xCounts(x=data@assays$RNA@data, path=output_path)
13 | write.table(data@meta.data, file.path(output_path, 'metadata'), sep='\t', quote=F)
14 | 


--------------------------------------------------------------------------------
/src/download.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import os, pathlib, tarfile
 4 | import urllib.request
 5 | import ssl
 6 | ssl._create_default_https_context = ssl._create_unverified_context
 7 | 
 8 | base_path = pathlib.Path(__file__).parent.absolute()
 9 | base_path = os.path.dirname(base_path)
10 | 
11 | for url in ['https://hpc.nih.gov/~Jiang_Lab/CytoSig/CytoSig_prediction/data.tar.gz']:
12 |     f = os.path.basename(url.rstrip('/'))
13 |     
14 |     out = os.path.join(base_path, f)
15 |     urllib.request.urlretrieve(url, out)
16 |     
17 |     if url.find('.tar.gz') > 0:
18 |         my_tar = tarfile.open(out)
19 |         my_tar.extractall(base_path)
20 |         my_tar.close()
21 |         
22 |         os.remove(out)
23 | 


--------------------------------------------------------------------------------
/src/run.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import CytoSig
  4 | import pandas
  5 | import os, sys
  6 | import re
  7 | import numpy
  8 | import pathlib
  9 | import seaborn
 10 | import matplotlib.pyplot as plt
 11 | 
 12 | from scipy import stats
 13 | from glob import glob
 14 | from lifelines.fitters.coxph_fitter import CoxPHFitter
 15 | from lifelines import KaplanMeierFitter
 16 | from statsmodels.stats.multitest import multipletests
 17 | from scipy.cluster import hierarchy as hc
 18 | from matplotlib.backends.backend_pdf import PdfPages
 19 | 
 20 | 
 21 | ###############################################################
 22 | # define paths
 23 | base_path = pathlib.Path(__file__).parent.absolute()
 24 | base_path = os.path.dirname(base_path)
 25 | 
 26 | data_path = os.path.join(base_path, 'data')
 27 | bulk_path = os.path.join(data_path, 'bulk')
 28 | sc_path = os.path.join(data_path, 'single_cell')
 29 | output_path = os.path.join(data_path, 'output')
 30 | 
 31 | ###############################################################
 32 | # define parameters
 33 | alpha = 1E4
 34 | nrand = 1000
 35 | alternative='two-sided'
 36 | verbose_flag = False
 37 | 
 38 | figure_width = 7
 39 | font_size = 30
 40 | 
 41 | plt.rcParams.update({'font.size': font_size})
 42 | 
 43 | # default colors array
 44 | colors_default = plt.rcParams['axes.prop_cycle'].by_key()['color']
 45 | 
 46 | ###############################################################
 47 | # load CytoSig signature
 48 | signature = os.path.join(sys.prefix, 'bin', 'signature.centroid')
 49 | signature = pandas.read_csv(signature, sep='\t', index_col=0)
 50 | 
 51 | 
 52 | 
 53 | ###############################################################
 54 | # define functions
 55 | 
 56 | def category_background(expression, sep):
 57 |     """
 58 |     Create the background vector for single-cell analysis
 59 |     """
 60 |     
 61 |     if sep is None:
 62 |         background = expression.mean(axis=1)
 63 |     else:
 64 |         category = expression.groupby([v.split(sep)[0] for v in expression.columns], axis=1).apply(lambda v: v.mean(axis=1))
 65 |         background = category.mean(axis=1)
 66 |     
 67 |     background.name = 'background'
 68 |     return background
 69 | 
 70 | 
 71 | 
 72 | def heatmap(data, output, top_value=None, shrink=1, metric_type='correlation', flag_cluster=True):
 73 |     if type(data) == str: data = pandas.read_csv(data, sep='\t', index_col=0)
 74 |     
 75 |     if flag_cluster:
 76 |         data.fillna(0, inplace=True)
 77 |         z = hc.linkage(data, method='average', metric= metric_type)
 78 |         
 79 |         dendro = hc.dendrogram(z, labels=data.index.tolist(), no_plot=True)
 80 |         order_index = dendro['ivl']
 81 |         
 82 |         z = hc.linkage(data.transpose(), method='average', metric=metric_type)
 83 |         dendro = hc.dendrogram(z, labels= data.columns.tolist(), no_plot=True)
 84 |         order_columns = dendro['ivl']
 85 |         
 86 |         data = data.loc[order_index, order_columns]
 87 |     
 88 |     fig = plt.figure(figsize=data.shape, frameon=False)
 89 |     
 90 |     if top_value is None:
 91 |         g = seaborn.heatmap(data.transpose(), cmap="coolwarm", center=0, square=True, xticklabels=True, yticklabels=True, cbar_kws={"shrink": shrink})
 92 |     else:
 93 |         g = seaborn.heatmap(data.transpose(), cmap="coolwarm", center=0, vmax=top_value, vmin=-top_value, square=True, xticklabels=True, yticklabels=True, cbar_kws={"shrink": shrink})
 94 |                 
 95 |     g.set_xticklabels(g.get_xmajorticklabels(), rotation=90)#, fontsize=font_size_label)
 96 |     g.set_yticklabels(g.get_ymajorticklabels(), rotation=0)#, fontsize=font_size_label)
 97 |     
 98 |     fig.savefig(output + '.pdf', bbox_inches='tight', transparent=True)
 99 |     #fig.savefig(output + '.svg', bbox_inches='tight', transparent=True)
100 |     plt.close(fig)
101 |     
102 |     return data
103 | 
104 | 
def analyze_three_level_significance(out, levels, top_value = 4, cnt_thres = 5, qthres=0.05, vthres=1, merged_compare=True):
    """
    Statistical difference analysis between severe, mild, and healthy patient groups

    out: path of a tab-separated score matrix, such as CytoSig signaling activity or
         gene expression levels, with columns named cell_type.patient.cell and the
         patient field starting with '<level>_'. Also the prefix of all output files.
    levels: the three patient group names, ordered as (severe, mild, healthy)
    top_value: the highest absolute z-score levels in plotting
    cnt_thres: minimum number of cells for each plotted sample
    qthres: threshold for significant corrected p-values
    vthres: threshold for minimum absolute z-scores to claim a significance event
    merged_compare: in the second comparison, whether merge severe and mild patients as one group against healthy controls
                    alternatively, just compare severe and healthy controls

    Outputs, all prefixed with out: '.stat.xlsx', '.heatmap.pdf', '.FDR', and for
    cell types with significant signals '.<cell type>', '.<cell type>.pdf',
    '.cnt_map' and '.triplet'.
    """

    scores = pandas.read_csv(out, sep='\t', index_col=0)
    assert scores.columns.value_counts().max() == 1

    # two types of comparison in analysis later
    label_comparison1 = '%s-%s' % (levels[0],  levels[1])

    if merged_compare:
        label_comparison2 = 'disease-%s' % levels[2]
    else:
        label_comparison2 = '%s-%s' % (levels[0],  levels[2])

    # (cell type, patient) group merge, merge on cell level
    flag = ['.'.join(v.split('.')[:2]) for v in scores.columns]
    cntmap = pandas.Series(flag).value_counts()

    # groupby(axis=1) is deprecated since pandas 2.1: group the transposed frame instead
    scores = scores.T.groupby(pandas.Index(flag)).mean().T

    # only keep entities with sufficient number of cells
    scores = scores.loc[:, cntmap.index[cntmap >= cnt_thres]]

    # cell type of every remaining (cell type, patient) column
    cell_labels = numpy.array([v.split('.')[0] for v in scores.columns], dtype=object)

    writer = pandas.ExcelWriter(out + '.stat.xlsx', engine='xlsxwriter')
    pdf = PdfPages(out + '.heatmap.pdf')

    merge_signal_score = []

    mat_map_sig = {}
    cnt_map_sig = {}

    merge_severe_mild = []

    for cell_type in sorted(set(cell_labels)):
        result = scores.loc[:, cell_labels == cell_type].copy()

        result.columns = [v.split('.')[1] for v in result.columns]
        result = result.loc[(result == 0).mean(axis=1) < 1]
        result = result.loc[result.std(axis=1) > 1e-3]

        # dataset specific fix
        if 'GSE145926' in out:
            result.columns = [v.replace('_C', '_') for v in result.columns]

        elif 'EGAS00001004571' in out:
            result.columns = [v.replace('_C19-CB-', '_') for v in result.columns]

        # split the patients by level; these frames keep the current row order
        patient_labels = numpy.array([v.split('_')[0] for v in result.columns], dtype=object)
        patient_group = {
            label: result.loc[:, patient_labels == label]
            for label in set(patient_labels)
        }

        if len(set(levels).intersection(patient_group.keys())) != len(levels):
            # some patient group is missing for the current cell type
            print('jump %s by lacking of patient group' % cell_type)
            continue

        # order signal names
        z = hc.linkage(result, method='average', metric='correlation')
        dendro = hc.dendrogram(z, labels=result.index.tolist(), no_plot=True)
        order_index = dendro['ivl']

        order_columns = []

        # order patients within each level group
        for level in levels:
            result_sub = patient_group[level]

            if result_sub.shape[1] > 1: # with something to order
                z = hc.linkage(result_sub.transpose(), method='average', metric='correlation')
                dendro = hc.dendrogram(z, labels= result_sub.columns.tolist(), no_plot=True)
                order_columns.extend(dendro['ivl'])
            else:
                order_columns.extend(result_sub.columns)

        result = result.loc[order_index, order_columns]

        fig = plt.figure(figsize=result.shape, frameon=False)
        g = seaborn.heatmap(result.transpose(), cmap="coolwarm", center=0, vmax=top_value, vmin=-top_value, square=True, xticklabels=True, yticklabels=True, cbar_kws={"shrink": 0.6})
        g.set_xticklabels(g.get_xmajorticklabels(), rotation=90)
        g.set_yticklabels(g.get_ymajorticklabels(), rotation=0)
        plt.title(cell_type, fontsize=font_size)

        pdf.savefig(fig, bbox_inches='tight', transparent=True)
        plt.close(fig)

        # differential analysis
        result_lst = [patient_group[level] for level in levels]

        merge = []

        for gid in result_lst[0].index:
            arr_severe = result_lst[0].loc[gid]
            arr_mild = result_lst[1].loc[gid]
            arr_healthy = result_lst[2].loc[gid]

            if merged_compare:
                arr_disease = pandas.concat([arr_severe, arr_mild])
            else:
                arr_disease = arr_severe

            z_severe_mild, p_severe_mild = stats.ranksums(arr_severe, arr_mild)
            z_disease_healthy, p_disease_healthy = stats.ranksums(arr_disease, arr_healthy)

            merge.append(
                pandas.Series(
                    [arr_severe.median(), arr_mild.median(), arr_healthy.median(),
                     z_severe_mild, z_disease_healthy,
                     p_severe_mild, p_disease_healthy
                    ], name=gid,
                    index=['med.%s' % levels[0], 'med.%s' % levels[1], 'med.%s' % levels[2], 'z.%s' % label_comparison1, 'z.%s' % label_comparison2, 'p.%s' % label_comparison1, 'p.%s' % label_comparison2])
                )

        result_stat = pandas.concat(merge, axis=1, join='inner').transpose()

        # per comparison: Benjamini-Hochberg FDR, plus its mean and minimum as cell-type summary
        signal_score = []

        for label_comparison in [label_comparison1, label_comparison2]:
            arr = result_stat['FDR.%s' % label_comparison] = multipletests(result_stat['p.%s' %label_comparison], method='fdr_bh')[1]
            signal_score.append(arr.mean())
            signal_score.append(arr.min())

        signal_score = pandas.Series(signal_score, index=[label_comparison1 + '.mean', label_comparison1 + '.min', label_comparison2 + '.mean', label_comparison2 + '.min'], name=cell_type)
        merge_signal_score.append(signal_score)

        result_stat.sort_values('p.%s' % label_comparison1, inplace=True)
        result_stat.to_excel(writer, sheet_name=cell_type[:31])

        # test for significance: both comparisons pass the FDR cut and at least one median is large
        significant = (result_stat['FDR.%s' % label_comparison1] < qthres) & (result_stat['FDR.%s' % label_comparison2] < qthres)
        significant &= (
            (result_stat['med.%s' % levels[0]].abs() > vthres) | (result_stat['med.%s' % levels[1]].abs() > vthres) | (result_stat['med.%s' % levels[2]].abs() > vthres)
            )

        if significant.any():
            print(cell_type)

            merge_severe_mild.append(result_stat.loc[:, 'z.%s' % label_comparison1].rename(cell_type))

            result_stat = result_stat.loc[significant].drop(['FDR.%s' % label_comparison1, 'FDR.%s' % label_comparison2], axis=1)

            # Excel limits sheet names to 31 characters; truncate like the sheet above
            result_stat.sort_values('z.%s' % label_comparison1, ascending=False).to_excel(writer, sheet_name=('sig.' + cell_type)[:31])

            result_sub = result.loc[significant]
            result_sub.to_csv(out + '.' + cell_type, sep='\t', index_label=False)

            heatmap(result_sub, out + '.' + cell_type, top_value=top_value, shrink=0.5, flag_cluster=False)

            # count in how many cell types each signal is significant
            for gid in result_stat.index:
                cnt_map_sig[gid] = cnt_map_sig.get(gid, 0) + 1

            # median per patient level, columns sorted by level name
            level_labels = pandas.Index([v.split('_')[0] for v in result.columns])
            mat_map_sig[cell_type] = result.T.groupby(level_labels).median().T

    pdf.close()


    # triplet pattern merge output
    if len(mat_map_sig) > 0:
        merge_severe_mild = pandas.concat(merge_severe_mild, axis=1, join='inner')

        if merge_severe_mild.shape[1] > 1:
            z = hc.linkage(merge_severe_mild.transpose(), method='average', metric='correlation')
            dendro = hc.dendrogram(z, labels= merge_severe_mild.columns.tolist(), no_plot=True)
            merge_severe_mild = merge_severe_mild.loc[:, dendro['ivl']]

        pandas.Series(cnt_map_sig).to_csv(out + '.cnt_map', sep='\t', index_label=False, header=False)

        merge = []

        for cell_type in merge_severe_mild.columns:
            triplet = mat_map_sig[cell_type]
            triplet.columns = cell_type  + '@' + triplet.columns
            # reverse the sorted level columns
            merge.append(triplet.iloc[:, ::-1])

        pandas.concat(merge, axis=1, join='inner').to_csv(out + '.triplet', sep='\t', index_label=False)

    # output test statistics; nothing to write when every cell type was skipped
    if merge_signal_score:
        merge_signal_score = pandas.concat(merge_signal_score, axis=1, join='inner').transpose()
        merge_signal_score.sort_values(merge_signal_score.columns[0], inplace=True, ascending=False)
        merge_signal_score.to_csv(out + '.FDR', sep='\t', index_label=False)

    format_align = writer.book.add_format({'align': 'center'})
    format_number = writer.book.add_format({'num_format': '#,##0.000', 'align': 'center'})
    format_stat = writer.book.add_format({'num_format': '0.00E+00', 'align': 'center'})

    width = max(len('FDR.%s' % label_comparison1), len('FDR.%s' % label_comparison2))

    for worksheet in writer.sheets.values():
        # Column 0 holds the row labels; the statistics columns are formatted by
        # the calls below, so the ranges no longer overlap.
        worksheet.set_column(0, 0, None, format_align)
        worksheet.set_column(1, 5, width, format_number)
        worksheet.set_column(6, 7, width, format_stat)
        worksheet.set_column(8, 9, width, format_number)

        worksheet.set_zoom(150)

    writer.close()
314 | 
315 | 
316 | 
def survival_best_separation(data, pivot, z_continuous):
    """
    Get the signal cutoff for KM plot through the best-separation criteria.
    However, all statistical test will be based on continuous value without cutoffs.

    data: DataFrame whose first column is the duration, second column the event
          indicator, and which contains the column pivot. It is not modified.
    pivot: name of the column to dichotomize.
    z_continuous: z-score of the continuous fit; its sign selects the direction
                  in which the dichotomized z-score is maximized.

    Every candidate cutoff keeps at least max(5, 10% of samples) samples on each
    side. Samples with value >= cutoff form the high group.

    Returns (cutoff, z-score at that cutoff), or (None, None) when no fit succeeded.
    """
    cf = CoxPHFitter()

    arr = data[pivot].copy()
    arr_rank = arr.sort_values(ascending=False)
    N = arr.shape[0]

    vthres_max = z_max = None

    margin = max(5, int(0.1*N))

    # Work on a copy: writing booleans into the caller's float column through
    # .loc is dtype-dependent across pandas versions and alters the input frame.
    work = data.copy()

    for i in range(margin-1, N-margin):
        vthres = arr_rank.iloc[i]

        work[pivot] = (arr >= vthres)

        try:
            cf.fit(work, work.columns[0], event_col=work.columns[1])
        except Exception:
            # a failed fit only disqualifies this cutoff; unlike a bare except,
            # KeyboardInterrupt and SystemExit still propagate
            continue

        z = cf.summary.loc[pivot, 'z']

        # multiplying by z_continuous compares in the direction of the continuous effect
        if vthres_max is None or z * z_continuous > z_max * z_continuous:
            vthres_max = vthres
            z_max = z

    return vthres_max, z_max
352 | 
353 | 
def Survival_Analysis(clinical, data, signal, title, survival_type, output):
    """
    Test the association between a CytoSig signal and survival, and draw a KM plot.

    clinical: path of a tab-separated table indexed by patient; its first column
              is the survival duration and its second column the event indicator.
    data: path of a tab-separated expression matrix used as CytoSig input.
    signal: name of the CytoSig signal (row of the z-score matrix) to test.
    title: not used by this function; kept so existing callers keep working.
    survival_type: label of the x axis.
    output: output file prefix; the figure is saved as <output>.pdf

    The p-value in the figure title comes from a Cox regression on the continuous
    signal. The high/low split is only used for drawing the curves.

    Raises ValueError when no cutoff for the KM plot could be determined.
    """
    cf = CoxPHFitter()

    data = pandas.read_csv(data, sep='\t', index_col=0)

    beta, se, zscore, pvalue = CytoSig.ridge_significance_test(signature, data, alpha, alternative, nrand, 1, True, False, verbose_flag)

    survival = pandas.read_csv(clinical, sep='\t', index_col=0)
    survival.index = survival.index.astype(str) # in case patient ID is integer

    # keep only patients present in both the clinical table and the signal scores
    mat = pandas.concat([survival, zscore.loc[signal]], axis=1, join='inner')

    cf.fit(mat, survival.columns[0], event_col=survival.columns[1])

    # wald test z-score
    z_Wald = cf.summary.loc[signal, 'z']

    # the CoxPH regression output two-sided p-values by default, we divide them by 2 to get one-sided
    p_oneside = cf.summary.loc[signal, 'p']/2

    # find the best separation cutoff only for KM plot, but not statistical test above
    thres, _ = survival_best_separation(mat, signal, z_Wald)

    if thres is None:
        raise ValueError('no valid cutoff found for signal %s' % signal)

    # >= matches the split evaluated by survival_best_separation; a strict >
    # would move the samples sitting exactly at the cutoff into the low group
    flag = (mat[signal] >= thres)

    fig = plt.figure(figsize=(figure_width, figure_width), frameon=False)
    kmf = KaplanMeierFitter()

    kmf.fit(mat.iloc[:,0].loc[flag], mat.iloc[:,1].loc[flag], label= 'High (n=%d)' % flag.sum())
    a1 = kmf.plot(ci_show=False, show_censors=True, color='red', linewidth=2)

    kmf.fit(mat.iloc[:,0].loc[~flag], mat.iloc[:,1].loc[~flag], label='Low (n=%d)' % (~flag).sum())
    kmf.plot(ax=a1, ci_show=False, show_censors=True, color='blue', linestyle='--', linewidth=2)

    plt.xlabel(survival_type)
    plt.ylabel('Fraction')
    plt.legend(frameon=False)
    plt.title('p = %.2e' % p_oneside, fontsize=font_size)

    fig.savefig(output + '.pdf', bbox_inches='tight', transparent=True)
    plt.close(fig)
395 |     
396 | 
def Survival_Analysis_Tumor():
    """
    Analyze across tumor datasets

    Runs Survival_Analysis once per cohort listed below. Inputs are read from
    <bulk_path>/tumor/<dataset><suffix> and each figure prefix is
    <output_path>/<dataset>.<signal>_<title with '_' for spaces>.
    """

    # (dataset, signal, title, survival type, clinical suffix, expression suffix)
    cohorts = (
        # Sunitinib multi-kinase inhibitor, targetting VEGF receptor, in metastatic clear-cell renal cell carcinoma (CCRCC)
        ('E-MTAB-3267', 'VEGFA', 'Sunitinib CCRCC', 'Progression-free survival (month)', '.PFS', '.norm_subtract.gz'),

        # Bevacizumab mono-therapy treatment in Glioblastoma (GBM)
        ('GSE72951', 'VEGFA', 'Bevacizumab GBM', 'Overall survival (month)', '.OS.Bevacizumab', '.self_subtract.gz'),

        # Atezolizumab anti-PDL1, requiring pre-existing interferon gamma signaling (IFNG), in metastatic urothelial carcinoma
        ('EGAS00001002556', 'IFNG', 'Atezolizumab Urothelial', 'Overall survival (month)', '.OS', '.self_subtract.gz'),
    )

    for dataset, signal, title, survival_type, clinical_suffix, data_suffix in cohorts:
        fprefix = os.path.join(bulk_path, 'tumor', dataset)
        output = os.path.join(output_path, '%s.%s_%s' % (dataset, signal, title.replace(' ', '_')))

        Survival_Analysis(fprefix + clinical_suffix, fprefix + data_suffix, signal, title, survival_type, output)
440 | 
441 | 
442 | 
def CytoSig_run_Inflam():
    """
    inflammatory diseases

    Run CytoSig.ridge_significance_test on every '*.diff.1' file found under
    <bulk_path>/inflam and write, next to each input:
      - '<file>.signal'      z-scores of the differential profile
      - '<file>.sep.signal'  z-scores of the per-sample profile, only when
                             '<file>.sep.gz' exists
    Finally concatenate all '<file>.signal' tables column-wise, prefixing each
    column with the part of the file name before the first '.', and write the
    result to <bulk_path>/inflam/merge.

    When no '*.diff.1' file is found, a message is written to stderr and the
    function returns without writing anything.
    """

    # this is an output folder from the FDC framework, processed result files should look like *.diff.1
    fpath = os.path.join(bulk_path, 'inflam')

    # glob returns matches in arbitrary order; sort so the column order of
    # the merged table is the same on every run and every machine
    diff_lst = sorted(glob(os.path.join(fpath, '*.diff.1')))

    if not diff_lst:
        # pandas.concat raises an opaque ValueError on an empty list, so report
        # the actual problem in the same way the other steps of this script do
        sys.stderr.write('Cannot find any *.diff.1 file in %s\n' % fpath)
        return

    merge = []

    # run CytoSig for all datasets
    for fprefix in diff_lst:
        title = os.path.basename(fprefix)
        print(title)

        data = pandas.read_csv(fprefix, sep='\t', index_col=0)
        beta, se, zscore, pvalue = CytoSig.ridge_significance_test(signature, data, alpha, alternative, nrand, 1, True, False, verbose_flag)
        zscore.to_csv(fprefix + '.signal', sep='\t', index_label=False)

        # if a dataset has individual sample information
        if os.path.exists(fprefix + '.sep.gz'):
            data = pandas.read_csv(fprefix + '.sep.gz', sep='\t', index_col=0)

            beta, se, zscore, pvalue = CytoSig.ridge_significance_test(signature, data, alpha, alternative, nrand, 1, True, False, verbose_flag)
            zscore.to_csv(fprefix + '.sep.signal', sep='\t', index_label=False)

        # merge results: re-read the differential z-scores from disk, since
        # `zscore` may now hold the per-sample result instead
        result = pandas.read_csv(fprefix + '.signal', sep='\t', index_col=0)
        result.columns = title.split('.')[0] + '_' + result.columns
        merge.append(result)

    merge = pandas.concat(merge, axis=1)

    # sanity check: the dataset prefix must make every merged column unique
    assert merge.columns.value_counts().max() == 1, 'duplicated column names in merged CytoSig result'

    merge.to_csv(os.path.join(fpath, 'merge'), sep='\t', index_label=False)
480 |     
481 | 
482 | 
483 | 
def violinplot_one(handle, arr, i, col, alpha=0.6, flag_dot=False, alpha_dot=None, col_dot=None):
    """
    Draw a single violin for `arr` at x position `i`, optionally with jittered dots.

    handle:    object providing violinplot() and plot() (callers in this file pass pyplot)
    arr:       values to draw; must expose .shape[0] when flag_dot is set
    i:         x position of the violin
    col:       face color of the violin body
    alpha:     transparency of the violin body
    flag_dot:  when true, also draw every value as a dot with random horizontal jitter
    alpha_dot: transparency of the dots, defaults to `alpha`
    col_dot:   color of the dots, defaults to `col`
    """
    parts = handle.violinplot(arr, [i], showmeans=False, showmedians=True, showextrema=False, widths=0.6)

    for body in parts['bodies']:
        body.set_facecolor(col)
        body.set_edgecolor('black')
        body.set_alpha(alpha)

    # emphasize the center line: the mean line if present, otherwise the median line
    center = next((parts[key] for key in ('cmeans', 'cmedians') if key in parts), None)

    if center is not None:
        center.set_linewidth(5)
        center.set_edgecolor('black')
        center.set_alpha(1)

    if not flag_dot:
        return

    dot_color = col if col_dot is None else col_dot
    dot_alpha = alpha if alpha_dot is None else alpha_dot

    # spread the dots horizontally around position i (normal jitter, sd 0.1)
    jitter = numpy.random.normal(i, 0.1, size=arr.shape[0])
    handle.plot(jitter, arr, color=dot_color, marker='o', linestyle='none', markersize=5, alpha=dot_alpha)
510 | 
511 | 
512 | 
def plot_canakinumab_GSE80060_response_activity():
    """
    systemic juvenile idiopathic arthritis (SJIA)

    Draw one violin (with dots) of the IL1B activity per entry of `conditions`
    below, annotate the plot with the Spearman correlation between the numeric
    level and the group median, and save it as
    <output_path>/GSE80060.IL1B_Canakinumab_SJIA.pdf.
    """

    # x-axis order of the groups; 'Placebo' is counted as level 0 in the correlation
    conditions = ['100', '90', '70', '50', '30', '0', 'Placebo']

    # section 1: post versus pre activity
    signal_file = os.path.join(bulk_path, 'inflam', 'GSE80060.MicroArray.GPL570.diff.1.sep.signal')
    activity = pandas.read_csv(signal_file, sep='\t', index_col=0).loc['IL1B']

    # standardize sample names: the text between '@' and '&' is split on '_'
    # into the treatment and response fields
    parsed = [name.split('@')[1].split('&')[0].split('_') for name in activity.index]
    info = pandas.DataFrame(parsed, index=activity.index)
    info.columns = ['Treatment', 'Response']

    treatment = info['Treatment']
    response = info['Response']

    # keep Canakinumab samples with a response value and Placebo samples without one
    is_canakinumab = (treatment == 'Canakinumab') & (response != 'nan')
    is_placebo = (treatment == 'Placebo') & (response == 'nan')

    # group label per sample: response with '.0' removed for Canakinumab, 'Placebo' otherwise
    group_label = pandas.concat([
        response.loc[is_canakinumab].apply(lambda v: v.replace('.0', '')),
        treatment.loc[is_placebo],
    ])

    activity = activity.loc[group_label.index]
    grouped = activity.groupby(group_label)

    x, y = [], []

    fig = plt.figure(figsize=(figure_width, figure_width), frameon=False)

    for position, condition in enumerate(conditions):
        values = grouped.get_group(condition)
        violinplot_one(plt, values, position, colors_default[0], flag_dot=True, alpha=0.5, alpha_dot=0.6)

        x.append(0 if condition == 'Placebo' else int(condition))
        y.append(values.median())

    r, p = stats.spearmanr(x, y)

    plt.title('r = %.2f, p = %.1e' % (r, p), fontsize=font_size)
    plt.axhline(0, linestyle='--', color='grey', lw=1)
    plt.xticks(range(len(conditions)), conditions, rotation=90)
    plt.tick_params(pad=10)

    plt.xlabel('Response (%)')
    plt.ylabel('IL1B activity diff day3')

    output = os.path.join(output_path, 'GSE80060.IL1B_Canakinumab_SJIA')
    fig.savefig(output + '.pdf', bbox_inches='tight', transparent=True)
    plt.close(fig)
570 | 
571 |     
def plot_IFNK_GSE72754_response_activity():
    """
    systemic lupus erythematosus

    Scatter the IFN1 activity of every sample against the titer parsed from its
    name, annotate the plot with their Spearman correlation, and save it as
    <output_path>/GSE72754.IFN1_IFNK_SLE.pdf.
    """

    # section 1: post versus pre activity
    signal_file = os.path.join(bulk_path, 'inflam', 'GSE72754.MicroArray.HG-U133_Plus_2.diff.1.sep.signal')
    activity = pandas.read_csv(signal_file, sep='\t', index_col=0).loc['IFN1']

    # first whitespace-delimited token after '&' is "<titer>_<month>", both integers
    parsed = [name.split('&')[1].split()[0].split('_') for name in activity.index]
    info = pandas.DataFrame(parsed, index=activity.index, columns=['titer', 'month']).astype(int)
    titer = info['titer']

    r, p = stats.spearmanr(titer, activity)

    fig = plt.figure(figsize=(figure_width, figure_width), frameon=False)

    plt.plot(titer, activity, 'o', markersize=10)
    plt.title('r = %.2f, p = %.1e' % (r, p), fontsize=font_size)
    plt.xlabel('anti-IFNA Titer')
    plt.ylabel('IFN1 activity diff')

    # dashed reference lines at zero activity and at a titer of 100
    plt.axhline(0, linestyle='--', color='grey')
    plt.axvline(100, linestyle='--', color='grey')

    # scientific notation on the titer axis
    plt.gca().ticklabel_format(axis="x", style="sci", scilimits=(0,0))

    output = os.path.join(output_path, 'GSE72754.IFN1_IFNK_SLE')
    fig.savefig(output + '.pdf', bbox_inches='tight', transparent=True)
    plt.close(fig)
602 |     
603 | 
604 | 
def CytoSig_run_COVID19_singlecell():
    """
    Run CytoSig on the converted COVID19 single-cell datasets.

    For each dataset, load <sc_path>/<dataset>.pickle.gz, subtract the
    background computed from the control columns, write the CytoSig z-scores to
    <output_path>/<dataset>.signal and pass that file to
    analyze_three_level_significance. Datasets whose pickle is missing are
    reported on stderr and skipped.
    """
    # dataset -> severity levels; the third level names the control group
    cohorts = {
        'GSE145926': ['severe', 'mild', 'healthy'],
        'EGAS00001004571_cohort1': ['severe', 'mild', 'control'],
    }

    for dataset, levels in cohorts.items():
        pickle_path = os.path.join(sc_path, dataset + '.pickle.gz')
        output = os.path.join(output_path, dataset + '.signal')

        if not os.path.exists(pickle_path):
            sys.stderr.write('Cannot find converted pickle for %s\n' % dataset)
            continue

        data = pandas.read_pickle(pickle_path)

        # normalize by health controls: columns whose name contains the control level
        is_control = [levels[2] in name for name in data.columns]
        background = category_background(data.loc[:, is_control], '.')
        data = data.subtract(background, axis=0)

        beta, se, zscore, pvalue = CytoSig.ridge_significance_test(signature, data, alpha, alternative, nrand, 1, True, False, verbose_flag)
        zscore.to_csv(output, sep='\t', index_label=False)

        # please see Methods section of our paper for neutrophils
        qthres = 0.052 if dataset == 'EGAS00001004571_cohort1' else 0.05

        analyze_three_level_significance(output, levels, qthres=qthres)
634 | 
635 | 
636 | 
637 | 
def analyze_COVID19_SC_Severity_joint_heatmap(top_value = 4):
    """
    Draw the joint heatmap of the COVID19 single-cell triplet results.

    For each dataset, reads <output_path>/<dataset>.signal.triplet and
    <output_path>/<dataset>.signal.cnt_map (presumably written by
    analyze_three_level_significance -- TODO confirm). A signal is kept when its
    count in any dataset's cnt_map is at least one third of the number of
    distinct column prefixes (text before '@') in that dataset's triplet table.

    Writes <output_path>/COVID19.signal.triplet.sourcedata.csv (the plotted
    matrix, columns in clustered order) and
    <output_path>/COVID19.signal.triplet.significant.pdf.

    top_value: color scale limit; the heatmap spans [-top_value, top_value].

    Datasets without a triplet file are reported on stderr and skipped; when
    none is available the function returns without output.
    """
    datasets = ['GSE145926', 'EGAS00001004571_cohort1']

    merge = []

    included = set()

    for dataset in datasets:
        output = os.path.join(output_path, dataset + '.signal')

        if not os.path.exists(output + '.triplet'):
            sys.stderr.write('Cannot find result for %s\n' % dataset)
            continue

        mat = pandas.read_csv(output + '.triplet', sep='\t', index_col=0)
        merge.append(mat)

        # number of distinct column prefixes before '@'
        N = len({v.split('@', 1)[0] for v in mat.columns})

        cnt_map = pandas.read_csv(output + '.cnt_map', sep='\t', index_col=0, header=None)
        cnt_map = cnt_map.iloc[:, 0]
        cnt_map = cnt_map.loc[cnt_map >= N/3.0]

        included.update(cnt_map.index)

    if not merge:
        sys.stderr.write('Nothing to merge\n')
        return

    merge = pandas.concat(merge, axis=1, join='inner')

    # pandas >= 2.0 refuses a set as .loc indexer; a sorted list works on every
    # version and, unlike set iteration order, is identical from run to run
    merge = merge.loc[sorted(included)].transpose()

    post_s = re.compile('s$')

    # label only the middle row of every group of three consecutive rows,
    # using the text before '@' with a trailing 's' removed
    merge.index = [
        post_s.sub('', v.split('@')[0]) if (i - 1) % 3 == 0 else ''
        for i, v in enumerate(merge.index)
    ]

    g = seaborn.clustermap(merge, cmap="coolwarm", row_cluster=False, metric='correlation', vmax=top_value, vmin=-top_value, center=0, xticklabels=True, yticklabels=True, cbar_kws={'label': 'Median Activity', "shrink": .5},
                               figsize=(1.1*merge.shape[1], 0.5*merge.shape[0]))#, dendrogram_ratio=(0.5, 0.1))

    plt.tick_params(pad=10)

    xlabel_lst = g.ax_heatmap.get_xticklabels()

    plt.setp(xlabel_lst, rotation=90)
    plt.setp(g.ax_heatmap.get_yticklabels(), rotation=0)

    # export the source data with columns in the order shown on the heatmap
    merge = merge.loc[:, [v.get_text() for v in xlabel_lst]]

    for a in g.ax_row_dendrogram.collections: a.set_linewidth(2)
    for a in g.ax_col_dendrogram.collections: a.set_linewidth(2)

    out = os.path.join(output_path, 'COVID19.signal.triplet')

    merge.to_csv(out + '.sourcedata.csv')

    plt.savefig(out + '.significant.pdf', bbox_inches='tight', transparent=True)

    # release the figure, as every other plotting function in this file does
    plt.close()
703 | 
704 | 
705 | 
def get_map_lst(vmap, key, flag_map=False):
    """
    Return vmap[key], creating an empty container for it when needed.

    A missing key, or a key whose stored value is None, gets a fresh empty
    dict (flag_map true) or a fresh empty list (flag_map false) that is stored
    in vmap and returned. Any other stored value is returned unchanged.
    """
    existing = vmap.get(key)

    if existing is not None:
        return existing

    fresh = {} if flag_map else []
    vmap[key] = fresh
    return fresh
716 | 
717 | 
def plot_blockade_activity_diff():
    """
    Plot the activity difference of each blockade target across datasets.

    Reads the annotation table <base_path>/signal.xlsx and the merged CytoSig
    scores <bulk_path>/inflam/merge. Scores of rows whose Response is not 'No'
    are collected per (target, dataset); each pair is reduced to a median, and
    each target to the median of its dataset medians. The dataset medians are
    plotted per target, sorted by the target score, and saved as
    <output_path>/blockade_diff.pdf. The title reports the fraction of targets
    scoring below -1 and the Wilcoxon p-value of the target scores.
    """
    info = pandas.read_excel(os.path.join(base_path, 'signal.xlsx'), engine='openpyxl')
    info = info.drop(columns=['Comment']).dropna()

    # VEGFA has clinical response data available, thus ignore its Xenograft studies
    is_vegfa_xenograft = (info['Disease'] == 'Xenograft') & (info['Target'] == 'VEGFA')
    info = info.loc[~is_vegfa_xenograft]

    # load previous CytoSig scores
    result = pandas.read_csv(os.path.join(bulk_path, 'inflam', 'merge'), sep='\t', index_col=0)

    # "<target>\t<dataset>" -> {response -> [scores]}
    dataset_map = {}

    for _, row in info.iterrows():
        dataset = row['Dataset']
        condition = row['Condition']
        response = row['Response']
        targets = [name.strip() for name in row['Target'].split(',')]

        for target in targets:
            if target not in result.index:
                continue

            # looked up for every row, so a missing column fails even for 'No' rows
            score = result.loc[target, dataset + '_' + condition]

            # for all responder cases, plot their diff values
            if response == 'No':
                continue

            by_response = dataset_map.setdefault(target + '\t' + dataset, {})
            by_response.setdefault(response, []).append(score)

    # target -> [dataset-level medians]
    target_map = {}

    for key, by_response in dataset_map.items():
        target, _dataset = key.split('\t')

        if len(by_response) > 1:
            # mixed response labels: only the 'Yes' scores are used
            assert 'Yes' in by_response
            scores = by_response['Yes']
        else:
            assert ('Yes' in by_response) or ('Unclear' in by_response)
            scores = list(by_response.values())[0]

        # dataset-level median
        target_map.setdefault(target, []).append(numpy.median(scores))

    # target-level median, sorted ascending; this order is the x-axis order
    score_map = pandas.Series({target: numpy.median(medians) for target, medians in target_map.items()}).sort_values()

    p = stats.wilcoxon(score_map)[1]

    accuracy_vthres = -1
    fig = plt.figure(figsize=(1.5*figure_width, figure_width), frameon=False)

    for position, target in enumerate(score_map.index):
        # values below -10 are drawn at -10
        clipped = [max(v, -10) for v in target_map[target]]

        jitter = numpy.random.normal(position, 0.1, size=len(clipped))
        plt.plot(jitter, clipped, marker='o', linestyle='none', markersize=10, color=colors_default[0])
        plt.axvline(position+0.5, linestyle='--', color='grey', lw=1)

    plt.axhline(0, color='grey', lw=1)
    plt.axhline(accuracy_vthres, linestyle='--')
    plt.text(0, accuracy_vthres - 1, 'v = %d' % accuracy_vthres)

    plt.xticks(range(score_map.shape[0]), score_map.index, rotation=90)
    plt.tick_params(pad=10)
    plt.ylabel('Activity Diff')

    accuracy = (score_map < accuracy_vthres).mean()
    plt.title('accuracy = %.2f , p = %.1e' % (accuracy, p), fontsize=font_size)

    output = os.path.join(output_path, 'blockade_diff')
    fig.savefig(output + '.pdf', bbox_inches='tight', transparent=True)
    plt.close(fig)
803 | 
804 | 
805 |     
806 |     
807 |  
808 | 
def main():
    """
    Run every analysis step in order and return 0.

    The order matters: the steps marked "prepare" write the files that the
    plotting steps after them read.
    """
    pipeline = (
        # Figure 4e & Extended Data Figure 4
        Survival_Analysis_Tumor,

        # prepare for Figure 4a-c
        CytoSig_run_Inflam,

        # Figure 4a
        plot_canakinumab_GSE80060_response_activity,

        # Figure 4b
        plot_IFNK_GSE72754_response_activity,

        # Figure 4c
        plot_blockade_activity_diff,

        # prepare for Figure 6 and generate Figure 6f
        CytoSig_run_COVID19_singlecell,

        # Figure 6g
        analyze_COVID19_SC_Severity_joint_heatmap,
    )

    for step in pipeline:
        step()

    return 0
832 | 
# run the full pipeline only when executed as a script, not when imported
if __name__ == '__main__':
    main()
834 | 


--------------------------------------------------------------------------------