├── AnalysisTutorial.ipynb
├── CITEsort
├── BTree.py
├── BTreeTraversal.py
├── Matryoshka.py
├── ReSplit.py
├── Visualize.py
├── __init__.py
└── traversal.py
├── CITEsort_out
├── .DS_Store
├── data_clr.csv
├── data_cls_hist.png
├── leaf_labels.csv
├── tree.dot
├── tree.pdf
├── tree.pickle
├── tree_complete.dot
└── tree_complete.pdf
├── LICENSE
├── README.md
├── datasets
├── CBMC_8k_ADT_clr.csv
├── GSE143363_ADT_Dx_count.csv
├── GSE143363_ADT_Rl_count.csv
├── MALT_8k_ADT_clr.csv
├── PBMC_16k_ADT_clr.csv
├── PBMC_1k_ADT_clr.csv
├── PBMC_1k_b_ADT_clr.csv
├── PBMC_2k_ADT_clr.csv
├── PBMC_5k_ADT_clr.csv
└── PBMC_8k_ADT_clr.csv
├── performance.py
├── performance
├── ll.pdf
├── record_8DBs.csv
├── record_alldb.csv
├── record_full_alldb.csv
└── time.pdf
├── preCITEsort.py
├── readme_figs
├── ACTandBCT.png
├── ACTandBCT_small.jpeg
├── ACTimbalance.png
├── CITE-sort.png
├── FittingInLowDimension.png
└── taxonomy.png
└── runCITEsort.py
/CITEsort/BTree.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Aug 18 15:41:14 2019
5 |
6 | @author: lqyair
7 | """
8 |
class BTree:
    """Node of the binary tree produced by the recursive splitting routines.

    Every node stores the cells (``indices``) that reached it.  Internal
    nodes are keyed by the tuple of feature names used for the split; the
    splitting code in this package creates terminal nodes with the key
    ``('leaf',)``.
    """

    def __init__(self, key, left=None, right=None, indices=None, stop=None,
                 all_clustering_dic=None, where_dominant=None, weight=None,
                 ll=None, bic=None):
        """Create a node.

        Parameters
        ----------
        key : tuple of str
            Feature name(s) used to split this node, or ``('leaf',)``.
        left, right : BTree or None
            Child nodes.
        indices : list or None
            Labels of the cells attached to this node.
        stop : str or None
            Reason the recursion stopped at this node (legacy).
        all_clustering_dic : dict or None
            Per-dimension clustering results recorded while scanning features.
        where_dominant : str or None
            ``"left"`` / ``"right"``; indicator of edge colour.
        weight, ll, bic : float or None
            Values attached by the splitting routine that built the node.
        """
        self.key = key
        self.right = right
        self.left = left
        self.indices = indices
        self.all_clustering_dic = all_clustering_dic
        self.weight = weight
        self.ll = ll
        self.bic = bic
        self.where_dominant = where_dominant
        self.stop = stop  # legacy

    def _label(self):
        """Return the text drawn for this node: key parts joined by '_'."""
        return '_'.join(self.key)

    def display(self):
        """Print an ASCII drawing of the subtree rooted at this node."""
        lines, _, _, _ = self._display_aux()
        for line in lines:
            print(line)

    def _display_aux(self):
        """Returns list of strings, width, height, and horizontal coordinate of the root."""
        s = self._label()
        u = len(s)

        # No child: the drawing is just the label itself.
        if self.right is None and self.left is None:
            return [s], u, 1, u // 2

        # Only left child: label sits to the right of the child's drawing.
        if self.right is None:
            lines, n, p, x = self.left._display_aux()
            first_line = (x + 1) * ' ' + (n - x - 1) * '_' + s
            second_line = x * ' ' + '/' + (n - x - 1 + u) * ' '
            shifted_lines = [line + u * ' ' for line in lines]
            return [first_line, second_line] + shifted_lines, n + u, p + 2, n + u // 2

        # Only right child: label sits to the left of the child's drawing.
        if self.left is None:
            lines, n, p, x = self.right._display_aux()
            first_line = s + x * '_' + (n - x) * ' '
            second_line = (u + x) * ' ' + '\\' + (n - x - 1) * ' '
            shifted_lines = [u * ' ' + line for line in lines]
            return [first_line, second_line] + shifted_lines, n + u, p + 2, u // 2

        # Two children: draw both side by side with the label in between.
        left, n, p, x = self.left._display_aux()
        right, m, q, y = self.right._display_aux()
        first_line = (x + 1) * ' ' + (n - x - 1) * '_' + s + y * '_' + (m - y) * ' '
        second_line = x * ' ' + '/' + (n - x - 1 + u + y) * ' ' + '\\' + (m - y - 1) * ' '
        # Pad the shorter drawing with blank rows so both have equal height.
        if p < q:
            left += [n * ' '] * (q - p)
        elif q < p:
            right += [m * ' '] * (p - q)
        lines = [first_line, second_line] + [a + u * ' ' + b for a, b in zip(left, right)]
        return lines, n + m + u, max(p, q) + 2, n + u // 2
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/CITEsort/BTreeTraversal.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Aug 18 20:26:57 2019
5 |
6 | @author: lqyair
7 | """
8 | import sys
9 | sys.path.append("./CITEsort")
10 |
11 | import pandas as pd
12 | import numpy as np
13 | from matplotlib import pyplot as plt
14 | from Visualize import visualize_node,visualize_pair
15 |
16 |
17 | '''
18 | from matplotlib import pyplot as plt
19 | from matplotlib import cm,colors
20 | from mpl_toolkits.axes_grid1 import axes_grid
21 | #import seaborn as sns
22 | import pdb
23 | '''
24 |
class BTreeTraversal:
    """Flatten a tree of nodes into an ordered list and summarise it.

    Nodes are named ``"<position>_<key parts joined by '_'>"``; a node is
    treated as a leaf when the second '_'-separated field of its name is
    ``'leaf'``.
    """

    def __init__(self, tree, method='bfs', nodelist=None, nodename=None,
                 tree_summary=None, leaf_summary=None, ll=None, n_components=None):
        """Traverse ``tree`` and build the node names and summary tables.

        Parameters
        ----------
        tree : node object
            Root node exposing ``key``, ``left``, ``right`` and ``indices``.
        method : {'bfs', 'dfs'}
            Level-order or pre-order traversal.
        nodelist, nodename, tree_summary, leaf_summary, ll, n_components :
            Accepted for backward compatibility; the values are recomputed
            from ``tree`` and the arguments are not used.

        Raises
        ------
        ValueError
            If ``method`` is neither ``'bfs'`` nor ``'dfs'``.
        """
        self.tree = tree
        self.method = method
        if method == 'bfs':
            self.nodelist = self.levelOrderTraversal()
        elif method == 'dfs':
            self.nodelist = self.preorderTraversal()
        else:
            # Previously this fell through and failed later with an
            # AttributeError on the missing ``nodelist``.
            raise ValueError("method must be 'bfs' or 'dfs', got %r" % (method,))

        self.nodename = [str(i) + '_' + '_'.join(node.key)
                         for i, node in enumerate(self.nodelist)]
        self.tree_summary, self.leaf_summary = self.summarize()
        if hasattr(self.tree, 'll'):
            self.ll = self.leaf_summary['ll'].sum()
        else:
            self.ll = None
        self.n_components = self.leaf_summary.shape[0]

    @staticmethod
    def _is_leaf_name(name):
        """Return True when a node name has the form '<position>_leaf...'."""
        return name.split('_')[1] == 'leaf'

    def summarize(self):
        """Return ``(tree_summary, leaf_summary)`` DataFrames indexed by node name.

        ``tree_summary`` has one row per node; ``leaf_summary`` keeps only the
        leaf rows, sorted by decreasing ``Count``.
        """
        columns = {'Count': [len(node.indices) for node in self.nodelist]}
        if hasattr(self.tree, 'll'):
            columns['Weight'] = [node.weight for node in self.nodelist]
            columns['Stop'] = [node.stop for node in self.nodelist]
            columns['ll'] = [node.ll for node in self.nodelist]
        tree_summary = pd.DataFrame(columns, index=self.nodename)

        leaf_names = [name for name in self.nodename if self._is_leaf_name(name)]
        leaf_summary = tree_summary.loc[leaf_names, :]
        leaf_summary = leaf_summary.sort_values(by='Count', ascending=False)

        return tree_summary, leaf_summary

    def get_ll(self):
        """Sum ``ll`` over the leaves, store it in ``self.ll_tot`` and return it."""
        self.ll_tot = sum(node.ll for node, name in zip(self.nodelist, self.nodename)
                          if self._is_leaf_name(name))
        return self.ll_tot

    def get_node(self, nodeID):
        """Return the node at position ``nodeID`` of the traversal order."""
        return self.nodelist[nodeID]

    def generateLabel(self):
        """generate label file (binary matrix: Num.cell x Num.node, X_ij = 1 means cell i is attached to node j.)"""
        label = pd.DataFrame(np.zeros([len(self.tree.indices), len(self.nodename)]),
                             index=self.tree.indices, columns=self.nodename)

        for node, name in zip(self.nodelist, self.nodename):
            label.loc[node.indices, name] = 1

        return label

    def get_leaf_label(self):
        """generate label (one column, indicating which leaf cells are assigned.)"""
        label = pd.DataFrame({'GEM': self.tree.indices,
                              'Label': [None] * len(self.tree.indices)},
                             index=self.tree.indices)
        for node, name in zip(self.nodelist, self.nodename):
            if self._is_leaf_name(name):
                label.loc[node.indices, 'Label'] = name

        return label

    def plot_node(self, data, nodeID, viz_dim=1, **plot_para):
        """plot the specified node (default: savefig=False,savepath='.')"""
        if viz_dim == 1:
            visualize_node(data, node=self.nodelist[nodeID], nodename=self.nodename[nodeID], **plot_para)
        if viz_dim == 2:
            visualize_pair(data, node=self.nodelist[nodeID], nodename=self.nodename[nodeID], **plot_para)

    def plot_leaf_size(self):
        """Plot per-leaf and cumulative cell counts / proportions side by side."""
        leaf_size = self.leaf_summary['Count']
        # ``summarize`` does not create a 'Proportion' column, so derive the
        # proportions from the counts unless a caller has added the column.
        if 'Proportion' in self.leaf_summary.columns:
            leaf_prop = self.leaf_summary['Proportion']
        else:
            leaf_prop = leaf_size / leaf_size.sum()

        n_leaf = len(leaf_prop)
        positions = range(n_leaf)
        # About five ticks when there are at least five leaves, else one per leaf.
        ticks = np.arange(0, n_leaf, max(1, n_leaf // 5))

        fig, axes = plt.subplots(1, 2, figsize=(12, 4))
        panels = [
            (axes[0], leaf_prop, leaf_size, 'Num. of cells in leaf'),
            (axes[1], leaf_prop.cumsum(), leaf_size.cumsum(), 'Cumulative num. of cell in leaf'),
        ]
        for ax, prop, count, title in panels:
            color = 'tab:red'
            ax.set_xlabel('leaf', fontsize=20)
            ax.set_ylabel('Proportion', color=color, fontsize=20)
            ax.plot(positions, prop, color=color, marker='o')
            # Set ticks on this axes explicitly; plt.xticks would act on
            # whichever axes happens to be current.
            ax.set_xticks(ticks)
            ax.tick_params(axis='y', labelcolor=color, labelsize=15)
            ax.tick_params(axis='x', labelsize=15)
            ax.set_title(title, fontsize=20, pad=20)

            twin = ax.twinx()  # second y-axis sharing the same x-axis
            color = 'tab:blue'
            twin.set_ylabel('Count', color=color, fontsize=20)
            twin.plot(positions, count, color=color, marker='o')
            twin.tick_params(axis='y', labelcolor=color, labelsize=15)

        fig.tight_layout()  # otherwise the right y-label is slightly clipped
        plt.show()

    '''
    def track_marker(self,data,n_big_leaf,**plot_para):
        """track marker distributions in big leafs. (at most 12 leafs. default para: savefig=False,outpath='.')"""
        n_big_leaf = min(n_big_leaf,12)

        savefig = plot_para.get('savefig',False)
        outpath = plot_para.get('outpath','.')

        big_leaf = self.leaf_summary.index.values.tolist()[0:n_big_leaf]
        markers = data.columns.values.tolist()
        node_plot = [self.nodename[0]] + big_leaf

        cmap = cm.get_cmap('Set3')
        col_dic = dict(zip(big_leaf,[colors.to_hex(cmap(i)) for i in range(len(big_leaf))]))
        col_dic[node_plot[0]] = '#999999' # col for all cells

        nrows = np.ceil(len(markers)/5)
        ncols = 5
        naxes = len(node_plot)
        f = plt.figure(figsize=(10, naxes))
        for i, m in enumerate(markers):
            ag = axes_grid.Grid(f, (nrows, ncols, i+1), (naxes, 1), axes_pad=0)
            for j in range(naxes):
                leaf_idx = int(node_plot[j].split('_')[0])
                ag[j].hist(data.loc[self.nodelist[leaf_idx].indices,m],
                           color = col_dic[node_plot[j]], density = True, bins = 'auto')
                ag[j].axvline(0, linestyle='dashed', linewidth=2)
                ag[j].yaxis.set_ticks([])
                ag[j].xaxis.set_ticks([])
                if j%naxes == 0:
                    ag[j].set_title(markers[i],fontsize=15)
                if i%ncols == 0:
                    ag[j].set_ylabel(str(leaf_idx),fontsize=12)

        plt.subplots_adjust(wspace=0.1, hspace=0.3)
        if savefig == True:
            plt.savefig(outpath+'/track_marker_in_big_leafs.png')
        plt.show()
    '''

    # dfs
    def preorderTraversal(self):
        """Return the nodes in pre-order (node, left subtree, right subtree).

        Returns None when the tree is None.
        """
        node = self.tree
        if node is None:
            return

        nodelist = []
        stack = []

        while node is not None or stack:
            # Walk down the left spine, recording nodes as they are visited.
            while node is not None:
                nodelist.append(node)
                stack.append(node)
                node = node.left
            node = stack.pop().right

        return nodelist

    # bfs
    def levelOrderTraversal(self):
        """Return the nodes level by level, left child before right child.

        Returns None when the tree is None.
        """
        root = self.tree
        if root is None:
            return

        # The result list doubles as the FIFO queue: ``cursor`` marks the next
        # node whose children have not been enqueued yet (avoids list.pop(0)).
        nodelist = [root]
        cursor = 0
        while cursor < len(nodelist):
            node = nodelist[cursor]
            cursor += 1
            for child in (node.left, node.right):
                if child is not None:
                    nodelist.append(child)

        return nodelist
243 |
244 |
245 |
246 |
247 |
248 |
249 |
--------------------------------------------------------------------------------
/CITEsort/Matryoshka.py:
--------------------------------------------------------------------------------
1 |
2 | import sys
3 | sys.path.append("./CITEsort")
4 | import numpy as np
5 | from sklearn.mixture import GaussianMixture
6 | import itertools
7 | from scipy import stats
8 | import operator
9 | from scipy.spatial import distance
10 | from BTree import BTree
11 | import copy
12 | #from scipy.signal import upfirdn
13 | #import pandas as pd
14 |
15 |
def Matryoshka(data,merge_cutoff=0.1,max_k=10,max_ndim=2,bic='bic'):
    """Recursively bipartition ``data`` and return the resulting BTree.

    A node becomes a leaf when it has at most 20 rows or when HiScanFeatures
    finds no separable feature.  Otherwise the separable feature with the
    highest score is used to split the rows in two, and both halves are
    processed recursively with the same settings.
    """
    cell_ids = data.index.values.tolist()

    # Too few cells to fit mixtures: close the branch immediately.
    if data.shape[0] <= 20:
        leaf = BTree(('leaf',))
        leaf.indices = cell_ids
        leaf.all_clustering_dic = _set_small_leaf(data)
        return leaf

    separable_features, bipartitions, scores, all_clustering_dic = HiScanFeatures(
        data, merge_cutoff, max_k, max_ndim, bic)

    # Nothing separates the cells any further.
    if len(separable_features) == 0:
        leaf = BTree(('leaf',))
        leaf.indices = cell_ids
        leaf.all_clustering_dic = all_clustering_dic
        return leaf

    best_feature = separable_features[np.argmax(scores)]
    best_partition = bipartitions[best_feature]

    # Build the internal node keyed by the winning feature(s).
    root = BTree(best_feature)
    root.indices = cell_ids
    root.all_clustering_dic = all_clustering_dic

    # The component with the higher mean goes to the right child.
    p1_mean = data.loc[best_partition, best_feature].mean(0)
    p2_mean = data.loc[~best_partition, best_feature].mean(0)

    if len(p1_mean) == 1:
        partition_goes_right = p1_mean.values > p2_mean.values
    else:
        # Several features: compare sum(mean) / ||mean|| of the two sides.
        p1_cosine = sum(p1_mean)/np.sqrt(sum(p1_mean**2))
        p2_cosine = sum(p2_mean)/np.sqrt(sum(p2_mean**2))
        partition_goes_right = p1_cosine > p2_cosine

    if partition_goes_right:
        right_mask, left_mask = best_partition, ~best_partition
        root.where_dominant = 'right'
    else:
        right_mask, left_mask = ~best_partition, best_partition
        root.where_dominant = 'left'

    child_right = data.iloc[right_mask, :]
    child_left = data.iloc[left_mask, :]

    # Recurse, left subtree first.
    root.left = Matryoshka(child_left, merge_cutoff, max_k, max_ndim, bic)
    root.right = Matryoshka(child_right, merge_cutoff, max_k, max_ndim, bic)

    return root
69 |
70 |
71 |
def HiScanFeatures(data,merge_cutoff,max_k,max_ndim,bic):
    """Scan single features first, then feature tuples of growing size.

    Returns ``(separable_features, bipartitions, scores, all_clustering_dic)``
    where ``all_clustering_dic`` maps each scanned dimensionality to the
    clustering results ScanFeatures produced for it.
    """
    all_clustering_dic = {}
    separable_features, bipartitions, scores, all_clustering_dic[1] = ScanFeatures(
        data, merge_cutoff, max_k, 1, bic)

    if len(separable_features) > 0:
        return separable_features, bipartitions, scores, all_clustering_dic

    # No single feature separates the cells: retry in higher dimensions,
    # restricted to features whose 1-D merge stopped at a similarity
    # strictly between 0.1 and 0.5.
    rescan_features = [
        item[0]
        for item, result in all_clustering_dic[1].items()
        if 0.1 < result['similarity_stopped'] < 0.5
    ]

    for ndim in range(2, max_ndim + 1):
        separable_features, bipartitions, scores, all_clustering_dic[ndim] = ScanFeatures(
            data[rescan_features], merge_cutoff, max_k, ndim, bic)
        if len(separable_features) >= 1:
            break

    return separable_features, bipartitions, scores, all_clustering_dic
92 |
93 |
94 |
def ScanFeatures(data,merge_cutoff,max_k,ndim,bic):
    """Cluster every ``ndim``-sized combination of columns of ``data``.

    Returns the combinations whose merged clustering keeps more than one
    cluster, their bipartition masks (keyed by combination), their scores,
    and the full clustering result for every combination.
    """
    feature_names = data.columns.values.tolist()

    # First pass: cluster each feature combination.
    all_clustering = {}
    for combo in itertools.combinations(feature_names, ndim):
        all_clustering[combo] = Clustering(data.loc[:, combo], merge_cutoff, max_k, bic)

    # Second pass: keep the combinations that still split into >1 cluster.
    separable_features = []
    bipartitions = {}
    scores = []
    for combo, result in all_clustering.items():
        if result['mp_ncluster'] <= 1:
            continue
        separable_features.append(combo)
        bipartitions[combo] = result['max_ent_p']
        scores.append(result['max_ent'])

    return separable_features, bipartitions, scores, all_clustering
115 |
116 |
117 |
def Clustering(x,merge_cutoff,max_k,bic):
    """Fit and merge a Gaussian mixture on ``x`` and pick a bipartition.

    With fewer than 50 distinct values, or when BIC selects one component,
    a single-component result is returned.  Otherwise a mixture is fitted,
    its components merged by ``merge_bhat``, and, if more than one cluster
    remains, the cluster whose one-vs-rest split has the highest binary
    entropy is stored as ``'max_ent_p'`` with its entropy as ``'max_ent'``.
    """
    distinct_values = np.unique(x.values.tolist())

    if len(distinct_values) < 50:
        clustering = _set_one_component(x)
    else:
        k_bic, _ = BIC(x, max_k, bic)
        if k_bic == 1:
            # if only one component, set values
            clustering = _set_one_component(x)
        else:
            fitted = GaussianMixture(k_bic).fit(x)
            clustering = merge_bhat(x, fitted, merge_cutoff)

    if clustering['mp_ncluster'] <= 1:
        return clustering

    merged_label = clustering['mp_clustering']
    labels, counts = np.unique(merged_label, return_counts=True)
    fractions = counts/np.sum(counts)
    # Entropy of "this cluster vs. everything else" for each cluster.
    ents = [stats.entropy([frac, 1-frac], base=2) for frac in fractions]
    winner = np.argmax(ents)
    clustering['max_ent'] = np.max(ents)
    clustering['max_ent_p'] = merged_label == labels[winner]

    return clustering
149 |
150 |
151 |
def bhattacharyya_dist(mu1, mu2, Sigma1, Sigma2):
    """Return the Bhattacharyya distance between two Gaussians.

    Parameters
    ----------
    mu1, mu2 : array-like, shape (d,)
        Component means.
    Sigma1, Sigma2 : array-like, shape (d, d)
        Component covariance matrices (a scalar is treated as 1 x 1).

    Returns
    -------
    float
        ``(mu1-mu2)^T Sig^-1 (mu1-mu2) / 8
        + 0.5*log|Sig| - 0.25*log|Sigma1| - 0.25*log|Sigma2|``
        with ``Sig = (Sigma1 + Sigma2) / 2``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``Sig`` is singular.
    """
    diff = np.atleast_1d(np.asarray(mu1, dtype=float) - np.asarray(mu2, dtype=float))
    Sigma1 = np.atleast_2d(np.asarray(Sigma1, dtype=float))
    Sigma2 = np.atleast_2d(np.asarray(Sigma2, dtype=float))
    Sig = (Sigma1 + Sigma2) / 2

    # Squared Mahalanobis distance via a linear solve (no explicit inverse).
    d1 = np.dot(diff, np.linalg.solve(Sig, diff)) / 8

    # slogdet works on the log scale, so tiny or huge determinants do not
    # underflow/overflow the way log(det(.)) does.
    ldet_s = np.linalg.slogdet(Sig)[1]
    ldet_s1 = np.linalg.slogdet(Sigma1)[1]
    ldet_s2 = np.linalg.slogdet(Sigma2)[1]
    d2 = 0.5*ldet_s - 0.25*ldet_s1 - 0.25*ldet_s2
    return d1+d2
160 |
161 |
162 |
def merge_bhat(x,bp_gmm,cutoff):
    """Greedily merge mixture components whose Bhattacharyya similarity exceeds ``cutoff``.

    Parameters
    ----------
    x : pandas.DataFrame
        Observations the mixture was fitted on (rows are cells).
    bp_gmm : fitted mixture model
        Must expose ``n_components``, ``weights_``, ``means_``,
        ``covariances_`` (indexed here as ``[component, :, :]``),
        ``predict`` and ``predict_proba``.
    cutoff : float
        The most similar pair is merged while its similarity
        ``exp(-bhattacharyya_dist)`` is strictly greater than this value.

    Returns
    -------
    dict
        ``bp_*`` keys describe the model before merging; ``mp_ncluster`` and
        ``mp_clustering`` the result after merging; ``bhat_dic_track`` holds
        the pairwise similarities of every merge round;
        ``similarity_stopped`` is the smallest similarity of the last round
        (1 when the model has a single component, matching
        ``_set_one_component``); ``mergedtonumbers`` maps each original
        component to its merged cluster index.
    """
    clustering = {}
    clustering['bp_ncluster'] = bp_gmm.n_components
    clustering['bp_clustering'] = bp_gmm.predict(x)
    clustering['bp_pro'] = bp_gmm.weights_
    clustering['bp_mean'] = bp_gmm.means_
    clustering['bp_Sigma'] = bp_gmm.covariances_
    clustering['bhat_dic_track'] = {}

    # Means/covariances are updated in place below; work on a copy so the
    # arrays stored under the 'bp_*' keys stay untouched.
    gmm = copy.deepcopy(bp_gmm)
    mu = gmm.means_
    Sigma = gmm.covariances_
    posterior = gmm.predict_proba(x)

    current_ncluster = len(mu)
    mergedtonumbers = list(range(current_ncluster))

    # Bound before the loop so a single-component model cannot hit a
    # NameError when the stopping similarity is computed.
    bhat_dic = {}
    merge_time = 0

    while current_ncluster > 1:

        bhat_dic = {}
        for c_pair in itertools.combinations(range(current_ncluster), 2):
            i, j = c_pair
            bhat_dic[c_pair] = np.exp(-bhattacharyya_dist(mu[i,:], mu[j,:], Sigma[i,:,:], Sigma[j,:,:]))

        clustering['bhat_dic_track'][merge_time] = bhat_dic
        merge_time = merge_time + 1

        max_pair = max(bhat_dic.items(), key=operator.itemgetter(1))[0]
        max_val = bhat_dic[max_pair]

        # Written as "not >" so a NaN similarity stops merging, as before.
        if not (max_val > cutoff):
            break

        merged_i, merged_j = max_pair

        # Component j is absorbed into i; labels above j shift down by one.
        mergedtonumbers = [
            merged_i if val == merged_j else (val - 1 if val > merged_j else val)
            for val in mergedtonumbers
        ]

        posterior[:,merged_i] = posterior[:,merged_i] + posterior[:,merged_j]

        # Re-estimate the merged component from posterior-weighted data.
        w = posterior[:,merged_i]/np.sum(posterior[:,merged_i])
        mu[merged_i,:] = np.dot(w,x)

        x_centered = x - mu[merged_i,:]
        Sigma[merged_i,:,:] = np.cov(x_centered.T,aweights=w,bias=1)

        mu = np.delete(mu,merged_j,0)
        Sigma = np.delete(Sigma,merged_j,0)
        posterior = np.delete(posterior,merged_j,1)
        current_ncluster = current_ncluster - 1

    clustering['similarity_stopped'] = np.min(list(bhat_dic.values())) if bhat_dic else 1
    clustering['mp_ncluster'] = mu.shape[0]
    clustering['mergedtonumbers'] = mergedtonumbers
    clustering['mp_clustering'] = list(np.argmax(posterior, axis=1))

    return clustering
241 |
242 |
243 |
def _set_small_leaf(data):
    """Return ``{1: {(feature,): single-component clustering}}`` for every column of ``data``."""
    per_feature = {}
    for name in data.columns.values.tolist():
        item = (name,)
        per_feature[item] = _set_one_component(data.loc[:, item])

    return {1: per_feature}
258 |
259 |
260 |
261 | def _set_one_component(x):
262 |
263 | clustering = {}
264 | clustering['bp_ncluster'] = 1
265 | clustering['bp_clustering'] = [0]*len(x)
266 | clustering['bp_pro'] = [1]
267 | clustering['bp_mean'] = np.mean(x)
268 | clustering['bp_Sigma'] = np.var(x)
269 | clustering['bhat_dic_track'] = {}
270 | clustering['similarity_stopped'] = 1
271 | clustering['mp_ncluster'] = 1
272 | clustering['mp_clustering'] = [0]*len(x)
273 | clustering['mergedtonumbers'] = [0]
274 |
275 | return clustering
276 |
277 |
278 |
def BIC(X, max_k = 10,bic = 'bic'):
    """return best k chosen with BIC method

    Parameters
    ----------
    X : array-like
        Data passed to ``_get_BIC_k``.
    max_k : int
        Largest number of components tried; capped at the number of
        distinct values in ``X``.
    bic : {'bic', 'bic_min', 'bic_elbow'}
        ``'bic_min'`` picks the k with the lowest BIC, ``'bic_elbow'`` the
        elbow of the BIC curve, ``'bic'`` the smaller of the two.

    Returns
    -------
    (k, bic_list)
        The selected number of components and the BIC score for each k tried.

    Raises
    ------
    ValueError
        If ``bic`` is not one of the supported criteria (previously the
        function returned None and the caller failed while unpacking).
    """
    if bic not in ('bic', 'bic_min', 'bic_elbow'):
        # Checked before the mixtures are fitted.
        raise ValueError("bic must be 'bic', 'bic_min' or 'bic_elbow', got %r" % (bic,))

    bic_list = _get_BIC_k(X, min(max_k,len(np.unique(X))))

    if bic == 'bic_min':
        return np.argmin(bic_list)+1,bic_list
    if bic == 'bic_elbow':
        return _FindElbow(bic_list),bic_list
    return min(np.argmin(bic_list)+1,_FindElbow(bic_list)),bic_list
290 |
291 |
292 |
def _get_BIC_k(X, max_k):
    """Fit one mixture per k in [1, max_k] and return their BIC scores in order of k."""
    return [GaussianMixture(k).fit(X).bic(X) for k in range(1, max_k + 1)]
300 |
301 |
302 |
303 | def _FindElbow(bic_list):
304 | """return elbow point, defined as the farthest point from the line through the first and last points"""
305 | if len(bic_list) == 1:
306 | return 1
307 | else:
308 | a = bic_list[0] - bic_list[-1]
309 | b = len(bic_list) - 1
310 | c = bic_list[-1]*1 - bic_list[0]*len(bic_list)
311 | dis = np.abs(a*range(1,len(bic_list)+1) + b*np.array(bic_list) + c)/np.sqrt(a**2+b**2)
312 | return np.argmax(dis)+1
313 |
314 |
315 |
--------------------------------------------------------------------------------
/CITEsort/ReSplit.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Wed Jan 15 23:44:58 2020
5 |
6 | @author: lianqiuyu
7 | """
8 |
9 | import sys
10 | sys.path.append("./CITEsort")
11 |
12 | import numpy as np
13 | from sklearn.mixture import GaussianMixture
14 | import itertools
15 | from scipy import stats
16 | import operator
17 | from scipy.spatial import distance
18 | from BTree import BTree
19 | import copy
20 | #from scipy.signal import upfirdn
21 | #import pandas as pd
22 | import random
23 |
24 |
def ReSplit(data,merge_cutoff=0.1,weight=1,max_k=10,max_ndim=2,bic='bic'):
    """Recursively split ``data`` and return the resulting BTree.

    Each node records its cells, its ``weight`` and, when it has at least
    two rows, the weighted lower bound (``ll``) and BIC of a one-component
    Gaussian fit.  Recursion stops, with the reason stored in ``stop``,
    when the node has fewer than 2 rows, when no feature is separable, or
    when the best score is below 0.001.
    """
    root = BTree(('leaf',))
    root.indices = data.index.values.tolist()
    root.weight = weight

    if data.shape[0] < 2:
        root.all_clustering_dic = _set_small_leaf(data)
        root.stop = 'small size'
        return root

    # Baseline: a single Gaussian over all cells of this node.
    unimodal = GaussianMixture(1, covariance_type='full').fit(data)
    root.ll = root.weight * unimodal.lower_bound_
    root.bic = unimodal.bic(data)

    separable_features, bipartitions, scores_ll, bic_list, all_clustering_dic = HiScanFeatures(
        data, root, merge_cutoff, max_k, max_ndim, bic)

    if len(separable_features) == 0:
        root.all_clustering_dic = all_clustering_dic
        root.stop = 'no separable features'
        return root

    idx_best = np.argmax(scores_ll)
    if np.max(scores_ll) < 0.001:
        root.stop = 'spliting increases bic'
        return root

    best_feature = separable_features[idx_best]
    best_partition = bipartitions[best_feature]

    # Turn the provisional leaf into an internal node.
    root.key = best_feature
    root.all_clustering_dic = all_clustering_dic

    # The component with the higher mean goes to the right child.
    p1_mean = data.loc[best_partition, best_feature].mean(0)
    p2_mean = data.loc[~best_partition, best_feature].mean(0)

    if len(p1_mean) == 1:
        partition_goes_right = p1_mean.values > p2_mean.values
    else:
        # Several features: compare sum(mean) / ||mean|| of the two sides.
        p1_cosine = sum(p1_mean)/np.sqrt(sum(p1_mean**2))
        p2_cosine = sum(p2_mean)/np.sqrt(sum(p2_mean**2))
        partition_goes_right = p1_cosine > p2_cosine

    # Fraction of this node's cells on each side of the partition.
    n_cells = len(best_partition)
    frac_in = sum(best_partition)/n_cells
    frac_out = sum(~best_partition)/n_cells

    if partition_goes_right:
        child_right, w_r = data.iloc[best_partition, :], frac_in
        child_left, w_l = data.iloc[~best_partition, :], frac_out
        root.where_dominant = 'right'
    else:
        child_right, w_r = data.iloc[~best_partition, :], frac_out
        child_left, w_l = data.iloc[best_partition, :], frac_in
        root.where_dominant = 'left'

    # Recurse, left subtree first; child weights are relative to the whole tree.
    root.left = ReSplit(child_left, merge_cutoff, weight * w_l, max_k, max_ndim, bic)
    root.right = ReSplit(child_right, merge_cutoff, weight * w_r, max_k, max_ndim, bic)

    return root
117 |
118 |
119 |
def HiScanFeatures(data,root,merge_cutoff,max_k,max_ndim,bic):
    """Score single features first, then feature tuples of growing size.

    Returns ``(separable_features, bipartitions, scores, bic_list,
    all_clustering_dic)``; ``all_clustering_dic`` is keyed by the loop's
    ``ndim`` value for each scan performed.
    """
    all_clustering_dic = {}
    separable_features, bipartitions, scores, bic_list, all_clustering_dic[1] = ScoreFeatures(
        data, root, merge_cutoff, max_k, 1, bic)

    if len(separable_features) > 0:
        return separable_features, bipartitions, scores, bic_list, all_clustering_dic

    # Candidates for a higher-dimensional rescan: features whose 1-D merge
    # stopped at a similarity strictly between 0.1 and 0.5.
    rescan_features = [
        item[0]
        for item, result in all_clustering_dic[1].items()
        if 0.1 < result['similarity_stopped'] < 0.5
    ]
    n_rescan = len(rescan_features)

    for ndim in range(2, max_ndim + 1):
        if n_rescan < ndim:
            # Too few candidates for this dimensionality: one last scan over
            # the full data with n_rescan features per tuple and a merge
            # cutoff of 0.5, then stop.
            separable_features, bipartitions, scores, bic_list, all_clustering_dic[ndim] = ScoreFeatures(
                data, root, 0.5, max_k, n_rescan, bic)
            break

        separable_features, bipartitions, scores, bic_list, all_clustering_dic[ndim] = ScoreFeatures(
            data[rescan_features], root, 0.5, max_k, ndim, bic)
        if len(separable_features) >= 1:
            break

    return separable_features, bipartitions, scores, bic_list, all_clustering_dic
144 |
145 |
146 |
def ScoreFeatures(data,root,merge_cutoff,max_k,ndim,bic):
    """Cluster every ``ndim``-sized feature combination and score its best bipartition.

    For each combination whose merged clustering has more than one cluster
    (and no cluster with fewer than 5 cells), every cluster is tried as a
    one-vs-rest split: a single Gaussian is fitted to each side on all
    columns of ``data`` and the gain in weighted lower bound over
    ``root.ll`` is recorded.  The cluster with the largest gain defines the
    combination's bipartition, score and BIC.
    """
    feature_names = data.columns.values.tolist()

    all_clustering = {}
    for combo in itertools.combinations(feature_names, ndim):
        all_clustering[combo] = Clustering(data.loc[:, combo], merge_cutoff, max_k, bic)

    def _fit_side(mask):
        # One Gaussian on the selected rows: (size-weighted lower bound, BIC).
        subset = data.loc[mask, :]
        model = GaussianMixture(1, covariance_type='full').fit(subset)
        return model.lower_bound_ * sum(mask)/len(mask), model.bic(subset)

    separable_features = []
    bipartitions = {}
    scores = []
    bic_list = []

    for combo, result in all_clustering.items():
        if result['mp_ncluster'] <= 1:
            continue

        merged_label = result['mp_clustering']
        labels, counts = np.unique(merged_label, return_counts=True)
        if len(counts) == 1 or np.min(counts) < 5:
            continue

        ll_gain = []
        bic_mlabels = []
        for mlabel in labels:
            assignment = merged_label == mlabel
            ll1, bic1 = _fit_side(assignment)
            ll0, bic0 = _fit_side(~assignment)
            ll_gain.append((ll1 + ll0) * root.weight - root.ll)
            bic_mlabels.append(bic1 + bic0)

        winner = np.argmax(ll_gain)

        bipartitions[combo] = merged_label == labels[winner]
        scores.append(ll_gain[winner])
        separable_features.append(combo)
        bic_list.append(bic_mlabels[winner])

    return separable_features, bipartitions, scores, bic_list, all_clustering
197 |
198 |
199 |
def Clustering(x,merge_cutoff,max_k,bic):
    """Fit a Gaussian mixture on ``x`` and merge similar components.

    With fewer than 50 distinct values, or when BIC selects one component,
    a single-component result is returned; otherwise the fitted mixture is
    passed through ``merge_bhat``.
    """
    distinct_values = np.unique(x.values.tolist())

    if len(distinct_values) < 50:
        return _set_one_component(x)

    k_bic, _ = BIC(x, max_k, bic)
    if k_bic == 1:
        # if only one component, set values
        return _set_one_component(x)

    fitted = GaussianMixture(k_bic).fit(x)
    return merge_bhat(x, fitted, merge_cutoff)
232 |
233 |
234 |
def bhattacharyya_dist(mu1, mu2, Sigma1, Sigma2):
    """Return the Bhattacharyya distance between two Gaussians.

    Parameters
    ----------
    mu1, mu2 : array-like, shape (d,)
        Component means.
    Sigma1, Sigma2 : array-like, shape (d, d)
        Component covariance matrices (a scalar is treated as 1 x 1).

    Returns
    -------
    float
        ``(mu1-mu2)^T Sig^-1 (mu1-mu2) / 8
        + 0.5*log|Sig| - 0.25*log|Sigma1| - 0.25*log|Sigma2|``
        with ``Sig = (Sigma1 + Sigma2) / 2``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``Sig`` is singular.
    """
    diff = np.atleast_1d(np.asarray(mu1, dtype=float) - np.asarray(mu2, dtype=float))
    Sigma1 = np.atleast_2d(np.asarray(Sigma1, dtype=float))
    Sigma2 = np.atleast_2d(np.asarray(Sigma2, dtype=float))
    Sig = (Sigma1 + Sigma2) / 2

    # Squared Mahalanobis distance via a linear solve (no explicit inverse).
    d1 = np.dot(diff, np.linalg.solve(Sig, diff)) / 8

    # slogdet works on the log scale, so tiny or huge determinants do not
    # underflow/overflow the way log(det(.)) does.
    ldet_s = np.linalg.slogdet(Sig)[1]
    ldet_s1 = np.linalg.slogdet(Sigma1)[1]
    ldet_s2 = np.linalg.slogdet(Sigma2)[1]
    d2 = 0.5*ldet_s - 0.25*ldet_s1 - 0.25*ldet_s2
    return d1+d2
243 |
244 |
245 |
def merge_bhat(x,bp_gmm,cutoff):
    """Greedily merge mixture components whose Bhattacharyya similarity exceeds ``cutoff``.

    Parameters
    ----------
    x : pandas.DataFrame
        Observations the mixture was fitted on (rows are cells).
    bp_gmm : fitted mixture model
        Must expose ``n_components``, ``weights_``, ``means_``,
        ``covariances_`` (indexed here as ``[component, :, :]``),
        ``predict`` and ``predict_proba``.
    cutoff : float
        The most similar pair is merged while its similarity
        ``exp(-bhattacharyya_dist)`` is strictly greater than this value.

    Returns
    -------
    dict
        ``bp_*`` keys describe the model before merging; ``mp_ncluster`` and
        ``mp_clustering`` the result after merging; ``bhat_dic_track`` holds
        the pairwise similarities of every merge round;
        ``similarity_stopped`` is the smallest similarity of the last round
        (1 when the model has a single component, matching
        ``_set_one_component``); ``mergedtonumbers`` maps each original
        component to its merged cluster index.
    """
    clustering = {}
    clustering['bp_ncluster'] = bp_gmm.n_components
    clustering['bp_clustering'] = bp_gmm.predict(x)
    clustering['bp_pro'] = bp_gmm.weights_
    clustering['bp_mean'] = bp_gmm.means_
    clustering['bp_Sigma'] = bp_gmm.covariances_
    clustering['bhat_dic_track'] = {}

    # Means/covariances are updated in place below; work on a copy so the
    # arrays stored under the 'bp_*' keys stay untouched.
    gmm = copy.deepcopy(bp_gmm)
    mu = gmm.means_
    Sigma = gmm.covariances_
    posterior = gmm.predict_proba(x)

    current_ncluster = len(mu)
    mergedtonumbers = list(range(current_ncluster))

    # Bound before the loop so a single-component model cannot hit a
    # NameError when the stopping similarity is computed.
    bhat_dic = {}
    merge_time = 0

    while current_ncluster > 1:

        bhat_dic = {}
        for c_pair in itertools.combinations(range(current_ncluster), 2):
            i, j = c_pair
            bhat_dic[c_pair] = np.exp(-bhattacharyya_dist(mu[i,:], mu[j,:], Sigma[i,:,:], Sigma[j,:,:]))

        clustering['bhat_dic_track'][merge_time] = bhat_dic
        merge_time = merge_time + 1

        max_pair = max(bhat_dic.items(), key=operator.itemgetter(1))[0]
        max_val = bhat_dic[max_pair]

        # Written as "not >" so a NaN similarity stops merging, as before.
        if not (max_val > cutoff):
            break

        merged_i, merged_j = max_pair

        # Component j is absorbed into i; labels above j shift down by one.
        mergedtonumbers = [
            merged_i if val == merged_j else (val - 1 if val > merged_j else val)
            for val in mergedtonumbers
        ]

        posterior[:,merged_i] = posterior[:,merged_i] + posterior[:,merged_j]

        # Re-estimate the merged component from posterior-weighted data.
        w = posterior[:,merged_i]/np.sum(posterior[:,merged_i])
        mu[merged_i,:] = np.dot(w,x)

        x_centered = x - mu[merged_i,:]
        Sigma[merged_i,:,:] = np.cov(x_centered.T,aweights=w,bias=1)

        mu = np.delete(mu,merged_j,0)
        Sigma = np.delete(Sigma,merged_j,0)
        posterior = np.delete(posterior,merged_j,1)
        current_ncluster = current_ncluster - 1

    clustering['similarity_stopped'] = np.min(list(bhat_dic.values())) if bhat_dic else 1
    clustering['mp_ncluster'] = mu.shape[0]
    clustering['mergedtonumbers'] = mergedtonumbers
    clustering['mp_clustering'] = list(np.argmax(posterior, axis=1))

    return clustering
324 |
325 |
326 |
def _set_small_leaf(data):
    """Build the clustering dictionary of a leaf that is not split any further.

    Every column of ``data`` is described by the single-component summary
    returned by ``_set_one_component``.  The result has the layout
    ``{1: {(column,): summary}}``: the outer key ``1`` is the number of
    markers per entry, the inner keys are 1-tuples of column names.
    """
    columns = data.columns.values.tolist()
    per_marker = {
        combo: _set_one_component(data.loc[:, combo])
        for combo in itertools.combinations(columns, 1)
    }
    return {1: per_marker}
341 |
342 |
343 |
344 | def _set_one_component(x):
345 |
346 | clustering = {}
347 | clustering['bp_ncluster'] = 1
348 | clustering['bp_clustering'] = [0]*len(x)
349 | clustering['bp_pro'] = [1]
350 | clustering['bp_mean'] = np.mean(x)
351 | clustering['bp_Sigma'] = np.var(x)
352 | clustering['bhat_dic_track'] = {}
353 | clustering['similarity_stopped'] = 1
354 | clustering['mp_ncluster'] = 1
355 | clustering['mp_clustering'] = [0]*len(x)
356 | clustering['mergedtonumbers'] = [0]
357 |
358 | return clustering
359 |
360 |
361 |
def BIC(X, max_k = 10,bic = 'bic'):
    """Return the best number of mixture components chosen with BIC.

    Parameters
    ----------
    X : array-like
        Data passed to ``_get_BIC_k``.
    max_k : int
        Largest number of components to try; capped at the number of
        distinct values in ``X``.
    bic : {'bic', 'bic_min', 'bic_elbow'}
        Selection rule: ``'bic_min'`` takes the k with the lowest BIC,
        ``'bic_elbow'`` takes the elbow of the BIC curve, and ``'bic'`` takes
        the smaller of the two.

    Returns
    -------
    (k, bic_list) : the selected k (1-based) and the BIC score for every
    tried k.

    Raises
    ------
    ValueError
        If ``bic`` is not one of the supported rules.  This is checked before
        any model is fitted, so a typo no longer costs a full round of fits
        followed by a silent ``None`` result.
    """
    if bic not in ('bic', 'bic_min', 'bic_elbow'):
        raise ValueError("bic must be 'bic', 'bic_min' or 'bic_elbow', got %r" % (bic,))

    bic_list = _get_BIC_k(X, min(max_k,len(np.unique(X))))

    if bic == 'bic_min':
        return np.argmin(bic_list)+1,bic_list
    if bic == 'bic_elbow':
        return _FindElbow(bic_list),bic_list
    # bic == 'bic': the more conservative of the two rules
    return min(np.argmin(bic_list)+1,_FindElbow(bic_list)),bic_list
373 |
374 |
375 |
def _get_BIC_k(X, max_k):
    """Fit a GaussianMixture for every k in [1, max_k] and return the list of BIC scores.

    Entry ``i`` of the returned list is the BIC of the mixture with ``i + 1``
    components, evaluated on the same ``X`` it was fitted on.
    """
    return [GaussianMixture(k).fit(X).bic(X) for k in range(1, max_k + 1)]
383 |
384 |
385 |
386 | def _FindElbow(bic_list):
387 | """return elbow point, defined as the farthest point from the line through the first and last points"""
388 | if len(bic_list) == 1:
389 | return 1
390 | else:
391 | a = bic_list[0] - bic_list[-1]
392 | b = len(bic_list) - 1
393 | c = bic_list[-1]*1 - bic_list[0]*len(bic_list)
394 | dis = np.abs(a*range(1,len(bic_list)+1) + b*np.array(bic_list) + c)/np.sqrt(a**2+b**2)
395 | return np.argmax(dis)+1
396 |
397 |
398 |
--------------------------------------------------------------------------------
/CITEsort/Visualize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Aug 20 11:51:16 2019
5 |
6 | @author: lqyair
7 | """
8 |
9 | #import pandas as pd
10 | import numpy as np
11 | #from BTreeTraversal import BTreeTraversal
12 | from matplotlib import pyplot as plt
13 | from scipy import stats
14 | import pandas as pd
15 | import matplotlib
16 |
17 | #node = traversal.get_node(0)
18 | #nodename = traversal.nodename[0]
19 |
def visualize_node(data,node,nodename,**plot_para):
    """Plot, for one tree node, the histogram of every marker with its fitted 1-D mixture.

    Parameters
    ----------
    data : pandas.DataFrame
        Marker matrix; rows are selected with ``node.indices``.
    node : tree node
        Reads ``node.indices``, ``node.key`` and ``node.all_clustering_dic[1]``.
    nodename : str
        Used in the figure title and in the saved file name.
    **plot_para
        ``savefig`` (default False), ``savepath`` (default '.') and
        ``savename`` (default '.').  When ``savefig`` is True the figure is
        written to ``savepath/savename_nodename.png``.

    Leaves with at most 20 cells only get plain histograms.  Every other node
    gets, per marker: the individual mixture components (thin black lines),
    the merged components (coloured dash-dot lines, only when the mixture has
    more than one component) and the histogram.  The title of the marker the
    node is split on (``node.key``) is drawn in red.
    """
    savefig = plot_para.get('savefig',False)
    savepath = plot_para.get('savepath','.')
    savename = plot_para.get('savename','.')

    current_indices = node.indices
    node_data = data.loc[current_indices,:]

    plt.figure(figsize=(12,((data.shape[1]-1)//5+1)*2), dpi=70)
    # 'seaborn-white' was renamed to 'seaborn-v0_8-white' in Matplotlib 3.6
    # and the old name was removed in 3.8; try the old name first so older
    # installations behave exactly as before.
    try:
        plt.style.use('seaborn-white')
    except OSError:
        plt.style.use('seaborn-v0_8-white')

    if node.key == ('leaf',) and node_data.shape[0] <= 20 :
        markers = node_data.columns.values.tolist()
        n_rows = (len(markers)-1)//5+1
        for i, marker in enumerate(markers):
            X = node_data.loc[:,marker].values.reshape(-1, 1)
            plt.subplot( n_rows,5,i+1 )
            plt.hist(X,bins=30, density = True, color = "lightblue")
            plt.ylabel('density',fontsize=10)
            plt.title( marker,fontsize=12)

    else:
        all_clustering = node.all_clustering_dic[1]
        markers = list(all_clustering.keys())
        n_rows = (len(markers)-1)//5+1
        cols = ['r','g','b','c','m','y','darkorange','lightgreen','lightpink','darkgray']

        for i, marker in enumerate(markers):
            fit = all_clustering[marker]

            X = node_data.loc[:,marker].values.reshape(-1, 1)

            plt.subplot( n_rows,5,i+1 )

            # X is a column vector, so min/max are length-1 arrays and bins
            # has shape (500, 1); the [:, 0] below flattens the density.
            bins = np.linspace(min(X),max(X),500)

            bp_ncluster = int(fit['bp_ncluster'])
            mp_ncluster = 1 # default
            weights = fit['bp_pro']
            means = fit['bp_mean']
            sigmas = np.sqrt(fit['bp_Sigma'])

            # one column per mixture component before merging
            y = np.zeros((len(bins),bp_ncluster))
            for k in range(bp_ncluster):
                y[:,k] = (weights[k] * stats.norm.pdf(bins, means[k], sigmas[k]))[:,0]
                plt.plot(bins,y[:,k],linewidth=0.6,color='black')

            if bp_ncluster > 1:
                mp_ncluster = fit['mp_ncluster']
                mergedtonumbers = fit['mergedtonumbers']

                for k in range(mp_ncluster):
                    # density of a merged component = sum of its members
                    merged_idx = [idx for idx,val in enumerate(mergedtonumbers) if val == k]
                    y_merged = np.apply_along_axis(sum,1,y[:,merged_idx])
                    plt.plot(bins,y_merged,cols[k],linewidth=2,linestyle='-.')

            subfig_title = '_'.join(marker)+' ('+str(mp_ncluster)+'|'+str(bp_ncluster)+') ' + str(round(fit['similarity_stopped'],2))

            if marker == node.key:
                plt.title( subfig_title,fontsize=12,color='red')
            else:
                plt.title( subfig_title,fontsize=12,color='darkgrey' if mp_ncluster <= 1 else 'black')

            plt.hist(X,bins=30, density = True, color = "lightblue")
            plt.ylabel('density',fontsize=10)

    plt.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=0.9, hspace=0.4,wspace=0.45)
    plt.suptitle(nodename+' | '+str(len(current_indices))+' cells',fontsize=15,color="darkblue")
    plt.subplots_adjust(top=0.85)
    # kept as an explicit comparison so only True (or 1) triggers saving
    if savefig == True:
        plt.savefig(savepath+'/'+savename+'_'+nodename+'.png')
    plt.show()
99 |
100 |
101 |
102 |
103 |
104 |
105 | #import matplotlib.pyplot as plt
106 | import seaborn as sns; sns.set()
107 |
def visualize_pair(data,node,nodename,**plot_para):
    """Scatter-plot every marker pair fitted at a tree node.

    One subplot is drawn per key of ``node.all_clustering_dic[2]``.  Points
    are the cells in ``node.indices``; the colour encodes the cluster before
    merging ('bp'), the marker shape the cluster after merging ('mp').  The
    title of the pair the node is split on (``node.key``) is drawn in red.

    ``plot_para`` accepts ``savefig`` (default False), ``savepath`` and
    ``savename`` (both default '.'); when ``savefig`` is True the figure is
    written to ``savepath/savename_nodename.png``.
    """
    savefig = plot_para.get('savefig',False)
    savepath = plot_para.get('savepath','.')
    savename = plot_para.get('savename','.')

    pair_fits = node.all_clustering_dic[2]
    marker_pairs = list(pair_fits.keys())
    current_indices = node.indices

    # five subplots per row
    n_rows = (len(marker_pairs)-1)//5+1
    plt.figure(figsize=(12,n_rows*2.5), dpi=96)
    sns.set_style("white")

    for position, pair in enumerate(marker_pairs, start=1):
        marker1,marker2 = pair
        fit = pair_fits[pair]
        mp_ncluster = fit['mp_ncluster']
        bp_ncluster = fit['bp_ncluster']

        data_pair = pd.DataFrame({marker1:data.loc[current_indices, marker1],
                                  marker2:data.loc[current_indices, marker2],
                                  'bp':fit['bp_clustering'],
                                  'mp':fit['mp_clustering']},index=node.indices)

        plt.subplot( n_rows,5,position )
        sns.scatterplot(x=marker1, y=marker2,hue="bp",style="mp",
                        data=data_pair,s=15,legend=False)

        subfig_title = pair[0]+'_'+pair[1]+' ('+str(mp_ncluster)+'|'+str(bp_ncluster)+') ' + str(round(fit['similarity_stopped'],2))

        if pair == node.key:
            title_color = 'red'
        elif mp_ncluster <= 1:
            title_color = 'darkgrey'
        else:
            title_color = 'black'
        plt.title( subfig_title,fontsize=12,color=title_color)

    plt.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=0.9, hspace=0.6,wspace=0.45)
    plt.suptitle(nodename+' | '+str(len(current_indices))+' cells',fontsize=15,color="darkblue")
    plt.subplots_adjust(top=0.85)
    if savefig == True:
        plt.savefig(savepath+'/'+savename+'_'+nodename+'.png')
    plt.show()
160 |
161 |
162 |
163 |
164 |
165 |
def plot_keymarker(data,traversal,node_ID,dpi=5,savepath=None):
    """Plot the marker (or marker pair) a tree node is split on.

    Parameters
    ----------
    data : pandas.DataFrame
        Marker matrix; rows are selected with the node's ``indices``.
    traversal : object with ``get_node(node_ID)``
        Used to look the node up.
    node_ID : int
        Identifier passed to ``traversal.get_node``; also used in the output
        file name.
    dpi : int
        Resolution of the created figure.
    savepath : str or None
        When given, the figure is saved as a PDF inside this directory.

    One-marker keys produce a histogram overlaid with the individual mixture
    components (thin black lines) and the merged components (dashed; the
    merged component with the largest mean is drawn in 'firebrick', the next
    in 'navy', ...).  Two-marker keys produce a scatter plot split into
    merged cluster 1 versus everything else, plus a density contour; the
    group whose mean vector has the larger ``sum(mean)/norm(mean)`` is drawn
    in 'firebrick'.  Keys of any other length draw nothing.
    """
    node = traversal.get_node(node_ID)

    current_indices = node.indices
    node_data = data.loc[current_indices,:]

    marker_dkey = node.key

    if len(marker_dkey) == 1:
        marker = marker_dkey[0]

        clustering = node.all_clustering_dic[1][marker_dkey]

        X = node_data.loc[:,marker_dkey].values.reshape(-1, 1)

        # X is a column vector, so bins has shape (500, 1); the [:, 0] below
        # flattens the evaluated density.
        bins = np.linspace(min(X),max(X),500)
        cols = ['firebrick','navy','lightgreen','darkorange']

        bp_ncluster = int(clustering['bp_ncluster'])
        weights = clustering['bp_pro']
        means = clustering['bp_mean']
        sigmas = np.sqrt(clustering['bp_Sigma'])

        y = np.zeros((len(bins),bp_ncluster))

        # 'seaborn-white' was renamed to 'seaborn-v0_8-white' in Matplotlib
        # 3.6 and the old name was removed in 3.8.
        try:
            plt.style.use('seaborn-white')
        except OSError:
            plt.style.use('seaborn-v0_8-white')
        matplotlib.rcParams['axes.linewidth'] = 0.1
        fig, ax = plt.subplots(figsize=(4,3), dpi=dpi)

        for k in range(bp_ncluster):
            y[:,k] = (weights[k] * stats.norm.pdf(bins, means[k], sigmas[k]))[:,0]
            plt.plot(bins,y[:,k],linewidth=0.05,color='black')

        mp_ncluster = clustering['mp_ncluster']

        # red -- component with bigger mean
        mp_labels = np.array(clustering['mp_clustering'])
        mp_means = [np.mean(X[mp_labels==i,0]) for i in range(mp_ncluster)]

        # merged-component indices ordered by decreasing empirical mean
        idx = list(np.argsort(mp_means))
        idx.reverse()

        mergedtonumbers = clustering['mergedtonumbers']

        for k in range(mp_ncluster):
            # density of a merged component = sum of its member components
            merged_idx = [ii for ii,val in enumerate(mergedtonumbers) if val == k]
            y_merged = np.apply_along_axis(sum,1,y[:,merged_idx])

            plt.plot(bins,y_merged,cols[idx.index(k)],linewidth=0.8,linestyle='--')

        plt.hist(X,bins=30, density = True, color = "lightblue",linewidth=0)

        plt.ylabel('density',fontsize=18)
        plt.xlabel(marker,fontsize=18)
        plt.subplots_adjust(top=0.8, bottom=0.2, left=0.15, right=0.9, hspace=0.2,wspace=0.8)
        ax.tick_params(axis='both', which='major', labelsize=10)
        if savepath is not None:
            plt.savefig(savepath+'/'+str(node_ID)+'_'+marker+'.pdf')
        plt.show()

    elif len(marker_dkey) == 2:

        marker1,marker2 = marker_dkey

        subdata = node_data.loc[:,marker_dkey]
        clustering = node.all_clustering_dic[2][marker_dkey]

        # merged cluster 1 versus all other cells of the node
        componentidx = np.array(clustering['mp_clustering'])==1
        p1_mean = node_data.loc[componentidx,marker_dkey].mean(0)
        p2_mean = node_data.loc[~componentidx,marker_dkey].mean(0)

        p1_cosine = sum(p1_mean)/np.sqrt(sum(p1_mean**2))
        p2_cosine = sum(p2_mean)/np.sqrt(sum(p2_mean**2))

        try:
            plt.style.use('seaborn-white')
        except OSError:
            plt.style.use('seaborn-v0_8-white')
        matplotlib.rcParams['axes.linewidth'] = 0.1
        fig, ax = plt.subplots(figsize=(4,3), dpi=dpi)

        if p1_cosine > p2_cosine:
            in_color, out_color = 'firebrick', 'navy'
        else:
            in_color, out_color = 'navy', 'firebrick'
        plt.scatter(subdata.loc[componentidx,marker1],subdata.loc[componentidx,marker2],c=in_color,s=1)
        plt.scatter(subdata.loc[~componentidx,marker1],subdata.loc[~componentidx,marker2],c=out_color,s=1)

        # NOTE(review): passing the second variable positionally and the
        # n_levels keyword matches older seaborn releases; newer releases
        # presumably expect x=/y= and levels= -- confirm against the pinned
        # seaborn version before changing.
        sns.kdeplot(subdata[marker1], subdata[marker2], ax=ax, n_levels = 5, cmap = 'Wistia')

        plt.xlabel(marker1,fontsize=18)
        plt.ylabel(marker2,fontsize=18)
        ax.tick_params(axis='both', which='major', labelsize=10)
        plt.subplots_adjust(top=0.8, bottom=0.2, left=0.15, right=0.9, hspace=0.2,wspace=0.8)
        if savepath is not None:
            plt.savefig(savepath+'/'+str(node_ID)+'_'+marker1+'_'+marker2+'.pdf')

        plt.show()
272 |
273 |
274 |
275 |
276 |
277 |
278 | from subprocess import call
279 | #from IPython.display import Image
280 | #import pandas as pd
281 | #import numpy as np
282 |
def visualize_tree(root,data,outpath,filename,compact=False):
    """Write the tree structure into a Graphviz .dot file and render it to PDF.

    Parameters
    ----------
    root : tree node or None
        Root of the tree.  Nodes are read through ``key``, ``indices``,
        ``left``, ``right``, ``where_dominant`` and ``all_clustering_dic``.
        When ``root`` is None nothing is written at all (previously an
        unclosed, truncated .dot file was left behind).
    data : pandas.DataFrame
        Marker matrix used for the per-node mean offsets.
    outpath, filename : str
        The graph is written to ``outpath/filename.dot`` and rendered to
        ``outpath/filename.pdf`` with the ``dot`` executable (Graphviz).
    compact : bool
        When True, leaf labels omit the per-marker offsets.

    Nodes are numbered in breadth-first order (left child before right
    child).  Offsets are z-scores: the node mean minus the mean of ``data``,
    divided by the standard deviation of ``data``.  Edges towards the side
    named by the parent's ``where_dominant`` are drawn red and bold.
    """
    # Nothing to draw: return before touching the file system.
    if root is None:
        return

    tot_cells = len(root.indices)
    means_in_root = data.mean(axis = 0)
    stds_in_root = data.std(axis = 0)
    markers = means_in_root.index.values.tolist()

    # auxiliary parameters for color display: branch colour by number of
    # markers in the key, leaf colour by log cell count
    branch_col = {1:'#ffccccff',2:'#ffff99ff',3:'#CC99CC',4:'#99CCFF'}
    leaf_col = matplotlib.colors.Normalize(vmin=0, vmax=np.log(tot_cells))

    def cluster_counts(branch):
        """Return '(merged|unmerged)' component counts of a branch node's own key."""
        fit = branch.all_clustering_dic[len(branch.key)][branch.key]
        return '('+str(fit['mp_ncluster'])+'|'+str(fit['bp_ncluster'])+')'

    statements = ['digraph Tree {',
                  'node [shape=box, style="filled, rounded", color="black", fontname=helvetica] ;',
                  'edge [fontname=helvetica] ;']

    statements.append('0 [label="0_'+'_'.join(root.key)+ \
                      '\\nNum: '+str(len(root.indices))+ \
                      '\\n'+cluster_counts(root)+'",fillcolor="#ff9966ff",fontsize=25];')

    # Breadth-first walk; the list doubles as the queue (read position
    # `head`), which avoids the O(n) cost of list.pop(0).
    pending = [(root, 0)]
    head = 0
    last_id = 0

    while head < len(pending):
        node, idx = pending[head]
        head += 1

        for side in ('left', 'right'):
            child = getattr(node, side)
            if child is None:
                continue

            last_id += 1
            i = last_id
            pending.append((child, i))

            n_cells = len(child.indices)
            percent = str(round(n_cells/tot_cells*100,2))+'%'
            mean_temp = data.loc[child.indices,:].mean(0)
            label_head = str(i)+' [label="'+str(i)+'_'+'_'.join(child.key)+'\\n'+ \
                         str(n_cells)+' ('+percent+')\\n'

            if child.key == ('leaf',):
                if compact:
                    offset_in_leaf = ''
                else:
                    temp = (mean_temp - means_in_root)/stds_in_root
                    offset_in_leaf = ''.join('\n'+m+': '+str(round(temp[m],2)) for m in markers)

                col = matplotlib.colors.to_hex(matplotlib.cm.Greens(leaf_col(np.log(n_cells))))
                statements.append(label_head+offset_in_leaf+'",fillcolor="'+col+'",fontsize=20];')
            else:
                statements.append(label_head+cluster_counts(child)+ \
                                  '",fillcolor="'+branch_col[len(child.key)]+'",fontsize=25];')

            # edge from parent to child, labelled with the child's offset on
            # the marker(s) the parent is split on
            offset = ''.join(
                str(round((mean_temp[m] - means_in_root[m])/stds_in_root[m],2))+'\n'
                for m in node.key)
            dominant = node.where_dominant == side
            statements.append(str(idx)+' -> '+str(i)+' [labeldistance=3, label = "'+offset+ \
                              '",fontsize=25, color='+['black','red'][dominant]+ \
                              ', style='+['solid','bold'][dominant]+'];')

    statements.append('}')

    dot_path = outpath+'/'+filename+'.dot'
    # The context manager closes the file even if writing fails.
    with open(dot_path,'w') as tree_dot:
        tree_dot.write(''.join(statements))

    # Convert to pdf using system command (requires Graphviz)
    call(['dot', '-Tpdf', dot_path, '-o', outpath+'/'+filename+'.pdf', '-Gdpi=100'])
432 |
433 | # Display in jupyter notebook
434 | #Image(filename = outpath+'/GatingTree.png')
435 |
436 |
437 |
--------------------------------------------------------------------------------
/CITEsort/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort/__init__.py
--------------------------------------------------------------------------------
/CITEsort/traversal.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Dec 29 20:58:29 2019
5 |
6 | @author: lianqiuyu
7 | """
8 |
9 | import pandas as pd
10 | import numpy as np
11 | from matplotlib import pyplot as plt
12 | from scipy import stats
13 |
14 | #from Visualize import visualize_node,visualize_pair
15 |
class Traversal:
    """Flatten a fitted binary tree and summarise its nodes and leaves.

    Construction walks the tree (breadth-first or depth-first), names every
    node ``'<position>_<key joined by _>'``, builds summary tables, computes
    a BIC value for the leaf mixture and flags leaves whose marker code can
    be explained as the union of other leaves' codes.

    Attributes set by ``__init__``:
    ``tree``, ``method``, ``nodelist``, ``nodename``, ``markers``,
    ``tree_summary``, ``leaf_summary``, ``n_components``, ``ll``,
    ``n_samples``, ``bic``, ``leaf_ID``, ``leaf_summary_code``,
    ``multiplet_ratio``, ``multiplet_predict``.
    """

    def __init__(self,tree,c_type,method='bfs',nodelist=None,nodename=None,markers=None,n_samples=None,\
                 tree_summary=None,leaf_summary=None,n_components=None,ll=None,bic=None,leaf_ID=None,\
                 leaf_summary_code=None,multiplet_ratio=None,multiplet_predict=None):
        """Traverse ``tree`` and compute all summaries.

        Parameters
        ----------
        tree : root node
            Nodes are read through ``left``, ``right``, ``key``, ``indices``,
            ``weight``, ``ll_tot``, ``stop``, ``mean_vec``,
            ``covariance_vec`` and ``all_clustering``.
        c_type : {'diag', 'full'}
            Covariance structure used to count free parameters for the BIC.
        method : {'bfs', 'dfs'}
            Traversal order that defines the node numbering.

        The remaining keyword parameters are accepted for backward
        compatibility only; their values are not read.

        Raises
        ------
        ValueError
            If ``method`` or ``c_type`` is not one of the supported values
            (previously this surfaced later as an unbound-name error).
        """
        if method not in ('bfs', 'dfs'):
            raise ValueError("method must be 'bfs' or 'dfs', got %r" % (method,))
        if c_type not in ('diag', 'full'):
            raise ValueError("c_type must be 'diag' or 'full', got %r" % (c_type,))

        self.tree = tree
        self.method = method
        if method == 'bfs':
            self.nodelist = self.levelOrderTraversal()
        else:
            self.nodelist = self.preorderTraversal()

        self.nodename = [str(i)+'_'+'_'.join(node.key) for i, node in enumerate(self.nodelist)]
        # keys of the root's 1-D fits are 1-tuples of marker names
        self.markers = [x[0] for x in self.nodelist[0].all_clustering[1]]

        self.tree_summary, self.leaf_summary = self.summarize()

        self.n_components = self.leaf_summary.shape[0]
        self.ll = self.leaf_summary['ll'].sum()
        n_features = len(self.markers)

        mean_params = self.n_components * n_features
        if c_type == 'diag':
            cov_params = self.n_components * n_features
        else:
            cov_params = self.n_components * n_features * (n_features + 1) / 2.

        n_parameters = int(self.n_components-1 + mean_params + cov_params)
        self.n_samples = len(self.nodelist[0].indices)
        # NOTE(review): multiplying ll by n_samples is only the usual BIC if
        # the summed 'll' is a per-sample average -- confirm where ll_tot is set.
        self.bic = n_parameters * np.log(self.n_samples) - 2 * self.ll * self.n_samples
        self.leaf_ID = [int(x.split('_')[0]) for x in self.leaf_summary.index]
        self.predict_ACT_BCT()
        self.predict_multiplets()

    def summarize(self):
        """Return ``(tree_summary, leaf_summary)``.

        ``tree_summary`` has one row per node (indexed by node name) with the
        columns 'Count', 'Proportion', 'Weight', 'll', 'stop' followed by one
        column per marker holding the node's ``mean_vec``.  ``leaf_summary``
        keeps the rows whose name has 'leaf' as second '_'-separated field,
        sorted by decreasing 'Count'.
        """
        n_samples = len(self.nodelist[0].indices)
        tree_summary = pd.DataFrame({'Count':[len(x.indices) for x in self.nodelist],
                                     'Proportion': [len(x.indices)/n_samples for x in self.nodelist],
                                     'Weight':[x.weight for x in self.nodelist],
                                     'll':[x.ll_tot for x in self.nodelist],
                                     'stop':[x.stop for x in self.nodelist]
                                     },index=self.nodename)

        mean_m = pd.DataFrame(np.zeros([tree_summary.shape[0],len(self.markers)]),
                              index = self.nodename,columns = self.markers)

        for i in range(mean_m.shape[0]):
            mean_m.iloc[i,:] = self.nodelist[i].mean_vec

        tree_summary = pd.concat([tree_summary,mean_m],axis=1)

        leaf_summary = tree_summary.loc[[x for x in self.nodename if x.split('_')[1]=='leaf'],:]
        leaf_summary = leaf_summary.sort_values(by='Count',ascending=False)
        return tree_summary,leaf_summary

    def get_node(self,nodeID):
        """Return the node at position ``nodeID`` of the traversal order."""
        return self.nodelist[nodeID]

    def get_leaf_label(self):
        """generate label (one column, indicating which leaf cells are assigned.)

        Returns a DataFrame indexed by the root's ``indices`` with the columns
        'GEM' (the index values) and 'Label' (name of the leaf containing the
        cell, None if the cell is in no leaf).
        """
        label = pd.DataFrame({'GEM':self.tree.indices,'Label':[None]*len(self.tree.indices)},index=self.tree.indices)
        for name, node in zip(self.nodename, self.nodelist):
            if name.split('_')[1] == 'leaf':
                label.loc[node.indices,'Label'] = name

        return label

    def plot_node(self,data,ID):
        """Plot per-marker histograms of node ``ID`` with the fitted densities.

        For a leaf, every marker shows the single Gaussian given by the
        node's ``mean_vec`` / ``covariance_vec``.  For any other node, markers
        with a 1-D fit in ``node.all_clustering[1]`` show its two weighted
        components (the one with the larger mean in red, the other in blue);
        the remaining markers show the single Gaussian.  The title of the
        marker the node is split on is drawn in red.
        """
        node = self.nodelist[ID]
        node_data = data.loc[node.indices,:]
        plt.figure(figsize=(10,((data.shape[1]-1)//4+1)*2), dpi=96)
        # 'seaborn-white' was renamed to 'seaborn-v0_8-white' in Matplotlib
        # 3.6 and the old name was removed in 3.8.
        try:
            plt.style.use('seaborn-white')
        except OSError:
            plt.style.use('seaborn-v0_8-white')

        if node.key == ('leaf',):
            for i in range(len(self.markers)):
                X = node_data.loc[:,self.markers[i]].values.reshape(-1, 1)
                bins = np.linspace(min(X),max(X),500)
                den = stats.norm.pdf(bins, node.mean_vec[i], np.sqrt(node.covariance_vec[i,i]))
                plt.subplot( (len(self.markers)-1)//5+1,5,i+1 )
                plt.hist(X,bins=30, density = True, color = "lightblue")
                plt.plot(bins,den,linewidth=1,color='black')
                plt.ylabel('density',fontsize=10)
                plt.title( self.markers[i],fontsize=12)

        else:

            for i in range(len(self.markers)):

                X = node_data.loc[:,self.markers[i]].values.reshape(-1, 1)
                bins = np.linspace(min(X),max(X),500)
                # NOTE(review): the row count uses //4 while the grid has 5
                # columns (the leaf branch uses //5) -- kept as is.
                plt.subplot( (len(self.markers)-1)//4+1,5,i+1 )
                if (self.markers[i],) in node.all_clustering[1]:
                    fit = node.all_clustering[1][(self.markers[i],)]
                    weights = fit['component_weights']
                    means = fit['means']
                    covariances = fit['covariances']
                    y = np.zeros((len(bins),2))
                    y[:,0] = (weights[0] * stats.norm.pdf(bins, means[0], np.sqrt(covariances[0])))[:,0]
                    y[:,1] = (weights[1] * stats.norm.pdf(bins, means[1], np.sqrt(covariances[1])))[:,0]
                    if means[0] > means[1]:
                        cols = ['red','blue']
                    else:
                        cols = ['blue','red']
                    plt.plot(bins,y[:,0],linewidth=1,color=cols[0])
                    plt.plot(bins,y[:,1],linewidth=1,color=cols[1])
                else:
                    den = stats.norm.pdf(bins, node.mean_vec[i], np.sqrt(node.covariance_vec[i,i]))
                    plt.plot(bins,den,linewidth=1,color='black')

                plt.hist(X,bins=30, density = True, color = "lightblue")

                subfig_title = self.markers[i]
                if (self.markers[i],) == node.key:
                    plt.title( subfig_title,fontsize=12,color='red')
                else:
                    plt.title( subfig_title,fontsize=12,color='darkgrey')

                plt.ylabel('density',fontsize=10)

        plt.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=0.9, hspace=0.4,wspace=0.45)
        plt.suptitle(self.nodename[ID]+' | '+str(len(node.indices))+' cells',fontsize=15,color="darkblue")
        plt.subplots_adjust(top=0.8)
        plt.show()

    @staticmethod
    def _is_union(candidate, centers):
        """Return True when ``candidate`` equals the element-wise maximum of ``centers``."""
        union = pd.concat(centers,axis=1).max(1)
        return bool((candidate == union).all())

    def predict_ACT_BCT(self):
        """Classify leaves by their binary marker code and store ``leaf_summary_code``.

        Each leaf gets a 0/1 code per marker (1 when the leaf mean exceeds
        the marker cutoff from ``_compute_markers_cutoff``).  Leaves are
        visited in ``leaf_summary`` order (largest first).  The first leaf is
        always kept as a base code.  A later leaf whose code equals the
        element-wise maximum of two already accepted base codes (a code
        paired with itself is allowed), or, failing that, of three distinct
        ones, is recorded together with all matching combinations; otherwise
        it becomes a new base code.

        ``self.leaf_summary_code`` is ``leaf_summary`` without the marker
        means, plus 'BCT_predict' (1 for base codes), 'ACT_merge' (string of
        the matching combinations), 'merge_const' (largest product of the
        combined leaves' weights divided by the leaf's own weight) and the
        binary code columns.
        """
        markers_cutoff = self._compute_markers_cutoff()
        markers = self.markers

        # 1.0 where the leaf mean is above the marker cutoff, else 0.0
        code = self.leaf_summary.loc[:,markers].gt(markers_cutoff,axis=1).astype(float)

        BCT_dic = {}
        ACT_dic = {}

        for idx in code.index:
            new_center = np.sign(code.loc[idx,markers])
            if not BCT_dic:
                BCT_dic[idx] = new_center
                continue

            terms = list(BCT_dic.keys())
            n_terms = len(terms)

            # unions of two base codes (j starts at i: a code may pair with itself)
            matches = [(terms[i],terms[j])
                       for i in range(n_terms)
                       for j in range(i,n_terms)
                       if self._is_union(new_center,[BCT_dic[terms[i]],BCT_dic[terms[j]]])]

            # unions of three distinct base codes, only tried when no pair fits
            if not matches:
                matches = [(terms[i],terms[j],terms[k])
                           for i in range(n_terms)
                           for j in range(i+1,n_terms)
                           for k in range(j+1,n_terms)
                           if self._is_union(new_center,[BCT_dic[terms[i]],BCT_dic[terms[j]],BCT_dic[terms[k]]])]

            if matches:
                ACT_dic[idx] = matches
            else:
                BCT_dic[idx] = new_center

        leaf_summary_code = self.leaf_summary.drop(columns=self.markers)

        leaf_summary_code['BCT_predict'] = 0
        leaf_summary_code.loc[list(BCT_dic.keys()),'BCT_predict'] = 1

        leaf_summary_code['ACT_merge'] = None
        for term in ACT_dic:
            leaf_summary_code.loc[term,'ACT_merge'] = str(ACT_dic[term])

        # float column from the start: the ratios assigned below are floats
        leaf_summary_code['merge_const'] = 0.0
        for term, combos in ACT_dic.items():
            p = []
            for combo in combos:
                temp = 1
                for member in combo:
                    temp = leaf_summary_code.loc[member,'Weight'] * temp
                p.append(temp)
            leaf_summary_code.loc[term,'merge_const'] = np.max(p)/leaf_summary_code.loc[term,'Weight']

        self.leaf_summary_code = pd.concat([leaf_summary_code,code],axis=1)

    def predict_multiplets(self):
        """Flag every cell that sits in a leaf with 'BCT_predict' == 0.

        Sets ``self.multiplet_predict`` (0/1 Series indexed by the root's
        ``indices``) and ``self.multiplet_ratio`` (fraction of flagged cells).
        """
        multiplet_predict = pd.Series([0]*self.n_samples,index=self.nodelist[0].indices)
        for leaf in self.leaf_summary_code.index:
            if self.leaf_summary_code.loc[leaf,'BCT_predict'] == 0 :
                # the leading field of a leaf name is its position in nodelist
                leaf_node = self.nodelist[int(leaf.split('_')[0])]
                multiplet_predict.loc[leaf_node.indices] = 1

        self.multiplet_ratio = sum(multiplet_predict)/len(multiplet_predict)
        self.multiplet_predict = multiplet_predict

    def _compute_markers_cutoff(self):
        """Return one positive/negative cutoff per marker as a Series indexed by ``self.markers``.

        For every 1-D fit stored on the root node, the two weighted Gaussian
        components are intersected with ``_solve`` and exactly one cutoff is
        selected by ``_pick_cutoff``.  (The previous code failed with
        IndexError when the components had equal variance, and could append
        zero or two values for a marker, breaking the Series construction.)
        """
        root_fits = self.nodelist[0].all_clustering[1]

        markers_cutoff = []
        for m in root_fits:
            fit = root_fits[m]
            m1,m2 = fit['means'][:,0]
            std1,std2 = np.sqrt(fit['covariances'][:,0,0])
            s1,s2 = fit['component_weights']
            inter_X = self._solve(m1,m2,std1,std2,s1,s2)
            markers_cutoff.append(self._pick_cutoff(m1,m2,inter_X))

        return pd.Series(markers_cutoff,index=self.markers)

    @staticmethod
    def _pick_cutoff(m1,m2,inter_X):
        """Choose a single cutoff from the intersection points ``inter_X``.

        * exactly one real intersection: use it;
        * otherwise the first real intersection lying strictly between the
          two means;
        * no usable intersection (none real, or none between the means):
          fall back to the midpoint of the means, so that every marker still
          receives a cutoff.
        """
        real_roots = [float(np.real(r)) for r in np.atleast_1d(inter_X) if np.isreal(r)]
        if len(real_roots) == 1:
            return real_roots[0]
        for r in real_roots:
            if (m1 - r)*(m2 - r) < 0:
                return r
        return (m1 + m2)/2.0

    def _solve(self,m1,m2,std1,std2,s1,s2):
        """solve equation: s1*N(m1,std1)=s2*N(m2,std2), return the intersection points of two weighted Gaussian

        Taking logs turns the equation into the quadratic
        ``a*x**2 + b*x + c = 0`` whose roots are returned by ``np.roots``.
        With equal standard deviations ``a`` is 0 and a single root comes back.
        """
        a = 1/(2*std1**2) - 1/(2*std2**2)
        b = m2/(std2**2) - m1/(std1**2)
        c = m1**2 /(2*std1**2) - m2**2 / (2*std2**2) - np.log((std2*s1)/(std1*s2))
        return np.roots([a,b,c])

    # dfs
    def preorderTraversal(self):
        """Return the nodes in depth-first pre-order (node, left subtree, right subtree).

        Returns None when the tree is None.
        """
        node = self.tree
        if node is None:
            return

        nodelist = []
        myStack = []

        while node or myStack:
            # walk down the left spine, recording nodes as they are met
            while node:
                nodelist.append(node)
                myStack.append(node)
                node = node.left
            node = myStack.pop()
            node = node.right

        return nodelist

    # bfs
    def levelOrderTraversal(self):
        """Return the nodes level by level, left child before right child.

        Returns None when the tree is None.
        """
        if self.tree is None:
            return

        # The result list doubles as the queue: `head` is the read position,
        # which avoids the O(n) cost of list.pop(0).
        nodelist = [self.tree]
        head = 0
        while head < len(nodelist):
            node = nodelist[head]
            head += 1

            if node.left is not None:
                nodelist.append(node.left)

            if node.right is not None:
                nodelist.append(node.right)

        return nodelist
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
--------------------------------------------------------------------------------
/CITEsort_out/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/.DS_Store
--------------------------------------------------------------------------------
/CITEsort_out/data_cls_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/data_cls_hist.png
--------------------------------------------------------------------------------
/CITEsort_out/tree.dot:
--------------------------------------------------------------------------------
1 | digraph Tree {node [shape=box, style="filled, rounded", color="black", fontname=helvetica] ;edge [fontname=helvetica] ;0 [label="0_CD64\nNum: 2580\n(2|2)",fillcolor="#ff9966ff",fontsize=25];1 [label="1_CD341\n1313 (50.89%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];0 -> 1 [labeldistance=3, label = "-0.94
2 | ",fontsize=25, color=black, style=solid];2 [label="2_CD361\n1267 (49.11%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];0 -> 2 [labeldistance=3, label = "0.98
3 | ",fontsize=25, color=red, style=bold];3 [label="3_CD3\n298 (11.55%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];1 -> 3 [labeldistance=3, label = "-0.86
4 | ",fontsize=25, color=black, style=solid];4 [label="4_leaf\n1015 (39.34%)\n",fillcolor="#006b2b",fontsize=20];1 -> 4 [labeldistance=3, label = "1.19
5 | ",fontsize=25, color=red, style=bold];5 [label="5_CD56_CD91\n299 (11.59%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];2 -> 5 [labeldistance=3, label = "-0.21
6 | ",fontsize=25, color=black, style=solid];6 [label="6_leaf\n968 (37.52%)\n",fillcolor="#006c2c",fontsize=20];2 -> 6 [labeldistance=3, label = "1.2
7 | ",fontsize=25, color=red, style=bold];7 [label="7_CD45RA_CD45\n78 (3.02%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];3 -> 7 [labeldistance=3, label = "-0.36
8 | ",fontsize=25, color=black, style=solid];8 [label="8_CD41\n220 (8.53%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];3 -> 8 [labeldistance=3, label = "2.95
9 | ",fontsize=25, color=red, style=bold];9 [label="9_HLA-DR_CD361\n179 (6.94%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];5 -> 9 [labeldistance=3, label = "-0.1
10 | -0.24
11 | ",fontsize=25, color=red, style=bold];10 [label="10_leaf\n120 (4.65%)\n",fillcolor="#46ae60",fontsize=20];5 -> 10 [labeldistance=3, label = "1.41
12 | 0.53
13 | ",fontsize=25, color=black, style=solid];11 [label="11_leaf\n30 (1.16%)\n",fillcolor="#8dd08a",fontsize=20];7 -> 11 [labeldistance=3, label = "-0.38
14 | 0.13
15 | ",fontsize=25, color=black, style=solid];12 [label="12_leaf\n48 (1.86%)\n",fillcolor="#76c578",fontsize=20];7 -> 12 [labeldistance=3, label = "2.24
16 | 0.52
17 | ",fontsize=25, color=red, style=bold];13 [label="13_CD45RA\n99 (3.84%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];8 -> 13 [labeldistance=3, label = "-0.76
18 | ",fontsize=25, color=black, style=solid];14 [label="14_CD71\n121 (4.69%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];8 -> 14 [labeldistance=3, label = "2.88
19 | ",fontsize=25, color=red, style=bold];15 [label="15_HLA-DR\n125 (4.84%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];9 -> 15 [labeldistance=3, label = "0.41
20 | -0.41
21 | ",fontsize=25, color=black, style=solid];16 [label="16_leaf\n54 (2.09%)\n",fillcolor="#72c375",fontsize=20];9 -> 16 [labeldistance=3, label = "-0.58
22 | 0.26
23 | ",fontsize=25, color=red, style=bold];17 [label="17_leaf\n40 (1.55%)\n",fillcolor="#7fc97f",fontsize=20];13 -> 17 [labeldistance=3, label = "-0.54
24 | ",fontsize=25, color=black, style=solid];18 [label="18_leaf\n59 (2.29%)\n",fillcolor="#6dc072",fontsize=20];13 -> 18 [labeldistance=3, label = "2.06
25 | ",fontsize=25, color=red, style=bold];19 [label="19_leaf\n26 (1.01%)\n",fillcolor="#92d28f",fontsize=20];14 -> 19 [labeldistance=3, label = "-0.04
26 | ",fontsize=25, color=black, style=solid];20 [label="20_leaf\n95 (3.68%)\n",fillcolor="#53b466",fontsize=20];14 -> 20 [labeldistance=3, label = "2.18
27 | ",fontsize=25, color=red, style=bold];21 [label="21_CD931\n58 (2.25%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];15 -> 21 [labeldistance=3, label = "-0.68
28 | ",fontsize=25, color=red, style=bold];22 [label="22_CD11b\n67 (2.6%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];15 -> 22 [labeldistance=3, label = "1.36
29 | ",fontsize=25, color=black, style=solid];23 [label="23_leaf\n43 (1.67%)\n",fillcolor="#7cc87c",fontsize=20];21 -> 23 [labeldistance=3, label = "-0.0
30 | ",fontsize=25, color=red, style=bold];24 [label="24_leaf\n15 (0.58%)\n",fillcolor="#aadda4",fontsize=20];21 -> 24 [labeldistance=3, label = "1.05
31 | ",fontsize=25, color=black, style=solid];25 [label="25_leaf\n17 (0.66%)\n",fillcolor="#a5db9f",fontsize=20];22 -> 25 [labeldistance=3, label = "-0.39
32 | ",fontsize=25, color=red, style=bold];26 [label="26_leaf\n50 (1.94%)\n",fillcolor="#75c477",fontsize=20];22 -> 26 [labeldistance=3, label = "0.48
33 | ",fontsize=25, color=black, style=solid];}
--------------------------------------------------------------------------------
/CITEsort_out/tree.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/tree.pdf
--------------------------------------------------------------------------------
/CITEsort_out/tree.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/tree.pickle
--------------------------------------------------------------------------------
/CITEsort_out/tree_complete.dot:
--------------------------------------------------------------------------------
1 | digraph Tree {node [shape=box, style="filled, rounded", color="black", fontname=helvetica] ;edge [fontname=helvetica] ;0 [label="0_CD64\nNum: 2580\n(2|2)",fillcolor="#ff9966ff",fontsize=25];1 [label="1_CD341\n1313 (50.89%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];0 -> 1 [labeldistance=3, label = "-0.94
2 | ",fontsize=25, color=red, style=bold];2 [label="2_CD361\n1267 (49.11%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];0 -> 2 [labeldistance=3, label = "0.98
3 | ",fontsize=25, color=black, style=solid];3 [label="3_CD3\n298 (11.55%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];1 -> 3 [labeldistance=3, label = "-0.86
4 | ",fontsize=25, color=black, style=solid];4 [label="4_leaf\n1015 (39.34%)\n
5 | CD41: -0.76
6 | CD71: 0.43
7 | CD56: -0.56
8 | CD3: -0.41
9 | CD331: -0.4
10 | CD341: 1.19
11 | CD90: -0.21
12 | CD117: 0.7
13 | CD45RA: 0.55
14 | CD123: 0.74
15 | CD141: -0.48
16 | HLA-DR: 0.25
17 | CD11b: -0.95
18 | CD64: -0.95
19 | CD381: -0.63
20 | CD45: -1.15
21 | CD361: -0.85
22 | CD931: -0.81
23 | CD91: -0.13",fillcolor="#006b2b",fontsize=20];1 -> 4 [labeldistance=3, label = "1.19
24 | ",fontsize=25, color=red, style=bold];5 [label="5_CD56_CD91\n285 (11.05%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];2 -> 5 [labeldistance=3, label = "-0.24
25 | ",fontsize=25, color=red, style=bold];6 [label="6_leaf\n982 (38.06%)\n
26 | CD41: 0.45
27 | CD71: -0.65
28 | CD56: 0.41
29 | CD3: -0.12
30 | CD331: 0.89
31 | CD341: -0.72
32 | CD90: 0.27
33 | CD117: -0.36
34 | CD45RA: -0.51
35 | CD123: -0.35
36 | CD141: 0.69
37 | HLA-DR: -0.25
38 | CD11b: 1.08
39 | CD64: 1.0
40 | CD381: 0.69
41 | CD45: 0.88
42 | CD361: 1.19
43 | CD931: 1.07
44 | CD91: 0.16",fillcolor="#006c2c",fontsize=20];2 -> 6 [labeldistance=3, label = "1.19
45 | ",fontsize=25, color=black, style=solid];7 [label="7_CD45RA_CD45\n78 (3.02%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];3 -> 7 [labeldistance=3, label = "-0.36
46 | ",fontsize=25, color=black, style=solid];8 [label="8_CD41\n220 (8.53%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];3 -> 8 [labeldistance=3, label = "2.95
47 | ",fontsize=25, color=red, style=bold];9 [label="9_HLA-DR_CD361\n170 (6.59%)\n(2|2)",fillcolor="#ffff99ff",fontsize=25];5 -> 9 [labeldistance=3, label = "-0.1
48 | -0.24
49 | ",fontsize=25, color=red, style=bold];10 [label="10_CD361_CD91\n115 (4.46%)\n(2|2)",fillcolor="#ffff99ff",fontsize=25];5 -> 10 [labeldistance=3, label = "1.42
50 | 0.53
51 | ",fontsize=25, color=black, style=solid];11 [label="11_leaf\n36 (1.4%)\n
52 | CD41: -0.28
53 | CD71: -0.14
54 | CD56: 0.02
55 | CD3: -0.43
56 | CD331: -0.92
57 | CD341: -0.75
58 | CD90: -0.26
59 | CD117: -0.69
60 | CD45RA: -0.12
61 | CD123: -0.29
62 | CD141: -0.27
63 | HLA-DR: -0.03
64 | CD11b: -0.23
65 | CD64: -0.78
66 | CD381: -0.68
67 | CD45: 0.04
68 | CD361: -0.58
69 | CD931: -0.64
70 | CD91: -0.25",fillcolor="#84cc83",fontsize=20];7 -> 11 [labeldistance=3, label = "-0.12
71 | 0.04
72 | ",fontsize=25, color=black, style=solid];12 [label="12_leaf\n42 (1.63%)\n
73 | CD41: -0.64
74 | CD71: 0.45
75 | CD56: 0.61
76 | CD3: -0.31
77 | CD331: -1.89
78 | CD341: -0.84
79 | CD90: -0.21
80 | CD117: -0.74
81 | CD45RA: 2.4
82 | CD123: -0.76
83 | CD141: -0.44
84 | HLA-DR: 1.83
85 | CD11b: -0.66
86 | CD64: -0.92
87 | CD381: -0.18
88 | CD45: 0.66
89 | CD361: -0.79
90 | CD931: -0.82
91 | CD91: 0.02",fillcolor="#7dc87e",fontsize=20];7 -> 12 [labeldistance=3, label = "2.4
92 | 0.66
93 | ",fontsize=25, color=red, style=bold];13 [label="13_CD45RA\n99 (3.84%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];8 -> 13 [labeldistance=3, label = "-0.76
94 | ",fontsize=25, color=red, style=bold];14 [label="14_CD71\n121 (4.69%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];8 -> 14 [labeldistance=3, label = "2.88
95 | ",fontsize=25, color=black, style=solid];15 [label="15_leaf\n89 (3.45%)\n
96 | CD41: 0.0
97 | CD71: -0.73
98 | CD56: -0.16
99 | CD3: -0.26
100 | CD331: 0.36
101 | CD341: -0.87
102 | CD90: -0.03
103 | CD117: -0.61
104 | CD45RA: -0.83
105 | CD123: -0.75
106 | CD141: -0.21
107 | HLA-DR: -0.71
108 | CD11b: 0.71
109 | CD64: 0.74
110 | CD381: 0.39
111 | CD45: 0.42
112 | CD361: -0.05
113 | CD931: 0.38
114 | CD91: -0.3",fillcolor="#56b567",fontsize=20];9 -> 15 [labeldistance=3, label = "-0.71
115 | -0.05
116 | ",fontsize=25, color=black, style=solid];16 [label="16_CD11b\n81 (3.14%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];9 -> 16 [labeldistance=3, label = "1.13
117 | -0.46
118 | ",fontsize=25, color=red, style=bold];17 [label="17_CD11b\n80 (3.1%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];10 -> 17 [labeldistance=3, label = "-0.25
119 | -0.28
120 | ",fontsize=25, color=black, style=solid];18 [label="18_leaf\n35 (1.36%)\n
121 | CD41: 0.39
122 | CD71: -0.65
123 | CD56: 0.43
124 | CD3: -0.44
125 | CD331: 0.09
126 | CD341: -0.73
127 | CD90: -0.03
128 | CD117: -0.62
129 | CD45RA: -0.58
130 | CD123: -0.12
131 | CD141: -0.23
132 | HLA-DR: 0.72
133 | CD11b: 0.09
134 | CD64: 0.83
135 | CD381: 0.59
136 | CD45: 0.37
137 | CD361: -0.19
138 | CD931: -0.04
139 | CD91: 2.39",fillcolor="#86cc85",fontsize=20];10 -> 18 [labeldistance=3, label = "-0.19
140 | 2.39
141 | ",fontsize=25, color=red, style=bold];19 [label="19_leaf\n40 (1.55%)\n
142 | CD41: -0.78
143 | CD71: 1.78
144 | CD56: -0.08
145 | CD3: 2.93
146 | CD331: -1.9
147 | CD341: -0.89
148 | CD90: -0.1
149 | CD117: -0.65
150 | CD45RA: -0.54
151 | CD123: -0.98
152 | CD141: -0.5
153 | HLA-DR: -0.74
154 | CD11b: -0.86
155 | CD64: -0.98
156 | CD381: -0.94
157 | CD45: 0.87
158 | CD361: -0.83
159 | CD931: -0.9
160 | CD91: -0.25",fillcolor="#7fc97f",fontsize=20];13 -> 19 [labeldistance=3, label = "-0.54
161 | ",fontsize=25, color=black, style=solid];20 [label="20_leaf\n59 (2.29%)\n
162 | CD41: -0.75
163 | CD71: 1.97
164 | CD56: 0.5
165 | CD3: 2.67
166 | CD331: -1.94
167 | CD341: -0.88
168 | CD90: -0.28
169 | CD117: -0.65
170 | CD45RA: 2.06
171 | CD123: -0.99
172 | CD141: -0.4
173 | HLA-DR: -0.83
174 | CD11b: -0.72
175 | CD64: -0.91
176 | CD381: -1.14
177 | CD45: 0.83
178 | CD361: -0.8
179 | CD931: -0.83
180 | CD91: -0.15",fillcolor="#6dc072",fontsize=20];13 -> 20 [labeldistance=3, label = "2.06
181 | ",fontsize=25, color=red, style=bold];21 [label="21_leaf\n26 (1.01%)\n
182 | CD41: 2.9
183 | CD71: -0.04
184 | CD56: -0.21
185 | CD3: 3.02
186 | CD331: -1.92
187 | CD341: -0.98
188 | CD90: -0.17
189 | CD117: -0.59
190 | CD45RA: -1.01
191 | CD123: -1.01
192 | CD141: -0.34
193 | HLA-DR: -0.67
194 | CD11b: -0.91
195 | CD64: -0.92
196 | CD381: -1.14
197 | CD45: 0.66
198 | CD361: -0.84
199 | CD931: -0.92
200 | CD91: -0.38",fillcolor="#92d28f",fontsize=20];14 -> 21 [labeldistance=3, label = "-0.04
201 | ",fontsize=25, color=black, style=solid];22 [label="22_leaf\n95 (3.68%)\n
202 | CD41: 2.87
203 | CD71: 2.18
204 | CD56: -0.26
205 | CD3: 3.11
206 | CD331: -1.93
207 | CD341: -0.87
208 | CD90: -0.11
209 | CD117: -0.63
210 | CD45RA: -0.11
211 | CD123: -0.97
212 | CD141: -0.36
213 | HLA-DR: -0.92
214 | CD11b: -0.91
215 | CD64: -0.9
216 | CD381: -0.8
217 | CD45: 0.61
218 | CD361: -0.79
219 | CD931: -0.86
220 | CD91: -0.11",fillcolor="#53b466",fontsize=20];14 -> 22 [labeldistance=3, label = "2.18
221 | ",fontsize=25, color=red, style=bold];23 [label="23_leaf\n19 (0.74%)\n
222 | CD41: 0.54
223 | CD71: -0.87
224 | CD56: -0.05
225 | CD3: -0.47
226 | CD331: -0.07
227 | CD341: -0.88
228 | CD90: -0.13
229 | CD117: -0.53
230 | CD45RA: -1.1
231 | CD123: -0.47
232 | CD141: -0.51
233 | HLA-DR: 1.17
234 | CD11b: -0.4
235 | CD64: 0.8
236 | CD381: 1.2
237 | CD45: 0.19
238 | CD361: -0.65
239 | CD931: -0.35
240 | CD91: -0.24",fillcolor="#a2d99c",fontsize=20];16 -> 23 [labeldistance=3, label = "-0.4
241 | ",fontsize=25, color=red, style=bold];24 [label="24_leaf\n62 (2.4%)\n
242 | CD41: 0.53
243 | CD71: -0.81
244 | CD56: -0.02
245 | CD3: -0.33
246 | CD331: 0.33
247 | CD341: -0.8
248 | CD90: 0.02
249 | CD117: -0.5
250 | CD45RA: -0.74
251 | CD123: -0.49
252 | CD141: -0.32
253 | HLA-DR: 1.12
254 | CD11b: 0.49
255 | CD64: 1.05
256 | CD381: 1.04
257 | CD45: 0.4
258 | CD361: -0.4
259 | CD931: -0.07
260 | CD91: -0.16",fillcolor="#6abf71",fontsize=20];16 -> 24 [labeldistance=3, label = "0.49
261 | ",fontsize=25, color=black, style=solid];25 [label="25_leaf\n17 (0.66%)\n
262 | CD41: 0.68
263 | CD71: -0.73
264 | CD56: 1.9
265 | CD3: -0.34
266 | CD331: -0.01
267 | CD341: -0.95
268 | CD90: -0.14
269 | CD117: -0.66
270 | CD45RA: -0.94
271 | CD123: -0.28
272 | CD141: -0.22
273 | HLA-DR: 1.22
274 | CD11b: -0.26
275 | CD64: 0.93
276 | CD381: 0.88
277 | CD45: 0.25
278 | CD361: -0.42
279 | CD931: -0.33
280 | CD91: -0.24",fillcolor="#a5db9f",fontsize=20];17 -> 25 [labeldistance=3, label = "-0.26
281 | ",fontsize=25, color=black, style=solid];26 [label="26_leaf\n63 (2.44%)\n
282 | CD41: 0.44
283 | CD71: -0.66
284 | CD56: 1.84
285 | CD3: -0.21
286 | CD331: 0.2
287 | CD341: -0.82
288 | CD90: 0.13
289 | CD117: -0.49
290 | CD45RA: -0.69
291 | CD123: -0.54
292 | CD141: -0.21
293 | HLA-DR: 0.37
294 | CD11b: 0.6
295 | CD64: 1.02
296 | CD381: 0.86
297 | CD45: 0.51
298 | CD361: -0.21
299 | CD931: 0.08
300 | CD91: -0.29",fillcolor="#68be70",fontsize=20];17 -> 26 [labeldistance=3, label = "0.6
301 | ",fontsize=25, color=red, style=bold];}
--------------------------------------------------------------------------------
/CITEsort_out/tree_complete.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/tree_complete.pdf
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 QiuyuLian
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CITE-sort
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | An artificial-cell-type aware surface marker clustering method for CITE-seq data.
14 |
15 | ## Description
16 |
17 | CITE-sort conducts auto-gating with CITE-seq ADT data using recursive Gaussian Mixture Model. It is robust against artificial cell types that stem from multiplets. CITE-sort also provides concrete explanations of its internal decision process by constructing a biologically meaningful sort tree. See our [paper](https://academic.oup.com/bioinformatics/article/36/Supplement_1/i542/5870491) for more details.
18 |
19 | Below shows an example of sort tree constructed by CITE-sort from an in-house PBMC dataset. Each node represents a subpopulation. The title of each inner node represents the selected surface markers subspace. Red and blue colors represent the two component complexes for subdivision. Edges are colored according to their corresponding component complexes. Leaf nodes are hand-curated and are annotated with domain knowledge. Cell types that should not exist are labeled as suspect _artificial cell type_ (ACT) clusters. Suspect ACT clusters are characterized by their population percentages in the overall dataset (denoted by ‘prop’) and their multi-sample multiplets percentages (denoted by ‘MSM’). Abbreviations: iNK: intermediate NK cells; mNK: vast majority of NK cells; C-mono: classical monocytes; NC-mono: non-classical monocytes; mDC: myeloid DC; DNT: double negative T cells.
20 |
21 |
22 |
23 | ## Usage
24 |
25 | ### Input
26 |
27 | The input of CITE-sort should be a csv file with CLR normalized CITE-seq ADT data (row: droplet/sample, col: ADT/feature).
28 |
29 | ### Run
30 |
31 | `python runCITEsort.py ADT_clr_file -c 0.1 -o ./CITEsort_out`
32 |
33 | - -c, cutoff, the similarity threshold for merging Gaussian components; the default is 0.1. It should be a real value between 0 and 1. A larger value makes CITE-sort split more aggressively and results in a more complex tree.
34 | - -o, output, the path to save output files. If not specified, CITE-sort will create a folder "./CITEsort_out" in the current directory.
35 |
36 | `python runCITEsort.py ADT_clr_file -c 0.1 -o ./CITEsort_out --compact`
37 |
38 | - --compact, adding this parameter will output a compact tree.
39 |
40 | See analysis [tutorial](https://github.com/QiuyuLian/CITE-sort/blob/master/AnalysisTutorial.ipynb) for visualizing each node.
41 |
42 | ### Outputs
43 |
44 | - tree.pdf, the visualized sort tree of the input dataset created by CITE-sort.
45 | - There are three rows in each inner node:
46 | - "**n_marker(s)**": **n** is the node ID, which is assigned in Breadth-First Search order. **marker(s)**, the surface markers next to the ID, is the subspace selected to subdivide the current population.
47 | - "**Num: xxx**": is the number of droplets in current population.
48 | - "**(a|b)**": **b** denotes the number of components determined by BIC in the selected surface marker subspace. **a** denotes the number of component-complexes after merging with a certain threshold. Generally, **a** <= **b**. **a** = **b** when all components can not be merged with current threshold.
49 | - The numbers next to the arrows denote the mean of the selected markers in the partition the arrow stands for. In leaf nodes, the means of all markers are marked if not using '--compact'. As CITE-sort takes CLR-format values as input, these numbers could be positive or negative.
50 | - leaf_labels.csv, the label of each droplet in the sort tree.
51 | - tree.pickle, the tree structure recording the main clustering information of the input dataset.
52 | - tree.dot, the auxiliary file to plot the tree.
53 |
54 | ## Examples
55 |
56 | We provide 3 in-house and 5 public CITE-seq datasets in "./datasets":
57 |
58 | - [PBMC_1k (10X Genomics)](https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0/pbmc_1k_protein_v3)
59 | - [PBMC_1k_b (In house)](https://github.com/QiuyuLian/CITE-sort/tree/master/datasets)
60 | - [PBMC_2k (In house)](https://github.com/QiuyuLian/CITE-sort/tree/master/datasets)
61 | - [PBMC_5k (10X Genomics)](https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.2/5k_pbmc_protein_v3)
62 | - [PBMC_8k (10X Genomics)](https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0/pbmc_10k_protein_v3)
63 | - [MALT_8k (10X Genomics)](https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0/malt_10k_protein_v3)
64 | - [CBMC_8k (GSE100866)](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE100866)
65 | - [PBMC_16k (with cell hashing) (In house)](https://github.com/QiuyuLian/CITE-sort/tree/master/datasets)
66 |
67 | ### Example Commands
68 |
69 | **Example 1**: The PBMC_2k dataset is used as an example of beginning with CLR-format data.
70 |
71 | `python preCITEsort.py ./datasets/PBMC_2k_ADT_clr.csv `
72 |
73 | - plot a histogram of each marker.
74 |
75 | `python runCITEsort.py ./datasets/PBMC_2k_ADT_clr.csv `
76 |
77 | - run CITE-sort and output a sort tree.
78 |
79 | **Example 2**: ADTs from [GSE143363](https://github.com/QiuyuLian/CITE-sort/blob/master/datasets) are extracted from [GEO](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE143363) and used as an example of beginning with raw counts.
80 |
81 | `python preCITEsort.py ./datasets/GSE143363_ADT_Dx_count.csv --CLR `
82 |
83 | - transform data into CLR format and plot a histogram of each marker.
84 |
85 | `python runCITEsort.py ./CITEsort_out/data_clr.csv --compact`
86 |
87 | - run CITE-sort and output a sort tree in compact way.
88 |
89 | ## Authors
90 |
91 | Qiuyu Lian\*, Hongyi Xin\*, Jianzhu Ma, Liza Konnikova, Wei Chen\#, Jin Gu\#,Kong Chen\#
92 |
93 | ## Maintainer
94 |
95 | Qiuyu Lian, Hongyi Xin.
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
/performance.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Thu Jan 16 22:39:00 2020
5 |
6 | @author: lianqiuyu
7 | """
8 |
9 | import seaborn as sns
10 | from matplotlib import pyplot as plt
11 | #from sort_harddivision import sort
12 | import pandas as pd
13 | import numpy as np
14 | from sklearn.mixture import GaussianMixture,BayesianGaussianMixture
15 |
16 |
17 | sns.set(style="whitegrid")
18 | import time
19 | from ReSplit import ReSplit
20 | from BTreeTraversal import BTreeTraversal
21 | #from DEmerge import DEmerge
22 |
# Names of the eight datasets whose results are aggregated at the end of the script.
namelist = ['PBMC_1k','PBMC_1k_b','PBMC_2k', 'PBMC_5k', 'PBMC_8k', 'MALT_8k', 'CBMC_8k','PBMC_16k']
# Directory the input CSV of the selected dataset is read from.
datapath = './datasets'
# Directory the result CSV files are written to.
savepath = './performance'
26 |
27 |
28 |
29 | from sys import argv
30 |
31 |
32 |
# Largest component count tried by the benchmark below (passed to find_k
# and used as n_components of the Bayesian mixture).
max_cluster_num = 50

def find_k(data,c_type,max_cluster_num=100):
    """Pick the number of mixture components with the lowest BIC.

    Fits one ``GaussianMixture`` per candidate k in 1..max_cluster_num on
    ``data`` using covariance type ``c_type`` and scores each fit with
    ``bic``.

    Returns the k whose fit has the smallest BIC; on ties the smallest
    such k wins (first minimum found by ``np.argmin``).
    """
    candidates = range(1, max_cluster_num + 1)
    bic_scores = [
        GaussianMixture(k,covariance_type=c_type).fit(data).bic(data)
        for k in candidates
    ]
    return candidates[np.argmin(bic_scores)]
48 |
49 |
# Benchmark driver: for the dataset named on the command line, run CITE-sort
# and five mixture-model baselines ten times each, recording elapsed time,
# the score stored under 'll' and the number of components per repetition.

# Second argument handed to ReSplit (presumably the merge threshold that the
# README documents as "-c"; confirm against ReSplit).
merge_cutoff = 0.1
# Summary tables keyed by dataset name.
record_full ={}

#for i in range(len(namelist)):

#name = namelist[i]
# The loop over namelist is disabled; the dataset name is taken from the
# first command-line argument instead.
name = argv[1]
print(name)
data = pd.read_csv(datapath+'/'+name+'_ADT_clr_10markers.csv',header=0,index_col=0)
#N=data.shape[0]

# One 10x3 table per method; row t is filled by repetition t of the loop below.
record_sort = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component'])
record_gmm = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component'])
record_ngmm = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component'])
record_dpgmm = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component'])

record_gmm_fix_k = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component'])
record_ngmm_fix_k = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component'])



for t in range(10):

    # CITE-sort: only the ReSplit call is timed; the traversal that yields
    # ll and n_components happens after the clock is read.
    print('CITE-sort.')
    start_time = time.time()
    rgmm = ReSplit(data,merge_cutoff) #sort(data,F_path,N,c_type,weight,rescan_cut,tol,fix_k,max_ndim)
    record_sort.iloc[t,0] = time.time() - start_time
    # print("--- %s seconds ---" % (t1))
    trav = BTreeTraversal(rgmm)
    record_sort.iloc[t,1] = trav.ll
    #record_sort.iloc[t,2] = trav.bic
    record_sort.iloc[t,2] = trav.n_components
    #print(rgmm_ll)

    # GMM with the default covariance type; the timed span covers both the
    # BIC search in find_k and the final fit.
    print('full GMM.')
    start_time = time.time()
    final_k = find_k(data,'full', max_cluster_num)
    gmm = GaussianMixture(final_k).fit(data)
    record_gmm.iloc[t,0] = time.time() - start_time
    #print("--- %s seconds ---" % gmm_time)
    #record_gmm.iloc[t,2] = gmm.bic(data)
    record_gmm.iloc[t,1] = gmm.score(data)
    record_gmm.iloc[t,2] = final_k #trav.n_components


    # Same model, but with k fixed to the component count CITE-sort found in
    # this repetition, so no search is timed.
    print('full GMM wigh fix k.')
    start_time = time.time()
    gmm = GaussianMixture(trav.n_components).fit(data)
    record_gmm_fix_k.iloc[t,0] = time.time() - start_time
    #print("--- %s seconds ---" % gmm_time)
    #record_gmm.iloc[t,2] = gmm.bic(data)
    record_gmm_fix_k.iloc[t,1] = gmm.score(data)
    record_gmm_fix_k.iloc[t,2] = trav.n_components


    # "Naive" GMM: diagonal covariance, k chosen by the BIC search (timed).
    print('naive GMM.')
    start_time = time.time()
    final_k = find_k(data,'diag',max_cluster_num)
    ngmm = GaussianMixture(final_k,covariance_type='diag').fit(data)
    record_ngmm.iloc[t,0] = time.time() - start_time
    #print("--- %s seconds ---" % (t))
    #record_ngmm.iloc[t,2] = ngmm.bic(data)
    record_ngmm.iloc[t,1] = ngmm.score(data)
    record_ngmm.iloc[t,2] = final_k#trav.n_components
    #print(ngmm_ll)


    # Diagonal-covariance GMM with k fixed to CITE-sort's component count.
    print('naive GMM with fix k.')
    start_time = time.time()
    ngmm = GaussianMixture(trav.n_components,covariance_type='diag').fit(data)
    record_ngmm_fix_k.iloc[t,0] = time.time() - start_time
    #print("--- %s seconds ---" % (t))
    #record_ngmm.iloc[t,2] = ngmm.bic(data)
    record_ngmm_fix_k.iloc[t,1] = ngmm.score(data)
    record_ngmm_fix_k.iloc[t,2] = trav.n_components
    #print(ngmm_ll)


    # Bayesian mixture with max_cluster_num as the component budget.
    # NOTE(review): len(dpgmm.weights_) appears to always equal the
    # configured n_components rather than the number of components actually
    # used - confirm this is the intended 'n_component' value.
    print('dpgmm.')
    start_time = time.time()
    dpgmm = BayesianGaussianMixture(n_components=max_cluster_num,max_iter=500).fit(data)
    record_dpgmm.iloc[t,0] = time.time() - start_time
    record_dpgmm.iloc[t,1] = dpgmm.score(data)
    record_dpgmm.iloc[t,2] = len(dpgmm.weights_)


# Stack the six per-method tables; the 'method' labels below are built in the
# same order as the concat so that each row gets the right label.
db_summary = pd.concat([record_sort,record_gmm,record_gmm_fix_k,record_ngmm,record_ngmm_fix_k,record_dpgmm])
db_summary['DB'] = name
db_summary['method'] = ['CITE-sort']*record_sort.shape[0] + ['GMM']*record_gmm.shape[0] + ['GMM_fixk']*record_gmm_fix_k.shape[0] + \
['nGMM']*record_ngmm.shape[0] + ['nGMM_fixk']*record_ngmm_fix_k.shape[0] + ['dpgmm']*record_dpgmm.shape[0]

# Persist this dataset's results under its own file name.
db_summary.to_csv(savepath+'/record_'+name+'.csv')





record_full[name] = db_summary
148 |
149 |
# Aggregate the per-dataset summaries into one table.
# Only the dataset named on the command line is held in record_full, so
# indexing it with every entry of namelist raised KeyError. Use the in-memory
# summary when there is one and otherwise fall back to the per-dataset CSV
# written by an earlier run; datasets without any record are skipped.
available_records = []
for db_name in namelist:
    if db_name in record_full:
        available_records.append(record_full[db_name])
        continue
    try:
        available_records.append(
            pd.read_csv(savepath+'/record_'+db_name+'.csv',header=0,index_col=0))
    except FileNotFoundError:
        print('no record found for '+db_name+', skipped.')

record_full_alldb = pd.concat(available_records)
record_full_alldb.to_csv(savepath+'/record_8DBs.csv')



# record_full_alldb = pd.read_csv('./performance/record_8DBs.csv',header=0,index_col=0)

# The fixed-k baselines are left out of the figures. Work on an explicit copy
# so that the column assignments below modify record_plot itself instead of
# a view of record_full_alldb.
record_plot = record_full_alldb.loc[
    ~record_full_alldb['method'].isin(['GMM_fixk','nGMM_fixk']),:].copy()

# Seconds -> minutes for the time figure.
record_plot['time'] = record_plot['time']/60


plt.figure(figsize=(8,3), dpi=96)
ax = sns.barplot(x='DB', y='time', hue='method', data=record_plot)
plt.ylabel('Time (min)',fontsize=15)
plt.xlabel('')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.savefig(savepath+'/time.pdf')
plt.show()


# Plot the negated score so that bars point upwards.
record_plot['ll'] = - record_plot['ll']

plt.figure(figsize=(8,3), dpi=96)
ax = sns.barplot(x='DB', y='ll', hue='method', data=record_plot)
plt.ylabel(' - log-likelihood',fontsize=15)
plt.xlabel('')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.savefig(savepath+'/ll.pdf')
plt.show()
184 |
185 |
186 |
--------------------------------------------------------------------------------
/performance/ll.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/performance/ll.pdf
--------------------------------------------------------------------------------
/performance/record_8DBs.csv:
--------------------------------------------------------------------------------
1 | ,time,ll,n_component,DB,method
2 | 0,7.126919984817505,-11.595296347428391,10.0,PBMC_1k,CITE-sort
3 | 1,7.169904470443726,-11.011866862133289,11.0,PBMC_1k,CITE-sort
4 | 2,6.501286268234253,-11.595296347428391,10.0,PBMC_1k,CITE-sort
5 | 3,6.519277572631836,-11.629320053052382,10.0,PBMC_1k,CITE-sort
6 | 4,6.452306270599365,-12.319758924800478,9.0,PBMC_1k,CITE-sort
7 | 5,6.9830121994018555,-11.73632943950538,10.0,PBMC_1k,CITE-sort
8 | 6,6.7821266651153564,-12.319758924800478,9.0,PBMC_1k,CITE-sort
9 | 7,6.54626202583313,-11.73632943950538,10.0,PBMC_1k,CITE-sort
10 | 8,6.8560850620269775,-11.73632943950538,10.0,PBMC_1k,CITE-sort
11 | 9,6.662195682525635,-12.319758924800478,9.0,PBMC_1k,CITE-sort
12 | 0,6.587217807769775,-17.140941651826182,4.0,PBMC_1k,GMM
13 | 1,5.890617609024048,-17.059280976259892,4.0,PBMC_1k,GMM
14 | 2,6.10949182510376,-17.106031776612685,4.0,PBMC_1k,GMM
15 | 3,5.937591314315796,-16.582158231213626,5.0,PBMC_1k,GMM
16 | 4,6.394338369369507,-17.140941651826182,4.0,PBMC_1k,GMM
17 | 5,6.005538702011108,-17.106031776612685,4.0,PBMC_1k,GMM
18 | 6,6.512260913848877,-15.174585542784197,7.0,PBMC_1k,GMM
19 | 7,6.381338357925415,-16.14805683403237,5.0,PBMC_1k,GMM
20 | 8,5.929594278335571,-16.163219553315656,5.0,PBMC_1k,GMM
21 | 9,6.02553915977478,-17.10511391549158,4.0,PBMC_1k,GMM
22 | 0,0.05996561050415039,-14.088582148687417,10.0,PBMC_1k,GMM_fixk
23 | 1,0.08894848823547363,-13.41499493553318,11.0,PBMC_1k,GMM_fixk
24 | 2,0.03797769546508789,-13.540707534411622,10.0,PBMC_1k,GMM_fixk
25 | 3,0.07995438575744629,-13.850122640730694,10.0,PBMC_1k,GMM_fixk
26 | 4,0.032981157302856445,-14.322336721328481,9.0,PBMC_1k,GMM_fixk
27 | 5,0.043974876403808594,-13.958332357223027,10.0,PBMC_1k,GMM_fixk
28 | 6,0.07495713233947754,-14.111789277986022,9.0,PBMC_1k,GMM_fixk
29 | 7,0.04097485542297363,-14.064216918680327,10.0,PBMC_1k,GMM_fixk
30 | 8,0.0439755916595459,-13.776412026253446,10.0,PBMC_1k,GMM_fixk
31 | 9,0.03797793388366699,-13.804259594360275,9.0,PBMC_1k,GMM_fixk
32 | 0,1.9418880939483643,-16.391776859648125,21.0,PBMC_1k,nGMM
33 | 1,1.7360057830810547,-17.017084083201127,17.0,PBMC_1k,nGMM
34 | 2,1.7729849815368652,-16.82115726746788,18.0,PBMC_1k,nGMM
35 | 3,1.7459995746612549,-16.76203681311147,19.0,PBMC_1k,nGMM
36 | 4,1.9488842487335205,-17.389385051589162,15.0,PBMC_1k,nGMM
37 | 5,1.8409459590911865,-16.74670537154237,21.0,PBMC_1k,nGMM
38 | 6,1.8669307231903076,-16.147770612554257,24.0,PBMC_1k,nGMM
39 | 7,1.8069655895233154,-16.94872309211664,18.0,PBMC_1k,nGMM
40 | 8,1.8809239864349365,-17.244953751785008,17.0,PBMC_1k,nGMM
41 | 9,1.9099071025848389,-16.986451147967987,17.0,PBMC_1k,nGMM
42 | 0,0.0159912109375,-18.676332923006573,10.0,PBMC_1k,nGMM_fixk
43 | 1,0.016989946365356445,-18.419379500351987,11.0,PBMC_1k,nGMM_fixk
44 | 2,0.01699066162109375,-18.6721788347362,10.0,PBMC_1k,nGMM_fixk
45 | 3,0.01899862289428711,-18.937733836680852,10.0,PBMC_1k,nGMM_fixk
46 | 4,0.0199892520904541,-19.050810720422405,9.0,PBMC_1k,nGMM_fixk
47 | 5,0.01898956298828125,-18.78758349587411,10.0,PBMC_1k,nGMM_fixk
48 | 6,0.01299285888671875,-19.648878098092986,9.0,PBMC_1k,nGMM_fixk
49 | 7,0.014991521835327148,-18.680480444565013,10.0,PBMC_1k,nGMM_fixk
50 | 8,0.013991594314575195,-18.70135380257122,10.0,PBMC_1k,nGMM_fixk
51 | 9,0.008994340896606445,-19.010671457346113,9.0,PBMC_1k,nGMM_fixk
52 | 0,0.5646781921386719,-13.607799761585456,50.0,PBMC_1k,dpgmm
53 | 1,0.3867809772491455,-13.640822625981155,50.0,PBMC_1k,dpgmm
54 | 2,0.30382585525512695,-13.563951990951315,50.0,PBMC_1k,dpgmm
55 | 3,0.34279322624206543,-13.69329328968205,50.0,PBMC_1k,dpgmm
56 | 4,0.43774843215942383,-13.459885318638806,50.0,PBMC_1k,dpgmm
57 | 5,0.21987438201904297,-13.675988952177404,50.0,PBMC_1k,dpgmm
58 | 6,0.5296964645385742,-13.716782354231182,50.0,PBMC_1k,dpgmm
59 | 7,0.14991450309753418,-13.616409725572016,50.0,PBMC_1k,dpgmm
60 | 8,0.2648475170135498,-13.520443398422579,50.0,PBMC_1k,dpgmm
61 | 9,0.33780574798583984,-13.558766483095482,50.0,PBMC_1k,dpgmm
62 | 0,10.114209651947021,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
63 | 1,9.929314374923706,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
64 | 2,9.921319484710693,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
65 | 3,9.674460411071777,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
66 | 4,9.697448492050171,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
67 | 5,9.811382293701172,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
68 | 6,9.994277238845825,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
69 | 7,9.762410879135132,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
70 | 8,9.930314302444458,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
71 | 9,9.905328750610352,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort
72 | 0,17.497982263565063,-3.857410803360695,10.0,PBMC_1k_b,GMM
73 | 1,18.163599729537964,-4.078737114779303,9.0,PBMC_1k_b,GMM
74 | 2,16.286674976348877,-5.057414356410817,7.0,PBMC_1k_b,GMM
75 | 3,17.917723417282104,-4.432242063109768,8.0,PBMC_1k_b,GMM
76 | 4,17.478991985321045,-4.033576647238859,9.0,PBMC_1k_b,GMM
77 | 5,18.270538568496704,-4.514995141086516,7.0,PBMC_1k_b,GMM
78 | 6,17.49598240852356,-5.035745244930868,7.0,PBMC_1k_b,GMM
79 | 7,16.475565433502197,-4.111351115530821,9.0,PBMC_1k_b,GMM
80 | 8,18.53538727760315,-3.9977804567218325,9.0,PBMC_1k_b,GMM
81 | 9,17.7558331489563,-3.786756768717093,12.0,PBMC_1k_b,GMM
82 | 0,0.048970699310302734,-4.249406391322957,8.0,PBMC_1k_b,GMM_fixk
83 | 1,0.0299832820892334,-4.278763405710032,8.0,PBMC_1k_b,GMM_fixk
84 | 2,0.04897260665893555,-4.197110693176923,8.0,PBMC_1k_b,GMM_fixk
85 | 3,0.051970720291137695,-4.130739821809692,8.0,PBMC_1k_b,GMM_fixk
86 | 4,0.04897260665893555,-4.286676718538526,8.0,PBMC_1k_b,GMM_fixk
87 | 5,0.0279843807220459,-4.246833006190683,8.0,PBMC_1k_b,GMM_fixk
88 | 6,0.0459747314453125,-4.250855964037708,8.0,PBMC_1k_b,GMM_fixk
89 | 7,0.027984619140625,-4.345332236195747,8.0,PBMC_1k_b,GMM_fixk
90 | 8,0.03697919845581055,-4.256822206604944,8.0,PBMC_1k_b,GMM_fixk
91 | 9,0.03597855567932129,-4.899120027881217,8.0,PBMC_1k_b,GMM_fixk
92 | 0,3.9977121353149414,-4.459123535455444,33.0,PBMC_1k_b,nGMM
93 | 1,3.716872215270996,-5.009960395234014,24.0,PBMC_1k_b,nGMM
94 | 2,3.637917995452881,-4.437605804271244,30.0,PBMC_1k_b,nGMM
95 | 3,3.8797781467437744,-4.61302649284386,27.0,PBMC_1k_b,nGMM
96 | 4,4.056677341461182,-4.299643428195814,32.0,PBMC_1k_b,nGMM
97 | 5,3.7988243103027344,-4.600914628653415,27.0,PBMC_1k_b,nGMM
98 | 6,3.7018799781799316,-4.617832642909012,29.0,PBMC_1k_b,nGMM
99 | 7,3.8947696685791016,-4.465719923331148,30.0,PBMC_1k_b,nGMM
100 | 8,3.5669586658477783,-4.668268214854305,27.0,PBMC_1k_b,nGMM
101 | 9,3.653907299041748,-4.329243028727759,33.0,PBMC_1k_b,nGMM
102 | 0,0.0239865779876709,-6.85627287191229,8.0,PBMC_1k_b,nGMM_fixk
103 | 1,0.01898932456970215,-6.858093580112152,8.0,PBMC_1k_b,nGMM_fixk
104 | 2,0.02398538589477539,-6.858015886140156,8.0,PBMC_1k_b,nGMM_fixk
105 | 3,0.018988847732543945,-6.858142104920901,8.0,PBMC_1k_b,nGMM_fixk
106 | 4,0.024985551834106445,-6.856269306800974,8.0,PBMC_1k_b,nGMM_fixk
107 | 5,0.02298712730407715,-6.788174348841982,8.0,PBMC_1k_b,nGMM_fixk
108 | 6,0.02398681640625,-6.856261942823088,8.0,PBMC_1k_b,nGMM_fixk
109 | 7,0.012992620468139648,-6.653478604934252,8.0,PBMC_1k_b,nGMM_fixk
110 | 8,0.027983427047729492,-6.858467841012181,8.0,PBMC_1k_b,nGMM_fixk
111 | 9,0.01898980140686035,-6.858474157989434,8.0,PBMC_1k_b,nGMM_fixk
112 | 0,1.0374045372009277,-4.032790733087652,50.0,PBMC_1k_b,dpgmm
113 | 1,1.1683309078216553,-3.995996423147343,50.0,PBMC_1k_b,dpgmm
114 | 2,0.9604494571685791,-4.014887756711081,50.0,PBMC_1k_b,dpgmm
115 | 3,1.4381763935089111,-3.996255332722274,50.0,PBMC_1k_b,dpgmm
116 | 4,0.8465161323547363,-4.041724312614789,50.0,PBMC_1k_b,dpgmm
117 | 5,1.267275333404541,-3.996727813434055,50.0,PBMC_1k_b,dpgmm
118 | 6,0.6995992660522461,-4.007786680958331,50.0,PBMC_1k_b,dpgmm
119 | 7,0.7925460338592529,-4.049556668208995,50.0,PBMC_1k_b,dpgmm
120 | 8,1.1173601150512695,-4.00238851229009,50.0,PBMC_1k_b,dpgmm
121 | 9,0.8585078716278076,-4.02573409179672,50.0,PBMC_1k_b,dpgmm
122 | 0,19.135042667388916,-3.039977922112491,13.0,PBMC_2k,CITE-sort
123 | 1,17.3620707988739,-3.838225569154643,13.0,PBMC_2k,CITE-sort
124 | 2,19.087072610855103,-3.3607753391316337,12.0,PBMC_2k,CITE-sort
125 | 3,18.724289417266846,-3.356040836253083,12.0,PBMC_2k,CITE-sort
126 | 4,18.1196346282959,-3.349418687088477,12.0,PBMC_2k,CITE-sort
127 | 5,18.74930214881897,-3.193732731984007,13.0,PBMC_2k,CITE-sort
128 | 6,16.743402242660522,-4.008552378859202,12.0,PBMC_2k,CITE-sort
129 | 7,18.80124521255493,-3.184140139622323,13.0,PBMC_2k,CITE-sort
130 | 8,19.01911997795105,-3.3504575966091403,12.0,PBMC_2k,CITE-sort
131 | 9,19.966576099395752,-3.1847493044634674,13.0,PBMC_2k,CITE-sort
132 | 0,22.255257844924927,-4.305043135400915,11.0,PBMC_2k,GMM
133 | 1,20.33034920692444,-4.304050243172198,11.0,PBMC_2k,GMM
134 | 2,20.554229974746704,-4.533803215260254,9.0,PBMC_2k,GMM
135 | 3,22.419154405593872,-4.42924593580085,11.0,PBMC_2k,GMM
136 | 4,21.91244387626648,-4.9055623093266565,7.0,PBMC_2k,GMM
137 | 5,21.474693298339844,-4.38432892944463,11.0,PBMC_2k,GMM
138 | 6,22.620038270950317,-4.254110055313111,11.0,PBMC_2k,GMM
139 | 7,21.389756202697754,-4.388461844683266,10.0,PBMC_2k,GMM
140 | 8,22.062358617782593,-4.534595518289228,10.0,PBMC_2k,GMM
141 | 9,20.67216420173645,-4.196493224579602,11.0,PBMC_2k,GMM
142 | 0,0.06296348571777344,-4.183532801379574,13.0,PBMC_2k,GMM_fixk
143 | 1,0.09994292259216309,-4.017509471280552,13.0,PBMC_2k,GMM_fixk
144 | 2,0.09394574165344238,-4.31464758998454,12.0,PBMC_2k,GMM_fixk
145 | 3,0.09194636344909668,-4.13375789931093,12.0,PBMC_2k,GMM_fixk
146 | 4,0.17489886283874512,-4.141008950409885,12.0,PBMC_2k,GMM_fixk
147 | 5,0.06496405601501465,-4.071073186506558,13.0,PBMC_2k,GMM_fixk
148 | 6,0.08295249938964844,-4.157396516272058,12.0,PBMC_2k,GMM_fixk
149 | 7,0.09694290161132812,-4.306028816385136,13.0,PBMC_2k,GMM_fixk
150 | 8,0.11393475532531738,-4.153402297840144,12.0,PBMC_2k,GMM_fixk
151 | 9,0.11994051933288574,-4.159316942750561,13.0,PBMC_2k,GMM_fixk
152 | 0,4.954163312911987,-4.56337892325203,30.0,PBMC_2k,nGMM
153 | 1,4.922183036804199,-4.277406133166951,36.0,PBMC_2k,nGMM
154 | 2,4.849223375320435,-4.564568888372911,31.0,PBMC_2k,nGMM
155 | 3,4.615357398986816,-4.509464553316169,31.0,PBMC_2k,nGMM
156 | 4,4.847224473953247,-4.45675264736644,30.0,PBMC_2k,nGMM
157 | 5,4.723294734954834,-4.348757776747538,33.0,PBMC_2k,nGMM
158 | 6,4.740287780761719,-4.664607870752208,27.0,PBMC_2k,nGMM
159 | 7,4.55738091468811,-4.5980796898912475,31.0,PBMC_2k,nGMM
160 | 8,5.003134250640869,-4.2928258594938535,34.0,PBMC_2k,nGMM
161 | 9,4.799241542816162,-4.440630441784365,36.0,PBMC_2k,nGMM
162 | 0,0.03497719764709473,-5.9833291676446265,13.0,PBMC_2k,nGMM_fixk
163 | 1,0.02698540687561035,-6.003957870476196,13.0,PBMC_2k,nGMM_fixk
164 | 2,0.027983665466308594,-6.093630484174055,12.0,PBMC_2k,nGMM_fixk
165 | 3,0.03298163414001465,-6.004338077354764,12.0,PBMC_2k,nGMM_fixk
166 | 4,0.02498626708984375,-6.147670550662348,12.0,PBMC_2k,nGMM_fixk
167 | 5,0.06996989250183105,-5.7699411467960475,13.0,PBMC_2k,nGMM_fixk
168 | 6,0.034979820251464844,-6.0118061485416225,12.0,PBMC_2k,nGMM_fixk
169 | 7,0.021988630294799805,-6.014065281933787,13.0,PBMC_2k,nGMM_fixk
170 | 8,0.02298712730407715,-6.080352334919585,12.0,PBMC_2k,nGMM_fixk
171 | 9,0.027984619140625,-5.829470008860642,13.0,PBMC_2k,nGMM_fixk
172 | 0,0.7405760288238525,-4.154761130337304,50.0,PBMC_2k,dpgmm
173 | 1,0.6976006031036377,-4.095399727089786,50.0,PBMC_2k,dpgmm
174 | 2,0.722585916519165,-4.100913209694664,50.0,PBMC_2k,dpgmm
175 | 3,0.7875497341156006,-4.158746515696291,50.0,PBMC_2k,dpgmm
176 | 4,1.0024268627166748,-4.115404945244016,50.0,PBMC_2k,dpgmm
177 | 5,0.8954741954803467,-4.119898009291454,50.0,PBMC_2k,dpgmm
178 | 6,1.0593931674957275,-4.104017128919311,50.0,PBMC_2k,dpgmm
179 | 7,1.5970842838287354,-4.0937771166383365,50.0,PBMC_2k,dpgmm
180 | 8,0.5696749687194824,-4.096304595974938,50.0,PBMC_2k,dpgmm
181 | 9,0.6646206378936768,-4.11045295307734,50.0,PBMC_2k,dpgmm
182 | 0,114.97818398475647,-18.507600707601924,30.0,PBMC_5k,CITE-sort
183 | 1,112.17079138755798,-20.140833521518715,24.0,PBMC_5k,CITE-sort
184 | 2,118.43320631980896,-20.1687032085678,25.0,PBMC_5k,CITE-sort
185 | 3,108.78671073913574,-19.60266635400196,27.0,PBMC_5k,CITE-sort
186 | 4,112.15385246276855,-19.91097177461177,26.0,PBMC_5k,CITE-sort
187 | 5,104.8159863948822,-20.579059335247663,25.0,PBMC_5k,CITE-sort
188 | 6,129.31895780563354,-18.047981442496756,33.0,PBMC_5k,CITE-sort
189 | 7,121.13580322265625,-18.7037400372988,29.0,PBMC_5k,CITE-sort
190 | 8,110.68664026260376,-20.892593978353037,22.0,PBMC_5k,CITE-sort
191 | 9,109.54827690124512,-20.383111524145896,26.0,PBMC_5k,CITE-sort
192 | 0,211.32600045204163,-23.454350630966285,10.0,PBMC_5k,GMM
193 | 1,210.00974488258362,-23.054916710314494,10.0,PBMC_5k,GMM
194 | 2,215.26674675941467,-23.572181097836197,9.0,PBMC_5k,GMM
195 | 3,194.1368443965912,-24.363631408785547,9.0,PBMC_5k,GMM
196 | 4,212.23347163200378,-24.327461240101023,7.0,PBMC_5k,GMM
197 | 5,214.3302800655365,-24.32745953177523,7.0,PBMC_5k,GMM
198 | 6,211.78773593902588,-23.873551854916364,8.0,PBMC_5k,GMM
199 | 7,213.2189016342163,-22.46290853355792,11.0,PBMC_5k,GMM
200 | 8,203.1698772907257,-23.317866247583037,9.0,PBMC_5k,GMM
201 | 9,205.1005666255951,-26.363360699392683,6.0,PBMC_5k,GMM
202 | 0,5.1210691928863525,-19.4854647187879,30.0,PBMC_5k,GMM_fixk
203 | 1,3.6479127407073975,-20.752295899917097,24.0,PBMC_5k,GMM_fixk
204 | 2,4.176607847213745,-20.312525768964793,25.0,PBMC_5k,GMM_fixk
205 | 3,6.233431577682495,-20.262024172246882,27.0,PBMC_5k,GMM_fixk
206 | 4,5.7177252769470215,-20.31578796444344,26.0,PBMC_5k,GMM_fixk
207 | 5,4.098653793334961,-20.1200694101659,25.0,PBMC_5k,GMM_fixk
208 | 6,6.036544322967529,-19.28314747254952,33.0,PBMC_5k,GMM_fixk
209 | 7,4.6363441944122314,-20.369931181472886,29.0,PBMC_5k,GMM_fixk
210 | 8,3.9037649631500244,-20.91871447452352,22.0,PBMC_5k,GMM_fixk
211 | 9,3.270127058029175,-20.971680742345054,26.0,PBMC_5k,GMM_fixk
212 | 0,25.154587030410767,-23.45307846766253,45.0,PBMC_5k,nGMM
213 | 1,27.146455764770508,-23.183804631992455,48.0,PBMC_5k,nGMM
214 | 2,22.942864418029785,-23.561227156902333,47.0,PBMC_5k,nGMM
215 | 3,24.96070146560669,-23.065355294909185,48.0,PBMC_5k,nGMM
216 | 4,23.56050968170166,-23.316521817329175,50.0,PBMC_5k,nGMM
217 | 5,25.249542474746704,-23.05478727667191,50.0,PBMC_5k,nGMM
218 | 6,24.98569369316101,-23.347849308051337,44.0,PBMC_5k,nGMM
219 | 7,23.13675308227539,-23.305425238489914,49.0,PBMC_5k,nGMM
220 | 8,21.95742917060852,-24.176815345583663,38.0,PBMC_5k,nGMM
221 | 9,23.830355167388916,-23.248454154840363,48.0,PBMC_5k,nGMM
222 | 0,0.5936610698699951,-24.652414093913112,30.0,PBMC_5k,nGMM_fixk
223 | 1,0.4087660312652588,-25.39239092081412,24.0,PBMC_5k,nGMM_fixk
224 | 2,0.4007716178894043,-25.469402508908196,25.0,PBMC_5k,nGMM_fixk
225 | 3,0.3987720012664795,-25.245510415722396,27.0,PBMC_5k,nGMM_fixk
226 | 4,0.5996565818786621,-25.328413319173446,26.0,PBMC_5k,nGMM_fixk
227 | 5,0.3507990837097168,-25.305626390488744,25.0,PBMC_5k,nGMM_fixk
228 | 6,0.5296971797943115,-24.678219937327306,33.0,PBMC_5k,nGMM_fixk
229 | 7,0.5896611213684082,-24.94437548448975,29.0,PBMC_5k,nGMM_fixk
230 | 8,0.23586344718933105,-25.515242501309615,22.0,PBMC_5k,nGMM_fixk
231 | 9,0.35979413986206055,-25.17855829387026,26.0,PBMC_5k,nGMM_fixk
232 | 0,28.763530015945435,-22.730742229489056,50.0,PBMC_5k,dpgmm
233 | 1,42.97938942909241,-22.637541038184285,50.0,PBMC_5k,dpgmm
234 | 2,17.364058017730713,-22.767235365076285,50.0,PBMC_5k,dpgmm
235 | 3,16.866344213485718,-22.87149997712949,50.0,PBMC_5k,dpgmm
236 | 4,15.228280782699585,-22.635595389698874,50.0,PBMC_5k,dpgmm
237 | 5,31.790797233581543,-22.657364518166947,50.0,PBMC_5k,dpgmm
238 | 6,17.372053384780884,-22.704211114881595,50.0,PBMC_5k,dpgmm
239 | 7,14.91546082496643,-22.65279666789171,50.0,PBMC_5k,dpgmm
240 | 8,38.50095534324646,-22.687663246028038,50.0,PBMC_5k,dpgmm
241 | 9,21.965423107147217,-22.618340821480327,50.0,PBMC_5k,dpgmm
242 | 0,107.70734000205994,-10.630034855679904,32.0,PBMC_8k,CITE-sort
243 | 1,104.77302098274231,-10.779157598548627,30.0,PBMC_8k,CITE-sort
244 | 2,102.81413292884827,-10.731528850104391,31.0,PBMC_8k,CITE-sort
245 | 3,97.51817464828491,-10.95221067864014,28.0,PBMC_8k,CITE-sort
246 | 4,104.70505881309509,-10.644467718087931,32.0,PBMC_8k,CITE-sort
247 | 5,101.70977544784546,-10.779783029296437,30.0,PBMC_8k,CITE-sort
248 | 6,103.72061419487,-10.677899612997283,31.0,PBMC_8k,CITE-sort
249 | 7,99.14624118804932,-10.731528850104391,31.0,PBMC_8k,CITE-sort
250 | 8,100.68646454811096,-10.636410030908806,32.0,PBMC_8k,CITE-sort
251 | 9,101.97961044311523,-10.775643442866567,30.0,PBMC_8k,CITE-sort
252 | 0,150.70470070838928,-12.67843340228169,15.0,PBMC_8k,GMM
253 | 1,154.03279638290405,-12.913352643743446,13.0,PBMC_8k,GMM
254 | 2,149.3135085105896,-12.829395836729475,14.0,PBMC_8k,GMM
255 | 3,153.33319759368896,-13.018263989717305,13.0,PBMC_8k,GMM
256 | 4,166.12588119506836,-12.653060691652987,15.0,PBMC_8k,GMM
257 | 5,156.92513966560364,-12.848781057466264,14.0,PBMC_8k,GMM
258 | 6,151.6311810016632,-12.641724530158609,16.0,PBMC_8k,GMM
259 | 7,149.8801727294922,-12.915539484541302,14.0,PBMC_8k,GMM
260 | 8,152.36875820159912,-12.547906030609802,17.0,PBMC_8k,GMM
261 | 9,159.12189269065857,-12.613771250927476,16.0,PBMC_8k,GMM
262 | 0,3.632920026779175,-11.889442365173966,32.0,PBMC_8k,GMM_fixk
263 | 1,2.9373178482055664,-11.976433486487593,30.0,PBMC_8k,GMM_fixk
264 | 2,3.503993511199951,-11.896689684489052,31.0,PBMC_8k,GMM_fixk
265 | 3,2.3806371688842773,-12.045370890493576,28.0,PBMC_8k,GMM_fixk
266 | 4,4.423468589782715,-11.858538382211858,32.0,PBMC_8k,GMM_fixk
267 | 5,2.940316677093506,-11.946873896980716,30.0,PBMC_8k,GMM_fixk
268 | 6,4.510417461395264,-11.913008586845843,31.0,PBMC_8k,GMM_fixk
269 | 7,4.011703968048096,-11.887681569730864,31.0,PBMC_8k,GMM_fixk
270 | 8,4.2235822677612305,-11.863128233722959,32.0,PBMC_8k,GMM_fixk
271 | 9,3.6099324226379395,-11.9449885106109,30.0,PBMC_8k,GMM_fixk
272 | 0,24.473987102508545,-13.257029056715261,48.0,PBMC_8k,nGMM
273 | 1,24.23612380027771,-13.248785835689983,50.0,PBMC_8k,nGMM
274 | 2,23.25368595123291,-13.361189414486198,47.0,PBMC_8k,nGMM
275 | 3,24.1181902885437,-13.269971941994285,50.0,PBMC_8k,nGMM
276 | 4,23.40259838104248,-13.343187852332242,46.0,PBMC_8k,nGMM
277 | 5,24.961705923080444,-13.205067487524545,50.0,PBMC_8k,nGMM
278 | 6,25.16559147834778,-13.23389395749099,49.0,PBMC_8k,nGMM
279 | 7,23.77938461303711,-13.264781680716615,50.0,PBMC_8k,nGMM
280 | 8,23.51953411102295,-13.2131171601585,50.0,PBMC_8k,nGMM
281 | 9,23.450572729110718,-13.276658640933295,49.0,PBMC_8k,nGMM
282 | 0,0.471729040145874,-13.881171859596767,32.0,PBMC_8k,nGMM_fixk
283 | 1,0.7545680999755859,-13.948365559776445,30.0,PBMC_8k,nGMM_fixk
284 | 2,0.4767270088195801,-13.99342604286498,31.0,PBMC_8k,nGMM_fixk
285 | 3,0.6516270637512207,-14.070899084715712,28.0,PBMC_8k,nGMM_fixk
286 | 4,0.4997129440307617,-13.848630499522454,32.0,PBMC_8k,nGMM_fixk
287 | 5,0.548687219619751,-13.917406030558176,30.0,PBMC_8k,nGMM_fixk
288 | 6,0.519702672958374,-13.95738399383949,31.0,PBMC_8k,nGMM_fixk
289 | 7,0.5456867218017578,-14.07034839948307,31.0,PBMC_8k,nGMM_fixk
290 | 8,0.4397470951080322,-13.899285897252645,32.0,PBMC_8k,nGMM_fixk
291 | 9,0.4247574806213379,-14.03919632385835,30.0,PBMC_8k,nGMM_fixk
292 | 0,36.233253717422485,-12.107852092857222,50.0,PBMC_8k,dpgmm
293 | 1,30.52152371406555,-12.123206747355963,50.0,PBMC_8k,dpgmm
294 | 2,32.488396883010864,-12.097017001280479,50.0,PBMC_8k,dpgmm
295 | 3,27.36733055114746,-12.106176005176184,50.0,PBMC_8k,dpgmm
296 | 4,64.03633379936218,-12.105459632889083,50.0,PBMC_8k,dpgmm
297 | 5,25.79922842979431,-12.09453672839614,50.0,PBMC_8k,dpgmm
298 | 6,47.8176212310791,-12.076773396014579,50.0,PBMC_8k,dpgmm
299 | 7,20.216424465179443,-12.086241998093666,50.0,PBMC_8k,dpgmm
300 | 8,53.82618069648743,-12.112368588880203,50.0,PBMC_8k,dpgmm
301 | 9,26.560791969299316,-12.083497305149283,50.0,PBMC_8k,dpgmm
302 | 0,104.09340858459473,-9.964740682097846,16.0,MALT_8k,CITE-sort
303 | 1,90.37426471710205,-10.519119375728287,12.0,MALT_8k,CITE-sort
304 | 2,106.49103569984436,-10.02847883660573,15.0,MALT_8k,CITE-sort
305 | 3,101.72176790237427,-9.965077840276498,16.0,MALT_8k,CITE-sort
306 | 4,87.93864917755127,-10.519119375728287,12.0,MALT_8k,CITE-sort
307 | 5,94.86282300949097,-10.334480564516857,14.0,MALT_8k,CITE-sort
308 | 6,91.9593575000763,-10.334480564516857,14.0,MALT_8k,CITE-sort
309 | 7,98.18778967857361,-9.964740682097846,16.0,MALT_8k,CITE-sort
310 | 8,104.2493200302124,-9.964740682097846,16.0,MALT_8k,CITE-sort
311 | 9,99.52801489830017,-10.149379493309278,14.0,MALT_8k,CITE-sort
312 | 0,162.16115069389343,-10.78066231422725,12.0,MALT_8k,GMM
313 | 1,158.64816308021545,-11.21033384197538,9.0,MALT_8k,GMM
314 | 2,172.3073332309723,-10.878094176531889,12.0,MALT_8k,GMM
315 | 3,160.98681473731995,-10.719243317607775,13.0,MALT_8k,GMM
316 | 4,165.07948184013367,-10.851954988736965,11.0,MALT_8k,GMM
317 | 5,164.23995232582092,-11.03250866619702,11.0,MALT_8k,GMM
318 | 6,156.05663537979126,-10.84594605238231,12.0,MALT_8k,GMM
319 | 7,166.68856000900269,-10.871247623058528,11.0,MALT_8k,GMM
320 | 8,159.71054553985596,-11.03118670656651,10.0,MALT_8k,GMM
321 | 9,160.99881625175476,-10.983021426242125,10.0,MALT_8k,GMM
322 | 0,1.252284049987793,-10.664905043752391,16.0,MALT_8k,GMM_fixk
323 | 1,1.3892052173614502,-10.845855560036672,12.0,MALT_8k,GMM_fixk
324 | 2,1.602081537246704,-10.66361543142215,15.0,MALT_8k,GMM_fixk
325 | 3,1.984863519668579,-10.616611113282252,16.0,MALT_8k,GMM_fixk
326 | 4,1.2582783699035645,-10.817374274546374,12.0,MALT_8k,GMM_fixk
327 | 5,2.392629861831665,-10.643494736654336,14.0,MALT_8k,GMM_fixk
328 | 6,2.1727559566497803,-10.628945033757466,14.0,MALT_8k,GMM_fixk
329 | 7,1.6800284385681152,-10.573609392280927,16.0,MALT_8k,GMM_fixk
330 | 8,2.473573923110962,-10.587864387007873,16.0,MALT_8k,GMM_fixk
331 | 9,1.652052879333496,-10.646049674875046,14.0,MALT_8k,GMM_fixk
332 | 0,32.25153422355652,-11.17962721969968,49.0,MALT_8k,nGMM
333 | 1,31.582916736602783,-11.20487390143217,50.0,MALT_8k,nGMM
334 | 2,32.2605299949646,-11.170188239143851,49.0,MALT_8k,nGMM
335 | 3,31.862756490707397,-11.183292293735285,48.0,MALT_8k,nGMM
336 | 4,31.8977370262146,-11.234650526891373,48.0,MALT_8k,nGMM
337 | 5,33.487815856933594,-11.301479520501074,45.0,MALT_8k,nGMM
338 | 6,30.58048915863037,-11.214989136550269,50.0,MALT_8k,nGMM
339 | 7,32.620323181152344,-11.195152031158566,48.0,MALT_8k,nGMM
340 | 8,33.890594482421875,-11.174076335338153,49.0,MALT_8k,nGMM
341 | 9,33.67072105407715,-11.294354727575243,44.0,MALT_8k,nGMM
342 | 0,0.33580660820007324,-12.582591731579186,16.0,MALT_8k,nGMM_fixk
343 | 1,0.2268695831298828,-13.164387624746238,12.0,MALT_8k,nGMM_fixk
344 | 2,0.2488558292388916,-12.725037790760195,15.0,MALT_8k,nGMM_fixk
345 | 3,0.2548534870147705,-12.613755613304862,16.0,MALT_8k,nGMM_fixk
346 | 4,0.11993145942687988,-13.399302772496565,12.0,MALT_8k,nGMM_fixk
347 | 5,0.13193511962890625,-12.917129308349562,14.0,MALT_8k,nGMM_fixk
348 | 6,0.19788742065429688,-12.802236991102422,14.0,MALT_8k,nGMM_fixk
349 | 7,0.3148186206817627,-12.607639773411858,16.0,MALT_8k,nGMM_fixk
350 | 8,0.25785279273986816,-12.61815087537536,16.0,MALT_8k,nGMM_fixk
351 | 9,0.22188377380371094,-12.836320077438216,14.0,MALT_8k,nGMM_fixk
352 | 0,49.70354175567627,-10.339389581778589,50.0,MALT_8k,dpgmm
353 | 1,59.39699029922485,-10.312004086512436,50.0,MALT_8k,dpgmm
354 | 2,48.110453844070435,-10.287859167423095,50.0,MALT_8k,dpgmm
355 | 3,39.88716220855713,-10.342858734870253,50.0,MALT_8k,dpgmm
356 | 4,82.59071135520935,-10.313174194319487,50.0,MALT_8k,dpgmm
357 | 5,48.4212646484375,-10.339740513621718,50.0,MALT_8k,dpgmm
358 | 6,53.693257331848145,-10.305461754565231,50.0,MALT_8k,dpgmm
359 | 7,67.16954040527344,-10.326394833530733,50.0,MALT_8k,dpgmm
360 | 8,77.51761603355408,-10.320139729879017,50.0,MALT_8k,dpgmm
361 | 9,49.45867133140564,-10.298376990321723,50.0,MALT_8k,dpgmm
362 | 0,82.48777031898499,-4.970225496294415,14.0,CBMC_8k,CITE-sort
363 | 1,84.576584815979,-4.925015700588989,15.0,CBMC_8k,CITE-sort
364 | 2,85.64297437667847,-4.970225496294415,14.0,CBMC_8k,CITE-sort
365 | 3,85.82585883140564,-4.970225496294415,14.0,CBMC_8k,CITE-sort
366 | 4,88.71920251846313,-4.822622999413,15.0,CBMC_8k,CITE-sort
367 | 5,85.5120496749878,-4.970225496294415,14.0,CBMC_8k,CITE-sort
368 | 6,84.42366147041321,-4.970225496294415,14.0,CBMC_8k,CITE-sort
369 | 7,83.03945541381836,-4.970225496294415,14.0,CBMC_8k,CITE-sort
370 | 8,84.73048639297485,-4.970225496294415,14.0,CBMC_8k,CITE-sort
371 | 9,89.86754655838013,-4.774042780518445,16.0,CBMC_8k,CITE-sort
372 | 0,60.67326259613037,-5.338610559049961,14.0,CBMC_8k,GMM
373 | 1,62.55017423629761,-5.327642232587172,14.0,CBMC_8k,GMM
374 | 2,59.244077920913696,-5.221029908669777,16.0,CBMC_8k,GMM
375 | 3,61.91255235671997,-5.19289199869492,17.0,CBMC_8k,GMM
376 | 4,59.807756185531616,-5.378078059387416,14.0,CBMC_8k,GMM
377 | 5,62.438249826431274,-5.324600227554156,15.0,CBMC_8k,GMM
378 | 6,59.465951919555664,-5.257326024047886,16.0,CBMC_8k,GMM
379 | 7,62.204383850097656,-5.384429447050616,13.0,CBMC_8k,GMM
380 | 8,59.13414263725281,-5.303643582809363,15.0,CBMC_8k,GMM
381 | 9,58.410555601119995,-5.359907671418008,14.0,CBMC_8k,GMM
382 | 0,0.4987154006958008,-5.318481191595465,14.0,CBMC_8k,GMM_fixk
383 | 1,0.780552864074707,-5.28135052374074,15.0,CBMC_8k,GMM_fixk
384 | 2,0.41576290130615234,-5.384607513553807,14.0,CBMC_8k,GMM_fixk
385 | 3,0.40477871894836426,-5.376723987110303,14.0,CBMC_8k,GMM_fixk
386 | 4,0.44574522972106934,-5.308714250487742,15.0,CBMC_8k,GMM_fixk
387 | 5,0.30182743072509766,-5.3898127309971,14.0,CBMC_8k,GMM_fixk
388 | 6,0.560678243637085,-5.353808478549749,14.0,CBMC_8k,GMM_fixk
389 | 7,0.5047101974487305,-5.346889684520823,14.0,CBMC_8k,GMM_fixk
390 | 8,0.4087657928466797,-5.338011181859372,14.0,CBMC_8k,GMM_fixk
391 | 9,0.5027120113372803,-5.248688020174258,16.0,CBMC_8k,GMM_fixk
392 | 0,24.397029638290405,-6.2279743860146715,50.0,CBMC_8k,nGMM
393 | 1,24.86376404762268,-6.286328829454328,49.0,CBMC_8k,nGMM
394 | 2,24.761822938919067,-6.304667013372216,49.0,CBMC_8k,nGMM
395 | 3,26.78266477584839,-6.293575089003526,50.0,CBMC_8k,nGMM
396 | 4,25.76124930381775,-6.32647488892547,47.0,CBMC_8k,nGMM
397 | 5,24.554939031600952,-6.229492287427068,50.0,CBMC_8k,nGMM
398 | 6,23.87632966041565,-6.279360804260311,50.0,CBMC_8k,nGMM
399 | 7,23.980270862579346,-6.328715170276957,48.0,CBMC_8k,nGMM
400 | 8,26.58477783203125,-6.257984565811174,50.0,CBMC_8k,nGMM
401 | 9,25.429440021514893,-6.270241702230268,49.0,CBMC_8k,nGMM
402 | 0,0.11693239212036133,-8.38443259883629,14.0,CBMC_8k,nGMM_fixk
403 | 1,0.11793279647827148,-8.39593961250455,15.0,CBMC_8k,nGMM_fixk
404 | 2,0.08295154571533203,-8.38986093480687,14.0,CBMC_8k,nGMM_fixk
405 | 3,0.12292909622192383,-8.482321174496342,14.0,CBMC_8k,nGMM_fixk
406 | 4,0.16690397262573242,-8.222030381194092,15.0,CBMC_8k,nGMM_fixk
407 | 5,0.10094189643859863,-8.373626097978223,14.0,CBMC_8k,nGMM_fixk
408 | 6,0.08994841575622559,-8.400707110006236,14.0,CBMC_8k,nGMM_fixk
409 | 7,0.10893726348876953,-8.369739140656122,14.0,CBMC_8k,nGMM_fixk
410 | 8,0.12592792510986328,-8.583404484295043,14.0,CBMC_8k,nGMM_fixk
411 | 9,0.24486041069030762,-8.17223767640293,16.0,CBMC_8k,nGMM_fixk
412 | 0,26.914589881896973,-4.837041146685309,50.0,CBMC_8k,dpgmm
413 | 1,17.987701177597046,-4.825254321767748,50.0,CBMC_8k,dpgmm
414 | 2,30.537515878677368,-4.812883948532638,50.0,CBMC_8k,dpgmm
415 | 3,16.31166124343872,-4.8260832952804265,50.0,CBMC_8k,dpgmm
416 | 4,19.667738914489746,-4.827847765340304,50.0,CBMC_8k,dpgmm
417 | 5,14.024969577789307,-4.818149091948307,50.0,CBMC_8k,dpgmm
418 | 6,21.808513402938843,-4.827393844981892,50.0,CBMC_8k,dpgmm
419 | 7,21.315794706344604,-4.826081350901387,50.0,CBMC_8k,dpgmm
420 | 8,34.81006860733032,-4.827883090808033,50.0,CBMC_8k,dpgmm
421 | 9,34.777087926864624,-4.800197197416694,50.0,CBMC_8k,dpgmm
422 | 0,131.48872256278992,-4.824296845335957,25.0,PBMC_16k,CITE-sort
423 | 1,134.248144865036,-5.170327640657248,23.0,PBMC_16k,CITE-sort
424 | 2,153.29023241996765,-4.622667569567744,27.0,PBMC_16k,CITE-sort
425 | 3,148.85877871513367,-4.714098794409115,25.0,PBMC_16k,CITE-sort
426 | 4,138.88249158859253,-5.248031352743899,21.0,PBMC_16k,CITE-sort
427 | 5,151.88603711128235,-4.628945027215183,28.0,PBMC_16k,CITE-sort
428 | 6,155.8997368812561,-4.601722116091284,28.0,PBMC_16k,CITE-sort
429 | 7,139.5650990009308,-4.810842836864783,25.0,PBMC_16k,CITE-sort
430 | 8,138.50376057624817,-4.8428604057279685,25.0,PBMC_16k,CITE-sort
431 | 9,153.2203996181488,-4.5555910214670465,28.0,PBMC_16k,CITE-sort
432 | 0,151.23140001296997,-6.151465680535652,26.0,PBMC_16k,GMM
433 | 1,142.63832092285156,-6.177100083396359,25.0,PBMC_16k,GMM
434 | 2,138.85249733924866,-6.231229949495253,22.0,PBMC_16k,GMM
435 | 3,144.43529105186462,-6.21842403857333,22.0,PBMC_16k,GMM
436 | 4,146.25425124168396,-6.271936250794268,21.0,PBMC_16k,GMM
437 | 5,152.47869396209717,-6.185376585620828,25.0,PBMC_16k,GMM
438 | 6,146.16531109809875,-6.2142301946581515,23.0,PBMC_16k,GMM
439 | 7,146.71299743652344,-6.292184652989888,20.0,PBMC_16k,GMM
440 | 8,149.17957639694214,-6.217817180692061,23.0,PBMC_16k,GMM
441 | 9,153.0943329334259,-6.2561118327358685,22.0,PBMC_16k,GMM
442 | 0,3.302109956741333,-6.167149522908562,25.0,PBMC_16k,GMM_fixk
443 | 1,2.8083910942077637,-6.227611206044761,23.0,PBMC_16k,GMM_fixk
444 | 2,3.3340916633605957,-6.130149880123634,27.0,PBMC_16k,GMM_fixk
445 | 3,2.936318874359131,-6.170888958309285,25.0,PBMC_16k,GMM_fixk
446 | 4,2.600506067276001,-6.253061840300715,21.0,PBMC_16k,GMM_fixk
447 | 5,4.07366681098938,-6.1187655429051775,28.0,PBMC_16k,GMM_fixk
448 | 6,3.296112537384033,-6.139985798808145,28.0,PBMC_16k,GMM_fixk
449 | 7,2.9353177547454834,-6.185204323654343,25.0,PBMC_16k,GMM_fixk
450 | 8,2.7274365425109863,-6.168232315138406,25.0,PBMC_16k,GMM_fixk
451 | 9,3.511990785598755,-6.123503587453922,28.0,PBMC_16k,GMM_fixk
452 | 0,38.71083426475525,-6.379849891641167,48.0,PBMC_16k,nGMM
453 | 1,38.449986696243286,-6.376821446147685,49.0,PBMC_16k,nGMM
454 | 2,38.604896068573,-6.40737439570078,48.0,PBMC_16k,nGMM
455 | 3,38.98268151283264,-6.352696487397852,49.0,PBMC_16k,nGMM
456 | 4,38.36903095245361,-6.3716690156441835,50.0,PBMC_16k,nGMM
457 | 5,38.76480484008789,-6.364510993512695,50.0,PBMC_16k,nGMM
458 | 6,39.50738000869751,-6.354521920871351,49.0,PBMC_16k,nGMM
459 | 7,38.842759132385254,-6.369662674404592,48.0,PBMC_16k,nGMM
460 | 8,38.83576488494873,-6.423211994218609,45.0,PBMC_16k,nGMM
461 | 9,38.5829074382782,-6.3840953086766286,47.0,PBMC_16k,nGMM
462 | 0,0.6576240062713623,-6.785083664655333,25.0,PBMC_16k,nGMM_fixk
463 | 1,0.5037086009979248,-6.8704074699934905,23.0,PBMC_16k,nGMM_fixk
464 | 2,0.6106505393981934,-6.7397046065996005,27.0,PBMC_16k,nGMM_fixk
465 | 3,0.9654474258422852,-6.769382740398923,25.0,PBMC_16k,nGMM_fixk
466 | 4,0.4957153797149658,-6.941616866643075,21.0,PBMC_16k,nGMM_fixk
467 | 5,0.8255264759063721,-6.7202072370311505,28.0,PBMC_16k,nGMM_fixk
468 | 6,0.8914902210235596,-6.734181354395366,28.0,PBMC_16k,nGMM_fixk
469 | 7,0.6896049976348877,-6.8174893307630136,25.0,PBMC_16k,nGMM_fixk
470 | 8,0.8914885520935059,-6.800764054821048,25.0,PBMC_16k,nGMM_fixk
471 | 9,0.7515683174133301,-6.703480739455203,28.0,PBMC_16k,nGMM_fixk
472 | 0,108.17206335067749,-6.048718855431777,50.0,PBMC_16k,dpgmm
473 | 1,109.62023544311523,-6.047686785818196,50.0,PBMC_16k,dpgmm
474 | 2,85.39810633659363,-6.048645567224814,50.0,PBMC_16k,dpgmm
475 | 3,80.45993161201477,-6.049458774154263,50.0,PBMC_16k,dpgmm
476 | 4,63.30175542831421,-6.049415431938901,50.0,PBMC_16k,dpgmm
477 | 5,86.23262572288513,-6.037890865010017,50.0,PBMC_16k,dpgmm
478 | 6,98.0288712978363,-6.051989896080837,50.0,PBMC_16k,dpgmm
479 | 7,94.77173709869385,-6.053023460720795,50.0,PBMC_16k,dpgmm
480 | 8,59.19510841369629,-6.0452716926611085,50.0,PBMC_16k,dpgmm
481 | 9,78.02951526641846,-6.039499627171305,50.0,PBMC_16k,dpgmm
482 |
--------------------------------------------------------------------------------
/performance/record_alldb.csv:
--------------------------------------------------------------------------------
1 | ,time,ll,bic,n_component,DB,method
2 | 0,1.2234070301055908,-11.122508339097205,18120.598500175132,15.0,PBMC_1k,CITE-sort
3 | 1,1.3422009944915771,-10.599447969967613,17979.10670447445,19.0,PBMC_1k,CITE-sort
4 | 2,1.3595807552337646,-10.580384296272932,17951.921905785835,19.0,PBMC_1k,CITE-sort
5 | 3,1.2019219398498535,-10.797541537800555,18110.490059534695,18.0,PBMC_1k,CITE-sort
6 | 4,1.0659277439117432,-11.520668331656603,18386.178504225776,13.0,PBMC_1k,CITE-sort
7 | 5,1.2330269813537598,-10.884964282200043,18084.056820378835,17.0,PBMC_1k,CITE-sort
8 | 6,1.0871169567108154,-11.210810373520744,18246.517201263097,15.0,PBMC_1k,CITE-sort
9 | 7,1.2957079410552979,-10.559284814490477,17921.83404476405,19.0,PBMC_1k,CITE-sort
10 | 8,1.344512939453125,-10.747012821200016,18038.43610966233,18.0,PBMC_1k,CITE-sort
11 | 9,1.3971610069274902,-10.908992508838459,18118.321071565217,17.0,PBMC_1k,CITE-sort
12 | 0,0.06934595108032227,-9.391548425467587,21070.52750958933,15.0,PBMC_1k,GMM
13 | 1,0.17578816413879395,-8.432751674316203,21753.759768630494,19.0,PBMC_1k,GMM
14 | 2,0.11098718643188477,-8.613242757652294,22009.233753526605,19.0,PBMC_1k,GMM
15 | 3,0.15456628799438477,-8.906608827994178,21917.34935044522,18.0,PBMC_1k,GMM
16 | 4,0.0856637954711914,-9.958450318552085,20854.826033934376,13.0,PBMC_1k,GMM
17 | 5,0.0980536937713623,-8.95775671436817,21477.677566627026,17.0,PBMC_1k,GMM
18 | 6,0.11914801597595215,-9.423531116544487,21117.317180993778,15.0,PBMC_1k,GMM
19 | 7,0.09374594688415527,-8.560323153610016,21935.238604155347,19.0,PBMC_1k,GMM
20 | 8,0.11969304084777832,-8.761752986297514,21710.63558117411,18.0,PBMC_1k,GMM
21 | 9,0.08350276947021484,-9.11605125005756,21703.562246918555,17.0,PBMC_1k,GMM
22 | 0,0.016718149185180664,-12.228966785816784,19697.072733446294,15.0,PBMC_1k,nGMM
23 | 1,0.025583267211914062,-11.594671772544062,19397.97519151494,19.0,PBMC_1k,nGMM
24 | 2,0.02981114387512207,-11.814677063212406,19711.51782998137,19.0,PBMC_1k,nGMM
25 | 3,0.01725292205810547,-11.925040189720319,19717.787536750096,18.0,PBMC_1k,nGMM
26 | 4,0.013669252395629883,-12.560536341854464,19869.01292539533,13.0,PBMC_1k,nGMM
27 | 5,0.020405054092407227,-11.955083622692364,19608.56844846602,17.0,PBMC_1k,nGMM
28 | 6,0.01281285285949707,-12.197622142865846,19653.335425410318,15.0,PBMC_1k,nGMM
29 | 7,0.018594741821289062,-11.828031980611136,19730.34262987801,19.0,PBMC_1k,nGMM
30 | 8,0.018755197525024414,-11.63414537780662,19303.315537517778,18.0,PBMC_1k,nGMM
31 | 9,0.015864133834838867,-11.989330130360914,19658.157454504828,17.0,PBMC_1k,nGMM
32 | 0,5.529792070388794,-16.91493898317407,191860.99980536764,43.0,PBMC_5k,CITE-sort
33 | 1,5.566227912902832,-17.187725669048742,193387.41905180132,39.0,PBMC_5k,CITE-sort
34 | 2,5.876396894454956,-16.992907777426087,193347.30644981583,45.0,PBMC_5k,CITE-sort
35 | 3,6.318313837051392,-17.05918118363452,195044.93275091852,48.0,PBMC_5k,CITE-sort
36 | 4,7.204071044921875,-16.39752962601377,189437.76554038146,52.0,PBMC_5k,CITE-sort
37 | 5,6.371899843215942,-17.298500969848494,195886.09929352903,43.0,PBMC_5k,CITE-sort
38 | 6,6.815957069396973,-17.43462256473374,197648.61036903856,44.0,PBMC_5k,CITE-sort
39 | 7,7.576941967010498,-16.649744678369668,190080.20394710155,46.0,PBMC_5k,CITE-sort
40 | 8,6.156139135360718,-17.383231381499122,195773.1070570394,40.0,PBMC_5k,CITE-sort
41 | 9,6.679594993591309,-16.79224664803924,192243.72173438163,48.0,PBMC_5k,CITE-sort
42 | 0,1.6649901866912842,-14.844552393295237,233104.98341420465,43.0,PBMC_5k,GMM
43 | 1,1.0674920082092285,-15.037024547766473,227931.69830776658,39.0,PBMC_5k,GMM
44 | 2,2.366248846054077,-14.82285979406439,236478.72132183344,45.0,PBMC_5k,GMM
45 | 3,2.8846938610076904,-14.559256840564421,239108.96140165094,48.0,PBMC_5k,GMM
46 | 4,3.003420829772949,-14.306739501113247,243649.47077560163,52.0,PBMC_5k,GMM
47 | 5,1.7910127639770508,-14.686577174542666,231450.33086139915,43.0,PBMC_5k,GMM
48 | 6,2.4024720191955566,-14.643450337378097,232796.6201212383,44.0,PBMC_5k,GMM
49 | 7,1.8464860916137695,-14.667242647587006,236641.7195371195,46.0,PBMC_5k,GMM
50 | 8,1.3643510341644287,-14.927212459043272,228577.29615223384,40.0,PBMC_5k,GMM
51 | 9,2.4785399436950684,-14.413919172519448,237584.03507317504,48.0,PBMC_5k,GMM
52 | 0,0.16876792907714844,-17.38843703779885,196822.3291029122,43.0,PBMC_5k,nGMM
53 | 1,0.29349207878112793,-17.51402190353099,196804.70850178157,39.0,PBMC_5k,nGMM
54 | 2,0.24037885665893555,-17.29008998736503,196456.40502038345,45.0,PBMC_5k,nGMM
55 | 3,0.2437589168548584,-17.10378499463751,195501.2237193164,48.0,PBMC_5k,nGMM
56 | 4,0.3140268325805664,-17.026432628183088,196034.51057826742,52.0,PBMC_5k,nGMM
57 | 5,0.23077607154846191,-17.311941894049042,196019.5000339507,43.0,PBMC_5k,nGMM
58 | 6,0.250927209854126,-17.258264687618446,195792.26132085206,44.0,PBMC_5k,nGMM
59 | 7,0.22651004791259766,-17.245524790302447,196324.12175886668,46.0,PBMC_5k,nGMM
60 | 8,0.23640108108520508,-17.408061851528625,196026.13892767506,40.0,PBMC_5k,nGMM
61 | 9,0.20556211471557617,-17.13414245065703,195825.1496236616,48.0,PBMC_5k,nGMM
62 | 0,5.518562078475952,-9.438671093443384,162813.61062683674,64.0,PBMC_8k,CITE-sort
63 | 1,5.238887786865234,-9.527185491367458,164205.94210618243,64.0,PBMC_8k,CITE-sort
64 | 2,5.150882005691528,-9.429811058963818,162898.4967298605,65.0,PBMC_8k,CITE-sort
65 | 3,4.848948001861572,-9.477606068105388,163426.05777827007,64.0,PBMC_8k,CITE-sort
66 | 4,5.218626976013184,-9.540687806052675,163521.31573463164,60.0,PBMC_8k,CITE-sort
67 | 5,4.935317039489746,-9.599496430758604,163997.86651048128,58.0,PBMC_8k,CITE-sort
68 | 6,4.948575973510742,-9.795641854357607,167083.2340236936,58.0,PBMC_8k,CITE-sort
69 | 7,5.144322633743286,-9.230961086041829,160219.0955465722,67.0,PBMC_8k,CITE-sort
70 | 8,4.917170763015747,-9.617264540039264,164725.86776024068,60.0,PBMC_8k,CITE-sort
71 | 9,4.8958821296691895,-9.601601862078336,165152.25717207725,63.0,PBMC_8k,CITE-sort
72 | 0,3.542236804962158,-8.505365806753996,186012.32876150298,64.0,PBMC_8k,GMM
73 | 1,3.0150198936462402,-8.510668219698958,186086.48126502425,64.0,PBMC_8k,GMM
74 | 2,2.065992832183838,-8.47587075076912,186352.87436215894,65.0,PBMC_8k,GMM
75 | 3,2.717287063598633,-8.49005107506052,185769.4078329935,64.0,PBMC_8k,GMM
76 | 4,2.4877541065216064,-8.563654062586876,183661.35173620144,60.0,PBMC_8k,GMM
77 | 5,2.2299141883850098,-8.586341323927163,182386.9540043941,58.0,PBMC_8k,GMM
78 | 6,2.152177095413208,-8.614111537042746,182822.8978000642,58.0,PBMC_8k,GMM
79 | 7,3.0803709030151367,-8.443504929276104,187493.258575882,67.0,PBMC_8k,GMM
80 | 8,1.8948559761047363,-8.552899113517682,183494.69225233048,60.0,PBMC_8k,GMM
81 | 9,2.120100975036621,-8.510342519139735,185271.44473235984,63.0,PBMC_8k,GMM
82 | 0,0.47810983657836914,-9.721009822112643,167242.9225783066,64.0,PBMC_8k,nGMM
83 | 1,0.4756009578704834,-9.749047584219634,167687.2338148695,64.0,PBMC_8k,nGMM
84 | 2,0.47623300552368164,-9.737309160620608,167724.37832321285,65.0,PBMC_8k,nGMM
85 | 3,0.511469841003418,-9.726102128056768,167325.1309656989,64.0,PBMC_8k,nGMM
86 | 4,0.5160980224609375,-9.790378216486198,167441.11678066733,60.0,PBMC_8k,nGMM
87 | 5,0.41063928604125977,-9.85447361673597,167996.43592706084,58.0,PBMC_8k,nGMM
88 | 6,0.49051523208618164,-9.783789107146575,166886.08268799845,58.0,PBMC_8k,nGMM
89 | 7,0.5964858531951904,-9.726101110527079,167995.91832828856,67.0,PBMC_8k,nGMM
90 | 8,0.4071769714355469,-9.852173429322423,168402.72006131237,60.0,PBMC_8k,nGMM
91 | 9,0.33146119117736816,-9.779301694005651,167933.24796642642,63.0,PBMC_8k,nGMM
92 | 0,2.1575942039489746,-8.138739673093426,140179.62549337992,19.0,MALT_8k,CITE-sort
93 | 1,2.227728843688965,-8.922936855925352,154059.80240214276,23.0,MALT_8k,CITE-sort
94 | 2,2.343740940093994,-8.13794607903107,140166.27406687484,19.0,MALT_8k,CITE-sort
95 | 3,2.3254079818725586,-8.246200163057445,141815.82990033497,18.0,MALT_8k,CITE-sort
96 | 4,3.141963005065918,-8.915490803426332,155823.34965309518,34.0,MALT_8k,CITE-sort
97 | 5,2.9481611251831055,-8.030433983019996,140246.3102017805,30.0,MALT_8k,CITE-sort
98 | 6,2.876828908920288,-7.782560647041819,136247.80007348326,31.0,MALT_8k,CITE-sort
99 | 7,2.4367339611053467,-7.97465601269262,137934.21461939564,22.0,MALT_8k,CITE-sort
100 | 8,2.2418417930603027,-8.138935994492234,140182.92840459346,19.0,MALT_8k,CITE-sort
101 | 9,2.3312768936157227,-8.922909613328,154059.3440726849,23.0,MALT_8k,CITE-sort
102 | 0,0.46611690521240234,-7.6671368017252135,138414.1099555053,19.0,MALT_8k,GMM
103 | 1,0.4600691795349121,-7.592912384700842,139152.7301840142,23.0,MALT_8k,GMM
104 | 2,0.5721631050109863,-7.66235849156766,138333.79762451886,19.0,MALT_8k,GMM
105 | 3,0.38485026359558105,-7.6836761189961225,138194.63889813964,18.0,MALT_8k,GMM
106 | 4,0.8844120502471924,-7.415133148317597,141632.45732113929,34.0,MALT_8k,GMM
107 | 5,0.6158289909362793,-7.483651829160183,140793.14569936835,30.0,MALT_8k,GMM
108 | 6,0.626539945602417,-7.462304113577804,140930.3085264492,31.0,MALT_8k,GMM
109 | 7,0.5197019577026367,-7.625773664539795,139205.6743213384,22.0,MALT_8k,GMM
110 | 8,0.38326501846313477,-7.659503236269939,138283.92799364313,19.0,MALT_8k,GMM
111 | 9,0.5138049125671387,-7.591387664140254,139127.59931757045,23.0,MALT_8k,GMM
112 | 0,0.1121678352355957,-8.592168064831728,147799.10108325208,19.0,MALT_8k,nGMM
113 | 1,0.11516475677490234,-8.388677086350944,145058.16402031583,23.0,MALT_8k,nGMM
114 | 2,0.10192298889160156,-8.549684730617042,147081.83850916644,19.0,MALT_8k,nGMM
115 | 3,0.14873814582824707,-8.667081021205721,148885.22874656002,18.0,MALT_8k,nGMM
116 | 4,0.20810818672180176,-8.135204373051037,142682.1087272623,34.0,MALT_8k,nGMM
117 | 5,0.18267393112182617,-8.204251432404153,143159.43351469105,30.0,MALT_8k,nGMM
118 | 6,0.24073505401611328,-8.192342137051503,143129.25976990396,31.0,MALT_8k,nGMM
119 | 7,0.11271023750305176,-8.419873076011498,145416.0084224086,22.0,MALT_8k,nGMM
120 | 8,0.09589195251464844,-8.53846610987413,146897.02971192208,19.0,MALT_8k,nGMM
121 | 9,0.14923810958862305,-8.407899930568272,145384.24865985673,23.0,MALT_8k,nGMM
122 | 0,1.5813980102539062,-6.364109836495208,112596.86943478562,19.0,CBMC_8k,CITE-sort
123 | 1,1.5774438381195068,-6.3673479587837996,112498.62986562813,18.0,CBMC_8k,CITE-sort
124 | 2,1.539851188659668,-6.367761693867996,112505.76017606918,18.0,CBMC_8k,CITE-sort
125 | 3,1.6679980754852295,-6.34040005469012,112188.25505515673,19.0,CBMC_8k,CITE-sort
126 | 4,1.5511629581451416,-6.360399431970839,112532.92432321265,19.0,CBMC_8k,CITE-sort
127 | 5,1.6738181114196777,-6.3647364950804,112607.66926884283,19.0,CBMC_8k,CITE-sort
128 | 6,1.5708410739898682,-6.355419664913761,112601.14838643002,20.0,CBMC_8k,CITE-sort
129 | 7,1.5760369300842285,-6.357796351725545,112642.10820694432,20.0,CBMC_8k,CITE-sort
130 | 8,1.464735984802246,-6.367761693867996,112505.76017606918,18.0,CBMC_8k,CITE-sort
131 | 9,1.5899713039398193,-6.32961681581974,112310.50745382276,21.0,CBMC_8k,CITE-sort
132 | 0,0.6619877815246582,-4.510979393152983,85469.61130894872,19.0,CBMC_8k,GMM
133 | 1,0.33260416984558105,-4.60923707895074,86752.91326746752,18.0,CBMC_8k,GMM
134 | 2,0.5183930397033691,-4.57249215155964,86125.12276259843,18.0,CBMC_8k,GMM
135 | 3,0.41444897651672363,-4.598139084551907,86975.21086818096,19.0,CBMC_8k,GMM
136 | 4,0.47031712532043457,-4.575349654426403,86578.95923897572,19.0,CBMC_8k,GMM
137 | 5,0.4637570381164551,-4.565348853628528,86410.23337112383,19.0,CBMC_8k,GMM
138 | 6,0.3703291416168213,-4.524238921480881,86103.61132192895,20.0,CBMC_8k,GMM
139 | 7,0.44608592987060547,-4.5440882313597735,86455.32499173887,20.0,CBMC_8k,GMM
140 | 8,0.4300708770751953,-4.663088073844756,87680.80151608032,18.0,CBMC_8k,GMM
141 | 9,0.3334038257598877,-4.532936659473049,86659.01521307034,21.0,CBMC_8k,GMM
142 | 0,0.12944817543029785,-6.057223833590501,107298.58690010864,19.0,CBMC_8k,nGMM
143 | 1,0.10278129577636719,-6.188431521796373,109400.40181947568,18.0,CBMC_8k,nGMM
144 | 2,0.10554099082946777,-6.120164468368747,108229.25988782156,18.0,CBMC_8k,nGMM
145 | 3,0.09456467628479004,-6.040737858995399,107012.36836988111,19.0,CBMC_8k,nGMM
146 | 4,0.08441495895385742,-6.1769925054567985,109358.50227287639,19.0,CBMC_8k,nGMM
147 | 5,0.10467410087585449,-6.0628602173089226,107391.16892016676,19.0,CBMC_8k,nGMM
148 | 6,0.10837197303771973,-6.03670324695701,107096.7686853721,20.0,CBMC_8k,nGMM
149 | 7,0.09143185615539551,-5.991699539515126,106319.03245007277,20.0,CBMC_8k,nGMM
150 | 8,0.09178495407104492,-6.085520395418077,107630.15162431629,18.0,CBMC_8k,nGMM
151 | 9,0.13098692893981934,-5.949626117580737,105740.3608819197,21.0,CBMC_8k,nGMM
152 | 0,2.5118660926818848,-4.622624318068818,151357.64048860376,30.0,PBMC_16k,CITE-sort
153 | 1,2.182826042175293,-5.422101895636898,175532.73375749,23.0,PBMC_16k,CITE-sort
154 | 2,2.1419589519500732,-5.353554822535321,172868.11781864305,20.0,PBMC_16k,CITE-sort
155 | 3,2.5556678771972656,-5.300773603717451,171524.9022070207,22.0,PBMC_16k,CITE-sort
156 | 4,2.3147940635681152,-5.3531155629512295,173182.99079162834,22.0,PBMC_16k,CITE-sort
157 | 5,2.499323844909668,-4.65552766288677,151742.3769695666,26.0,PBMC_16k,CITE-sort
158 | 6,2.1241798400878906,-4.880281503964596,158040.1595520047,21.0,PBMC_16k,CITE-sort
159 | 7,2.152833938598633,-5.4451787511451855,176263.76238628154,23.0,PBMC_16k,CITE-sort
160 | 8,2.1633620262145996,-5.502401365071444,177747.67251214743,21.0,PBMC_16k,CITE-sort
161 | 9,2.31435489654541,-4.8439605286700225,157382.76545375836,24.0,PBMC_16k,CITE-sort
162 | 0,1.4144580364227295,-5.124886394260993,175365.1889132596,30.0,PBMC_16k,GMM
163 | 1,0.8795738220214844,-5.247281883187149,176205.95219128366,23.0,PBMC_16k,GMM
164 | 2,0.7388548851013184,-5.348087190138184,178092.651414541,20.0,PBMC_16k,GMM
165 | 3,0.7176558971405029,-5.302812571086369,177520.39248609767,22.0,PBMC_16k,GMM
166 | 4,0.9349429607391357,-5.25161498397784,175914.07391252043,22.0,PBMC_16k,GMM
167 | 5,0.803678035736084,-5.251041933734646,177622.75427598614,26.0,PBMC_16k,GMM
168 | 6,0.7691028118133545,-5.281019420338424,176394.5392779519,21.0,PBMC_16k,GMM
169 | 7,0.7854559421539307,-5.270179898208045,176922.41885804676,23.0,PBMC_16k,GMM
170 | 8,0.7193880081176758,-5.3287302931610485,177901.53403452266,21.0,PBMC_16k,GMM
171 | 9,1.0948500633239746,-5.24096548882081,176431.03614872135,24.0,PBMC_16k,GMM
172 | 0,0.4008901119232178,-5.5584422812945515,180978.25149073126,30.0,PBMC_16k,nGMM
173 | 1,0.2562389373779297,-5.746965671212717,185816.3434176792,23.0,PBMC_16k,nGMM
174 | 2,0.16918683052062988,-5.855950550695162,188765.34370402092,20.0,PBMC_16k,nGMM
175 | 3,0.28665900230407715,-5.782957060395885,186788.73678983096,22.0,PBMC_16k,nGMM
176 | 4,0.3226139545440674,-5.806195436457849,187508.72556868164,22.0,PBMC_16k,nGMM
177 | 5,0.29004693031311035,-5.634555563700794,182733.68691707938,26.0,PBMC_16k,nGMM
178 | 6,0.16335010528564453,-5.8535466736657,188848.0856092121,21.0,PBMC_16k,nGMM
179 | 7,0.21469879150390625,-5.743257931506316,185672.74806116667,23.0,PBMC_16k,nGMM
180 | 8,0.21370887756347656,-5.81137711137325,187511.01631720006,21.0,PBMC_16k,nGMM
181 | 9,0.2974238395690918,-5.696305582413209,184361.43877733074,24.0,PBMC_16k,nGMM
182 |
--------------------------------------------------------------------------------
/performance/record_full_alldb.csv:
--------------------------------------------------------------------------------
1 | ,time,ll,bic,n_component,DB,method
2 | 0,1.4083750247955322,-5.22614012126497,17694.297347349733,20.0,PBMC_1k,CITE-sort
3 | 1,1.1967318058013916,-6.834202666357369,17425.29678268992,15.0,PBMC_1k,CITE-sort
4 | 2,1.3504319190979004,-6.265277074706867,17638.847990580936,17.0,PBMC_1k,CITE-sort
5 | 3,1.2247920036315918,-6.375117306926319,17795.48016172587,17.0,PBMC_1k,CITE-sort
6 | 4,1.406689167022705,-5.927471739871177,18181.97668468987,19.0,PBMC_1k,CITE-sort
7 | 5,1.160640001296997,-6.933182248691533,18078.861217890753,16.0,PBMC_1k,CITE-sort
8 | 6,1.2971611022949219,-6.491712095554915,17449.324779517934,16.0,PBMC_1k,CITE-sort
9 | 7,1.2381787300109863,-6.1806809547366965,17518.21392350347,17.0,PBMC_1k,CITE-sort
10 | 8,1.3634178638458252,-5.54105350167419,18655.783378605596,21.0,PBMC_1k,CITE-sort
11 | 9,1.24202299118042,-6.577575012771381,18084.18485026093,17.0,PBMC_1k,CITE-sort
12 | 0,0.21073102951049805,-8.529966559712106,22401.603397840703,20.0,PBMC_1k,GMM
13 | 1,0.061823129653930664,-9.539507803253352,21282.40387053851,15.0,PBMC_1k,GMM
14 | 2,0.14079070091247559,-9.644085019856453,22456.349203700607,17.0,PBMC_1k,GMM
15 | 3,0.09861302375793457,-9.510790115912984,22266.383567939105,17.0,PBMC_1k,GMM
16 | 4,0.13012290000915527,-8.746533173662325,22201.70051304868,19.0,PBMC_1k,GMM
17 | 5,0.16895508766174316,-9.177449082611146,21278.880041425553,16.0,PBMC_1k,GMM
18 | 6,0.11718511581420898,-9.463803564231196,21687.477995720026,16.0,PBMC_1k,GMM
19 | 7,0.12487006187438965,-8.810452482167188,21267.711649445482,17.0,PBMC_1k,GMM
20 | 8,0.26333117485046387,-8.100522728490219,22304.106579471587,21.0,PBMC_1k,GMM
21 | 9,0.10646820068359375,-9.185367037079187,21802.197071284805,17.0,PBMC_1k,GMM
22 | 0,0.030717134475708008,-11.574240290860638,19519.393501782622,20.0,PBMC_1k,nGMM
23 | 1,0.01583719253540039,-12.582327588717853,20201.545868265435,15.0,PBMC_1k,nGMM
24 | 2,0.01724696159362793,-12.01715941481364,19697.837400564586,17.0,PBMC_1k,nGMM
25 | 3,0.016399860382080078,-11.892201843087133,19519.84070765181,17.0,PBMC_1k,nGMM
26 | 4,0.018496036529541016,-11.88263154626566,19808.43357921994,19.0,PBMC_1k,nGMM
27 | 5,0.026629209518432617,-12.150261839948268,19736.454511739288,16.0,PBMC_1k,nGMM
28 | 6,0.024701356887817383,-12.134395995184603,19714.265925310712,16.0,PBMC_1k,nGMM
29 | 7,0.024443864822387695,-12.038335544520606,19727.715846408726,17.0,PBMC_1k,nGMM
30 | 8,0.024069786071777344,-11.576145594733852,19673.081128548773,21.0,PBMC_1k,nGMM
31 | 9,0.016176223754882812,-12.223796587435212,19992.872123428424,17.0,PBMC_1k,nGMM
32 | 0,6.781627893447876,-9.998504538201088,193053.82826045481,49.0,PBMC_5k,CITE-sort
33 | 1,6.160115003585815,-10.774060071144454,193997.56214166566,45.0,PBMC_5k,CITE-sort
34 | 2,8.255143880844116,-9.385251516861697,204605.71575826133,59.0,PBMC_5k,CITE-sort
35 | 3,6.1250319480896,-10.476676137487793,194474.28808262106,47.0,PBMC_5k,CITE-sort
36 | 4,6.1504247188568115,-10.667226659512071,194675.1887903696,46.0,PBMC_5k,CITE-sort
37 | 5,6.686645030975342,-9.608489099356111,197954.68859708664,54.0,PBMC_5k,CITE-sort
38 | 6,5.918744087219238,-10.315595836758309,194582.64787714006,48.0,PBMC_5k,CITE-sort
39 | 7,6.57272481918335,-9.297901358731517,200091.59025809477,57.0,PBMC_5k,CITE-sort
40 | 8,5.612346887588501,-10.478001633603006,192689.46136847988,46.0,PBMC_5k,CITE-sort
41 | 9,5.69504714012146,-10.920122385864424,195530.340072337,45.0,PBMC_5k,CITE-sort
42 | 0,2.291753053665161,-14.358126790040503,238792.89874423892,49.0,PBMC_5k,GMM
43 | 1,1.8819971084594727,-14.70760453280036,235266.0466927468,45.0,PBMC_5k,GMM
44 | 2,2.5993258953094482,-13.897761531325635,251950.6764950317,59.0,PBMC_5k,GMM
45 | 3,1.9641737937927246,-14.583750765038003,237559.38821198785,47.0,PBMC_5k,GMM
46 | 4,2.1672379970550537,-14.491249214074706,234796.6223725828,46.0,PBMC_5k,GMM
47 | 5,2.1744298934936523,-14.408898960824732,248321.56104229172,54.0,PBMC_5k,GMM
48 | 6,2.274665117263794,-14.467449217102281,238141.43805275435,48.0,PBMC_5k,GMM
49 | 7,2.0596659183502197,-13.941547414973176,248814.61145642295,57.0,PBMC_5k,GMM
50 | 8,1.2713558673858643,-14.456796081254263,234435.5119427906,46.0,PBMC_5k,GMM
51 | 9,1.8523399829864502,-14.820257733959885,236453.24580368795,45.0,PBMC_5k,GMM
52 | 0,0.20482516288757324,-17.162898002294302,196458.95369860908,49.0,PBMC_5k,nGMM
53 | 1,0.24257564544677734,-17.27725679417593,196325.0151104464,45.0,PBMC_5k,nGMM
54 | 2,0.22489190101623535,-16.894444387997563,196981.7101214813,59.0,PBMC_5k,nGMM
55 | 3,0.1976771354675293,-17.187442559180443,196046.25667727954,47.0,PBMC_5k,nGMM
56 | 4,0.1876363754272461,-17.247541163012016,196345.5477243657,46.0,PBMC_5k,nGMM
57 | 5,0.21679210662841797,-16.97484842732038,196156.06355618988,54.0,PBMC_5k,nGMM
58 | 6,0.225081205368042,-17.15077295409227,195997.61407698583,48.0,PBMC_5k,nGMM
59 | 7,0.24540996551513672,-16.940910738001595,196801.1918648725,57.0,PBMC_5k,nGMM
60 | 8,0.21463489532470703,-17.229297792510877,196152.52786378318,46.0,PBMC_5k,nGMM
61 | 9,0.22436809539794922,-17.271073237309157,196258.65312791697,45.0,PBMC_5k,nGMM
62 | 0,4.7353222370147705,-6.442717330259638,156842.43374943547,68.0,PBMC_8k,CITE-sort
63 | 1,5.241987943649292,-6.441431807197678,157638.49865288063,69.0,PBMC_8k,CITE-sort
64 | 2,5.433183908462524,-6.343022901335477,156090.52656366822,69.0,PBMC_8k,CITE-sort
65 | 3,5.4518561363220215,-6.594241419217372,155960.76294390138,64.0,PBMC_8k,CITE-sort
66 | 4,4.971299886703491,-6.446390753865133,156900.2167027499,68.0,PBMC_8k,CITE-sort
67 | 5,4.68738579750061,-6.521551047051461,156449.91575215122,66.0,PBMC_8k,CITE-sort
68 | 6,4.696994066238403,-6.5400360083330655,157556.97037432066,67.0,PBMC_8k,CITE-sort
69 | 7,4.924194097518921,-6.41796427249102,156453.0681507351,68.0,PBMC_8k,CITE-sort
70 | 8,5.501037836074829,-5.71441369422992,152732.79318557627,77.0,PBMC_8k,CITE-sort
71 | 9,4.573446750640869,-6.546313666654377,156839.4317585051,66.0,PBMC_8k,CITE-sort
72 | 0,2.452871799468994,-8.438164332711427,188213.19368342296,68.0,PBMC_8k,GMM
73 | 1,2.481646776199341,-8.408244670975371,188562.3432336556,69.0,PBMC_8k,GMM
74 | 2,2.4254770278930664,-8.425269197382306,188829.66860220348,69.0,PBMC_8k,GMM
75 | 3,2.2687690258026123,-8.516921712531076,186190.13744548397,64.0,PBMC_8k,GMM
76 | 4,2.4142231941223145,-8.423963572877305,187992.5286425008,68.0,PBMC_8k,GMM
77 | 5,2.4891116619110107,-8.472690391977398,187127.93351873392,66.0,PBMC_8k,GMM
78 | 6,2.817267894744873,-8.452734470863497,187632.69180096796,67.0,PBMC_8k,GMM
79 | 7,2.1789538860321045,-8.454787895757315,188476.12509409327,68.0,PBMC_8k,GMM
80 | 8,2.621440887451172,-8.325179760310094,193786.62762067723,77.0,PBMC_8k,GMM
81 | 9,2.8781800270080566,-8.446774501013161,186722.26227093083,66.0,PBMC_8k,GMM
82 | 0,0.43262195587158203,-9.65678742518136,167133.7682424381,68.0,PBMC_8k,nGMM
83 | 1,0.4279301166534424,-9.664082093128723,167466.82285745992,69.0,PBMC_8k,nGMM
84 | 2,0.5662620067596436,-9.664073585728513,167475.68048906935,69.0,PBMC_8k,nGMM
85 | 3,0.4881327152252197,-9.728779479892161,167366.25292936395,64.0,PBMC_8k,nGMM
86 | 4,0.37183523178100586,-9.695972792450466,167745.48842498852,68.0,PBMC_8k,nGMM
87 | 5,0.3246877193450928,-9.717640334690314,167640.97924803954,66.0,PBMC_8k,nGMM
88 | 6,0.3578360080718994,-9.692053459770374,167460.0482029322,67.0,PBMC_8k,nGMM
89 | 7,0.5417490005493164,-9.656363639822144,167127.87730005436,68.0,PBMC_8k,nGMM
90 | 8,0.468184232711792,-9.57447404466968,167850.05727313482,77.0,PBMC_8k,nGMM
91 | 9,0.3371288776397705,-9.680468388884275,167059.67573926813,66.0,PBMC_8k,nGMM
92 | 0,2.4241321086883545,-6.461879788708171,120137.95753977002,23.0,MALT_8k,CITE-sort
93 | 1,2.472317934036255,-6.469601301991959,119770.80647973124,22.0,MALT_8k,CITE-sort
94 | 2,2.6662960052490234,-6.392868239746441,120965.13843813885,27.0,MALT_8k,CITE-sort
95 | 3,2.400151014328003,-6.875146878310774,129078.9942533452,27.0,MALT_8k,CITE-sort
96 | 4,2.471524238586426,-6.461960658211897,120139.3180883007,23.0,MALT_8k,CITE-sort
97 | 5,2.413289785385132,-6.928097227328886,128478.65752665016,24.0,MALT_8k,CITE-sort
98 | 6,2.572601079940796,-6.396962624654681,121034.02236983507,27.0,MALT_8k,CITE-sort
99 | 7,2.4453630447387695,-6.857828848154396,129781.75131304478,29.0,MALT_8k,CITE-sort
100 | 8,2.4264273643493652,-6.432067865198751,120630.51733769802,25.0,MALT_8k,CITE-sort
101 | 9,2.580191135406494,-6.180245928877178,116890.92288054913,26.0,MALT_8k,CITE-sort
102 | 0,0.7916312217712402,-7.5733742822893015,138830.43687258917,23.0,MALT_8k,GMM
103 | 1,0.4620349407196045,-7.620961426871539,139128.386457385,22.0,MALT_8k,GMM
104 | 2,0.5637567043304443,-7.513061564350352,139796.79255236703,27.0,MALT_8k,GMM
105 | 3,0.7792801856994629,-7.495331411753802,139500.56491482095,27.0,MALT_8k,GMM
106 | 4,0.6430320739746094,-7.606809230020523,139385.69294576487,23.0,MALT_8k,GMM
107 | 5,0.5811178684234619,-7.558761418649488,139072.82709241417,24.0,MALT_8k,GMM
108 | 6,0.5059757232666016,-7.509713681736872,139740.8039349963,27.0,MALT_8k,GMM
109 | 7,0.8608949184417725,-7.465963527022147,139999.32341265664,29.0,MALT_8k,GMM
110 | 8,0.47983527183532715,-7.536732209682566,139200.025794297,25.0,MALT_8k,GMM
111 | 9,0.677994966506958,-7.538792286731549,139728.35896189898,26.0,MALT_8k,GMM
112 | 0,0.14325523376464844,-8.388214875332125,145054.02586564902,23.0,MALT_8k,nGMM
113 | 1,0.17519116401672363,-8.416418878040503,145351.99308976828,22.0,MALT_8k,nGMM
114 | 2,0.1341571807861328,-8.332098384714984,144794.97302204653,27.0,MALT_8k,nGMM
115 | 3,0.17314600944519043,-8.280708076968548,143930.46104843976,27.0,MALT_8k,nGMM
116 | 4,0.15224695205688477,-8.404336760644817,145326.1152133652,23.0,MALT_8k,nGMM
117 | 5,0.12498688697814941,-8.361138604558358,144768.0299749365,24.0,MALT_8k,nGMM
118 | 6,0.1561110019683838,-8.283330320507796,143970.9216884222,27.0,MALT_8k,nGMM
119 | 7,0.1398000717163086,-8.25514860142787,143843.0658015172,29.0,MALT_8k,nGMM
120 | 8,0.1175081729888916,-8.374867440614503,145169.23478887198,25.0,MALT_8k,nGMM
121 | 9,0.20955324172973633,-8.3111056544382,144270.70650229516,26.0,MALT_8k,nGMM
122 | 0,1.5833871364593506,-3.919513431963239,74879.64173712343,18.0,CBMC_8k,CITE-sort
123 | 1,1.5685839653015137,-3.931301545064249,75082.79807830622,18.0,CBMC_8k,CITE-sort
124 | 2,1.7406930923461914,-3.7618776280127144,73386.24774899747,21.0,CBMC_8k,CITE-sort
125 | 3,1.5874481201171875,-3.925912843612369,74989.92919748454,18.0,CBMC_8k,CITE-sort
126 | 4,1.63087797164917,-3.903215512370876,75006.53054325444,19.0,CBMC_8k,CITE-sort
127 | 5,1.7103888988494873,-3.8702735972487146,75254.34388281069,21.0,CBMC_8k,CITE-sort
128 | 6,1.4840190410614014,-3.9371554798486907,74775.91763799552,17.0,CBMC_8k,CITE-sort
129 | 7,1.5850739479064941,-3.908120541955717,75091.06382311959,19.0,CBMC_8k,CITE-sort
130 | 8,1.441645860671997,-3.931343563763011,75083.5222285607,18.0,CBMC_8k,CITE-sort
131 | 9,1.5382959842681885,-3.932688568734865,75106.70204424563,18.0,CBMC_8k,CITE-sort
132 | 0,0.3776676654815674,-4.645293744347796,87374.8735614585,18.0,CBMC_8k,GMM
133 | 1,0.43041038513183594,-4.635588058108038,87209.30608116445,18.0,CBMC_8k,GMM
134 | 2,0.42351698875427246,-4.535376105193187,86710.29123445934,21.0,CBMC_8k,GMM
135 | 3,0.40677404403686523,-4.619650195847958,86936.70566622089,18.0,CBMC_8k,GMM
136 | 4,0.4390437602996826,-4.573374223551965,86540.58232416117,19.0,CBMC_8k,GMM
137 | 5,0.33440589904785156,-4.504476520258802,86171.12337944779,21.0,CBMC_8k,GMM
138 | 6,0.3982429504394531,-4.600247617602428,86193.17685786876,17.0,CBMC_8k,GMM
139 | 7,0.5639379024505615,-4.519731820263969,85625.9816159598,19.0,CBMC_8k,GMM
140 | 8,0.42975497245788574,-4.635610627481338,87208.48856031569,18.0,CBMC_8k,GMM
141 | 9,0.3296630382537842,-4.681246352493923,87996.98092958682,18.0,CBMC_8k,GMM
142 | 0,0.13054585456848145,-6.0634860919955225,107254.88370089428,18.0,CBMC_8k,nGMM
143 | 1,0.11309194564819336,-6.051646822327419,107046.21832424909,18.0,CBMC_8k,nGMM
144 | 2,0.11344599723815918,-5.989152132253573,106431.88521841567,21.0,CBMC_8k,nGMM
145 | 3,0.11068177223205566,-6.1375979311296085,108530.39928987606,18.0,CBMC_8k,nGMM
146 | 4,0.17274022102355957,-5.979296283737786,105953.77676080014,19.0,CBMC_8k,nGMM
147 | 5,0.13007712364196777,-5.997731473835821,106580.94095563744,21.0,CBMC_8k,nGMM
148 | 6,0.08965611457824707,-6.199086354739511,109433.15385132025,17.0,CBMC_8k,nGMM
149 | 7,0.15646934509277344,-6.118994616338503,108356.54408592555,19.0,CBMC_8k,nGMM
150 | 8,0.13138699531555176,-6.076944171932883,107485.88201896346,18.0,CBMC_8k,nGMM
151 | 9,0.10138487815856934,-6.113887728378639,108122.98460215003,18.0,CBMC_8k,nGMM
152 | 0,2.2695460319519043,-3.623972212166671,125234.37048111115,24.0,PBMC_16k,CITE-sort
153 | 1,2.2640860080718994,-3.675952817767073,126010.69135743506,22.0,PBMC_16k,CITE-sort
154 | 2,2.1994199752807617,-3.8018323381371757,130433.463177662,23.0,PBMC_16k,CITE-sort
155 | 3,2.0647919178009033,-4.407775641803095,148758.21440330538,21.0,PBMC_16k,CITE-sort
156 | 4,2.226551055908203,-3.882777516123145,133432.80489984434,24.0,PBMC_16k,CITE-sort
157 | 5,1.8370881080627441,-4.444344102963767,148175.9886201819,17.0,PBMC_16k,CITE-sort
158 | 6,2.107717275619507,-3.73121484499415,127761.2818559344,22.0,PBMC_16k,CITE-sort
159 | 7,2.198000907897949,-3.882385940989626,132550.0798348791,22.0,PBMC_16k,CITE-sort
160 | 8,2.198988199234009,-4.403432716440874,148620.63921368093,21.0,PBMC_16k,CITE-sort
161 | 9,2.224411964416504,-3.7769675123279263,128775.47447779097,21.0,PBMC_16k,CITE-sort
162 | 0,1.0075252056121826,-5.219640986255932,175755.31830882383,24.0,PBMC_16k,GMM
163 | 1,1.2031233310699463,-5.260690224689408,176190.5350520727,22.0,PBMC_16k,GMM
164 | 2,1.143347978591919,-5.23125750368259,175675.9753731755,23.0,PBMC_16k,GMM
165 | 3,0.6386978626251221,-5.2955009970938365,176863.04925257954,21.0,PBMC_16k,GMM
166 | 4,0.7805860042572021,-5.241280620565762,176437.7785188551,24.0,PBMC_16k,GMM
167 | 5,0.5949759483337402,-5.428483739588968,179324.9396759519,17.0,PBMC_16k,GMM
168 | 6,0.7616188526153564,-5.248322439227608,175806.96588796642,22.0,PBMC_16k,GMM
169 | 7,0.6302831172943115,-5.305637157055214,177612.91779621228,22.0,PBMC_16k,GMM
170 | 8,0.5043289661407471,-5.321080190805292,177666.2841797434,21.0,PBMC_16k,GMM
171 | 9,0.5827598571777344,-5.275454529834107,176219.78769735052,21.0,PBMC_16k,GMM
172 | 0,0.17671608924865723,-5.714774138841729,184952.60012144645,24.0,PBMC_16k,nGMM
173 | 1,0.22993779182434082,-5.795554772085684,187173.43329816326,22.0,PBMC_16k,nGMM
174 | 2,0.20836615562438965,-5.73201185776524,185327.85821218122,23.0,PBMC_16k,nGMM
175 | 3,0.1986408233642578,-5.818204534449665,187738.2148923623,21.0,PBMC_16k,nGMM
176 | 4,0.3298048973083496,-5.702030735231473,184542.13535930746,24.0,PBMC_16k,nGMM
177 | 5,0.17983317375183105,-5.967352924237675,191798.40535841696,17.0,PBMC_16k,nGMM
178 | 6,0.20919299125671387,-5.7969637320579945,187217.16628912961,22.0,PBMC_16k,nGMM
179 | 7,0.1835939884185791,-5.7863992760750875,186885.1308991835,22.0,PBMC_16k,nGMM
180 | 8,0.1705019474029541,-5.855641969131611,188918.93864829233,21.0,PBMC_16k,nGMM
181 | 9,0.14423871040344238,-5.8438262967148855,188547.48461834533,21.0,PBMC_16k,nGMM
182 |
--------------------------------------------------------------------------------
/performance/time.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/performance/time.pdf
--------------------------------------------------------------------------------
/preCITEsort.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Jun 2 19:25:32 2020
5 |
6 | @author: lianqiuyu
7 | """
8 |
9 | import pandas as pd
10 | import argparse
11 | import os
12 | import numpy as np
13 | import seaborn as sns
14 | from matplotlib import pyplot as plt
15 | import sys
16 |
17 |
# ---------------------------------------------------------------------------
# preCITEsort: optional CLR transformation of raw counts and a grid of
# per-marker histograms saved as a PNG in the output directory.
# ---------------------------------------------------------------------------

# Command-line interface: one positional input file, an optional output
# directory, and a flag requesting CLR transformation of raw counts.
parser = argparse.ArgumentParser()
parser.add_argument('data_path', help = "The input path of CLR normalized data in .csv files with row as sample, col as feature.")
parser.add_argument('-o', '--output', type=str, default='./CITEsort_out',help='Path to save output files.')
parser.add_argument('--CLR', action='store_true', default=False, help='Input is raw counts. Transform counts into CLR format.')

args = parser.parse_args()
data_path = args.data_path

if not os.path.exists(data_path):
    # Report on stderr and exit non-zero so shell pipelines can detect the
    # failure (the script previously exited with status 0 here).
    print('Error: input file does not exist. Please check.', file=sys.stderr)
    sys.exit(1)

# An explicitly empty --output falls back to the default directory.
output_path = args.output if args.output else "./CITEsort_out"

# makedirs (rather than mkdir) so nested output paths such as "out/run1"
# work even when the parent directory does not exist yet.
os.makedirs(output_path, exist_ok=True)

print('read data.')
# First row is the header, first column is the row index.
data = pd.read_csv(data_path,header=0,index_col=0)
dataplot = data

if args.CLR:
    print('perform CLR transformation on raw counts.')
    # Applied along axis 0: every column is replaced by log(x + 1) minus the
    # mean of log(x + 1) taken over that same column.
    data_clr = np.apply_along_axis(lambda x: np.log(x+1) - np.mean(np.log(x+1)),0,data)
    data_clr = pd.DataFrame(data_clr,index=data.index,columns = data.columns)
    data_clr.to_csv(output_path+'/data_clr.csv')
    dataplot = data_clr

print('plot histgrams of all markers in CLR format.')
n_markers = dataplot.shape[1]
n_cols = 5
# Number of subplot rows needed to place every marker on a 5-column grid.
n_rows = int(np.ceil(n_markers / n_cols))

plt.figure(figsize=(12,2*n_rows), dpi=96)
# Newer matplotlib releases renamed the bundled seaborn styles to
# 'seaborn-v0_8-*'; use whichever name this installation provides.
if 'seaborn-white' in plt.style.available:
    plt.style.use('seaborn-white')
else:
    plt.style.use('seaborn-v0_8-white')

for i in range(n_markers):
    ax = plt.subplot(n_rows,n_cols,i+1)
    # NOTE(review): sns.distplot and the 'bw' KDE keyword are deprecated in
    # recent seaborn releases; kept as-is to preserve the plotted output.
    sns.distplot(dataplot.iloc[:,i].values,kde_kws={'bw':0.2})
    plt.yticks([0,1])
    plt.title(dataplot.columns[i],fontsize=15)
    if i%n_cols == 0:
        # Only the left-most subplot of each row carries the y-axis label.
        plt.ylabel('Density',fontsize=12)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')

plt.suptitle('DB: '+str(dataplot.shape[1])+' ADTs,'+str(dataplot.shape[0])+' droplets',fontsize=15)
plt.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=0.9, hspace=0.6,wspace=0.15)
plt.savefig(output_path+'/data_cls_hist.png')
plt.clf()
70 |
71 |
72 |
--------------------------------------------------------------------------------
/readme_figs/ACTandBCT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/ACTandBCT.png
--------------------------------------------------------------------------------
/readme_figs/ACTandBCT_small.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/ACTandBCT_small.jpeg
--------------------------------------------------------------------------------
/readme_figs/ACTimbalance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/ACTimbalance.png
--------------------------------------------------------------------------------
/readme_figs/CITE-sort.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/CITE-sort.png
--------------------------------------------------------------------------------
/readme_figs/FittingInLowDimension.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/FittingInLowDimension.png
--------------------------------------------------------------------------------
/readme_figs/taxonomy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/taxonomy.png
--------------------------------------------------------------------------------
/runCITEsort.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Wed Oct 9 23:49:04 2019
5 |
6 | @author: lianqiuyu
7 | """
8 |
9 | import pandas as pd
10 | from CITEsort.Matryoshka import Matryoshka
11 | from CITEsort.Visualize import visualize_tree
12 | from CITEsort.BTreeTraversal import BTreeTraversal
13 | from CITEsort.ReSplit import ReSplit
14 | import pickle
15 | import argparse
16 | import os
17 |
18 | #from sys import argv
19 |
def main():
    """Run the CITE-sort pipeline from the command line.

    Steps, in order:
      1. Parse the command-line arguments (input csv, merge cutoff,
         output directory, compact-tree flag).
      2. Read the input table with the first row as header and the
         first column as index.
      3. Build the tree with ``ReSplit(data, merge_cutoff)``.
      4. Plot the tree with ``visualize_tree`` under the name ``'tree'``.
      5. Pickle the tree to ``<output>/tree.pickle``.
      6. Write the leaf labels to ``<output>/leaf_labels.csv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'data_path',
        help="The input path of CLR normalized data in .csv files with row as sample, col as feature.",
    )
    parser.add_argument(
        '-c', '--cutoff', type=float, default=0.1,
        help="The cutoff for merging components (default 0.1). It should be a value between 0 and 1. The bigger value leads to split more aggressively, and ends in a more complicated tree.",
    )
    parser.add_argument(
        '-o', '--output', type=str, default='./CITEsort_out',
        help='Path to save output files.',
    )
    parser.add_argument(
        '--compact', action='store_true', default=False,
        help='Output a compact tree.',
    )
    args = parser.parse_args()

    data_path = args.data_path
    output_path = args.output
    merge_cutoff = args.cutoff
    compact_flag = args.compact

    # makedirs(exist_ok=True) creates missing parent directories as well and
    # avoids the check-then-create race of os.path.exists() + os.mkdir().
    os.makedirs(output_path, exist_ok=True)

    print('read data and run CITE-sort.')
    data = pd.read_csv(data_path, header=0, index_col=0)
    tree = ReSplit(data, merge_cutoff)
    # Alternative call kept from the original source:
    # tree = Matryoshka(data, merge_cutoff)
    print('done.\nplot tree.')
    visualize_tree(tree, data, output_path, 'tree', compact=compact_flag)

    # The context manager closes the file even if pickling raises.
    with open(os.path.join(output_path, 'tree.pickle'), 'wb') as f:
        pickle.dump(tree, f)

    print('generate labels.')
    traversal = BTreeTraversal(tree)
    leaves_labels = traversal.get_leaf_label()
    leaves_labels.to_csv(os.path.join(output_path, 'leaf_labels.csv'), index=False)


if __name__ == "__main__":
    main()
56 |
57 |
--------------------------------------------------------------------------------