├── AnalysisTutorial.ipynb ├── CITEsort ├── BTree.py ├── BTreeTraversal.py ├── Matryoshka.py ├── ReSplit.py ├── Visualize.py ├── __init__.py └── traversal.py ├── CITEsort_out ├── .DS_Store ├── data_clr.csv ├── data_cls_hist.png ├── leaf_labels.csv ├── tree.dot ├── tree.pdf ├── tree.pickle ├── tree_complete.dot └── tree_complete.pdf ├── LICENSE ├── README.md ├── datasets ├── CBMC_8k_ADT_clr.csv ├── GSE143363_ADT_Dx_count.csv ├── GSE143363_ADT_Rl_count.csv ├── MALT_8k_ADT_clr.csv ├── PBMC_16k_ADT_clr.csv ├── PBMC_1k_ADT_clr.csv ├── PBMC_1k_b_ADT_clr.csv ├── PBMC_2k_ADT_clr.csv ├── PBMC_5k_ADT_clr.csv └── PBMC_8k_ADT_clr.csv ├── performance.py ├── performance ├── ll.pdf ├── record_8DBs.csv ├── record_alldb.csv ├── record_full_alldb.csv └── time.pdf ├── preCITEsort.py ├── readme_figs ├── ACTandBCT.png ├── ACTandBCT_small.jpeg ├── ACTimbalance.png ├── CITE-sort.png ├── FittingInLowDimension.png └── taxonomy.png └── runCITEsort.py /CITEsort/BTree.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Aug 18 15:41:14 2019 5 | 6 | @author: lqyair 7 | """ 8 | 9 | class BTree: 10 | 11 | def __init__(self, key, left = None, right = None, indices = None, stop=None,\ 12 | all_clustering_dic = None, where_dominant = None,weight=None,ll=None,bic=None): 13 | self.key = key # a str 14 | self.right = right # a BstNode 15 | self.left = left # a BstNode 16 | self.indices = indices # a [] 17 | self.all_clustering_dic = all_clustering_dic 18 | self.weight = weight 19 | self.ll = ll 20 | self.bic = bic 21 | #self.marker_summary = marker_summary # a pd.df 22 | #self.para = para # a {} parameters for qualified components 23 | self.where_dominant = where_dominant # str ("left"/"right"), indicator of edge color 24 | self.stop = stop # legacy 25 | 26 | 27 | def display(self): 28 | lines, _, _, _ = self._display_aux() 29 | for line in lines: 30 | print(line) 31 | 32 | def _display_aux(self): 33 | """Returns list of strings, width, height, and horizontal coordinate of the root.""" 34 | # No child. 35 | if self.right is None and self.left is None: 36 | #if self.right.key is 'leaf' and self.left.key is 'leaf': 37 | line = '%s' % '_'.join(self.key) 38 | width = len(line) 39 | height = 1 40 | middle = width // 2 41 | return [line], width, height, middle 42 | 43 | # Only left child. 44 | if self.right is None: 45 | #if self.right.key is 'leaf': 46 | lines, n, p, x = self.left._display_aux() 47 | s = '%s' % '_'.join(self.key) 48 | u = len(s) 49 | first_line = (x + 1) * ' ' + (n - x - 1) * '_' + s 50 | second_line = x * ' ' + '/' + (n - x - 1 + u) * ' ' 51 | shifted_lines = [line + u * ' ' for line in lines] 52 | return [first_line, second_line] + shifted_lines, n + u, p + 2, n + u // 2 53 | 54 | # Only right child. 55 | if self.left is None: 56 | #if self.left.key is 'leaf': 57 | lines, n, p, x = self.right._display_aux() 58 | s = '%s' % '_'.join(self.key) 59 | u = len(s) 60 | first_line = s + x * '_' + (n - x) * ' ' 61 | second_line = (u + x) * ' ' + '\\' + (n - x - 1) * ' ' 62 | shifted_lines = [u * ' ' + line for line in lines] 63 | return [first_line, second_line] + shifted_lines, n + u, p + 2, u // 2 64 | 65 | # Two children. 66 | left, n, p, x = self.left._display_aux() 67 | right, m, q, y = self.right._display_aux() 68 | s = '%s' % '_'.join(self.key) 69 | u = len(s) 70 | first_line = (x + 1) * ' ' + (n - x - 1) * '_' + s + y * '_' + (m - y) * ' ' 71 | second_line = x * ' ' + '/' + (n - x - 1 + u + y) * ' ' + '\\' + (m - y - 1) * ' ' 72 | if p < q: 73 | left += [n * ' '] * (q - p) 74 | elif q < p: 75 | right += [m * ' '] * (p - q) 76 | zipped_lines = zip(left, right) 77 | lines = [first_line, second_line] + [a + u * ' ' + b for a, b in zipped_lines] 78 | return lines, n + m + u, max(p, q) + 2, n + u // 2 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /CITEsort/BTreeTraversal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Aug 18 20:26:57 2019 5 | 6 | @author: lqyair 7 | """ 8 | import sys 9 | sys.path.append("./CITEsort") 10 | 11 | import pandas as pd 12 | import numpy as np 13 | from matplotlib import pyplot as plt 14 | from Visualize import visualize_node,visualize_pair 15 | 16 | 17 | ''' 18 | from matplotlib import pyplot as plt 19 | from matplotlib import cm,colors 20 | from mpl_toolkits.axes_grid1 import axes_grid 21 | #import seaborn as sns 22 | import pdb 23 | ''' 24 | 25 | class BTreeTraversal: 26 | 27 | def __init__(self,tree,method='bfs',nodelist=None,nodename=None,tree_summary=None,leaf_summary=None,ll=None,n_components=None): 28 | 29 | #print('initializing...') 30 | 31 | self.tree = tree 32 | self.method = method 33 | if self.method == 'bfs': 34 | self.nodelist = self.levelOrderTraversal() 35 | if self.method == 'dfs': 36 | self.nodelist = self.preorderTraversal() 37 | 38 | nodename_temp = ['_'.join(x.key) for x in self.nodelist] 39 | self.nodename = [str(i)+'_'+nodename_temp[i] for i in range(len(nodename_temp))] 40 | self.tree_summary, self.leaf_summary = self.summarize() 41 | if 'll' in self.tree.__dir__(): 42 | self.ll = self.leaf_summary['ll'].sum() 43 | self.n_components = self.leaf_summary.shape[0] 44 | 45 | 46 | def summarize(self): 47 | if 'll' in self.tree.__dir__(): 48 | tree_summary = pd.DataFrame({'Count':[len(x.indices) for x in self.nodelist], 49 | 'Weight':[x.weight for x in self.nodelist], 50 | 'Stop':[x.stop for x in self.nodelist], 51 | 'll':[x.ll for x in self.nodelist] 52 | },index=self.nodename) 53 | else: 54 | tree_summary = pd.DataFrame({'Count':[len(x.indices) for x in self.nodelist] },index=self.nodename) 55 | 56 | leaf_summary = tree_summary.loc[[x for x in self.nodename if x.split('_')[1]=='leaf'],:] 57 | leaf_summary = leaf_summary.sort_values(by='Count',ascending=False) 58 | 59 | return tree_summary,leaf_summary 60 | 61 | 62 | 63 | def get_ll(self): 64 | 65 | self.ll_tot = sum([x.ll for idx,x in enumerate(self.nodelist) if self.nodename[idx].split('_')[1]=='leaf']) 66 | 67 | 68 | def get_node(self,nodeID): 69 | return self.nodelist[nodeID] 70 | 71 | 72 | 73 | def generateLabel(self): 74 | """generate label file (binary matrix: Num.cell x Num.node, X_ij = 1 means cell i is attached to node j.)""" 75 | 76 | label = pd.DataFrame(np.zeros([len(self.tree.indices),len(self.nodename)]),index=self.tree.indices,columns=self.nodename) 77 | 78 | for i in range(len(self.nodename)): 79 | label.loc[self.nodelist[i].indices,self.nodename[i]] = 1 80 | 81 | return label 82 | 83 | 84 | 85 | def get_leaf_label(self): 86 | """generate label (one column, indicating which leaf cells are assigned.)""" 87 | label = pd.DataFrame({'GEM':self.tree.indices,'Label':[None]*len(self.tree.indices)},index=self.tree.indices) 88 | for i in range(len(self.nodename)): 89 | if self.nodename[i].split('_')[1] == 'leaf': 90 | label.loc[self.nodelist[i].indices,'Label'] = self.nodename[i] 91 | 92 | return label 93 | 94 | 95 | 96 | def plot_node(self,data, nodeID, viz_dim = 1, **plot_para): 97 | """plot the specified node (default: savefig=False,savepath='.')""" 98 | if viz_dim == 1: 99 | visualize_node(data, node = self.nodelist[nodeID], nodename = self.nodename[nodeID], **plot_para) 100 | if viz_dim == 2: 101 | visualize_pair(data, node = self.nodelist[nodeID], nodename = self.nodename[nodeID], **plot_para) 102 | 103 | 104 | 105 | def plot_leaf_size(self): 106 | 107 | leaf_size = self.leaf_summary['Count'] 108 | leaf_prop = self.leaf_summary['Proportion'] 109 | 110 | fig, ax1 = plt.subplots(1,2,figsize=(12,4)) 111 | 112 | # plot number/proportion of cells in each leaf 113 | color = 'tab:red' 114 | ax1[0].set_xlabel('leaf',fontsize=20) 115 | ax1[0].set_ylabel('Proportion', color=color,fontsize=20) 116 | ax1[0].plot(range(len(leaf_prop)),leaf_prop, color=color,marker='o') 117 | if len(leaf_prop) >= 5: 118 | plt.xticks(np.arange(0, len(leaf_prop), len(leaf_prop)//5)) 119 | else: 120 | plt.xticks(np.arange(0, len(leaf_prop), 1)) 121 | ax1[0].tick_params(axis='y', labelcolor=color,labelsize=15) 122 | ax1[0].tick_params(axis='x', labelsize=15) 123 | ax1[0].set_title('Num. of cells in leaf',fontsize=20,pad=20) 124 | 125 | ax2 = ax1[0].twinx() # instantiate a second axes that shares the same x-axis 126 | color = 'tab:blue' 127 | ax2.set_ylabel('Count', color=color,fontsize=20) # we already handled the x-label with ax1 128 | ax2.plot(range(len(leaf_size)),leaf_size, color=color,marker='o') 129 | ax2.tick_params(axis='y', labelcolor=color,labelsize=15) 130 | 131 | 132 | # plot cumulative number/proportion of cells in each leaf 133 | color = 'tab:red' 134 | ax1[1].set_xlabel('leaf',fontsize=20) 135 | ax1[1].set_ylabel('Proportion', color=color,fontsize=20) 136 | ax1[1].plot(range(len(leaf_prop)),leaf_prop.cumsum(), color=color,marker='o') 137 | if len(leaf_prop) >= 5: 138 | plt.xticks(np.arange(0, len(leaf_prop), len(leaf_prop)//5)) 139 | else: 140 | plt.xticks(np.arange(0, len(leaf_prop), 1)) 141 | ax1[1].tick_params(axis='y', labelcolor=color,labelsize=15) 142 | ax1[1].tick_params(axis='x', labelsize=15) 143 | ax1[1].set_title('Cumulative num. of cell in leaf',fontsize=20,pad=20) 144 | 145 | ax2 = ax1[1].twinx() # instantiate a second axes that shares the same x-axis 146 | color = 'tab:blue' 147 | ax2.set_ylabel('Count', color=color,fontsize=20) # we already handled the x-label with ax1 148 | ax2.plot(range(len(leaf_size)),leaf_size.cumsum(), color=color,marker='o') 149 | ax2.tick_params(axis='y', labelcolor=color,labelsize=15) 150 | 151 | fig.tight_layout() # otherwise the right y-label is slightly clipped 152 | plt.show() 153 | 154 | 155 | 156 | ''' 157 | def track_marker(self,data,n_big_leaf,**plot_para): 158 | """track marker distributions in big leafs. (at most 12 leafs. default para: savefig=False,outpath='.')""" 159 | n_big_leaf = min(n_big_leaf,12) 160 | 161 | savefig = plot_para.get('savefig',False) 162 | outpath = plot_para.get('outpath','.') 163 | 164 | big_leaf = self.leaf_summary.index.values.tolist()[0:n_big_leaf] 165 | markers = data.columns.values.tolist() 166 | node_plot = [self.nodename[0]] + big_leaf 167 | 168 | cmap = cm.get_cmap('Set3') 169 | col_dic = dict(zip(big_leaf,[colors.to_hex(cmap(i)) for i in range(len(big_leaf))])) 170 | col_dic[node_plot[0]] = '#999999' # col for all cells 171 | 172 | nrows = np.ceil(len(markers)/5) 173 | ncols = 5 174 | naxes = len(node_plot) 175 | f = plt.figure(figsize=(10, naxes)) 176 | for i, m in enumerate(markers): 177 | ag = axes_grid.Grid(f, (nrows, ncols, i+1), (naxes, 1), axes_pad=0) 178 | for j in range(naxes): 179 | leaf_idx = int(node_plot[j].split('_')[0]) 180 | ag[j].hist(data.loc[self.nodelist[leaf_idx].indices,m], 181 | color = col_dic[node_plot[j]], density = True, bins = 'auto') 182 | ag[j].axvline(0, linestyle='dashed', linewidth=2) 183 | ag[j].yaxis.set_ticks([]) 184 | ag[j].xaxis.set_ticks([]) 185 | if j%naxes == 0: 186 | ag[j].set_title(markers[i],fontsize=15) 187 | if i%ncols == 0: 188 | ag[j].set_ylabel(str(leaf_idx),fontsize=12) 189 | 190 | plt.subplots_adjust(wspace=0.1, hspace=0.3) 191 | if savefig == True: 192 | plt.savefig(outpath+'/track_marker_in_big_leafs.png') 193 | plt.show() 194 | ''' 195 | 196 | 197 | # dfs 198 | def preorderTraversal(self): 199 | 200 | node = self.tree 201 | if node is None: 202 | return 203 | 204 | nodelist = [] 205 | myStack = [] 206 | 207 | while node or myStack: 208 | while node: 209 | nodelist.append(node) 210 | myStack.append(node) 211 | node = node.left 212 | node = myStack.pop() 213 | node = node.right 214 | 215 | return nodelist 216 | 217 | 218 | # bfs 219 | def levelOrderTraversal(self): 220 | #print('bfs...') 221 | node = self.tree 222 | if node is None: 223 | return 224 | 225 | queue = [] 226 | nodelist = [] 227 | 228 | queue.append(node) 229 | nodelist.append(node) 230 | 231 | while(len(queue) > 0): 232 | node = queue.pop(0) 233 | 234 | if node.left is not None: 235 | nodelist.append(node.left) 236 | queue.append(node.left) 237 | 238 | if node.right is not None: 239 | nodelist.append(node.right) 240 | queue.append(node.right) 241 | 242 | return nodelist 243 | 244 | 245 | 246 | 247 | 248 | 249 | -------------------------------------------------------------------------------- /CITEsort/Matryoshka.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | sys.path.append("./CITEsort") 4 | import numpy as np 5 | from sklearn.mixture import GaussianMixture 6 | import itertools 7 | from scipy import stats 8 | import operator 9 | from scipy.spatial import distance 10 | from BTree import BTree 11 | import copy 12 | #from scipy.signal import upfirdn 13 | #import pandas as pd 14 | 15 | 16 | def Matryoshka(data,merge_cutoff=0.1,max_k=10,max_ndim=2,bic='bic'): 17 | 18 | if data.shape[0] <= 20: 19 | root = BTree(('leaf',)) 20 | root.indices = data.index.values.tolist() 21 | root.all_clustering_dic = _set_small_leaf(data) 22 | return root 23 | 24 | separable_features, bipartitions, scores, all_clustering_dic = HiScanFeatures(data,merge_cutoff,max_k,max_ndim,bic) 25 | 26 | if len(separable_features) == 0: 27 | root = BTree(('leaf',)) 28 | root.indices = data.index.values.tolist() 29 | root.all_clustering_dic = all_clustering_dic 30 | return root 31 | 32 | idx_best = np.argmax(scores) 33 | best_feature = separable_features[idx_best] 34 | best_partition = bipartitions[best_feature] 35 | 36 | ## construct current node 37 | root = BTree(best_feature) 38 | root.indices = data.index.values.tolist() 39 | root.all_clustering_dic = all_clustering_dic 40 | #root.marker_summary = marker_summary 41 | #root.para = para 42 | 43 | ## branch cells, component with higher mean goes right. 44 | p1_mean = data.loc[best_partition, best_feature].mean(0) 45 | p2_mean = data.loc[~best_partition, best_feature].mean(0) 46 | 47 | flag = True 48 | if len(p1_mean) == 1: 49 | flag = p1_mean.values > p2_mean.values 50 | else: 51 | p1_cosine = sum(p1_mean)/np.sqrt(sum(p1_mean**2)) 52 | p2_cosine = sum(p2_mean)/np.sqrt(sum(p2_mean**2)) 53 | flag = p1_cosine > p2_cosine 54 | 55 | if flag: 56 | child_right = data.iloc[best_partition, :] 57 | child_left = data.iloc[~best_partition, :] 58 | root.where_dominant = 'right' 59 | else: 60 | child_right = data.iloc[~best_partition, :] 61 | child_left = data.iloc[best_partition, :] 62 | root.where_dominant = 'left' 63 | 64 | ## recursion 65 | root.left = Matryoshka(child_left,merge_cutoff,max_k,max_ndim,bic) 66 | root.right = Matryoshka(child_right,merge_cutoff,max_k,max_ndim,bic) 67 | 68 | return root 69 | 70 | 71 | 72 | def HiScanFeatures(data,merge_cutoff,max_k,max_ndim,bic): 73 | 74 | ndim = 1 75 | all_clustering_dic = {} 76 | separable_features, bipartitions, scores, all_clustering_dic[ndim] = ScanFeatures(data,merge_cutoff,max_k,ndim,bic) 77 | 78 | if len(separable_features) == 0: 79 | 80 | rescan_features = [] 81 | for item in all_clustering_dic[ndim]: 82 | val = all_clustering_dic[ndim][item]['similarity_stopped'] 83 | if val > 0.1 and val < 0.5: 84 | rescan_features.append(item[0]) 85 | 86 | for ndim in range(2,max_ndim+1): 87 | separable_features, bipartitions, scores, all_clustering_dic[ndim] = ScanFeatures(data[rescan_features],merge_cutoff,max_k,ndim,bic) 88 | if len(separable_features) >= 1: 89 | break 90 | 91 | return separable_features, bipartitions, scores, all_clustering_dic 92 | 93 | 94 | 95 | def ScanFeatures(data,merge_cutoff,max_k,ndim,bic): 96 | 97 | F_set = data.columns.values.tolist() 98 | 99 | all_clustering = {} 100 | separable_features = [] 101 | bipartitions = {} 102 | scores = [] 103 | 104 | for item in itertools.combinations(F_set, ndim): 105 | x = data.loc[:,item] 106 | all_clustering[item] = Clustering(x,merge_cutoff,max_k,bic) 107 | 108 | for item in all_clustering: 109 | if all_clustering[item]['mp_ncluster'] > 1: 110 | separable_features.append(item) 111 | bipartitions[item] = all_clustering[item]['max_ent_p'] 112 | scores.append(all_clustering[item]['max_ent']) 113 | 114 | return separable_features, bipartitions, scores, all_clustering 115 | 116 | 117 | 118 | def Clustering(x,merge_cutoff,max_k,bic): 119 | 120 | val,cnt = np.unique(x.values.tolist(),return_counts=True) 121 | 122 | if len(val) < 50: 123 | clustering = _set_one_component(x) 124 | 125 | else: 126 | 127 | k_bic,_ = BIC(x,max_k,bic) 128 | 129 | if k_bic == 1: 130 | # if only one component, set values 131 | clustering = _set_one_component(x) 132 | else: 133 | 134 | bp_gmm = GaussianMixture(k_bic).fit(x) 135 | clustering = merge_bhat(x,bp_gmm,merge_cutoff) 136 | 137 | if clustering['mp_ncluster'] > 1: 138 | 139 | merged_label = clustering['mp_clustering'] 140 | labels, counts = np.unique(merged_label, return_counts=True) 141 | per = counts/np.sum(counts) 142 | ents = [stats.entropy([per_i, 1-per_i],base=2) for per_i in per] 143 | clustering['max_ent'] = np.max(ents) 144 | best_cc_idx = np.argmax(ents) 145 | best_cc_label = labels[best_cc_idx] 146 | clustering['max_ent_p'] = merged_label == best_cc_label 147 | 148 | return clustering 149 | 150 | 151 | 152 | def bhattacharyya_dist(mu1, mu2, Sigma1, Sigma2): 153 | Sig = (Sigma1+Sigma2)/2 154 | ldet_s = np.linalg.det(Sig) 155 | ldet_s1 = np.linalg.det(Sigma1) 156 | ldet_s2 = np.linalg.det(Sigma2) 157 | d1 = distance.mahalanobis(mu1,mu2,np.linalg.inv(Sig))**2/8 158 | d2 = 0.5*np.log(ldet_s) - 0.25*np.log(ldet_s1) - 0.25*np.log(ldet_s2) 159 | return d1+d2 160 | 161 | 162 | 163 | def merge_bhat(x,bp_gmm,cutoff): 164 | 165 | clustering = {} 166 | clustering['bp_ncluster'] = bp_gmm.n_components 167 | clustering['bp_clustering'] = bp_gmm.predict(x) 168 | clustering['bp_pro'] = bp_gmm.weights_ 169 | clustering['bp_mean'] = bp_gmm.means_ 170 | clustering['bp_Sigma'] = bp_gmm.covariances_ 171 | 172 | #clustering['last_pair_similarity'] = _get_last_pair_similarity_2D(x,bp_gmm) 173 | gmm = copy.deepcopy(bp_gmm) 174 | 175 | mu = gmm.means_ 176 | Sigma = gmm.covariances_ 177 | weights = list(gmm.weights_) 178 | posterior = gmm.predict_proba(x) 179 | 180 | current_ncluster = len(mu) 181 | mergedtonumbers = [int(item) for item in range(current_ncluster)] 182 | 183 | merge_flag = True 184 | clustering['bhat_dic_track'] = {} 185 | merge_time = 0 186 | 187 | while current_ncluster > 1 and merge_flag: 188 | 189 | bhat_dic = {} 190 | 191 | for c_pair in itertools.combinations(range(current_ncluster), 2): 192 | m1 = mu[c_pair[0],:] 193 | m2 = mu[c_pair[1],:] 194 | Sigma1 = Sigma[c_pair[0],:,:] 195 | Sigma2 = Sigma[c_pair[1],:,:] 196 | bhat_dic[c_pair] = np.exp(-bhattacharyya_dist(m1, m2, Sigma1, Sigma2)) 197 | 198 | clustering['bhat_dic_track'][merge_time] = bhat_dic 199 | merge_time = merge_time + 1 200 | 201 | max_pair = max(bhat_dic.items(), key=operator.itemgetter(1))[0] 202 | max_val = bhat_dic[max_pair] 203 | 204 | if max_val > cutoff: 205 | merged_i,merged_j = max_pair 206 | # update mergedtonumbers 207 | for idx,val in enumerate(mergedtonumbers): 208 | if val == merged_j: 209 | mergedtonumbers[idx] = merged_i 210 | if val > merged_j: 211 | mergedtonumbers[idx] = val - 1 212 | 213 | # update parameters 214 | weights[merged_i] = weights[merged_i] + weights[merged_j] 215 | 216 | posterior[:,merged_i] = posterior[:,merged_i] + posterior[:,merged_j] 217 | 218 | w = posterior[:,merged_i]/np.sum(posterior[:,merged_i]) 219 | mu[merged_i,:] = np.dot(w,x)# update 220 | 221 | x_centered = x.apply(lambda xx: xx-mu[merged_i,:],1) 222 | Sigma[merged_i,:,:] = np.cov(x_centered.T,aweights=w,bias=1) 223 | 224 | del weights[merged_j] 225 | #weights = np.delete(weights,merged_j,0) 226 | mu = np.delete(mu,merged_j,0) 227 | Sigma = np.delete(Sigma,merged_j,0) 228 | posterior = np.delete(posterior,merged_j,1) 229 | current_ncluster = current_ncluster - 1 230 | 231 | else: 232 | merge_flag = False 233 | 234 | 235 | clustering['similarity_stopped'] = np.min(list(bhat_dic.values())) 236 | clustering['mp_ncluster'] = mu.shape[0] 237 | clustering['mergedtonumbers'] = mergedtonumbers 238 | clustering['mp_clustering'] = list(np.apply_along_axis(np.argmax,1,posterior)) 239 | 240 | return clustering 241 | 242 | 243 | 244 | def _set_small_leaf(data): 245 | all_clustering_dic = {} 246 | all_clustering_dic[1] = {} 247 | 248 | F_set = data.columns.values.tolist() 249 | all_clustering = {} 250 | 251 | for item in itertools.combinations(F_set, 1): 252 | x = data.loc[:,item] 253 | all_clustering[item] = _set_one_component(x) 254 | 255 | all_clustering_dic[1] = all_clustering 256 | 257 | return all_clustering_dic 258 | 259 | 260 | 261 | def _set_one_component(x): 262 | 263 | clustering = {} 264 | clustering['bp_ncluster'] = 1 265 | clustering['bp_clustering'] = [0]*len(x) 266 | clustering['bp_pro'] = [1] 267 | clustering['bp_mean'] = np.mean(x) 268 | clustering['bp_Sigma'] = np.var(x) 269 | clustering['bhat_dic_track'] = {} 270 | clustering['similarity_stopped'] = 1 271 | clustering['mp_ncluster'] = 1 272 | clustering['mp_clustering'] = [0]*len(x) 273 | clustering['mergedtonumbers'] = [0] 274 | 275 | return clustering 276 | 277 | 278 | 279 | def BIC(X, max_k = 10,bic = 'bic'): 280 | """return best k chosen with BIC method""" 281 | 282 | bic_list = _get_BIC_k(X, min(max_k,len(np.unique(X)))) 283 | 284 | if bic == 'bic': 285 | return min(np.argmin(bic_list)+1,_FindElbow(bic_list)),bic_list 286 | elif bic == 'bic_min': 287 | return np.argmin(bic_list)+1,bic_list 288 | elif bic == 'bic_elbow': 289 | return _FindElbow(bic_list),bic_list 290 | 291 | 292 | 293 | def _get_BIC_k(X, max_k): 294 | """compute BIC scores with k belongs to [1,max_k]""" 295 | bic_list = [] 296 | for i in range(1,max_k+1): 297 | gmm_i = GaussianMixture(i).fit(X) 298 | bic_list.append(gmm_i.bic(X)) 299 | return bic_list 300 | 301 | 302 | 303 | def _FindElbow(bic_list): 304 | """return elbow point, defined as the farthest point from the line through the first and last points""" 305 | if len(bic_list) == 1: 306 | return 1 307 | else: 308 | a = bic_list[0] - bic_list[-1] 309 | b = len(bic_list) - 1 310 | c = bic_list[-1]*1 - bic_list[0]*len(bic_list) 311 | dis = np.abs(a*range(1,len(bic_list)+1) + b*np.array(bic_list) + c)/np.sqrt(a**2+b**2) 312 | return np.argmax(dis)+1 313 | 314 | 315 | -------------------------------------------------------------------------------- /CITEsort/ReSplit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jan 15 23:44:58 2020 5 | 6 | @author: lianqiuyu 7 | """ 8 | 9 | import sys 10 | sys.path.append("./CITEsort") 11 | 12 | import numpy as np 13 | from sklearn.mixture import GaussianMixture 14 | import itertools 15 | from scipy import stats 16 | import operator 17 | from scipy.spatial import distance 18 | from BTree import BTree 19 | import copy 20 | #from scipy.signal import upfirdn 21 | #import pandas as pd 22 | import random 23 | 24 | 25 | def ReSplit(data,merge_cutoff=0.1,weight=1,max_k=10,max_ndim=2,bic='bic'): 26 | 27 | root = BTree(('leaf',)) 28 | root.indices = data.index.values.tolist() 29 | root.weight = weight 30 | #if len(root.indices) < 500: 31 | # print(root.indices) 32 | 33 | if data.shape[0] < 2: 34 | root.all_clustering_dic = _set_small_leaf(data) 35 | root.stop = 'small size' 36 | return root 37 | 38 | unimodal = GaussianMixture(1,covariance_type='full').fit(data) 39 | root.ll = root.weight * unimodal.lower_bound_ 40 | root.bic = unimodal.bic(data) 41 | 42 | separable_features, bipartitions, scores_ll, bic_list, all_clustering_dic = HiScanFeatures(data,root,merge_cutoff,max_k,max_ndim,bic) 43 | 44 | if len(separable_features) == 0: 45 | root.all_clustering_dic = all_clustering_dic 46 | root.stop = 'no separable features' 47 | return root 48 | 49 | ''' 50 | scores_ll = np.zeros(len(separable_features)) 51 | bic_list = np.zeros(len(separable_features)) 52 | for fidx in range(len(separable_features)): 53 | f = separable_features[fidx] 54 | if np.sum(bipartitions[f]) < 2 or np.sum(~bipartitions[f]) < 2: 55 | continue 56 | gmm1 = GaussianMixture(1,covariance_type='full').fit(data.loc[bipartitions[f],:]) 57 | ll1 = gmm1.lower_bound_ * sum(bipartitions[f])/len(bipartitions[f]) 58 | bic1 = gmm1.bic(data.loc[bipartitions[f],:]) 59 | 60 | gmm0 = GaussianMixture(1,covariance_type='full').fit(data.loc[~bipartitions[f],:]) 61 | ll0 = gmm0.lower_bound_ * sum(~bipartitions[f])/len(bipartitions[f]) 62 | bic0 = gmm0.bic(data.loc[~bipartitions[f],:]) 63 | 64 | scores_ll[fidx] = (ll1 + ll0) * root.weight - root.ll 65 | bic_list[fidx] = bic1 + bic0 66 | ''' 67 | #print(separable_features) 68 | #print(scores_ll) 69 | #print(bic_list) 70 | idx_best = np.argmax(scores_ll) 71 | if np.max(scores_ll) < 0.001: 72 | #if root.bic < bic_list[idx_best]: 73 | root.stop = 'spliting increases bic' 74 | return root 75 | 76 | #idx_best = np.argmax(scores_ent) 77 | best_feature = separable_features[idx_best] 78 | best_partition = bipartitions[best_feature] 79 | #best_weights = all_clustering_dic[len(best_feature)][best_feature]['weight'] 80 | 81 | ## construct current node 82 | root.key = best_feature 83 | root.all_clustering_dic = all_clustering_dic 84 | #root.marker_summary = marker_summary 85 | #root.para = para 86 | 87 | ## branch cells, component with higher mean goes right. 88 | p1_mean = data.loc[best_partition, best_feature].mean(0) 89 | p2_mean = data.loc[~best_partition, best_feature].mean(0) 90 | 91 | flag = True 92 | if len(p1_mean) == 1: 93 | flag = p1_mean.values > p2_mean.values 94 | else: 95 | p1_cosine = sum(p1_mean)/np.sqrt(sum(p1_mean**2)) 96 | p2_cosine = sum(p2_mean)/np.sqrt(sum(p2_mean**2)) 97 | flag = p1_cosine > p2_cosine 98 | 99 | if flag: 100 | child_right = data.iloc[best_partition, :] 101 | w_r = sum(best_partition)/len(best_partition) 102 | child_left = data.iloc[~best_partition, :] 103 | w_l = sum(~best_partition)/len(best_partition) 104 | root.where_dominant = 'right' 105 | else: 106 | child_right = data.iloc[~best_partition, :] 107 | w_r = sum(~best_partition)/len(best_partition) 108 | child_left = data.iloc[best_partition, :] 109 | w_l = sum(best_partition)/len(best_partition) 110 | root.where_dominant = 'left' 111 | 112 | ## recursion 113 | root.left = ReSplit(child_left,merge_cutoff,weight * w_l,max_k,max_ndim,bic) 114 | root.right = ReSplit(child_right,merge_cutoff,weight * w_r,max_k,max_ndim,bic) 115 | 116 | return root 117 | 118 | 119 | 120 | def HiScanFeatures(data,root,merge_cutoff,max_k,max_ndim,bic): 121 | 122 | ndim = 1 123 | all_clustering_dic = {} 124 | separable_features, bipartitions, scores, bic_list, all_clustering_dic[ndim] = ScoreFeatures(data,root,merge_cutoff,max_k,ndim,bic) 125 | 126 | if len(separable_features) == 0: 127 | 128 | rescan_features = [] 129 | for item in all_clustering_dic[ndim]: 130 | val = all_clustering_dic[ndim][item]['similarity_stopped'] 131 | if val > 0.1 and val < 0.5: 132 | rescan_features.append(item[0]) 133 | 134 | for ndim in range(2,max_ndim+1): 135 | if len(rescan_features) < ndim: 136 | separable_features, bipartitions, scores, bic_list, all_clustering_dic[ndim] = ScoreFeatures(data,root,0.5,max_k,len(rescan_features),bic) 137 | break 138 | 139 | separable_features, bipartitions, scores,bic_list, all_clustering_dic[ndim] = ScoreFeatures(data[rescan_features],root,0.5,max_k,ndim,bic) 140 | if len(separable_features) >= 1: 141 | break 142 | 143 | return separable_features, bipartitions, scores, bic_list, all_clustering_dic 144 | 145 | 146 | 147 | def ScoreFeatures(data,root,merge_cutoff,max_k,ndim,bic): 148 | 149 | F_set = data.columns.values.tolist() 150 | 151 | all_clustering = {} 152 | separable_features = [] 153 | bipartitions = {} 154 | scores = [] 155 | bic_list = [] 156 | 157 | for item in itertools.combinations(F_set, ndim): 158 | x = data.loc[:,item] 159 | all_clustering[item] = Clustering(x,merge_cutoff,max_k,bic) 160 | 161 | for item in all_clustering: 162 | if all_clustering[item]['mp_ncluster'] > 1: 163 | 164 | merged_label = all_clustering[item]['mp_clustering'] 165 | labels, counts = np.unique(merged_label, return_counts=True) 166 | if len(counts) == 1 or np.min(counts) < 5: 167 | continue 168 | 169 | ll_gain = []#np.zeros(len(labels)) 170 | bic_mlabels = [] 171 | for mlabel in labels: 172 | assignment = merged_label == mlabel 173 | 174 | gmm1 = GaussianMixture(1,covariance_type='full').fit(data.loc[assignment,:]) 175 | ll1 = gmm1.lower_bound_ * sum(assignment)/len(assignment) 176 | bic1 = gmm1.bic(data.loc[assignment,:]) 177 | 178 | gmm0 = GaussianMixture(1,covariance_type='full').fit(data.loc[~assignment,:]) 179 | ll0 = gmm0.lower_bound_ * sum(~assignment)/len(assignment) 180 | bic0 = gmm0.bic(data.loc[~assignment,:]) 181 | 182 | ll_gain.append( (ll1 + ll0) * root.weight - root.ll ) 183 | bic_mlabels.append( bic1 + bic0 ) 184 | 185 | best_mlabel_idx = np.argmax(ll_gain) 186 | best_mlabel = labels[best_mlabel_idx] 187 | 188 | bipartitions[item] = merged_label == best_mlabel 189 | scores.append( ll_gain[best_mlabel_idx] ) 190 | separable_features.append(item) 191 | bic_list.append( bic_mlabels[best_mlabel_idx] ) 192 | 193 | # bipartitions[item] = all_clustering[item]['max_ent_p'] 194 | # scores.append(all_clustering[item]['max_ent']) 195 | 196 | return separable_features, bipartitions, scores, bic_list, all_clustering 197 | 198 | 199 | 200 | def Clustering(x,merge_cutoff,max_k,bic): 201 | 202 | val,cnt = np.unique(x.values.tolist(),return_counts=True) 203 | 204 | if len(val) < 50: 205 | clustering = _set_one_component(x) 206 | 207 | else: 208 | 209 | k_bic,_ = BIC(x,max_k,bic) 210 | 211 | if k_bic == 1: 212 | # if only one component, set values 213 | clustering = _set_one_component(x) 214 | else: 215 | 216 | bp_gmm = GaussianMixture(k_bic).fit(x) 217 | clustering = merge_bhat(x,bp_gmm,merge_cutoff) 218 | ''' 219 | if clustering['mp_ncluster'] > 1: 220 | 221 | merged_label = clustering['mp_clustering'] 222 | labels, counts = np.unique(merged_label, return_counts=True) 223 | 224 | per = counts/np.sum(counts) 225 | ents = [stats.entropy([per_i, 1-per_i],base=2) for per_i in per] 226 | clustering['max_ent'] = np.max(ents) 227 | best_cc_idx = np.argmax(ents) 228 | best_cc_label = labels[best_cc_idx] 229 | clustering['max_ent_p'] = merged_label == best_cc_label 230 | ''' 231 | return clustering 232 | 233 | 234 | 235 | def bhattacharyya_dist(mu1, mu2, Sigma1, Sigma2): 236 | Sig = (Sigma1+Sigma2)/2 237 | ldet_s = np.linalg.det(Sig) 238 | ldet_s1 = np.linalg.det(Sigma1) 239 | ldet_s2 = np.linalg.det(Sigma2) 240 | d1 = distance.mahalanobis(mu1,mu2,np.linalg.inv(Sig))**2/8 241 | d2 = 0.5*np.log(ldet_s) - 0.25*np.log(ldet_s1) - 0.25*np.log(ldet_s2) 242 | return d1+d2 243 | 244 | 245 | 246 | def merge_bhat(x,bp_gmm,cutoff): 247 | 248 | clustering = {} 249 | clustering['bp_ncluster'] = bp_gmm.n_components 250 | clustering['bp_clustering'] = bp_gmm.predict(x) 251 | clustering['bp_pro'] = bp_gmm.weights_ 252 | clustering['bp_mean'] = bp_gmm.means_ 253 | clustering['bp_Sigma'] = bp_gmm.covariances_ 254 | 255 | #clustering['last_pair_similarity'] = _get_last_pair_similarity_2D(x,bp_gmm) 256 | gmm = copy.deepcopy(bp_gmm) 257 | 258 | mu = gmm.means_ 259 | Sigma = gmm.covariances_ 260 | weights = list(gmm.weights_) 261 | posterior = gmm.predict_proba(x) 262 | 263 | current_ncluster = len(mu) 264 | mergedtonumbers = [int(item) for item in range(current_ncluster)] 265 | 266 | merge_flag = True 267 | clustering['bhat_dic_track'] = {} 268 | merge_time = 0 269 | 270 | while current_ncluster > 1 and merge_flag: 271 | 272 | bhat_dic = {} 273 | 274 | for c_pair in itertools.combinations(range(current_ncluster), 2): 275 | m1 = mu[c_pair[0],:] 276 | m2 = mu[c_pair[1],:] 277 | Sigma1 = Sigma[c_pair[0],:,:] 278 | Sigma2 = Sigma[c_pair[1],:,:] 279 | bhat_dic[c_pair] = np.exp(-bhattacharyya_dist(m1, m2, Sigma1, Sigma2)) 280 | 281 | clustering['bhat_dic_track'][merge_time] = bhat_dic 282 | merge_time = merge_time + 1 283 | 284 | max_pair = max(bhat_dic.items(), key=operator.itemgetter(1))[0] 285 | max_val = bhat_dic[max_pair] 286 | 287 | if max_val > cutoff: 288 | merged_i,merged_j = max_pair 289 | # update mergedtonumbers 290 | for idx,val in enumerate(mergedtonumbers): 291 | if val == merged_j: 292 | mergedtonumbers[idx] = merged_i 293 | if val > merged_j: 294 | mergedtonumbers[idx] = val - 1 295 | 296 | # update parameters 297 | weights[merged_i] = weights[merged_i] + weights[merged_j] 298 | 299 | posterior[:,merged_i] = posterior[:,merged_i] + posterior[:,merged_j] 300 | 301 | w = posterior[:,merged_i]/np.sum(posterior[:,merged_i]) 302 | mu[merged_i,:] = np.dot(w,x)# update 303 | 304 | x_centered = x.apply(lambda xx: xx-mu[merged_i,:],1) 305 | Sigma[merged_i,:,:] = np.cov(x_centered.T,aweights=w,bias=1) 306 | 307 | del weights[merged_j] 308 | #weights = np.delete(weights,merged_j,0) 309 | mu = np.delete(mu,merged_j,0) 310 | Sigma = np.delete(Sigma,merged_j,0) 311 | posterior = np.delete(posterior,merged_j,1) 312 | current_ncluster = current_ncluster - 1 313 | 314 | else: 315 | merge_flag = False 316 | 317 | 318 | clustering['similarity_stopped'] = np.min(list(bhat_dic.values())) 319 | clustering['mp_ncluster'] = mu.shape[0] 320 | clustering['mergedtonumbers'] = mergedtonumbers 321 | clustering['mp_clustering'] = list(np.apply_along_axis(np.argmax,1,posterior)) 322 | 323 | return clustering 324 | 325 | 326 | 327 | def _set_small_leaf(data): 328 | all_clustering_dic = {} 329 | all_clustering_dic[1] = {} 330 | 331 | F_set = data.columns.values.tolist() 332 | all_clustering = {} 333 | 334 | for item in itertools.combinations(F_set, 1): 335 | x = data.loc[:,item] 336 | all_clustering[item] = _set_one_component(x) 337 | 338 | all_clustering_dic[1] = all_clustering 339 | 340 | return all_clustering_dic 341 | 342 | 343 | 344 | def _set_one_component(x): 345 | 346 | clustering = {} 347 | clustering['bp_ncluster'] = 1 348 | clustering['bp_clustering'] = [0]*len(x) 349 | clustering['bp_pro'] = [1] 350 | clustering['bp_mean'] = np.mean(x) 351 | clustering['bp_Sigma'] = np.var(x) 352 | clustering['bhat_dic_track'] = {} 353 | clustering['similarity_stopped'] = 1 354 | clustering['mp_ncluster'] = 1 355 | clustering['mp_clustering'] = [0]*len(x) 356 | clustering['mergedtonumbers'] = [0] 357 | 358 | return clustering 359 | 360 | 361 | 362 | def BIC(X, max_k = 10,bic = 'bic'): 363 | """return best k chosen with BIC method""" 364 | 365 | bic_list = _get_BIC_k(X, min(max_k,len(np.unique(X)))) 366 | 367 | if bic == 'bic': 368 | return min(np.argmin(bic_list)+1,_FindElbow(bic_list)),bic_list 369 | elif bic == 'bic_min': 370 | return np.argmin(bic_list)+1,bic_list 371 | elif bic == 'bic_elbow': 372 | return _FindElbow(bic_list),bic_list 373 | 374 | 375 | 376 | def _get_BIC_k(X, max_k): 377 | """compute BIC scores with k belongs to [1,max_k]""" 378 | bic_list = [] 379 | for i in range(1,max_k+1): 380 | gmm_i = GaussianMixture(i).fit(X) 381 | bic_list.append(gmm_i.bic(X)) 382 | return bic_list 383 | 384 | 385 | 386 | def _FindElbow(bic_list): 387 | """return elbow point, defined as the farthest point from the line through the first and last points""" 388 | if len(bic_list) == 1: 389 | return 1 390 | else: 391 | a = bic_list[0] - bic_list[-1] 392 | b = len(bic_list) - 1 393 | c = bic_list[-1]*1 - bic_list[0]*len(bic_list) 394 | dis = np.abs(a*range(1,len(bic_list)+1) + b*np.array(bic_list) + c)/np.sqrt(a**2+b**2) 395 | return np.argmax(dis)+1 396 | 397 | 398 | -------------------------------------------------------------------------------- /CITEsort/Visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Aug 20 11:51:16 2019 5 | 6 | @author: lqyair 7 | """ 8 | 9 | #import pandas as pd 10 | import numpy as np 11 | #from BTreeTraversal import BTreeTraversal 12 | from matplotlib import pyplot as plt 13 | from scipy import stats 14 | import pandas as pd 15 | import matplotlib 16 | 17 | #node = traversal.get_node(0) 18 | #nodename = traversal.nodename[0] 19 | 20 | def visualize_node(data,node,nodename,**plot_para): 21 | 22 | #matplotlib.rcParams['figure.dpi'] = 200 23 | 24 | # plot_para: savefig, outpath, 25 | savefig = plot_para.get('savefig',False) 26 | savepath = plot_para.get('savepath','.') 27 | savename = plot_para.get('savename','.') 28 | 29 | current_indices = node.indices 30 | node_data = data.loc[current_indices,:] 31 | 32 | plt.figure(figsize=(12,((data.shape[1]-1)//5+1)*2), dpi=70) 33 | plt.style.use('seaborn-white') 34 | #ax.tick_params(axis='both', which='major', labelsize=10) 35 | 36 | 37 | if node.key == ('leaf',) and node_data.shape[0] <= 20 : 38 | markers = node_data.columns.values.tolist() 39 | for i in range(len(markers)): 40 | X = node_data.loc[:,markers[i]].values.reshape(-1, 1) 41 | plt.subplot( (len(markers)-1)//5+1,5,i+1 ) 42 | plt.hist(X,bins=30, density = True, color = "lightblue") 43 | plt.ylabel('density',fontsize=10) 44 | plt.title( markers[i],fontsize=12) 45 | 46 | else: 47 | all_clustering = node.all_clustering_dic[1] 48 | markers = list(all_clustering.keys()) 49 | 50 | for i in range(len(markers)): 51 | 52 | X = node_data.loc[:,markers[i]].values.reshape(-1, 1) 53 | 54 | plt.subplot( (len(markers)-1)//5+1,5,i+1 ) 55 | 56 | bins = np.linspace(min(X),max(X),500) 57 | cols = ['r','g','b','c','m','y','darkorange','lightgreen','lightpink','darkgray'] 58 | 59 | bp_ncluster = int(all_clustering[markers[i]]['bp_ncluster']) 60 | mp_ncluster = 1 # default 61 | weights = all_clustering[markers[i]]['bp_pro'] 62 | means = all_clustering[markers[i]]['bp_mean'] 63 | sigmas = np.sqrt(all_clustering[markers[i]]['bp_Sigma']) 64 | 65 | y = np.zeros((len(bins),bp_ncluster)) 66 | 67 | for k in range(bp_ncluster): 68 | y[:,k] = (weights[k] * stats.norm.pdf(bins, means[k], sigmas[k]))[:,0] 69 | plt.plot(bins,y[:,k],linewidth=0.6,color='black') 70 | 71 | if bp_ncluster > 1: 72 | mp_ncluster = all_clustering[markers[i]]['mp_ncluster'] 73 | mergedtonumbers = all_clustering[markers[i]]['mergedtonumbers'] 74 | 75 | for k in range(mp_ncluster): 76 | 77 | merged_idx = [idx for idx,val in enumerate(mergedtonumbers) if val == k] 78 | y_merged = np.apply_along_axis(sum,1,y[:,merged_idx]) 79 | 80 | plt.plot(bins,y_merged,cols[k],linewidth=2,linestyle='-.') 81 | 82 | subfig_title = '_'.join(markers[i])+' ('+str(mp_ncluster)+'|'+str(bp_ncluster)+') ' + str(round(all_clustering[markers[i]]['similarity_stopped'],2)) 83 | 84 | if markers[i] == node.key: 85 | plt.title( subfig_title,fontsize=12,color='red') 86 | else: 87 | plt.title( subfig_title,fontsize=12,color='darkgrey' if mp_ncluster <= 1 else 'black') 88 | 89 | plt.hist(X,bins=30, density = True, color = "lightblue") 90 | plt.ylabel('density',fontsize=10) 91 | 92 | plt.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=0.9, hspace=0.4,wspace=0.45) 93 | plt.suptitle(nodename+' | '+str(len(current_indices))+' cells',fontsize=15,color="darkblue") 94 | plt.subplots_adjust(top=0.85) 95 | #plt.savefig(savepath+'/visualize_node.png') 96 | if savefig == True: 97 | plt.savefig(savepath+'/'+savename+'_'+nodename+'.png') 98 | plt.show() 99 | 100 | 101 | 102 | 103 | 104 | 105 | #import matplotlib.pyplot as plt 106 | import seaborn as sns; sns.set() 107 | 108 | def visualize_pair(data,node,nodename,**plot_para): 109 | 110 | # plot_para: savefig, outpath, 111 | savefig = plot_para.get('savefig',False) 112 | savepath = plot_para.get('savepath','.') 113 | savename = plot_para.get('savename','.') 114 | 115 | all_clustering = node.all_clustering_dic[2] 116 | marker_pairs = list(all_clustering.keys()) 117 | current_indices = node.indices 118 | 119 | plt.figure(figsize=(12,((len(marker_pairs)-1)//5+1)*2.5), dpi=96) 120 | sns.set_style("white") 121 | 122 | for i in range(len(marker_pairs)): 123 | 124 | marker1,marker2 = marker_pairs[i] 125 | X1 = data.loc[current_indices, marker1] 126 | X2 = data.loc[current_indices, marker2] 127 | 128 | bp_clustering = all_clustering[marker_pairs[i]]['bp_clustering'] 129 | mp_clustering = all_clustering[marker_pairs[i]]['mp_clustering'] 130 | 131 | mp_ncluster = all_clustering[marker_pairs[i]]['mp_ncluster'] 132 | bp_ncluster = all_clustering[marker_pairs[i]]['bp_ncluster'] 133 | 134 | data_pair = pd.DataFrame({marker1:X1,marker2:X2, 135 | 'bp':bp_clustering, 136 | 'mp':mp_clustering},index=node.indices) 137 | 138 | plt.subplot( (len(marker_pairs)-1)//5+1,5,i+1 ) 139 | 140 | #shapes = ['s','X','+'] 141 | #markers = dict(zip(np.unique(mp_clustering),[shapes[idx] for idx in range(mp_ncluster)])) 142 | sns.scatterplot(x=marker1, y=marker2,hue="bp",style="mp", 143 | data=data_pair,s=15,legend=False); 144 | 145 | marker_pair_joint = marker_pairs[i][0]+'_'+marker_pairs[i][1] 146 | subfig_title = marker_pair_joint+' ('+str(mp_ncluster)+'|'+str(bp_ncluster)+') ' + str(round(all_clustering[marker_pairs[i]]['similarity_stopped'],2)) 147 | 148 | if marker_pairs[i] == node.key: 149 | plt.title( subfig_title,fontsize=12,color='red') 150 | else: 151 | plt.title( subfig_title,fontsize=12,color='darkgrey' if mp_ncluster <= 1 else 'black') 152 | 153 | plt.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=0.9, hspace=0.6,wspace=0.45) 154 | plt.suptitle(nodename+' | '+str(len(current_indices))+' cells',fontsize=15,color="darkblue") 155 | plt.subplots_adjust(top=0.85) 156 | #plt.savefig(savepath+'/visualize_node.png') 157 | if savefig == True: 158 | plt.savefig(savepath+'/'+savename+'_'+nodename+'.png') 159 | plt.show() 160 | 161 | 162 | 163 | 164 | 165 | 166 | def plot_keymarker(data,traversal,node_ID,dpi=5,savepath=None): 167 | 168 | node = traversal.get_node(node_ID) 169 | 170 | current_indices = node.indices 171 | node_data = data.loc[current_indices,:] 172 | 173 | marker_dkey = node.key 174 | 175 | if len(marker_dkey) == 1: 176 | marker = marker_dkey[0] 177 | 178 | clustering = node.all_clustering_dic[1][marker_dkey] 179 | 180 | X = node_data.loc[:,marker_dkey].values.reshape(-1, 1) 181 | 182 | bins = np.linspace(min(X),max(X),500) 183 | cols = ['firebrick','navy','lightgreen','darkorange'] 184 | 185 | bp_ncluster = int(clustering['bp_ncluster']) 186 | mp_ncluster = 1 # default 187 | weights = clustering['bp_pro'] 188 | means = clustering['bp_mean'] 189 | sigmas = np.sqrt(clustering['bp_Sigma']) 190 | 191 | y = np.zeros((len(bins),bp_ncluster)) 192 | 193 | #plt.figure(figsize=(4,3), dpi=24) 194 | plt.style.use('seaborn-white') 195 | matplotlib.rcParams['axes.linewidth'] = 0.1 196 | fig, ax = plt.subplots(figsize=(4,3), dpi=dpi) 197 | 198 | for k in range(bp_ncluster): 199 | y[:,k] = (weights[k] * stats.norm.pdf(bins, means[k], sigmas[k]))[:,0] 200 | plt.plot(bins,y[:,k],linewidth=0.05,color='black') 201 | 202 | mp_ncluster = clustering['mp_ncluster'] 203 | 204 | # red -- component with bigger mean 205 | mp_means = [] 206 | for i in range(mp_ncluster): 207 | mp_means.append(np.mean(X[np.array(clustering['mp_clustering'])==i,0])) 208 | 209 | idx = list(np.argsort(mp_means)) 210 | idx.reverse() 211 | 212 | mergedtonumbers = clustering['mergedtonumbers'] 213 | 214 | for k in range(mp_ncluster): 215 | 216 | merged_idx = [ii for ii,val in enumerate(mergedtonumbers) if val == k] 217 | y_merged = np.apply_along_axis(sum,1,y[:,merged_idx]) 218 | 219 | plt.plot(bins,y_merged,cols[idx.index(k)],linewidth=0.8,linestyle='--') 220 | 221 | #subfig_title = str(node_ID) + '_'+ marker# +' ('+str(mp_ncluster)+'|'+str(bp_ncluster)+') ' + str(round(clustering['similarity_stopped'],2)) 222 | 223 | plt.hist(X,bins=30, density = True, color = "lightblue",linewidth=0) 224 | 225 | #plt.title( subfig_title,fontsize=16) 226 | plt.ylabel('density',fontsize=18) 227 | plt.xlabel(marker,fontsize=18) 228 | plt.subplots_adjust(top=0.8, bottom=0.2, left=0.15, right=0.9, hspace=0.2,wspace=0.8) 229 | ax.tick_params(axis='both', which='major', labelsize=10) 230 | if savepath is not None: 231 | plt.savefig(savepath+'/'+str(node_ID)+'_'+marker+'.pdf') 232 | plt.show() 233 | 234 | if len(marker_dkey) == 2: 235 | 236 | marker1,marker2 = marker_dkey 237 | 238 | subdata = node_data.loc[:,marker_dkey] 239 | clustering = node.all_clustering_dic[2][marker_dkey] 240 | cols = ['firebrick','navy','lightgreen','darkorange'] 241 | 242 | mp_ncluster = clustering['mp_ncluster'] 243 | #mp_clustering = clustering['mp_clustering'] 244 | componentidx = np.array(clustering['mp_clustering'])==1 245 | p1_mean = node_data.loc[componentidx,marker_dkey].mean(0) 246 | p2_mean = node_data.loc[~componentidx,marker_dkey].mean(0) 247 | 248 | p1_cosine = sum(p1_mean)/np.sqrt(sum(p1_mean**2)) 249 | p2_cosine = sum(p2_mean)/np.sqrt(sum(p2_mean**2)) 250 | 251 | plt.style.use('seaborn-white') 252 | matplotlib.rcParams['axes.linewidth'] = 0.1 253 | fig, ax = plt.subplots(figsize=(4,3), dpi=dpi) 254 | 255 | if p1_cosine > p2_cosine: 256 | plt.scatter(subdata.loc[componentidx,marker1],subdata.loc[componentidx,marker2],c='firebrick',s=1) 257 | plt.scatter(subdata.loc[~componentidx,marker1],subdata.loc[~componentidx,marker2],c='navy',s=1) 258 | else: 259 | plt.scatter(subdata.loc[componentidx,marker1],subdata.loc[componentidx,marker2],c='navy',s=1) 260 | plt.scatter(subdata.loc[~componentidx,marker1],subdata.loc[~componentidx,marker2],c='firebrick',s=1) 261 | 262 | sns.kdeplot(subdata[marker1], subdata[marker2], ax=ax, n_levels = 5, cmap = 'Wistia') 263 | 264 | plt.xlabel(marker1,fontsize=18) 265 | plt.ylabel(marker2,fontsize=18) 266 | ax.tick_params(axis='both', which='major', labelsize=10) 267 | plt.subplots_adjust(top=0.8, bottom=0.2, left=0.15, right=0.9, hspace=0.2,wspace=0.8) 268 | if savepath is not None: 269 | plt.savefig(savepath+'/'+str(node_ID)+'_'+marker1+'_'+marker2+'.pdf') 270 | 271 | plt.show() 272 | 273 | 274 | 275 | 276 | 277 | 278 | from subprocess import call 279 | #from IPython.display import Image 280 | #import pandas as pd 281 | #import numpy as np 282 | 283 | def visualize_tree(root,data,outpath,filename,compact=False): 284 | """write tree structure into .dot and .png files.""" 285 | 286 | # open a file, and design general format 287 | tree_dot = open(outpath+'/'+filename+'.dot','w') 288 | tree_dot.writelines('digraph Tree {') 289 | tree_dot.writelines('node [shape=box, style="filled, rounded", color="black", fontname=helvetica] ;') 290 | tree_dot.writelines('edge [fontname=helvetica] ;') 291 | 292 | 293 | #tree_dot = _write_tree_bfs(root,tree_dot) 294 | # Base Case 295 | if root is None: 296 | return 297 | 298 | 299 | # Create an empty queue for level order traversal 300 | queue = [] 301 | nodelist = [] 302 | idxStack = [] 303 | 304 | tot_cells = len(root.indices) 305 | #means_in_root = root.marker_summary['mean'] 306 | #stds_in_root = root.marker_summary['std'] 307 | means_in_root = data.mean(axis = 0) 308 | stds_in_root = data.std(axis = 0) 309 | markers = means_in_root.index.values.tolist() 310 | 311 | # auxiliary parameters for color display 312 | branch_col = pd.Series({1:'#ffccccff',2:'#ffff99ff',3:'#CC99CC',4:'#99CCFF'}) 313 | leaf_col = matplotlib.colors.Normalize(vmin=0, vmax=np.log(tot_cells)) 314 | 315 | node = root 316 | 317 | # Enqueue Root and initialize height 318 | queue.append(node) 319 | 320 | i = 0 321 | #print(str(i)+'_'+root.key) 322 | all_clustering = node.all_clustering_dic[len(node.key)] 323 | bp_ncluster = all_clustering[node.key]['bp_ncluster'] 324 | mp_ncluster = all_clustering[node.key]['mp_ncluster'] 325 | tree_dot.writelines(str(i)+' [label="'+str(i)+'_'+'_'.join(node.key)+ \ 326 | '\\nNum: '+str(len(node.indices))+ \ 327 | '\\n('+str(mp_ncluster)+'|'+str(bp_ncluster)+')",fillcolor="#ff9966ff",fontsize=25];') 328 | nodelist.append(node.key) 329 | idxStack.append(i) 330 | 331 | while(len(queue) > 0): 332 | # Print front of queue and remove it from queue 333 | node = queue.pop(0) 334 | idx = idxStack.pop(0) 335 | 336 | # left child 337 | if node.left is not None: 338 | nodelist.append(node.left.key) 339 | queue.append(node.left) 340 | i = i + 1 341 | idxStack.append(i) 342 | #print(str(i)+'_'+node.left.key) 343 | 344 | percent = str(round(len(node.left.indices)/tot_cells*100,2))+'%' 345 | mean_temp = data.loc[node.left.indices,:].mean(0) 346 | 347 | if node.left.key == ('leaf',): 348 | # left leaf node 349 | if compact: 350 | offset_in_leaf = '' 351 | else: 352 | temp = (mean_temp - means_in_root)/stds_in_root 353 | offset_in_leaf = '\n' + markers[0]+': '+str(round(temp[markers[0]],2)) 354 | for k in range(1,len(markers)): 355 | offset_in_leaf = offset_in_leaf + '\n' + markers[k]+': '+ str(round(temp[markers[k]],2)) 356 | 357 | col = matplotlib.colors.to_hex(matplotlib.cm.Greens(leaf_col(np.log(len(node.left.indices))))) 358 | tree_dot.writelines(str(i)+' [label="'+str(i)+'_'+'_'.join(node.left.key)+'\\n'+ \ 359 | str(len(node.left.indices))+ ' ('+percent+')\\n'+ \ 360 | offset_in_leaf+'",fillcolor="'+col+'",fontsize=20];') 361 | else: 362 | # left branch node 363 | all_clustering = node.left.all_clustering_dic[len(node.left.key)] 364 | bp_ncluster = all_clustering[node.left.key]['bp_ncluster'] 365 | mp_ncluster = all_clustering[node.left.key]['mp_ncluster'] 366 | 367 | tree_dot.writelines(str(i)+' [label="'+str(i)+'_'+'_'.join(node.left.key)+'\\n'+ \ 368 | str(len(node.left.indices))+' ('+percent+')\\n'+ \ 369 | '('+str(mp_ncluster)+'|'+str(bp_ncluster)+')",fillcolor="'+branch_col[len(node.left.key)]+'",fontsize=25];') 370 | 371 | # edge from parent to left node 372 | offset = '' 373 | for m in nodelist[idx]: 374 | val = (mean_temp[m] - means_in_root[m])/stds_in_root[m] 375 | offset = offset + str(round(val,2))+'\n' 376 | #print(str(idx)+'->'+str(i)) 377 | tree_dot.writelines(str(idx)+' -> '+str(i)+ ' [labeldistance=3, label = "'+offset+'",fontsize=25, color='+['black','red'][node.where_dominant=='left']+\ 378 | ', style='+['solid','bold'][node.where_dominant=='left']+'];') 379 | 380 | # right child 381 | if node.right is not None: 382 | nodelist.append(node.right.key) 383 | queue.append(node.right) 384 | i = i + 1 385 | idxStack.append(i) 386 | #print(str(i)+'_'+node.right.key) 387 | 388 | percent = str(round(len(node.right.indices)/tot_cells*100,2))+'%' 389 | mean_temp = data.loc[node.right.indices,:].mean(0) 390 | 391 | if node.right.key == ('leaf',): 392 | # right leaf node 393 | if compact: 394 | offset_in_leaf = '' 395 | else: 396 | temp = (mean_temp - means_in_root)/stds_in_root 397 | offset_in_leaf = '\n' + markers[0]+': '+str(round(temp[markers[0]],2)) 398 | for k in range(1,len(markers)): 399 | offset_in_leaf = offset_in_leaf + '\n' + markers[k]+': '+ str(round(temp[markers[k]],2)) 400 | 401 | col = matplotlib.colors.to_hex(matplotlib.cm.Greens(leaf_col(np.log(len(node.right.indices))))) 402 | tree_dot.writelines(str(i)+' [label="'+str(i)+'_'+'_'.join(node.right.key)+'\\n'+ \ 403 | str(len(node.right.indices))+ ' ('+percent+')'+'\\n'+ \ 404 | offset_in_leaf+'",fillcolor="'+col+'",fontsize=20];') 405 | 406 | else: 407 | # right branch node 408 | all_clustering = node.right.all_clustering_dic[len(node.right.key)] 409 | bp_ncluster = all_clustering[node.right.key]['bp_ncluster'] 410 | mp_ncluster = all_clustering[node.right.key]['mp_ncluster'] 411 | 412 | tree_dot.writelines(str(i)+' [label="'+str(i)+'_'+'_'.join(node.right.key)+'\\n'+ \ 413 | str(len(node.right.indices))+' ('+percent+')\\n'+ \ 414 | '('+str(mp_ncluster)+'|'+str(bp_ncluster)+')",fillcolor="'+branch_col[len(node.right.key)]+'",fontsize=25];') 415 | 416 | # edge from parent to right node 417 | offset = '' 418 | for m in nodelist[idx]: 419 | val = (mean_temp[m] - means_in_root[m])/stds_in_root[m] 420 | offset = offset + str(round(val,2))+'\n' 421 | #print(str(idx)+'->'+str(i)) 422 | tree_dot.writelines(str(idx)+' -> '+str(i)+' [labeldistance=3, label = "'+offset+'",fontsize=25, color='+['black','red'][node.where_dominant=='right']+ \ 423 | ', style='+['solid','bold'][node.where_dominant=='right']+'];') 424 | 425 | # main body is completed 426 | 427 | tree_dot.writelines('}') 428 | tree_dot.close() 429 | 430 | # Convert to png using system command (requires Graphviz) 431 | call(['dot', '-Tpdf', outpath+'/'+filename+'.dot', '-o', outpath+'/'+filename+'.pdf', '-Gdpi=100']) 432 | 433 | # Display in jupyter notebook 434 | #Image(filename = outpath+'/GatingTree.png') 435 | 436 | 437 | -------------------------------------------------------------------------------- /CITEsort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort/__init__.py -------------------------------------------------------------------------------- /CITEsort/traversal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Dec 29 20:58:29 2019 5 | 6 | @author: lianqiuyu 7 | """ 8 | 9 | import pandas as pd 10 | import numpy as np 11 | from matplotlib import pyplot as plt 12 | from scipy import stats 13 | 14 | #from Visualize import visualize_node,visualize_pair 15 | 16 | class Traversal: 17 | 18 | def __init__(self,tree,c_type,method='bfs',nodelist=None,nodename=None,markers=None,n_samples=None,\ 19 | tree_summary=None,leaf_summary=None,n_components=None,ll=None,bic=None,leaf_ID=None,\ 20 | leaf_summary_code=None,multiplet_ratio=None,multiplet_predict=None): 21 | 22 | #print('initializing...') 23 | 24 | self.tree = tree 25 | self.method = method 26 | if self.method == 'bfs': 27 | self.nodelist = self.levelOrderTraversal() 28 | if self.method == 'dfs': 29 | self.nodelist = self.preorderTraversal() 30 | 31 | nodename_temp = ['_'.join(x.key) for x in self.nodelist] 32 | self.nodename = [str(i)+'_'+nodename_temp[i] for i in range(len(nodename_temp))] 33 | self.markers = [x[0] for x in self.nodelist[0].all_clustering[1]] 34 | 35 | self.tree_summary, self.leaf_summary = self.summarize() 36 | 37 | self.n_components = self.leaf_summary.shape[0] 38 | self.ll = self.leaf_summary['ll'].sum() 39 | n_features = len(self.markers) 40 | 41 | mean_params = self.n_components * n_features 42 | if c_type == 'diag': 43 | cov_params = self.n_components * n_features 44 | if c_type == 'full': 45 | cov_params = self.n_components * n_features * (n_features + 1) / 2. 46 | 47 | n_parameters = int(self.n_components-1 + mean_params + cov_params) 48 | self.n_samples = len(self.nodelist[0].indices) 49 | self.bic = n_parameters * np.log(self.n_samples) - 2 * self.ll * self.n_samples 50 | self.leaf_ID = [int(x.split('_')[0]) for x in self.leaf_summary.index] 51 | self.predict_ACT_BCT() 52 | self.predict_multiplets() 53 | 54 | 55 | 56 | def summarize(self): 57 | #print('summarizing...') 58 | #num_node = len(self.nodename) 59 | n_samples = len(self.nodelist[0].indices) 60 | tree_summary = pd.DataFrame({'Count':[len(x.indices) for x in self.nodelist], 61 | 'Proportion': [len(x.indices)/n_samples for x in self.nodelist], 62 | 'Weight':[x.weight for x in self.nodelist], 63 | 'll':[x.ll_tot for x in self.nodelist], 64 | 'stop':[x.stop for x in self.nodelist] 65 | },index=self.nodename) 66 | 67 | mean_m = pd.DataFrame(np.zeros([tree_summary.shape[0],len(self.markers)]), 68 | index = self.nodename,columns = self.markers) 69 | 70 | for i in range(mean_m.shape[0]): 71 | mean_m.iloc[i,:] = self.nodelist[i].mean_vec 72 | 73 | tree_summary = pd.concat([tree_summary,mean_m],axis=1) 74 | 75 | leaf_summary = tree_summary.loc[[x for x in self.nodename if x.split('_')[1]=='leaf'],:] 76 | leaf_summary = leaf_summary.sort_values(by='Count',ascending=False) 77 | return tree_summary,leaf_summary 78 | 79 | 80 | def get_node(self,nodeID): 81 | return self.nodelist[nodeID] 82 | 83 | 84 | def get_leaf_label(self): 85 | """generate label (one column, indicating which leaf cells are assigned.)""" 86 | label = pd.DataFrame({'GEM':self.tree.indices,'Label':[None]*len(self.tree.indices)},index=self.tree.indices) 87 | for i in range(len(self.nodename)): 88 | if self.nodename[i].split('_')[1] == 'leaf': 89 | label.loc[self.nodelist[i].indices,'Label'] = self.nodename[i] 90 | 91 | return label 92 | 93 | 94 | def plot_node(self,data,ID): 95 | node = self.nodelist[ID] 96 | node_data = data.loc[node.indices,:] 97 | plt.figure(figsize=(10,((data.shape[1]-1)//4+1)*2), dpi=96) 98 | plt.style.use('seaborn-white') 99 | if node.key == ('leaf',): 100 | for i in range(len(self.markers)): 101 | X = node_data.loc[:,self.markers[i]].values.reshape(-1, 1) 102 | bins = np.linspace(min(X),max(X),500) 103 | den = stats.norm.pdf(bins, node.mean_vec[i], np.sqrt(node.covariance_vec[i,i])) 104 | plt.subplot( (len(self.markers)-1)//5+1,5,i+1 ) 105 | plt.hist(X,bins=30, density = True, color = "lightblue") 106 | plt.plot(bins,den,linewidth=1,color='black') 107 | plt.ylabel('density',fontsize=10) 108 | plt.title( self.markers[i],fontsize=12) 109 | 110 | else: 111 | 112 | for i in range(len(self.markers)): 113 | 114 | X = node_data.loc[:,self.markers[i]].values.reshape(-1, 1) 115 | bins = np.linspace(min(X),max(X),500) 116 | plt.subplot( (len(self.markers)-1)//4+1,5,i+1 ) 117 | if (self.markers[i],) in node.all_clustering[1]: 118 | weights = node.all_clustering[1][(self.markers[i],)]['component_weights'] 119 | means = node.all_clustering[1][(self.markers[i],)]['means'] 120 | covariances = node.all_clustering[1][(self.markers[i],)]['covariances'] 121 | y = np.zeros((len(bins),2)) 122 | y[:,0] = (weights[0] * stats.norm.pdf(bins, means[0], np.sqrt(covariances[0])))[:,0] 123 | y[:,1] = (weights[1] * stats.norm.pdf(bins, means[1], np.sqrt(covariances[1])))[:,0] 124 | if means[0] > means[1]: 125 | cols = ['red','blue'] 126 | else: 127 | cols = ['blue','red'] 128 | plt.plot(bins,y[:,0],linewidth=1,color=cols[0]) 129 | plt.plot(bins,y[:,1],linewidth=1,color=cols[1]) 130 | else: 131 | den = stats.norm.pdf(bins, node.mean_vec[i], np.sqrt(node.covariance_vec[i,i])) 132 | plt.plot(bins,den,linewidth=1,color='black') 133 | 134 | plt.hist(X,bins=30, density = True, color = "lightblue") 135 | 136 | subfig_title = self.markers[i] 137 | if (self.markers[i],) == node.key: 138 | plt.title( subfig_title,fontsize=12,color='red') 139 | else: 140 | plt.title( subfig_title,fontsize=12,color='darkgrey') 141 | 142 | plt.ylabel('density',fontsize=10) 143 | 144 | plt.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=0.9, hspace=0.4,wspace=0.45) 145 | plt.suptitle(self.nodename[ID]+' | '+str(len(node.indices))+' cells',fontsize=15,color="darkblue") 146 | plt.subplots_adjust(top=0.8) 147 | plt.show() 148 | 149 | 150 | 151 | 152 | def predict_ACT_BCT(self): 153 | 154 | markers_cutoff = self._compute_markers_cutoff() 155 | markers = self.markers 156 | #leaf_p_markers = {} 157 | code = pd.DataFrame(np.zeros([len(self.leaf_ID),len(markers)]),index=self.leaf_summary.index,columns=markers) 158 | 159 | for leaf in code.index: 160 | mean = self.leaf_summary.loc[leaf,markers] 161 | for m in markers: 162 | code.loc[leaf,m] = 1 if mean[m] > markers_cutoff[m] else 0 163 | 164 | BCT_dic = {} 165 | ACT_dic = {} 166 | #ACT_tri_dic = {} 167 | 168 | for idx in code.index: 169 | #print(str(idx)) 170 | if not BCT_dic: 171 | BCT_dic[idx] = np.sign(code.loc[idx,markers]) 172 | else: 173 | new_center = np.sign(code.loc[idx,markers]) 174 | new_flag = True 175 | terms = list(BCT_dic.keys()) 176 | 177 | for i in range(len(terms)): 178 | term1 = terms[i] 179 | center1 = BCT_dic[term1] 180 | for j in range(i,len(terms)): 181 | term2 = terms[j] 182 | center2 = BCT_dic[term2] 183 | merge = pd.concat([center1,center2],axis=1) 184 | if sum(new_center == merge.max(1)) == len(markers): 185 | new_flag = False 186 | if idx in ACT_dic: 187 | ACT_dic[idx].append((term1,term2)) 188 | else: 189 | ACT_dic[idx] = [(term1,term2)] 190 | 191 | if new_flag: 192 | for i in range(len(terms)): 193 | term1 = terms[i] 194 | center1 = BCT_dic[term1] 195 | for j in range(i+1,len(terms)): 196 | term2 = terms[j] 197 | center2 = BCT_dic[term2] 198 | for k in range(j+1,len(terms)): 199 | term3 = terms[k] 200 | center3 = BCT_dic[term3] 201 | merge = pd.concat([center1,center2,center3],axis=1) 202 | if sum(new_center == merge.max(1)) == len(markers): 203 | new_flag = False 204 | if idx in ACT_dic: 205 | ACT_dic[idx].append((term1,term2,term3)) 206 | else: 207 | ACT_dic[idx] = [(term1,term2,term3)] 208 | 209 | if new_flag: 210 | BCT_dic[idx] = new_center 211 | 212 | 213 | leaf_summary_code = self.leaf_summary.drop(columns=self.markers) 214 | 215 | #leaf_summary_code.loc[list(ACT_dic.keys()),'Count'].sum()/data.shape[0] 216 | # 0.2504577309173559 217 | 218 | leaf_summary_code['BCT_predict'] = 0 219 | leaf_summary_code.loc[list(BCT_dic.keys()),'BCT_predict'] = 1 220 | 221 | leaf_summary_code['ACT_merge'] = None 222 | for term in ACT_dic.keys(): 223 | leaf_summary_code.loc[term,'ACT_merge'] = str(ACT_dic[term]) 224 | 225 | 226 | leaf_summary_code['merge_const'] = 0 227 | for term in ACT_dic.keys(): 228 | p = [] 229 | for pair in ACT_dic[term]: 230 | temp = 1 231 | for pair_i in pair: 232 | temp = leaf_summary_code.loc[pair_i,'Weight'] * temp 233 | 234 | p.append(temp) 235 | #p /= len(ACT_dic[term]) 236 | leaf_summary_code.loc[term,'merge_const'] = np.max(p)/leaf_summary_code.loc[term,'Weight'] 237 | 238 | 239 | self.leaf_summary_code = pd.concat([leaf_summary_code,code],axis=1) 240 | 241 | 242 | 243 | def predict_multiplets(self): 244 | multiplet_predict = pd.Series([0]*self.n_samples,index=self.nodelist[0].indices) 245 | for leaf in self.leaf_summary_code.index: 246 | if self.leaf_summary_code.loc[leaf,'BCT_predict'] == 0 : 247 | multiplet_predict[self.nodelist[int(leaf.split('_')[0])].indices] = 1 248 | 249 | self.multiplet_ratio = sum(multiplet_predict)/len(multiplet_predict) 250 | self.multiplet_predict = multiplet_predict 251 | 252 | 253 | def _compute_markers_cutoff(self): 254 | 255 | _all = self.nodelist[0] 256 | 257 | markers_cutoff = [] 258 | for m in _all.all_clustering[1]: 259 | 260 | m1,m2 = _all.all_clustering[1][m]['means'][:,0] 261 | std1,std2 = np.sqrt(_all.all_clustering[1][m]['covariances'][:,0,0]) 262 | s1,s2 = _all.all_clustering[1][m]['component_weights'] 263 | inter_X = self._solve(m1,m2,std1,std2,s1,s2) 264 | if len(inter_X) == 1: 265 | markers_cutoff.append(inter_X) 266 | if (m1 - inter_X[0])*(m2 - inter_X[0]) < 0: 267 | markers_cutoff.append(inter_X[0]) 268 | if (m1 - inter_X[1])*(m2 - inter_X[1]) < 0: 269 | markers_cutoff.append(inter_X[1]) 270 | 271 | markers_cutoff = pd.Series(markers_cutoff,index=self.markers) 272 | 273 | return markers_cutoff 274 | 275 | 276 | 277 | def _solve(self,m1,m2,std1,std2,s1,s2): 278 | """solve equation: s1*N(m1,std1)=s2*N(m2,std2), return the intersection points of two weighted Gaussian""" 279 | a = 1/(2*std1**2) - 1/(2*std2**2) 280 | b = m2/(std2**2) - m1/(std1**2) 281 | c = m1**2 /(2*std1**2) - m2**2 / (2*std2**2) - np.log((std2*s1)/(std1*s2)) 282 | return np.roots([a,b,c]) 283 | 284 | 285 | 286 | # dfs 287 | def preorderTraversal(self): 288 | 289 | node = self.tree 290 | if node is None: 291 | return 292 | 293 | nodelist = [] 294 | myStack = [] 295 | 296 | while node or myStack: 297 | while node: 298 | nodelist.append(node) 299 | myStack.append(node) 300 | node = node.left 301 | node = myStack.pop() 302 | node = node.right 303 | 304 | return nodelist 305 | 306 | 307 | # bfs 308 | def levelOrderTraversal(self): 309 | #print('bfs...') 310 | node = self.tree 311 | if node is None: 312 | return 313 | 314 | queue = [] 315 | nodelist = [] 316 | 317 | queue.append(node) 318 | nodelist.append(node) 319 | 320 | while(len(queue) > 0): 321 | node = queue.pop(0) 322 | 323 | if node.left is not None: 324 | nodelist.append(node.left) 325 | queue.append(node.left) 326 | 327 | if node.right is not None: 328 | nodelist.append(node.right) 329 | queue.append(node.right) 330 | 331 | return nodelist 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | -------------------------------------------------------------------------------- /CITEsort_out/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/.DS_Store -------------------------------------------------------------------------------- /CITEsort_out/data_cls_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/data_cls_hist.png -------------------------------------------------------------------------------- /CITEsort_out/tree.dot: -------------------------------------------------------------------------------- 1 | digraph Tree {node [shape=box, style="filled, rounded", color="black", fontname=helvetica] ;edge [fontname=helvetica] ;0 [label="0_CD64\nNum: 2580\n(2|2)",fillcolor="#ff9966ff",fontsize=25];1 [label="1_CD341\n1313 (50.89%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];0 -> 1 [labeldistance=3, label = "-0.94 2 | ",fontsize=25, color=black, style=solid];2 [label="2_CD361\n1267 (49.11%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];0 -> 2 [labeldistance=3, label = "0.98 3 | ",fontsize=25, color=red, style=bold];3 [label="3_CD3\n298 (11.55%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];1 -> 3 [labeldistance=3, label = "-0.86 4 | ",fontsize=25, color=black, style=solid];4 [label="4_leaf\n1015 (39.34%)\n",fillcolor="#006b2b",fontsize=20];1 -> 4 [labeldistance=3, label = "1.19 5 | ",fontsize=25, color=red, style=bold];5 [label="5_CD56_CD91\n299 (11.59%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];2 -> 5 [labeldistance=3, label = "-0.21 6 | ",fontsize=25, color=black, style=solid];6 [label="6_leaf\n968 (37.52%)\n",fillcolor="#006c2c",fontsize=20];2 -> 6 [labeldistance=3, label = "1.2 7 | ",fontsize=25, color=red, style=bold];7 [label="7_CD45RA_CD45\n78 (3.02%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];3 -> 7 [labeldistance=3, label = "-0.36 8 | ",fontsize=25, color=black, style=solid];8 [label="8_CD41\n220 (8.53%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];3 -> 8 [labeldistance=3, label = "2.95 9 | ",fontsize=25, color=red, style=bold];9 [label="9_HLA-DR_CD361\n179 (6.94%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];5 -> 9 [labeldistance=3, label = "-0.1 10 | -0.24 11 | ",fontsize=25, color=red, style=bold];10 [label="10_leaf\n120 (4.65%)\n",fillcolor="#46ae60",fontsize=20];5 -> 10 [labeldistance=3, label = "1.41 12 | 0.53 13 | ",fontsize=25, color=black, style=solid];11 [label="11_leaf\n30 (1.16%)\n",fillcolor="#8dd08a",fontsize=20];7 -> 11 [labeldistance=3, label = "-0.38 14 | 0.13 15 | ",fontsize=25, color=black, style=solid];12 [label="12_leaf\n48 (1.86%)\n",fillcolor="#76c578",fontsize=20];7 -> 12 [labeldistance=3, label = "2.24 16 | 0.52 17 | ",fontsize=25, color=red, style=bold];13 [label="13_CD45RA\n99 (3.84%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];8 -> 13 [labeldistance=3, label = "-0.76 18 | ",fontsize=25, color=black, style=solid];14 [label="14_CD71\n121 (4.69%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];8 -> 14 [labeldistance=3, label = "2.88 19 | ",fontsize=25, color=red, style=bold];15 [label="15_HLA-DR\n125 (4.84%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];9 -> 15 [labeldistance=3, label = "0.41 20 | -0.41 21 | ",fontsize=25, color=black, style=solid];16 [label="16_leaf\n54 (2.09%)\n",fillcolor="#72c375",fontsize=20];9 -> 16 [labeldistance=3, label = "-0.58 22 | 0.26 23 | ",fontsize=25, color=red, style=bold];17 [label="17_leaf\n40 (1.55%)\n",fillcolor="#7fc97f",fontsize=20];13 -> 17 [labeldistance=3, label = "-0.54 24 | ",fontsize=25, color=black, style=solid];18 [label="18_leaf\n59 (2.29%)\n",fillcolor="#6dc072",fontsize=20];13 -> 18 [labeldistance=3, label = "2.06 25 | ",fontsize=25, color=red, style=bold];19 [label="19_leaf\n26 (1.01%)\n",fillcolor="#92d28f",fontsize=20];14 -> 19 [labeldistance=3, label = "-0.04 26 | ",fontsize=25, color=black, style=solid];20 [label="20_leaf\n95 (3.68%)\n",fillcolor="#53b466",fontsize=20];14 -> 20 [labeldistance=3, label = "2.18 27 | ",fontsize=25, color=red, style=bold];21 [label="21_CD931\n58 (2.25%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];15 -> 21 [labeldistance=3, label = "-0.68 28 | ",fontsize=25, color=red, style=bold];22 [label="22_CD11b\n67 (2.6%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];15 -> 22 [labeldistance=3, label = "1.36 29 | ",fontsize=25, color=black, style=solid];23 [label="23_leaf\n43 (1.67%)\n",fillcolor="#7cc87c",fontsize=20];21 -> 23 [labeldistance=3, label = "-0.0 30 | ",fontsize=25, color=red, style=bold];24 [label="24_leaf\n15 (0.58%)\n",fillcolor="#aadda4",fontsize=20];21 -> 24 [labeldistance=3, label = "1.05 31 | ",fontsize=25, color=black, style=solid];25 [label="25_leaf\n17 (0.66%)\n",fillcolor="#a5db9f",fontsize=20];22 -> 25 [labeldistance=3, label = "-0.39 32 | ",fontsize=25, color=red, style=bold];26 [label="26_leaf\n50 (1.94%)\n",fillcolor="#75c477",fontsize=20];22 -> 26 [labeldistance=3, label = "0.48 33 | ",fontsize=25, color=black, style=solid];} -------------------------------------------------------------------------------- /CITEsort_out/tree.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/tree.pdf -------------------------------------------------------------------------------- /CITEsort_out/tree.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/tree.pickle -------------------------------------------------------------------------------- /CITEsort_out/tree_complete.dot: -------------------------------------------------------------------------------- 1 | digraph Tree {node [shape=box, style="filled, rounded", color="black", fontname=helvetica] ;edge [fontname=helvetica] ;0 [label="0_CD64\nNum: 2580\n(2|2)",fillcolor="#ff9966ff",fontsize=25];1 [label="1_CD341\n1313 (50.89%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];0 -> 1 [labeldistance=3, label = "-0.94 2 | ",fontsize=25, color=red, style=bold];2 [label="2_CD361\n1267 (49.11%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];0 -> 2 [labeldistance=3, label = "0.98 3 | ",fontsize=25, color=black, style=solid];3 [label="3_CD3\n298 (11.55%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];1 -> 3 [labeldistance=3, label = "-0.86 4 | ",fontsize=25, color=black, style=solid];4 [label="4_leaf\n1015 (39.34%)\n 5 | CD41: -0.76 6 | CD71: 0.43 7 | CD56: -0.56 8 | CD3: -0.41 9 | CD331: -0.4 10 | CD341: 1.19 11 | CD90: -0.21 12 | CD117: 0.7 13 | CD45RA: 0.55 14 | CD123: 0.74 15 | CD141: -0.48 16 | HLA-DR: 0.25 17 | CD11b: -0.95 18 | CD64: -0.95 19 | CD381: -0.63 20 | CD45: -1.15 21 | CD361: -0.85 22 | CD931: -0.81 23 | CD91: -0.13",fillcolor="#006b2b",fontsize=20];1 -> 4 [labeldistance=3, label = "1.19 24 | ",fontsize=25, color=red, style=bold];5 [label="5_CD56_CD91\n285 (11.05%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];2 -> 5 [labeldistance=3, label = "-0.24 25 | ",fontsize=25, color=red, style=bold];6 [label="6_leaf\n982 (38.06%)\n 26 | CD41: 0.45 27 | CD71: -0.65 28 | CD56: 0.41 29 | CD3: -0.12 30 | CD331: 0.89 31 | CD341: -0.72 32 | CD90: 0.27 33 | CD117: -0.36 34 | CD45RA: -0.51 35 | CD123: -0.35 36 | CD141: 0.69 37 | HLA-DR: -0.25 38 | CD11b: 1.08 39 | CD64: 1.0 40 | CD381: 0.69 41 | CD45: 0.88 42 | CD361: 1.19 43 | CD931: 1.07 44 | CD91: 0.16",fillcolor="#006c2c",fontsize=20];2 -> 6 [labeldistance=3, label = "1.19 45 | ",fontsize=25, color=black, style=solid];7 [label="7_CD45RA_CD45\n78 (3.02%)\n(3|3)",fillcolor="#ffff99ff",fontsize=25];3 -> 7 [labeldistance=3, label = "-0.36 46 | ",fontsize=25, color=black, style=solid];8 [label="8_CD41\n220 (8.53%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];3 -> 8 [labeldistance=3, label = "2.95 47 | ",fontsize=25, color=red, style=bold];9 [label="9_HLA-DR_CD361\n170 (6.59%)\n(2|2)",fillcolor="#ffff99ff",fontsize=25];5 -> 9 [labeldistance=3, label = "-0.1 48 | -0.24 49 | ",fontsize=25, color=red, style=bold];10 [label="10_CD361_CD91\n115 (4.46%)\n(2|2)",fillcolor="#ffff99ff",fontsize=25];5 -> 10 [labeldistance=3, label = "1.42 50 | 0.53 51 | ",fontsize=25, color=black, style=solid];11 [label="11_leaf\n36 (1.4%)\n 52 | CD41: -0.28 53 | CD71: -0.14 54 | CD56: 0.02 55 | CD3: -0.43 56 | CD331: -0.92 57 | CD341: -0.75 58 | CD90: -0.26 59 | CD117: -0.69 60 | CD45RA: -0.12 61 | CD123: -0.29 62 | CD141: -0.27 63 | HLA-DR: -0.03 64 | CD11b: -0.23 65 | CD64: -0.78 66 | CD381: -0.68 67 | CD45: 0.04 68 | CD361: -0.58 69 | CD931: -0.64 70 | CD91: -0.25",fillcolor="#84cc83",fontsize=20];7 -> 11 [labeldistance=3, label = "-0.12 71 | 0.04 72 | ",fontsize=25, color=black, style=solid];12 [label="12_leaf\n42 (1.63%)\n 73 | CD41: -0.64 74 | CD71: 0.45 75 | CD56: 0.61 76 | CD3: -0.31 77 | CD331: -1.89 78 | CD341: -0.84 79 | CD90: -0.21 80 | CD117: -0.74 81 | CD45RA: 2.4 82 | CD123: -0.76 83 | CD141: -0.44 84 | HLA-DR: 1.83 85 | CD11b: -0.66 86 | CD64: -0.92 87 | CD381: -0.18 88 | CD45: 0.66 89 | CD361: -0.79 90 | CD931: -0.82 91 | CD91: 0.02",fillcolor="#7dc87e",fontsize=20];7 -> 12 [labeldistance=3, label = "2.4 92 | 0.66 93 | ",fontsize=25, color=red, style=bold];13 [label="13_CD45RA\n99 (3.84%)\n(2|2)",fillcolor="#ffccccff",fontsize=25];8 -> 13 [labeldistance=3, label = "-0.76 94 | ",fontsize=25, color=red, style=bold];14 [label="14_CD71\n121 (4.69%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];8 -> 14 [labeldistance=3, label = "2.88 95 | ",fontsize=25, color=black, style=solid];15 [label="15_leaf\n89 (3.45%)\n 96 | CD41: 0.0 97 | CD71: -0.73 98 | CD56: -0.16 99 | CD3: -0.26 100 | CD331: 0.36 101 | CD341: -0.87 102 | CD90: -0.03 103 | CD117: -0.61 104 | CD45RA: -0.83 105 | CD123: -0.75 106 | CD141: -0.21 107 | HLA-DR: -0.71 108 | CD11b: 0.71 109 | CD64: 0.74 110 | CD381: 0.39 111 | CD45: 0.42 112 | CD361: -0.05 113 | CD931: 0.38 114 | CD91: -0.3",fillcolor="#56b567",fontsize=20];9 -> 15 [labeldistance=3, label = "-0.71 115 | -0.05 116 | ",fontsize=25, color=black, style=solid];16 [label="16_CD11b\n81 (3.14%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];9 -> 16 [labeldistance=3, label = "1.13 117 | -0.46 118 | ",fontsize=25, color=red, style=bold];17 [label="17_CD11b\n80 (3.1%)\n(1|2)",fillcolor="#ffccccff",fontsize=25];10 -> 17 [labeldistance=3, label = "-0.25 119 | -0.28 120 | ",fontsize=25, color=black, style=solid];18 [label="18_leaf\n35 (1.36%)\n 121 | CD41: 0.39 122 | CD71: -0.65 123 | CD56: 0.43 124 | CD3: -0.44 125 | CD331: 0.09 126 | CD341: -0.73 127 | CD90: -0.03 128 | CD117: -0.62 129 | CD45RA: -0.58 130 | CD123: -0.12 131 | CD141: -0.23 132 | HLA-DR: 0.72 133 | CD11b: 0.09 134 | CD64: 0.83 135 | CD381: 0.59 136 | CD45: 0.37 137 | CD361: -0.19 138 | CD931: -0.04 139 | CD91: 2.39",fillcolor="#86cc85",fontsize=20];10 -> 18 [labeldistance=3, label = "-0.19 140 | 2.39 141 | ",fontsize=25, color=red, style=bold];19 [label="19_leaf\n40 (1.55%)\n 142 | CD41: -0.78 143 | CD71: 1.78 144 | CD56: -0.08 145 | CD3: 2.93 146 | CD331: -1.9 147 | CD341: -0.89 148 | CD90: -0.1 149 | CD117: -0.65 150 | CD45RA: -0.54 151 | CD123: -0.98 152 | CD141: -0.5 153 | HLA-DR: -0.74 154 | CD11b: -0.86 155 | CD64: -0.98 156 | CD381: -0.94 157 | CD45: 0.87 158 | CD361: -0.83 159 | CD931: -0.9 160 | CD91: -0.25",fillcolor="#7fc97f",fontsize=20];13 -> 19 [labeldistance=3, label = "-0.54 161 | ",fontsize=25, color=black, style=solid];20 [label="20_leaf\n59 (2.29%)\n 162 | CD41: -0.75 163 | CD71: 1.97 164 | CD56: 0.5 165 | CD3: 2.67 166 | CD331: -1.94 167 | CD341: -0.88 168 | CD90: -0.28 169 | CD117: -0.65 170 | CD45RA: 2.06 171 | CD123: -0.99 172 | CD141: -0.4 173 | HLA-DR: -0.83 174 | CD11b: -0.72 175 | CD64: -0.91 176 | CD381: -1.14 177 | CD45: 0.83 178 | CD361: -0.8 179 | CD931: -0.83 180 | CD91: -0.15",fillcolor="#6dc072",fontsize=20];13 -> 20 [labeldistance=3, label = "2.06 181 | ",fontsize=25, color=red, style=bold];21 [label="21_leaf\n26 (1.01%)\n 182 | CD41: 2.9 183 | CD71: -0.04 184 | CD56: -0.21 185 | CD3: 3.02 186 | CD331: -1.92 187 | CD341: -0.98 188 | CD90: -0.17 189 | CD117: -0.59 190 | CD45RA: -1.01 191 | CD123: -1.01 192 | CD141: -0.34 193 | HLA-DR: -0.67 194 | CD11b: -0.91 195 | CD64: -0.92 196 | CD381: -1.14 197 | CD45: 0.66 198 | CD361: -0.84 199 | CD931: -0.92 200 | CD91: -0.38",fillcolor="#92d28f",fontsize=20];14 -> 21 [labeldistance=3, label = "-0.04 201 | ",fontsize=25, color=black, style=solid];22 [label="22_leaf\n95 (3.68%)\n 202 | CD41: 2.87 203 | CD71: 2.18 204 | CD56: -0.26 205 | CD3: 3.11 206 | CD331: -1.93 207 | CD341: -0.87 208 | CD90: -0.11 209 | CD117: -0.63 210 | CD45RA: -0.11 211 | CD123: -0.97 212 | CD141: -0.36 213 | HLA-DR: -0.92 214 | CD11b: -0.91 215 | CD64: -0.9 216 | CD381: -0.8 217 | CD45: 0.61 218 | CD361: -0.79 219 | CD931: -0.86 220 | CD91: -0.11",fillcolor="#53b466",fontsize=20];14 -> 22 [labeldistance=3, label = "2.18 221 | ",fontsize=25, color=red, style=bold];23 [label="23_leaf\n19 (0.74%)\n 222 | CD41: 0.54 223 | CD71: -0.87 224 | CD56: -0.05 225 | CD3: -0.47 226 | CD331: -0.07 227 | CD341: -0.88 228 | CD90: -0.13 229 | CD117: -0.53 230 | CD45RA: -1.1 231 | CD123: -0.47 232 | CD141: -0.51 233 | HLA-DR: 1.17 234 | CD11b: -0.4 235 | CD64: 0.8 236 | CD381: 1.2 237 | CD45: 0.19 238 | CD361: -0.65 239 | CD931: -0.35 240 | CD91: -0.24",fillcolor="#a2d99c",fontsize=20];16 -> 23 [labeldistance=3, label = "-0.4 241 | ",fontsize=25, color=red, style=bold];24 [label="24_leaf\n62 (2.4%)\n 242 | CD41: 0.53 243 | CD71: -0.81 244 | CD56: -0.02 245 | CD3: -0.33 246 | CD331: 0.33 247 | CD341: -0.8 248 | CD90: 0.02 249 | CD117: -0.5 250 | CD45RA: -0.74 251 | CD123: -0.49 252 | CD141: -0.32 253 | HLA-DR: 1.12 254 | CD11b: 0.49 255 | CD64: 1.05 256 | CD381: 1.04 257 | CD45: 0.4 258 | CD361: -0.4 259 | CD931: -0.07 260 | CD91: -0.16",fillcolor="#6abf71",fontsize=20];16 -> 24 [labeldistance=3, label = "0.49 261 | ",fontsize=25, color=black, style=solid];25 [label="25_leaf\n17 (0.66%)\n 262 | CD41: 0.68 263 | CD71: -0.73 264 | CD56: 1.9 265 | CD3: -0.34 266 | CD331: -0.01 267 | CD341: -0.95 268 | CD90: -0.14 269 | CD117: -0.66 270 | CD45RA: -0.94 271 | CD123: -0.28 272 | CD141: -0.22 273 | HLA-DR: 1.22 274 | CD11b: -0.26 275 | CD64: 0.93 276 | CD381: 0.88 277 | CD45: 0.25 278 | CD361: -0.42 279 | CD931: -0.33 280 | CD91: -0.24",fillcolor="#a5db9f",fontsize=20];17 -> 25 [labeldistance=3, label = "-0.26 281 | ",fontsize=25, color=black, style=solid];26 [label="26_leaf\n63 (2.44%)\n 282 | CD41: 0.44 283 | CD71: -0.66 284 | CD56: 1.84 285 | CD3: -0.21 286 | CD331: 0.2 287 | CD341: -0.82 288 | CD90: 0.13 289 | CD117: -0.49 290 | CD45RA: -0.69 291 | CD123: -0.54 292 | CD141: -0.21 293 | HLA-DR: 0.37 294 | CD11b: 0.6 295 | CD64: 1.02 296 | CD381: 0.86 297 | CD45: 0.51 298 | CD361: -0.21 299 | CD931: 0.08 300 | CD91: -0.29",fillcolor="#68be70",fontsize=20];17 -> 26 [labeldistance=3, label = "0.6 301 | ",fontsize=25, color=red, style=bold];} -------------------------------------------------------------------------------- /CITEsort_out/tree_complete.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/CITEsort_out/tree_complete.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 QiuyuLian 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CITE-sort 2 | 3 | CITE-sort 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | An artificial-cell-type aware surface marker clustering method for CITE-seq data. 14 | 15 | ## Description 16 | 17 | CITE-sort conducts auto-gating with CITE-seq ADT data using recursive Gaussian Mixture Model. It is robust against artificial cell types that stem from multiplets. CITE-sort also provides concrete explanations of its internal decision process by constructing a biologically meaningful sort tree. See our [paper](https://academic.oup.com/bioinformatics/article/36/Supplement_1/i542/5870491) for more details. 18 | 19 | Below shows an example of sort tree constructed by CITE-sort from an in-house PBMC dataset. Each node represents a subpopulation. The title of each inner node represents the selected surface markers subspace. Red and blue colors represent the two component complexes for subdivision. Edges are colored according to their corresponding component complexes. Leaf nodes are hand-curated and are annotated with domain knowledge. Cell types that should not exist are labeled as suspect _artificial cell type_ (ACT) clusters. Suspect ACT clusters are characterized by their population percentages in the overall dataset (denoted by ‘prop’) and their multi-sample multiplets percentages (denoted by ‘MSM’). Abbreviations: iNK: intermediate NK cells; mNK: vast majority of NK cells; C-mono: classical monocytes; NC-mono: non-classical monocytes; mDC: myeloid DC; DNT: double negative T cells. 20 | 21 | taxonomy 22 | 23 | ## Usage 24 | 25 | ### Input 26 | 27 | The input of CITE-sort should be a csv file with CLR normalized CITE-seq ADT data (row: droplet/sample, col: ADT/feature). 28 | 29 | ### Run 30 | 31 | `python runCITEsort.py ADT_clr_file -c 0.1 -o ./CITEsort_out` 32 | 33 | - -c, cutoff, the similarity threshold of merging Gaussian components; the default is 0.1. It should be a real value between 0 and 1. The bigger value leads to split more aggressively, and ends in a more complicated tree. 34 | - -o, output, the path to save ouput files. If not specified, CITE-sort will create a folder "./CITEsort_out" in the current directory. 35 | 36 | `python runCITEsort.py ADT_clr_file -c 0.1 -o ./CITEsort_out --compact` 37 | 38 | - --compact, adding this parameter will output a compact tree. 39 | 40 | See analysis [tutorial](https://github.com/QiuyuLian/CITE-sort/blob/master/AnalysisTutorial.ipynb) for visualizing each node. 41 | 42 | ### Outputs 43 | 44 | - tree.pdf, the vasualized sort tree of input dataset created by CITE-sort. 45 | - There are three rows in each inner node: 46 | - "**n_marker(s)**": **n** is the node ID, which is obtained by Breath First Search. **marker(s)**, the surface markers next to the ID, is the subspace selected to subdivide the current population. 47 | - "**Num: xxx**": is the number of droplets in current population. 48 | - "**(a|b)**": **b** denotes the number of components determined by BIC in the selected surface marker subspace. **a** denotes the number of component-complexes after merging with a certain threshold. Generally, **a** <= **b**. **a** = **b** when all components can not be merged with current threshold. 49 | - The numbers next to the arrows denote the mean of the selected markers in the partition the arrow stands for. In leaf nodes, the means of all markers are marked if not using '--compact'. As CITE-sort takes CLR-format values as input, these numbers could be positive or negative. 50 | - leaf_labels.csv, the labels of each droplets in the sort tree. 51 | - tree.pickle, the tree structure recording the main clusteirng infromation of input dataset. 52 | - tree.dot, the auxiliary file to plot the tree. 53 | 54 | ## Examples 55 | 56 | We provide 3 in-house and 5 public CITE-seq datasets in "./datasets": 57 | 58 | - [PBMC_1k (10X Genomics)](https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0/pbmc_1k_protein_v3) 59 | - [PBMC_1k_b (In house)](https://github.com/QiuyuLian/CITE-sort/tree/master/datasets) 60 | - [PBMC_2k (In house)](https://github.com/QiuyuLian/CITE-sort/tree/master/datasets) 61 | - [PBMC_5k (10X Genomics)](https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.2/5k_pbmc_protein_v3) 62 | - [PBMC_8k (10X Genomics)](https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0/pbmc_10k_protein_v3) 63 | - [MALT_8k (10X Genomics)](https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0/malt_10k_protein_v3) 64 | - [CBMC_8k (GSE100866)](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE100866) 65 | - [PBMC_16k (with cell hashing) (In house)](https://github.com/QiuyuLian/CITE-sort/tree/master/datasets) 66 | 67 | ### Example Commond 68 | 69 | **Example 1**: The PBMC_2k dataset is used as an example of beginning with CLR-format data. 70 | 71 | `python preCITEsort.py ./datasets/PBMC_2k_ADT_clr.csv ` 72 | 73 | - plot histgram of each marker. 74 | 75 | `python runCITEsort.py ./datasets/PBMC_2k_ADT_clr.csv ` 76 | 77 | - run CITE-sort and output a sort tree. 78 | 79 | **Example 2**: ADTs from [GSE143363](https://github.com/QiuyuLian/CITE-sort/blob/master/datasets) are extracted from [GEO](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE143363) and used as an example of begining with raw counts. 80 | 81 | `python preCITEsort.py ./datasets/GSE143363_ADT_Dx_count.csv --CLR ` 82 | 83 | - transform data into CLR format and plot histgram of each marker. 84 | 85 | `python runCITEsort.py ./CITEsort_out/data_clr.csv --compact` 86 | 87 | - run CITE-sort and output a sort tree in compact way. 88 | 89 | ## Authors 90 | 91 | Qiuyu Lian\*, Hongyi Xin\*, Jianzhu Ma, Liza Konnikova, Wei Chen\#, Jin Gu\#,Kong Chen\# 92 | 93 | ## Maintainer 94 | 95 | Qiuyu Lian, Hongyi Xin. 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /performance.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jan 16 22:39:00 2020 5 | 6 | @author: lianqiuyu 7 | """ 8 | 9 | import seaborn as sns 10 | from matplotlib import pyplot as plt 11 | #from sort_harddivision import sort 12 | import pandas as pd 13 | import numpy as np 14 | from sklearn.mixture import GaussianMixture,BayesianGaussianMixture 15 | 16 | 17 | sns.set(style="whitegrid") 18 | import time 19 | from ReSplit import ReSplit 20 | from BTreeTraversal import BTreeTraversal 21 | #from DEmerge import DEmerge 22 | 23 | namelist = ['PBMC_1k','PBMC_1k_b','PBMC_2k', 'PBMC_5k', 'PBMC_8k', 'MALT_8k', 'CBMC_8k','PBMC_16k'] 24 | datapath = './datasets' 25 | savepath = './performance' 26 | 27 | 28 | 29 | from sys import argv 30 | 31 | 32 | 33 | max_cluster_num = 50 34 | 35 | def find_k(data,c_type,max_cluster_num=100): 36 | k_list = [] 37 | inertia = [] 38 | 39 | for k in range(1, max_cluster_num + 1): 40 | 41 | gmm = GaussianMixture(k,covariance_type=c_type).fit(data) 42 | k_list.append(k) 43 | inertia.append(gmm.bic(data)) 44 | 45 | idx = np.argmin(inertia) 46 | final_k = k_list[idx] 47 | return final_k 48 | 49 | 50 | merge_cutoff = 0.1 51 | record_full ={} 52 | 53 | #for i in range(len(namelist)): 54 | 55 | #name = namelist[i] 56 | name = argv[1] 57 | print(name) 58 | data = pd.read_csv(datapath+'/'+name+'_ADT_clr_10markers.csv',header=0,index_col=0) 59 | #N=data.shape[0] 60 | 61 | record_sort = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component']) 62 | record_gmm = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component']) 63 | record_ngmm = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component']) 64 | record_dpgmm = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component']) 65 | 66 | record_gmm_fix_k = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component']) 67 | record_ngmm_fix_k = pd.DataFrame(np.zeros([10,3]),columns = ['time','ll','n_component']) 68 | 69 | 70 | 71 | for t in range(10): 72 | 73 | print('CITE-sort.') 74 | start_time = time.time() 75 | rgmm = ReSplit(data,merge_cutoff) #sort(data,F_path,N,c_type,weight,rescan_cut,tol,fix_k,max_ndim) 76 | record_sort.iloc[t,0] = time.time() - start_time 77 | # print("--- %s seconds ---" % (t1)) 78 | trav = BTreeTraversal(rgmm) 79 | record_sort.iloc[t,1] = trav.ll 80 | #record_sort.iloc[t,2] = trav.bic 81 | record_sort.iloc[t,2] = trav.n_components 82 | #print(rgmm_ll) 83 | 84 | print('full GMM.') 85 | start_time = time.time() 86 | final_k = find_k(data,'full', max_cluster_num) 87 | gmm = GaussianMixture(final_k).fit(data) 88 | record_gmm.iloc[t,0] = time.time() - start_time 89 | #print("--- %s seconds ---" % gmm_time) 90 | #record_gmm.iloc[t,2] = gmm.bic(data) 91 | record_gmm.iloc[t,1] = gmm.score(data) 92 | record_gmm.iloc[t,2] = final_k #trav.n_components 93 | 94 | 95 | print('full GMM wigh fix k.') 96 | start_time = time.time() 97 | gmm = GaussianMixture(trav.n_components).fit(data) 98 | record_gmm_fix_k.iloc[t,0] = time.time() - start_time 99 | #print("--- %s seconds ---" % gmm_time) 100 | #record_gmm.iloc[t,2] = gmm.bic(data) 101 | record_gmm_fix_k.iloc[t,1] = gmm.score(data) 102 | record_gmm_fix_k.iloc[t,2] = trav.n_components 103 | 104 | 105 | print('naive GMM.') 106 | start_time = time.time() 107 | final_k = find_k(data,'diag',max_cluster_num) 108 | ngmm = GaussianMixture(final_k,covariance_type='diag').fit(data) 109 | record_ngmm.iloc[t,0] = time.time() - start_time 110 | #print("--- %s seconds ---" % (t)) 111 | #record_ngmm.iloc[t,2] = ngmm.bic(data) 112 | record_ngmm.iloc[t,1] = ngmm.score(data) 113 | record_ngmm.iloc[t,2] = final_k#trav.n_components 114 | #print(ngmm_ll) 115 | 116 | 117 | print('naive GMM with fix k.') 118 | start_time = time.time() 119 | ngmm = GaussianMixture(trav.n_components,covariance_type='diag').fit(data) 120 | record_ngmm_fix_k.iloc[t,0] = time.time() - start_time 121 | #print("--- %s seconds ---" % (t)) 122 | #record_ngmm.iloc[t,2] = ngmm.bic(data) 123 | record_ngmm_fix_k.iloc[t,1] = ngmm.score(data) 124 | record_ngmm_fix_k.iloc[t,2] = trav.n_components 125 | #print(ngmm_ll) 126 | 127 | 128 | print('dpgmm.') 129 | start_time = time.time() 130 | dpgmm = BayesianGaussianMixture(n_components=max_cluster_num,max_iter=500).fit(data) 131 | record_dpgmm.iloc[t,0] = time.time() - start_time 132 | record_dpgmm.iloc[t,1] = dpgmm.score(data) 133 | record_dpgmm.iloc[t,2] = len(dpgmm.weights_) 134 | 135 | 136 | db_summary = pd.concat([record_sort,record_gmm,record_gmm_fix_k,record_ngmm,record_ngmm_fix_k,record_dpgmm]) 137 | db_summary['DB'] = name 138 | db_summary['method'] = ['CITE-sort']*record_sort.shape[0] + ['GMM']*record_gmm.shape[0] + ['GMM_fixk']*record_gmm_fix_k.shape[0] + \ 139 | ['nGMM']*record_ngmm.shape[0] + ['nGMM_fixk']*record_ngmm_fix_k.shape[0] + ['dpgmm']*record_dpgmm.shape[0] 140 | 141 | db_summary.to_csv(savepath+'/record_'+name+'.csv') 142 | 143 | 144 | 145 | 146 | 147 | record_full[name] = db_summary 148 | 149 | 150 | record_full_alldb = pd.concat([record_full[name] for name in namelist]) 151 | record_full_alldb.to_csv(savepath+'/record_8DBs.csv') 152 | 153 | 154 | 155 | # record_full_alldb = pd.read_csv('./performance/record_8DBs.csv',header=0,index_col=0) 156 | 157 | temp = record_full_alldb.loc[record_full_alldb['method']!='GMM_fixk',] 158 | record_plot = temp.loc[temp['method']!='nGMM_fixk',:] 159 | 160 | record_plot['time'] = record_plot['time']/60 161 | 162 | 163 | 164 | 165 | 166 | plt.figure(figsize=(8,3), dpi=96) 167 | ax = sns.barplot(x='DB', y='time', hue='method', data=record_plot) 168 | plt.ylabel('Time (min)',fontsize=15) 169 | plt.xlabel('') 170 | plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) 171 | plt.savefig('./performance/time.pdf') 172 | plt.show() 173 | 174 | 175 | record_plot['ll'] = - record_plot['ll'] 176 | 177 | plt.figure(figsize=(8,3), dpi=96) 178 | ax = sns.barplot(x='DB', y='ll', hue='method', data=record_plot) 179 | plt.ylabel(' - log-likelihood',fontsize=15) 180 | plt.xlabel('') 181 | plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) 182 | plt.savefig('./performance/ll.pdf') 183 | plt.show() 184 | 185 | 186 | -------------------------------------------------------------------------------- /performance/ll.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/performance/ll.pdf -------------------------------------------------------------------------------- /performance/record_8DBs.csv: -------------------------------------------------------------------------------- 1 | ,time,ll,n_component,DB,method 2 | 0,7.126919984817505,-11.595296347428391,10.0,PBMC_1k,CITE-sort 3 | 1,7.169904470443726,-11.011866862133289,11.0,PBMC_1k,CITE-sort 4 | 2,6.501286268234253,-11.595296347428391,10.0,PBMC_1k,CITE-sort 5 | 3,6.519277572631836,-11.629320053052382,10.0,PBMC_1k,CITE-sort 6 | 4,6.452306270599365,-12.319758924800478,9.0,PBMC_1k,CITE-sort 7 | 5,6.9830121994018555,-11.73632943950538,10.0,PBMC_1k,CITE-sort 8 | 6,6.7821266651153564,-12.319758924800478,9.0,PBMC_1k,CITE-sort 9 | 7,6.54626202583313,-11.73632943950538,10.0,PBMC_1k,CITE-sort 10 | 8,6.8560850620269775,-11.73632943950538,10.0,PBMC_1k,CITE-sort 11 | 9,6.662195682525635,-12.319758924800478,9.0,PBMC_1k,CITE-sort 12 | 0,6.587217807769775,-17.140941651826182,4.0,PBMC_1k,GMM 13 | 1,5.890617609024048,-17.059280976259892,4.0,PBMC_1k,GMM 14 | 2,6.10949182510376,-17.106031776612685,4.0,PBMC_1k,GMM 15 | 3,5.937591314315796,-16.582158231213626,5.0,PBMC_1k,GMM 16 | 4,6.394338369369507,-17.140941651826182,4.0,PBMC_1k,GMM 17 | 5,6.005538702011108,-17.106031776612685,4.0,PBMC_1k,GMM 18 | 6,6.512260913848877,-15.174585542784197,7.0,PBMC_1k,GMM 19 | 7,6.381338357925415,-16.14805683403237,5.0,PBMC_1k,GMM 20 | 8,5.929594278335571,-16.163219553315656,5.0,PBMC_1k,GMM 21 | 9,6.02553915977478,-17.10511391549158,4.0,PBMC_1k,GMM 22 | 0,0.05996561050415039,-14.088582148687417,10.0,PBMC_1k,GMM_fixk 23 | 1,0.08894848823547363,-13.41499493553318,11.0,PBMC_1k,GMM_fixk 24 | 2,0.03797769546508789,-13.540707534411622,10.0,PBMC_1k,GMM_fixk 25 | 3,0.07995438575744629,-13.850122640730694,10.0,PBMC_1k,GMM_fixk 26 | 4,0.032981157302856445,-14.322336721328481,9.0,PBMC_1k,GMM_fixk 27 | 5,0.043974876403808594,-13.958332357223027,10.0,PBMC_1k,GMM_fixk 28 | 6,0.07495713233947754,-14.111789277986022,9.0,PBMC_1k,GMM_fixk 29 | 7,0.04097485542297363,-14.064216918680327,10.0,PBMC_1k,GMM_fixk 30 | 8,0.0439755916595459,-13.776412026253446,10.0,PBMC_1k,GMM_fixk 31 | 9,0.03797793388366699,-13.804259594360275,9.0,PBMC_1k,GMM_fixk 32 | 0,1.9418880939483643,-16.391776859648125,21.0,PBMC_1k,nGMM 33 | 1,1.7360057830810547,-17.017084083201127,17.0,PBMC_1k,nGMM 34 | 2,1.7729849815368652,-16.82115726746788,18.0,PBMC_1k,nGMM 35 | 3,1.7459995746612549,-16.76203681311147,19.0,PBMC_1k,nGMM 36 | 4,1.9488842487335205,-17.389385051589162,15.0,PBMC_1k,nGMM 37 | 5,1.8409459590911865,-16.74670537154237,21.0,PBMC_1k,nGMM 38 | 6,1.8669307231903076,-16.147770612554257,24.0,PBMC_1k,nGMM 39 | 7,1.8069655895233154,-16.94872309211664,18.0,PBMC_1k,nGMM 40 | 8,1.8809239864349365,-17.244953751785008,17.0,PBMC_1k,nGMM 41 | 9,1.9099071025848389,-16.986451147967987,17.0,PBMC_1k,nGMM 42 | 0,0.0159912109375,-18.676332923006573,10.0,PBMC_1k,nGMM_fixk 43 | 1,0.016989946365356445,-18.419379500351987,11.0,PBMC_1k,nGMM_fixk 44 | 2,0.01699066162109375,-18.6721788347362,10.0,PBMC_1k,nGMM_fixk 45 | 3,0.01899862289428711,-18.937733836680852,10.0,PBMC_1k,nGMM_fixk 46 | 4,0.0199892520904541,-19.050810720422405,9.0,PBMC_1k,nGMM_fixk 47 | 5,0.01898956298828125,-18.78758349587411,10.0,PBMC_1k,nGMM_fixk 48 | 6,0.01299285888671875,-19.648878098092986,9.0,PBMC_1k,nGMM_fixk 49 | 7,0.014991521835327148,-18.680480444565013,10.0,PBMC_1k,nGMM_fixk 50 | 8,0.013991594314575195,-18.70135380257122,10.0,PBMC_1k,nGMM_fixk 51 | 9,0.008994340896606445,-19.010671457346113,9.0,PBMC_1k,nGMM_fixk 52 | 0,0.5646781921386719,-13.607799761585456,50.0,PBMC_1k,dpgmm 53 | 1,0.3867809772491455,-13.640822625981155,50.0,PBMC_1k,dpgmm 54 | 2,0.30382585525512695,-13.563951990951315,50.0,PBMC_1k,dpgmm 55 | 3,0.34279322624206543,-13.69329328968205,50.0,PBMC_1k,dpgmm 56 | 4,0.43774843215942383,-13.459885318638806,50.0,PBMC_1k,dpgmm 57 | 5,0.21987438201904297,-13.675988952177404,50.0,PBMC_1k,dpgmm 58 | 6,0.5296964645385742,-13.716782354231182,50.0,PBMC_1k,dpgmm 59 | 7,0.14991450309753418,-13.616409725572016,50.0,PBMC_1k,dpgmm 60 | 8,0.2648475170135498,-13.520443398422579,50.0,PBMC_1k,dpgmm 61 | 9,0.33780574798583984,-13.558766483095482,50.0,PBMC_1k,dpgmm 62 | 0,10.114209651947021,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 63 | 1,9.929314374923706,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 64 | 2,9.921319484710693,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 65 | 3,9.674460411071777,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 66 | 4,9.697448492050171,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 67 | 5,9.811382293701172,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 68 | 6,9.994277238845825,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 69 | 7,9.762410879135132,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 70 | 8,9.930314302444458,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 71 | 9,9.905328750610352,-3.8899679024823515,8.0,PBMC_1k_b,CITE-sort 72 | 0,17.497982263565063,-3.857410803360695,10.0,PBMC_1k_b,GMM 73 | 1,18.163599729537964,-4.078737114779303,9.0,PBMC_1k_b,GMM 74 | 2,16.286674976348877,-5.057414356410817,7.0,PBMC_1k_b,GMM 75 | 3,17.917723417282104,-4.432242063109768,8.0,PBMC_1k_b,GMM 76 | 4,17.478991985321045,-4.033576647238859,9.0,PBMC_1k_b,GMM 77 | 5,18.270538568496704,-4.514995141086516,7.0,PBMC_1k_b,GMM 78 | 6,17.49598240852356,-5.035745244930868,7.0,PBMC_1k_b,GMM 79 | 7,16.475565433502197,-4.111351115530821,9.0,PBMC_1k_b,GMM 80 | 8,18.53538727760315,-3.9977804567218325,9.0,PBMC_1k_b,GMM 81 | 9,17.7558331489563,-3.786756768717093,12.0,PBMC_1k_b,GMM 82 | 0,0.048970699310302734,-4.249406391322957,8.0,PBMC_1k_b,GMM_fixk 83 | 1,0.0299832820892334,-4.278763405710032,8.0,PBMC_1k_b,GMM_fixk 84 | 2,0.04897260665893555,-4.197110693176923,8.0,PBMC_1k_b,GMM_fixk 85 | 3,0.051970720291137695,-4.130739821809692,8.0,PBMC_1k_b,GMM_fixk 86 | 4,0.04897260665893555,-4.286676718538526,8.0,PBMC_1k_b,GMM_fixk 87 | 5,0.0279843807220459,-4.246833006190683,8.0,PBMC_1k_b,GMM_fixk 88 | 6,0.0459747314453125,-4.250855964037708,8.0,PBMC_1k_b,GMM_fixk 89 | 7,0.027984619140625,-4.345332236195747,8.0,PBMC_1k_b,GMM_fixk 90 | 8,0.03697919845581055,-4.256822206604944,8.0,PBMC_1k_b,GMM_fixk 91 | 9,0.03597855567932129,-4.899120027881217,8.0,PBMC_1k_b,GMM_fixk 92 | 0,3.9977121353149414,-4.459123535455444,33.0,PBMC_1k_b,nGMM 93 | 1,3.716872215270996,-5.009960395234014,24.0,PBMC_1k_b,nGMM 94 | 2,3.637917995452881,-4.437605804271244,30.0,PBMC_1k_b,nGMM 95 | 3,3.8797781467437744,-4.61302649284386,27.0,PBMC_1k_b,nGMM 96 | 4,4.056677341461182,-4.299643428195814,32.0,PBMC_1k_b,nGMM 97 | 5,3.7988243103027344,-4.600914628653415,27.0,PBMC_1k_b,nGMM 98 | 6,3.7018799781799316,-4.617832642909012,29.0,PBMC_1k_b,nGMM 99 | 7,3.8947696685791016,-4.465719923331148,30.0,PBMC_1k_b,nGMM 100 | 8,3.5669586658477783,-4.668268214854305,27.0,PBMC_1k_b,nGMM 101 | 9,3.653907299041748,-4.329243028727759,33.0,PBMC_1k_b,nGMM 102 | 0,0.0239865779876709,-6.85627287191229,8.0,PBMC_1k_b,nGMM_fixk 103 | 1,0.01898932456970215,-6.858093580112152,8.0,PBMC_1k_b,nGMM_fixk 104 | 2,0.02398538589477539,-6.858015886140156,8.0,PBMC_1k_b,nGMM_fixk 105 | 3,0.018988847732543945,-6.858142104920901,8.0,PBMC_1k_b,nGMM_fixk 106 | 4,0.024985551834106445,-6.856269306800974,8.0,PBMC_1k_b,nGMM_fixk 107 | 5,0.02298712730407715,-6.788174348841982,8.0,PBMC_1k_b,nGMM_fixk 108 | 6,0.02398681640625,-6.856261942823088,8.0,PBMC_1k_b,nGMM_fixk 109 | 7,0.012992620468139648,-6.653478604934252,8.0,PBMC_1k_b,nGMM_fixk 110 | 8,0.027983427047729492,-6.858467841012181,8.0,PBMC_1k_b,nGMM_fixk 111 | 9,0.01898980140686035,-6.858474157989434,8.0,PBMC_1k_b,nGMM_fixk 112 | 0,1.0374045372009277,-4.032790733087652,50.0,PBMC_1k_b,dpgmm 113 | 1,1.1683309078216553,-3.995996423147343,50.0,PBMC_1k_b,dpgmm 114 | 2,0.9604494571685791,-4.014887756711081,50.0,PBMC_1k_b,dpgmm 115 | 3,1.4381763935089111,-3.996255332722274,50.0,PBMC_1k_b,dpgmm 116 | 4,0.8465161323547363,-4.041724312614789,50.0,PBMC_1k_b,dpgmm 117 | 5,1.267275333404541,-3.996727813434055,50.0,PBMC_1k_b,dpgmm 118 | 6,0.6995992660522461,-4.007786680958331,50.0,PBMC_1k_b,dpgmm 119 | 7,0.7925460338592529,-4.049556668208995,50.0,PBMC_1k_b,dpgmm 120 | 8,1.1173601150512695,-4.00238851229009,50.0,PBMC_1k_b,dpgmm 121 | 9,0.8585078716278076,-4.02573409179672,50.0,PBMC_1k_b,dpgmm 122 | 0,19.135042667388916,-3.039977922112491,13.0,PBMC_2k,CITE-sort 123 | 1,17.3620707988739,-3.838225569154643,13.0,PBMC_2k,CITE-sort 124 | 2,19.087072610855103,-3.3607753391316337,12.0,PBMC_2k,CITE-sort 125 | 3,18.724289417266846,-3.356040836253083,12.0,PBMC_2k,CITE-sort 126 | 4,18.1196346282959,-3.349418687088477,12.0,PBMC_2k,CITE-sort 127 | 5,18.74930214881897,-3.193732731984007,13.0,PBMC_2k,CITE-sort 128 | 6,16.743402242660522,-4.008552378859202,12.0,PBMC_2k,CITE-sort 129 | 7,18.80124521255493,-3.184140139622323,13.0,PBMC_2k,CITE-sort 130 | 8,19.01911997795105,-3.3504575966091403,12.0,PBMC_2k,CITE-sort 131 | 9,19.966576099395752,-3.1847493044634674,13.0,PBMC_2k,CITE-sort 132 | 0,22.255257844924927,-4.305043135400915,11.0,PBMC_2k,GMM 133 | 1,20.33034920692444,-4.304050243172198,11.0,PBMC_2k,GMM 134 | 2,20.554229974746704,-4.533803215260254,9.0,PBMC_2k,GMM 135 | 3,22.419154405593872,-4.42924593580085,11.0,PBMC_2k,GMM 136 | 4,21.91244387626648,-4.9055623093266565,7.0,PBMC_2k,GMM 137 | 5,21.474693298339844,-4.38432892944463,11.0,PBMC_2k,GMM 138 | 6,22.620038270950317,-4.254110055313111,11.0,PBMC_2k,GMM 139 | 7,21.389756202697754,-4.388461844683266,10.0,PBMC_2k,GMM 140 | 8,22.062358617782593,-4.534595518289228,10.0,PBMC_2k,GMM 141 | 9,20.67216420173645,-4.196493224579602,11.0,PBMC_2k,GMM 142 | 0,0.06296348571777344,-4.183532801379574,13.0,PBMC_2k,GMM_fixk 143 | 1,0.09994292259216309,-4.017509471280552,13.0,PBMC_2k,GMM_fixk 144 | 2,0.09394574165344238,-4.31464758998454,12.0,PBMC_2k,GMM_fixk 145 | 3,0.09194636344909668,-4.13375789931093,12.0,PBMC_2k,GMM_fixk 146 | 4,0.17489886283874512,-4.141008950409885,12.0,PBMC_2k,GMM_fixk 147 | 5,0.06496405601501465,-4.071073186506558,13.0,PBMC_2k,GMM_fixk 148 | 6,0.08295249938964844,-4.157396516272058,12.0,PBMC_2k,GMM_fixk 149 | 7,0.09694290161132812,-4.306028816385136,13.0,PBMC_2k,GMM_fixk 150 | 8,0.11393475532531738,-4.153402297840144,12.0,PBMC_2k,GMM_fixk 151 | 9,0.11994051933288574,-4.159316942750561,13.0,PBMC_2k,GMM_fixk 152 | 0,4.954163312911987,-4.56337892325203,30.0,PBMC_2k,nGMM 153 | 1,4.922183036804199,-4.277406133166951,36.0,PBMC_2k,nGMM 154 | 2,4.849223375320435,-4.564568888372911,31.0,PBMC_2k,nGMM 155 | 3,4.615357398986816,-4.509464553316169,31.0,PBMC_2k,nGMM 156 | 4,4.847224473953247,-4.45675264736644,30.0,PBMC_2k,nGMM 157 | 5,4.723294734954834,-4.348757776747538,33.0,PBMC_2k,nGMM 158 | 6,4.740287780761719,-4.664607870752208,27.0,PBMC_2k,nGMM 159 | 7,4.55738091468811,-4.5980796898912475,31.0,PBMC_2k,nGMM 160 | 8,5.003134250640869,-4.2928258594938535,34.0,PBMC_2k,nGMM 161 | 9,4.799241542816162,-4.440630441784365,36.0,PBMC_2k,nGMM 162 | 0,0.03497719764709473,-5.9833291676446265,13.0,PBMC_2k,nGMM_fixk 163 | 1,0.02698540687561035,-6.003957870476196,13.0,PBMC_2k,nGMM_fixk 164 | 2,0.027983665466308594,-6.093630484174055,12.0,PBMC_2k,nGMM_fixk 165 | 3,0.03298163414001465,-6.004338077354764,12.0,PBMC_2k,nGMM_fixk 166 | 4,0.02498626708984375,-6.147670550662348,12.0,PBMC_2k,nGMM_fixk 167 | 5,0.06996989250183105,-5.7699411467960475,13.0,PBMC_2k,nGMM_fixk 168 | 6,0.034979820251464844,-6.0118061485416225,12.0,PBMC_2k,nGMM_fixk 169 | 7,0.021988630294799805,-6.014065281933787,13.0,PBMC_2k,nGMM_fixk 170 | 8,0.02298712730407715,-6.080352334919585,12.0,PBMC_2k,nGMM_fixk 171 | 9,0.027984619140625,-5.829470008860642,13.0,PBMC_2k,nGMM_fixk 172 | 0,0.7405760288238525,-4.154761130337304,50.0,PBMC_2k,dpgmm 173 | 1,0.6976006031036377,-4.095399727089786,50.0,PBMC_2k,dpgmm 174 | 2,0.722585916519165,-4.100913209694664,50.0,PBMC_2k,dpgmm 175 | 3,0.7875497341156006,-4.158746515696291,50.0,PBMC_2k,dpgmm 176 | 4,1.0024268627166748,-4.115404945244016,50.0,PBMC_2k,dpgmm 177 | 5,0.8954741954803467,-4.119898009291454,50.0,PBMC_2k,dpgmm 178 | 6,1.0593931674957275,-4.104017128919311,50.0,PBMC_2k,dpgmm 179 | 7,1.5970842838287354,-4.0937771166383365,50.0,PBMC_2k,dpgmm 180 | 8,0.5696749687194824,-4.096304595974938,50.0,PBMC_2k,dpgmm 181 | 9,0.6646206378936768,-4.11045295307734,50.0,PBMC_2k,dpgmm 182 | 0,114.97818398475647,-18.507600707601924,30.0,PBMC_5k,CITE-sort 183 | 1,112.17079138755798,-20.140833521518715,24.0,PBMC_5k,CITE-sort 184 | 2,118.43320631980896,-20.1687032085678,25.0,PBMC_5k,CITE-sort 185 | 3,108.78671073913574,-19.60266635400196,27.0,PBMC_5k,CITE-sort 186 | 4,112.15385246276855,-19.91097177461177,26.0,PBMC_5k,CITE-sort 187 | 5,104.8159863948822,-20.579059335247663,25.0,PBMC_5k,CITE-sort 188 | 6,129.31895780563354,-18.047981442496756,33.0,PBMC_5k,CITE-sort 189 | 7,121.13580322265625,-18.7037400372988,29.0,PBMC_5k,CITE-sort 190 | 8,110.68664026260376,-20.892593978353037,22.0,PBMC_5k,CITE-sort 191 | 9,109.54827690124512,-20.383111524145896,26.0,PBMC_5k,CITE-sort 192 | 0,211.32600045204163,-23.454350630966285,10.0,PBMC_5k,GMM 193 | 1,210.00974488258362,-23.054916710314494,10.0,PBMC_5k,GMM 194 | 2,215.26674675941467,-23.572181097836197,9.0,PBMC_5k,GMM 195 | 3,194.1368443965912,-24.363631408785547,9.0,PBMC_5k,GMM 196 | 4,212.23347163200378,-24.327461240101023,7.0,PBMC_5k,GMM 197 | 5,214.3302800655365,-24.32745953177523,7.0,PBMC_5k,GMM 198 | 6,211.78773593902588,-23.873551854916364,8.0,PBMC_5k,GMM 199 | 7,213.2189016342163,-22.46290853355792,11.0,PBMC_5k,GMM 200 | 8,203.1698772907257,-23.317866247583037,9.0,PBMC_5k,GMM 201 | 9,205.1005666255951,-26.363360699392683,6.0,PBMC_5k,GMM 202 | 0,5.1210691928863525,-19.4854647187879,30.0,PBMC_5k,GMM_fixk 203 | 1,3.6479127407073975,-20.752295899917097,24.0,PBMC_5k,GMM_fixk 204 | 2,4.176607847213745,-20.312525768964793,25.0,PBMC_5k,GMM_fixk 205 | 3,6.233431577682495,-20.262024172246882,27.0,PBMC_5k,GMM_fixk 206 | 4,5.7177252769470215,-20.31578796444344,26.0,PBMC_5k,GMM_fixk 207 | 5,4.098653793334961,-20.1200694101659,25.0,PBMC_5k,GMM_fixk 208 | 6,6.036544322967529,-19.28314747254952,33.0,PBMC_5k,GMM_fixk 209 | 7,4.6363441944122314,-20.369931181472886,29.0,PBMC_5k,GMM_fixk 210 | 8,3.9037649631500244,-20.91871447452352,22.0,PBMC_5k,GMM_fixk 211 | 9,3.270127058029175,-20.971680742345054,26.0,PBMC_5k,GMM_fixk 212 | 0,25.154587030410767,-23.45307846766253,45.0,PBMC_5k,nGMM 213 | 1,27.146455764770508,-23.183804631992455,48.0,PBMC_5k,nGMM 214 | 2,22.942864418029785,-23.561227156902333,47.0,PBMC_5k,nGMM 215 | 3,24.96070146560669,-23.065355294909185,48.0,PBMC_5k,nGMM 216 | 4,23.56050968170166,-23.316521817329175,50.0,PBMC_5k,nGMM 217 | 5,25.249542474746704,-23.05478727667191,50.0,PBMC_5k,nGMM 218 | 6,24.98569369316101,-23.347849308051337,44.0,PBMC_5k,nGMM 219 | 7,23.13675308227539,-23.305425238489914,49.0,PBMC_5k,nGMM 220 | 8,21.95742917060852,-24.176815345583663,38.0,PBMC_5k,nGMM 221 | 9,23.830355167388916,-23.248454154840363,48.0,PBMC_5k,nGMM 222 | 0,0.5936610698699951,-24.652414093913112,30.0,PBMC_5k,nGMM_fixk 223 | 1,0.4087660312652588,-25.39239092081412,24.0,PBMC_5k,nGMM_fixk 224 | 2,0.4007716178894043,-25.469402508908196,25.0,PBMC_5k,nGMM_fixk 225 | 3,0.3987720012664795,-25.245510415722396,27.0,PBMC_5k,nGMM_fixk 226 | 4,0.5996565818786621,-25.328413319173446,26.0,PBMC_5k,nGMM_fixk 227 | 5,0.3507990837097168,-25.305626390488744,25.0,PBMC_5k,nGMM_fixk 228 | 6,0.5296971797943115,-24.678219937327306,33.0,PBMC_5k,nGMM_fixk 229 | 7,0.5896611213684082,-24.94437548448975,29.0,PBMC_5k,nGMM_fixk 230 | 8,0.23586344718933105,-25.515242501309615,22.0,PBMC_5k,nGMM_fixk 231 | 9,0.35979413986206055,-25.17855829387026,26.0,PBMC_5k,nGMM_fixk 232 | 0,28.763530015945435,-22.730742229489056,50.0,PBMC_5k,dpgmm 233 | 1,42.97938942909241,-22.637541038184285,50.0,PBMC_5k,dpgmm 234 | 2,17.364058017730713,-22.767235365076285,50.0,PBMC_5k,dpgmm 235 | 3,16.866344213485718,-22.87149997712949,50.0,PBMC_5k,dpgmm 236 | 4,15.228280782699585,-22.635595389698874,50.0,PBMC_5k,dpgmm 237 | 5,31.790797233581543,-22.657364518166947,50.0,PBMC_5k,dpgmm 238 | 6,17.372053384780884,-22.704211114881595,50.0,PBMC_5k,dpgmm 239 | 7,14.91546082496643,-22.65279666789171,50.0,PBMC_5k,dpgmm 240 | 8,38.50095534324646,-22.687663246028038,50.0,PBMC_5k,dpgmm 241 | 9,21.965423107147217,-22.618340821480327,50.0,PBMC_5k,dpgmm 242 | 0,107.70734000205994,-10.630034855679904,32.0,PBMC_8k,CITE-sort 243 | 1,104.77302098274231,-10.779157598548627,30.0,PBMC_8k,CITE-sort 244 | 2,102.81413292884827,-10.731528850104391,31.0,PBMC_8k,CITE-sort 245 | 3,97.51817464828491,-10.95221067864014,28.0,PBMC_8k,CITE-sort 246 | 4,104.70505881309509,-10.644467718087931,32.0,PBMC_8k,CITE-sort 247 | 5,101.70977544784546,-10.779783029296437,30.0,PBMC_8k,CITE-sort 248 | 6,103.72061419487,-10.677899612997283,31.0,PBMC_8k,CITE-sort 249 | 7,99.14624118804932,-10.731528850104391,31.0,PBMC_8k,CITE-sort 250 | 8,100.68646454811096,-10.636410030908806,32.0,PBMC_8k,CITE-sort 251 | 9,101.97961044311523,-10.775643442866567,30.0,PBMC_8k,CITE-sort 252 | 0,150.70470070838928,-12.67843340228169,15.0,PBMC_8k,GMM 253 | 1,154.03279638290405,-12.913352643743446,13.0,PBMC_8k,GMM 254 | 2,149.3135085105896,-12.829395836729475,14.0,PBMC_8k,GMM 255 | 3,153.33319759368896,-13.018263989717305,13.0,PBMC_8k,GMM 256 | 4,166.12588119506836,-12.653060691652987,15.0,PBMC_8k,GMM 257 | 5,156.92513966560364,-12.848781057466264,14.0,PBMC_8k,GMM 258 | 6,151.6311810016632,-12.641724530158609,16.0,PBMC_8k,GMM 259 | 7,149.8801727294922,-12.915539484541302,14.0,PBMC_8k,GMM 260 | 8,152.36875820159912,-12.547906030609802,17.0,PBMC_8k,GMM 261 | 9,159.12189269065857,-12.613771250927476,16.0,PBMC_8k,GMM 262 | 0,3.632920026779175,-11.889442365173966,32.0,PBMC_8k,GMM_fixk 263 | 1,2.9373178482055664,-11.976433486487593,30.0,PBMC_8k,GMM_fixk 264 | 2,3.503993511199951,-11.896689684489052,31.0,PBMC_8k,GMM_fixk 265 | 3,2.3806371688842773,-12.045370890493576,28.0,PBMC_8k,GMM_fixk 266 | 4,4.423468589782715,-11.858538382211858,32.0,PBMC_8k,GMM_fixk 267 | 5,2.940316677093506,-11.946873896980716,30.0,PBMC_8k,GMM_fixk 268 | 6,4.510417461395264,-11.913008586845843,31.0,PBMC_8k,GMM_fixk 269 | 7,4.011703968048096,-11.887681569730864,31.0,PBMC_8k,GMM_fixk 270 | 8,4.2235822677612305,-11.863128233722959,32.0,PBMC_8k,GMM_fixk 271 | 9,3.6099324226379395,-11.9449885106109,30.0,PBMC_8k,GMM_fixk 272 | 0,24.473987102508545,-13.257029056715261,48.0,PBMC_8k,nGMM 273 | 1,24.23612380027771,-13.248785835689983,50.0,PBMC_8k,nGMM 274 | 2,23.25368595123291,-13.361189414486198,47.0,PBMC_8k,nGMM 275 | 3,24.1181902885437,-13.269971941994285,50.0,PBMC_8k,nGMM 276 | 4,23.40259838104248,-13.343187852332242,46.0,PBMC_8k,nGMM 277 | 5,24.961705923080444,-13.205067487524545,50.0,PBMC_8k,nGMM 278 | 6,25.16559147834778,-13.23389395749099,49.0,PBMC_8k,nGMM 279 | 7,23.77938461303711,-13.264781680716615,50.0,PBMC_8k,nGMM 280 | 8,23.51953411102295,-13.2131171601585,50.0,PBMC_8k,nGMM 281 | 9,23.450572729110718,-13.276658640933295,49.0,PBMC_8k,nGMM 282 | 0,0.471729040145874,-13.881171859596767,32.0,PBMC_8k,nGMM_fixk 283 | 1,0.7545680999755859,-13.948365559776445,30.0,PBMC_8k,nGMM_fixk 284 | 2,0.4767270088195801,-13.99342604286498,31.0,PBMC_8k,nGMM_fixk 285 | 3,0.6516270637512207,-14.070899084715712,28.0,PBMC_8k,nGMM_fixk 286 | 4,0.4997129440307617,-13.848630499522454,32.0,PBMC_8k,nGMM_fixk 287 | 5,0.548687219619751,-13.917406030558176,30.0,PBMC_8k,nGMM_fixk 288 | 6,0.519702672958374,-13.95738399383949,31.0,PBMC_8k,nGMM_fixk 289 | 7,0.5456867218017578,-14.07034839948307,31.0,PBMC_8k,nGMM_fixk 290 | 8,0.4397470951080322,-13.899285897252645,32.0,PBMC_8k,nGMM_fixk 291 | 9,0.4247574806213379,-14.03919632385835,30.0,PBMC_8k,nGMM_fixk 292 | 0,36.233253717422485,-12.107852092857222,50.0,PBMC_8k,dpgmm 293 | 1,30.52152371406555,-12.123206747355963,50.0,PBMC_8k,dpgmm 294 | 2,32.488396883010864,-12.097017001280479,50.0,PBMC_8k,dpgmm 295 | 3,27.36733055114746,-12.106176005176184,50.0,PBMC_8k,dpgmm 296 | 4,64.03633379936218,-12.105459632889083,50.0,PBMC_8k,dpgmm 297 | 5,25.79922842979431,-12.09453672839614,50.0,PBMC_8k,dpgmm 298 | 6,47.8176212310791,-12.076773396014579,50.0,PBMC_8k,dpgmm 299 | 7,20.216424465179443,-12.086241998093666,50.0,PBMC_8k,dpgmm 300 | 8,53.82618069648743,-12.112368588880203,50.0,PBMC_8k,dpgmm 301 | 9,26.560791969299316,-12.083497305149283,50.0,PBMC_8k,dpgmm 302 | 0,104.09340858459473,-9.964740682097846,16.0,MALT_8k,CITE-sort 303 | 1,90.37426471710205,-10.519119375728287,12.0,MALT_8k,CITE-sort 304 | 2,106.49103569984436,-10.02847883660573,15.0,MALT_8k,CITE-sort 305 | 3,101.72176790237427,-9.965077840276498,16.0,MALT_8k,CITE-sort 306 | 4,87.93864917755127,-10.519119375728287,12.0,MALT_8k,CITE-sort 307 | 5,94.86282300949097,-10.334480564516857,14.0,MALT_8k,CITE-sort 308 | 6,91.9593575000763,-10.334480564516857,14.0,MALT_8k,CITE-sort 309 | 7,98.18778967857361,-9.964740682097846,16.0,MALT_8k,CITE-sort 310 | 8,104.2493200302124,-9.964740682097846,16.0,MALT_8k,CITE-sort 311 | 9,99.52801489830017,-10.149379493309278,14.0,MALT_8k,CITE-sort 312 | 0,162.16115069389343,-10.78066231422725,12.0,MALT_8k,GMM 313 | 1,158.64816308021545,-11.21033384197538,9.0,MALT_8k,GMM 314 | 2,172.3073332309723,-10.878094176531889,12.0,MALT_8k,GMM 315 | 3,160.98681473731995,-10.719243317607775,13.0,MALT_8k,GMM 316 | 4,165.07948184013367,-10.851954988736965,11.0,MALT_8k,GMM 317 | 5,164.23995232582092,-11.03250866619702,11.0,MALT_8k,GMM 318 | 6,156.05663537979126,-10.84594605238231,12.0,MALT_8k,GMM 319 | 7,166.68856000900269,-10.871247623058528,11.0,MALT_8k,GMM 320 | 8,159.71054553985596,-11.03118670656651,10.0,MALT_8k,GMM 321 | 9,160.99881625175476,-10.983021426242125,10.0,MALT_8k,GMM 322 | 0,1.252284049987793,-10.664905043752391,16.0,MALT_8k,GMM_fixk 323 | 1,1.3892052173614502,-10.845855560036672,12.0,MALT_8k,GMM_fixk 324 | 2,1.602081537246704,-10.66361543142215,15.0,MALT_8k,GMM_fixk 325 | 3,1.984863519668579,-10.616611113282252,16.0,MALT_8k,GMM_fixk 326 | 4,1.2582783699035645,-10.817374274546374,12.0,MALT_8k,GMM_fixk 327 | 5,2.392629861831665,-10.643494736654336,14.0,MALT_8k,GMM_fixk 328 | 6,2.1727559566497803,-10.628945033757466,14.0,MALT_8k,GMM_fixk 329 | 7,1.6800284385681152,-10.573609392280927,16.0,MALT_8k,GMM_fixk 330 | 8,2.473573923110962,-10.587864387007873,16.0,MALT_8k,GMM_fixk 331 | 9,1.652052879333496,-10.646049674875046,14.0,MALT_8k,GMM_fixk 332 | 0,32.25153422355652,-11.17962721969968,49.0,MALT_8k,nGMM 333 | 1,31.582916736602783,-11.20487390143217,50.0,MALT_8k,nGMM 334 | 2,32.2605299949646,-11.170188239143851,49.0,MALT_8k,nGMM 335 | 3,31.862756490707397,-11.183292293735285,48.0,MALT_8k,nGMM 336 | 4,31.8977370262146,-11.234650526891373,48.0,MALT_8k,nGMM 337 | 5,33.487815856933594,-11.301479520501074,45.0,MALT_8k,nGMM 338 | 6,30.58048915863037,-11.214989136550269,50.0,MALT_8k,nGMM 339 | 7,32.620323181152344,-11.195152031158566,48.0,MALT_8k,nGMM 340 | 8,33.890594482421875,-11.174076335338153,49.0,MALT_8k,nGMM 341 | 9,33.67072105407715,-11.294354727575243,44.0,MALT_8k,nGMM 342 | 0,0.33580660820007324,-12.582591731579186,16.0,MALT_8k,nGMM_fixk 343 | 1,0.2268695831298828,-13.164387624746238,12.0,MALT_8k,nGMM_fixk 344 | 2,0.2488558292388916,-12.725037790760195,15.0,MALT_8k,nGMM_fixk 345 | 3,0.2548534870147705,-12.613755613304862,16.0,MALT_8k,nGMM_fixk 346 | 4,0.11993145942687988,-13.399302772496565,12.0,MALT_8k,nGMM_fixk 347 | 5,0.13193511962890625,-12.917129308349562,14.0,MALT_8k,nGMM_fixk 348 | 6,0.19788742065429688,-12.802236991102422,14.0,MALT_8k,nGMM_fixk 349 | 7,0.3148186206817627,-12.607639773411858,16.0,MALT_8k,nGMM_fixk 350 | 8,0.25785279273986816,-12.61815087537536,16.0,MALT_8k,nGMM_fixk 351 | 9,0.22188377380371094,-12.836320077438216,14.0,MALT_8k,nGMM_fixk 352 | 0,49.70354175567627,-10.339389581778589,50.0,MALT_8k,dpgmm 353 | 1,59.39699029922485,-10.312004086512436,50.0,MALT_8k,dpgmm 354 | 2,48.110453844070435,-10.287859167423095,50.0,MALT_8k,dpgmm 355 | 3,39.88716220855713,-10.342858734870253,50.0,MALT_8k,dpgmm 356 | 4,82.59071135520935,-10.313174194319487,50.0,MALT_8k,dpgmm 357 | 5,48.4212646484375,-10.339740513621718,50.0,MALT_8k,dpgmm 358 | 6,53.693257331848145,-10.305461754565231,50.0,MALT_8k,dpgmm 359 | 7,67.16954040527344,-10.326394833530733,50.0,MALT_8k,dpgmm 360 | 8,77.51761603355408,-10.320139729879017,50.0,MALT_8k,dpgmm 361 | 9,49.45867133140564,-10.298376990321723,50.0,MALT_8k,dpgmm 362 | 0,82.48777031898499,-4.970225496294415,14.0,CBMC_8k,CITE-sort 363 | 1,84.576584815979,-4.925015700588989,15.0,CBMC_8k,CITE-sort 364 | 2,85.64297437667847,-4.970225496294415,14.0,CBMC_8k,CITE-sort 365 | 3,85.82585883140564,-4.970225496294415,14.0,CBMC_8k,CITE-sort 366 | 4,88.71920251846313,-4.822622999413,15.0,CBMC_8k,CITE-sort 367 | 5,85.5120496749878,-4.970225496294415,14.0,CBMC_8k,CITE-sort 368 | 6,84.42366147041321,-4.970225496294415,14.0,CBMC_8k,CITE-sort 369 | 7,83.03945541381836,-4.970225496294415,14.0,CBMC_8k,CITE-sort 370 | 8,84.73048639297485,-4.970225496294415,14.0,CBMC_8k,CITE-sort 371 | 9,89.86754655838013,-4.774042780518445,16.0,CBMC_8k,CITE-sort 372 | 0,60.67326259613037,-5.338610559049961,14.0,CBMC_8k,GMM 373 | 1,62.55017423629761,-5.327642232587172,14.0,CBMC_8k,GMM 374 | 2,59.244077920913696,-5.221029908669777,16.0,CBMC_8k,GMM 375 | 3,61.91255235671997,-5.19289199869492,17.0,CBMC_8k,GMM 376 | 4,59.807756185531616,-5.378078059387416,14.0,CBMC_8k,GMM 377 | 5,62.438249826431274,-5.324600227554156,15.0,CBMC_8k,GMM 378 | 6,59.465951919555664,-5.257326024047886,16.0,CBMC_8k,GMM 379 | 7,62.204383850097656,-5.384429447050616,13.0,CBMC_8k,GMM 380 | 8,59.13414263725281,-5.303643582809363,15.0,CBMC_8k,GMM 381 | 9,58.410555601119995,-5.359907671418008,14.0,CBMC_8k,GMM 382 | 0,0.4987154006958008,-5.318481191595465,14.0,CBMC_8k,GMM_fixk 383 | 1,0.780552864074707,-5.28135052374074,15.0,CBMC_8k,GMM_fixk 384 | 2,0.41576290130615234,-5.384607513553807,14.0,CBMC_8k,GMM_fixk 385 | 3,0.40477871894836426,-5.376723987110303,14.0,CBMC_8k,GMM_fixk 386 | 4,0.44574522972106934,-5.308714250487742,15.0,CBMC_8k,GMM_fixk 387 | 5,0.30182743072509766,-5.3898127309971,14.0,CBMC_8k,GMM_fixk 388 | 6,0.560678243637085,-5.353808478549749,14.0,CBMC_8k,GMM_fixk 389 | 7,0.5047101974487305,-5.346889684520823,14.0,CBMC_8k,GMM_fixk 390 | 8,0.4087657928466797,-5.338011181859372,14.0,CBMC_8k,GMM_fixk 391 | 9,0.5027120113372803,-5.248688020174258,16.0,CBMC_8k,GMM_fixk 392 | 0,24.397029638290405,-6.2279743860146715,50.0,CBMC_8k,nGMM 393 | 1,24.86376404762268,-6.286328829454328,49.0,CBMC_8k,nGMM 394 | 2,24.761822938919067,-6.304667013372216,49.0,CBMC_8k,nGMM 395 | 3,26.78266477584839,-6.293575089003526,50.0,CBMC_8k,nGMM 396 | 4,25.76124930381775,-6.32647488892547,47.0,CBMC_8k,nGMM 397 | 5,24.554939031600952,-6.229492287427068,50.0,CBMC_8k,nGMM 398 | 6,23.87632966041565,-6.279360804260311,50.0,CBMC_8k,nGMM 399 | 7,23.980270862579346,-6.328715170276957,48.0,CBMC_8k,nGMM 400 | 8,26.58477783203125,-6.257984565811174,50.0,CBMC_8k,nGMM 401 | 9,25.429440021514893,-6.270241702230268,49.0,CBMC_8k,nGMM 402 | 0,0.11693239212036133,-8.38443259883629,14.0,CBMC_8k,nGMM_fixk 403 | 1,0.11793279647827148,-8.39593961250455,15.0,CBMC_8k,nGMM_fixk 404 | 2,0.08295154571533203,-8.38986093480687,14.0,CBMC_8k,nGMM_fixk 405 | 3,0.12292909622192383,-8.482321174496342,14.0,CBMC_8k,nGMM_fixk 406 | 4,0.16690397262573242,-8.222030381194092,15.0,CBMC_8k,nGMM_fixk 407 | 5,0.10094189643859863,-8.373626097978223,14.0,CBMC_8k,nGMM_fixk 408 | 6,0.08994841575622559,-8.400707110006236,14.0,CBMC_8k,nGMM_fixk 409 | 7,0.10893726348876953,-8.369739140656122,14.0,CBMC_8k,nGMM_fixk 410 | 8,0.12592792510986328,-8.583404484295043,14.0,CBMC_8k,nGMM_fixk 411 | 9,0.24486041069030762,-8.17223767640293,16.0,CBMC_8k,nGMM_fixk 412 | 0,26.914589881896973,-4.837041146685309,50.0,CBMC_8k,dpgmm 413 | 1,17.987701177597046,-4.825254321767748,50.0,CBMC_8k,dpgmm 414 | 2,30.537515878677368,-4.812883948532638,50.0,CBMC_8k,dpgmm 415 | 3,16.31166124343872,-4.8260832952804265,50.0,CBMC_8k,dpgmm 416 | 4,19.667738914489746,-4.827847765340304,50.0,CBMC_8k,dpgmm 417 | 5,14.024969577789307,-4.818149091948307,50.0,CBMC_8k,dpgmm 418 | 6,21.808513402938843,-4.827393844981892,50.0,CBMC_8k,dpgmm 419 | 7,21.315794706344604,-4.826081350901387,50.0,CBMC_8k,dpgmm 420 | 8,34.81006860733032,-4.827883090808033,50.0,CBMC_8k,dpgmm 421 | 9,34.777087926864624,-4.800197197416694,50.0,CBMC_8k,dpgmm 422 | 0,131.48872256278992,-4.824296845335957,25.0,PBMC_16k,CITE-sort 423 | 1,134.248144865036,-5.170327640657248,23.0,PBMC_16k,CITE-sort 424 | 2,153.29023241996765,-4.622667569567744,27.0,PBMC_16k,CITE-sort 425 | 3,148.85877871513367,-4.714098794409115,25.0,PBMC_16k,CITE-sort 426 | 4,138.88249158859253,-5.248031352743899,21.0,PBMC_16k,CITE-sort 427 | 5,151.88603711128235,-4.628945027215183,28.0,PBMC_16k,CITE-sort 428 | 6,155.8997368812561,-4.601722116091284,28.0,PBMC_16k,CITE-sort 429 | 7,139.5650990009308,-4.810842836864783,25.0,PBMC_16k,CITE-sort 430 | 8,138.50376057624817,-4.8428604057279685,25.0,PBMC_16k,CITE-sort 431 | 9,153.2203996181488,-4.5555910214670465,28.0,PBMC_16k,CITE-sort 432 | 0,151.23140001296997,-6.151465680535652,26.0,PBMC_16k,GMM 433 | 1,142.63832092285156,-6.177100083396359,25.0,PBMC_16k,GMM 434 | 2,138.85249733924866,-6.231229949495253,22.0,PBMC_16k,GMM 435 | 3,144.43529105186462,-6.21842403857333,22.0,PBMC_16k,GMM 436 | 4,146.25425124168396,-6.271936250794268,21.0,PBMC_16k,GMM 437 | 5,152.47869396209717,-6.185376585620828,25.0,PBMC_16k,GMM 438 | 6,146.16531109809875,-6.2142301946581515,23.0,PBMC_16k,GMM 439 | 7,146.71299743652344,-6.292184652989888,20.0,PBMC_16k,GMM 440 | 8,149.17957639694214,-6.217817180692061,23.0,PBMC_16k,GMM 441 | 9,153.0943329334259,-6.2561118327358685,22.0,PBMC_16k,GMM 442 | 0,3.302109956741333,-6.167149522908562,25.0,PBMC_16k,GMM_fixk 443 | 1,2.8083910942077637,-6.227611206044761,23.0,PBMC_16k,GMM_fixk 444 | 2,3.3340916633605957,-6.130149880123634,27.0,PBMC_16k,GMM_fixk 445 | 3,2.936318874359131,-6.170888958309285,25.0,PBMC_16k,GMM_fixk 446 | 4,2.600506067276001,-6.253061840300715,21.0,PBMC_16k,GMM_fixk 447 | 5,4.07366681098938,-6.1187655429051775,28.0,PBMC_16k,GMM_fixk 448 | 6,3.296112537384033,-6.139985798808145,28.0,PBMC_16k,GMM_fixk 449 | 7,2.9353177547454834,-6.185204323654343,25.0,PBMC_16k,GMM_fixk 450 | 8,2.7274365425109863,-6.168232315138406,25.0,PBMC_16k,GMM_fixk 451 | 9,3.511990785598755,-6.123503587453922,28.0,PBMC_16k,GMM_fixk 452 | 0,38.71083426475525,-6.379849891641167,48.0,PBMC_16k,nGMM 453 | 1,38.449986696243286,-6.376821446147685,49.0,PBMC_16k,nGMM 454 | 2,38.604896068573,-6.40737439570078,48.0,PBMC_16k,nGMM 455 | 3,38.98268151283264,-6.352696487397852,49.0,PBMC_16k,nGMM 456 | 4,38.36903095245361,-6.3716690156441835,50.0,PBMC_16k,nGMM 457 | 5,38.76480484008789,-6.364510993512695,50.0,PBMC_16k,nGMM 458 | 6,39.50738000869751,-6.354521920871351,49.0,PBMC_16k,nGMM 459 | 7,38.842759132385254,-6.369662674404592,48.0,PBMC_16k,nGMM 460 | 8,38.83576488494873,-6.423211994218609,45.0,PBMC_16k,nGMM 461 | 9,38.5829074382782,-6.3840953086766286,47.0,PBMC_16k,nGMM 462 | 0,0.6576240062713623,-6.785083664655333,25.0,PBMC_16k,nGMM_fixk 463 | 1,0.5037086009979248,-6.8704074699934905,23.0,PBMC_16k,nGMM_fixk 464 | 2,0.6106505393981934,-6.7397046065996005,27.0,PBMC_16k,nGMM_fixk 465 | 3,0.9654474258422852,-6.769382740398923,25.0,PBMC_16k,nGMM_fixk 466 | 4,0.4957153797149658,-6.941616866643075,21.0,PBMC_16k,nGMM_fixk 467 | 5,0.8255264759063721,-6.7202072370311505,28.0,PBMC_16k,nGMM_fixk 468 | 6,0.8914902210235596,-6.734181354395366,28.0,PBMC_16k,nGMM_fixk 469 | 7,0.6896049976348877,-6.8174893307630136,25.0,PBMC_16k,nGMM_fixk 470 | 8,0.8914885520935059,-6.800764054821048,25.0,PBMC_16k,nGMM_fixk 471 | 9,0.7515683174133301,-6.703480739455203,28.0,PBMC_16k,nGMM_fixk 472 | 0,108.17206335067749,-6.048718855431777,50.0,PBMC_16k,dpgmm 473 | 1,109.62023544311523,-6.047686785818196,50.0,PBMC_16k,dpgmm 474 | 2,85.39810633659363,-6.048645567224814,50.0,PBMC_16k,dpgmm 475 | 3,80.45993161201477,-6.049458774154263,50.0,PBMC_16k,dpgmm 476 | 4,63.30175542831421,-6.049415431938901,50.0,PBMC_16k,dpgmm 477 | 5,86.23262572288513,-6.037890865010017,50.0,PBMC_16k,dpgmm 478 | 6,98.0288712978363,-6.051989896080837,50.0,PBMC_16k,dpgmm 479 | 7,94.77173709869385,-6.053023460720795,50.0,PBMC_16k,dpgmm 480 | 8,59.19510841369629,-6.0452716926611085,50.0,PBMC_16k,dpgmm 481 | 9,78.02951526641846,-6.039499627171305,50.0,PBMC_16k,dpgmm 482 | -------------------------------------------------------------------------------- /performance/record_alldb.csv: -------------------------------------------------------------------------------- 1 | ,time,ll,bic,n_component,DB,method 2 | 0,1.2234070301055908,-11.122508339097205,18120.598500175132,15.0,PBMC_1k,CITE-sort 3 | 1,1.3422009944915771,-10.599447969967613,17979.10670447445,19.0,PBMC_1k,CITE-sort 4 | 2,1.3595807552337646,-10.580384296272932,17951.921905785835,19.0,PBMC_1k,CITE-sort 5 | 3,1.2019219398498535,-10.797541537800555,18110.490059534695,18.0,PBMC_1k,CITE-sort 6 | 4,1.0659277439117432,-11.520668331656603,18386.178504225776,13.0,PBMC_1k,CITE-sort 7 | 5,1.2330269813537598,-10.884964282200043,18084.056820378835,17.0,PBMC_1k,CITE-sort 8 | 6,1.0871169567108154,-11.210810373520744,18246.517201263097,15.0,PBMC_1k,CITE-sort 9 | 7,1.2957079410552979,-10.559284814490477,17921.83404476405,19.0,PBMC_1k,CITE-sort 10 | 8,1.344512939453125,-10.747012821200016,18038.43610966233,18.0,PBMC_1k,CITE-sort 11 | 9,1.3971610069274902,-10.908992508838459,18118.321071565217,17.0,PBMC_1k,CITE-sort 12 | 0,0.06934595108032227,-9.391548425467587,21070.52750958933,15.0,PBMC_1k,GMM 13 | 1,0.17578816413879395,-8.432751674316203,21753.759768630494,19.0,PBMC_1k,GMM 14 | 2,0.11098718643188477,-8.613242757652294,22009.233753526605,19.0,PBMC_1k,GMM 15 | 3,0.15456628799438477,-8.906608827994178,21917.34935044522,18.0,PBMC_1k,GMM 16 | 4,0.0856637954711914,-9.958450318552085,20854.826033934376,13.0,PBMC_1k,GMM 17 | 5,0.0980536937713623,-8.95775671436817,21477.677566627026,17.0,PBMC_1k,GMM 18 | 6,0.11914801597595215,-9.423531116544487,21117.317180993778,15.0,PBMC_1k,GMM 19 | 7,0.09374594688415527,-8.560323153610016,21935.238604155347,19.0,PBMC_1k,GMM 20 | 8,0.11969304084777832,-8.761752986297514,21710.63558117411,18.0,PBMC_1k,GMM 21 | 9,0.08350276947021484,-9.11605125005756,21703.562246918555,17.0,PBMC_1k,GMM 22 | 0,0.016718149185180664,-12.228966785816784,19697.072733446294,15.0,PBMC_1k,nGMM 23 | 1,0.025583267211914062,-11.594671772544062,19397.97519151494,19.0,PBMC_1k,nGMM 24 | 2,0.02981114387512207,-11.814677063212406,19711.51782998137,19.0,PBMC_1k,nGMM 25 | 3,0.01725292205810547,-11.925040189720319,19717.787536750096,18.0,PBMC_1k,nGMM 26 | 4,0.013669252395629883,-12.560536341854464,19869.01292539533,13.0,PBMC_1k,nGMM 27 | 5,0.020405054092407227,-11.955083622692364,19608.56844846602,17.0,PBMC_1k,nGMM 28 | 6,0.01281285285949707,-12.197622142865846,19653.335425410318,15.0,PBMC_1k,nGMM 29 | 7,0.018594741821289062,-11.828031980611136,19730.34262987801,19.0,PBMC_1k,nGMM 30 | 8,0.018755197525024414,-11.63414537780662,19303.315537517778,18.0,PBMC_1k,nGMM 31 | 9,0.015864133834838867,-11.989330130360914,19658.157454504828,17.0,PBMC_1k,nGMM 32 | 0,5.529792070388794,-16.91493898317407,191860.99980536764,43.0,PBMC_5k,CITE-sort 33 | 1,5.566227912902832,-17.187725669048742,193387.41905180132,39.0,PBMC_5k,CITE-sort 34 | 2,5.876396894454956,-16.992907777426087,193347.30644981583,45.0,PBMC_5k,CITE-sort 35 | 3,6.318313837051392,-17.05918118363452,195044.93275091852,48.0,PBMC_5k,CITE-sort 36 | 4,7.204071044921875,-16.39752962601377,189437.76554038146,52.0,PBMC_5k,CITE-sort 37 | 5,6.371899843215942,-17.298500969848494,195886.09929352903,43.0,PBMC_5k,CITE-sort 38 | 6,6.815957069396973,-17.43462256473374,197648.61036903856,44.0,PBMC_5k,CITE-sort 39 | 7,7.576941967010498,-16.649744678369668,190080.20394710155,46.0,PBMC_5k,CITE-sort 40 | 8,6.156139135360718,-17.383231381499122,195773.1070570394,40.0,PBMC_5k,CITE-sort 41 | 9,6.679594993591309,-16.79224664803924,192243.72173438163,48.0,PBMC_5k,CITE-sort 42 | 0,1.6649901866912842,-14.844552393295237,233104.98341420465,43.0,PBMC_5k,GMM 43 | 1,1.0674920082092285,-15.037024547766473,227931.69830776658,39.0,PBMC_5k,GMM 44 | 2,2.366248846054077,-14.82285979406439,236478.72132183344,45.0,PBMC_5k,GMM 45 | 3,2.8846938610076904,-14.559256840564421,239108.96140165094,48.0,PBMC_5k,GMM 46 | 4,3.003420829772949,-14.306739501113247,243649.47077560163,52.0,PBMC_5k,GMM 47 | 5,1.7910127639770508,-14.686577174542666,231450.33086139915,43.0,PBMC_5k,GMM 48 | 6,2.4024720191955566,-14.643450337378097,232796.6201212383,44.0,PBMC_5k,GMM 49 | 7,1.8464860916137695,-14.667242647587006,236641.7195371195,46.0,PBMC_5k,GMM 50 | 8,1.3643510341644287,-14.927212459043272,228577.29615223384,40.0,PBMC_5k,GMM 51 | 9,2.4785399436950684,-14.413919172519448,237584.03507317504,48.0,PBMC_5k,GMM 52 | 0,0.16876792907714844,-17.38843703779885,196822.3291029122,43.0,PBMC_5k,nGMM 53 | 1,0.29349207878112793,-17.51402190353099,196804.70850178157,39.0,PBMC_5k,nGMM 54 | 2,0.24037885665893555,-17.29008998736503,196456.40502038345,45.0,PBMC_5k,nGMM 55 | 3,0.2437589168548584,-17.10378499463751,195501.2237193164,48.0,PBMC_5k,nGMM 56 | 4,0.3140268325805664,-17.026432628183088,196034.51057826742,52.0,PBMC_5k,nGMM 57 | 5,0.23077607154846191,-17.311941894049042,196019.5000339507,43.0,PBMC_5k,nGMM 58 | 6,0.250927209854126,-17.258264687618446,195792.26132085206,44.0,PBMC_5k,nGMM 59 | 7,0.22651004791259766,-17.245524790302447,196324.12175886668,46.0,PBMC_5k,nGMM 60 | 8,0.23640108108520508,-17.408061851528625,196026.13892767506,40.0,PBMC_5k,nGMM 61 | 9,0.20556211471557617,-17.13414245065703,195825.1496236616,48.0,PBMC_5k,nGMM 62 | 0,5.518562078475952,-9.438671093443384,162813.61062683674,64.0,PBMC_8k,CITE-sort 63 | 1,5.238887786865234,-9.527185491367458,164205.94210618243,64.0,PBMC_8k,CITE-sort 64 | 2,5.150882005691528,-9.429811058963818,162898.4967298605,65.0,PBMC_8k,CITE-sort 65 | 3,4.848948001861572,-9.477606068105388,163426.05777827007,64.0,PBMC_8k,CITE-sort 66 | 4,5.218626976013184,-9.540687806052675,163521.31573463164,60.0,PBMC_8k,CITE-sort 67 | 5,4.935317039489746,-9.599496430758604,163997.86651048128,58.0,PBMC_8k,CITE-sort 68 | 6,4.948575973510742,-9.795641854357607,167083.2340236936,58.0,PBMC_8k,CITE-sort 69 | 7,5.144322633743286,-9.230961086041829,160219.0955465722,67.0,PBMC_8k,CITE-sort 70 | 8,4.917170763015747,-9.617264540039264,164725.86776024068,60.0,PBMC_8k,CITE-sort 71 | 9,4.8958821296691895,-9.601601862078336,165152.25717207725,63.0,PBMC_8k,CITE-sort 72 | 0,3.542236804962158,-8.505365806753996,186012.32876150298,64.0,PBMC_8k,GMM 73 | 1,3.0150198936462402,-8.510668219698958,186086.48126502425,64.0,PBMC_8k,GMM 74 | 2,2.065992832183838,-8.47587075076912,186352.87436215894,65.0,PBMC_8k,GMM 75 | 3,2.717287063598633,-8.49005107506052,185769.4078329935,64.0,PBMC_8k,GMM 76 | 4,2.4877541065216064,-8.563654062586876,183661.35173620144,60.0,PBMC_8k,GMM 77 | 5,2.2299141883850098,-8.586341323927163,182386.9540043941,58.0,PBMC_8k,GMM 78 | 6,2.152177095413208,-8.614111537042746,182822.8978000642,58.0,PBMC_8k,GMM 79 | 7,3.0803709030151367,-8.443504929276104,187493.258575882,67.0,PBMC_8k,GMM 80 | 8,1.8948559761047363,-8.552899113517682,183494.69225233048,60.0,PBMC_8k,GMM 81 | 9,2.120100975036621,-8.510342519139735,185271.44473235984,63.0,PBMC_8k,GMM 82 | 0,0.47810983657836914,-9.721009822112643,167242.9225783066,64.0,PBMC_8k,nGMM 83 | 1,0.4756009578704834,-9.749047584219634,167687.2338148695,64.0,PBMC_8k,nGMM 84 | 2,0.47623300552368164,-9.737309160620608,167724.37832321285,65.0,PBMC_8k,nGMM 85 | 3,0.511469841003418,-9.726102128056768,167325.1309656989,64.0,PBMC_8k,nGMM 86 | 4,0.5160980224609375,-9.790378216486198,167441.11678066733,60.0,PBMC_8k,nGMM 87 | 5,0.41063928604125977,-9.85447361673597,167996.43592706084,58.0,PBMC_8k,nGMM 88 | 6,0.49051523208618164,-9.783789107146575,166886.08268799845,58.0,PBMC_8k,nGMM 89 | 7,0.5964858531951904,-9.726101110527079,167995.91832828856,67.0,PBMC_8k,nGMM 90 | 8,0.4071769714355469,-9.852173429322423,168402.72006131237,60.0,PBMC_8k,nGMM 91 | 9,0.33146119117736816,-9.779301694005651,167933.24796642642,63.0,PBMC_8k,nGMM 92 | 0,2.1575942039489746,-8.138739673093426,140179.62549337992,19.0,MALT_8k,CITE-sort 93 | 1,2.227728843688965,-8.922936855925352,154059.80240214276,23.0,MALT_8k,CITE-sort 94 | 2,2.343740940093994,-8.13794607903107,140166.27406687484,19.0,MALT_8k,CITE-sort 95 | 3,2.3254079818725586,-8.246200163057445,141815.82990033497,18.0,MALT_8k,CITE-sort 96 | 4,3.141963005065918,-8.915490803426332,155823.34965309518,34.0,MALT_8k,CITE-sort 97 | 5,2.9481611251831055,-8.030433983019996,140246.3102017805,30.0,MALT_8k,CITE-sort 98 | 6,2.876828908920288,-7.782560647041819,136247.80007348326,31.0,MALT_8k,CITE-sort 99 | 7,2.4367339611053467,-7.97465601269262,137934.21461939564,22.0,MALT_8k,CITE-sort 100 | 8,2.2418417930603027,-8.138935994492234,140182.92840459346,19.0,MALT_8k,CITE-sort 101 | 9,2.3312768936157227,-8.922909613328,154059.3440726849,23.0,MALT_8k,CITE-sort 102 | 0,0.46611690521240234,-7.6671368017252135,138414.1099555053,19.0,MALT_8k,GMM 103 | 1,0.4600691795349121,-7.592912384700842,139152.7301840142,23.0,MALT_8k,GMM 104 | 2,0.5721631050109863,-7.66235849156766,138333.79762451886,19.0,MALT_8k,GMM 105 | 3,0.38485026359558105,-7.6836761189961225,138194.63889813964,18.0,MALT_8k,GMM 106 | 4,0.8844120502471924,-7.415133148317597,141632.45732113929,34.0,MALT_8k,GMM 107 | 5,0.6158289909362793,-7.483651829160183,140793.14569936835,30.0,MALT_8k,GMM 108 | 6,0.626539945602417,-7.462304113577804,140930.3085264492,31.0,MALT_8k,GMM 109 | 7,0.5197019577026367,-7.625773664539795,139205.6743213384,22.0,MALT_8k,GMM 110 | 8,0.38326501846313477,-7.659503236269939,138283.92799364313,19.0,MALT_8k,GMM 111 | 9,0.5138049125671387,-7.591387664140254,139127.59931757045,23.0,MALT_8k,GMM 112 | 0,0.1121678352355957,-8.592168064831728,147799.10108325208,19.0,MALT_8k,nGMM 113 | 1,0.11516475677490234,-8.388677086350944,145058.16402031583,23.0,MALT_8k,nGMM 114 | 2,0.10192298889160156,-8.549684730617042,147081.83850916644,19.0,MALT_8k,nGMM 115 | 3,0.14873814582824707,-8.667081021205721,148885.22874656002,18.0,MALT_8k,nGMM 116 | 4,0.20810818672180176,-8.135204373051037,142682.1087272623,34.0,MALT_8k,nGMM 117 | 5,0.18267393112182617,-8.204251432404153,143159.43351469105,30.0,MALT_8k,nGMM 118 | 6,0.24073505401611328,-8.192342137051503,143129.25976990396,31.0,MALT_8k,nGMM 119 | 7,0.11271023750305176,-8.419873076011498,145416.0084224086,22.0,MALT_8k,nGMM 120 | 8,0.09589195251464844,-8.53846610987413,146897.02971192208,19.0,MALT_8k,nGMM 121 | 9,0.14923810958862305,-8.407899930568272,145384.24865985673,23.0,MALT_8k,nGMM 122 | 0,1.5813980102539062,-6.364109836495208,112596.86943478562,19.0,CBMC_8k,CITE-sort 123 | 1,1.5774438381195068,-6.3673479587837996,112498.62986562813,18.0,CBMC_8k,CITE-sort 124 | 2,1.539851188659668,-6.367761693867996,112505.76017606918,18.0,CBMC_8k,CITE-sort 125 | 3,1.6679980754852295,-6.34040005469012,112188.25505515673,19.0,CBMC_8k,CITE-sort 126 | 4,1.5511629581451416,-6.360399431970839,112532.92432321265,19.0,CBMC_8k,CITE-sort 127 | 5,1.6738181114196777,-6.3647364950804,112607.66926884283,19.0,CBMC_8k,CITE-sort 128 | 6,1.5708410739898682,-6.355419664913761,112601.14838643002,20.0,CBMC_8k,CITE-sort 129 | 7,1.5760369300842285,-6.357796351725545,112642.10820694432,20.0,CBMC_8k,CITE-sort 130 | 8,1.464735984802246,-6.367761693867996,112505.76017606918,18.0,CBMC_8k,CITE-sort 131 | 9,1.5899713039398193,-6.32961681581974,112310.50745382276,21.0,CBMC_8k,CITE-sort 132 | 0,0.6619877815246582,-4.510979393152983,85469.61130894872,19.0,CBMC_8k,GMM 133 | 1,0.33260416984558105,-4.60923707895074,86752.91326746752,18.0,CBMC_8k,GMM 134 | 2,0.5183930397033691,-4.57249215155964,86125.12276259843,18.0,CBMC_8k,GMM 135 | 3,0.41444897651672363,-4.598139084551907,86975.21086818096,19.0,CBMC_8k,GMM 136 | 4,0.47031712532043457,-4.575349654426403,86578.95923897572,19.0,CBMC_8k,GMM 137 | 5,0.4637570381164551,-4.565348853628528,86410.23337112383,19.0,CBMC_8k,GMM 138 | 6,0.3703291416168213,-4.524238921480881,86103.61132192895,20.0,CBMC_8k,GMM 139 | 7,0.44608592987060547,-4.5440882313597735,86455.32499173887,20.0,CBMC_8k,GMM 140 | 8,0.4300708770751953,-4.663088073844756,87680.80151608032,18.0,CBMC_8k,GMM 141 | 9,0.3334038257598877,-4.532936659473049,86659.01521307034,21.0,CBMC_8k,GMM 142 | 0,0.12944817543029785,-6.057223833590501,107298.58690010864,19.0,CBMC_8k,nGMM 143 | 1,0.10278129577636719,-6.188431521796373,109400.40181947568,18.0,CBMC_8k,nGMM 144 | 2,0.10554099082946777,-6.120164468368747,108229.25988782156,18.0,CBMC_8k,nGMM 145 | 3,0.09456467628479004,-6.040737858995399,107012.36836988111,19.0,CBMC_8k,nGMM 146 | 4,0.08441495895385742,-6.1769925054567985,109358.50227287639,19.0,CBMC_8k,nGMM 147 | 5,0.10467410087585449,-6.0628602173089226,107391.16892016676,19.0,CBMC_8k,nGMM 148 | 6,0.10837197303771973,-6.03670324695701,107096.7686853721,20.0,CBMC_8k,nGMM 149 | 7,0.09143185615539551,-5.991699539515126,106319.03245007277,20.0,CBMC_8k,nGMM 150 | 8,0.09178495407104492,-6.085520395418077,107630.15162431629,18.0,CBMC_8k,nGMM 151 | 9,0.13098692893981934,-5.949626117580737,105740.3608819197,21.0,CBMC_8k,nGMM 152 | 0,2.5118660926818848,-4.622624318068818,151357.64048860376,30.0,PBMC_16k,CITE-sort 153 | 1,2.182826042175293,-5.422101895636898,175532.73375749,23.0,PBMC_16k,CITE-sort 154 | 2,2.1419589519500732,-5.353554822535321,172868.11781864305,20.0,PBMC_16k,CITE-sort 155 | 3,2.5556678771972656,-5.300773603717451,171524.9022070207,22.0,PBMC_16k,CITE-sort 156 | 4,2.3147940635681152,-5.3531155629512295,173182.99079162834,22.0,PBMC_16k,CITE-sort 157 | 5,2.499323844909668,-4.65552766288677,151742.3769695666,26.0,PBMC_16k,CITE-sort 158 | 6,2.1241798400878906,-4.880281503964596,158040.1595520047,21.0,PBMC_16k,CITE-sort 159 | 7,2.152833938598633,-5.4451787511451855,176263.76238628154,23.0,PBMC_16k,CITE-sort 160 | 8,2.1633620262145996,-5.502401365071444,177747.67251214743,21.0,PBMC_16k,CITE-sort 161 | 9,2.31435489654541,-4.8439605286700225,157382.76545375836,24.0,PBMC_16k,CITE-sort 162 | 0,1.4144580364227295,-5.124886394260993,175365.1889132596,30.0,PBMC_16k,GMM 163 | 1,0.8795738220214844,-5.247281883187149,176205.95219128366,23.0,PBMC_16k,GMM 164 | 2,0.7388548851013184,-5.348087190138184,178092.651414541,20.0,PBMC_16k,GMM 165 | 3,0.7176558971405029,-5.302812571086369,177520.39248609767,22.0,PBMC_16k,GMM 166 | 4,0.9349429607391357,-5.25161498397784,175914.07391252043,22.0,PBMC_16k,GMM 167 | 5,0.803678035736084,-5.251041933734646,177622.75427598614,26.0,PBMC_16k,GMM 168 | 6,0.7691028118133545,-5.281019420338424,176394.5392779519,21.0,PBMC_16k,GMM 169 | 7,0.7854559421539307,-5.270179898208045,176922.41885804676,23.0,PBMC_16k,GMM 170 | 8,0.7193880081176758,-5.3287302931610485,177901.53403452266,21.0,PBMC_16k,GMM 171 | 9,1.0948500633239746,-5.24096548882081,176431.03614872135,24.0,PBMC_16k,GMM 172 | 0,0.4008901119232178,-5.5584422812945515,180978.25149073126,30.0,PBMC_16k,nGMM 173 | 1,0.2562389373779297,-5.746965671212717,185816.3434176792,23.0,PBMC_16k,nGMM 174 | 2,0.16918683052062988,-5.855950550695162,188765.34370402092,20.0,PBMC_16k,nGMM 175 | 3,0.28665900230407715,-5.782957060395885,186788.73678983096,22.0,PBMC_16k,nGMM 176 | 4,0.3226139545440674,-5.806195436457849,187508.72556868164,22.0,PBMC_16k,nGMM 177 | 5,0.29004693031311035,-5.634555563700794,182733.68691707938,26.0,PBMC_16k,nGMM 178 | 6,0.16335010528564453,-5.8535466736657,188848.0856092121,21.0,PBMC_16k,nGMM 179 | 7,0.21469879150390625,-5.743257931506316,185672.74806116667,23.0,PBMC_16k,nGMM 180 | 8,0.21370887756347656,-5.81137711137325,187511.01631720006,21.0,PBMC_16k,nGMM 181 | 9,0.2974238395690918,-5.696305582413209,184361.43877733074,24.0,PBMC_16k,nGMM 182 | -------------------------------------------------------------------------------- /performance/record_full_alldb.csv: -------------------------------------------------------------------------------- 1 | ,time,ll,bic,n_component,DB,method 2 | 0,1.4083750247955322,-5.22614012126497,17694.297347349733,20.0,PBMC_1k,CITE-sort 3 | 1,1.1967318058013916,-6.834202666357369,17425.29678268992,15.0,PBMC_1k,CITE-sort 4 | 2,1.3504319190979004,-6.265277074706867,17638.847990580936,17.0,PBMC_1k,CITE-sort 5 | 3,1.2247920036315918,-6.375117306926319,17795.48016172587,17.0,PBMC_1k,CITE-sort 6 | 4,1.406689167022705,-5.927471739871177,18181.97668468987,19.0,PBMC_1k,CITE-sort 7 | 5,1.160640001296997,-6.933182248691533,18078.861217890753,16.0,PBMC_1k,CITE-sort 8 | 6,1.2971611022949219,-6.491712095554915,17449.324779517934,16.0,PBMC_1k,CITE-sort 9 | 7,1.2381787300109863,-6.1806809547366965,17518.21392350347,17.0,PBMC_1k,CITE-sort 10 | 8,1.3634178638458252,-5.54105350167419,18655.783378605596,21.0,PBMC_1k,CITE-sort 11 | 9,1.24202299118042,-6.577575012771381,18084.18485026093,17.0,PBMC_1k,CITE-sort 12 | 0,0.21073102951049805,-8.529966559712106,22401.603397840703,20.0,PBMC_1k,GMM 13 | 1,0.061823129653930664,-9.539507803253352,21282.40387053851,15.0,PBMC_1k,GMM 14 | 2,0.14079070091247559,-9.644085019856453,22456.349203700607,17.0,PBMC_1k,GMM 15 | 3,0.09861302375793457,-9.510790115912984,22266.383567939105,17.0,PBMC_1k,GMM 16 | 4,0.13012290000915527,-8.746533173662325,22201.70051304868,19.0,PBMC_1k,GMM 17 | 5,0.16895508766174316,-9.177449082611146,21278.880041425553,16.0,PBMC_1k,GMM 18 | 6,0.11718511581420898,-9.463803564231196,21687.477995720026,16.0,PBMC_1k,GMM 19 | 7,0.12487006187438965,-8.810452482167188,21267.711649445482,17.0,PBMC_1k,GMM 20 | 8,0.26333117485046387,-8.100522728490219,22304.106579471587,21.0,PBMC_1k,GMM 21 | 9,0.10646820068359375,-9.185367037079187,21802.197071284805,17.0,PBMC_1k,GMM 22 | 0,0.030717134475708008,-11.574240290860638,19519.393501782622,20.0,PBMC_1k,nGMM 23 | 1,0.01583719253540039,-12.582327588717853,20201.545868265435,15.0,PBMC_1k,nGMM 24 | 2,0.01724696159362793,-12.01715941481364,19697.837400564586,17.0,PBMC_1k,nGMM 25 | 3,0.016399860382080078,-11.892201843087133,19519.84070765181,17.0,PBMC_1k,nGMM 26 | 4,0.018496036529541016,-11.88263154626566,19808.43357921994,19.0,PBMC_1k,nGMM 27 | 5,0.026629209518432617,-12.150261839948268,19736.454511739288,16.0,PBMC_1k,nGMM 28 | 6,0.024701356887817383,-12.134395995184603,19714.265925310712,16.0,PBMC_1k,nGMM 29 | 7,0.024443864822387695,-12.038335544520606,19727.715846408726,17.0,PBMC_1k,nGMM 30 | 8,0.024069786071777344,-11.576145594733852,19673.081128548773,21.0,PBMC_1k,nGMM 31 | 9,0.016176223754882812,-12.223796587435212,19992.872123428424,17.0,PBMC_1k,nGMM 32 | 0,6.781627893447876,-9.998504538201088,193053.82826045481,49.0,PBMC_5k,CITE-sort 33 | 1,6.160115003585815,-10.774060071144454,193997.56214166566,45.0,PBMC_5k,CITE-sort 34 | 2,8.255143880844116,-9.385251516861697,204605.71575826133,59.0,PBMC_5k,CITE-sort 35 | 3,6.1250319480896,-10.476676137487793,194474.28808262106,47.0,PBMC_5k,CITE-sort 36 | 4,6.1504247188568115,-10.667226659512071,194675.1887903696,46.0,PBMC_5k,CITE-sort 37 | 5,6.686645030975342,-9.608489099356111,197954.68859708664,54.0,PBMC_5k,CITE-sort 38 | 6,5.918744087219238,-10.315595836758309,194582.64787714006,48.0,PBMC_5k,CITE-sort 39 | 7,6.57272481918335,-9.297901358731517,200091.59025809477,57.0,PBMC_5k,CITE-sort 40 | 8,5.612346887588501,-10.478001633603006,192689.46136847988,46.0,PBMC_5k,CITE-sort 41 | 9,5.69504714012146,-10.920122385864424,195530.340072337,45.0,PBMC_5k,CITE-sort 42 | 0,2.291753053665161,-14.358126790040503,238792.89874423892,49.0,PBMC_5k,GMM 43 | 1,1.8819971084594727,-14.70760453280036,235266.0466927468,45.0,PBMC_5k,GMM 44 | 2,2.5993258953094482,-13.897761531325635,251950.6764950317,59.0,PBMC_5k,GMM 45 | 3,1.9641737937927246,-14.583750765038003,237559.38821198785,47.0,PBMC_5k,GMM 46 | 4,2.1672379970550537,-14.491249214074706,234796.6223725828,46.0,PBMC_5k,GMM 47 | 5,2.1744298934936523,-14.408898960824732,248321.56104229172,54.0,PBMC_5k,GMM 48 | 6,2.274665117263794,-14.467449217102281,238141.43805275435,48.0,PBMC_5k,GMM 49 | 7,2.0596659183502197,-13.941547414973176,248814.61145642295,57.0,PBMC_5k,GMM 50 | 8,1.2713558673858643,-14.456796081254263,234435.5119427906,46.0,PBMC_5k,GMM 51 | 9,1.8523399829864502,-14.820257733959885,236453.24580368795,45.0,PBMC_5k,GMM 52 | 0,0.20482516288757324,-17.162898002294302,196458.95369860908,49.0,PBMC_5k,nGMM 53 | 1,0.24257564544677734,-17.27725679417593,196325.0151104464,45.0,PBMC_5k,nGMM 54 | 2,0.22489190101623535,-16.894444387997563,196981.7101214813,59.0,PBMC_5k,nGMM 55 | 3,0.1976771354675293,-17.187442559180443,196046.25667727954,47.0,PBMC_5k,nGMM 56 | 4,0.1876363754272461,-17.247541163012016,196345.5477243657,46.0,PBMC_5k,nGMM 57 | 5,0.21679210662841797,-16.97484842732038,196156.06355618988,54.0,PBMC_5k,nGMM 58 | 6,0.225081205368042,-17.15077295409227,195997.61407698583,48.0,PBMC_5k,nGMM 59 | 7,0.24540996551513672,-16.940910738001595,196801.1918648725,57.0,PBMC_5k,nGMM 60 | 8,0.21463489532470703,-17.229297792510877,196152.52786378318,46.0,PBMC_5k,nGMM 61 | 9,0.22436809539794922,-17.271073237309157,196258.65312791697,45.0,PBMC_5k,nGMM 62 | 0,4.7353222370147705,-6.442717330259638,156842.43374943547,68.0,PBMC_8k,CITE-sort 63 | 1,5.241987943649292,-6.441431807197678,157638.49865288063,69.0,PBMC_8k,CITE-sort 64 | 2,5.433183908462524,-6.343022901335477,156090.52656366822,69.0,PBMC_8k,CITE-sort 65 | 3,5.4518561363220215,-6.594241419217372,155960.76294390138,64.0,PBMC_8k,CITE-sort 66 | 4,4.971299886703491,-6.446390753865133,156900.2167027499,68.0,PBMC_8k,CITE-sort 67 | 5,4.68738579750061,-6.521551047051461,156449.91575215122,66.0,PBMC_8k,CITE-sort 68 | 6,4.696994066238403,-6.5400360083330655,157556.97037432066,67.0,PBMC_8k,CITE-sort 69 | 7,4.924194097518921,-6.41796427249102,156453.0681507351,68.0,PBMC_8k,CITE-sort 70 | 8,5.501037836074829,-5.71441369422992,152732.79318557627,77.0,PBMC_8k,CITE-sort 71 | 9,4.573446750640869,-6.546313666654377,156839.4317585051,66.0,PBMC_8k,CITE-sort 72 | 0,2.452871799468994,-8.438164332711427,188213.19368342296,68.0,PBMC_8k,GMM 73 | 1,2.481646776199341,-8.408244670975371,188562.3432336556,69.0,PBMC_8k,GMM 74 | 2,2.4254770278930664,-8.425269197382306,188829.66860220348,69.0,PBMC_8k,GMM 75 | 3,2.2687690258026123,-8.516921712531076,186190.13744548397,64.0,PBMC_8k,GMM 76 | 4,2.4142231941223145,-8.423963572877305,187992.5286425008,68.0,PBMC_8k,GMM 77 | 5,2.4891116619110107,-8.472690391977398,187127.93351873392,66.0,PBMC_8k,GMM 78 | 6,2.817267894744873,-8.452734470863497,187632.69180096796,67.0,PBMC_8k,GMM 79 | 7,2.1789538860321045,-8.454787895757315,188476.12509409327,68.0,PBMC_8k,GMM 80 | 8,2.621440887451172,-8.325179760310094,193786.62762067723,77.0,PBMC_8k,GMM 81 | 9,2.8781800270080566,-8.446774501013161,186722.26227093083,66.0,PBMC_8k,GMM 82 | 0,0.43262195587158203,-9.65678742518136,167133.7682424381,68.0,PBMC_8k,nGMM 83 | 1,0.4279301166534424,-9.664082093128723,167466.82285745992,69.0,PBMC_8k,nGMM 84 | 2,0.5662620067596436,-9.664073585728513,167475.68048906935,69.0,PBMC_8k,nGMM 85 | 3,0.4881327152252197,-9.728779479892161,167366.25292936395,64.0,PBMC_8k,nGMM 86 | 4,0.37183523178100586,-9.695972792450466,167745.48842498852,68.0,PBMC_8k,nGMM 87 | 5,0.3246877193450928,-9.717640334690314,167640.97924803954,66.0,PBMC_8k,nGMM 88 | 6,0.3578360080718994,-9.692053459770374,167460.0482029322,67.0,PBMC_8k,nGMM 89 | 7,0.5417490005493164,-9.656363639822144,167127.87730005436,68.0,PBMC_8k,nGMM 90 | 8,0.468184232711792,-9.57447404466968,167850.05727313482,77.0,PBMC_8k,nGMM 91 | 9,0.3371288776397705,-9.680468388884275,167059.67573926813,66.0,PBMC_8k,nGMM 92 | 0,2.4241321086883545,-6.461879788708171,120137.95753977002,23.0,MALT_8k,CITE-sort 93 | 1,2.472317934036255,-6.469601301991959,119770.80647973124,22.0,MALT_8k,CITE-sort 94 | 2,2.6662960052490234,-6.392868239746441,120965.13843813885,27.0,MALT_8k,CITE-sort 95 | 3,2.400151014328003,-6.875146878310774,129078.9942533452,27.0,MALT_8k,CITE-sort 96 | 4,2.471524238586426,-6.461960658211897,120139.3180883007,23.0,MALT_8k,CITE-sort 97 | 5,2.413289785385132,-6.928097227328886,128478.65752665016,24.0,MALT_8k,CITE-sort 98 | 6,2.572601079940796,-6.396962624654681,121034.02236983507,27.0,MALT_8k,CITE-sort 99 | 7,2.4453630447387695,-6.857828848154396,129781.75131304478,29.0,MALT_8k,CITE-sort 100 | 8,2.4264273643493652,-6.432067865198751,120630.51733769802,25.0,MALT_8k,CITE-sort 101 | 9,2.580191135406494,-6.180245928877178,116890.92288054913,26.0,MALT_8k,CITE-sort 102 | 0,0.7916312217712402,-7.5733742822893015,138830.43687258917,23.0,MALT_8k,GMM 103 | 1,0.4620349407196045,-7.620961426871539,139128.386457385,22.0,MALT_8k,GMM 104 | 2,0.5637567043304443,-7.513061564350352,139796.79255236703,27.0,MALT_8k,GMM 105 | 3,0.7792801856994629,-7.495331411753802,139500.56491482095,27.0,MALT_8k,GMM 106 | 4,0.6430320739746094,-7.606809230020523,139385.69294576487,23.0,MALT_8k,GMM 107 | 5,0.5811178684234619,-7.558761418649488,139072.82709241417,24.0,MALT_8k,GMM 108 | 6,0.5059757232666016,-7.509713681736872,139740.8039349963,27.0,MALT_8k,GMM 109 | 7,0.8608949184417725,-7.465963527022147,139999.32341265664,29.0,MALT_8k,GMM 110 | 8,0.47983527183532715,-7.536732209682566,139200.025794297,25.0,MALT_8k,GMM 111 | 9,0.677994966506958,-7.538792286731549,139728.35896189898,26.0,MALT_8k,GMM 112 | 0,0.14325523376464844,-8.388214875332125,145054.02586564902,23.0,MALT_8k,nGMM 113 | 1,0.17519116401672363,-8.416418878040503,145351.99308976828,22.0,MALT_8k,nGMM 114 | 2,0.1341571807861328,-8.332098384714984,144794.97302204653,27.0,MALT_8k,nGMM 115 | 3,0.17314600944519043,-8.280708076968548,143930.46104843976,27.0,MALT_8k,nGMM 116 | 4,0.15224695205688477,-8.404336760644817,145326.1152133652,23.0,MALT_8k,nGMM 117 | 5,0.12498688697814941,-8.361138604558358,144768.0299749365,24.0,MALT_8k,nGMM 118 | 6,0.1561110019683838,-8.283330320507796,143970.9216884222,27.0,MALT_8k,nGMM 119 | 7,0.1398000717163086,-8.25514860142787,143843.0658015172,29.0,MALT_8k,nGMM 120 | 8,0.1175081729888916,-8.374867440614503,145169.23478887198,25.0,MALT_8k,nGMM 121 | 9,0.20955324172973633,-8.3111056544382,144270.70650229516,26.0,MALT_8k,nGMM 122 | 0,1.5833871364593506,-3.919513431963239,74879.64173712343,18.0,CBMC_8k,CITE-sort 123 | 1,1.5685839653015137,-3.931301545064249,75082.79807830622,18.0,CBMC_8k,CITE-sort 124 | 2,1.7406930923461914,-3.7618776280127144,73386.24774899747,21.0,CBMC_8k,CITE-sort 125 | 3,1.5874481201171875,-3.925912843612369,74989.92919748454,18.0,CBMC_8k,CITE-sort 126 | 4,1.63087797164917,-3.903215512370876,75006.53054325444,19.0,CBMC_8k,CITE-sort 127 | 5,1.7103888988494873,-3.8702735972487146,75254.34388281069,21.0,CBMC_8k,CITE-sort 128 | 6,1.4840190410614014,-3.9371554798486907,74775.91763799552,17.0,CBMC_8k,CITE-sort 129 | 7,1.5850739479064941,-3.908120541955717,75091.06382311959,19.0,CBMC_8k,CITE-sort 130 | 8,1.441645860671997,-3.931343563763011,75083.5222285607,18.0,CBMC_8k,CITE-sort 131 | 9,1.5382959842681885,-3.932688568734865,75106.70204424563,18.0,CBMC_8k,CITE-sort 132 | 0,0.3776676654815674,-4.645293744347796,87374.8735614585,18.0,CBMC_8k,GMM 133 | 1,0.43041038513183594,-4.635588058108038,87209.30608116445,18.0,CBMC_8k,GMM 134 | 2,0.42351698875427246,-4.535376105193187,86710.29123445934,21.0,CBMC_8k,GMM 135 | 3,0.40677404403686523,-4.619650195847958,86936.70566622089,18.0,CBMC_8k,GMM 136 | 4,0.4390437602996826,-4.573374223551965,86540.58232416117,19.0,CBMC_8k,GMM 137 | 5,0.33440589904785156,-4.504476520258802,86171.12337944779,21.0,CBMC_8k,GMM 138 | 6,0.3982429504394531,-4.600247617602428,86193.17685786876,17.0,CBMC_8k,GMM 139 | 7,0.5639379024505615,-4.519731820263969,85625.9816159598,19.0,CBMC_8k,GMM 140 | 8,0.42975497245788574,-4.635610627481338,87208.48856031569,18.0,CBMC_8k,GMM 141 | 9,0.3296630382537842,-4.681246352493923,87996.98092958682,18.0,CBMC_8k,GMM 142 | 0,0.13054585456848145,-6.0634860919955225,107254.88370089428,18.0,CBMC_8k,nGMM 143 | 1,0.11309194564819336,-6.051646822327419,107046.21832424909,18.0,CBMC_8k,nGMM 144 | 2,0.11344599723815918,-5.989152132253573,106431.88521841567,21.0,CBMC_8k,nGMM 145 | 3,0.11068177223205566,-6.1375979311296085,108530.39928987606,18.0,CBMC_8k,nGMM 146 | 4,0.17274022102355957,-5.979296283737786,105953.77676080014,19.0,CBMC_8k,nGMM 147 | 5,0.13007712364196777,-5.997731473835821,106580.94095563744,21.0,CBMC_8k,nGMM 148 | 6,0.08965611457824707,-6.199086354739511,109433.15385132025,17.0,CBMC_8k,nGMM 149 | 7,0.15646934509277344,-6.118994616338503,108356.54408592555,19.0,CBMC_8k,nGMM 150 | 8,0.13138699531555176,-6.076944171932883,107485.88201896346,18.0,CBMC_8k,nGMM 151 | 9,0.10138487815856934,-6.113887728378639,108122.98460215003,18.0,CBMC_8k,nGMM 152 | 0,2.2695460319519043,-3.623972212166671,125234.37048111115,24.0,PBMC_16k,CITE-sort 153 | 1,2.2640860080718994,-3.675952817767073,126010.69135743506,22.0,PBMC_16k,CITE-sort 154 | 2,2.1994199752807617,-3.8018323381371757,130433.463177662,23.0,PBMC_16k,CITE-sort 155 | 3,2.0647919178009033,-4.407775641803095,148758.21440330538,21.0,PBMC_16k,CITE-sort 156 | 4,2.226551055908203,-3.882777516123145,133432.80489984434,24.0,PBMC_16k,CITE-sort 157 | 5,1.8370881080627441,-4.444344102963767,148175.9886201819,17.0,PBMC_16k,CITE-sort 158 | 6,2.107717275619507,-3.73121484499415,127761.2818559344,22.0,PBMC_16k,CITE-sort 159 | 7,2.198000907897949,-3.882385940989626,132550.0798348791,22.0,PBMC_16k,CITE-sort 160 | 8,2.198988199234009,-4.403432716440874,148620.63921368093,21.0,PBMC_16k,CITE-sort 161 | 9,2.224411964416504,-3.7769675123279263,128775.47447779097,21.0,PBMC_16k,CITE-sort 162 | 0,1.0075252056121826,-5.219640986255932,175755.31830882383,24.0,PBMC_16k,GMM 163 | 1,1.2031233310699463,-5.260690224689408,176190.5350520727,22.0,PBMC_16k,GMM 164 | 2,1.143347978591919,-5.23125750368259,175675.9753731755,23.0,PBMC_16k,GMM 165 | 3,0.6386978626251221,-5.2955009970938365,176863.04925257954,21.0,PBMC_16k,GMM 166 | 4,0.7805860042572021,-5.241280620565762,176437.7785188551,24.0,PBMC_16k,GMM 167 | 5,0.5949759483337402,-5.428483739588968,179324.9396759519,17.0,PBMC_16k,GMM 168 | 6,0.7616188526153564,-5.248322439227608,175806.96588796642,22.0,PBMC_16k,GMM 169 | 7,0.6302831172943115,-5.305637157055214,177612.91779621228,22.0,PBMC_16k,GMM 170 | 8,0.5043289661407471,-5.321080190805292,177666.2841797434,21.0,PBMC_16k,GMM 171 | 9,0.5827598571777344,-5.275454529834107,176219.78769735052,21.0,PBMC_16k,GMM 172 | 0,0.17671608924865723,-5.714774138841729,184952.60012144645,24.0,PBMC_16k,nGMM 173 | 1,0.22993779182434082,-5.795554772085684,187173.43329816326,22.0,PBMC_16k,nGMM 174 | 2,0.20836615562438965,-5.73201185776524,185327.85821218122,23.0,PBMC_16k,nGMM 175 | 3,0.1986408233642578,-5.818204534449665,187738.2148923623,21.0,PBMC_16k,nGMM 176 | 4,0.3298048973083496,-5.702030735231473,184542.13535930746,24.0,PBMC_16k,nGMM 177 | 5,0.17983317375183105,-5.967352924237675,191798.40535841696,17.0,PBMC_16k,nGMM 178 | 6,0.20919299125671387,-5.7969637320579945,187217.16628912961,22.0,PBMC_16k,nGMM 179 | 7,0.1835939884185791,-5.7863992760750875,186885.1308991835,22.0,PBMC_16k,nGMM 180 | 8,0.1705019474029541,-5.855641969131611,188918.93864829233,21.0,PBMC_16k,nGMM 181 | 9,0.14423871040344238,-5.8438262967148855,188547.48461834533,21.0,PBMC_16k,nGMM 182 | -------------------------------------------------------------------------------- /performance/time.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/performance/time.pdf -------------------------------------------------------------------------------- /preCITEsort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jun 2 19:25:32 2020 5 | 6 | @author: lianqiuyu 7 | """ 8 | 9 | import pandas as pd 10 | import argparse 11 | import os 12 | import numpy as np 13 | import seaborn as sns 14 | from matplotlib import pyplot as plt 15 | import sys 16 | 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('data_path', help = "The input path of CLR normalized data in .csv files with row as sample, col as feature.") 20 | parser.add_argument('-o', '--output', type=str, default='./CITEsort_out',help='Path to save output files.') 21 | parser.add_argument('--CLR', action='store_true', default=False, help='Input is raw counts. Transform counts into CLR format.') 22 | 23 | args = parser.parse_args() 24 | data_path = args.data_path 25 | 26 | if not os.path.exists(data_path): 27 | print('Error: input file does not exist. Please check.') 28 | sys.exit(0) 29 | 30 | if args.output: 31 | output_path = args.output 32 | else: 33 | output_path = "./CITEsort_out" 34 | 35 | if not os.path.exists(output_path): 36 | os.mkdir(output_path) 37 | 38 | print('read data.') 39 | data = pd.read_csv(data_path,header=0,index_col=0) 40 | dataplot = data 41 | 42 | if args.CLR: 43 | print('perform CLR transformation on raw counts.') 44 | data_clr = np.apply_along_axis(lambda x: np.log(x+1) - np.mean(np.log(x+1)),0,data) 45 | data_clr = pd.DataFrame(data_clr,index=data.index,columns = data.columns) 46 | data_clr.to_csv(output_path+'/data_clr.csv') 47 | dataplot = data_clr 48 | 49 | print('plot histgrams of all markers in CLR format.') 50 | plt.figure(figsize=(12,2*np.ceil(data.shape[1] / 5)), dpi=96) 51 | plt.style.use('seaborn-white') 52 | for i in range(dataplot.shape[1]): 53 | ax = plt.subplot(int(np.ceil(dataplot.shape[1] / 5)),5,i+1) 54 | sns.distplot(dataplot.iloc[:,i].values,kde_kws={'bw':0.2}) 55 | plt.yticks([0,1]) 56 | plt.title(dataplot.columns[i],fontsize=15) 57 | if i%5 == 0: 58 | plt.ylabel('Density',fontsize=12) 59 | ax.spines['right'].set_visible(False) 60 | ax.spines['top'].set_visible(False) 61 | ax.yaxis.set_ticks_position('left') 62 | 63 | plt.suptitle('DB: '+str(dataplot.shape[1])+' ADTs,'+str(dataplot.shape[0])+' droplets',fontsize=15) 64 | plt.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=0.9, hspace=0.6,wspace=0.15) 65 | #plt.subplots_adjust(top=0.85) 66 | #plt.savefig('./PBMC_16k/marker_hist.png') 67 | plt.savefig(output_path+'/data_cls_hist.png') 68 | plt.clf() 69 | #plt.show() 70 | 71 | 72 | -------------------------------------------------------------------------------- /readme_figs/ACTandBCT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/ACTandBCT.png -------------------------------------------------------------------------------- /readme_figs/ACTandBCT_small.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/ACTandBCT_small.jpeg -------------------------------------------------------------------------------- /readme_figs/ACTimbalance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/ACTimbalance.png -------------------------------------------------------------------------------- /readme_figs/CITE-sort.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/CITE-sort.png -------------------------------------------------------------------------------- /readme_figs/FittingInLowDimension.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/FittingInLowDimension.png -------------------------------------------------------------------------------- /readme_figs/taxonomy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QiuyuLian/CITE-sort/658d9481c0748e7d20e2f50fe3522ab7ab825c5f/readme_figs/taxonomy.png -------------------------------------------------------------------------------- /runCITEsort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Oct 9 23:49:04 2019 5 | 6 | @author: lianqiuyu 7 | """ 8 | 9 | import pandas as pd 10 | from CITEsort.Matryoshka import Matryoshka 11 | from CITEsort.Visualize import visualize_tree 12 | from CITEsort.BTreeTraversal import BTreeTraversal 13 | from CITEsort.ReSplit import ReSplit 14 | import pickle 15 | import argparse 16 | import os 17 | 18 | #from sys import argv 19 | 20 | def main(): 21 | 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('data_path', help = "The input path of CLR normalized data in .csv files with row as sample, col as feature.") 24 | parser.add_argument('-c','--cutoff',type = float, default=0.1, help = "The cutoff for merging components (default 0.1). It shoube a value between 0 and 1. The bigger value leads to split more aggressively, and ends in a more complicated tree.") 25 | parser.add_argument('-o', '--output', type=str, default='./CITEsort_out',help='Path to save output files.') 26 | parser.add_argument('--compact', action='store_true', default=False, help='Output a compact tree.') 27 | args = parser.parse_args() 28 | 29 | data_path = args.data_path 30 | output_path = args.output 31 | merge_cutoff = args.cutoff 32 | compact_flag = args.compact 33 | 34 | if not os.path.exists(output_path): 35 | os.mkdir(output_path) 36 | 37 | print('read data and run CITE-sort.') 38 | data = pd.read_csv(data_path,header = 0, index_col=0) 39 | tree = ReSplit(data,merge_cutoff) 40 | #tree = Matryoshka(data,merge_cutoff) 41 | print('done.\nplot tree.') 42 | visualize_tree(tree,data,output_path,'tree',compact=compact_flag) 43 | 44 | f = open(output_path+'/tree.pickle','wb') 45 | pickle.dump(tree,f) 46 | f.close() 47 | 48 | print('generate labels.') 49 | traversal = BTreeTraversal(tree) 50 | leaves_labels = traversal.get_leaf_label() 51 | leaves_labels.to_csv(output_path + '/leaf_labels.csv',index=False) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | 57 | --------------------------------------------------------------------------------