├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── Dev_scripts
    ├── cells_families.txt
    ├── compare_facs.R
    ├── signatures_functions.R
    ├── types_dependecies.txt
    ├── xCell_ImmPort.zip
    ├── xCell_dev_functions.R
    ├── xCell_dev_pipeline.R
    └── xCell_plots.R
├── NAMESPACE
├── R
    ├── .DS_Store
    └── xCell.R
├── README.Md
├── data
    ├── .DS_Store
    └── xCell.data.rda
├── genes_used_by_xCell.txt
├── man
    ├── .DS_Store
    ├── microenvironmentScores.Rd
    ├── rawEnrichmentAnalysis.Rd
    ├── spillOver.Rd
    ├── transformScores.Rd
    ├── xCell.Rd
    ├── xCell.data.Rd
    ├── xCellAnalysis.Rd
    ├── xCellSignifcanceBetaDist.Rd
    └── xCellSignifcanceRandomMatrix.Rd
├── vignettes
    ├── sdy311.rds
    ├── sdy420.rds
    ├── xCell-Immport.Rmd
    ├── xCell-Immport.html
    ├── xCell.Rmd
    └── xCell.html
├── xCell.Rproj
└── xCell
    ├── .Rbuildignore
    ├── .gitignore
    ├── DESCRIPTION
    ├── NAMESPACE
    └── xCell.Rproj


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.travis\.yml$
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | inst/doc
2 | .Rproj.user
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 | 
3 | language: R
4 | sudo: false
5 | cache: packages
6 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: xCell
 2 | Type: Package
 3 | Title: Cell type enrichment analysis
 4 | Version: 1.1.0
 5 | Author: person(“Dvir”, “Aran”, email = “dvir.aran@ucsf.edu”,role = c("aut", "cre"))
 6 | Maintainer: Dvir Aran <dvir.aran@ucsf.edu>
 7 | Description: Tissues are a complex milieu consisting of numerous cell types. 
 8 |     In cancer, understanding the cellular heterogeneity in the tumor microenvironment is
 9 |     an emerging field of research. Numerous methods have been published in recent years
10 |     for the enumeration of cell subsets from tissue expression profiles. However, the 
11 |     available methods suffer from three major problems: inferring cell subset based on
12 |     gene sets learned and verified from limited sources; displaying only partial portrayal 
13 |     of the full cellular heterogeneity; and insufficient validation in mixed tissues.
14 |     The xCell package performs cell type enrichment analysis from gene expression data for 
15 |     64 immune and stroma cell types. xCell is a gene signatures-based method learned from
16 |     thousands of pure cell types from various sources. xCell applies a novel technique for
17 |     reducing associations between closley related cell types. xCell signatures were 
18 |     validated using extensive in-silico simulations and also cytometry immunophenotyping, 
19 |     and were shown to outperform previous methods. xCell allows researchers to reliably 
20 |     portray the cellular heterogeneity landscape of tissue expression profiles.
21 | License: GPL-3
22 | Encoding: UTF-8
23 | URL: http://xCell.ucsf.edu
24 | LazyData: true
25 | Depends: R (>= 3.1.0)
26 | biocViews:
27 | Imports:
28 |    GSVA,
29 |    GSEABase,
30 |    pracma,
31 |    utils,
32 |    stats,
33 |    MASS,
34 |    digest,
35 |    curl,
36 |    quadprog
37 | RoxygenNote: 6.0.1
38 | Suggests: knitr,
39 |     rmarkdown,
40 |     testthat
41 | VignetteBuilder: knitr
42 | 


--------------------------------------------------------------------------------
/Dev_scripts/cells_families.txt:
--------------------------------------------------------------------------------
1 | Cells	Family	Group	Type	Family2Preadipocytes	Non-Hematopoietic	Adipocytes	Stroma	Non-HematopoieticNeurons	Non-Hematopoietic	Astrocytes	Epithelial	Non-HematopoieticClass-switched memory B-cells	Lymphocytes	B-cells	Lymphoid	ImmuneMemory B-cells	Lymphocytes	B-cells	Lymphoid	Immunenaive B-cells	Lymphocytes	B-cells	Lymphoid	ImmunePlasma cells	Lymphocytes	B-cells	Lymphoid	Immunepro B-cells	Lymphocytes	B-cells	Lymphoid	ImmuneCD4+ memory T-cells	Lymphocytes	CD4+ memory T-cells	Lymphoid	ImmuneCD4+ naive T-cells	Lymphocytes	CD4+ T-cells	Lymphoid	ImmuneCD4+ Tcm	Lymphocytes	CD4+ T-cells	Lymphoid	ImmuneCD4+ Tem	Lymphocytes	CD4+ T-cells	Lymphoid	ImmuneTgd cells	Lymphocytes	CD4+ T-cells	Lymphoid	ImmuneTh1 cells	Lymphocytes	CD4+ T-cells	Lymphoid	ImmuneTh2 cells	Lymphocytes	CD4+ T-cells	Lymphoid	ImmuneTregs	Lymphocytes	CD4+ T-cells	Lymphoid	ImmuneCD8+ naive T-cells	Lymphocytes	CD8+ T-cells	Lymphoid	ImmuneCD8+ Tcm	Lymphocytes	CD8+ T-cells	Lymphoid	ImmuneCD8+ Tem	Lymphocytes	CD8+ T-cells	Lymphoid	ImmuneaDC	Non-lymphocytes	DC	Myeloid	ImmunecDC	Non-lymphocytes	DC	Myeloid	ImmuneiDC	Non-lymphocytes	DC	Myeloid	ImmunepDC	Non-lymphocytes	DC	Myeloid	Immunely Endothelial cells	Non-Hematopoietic	Endothelial cells	Stroma	Non-Hematopoieticmv Endothelial cells	Non-Hematopoietic	Endothelial cells	Stroma	Non-HematopoieticKeratinocytes	Non-Hematopoietic	Epithelial cells	Epithelial	Non-HematopoieticMesangial cells	Non-Hematopoietic	Fibroblasts	Stroma	Non-HematopoieticSebocytes	Non-Hematopoietic	Epithelial cells	Epithelial	Non-HematopoieticPericytes	Non-Hematopoietic	Fibroblasts	Stroma	Non-HematopoieticCLP	HSC	HSC	HSC	HSCCMP	HSC	HSC	HSC	HSCErythrocytes	HSC	HSC	HSC	HSCGMP	HSC	HSC	HSC	HSCMegakaryocytes	HSC	HSC	HSC	HSCMEP	HSC	HSC	HSC	HSCMacrophages M1	Non-lymphocytes	Macrophages	Myeloid	ImmuneMacrophages M2	Non-lymphocytes	Macrophages	Myeloid	ImmuneHSC	HSC	Parent	HSC	HSCMPP	HSC	Parent	HSC	HSCPlatelets	Non-lymphocytes	Parent	HSC	HSCB-cells	Lymphocytes	Parent	Lymphoid	ImmuneCD4+ T-cells	Lymphocytes	Parent	Lymphoid	ImmuneCD8+ T-cells	Lymphocytes	Parent	Lymphoid	ImmuneNK cells	Lymphocytes	Parent	Lymphoid	ImmuneNKT	Lymphocytes	Parent	Lymphoid	ImmuneAdipocytes	Non-Hematopoietic	Parent	Stroma	Non-HematopoieticAstrocytes	Non-Hematopoietic	Parent	Epithelial	Non-HematopoieticChondrocytes	Non-Hematopoietic	Parent	Stroma	Non-HematopoieticEndothelial cells	Non-Hematopoietic	Parent	Stroma	Non-HematopoieticEpithelial cells	Non-Hematopoietic	Parent	Epithelial	Non-HematopoieticFibroblasts	Non-Hematopoietic	Parent	Stroma	Non-HematopoieticHepatocytes	Non-Hematopoietic	Parent	Epithelial	Non-HematopoieticMelanocytes	Non-Hematopoietic	Parent	Epithelial	Non-HematopoieticMSC	Non-Hematopoietic	Parent	Stroma	Non-HematopoieticSkeletal muscle	Non-Hematopoietic	Parent	Stroma	Non-HematopoieticSmooth muscle	Non-Hematopoietic	Parent	Stroma	Non-HematopoieticBasophils	Non-lymphocytes	Parent	Myeloid	ImmuneDC	Non-lymphocytes	Parent	Myeloid	ImmuneEosinophils	Non-lymphocytes	Parent	Myeloid	ImmuneMacrophages	Non-lymphocytes	Parent	Myeloid	ImmuneMast cells	Non-lymphocytes	Parent	Myeloid	ImmuneMonocytes	Non-lymphocytes	Parent	Myeloid	ImmuneNeutrophils	Non-lymphocytes	Parent	Myeloid	ImmuneMyocytes	Non-Hematopoietic	Skeletal muscle	Stroma	Non-HematopoieticOsteoblast	Non-Hematopoietic	Skeletal muscle	Stroma	Non-Hematopoietic


--------------------------------------------------------------------------------
/Dev_scripts/compare_facs.R:
--------------------------------------------------------------------------------
 1 | library(psych)
 2 | #spill = createSpilloverMatrix()
 3 | 
 4 | expr = read.table("~/Documents/Immport/GE_SDY311-2010.txt",sep="\t",header=TRUE,row.names=1, as.is=TRUE)
 5 | scores = rawEnrichmentAnalysis(as.matrix(expr),signatures,genes.use,paste0(working.dir,'/Immport/sdy311_scores.txt'))
 6 | 
 7 | expr = read.table("~/Documents/Immport/GE_SDY420.filtered.txt",sep="\t",header=TRUE,row.names=1, as.is=TRUE)
 8 | scores = rawEnrichmentAnalysis(as.matrix(expr),signatures,genes.use,paste0(working.dir,'/Immport/sdy420_scores.txt'))
 9 | 
10 | sets = c('B-cells',	'CD16- monocytes',	'CD16+ monocytes',	'CD4+ T-cells',	'CD8+ T-cells',	'CD4+ Tcm',	'CD8+ Tcm',	'effector CD4+ T cells',	'effector CD8+ T cells',	'CD4+ Tem',	'CD8+ Tem',	'Tgd cells',	'lymphocytes',	'Memory B-cells',	'Monocytes',	'naive B-cells',	'CD4+ naive T-cells',	'CD8+ naive T-cells',	'NK cells',	'NKT',	'Plasma cells',	'T cells',	'pro B-cells',	'Tregs')
11 | 
12 | fcs = read.table("~/Documents/Immport/FCS_SDY311-2010.txt",sep="\t",header=TRUE,row.names=1, as.is=TRUE)
13 | rownames(fcs) = sets
14 | colors <- brewer.pal(11, "RdBu")
15 | 
16 | scores = read.table(paste0(working.dir,'/Immport/sdy311_scores.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE)
17 | colnames(scores) = gsub("\\.1","",colnames(scores))
18 | scores = aggregate(t(scores)~colnames(scores),FUN=mean)
19 | rownames(scores) = scores[,1]
20 | scores = scores[,-1]
21 | scores311 =t(scores)
22 | 
23 | fcs= fcs[,-which(colnames(fcs) %in% c("SUB134240","SUB134283"))]
24 | fcs311=fcs
25 | r311=analyze.facs(fcs311,scores311,sets,FALSE,spill.array,alpha,paste0(working.dir,'/simulations/plots/sdy311'))
26 | 
27 | kscores = read.table("~/Documents/signatures/scores/sdy311_known.txt",sep="\t",header=TRUE,row.names=1, as.is=TRUE)
28 | colnames(kscores) = gsub("\\.1","",colnames(kscores))
29 | kscores = aggregate(t(kscores)~colnames(kscores),FUN=mean)
30 | rownames(kscores) = kscores[,1]
31 | kscores = kscores[,-1]
32 | kscores311 =t(kscores)
33 | 
34 | k311=analyze.facs(fcs311,kscores311,sets,TRUE)
35 | c311 = ciberAnalysis("~/Documents/signatures/cibersort/CIBERSORT.SDY311.txt",fcs311,paste0(working.dir,'/simulations/plots/sdy311_cibersort.pdf'),remove.dups=TRUE)
36 | 
37 | fcs = read.table("~/Documents/Immport/FCS_SDY420.filtered.txt",sep="\t",header=TRUE,row.names=1, as.is=TRUE)
38 | rownames(fcs) = sets
39 | fcs420=fcs
40 | 
41 | scores420 = read.table(paste0(working.dir,'/Immport/sdy420_scores.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE)
42 | r420=analyze.facs(fcs420,scores420,sets,FALSE,spill.array,alpha,paste0(working.dir,'/simulations/plots/sdy420'))
43 | kscores420 = read.table("~/Documents/signatures/scores/sdy420_known.txt",sep="\t",header=TRUE,row.names=1, as.is=TRUE)
44 | k420=analyze.facs(fcs420,kscores420,sets,TRUE)
45 | 
46 | c420 = ciberAnalysis("~/Documents/signatures/cibersort/CIBERSORT.SDY420.txt",fcs420,paste0(working.dir,'/simulations/plots/sdy420_cibersort.pdf'))
47 | 
48 | m311 = compare.facs(r311,k311,c311,paste0(working.dir,'/simulations/plots/sdy311_compare.pdf'))
49 | m420 = compare.facs(r420,k420,c420,paste0(working.dir,'/simulations/plots/sdy420_compare.pdf'))
50 | 


--------------------------------------------------------------------------------
/Dev_scripts/signatures_functions.R:
--------------------------------------------------------------------------------
  1 | library(RColorBrewer)
  2 | library(gplots)
  3 | library(stats)
  4 | library(corrplot)
  5 | library(stringr)
  6 | library(psych)
  7 | library(pheatmap)
  8 | library(MCPcounter)
  9 | 
 10 | createMatrixNoDups = function(data) {
 11 |   M = as.matrix(data[,-1])
 12 |   samples = colnames(M)
 13 |   genes = data[,1]
 14 |   M = as.numeric(as.character(M))
 15 |   M = matrix(M,nrow=length(genes),byrow=FALSE)
 16 |   if (length(unique(genes))!=length(genes)) {
 17 |     M = aggregate(x = M, by = list(genes), FUN = mean)
 18 |     genes = M[,1]
 19 |     M = M[,-1]
 20 |   }
 21 |   rownames(M) = genes
 22 |   colnames(M) = samples
 23 |   return (as.matrix(M))
 24 | }
 25 | 
 26 | 
 27 | mix.protocol = function(working.dir,mix_set,spill,alpha=0.5,do.ciber=TRUE,do.known=TRUE,do.mcp=FALSE,rnaseq=TRUE,transpose=FALSE,ct_order=NULL,font.size=7,show.before=TRUE,beta_dist) {
 28 |   colors <- brewer.pal(11, "RdBu")
 29 |   m=(read.table(paste0(working.dir,'/simulations/mix_',mix_set,'_scores.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
 30 |   dist=(read.table(paste0(working.dir,'/simulations/mix_',mix_set,'_dist.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
 31 | 
 32 |   if(is.null(ct_order)) {
 33 |     ct_order = rownames(dist)
 34 |     oclust='original'
 35 |   } else {
 36 |     oclust = 'original'
 37 |   }
 38 |   rxr=cor(t(m[rownames(dist),]),t(dist),method="pearson")
 39 |   pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_raw.pdf'),width=4,height=4)
 40 |   rxrc = corrplot(as.matrix(rxr[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 41 |   dev.off()
 42 |   mixScatters(paste0(working.dir,'/simulations/plots/scatters/',mix_set,'_scatters_raw.pdf'),m[rownames(dist),],dist,norm=TRUE)
 43 | 
 44 |   M = transformScores(m,spill$fv)
 45 |   rxb=cor(t(M[rownames(dist),]),t(dist),method="pearson")
 46 |   pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_transform.pdf'),width=4,height=4)
 47 |   rxbc = corrplot(as.matrix(rxb[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 48 |   dev.off()
 49 |   mixScatters(paste0(working.dir,'/simulations/plots/scatters/',mix_set,'_scatters_transform.pdf'),M[rownames(dist),],dist,norm=FALSE)
 50 | 
 51 |   M2 = spillOver(M,spill$K,alpha)
 52 |   #rownamesM2  =intersect(rownames(rxbc),rownames(M2))
 53 |   rx=cor(t(M2[ct_order,]),t(dist[ct_order,]),method="pearson")
 54 |   pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_spill.pdf'),width=4,height=4)
 55 |   corrplot(as.matrix(rx),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 56 |   dev.off()
 57 |   A = intersect(rownames(M2),rownames(dist))
 58 |   mixScatters(paste0(working.dir,'/simulations/plots/scatters/',mix_set,'_scatters_spill.pdf'),M2[A,],dist[A,],norm=FALSE)
 59 |   mixScatters(paste0(working.dir,'/simulations/plots/scatters/spill/',mix_set,'_scatters_spill.pdf'),M2[A,],dist[A,],norm=FALSE)
 60 | 
 61 |   pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_corrplots.pdf'),width=8,height=4)
 62 |   par(mfrow=c(1,2))
 63 |   rxbc=corrplot(as.matrix(rxb[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 64 |   rxba=corrplot(as.matrix(rx[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 65 |   dev.off()
 66 | 
 67 |   ddiag = sum(diag(rxba))/sum(diag(rxbc))
 68 |   drest = (sum(abs(rxba))-sum(diag(rxba)))/(sum(abs(rxbc))-sum(diag(rxbc)))
 69 |   ddist = cor(t(dist))
 70 | 
 71 |   badCor = rxba>0.25 | rxbc>0.25
 72 |   diag(badCor)<-FALSE
 73 |   diff_expectedA = rxba[badCor] - ddist[badCor]
 74 |   diff_expectedB = rxbc[badCor] - ddist[badCor]
 75 | 
 76 |   diff_expected = sum(abs(diff_expectedA))/sum(abs(diff_expectedB))
 77 | 
 78 |   badCor1 = rxba>0.25
 79 |   badCor2 = rxbc>0.25
 80 |   diag(badCor1)<-FALSE
 81 |   diag(badCor2)<-FALSE
 82 | 
 83 |   diff_expectedA = rxba[badCor] - ddist[badCor]
 84 |   diff_expectedB = rxbc[badCor] - ddist[badCor]
 85 | 
 86 |   txt1 = paste(mix_set,mean(diag(rxba)),ddiag,drest,sum(badCor),diff_expected,sum(badCor2),sum(badCor1),sum(badCor1 & badCor2),countNotExpectedCellTypes(M,rownames(dist),families),countNotExpectedCellTypes(M2,rownames(dist),families),sep='\t')
 87 | 
 88 |   #file_shuff = paste0(working.dir,'/simulations/mix_',mix_set,'_pvals.txt')
 89 |   #if (!file.exists(file_shuff)) {
 90 |   #  mix=(read.table(paste0(working.dir,'/simulations/mix_',mix_set,'_expr.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
 91 |   #  res = xCellSignifcance(mix,M2,spill=spill,alpha=alpha,file.name = file_shuff)
 92 |   #}
 93 |   #pvals=(read.table(file_shuff,sep="\t",header=TRUE,row.names=1, as.is=TRUE))
 94 | 
 95 |   pvals = xCellSignifcanceBetaDist(M2,spill$beta_params)
 96 | 
 97 |   M3 = M2
 98 |   pval.thres = 0.05
 99 |   M3[pvals>pval.thres]=0
100 |   rx_pval=cor(t(M3[ct_order,]),t(dist[ct_order,]),method="pearson")
101 |   rx_pval[is.na(rx_pval)] = 0
102 |   pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_spill_pvals.pdf'),width=4,height=4)
103 |   corrplot(as.matrix(rx_pval),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
104 |   dev.off()
105 |   A = intersect(rownames(M3),rownames(dist))
106 |   #mixScatters(paste0(working.dir,'/simulations/plots/scatters/',mix_set,'_scatters_spill_pvals.pdf'),M3[A,],dist[A,],norm=FALSE)
107 |   #mixScatters(paste0(working.dir,'/simulations/plots/scatters/spill/',mix_set,'_scatters_spill_pvals.pdf'),M3[A,],dist[A,],norm=FALSE)
108 | 
109 |   rxba_pval=corrplot(as.matrix(rx_pval[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
110 | 
111 |   ddiag = sum(diag(rxba_pval))/sum(diag(rxbc))
112 |   drest = (sum(abs(rxba_pval))-sum(diag(rxba_pval)))/(sum(abs(rxbc))-sum(diag(rxbc)))
113 |   ddist = cor(t(dist))
114 | 
115 |   badCor = rxba_pval>0.25 | rxbc>0.25
116 |   diag(badCor)<-FALSE
117 |   diff_expectedA = rxba_pval[badCor] - ddist[badCor]
118 | 
119 |   diff_expected = sum(abs(diff_expectedA))/sum(abs(diff_expectedB))
120 | 
121 |   badCor1 = rxba_pval>0.25
122 |   diag(badCor1)<-FALSE
123 | 
124 |   diff_expectedA = rxba_pval[badCor] - ddist[badCor]
125 | 
126 |   #message(paste(txt1,mix_set,mean(diag(rxba_pval)),ddiag,drest,sum(badCor),diff_expected,sum(badCor2),sum(badCor1),sum(badCor1 & badCor2),countNotExpectedCellTypes(M3,rownames(dist),families),sep='\t'))
127 | 
128 |   #message(paste(mix_set,mean(diag(rxba)),mean(diag(rxba_pval)),(sum(abs(rxba))-sum(diag(rxba)))/(ncol(rxba)*(ncol(rxba)-1)),(sum(abs(rxba_pval))-sum(diag(rxba_pval)))/(ncol(rxba_pval)*(ncol(rxba_pval)-1)),countNotExpectedCellTypes(M2,rownames(dist),families),countNotExpectedCellTypes(M3,rownames(dist),families),sep='\t'))
129 | 
130 |   parents = families[rownames(dist),2]
131 |   other_cell_types = setdiff(rownames(M2),c(rownames(dist),parents))
132 |   p1 = pvals[rownames(dist),]
133 |   p2 = pvals[other_cell_types,]
134 |   message(paste(mix_set,mean(diag(rxba)),mean(diag(rxba_pval)),mean(M2[other_cell_types,]>0.001),mean(M2[other_cell_types,]>0.001 & p2<pval.thres),mean(M2[rownames(dist),]>0.001),mean(M2[rownames(dist),]>0.001 & p1<pval.thres),sep='\t'))
135 | 
136 |   aranbef = cbind(as.matrix(diag(rxb)),rownames(rxb),'xCell (before)')
137 |   rownames(aranbef) = str_c(rep('xCell (before)',dim(aranbef)[1]),' ',rownames(aranbef))
138 | 
139 |   #rx = rx_pval
140 |   #M2 = M3
141 | 
142 |   aran = cbind(as.matrix(diag(rx)),rownames(rx),'xCell')
143 |   rownames(aran) = str_c(rep('xCell',dim(aran)[1]),' ',rownames(aran))
144 |   if (show.before==TRUE) {
145 |     R = rbind(aran,aranbef)
146 |   } else {
147 |     R = aran
148 |   }
149 |   ciber = list()
150 |   if (do.ciber == TRUE) {
151 |     rc = ciberAnalysis(paste0(working.dir,'/simulations/CIBERSORT.',mix_set,'.txt'),dist,paste0(working.dir,'/simulations/plots/',mix_set,'_cibersort.pdf'))
152 |     ciber = cbind(as.matrix(diag(rc$r)),rownames(rc$r),'Newman')
153 |     rownames(ciber) = str_c(rep('Newman',dim(ciber)[1]),' ',rownames(ciber))
154 |     A = intersect(rownames(rc$x),rownames(dist))
155 |     z1 = cor(as.matrix(rc$x[A,]),as.matrix(rc$y[A,]))
156 |     z2 = cor(M2[A,],dist[A,])
157 |     blue <- rgb(0, 0, 1, alpha=0.5)
158 |     red <- rgb(1, 0, 0, alpha=0.5)
159 |     pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_xcell_cibersort.pdf'),width=3,height=4)
160 |     barplot(sort(diag(z2)),border=NA,xaxt='n',ylab="R (Pearson)",col=blue,space=0)
161 |     barplot(sort(diag(z1)),border=NA,xaxt='n',col=red,space=0,add=TRUE)
162 |     print(sprintf('N = %d, xCell = %.2f, CIBEROSRT = %.2f',length(A),median(diag(z2)),median(diag(z1))))
163 |     dev.off()
164 |     R = rbind(R,ciber)
165 | 
166 |   }
167 |   if (do.known == TRUE) {
168 |     pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_known.pdf'),width=4,height=8)
169 |     rk = knownAnalysis(dist,paste0(working.dir,'/simulations/mix_',mix_set,'_known.txt'))
170 |     dev.off()
171 |     R = rbind(R,rk$out)
172 | 
173 |   }
174 |   if (do.mcp == TRUE) {
175 |     rmcp = mcp.analyze('bp_n12')
176 |     mcp = cbind(rmcp,names(rmcp),'MCP')
177 |     rownames(mcp) = str_c(rep('MCP',length(rmcp)),' ',names(rmcp))
178 |     R = rbind(R,mcp)
179 |   }
180 | 
181 |   if (do.ciber==TRUE && do.known==TRUE) {
182 |     colnames(rc$r2) = paste('CIBERSORT',colnames(rc$r2))
183 |     known_ciber_mat = cbind(rk$mat,rc$r2[rownames(rk$mat),])
184 |     pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_known_ciber.pdf'),width=4,height=8)
185 |     mat = corrplot(as.matrix(known_ciber_mat),tl.cex = 0.7,tl.col = "black",col=colorRampPalette(rev(colors))(200))
186 |     dev.off()
187 |   } else {
188 |     known_ciber_mat = rk$mat
189 |   }
190 | 
191 |   if (do.ciber==TRUE || do.known==TRUE) {
192 |     mat = compareCorrelations(R,c("xCell","xCell (before)","Bindea","Charoentong","Palmer","Rooney","Tirosh","Newman",'MCP'),NULL,paste0(working.dir,'/simulations/plots/',mix_set,'_compare.pdf'),8,3,drop.only1 = TRUE,transpose,ct_order=ct_order,font.size=font.size)
193 | 
194 |     pdf(paste0(working.dir,'/simulations/plots/page/',mix_set,'_page.pdf'),width=8,height=8)
195 |     par(mfrow=c(2,2))
196 |     corrplot(as.matrix(rxb[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
197 |     corrplot(as.matrix(rx[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
198 |     if (do.ciber==TRUE && do.known==TRUE) {
199 |       tl.col = c(rep('black',dim(rk$mat)[2]),rep('blue',dim(rc$r2)[2]))
200 |     } else if (do.known==TRUE) {
201 |       tl.col = rep('black',dim(rk$mat)[2])
202 |     } else {
203 |       tl.col = rep('blue',dim(rc$r2)[2])
204 |     }
205 |     corrplot(as.matrix(known_ciber_mat),tl.cex = 0.7,tl.col = tl.col,col=colorRampPalette(rev(colors))(200))
206 |     corrplot(as.matrix(mat),method="pie",na.label="-",tl.cex = font.size/10,cl.cex=font.size/10,number.cex=(font.size+3)/10,tl.col = "black",col=colorRampPalette(rev(colors))(200))
207 |     dev.off()
208 | 
209 |     R
210 |   } else {
211 |     rx
212 |   }
213 | 
214 | }
215 | 
216 | mcp.analyze = function(mix_set) {
217 |   ex=read.table(paste0(working.dir,'/simulations/mix_',mix_set,'_expr.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE)
218 |   dist=read.table(paste0(working.dir,'/simulations/mix_',mix_set,'_dist.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE)
219 |   mcp = MCPcounter.estimate(ex,'HUGO_symbols')
220 |   #mono =rownames(families)[families$Type=='Myeloid']
221 |   rownames(mcp) = c('T-cells','CD8+ T-cells','Cytotoxic','NK cells','B-cells','Monocytes','DC','Neutrophils','Endothelial cells','Fibroblasts')
222 |   A = intersect(rownames(mcp),rownames(dist))
223 |   r = corr.test(t(mcp[A,]),t(dist[A,]))
224 |   diag(r$r)
225 | }
226 | 
227 | knownAnalysis = function(dist,fn) {
228 |   colors <- brewer.pal(11, "RdBu")
229 | 
230 |   mk=read.table(fn,sep="\t",header=TRUE,row.names=1, as.is=TRUE)
231 |   source = unlist(lapply(rownames(mk),function(x) {unlist(strsplit(x,'[_%]'))[1]}))
232 |   known = unlist(lapply(rownames(mk),function(x) {unlist(strsplit(x,'[_%]'))[2]}))
233 |   rownames(mk) = str_c(source," ",known)
234 |   id = sort(known,index.return=TRUE)
235 |   known = known[id$ix]
236 |   source = source[id$ix]
237 |   mk = mk[id$ix,]
238 |   rows = known %in% rownames(dist)
239 |   dist = dist[sort(rownames(dist)),]
240 |   r=cor(t(mk),t(dist),method='pearson')
241 |   out = data.frame()
242 |   A = known %in% rownames(dist)[1]
243 |   out = cbind(r[A,1,drop=FALSE],known[A],source[A])
244 |   for (i in 2:length(rownames(dist))) {
245 |     A = known %in% rownames(dist)[i]
246 |     out = rbind(out,cbind(r[known %in% rownames(dist)[i],i,drop=FALSE],known[A],source[A]))
247 |   }
248 |   colnames(out) = c('r','cell_type','source')
249 |   r2 = r[rows,]
250 |   colors <- brewer.pal(11, "RdBu")
251 | 
252 |   mat = corrplot(as.matrix(t(r2)),tl.cex = 0.7,tl.col = "black",col=colorRampPalette(rev(colors))(200))
253 |   res = list()
254 |   res$mat = mat
255 |   res$out = out
256 |   res
257 | }
258 | 
259 | fixScales = function(X,Y) {
260 |   z = matrix(0,dim(X)[1])
261 |   for (i in 1:dim(X)[1]) {
262 |     x = as.matrix(X[i,])
263 |     y = as.matrix(t(Y[i,]))
264 |     fit <- lm(x ~ y)
265 |     z[i] = coef(fit)[2]
266 |   }
267 |   rownames(z) = rownames(X)
268 |   z
269 | 
270 | }
271 | 
272 | compareCorrelations = function(br,usr,uct,fn,w,h,drop.only1=TRUE,transpose=FALSE,font.size=7,ct_order=NULL) {
273 |   colors <- brewer.pal(11, "RdBu")
274 | 
275 |   abr = aggregate(as.numeric(br[,1])~rownames(br),FUN=mean)
276 |   nbr = sort(unique(str_c(br[,3],'_',br[,2])))
277 |   ct = unlist(lapply(nbr,function(x) {unlist(strsplit(x,'[_%]'))[2]}))
278 |   sr = tolower(unlist(lapply(nbr,function(x) {unlist(strsplit(x,'[_%]'))[1]})))
279 |   #usr = unique(sr)
280 |   if (is.null(uct)) {
281 |     uct = sort(unique(ct))
282 |   }
283 |   mat = matrix(nrow=length(usr),ncol=length(uct))
284 |   for (i in 1:length(usr)) {
285 |     for (j in 1:length(uct)) {
286 |       x = abr[sr==tolower(usr[i]) & ct==uct[j],2]
287 |       if(length(x)==1)
288 |         mat[i,j] = x
289 |     }
290 |   }
291 |   colnames(mat) = uct
292 |   rownames(mat) = usr
293 |   n = apply(mat,2,function(x) sum(!is.na(x)))
294 |   if(drop.only1==TRUE) {
295 |     mat = as.matrix(mat[,n>1])
296 |     colnames(mat) = uct[n>1]
297 |   }
298 |   mat = as.matrix(mat[!apply(mat, 1, function(x){sum(!is.na(x))==0}),])
299 |   pdf(fn,width=w,height=h)
300 |   if (is.null(ct_order))
301 |     ct_order = colnames(mat)
302 |   if (transpose==TRUE)
303 |     mat = t(mat[,ct_order])
304 |   corrplot(as.matrix(mat),method="pie",na.label="-",tl.cex = font.size/10,cl.cex=font.size/10,number.cex=(font.size+3)/10,tl.col = "black",col=colorRampPalette(rev(colors))(200))
305 |   dev.off()
306 |   mat
307 | }
308 | 
309 | ciberAnalysis = function(fn,dist,fn_pdf,remove.dups=FALSE) {
310 |   m=read.table(fn,sep="\t",header=TRUE,row.names=NULL, as.is=TRUE)
311 |   if (remove.dups==TRUE) {
312 |     m = createMatrixNoDups(m)
313 |     B = intersect(rownames(m),colnames(dist))
314 |     m = m[B,]
315 |     dist = dist[,B]
316 |   } else {
317 |     m = m[,-1]
318 |   }
319 |   m = t(m[,1:22])
320 |   rownames(m) = c("naive B-cells","Memory B-cells","Plasma cells","CD8+ T-cells","CD4+ naive T-cells","CD4+ memory T-cells rest","CD4+ memory T-cells act","Tfh","Tregs","Tgd","NK cells rest","NK cells act","Monocytes","Macrophages M0","Macrophages M1","Macrophages M2","DC rest","DC act","Mast cells rest","Mast cells act","Eosinophils","Neutrophils")
321 |   cd4 = colSums(m[c("CD4+ naive T-cells","CD4+ memory T-cells rest","CD4+ memory T-cells act","Tregs"),])
322 |   cd4mem = colSums(m[c("CD4+ memory T-cells rest","CD4+ memory T-cells act"),])
323 |   nk = colSums(m[c("NK cells rest","NK cells act"),])
324 |   dc = colSums(m[c("DC rest","DC act"),])
325 |   bcell = colSums(m[c("naive B-cells","Memory B-cells","Plasma cells"),])
326 |   mast = colSums(m[c("Mast cells rest","Mast cells act"),])
327 |   macs = colSums(m[c("Macrophages M0","Macrophages M1","Macrophages M2"),])
328 |   m = rbind(m,cd4,cd4mem,cd4mem,nk,dc,bcell,mast,macs)
329 |   rownames(m)[23:30] = c("CD4+ T-cells","CD4+ Tcm","CD4+ Tem","NK cells","DC","B-cells","Mast cells","Macrophages")
330 |   A = intersect(rownames(m),rownames(dist))
331 | 
332 |   M = m[A,]
333 |   d = dist[A,]
334 |   r=corr.test(t(M),t(d),method="pearson")
335 |   r$r[is.na(r$r)] = 0
336 |   pdf(fn_pdf,width=4,height=4)
337 |   colors <- brewer.pal(11, "RdBu")
338 |   corrplot(as.matrix(r$r),tl.cex = 0.7,tl.col = "black",col=colorRampPalette(rev(colors))(200))
339 |   dev.off()
340 |   r2=cor(M,d)
341 |   barplot(sort(diag(r2)))
342 |   #print(median(diag(r2)))
343 |   r$x = M
344 |   r$y = d
345 | 
346 |   r2 = cor(t(M),t(dist),method="pearson")
347 |   r$r2 = t(r2)
348 |   r
349 | }
350 | 
351 | 
352 | mixScatters = function(fn,m,dist,norm=FALSE,point.cex=0.25) {
353 |   range01 <- function(x){(x-min(x))/(max(x)-min(x))}
354 |   corr_eqn <- function(x,y, digits = 2) {
355 |     corr_coef <- round(cor(x, y,method="pearson"), digits = digits)
356 |     paste("R = ", corr_coef)
357 |   }
358 | 
359 |   names = rownames(dist)
360 |   n = dim(dist)[1]
361 |   nr = round(n/2+0.05)
362 |   pdf(fn,width=nr,height=2)
363 |   layout(matrix(c(1:(nr*2)),2,nr))
364 |   par( mai = c(0, 0, 0, 0))
365 |   for (i in 1:n) {
366 |     #y = t(range01(dist[names[i],]))
367 |     #x = range01(m[names[i],])
368 |     y = t(as.matrix(dist[names[i],]))
369 |     x = as.matrix(m[names[i],])
370 | 
371 |     #x = M2[names[i],]
372 |     #p[[i]] = ggplot(subset(df,get(colnames(df)[c(i,i+12)])) + geom_point(aes(x=get(colnames(df)[i]), y=get(colnames(df)[i+12])),color="blue",size=0.5) + geom_smooth(method=lm,level = 0.95, aes(x=get(colnames(df)[i]), y=get(colnames(df)[i+12])), fill="red",colour="darkblue", size=1)+theme_bw()+theme(axis.title.x=element_blank(),axis.title.y=element_blank(), axis.ticks=element_blank(), axis.text.x=element_blank(), axis.text.y=element_blank(),plot.margin=unit(c(0,0,0,0), "cm"))+annotate("text",label=corr_eqn(df[,i],df[,i+12]),x=0.7, y=0.01,vjust=0, hjust=1,size=1.6)
373 |     if (norm==FALSE) {
374 |       maxxy1 = max(max(x),max(y))
375 |       maxxy2 = max(max(x),max(y))
376 |     } else {
377 |       x = x-min(x)
378 |       y = y-min(y)
379 |       maxxy1 = max(x)*1.1
380 |       maxxy2 = max(y)*1.1
381 |     }
382 |     plot(x,y,pch=16,cex=point.cex,col="darkblue",yaxt='n',xaxt='n',xlim=c(0,maxxy1),ylim=c(0,maxxy2))
383 |     #lines(lowess(x,y), col="red")
384 |     abline(lm(as.vector(y)~as.vector(x)),col='red')
385 |     r=sprintf('%.2f',cor(as.vector(x),as.vector(y),method="pearson"))
386 |     #text(0.01, 0.95, labels=names[i], adj=c(0, .5),cex=0.65)
387 |     text(0.005, maxxy2*0.95, labels=names[i], adj=c(0, .5),cex=0.85)
388 |     text(maxxy1*0.6, maxxy2*0.02, labels=bquote(rho == ~ .(r)), adj=c(0, .5),cex=0.85)
389 | 
390 |   }
391 |   dev.off()
392 | }
393 | 
394 | 
395 | library(psych)
396 | 
397 | analyze.facs = function(fcs,scores,sets,known=FALSE,spill=NULL,alpha=0.5,beta_dist=NULL,fn=NULL) {
398 |   a = intersect(colnames(scores),colnames(fcs))
399 |   rownames(fcs) = sets
400 | 
401 |   if(known==TRUE) {
402 |     source = unlist(lapply(rownames(scores),function(x) {unlist(strsplit(x,'[_%]'))[1]}))
403 |     ct = unlist(lapply(rownames(scores),function(x) {unlist(strsplit(x,'[_%]'))[2]}))
404 |     #rownames(scores) = str_c(source," ",ct)
405 |     y = scores[ct %in% sets,a]
406 |     ct2 = ct[ct %in% sets]
407 |     x = sapply(ct2,function(x) fcs[x,a])
408 |     x <- matrix(unlist(x), ncol = dim(x)[2], byrow = FALSE)
409 |     colnames(x) = rownames(y)
410 |   } else {
411 |     ct = intersect(sets,rownames(scores))
412 |     y = scores[ct,a]
413 |     x = t(fcs[ct,a])
414 |   }
415 | 
416 |   if (is.null(spill)) {
417 |     s = as.matrix(y)
418 |   } else {
419 |     #spill$fv[,2]=1
420 |     z = transformScores(as.matrix(y),spill$fv)
421 |     s = spillOver(z,spill$K,alpha)
422 |     #s = y
423 |     A = intersect(colnames(x),rownames(s))
424 |     x = x[,A]
425 |     s=s[A,]
426 |     if (!is.null(beta_dist)) {
427 |       beta_dist = beta_dist[rownames(s),]
428 |       pvals = s
429 |       for (i in 1:nrow(beta_dist))
430 |         pvals[i,] = 1-unlist(lapply(s[i,],FUN=function(x) mean(x>beta_dist[i,])))
431 |       s[pvals>0.05] = 0
432 | 
433 |     }
434 |   }
435 |   r = corr.test(x,t(s),method="pearson",adjust='none')
436 |   r$r[is.na(r$r)] = 0
437 |   dr = diag(r$r)
438 |   names(dr) = colnames(r$r)
439 |   print(as.matrix(dr))
440 |   colors <- brewer.pal(11, "RdBu")
441 |   corrplot(r$r,tl.cex=0.5,tl.col = "black",col=colorRampPalette(rev(colors))(200))
442 | 
443 |   if (!is.null(fn)) {
444 |     pdf(paste0(fn,'.pdf'),width=3.2,height=4.7)
445 |     corrplot(r$r,tl.cex=0.5,tl.col = "black",col=colorRampPalette(rev(colors))(200))
446 |     dev.off()
447 |     thres= sort(diag(r$r),decreasing = TRUE)[7]
448 |     print(thres)
449 |     A = diag(r$r)>thres
450 |     mixScatters(paste0(fn,'_mix_scatters.pdf'),s[A,],as.matrix(t(x[,A])),norm=TRUE,point.cex=0.5)
451 | 
452 |   }
453 |   r2 = cor(t(x),s,method="spearman")
454 |   barplot(sort(diag(r2)))
455 |   print(median(diag(r2)))
456 |   r$x = t(x)
457 |   r$y = s
458 |   r
459 | 
460 | }
461 | 
462 | compare.facs = function(r,k,ciber,fn,drop.only1=TRUE) {
463 |   aran = cbind(diag(r$r),colnames(r$r),'xCell',diag(r$p))
464 |   rownames(aran) = str_c(rep('xCell',dim(aran)[1]),' ',aran[,2])
465 |   cbr = cbind(diag(ciber$r),colnames(ciber$r),'Newman',diag(ciber$p))
466 |   rownames(cbr) = str_c(rep('Newman',dim(cbr)[1]),' ',cbr[,2])
467 |   source = unlist(lapply(colnames(k$r),function(x) {unlist(strsplit(x,'[_%]'))[1]}))
468 |   ct = unlist(lapply(colnames(k$r),function(x) {unlist(strsplit(x,'[_%]'))[2]}))
469 |   known = cbind(diag(k$r),ct,source,diag(k$p))
470 |   rownames(known) = str_c(source," ",ct)
471 |   abr = rbind(aran,known,cbr)
472 | 
473 |   usr = c('xCell','Bindea','Charoentong','Palmer','Rooney','Tirosh','Newman')
474 |   ct = abr[,2]
475 |   uct = unique(abr[,2])
476 |   sr = tolower(abr[,3])
477 | 
478 |   mat.r= matrix(nrow=length(usr),ncol=length(uct))
479 |   mat.p = mat.r
480 |   for (i in 1:length(usr)) {
481 |     for (j in 1:length(uct)) {
482 |       x = abr[which(sr==tolower(usr[i]) & ct==uct[j]),]
483 |       mat.r[i,j] = as.numeric(x[1])
484 |       mat.p[i,j] = as.numeric(x[4])
485 | 
486 |     }
487 |   }
488 |   colnames(mat.r) = uct
489 |   rownames(mat.r) = usr
490 |   n = apply(mat.r,2,function(x) sum(!is.na(x)))
491 |   if(drop.only1==TRUE) {
492 |     mat.r = mat.r[,n>1]
493 |     mat.r = mat.r[,!is.na(mat.r[1,])]
494 |     mat.p = mat.p[,n>1]
495 |     mat.p = mat.p[,!is.na(mat.r[1,])]
496 |     avg.fcs=apply(fcs,1,function(x) mean(x,na.rm=TRUE))
497 |     avg.fcs2 = which(colnames(mat.r) %in% rownames(fcs)[avg.fcs>0.01])
498 |     mat.r = mat.r[,avg.fcs2]
499 |     mat.p = mat.p[,avg.fcs2]
500 |   }
501 |   pdf(fn,width=5,height=3)
502 |   corrplot(as.matrix(mat.r),p.mat=mat.p,sig.level=0.05,pch.col = "gray", pch.cex = 1,method="pie",na.label="-",tl.cex = 0.7,tl.col = "black",col=colorRampPalette(rev(colors))(200))
503 |   dev.off()
504 |   mat.r
505 | 
506 | }
507 | 
508 | fit.scores.plots = function(fv,families,working.dir,ctrl1,ctrl2,ctrl2_type=NULL,platform) {
509 |   files1 = list.files(path=paste0(working.dir,'/mixtures/scores/'),pattern=paste0('_',ctrl1,'_'))
510 |   files2 = list.files(path=paste0(working.dir,'/mixtures/scores/'),pattern=paste0('_',ctrl2,'_'))
511 |   if (!is.null(ctrl2_type)) {
512 |     for (i in 1:length(files1)) {
513 |       ct = gsub('(.txt)','',strsplit(files1[i],'_')[[1]][3]);
514 |       if(ctrl2_typefamilies[ct,1]==ctrl2_type)
515 |         files1[i] = 'NA'
516 |     }
517 |     for (i in 1:length(files2)) {
518 |       ct = gsub('(.txt)','',strsplit(files2[i],'_')[[1]][3]);
519 |       if(families[ct,1]!=ctrl2_type)
520 |         files2[i] = 'NA'
521 |     }
522 |   }
523 |   files = c(files1,files2)
524 |   files = files[files!="NA"]
525 |   n = 32
526 |   for (j in 1:3) {
527 |     p = list()
528 |     N = min(n,length(files)-n*(j-1))
529 |     for (k in 1:N) {
530 |       i = k+(j-1)*n
531 |       ct = gsub('(.txt)','',strsplit(files[i],'_')[[1]][3]);
532 |       sr = gsub('(.txt)','',strsplit(files[i],'_')[[1]][1]);
533 |       mix <- as.matrix(read.table(paste0(working.dir,'/mixtures/scores/',files[i]), header=TRUE, sep="\t", row.names=1, as.is=TRUE))
534 |       d = list()
535 |       d$x = (mix[ct,seq(2,33)]-mix[ct,2])/5000
536 |       d$y=seq(0.008,0.256,0.008)
537 |       #z <- nls(y ~ a * x^b, data = d, start = list(a=1, b=1),control = list(maxiter = 500))
538 |       #d$z = predict(z,d$x)
539 | 
540 |       d$z = fv[ct,1]*d$x^fv[ct,2]
541 |       d = as.data.frame(d)
542 | 
543 |       #message(sprintf('%d\t%s\t%s\t%f\t%f\t%f',i,sr,ct,mix[ct,2],coef(z)[1],coef(z)[2]))
544 | 
545 |       if (sr == 'blueprint') {
546 |         sr = 'B'
547 |       } else if (sr == 'encode') {
548 |         sr
549 |       }
550 |       if (ct == 'Class-switched memory B-cells') {
551 |         ct = 'CS memory B-cells'
552 |       }
553 |       p[[k]] = ggplot(d, aes(y=y)) + geom_line(aes(x=x), colour="red") + geom_line(aes(x=z), colour="blue")+theme_bw()+geom_point(aes(y=y,x=x),size=0.25,col='black')+geom_point(aes(y=y,x=z),size=0.25,col='black')+annotate("text", hjust=0, x = 0.02, y = 0.205, size=2.5,label = sprintf('y==%.2f*x^%.2f',fv[ct,1],fv[ct,2]),parse=T)+annotate("text", hjust=0,x = 0.02, y = 0.24,size=2.5,label = paste0(ct,' (',toupper(substr(sr,1,1)),')'), fontface =2)
554 |       #p[[k]] = ggplot(d, aes(y=y)) + geom_line(aes(x=x), colour="red") + geom_line(aes(x=z), colour="blue")+theme_bw()+geom_point(aes(y=y,x=x),size=0.25,col='black')+geom_point(aes(y=y,x=z),size=0.25,col='black')+annotate("text", hjust=0, x = 0.02, y = 0.205, size=2.5,label = sprintf('y==%.2f*x^%.2f', coef(z)[1],coef(z)[2]),parse=T)+annotate("text", hjust=0,x = 0.02, y = 0.24,size=2.5,label = paste0(ct,' (',toupper(sr),')'), fontface =2)
555 |       p[[k]] = p[[k]] + theme(plot.margin=unit(c(0,0,0,0), "cm"),axis.line=element_blank(),axis.text.x=element_blank(),axis.text.y=element_blank(),axis.ticks=element_blank(),axis.title.x=element_blank(),axis.title.y=element_blank(),legend.position="none",panel.background=element_blank())
556 |     }
557 |     pdf(paste0(working.dir,'/plots/supp3_',platform,'_',j,'.pdf'),width=6,height=ceil(N/4))
558 |     do.call(grid.arrange,c(p,ncol=4))
559 |     dev.off()
560 |   }
561 | }
562 | 
563 | 
564 | test.mixture = function(mix.file,spill,alpha,cibersort=TRUE) {
565 |   scores = as.matrix(read.table(paste0(mix.file,'_scores.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
566 |   dist = as.matrix(read.table(paste0(mix.file,'_dist.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
567 | 
568 |   A = intersect(rownames(spill$fv),rownames(scores))
569 |   scores = scores[A,]
570 |   a = scores-apply(scores,1,min)
571 |   calib = as.vector(spill$fv[rownames(scores),'calib'])
572 |   fit_power = as.vector(spill$fv[rownames(scores),'fit'])
573 |   transformed = ((a/16000)^fit_power)/(2*calib)
574 |   transformed=t(t(transformed)/apply(transformed,2,sum))
575 |   A = intersect(rownames(spill$K),rownames(transformed))
576 |   K = spill$K[A,A]*alpha
577 |   diag(K)<-1
578 |   scores <- apply(transformed[A, ], 2, function(x) lsqlincon(K,x, lb = 0))
579 |   rownames(scores) = A
580 | 
581 |   r = cor(t(scores),t(dist))
582 |   if (cibersort==TRUE) {
583 |     ciber = as.matrix(read.table(paste0(mix.file,'_CIBERSORT.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
584 |     ciber = ciber[,1:(dim(ciber)[2]-3)]
585 |     A = intersect(colnames(ciber),rownames(dist))
586 |     rc  = cor(ciber,t(dist))
587 |     rc[is.na(rc)] = 0
588 |     A = intersect(rownames(r),rownames(rc))
589 |     B = intersect(colnames(r),colnames(rc))
590 |     res = list(dist=dist,xCell=scores,CIBERSORT=ciber,RX=r[A,B],RC=rc[A,B])
591 |   } else {
592 |     res = list(dist=dist,xCell=scores,RX=r)
593 |   }
594 | 
595 |   colors <- brewer.pal(11, "RdBu")
596 |   col=colorRampPalette(rev(colors))(200)
597 | 
598 |   pdf(paste0(mix.file,'.pdf'))
599 |   A = intersect(colnames(res$RX),rownames(res$RX))
600 |   corrplot(res$RX[A,A],title='xCell',col=col)
601 |   if (cibersort==TRUE) {
602 |     A = intersect(colnames(res$RC),rownames(res$RC))
603 |     corrplot(res$RC[A,A],title='CIBERSORT',col=col)
604 |     b = cbind(diag(res$RX[A,A]),diag(res$RC[A,A]))
605 |     colnames(b) = c('xCell','CIBERSORT')
606 |     corrplot(b,method='number',col=col)
607 |   }
608 | 
609 |   corrplot(res$RX,title='xCell - All',col=col)
610 | 
611 |   if (cibersort==TRUE) {
612 |     corrplot(res$RC,title='CIBERSORT - All',col=col)
613 |   }
614 |   mixScatters(res$xCell,dist)
615 |   mtext("xCell", outer=TRUE,  cex=1, line=-0.5)
616 |   if (cibersort==TRUE) {
617 |     mixScatters(t(res$CIBERSORT),dist)
618 |     mtext("CIBERSORT", outer=TRUE,  cex=1, line=-0.5)
619 |   }
620 |   dev.off()
621 |   res
622 | }
623 | 
624 | choose.types.to.use = function(samples,n,dependencies) {
625 |   A = sample(length(samples),length(samples))
626 |   types.to.use = c()
627 |   i=1
628 |   while (i <= length(A)) {
629 |     id = which(dependencies$types==samples[A[i]])
630 |     if (length(id)==1) {
631 |       A = A[!(samples[A] %in% dependencies$dep[[id]])]
632 |     }
633 |     i=i+1
634 |   }
635 |   samples[A[1:min(n,length(A))]]
636 | }
637 | 
638 | countNotExpectedCellTypes = function(scores,cell_types,families) {
639 |   parents = families[cell_types,2]
640 |   A = setdiff(rownames(scores),c(cell_types,parents))
641 |   mean(scores[A,]>0.01)
642 | }
643 | 
644 | 
645 | 


--------------------------------------------------------------------------------
/Dev_scripts/types_dependecies.txt:
--------------------------------------------------------------------------------
1 | aDC	DC	cDC	pDC			B-cells	Class-switched memory B-cells	Memory B-cells	naive B-cells	Plasma cells		CD4+ memory T-cells	CD4+ Tcm	CD4+ Tem	CD4+ T-cells			CD4+ naive T-cells	CD4+ T-cells					CD4+ T-cells	CD4+ Tcm	CD4+ Tem	CD4+ memory T-cells	CD4+ naive T-cells	Tregs	TregsCD4+ Tcm	CD4+ T-cells					CD4+ Tem	CD4+ T-cells					CD8+ T-cells	CD8+ Tcm	CD8+ Tem	CD8+ naive T-cells			CD8+ naive T-cells	CD8+ T-cells					CD8+ Tcm	CD8+ T-cells					CD8+ Tem	CD8+ T-cells					cDC	DC	aDC	iDC			Class-switched memory B-cells	B-cells					DC	aDC	cDC	pDC	iDC		Endothelial cells	ly Endothelial cells	mv Endothelial cells				Eosinophils						Epithelial cells	Keratinocytes	Sebocytes	Mesangial cells			iDC	DC	pDC	cDC			Keratinocytes						ly Endothelial cells	Endothelial cells					Macrophages	Macrophages M1	Macrophages M2				Macrophages M1	Macrophages					Macrophages M2	Macrophages					Memory B-cells	B-cells					mv Endothelial cells	Endothelial cells					naive B-cells	B-cells					Neutrophils	Granulocytes					pDC	DC					pro B-cells	B-cells					Tgd cells	CD4+ T-cells	CD8+ T-cells				Tregs	CD4+ T-cells					


--------------------------------------------------------------------------------
/Dev_scripts/xCell_ImmPort.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dviraran/xCell/20e2919eefd37e15af35f29f4944e30697098a28/Dev_scripts/xCell_ImmPort.zip


--------------------------------------------------------------------------------
/Dev_scripts/xCell_dev_functions.R:
--------------------------------------------------------------------------------
  1 | library(GSVA)
  2 | library(GSEABase)
  3 | library(pracma)
  4 | library(RColorBrewer)
  5 | library(pheatmap)
  6 | 
  7 | create.train.test.ref.sets = function(working.dir,data_sets,nsamples) {
  8 |   
  9 |   ref = lapply(data_sets, function (x) {
 10 |     message(x)
 11 |     ref.samples.file = file.path(working.dir,'reference_sets',paste0(x,'_samples.txt'))
 12 |     ref.expr.file = file.path(working.dir,'reference_sets',paste0(x,'_expr.txt'))
 13 |     
 14 |     expr = read.table(ref.expr.file,sep="\t",header=TRUE,row.names=1, as.is=TRUE)
 15 |     samples = read.table(ref.samples.file,sep="\t",header=FALSE,row.names=NULL, as.is=TRUE)
 16 |     A = !(samples[,2]=="NaN")
 17 |     expr = expr[,A]
 18 |     samples = samples[A,]
 19 |     
 20 |     types = sort(unique(samples[,2]))
 21 |     A = table(samples[,2])>nsamples
 22 |     train = c()
 23 |     test = c()
 24 |     for (i in types[A]) {
 25 |       ids = which(samples[,2]==i)
 26 |       sid = sample(ids,1)
 27 |       test = c(test,sid)
 28 |       train = c(train,setdiff(ids,sid))
 29 |     }
 30 |     for (i in types[!A]) {
 31 |       ids = which(samples[,2]==i)
 32 |       train = c(train,ids)
 33 |     }
 34 |     
 35 |     write.table(expr[,train],file.path(working.dir,'reference_sets',paste0(x,'_expr_train.txt')),sep="\t",row.names=TRUE,quote =FALSE,col.names = TRUE)
 36 |     write.table(expr[,test],file.path(working.dir,'reference_sets',paste0(x,'_expr_test.txt')),sep="\t",row.names=TRUE,quote =FALSE,col.names = TRUE)
 37 |     write.table(samples[train,],file.path(working.dir,'reference_sets',paste0(x,'_samples_train.txt')),sep="\t",row.names=FALSE,quote =FALSE,col.names = FALSE)
 38 |     write.table(samples[test,],file.path(working.dir,'reference_sets',paste0(x,'_samples_test.txt')),sep="\t",row.names=FALSE,quote =FALSE,col.names = FALSE)
 39 |     
 40 |   })
 41 | }
 42 | 
 43 | read.data.sets = function(working.dir,data_sets,train_test) {
 44 |   
 45 |   ref = lapply(data_sets, function (x) {
 46 |     ref.samples.file = file.path(working.dir,'reference_sets',paste0(x,'_samples_',train_test,'.txt'))
 47 |     ref.expr.file = file.path(working.dir,'reference_sets',paste0(x,'_expr_',train_test,'.txt'))
 48 |     
 49 |     expr = read.table(ref.expr.file,sep="\t",header=TRUE,row.names=1, as.is=TRUE)
 50 |     samples = read.table(ref.samples.file,sep="\t",header=FALSE,row.names=NULL, as.is=TRUE)
 51 |     ref = list(expr = expr, samples = samples, name=x)
 52 |   })
 53 | }
 54 | 
 55 | 
 56 | read.types.dependencies = function(working.dir,file.name) {
 57 |   con  <- file(file.path(working.dir,file.name), open = "r")
 58 |   out <- list()
 59 |   i=1
 60 |   while (length(oneLine <- readLines(con, n = 1, warn = FALSE)) > 0) {
 61 |     vec <- unlist(strsplit(oneLine, "\t"))
 62 |     out$types[i] = vec[1]
 63 |     n = max(which(!(vec=="")))
 64 |     if (n<2)
 65 |       out$dep[i] = list(types="")
 66 |     else
 67 |       out$dep[i] = list(types=vec[2:n])
 68 |     i=i+1
 69 |   } 
 70 |   close(con)
 71 |   out
 72 | }
 73 | 
 74 | create.signatures = function(ref,dependencies,genes.use,other.gene.set,no.filter.types,working.dir) {
 75 |   
 76 |   dir.create(file.path(working.dir, 'signatures'), showWarnings = FALSE)
 77 |   
 78 |   temp=lapply(ref,function (x) {
 79 |     message(paste('Dataset -',x$name))
 80 |     expr = x$expr[genes.use,] 
 81 |     if(x$type=='seq') {
 82 |       expr = log2(expr)
 83 |       expr[expr<0] = 0
 84 |     }
 85 |     
 86 |     A = !is.na(x$samples[,2])
 87 |     expr = expr[,A]
 88 |     samples = x$samples[A,2]
 89 |     probs = c(.1,.25,.33333333,.5,.6666666,.75,.9)
 90 |     diff_vals = c(0, 0.1, 0.585, 1, 1.585, 2,3,4,5)
 91 |     
 92 |     types = unique(samples)
 93 |     
 94 |     message('get quantiles...')
 95 |     q = lapply(types,function(y) {
 96 |       A = samples==y
 97 |       if (sum(A)==1) {
 98 |         ex = cbind(expr[,samples==y],expr[,samples==y])
 99 |       } else {
100 |         ex = expr[,A]
101 |       }
102 |       
103 |       q = apply(ex,1,function(z) quantile(z,probs,na.rm=TRUE))
104 |       
105 |       # -- remove over expressed genes, but not for epithelial cells
106 |       if (!(y %in% no.filter.types))
107 |         q[,!(rownames(expr) %in% other.gene.set)]=0
108 |       q
109 |     })
110 |     
111 |     message('create all signatures...')
112 |     
113 |     for (i in 1:length(types)) {
114 |       message(types[i])
115 |       id = which(dependencies$types==types[i])
116 |       if (length(id)==1) {
117 |         A = !(types %in% dependencies$dep[[id]])
118 |       } else {
119 |         A = rep(TRUE,length(types))
120 |       }
121 |       ntypes = sum(A)
122 |       
123 |       for (diff in diff_vals) {
124 |         for (j in 1:round(length(probs)/2+0.5)) {
125 |           diff_genes = lapply(q[A],function(x) q[[i]][j,]>x[length(probs)-j,]+diff)
126 |           output <- matrix(unlist(diff_genes), nrow = ntypes, byrow = TRUE)
127 |           for (n in (ntypes-1):(ntypes-3)) {
128 |             g = colSums(output)>=n
129 |             if (sum(g)>7 & sum(g)<201)
130 |               write.table(rownames(expr)[g],file=file.path(working.dir,'signatures',sprintf("%s%%%s%%j%g%%d%g%%n%g.txt",types[i],toupper(x$name),round(probs[j]*100),diff,ntypes-n)),sep="\t",row.names=FALSE,quote =FALSE,col.names = FALSE)
131 |           }
132 |         }
133 |       }
134 |     }
135 |   })
136 |   return(NULL)
137 | }
138 | 
139 | score.ref.signatures = function(ref,genes.use,working.dir) {
140 |   dir.create(file.path(working.dir, 'scores'), showWarnings = FALSE)
141 |   egc = read.signatures.dir(file.path(working.dir,'signatures'))
142 |   lapply(ref, function(x) {
143 |     scores = gsva(as.matrix(x$expr[genes.use,]),egc,method='ssgsea',ssgsea.norm=FALSE)
144 |     write.table(scores,file=file.path(working.dir,"scores",paste(x$name,"_ssgsea.txt")),sep="\t",row.names=TRUE,quote =FALSE,col.names = NA)
145 |   })
146 | }
147 | 
148 | choose.best.signatures = function(ref,nsigs,dependencies,top.diff.percent=1,working.dir) {
149 |   dir.create(file.path(working.dir, 'best_signatures'), showWarnings = FALSE)
150 |   lapply(ref, function(x) {
151 |     
152 |     samples = x$samples[,2]
153 |     types = unique(samples)
154 |     message(x$name)
155 |     scores = read.table(file.path(working.dir,"scores",paste(x$name,"_ssgsea.txt")),sep="\t",header=TRUE,row.names=1, as.is=TRUE)
156 |     
157 |     signatures_split = unlist(strsplit(rownames(scores),'%'))
158 |     cell_types = signatures_split[seq(1,length(signatures_split),5)]
159 |     sources = signatures_split[seq(2,length(signatures_split),5)]
160 |     
161 |     message(ref$name)
162 |     for (i in types) {
163 |       id = which(dependencies$types==i)
164 |       if (length(id)==1)
165 |         opt_in = types[!(types %in% dependencies$dep[[id]])]
166 |       else
167 |         opt_in = types
168 |       
169 |       message(i)
170 |       sig.use = cell_types==i
171 |       if (length(unique(sources[sig.use]))>1) {
172 |         sig.use = cell_types==i & !(sources==toupper(x$name))
173 |       }
174 |       if (sum(sig.use)>0) {
175 |         ttest = apply(scores[sig.use,],1,function(y) {
176 |           others = samples %in% setdiff(opt_in,i)
177 |           n = round(sum(others)*top.diff.percent)
178 |           s = sort(y[others],decreasing = TRUE)
179 |           A = samples==i
180 |           if (sum(A)==1) {
181 |             B = others & as.vector(y>as.numeric(s[n]))
182 |             z = (y-min(y[B]))/(max(y[B])-min(y[B]))
183 |             as.numeric(z[A]-max(z[B]))
184 |           } else {
185 |             t.test(y[A],y[others & as.vector(y>as.numeric(s[n]))],alternative='greater')$statistic
186 |           }
187 |         })
188 |         ttest.ordered = sort(ttest,decreasing = TRUE)
189 |         for (j in 1:nsigs){
190 |           fn = paste0(i,'%',toupper(x$name),'%',j,'.txt')
191 |           file.copy(from=file.path(working.dir, 'signatures',names(ttest.ordered[j])), to=file.path(working.dir, 'best_signatures',fn))
192 |         }
193 |       }
194 |       
195 |     }
196 |   })
197 | }  
198 | 
199 | read.signatures.dir = function(path) {
200 |   sig = list()
201 |   files = list.files(path=path)
202 |   for (i in 1:length(files)) {
203 |     sig[i] = GeneSet(scan(paste0(path,'/',files[i]), what="", sep="\n"), setName=files[i])
204 |   }
205 |   signatures <- GeneSetCollection(sig)
206 |   toGmt(signatures,'~/Documents/signatures/Mouse/signatures.txt')
207 |   signatures
208 | }
209 | 
210 | create.mix = function(expr,types,types.to.use,signatures,genes.use,nrep,noise.add=0,out.name) {
211 |   types.to.use = intersect(types.to.use,types)
212 |   n.oftypes = length(types.to.use)
213 |   dist = rand(n.oftypes,nrep)
214 |   dist = as.matrix(apply(dist,2,function(x) x/sum(x)))
215 |   mix = matrix(0,nrep,dim(expr)[1])
216 |   for (i in 1:n.oftypes) {
217 |     message(types.to.use[i])
218 |     A = which(types==types.to.use[i])
219 |     if(length(A)>1) {
220 |       ex = expr[,sample(A,nrep,replace=TRUE)]
221 |     } else {
222 |       ex = expr[,rep(A,nrep)]
223 |     }
224 |     noise = runif(dim(ex)[1],-noise.add,noise.add)
225 |     ex = ex+ex*noise
226 |     mix = mix + t(ex)*dist[i,]
227 |     
228 |   }
229 |   rownames(dist) = types.to.use
230 |   mix = t(mix)
231 |   colnames(mix) = colnames(dist)
232 |   write.table(mix, file = paste0(out.name,'_n',n.oftypes,'_expr.txt'), sep = "\t", col.names=NA,quote = FALSE)
233 |   write.table(dist, file = paste0(out.name,'_n',n.oftypes,'_dist.txt'), sep = "\t", col.names=NA,quote = FALSE)
234 |   
235 |   scores = rawEnrichmentAnalysis(as.matrix(mix),signatures,genes.use,paste0(out.name,'_n',n.oftypes,'_scores.txt'))
236 |   
237 |   res = list()
238 |   res$mix = mix
239 |   res$dist = dist
240 |   res$scores = scores
241 |   res
242 | }
243 | 
244 | create.onecell.mix = function(ref,genes.use,control.type,nrep,working.dir) {
245 |   dir.create(file.path(working.dir, 'mixtures'), showWarnings = FALSE)
246 |   dir.create(file.path(working.dir, 'mixtures/expr'), showWarnings = FALSE)
247 |   
248 |   lapply(ref, function(x) {
249 |     
250 |     samples = x$samples[,2]
251 |     types = unique(samples)
252 |     CTRL =  as.matrix(apply(as.matrix(x$expr[genes.use,samples==control.type]),1,mean))
253 |     for (i in 1:length(types)) {
254 |       ct = types[i]
255 |       message(ct)
256 |       d = t(as.matrix(seq(0,1,by=1/nrep)))
257 |       mix = matrix(0,length(d),dim(x$expr)[1])
258 |       A = which(samples==ct)
259 |       if (length(A)==1)
260 |         expr = as.matrix(x$expr[genes.use,A])
261 |       else
262 |         expr = as.matrix(apply(x$expr[genes.use,A],1,mean))
263 |       mix = expr%*%d+CTRL%*%(1-d)
264 |       write.table(mix, file = file.path(working.dir,'/mixtures/expr/',paste0(x$name,'_',control.type,'_',ct,'.txt')), sep = "\t", col.names=NA,quote = FALSE)
265 |     }
266 |   })
267 | }
268 | 
269 | score.onecell.mix = function(signatures,genes.use,working.dir,rescore.all=TRUE) {
270 |   dir.create(file.path(working.dir, 'mixtures/scores'), showWarnings = FALSE)
271 |   
272 |   files = list.files(path=paste0(working.dir,'/mixtures/expr/'),pattern='txt')
273 |   for (i in files) {
274 |     message(i)
275 |     out_file = file.path(working.dir,'/mixtures/scores/',i)
276 |     if(rescore.all==FALSE) {
277 |       if (file.exists(out_file))
278 |         next;
279 |     } 
280 |     mix=read.table(file.path(working.dir,'mixtures/expr',i),sep="\t",header=TRUE,row.names=1, as.is=TRUE)
281 |     scores = rawEnrichmentAnalysis(as.matrix(mix[,1:35]),signatures,genes.use,out_file)
282 |   }
283 | }
284 | 
285 | create.ref.mix = function(ref,genes.use,control.type,signatures,percent,working.dir) {
286 |   dir.create(file.path(working.dir, 'mixtures'), showWarnings = FALSE)
287 |   
288 |   lapply(ref, function(x) {
289 |     samples = x$samples[,2]
290 |     types = unique(samples)
291 |     
292 |     CTRL =  as.matrix(apply(as.matrix(x$expr[genes.use,which(samples==control.type)]),1,mean))
293 |     
294 |     expr = apply(as.matrix(types),1,function(y) {apply(as.matrix(x$expr[genes.use,samples==y]),1,mean)})
295 |     colnames(expr) = types
296 |     rownames(expr) = genes.use
297 |     mix = expr*percent + (CTRL%*%matrix(1,1,length(types)))*(1-percent)
298 |     
299 |     fn = file.path(working.dir,'mixtures',paste0(x$name,'_',control.type,'_',round(100*percent),'p.txt'))
300 |     
301 |     scores = rawEnrichmentAnalysis(as.matrix(mix),signatures,genes.use,fn)
302 |     as.matrix(scores)
303 |   })
304 | }
305 | 
306 | learn.transform.parameters = function(data_set,scores_path,ctrl_type,nrep,percent) {
307 |   files = c()
308 |   for (i in data_sets)
309 |     files = c(files,list.files(path=scores_path,pattern=paste0(i,'_',ctrl_type,'_')))
310 |   
311 |   fit_table = list()
312 |   
313 |   d = t(as.matrix(seq(0,1,by=1/nrep)))
314 |   pid = max.col(1-abs((d-percent)))+1
315 |   
316 |   for (i in 1:length(files)) {
317 |     ct = gsub('(.txt)','',strsplit(files[i],'_')[[1]][3]);
318 |     sr = gsub('(.txt)','',strsplit(files[i],'_')[[1]][1]);
319 |     if (!(ct==ctrl_type)) {
320 |       mix = as.matrix(read.table(file.path(scores_path,files[i]),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
321 |       df = data.frame(x=(mix[ct,seq(2,pid)]-mix[ct,2])/5000,y=d[2:pid])
322 |       
323 |       if (tail(df$x,1)<df$x[1]) {
324 |         fit_table$value1[i] = 1
325 |         fit_table$value2[i] = 1
326 |       } else {
327 |         z = try(nls(y ~ a * x^b, data = df, start = list(a=1, b=1),control = list(maxiter = 500)))
328 |         fit_table$name[i] = ct
329 |         if (class(z) == "try-error") {
330 |           fit_table$value1[i] = NA
331 |           fit_table$value2[i] = NA
332 |           fit_table$calib[i] = NA
333 |         } else {
334 |           fit_table$value1[i] = coef(z)[1]
335 |           fit_table$value2[i] = coef(z)[2]
336 |           fit_table$calib[i] = coef(lm(df$x^coef(z)[2]~df$y))[2]
337 |           message(sprintf('%d\t%s\t%s\t%f\t%f\t%f\t%f',i,sr,ct,mix[ct,2],coef(z)[1],coef(z)[2],fit_table$calib[i]))
338 |           
339 |         }
340 |       }
341 |       
342 |     }
343 |   }
344 |   
345 |   fv = aggregate(cbind(fit_table$value1,fit_table$value2,fit_table$calib),by=list(fit_table$name),FUN=mean)
346 |   rownames(fv) = fv[,1]
347 |   fv = fv[,-1]
348 |   fv
349 | }
350 | 
351 | transform.and.scale = function(mat,fv) {
352 |   md <- apply(mat, 1, median)
353 |   A <- rownames(mat)
354 |   mat <- (mat - md)/5000
355 |   mat[mat < 0] <- 0
356 |   mat <- (mat[A,]^fv[A,2])/fv[A,3]
357 | }
358 | 
359 | combine.ref.mix = function(fv,ref1,ref2,families,ref2_types) {
360 |   temp = t(cbind(transform.and.scale(ref1[[1]],fv),transform.and.scale(ref1[[2]],fv),transform.and.scale(ref1[[3]],fv)))
361 |   temp=aggregate(temp,by=list(rownames(temp)),FUN=mean)
362 |   rownames(temp) = temp[,1]
363 |   temp = temp[,-1]
364 |   ref1 = t(temp)
365 |   
366 |   temp = t(cbind(transform.and.scale(ref2[[1]],fv),transform.and.scale(ref2[[2]],fv),transform.and.scale(ref2[[3]],fv)))
367 |   temp=aggregate(temp,by=list(rownames(temp)),FUN=mean)
368 |   rownames(temp) = temp[,1]
369 |   temp = temp[,-1]
370 |   ref2 = t(temp)
371 |   
372 |   ref.mix = ref1
373 |   
374 |   temp = intersect(rownames(ref.mix),rownames(families)[families[,1] %in% ref2_types])
375 |   ref.mix[temp,] = ref2[temp,]
376 |   ref.mix[,temp] = ref2[,temp]
377 |   ref.mix
378 | }
379 | 
380 | combine.ref.mix.array = function(fv.array,ref1,ref2,ref2_types) {
381 |   temp = t(cbind(transform.and.scale(ref1[[1]],fv.array),transform.and.scale(ref1[[2]],fv.array)))
382 |   temp=aggregate(temp,by=list(rownames(temp)),FUN=mean)
383 |   rownames(temp) = temp[,1]
384 |   temp = temp[,-1]
385 |   ref1 = t(temp)
386 |   
387 |   temp = t(cbind(transform.and.scale(ref2[[1]],fv.array),transform.and.scale(ref2[[2]],fv.array),transform.and.scale(ref2[[3]],fv.array)))
388 |   temp=aggregate(temp,by=list(rownames(temp)),FUN=mean)
389 |   rownames(temp) = temp[,1]
390 |   temp = temp[,-1]
391 |   ref2 = t(temp)
392 |   
393 |   ref.array.mix = rbind(ref1[!(rownames(ref1) %in% ref2_types),],ref2[ref2_types,colnames(ref1)])
394 |   ref.array.mix = cbind(ref.array.mix[,!(colnames(ref1) %in% ref2_types)],ref2[rownames(ref.array.mix),ref2_types])
395 |   ref.array.mix
396 | }
397 | 
398 | create.spillover.matrix = function(ref.mix,fv,families,working.dir) {
399 |   K = ref.mix/diag(ref.mix)
400 |   fv[rownames(K),3] = 2*diag(ref.mix)*fv[rownames(K),3]
401 |   K[K>0.5] = 0.5
402 |   families = families[rownames(K),]
403 |   for (i in 1:dim(K)[1]) {
404 |     id = which(dependencies$types==rownames(K)[i])
405 |     if (length(id)==1) {
406 |       A = rownames(K) %in% dependencies$dep[[id]]
407 |       K[i,A] = 0
408 |       K[A,i] = 0
409 |     }
410 |    if (families[i,2]=="Parent") {
411 |       K[i,which(families[,2]!="Parent")]=0
412 |     } else {
413 |       K[i,rownames(K) == families[rownames(K)[i],2]]=0
414 |       K[i,intersect(which(!(families[,2] %in% families[rownames(K)[i],2])),which(families[,2]!="Parent"))]=0
415 |     }
416 |   }
417 | 
418 |   diag(K) <- 1
419 |   
420 |   pheatmap(K,cluster_rows=FALSE,cluster_cols=FALSE)
421 |   
422 |   spill = list(K = K,fv = fv)
423 | }
424 | 


--------------------------------------------------------------------------------
/Dev_scripts/xCell_dev_pipeline.R:
--------------------------------------------------------------------------------
 1 | working.dir = "~/Documents/xCell"
 2 | source(paste0(working.dir,'/xCell.R'))
 3 | source(paste0(working.dir,'/xCell_dev_functions.R'))
 4 | 
 5 | # -- read reference data sets -- #
 6 | data_sets = c('fantom','blueprint','encode','iris','novershtern','hpca')
 7 | data_types = c('seq','seq','seq','array','array','array')
 8 | 
 9 | families=read.table(file.path(working.dir,'cells_families.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE)
10 | dependencies = read.types.dependencies(working.dir,'types_dependecies.txt')
11 | 
12 | # --- create train and test sets --- #
13 | create.train.test.ref.sets(working.dir,data_sets,nsamples=4)
14 | 
15 | # read reference data sets
16 | ref = read.data.sets(working.dir,data_sets,'train')
17 | for(i in 1:length(ref)) {
18 |   ref[[i]]$type=  data_types[i]
19 | }
20 | 
21 | # get common genes
22 | genes.use = rownames(ref[[1]]$expr)
23 | for (i in 2:length(ref)) {
24 |   genes.use = intersect(genes.use,rownames(ref[[i]]$expr))
25 | }
26 | length(genes.use)
27 | 
28 | # -- create all signatures and choose best signatures -- #
29 | not_over_expressed = unlist(read.delim(file.path(working.dir,'not_overexpressed_genes.txt'),sep='\n'))
30 | create.signatures(ref,dependencies,genes.use,not_over_expressed,rownames(families)[families[,3]=="Epithelial"],working.dir)
31 | score.ref.signatures(ref,genes.use,working.dir)
32 | choose.best.signatures(ref,3,dependencies,1,working.dir)
33 | 
34 | # save signatures files as Gmt format
35 | signatures = read.signatures.dir(file.path(working.dir,'best_signatures'))
36 | toGmt(signatures,file.path(working.dir,'xCell_signatures.txt'))
37 | signatures = getGmt(file.path(working.dir,'xCell_signatures.txt'))
38 | 
39 | # -- trasnformation RNA-seq -- #
40 | nrep = 125
41 | 
42 | # -- create synthetic mixtures for learning transformation parameters and score them -- #
43 | nrep=125
44 | create.onecell.mix(ref[data_types=="seq"],genes.use,'MPP',nrep,working.dir)
45 | create.onecell.mix(ref[data_types=="seq"],genes.use,'Endothelial cells',nrep,working.dir)
46 | create.onecell.mix(ref[data_types=="array"],genes.use,'Erythrocytes',nrep,working.dir)
47 | create.onecell.mix(ref[data_types=="array"],genes.use,'Monocytes',nrep,working.dir)
48 | score.onecell.mix(signatures,genes.use,working.dir,FALSE)
49 | 
50 | # -- learn transformation parameteres - seq -- #
51 | fv.mpp = learn.transform.parameters(data_sets[data_types=="seq"],paste0(working.dir,'/mixtures/scores'),'MPP',125,0.25)
52 | fv.endo = learn.transform.parameters(data_sets[data_types=="seq"],paste0(working.dir,'/mixtures/scores'),'Endothelial cells',125,0.25)
53 | 
54 | fv = fv.mpp[intersect(rownames(fv.mpp),rownames(families)[!(families[,1]=="HSC")]),]
55 | fv = rbind(fv,fv.endo[intersect(rownames(fv.endo),rownames(families)[families[,1]=="HSC"]),])
56 | temp = setdiff(rownames(families),rownames(fv))
57 | x = t(matrix(c(mean(fv[,1]),mean(fv[,2]),mean(fv[,3])),3,length(temp)))
58 | rownames(x) = temp
59 | fv = rbind(fv,x)
60 | 
61 | # -- learn transformation parameteres - array -- #
62 | fv.eryt = learn.transform.parameters(data_sets[5:6],paste0(working.dir,'/mixtures/scores'),'Erythrocytes',125,0.25)
63 | fv.mono = learn.transform.parameters(data_sets[4:6],paste0(working.dir,'/mixtures/scores'),'Monocytes',125,0.25)
64 | 
65 | fv.array = rbind(fv.eryt,fv.mono)
66 | temp = setdiff(rownames(families),rownames(fv.array))
67 | x = t(matrix(c(mean(fv.array[,1]),mean(fv.array[,2]),mean(fv.array[,3])),3,length(temp)))
68 | rownames(x) = temp
69 | fv.array = rbind(fv.array,x)
70 | 
71 | # -- create synthetic references (25%) -- #
72 | ref.mpp = create.ref.mix(ref[data_types=="seq"],genes.use,'MPP',signatures,0.25,working.dir)
73 | ref.endo = create.ref.mix(ref[data_types=="seq"],genes.use,'Endothelial cells',signatures,0.25,working.dir)
74 | ref.eryt = create.ref.mix(ref[5:6],genes.use,'Erythrocytes',signatures,0.25,working.dir)
75 | ref.mono = create.ref.mix(ref[data_types=="array"],genes.use,'Monocytes',signatures,0.25,working.dir)
76 | 
77 | # -- combine reference matrices -- #
78 | ref.mix = combine.ref.mix(fv,ref.mpp,ref.endo,families,c('HSC'))
79 | ref.array.mix = combine.ref.mix.array(fv.array,ref.eryt,ref.mono,c('Erythrocytes','Th1 cells','Th2 cells','CD4+ memory T-cells','MEP','Platelets'))
80 | 
81 | # add missing cell types
82 | temp = ref.array.mix[rownames(ref.mix),setdiff(colnames(ref.array.mix),colnames(ref.mix))]
83 | ref.mix = cbind(ref.mix,temp)
84 | ref.mix = ref.mix[rownames(ref.mix),row.names(ref.mix)]
85 | 
86 | temp = ref.mix[rownames(ref.array.mix),setdiff(colnames(ref.mix),colnames(ref.array.mix))]
87 | ref.array.mix = cbind(ref.array.mix,temp)
88 | ref.array.mix = ref.array.mix[rownames(ref.array.mix),row.names(ref.array.mix)]
89 | 
90 | # -- create spillover matrices and save data -- #
91 | spill = create.spillover.matrix(ref.mix,fv,families,working.dir)
92 | spill.array = create.spillover.matrix(ref.array.mix,fv.array,families,working.dir)
93 | 
94 | xCell.data = list(spill=spill,spill.array=spill.array,signatures=signatures,genes=genes.use)
95 | save(xCell.data,file=file.path(working.dir,'xCell.data.Rdata'))
96 | save(ref.mpp,ref.endo,ref.mix,fv.mpp,fv.endo,fv,ref.eryt,ref.mono,ref.array.mix,fv.eryt,fv.mono,fv.array,file=file.path(working.dir,'dev.data.Rdata'))
97 | 


--------------------------------------------------------------------------------
/Dev_scripts/xCell_plots.R:
--------------------------------------------------------------------------------
  1 | library(RColorBrewer)
  2 | library(gplots)
  3 | library(stats)
  4 | library(corrplot)
  5 | library(stringr)
  6 | library(psych)
  7 | library(pheatmap)
  8 | library(MCPcounter)
  9 | 
 10 | createMatrixNoDups = function(data) {
 11 |   M = as.matrix(data[,-1])
 12 |   samples = colnames(M)
 13 |   genes = data[,1]
 14 |   M = as.numeric(as.character(M))
 15 |   M = matrix(M,nrow=length(genes),byrow=FALSE)
 16 |   if (length(unique(genes))!=length(genes)) {
 17 |     M = aggregate(x = M, by = list(genes), FUN = mean)
 18 |     genes = M[,1]
 19 |     M = M[,-1]
 20 |   } 
 21 |   rownames(M) = genes
 22 |   colnames(M) = samples
 23 |   return (as.matrix(M))
 24 | }
 25 | 
 26 | mix.protocol = function(working.dir,mix_set,spill,alpha=1,do.ciber=TRUE,do.known=TRUE,do.mcp=FALSE,transpose=FALSE,ct_order=NULL,font.size=7,show.before=TRUE) {
 27 |   colors <- brewer.pal(11, "RdBu")
 28 |   m=(read.table(paste0(working.dir,'/simulations/mix_',mix_set,'_scores.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
 29 |   dist=(read.table(paste0(working.dir,'/simulations/mix_',mix_set,'_dist.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
 30 |   
 31 |   oclust='original'
 32 |   
 33 |   if(is.null(ct_order)) {
 34 |     ct_order = rownames(dist)
 35 |   }
 36 |   rxr=cor(t(m[rownames(dist),]),t(dist),method="pearson")
 37 |   pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_raw.pdf'),width=4,height=4)
 38 |   rxrc = corrplot(as.matrix(rxr[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 39 |   dev.off()
 40 |   mixScatters(paste0(working.dir,'/simulations/plots/scatters/',mix_set,'_scatters_raw.pdf'),m[rownames(dist),],dist,norm=TRUE)
 41 |   
 42 |   M = transformScores(m,spill$fv)
 43 |   rxb=cor(t(M[rownames(dist),]),t(dist),method="pearson")
 44 |   pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_transform.pdf'),width=4,height=4)
 45 |   rxbc = corrplot(as.matrix(rxb[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 46 |   dev.off()
 47 |   mixScatters(paste0(working.dir,'/simulations/plots/scatters/',mix_set,'_scatters_transform.pdf'),M[rownames(dist),],dist,norm=FALSE)
 48 | 
 49 |   M2 = spillOver(M,spill$K,alpha)
 50 |   rx=cor(t(M2[ct_order,]),t(dist[ct_order,]),method="pearson")
 51 |   pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_spill.pdf'),width=4,height=4)
 52 |   corrplot(as.matrix(rx),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 53 |   dev.off()
 54 |   A = intersect(rownames(M2),rownames(dist))
 55 |   mixScatters(paste0(working.dir,'/simulations/plots/scatters/',mix_set,'_scatters_spill.pdf'),M2[A,],dist[A,],norm=FALSE)
 56 |   mixScatters(paste0(working.dir,'/simulations/plots/scatters/spill/',mix_set,'_scatters_spill.pdf'),M2[A,],dist[A,],norm=FALSE)
 57 |   
 58 |   pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_corrplots.pdf'),width=8,height=4)
 59 |   par(mfrow=c(1,2))
 60 |   rxbc=corrplot(as.matrix(rxb[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 61 |   rxba=corrplot(as.matrix(rx[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
 62 |   dev.off()
 63 |   
 64 |   ddiag = sum(diag(rxba))/sum(diag(rxbc))
 65 |   drest = (sum(abs(rxba))-sum(diag(rxba)))/(sum(abs(rxbc))-sum(diag(rxbc)))
 66 |   ddist = cor(t(dist))
 67 |   
 68 |   badCor = rxba>0.25 | rxbc>0.25
 69 |   diag(badCor)<-FALSE
 70 |   diff_expectedA = rxba[badCor] - ddist[badCor]
 71 |   diff_expectedB = rxbc[badCor] - ddist[badCor]
 72 |   
 73 |   diff_expected = sum(abs(diff_expectedA))/sum(abs(diff_expectedB))
 74 |   
 75 |   badCor1 = rxba>0.25 
 76 |   badCor2 = rxbc>0.25
 77 |   diag(badCor1)<-FALSE
 78 |   diag(badCor2)<-FALSE
 79 |   
 80 |   diff_expectedA = rxba[badCor] - ddist[badCor]
 81 |   diff_expectedB = rxbc[badCor] - ddist[badCor]
 82 |   
 83 |   message(paste(mix_set,mean(diag(rxba)),ddiag,drest,sum(badCor),diff_expected,sum(badCor2),sum(badCor1),sum(badCor1 & badCor2),sep='\t'))
 84 |   
 85 |   aranbef = cbind(as.matrix(diag(rxb)),rownames(rxb),'xCell (before)')
 86 |   rownames(aranbef) = str_c(rep('xCell (before)',dim(aranbef)[1]),' ',rownames(aranbef))
 87 |   
 88 |   aran = cbind(as.matrix(diag(rx)),rownames(rx),'xCell')
 89 |   rownames(aran) = str_c(rep('xCell',dim(aran)[1]),' ',rownames(aran))
 90 |   if (show.before==TRUE) {
 91 |     R = rbind(aran,aranbef)
 92 |   } else {
 93 |     R = aran
 94 |   }
 95 |   ciber = list()
 96 |   if (do.ciber == TRUE) {
 97 |     rc = ciberAnalysis(paste0(working.dir,'/simulations/CIBERSORT.',mix_set,'.txt'),dist,paste0(working.dir,'/simulations/plots/',mix_set,'_cibersort.pdf'))
 98 |     ciber = cbind(as.matrix(diag(rc$r)),rownames(rc$r),'Newman')
 99 |     rownames(ciber) = str_c(rep('Newman',dim(ciber)[1]),' ',rownames(ciber))
100 |     A = intersect(rownames(rc$x),rownames(dist))
101 |     z1 = cor(as.matrix(rc$x[A,]),as.matrix(rc$y[A,]))
102 |     z2 = cor(M2[A,],dist[A,])
103 |     blue <- rgb(0, 0, 1, alpha=0.5)
104 |     red <- rgb(1, 0, 0, alpha=0.5)
105 |     pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_xcell_cibersort.pdf'),width=3,height=4)
106 |     barplot(sort(diag(z2)),border=NA,xaxt='n',ylab="R (Pearson)",col=blue,space=0)
107 |     barplot(sort(diag(z1)),border=NA,xaxt='n',col=red,space=0,add=TRUE)
108 |     print(sprintf('N = %d, xCell = %.2f, CIBEROSRT = %.2f',length(A),median(diag(z2)),median(diag(z1))))
109 |     dev.off()
110 |     R = rbind(R,ciber)
111 |     
112 |   } 
113 |   if (do.known == TRUE) {
114 |     pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_known.pdf'),width=4,height=8)
115 |     rk = knownAnalysis(dist,paste0(working.dir,'/simulations/mix_',mix_set,'_known.txt'))
116 |     dev.off()
117 |     R = rbind(R,rk$out)
118 |     
119 |   }
120 |   if (do.mcp == TRUE) {
121 |     rmcp = mcp.analyze('bp_n12')
122 |     mcp = cbind(rmcp,names(rmcp),'MCP')
123 |     rownames(mcp) = str_c(rep('MCP',length(rmcp)),' ',names(rmcp))
124 |     R = rbind(R,mcp)
125 |   }
126 |   
127 |   if (do.ciber==TRUE && do.known==TRUE) {
128 |     colnames(rc$r2) = paste('CIBERSORT',colnames(rc$r2))
129 |     known_ciber_mat = cbind(rk$mat,rc$r2[rownames(rk$mat),])
130 |     pdf(paste0(working.dir,'/simulations/plots/',mix_set,'_known_ciber.pdf'),width=4,height=8)
131 |     mat = corrplot(as.matrix(known_ciber_mat),tl.cex = 0.7,tl.col = "black",col=colorRampPalette(rev(colors))(200))
132 |     dev.off()
133 |   } else {
134 |     known_ciber_mat = rk$mat
135 |   }
136 |   
137 |   if (do.ciber==TRUE || do.known==TRUE) {
138 |     mat = compareCorrelations(R,c("xCell","xCell (before)","Bindea","Charoentong","Palmer","Rooney","Tirosh","Newman",'MCP'),NULL,paste0(working.dir,'/simulations/plots/',mix_set,'_compare.pdf'),8,3,drop.only1 = TRUE,transpose,ct_order=ct_order,font.size=font.size)
139 |     
140 |     
141 |     pdf(paste0(working.dir,'/simulations/plots/page/',mix_set,'_page.pdf'),width=8,height=8)
142 |     par(mfrow=c(2,2))
143 |     corrplot(as.matrix(rxb[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
144 |     corrplot(as.matrix(rx[ct_order,ct_order]),tl.cex = font.size/10,tl.col = "black",col=colorRampPalette(rev(colors))(200),order=oclust)
145 |     tl.col = c(rep('black',dim(rk$mat)[2]),rep('blue',dim(rc$r2)[2]))
146 |     corrplot(as.matrix(known_ciber_mat),tl.cex = 0.7,tl.col = tl.col,col=colorRampPalette(rev(colors))(200))
147 |     corrplot(as.matrix(mat),method="pie",na.label="-",tl.cex = font.size/10,cl.cex=font.size/10,number.cex=(font.size+3)/10,tl.col = "black",col=colorRampPalette(rev(colors))(200))
148 |     dev.off()
149 |     
150 |     R 
151 |   } else {
152 |     rx
153 |   }
154 |   
155 | }
156 | 
157 | mcp.analyze = function(mix_set) {
158 |   ex=read.table(paste0(working.dir,'/simulations/mix_',mix_set,'_expr.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE)
159 |   dist=read.table(paste0(working.dir,'/simulations/mix_',mix_set,'_dist.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE)
160 |   mcp = MCPcounter.estimate(ex,'HUGO_symbols')
161 |   rownames(mcp) = c('T-cells','CD8+ T-cells','Cytotoxic','NK cells','B-cells','Monocytes','DC','Neutrophils','Endothelial cells','Fibroblasts')
162 |   A = intersect(rownames(mcp),rownames(dist))
163 |   r = corr.test(t(mcp[A,]),t(dist[A,]))
164 |   diag(r$r)
165 | }
166 | 
167 | knownAnalysis = function(dist,fn) {
168 |   colors <- brewer.pal(11, "RdBu")
169 |   
170 |   mk=read.table(fn,sep="\t",header=TRUE,row.names=1, as.is=TRUE)
171 |   source = unlist(lapply(rownames(mk),function(x) {unlist(strsplit(x,'[_%]'))[1]}))
172 |   known = unlist(lapply(rownames(mk),function(x) {unlist(strsplit(x,'[_%]'))[2]}))
173 |   rownames(mk) = str_c(source," ",known)
174 |   id = sort(known,index.return=TRUE)
175 |   known = known[id$ix]
176 |   source = source[id$ix]
177 |   mk = mk[id$ix,]
178 |   rows = known %in% rownames(dist)
179 |   dist = dist[sort(rownames(dist)),]
180 |   r=cor(t(mk),t(dist),method='pearson')
181 |   out = data.frame()
182 |   A = known %in% rownames(dist)[1]
183 |   out = cbind(r[A,1,drop=FALSE],known[A],source[A])
184 |   for (i in 2:length(rownames(dist))) {
185 |     A = known %in% rownames(dist)[i]
186 |     out = rbind(out,cbind(r[known %in% rownames(dist)[i],i,drop=FALSE],known[A],source[A]))
187 |   }
188 |   colnames(out) = c('r','cell_type','source')
189 |   r2 = r[rows,]
190 |   colors <- brewer.pal(11, "RdBu")
191 |   
192 |   mat = corrplot(as.matrix(t(r2)),tl.cex = 0.7,tl.col = "black",col=colorRampPalette(rev(colors))(200))
193 |   res = list()
194 |   res$mat = mat
195 |   res$out = out
196 |   res
197 | }
198 | 
199 | fixScales = function(X,Y) {
200 |   z = matrix(0,dim(X)[1])
201 |   for (i in 1:dim(X)[1]) {
202 |     x = as.matrix(X[i,])
203 |     y = as.matrix(t(Y[i,]))
204 |     fit <- lm(x ~ y)
205 |     z[i] = coef(fit)[2]
206 |   }
207 |   rownames(z) = rownames(X)
208 |   z
209 |   
210 | }
211 | 
212 | compareCorrelations = function(br,usr,uct,fn,w,h,drop.only1=TRUE,transpose=FALSE,font.size=7,ct_order=NULL) {
213 |   colors <- brewer.pal(11, "RdBu")
214 |   
215 |   abr = aggregate(as.numeric(br[,1])~rownames(br),FUN=mean)
216 |   nbr = sort(unique(str_c(br[,3],'_',br[,2])))
217 |   ct = unlist(lapply(nbr,function(x) {unlist(strsplit(x,'[_%]'))[2]}))
218 |   sr = tolower(unlist(lapply(nbr,function(x) {unlist(strsplit(x,'[_%]'))[1]})))
219 |   #usr = unique(sr)
220 |   if (is.null(uct)) {
221 |     uct = sort(unique(ct))
222 |   }
223 |   mat = matrix(nrow=length(usr),ncol=length(uct))
224 |   for (i in 1:length(usr)) {
225 |     for (j in 1:length(uct)) {
226 |       x = abr[sr==tolower(usr[i]) & ct==uct[j],2]
227 |       if(length(x)==1)
228 |         mat[i,j] = x
229 |     }
230 |   }
231 |   colnames(mat) = uct
232 |   rownames(mat) = usr
233 |   n = apply(mat,2,function(x) sum(!is.na(x)))
234 |   if(drop.only1==TRUE)
235 |     mat = mat[,n>1]
236 |   mat = mat[!apply(mat, 1, function(x){sum(!is.na(x))==0}),]
237 |   pdf(fn,width=w,height=h)
238 |   if (is.null(ct_order))
239 |     ct_order = colnames(mat)
240 |   if (transpose==TRUE)
241 |     mat = t(mat[,ct_order])
242 |   corrplot(as.matrix(mat),method="pie",na.label="-",tl.cex = font.size/10,cl.cex=font.size/10,number.cex=(font.size+3)/10,tl.col = "black",col=colorRampPalette(rev(colors))(200))
243 |   dev.off()  
244 |   mat
245 | }
246 | 
247 | ciberAnalysis = function(fn,dist,fn_pdf,remove.dups=FALSE) {
248 |   m=read.table(fn,sep="\t",header=TRUE,row.names=NULL, as.is=TRUE)
249 |   if (remove.dups==TRUE) {
250 |     m = createMatrixNoDups(m)
251 |     B = intersect(rownames(m),colnames(dist))
252 |     m = m[B,]
253 |     dist = dist[,B]
254 |   } else {
255 |     m = m[,-1]
256 |   }
257 |   m = t(m[,1:22])
258 |   rownames(m) = c("naive B-cells","Memory B-cells","Plasma cells","CD8+ T-cells","CD4+ naive T-cells","CD4+ memory T-cells rest","CD4+ memory T-cells act","Tfh","Tregs","Tgd","NK cells rest","NK cells act","Monocytes","Macrophages M0","Macrophages M1","Macrophages M2","DC rest","DC act","Mast cells rest","Mast cells act","Eosinophils","Neutrophils")
259 |   cd4 = colSums(m[c("CD4+ naive T-cells","CD4+ memory T-cells rest","CD4+ memory T-cells act","Tregs"),])
260 |   cd4mem = colSums(m[c("CD4+ memory T-cells rest","CD4+ memory T-cells act"),])
261 |   nk = colSums(m[c("NK cells rest","NK cells act"),])
262 |   dc = colSums(m[c("DC rest","DC act"),])
263 |   bcell = colSums(m[c("naive B-cells","Memory B-cells","Plasma cells"),])
264 |   mast = colSums(m[c("Mast cells rest","Mast cells act"),])
265 |   macs = colSums(m[c("Macrophages M0","Macrophages M1","Macrophages M2"),])
266 |   m = rbind(m,cd4,cd4mem,cd4mem,nk,dc,bcell,mast,macs)
267 |   rownames(m)[23:30] = c("CD4+ T-cells","CD4+ Tcm","CD4+ Tem","NK cells","DC","B-cells","Mast cells","Macrophages")
268 |   A = intersect(rownames(m),rownames(dist))
269 |   
270 |   M = m[A,]
271 |   d = dist[A,]
272 |   r=corr.test(t(M),t(d),method="pearson")
273 |   r$r[is.na(r$r)] = 0
274 |   pdf(fn_pdf,width=4,height=4)
275 |   colors <- brewer.pal(11, "RdBu")
276 |   corrplot(as.matrix(r$r),tl.cex = 0.7,tl.col = "black",col=colorRampPalette(rev(colors))(200))
277 |   dev.off()
278 |   r2=cor(M,d)
279 |   barplot(sort(diag(r2)))
280 |   #print(median(diag(r2)))
281 |   r$x = M
282 |   r$y = d
283 |   
284 |   r2 = cor(t(M),t(dist),method="pearson")
285 |   r$r2 = t(r2)
286 |   r
287 | }
288 | 
289 | 
290 | mixScatters = function(fn,m,dist,norm=FALSE,point.cex=0.25) {
291 |   range01 <- function(x){(x-min(x))/(max(x)-min(x))}
292 |   corr_eqn <- function(x,y, digits = 2) {
293 |     corr_coef <- round(cor(x, y,method="pearson"), digits = digits)
294 |     paste("R = ", corr_coef)
295 |   }
296 |   
297 |   names = rownames(dist)
298 |   n = dim(dist)[1]
299 |   nr = round(n/2+0.05)
300 |   pdf(fn,width=nr,height=2)
301 |   layout(matrix(c(1:(nr*2)),2,nr))
302 |   par( mai = c(0, 0, 0, 0))
303 |   for (i in 1:n) {
304 |     y = t(as.matrix(dist[names[i],]))
305 |     x = as.matrix(m[names[i],])
306 |     
307 |     if (norm==FALSE) {
308 |       maxxy1 = max(max(x),max(y))
309 |       maxxy2 = max(max(x),max(y))
310 |     } else {
311 |       x = x-min(x)
312 |       y = y-min(y)
313 |       maxxy1 = max(x)*1.1
314 |       maxxy2 = max(y)*1.1
315 |     }
316 |     plot(x,y,pch=16,cex=point.cex,col="darkblue",yaxt='n',xaxt='n',xlim=c(0,maxxy1),ylim=c(0,maxxy2))
317 |     #lines(lowess(x,y), col="red")
318 |     abline(lm(as.vector(y)~as.vector(x)),col='red')
319 |     r=sprintf('%.2f',cor(as.vector(x),as.vector(y),method="pearson"))
320 |     #text(0.01, 0.95, labels=names[i], adj=c(0, .5),cex=0.65)
321 |     text(0.005, maxxy2*0.95, labels=names[i], adj=c(0, .5),cex=0.85)
322 |     text(maxxy1*0.6, maxxy2*0.02, labels=bquote(rho == ~ .(r)), adj=c(0, .5),cex=0.85)
323 |     
324 |   }
325 |   dev.off()
326 | }
327 | 
328 | 
329 | library(psych)
330 | 
331 | analyze.facs = function(fcs,scores,sets,known=FALSE,spill=NULL,alpha=0.5,fn=NULL) {
332 |   a = intersect(colnames(scores),colnames(fcs))
333 |   rownames(fcs) = sets
334 |   
335 |   if(known==TRUE) {
336 |     source = unlist(lapply(rownames(scores),function(x) {unlist(strsplit(x,'[_%]'))[1]}))
337 |     ct = unlist(lapply(rownames(scores),function(x) {unlist(strsplit(x,'[_%]'))[2]}))
338 |     #rownames(scores) = str_c(source," ",ct)
339 |     y = scores[ct %in% sets,a]
340 |     ct2 = ct[ct %in% sets]
341 |     x = sapply(ct2,function(x) fcs[x,a])
342 |     x <- matrix(unlist(x), ncol = dim(x)[2], byrow = FALSE)
343 |     colnames(x) = rownames(y)
344 |   } else {
345 |     ct = intersect(sets,rownames(scores))
346 |     y = scores[ct,a]
347 |     x = t(fcs[ct,a])
348 |   }
349 |   
350 |   if (is.null(spill)) { 
351 |     s = as.matrix(y)
352 |   } else {
353 |     #spill$fv[,2]=1
354 |     z = transformScores(as.matrix(y),spill$fv)
355 |     s = spillOver(z,spill$K,alpha)
356 |     #s = y
357 |     A = intersect(colnames(x),rownames(s))
358 |     x = x[,A]
359 |     s=s[A,]
360 |   }
361 |   r = corr.test(x,t(s),method="pearson",adjust='none')
362 |   dr = diag(r$r)
363 |   names(dr) = colnames(r$r)
364 |   print(as.matrix(dr))
365 |   colors <- brewer.pal(11, "RdBu")
366 |   corrplot(r$r,tl.cex=0.5,tl.col = "black",col=colorRampPalette(rev(colors))(200))
367 |   
368 |   if (!is.null(fn)) {
369 |     pdf(paste0(fn,'.pdf'),width=3.2,height=4.7)
370 |     corrplot(r$r,tl.cex=0.5,tl.col = "black",col=colorRampPalette(rev(colors))(200))
371 |     dev.off()
372 |     thres= sort(diag(r$r),decreasing = TRUE)[7]
373 |     print(thres)
374 |     A = diag(r$r)>thres
375 |     mixScatters(paste0(fn,'_mix_scatters.pdf'),s[A,],as.matrix(t(x[,A])),norm=TRUE,point.cex=0.5)
376 |     
377 |   }
378 |   r2 = cor(t(x),s,method="spearman")
379 |   barplot(sort(diag(r2)))
380 |   print(median(diag(r2)))
381 |   r$x = t(x)
382 |   r$y = s
383 |   r
384 |   
385 | }
386 | 
387 | compare.facs = function(r,k,ciber,fn,drop.only1=TRUE) {
388 |   aran = cbind(diag(r$r),colnames(r$r),'xCell',diag(r$p))
389 |   rownames(aran) = str_c(rep('xCell',dim(aran)[1]),' ',aran[,2])
390 |   cbr = cbind(diag(ciber$r),colnames(ciber$r),'Newman',diag(ciber$p))
391 |   rownames(cbr) = str_c(rep('Newman',dim(cbr)[1]),' ',cbr[,2])
392 |   source = unlist(lapply(colnames(k$r),function(x) {unlist(strsplit(x,'[_%]'))[1]}))
393 |   ct = unlist(lapply(colnames(k$r),function(x) {unlist(strsplit(x,'[_%]'))[2]}))
394 |   known = cbind(diag(k$r),ct,source,diag(k$p))
395 |   rownames(known) = str_c(source," ",ct)
396 |   abr = rbind(aran,known,cbr)
397 |   
398 |   usr = c('xCell','Bindea','Charoentong','Palmer','Rooney','Tirosh','Newman')
399 |   ct = abr[,2]
400 |   uct = unique(abr[,2])
401 |   sr = tolower(abr[,3])
402 |   
403 |   mat.r= matrix(nrow=length(usr),ncol=length(uct))
404 |   mat.p = mat.r
405 |   for (i in 1:length(usr)) {
406 |     for (j in 1:length(uct)) {
407 |       x = abr[which(sr==tolower(usr[i]) & ct==uct[j]),]
408 |       mat.r[i,j] = as.numeric(x[1])
409 |       mat.p[i,j] = as.numeric(x[4])
410 |       
411 |     }
412 |   }
413 |   colnames(mat.r) = uct
414 |   rownames(mat.r) = usr
415 |   n = apply(mat.r,2,function(x) sum(!is.na(x)))
416 |   if(drop.only1==TRUE) {
417 |     mat.r = mat.r[,n>1]
418 |     mat.r = mat.r[,!is.na(mat.r[1,])]
419 |     mat.p = mat.p[,n>1]
420 |     mat.p = mat.p[,!is.na(mat.r[1,])]
421 |     avg.fcs=apply(fcs,1,function(x) mean(x,na.rm=TRUE))
422 |     avg.fcs2 = which(colnames(mat.r) %in% rownames(fcs)[avg.fcs>0.01])
423 |     mat.r = mat.r[,avg.fcs2]
424 |     mat.p = mat.p[,avg.fcs2]
425 |   }
426 |   pdf(fn,width=5,height=3)
427 |   corrplot(as.matrix(mat.r),p.mat=mat.p,sig.level=0.05,pch.col = "gray", pch.cex = 1,method="pie",na.label="-",tl.cex = 0.7,tl.col = "black",col=colorRampPalette(rev(colors))(200))
428 |   dev.off()  
429 |   mat.r  
430 |   
431 | }
432 | 
433 | fit.scores.plots = function(fv,families,working.dir,ctrl1,ctrl2,ctrl2_type=NULL,platform) {
434 |   files1 = list.files(path=paste0(working.dir,'/mixtures/scores/'),pattern=paste0('_',ctrl1,'_'))
435 |   files2 = list.files(path=paste0(working.dir,'/mixtures/scores/'),pattern=paste0('_',ctrl2,'_'))
436 |   if (!is.null(ctrl2_type)) {
437 |     for (i in 1:length(files1)) {
438 |       ct = gsub('(.txt)','',strsplit(files1[i],'_')[[1]][3]);
439 |       if(ctrl2_typefamilies[ct,1]==ctrl2_type) 
440 |         files1[i] = 'NA'
441 |     }
442 |     for (i in 1:length(files2)) {
443 |       ct = gsub('(.txt)','',strsplit(files2[i],'_')[[1]][3]);
444 |       if(families[ct,1]!=ctrl2_type) 
445 |         files2[i] = 'NA'
446 |     }
447 |   }
448 |   files = c(files1,files2)
449 |   files = files[files!="NA"]
450 |   n = 32
451 |   for (j in 1:3) {
452 |     p = list()
453 |     N = min(n,length(files)-n*(j-1))
454 |     for (k in 1:N) {
455 |       i = k+(j-1)*n
456 |       ct = gsub('(.txt)','',strsplit(files[i],'_')[[1]][3]);
457 |       sr = gsub('(.txt)','',strsplit(files[i],'_')[[1]][1]);
458 |       mix <- as.matrix(read.table(paste0(working.dir,'/mixtures/scores/',files[i]), header=TRUE, sep="\t", row.names=1, as.is=TRUE))
459 |       d = list()
460 |       d$x = (mix[ct,seq(2,33)]-mix[ct,2])/5000
461 |       d$y=seq(0.008,0.256,0.008)
462 |       
463 |       d$z = fv[ct,1]*d$x^fv[ct,2]
464 |       d = as.data.frame(d)
465 |       
466 |       #message(sprintf('%d\t%s\t%s\t%f\t%f\t%f',i,sr,ct,mix[ct,2],coef(z)[1],coef(z)[2]))
467 |       
468 |       if (sr == 'blueprint') {
469 |         sr = 'B'
470 |       } else if (sr == 'encode') {
471 |         sr 
472 |       }
473 |       if (ct == 'Class-switched memory B-cells') {
474 |         ct = 'CS memory B-cells'
475 |       }
476 |       p[[k]] = ggplot(d, aes(y=y)) + geom_line(aes(x=x), colour="red") + geom_line(aes(x=z), colour="blue")+theme_bw()+geom_point(aes(y=y,x=x),size=0.25,col='black')+geom_point(aes(y=y,x=z),size=0.25,col='black')+annotate("text", hjust=0, x = 0.02, y = 0.205, size=2.5,label = sprintf('y==%.2f*x^%.2f',fv[ct,1],fv[ct,2]),parse=T)+annotate("text", hjust=0,x = 0.02, y = 0.24,size=2.5,label = paste0(ct,' (',toupper(substr(sr,1,1)),')'), fontface =2)
477 |       #p[[k]] = ggplot(d, aes(y=y)) + geom_line(aes(x=x), colour="red") + geom_line(aes(x=z), colour="blue")+theme_bw()+geom_point(aes(y=y,x=x),size=0.25,col='black')+geom_point(aes(y=y,x=z),size=0.25,col='black')+annotate("text", hjust=0, x = 0.02, y = 0.205, size=2.5,label = sprintf('y==%.2f*x^%.2f', coef(z)[1],coef(z)[2]),parse=T)+annotate("text", hjust=0,x = 0.02, y = 0.24,size=2.5,label = paste0(ct,' (',toupper(sr),')'), fontface =2)
478 |       p[[k]] = p[[k]] + theme(plot.margin=unit(c(0,0,0,0), "cm"),axis.line=element_blank(),axis.text.x=element_blank(),axis.text.y=element_blank(),axis.ticks=element_blank(),axis.title.x=element_blank(),axis.title.y=element_blank(),legend.position="none",panel.background=element_blank())
479 |     }
480 |     pdf(paste0(working.dir,'/plots/supp3_',platform,'_',j,'.pdf'),width=6,height=ceil(N/4))
481 |     do.call(grid.arrange,c(p,ncol=4))
482 |     dev.off()
483 |   }
484 | }
485 | 
486 | 
487 | test.mixture = function(mix.file,spill,alpha,cibersort=TRUE) {
488 |   scores = as.matrix(read.table(paste0(mix.file,'_scores.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
489 |   dist = as.matrix(read.table(paste0(mix.file,'_dist.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
490 |   
491 |   A = intersect(rownames(spill$fv),rownames(scores))
492 |   scores = scores[A,]
493 |   a = scores-apply(scores,1,min)
494 |   calib = as.vector(spill$fv[rownames(scores),'calib'])
495 |   fit_power = as.vector(spill$fv[rownames(scores),'fit'])
496 |   transformed = ((a/16000)^fit_power)/(2*calib)
497 |   transformed=t(t(transformed)/apply(transformed,2,sum))
498 |   A = intersect(rownames(spill$K),rownames(transformed))
499 |   K = spill$K[A,A]*alpha
500 |   diag(K)<-1
501 |   scores <- apply(transformed[A, ], 2, function(x) lsqlincon(K,x, lb = 0))
502 |   rownames(scores) = A
503 |   
504 |   r = cor(t(scores),t(dist))
505 |   if (cibersort==TRUE) {
506 |     ciber = as.matrix(read.table(paste0(mix.file,'_CIBERSORT.txt'),sep="\t",header=TRUE,row.names=1, as.is=TRUE))
507 |     ciber = ciber[,1:(dim(ciber)[2]-3)]
508 |     A = intersect(colnames(ciber),rownames(dist))
509 |     rc  = cor(ciber,t(dist))
510 |     rc[is.na(rc)] = 0
511 |     A = intersect(rownames(r),rownames(rc))
512 |     B = intersect(colnames(r),colnames(rc))
513 |     res = list(dist=dist,xCell=scores,CIBERSORT=ciber,RX=r[A,B],RC=rc[A,B])
514 |   } else {
515 |     res = list(dist=dist,xCell=scores,RX=r)
516 |   }
517 |   
518 |   colors <- brewer.pal(11, "RdBu")
519 |   col=colorRampPalette(rev(colors))(200)
520 |   
521 |   pdf(paste0(mix.file,'.pdf'))
522 |   A = intersect(colnames(res$RX),rownames(res$RX))
523 |   corrplot(res$RX[A,A],title='xCell',col=col)
524 |   if (cibersort==TRUE) {
525 |     A = intersect(colnames(res$RC),rownames(res$RC))
526 |     corrplot(res$RC[A,A],title='CIBERSORT',col=col)
527 |     b = cbind(diag(res$RX[A,A]),diag(res$RC[A,A]))
528 |     colnames(b) = c('xCell','CIBERSORT')
529 |     corrplot(b,method='number',col=col)
530 |   }
531 |   
532 |   corrplot(res$RX,title='xCell - All',col=col)
533 |   
534 |   if (cibersort==TRUE) {
535 |     corrplot(res$RC,title='CIBERSORT - All',col=col)
536 |   }
537 |   mixScatters(res$xCell,dist)
538 |   mtext("xCell", outer=TRUE,  cex=1, line=-0.5)
539 |   if (cibersort==TRUE) {
540 |     mixScatters(t(res$CIBERSORT),dist)
541 |     mtext("CIBERSORT", outer=TRUE,  cex=1, line=-0.5)
542 |   }
543 |   dev.off()
544 |   res
545 | }
546 | 
547 | choose.types.to.use = function(samples,n,dependencies) {
548 |   A = sample(length(samples),length(samples))
549 |   types.to.use = c()
550 |   i=1
551 |   while (i <= length(A)) {
552 |     id = which(dependencies$types==samples[A[i]])
553 |     if (length(id)==1) {
554 |       A = A[!(samples[A] %in% dependencies$dep[[id]])]
555 |     }
556 |     i=i+1
557 |   }
558 |   samples[A[1:min(n,length(A))]]
559 | }


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | importFrom("stats", "aggregate")
 2 | importFrom("utils", "write.table")
 3 | importFrom("GSVA", "gsva")
 4 | importFrom("GSEABase", "geneIds")
 5 | importFrom("pracma", "lsqlincon")
 6 | importFrom("utils", "data")
 7 | importFrom("MASS", "fitdistr")
 8 | importFrom("stats", "rbeta")
 9 | exportPattern("^[[:alpha:]]+")
10 | 


--------------------------------------------------------------------------------
/R/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dviraran/xCell/20e2919eefd37e15af35f29f4944e30697098a28/R/.DS_Store


--------------------------------------------------------------------------------
/R/xCell.R:
--------------------------------------------------------------------------------
  1 | #' xCell: A package for calculating cell type enrichments.
  2 | #'
  3 | #' @docType package
  4 | #' @name xCell
  5 | NULL
  6 | 
  7 | #' xCell datasets
  8 | #'
  9 | #' @format list:
 10 | #' \describe{
 11 | #'   \item{spill}{spillover matrix and calibration parameters}
 12 | #'   \item{signatures}{the signatures for calculating scores}
 13 | #'   \item{genes}{genes to use to calculate xCell}
 14 | #' }
 15 | "xCell.data"
 16 | 
 17 | #' The xCell analysis pipeline
 18 | #'
 19 | #' \code{xCellAnalysis} Returns the xCell cell types enrichment scores.
 20 | #'
 21 | #' @param expr the gene expression data set. A matrix with row names as symbols and columns as samples.
 22 | #' @param signatures a GMT object of signatures.
 23 | #' @param genes list of genes to use in the analysis.
 24 | #' @param spill the Spillover object for adjusting the scores.
 25 | #' @param rnaseq if true than use RNAseq spillover and calibration paramters, else use array parameters.
 26 | #' @param file.name string for the file name for saving the scores. Default is NULL.
 27 | #' @param scale if TRUE, uses scaling to trnasform scores using fit.vals
 28 | #' @param alpha a value to override the spillover alpha parameter. Deafult = 0.5
 29 | #' @param save.raw TRUE to save a raw
 30 | #' @param parallel.sz integer for the number of threads to use. Default is 4.
 31 | #' @param parallel.type Type of cluster architecture when using snow. 'SOCK' or 'FORK'. Fork is faster, but is not supported in windows.
 32 | #' @param cell.types.use a character list of the cell types to use in the analysis. If NULL runs xCell with all cell types.
 33 | #' The spillover compensation step may over compensate, thus it is always better to run xCell with a list of cell types that are expected
 34 | #' to be in the mixture. The names of cell types in this list must be a subset of the cell types that are inferred by xCell.
 35 | #'
 36 | #' @return the adjusted xCell scores
 37 | xCellAnalysis <- function(expr, signatures=NULL, genes=NULL, spill=NULL, rnaseq=TRUE, file.name = NULL, scale=TRUE,
 38 |                           alpha = 0.5, save.raw = FALSE, parallel.sz = 4, parallel.type = 'SOCK',
 39 |                           cell.types.use = NULL) {
 40 |   if (is.null(signatures))
 41 |     signatures = xCell.data$signatures
 42 |   if (is.null(genes))
 43 |     genes = xCell.data$genes
 44 |   if (is.null(spill)) {
 45 |     if (rnaseq==TRUE) {
 46 |       spill = xCell.data$spill
 47 |     } else {
 48 |       spill = xCell.data$spill.array
 49 |     }
 50 |   }
 51 | 
 52 |   # Caulcate average ssGSEA scores for cell types
 53 |   if (is.null(file.name) || save.raw==FALSE) {
 54 |     fn <- NULL
 55 |   } else {
 56 |     fn <- paste0(file.name,'_RAW.txt')
 57 |   }
 58 | 
 59 |   if (!is.null(cell.types.use)) {
 60 |     A = intersect(cell.types.use,rownames(spill$K))
 61 |     if (length(A)<length(cell.types.use)) {
 62 |       return ('ERROR - not all cell types listed are available')
 63 |     }
 64 |   }
 65 | 
 66 |   scores <- rawEnrichmentAnalysis(expr,signatures,genes,fn, parallel.sz = parallel.sz, parallel.type = 'SOCK')
 67 | 
 68 |   # Transform scores from raw to percentages
 69 |   scores.transformed <- transformScores(scores, spill$fv, scale)
 70 | 
 71 |   # Adjust scores using the spill over compensation matrix
 72 |   if (is.null(file.name)) {
 73 |     fn <- NULL
 74 |   } else {
 75 |     fn <- file.name
 76 |   }
 77 | 
 78 |   if (is.null(cell.types.use)) {
 79 |     scores.adjusted <- spillOver(scores.transformed, spill$K, alpha,fn )
 80 |     scores.adjusted = microenvironmentScores(scores.adjusted)
 81 |   } else {
 82 |     scores.adjusted <- spillOver(scores.transformed[cell.types.use,], spill$K, alpha,fn )
 83 |   }
 84 |   return(scores.adjusted)
 85 | }
 86 | 
 87 | #' Calculated raw xCell enrichment scores
 88 | #'
 89 | #' \code{rawEnrichmentAnalysis} Returns the raw xCell cell types enrichment scores.
 90 | #'
 91 | #' @param expr the gene expression data set. A matrix with row names as symbols and columns as samples.
 92 | #' @param signatures a GMT object of signatures.
 93 | #' @param genes list of genes to use in the analysis.
 94 | #' @param file.name string for the file name for saving the scores. Default is NULL.
 95 | #' @param parallel.sz integer for the number of threads to use. Default is 4.
 96 | #' @param parallel.type Type of cluster architecture when using snow. 'SOCK' or 'FORK'.
 97 | #'   Fork is faster, but is not supported in windows.
 98 | #'   This argument is ignored in GSVA versions >= 1.36.0. Register a `BiocParallel` backend instead. 
 99 | 
100 | #' @return the raw xCell scores
101 | rawEnrichmentAnalysis <- function(expr, signatures, genes, file.name = NULL, parallel.sz = 4, parallel.type = 'SOCK') {
102 | 
103 |   # Reduce the expression dataset to contain only the required genes
104 |   shared.genes <- intersect(rownames(expr), genes)
105 |   print(paste("Num. of genes:", length(shared.genes)))
106 |   expr <- expr[shared.genes, ]
107 |   if (dim(expr)[1] < 5000) {
108 |     print(paste("ERROR: not enough genes"))
109 |     return - 1
110 |   }
111 | 
112 |   # Transform the expression to rank
113 |   expr <- apply(expr, 2, rank)
114 | 
115 |   # Run ssGSEA analysis for the ranked gene expression dataset
116 |   if(packageVersion("GSVA") >= "1.50.0") {
117 |     # Legacy GSVA function was depreciated in version 1.50 and entirely removed in version 1.52.
118 |     gsvapar <- GSVA::ssgseaParam(exprData = expr, geneSets = signatures, normalize = FALSE)
119 |     scores <- GSVA::gsva(gsvapar)
120 |   } else if(packageVersion("GSVA") >= "1.36.0") {
121 |     # GSVA >= 1.36.0 does not support `parallel.type` any more. 
122 |     # Instead it automatically uses the backend registered by BiocParallel. 
123 |     scores <- GSVA::gsva(expr, signatures, method = "ssgsea",
124 |                          ssgsea.norm = FALSE,parallel.sz = parallel.sz)
125 |   } else {
126 |     scores <- GSVA::gsva(expr, signatures, method = "ssgsea",
127 |                          ssgsea.norm = FALSE,parallel.sz = parallel.sz,parallel.type = parallel.type)
128 |   }
129 | 
130 | 
131 |   scores = scores - apply(scores,1,min)
132 | 
133 |   # Combine signatures for same cell types
134 |   cell_types <- unlist(strsplit(rownames(scores), "%"))
135 |   cell_types <- cell_types[seq(1, length(cell_types), 3)]
136 |   agg <- aggregate(scores ~ cell_types, FUN = mean)
137 |   rownames(agg) <- agg[, 1]
138 |   scores <- agg[, -1]
139 | 
140 |   # Save raw scores
141 |   if (!is.null(file.name)) {
142 |     write.table(scores, file = file.name, sep = "\t",
143 |                 col.names = NA, quote = FALSE)
144 |   }
145 |   scores
146 | }
147 | 
148 | #' Transform scores from raw scores to fractions
149 | #'
150 | #' \code{transformScores} Returns the trasnformed xCell scores (not adjusted).
151 | #'
152 | #' @param scores raw scores of cell types calculated by rawEnrichmentAnalysis
153 | #' @param fit.vals the calibration values in the spill object (spill$fv).
154 | #' @param scale if TRUE, uses scaling to trnasform scores using fit.vals
155 | #' @param fn string for the file name for saving the scores. Default is NULL.
156 | #'
157 | #' @return the trasnformed xCell scores
158 | transformScores <- function(scores, fit.vals, scale=TRUE,
159 |                             fn = NULL) {
160 |   rows <- rownames(scores)[rownames(scores) %in% rownames(fit.vals)]
161 |   tscores <- scores[rows, ]
162 |   minX <- apply(tscores, 1, min)
163 |   A <- rownames(tscores)
164 |   tscores <- (as.matrix(tscores) - minX)/5000
165 |   tscores[tscores < 0] <- 0
166 |   if (scale==FALSE) {
167 |     fit.vals[A,3] = 1
168 |   }
169 | 
170 |   tscores <- (tscores^fit.vals[A,2])/(fit.vals[A,3]*2)
171 | 
172 |   if (!is.null(fn)) {
173 |     write.table(format(tscores, digits = 4), file = fn, sep = "\t",
174 |                 col.names = NA, quote = FALSE)
175 |   }
176 |   return(tscores)
177 | }
178 | 
179 | #' Adjust scores using the spillover compensation method
180 | #'
181 | #' \code{spillOver} Returns the adjusted xCell scores.
182 | #'
183 | #' @param transformedScores the trasnformed scores of cell types calculated by transformScores
184 | #' @param K the Spillover matrix (spill$K).
185 | #' @param alpha a value to override the spillover alpha parameter. Deafult = 0.5
186 | #' @param file.name string for the file name for saving the scores. Default is NULL.
187 | #'
188 | #' @return the adjusted xCell scores
189 | spillOver <- function(transformedScores, K, alpha = 0.5, file.name = NULL) {
190 |   K <- K * alpha
191 |   diag(K) <- 1
192 |   rows <- rownames(transformedScores)[rownames(transformedScores) %in%
193 |                                         rownames(K)]
194 |   scores <- apply(transformedScores[rows, ], 2, function(x) pracma::lsqlincon(K[rows,rows],
195 |                                                                               x, lb = 0))
196 | 
197 |   scores[scores<0]=0
198 |   rownames(scores) <- rows
199 |   if (!is.null(file.name)) {
200 |     scores = round(scores*10000)
201 |     scores = scores/10000
202 |     write.table(scores, file = file.name, sep = "\t",
203 |                 col.names = NA, quote = FALSE)
204 |   }
205 |   return(scores)
206 | }
207 | 
208 | #' Calculate microenvironment scores
209 | #'
210 | #' \code{microenvironmentScores} Returns the adjusted xCell scores.
211 | #'
212 | #' @param adjustedScores the combined microenvironment scores
213 | #'
214 | #' @return the microenvironment scores
215 | microenvironmentScores <- function(adjustedScores) {
216 |   ImmuneScore = apply(adjustedScores[c('B-cells','CD4+ T-cells','CD8+ T-cells','DC','Eosinophils','Macrophages','Monocytes','Mast cells','Neutrophils','NK cells'),],2,sum)/1.5
217 |   StromaScore = apply(adjustedScores[c('Adipocytes','Endothelial cells','Fibroblasts'),],2,sum)/2
218 |   MicroenvironmentScore = ImmuneScore+StromaScore
219 |   adjustedScores = rbind(adjustedScores,ImmuneScore,StromaScore,MicroenvironmentScore)
220 | }
221 | 
222 | #' Calculate significance p-values for the null hypothesis that the cell type is not present in the mixture using a random matrix.
223 | #'
224 | #' \code{xCellSignifcanceBetaDist} Returns the FDR adjusted p-values of the chance that the cell is not present in the mixture.
225 | #'
226 | #' @param scores the xCell scores.
227 | #' @param beta_params the pre-calculated beta distribution parameters from random mixtures.
228 | #' @param rnaseq if beta_params is null, than uses xCell.data beta_params. If TRUE uses sequencing-based params, else array-based params.
229 | #' @param file.name file name to write the p-values table.
230 | #'
231 | #' @return a p-values matrix for each score.
232 | xCellSignifcanceBetaDist = function(scores,beta_params=NULL,rnaseq=T,file.name = NULL) {
233 |   if (is.null(beta_params)) {
234 |     if (rnaseq==T) {
235 |       beta_params = xCell.data$spill$beta_params
236 |     } else {
237 |       beta_params = xCell.data$spill.array$beta_params
238 |     }
239 |   }
240 |   scores = scores[rownames(scores) %in% colnames(xCell.data$spill$beta_params[[1]]),]
241 |   pvals = matrix(0,nrow(scores),ncol(scores))
242 |   rownames(pvals) = rownames(scores)
243 |   eps = 1e-3
244 | 
245 |   for (i in 1:nrow(scores)) {
246 |     ct = rownames(scores)[i]
247 |     beta_dist = c()
248 |     for (bp in beta_params) {
249 |       if (sum(bp[,i]==0)) {
250 |         bd = matrix(eps,1,100000)
251 |       } else {
252 |         bd = stats::rbeta(100000,bp[1,ct],bp[2,ct])
253 |         bd = ((1+eps)*(bp[3,ct]))*bd
254 |       }
255 |       beta_dist = c(beta_dist,bd)
256 |     }
257 |     pvals[i,] = 1-mapply(scores[i,],FUN=function(x) mean(x>beta_dist))
258 |   }
259 | 
260 |   if (!is.null(file.name)) {
261 |     write.table(pvals, file=file.name, quote=FALSE, row.names=TRUE, sep="\t",col.names = NA)
262 |   }
263 | 
264 |   pvals
265 | }
266 | #' Calculate significance p-values for the null hypothesis that the cell type is not present in the mixture using a random matrix.
267 | #'
268 | #' \code{xCellSignifcanceRandomMatrix} Returns the FDR adjusted p-values of the chance that the cell is not present in the mixture.
269 | #'
270 | #' @param scores the xCell scores.
271 | #' @param expr the input expression matrix.
272 | #' @param spill the Spillover object for adjusting the scores.
273 | #' @param alpha a value to override the spillover alpha parameter. Deafult = 0.5
274 | #' @param nperm number of samples in the shuffled matrix, default = 250
275 | #' @param file.name file name to write the p-values table.
276 | #'
277 | #' @return a list with the p-values, the xcell scores of the shuffled data and the shuffled expression matrix.
278 | xCellSignifcanceRandomMatrix = function(scores,expr,spill,alpha=0.5,nperm=250,file.name = NULL) {
279 | 
280 |   shuff_expr = mapply(seq(1:nperm),FUN=function(x) sample(nrow(expr),nrow(expr)))
281 | 
282 |   rownames(shuff_expr) = sample(rownames(expr))
283 |   shuff_xcell = xCellAnalysis(shuff_expr,spill=spill,alpha=alpha)
284 | 
285 |   shuff_xcell = shuff_xcell[rownames(scores),]
286 | 
287 |   pvals = matrix(0,nrow(scores),ncol(scores))
288 |   beta_dist = matrix(0,nrow(scores),100000)
289 |   eps = 1e-3
290 |   for (i in 1:nrow(scores)) {
291 |     x = shuff_xcell[i,]
292 |     if (stats::sd(x)<eps) {
293 |       beta_dist[i,] = rep(eps,100000)
294 |     } else {
295 |       x = x+eps
296 |       beta_params=MASS::fitdistr(x/((1+2*eps)*(max(x)))+eps,"beta",list(shape1=1,shape2=1),lower=eps)
297 |       beta_dist[i,] = stats::rbeta(100000,beta_params$estimate[1],beta_params$estimate[2])
298 |       beta_dist[i,] = ((1+2*eps)*(max(x)))*beta_dist[i,]
299 |     }
300 |     #sm.density.compare(c(shuff_xcell[i,],beta_dist),factor(c(rep(1,100),rep(2,100000))),xlim=c(0,max(beta_dist)));title(rownames(scores)[i])
301 |     pvals[i,] = 1-unlist(lapply(scores[i,],FUN=function(x) mean(x>beta_dist[i,])))
302 |   }
303 |   rownames(pvals) = rownames(scores)
304 |   colnames(pvals) = colnames(scores)
305 |   rownames(shuff_xcell) = rownames(scores)
306 |   rownames(beta_dist) = rownames(scores)
307 | 
308 |   if (!is.null(file.name)) {
309 |     write.table(pvals, file=file.name, quote=FALSE, row.names=TRUE, sep="\t",col.names = NA)
310 |   }
311 | 
312 |   list(pvals=pvals,shuff_xcell=shuff_xcell,shuff_expr=shuff_expr,beta_dist=beta_dist)
313 | }
314 | 
315 | .onLoad <- function(libname, pkgname) {
316 |   op <- options()
317 |   op.devtools <- list(
318 |     devtools.path = "~/R-dev",
319 |     devtools.install.args = "",
320 |     devtools.name = "Dvir Aran",
321 |     devtools.desc.author = '"Dvir Aran <dvir.aran@ucsf.edu> [aut, cre]"',
322 |     devtools.desc.license = "GPL-3",
323 |     devtools.desc.suggests = NULL,
324 |     devtools.desc = list()
325 |   )
326 |   toset <- !(names(op.devtools) %in% names(op))
327 |   if(any(toset)) options(op.devtools[toset])
328 | 
329 |   requireNamespace(c('GSVA','GSEABase','pracma','stats','MASS'))
330 | 
331 |   invisible()
332 | }
333 | 


--------------------------------------------------------------------------------
/README.Md:
--------------------------------------------------------------------------------
 1 | **Important note:** xCell was trained for use with bulk gene expression data. Due to many reasons we believe it is not a great solution for using it directly to infer cell types in scRNA-seq data. For such data we recommed using our method SingleR which was developed exactly for this purpose - https://github.com/dviraran/SingleR 
 2 | 
 3 | **User guide:** For more details on using xCell, please refer to Aran D. (2020) Cell-Type Enrichment Analysis of Bulk Transcriptomes Using xCell. In: Boegel S. (eds) Bioinformatics for Cancer Immunotherapy. Methods in Molecular Biology, vol 2120. https://link.springer.com/protocol/10.1007/978-1-0716-0327-7_19 
 4 | 
 5 | # xCell - cell types enrichment analysis
 6 | 
 7 | xCell is a webtool that performs cell type enrichment analysis from gene expression data for 64 immune and stroma cell types. 
 8 | 
 9 | xCell is a gene signatures-based method learned from thousands of pure cell types from various sources. xCell applies a novel technique for reducing associations between closely related cell types. xCell signatures were validated using extensive in-silico simulations and also cytometry immunophenotyping, and were shown to outperform previous methods. xCell allows researchers to reliably portray the cellular heterogeneity landscape of tissue expression profiles. 
10 | 
11 | For more informations please refer to the xCell manuscript.
12 | 
13 | # Install
14 | 
15 | ```R
16 | devtools::install_github('dviraran/xCell')
17 | ```
18 | 
19 | Note: if the installation fails, try setting `options(unzip = "internal")` before calling the install function (thanks @hermidalc).
20 | 
21 | # Usage
22 | 
23 | ```R
24 | library(xCell)
25 | exprMatrix = read.table(expr_file,header=TRUE,row.names=1, as.is=TRUE)
26 | xCellAnalysis(exprMatrix)
27 | ```
28 | 
29 | This function performs all three steps in xCell, which can be performed seperately as well:
30 | 
31 | 1. rawEnrichmentAnalysis
32 | 2. transformScores
33 | 3. spillOver
34 | 
35 | xCell loads the xCell.data object which is a list with the spill over and calibration parameters, the signatures and the list of genes it uses. However, one can use different signatures and different spill over functions to perform the analysis.
36 | 
37 | In addition xCell provides significance assessment of the null hypothesis that the cell type is not in the mixture:
38 | 
39 | 1. xCellSignifcanceBetaDist - uses predefined beta distribution parameters from random mixtures generated using the reference data sets. Recommended. 
40 | 2. xCellSignifcanceRandomMatrix - uses random matrix and caculates beta distribution parameters. 
41 | 
42 | See documentation for more details on each function.
43 | 
44 | # Data input
45 | 
46 | The expression matrix should be a matrix with genes in rows and samples in columns. The rownames should be gene symbols. xCell uses the expression levels ranking and not the actual values, thus normalization does not have an effect, however normalizing to gene length is required. 
47 | 
48 | Importantly, xCell performs best with heterogenous dataset. Thus it is recommended to use all data combined in one run, and not break down to pieces (especially not cases and control in different runs).
49 | 
50 | # Notes for correct usage
51 | 
52 | 1. xCell produces enrichment scores, not percentages. It is not a deconvolution method, but an enrichment method. That means that the main usage is for comparing across samples, not across cell types. xCell does an attempt to make the scores resemble percentages, but it is a hard problem, and is very platform and experiment specific. We have made some tests to compare the ability of xCell for cross-cell types analysis, and found that it generally performed better in that than other methods (on limited and comparable cell types), but this type of analysis should be performed carefully.
53 | 
54 | Regarding this issue, scaling the scores by samples is extremely dangerous and will inevitably will result in false interpretations.
55 | 
56 | 2. For the linear transformation xCell uses a calibration parameter to make it resemble percentages. If your analysis produces high scores for a cell type that is clearly false, you may tweak the calibration parameters, which can be found in the xCell.data object (xCell.data$spill$fv$V3). In the paper, we used an automatic method to learn these parameters, but if you have prior knowledge about your mixture, this is very handy to get better results. 
57 | 
58 | 3. xCell uses the variability among the samples for the linear transformation. xCell will only function with heterogenous mixtures. If there is no variability between the samples, xCell will not identify any signal. As noted above, it is highly recommended to use all data combined in one run. Failing to do so will again inevitably make xCell's results false. 
59 | 
60 | 4. xCell is a method for detecting cell types enrichments from mixed samples, not to detect the cell of origin. xCell is still able to recognize the cell of origin many times, but it was not trained for this problem, and there is no expectation that it will perform better than other methods for that problem. In accordance, it is strongly not advised to be performed on single-cell data. 
61 | 
62 | # Vignettes
63 | 
64 | **11.6.2018 update:** Due to popular demand, we created a simple vignette to reproduce the analysis that was performed in the xCell paper of [correlating the xCell scores with the CyTOF immunoprofiling data available from ImmPort](http://comphealth.ucsf.edu/sample-apps/xCellView/xCell-Immport.html). The data files are available in the 'vignette' directory.
65 | 
66 | # Contributors
67 | 
68 | xCell is developed by the Butte lab. Please contact Dvir Aran: dvir.aran at technion.ac.il for any questions or suggestions.
69 | 
70 | To cite xCell: Aran, Hu and Butte, xCell: digitally portraying the tissue cellular heterogeneity landscape. Genome Biology (2017) 18:220
71 | 


--------------------------------------------------------------------------------
/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dviraran/xCell/20e2919eefd37e15af35f29f4944e30697098a28/data/.DS_Store


--------------------------------------------------------------------------------
/data/xCell.data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dviraran/xCell/20e2919eefd37e15af35f29f4944e30697098a28/data/xCell.data.rda


--------------------------------------------------------------------------------
/man/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dviraran/xCell/20e2919eefd37e15af35f29f4944e30697098a28/man/.DS_Store


--------------------------------------------------------------------------------
/man/microenvironmentScores.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xCell.R
 3 | \name{microenvironmentScores}
 4 | \alias{microenvironmentScores}
 5 | \title{Calculate microenvironment scores}
 6 | \usage{
 7 | microenvironmentScores(adjustedScores)
 8 | }
 9 | \arguments{
10 | \item{adjustedScores}{the combined microenvironment scores}
11 | }
12 | \value{
13 | the microenvironment scores
14 | }
15 | \description{
16 | \code{microenvironmentScores} Returns the adjusted xCell scores.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/rawEnrichmentAnalysis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xCell.R
 3 | \name{rawEnrichmentAnalysis}
 4 | \alias{rawEnrichmentAnalysis}
 5 | \title{Calculated raw xCell enrichment scores}
 6 | \usage{
 7 | rawEnrichmentAnalysis(expr, signatures, genes, file.name = NULL,
 8 |   parallel.sz = 4, parallel.type = "SOCK")
 9 | }
10 | \arguments{
11 | \item{expr}{the gene expression data set. A matrix with row names as symbols and columns as samples.}
12 | 
13 | \item{signatures}{a GMT object of signatures.}
14 | 
15 | \item{genes}{list of genes to use in the analysis.}
16 | 
17 | \item{file.name}{string for the file name for saving the scores. Default is NULL.}
18 | 
19 | \item{parallel.sz}{integer for the number of threads to use. Default is 4.}
20 | 
21 | \item{parallel.type}{Type of cluster architecture when using snow. 'SOCK' or 'FORK'. Fork is faster, but is not supported in windows.}
22 | }
23 | \value{
24 | the raw xCell scores
25 | }
26 | \description{
27 | \code{rawEnrichmentAnalysis} Returns the raw xCell cell types enrichment scores.
28 | }
29 | 


--------------------------------------------------------------------------------
/man/spillOver.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xCell.R
 3 | \name{spillOver}
 4 | \alias{spillOver}
 5 | \title{Adjust scores using the spillover compensation method}
 6 | \usage{
 7 | spillOver(transformedScores, K, alpha = 0.5, file.name = NULL)
 8 | }
 9 | \arguments{
10 | \item{transformedScores}{the trasnformed scores of cell types calculated by transformScores}
11 | 
12 | \item{K}{the Spillover matrix (spill$K).}
13 | 
14 | \item{alpha}{a value to override the spillover alpha parameter. Deafult = 0.5}
15 | 
16 | \item{file.name}{string for the file name for saving the scores. Default is NULL.}
17 | }
18 | \value{
19 | the adjusted xCell scores
20 | }
21 | \description{
22 | \code{spillOver} Returns the adjusted xCell scores.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/transformScores.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xCell.R
 3 | \name{transformScores}
 4 | \alias{transformScores}
 5 | \title{Transform scores from raw scores to fractions}
 6 | \usage{
 7 | transformScores(scores, fit.vals, scale = TRUE, fn = NULL)
 8 | }
 9 | \arguments{
10 | \item{scores}{raw scores of cell types calculated by rawEnrichmentAnalysis}
11 | 
12 | \item{fit.vals}{the calibration values in the spill object (spill$fv).}
13 | 
14 | \item{scale}{if TRUE, uses scaling to trnasform scores using fit.vals}
15 | 
16 | \item{fn}{string for the file name for saving the scores. Default is NULL.}
17 | }
18 | \value{
19 | the trasnformed xCell scores
20 | }
21 | \description{
22 | \code{transformScores} Returns the trasnformed xCell scores (not adjusted).
23 | }
24 | 


--------------------------------------------------------------------------------
/man/xCell.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xCell.R
 3 | \docType{package}
 4 | \name{xCell}
 5 | \alias{xCell}
 6 | \alias{xCell-package}
 7 | \title{xCell: A package for calculating cell type enrichments.}
 8 | \description{
 9 | xCell: A package for calculating cell type enrichments.
10 | }
11 | 


--------------------------------------------------------------------------------
/man/xCell.data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xCell.R
 3 | \docType{data}
 4 | \name{xCell.data}
 5 | \alias{xCell.data}
 6 | \title{xCell datasets}
 7 | \format{list:
 8 | \describe{
 9 |   \item{spill}{spillover matrix and calibration parameters}
10 |   \item{signatures}{the signatures for calculating scores}
11 |   \item{genes}{genes to use to calculate xCell}
12 | }}
13 | \usage{
14 | xCell.data
15 | }
16 | \description{
17 | xCell datasets
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/xCellAnalysis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xCell.R
 3 | \name{xCellAnalysis}
 4 | \alias{xCellAnalysis}
 5 | \title{The xCell analysis pipeline}
 6 | \usage{
 7 | xCellAnalysis(expr, signatures = NULL, genes = NULL, spill = NULL,
 8 |   rnaseq = TRUE, file.name = NULL, scale = TRUE, alpha = 0.5,
 9 |   save.raw = FALSE, parallel.sz = 4, parallel.type = "SOCK",
10 |   cell.types.use = NULL)
11 | }
12 | \arguments{
13 | \item{expr}{the gene expression data set. A matrix with row names as symbols and columns as samples.}
14 | 
15 | \item{signatures}{a GMT object of signatures.}
16 | 
17 | \item{genes}{list of genes to use in the analysis.}
18 | 
19 | \item{spill}{the Spillover object for adjusting the scores.}
20 | 
21 | \item{rnaseq}{if true than use RNAseq spillover and calibration paramters, else use array parameters.}
22 | 
23 | \item{file.name}{string for the file name for saving the scores. Default is NULL.}
24 | 
25 | \item{scale}{if TRUE, uses scaling to trnasform scores using fit.vals}
26 | 
27 | \item{alpha}{a value to override the spillover alpha parameter. Deafult = 0.5}
28 | 
29 | \item{save.raw}{TRUE to save a raw}
30 | 
31 | \item{parallel.sz}{integer for the number of threads to use. Default is 4.}
32 | 
33 | \item{parallel.type}{Type of cluster architecture when using snow. 'SOCK' or 'FORK'. Fork is faster, but is not supported in windows.}
34 | 
35 | \item{cell.types.use}{a character list of the cell types to use in the analysis. If NULL runs xCell with all cell types.
36 | The spillover compensation step may over compensate, thus it is always better to run xCell with a list of cell types that are expected
37 | to be in the mixture. The names of cell types in this list must be a subset of the cell types that are inferred by xCell.}
38 | }
39 | \value{
40 | the adjusted xCell scores
41 | }
42 | \description{
43 | \code{xCellAnalysis} Returns the xCell cell types enrichment scores.
44 | }
45 | 


--------------------------------------------------------------------------------
/man/xCellSignifcanceBetaDist.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xCell.R
 3 | \name{xCellSignifcanceBetaDist}
 4 | \alias{xCellSignifcanceBetaDist}
 5 | \title{Calculate significance p-values for the null hypothesis that the cell type is not present in the mixture using a random matrix.}
 6 | \usage{
 7 | xCellSignifcanceBetaDist(scores, beta_params = NULL, rnaseq = T,
 8 |   file.name = NULL)
 9 | }
10 | \arguments{
11 | \item{scores}{the xCell scores.}
12 | 
13 | \item{beta_params}{the pre-calculated beta distribution parameters from random mixtures.}
14 | 
15 | \item{rnaseq}{if beta_params is null, than uses xCell.data beta_params. If TRUE uses sequencing-based params, else array-based params.}
16 | 
17 | \item{file.name}{file name to write the p-values table.}
18 | }
19 | \value{
20 | a p-values matrix for each score.
21 | }
22 | \description{
23 | \code{xCellSignifcanceBetaDist} Returns the FDR adjusted p-values of the chance that the cell is not present in the mixture.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/xCellSignifcanceRandomMatrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xCell.R
 3 | \name{xCellSignifcanceRandomMatrix}
 4 | \alias{xCellSignifcanceRandomMatrix}
 5 | \title{Calculate significance p-values for the null hypothesis that the cell type is not present in the mixture using a random matrix.}
 6 | \usage{
 7 | xCellSignifcanceRandomMatrix(scores, expr, spill, alpha = 0.5,
 8 |   nperm = 250, file.name = NULL)
 9 | }
10 | \arguments{
11 | \item{scores}{the xCell scores.}
12 | 
13 | \item{expr}{the input expression matrix.}
14 | 
15 | \item{spill}{the Spillover object for adjusting the scores.}
16 | 
17 | \item{alpha}{a value to override the spillover alpha parameter. Deafult = 0.5}
18 | 
19 | \item{nperm}{number of samples in the shuffled matrix, default = 250}
20 | 
21 | \item{file.name}{file name to write the p-values table.}
22 | }
23 | \value{
24 | a list with the p-values, the xcell scores of the shuffled data and the shuffled expression matrix.
25 | }
26 | \description{
27 | \code{xCellSignifcanceRandomMatrix} Returns the FDR adjusted p-values of the chance that the cell is not present in the mixture.
28 | }
29 | 


--------------------------------------------------------------------------------
/vignettes/sdy311.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dviraran/xCell/20e2919eefd37e15af35f29f4944e30697098a28/vignettes/sdy311.rds


--------------------------------------------------------------------------------
/vignettes/sdy420.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dviraran/xCell/20e2919eefd37e15af35f29f4944e30697098a28/vignettes/sdy420.rds


--------------------------------------------------------------------------------
/vignettes/xCell-Immport.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Validating xCell with CyTOF immunoprofilings"
 3 | author: "Dvir Aran"
 4 | date: "11/6/2018"
 5 | output: html_document
 6 | ---
 7 | 
 8 | ```{r setup, include=FALSE}
 9 | knitr::opts_chunk$set(echo = TRUE)
10 | ```
11 | 
12 | In the xCell paper we present an analysis of validating xCell with cell counts measured by CyTOF immunoprofiling. Here we present a short tutorial to reproduce those results.
13 | 
14 | ## Data gathering
15 | 
16 | From the Immport web portal we downloaded the study files of SDY311 and SDY420. Each Immport study has a file named *fcs_analyzed_result.txt* which contains counts of cells. We extract the number of cells from all live cells to create a matrix of the fraction of cells for each samples. 
17 | 
18 | In addition we downloaded the gene expression data, also available with the Immport study files. This is an Illumnia array, and was normalized using Matlab functions. Both objects can be downloaded from the vignettes directory in the GitHub repository.
19 | 
20 | ```{r,echo=T, results='hide'}
21 | sdy311 = readRDS('~/Documents/xCell/sdy311.rds')
22 | sdy420 = readRDS('~/Documents/xCell/sdy420.rds')
23 | ```
24 | 
25 | ## Generating xCell scores
26 | 
27 | We do two data filtering in the SDY311 dataset before analysis. First, there are 10 patients with replicates in the gene expression data. We use the `rawEnrichmentAnalysis` function to create the xCell scores and then for each patient with a replicate we take the average of the raw scores.
28 | 
29 | Second, there were two samples in the SDY311 that are suspected as outliers, thus we removed them from the analysis.
30 | 
31 | The next step is transforming the raw scores and applying the spill over compensation. To get best results it is best to run the spill over compensation only on relevant cell types (e.g. if we know there are no macrophages in the mixtures, it is best to remove them from the analysis). Thus, we subset the scores matrix to only cell types that are also measured in the CyTOF dataset.
32 | 
33 | Note that we use the xCell.data$spill.array data since the expression data was generated with microarrays.
34 | 
35 | ```{r,echo=T, results='hide'}
36 | get.xCell.scores = function(sdy) {
37 |   raw.scores = rawEnrichmentAnalysis(as.matrix(sdy$expr),
38 |                                      xCell.data$signatures,
39 |                                      xCell.data$genes)
40 |   
41 |   colnames(raw.scores) = gsub("\\.1","",colnames(raw.scores))
42 |   raw.scores = aggregate(t(raw.scores)~colnames(raw.scores),FUN=mean)
43 |   rownames(raw.scores) = raw.scores[,1]
44 |   raw.scores = raw.scores[,-1]
45 |   raw.scores = t(raw.scores)
46 |   
47 |   cell.types = rownames(sdy$fcs) 
48 |   
49 |   cell.types.use = intersect(rownames(raw.scores),rownames(sdy$fcs))
50 |   transformed.scores = transformScores(raw.scores[cell.types.use,],xCell.data$spill.array$fv)
51 |   scores = spillOver(transformed.scores,xCell.data$spill.array$K)
52 |   #s = y
53 |   A = intersect(colnames(sdy$fcs),colnames(scores))
54 |   scores = scores[,A]
55 |   
56 |   scores
57 | } 
58 | 
59 | 
60 | library(xCell)
61 | 
62 | sdy311$fcs= sdy311$fcs[,-which(colnames(sdy311$fcs) %in% c("SUB134240","SUB134283"))]
63 | 
64 | scores311 = get.xCell.scores(sdy311)
65 | scores420 = get.xCell.scores(sdy420)
66 | 
67 | ```
68 | 
69 | ## Correlating xCell scores and CyTOF immunoprofilings
70 | 
71 | Using these scores we can now find the correlation between the xCell scores and the cell types fractions from the CyTOF immunoprofilings:
72 | 
73 | ```{r,echo=T, results='hide'}
74 | library(psych)
75 | library(ggplot2)
76 | 
77 | correlateScoresFCS = function(scores,fcs,tit) {
78 |   fcs = fcs[rownames(scores),colnames(scores)]
79 | 
80 | res = corr.test(t(scores),t(fcs),adjust='none')
81 | 
82 | df = data.frame(R=diag(res$r),p.value=diag(res$p),Cell.Types=rownames(res$r))
83 | ggplot(df)+geom_col(aes(y=df$R,x=Cell.Types,fill=p.value<0.05))+theme_classic()+
84 |   theme(axis.text.x = element_text(angle = 45, hjust = 1))+
85 |   ylab('Pearson R')+ggtitle(tit)
86 | }
87 | ```
88 | 
89 | ```{r}
90 | correlateScoresFCS(scores311,sdy311$fcs,'SDY311')
91 | correlateScoresFCS(scores420,sdy420$fcs,'SDY420')
92 | ```
93 | 


--------------------------------------------------------------------------------
/vignettes/xCell.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "xCell"
 3 | author: "Dvir Aran"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{xCell}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | Tissues are a complex milieu consisting of numerous cell types. In cancer, understanding the cellular heterogeneity in the tumor microenvironment is an emerging field of research. Numerous methods have been published in recent years for the enumeration of cell subsets from tissue expression profiles. However, the available methods suffer from three major problems: inferring cell subset based on gene sets learned and verified from limited sources; displaying only partial portrayal of the full cellular heterogeneity; and insufficient validation in mixed tissues.
13 | 
14 | xCell is a tool that performs cell type enrichment analysis from gene expression data for 64 immune and stroma cell types. xCell is a gene signatures-based method learned from thousands of pure cell types from various sources. xCell applies a novel technique for reducing associations between closley related cell types. xCell signatures were validated using extensive in-silico simulations and also cytometry immunophenotyping, and were shown to outperform previous methods. xCell allows researchers to reliably portray the cellular heterogeneity landscape of tissue expression profiles. For more informations please refer to the xCell manuscript.
15 | 
16 | To use xCell simply load a human gene expression data matrix. The rows of the matrix must be gene symbols. If the data contains non-unique gene symbols, rows with same gene symbols will be averaged. 
17 | 


--------------------------------------------------------------------------------
/vignettes/xCell.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | 
 3 | <html xmlns="http://www.w3.org/1999/xhtml">
 4 | 
 5 | <head>
 6 | 
 7 | <meta charset="utf-8" />
 8 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
 9 | <meta name="generator" content="pandoc" />
10 | 
11 | <meta name="viewport" content="width=device-width, initial-scale=1">
12 | 
13 | <meta name="author" content="Dvir Aran" />
14 | 
15 | <meta name="date" content="2017-08-24" />
16 | 
17 | <title>xCell</title>
18 | 
19 | 
20 | 
21 | 
22 | 
23 | 
24 | <link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B%0Aborder%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%23800080%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0Aa%5Bhref%5E%3D%22https%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0A%0Acode%20%3E%20span%2Ekw%20%7B%20color%3A%20%23555%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Edt%20%7B%20color%3A%20%23902000%3B%20%7D%20%0Acode%20%3E%20span%2Edv%20%7B%20color%3A%20%2340a070%3B%20%7D%20%0Acode%20%3E%20span%2Ebn%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Efl%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Ech%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Est%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Eco%20%7B%20color%3A%20%23888888%3B%20font%2Dstyle%3A%20italic%3B%20%7D%20%0Acode%20%3E%20span%2Eot%20%7B%20color%3A%20%23007020%3B%20%7D%20%0Acode%20%3E%20span%2Eal%20%7B%20color%3A%20%23ff0000%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Efu%20%7B%20color%3A%20%23900%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%20code%20%3E%20span%2Eer%20%7B%20color%3A%20%23a61717%3B%20background%2Dcolor%3A%20%23e3d2d2%3B%20%7D%20%0A" rel="stylesheet" type="text/css" />
25 | 
26 | </head>
27 | 
28 | <body>
29 | 
30 | 
31 | 
32 | 
33 | <h1 class="title toc-ignore">xCell</h1>
34 | <h4 class="author"><em>Dvir Aran</em></h4>
35 | <h4 class="date"><em>2017-08-24</em></h4>
36 | 
37 | 
38 | 
39 | <p>Tissues are a complex milieu consisting of numerous cell types. In cancer, understanding the cellular heterogeneity in the tumor microenvironment is an emerging field of research. Numerous methods have been published in recent years for the enumeration of cell subsets from tissue expression profiles. However, the available methods suffer from three major problems: inferring cell subset based on gene sets learned and verified from limited sources; displaying only partial portrayal of the full cellular heterogeneity; and insufficient validation in mixed tissues.</p>
40 | <p>xCell is a tool that performs cell type enrichment analysis from gene expression data for 64 immune and stroma cell types. xCell is a gene signatures-based method learned from thousands of pure cell types from various sources. xCell applies a novel technique for reducing associations between closley related cell types. xCell signatures were validated using extensive in-silico simulations and also cytometry immunophenotyping, and were shown to outperform previous methods. xCell allows researchers to reliably portray the cellular heterogeneity landscape of tissue expression profiles. For more informations please refer to the xCell manuscript.</p>
41 | <p>To use xCell simply load a human gene expression data matrix. The rows of the matrix must be gene symbols. If the data contains non-unique gene symbols, rows with same gene symbols will be averaged.</p>
42 | 
43 | 
44 | 
45 | <!-- dynamically load mathjax for compatibility with self-contained -->
46 | <script>
47 |   (function () {
48 |     var script = document.createElement("script");
49 |     script.type = "text/javascript";
50 |     script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
51 |     document.getElementsByTagName("head")[0].appendChild(script);
52 |   })();
53 | </script>
54 | 
55 | </body>
56 | </html>
57 | 


--------------------------------------------------------------------------------
/xCell.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------
/xCell/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/xCell/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | 


--------------------------------------------------------------------------------
/xCell/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: xCell
 2 | Title: What the Package Does (one line, title case)
 3 | Version: 0.0.0.9000
 4 | Authors@R: person("First", "Last", email = "first.last@example.com", role = c("aut", "cre"))
 5 | Description: What the package does (one paragraph).
 6 | Depends: R (>= 3.3.2)
 7 | License: What license is it under?
 8 | Encoding: UTF-8
 9 | LazyData: true
10 | 


--------------------------------------------------------------------------------
/xCell/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: fake comment so roxygen2 overwrites silently.
2 | exportPattern("^[^\\.]")
3 | 


--------------------------------------------------------------------------------
/xCell/xCell.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | Encoding: UTF-8
 9 | 
10 | AutoAppendNewline: Yes
11 | StripTrailingWhitespace: Yes
12 | 
13 | BuildType: Package
14 | PackageUseDevtools: Yes
15 | PackageInstallArgs: --no-multiarch --with-keep.source
16 | PackageRoxygenize: rd,collate,namespace
17 | 


--------------------------------------------------------------------------------