├── Code_v1.5.1 ├── python.buildLinearRegression.py ├── python.calculateReadsDepthOfAmp.py ├── python.chrXNormalizeReadDepth.py ├── python.getCNVPerSample.py ├── python.mergeCNVFiles.py ├── python.mergeReadDepthStatistics.py ├── python.scoreCNV.py ├── r.filterLowQualSample.r ├── r.plotPerSample.noLog2.r └── r.plotPerSample.r ├── DeviCNV1.5 Manual20171101.pdf ├── DeviCNV_Example.probeInformation.txt ├── DeviCNV_Example.runningScript.sh ├── DeviCNV_Example.sampleInfo.txt ├── DeviCNV_scoringSystemThresholds.txt ├── ExampleBams ├── GM14603_Example.bam ├── GM14603_Example.bam.bai ├── GM14734_Example.bam ├── GM14734_Example.bam.bai ├── GM17433_Example.bam ├── GM17433_Example.bam.bai ├── GM23221_Example.bam ├── GM23221_Example.bam.bai ├── GM23431_Example.bam ├── GM23431_Example.bam.bai ├── GM23891_Example.bam ├── GM23891_Example.bam.bai ├── GM24007_Example.bam ├── GM24007_Example.bam.bai ├── NA00006_Example.bam ├── NA00006_Example.bam.bai ├── NA00852_Example.bam ├── NA00852_Example.bam.bai ├── NA01741_Example.bam └── NA01741_Example.bam.bai ├── ExampleOutputs_v1.5.1.zip ├── LICENSE └── README.md /Code_v1.5.1/python.buildLinearRegression.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from scipy import stats 3 | import sys 4 | 5 | def getHqSampleList(hqSampleFileName): 6 | inFile=open(hqSampleFileName,'r') 7 | inLine=inFile.readline() 8 | headCheck=False 9 | hqSampleList=[] 10 | while(inLine): 11 | inList=inLine.replace("\n","").replace("\r","").split("\t") 12 | if(headCheck==False): 13 | headCheck=True 14 | sampleID=inList.index("Sample") 15 | lowQualID=inList.index("LowQualSample?") 16 | else: 17 | sample=inList[sampleID] 18 | lowQual=inList[lowQualID] 19 | if(lowQual=="HQ"): 20 | hqSampleList.append(sample) 21 | inLine=inFile.readline() 22 | inFile.close() 23 | print("High-quality samples: "+str(hqSampleList)) 24 | return hqSampleList 25 | 26 | 27 | def getAmpRDDic(inFileName): 28 | inFile=open(inFileName,'r') 29 
| inLine=inFile.readline() 30 | headCheck=False 31 | ampRDDic={} 32 | ampDic={} 33 | ampList=[] 34 | while(inLine): 35 | inList=inLine.replace("\n","").replace("\r","").split("\t") 36 | if(headCheck==False): 37 | headCheck=True 38 | ampliconIDID=inList.index("Amplicon_ID") 39 | poolID=inList.index("Pool") 40 | sampleID=poolID+1 41 | sampleList=inList[sampleID:] 42 | headList=inList[:poolID+1] 43 | else: 44 | ampliconID=tuple(inList[:poolID+1]) 45 | pool=inList[poolID] 46 | ampDic[ampliconID]=pool 47 | sampleRDList=inList[sampleID:] 48 | ampRDDic[ampliconID]=sampleRDList 49 | ampList.append(ampliconID) 50 | inLine=inFile.readline() 51 | inFile.close() 52 | return [headList, sampleList, ampList, ampRDDic, ampDic] 53 | 54 | 55 | def getMedCovDic(inFileName): 56 | inFile=open(inFileName,'r') 57 | inLine=inFile.readline() 58 | headCheck=False 59 | medCovDic={} 60 | medCovSampleList=[] 61 | while(inLine): 62 | inList=inLine.replace("\n","").replace("\r","").split("\t") 63 | if(headCheck==False): 64 | headCheck=True 65 | sampleID=inList.index("Sample") 66 | PoolID=inList.index("Pool") 67 | MQID=inList.index("MQ") 68 | medianID=inList.index("Median") 69 | else: 70 | ID=(inList[PoolID], inList[MQID]) 71 | if(ID not in medCovDic): 72 | medCovDic[ID]=[] 73 | medCovDic[ID].append(inList[medianID]) 74 | if(inList[sampleID] not in medCovSampleList): 75 | medCovSampleList.append(inList[sampleID]) 76 | inLine=inFile.readline() 77 | inFile.close() 78 | print("All samples") 79 | print(medCovSampleList) 80 | return [medCovSampleList, medCovDic] 81 | 82 | 83 | def runBootstriping(sampleList, ampRDDic, medCovSampleList, medCovDic, ampliconID, Pool, MQ, dupTh, delTh, hqSampleList): 84 | faultyAmp=False 85 | ### filter out low quality sample ################################################ 86 | x_raw=[] 87 | y_raw=[] 88 | x_raw_highQ=[] 89 | y_raw_highQ=[] 90 | sampleList_highQ=[] 91 | for sampleName in sampleList: 92 | x_raw.append(float(medCovDic[(Pool, 
MQ)][medCovSampleList.index(sampleName)])) 93 | y_raw.append(float(ampRDDic[ampliconID][sampleList.index(sampleName)])) 94 | if(sampleName in hqSampleList): 95 | x_raw_highQ.append(float(medCovDic[(Pool, MQ)][medCovSampleList.index(sampleName)])) 96 | y_raw_highQ.append(float(ampRDDic[ampliconID][sampleList.index(sampleName)])) 97 | sampleList_highQ.append(sampleName) 98 | ################## make regression model ################################################ 99 | slope, intercept, r_value, p_value, std_err = stats.linregress(x_raw_highQ,y_raw_highQ) 100 | ################## filter-out 20% outliner ############################################## 101 | y_raw_highQ_predict=intercept+slope*numpy.array(x_raw_highQ) 102 | residList=abs(numpy.array(y_raw_highQ)-y_raw_highQ_predict) 103 | Q3=numpy.percentile(residList,75) 104 | Q1=numpy.percentile(residList,25) 105 | IQR=Q3-Q1 106 | upperTh=Q3+1.5*IQR 107 | #maxThs=numpy.percentile(residList,90) 108 | filterOutPercent=0.2 109 | maxThs=residList[numpy.argsort(residList)][int(-1*round(len(residList)*filterOutPercent))] 110 | parsedSampleList=[] 111 | filterOutSampleList=[] 112 | for i in range(0, len(residList)): 113 | if((residList[i]=round(len(residList)*filterOutPercent)): 114 | parsedSampleList.append(sampleList_highQ[i]) 115 | elif(residList[i]>=maxThs): 116 | filterOutSampleList.append(sampleList_highQ[i]) 117 | ########### print 2 type of warining ##################################################### 118 | if(len(sampleList_highQ)<10): 119 | print("Warning(Less 10 samples have >=50 medianCoverage)\t",ampliconID, Pool, MQ, "highQSampleList",str(sampleList_highQ),str(x_raw_highQ)) 120 | if(len(filterOutSampleList)>round(len(residList)*filterOutPercent)): 121 | print("Warning(Over 10% samples have sample max residual)\t",ampliconID, Pool, MQ, maxThs,"filterOutSampleList",str(filterOutSampleList),str(residList)) 122 | print(str(ampliconID)+": Total "+str(len(parsedSampleList))+" samples for bootstrapping") 123 | 
########### bootstraping ################################################################# 124 | RUNTIME=1000 125 | yRatioList=[] 126 | yPredictList=[] 127 | r_valueList=[] 128 | for t in range(0,RUNTIME): 129 | selectedSampleList=numpy.random.choice(parsedSampleList, len(parsedSampleList), replace=True) 130 | x=[] 131 | y=[] 132 | for sampleName in selectedSampleList: 133 | x.append(float(medCovDic[(Pool, MQ)][medCovSampleList.index(sampleName)])) 134 | y.append(float(ampRDDic[ampliconID][sampleList.index(sampleName)])) 135 | slope, intercept, r_value, p_value, std_err = stats.linregress(x,y) 136 | y_predict=intercept+numpy.array(x_raw)*slope 137 | yRatio=y_raw/y_predict 138 | yPredictList.append(list(y_predict)) 139 | yRatioList.append(list(yRatio)) 140 | r_valueList.append(r_value) 141 | ######## mark low quality amplicon ##### 142 | if(slope<=0): 143 | faultyAmp=True 144 | ########### mark low quality sample ############################################ 145 | yPredictList=numpy.array(yPredictList) 146 | yRatioList=numpy.array(yRatioList) 147 | for i in range(0, len(yPredictList[0,:])): 148 | if(list(yPredictList[:,i]<0).count(True)>0): 149 | yPredictList[:,i]=["nan"]*len(yPredictList[:,i]) 150 | yRatioList[:,i]=["nan"]*len(yRatioList[:,i]) 151 | ############ write outList ################################################### 152 | x_raw=numpy.round(x_raw,3) 153 | y_raw=numpy.round(y_raw,3) 154 | yRatio_bottom=numpy.round(numpy.percentile(yRatioList,2.5, axis=0),3) 155 | yRatio_median=numpy.round(numpy.percentile(yRatioList,50,axis=0),3) 156 | yRatio_top=numpy.round(numpy.percentile(yRatioList,97.5, axis=0),3) 157 | yPredict_bottom=numpy.round(numpy.percentile(yPredictList,2.5, axis=0),3) 158 | yPredict_median=numpy.round(numpy.percentile(yPredictList,50,axis=0),3) 159 | yPredict_top=numpy.round(numpy.percentile(yPredictList,97.5, axis=0),3) 160 | ampP=[str(round(numpy.mean(r_valueList),2))]*len(x_raw) 161 | yPredictList_t=numpy.transpose(yPredictList) 162 | 
yRatioList_t=numpy.transpose(yRatioList) 163 | X=[] 164 | Y=[] 165 | Y_M=[] 166 | Y_L=[] 167 | Y_U=[] 168 | CN_M=[] 169 | CI_L=[] 170 | CI_U=[] 171 | dupP=[] 172 | delP=[] 173 | P=[] 174 | CNVTYPE=[] 175 | for i in range(0,len(y_raw)): 176 | ### calculate p-values for dup/del ######################################## 177 | dupPvalue=round(1-float(list(yRatioList_t[i]>dupTh).count(True))/RUNTIME,3) 178 | delPvalue=round(1-float(list(yRatioList_t[i]y_raw[i]) and Pvalue<0.5): 183 | cnvType="del" 184 | else: 185 | cnvType="neutral" 186 | ### mark low quality sample ################ 187 | if(list(numpy.isnan(yRatioList_t[i])).count(True)>0): 188 | dupPvalue="nan" 189 | delPvalue="nan" 190 | Pvalue="nan" 191 | cnvType="faultySample" 192 | #### write out List ####################### 193 | X.append(str(x_raw[i])) 194 | Y.append(str(y_raw[i])) 195 | Y_M.append(str(yPredict_median[i])) 196 | Y_L.append(str(yPredict_bottom[i])) 197 | Y_U.append(str(yPredict_top[i])) 198 | CN_M.append(str(yRatio_median[i])) 199 | CI_L.append(str(yRatio_bottom[i])) 200 | CI_U.append(str(yRatio_top[i])) 201 | dupP.append(str(dupPvalue)) 202 | delP.append(str(delPvalue)) 203 | P.append(str(Pvalue)) 204 | CNVTYPE.append(cnvType) 205 | ### mark low quality amplicon ############# 206 | if(faultyAmp==True): 207 | dupP=["nan"]*len(dupP) 208 | delP=["nan"]*len(delP) 209 | P=["nan"]*len(P) 210 | CNVTYPE=["faultyAmp"]*len(CNVTYPE) 211 | return [X, Y, Y_M, Y_L, Y_U, CN_M, CI_L, CI_U, dupP, delP, P, CNVTYPE, ampP] 212 | 213 | 214 | def writeAmpLinearRegression(headList, sampleList, ampList, ampRDDic, medCovSampleList, medCovDic, ampDic, modelFile, MQ, dupTh, delTh, hqSampleList): 215 | outFile=open(modelFile,'w') 216 | AmpliconHeaderList=headList+["MQ","Type"] 217 | outFile.write("\t".join(AmpliconHeaderList+sampleList)+"\n") 218 | for ampliconID in ampList: 219 | Pool=ampDic[ampliconID] 220 | try: 221 | [X, Y, Y_M, Y_L, Y_U, CN_M, CI_L, CI_U, dupP, delP, P, CNVTYPE,ampP]=runBootstriping(sampleList, 
ampRDDic, medCovSampleList, medCovDic, ampliconID, Pool, MQ, dupTh, delTh, hqSampleList) 222 | outFile.write("\t".join(list(ampliconID)+[MQ, "MedianRD"]+X)+"\n") 223 | outFile.write("\t".join(list(ampliconID)+[MQ, "Y"]+Y)+"\n") 224 | outFile.write("\t".join(list(ampliconID)+[MQ, "Y_L"]+Y_L)+"\n") 225 | outFile.write("\t".join(list(ampliconID)+[MQ, "Y_M"]+Y_M)+"\n") 226 | outFile.write("\t".join(list(ampliconID)+[MQ, "Y_U"]+Y_U)+"\n") 227 | outFile.write("\t".join(list(ampliconID)+[MQ, "CI_L"]+CI_L)+"\n") 228 | outFile.write("\t".join(list(ampliconID)+[MQ, "CN_M"]+CN_M)+"\n") 229 | outFile.write("\t".join(list(ampliconID)+[MQ, "CI_U"]+CI_U)+"\n") 230 | outFile.write("\t".join(list(ampliconID)+[MQ, "DupPvalue"]+dupP)+"\n") 231 | outFile.write("\t".join(list(ampliconID)+[MQ, "DelPvalue"]+delP)+"\n") 232 | outFile.write("\t".join(list(ampliconID)+[MQ, "Pvalue"]+P)+"\n") 233 | outFile.write("\t".join(list(ampliconID)+[MQ, "CNVType"]+CNVTYPE)+"\n") 234 | outFile.write("\t".join(list(ampliconID)+[MQ, "RegRvalue"]+ampP)+"\n") 235 | except: 236 | print("error", ampliconID) 237 | outFile.close() 238 | 239 | 240 | if __name__ == '__main__': 241 | inputs=list(sys.argv) 242 | batchTag=inputs[1] 243 | readCountStatDir=inputs[2] 244 | norReadDepthDir=inputs[3] 245 | lqSampleDir=inputs[4] 246 | RDRatioDir=inputs[5] 247 | MQList=inputs[6].split(",") 248 | dupdelList=inputs[7].split(",") 249 | 250 | 251 | for MQ in MQList: 252 | hqSampleFileName_MQ=lqSampleDir+"/"+batchTag+".lowQualitySampleTest."+MQ+".txt" 253 | hqSampleList=getHqSampleList(hqSampleFileName_MQ) 254 | RDStaticFile=readCountStatDir+batchTag+".readDepthStatistics.txt" 255 | [medCovSampleList, medCovDic]=getMedCovDic(RDStaticFile) 256 | RDRatioFileName=norReadDepthDir+batchTag+".readDepth.normalizedChrX."+MQ+".txt" 257 | [headList, sampleList, ampList, ampRDDic, ampDic]=getAmpRDDic(RDRatioFileName) 258 | for dupdelTh in dupdelList: 259 | dupTh=float(dupdelTh.split("_")[0]) 260 | delTh=float(dupdelTh.split("_")[1]) 261 
| modelFile=RDRatioDir+"/"+batchTag+".readDepthRatioFromLRModel."+MQ+".dupdelTh"+dupdelTh+".txt" 262 | writeAmpLinearRegression(headList, sampleList, ampList, ampRDDic, medCovSampleList, medCovDic, ampDic, modelFile, MQ, dupTh, delTh, hqSampleList) 263 | 264 | print("finish") 265 | -------------------------------------------------------------------------------- /Code_v1.5.1/python.calculateReadsDepthOfAmp.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy 3 | import pysam 4 | import sys 5 | from intervaltree import Interval, IntervalTree 6 | from intervaltree_bio import GenomeIntervalTree 7 | 8 | class Amplicon: 9 | ampID="" 10 | chr="" 11 | ampS=0 12 | inS=0 13 | inE=0 14 | ampE=0 15 | gene="" 16 | trans="" 17 | exon="" 18 | pool="" 19 | datType="" 20 | mappedReadList=[] 21 | readDepthList=[] 22 | 23 | def __init__(self,ampID,chr,ampS,inS,inE,ampE,gene,trans,exon,pool,datType): 24 | self.ampID=ampID 25 | self.chr=chr.replace("chr","") 26 | self.ampS=int(ampS) 27 | self.inS=int(inS) 28 | self.inE=int(inE) 29 | self.ampE=int(ampE) 30 | self.gene=gene 31 | self.trans=trans 32 | self.exon=exon 33 | self.pool=pool.split("_")[-1] 34 | self.datType=datType 35 | self.mappedReadList=[] 36 | self.readDepthList=[] 37 | 38 | def putMappedRead(self, inMappedRead): 39 | self.mappedReadList.append(inMappedRead) 40 | 41 | def getOverlapRatio(self, rChr, rS, rE): 42 | if(rChr.replace("chr","")!=self.chr): 43 | return 0.0 44 | else: 45 | rLen=rE-rS 46 | overlapLen=min(rE, self.ampE)-max(rS, self.ampS) 47 | overlapRatio=float(overlapLen)/float(rLen) 48 | if(overlapRatio>1): 49 | return 1.0 50 | else: 51 | return overlapRatio 52 | 53 | def getReadDepthPCR(self, MQList): 54 | ampliconLength=self.inE-self.inS 55 | depthPerSiteDic={} 56 | for MQ in MQList: 57 | depthPerSiteDic[MQ]=[0]*ampliconLength 58 | for pos in range(0, ampliconLength): 59 | nowS=self.inS+pos 60 | for read in self.mappedReadList: 61 | 
if(read.pos<=nowS and nowS+1<=read.pos+read.alen): 62 | for MQ in MQList: 63 | if(read.mapq>=MQ): 64 | depthPerSiteDic[MQ][pos]+=1 65 | readDepthOutList=[] 66 | for MQ in MQList: 67 | readDepth=0 68 | for read in self.mappedReadList: 69 | if(read.mapq>=MQ): 70 | readDepth+=1 71 | readDepthOutList.append(readDepth) 72 | self.readDepthList=readDepthOutList 73 | 74 | def getReadDepthHYB(self, MQList): ## using insert 75 | ampliconLength=self.inE-self.inS 76 | depthPerSiteDic={} 77 | for MQ in MQList: 78 | depthPerSiteDic[MQ]=[0]*ampliconLength 79 | for pos in range(0, ampliconLength): 80 | nowS=self.inS+pos 81 | for read in self.mappedReadList: 82 | if(read.pos<=nowS and nowS+1<=read.pos+read.alen): 83 | for MQ in MQList: 84 | if(read.mapq>=MQ): 85 | depthPerSiteDic[MQ][pos]+=1 86 | readDepthOutList=[] 87 | for MQ in MQList: 88 | depCov=0 89 | for depth in depthPerSiteDic[MQ]: 90 | depCov+=depth 91 | readDepthOutList.append(round(float(depCov)/ampliconLength,3)) 92 | self.readDepthList=readDepthOutList 93 | 94 | def runGetReadDepth(self, MQList): 95 | if(self.datType=="HYB"): 96 | self.getReadDepthHYB(MQList) 97 | elif(self.datType=="PCR"): 98 | self.getReadDepthPCR(MQList) 99 | else: 100 | print(self.datType, "unknown data") 101 | 102 | def allInfoList(self): 103 | return [self.ampID, self.chr, self.ampS,self.inS,self.inE,self.ampE, self.gene, self.trans, self.exon, self.pool] 104 | 105 | def head(self): 106 | return ["Amplicon_ID","Chr","Amplicon_Start","Insert_Start","Insert_End","Amplicon_End","Gene","Transcript","Exon","Pool"] 107 | 108 | 109 | def MakeAmpliconDic(inAmpliconTxt, datType): 110 | inFile=open(inAmpliconTxt, 'r') 111 | inLine=inFile.readline() 112 | ampliconDic={} 113 | ampLocalDic={} 114 | ampliconList=[] 115 | ampTree=GenomeIntervalTree() 116 | headCheck=False 117 | while(inLine): 118 | if(headCheck==False): 119 | headCheck=True 120 | header=inLine.replace("\n","").replace("\r","").split("\t") 121 | print(header) 122 | 
ampIDID=header.index("Amplicon_ID") 123 | chrID=header.index("Chr") 124 | ampSID=header.index("Amplicon_Start") 125 | inSID=header.index("Insert_Start") 126 | inEID=header.index("Insert_End") 127 | ampEID=header.index("Amplicon_End") 128 | geneID=header.index("Gene") 129 | transID=header.index("Transcript") 130 | exonID=header.index("Exon") 131 | poolID=header.index("Pool") 132 | else: 133 | inList=inLine.replace("\n","").replace("\r","").split("\t") 134 | ampID=inList[ampIDID] 135 | chr=inList[chrID].replace("chr","") 136 | ampS=inList[ampSID] 137 | inS=int(inList[inSID]) 138 | inE=int(inList[inEID]) 139 | ampE=inList[ampEID] 140 | gene=inList[geneID] 141 | exon=inList[exonID] 142 | trans=inList[transID] 143 | pool=inList[poolID] 144 | if(ampID not in ampLocalDic): 145 | ampliconList.append(ampID) 146 | ampLocalDic[ampID]=Amplicon(ampID,chr,ampS,inS,inE,ampE,gene,exon,trans,pool,datType) 147 | ampTree.addi(chr,inS+1,inE+1,ampID) ## [start, end) 148 | else: 149 | print("Error!! : Not unique Amplicon_ID : "+ampID) 150 | break 151 | inLine=inFile.readline() 152 | inFile.close() 153 | 154 | for ampliconID in ampliconList: 155 | amplicon=ampLocalDic[ampliconID] 156 | pool=amplicon.pool 157 | if(pool not in ampliconDic): 158 | ampliconDic[pool]=[] 159 | ampliconDic[pool].append(amplicon) 160 | print("Total Amplicons: "+str(len(ampLocalDic.keys()))) 161 | print("ampTree made!") 162 | return [ampliconDic, ampTree] 163 | 164 | 165 | def MapReadinBamPCR(inBamFile, ampliconDic, ampTree, dedupOp, MQList): 166 | ampliconList=[] 167 | poolList=list(ampliconDic.keys()) 168 | poolList.sort() 169 | for pool in poolList: 170 | ampliconList+=ampliconDic[pool] 171 | 172 | inBam=pysam.Samfile(inBamFile,'rb') 173 | for read in inBam: 174 | if(read.is_unmapped): 175 | pass 176 | else: 177 | if(read.is_duplicate): 178 | if(dedupOp=="true"): 179 | continue 180 | overlapAmpTreeList=ampTree[inBam.getrname(read.rname).replace("chr","")].search(read.pos+1, read.pos+read.alen+1) ## [start, 
end) 181 | if(len(overlapAmpTreeList)==0): 182 | pass 183 | else: 184 | overlapAmpIDList=[] 185 | for overlapAmpTree in overlapAmpTreeList: 186 | overlapAmpIDList.append(overlapAmpTree[-1]) 187 | 188 | overlapAmpList=[] 189 | for amplicon in ampliconList: 190 | if(amplicon.ampID in overlapAmpIDList): 191 | overlapAmpList.append(amplicon) 192 | 193 | overlapRatioList=[] 194 | ampLenList=[] 195 | for amplicon in overlapAmpList: 196 | overlapRatioList.append(amplicon.getOverlapRatio(inBam.getrname(read.rname).replace("chr",""), read.pos, read.pos+read.alen)) 197 | ampLenList.append(amplicon.ampE-amplicon.ampS) 198 | 199 | maxValue=max(overlapRatioList) 200 | overlapAmpList2=[] 201 | overlapRatioList2=[] 202 | ampLenList2=[] 203 | for i in range(0,len(overlapAmpList)): 204 | if(maxValue==overlapRatioList[i]): 205 | overlapAmpList2.append(overlapAmpList[i]) 206 | overlapRatioList2.append(overlapRatioList[i]) 207 | ampLenList2.append(ampLenList[i]) 208 | 209 | minAmpLen=min(ampLenList2) 210 | overlapAmpList3=[] 211 | overlapRatioList3=[] 212 | ampLenList3=[] 213 | for j in range(0,len(overlapAmpList2)): 214 | if(minAmpLen==ampLenList2[j]): 215 | overlapAmpList3.append(overlapAmpList2[j]) 216 | overlapRatioList3.append(overlapRatioList2[j]) 217 | ampLenList3.append(ampLenList2[j]) 218 | 219 | mappedAmp=overlapAmpList3[int((random.random()*10000))%(len(overlapAmpList3))] 220 | mappedAmp.mappedReadList.append(read) 221 | 222 | for amplicon in ampliconList: 223 | amplicon.runGetReadDepth(MQList) 224 | 225 | return ampliconDic 226 | 227 | 228 | def MapReadinBamHYB(inBamFile, ampliconDic, ampTree, dedupOp, MQList): 229 | ampliconList=[] 230 | poolList=list(ampliconDic.keys()) 231 | poolList.sort() 232 | for pool in poolList: 233 | ampliconList+=ampliconDic[pool] 234 | print(pool) 235 | 236 | inBam=pysam.Samfile(inBamFile,'rb') 237 | 238 | for read in inBam: 239 | if(read.is_unmapped): 240 | pass 241 | else: 242 | if(read.is_duplicate): 243 | if(dedupOp=="true"): 244 | continue 
245 | overlapAmpTreeList=ampTree[inBam.getrname(read.rname).replace("chr","")].search(read.pos+1, read.pos+read.alen+1) ## [start, end) 246 | if(len(overlapAmpTreeList)==0): 247 | pass 248 | else: 249 | overlapAmpIDList=[] 250 | for overlapAmpTree in overlapAmpTreeList: 251 | overlapAmpIDList.append(overlapAmpTree[-1]) 252 | for amplicon in ampliconList: 253 | if(amplicon.ampID in overlapAmpIDList): 254 | amplicon.mappedReadList.append(read) 255 | 256 | for amplicon in ampliconList: 257 | amplicon.runGetReadDepth(MQList) 258 | 259 | return ampliconDic 260 | 261 | 262 | def WriteReadDepthFile(ampliconDic, outFileName, MQList): 263 | ### write file per pool ########################### 264 | ampliconList=list(ampliconDic.keys()) 265 | ampliconList.sort() 266 | for pool in ampliconList: 267 | #### write attributes ########################## 268 | outFile=open(outFileName+"."+pool+".txt",'w') 269 | header=ampliconDic[pool][0].head() 270 | outFile.write("\t".join(header)) 271 | for MQ in MQList: 272 | outFile.write("\tMQ"+str(MQ)) 273 | outFile.write("\n") 274 | #### write values per amplicon ################ 275 | for amplicon in ampliconDic[pool]: 276 | outFile.write("\t".join(numpy.array(amplicon.allInfoList()).astype(str))) 277 | readDepthOutList=amplicon.readDepthList 278 | outFile.write("\t"+"\t".join(numpy.array(readDepthOutList).astype(str))) 279 | outFile.write("\n") 280 | outFile.close() 281 | 282 | 283 | def WriteMappedReadDepthStatFile(ampliconDic, RCstaticFileName, MQList, inSample): 284 | staticFile=open(RCstaticFileName+".txt",'w') 285 | staticFile.write("Sample\tPool\tMQ\tMean\tMedian\tStandardDeviation\tSum\n") 286 | ### write file per pool ########################### 287 | ampliconList=list(ampliconDic.keys()) 288 | ampliconList.sort() 289 | for pool in ampliconList: 290 | totalReadDepthOutList=[] 291 | for amplicon in ampliconDic[pool]: 292 | readDepthOutList=amplicon.readDepthList 293 | totalReadDepthOutList.append(readDepthOutList) 294 | #### write 
StaticFile per Pool+MQ ############# 295 | totalReadDepthOutList=numpy.transpose(totalReadDepthOutList) 296 | for i in range(0,len(MQList)): 297 | MQ=MQList[i] 298 | RCList=totalReadDepthOutList[i] 299 | staticList=[round(numpy.mean(RCList),2),round(numpy.median(RCList),2), round(numpy.std(RCList),2), round(numpy.sum(RCList))] 300 | staticFile.write(inSample+"\t"+pool+"\tMQ"+str(MQ)+"\t"+"\t".join(numpy.array(staticList).astype(str))+"\n") 301 | ##################################################### 302 | staticFile.close() 303 | 304 | 305 | if __name__ == '__main__': 306 | inputs=list(sys.argv) 307 | inSample=inputs[1] 308 | inBamDir=inputs[2] 309 | inAmpliconTxt=inputs[3] 310 | readDepthDir=inputs[4] 311 | readDepthStatDir=inputs[5] 312 | dedupOp=inputs[6].lower() 313 | datType=inputs[7] 314 | MQList=list(numpy.array(inputs[8].replace("MQ","").split(",")).astype(int)) 315 | 316 | [ampliconDic, ampTree]=MakeAmpliconDic(inAmpliconTxt,datType) 317 | 318 | inBamFile=inBamDir+inSample+".bam" 319 | if(datType=="HYB"): 320 | ampliconDic=MapReadinBamHYB(inBamFile, ampliconDic, ampTree, dedupOp, MQList) 321 | elif(datType=="PCR"): 322 | ampliconDic=MapReadinBamPCR(inBamFile, ampliconDic, ampTree, dedupOp, MQList) 323 | else: 324 | print("ERROR !! 
Unknown data type") 325 | 326 | readDepthFile=readDepthDir+inSample+".readDepth" 327 | WriteReadDepthFile(ampliconDic, readDepthFile, MQList) 328 | RCStaticFile=readDepthStatDir+inSample+".readDepthStatistics" 329 | WriteMappedReadDepthStatFile(ampliconDic, RCStaticFile, MQList, inSample) 330 | 331 | 332 | 333 | -------------------------------------------------------------------------------- /Code_v1.5.1/python.chrXNormalizeReadDepth.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import sys 3 | from operator import itemgetter, attrgetter, methodcaller 4 | 5 | class Amplicon: 6 | ampID="" 7 | chr="" 8 | ampS=0 9 | inS=0 10 | inE=0 11 | ampE=0 12 | gene="" 13 | trans="" 14 | exon="" 15 | pool="" 16 | 17 | def __init__(self,ampID,chr,ampS,inS,inE,ampE,gene,trans,exon,pool): 18 | self.ampID=ampID 19 | self.chr=chr.replace("chr","") 20 | self.ampS=int(ampS) 21 | self.inS=int(inS) 22 | self.inE=int(inE) 23 | self.ampE=int(ampE) 24 | self.gene=gene 25 | self.trans=trans 26 | self.exon=exon 27 | self.pool=pool.split("_")[-1] 28 | 29 | def allInfoList(self): 30 | return [self.ampID, self.chr, self.ampS,self.inS,self.inE,self.ampE, self.gene, self.trans, self.exon, self.pool] 31 | 32 | def head(self): 33 | return ["Amplicon_ID","Chr","Amplicon_Start","Insert_Start","Insert_End","Amplicon_End","Gene","Transcript","Exon","Pool"] 34 | 35 | 36 | class RCPerSample: 37 | RCList=[] 38 | ampList=[] 39 | totalRC=0 40 | aveRC=0 41 | medRC=0 42 | stdRC=0 43 | sampleName="" 44 | gender="" 45 | MQ="" 46 | pool="" 47 | 48 | def __init__(self, RCList, ampList, sampleName, gender , MQ, pool): 49 | self.RCList=RCList[:] 50 | self.ampList=ampList 51 | self.sampleName=sampleName 52 | self.gender=gender 53 | self.MQ=MQ 54 | self.pool=pool 55 | self.totalRC=sum(self.RCList) 56 | self.aveRC=numpy.mean(self.RCList) 57 | self.medRC=numpy.median(self.RCList) 58 | self.stdRC=numpy.std(self.RCList) 59 | 60 | def normalizeGender(self): 61 | 
if(self.gender.lower()=="female"): 62 | oldtotalRC=self.totalRC 63 | for i in range(0, len(self.ampList)): 64 | if("X" in self.ampList[i].chr): 65 | self.RCList[i]=self.RCList[i]/2 66 | self.totalRC=sum(self.RCList) 67 | self.aveRC=numpy.mean(self.RCList) 68 | self.medRC=numpy.median(self.RCList) 69 | self.stdRC=numpy.std(self.RCList) 70 | print(str(self.sampleName)+": Female normalization "+str(oldtotalRC)+" > "+str(self.totalRC)) 71 | elif(self.gender.lower()=="male"): 72 | pass 73 | else: 74 | print(str(self.sampleName)+": wrongGender "+self.gender) 75 | 76 | def getRC(self, ampID): 77 | for pos in range(0, len(self.ampList)): 78 | amplicon=self.ampList[pos] 79 | if(amplicon.ampID==ampID): 80 | if(amplicon.chr=="Y" and self.gender=="female"): 81 | return "Female" 82 | return self.RCList[pos] 83 | return "NA" 84 | 85 | def showSampleInfo(self): 86 | return [self.sampleName, self.gender, self.MQ, self.pool] 87 | 88 | 89 | def getSampleListAndDic(inSampleInfoTxt): 90 | inFile=open(inSampleInfoTxt,'r') 91 | inLine=inFile.readline() 92 | headCheck=False 93 | sampleList=[] 94 | sampleSexDic={} 95 | while(inLine): 96 | if(headCheck==False): 97 | headCheck=True 98 | headList=inLine.replace("\n","").replace("\r","").split("\t") 99 | sampleID=headList.index("Sample") 100 | sexID=headList.index("Sex") 101 | else: 102 | inList=inLine.replace("\n","").replace("\r","").split("\t") 103 | sample=inList[sampleID] 104 | sex=inList[sexID].lower() 105 | if(sample!="" and sample not in sampleList): 106 | sampleList.append(sample) 107 | sampleSexDic[sample]=sex 108 | inLine=inFile.readline() 109 | inFile.close() 110 | return [sampleList, sampleSexDic] 111 | 112 | 113 | def getRCListPerSample(sampleList, readDepthDir, poolList, MQList, sampleSexDic): 114 | RCPerSampleDic={} 115 | ampliconDic={} 116 | for sample in sampleList: 117 | for pool in poolList: 118 | inFile=open(readDepthDir+sample+".readDepth."+pool+".txt") 119 | inLine=inFile.readline() 120 | headCheck=False 121 | RCDic={} 
122 | ampList=[] 123 | while(inLine): 124 | if(headCheck==False): 125 | headCheck=True 126 | header=inLine.replace("\n","").split("\t") 127 | ampIDID=header.index("Amplicon_ID") 128 | chrID=header.index("Chr") 129 | ampSID=header.index("Amplicon_Start") 130 | inSID=header.index("Insert_Start") 131 | inEID=header.index("Insert_End") 132 | ampEID=header.index("Amplicon_End") 133 | geneID=header.index("Gene") 134 | transID=header.index("Transcript") 135 | exonID=header.index("Exon") 136 | poolID=header.index("Pool") 137 | else: 138 | inList=inLine.replace("\n","").replace("\r","").split("\t") 139 | ampID=inList[ampIDID] 140 | chr=inList[chrID].replace("chr","") 141 | ampS=inList[ampSID] 142 | inS=int(inList[inSID]) 143 | inE=int(inList[inEID]) 144 | ampE=inList[ampEID] 145 | gene=inList[geneID] 146 | exon=inList[exonID] 147 | trans=inList[transID] 148 | ampliconDic[ampID]=Amplicon(ampID,chr,ampS,inS,inE,ampE,gene,exon,trans,pool) 149 | ampList.append(ampliconDic[ampID]) 150 | for pos in range(0, len(inList)): 151 | if(header[pos].startswith("MQ") and header[pos] in MQList): 152 | MQ=header[pos] 153 | if(MQ not in RCDic): 154 | RCDic[MQ]=[] 155 | RCDic[MQ].append(float(inList[pos])) 156 | inLine=inFile.readline() 157 | inFile.close() 158 | for MQ in RCDic.keys(): 159 | rcpersample=RCPerSample(RCDic[MQ], ampList, sample, sampleSexDic[sample], MQ, pool) 160 | rcpersample.normalizeGender() 161 | if(sample not in RCPerSampleDic): 162 | RCPerSampleDic[sample]=[] 163 | RCPerSampleDic[sample].append(rcpersample) 164 | 165 | return [RCPerSampleDic,ampliconDic] 166 | 167 | 168 | def WriteNorRCFile(MQList, norRCFileName, sampleList, ampliconDic, RCPerSampleDic): 169 | head=ampliconDic[list(ampliconDic.keys())[0]].head() 170 | header=head+sampleList 171 | posID=header.index("Amplicon_Start") 172 | chrID=header.index("Chr") 173 | for MQ in MQList: 174 | outList=[] 175 | ampliconList=ampliconDic.keys() 176 | for ampliconID in ampliconList: 177 | amplicon=ampliconDic[ampliconID] 178 
| outList.append(amplicon.allInfoList()) 179 | for sampleName in sampleList: 180 | for rcpersample in RCPerSampleDic[sampleName]: 181 | if(rcpersample.MQ==MQ and rcpersample.pool==amplicon.pool): 182 | norRC=rcpersample.getRC(amplicon.ampID) 183 | outList[-1].append(norRC) 184 | 185 | outList=sorted(outList, key=itemgetter(posID)) 186 | outList=sorted(outList, key=itemgetter(chrID)) 187 | outFileName=norRCFileName+MQ+".txt" 188 | outFile=open(outFileName,'w') 189 | outFile.write("\t".join(numpy.array(header).astype(str))+"\n") 190 | for out in outList: 191 | outFile.write("\t".join(numpy.array(list(out)).astype(str))+"\n") 192 | outFile.close() 193 | 194 | 195 | if __name__ == '__main__': 196 | inputs=list(sys.argv) 197 | batchTag=inputs[1] 198 | inSampleInfoTxt=inputs[2] 199 | readDepthDir=inputs[3] 200 | norRCDir=inputs[4] 201 | PoolList=inputs[5].split(",") 202 | MQList=inputs[6].split(",") 203 | 204 | [sampleList, sampleSexDic]=getSampleListAndDic(inSampleInfoTxt) 205 | [RCPerSampleDic,ampliconDic]=getRCListPerSample(sampleList, readDepthDir, PoolList, MQList, sampleSexDic) 206 | norRCFileName=norRCDir+"/"+batchTag+".readDepth.normalizedChrX." 
207 | WriteNorRCFile(MQList, norRCFileName, sampleList, ampliconDic, RCPerSampleDic) 208 | 209 | -------------------------------------------------------------------------------- /Code_v1.5.1/python.getCNVPerSample.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy 3 | from operator import itemgetter, attrgetter 4 | import vcf 5 | from intervaltree import Interval, IntervalTree 6 | from intervaltree_bio import GenomeIntervalTree 7 | import math 8 | 9 | 10 | class InfoPerAmplicon: 11 | ampID="" 12 | chr="" 13 | ampS=0 14 | inS=0 15 | inE=0 16 | ampE=0 17 | gene="" 18 | trans="" 19 | exon="" 20 | pval=-1 21 | cnm=-1 22 | ciLen=[] 23 | regRval=-1 24 | filter="" 25 | 26 | def __init__(self,ampID, chr, ampS, inS, inE, ampE, gene, trans, exon, pval, cnm, ciLen, regRval, filter): 27 | self.ampID=ampID 28 | self.chr=chr.replace("chr","") 29 | self.ampS=int(ampS) 30 | self.inS=int(inS) 31 | self.inE=int(inE) 32 | self.ampE=int(ampE) 33 | self.gene=gene 34 | self.trans=trans 35 | self.exon=exon.split("|") 36 | self.pval=float(pval) 37 | self.cnm=float(cnm) 38 | self.ciLen=ciLen 39 | self.regRval=float(regRval) 40 | self.filter=filter 41 | 42 | def getInS(self): 43 | return self.inS 44 | 45 | 46 | class GenePerSample: 47 | gene="" 48 | trans="" 49 | sample="" 50 | medCov=[] 51 | exonDupDic={} 52 | exonDelDic={} 53 | geneDupList=[] 54 | geneDelList=[] 55 | dupList=[] 56 | delList=[] 57 | pTh=0.5 58 | coveredExonCnt=0 59 | totalAmpCnt=0 60 | qType="" 61 | dupTree="" 62 | delTree="" 63 | chrom="" 64 | start=-1 65 | end=-1 66 | minAmpCntInRegion=0 67 | dupTh=0 68 | delTh=0 69 | 70 | def __init__(self, gene, trans, sample, pTh, minAmpCntInRegion, dupTh, delTh): 71 | self.gene=gene 72 | self.sample=sample 73 | self.trans=trans 74 | self.medCov=[] 75 | self.exonDupDic={} 76 | self.exonDelDic={} 77 | self.geneDupList=[] 78 | self.geneDelList=[] 79 | self.dupList=[] 80 | self.delList=[] 81 | self.pTh=float(pTh) 82 | 
self.coveredExonCnt=0 83 | self.totalExonCnt=0 84 | self.totalAmpCnt=0 85 | self.qType="" 86 | self.dupTree=GenomeIntervalTree() 87 | self.delTree=GenomeIntervalTree() 88 | self.chrom="" 89 | self.start=-1 90 | self.end=-1 91 | self.minAmpCntInRegion=minAmpCntInRegion 92 | self.dupTh=dupTh 93 | self.delTh=delTh 94 | 95 | def putTrans (self, trans): 96 | self.trans=trans 97 | 98 | def putMedCov(self,medCov): 99 | self.medCov.append(medCov) 100 | 101 | def putPvalue(self, ampID, chr, ampS, inS, inE, ampE, gene, trans, exon, type, pval, cnm, ciLen, regRval): 102 | self.chrom=chr 103 | if(self.start==-1): 104 | self.start=int(inS) 105 | if(self.end==-1): 106 | self.end=int(inE) 107 | self.start=min(self.start,int(inS)) 108 | self.end=max(self.end,int(inE)) 109 | if((pval!="nan") and (ciLen!="nan") and (cnm!="nan")): 110 | filter="filter-out" 111 | if(float(pval)=minAmpCntInRegion): 149 | dupList=sorted(dupList,key=attrgetter('inS')) 150 | UnifiedRegion[self.sample+"_"+self.gene+"_"+"dup"+":"+str(ID)]=dupList 151 | if(self.qType=="del"): 152 | delList=[] 153 | for overlapAmps in list(self.delTree[segChr].search(segStart, segEnd)): 154 | delList.append(overlapAmps[-1]) 155 | if(delList!=[] and len(delList)>=minAmpCntInRegion): 156 | delList=sorted(delList,key=attrgetter('inS')) 157 | UnifiedRegion[self.sample+"_"+self.gene+"_"+"del"+":"+str(ID)]=delList 158 | ID+=1 159 | #### Regions in a unifiedRegions ################## 160 | UnifiedRegionIDList=list(UnifiedRegion.keys()) 161 | for ID in UnifiedRegionIDList: 162 | subID=1 163 | filterIn=[] 164 | for Info in UnifiedRegion[ID]: 165 | if(Info.filter=="filter-in"): 166 | filterIn.append(Info) 167 | else: 168 | if(filterIn!=[]): 169 | if(len(filterIn)>=minAmpCntInRegion and filterIn!=UnifiedRegion[ID]): 170 | UnifiedRegion[ID+"-sub"+str(subID)]=filterIn 171 | subID+=1 172 | filterIn=[] 173 | if(filterIn!=[]): 174 | if(len(filterIn)>=minAmpCntInRegion and filterIn!=UnifiedRegion[ID]): 175 | 
def getRegionInfo(self, infos):
    """Summarise a run of amplicons as one tab-separated CNV region record.

    This is a method of GenePerSample; it reads self.qType, self.sample,
    self.medCov, self.gene, self.trans, self.coveredExonCnt,
    self.totalAmpCnt, self.dupTh and self.delTh.

    infos -- non-empty list of amplicon records (ampID, chr, inS, inE, exon,
             pval, cnm, ciLen, regRval, filter), already ordered by position.

    Returns the record as a string ending in a newline, with the fields in
    the order given by getHead() minus the leading RegionID, or "" when the
    mean read-depth ratio does not clear the dup/del threshold for
    self.qType.
    """
    ampCnt = len(infos)
    chrom = infos[0].chr
    start = infos[0].inS
    end = infos[-1].inE

    amps = [info.ampID for info in infos]
    pvals = [info.pval for info in infos]
    cnms = [info.cnm for info in infos]
    ciLens = [info.ciLen for info in infos]
    regRvals = [info.regRval for info in infos]
    filterCheck = [info.filter for info in infos]

    # Amplicons without an exon annotation are reported as "Intron" but are
    # not counted as exons of the region.
    exons = []
    for info in infos:
        exons += ["Intron" if exon == "" else exon for exon in info.exon]
    selectedExonCnt = len(set(exons) - set(["Intron"]))
    try:
        exonInRegionRatio = round(float(selectedExonCnt) / self.coveredExonCnt, 2)
    except (ZeroDivisionError, TypeError):
        exonInRegionRatio = "nan"
    medCovList = sorted(set(self.medCov))

    meanCnm = numpy.mean(cnms)
    CN = round(meanCnm * 2, 0) - 2
    # NOTE(review): the tie-break below compares the mean ratio with 0, so a
    # region whose ratio rounds to "no change" is always reported as +1;
    # comparing with 1 may have been intended -- confirm before changing.
    if CN == 0 and meanCnm > 0:
        CN = 1
    elif CN == 0 and meanCnm < 0:
        CN = -1

    # log2 ratios, floored at -2 so near-zero depth does not dominate.
    lognormCNMs = [max(math.log(cnm + 0.0001, 2), -2) for cnm in cnms]

    lognormCILens = []
    for ciu, cil in ciLens:
        try:
            ciu = max(math.log(float(ciu) + 0.000001, 2), -2)
            cil = max(math.log(float(cil) + 0.000001, 2), -2)
            lognormCILens.append(ciu - cil)
        except (TypeError, ValueError):
            # Non-numeric or negative CI bound: record a real NaN.  The old
            # "nan" string broke the "< -2" comparison on Python 3 and made
            # numpy.mean() fail on a string array.
            lognormCILens.append(float("nan"))

    region = [self.qType, CN, abs(CN - (meanCnm * 2 - 2)), self.sample, ",".join(medCovList), chrom, start, end, end - start + 1, self.gene, self.trans]
    region += [selectedExonCnt, self.coveredExonCnt, exonInRegionRatio, "|".join(exons)]
    region += [ampCnt, self.totalAmpCnt, "|".join(amps), ",".join(numpy.array(pvals).astype(str)), round(float(filterCheck.count("filter-in")) / len(filterCheck), 2)]
    region += [numpy.mean(lognormCNMs), numpy.std(lognormCNMs), ",".join(numpy.array(cnms).astype(str)), numpy.mean(lognormCILens), numpy.mean(regRvals)]

    # Drop regions whose average ratio does not support the queried CNV type.
    if self.qType == "dup" and meanCnm <= self.dupTh:
        return ""
    if self.qType == "del" and meanCnm >= self.delTh:
        return ""
    return "\t".join(numpy.array(region).astype(str)) + "\n"
def _recordAmplicon(genePerSampleDic, col, medCovRow, rows, sample, pTh, minAmpCntInRegion, dulTh, delTh):
    """Feed one amplicon's rows into the GenePerSample object of its gene."""
    sampleID = col[sample]
    ampID = medCovRow[col["Amplicon_ID"]]
    chr = medCovRow[col["Chr"]]
    ampS = medCovRow[col["Amplicon_Start"]]
    inS = medCovRow[col["Insert_Start"]]
    inE = medCovRow[col["Insert_End"]]
    ampE = medCovRow[col["Amplicon_End"]]
    gene = medCovRow[col["Gene"]]
    trans = medCovRow[col["Transcript"]]
    exon = medCovRow[col["Exon"]]
    pool = medCovRow[col["Pool"]]

    regRval = rows["RegRvalue"][sampleID]
    medCov = pool + ":" + medCovRow[sampleID]
    cnm = rows["CN_M"][sampleID]
    ciLen = [rows["CI_U"][sampleID], rows["CI_L"][sampleID]]
    dupPval = rows["DupPvalue"][sampleID]
    delPval = rows["DelPvalue"][sampleID]
    # Amplicons flagged "low..." carry no usable p-values for this sample.
    if "low" in rows["CNVType"][sampleID]:
        dupPval = "nan"
        delPval = "nan"

    ID = (gene, sample)
    if ID not in genePerSampleDic:
        genePerSampleDic[ID] = GenePerSample(gene, trans, sample, pTh, minAmpCntInRegion, dulTh, delTh)
    if trans != "":
        genePerSampleDic[ID].putTrans(trans)
    genePerSampleDic[ID].putMedCov(medCov)
    genePerSampleDic[ID].putPvalue(ampID, chr, ampS, inS, inE, ampE, gene, trans, exon, "dup", dupPval, cnm, ciLen, regRval)
    genePerSampleDic[ID].putPvalue(ampID, chr, ampS, inS, inE, ampE, gene, trans, exon, "del", delPval, cnm, ciLen, regRval)


def getRDRatioDic(inFileName, batchTag, pTh, sample, CBSTree, minAmpCntInRegion, dulTh, delTh):
    """Read the read-depth-ratio table and group one sample's amplicons by gene.

    The table has one row per (amplicon, Type).  A "MedianRD" row opens an
    amplicon; the CI_U, CI_L, CN_M, DupPvalue, DelPvalue, CNVType and
    RegRvalue rows that follow it belong to the same amplicon.

    inFileName -- tab-separated table whose header contains the annotation
                  columns, "Type", and one column named after `sample`.
    batchTag, CBSTree -- accepted for interface compatibility; not used here.

    Returns a dict mapping (gene, sample) to a GenePerSample object.
    Raises ValueError when a required header column is missing and KeyError
    when an amplicon is closed before all of its row types have been seen.
    """
    perAmpliconTypes = ("CI_U", "CI_L", "CN_M", "DupPvalue", "DelPvalue", "CNVType", "RegRvalue")
    columnNames = ("Amplicon_ID", "Chr", "Amplicon_Start", "Insert_Start", "Insert_End", "Amplicon_End",
                   "Gene", "Transcript", "Exon", "Pool", "Type", sample)
    genePerSampleDic = {}
    col = None
    medCovRow = None   # "MedianRD" row of the amplicon currently being read
    rows = {}          # most recent row of every other Type

    with open(inFileName, 'r') as inFile:
        for inLine in inFile:
            inList = inLine.replace("\n", "").split("\t")
            if col is None:
                col = {}
                for name in columnNames:
                    col[name] = inList.index(name)
                continue
            rowType = inList[col["Type"]]
            if rowType == "MedianRD":
                # A new amplicon starts: the previous one is now complete.
                if medCovRow is not None:
                    _recordAmplicon(genePerSampleDic, col, medCovRow, rows, sample, pTh, minAmpCntInRegion, dulTh, delTh)
                medCovRow = inList
            elif rowType in perAmpliconTypes:
                rows[rowType] = inList

    # The last amplicon has no following "MedianRD" row to trigger it, so the
    # original loop never recorded it.  Flush it here, provided its rows exist.
    # NOTE(review): assumes the table does not end with a sentinel row.
    if medCovRow is not None and all(rowType in rows for rowType in perAmpliconTypes):
        _recordAmplicon(genePerSampleDic, col, medCovRow, rows, sample, pTh, minAmpCntInRegion, dulTh, delTh)
    return genePerSampleDic
def getRegionCandidate(genePerSampleDic, outFileName, pTh, CBSTree,minAmpCntInRegion, dupTh, delTh, MQ):
    """Write the CNV candidate regions of every gene/sample to outFileName.

    genePerSampleDic -- dict of objects providing getHead() and
                        getCandidateRegion(qType, CBSTree).
    The output starts with four "#" lines recording the run parameters and a
    "#"-prefixed column header, followed by the deletion and then the
    duplication records of each entry.

    Raises IndexError when genePerSampleDic is empty, as before.  The column
    header is resolved before the file is opened, so that failure no longer
    leaves a truncated output file behind.
    """
    genePerSampleIDs = list(genePerSampleDic.keys())
    headLine = "#" + "\t".join(genePerSampleDic[genePerSampleIDs[0]].getHead()) + "\n"

    with open(outFileName, 'w') as outFile:
        outFile.write("#P-value<" + str(pTh) + "\n")
        outFile.write("#Minimum-number of amplicons to extract small CNVs >=" + str(minAmpCntInRegion) + "\n")
        outFile.write("#Duplication threshold:" + str(dupTh) + ", Deletion threshold:" + str(delTh) + "\n")
        outFile.write("#MQV>=" + str(MQ) + "\n")
        outFile.write(headLine)
        for genepersampleID in genePerSampleIDs:
            genepersample = genePerSampleDic[genepersampleID]
            out = genepersample.getCandidateRegion("del", CBSTree)
            out += genepersample.getCandidateRegion("dup", CBSTree)
            if len(out) > 0:
                outFile.write("".join(out))
def _addMergedRun(CBSTree, run):
    """Add to CBSTree one interval spanning a run of consecutive CBS segments.

    run -- list of [chromosome, start, end, nbrOfLoci, mean] entries.  The
    merged interval takes the chromosome of the first segment, the outermost
    coordinates, the summed locus count and the average of the segment means.
    """
    chrom = run[0][0].replace("23", "X")
    start = min(int(seg[1]) for seg in run)
    end = max(int(seg[2]) for seg in run)
    loci = sum(int(seg[3]) for seg in run)
    meanCN = numpy.mean([float(seg[4]) for seg in run])
    CBSTree.addi(chrom, start, end + 1, [chrom, start, end + 1, loci, meanCN])


def readCBSFile(inFileName, dupTh, delTh):
    """Load CBS segments into an interval tree and list gained/lost segments.

    inFileName -- tab-separated segment table; lines starting with "#" are
                  skipped and the first remaining line is the header, which
                  must contain "chromosome", "start", "end", "nbrOfLoci" and
                  "mean".
    dupTh, delTh -- read-depth-ratio thresholds; consecutive segments with
                    mean >= dupTh (or <= delTh) are additionally stored as
                    one merged interval when the run has two or more
                    segments.

    Chromosome "23" is stored as "X".  Rows that cannot be parsed (e.g. a
    non-numeric mean) are skipped and terminate any run in progress.

    Returns [dupList, delList, CBSTree]; dupList/delList hold
    [chromosome, start, end, mean] for segments with mean > 1 / mean < 1.
    """
    dupList = []
    delList = []
    CBSTree = GenomeIntervalTree()
    dupRun = []           # consecutive segments with mean >= dupTh
    delRun = []           # consecutive segments with mean <= delTh
    inDupRun = False      # did the previous row extend dupRun?
    inDelRun = False
    header = None

    with open(inFileName, 'r') as inFile:
        for inLine in inFile:
            if inLine.startswith("#"):
                continue
            inList = inLine.replace("\n", "").split("\t")
            if header is None:
                header = inList
                chrID = header.index("chromosome")
                startID = header.index("start")
                endID = header.index("end")
                noID = header.index("nbrOfLoci")
                meanID = header.index("mean")
                continue

            # The previous row closed a run: store it if it merged >= 2 segments.
            if not inDupRun:
                if len(dupRun) > 1:
                    _addMergedRun(CBSTree, dupRun)
                dupRun = []
            if not inDelRun:
                if len(delRun) > 1:
                    _addMergedRun(CBSTree, delRun)
                delRun = []

            try:
                copyNumber = float(inList[meanID])
                if copyNumber < 1:
                    delList.append([inList[chrID], inList[startID], inList[endID], copyNumber])
                elif copyNumber > 1:
                    dupList.append([inList[chrID], inList[startID], inList[endID], copyNumber])
                chrom = inList[chrID].replace("23", "X")
                segStart = int(inList[startID])
                segEnd = int(inList[endID]) + 1
                CBSTree.addi(chrom, segStart, segEnd, [chrom, segStart, segEnd, int(inList[noID]), float(inList[meanID])])
                inDupRun = copyNumber >= dupTh
                if inDupRun:
                    dupRun.append([inList[chrID], inList[startID], inList[endID], inList[noID], copyNumber])
                inDelRun = copyNumber <= delTh
                if inDelRun:
                    delRun.append([inList[chrID], inList[startID], inList[endID], inList[noID], copyNumber])
            except (ValueError, IndexError):
                # Unparseable row: it breaks both runs.
                inDupRun = False
                inDelRun = False

    # Runs still open at end of file.
    if len(dupRun) > 1:
        _addMergedRun(CBSTree, dupRun)
    if len(delRun) > 1:
        _addMergedRun(CBSTree, delRun)
    return [dupList, delList, CBSTree]
def getSampleList(inFileName):
    """Return the distinct sample names of a sample-information table.

    inFileName -- tab-separated file whose first line is a header containing
                  a "Sample" column.

    Names are returned in order of first appearance; empty names are ignored.
    Blank or truncated rows are skipped instead of raising IndexError.
    Raises ValueError when the header has no "Sample" column.
    """
    sampleList = []
    seen = set()
    sampleID = None
    with open(inFileName, 'r') as inFile:
        for inLine in inFile:
            inList = inLine.replace("\n", "").replace("\r", "").split("\t")
            if sampleID is None:
                sampleID = inList.index("Sample")
                continue
            if len(inList) <= sampleID:
                continue
            sample = inList[sampleID]
            if sample != "" and sample not in seen:
                seen.add(sample)
                sampleList.append(sample)
    return sampleList
def writeFile(sampleList, readDepthStatDir, outFileName):
    """Concatenate the per-sample read-depth statistics into one table.

    For each sample, <readDepthStatDir>/<sample>.readDepthStatistics.txt is
    read.  The first file is copied whole (header included); the first line
    of every later file is dropped.  A file whose last line lacks a newline
    is terminated so that its last row cannot fuse with the next sample's
    first row.

    Raises IOError/OSError when a per-sample file cannot be opened.
    """
    with open(outFileName, 'w') as outFile:
        headerWritten = False
        for sample in sampleList:
            sampleFileName = readDepthStatDir + "/" + sample + ".readDepthStatistics.txt"
            with open(sampleFileName, 'r') as inFile:
                inLineList = inFile.readlines()
            if headerWritten:
                inLineList = inLineList[1:]
            headerWritten = True
            if inLineList and not inLineList[-1].endswith("\n"):
                inLineList[-1] += "\n"
            outFile.writelines(inLineList)
def getHqSampleList(lqSampleFileName):
    """Read the sample-quality table and split out the high-quality samples.

    lqSampleFileName -- tab-separated file whose header contains "Sample"
                        and "LowQualSample?" columns.

    Returns [sampleList, hqSampleList]: every sample in file order, and the
    subset whose "LowQualSample?" value is exactly "HQ".  Blank or truncated
    rows are skipped instead of raising IndexError.
    Raises ValueError when a required header column is missing.
    """
    sampleList = []
    hqSampleList = []
    sampleID = None
    lowQualID = None
    with open(lqSampleFileName, 'r') as inFile:
        for inLine in inFile:
            inList = inLine.replace("\n", "").replace("\r", "").split("\t")
            if sampleID is None:
                sampleID = inList.index("Sample")
                lowQualID = inList.index("LowQualSample?")
                continue
            if len(inList) <= max(sampleID, lowQualID):
                continue
            sample = inList[sampleID]
            if inList[lowQualID] == "HQ":
                hqSampleList.append(sample)
            sampleList.append(sample)
    return [sampleList, hqSampleList]
class ScoreStat:
    """Per-sample counts of CNV calls at each score, plus HQ-sample summaries.

    Scores run from len(delfilterDic) down to 0, followed by the pseudo-score
    "raw", which counts every call regardless of its score.
    """
    filterLen = 0

    def __init__(self, delfilterDic, dupfilerDic):
        # All state is per instance.  It used to live in mutable class
        # attributes that were mutated through self, so every instance shared
        # (and kept appending to) the same lists and dictionary.
        self.delfilterDic = delfilterDic
        # The parameter keeps its historical spelling for keyword callers;
        # the body used to read an undefined name "dupfilterDic" instead.
        self.dupfilterDic = dupfilerDic
        # NOTE(review): the score range is derived from the number of del
        # filter attributes only; putSampleScore raises ValueError for a
        # score outside it -- confirm this matches how scores are counted.
        self.filterLen = len(delfilterDic) + 2
        self.scoreID = [len(delfilterDic) - i for i in range(0, len(delfilterDic) + 1)]
        self.scoreID.append("raw")
        self.sampleDic = {}      # sample -> [del counts, dup counts, all counts]
        self.sampleListHQ = []
        self.sampleList = []
        self.delMedian = []
        self.dupMedian = []
        self.allMedian = []
        self.delMean = []
        self.dupMean = []
        self.allMean = []
        self.delSum = []
        self.dupSum = []
        self.allSum = []

    def putSampleListHQ (self, sampleListHQ):
        """Register which samples are high quality."""
        self.sampleListHQ = sampleListHQ

    def makeSampleScore (self, sampleList):
        """Create zeroed del/dup/all counters for every sample."""
        self.sampleList = sampleList
        for sample in sampleList:
            self.sampleDic[sample] = [[0] * self.filterLen, [0] * self.filterLen, [0] * self.filterLen]

    def putSampleScore(self, sample, cnvType, score):
        """Count one call of cnvType ("del" or "dup") with the given score."""
        counters = self.sampleDic[sample]
        scorePos = self.scoreID.index(score)
        rawPos = self.scoreID.index("raw")
        touched = []
        if cnvType == "del":
            touched.append(counters[0])
        elif cnvType == "dup":
            touched.append(counters[1])
        touched.append(counters[2])   # the "all" counters are always updated
        for counts in touched:
            counts[scorePos] += 1
            counts[rawPos] += 1

    def getMeanMedianScore(self):
        """Compute median/mean/sum of every counter over the HQ samples.

        Results are rebuilt from scratch on each call, so calling this more
        than once no longer appends duplicate values.
        """
        hqSamples = [sample for sample in self.sampleList if sample in self.sampleListHQ]
        summaries = []
        for kind in range(3):   # 0: del, 1: dup, 2: all
            perScore = [[self.sampleDic[sample][kind][i] for sample in hqSamples] for i in range(self.filterLen)]
            medians = [numpy.median(values) for values in perScore]
            means = [numpy.around(numpy.mean(values), decimals=1) for values in perScore]
            sums = [numpy.sum(values) for values in perScore]
            summaries.append((medians, means, sums))
        self.delMedian, self.delMean, self.delSum = summaries[0]
        self.dupMedian, self.dupMean, self.dupSum = summaries[1]
        self.allMedian, self.allMean, self.allSum = summaries[2]

    def showStat(self):
        """Return the statistics as "#"-prefixed, tab-separated text lines.

        Order: score header, Sum/Mean/Median over HQ samples, one line per HQ
        sample, then one "(LQ)" line per remaining sample.
        """
        outAlls = []
        outAlls.append(["##Score", "(del):"] + self.scoreID + ["(dup):"] + self.scoreID + ["(total):"] + self.scoreID)
        outAlls.append(["##Sum(onlyHQSample)", "(del):"] + self.delSum + ["(dup):"] + self.dupSum + ["(total):"] + self.allSum)
        outAlls.append(["##Mean(onlyHQSample)", "(del):"] + self.delMean + ["(dup):"] + self.dupMean + ["(total):"] + self.allMean)
        outAlls.append(["##Median(onlyHQSample)", "(del):"] + self.delMedian + ["(dup):"] + self.dupMedian + ["(total):"] + self.allMedian)
        outHQs = []
        outLQs = []
        for sample in self.sampleList:
            counters = self.sampleDic[sample]
            if sample in self.sampleListHQ:
                outHQs.append(["#" + sample, "(del):"] + counters[0] + ["(dup):"] + counters[1] + ["(total):"] + counters[2])
            else:
                outLQs.append(["#(LQ)" + sample, "(del):"] + counters[0] + ["(dup):"] + counters[1] + ["(total):"] + counters[2])
        return ["\t".join(numpy.array(out).astype(str)) + "\n" for out in outAlls + outHQs + outLQs]
def _countPassedFilter(cell, filterV):
    """Return how many of the comparisons encoded in filterV the cell passes.

    filterV is a threshold such as ">=0.5", ">2", "<=0.1" or "<3".  The cell
    is converted to float only when an operator is recognised.
    """
    passed = 0
    hasEquals = "=" in filterV
    if ">=" in filterV:
        if float(cell) >= float(filterV.replace(">=", "")):
            passed += 1
    if (">" in filterV) and not hasEquals:
        if float(cell) > float(filterV.replace(">", "")):
            passed += 1
    if "<=" in filterV:
        if float(cell) <= float(filterV.replace("<=", "")):
            passed += 1
    if ("<" in filterV) and not hasEquals:
        if float(cell) < float(filterV.replace("<", "")):
            passed += 1
    return passed


def readCNVFile(inCNVFileName, delfilterDic, dupfilterDic, scoreStat):
    """Score every CNV record of a merged CNV file against the filter thresholds.

    inCNVFileName -- CNV table; "#" lines are kept as header lines and the
                     last "#" line before the first record names the columns.
    delfilterDic / dupfilterDic -- lower-cased column name -> list of
                     threshold strings, for deletions / duplications.
    scoreStat     -- receives putSampleScore() for every kept record, then
                     getMeanMedianScore() and showStat().

    The score of a record is the number of thresholds it passes.  Filter
    attributes that do not occur in the column header are ignored.  The
    filter dictionaries are no longer modified: header positions used to be
    appended to the caller's lists, which broke any second call.

    Returns [scoreStats, outHList, outVList]: the statistic lines, the header
    lines (column header prefixed with "Score") and the kept records prefixed
    with their score, sorted in descending order.
    Raises ValueError for a record whose CNV type is neither "del" nor "dup".
    """
    thresholdsByType = {"del": delfilterDic, "dup": dupfilterDic}
    outHList = []
    outVDic = {}
    sampleCNVDic = {}
    headPosByName = None

    with open(inCNVFileName, 'r') as inFile:
        for inLine in inFile:
            if inLine.startswith("#"):
                outHList.append(inLine)
                continue
            if headPosByName is None:
                # First record: the most recent "#" line is the column header.
                headLine = outHList[-1]
                outHList[-1] = "#Score\t" + headLine.replace("#", "")
                headList = headLine.lower().replace("\n", "").replace("\r", "").replace("#", "").split("\t")
                regionIDID = headList.index("regionid")
                cnvTypeID = headList.index("cnvtype")
                sampleID = headList.index("sample")
                print(headList)
                chrID = headList.index("chr")
                startID = headList.index("start")
                endID = headList.index("end")
                headPosByName = {}
                for headPos in range(len(headList)):   # was xrange: NameError on Python 3
                    headPosByName[headList[headPos]] = headPos

            inList = inLine.replace("\n", "").replace("\r", "").split("\t")
            regionID = inList[regionIDID]
            sample = inList[sampleID]
            cnvType = inList[cnvTypeID]
            chr = inList[chrID]
            start = int(inList[startID])
            end = int(inList[endID])

            filterDic = thresholdsByType.get(cnvType)
            if filterDic is None:
                raise ValueError("Unknown CNV type '" + cnvType + "' for region " + regionID)
            passCnt = 0
            for filterAtt in filterDic.keys():
                if filterAtt not in headPosByName:
                    continue
                cell = inList[headPosByName[filterAtt]]
                for filterV in filterDic[filterAtt]:
                    passCnt += _countPassedFilter(cell, filterV)

            outVDic[regionID] = [passCnt, cnvType, sample, chr, start, end, str(passCnt) + "\t" + inLine]
            sampleCNVID = sample + "_" + cnvType
            if sampleCNVID not in sampleCNVDic:
                sampleCNVDic[sampleCNVID] = []
            sampleCNVDic[sampleCNVID].append(regionID)

    outVsList = filterOutSmallCNVs(outVDic, sampleCNVDic)
    outVsList.sort(reverse=True)
    outVList = []
    for outVs in outVsList:
        [score, cnvType, sample, chr, start, end, results] = outVs
        outVList.append(results)
        scoreStat.putSampleScore(sample, cnvType, score)

    scoreStat.getMeanMedianScore()
    scoreStats = scoreStat.showStat()

    return [scoreStats, outHList, outVList]
| ID2=IDList[idx2] 258 | if(ID1!=ID2): 259 | [score2, cnvType2, sample2, chr2, start2, end2, results2]=outVDic[ID2] 260 | if(score1>=score2 and cnvType1==cnvType2 and sample1==sample2 and chr1==chr2 and start1<=start2 and end1>=end2): 261 | if (score1==score2 and cnvType1==cnvType2 and sample1==sample2 and chr1==chr2 and start1==start2 and end1==end2): 262 | if(idx1MAXV]<-MAXV*1.0 104 | temp.CN_M[temp.CN_M0){ 117 | if(as.numeric(as.matrix(medCov[medCov$Pool==Pool,2]))<50){ 118 | temp.poolvalues[temp.poolvalues==Pool]<-"LowMedCov" 119 | } 120 | } 121 | poolInfo<-cbind(poolInfo, paste0(Pool,":",medCov[medCov$Pool==Pool,2])) 122 | poolCexList<-cbind(poolCexList,refCexList[i]) 123 | i<-i+1 124 | } 125 | 126 | alphavalues[temp.poolvalues=="Faulty"]<-0.5 127 | alphavalues[temp.poolvalues=="LowMedCov"]<-0.6 128 | alphavalues[temp.poolvalues=="LowRval"]<-0.6 129 | 130 | chr.now="" 131 | for(chrPos in c(1:length(chrlabels))){ 132 | if(is.na(chrlabels[chrPos])){ 133 | chrlabels[chrPos]<-"" 134 | } 135 | if(chr.now==chrlabels[chrPos]){ 136 | chrlabels[chrPos]<-"" 137 | } 138 | if(chrlabels[chrPos]!=""){ 139 | chr.now<-chrlabels[chrPos] 140 | } 141 | } 142 | 143 | ## running CBS ############### 144 | chrvalues[chrvalues=="X"]<-23 145 | chrvalues[chrvalues=="Y"]<-24 146 | chrvalues[chrvalues=="MT"]<-25 147 | copynumber<-CNMvalues 148 | copynumber[poolvalues=="lowQual"]<-NA 149 | 150 | CBSDat<-data.frame(chromosome=as.numeric(chrvalues), x=genomicPos, y=copynumber) 151 | CBSDat<-CBSDat[!is.na(CBSDat$y),] 152 | 153 | colnames(gaps)<-c("chromosome","start","end","length") 154 | gaps2<-data.frame(gaps) 155 | if(NROW(gaps2)>1){ 156 | for(i in seq(NROW(gaps2),2)){ 157 | end1<-gaps2$end[i-1] 158 | start2<-gaps2$start[i] 159 | if(gaps2$chromosome[i-1]==gaps2$chromosome[i]){ 160 | if(end1>start2){ 161 | gaps2[i-1,3]<-gaps2$end[i] 162 | gaps2[i-1,4]<-gaps2[i-1,3]-gaps2[i-1,2]+1 163 | gaps2<-gaps2[-i,] 164 | } 165 | } 166 | } 167 | knownSegments<-gapsToSegments(gaps2) 168 | fit <- 
segmentByCBS(CBSDat,knownSegments=knownSegments) 169 | }else{ 170 | fit <- segmentByCBS(CBSDat) 171 | } 172 | CBSFileName=paste0(CBSdir,"/",inSample,".CBS.",MQ,".dupdelTh",dupdelTh) 173 | CBSFile=paste0(CBSdir,"/",inSample,".CBS.",MQ,".dupdelTh",dupdelTh,".tsv") 174 | if(file.exists(CBSFile)){ 175 | file.remove(CBSFile) 176 | } 177 | pathname<-writeSegments(fit, name=CBSFileName, simplify=TRUE) 178 | 179 | 180 | poolvalues<-factor(temp.poolvalues,levels=c(PoolList,"Faulty","LowMedCov",paste0("LowRval(<",RvalTh,")"))) 181 | dat<-data.frame(xvalues, CNMvalues, pvalues, poolvalues, alphavalues, sizevalues, chrlabels) 182 | medCovTXT=paste0(poolInfo,collapse=", ") 183 | titleTXT=paste0(inSample," (",MQ,")\n",medCovTXT) 184 | ggplot(dat, aes(x=dat$xvalues,y=dat$CNMvalues, shape=dat$poolvalues, color=dat$pvalues, alpha=dat$alphavalues))+ 185 | scale_x_continuous(breaks=c())+ 186 | scale_y_continuous(limits=c(-1,3), breaks=seq(MINV,MAXV,1))+ 187 | labs(x="All Genes", y="Observed read depth/Expected read depth")+theme_bw()+ 188 | ggtitle(titleTXT)+ 189 | theme(plot.title = element_text(size=20))+ 190 | geom_point(size=0.8)+scale_alpha(guide = 'none', limits=c(0,1))+ 191 | scale_shape_manual("Pool",limits=c(PoolList,"Faulty","LowMedCov",paste0("LowRval(<",RvalTh,")")), values=c(poolCexList,4,3,5))+ 192 | scale_colour_gradient("P-value", limits=c(0, 1), low="red", high="grey40")+ 193 | geom_hline(yintercept=1,linetype="dashed", size=1)+ 194 | geom_hline(yintercept=delTh,linetype="dashed", size=0.5)+geom_hline(yintercept=dupTh,linetype="dashed", size=0.5)+ 195 | geom_hline(yintercept=1.5,linetype="solid", size=0.2)+geom_hline(yintercept=0.5,linetype="solid", size=0.2)+ 196 | annotate("text", x=dat$xvalues, y=MINV-0.2, label=dat$chrlabels, angle=90, size=2) 197 | 198 | ggsave(filename=paste0(plotdir,"/",MQ,"_dupdelTh",dupdelTh,"_",inSample,"_AllGene.pdf"), plot=last_plot(), width = 170, units= "mm") 199 | } 200 | } 201 | } 202 | 203 | 204 | GenePlot=TRUE 205 | 
if(GenePlot==TRUE){ 206 | ### For gene ########## 207 | geneList<-levels(RDTable$Gene) 208 | runTime<-round(NROW(geneList)/rangeX) 209 | for(i in 0:(runTime-1)){ 210 | geneList.run<-geneList[seq(from=i*rangeX+1, to=(i+1)*rangeX, by=1)] 211 | geneLen.run<-rangeX-length(geneList.run[geneList.run=="NA"]) 212 | geneList.run<-geneList.run[1:geneLen.run] 213 | RDTable.run<-c() 214 | for(gene.run in geneList.run){ 215 | RDTable.run<-rbind(RDTable.run,RDTable[RDTable$Gene==gene.run,]) 216 | } 217 | 218 | medCov<-unique(RDTable[RDTable$Type=="MedianRD",][c("Pool",inSample)]) 219 | colnames(medCov)<-c("Pool","MedianRD") 220 | 221 | ampLen<-length(unique(RDTable.run$Amplicon_ID)) 222 | 223 | xvalues<-rep(NA, ampLen) 224 | CNMvalues<-rep(NA, ampLen) 225 | CNUvalues<-rep(NA, ampLen) 226 | CNLvalues<-rep(NA, ampLen) 227 | pvalues<-rep(NA, ampLen) 228 | rvalues<-rep(NA, ampLen) 229 | cnvtypes<-rep(NA, ampLen) 230 | poolvalues<-rep(19, ampLen) 231 | exonvalues<-rep(19, ampLen) 232 | exonRvalues<-rep(19, ampLen) 233 | posvalues<-rep(19, ampLen) 234 | alphavalues<-rep(1.0, ampLen) 235 | 236 | labelX<-rep(NA,geneLen.run) 237 | labelTXT<-rep(NA,geneLen.run) 238 | 239 | xstart<-500 240 | start<-1 241 | for(x in 1:geneLen.run){ 242 | gene=geneList.run[x] 243 | RDTable.gene<-RDTable.run[RDTable.run$Gene==gene,] 244 | ampLen.gene<-length(unique(RDTable.gene$Amplicon_ID)) 245 | end<-start+ampLen.gene-1 246 | xend<-xstart+10*(ampLen.gene-1) 247 | 248 | xvalues[start:end]<-seq(from=xstart, to=xend, by=10) 249 | CNMvalues[start:end]<-as.numeric(as.matrix(RDTable.gene[RDTable.gene$Type=="CN_M",inSample])) 250 | CNUvalues[start:end]<-as.numeric(as.matrix(RDTable.gene[RDTable.gene$Type=="CI_U",inSample])) 251 | CNLvalues[start:end]<-as.numeric(as.matrix(RDTable.gene[RDTable.gene$Type=="CI_L",inSample])) 252 | pvalues[start:end]<-as.numeric(as.matrix(RDTable.gene[RDTable.gene$Type=="Pvalue",inSample])) 253 | 
rvalues[start:end]<-as.numeric(as.matrix(RDTable.gene[RDTable.gene$Type=="RegRvalue",inSample])) 254 | cnvtypes[start:end]<-as.character(as.matrix(RDTable.gene[RDTable.gene$Type=="CNVType",inSample])) 255 | poolvalues[start:end]<-as.character(as.matrix(RDTable.gene[RDTable.gene$Type=="CN_M","Pool"])) 256 | posvalues[start:end]<-as.numeric(RDTable.gene[RDTable.gene$Type=="CN_M","Amplicon_Start"]) 257 | 258 | exons<-RDTable.gene[RDTable.gene$Type=="CN_M","Exon"] 259 | exonvalues[start:end]<-as.character(as.matrix(gsub("Exon","E",exons))) 260 | exonRvalues[start:end]<-as.character(as.matrix(gsub("Exon","E",exons))) 261 | exon.now="" 262 | for(exonPos in c(start:end)){ 263 | if(is.na(exonvalues[exonPos])|exonvalues[exonPos]==""){ 264 | exonvalues[exonPos]<-"" 265 | }else{ 266 | exons<-unlist(strsplit(exonvalues[exonPos],"\\|")) 267 | if(as.character(exons[length(exons)])==exon.now){ 268 | exonvalues[exonPos]<-"" 269 | }else{ 270 | out<-1 271 | for(i in c(1:length(exons))){ 272 | if(as.character(exons[i])==exon.now){ 273 | out<-i+1 274 | } 275 | } 276 | exon<-paste(exons[c(out:length(exons))],collapse="|") 277 | exonvalues[exonPos]<-exon 278 | exon.now<-as.character(exons[length(exons)]) 279 | } 280 | } 281 | } 282 | 283 | labelX[x]<-median(xvalues[start:end]) 284 | TransList<-unique(as.character(RDTable.gene[RDTable.gene$Type=="CN_M","Transcript"])) 285 | TransID<-TransList[order(TransList)][length(TransList)] 286 | chr<-as.character(RDTable.gene[RDTable.gene$Type=="CN_M","Chr"])[1] 287 | if(TransID==""){ 288 | labelTXT[x]<-paste0(gene,"\nchr",chr) 289 | }else{ 290 | labelTXT[x]<-paste0(gene," (",as.character(TransID),")","\nchr",chr) 291 | } 292 | 293 | xstart<-xend+500 294 | start<-end+1 295 | x<-x+1 296 | } 297 | if(length(pvalues[is.na(pvalues)])==length(pvalues)){ 298 | print("All p-values are nan!!! 
impossible to draw plot") 299 | }else{ 300 | 301 | MAXV=3 302 | MINV=(-2) 303 | ###outlier handlin###### 304 | temp.poolvalues<-poolvalues 305 | 306 | temp.CN_M<-CNMvalues 307 | temp.CN_M[temp.CN_M>MAXV]<-MAXV*1.0 308 | temp.CN_M[temp.CN_MMAXV]<-MAXV*1.0 313 | temp.CI_L[temp.CI_LMAXV]<-MAXV*1.0 318 | temp.CI_U[temp.CI_U0){ 336 | if(as.numeric(as.matrix(medCov[medCov$Pool==Pool,2]))<50){ 337 | temp.poolvalues[temp.poolvalues==Pool]<-"LowMedCov" 338 | } 339 | } 340 | poolInfo<-cbind(poolInfo, paste0(Pool,":",medCov[medCov$Pool==Pool,2])) 341 | poolCexList<-cbind(poolCexList,refCexList[i]) 342 | i<-i+1 343 | } 344 | 345 | poolvalues<-factor(temp.poolvalues,levels=c(PoolList,"Faulty","LowMedCov",paste0("LowRval(<",RvalTh,")"))) 346 | 347 | exonXvalues<-xvalues-5 348 | exonXvalues<-c(exonXvalues,exonXvalues[length(exonXvalues)]+10) 349 | exonList<-unique(unlist(strsplit(exonvalues,"\\|"))) 350 | if(sum(exonList=="")!=length(exonList)){ 351 | exonList<-exonList[exonList!=""] 352 | exonPosValues<-matrix(rep(NA, length(exonXvalues)*length(exonList)),length(exonList),length(exonXvalues)) 353 | for(exon.now in exonList){ 354 | check=TRUE 355 | for(i in c(1:length(exonList))){ 356 | exonValue<-(-0.2)-(i*0.2) 357 | if(check==TRUE){ 358 | if(min(grep(exon.now,exonRvalues))==1){ 359 | if(sum(is.na(exonPosValues[i,grep(exon.now,exonRvalues)]))==length(grep(exon.now,exonRvalues))){ 360 | pos<-grep(exon.now,exonRvalues) 361 | exonPosValues[i,c(pos,pos[length(pos)]+1)]<-exonValue 362 | check=FALSE 363 | } 364 | }else if(sum(is.na(exonPosValues[i,c(min(grep(exon.now,exonRvalues))-1,grep(exon.now,exonRvalues))]))==length(grep(exon.now,exonRvalues))+1){ 365 | pos<-grep(exon.now,exonRvalues) 366 | exonPosValues[i,c(pos,pos[length(pos)]+1)]<-exonValue 367 | check=FALSE 368 | } 369 | } 370 | } 371 | } 372 | exonPosValues<-exonPosValues[rowSums(is.na(exonPosValues))!=length(exonPosValues[i,]), ] 373 | }else{ 374 | exonPosValues<-t(rep(NA, length(exonXvalues))) 375 | } 376 | 377 | 
dat<-data.frame(xvalues, CNMvalues, CNUvalues,CNLvalues, pvalues, poolvalues, exonvalues, posvalues, alphavalues) 378 | dat2<-data.frame(exonXvalues,t(exonPosValues)) 379 | 380 | medCovTXT=paste0(poolInfo,collapse=", ") 381 | titleTXT=paste0(inSample," (",MQ,")\n",medCovTXT) 382 | plot.now<-ggplot()+ 383 | scale_x_continuous(breaks=c())+ 384 | scale_y_continuous(limits=c(-1,3), breaks=seq(MINV,MAXV,1))+ 385 | labs(x="Exons", y="Observed read depth/Expected read depth)")+theme_bw()+ 386 | ggtitle(titleTXT)+ 387 | theme(plot.title = element_text(size=20))+ 388 | geom_point(data=dat, aes(x=dat$xvalues,y=dat$CNMvalues, shape=dat$poolvalues, color=dat$pvalues, alpha=dat$alphavalues), size=3)+ 389 | geom_errorbar(data=dat, aes(x=dat$xvalues,ymin=dat$CNLvalues,ymax=dat$CNUvalues, color=dat$pvalues, alpha=dat$alphavalues))+ 390 | scale_shape_manual("Pool",limits=c(PoolList,"Faulty","LowMedCov",paste0("LowRval(<",RvalTh,")")), values=c(poolCexList,4,3,5))+ 391 | scale_colour_gradient("P-value", limits=c(0, 1), low="red", high="grey40")+ 392 | scale_alpha(guide = 'none', limits=c(0,1))+ 393 | geom_hline(yintercept=1,linetype="dashed", size=1)+ 394 | geom_hline(yintercept=delTh,linetype="dashed", size=0.5)+geom_hline(yintercept=dupTh,linetype="dashed", size=0.5)+ 395 | geom_hline(yintercept=1.5,linetype="solid", size=0.2)+geom_hline(yintercept=0.5,linetype="solid", size=0.2)+ 396 | annotate("text", x=labelX, y=MAXV-0.5, label=labelTXT, size=7)+ 397 | annotate("text", x=xvalues, y=MINV-0.2, label=dat$exonvalues, angle=90, size=2) 398 | for(i in c(2:length(dat2))){ 399 | if(sum(is.na(dat2[i]))!=NROW(dat2[i])){ 400 | plot.now<-plot.now+geom_line(aes(x,y), data=data.frame(x=dat2$exonXvalues, y=unlist(dat2[i])), color="grey", size=3, alpha=0.5) 401 | } 402 | } 403 | ggsave(filename=paste0(plotdir,"/",MQ,"_dupdelTh",dupdelTh,"_",inSample,"_",gene,".pdf"), plot=plot.now, width = 170, units= "mm") 404 | } 405 | } 406 | } 407 | } 408 | 
-------------------------------------------------------------------------------- /Code_v1.5.1/r.plotPerSample.r: -------------------------------------------------------------------------------- 1 | args<-commandArgs(TRUE) 2 | batchTag<-args[1] 3 | inSample<-args[2] 4 | RDRatioDir<-args[3] 5 | plotdir<- args[4] 6 | CBSdir<-args[5] 7 | PoolList<-args[6] 8 | MQList<-args[7] 9 | dupdelList<-args[8] 10 | 11 | RvalTh<-0.8 12 | rangeX<-1 ##How many gene you want to plot in one figure. 13 | 14 | library("ggplot2") 15 | library("PSCBS") 16 | 17 | PoolList<-unlist(strsplit(PoolList,",")) 18 | MQList<-unlist(strsplit(MQList,",")) 19 | dupdelList<-unlist(strsplit(dupdelList,",")) 20 | 21 | for(MQ in MQList){ 22 | for(dupdelTh in dupdelList){ 23 | dupTh<-as.numeric(unlist(strsplit(dupdelTh,"_"))[1]) 24 | delTh<-as.numeric(unlist(strsplit(dupdelTh,"_"))[2]) 25 | 26 | inFile=paste0(RDRatioDir,batchTag,".readDepthRatioFromLRModel.",MQ,".dupdelTh",dupdelTh,".txt") 27 | RDTable<-read.table(inFile, head=T, sep="\t", fill=T, quote = "", check.names=F) 28 | RDTable<-RDTable[order(RDTable$Amplicon_Start),] 29 | sortedRDTable<-c() 30 | for(chrom in c(1:22, "X", "Y", "MT")){ 31 | sortedRDTable<-rbind(sortedRDTable,RDTable[RDTable$Chr==chrom,]) 32 | } 33 | RDTable<-sortedRDTable 34 | 35 | AllPlot=TRUE 36 | if(AllPlot==TRUE){ 37 | ### All ##################### 38 | geneList<-unique(RDTable$Gene) 39 | geneLen<-NROW(geneList) 40 | 41 | medCov<-unique(RDTable[RDTable$Type=="MedianRD",][c("Pool",inSample)]) 42 | colnames(medCov)<-c("Pool","MedianRD") 43 | 44 | ampLen<-length(unique(RDTable$Amplicon_ID)) 45 | xvalues<-rep(NA, ampLen) 46 | CNMvalues<-rep(NA, ampLen) 47 | pvalues<-rep(NA, ampLen) 48 | rvalues<-rep(NA, ampLen) 49 | cnvtypes<-rep(NA, ampLen) 50 | poolvalues<-rep(19, ampLen) 51 | alphavalues<-rep(1.0, ampLen) 52 | sizevalues<-rep(0.8, ampLen) 53 | genomicPos<-rep(NA, ampLen) 54 | chrvalues<-rep(NA, ampLen) 55 | chrlabels<-rep(NA, ampLen) 56 | gaps<-c() 57 | 58 | xstart<-1 59 | 
start<-1 60 | for(x in 1:geneLen){ 61 | gene=geneList[x] 62 | RDTable.gene<-RDTable[RDTable$Gene==gene,] 63 | ampLen.gene<-length(unique(RDTable.gene$Amplicon_ID)) 64 | end<-start+ampLen.gene-1 65 | xend<-xstart+10*(ampLen.gene-1) 66 | 67 | xvalues[start:end]<-seq(from=xstart, to=xend, by=10) 68 | CNMvalues[start:end]<-as.numeric(as.matrix(RDTable.gene[RDTable.gene$Type=="CN_M",inSample])) 69 | pvalues[start:end]<-as.numeric(as.matrix(RDTable.gene[RDTable.gene$Type=="Pvalue",inSample])) 70 | rvalues[start:end]<-as.numeric(as.matrix(RDTable.gene[RDTable.gene$Type=="RegRvalue",inSample])) 71 | cnvtypes[start:end]<-as.character(as.matrix(RDTable.gene[RDTable.gene$Type=="CNVType",inSample])) 72 | poolvalues[start:end]<-as.character(as.matrix(RDTable.gene[RDTable.gene$Type=="CN_M","Pool"])) 73 | chrvalues[start:end]<-as.character(RDTable.gene[RDTable.gene$Type=="CN_M","Chr"]) 74 | chrlabels[start:end]<-as.character(RDTable.gene[RDTable.gene$Type=="CN_M","Chr"]) 75 | 76 | genomicPos[start:end]<-as.numeric(RDTable.gene[RDTable.gene$Type=="CN_M","Amplicon_Start"]) 77 | gaps.chr<-chrvalues[start] 78 | gaps.chr<-gsub("X",23,gaps.chr) 79 | gaps.chr<-gsub("Y",24,gaps.chr) 80 | gaps.chr<-gsub("MT",25,gaps.chr) 81 | gaps.chr<-as.numeric(gaps.chr) 82 | gaps.start<-min(genomicPos[start:end]) 83 | gaps.end<-max(genomicPos[start:end])+1 84 | gaps.length<-(gaps.end-gaps.start+1) 85 | gaps<-rbind(gaps, c(gaps.chr,gaps.start,gaps.end,gaps.length)) 86 | 87 | xstart<-xend+1 88 | start<-end+1 89 | x<-x+1 90 | } 91 | 92 | if(length(pvalues[is.na(pvalues)])==length(pvalues)){ 93 | print("All p-values are nan!!! 
impossible to draw plot") 94 | }else{ 95 | 96 | MAXV=3 97 | MINV=(-2) 98 | 99 | chrvalues[chrvalues=="X"]<-23 100 | chrvalues[chrvalues=="Y"]<-24 101 | chrvalues[chrvalues=="MT"]<-25 102 | 103 | copynumber<-CNMvalues 104 | copynumber[copynumber>2^(MAXV)]<-2^(MAXV)*1.0 105 | copynumber[cnvtypes=="faultyAmp"]<-NA 106 | copynumber[cnvtypes=="faultySample"]<-NA 107 | 108 | CBSDat<-data.frame(chromosome=as.numeric(chrvalues), x=genomicPos, y=copynumber) 109 | CBSDat<-CBSDat[!is.na(CBSDat$y),] 110 | 111 | colnames(gaps)<-c("chromosome","start","end","length") 112 | gaps2<-data.frame(gaps) 113 | if(NROW(gaps2)>1){ 114 | for(i in seq(NROW(gaps2),2)){ 115 | end1<-gaps2$end[i-1] 116 | start2<-gaps2$start[i] 117 | if(gaps2$chromosome[i-1]==gaps2$chromosome[i]){ 118 | if(end1>start2){ 119 | gaps2[i-1,3]<-gaps2$end[i] 120 | gaps2[i-1,4]<-gaps2[i-1,3]-gaps2[i-1,2]+1 121 | gaps2<-gaps2[-i,] 122 | } 123 | } 124 | } 125 | knownSegments<-gapsToSegments(gaps2) 126 | fit <- segmentByCBS(CBSDat,knownSegments=knownSegments) 127 | }else{ 128 | fit <- segmentByCBS(CBSDat) 129 | } 130 | CBSFileName=paste0(CBSdir,"/",inSample,".CBS.",MQ,".dupdelTh",dupdelTh) 131 | CBSFile=paste0(CBSdir,"/",inSample,".CBS.",MQ,".dupdelTh",dupdelTh,".tsv") 132 | if(file.exists(CBSFile)){ 133 | file.remove(CBSFile) 134 | } 135 | pathname<-writeSegments(fit, name=CBSFileName, simplify=TRUE) 136 | 137 | 138 | ###outlier handlin###### 139 | temp.poolvalues<-poolvalues 140 | 141 | temp.CN_M<-CNMvalues 142 | temp.CN_M<-log2(temp.CN_M+0.000000001) 143 | temp.CN_M[temp.CN_M>MAXV]<-MAXV*1.0 144 | temp.CN_M[temp.CN_M0){ 154 | if(as.numeric(as.matrix(medCov[medCov$Pool==Pool,2]))<50){ 155 | temp.poolvalues[temp.poolvalues==Pool]<-"LowMedRD" 156 | } 157 | } 158 | poolInfo<-cbind(poolInfo, paste0(Pool,":",medCov[medCov$Pool==Pool,2])) 159 | poolCexList<-cbind(poolCexList,refCexList[i]) 160 | i<-i+1 161 | } 162 | 163 | chr.now="" 164 | for(chrPos in c(1:length(chrlabels))){ 165 | if(is.na(chrlabels[chrPos])){ 166 | 
chrlabels[chrPos]<-"" 167 | } 168 | if(chr.now==chrlabels[chrPos]){ 169 | chrlabels[chrPos]<-"" 170 | } 171 | if(chrlabels[chrPos]!=""){ 172 | chr.now<-chrlabels[chrPos] 173 | } 174 | } 175 | 176 | temp.poolvalues[rvaluesMAXV]<-MAXV*1.0 312 | temp.CN_M[temp.CN_MMAXV]<-MAXV*1.0 318 | temp.CI_L[temp.CI_LMAXV]<-MAXV*1.0 324 | temp.CI_U[temp.CI_U0){ 334 | if(as.numeric(as.matrix(medCov[medCov$Pool==Pool,2]))<50){ 335 | temp.poolvalues[temp.poolvalues==Pool]<-"LowMedRD" 336 | } 337 | } 338 | poolInfo<-cbind(poolInfo, paste0(Pool,":",medCov[medCov$Pool==Pool,2])) 339 | poolCexList<-cbind(poolCexList,refCexList[i]) 340 | i<-i+1 341 | } 342 | 343 | temp.poolvalues[rvalues=2 >=2 3 | AverageOfReadDepthRatios How strong is the signal supporting the candidate? If deletion, 1 point for < log2(0.65); If duplication, 1 point for >log2(1.35) <-0.7369656 >0.4854268 4 | STDOfReadDepthRatios How stable are the signals supporting the candidate? 1 point for < 0.4 <0.4 <0.4 5 | AverageOfCIs How small are the confidence intervals of the signals supporting the candidate CNV? 
1 point for < 0.4 <0.4 <0.4 6 | AverageOfR2vals How reliable is the model that generated the signals that support the candidate CNV? 1 point for >=0.85 >=0.85 >=0.85 7 | -------------------------------------------------------------------------------- /ExampleBams/GM14603_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM14603_Example.bam -------------------------------------------------------------------------------- /ExampleBams/GM14603_Example.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM14603_Example.bam.bai -------------------------------------------------------------------------------- /ExampleBams/GM14734_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM14734_Example.bam -------------------------------------------------------------------------------- /ExampleBams/GM14734_Example.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM14734_Example.bam.bai -------------------------------------------------------------------------------- /ExampleBams/GM17433_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM17433_Example.bam -------------------------------------------------------------------------------- /ExampleBams/GM17433_Example.bam.bai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM17433_Example.bam.bai -------------------------------------------------------------------------------- /ExampleBams/GM23221_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM23221_Example.bam -------------------------------------------------------------------------------- /ExampleBams/GM23221_Example.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM23221_Example.bam.bai -------------------------------------------------------------------------------- /ExampleBams/GM23431_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM23431_Example.bam -------------------------------------------------------------------------------- /ExampleBams/GM23431_Example.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM23431_Example.bam.bai -------------------------------------------------------------------------------- /ExampleBams/GM23891_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM23891_Example.bam -------------------------------------------------------------------------------- /ExampleBams/GM23891_Example.bam.bai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM23891_Example.bam.bai -------------------------------------------------------------------------------- /ExampleBams/GM24007_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM24007_Example.bam -------------------------------------------------------------------------------- /ExampleBams/GM24007_Example.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/GM24007_Example.bam.bai -------------------------------------------------------------------------------- /ExampleBams/NA00006_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/NA00006_Example.bam -------------------------------------------------------------------------------- /ExampleBams/NA00006_Example.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/NA00006_Example.bam.bai -------------------------------------------------------------------------------- /ExampleBams/NA00852_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/NA00852_Example.bam -------------------------------------------------------------------------------- /ExampleBams/NA00852_Example.bam.bai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/NA00852_Example.bam.bai -------------------------------------------------------------------------------- /ExampleBams/NA01741_Example.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/NA01741_Example.bam -------------------------------------------------------------------------------- /ExampleBams/NA01741_Example.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleBams/NA01741_Example.bam.bai -------------------------------------------------------------------------------- /ExampleOutputs_v1.5.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SD-Genomics/DeviCNV/dbe60ccb12648bf02f42fdf4e4ccefa4c8453a57/ExampleOutputs_v1.5.1.zip -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. 
We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 
49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. 
The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 
122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 
155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 
186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. 
This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <http://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <http://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeviCNV 2 | Detection and Visualization of Exon-Level Copy Number Variants in Targeted Next Generation Sequencing Data 3 | 4 | # Environment 5 | DeviCNV runs on Python 2.7 and R 3.2.0. 6 | 7 | # Python dependencies 8 | - sys 9 | - intervaltree 10 | - intervaltree_bio 11 | - numpy 12 | - operator 13 | - random 14 | - pysam 15 | - pyvcf 16 | - scipy 17 | 18 | # R dependencies 19 | - ggplot2 20 | - PSCBS 21 | 22 | # Installation 23 | To install DeviCNV, simply download the 10 scripts (7 Python and 3 R) in the “Code_v1.5.1” directory.
24 | 25 | # Documentation 26 | PDF documentation is included in the package. 27 | - DeviCNV1.5 Manual20171101.pdf 28 | 29 | # Version description 30 | DeviCNV v1.5.1 was uploaded on 26/06/2019. 31 | This release fixes the following bugs: 32 | 1. Removed the code for running with the Slurm Workload Manager from "DeviCNV_Example.runningScript.sh". 33 | 2. Fixed the code for selecting large segments in "python.scoreCNV.py". 34 | --------------------------------------------------------------------------------