├── Strelka ├── Strelka.germline ├── README.md ├── manta.sh ├── Strelka.sh ├── getmanta.sh ├── getpy.sh ├── 1.batch_manta.py └── 2.batch_streka_somatic.py ├── ex_region.sort.bed.gz ├── 7.run_sompy.sh ├── 6.index_bam.sh ├── 5.run_merge.sh ├── GATK ├── BQSR2.sh ├── BQSR1.sh ├── 1.bqsr1.report.py ├── 2.bqsr.2.bqsr.py ├── 3.multiprocess_mutect2.py └── mutect2.sh ├── multiprocess_index_bam.py ├── README.md ├── 3.dedup.sh ├── 4.downsample.sh ├── multiprocess_merge.py ├── 1.QC.sh ├── 2.mapping.sh └── batch_extract.py /Strelka/Strelka.germline: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ex_region.sort.bed.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zic12345/SR2019/HEAD/ex_region.sort.bed.gz -------------------------------------------------------------------------------- /Strelka/README.md: -------------------------------------------------------------------------------- 1 | As is recommended, Manta was run first and its results were used as input of Strelka2. 
-------------------------------------------------------------------------------- /7.run_sompy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | som.py truth_snvs.vcf.gz my_snvs.vcf.gz -f ex_region.sort.bed.gz -o result 3 | -------------------------------------------------------------------------------- /Strelka/manta.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | (time yhrun -n 1 -c 24 ./runWorkflow.py --memGb=60 --mode=local) 2>manta.logfile 3 | -------------------------------------------------------------------------------- /Strelka/Strelka.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | (time yhrun -n 1 -c 24 ./runWorkflow.py --memGb=60 --mode=local) 2>strelka.logfile 3 | -------------------------------------------------------------------------------- /6.index_bam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Batch index the merged BAM files 4 | 5 | dir=/path/to/merged 6 | yhrun -n 1 -c 24 python3 multiprocess_index_bam.py $dir -------------------------------------------------------------------------------- /5.run_merge.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # Batch merge the BAM files generated by 4.downsample.sh, put all unmerged BAM files in one directory 4 | # which do not contain any other files, run this script with one parameter which represent the depth 5 | # you used in 4.downsample.sh 6 | 7 | dir=/path/to/unmerged 8 | yhrun -n 1 -c 24 python3 multiprocess_merge.py $dir $1 -------------------------------------------------------------------------------- /Strelka/getmanta.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | manta='/path/to/manta-1.5.0.centos6_x86_64/bin/configManta.py' 3 | 
reference='/path/to/hg19.fa' 4 | normbam='something' 5 | tumorbam='something' 6 | runpath='something' 7 | bed='/path/to/ex_region.sort.bed.gz' 8 | $manta \ 9 | --normalBam $normbam \ 10 | --tumorBam $tumorbam \ 11 | --referenceFasta $reference \ 12 | --runDir $runpath \ 13 | --exome \ 14 | --callRegions $bed -------------------------------------------------------------------------------- /GATK/BQSR2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | #apply BQSR 3 | gatk='/path/to/gatk --java-options -Xmx60g' 4 | dbsnp='/path/to/dbsnp_138.hg19.vcf.gz' 5 | Mills='/path/to/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz' 6 | ref='/path/to/hg19.fa' 7 | bed='/path/to/ex_region.sort.bed' 8 | cd $1 9 | bam=$2 10 | recaltable=$3 11 | bqsrbam=$4 12 | 13 | time $gatk ApplyBQSR \ 14 | -R $ref \ 15 | -I $bam \ 16 | -L $bed \ 17 | -O $bqsrbam \ 18 | --bqsr-recal-file $recaltable 19 | 20 | -------------------------------------------------------------------------------- /multiprocess_index_bam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os,sys 3 | from multiprocessing import Pool 4 | os.chdir(sys.argv[1]) 5 | files = [x for x in os.listdir() if (x.endswith(".cram"))|(x.endswith('.bam'))] 6 | 7 | def replaceRG(infile): 8 | script = "samtools index -@ 2 %s"%infile 9 | os.system(script) 10 | 11 | if __name__ == "__main__": 12 | p=Pool(12) 13 | for infile in files: 14 | p.apply_async(replaceRG,args=(infile,)) 15 | p.close() 16 | p.join() 17 | -------------------------------------------------------------------------------- /Strelka/getpy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | strelka2='/path/to/strelka-2.9.7/bin/configureStrelkaSomaticWorkflow.py' 3 | reference='/BIGDATA1/scut_hldu_1/work/Low_frequency/NAYH/Genome/UCSC/hg19.fa' 4 | normbam='something' 5 | tumorbam='something' 6 
| runpath='something' 7 | indel='something' 8 | bed='/path/to/ex_region.sort.bed.gz' 9 | $strelka2 \ 10 | --normalBam $normbam \ 11 | --tumorBam $tumorbam \ 12 | --referenceFasta $reference \ 13 | --runDir $runpath \ 14 | --indelCandidates $indel \ 15 | --exome \ 16 | --callRegions $bed 17 | -------------------------------------------------------------------------------- /GATK/BQSR1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | gatk='/path/to/gatk --java-options -Xmx60g' 3 | dbsnp='/path/to/dbsnp_138.hg19.vcf.gz' 4 | Mills='/path/to/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz' 5 | ref='/path/to/hg19.fa' 6 | bed='/path/to/ex_region.sort.bed' 7 | cd $1 8 | bam=$2 9 | recaltable=$3 10 | bqsrbam=$4 11 | ##the first step of BQSR, generate a recalibrating table 12 | time $gatk BaseRecalibrator \ 13 | -I $bam \ 14 | -L $bed \ 15 | --known-sites $dbsnp \ 16 | --known-sites $Mills \ 17 | -R $ref \ 18 | -O $recaltable 19 | 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SR2019 2 | All codes used to generate data in article "Systematic comparison of somatic variant calling performance among different sequencing depth and mutation frequency" 3 | 4 | If you have problems, please contact the authors or submit issues. 5 | 6 | 7 | ## Notice 8 | When you downsampled and mixed the bam files, all of the RG tags in NA.md.bam and YH.md.bam will be kept, Strelka2 ignores RG tags so the result won't be affected, however, the result of programs which need the information of RG tags such as GATK mutect2 will be affected, **thus you need to run RG tag replacement on the mixed bam files before calling mutations**. 
9 | -------------------------------------------------------------------------------- /GATK/1.bqsr1.report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | #batch run BQSR1.sh for files in a certain path 3 | import os,sys 4 | from multiprocessing import Pool 5 | wkdir=sys.argv[1] 6 | 7 | def bqsr(script): 8 | os.system(script) 9 | 10 | 11 | if __name__ == '__main__': 12 | files = sorted([x for x in os.listdir(wkdir) if ((x.endswith(".cram"))|(x.endswith(".bam")))]) 13 | p=Pool(2) 14 | for file in files: 15 | report=file+'.report' 16 | bqsrbam='bqsr.'+file 17 | script='/absolute/path/to/BQSR1.sh %s %s %s %s'%(wkdir,file,report,bqsrbam) 18 | p.apply_async(bqsr,args=(script,)) 19 | p.close() 20 | p.join() 21 | -------------------------------------------------------------------------------- /GATK/2.bqsr.2.bqsr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | #batch run BQSR2.sh for files in a certain path 3 | import os,sys 4 | from multiprocessing import Pool 5 | wkdir=sys.argv[1] 6 | 7 | def bqsr(script): 8 | os.system(script) 9 | 10 | if __name__ == '__main__': 11 | files = sorted([x for x in os.listdir(wkdir) if ((x.endswith(".cram"))|(x.endswith(".bam")))]) 12 | p=Pool(6) 13 | 14 | for file in files: 15 | report=file+'.report' 16 | bqsrbam='bqsr.'+file 17 | script='/absolute/path/to/BQSR2.sh %s %s %s %s'%(wkdir,file,report,bqsrbam) 18 | p.apply_async(bqsr,args=(script,)) 19 | p.close() 20 | p.join() 21 | -------------------------------------------------------------------------------- /3.dedup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # remove duplications using picard 4 | naI='/path/to/na.addRG.bam' 5 | naO='/path/to/na.addRG.mdup.bam' 6 | naD='/path/to/na.dup.txt' 7 | yhI='/path/to/yh.addRG.bam' 8 | yhO='/path/to/yh.addRG.mdup.bam' 9 | yhD='/path/to/yh.dup.txt' 10 
| NAlog='/path/to/na.dup.log' 11 | YHlog='/path/to/yh.dup.log' 12 | 13 | (time yhrun -N 1 -n 1 java -jar $picard MarkDuplicates \ 14 | I=$naI \ 15 | O=$naO \ 16 | M=$naD \ 17 | REMOVE_DUPLICATES=true \ 18 | ASO=coordinate \ 19 | CREATE_INDEX=true) 2>&1 >$NAlog 20 | 21 | (time yhrun -N 1 -n 1 java -jar $picard MarkDuplicates \ 22 | I=$yhI \ 23 | O=$yhO \ 24 | M=$yhD \ 25 | REMOVE_DUPLICATES=true \ 26 | ASO=coordinate \ 27 | CREATE_INDEX=true) 2>&1 >$YHlog 28 | -------------------------------------------------------------------------------- /GATK/3.multiprocess_mutect2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | #batch run mutect2.sh for files in a certain path 3 | import os,sys 4 | os.chdir(sys.argv[1]) 5 | files = [x for x in os.listdir() if ((x.endswith(".bam"))|(x.endswith(".cram")))] 6 | os.mkdir("mutect") 7 | 8 | for file in files: 9 | outgzvcf = "mutect/"+file+".vcf.gz" 10 | NAdepth=file.split("-")[1][:-1] 11 | YHdepth=file.split("-")[2][:-1] 12 | totaldepth = int(NAdepth)+int(YHdepth) 13 | YHpercent = int(YHdepth)/int(totaldepth) 14 | num = file.split("-")[4].split('.')[0] 15 | rgid="mix%sX_YH%s_%s"%(totaldepth,YHpercent,num) 16 | script = "yhbatch /path/to/mutect2.sh %s %s %s"%(file,rgid,outgzvcf) 17 | os.system(script) 18 | 19 | # "yhbach" is the command which submit batch jobs to the computer clusters, we allocated 1 node and 24 threads for fastp. 
-------------------------------------------------------------------------------- /GATK/mutect2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | gatk='/BIGDATA1/scut_hldu_1/work/Low_frequency/NAYH/mix-NA1-YH1/GATK/gatk-4.1.0.0/gatk --java-options -Xmx40g' 3 | ref='/BIGDATA1/scut_hldu_1/work/Low_frequency/NAYH/Genome/UCSC/hg19.fa' 4 | bed='/BIGDATA1/scut_hldu_1/work/Low_frequency/NAYH/Genome/AgilentV5/ex_region.sort.bed' 5 | norm='/BIGDATA1/scut_hldu_1/work/Low_frequency/NAYH/mix-NA1-YH1/100Xnorm/bqsr/bqsr.NA-100X-1.bam' 6 | gnomad='/BIGDATA1/scut_hldu_1/work/Low_frequency/NAYH/mix-NA1-YH1/GATK/bundle/af-only-gnomad.raw.sites.hg19.vcf.gz' 7 | tumor=$1 8 | tumorid=$2 9 | outvcf=$3 10 | (time $gatk Mutect2 \ 11 | -R $ref \ 12 | -I $tumor \ 13 | -I $norm \ 14 | -tumor $tumorid \ 15 | -normal NAnorm100X \ 16 | --germline-resource $gnomad \ 17 | --disable-read-filter MateOnSameContigOrNoMappedMateReadFilter \ 18 | --native-pair-hmm-threads 24 \ 19 | -O $outvcf 20 | ) 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /4.downsample.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # Downsample the original NA12878 and YH-1 BAM files to different depth. 4 | # Here we use a python script to conduct batch extraction 5 | # This script needs two parameters, the first one is the total depth you 6 | # finally want, and the second parameter is "NA" or "YH", which means the 7 | # file you want to downsample. 8 | 9 | # e.g. 
If you want to generate 100X depth mixed files, with YH-1 percent 10 | # 1%, 5%, 10%, 20%, 30% and 40%, first run this script with parameters: 11 | # "100 NA", then run this script with parameters:"100 YH", you will get 12 | # the downsampled NA12878 and YH-1 BAM files, the depth of NA12878 BAM 13 | # files are: 99X, 95X, 90X, 80X, 70X and 60X, the depth of YH-1 BAM files 14 | # are 1X, 5X, 10X, 20X, 30X and 40X. These downsampled BAM files can be 15 | # merged using script 5.run_merge.sh to get the 100X depth mixed files 16 | # you wanted 17 | 18 | python3 batch_extract.py $1 $2 19 | 20 | 21 | -------------------------------------------------------------------------------- /multiprocess_merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os,sys 3 | from multiprocessing import Pool 4 | os.chdir(sys.argv[1]) 5 | na = [x for x in os.listdir() if 'NA' in x] 6 | total_depth = int(sys.argv[2]) 7 | 8 | def merge_bam(out,bam1,bam2): 9 | script = "samtools merge -n -@ 3 %s %s %s"%(out,bam1,bam2) 10 | os.system(script) 11 | 12 | 13 | if __name__ == "__main__": 14 | p = Pool(8) 15 | result = [] 16 | for naname in na: 17 | num = naname.split(".")[0].split("-")[-1] 18 | na_depth = int(naname.split(".")[0].split("-")[1].split('X')[0]) 19 | yh_depth = total_depth - na_depth 20 | yhname = "YH-%sX-%s.bam"%(yh_depth,num) 21 | bam1 = os.path.abspath(naname) 22 | bam2 = os.path.abspath(yhname) 23 | out = os.path.join(os.path.split(os.getcwd())[0],"NAYH-%sX-%sX-somatic-%s.bam"%(na_depth,yh_depth,num)) 24 | result.append(p.apply_async(merge_bam,args=(out,bam1,bam2))) 25 | p.close() 26 | p.join() 27 | -------------------------------------------------------------------------------- /1.QC.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # Raw data QC using fastp 4 | # specify paths for softwares and input/output files 5 | fastp='/path/to/fastp' 6 | 7 
| # infile 8 | NA12878_1='/path/to/NA12878.r1.fq.gz' 9 | NA12878_2='/path/to/NA12878.r2.fq.gz' 10 | YH_1='/path/to/YH.r1.fq.gz' 11 | YH_2='/path/to/YH.r2.fq.gz' 12 | 13 | # outfile 14 | NAout1='/path/to/naout.r1.fq.gz' 15 | NAout2='/path/to/naout.r2.fq.gz' 16 | NA12878js='/path/to/na12878.json' 17 | NA12878html='/path/to/na12878.html' 18 | YHout1='/path/to/yhout.r1.fq.gz' 19 | YHout2='/path/to/yhout.r2.fq.gz' 20 | YHjs='/path/to/yh.json' 21 | YHhtml='/path/to/yh.html' 22 | logNA='/path/to/na.log' 23 | logYH='/path/to/yh.log' 24 | 25 | # run fastp 26 | (time yhrun -N 1 -n 1 $fastp -w 24 -j $NA1287js -h $NA12878html -R "NA fastp Report" -i $NA12878_1 -o $NAout1 -I $NA12878_2 -O $NAout2) 2>&1 > $logNA 27 | (time yhrun -N 1 -n 1 $fastp -w 24 -j $YHjs -h $YHhtml -R "YH fastp Report" -i $YH_1 -o $YHout1 -I $YH_2 -O $YHout2) 2>&1 > $logYH 28 | # note:"yhrun" is the command which submit jobs to the computer clusters, we allocated 1 node and 24 threads for fastp. -------------------------------------------------------------------------------- /2.mapping.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # mapping to hg19 reference genome using bwa mem 4 | # specify paths for softwares and input/output files 5 | hg19='/path/to/hg19' 6 | bwa='/path/to/bwa' 7 | picard='/path/to/picard.jar' 8 | NA12878_1='/path/to/naout.r1.fq.gz' 9 | NA12878_2='/path/to/naout.r2.fq.gz' 10 | YH_1='/path/to/yhout.r1.fq.gz' 11 | YH_2='/path/to/yhout.r2.fq.gz' 12 | NAsam='/path/to/nasam.sam' 13 | NAbam='/path/to/na.addRG.bam' 14 | YHsam='/path/to/yhsam.sam' 15 | YHbam='/path/to/yh.addRG.bam' 16 | logNAmap='/path/to/na.log' 17 | logYHmap='/path/to/yh.log' 18 | lognatrans='/path/to/lognatrans.log' 19 | logyhtrans='/path/to/logyhtrans.log' 20 | 21 | # run bwa mem 22 | (time yhrun -N 1 -n 1 $bwa mem -t 6 $hg19 $NA12878_1 $NA12878_2 > $NAsam ) 2>&1 > $logNAmap 23 | 24 | (time yhrun -N 1 -n 1 $bwa mem -t 6 $hg19 $YH_1 $YH_2 > $YHsam ) 2>&1 > 
$logYHmap 25 | 26 | # convert SAM to BAM and add read groups 27 | (time yhrun -N 1 -n 1 java -jar $picard AddOrReplaceReadGroups \ 28 | I=$NAsam \ 29 | O=$NAbam \ 30 | RGID=NA \ 31 | RGLB=WES \ 32 | RGPL=Illumina \ 33 | RGPU=Novaseq \ 34 | RGSM=NA \ 35 | SO=coordinate \ 36 | CREATE_INDEX=true ) 2>&1 > $lognatrans 37 | 38 | (time yhrun -N 1 -n 1 java -jar $picard AddOrReplaceReadGroups \ 39 | I=$YHsam \ 40 | O=$YHbam \ 41 | RGID=YH \ 42 | RGLB=WES \ 43 | RGPL=Illumina \ 44 | RGPU=Novaseq \ 45 | RGSM=YH \ 46 | SO=coordinate \ 47 | CREATE_INDEX=true ) 2>&1 > $logyhtrans -------------------------------------------------------------------------------- /Strelka/1.batch_manta.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # batch start manta software on computer cluster, require 3 parameters: normal BAM file dir, 4 | # tumor BAM file dir and work dir 5 | 6 | import os,sys,shutil 7 | import subprocess as sub 8 | normdir = sys.argv[1] 9 | tumordir = sys.argv[2] 10 | workdir = sys.argv[3] 11 | 12 | tumordic={x:os.path.join(tumordir,x) for x in os.listdir(tumordir) if ((x.endswith(".bam"))|(x.endswith(".cram")))} 13 | normdic={x[-5]:os.path.join(normdir,x) for x in os.listdir(normdir) if ((x.endswith(".bam"))|(x.endswith(".cram")))} 14 | 15 | if not os.path.exists(workdir): 16 | os.makedirs(workdir,exist_ok=True) 17 | os.chdir(workdir) 18 | 19 | mantash = "manta.sh" 20 | 21 | with open('getmanta.sh') as f: 22 | getpy = f.readlines() 23 | 24 | for file in tumordic: 25 | tumor_abspath = tumordic[file] 26 | tmp = getpy.copy() 27 | os.makedirs(file.split(".")[0],exist_ok=True) 28 | os.chdir(file.split(".")[0]) 29 | shutil.copy(mantash,"./") 30 | norm_abspath = normdic['1'] 31 | tmp[4] = tmp[4].replace("something",norm_abspath) 32 | tmp[5] = tmp[5].replace("something",tumor_abspath) 33 | tmp[6] = tmp[6].replace("something",os.getcwd()) 34 | with open("getmanta.sh",'w') as f: 35 | f.writelines(tmp) 36 | 
os.system("chmod +x ./getmanta.sh") 37 | process = sub.Popen("./getmanta.sh",shell=True) 38 | ret = process.wait() 39 | if ret == 0: 40 | os.system("yhbatch manta.sh") 41 | os.chdir("..") 42 | -------------------------------------------------------------------------------- /batch_extract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os,sys 3 | from multiprocessing import Pool 4 | 5 | # the original depth of NA12878 and YH-1 6 | nadepth=812.40 7 | yhdepth=407.25 8 | work_dph=int(sys.argv[1]) 9 | 10 | napercent = [0.99,0.95,0.90,0.80,0.70,0.60] 11 | yhpercent = [0.01,0.05,0.10,0.20,0.30,0.40] 12 | 13 | yhpercent = [0.05] 14 | NAtotal='/path/to/na.addRG.mdup.bam' 15 | YHtotal='/path/to/yh.addRG.mdup.bam' 16 | downsample="java -Xmx3000m -Djava.io.tmpdir=/BIGDATA1/scut_hldu_1/tmp -jar /BIGDATA1/scut_hldu_1/bin/picard.jar DownsampleSam" 17 | 18 | # Important! Random seed must NOT be changed! Or you won't get the same bam file! 
19 | seeds = {1:6666,2:8888,3:9999} 20 | 21 | def extract(script): 22 | os.system(script) 23 | 24 | 25 | p=Pool(18) 26 | if sys.argv[2]=='NA': 27 | for i in napercent: 28 | na_dph = int(work_dph*i) 29 | p_na = na_dph/nadepth 30 | for a in seeds: 31 | rand_seed = seeds[a] 32 | script = "%s I=%s O=NA-%sX-%s.bam R=%s P=%s A=0.00000001"%(downsample,NAtotal,na_dph,a,rand_seed,p_na) 33 | p.apply_async(extract,args=(script,)) 34 | 35 | elif sys.argv[2]=='YH': 36 | for i in yhpercent: 37 | yh_dph = int(work_dph*i) 38 | p_yh = yh_dph/yhdepth 39 | for a in seeds: 40 | rand_seed = seeds[a] 41 | script = "%s I=%s O=YH-%sX-%s.bam R=%s P=%s A=0.00000001"%(downsample,YHtotal,yh_dph,a,rand_seed,p_yh) 42 | p.apply_async(extract,args=(script,)) 43 | 44 | p.close() 45 | p.join() 46 | -------------------------------------------------------------------------------- /Strelka/2.batch_streka_somatic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # batch start Strelka2 somatic mode on computer cluster, require 4 parameters: normal BAM file dir, 4 | # tumor BAM file dir, work dir and the according manta result directory (the 3rd parameter of 1.batch_manta.py) 5 | 6 | import os,sys,shutil 7 | import subprocess as sub 8 | normdir = sys.argv[1] 9 | tumordir = sys.argv[2] 10 | workdir = sys.argv[3] 11 | depthN = sys.argv[4] 12 | tumordic={x:os.path.join(tumordir,x) for x in os.listdir(tumordir) if ((x.endswith(".bam"))|(x.endswith(".cram")))} 13 | normdic={x[-5]:os.path.join(normdir,x) for x in os.listdir(normdir) if ((x.endswith(".bam"))|(x.endswith(".cram")))} 14 | 15 | if not os.path.exists(workdir): 16 | os.makedirs(workdir,exist_ok=True) 17 | os.chdir(workdir) 18 | manta_dir = os.path.join(os.path.split(workdir)[0],"manta-%sNorm"%depthN) 19 | strelkash = "Strelka.sh" 20 | 21 | with open('getpy.sh') as f: 22 | getpy = f.readlines() 23 | 24 | for file in tumordic: 25 | tumor_abspath = tumordic[file] 26 | tmp = 
getpy.copy() 27 | 28 | os.makedirs(file.split(".")[0],exist_ok=True) 29 | os.chdir(file.split(".")[0]) 30 | shutil.copy(strelkash,"./") 31 | norm_abspath = normdic['1'] 32 | manta_path = os.path.join(manta_dir,file.split(".")[0],"results/variants/candidateSmallIndels.vcf.gz") 33 | tmp[4] = tmp[4].replace("something",norm_abspath) 34 | tmp[5] = tmp[5].replace("something",tumor_abspath) 35 | tmp[6] = tmp[6].replace("something",os.getcwd()) 36 | tmp[7] = tmp[7].replace("something",manta_path) 37 | with open("getpy.sh",'w') as f: 38 | f.writelines(tmp) 39 | os.system("chmod +x ./getpy.sh") 40 | process = sub.Popen("./getpy.sh",shell=True) 41 | ret = process.wait() 42 | if ret == 0: 43 | os.system("yhbatch Strelka.sh") 44 | os.chdir("..") --------------------------------------------------------------------------------