├── CNV_PDX ├── ASCAT_single_tool.xml ├── annotation_segments_tool.xml ├── bin │ ├── ensemblegenes_cnv_break.pl │ ├── get_model_gender.py │ ├── lrrbaf_ascat_tumor.R │ └── segment_raw_extend.pl ├── gender_single_tool.xml ├── lrrbaf_tool.xml └── single_sample_cnv_snparray.xml ├── CTP_PDX ├── XenomeSingleSample_PDX_Panel.xml ├── aggregate_stats_updated.xml ├── bin │ ├── aggregate_stats_updated.py │ ├── allele_depth_min_and_AF_from_ADs.py │ ├── caller_add_pindel.sh │ ├── clean_intergenic_region_gene_names.py │ ├── coveragecalculator.py │ ├── filter_dna_coverage.py │ ├── filter_for_minimum_depth │ ├── filter_trim.py │ └── read_group_from_fastq.py ├── bwa_mem.xml ├── config_file_SingleSample_PDX_Panel ├── gatkcoveragestats.xml ├── microIndel_calling.xml ├── qual_statistics.xml ├── removeFiles.xml ├── variant_annotation.xml ├── variant_calling.xml ├── variant_filtration.xml ├── variant_filtration_pindel.xml ├── variant_pre_proc_1.xml ├── variant_pre_proc_2.xml ├── variant_pre_proc_3.xml └── xenome_classification_DNA.xml ├── LICENSE.md ├── README.md └── RNA_PDX ├── XenomeRnaSeqSingleSamplePE.xml ├── add_gene_name_normalization_out.xml ├── bin ├── GeneName_and_Normalization_without_UCSC.pl ├── filter_rna_coverage.py ├── filter_trim.py ├── lymphoma_classifier.py ├── read_group_from_fastq.py └── summary_QC_metrics.pl ├── classifier_and_coverage.xml ├── picard_alignment_metrics.xml ├── qual_statistics.xml ├── read_group.xml ├── rsem_alignment.xml ├── summary_metrics.xml └── xenome_classification_RNA.xml /CNV_PDX/ASCAT_single_tool.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | ASCAT 2.4 Single sample 8 | 9 | 22 | 23 | 24 | R/3.1.1 25 | 26 | 27 | 28 | 29 | -f1-5 {in_2} > {out_6} 30 | 31 | 32 | 33 | CMD BATCH --slave "--args {in_1} {in_3}" {run_ascat_single} 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /CNV_PDX/annotation_segments_tool.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | Annotate ASCAT segments with LOH, chromosome arm fraction, ploidy. 9 | Annotate ensembl genes with copy number (CN). 10 | Rename relevant files with sample name. 11 | 12 | 13 | 26 | 27 | 29 | 30 | 32 | 33 | 34 | 35 | 36 | {segment_ploidy} {in_1} {in_2} {in_5} {in_7} 37 | 38 | 39 | 40 | {segment_gene} {out_1} {in_4} 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /CNV_PDX/bin/ensemblegenes_cnv_break.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use POSIX; 3 | use File::Basename; 4 | 5 | # This script annotates ensembl genes with copy number and breakpoints 6 | # nohup perl ensemblegenes_cnv_break.pl *.segments_raw.extend.txt mart_export_gene_chr1-Y.hg19ensembl75-85.08232016.txt 7 | 8 | if ($#ARGV != 1) { 9 | print "This scripts requires: \n"; 10 | exit(-1); 11 | } 12 | 13 | $file_cn = $ARGV[0]; 14 | $file_gene = $ARGV[1]; 15 | 16 | $file_output = basename($file_cn,".txt").".ensgene_cnvbreak.txt"; 17 | open(OUTFILE, ">$file_output"); 18 | 19 | open(GENEFILE, "$file_gene") or die "can't open $file_gene: $!"; 20 | $gene = ; 21 | chomp($gene); 22 | 23 | open(CNFILE, "$file_cn") or die "can't open $file_cn: $!"; 24 | @data = ; 25 | close(CNFILE); 26 | chomp(@data); 27 | 28 | #print OUTFILE "$tmp\tstartext\tendext\tstartext_desc\tendext_desc\tCN_raw\tLOH\tparm_fraction\tqarm_fraction\tploidy\tcopydiff_2\tcopydiff_ploidy\tlogratio_2\tlogratio_ploidy\n"; 29 | print OUTFILE "$gene\tnum_cnv_seg\tseg_desc\tploidy\tnMajor\tnMinor\tnAraw\tnBraw\tCN_raw\tLOH\tcopydiff_2\tcopydiff_ploidy\tlogratio_2\tlogratio_ploidy\tnMajor_max\tnMinor_max\tnAraw_max\tnBraw_max\tCN_raw_max\tLOH_max\tcopydiff_2_max\tcopydiff_ploidy_max\tlogratio_2_max\tlogratio_ploidy_max\n"; 30 | 31 | while ($gene = ) { 32 | 33 | chomp($gene); 34 | @line = split(/\t/, $gene); 35 | $chr = $line[2]; 36 | $start = $line[3]; 37 | $end = $line[4]; 38 | 39 | #$cnraw1=999; 40 | $numseg=0; 41 | $region=""; 42 | %segline = (); 43 | @n = (); 44 | 45 | for ($j=1; $j<=$#data; $j++) { 46 | @segment = split(/\t/, $data[$j]); 47 | 48 | $chr_cn = $segment[1]; 49 | $pos1 = $segment[2]; 50 | $pos2 = $segment[3]; 51 | $pos1ext = $segment[9]; 52 | $pos2ext = $segment[10]; 53 | $left = $segment[11]; 54 | $right = $segment[12]; 55 | $cnraw = $segment[13]; 56 | 57 | if (($chr_cn eq $chr) && ($start <= $pos2ext) && ($end >= $pos1ext)) { #overlap 58 | #$numseg++; 59 | push(@n, $cnraw); 60 | $segline{$cnraw} = [ @segment ]; 61 | 62 | #check if overlap with regions with no call 63 | if (($start <= $pos1) && ($end >= $pos1ext)) { 64 | $region = $region.$left.";"; 65 | } 66 | if (($start <= $pos2ext) && ($end >= $pos2)) { 67 | $region = $region.$right.";"; 68 | } 69 | 70 | #if ($cnraw < $cnraw1) { 71 | # $cnraw1 = $cnraw; 72 | # $count = $j; 73 | #} 74 | } 75 | } 76 | 77 | if ($region eq "") { 78 | $region = "NA"; 79 | } 80 | 81 | if ($#n >= 0) { 82 | 83 | $numseg = $#n +1; 84 | @sortn = sort{ $a <=> $b } @n; 85 | 86 | $nA = $segline{$sortn[0]}[4]; 87 | $nB = $segline{$sortn[0]}[5]; 88 | $rawA = $segline{$sortn[0]}[6]; 89 | $rawB = $segline{$sortn[0]}[7]; 90 | $cnraw = $segline{$sortn[0]}[13]; 91 | $loh = $segline{$sortn[0]}[14]; 92 | $ploidy= $segline{$sortn[0]}[17]; 93 | $copydiff1 = $segline{$sortn[0]}[18]; 94 | $copydiff2 = $segline{$sortn[0]}[19]; 95 | $logratio1 = $segline{$sortn[0]}[20]; 96 | $logratio2 = $segline{$sortn[0]}[21]; 97 | 98 | $outline = "$gene\t$numseg\t$region\t$ploidy\t$nA\t$nB\t$rawA\t$rawB\t$cnraw\t$loh\t$copydiff1\t$copydiff2\t$logratio1\t$logratio2\t"; 99 | 100 | if ($numseg > 1 ) { 101 | $nA = $segline{$sortn[$#sortn]}[4]; 102 | $nB = $segline{$sortn[$#sortn]}[5]; 103 | $rawA = $segline{$sortn[$#sortn]}[6]; 104 | $rawB = $segline{$sortn[$#sortn]}[7]; 105 | $cnraw = $segline{$sortn[$#sortn]}[13]; 106 | $loh = $segline{$sortn[$#sortn]}[14]; 107 | $copydiff1 = $segline{$sortn[$#sortn]}[18]; 108 | $copydiff2 = $segline{$sortn[$#sortn]}[19]; 109 | $logratio1 = $segline{$sortn[$#sortn]}[20]; 110 | $logratio2 = $segline{$sortn[$#sortn]}[21]; 111 | } 112 | else { 113 | $nA = "NA"; 114 | $nB = "NA"; 115 | $rawA = "NA"; 116 | $rawB = "NA"; 117 | $cnraw = "NA"; 118 | $loh = "NA"; 119 | $copydiff1 = "NA"; 120 | $copydiff2 = "NA"; 121 | $logratio1 = "NA"; 122 | $logratio2 = "NA"; 123 | 124 | } 125 | 126 | $outline = $outline."$nA\t$nB\t$rawA\t$rawB\t$cnraw\t$loh\t$copydiff1\t$copydiff2\t$logratio1\t$logratio2"; 127 | print OUTFILE "$outline\n"; 128 | } 129 | } 130 | 131 | close (GENEFILE); 132 | close (OUTFILE); 133 | -------------------------------------------------------------------------------- /CNV_PDX/bin/get_model_gender.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | from __future__ import print_function 3 | import sys 4 | import requests 5 | import argparse 6 | import json 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('-d', '--details', action='store_true', 11 | help="Return ID information as well as gender") 12 | parser.add_argument('id', help="The model ID whose gender is needed") 13 | 14 | return parser.parse_args() 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | # for test: 20 | url = 'http://JSON/gender' 21 | # for production: 22 | url = 'http://elims/JSON/gender' 23 | r = requests.get(url, params={'id': args.id}) 24 | if r.status_code == 200: 25 | try: 26 | d = r.json() 27 | except: 28 | print('JSON decoding failed. Here is the returned string:', 29 | file=sys.stderr) 30 | print(r.text, file=sys.stderr) 31 | print('And the request...', file=sys.stderr) 32 | print(r.request.__dict__, file=sys.stderr) 33 | 34 | sys.exit(3) 35 | 36 | if args.details: 37 | print('Query ID: {0}\tInventory Code: {1}\t' 38 | 'Model ID: {2}\tGender: {3}'. 39 | format(d['query_id'], d['inventory_code'], 40 | d['model_id'], d['gender'])) 41 | else: 42 | print(d['gender'].lower()) 43 | if d['gender'] == "NOT FOUND": 44 | # We're about to exit with error status. Write a reason to the log. 45 | print("Couldn't find model {0} in the database.".format(args.id), 46 | file=sys.stderr) 47 | sys.exit(1) 48 | else: 49 | print('Request failed with status code:', r.status_code, file=sys.stderr) 50 | sys.exit(2) 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /CNV_PDX/bin/lrrbaf_ascat_tumor.R: -------------------------------------------------------------------------------- 1 | options(scipen = 999) 2 | 3 | args=(commandArgs(TRUE)) 4 | snp_pos <- args[1] 5 | gcfile <- args[2] 6 | 7 | lrrbaf = read.table("lrr_baf1.txt", header = T, sep = "\t", row.names=1) 8 | 9 | SNPpos = read.table(snp_pos,header=T,sep="\t",row.names=1) 10 | 11 | firstline = read.table("lrr_baf1.txt", nrows=1, sep = "\t") 12 | sample = sub(".CEL.Log.R.Ratio","",firstline[1,4]) 13 | #sample = sub(".CEL.Log.R.Ratio","",colnames(lrrbaf)[3]) 14 | 15 | Tumor_LogR = lrrbaf[rownames(SNPpos),3,drop=F] 16 | colnames(Tumor_LogR) = sample 17 | 18 | Tumor_BAF = lrrbaf[rownames(SNPpos),4,drop=F] 19 | colnames(Tumor_BAF) = sample 20 | 21 | #Normal_LogR = lrrbaf[rownames(SNPpos),5,drop=F] 22 | #colnames(Normal_LogR) = sample 23 | 24 | #Normal_BAF = lrrbaf[rownames(SNPpos),6,drop=F] 25 | #colnames(Normal_BAF) = sample 26 | 27 | #replace 2's by NA 28 | Tumor_BAF[Tumor_BAF==2]=NA 29 | #Normal_BAF[Normal_BAF==2]=NA 30 | 31 | # Tumor_LogR: correct difference between copy number only probes and other probes 32 | CNprobes = substring(rownames(SNPpos),1,2)=="CN" 33 | 34 | Tumor_LogR[CNprobes,1] = Tumor_LogR[CNprobes,1]-mean(Tumor_LogR[CNprobes,1],na.rm=T) 35 | Tumor_LogR[!CNprobes,1] = Tumor_LogR[!CNprobes,1]-mean(Tumor_LogR[!CNprobes,1],na.rm=T) 36 | 37 | #Normal_LogR[CNprobes,1] = Normal_LogR[CNprobes,1]-mean(Normal_LogR[CNprobes,1],na.rm=T) 38 | #Normal_LogR[!CNprobes,1] = Normal_LogR[!CNprobes,1]-mean(Normal_LogR[!CNprobes,1],na.rm=T) 39 | 40 | # limit the number of digits: 41 | Tumor_LogR = round(Tumor_LogR,4) 42 | #Normal_LogR = round(Normal_LogR,4) 43 | 44 | write.table(cbind(SNPpos,Tumor_BAF),paste(sample, ".tumor.BAF.txt", sep=""),sep="\t",row.names=T,col.names=NA,quote=F) 45 | #write.table(cbind(SNPpos,Normal_BAF),paste(sample, ".normal.BAF.txt", sep=""),sep="\t",row.names=T,col.names=NA,quote=F) 46 | 47 | write.table(cbind(SNPpos,Tumor_LogR),paste(sample, ".tumor.LogR.txt", sep=""),sep="\t",row.names=T,col.names=NA,quote=F) 48 | #write.table(cbind(SNPpos,Normal_LogR),paste(sample, ".normal.LogR.txt", sep=""),sep="\t",row.names=T,col.names=NA,quote=F) 49 | 50 | #run ASCAT functions 51 | 52 | library(ASCAT) 53 | file.tumor.LogR <- dir(pattern="tumor.LogR") 54 | file.tumor.BAF <- dir(pattern="tumor.BAF") 55 | #file.normal.LogR <- dir(pattern="normal.LogR") 56 | #file.normal.BAF <- dir(pattern="normal.BAF") 57 | 58 | gender <- read.table("gender.txt", sep="\t") 59 | sex <- as.vector(gender[1,1]) 60 | sex[sex == "female"] <- "XX" 61 | sex[sex == "male"] <- "XY" 62 | sex[sex == "unknown"] <- "XX" 63 | 64 | #samplename <- sub(".tumor.LogR.txt", "", file.tumor.LogR) 65 | 66 | if (sex == "XX") { 67 | 68 | ascat.bc <- ascat.loadData(file.tumor.LogR, file.tumor.BAF, chrs=c(1:22, "X", "Y"), gender=sex) 69 | 70 | } else if (sex == "XY") { 71 | 72 | ascat.bc <- ascat.loadData(file.tumor.LogR, file.tumor.BAF, chrs=c(1:22, "X","Y"), gender=sex) 73 | 74 | } 75 | #ascat.bc <- ascat.loadData(file.tumor.LogR, file.tumor.BAF, file.normal.LogR, file.normal.BAF, chrs=c(1:22, "X"), gender=sex) 76 | 77 | #GC correction for SNP6 data 78 | ascat.bc <- ascat.GCcorrect(ascat.bc, gcfile) 79 | 80 | ascat.plotRawData(ascat.bc) 81 | 82 | gg<-ascat.predictGermlineGenotypes(ascat.bc, platform = "AffySNP6") 83 | 84 | ascat.bc = ascat.aspcf(ascat.bc, ascat.gg=gg) 85 | 86 | ascat.plotSegmentedData(ascat.bc) 87 | 88 | ascat.output = ascat.runAscat(ascat.bc) 89 | 90 | #save ASCAT results 91 | 92 | save.image(paste(sample,".RData",sep="")) 93 | 94 | if ( length(ascat.output$failedarrays) == 0 ) { 95 | 96 | num_probes <- vector(mode="numeric", length=nrow(ascat.output$segments_raw)) 97 | for (i in 1:nrow(ascat.output$segments_raw)) { 98 | 99 | #print(i) 100 | L1 = which(SNPpos$Chromosome == ascat.output$segments_raw$chr[i] & SNPpos$Physical.Position == ascat.output$segments_raw$startpos[i]) 101 | L2 = which(SNPpos$Chromosome == ascat.output$segments_raw$chr[i] & SNPpos$Physical.Position == ascat.output$segments_raw$endpos[i]) 102 | num_probes[i] = L2[length(L2)] - L1[1] + 1 103 | 104 | } 105 | seg_raw = cbind(ascat.output$segments_raw,num_probes) 106 | 107 | num_probes <- vector(mode="numeric", length=nrow(ascat.output$segments)) 108 | for (i in 1:nrow(ascat.output$segments)) { 109 | 110 | #print(i) 111 | L1 = which(SNPpos$Chromosome == ascat.output$segments$chr[i] & SNPpos$Physical.Position == ascat.output$segments$startpos[i]) 112 | L2 = which(SNPpos$Chromosome == ascat.output$segments$chr[i] & SNPpos$Physical.Position == ascat.output$segments$endpos[i]) 113 | num_probes[i] = L2[length(L2)] - L1[1] + 1 114 | 115 | } 116 | seg = cbind(ascat.output$segments,num_probes) 117 | 118 | write.table(seg_raw, file=paste(sample,".segments_raw.txt",sep=""), sep="\t", quote=F, row.names=F) 119 | write.table(seg, file=paste(sample,".segments.txt",sep=""), sep="\t", quote=F, row.names=F) 120 | write.table(as.data.frame(ascat.output$aberrantcellfraction), file=paste(sample,".aberrantcellfraction.txt",sep=""), sep="\t", quote=F, row.names=F, col.names=F) 121 | write.table(as.data.frame(ascat.output$ploidy), file=paste(sample,".ploidy.txt",sep=""), sep="\t", quote=F, row.names=F, col.names=F) 122 | 123 | } else { 124 | 125 | write.table(as.data.frame(ascat.output$failedarrays), file=paste(sample,".failedarrays.txt",sep=""), sep="\t", quote=F, row.names=F, col.names=F) 126 | 127 | } 128 | 129 | if ( !is.null(ascat.output$nonaberrantarrays) ) { 130 | 131 | write.table(as.data.frame(ascat.output$nonaberrantarrays), file=paste(sample,".nonaberrantarrays.txt",sep=""), sep="\t", quote=F, row.names=F, col.names=F) 132 | 133 | } 134 | -------------------------------------------------------------------------------- /CNV_PDX/bin/segment_raw_extend.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use POSIX; 3 | use File::Basename; 4 | 5 | # This script adds to segment file the the arm fraction, LOH and CN diff and log ratio relative to 2 and ploidy 6 | # The segments are extended 7 | 8 | # nohup perl segment_raw_annotate.pl *segments_raw.txt *ploidy.txt hg19_chromosome_arm.txt gender.txt & 9 | 10 | if ($#ARGV != 3) { 11 | print "This scripts requires: \n"; 12 | exit(-1); 13 | } 14 | 15 | $file_cn = $ARGV[0]; 16 | $file_ploidy = $ARGV[1]; 17 | $file_arm = $ARGV[2]; 18 | $file_gender = $ARGV[3]; 19 | 20 | $file_output = basename($file_cn,".txt").".extend.txt"; 21 | 22 | $ploidy = `cat $file_ploidy`; 23 | chomp($ploidy); 24 | 25 | $gender = `cat $file_gender`; 26 | chomp($gender); 27 | 28 | if (($gender eq "female") || ($gender eq "unknown")) { 29 | $cn_factor = 1; 30 | } 31 | elsif ($gender eq "male") { 32 | $cn_factor= 0.5; 33 | } 34 | 35 | $tmp = `cat $file_arm | awk 'NR>1'`; 36 | @arm = split(/\n/,$tmp); 37 | chomp(@arm); 38 | 39 | open(CN, "$file_cn") or die "can't open $file_cn: $!"; 40 | $tmp = ; 41 | chomp($tmp); 42 | 43 | open(OUTFILE, ">$file_output"); 44 | print OUTFILE "$tmp\tstartext\tendext\tstartext_desc\tendext_desc\tCN_raw\tLOH\tparm_fraction\tqarm_fraction\tploidy\tcopydiff_2\tcopydiff_ploidy\tlogratio_2\tlogratio_ploidy\n"; 45 | 46 | open(TMPFILE, ">tmp.txt"); 47 | 48 | #merge segments 49 | $tmp = ; 50 | chomp($tmp); 51 | @line = split(/\t/,$tmp); 52 | $sample = $line[0]; 53 | $chromo = $line[1]; 54 | $n1 = $line[4]; 55 | $n2 = $line[5]; 56 | $cn1 = $line[6]; 57 | $cn2 = $line[7]; 58 | $start = $line[2]; 59 | $end = $line[3]; 60 | $num = $line[8]; 61 | 62 | while ($tmp = ) { 63 | chomp($tmp); 64 | @line = split(/\t/,$tmp); 65 | 66 | if (($chromo eq $line[1]) && ($cn1 == $line[6]) && ($cn2 == $line[7])) { 67 | $end = $line[3]; 68 | $num = $num + $line[8]; 69 | } 70 | else { 71 | print TMPFILE "$sample\t$chromo\t$start\t$end\t$n1\t$n2\t$cn1\t$cn2\t$num\n"; 72 | $sample = $line[0]; 73 | $chromo = $line[1]; 74 | $n1 = $line[4]; 75 | $n2 = $line[5]; 76 | $cn1 = $line[6]; 77 | $cn2 = $line[7]; 78 | $start = $line[2]; 79 | $end = $line[3]; 80 | $num = $line[8]; 81 | } 82 | } 83 | #lastline 84 | print TMPFILE "$sample\t$chromo\t$start\t$end\t$n1\t$n2\t$cn1\t$cn2\t$num\n"; 85 | 86 | close (CN); 87 | close (TMPFILE); 88 | 89 | open(CN, "tmp.txt") or die "can't open tmp.txt: $!"; 90 | @seg = ; 91 | chomp(@seg); 92 | close (CN); 93 | $n = 0; 94 | 95 | for ($j=0; $j<$#seg; $j++) { 96 | 97 | @array1 = split(/\t/,$seg[$j]); 98 | @array2 = split(/\t/,$seg[$j+1]); 99 | #$x1 = $array1[2]; 100 | $x2 = $array1[3]; 101 | $y1 = $array2[2]; 102 | #$y2 = $array2[3]; 103 | 104 | if ($array1[1] ne $n) { #first line for chr 105 | 106 | $n = $array1[1]; 107 | $left = 0; 108 | $left1 = "telomere"; 109 | 110 | for ($i=1; $i<=$#arm; $i+=2) { 111 | @line = split(/\t/,$arm[$i]); 112 | if ($n eq substr($line[0],3)) { 113 | $a = $line[1]; 114 | $b = $line[2]; 115 | } 116 | } 117 | 118 | if ($array2[1] ne $n) { #last line for chr 119 | $right = $b; 120 | $right1 = "telomere"; 121 | } 122 | elsif (($x2 < $a) && ($y1 > $a)) { 123 | $right = $a; 124 | $right1 = "centromere"; 125 | } 126 | else { 127 | $right = floor(($x2 + $y1)/2); 128 | $right1 = "no_probe"; 129 | } 130 | } 131 | else { 132 | 133 | $left = $right + 1; 134 | $left1 = $right1; 135 | 136 | if ($array2[1] ne $n) { #last line for chr 137 | 138 | $right = $b; 139 | $right1 = "telomere"; 140 | } 141 | elsif (($x2 < $a) && ($y1 > $a)) { 142 | $right = $a; 143 | $right1 = "centromere"; 144 | } 145 | else { 146 | $right = floor(($x2 + $y1)/2); 147 | $right1 = "no_probe"; 148 | } 149 | } 150 | 151 | $copy = $array1[6] + $array1[7]; 152 | if ($array1[6] >= 0.5 && $array1[7] <= 0.1) { 153 | $loh=1; 154 | } 155 | else { 156 | $loh=0; 157 | } 158 | 159 | for ($i=0; $i<=$#arm; $i+=2) { 160 | @line = split(/\t/,$arm[$i]); 161 | if ($n eq substr($line[0],3)) { 162 | if (($right>=$line[1]) && ($left<=$line[2])) { 163 | @tmp = ($left,$right,$line[1],$line[2]); 164 | @sorttmp = sort{ $a <=> $b } @tmp; 165 | $overlap1=($sorttmp[2]-$sorttmp[1])/($line[2]-$line[1]); 166 | } 167 | else { 168 | $overlap1=0; 169 | } 170 | } 171 | } 172 | 173 | for ($i=1; $i<=$#arm; $i+=2) { 174 | @line = split(/\t/,$arm[$i]); 175 | if ($n eq substr($line[0],3)) { 176 | if (($right>=$line[1]) && ($left<=$line[2])) { 177 | @tmp = ($left,$right,$line[1],$line[2]); 178 | @sorttmp = sort{ $a <=> $b } @tmp; 179 | $overlap2=($sorttmp[2]-$sorttmp[1])/($line[2]-$line[1]); 180 | } 181 | else { 182 | $overlap2=0; 183 | } 184 | } 185 | } 186 | 187 | if (($n eq "X") || ($n eq "Y")) { 188 | $diff1=$copy - ($cn_factor * 2); 189 | $diff2=$copy- ($cn_factor * $ploidy); 190 | $logratio1 = log(($copy+0.01)/($cn_factor * 2))/log(2); 191 | $logratio2 = log(($copy+0.01)/($cn_factor * $ploidy))/log(2); 192 | } 193 | else { 194 | $diff1=$copy-2; 195 | $diff2=$copy-$ploidy; 196 | $logratio1 = log(($copy+0.01)/2)/log(2); 197 | $logratio2 = log(($copy+0.01)/$ploidy)/log(2); 198 | } 199 | 200 | print OUTFILE "$seg[$j]\t$left\t$right\t$left1\t$right1\t$copy\t$loh\t$overlap1\t$overlap2\t$ploidy\t$diff1\t$diff2\t$logratio1\t$logratio2\n"; 201 | } 202 | 203 | @array1 = split(/\t/,$seg[$#seg]); 204 | 205 | if ($array1[1] ne $n) { #first line for chr 206 | 207 | $n = $array1[1]; 208 | $left = 0; 209 | $left1 = "telomere"; 210 | 211 | for ($i=1; $i<=$#arm; $i+=2) { 212 | @line = split(/\t/,$arm[$i]); 213 | if ($n eq substr($line[0],3)) { 214 | $a = $line[1]; 215 | $b = $line[2]; 216 | } 217 | } 218 | 219 | $right = $b; 220 | $right1 = "telomere"; 221 | 222 | } 223 | else { 224 | 225 | $left = $right + 1; 226 | $left1 = $right1; 227 | 228 | $right = $b; 229 | $right1 = "telomere"; 230 | 231 | } 232 | 233 | $copy = $array1[6] + $array1[7]; 234 | if ($array1[6] >= 0.5 && $array1[7] <= 0.1) { 235 | $loh=1; 236 | } 237 | else { 238 | $loh=0; 239 | } 240 | 241 | for ($i=0; $i<=$#arm; $i+=2) { 242 | @line = split(/\t/,$arm[$i]); 243 | if ($n eq substr($line[0],3)) { 244 | if (($right>=$line[1]) && ($left<=$line[2])) { 245 | @tmp = ($left,$right,$line[1],$line[2]); 246 | @sorttmp = sort{ $a <=> $b } @tmp; 247 | $overlap1=($sorttmp[2]-$sorttmp[1])/($line[2]-$line[1]); 248 | } 249 | else { 250 | $overlap1=0; 251 | } 252 | } 253 | } 254 | 255 | for ($i=1; $i<=$#arm; $i+=2) { 256 | @line = split(/\t/,$arm[$i]); 257 | if ($n eq substr($line[0],3)) { 258 | if (($right>=$line[1]) && ($left<=$line[2])) { 259 | @tmp = ($left,$right,$line[1],$line[2]); 260 | @sorttmp = sort{ $a <=> $b } @tmp; 261 | $overlap2=($sorttmp[2]-$sorttmp[1])/($line[2]-$line[1]); 262 | } 263 | else { 264 | $overlap2=0; 265 | } 266 | } 267 | } 268 | 269 | if (($n eq "X") || ($n eq "Y")) { 270 | $diff1=$copy - ($cn_factor * 2); 271 | $diff2=$copy- ($cn_factor * $ploidy); 272 | $logratio1 = log(($copy+0.01)/($cn_factor * 2))/log(2); 273 | $logratio2 = log(($copy+0.01)/($cn_factor * $ploidy))/log(2); 274 | } 275 | else { 276 | $diff1=$copy-2; 277 | $diff2=$copy-$ploidy; 278 | $logratio1 = log(($copy+0.01)/2)/log(2); 279 | $logratio2 = log(($copy+0.01)/$ploidy)/log(2); 280 | } 281 | 282 | print OUTFILE "$seg[$j]\t$left\t$right\t$left1\t$right1\t$copy\t$loh\t$overlap1\t$overlap2\t$ploidy\t$diff1\t$diff2\t$logratio1\t$logratio2\n"; 283 | 284 | close(CN); 285 | close (OUTFILE); 286 | -------------------------------------------------------------------------------- /CNV_PDX/gender_single_tool.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | Get gender from elims or genotype and prepare listfile 8 | 9 | 24 | 25 | 26 | python 27 | apt/1.15.0 28 | 29 | 30 | 31 | 32 | {elims_gender} {in_1} > {out_3} 33 | 34 | 35 | 36 | 37 | 38 | "cel_files" > {list_file} 39 | 40 | 41 | 42 | {in_2} >> {list_file} 43 | 44 | 45 | 46 | -v a=|in_3| '{if (NR>1) print a"/"$2}' |in_4| >> |list_file| 47 | 48 | 49 | 50 | -v a=|in_3| '{if (NR>1) print a"/"$2}' |in_5| >> |list_file| 51 | 52 | 53 | 55 | 56 | 58 | 59 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | apt-probeset-genotype -c {cdf} -a birdseed --read-models-birdseed {birdseed_models} --special-snps {special_snps} --out-dir {in_6} --cel-files {list_file} 72 | 73 | 74 | 75 | {birdseed_report} | grep -v "#" | awk 'NR==2' | cut -f2 > {out_4} 76 | 77 | 78 | 79 | gender=`cat {out_3}` 80 | 81 | 82 | 83 | if [ "X$gender" = "Xunknown" -o "X$gender" = "Xunspecified" ]; 84 | then 85 | gender=`cat {out_4}`; 86 | cp {out_4} {out_1}; 87 | else 88 | cp {out_3} {out_1}; 89 | fi 90 | 91 | 92 | 93 | "cel_files" > {out_2} 94 | 95 | 96 | 97 | {in_2} >> {out_2} 98 | 99 | 100 | 101 | if [ "X$gender" = "Xfemale" -o "X$gender" = "Xunknown" ]; 102 | then 103 | awk -v a=#in_3# '{if (NR>1) print a"/"$2}' #in_4# >> #out_2#; 104 | elif [ "X$gender" = "Xmale" ]; 105 | then 106 | awk -v a=#in_3# '{if (NR>1) print a"/"$2}' #in_5# >> #out_2#; 107 | fi 108 | 109 | 110 | 111 | {birdseed_confidences} | grep -v "#" | cut -f1-2 > {birdseed_confidences1} 112 | 113 | 114 | 115 | {birdseed_calls} | grep -v "#" | cut -f1-2 > {birdseed_calls1} 116 | 117 | 118 | 119 | {birdseed_report} | grep -v "#" | head -2 > {birdseed_report1} 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /CNV_PDX/lrrbaf_tool.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | Normalize snp array cancer cel file with 300 hapmap cel files, and outputs LRR (Log R ratio) and BAF (B-Allele Frequency) using genoclustering 8 | 9 | 16 | 17 | 18 | apt/1.15.0 19 | 20 | 21 | 22 | 24 | 25 | 27 | 28 | 29 | 30 | 31 |