├── _config.yml ├── testsample ├── groupa.txt ├── groupb.txt ├── testsample_WT.sorted.bam ├── testsample_WT.sorted.bam.bai ├── testsample_mutant.sorted.bam ├── testsample_mutant.sorted.bam.bai └── testsample.db ├── pvalue_dist.tiff ├── qvalue_dist.tiff ├── Event_Region_Explained.png ├── LICENSE ├── sjFromSAMcollapseUandM_inclOverlaps.awk ├── dummyai-db-alone.pl ├── PSI-Sigma_gff2gtf.py ├── Parallele.md ├── PSIsigma-FDR-v.1.0.pl ├── PSIsigma-ir-v.1.2.pl ├── PSIsigma-filter-v.1.0.pl ├── README.md ├── PSIsigma-longread-gene-expression.pl ├── dummyai.pl ├── PSIsigma-db-v.1.0.pl └── PSIsigma-PSI-v.1.1.pl /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-leap-day -------------------------------------------------------------------------------- /testsample/groupa.txt: -------------------------------------------------------------------------------- 1 | testsample_WT.sorted.bam 2 | 3 | -------------------------------------------------------------------------------- /testsample/groupb.txt: -------------------------------------------------------------------------------- 1 | testsample_mutant.sorted.bam 2 | 3 | -------------------------------------------------------------------------------- /pvalue_dist.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wososa/PSI-Sigma/HEAD/pvalue_dist.tiff -------------------------------------------------------------------------------- /qvalue_dist.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wososa/PSI-Sigma/HEAD/qvalue_dist.tiff -------------------------------------------------------------------------------- /Event_Region_Explained.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wososa/PSI-Sigma/HEAD/Event_Region_Explained.png -------------------------------------------------------------------------------- /testsample/testsample_WT.sorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wososa/PSI-Sigma/HEAD/testsample/testsample_WT.sorted.bam -------------------------------------------------------------------------------- /testsample/testsample_WT.sorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wososa/PSI-Sigma/HEAD/testsample/testsample_WT.sorted.bam.bai -------------------------------------------------------------------------------- /testsample/testsample_mutant.sorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wososa/PSI-Sigma/HEAD/testsample/testsample_mutant.sorted.bam -------------------------------------------------------------------------------- /testsample/testsample_mutant.sorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wososa/PSI-Sigma/HEAD/testsample/testsample_mutant.sorted.bam.bai -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | PSI-Sigma: A splicing-detection method for short-read and long-read RNA-seq data 2 | © Kuan-Ting Lin, 2018-2024 3 | PSI-Sigma is free for non-commercial purposes by individuals at an academic or non-profit institutions. 4 | For commercial purposes, please contact tech transfer office of CSHL via narayan@cshl.edu 5 | -------------------------------------------------------------------------------- /sjFromSAMcollapseUandM_inclOverlaps.awk: -------------------------------------------------------------------------------- 1 | BEGIN { 2 | OFS="\t"; 3 | mapqU=255; 4 | } 5 | { 6 | if (substr($1,1,1)!="@") { 7 | 8 | #m=and($2,0x80)/0x80+1; 9 | 10 | if ($1!=readNameOld) delete readSJs; 11 | readNameOld=$1; 12 | 13 | n=split($6,L,/[A-Z]/)-1; 14 | split($6,C,/[0-9]*/); 15 | t=1;g=$4; 16 | for (k=1;k<=n;k++) {#scan through CIGAR operations 17 | if (C[k+1]=="S" || C[k+1]=="I") { 18 | t+=L[k]; 19 | } else if (C[k+1]=="D") { 20 | g+=L[k]; 21 | } else if (C[k+1]=="N") { 22 | sj1=$3 "\t" g "\t" g+L[k]-1; 23 | readSJs[sj1]++; 24 | 25 | if (readSJs[sj1]==1) {#only count this junction if it has not been counted for the same read 26 | SJ[sj1]=1; 27 | if ($5>=mapqU) { 28 | SJu[sj1]++; 29 | } else { 30 | SJm[sj1]++; 31 | }; 32 | }; 33 | 34 | if ($5>=mapqU) { 35 | SJu1[sj1]++; 36 | } else { 37 | SJm1[sj1]++; 38 | }; 39 | 40 | g+=L[k]; 41 | 42 | } else { # M operation 43 | g+=L[k]; 44 | t+=L[k]; 45 | }; 46 | }; 47 | }; 48 | }; 49 | END { 50 | 51 | for (ii in SJ) { 52 | print ii, SJu[ii]+0, SJm[ii]+0, SJu1[ii]+0, SJm1[ii]+0; 53 | }; 54 | 55 | }; 56 | -------------------------------------------------------------------------------- /testsample/testsample.db: -------------------------------------------------------------------------------- 1 | 2 20263461 20278582 20278820 20282946 20278583 20278819 - 20263461 20282946 2_20263461_20282946_W_ENST00000338086_1 PUM2 2 | 2 20308585 20311493 20311664 20312235 20311494 20311663 - 20308585 20312235 2_20308585_20312235_W_ENST00000319801_1 PUM2 3 | 2 20308585 20311493 20311664 20312235 20311494 20311663 NMD 20308585 20312235 2_20308585_20312235_W_ENST00000429419_1 PUM2 4 | 2 20312424 20318536 20318646 20326260 20318537 20318645 - 20312424 20327309 2_20312424_20327309_W_ENST00000319801_1 PUM2 5 | 2 20318646 20326260 20326386 20327309 20326261 20326385 - 20312424 20327309 2_20312424_20327309_W_ENST00000319801_2 PUM2 6 | 2 20312424 20318536 20318646 20327309 20318537 20318645 - 20312424 20327309 2_20312424_20327309_W_ENST00000338086_1 PUM2 7 | 2 20312424 20318536 20318646 20326260 20318537 20318645 NMD 20312424 20327309 2_20312424_20327309_W_ENST00000432105_1 PUM2 8 | 2 20318646 20326260 20326386 20327309 20326261 20326385 NMD 20312424 20327309 2_20312424_20327309_W_ENST00000432105_2 PUM2 9 | 2 20318646 20326260 20326386 20327309 20326261 20326385 - 20318646 20327309 2_20318646_20327309_W_ENST00000319801_1 PUM2 10 | 2 20318646 20326260 20326386 20327309 20326261 20326385 NMD 20318646 20327309 2_20318646_20327309_W_ENST00000432105_1 PUM2 11 | 2 20327379 20331716 20331938 20332966 20331717 20331937 - 20327379 20350596 2_20327379_20350596_W_ENST00000424110_1 PUM2 12 | 2 20331938 20332966 20333064 20350596 20332967 20333063 - 20327379 20350596 2_20327379_20350596_W_ENST00000424110_2 PUM2 13 | 2 20327379 20327379 20327379 20350596 20350597 20350850 - 20327379 20350809 2_20327379_20350809_S_ENST00000361078_1 PUM2 14 | -------------------------------------------------------------------------------- /dummyai-db-alone.pl: -------------------------------------------------------------------------------- 1 | =begin 2 | PSI-Sigma: A splicing-detection method for short-read and long-read RNA-seq data 3 | © Kuan-Ting Lin, 2018-2024 4 | PSI-Sigma is free for non-commercial purposes by individuals at an academic or non-profit institution. 5 | For commercial purposes, please contact tech transfer office of CSHL via narayan@cshl.edu 6 | =end 7 | =cut 8 | #!/usr/bin/perl -w 9 | use strict; 10 | use Cwd qw(abs_path); 11 | 12 | my ($gtf,$name) = @ARGV; 13 | 14 | my $path = abs_path($0); 15 | $path=~s/\/dummyai\-db\-alone\.pl//; 16 | print "Path = $path\n"; 17 | 18 | my $noveljunctioncriteria = 10; 19 | 20 | my %chr; 21 | open(FILE,"$gtf") || die "Aborting.. Can't open $gtf : $!\n"; 22 | while(my $line=){ 23 | chomp $line; 24 | next if($line=~/^\#/); 25 | my @array = split(/\t/,$line); 26 | next if($array[2] ne "transcript"); 27 | my ($chr,$cat,$start,$end,$strand,$name) = ($array[0],$array[1],$array[3],$array[4],$array[6],$array[8]); 28 | $chr = "chr" . $chr if($chr!~/chr/); 29 | $chr{$chr}++; 30 | } 31 | close(OUT); 32 | close(FILE); 33 | 34 | my $starttime = time; 35 | my $dbname = $name . ".db"; 36 | my $bedname = $name . ".bed"; 37 | my $chrs; 38 | foreach my $chr(sort keys %chr){ 39 | $chrs .= "\t" . $chr; 40 | } 41 | $chrs=~s/\t//; 42 | if(-e $dbname){ 43 | if(-z $dbname){ 44 | print "Regenerating $dbname...\n"; 45 | rundb($noveljunctioncriteria,$gtf,$chrs); 46 | } 47 | }else{ 48 | print "Generating $dbname...\n"; 49 | rundb($noveljunctioncriteria,$gtf,$chrs); 50 | } 51 | my $stoptime = time; 52 | my $hours = sprintf("%.4f",(($stoptime-$starttime)/3600)); 53 | print "===Database spent $hours hours.===\n"; 54 | 55 | sub rundb{ 56 | my $noveljunctioncriteria = shift; 57 | my $gtf = shift; 58 | my $chrs = shift; 59 | my @chromosomes = split(/\t/,$chrs); 60 | foreach my $chromosome(@chromosomes){ 61 | next if($chromosome=~/chrGL/); 62 | next if($chromosome=~/chrKI/); 63 | my $commend = "perl " . $path . "/PSIsigma-db-v.1.0.pl $gtf " . $chromosome . " " . $noveljunctioncriteria; 64 | #print "Doing... $commend\n"; 65 | print "Doing... $chromosome\n"; 66 | system("$commend"); 67 | } 68 | 69 | system("cat chr*.db > $dbname"); 70 | system("cat chr*.bed > $bedname"); 71 | system("rm chr*.db"); 72 | system("rm chr*.bed"); 73 | } 74 | -------------------------------------------------------------------------------- /PSI-Sigma_gff2gtf.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def gff_to_gtf(gff_filename, gtf_filename): 4 | with open(gff_filename, 'r') as gff_file, open(gtf_filename, 'w') as gtf_file: 5 | for line in gff_file: 6 | if line.startswith('#') or line.strip() == '': 7 | continue 8 | 9 | fields = line.strip().split('\t') 10 | attributes = fields[8] 11 | 12 | # Extract ID, Name, and Parent if available 13 | attr_dict = {attr.split('=')[0]: attr.split('=')[1] for attr in attributes.split(';') if '=' in attr} 14 | gene_id = attr_dict.get('ID', '') 15 | gene_name = attr_dict.get('Name', '') 16 | transcript_id = attr_dict.get('Parent', '') 17 | 18 | # Change 'mRNA' feature type to 'transcript' for GTF 19 | feature_type = fields[2] 20 | if feature_type == 'mRNA': 21 | feature_type = 'transcript' 22 | 23 | # Construct GTF attribute field 24 | gtf_attributes = [] 25 | if feature_type == 'gene': 26 | gtf_attributes.append(f'gene_id "{gene_id}"') 27 | gtf_attributes.append(f'gene_name "{gene_name}"') 28 | elif feature_type == 'transcript': 29 | gtf_attributes.append(f'gene_id "{transcript_id}"') 30 | gtf_attributes.append(f'transcript_id "{gene_id}"') 31 | else: # exon or CDS 32 | gtf_attributes.append(f'gene_id "{transcript_id.split(".")[0]}"') 33 | gtf_attributes.append(f'transcript_id "{transcript_id}"') 34 | exon_number = gene_id.split(".")[-1].lstrip('exon') 35 | gtf_attributes.append(f'exon_number "{exon_number}"') 36 | if feature_type == 'exon': 37 | gtf_attributes.append('transcript_biotype "unknown"') 38 | 39 | gtf_attributes_str = '; '.join(gtf_attributes) + ';' 40 | gtf_line = '\t'.join(fields[:2] + [feature_type] + fields[3:8]) + '\t' + gtf_attributes_str + '\n' 41 | gtf_file.write(gtf_line) 42 | 43 | def main(): 44 | parser = argparse.ArgumentParser(description="Convert GFF file to GTF format.") 45 | parser.add_argument("--gff", required=True, help="Input GFF file") 46 | parser.add_argument("--gtf", required=True, help="Output GTF file") 47 | args = parser.parse_args() 48 | 49 | gff_to_gtf(args.gff, args.gtf) 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /Parallele.md: -------------------------------------------------------------------------------- 1 | Generating .db and .IR.out.tab in parallel 2 | ============ 3 | PSI-Sigma is currently a single-thread software. The majority of the computing time was for creating .db and IR.out.tab files. Also, to make event IDs compatible in multiple comparisons, you need to generate a univeral .db file. 4 | 5 | Generating .db in parallel 6 | ============ 7 | To generating a univeral .db in parallel, you first link all the .SJ.out.tab and .bam files in the `afolder` and run the `PSIsigma-db-v.1.0.pl` script as below. The first parameter of `PSIsigma-db-v.1.0.pl` is the name of Chromosome. `5` is the minimum number of novel junctions to construct an event in .db. The third and fourth parameter of `PSIsigma-db-v.1.0.pl` are `--type` and `--irmode`, respectively. 8 | ``` 9 | mkdir afolder 10 | cd afolder 11 | ln -s bamfolder/*.bam* . 12 | ln -s bamfolder/*.SJ.* . 13 | get ftp://ftp.ensembl.org/pub/release-87/gtf/homo_sapiens//Homo_sapiens.GRCh38.87.gtf.gz 14 | gzip -d Homo_sapiens.GRCh38.87.gtf.gz 15 | (grep "^#" Homo_sapiens.GRCh38.87.gtf; grep -v "^#" Homo_sapiens.GRCh38.87.gtf | sort -k1,1 -k4,4n) > Homo_sapiens.GRCh38.87.sorted.gtf 16 | rm Homo_sapiens.GRCh38.87.gtf 17 | 18 | #one file name per line in groupa.txt and groupb.txt 19 | total=$(ls *.SJ.out.tab|wc -l) 20 | counta=$(printf "%.0f" $(($total/2))) 21 | countb=$((total-counta)) 22 | ls *.SJ.out.tab|sed 's/.SJ.out.tab//'|head -n $counta > groupa.txt 23 | ls *.SJ.out.tab|sed 's/.SJ.out.tab//'|head -n $countb > groupb.txt 24 | 25 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 1 5 1 1 & 26 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 2 5 1 1 & 27 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 3 5 1 1 & 28 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 4 5 1 1 & 29 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 5 5 1 1 & 30 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 6 5 1 1 & 31 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 7 5 1 1 & 32 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 8 5 1 1 & 33 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 9 5 1 1 & 34 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 10 5 1 1 & 35 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 11 5 1 1 & 36 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 12 5 1 1 & 37 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 13 5 1 1 & 38 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 14 5 1 1 & 39 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 15 5 1 1 & 40 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 16 5 1 1 & 41 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 17 5 1 1 & 42 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 18 5 1 1 & 43 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 19 5 1 1 & 44 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 20 5 1 1 & 45 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 21 5 1 1 & 46 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf 22 5 1 1 & 47 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf X 5 1 1 & 48 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf Y 5 1 1 & 49 | perl ~/PSI-Sigma-1.9k/PSIsigma-db-v.1.0.pl Homo_sapiens.GRCh38.100.sorted.gtf MT 5 1 1 & 50 | wait 51 | cat *.db.tmp > PSIsigma1d9k.db 52 | cat *.bed.tmp > PSIsigma1d9k.bed 53 | rm *.db.tmp 54 | rm *.bed.tmp 55 | ``` 56 | Generating IR.out.tab in parallel 57 | ============ 58 | After .db is generated, in the same `afolder`, run `PSIsigma-ir-v.1.2.pl` for each .bam file. The parameter for `PSIsigma-ir-v.1.2.pl` is `--type`. 59 | ``` 60 | perl ~/PSI-Sigma-1.9k/PSIsigma-ir-v.1.2.pl PSIsigma1d9k.db Sample1.Aligned.sortedByCoord.out.bam 1 & 61 | perl ~/PSI-Sigma-1.9k/PSIsigma-ir-v.1.2.pl PSIsigma1d9k.db Sample2.Aligned.sortedByCoord.out.bam 1 & 62 | perl ~/PSI-Sigma-1.9k/PSIsigma-ir-v.1.2.pl PSIsigma1d9k.db Sample3.Aligned.sortedByCoord.out.bam 1 & 63 | ``` 64 | Alternatively, if you are confident.. 65 | ``` 66 | for bam in `ls *.Aligned.sortedByCoord.out.bam`; do 67 | perl ~/PSI-Sigma-1.9k/PSIsigma-ir-v.1.2.pl PSIsigma1d9k.db $bam 1 & 68 | done 69 | ``` 70 | -------------------------------------------------------------------------------- /PSIsigma-FDR-v.1.0.pl: -------------------------------------------------------------------------------- 1 | =begin 2 | PSI-Sigma: A splicing-detection method for short-read and long-read RNA-seq data 3 | © Kuan-Ting Lin, 2018-2024 4 | PSI-Sigma is free for non-commercial purposes by individuals at an academic or non-profit institution. 5 | For commercial purposes, please contact tech transfer office of CSHL via narayan@cshl.edu 6 | =end 7 | =cut 8 | #!/usr/bin/perl -w 9 | 10 | use strict; 11 | use Statistics::R; 12 | 13 | my ($fn,$min,$trimp) = @ARGV; 14 | 15 | my $accession = $fn; 16 | $accession=~s/\.sorted\.txt//; 17 | 18 | print "Minimum number of samples for p-value = $min\n"; 19 | 20 | my @rawp; 21 | my @p; 22 | my @fdr; 23 | my @dPSI; 24 | my @symbol; 25 | my $linecount = 0; 26 | my $validcount = 0; 27 | my @output; 28 | my @q; 29 | my $header = "-"; 30 | my @removedp; 31 | my @validcount; 32 | my $nacount = 0; 33 | print "Reading... $fn\n"; 34 | open(FILE,"$fn") || die "Aborting.. Can't open $fn : $!\n"; 35 | while(my $line=){ 36 | chomp $line; 37 | $linecount++; 38 | if($linecount == 1){ 39 | $header = $line; 40 | next; 41 | } 42 | my $p = 1; 43 | my $q = 1; 44 | my $deltaPSI = 0; 45 | my $symbol = "-"; 46 | my ($region,$gn,$exon,$event,$num_n,$num_t,$exontype,$ENST,$dPSI,$pvalue,$fdr,$nvalues,$tvalues,$dbid) = split(/\t/,$line); 47 | push(@rawp,$p); 48 | next if($num_n < $min || $num_t < $min); 49 | next if($pvalue eq "NaN"); 50 | my @nvalues = split(/\|/,$nvalues); 51 | my @tvalues = split(/\|/,$tvalues); 52 | my ($nZn,$nZt,$n100n,$n100t) = (0,0,0,0); 53 | my ($ncount,$tcount) = (0,0); 54 | my ($nmax,$tmax,$nmin,$tmin) = (0,0,100,100); 55 | foreach my $n(@nvalues){ 56 | next if($n eq "na"); 57 | $ncount++ if($n > 0 && $n < 100); 58 | $nmax = $n if($n > $nmax); 59 | $nmin = $n if($n < $nmin); 60 | $nZn++ if($n > $trimp); 61 | $n100n++ if($n < (100-$trimp)); 62 | } 63 | foreach my $t(@tvalues){ 64 | next if($t eq "na"); 65 | $tcount++ if($t > 0 && $t < 100); 66 | $tmax = $t if($t > $tmax); 67 | $tmin = $t if($t < $tmin); 68 | $nZt++ if($t > $trimp); 69 | $n100t++ if($t < (100-$trimp)); 70 | } 71 | 72 | #remove 0.2 peak (unreliable p-values at the edge of 0% or 100%) 73 | my $mask = 0; 74 | $mask = 1 if($nZn == 0 && $nZt == 0); 75 | $mask = 1 if($n100n == 0 && $n100t == 0); 76 | 77 | #remove 0.4 peak (outlier numbers in one of the replicate) 78 | if($ncount > 1 || $tcount > 1){ 79 | }else{ 80 | my $pass = 0; 81 | $pass = 1 if($nmin == 0 && $tmax == 100); 82 | $pass = 1 if($nmax == 100 && $tmin == 0); 83 | if($pass == 0){ 84 | push(@removedp,$pvalue); 85 | next; 86 | } 87 | } 88 | 89 | $p = $pvalue; 90 | $q = $fdr; 91 | $deltaPSI = $dPSI; 92 | $symbol = $gn; 93 | push(@rawp,$p); 94 | my $parray = scalar @p; 95 | if($mask == 0){ 96 | $validcount[$validcount] = $parray; 97 | push(@p,$p); 98 | }else{ 99 | $validcount[$validcount] = "na"; 100 | $nacount++; 101 | } 102 | push(@q,$q); 103 | push(@dPSI,$deltaPSI); 104 | push(@symbol,$symbol); 105 | push(@output,$line); 106 | $validcount++; 107 | 108 | } 109 | close(FILE); 110 | 111 | print "NA count = " . $nacount . "\n"; 112 | print "number of p-values to be adjusted = " . scalar @p . "\n"; 113 | 114 | my $qvalues = rqvalue(\@p,\@rawp,$accession); 115 | #my $qvalues = rqvalue(\@p); 116 | my @qvalues = @$qvalues; 117 | 118 | #print "number of adjusted p-values = " . scalar @qvalues . "\n"; 119 | 120 | my $outfn = $fn; 121 | if(-e $fn . ".rawpvalue.txt.tar.gz"){ 122 | print "rawpvalue.txt.tar.gz existed. (no backup)\n"; 123 | }else{ 124 | print "Backup rawpvalue.txt\n"; 125 | system("mv $fn $fn.rawpvalue.txt"); 126 | system("tar zcvf $fn.rawpvalue.txt.tar.gz $fn.rawpvalue.txt"); 127 | system("rm $fn.rawpvalue.txt"); 128 | } 129 | 130 | print "Outputing... $outfn\n"; 131 | open(OUT,">$outfn") || die "Aborting.. Can't open $outfn : $!\n"; 132 | print OUT "$header\n"; 133 | for(my $i = 0;$i < scalar @output;$i++){ 134 | my ($region,$gn,$exon,$event,$n,$t,$exontype,$ENST,$dPSI,$pvalue,$fdr,$nvalues,$tvalues,$dbid) = split(/\t/,$output[$i]); 135 | if($validcount[$i] eq "na"){ 136 | $fdr = "na"; 137 | }else{ 138 | $fdr = $qvalues[$validcount[$i]]; 139 | } 140 | print OUT "$region\t$gn\t$exon\t$event\t$n\t$t\t$exontype\t$ENST\t$dPSI\t$pvalue\t$fdr\t$nvalues\t$tvalues\t$dbid\n"; 141 | } 142 | close(OUT); 143 | 144 | 145 | sub rqvalue{ 146 | my $p = shift; 147 | my $rawp = shift; 148 | my $accession = shift; 149 | my $R = Statistics::R->new(); 150 | my $qvalues = "-"; 151 | $R->run(q`a<-installed.packages()`); 152 | $R->run(q`packages<-a[,1]`); 153 | my $available = $R->get(q`is.element("qvalue", packages)`); 154 | $available = "FALSE"; 155 | if($available eq "TRUE"){ 156 | print "## Note: qvalue module is available.\n"; 157 | print "Doing ... qvalue from R\n"; 158 | $R->run(q`library(qvalue)`); 159 | $R->run(q`options(max.print=999999)`); 160 | $R->set('p',\@$p); 161 | $R->run(q`qvalue <- qvalue(p=p)`); 162 | $R->run(q`q <- qvalue$qvalues`); 163 | $qvalues = $R->get('q'); 164 | }else{ 165 | print "## Note: qvalue module isn't available. Switch to p.adjust().\n"; 166 | print "Doing ... p.adjust from R\n"; 167 | $R->run(q`options(max.print=999999)`); 168 | $R->set('p',\@$p); 169 | $R->run(q`q <- p.adjust(p,method="BH")`); 170 | $qvalues = $R->get('q'); 171 | } 172 | $R->set('accession',$accession); 173 | $R->set('rawp',\@$rawp); 174 | $R->run(q`tiff(paste(accession,"rawpvalue_dist.tiff",sep="_"))`); 175 | $R->run(q`plot(density(rawp))`); 176 | $R->run(q`dev.off()`); 177 | $R->run(q`tiff(paste(accession,"pvalue_dist.tiff",sep="_"))`); 178 | $R->run(q`plot(density(p))`); 179 | $R->run(q`dev.off()`); 180 | $R->run(q`tiff(paste(accession,"qvalue_dist.tiff",sep="_"))`); 181 | $R->run(q`plot(density(q))`); 182 | $R->run(q`dev.off()`); 183 | return $qvalues; 184 | } 185 | 186 | sub densityplot{ 187 | my $p = shift; 188 | my $R = Statistics::R->new(); 189 | $R->run(q`options(max.print=999999)`); 190 | $R->set('p',\@$p); 191 | $R->run(q`tiff("rawpvalue_dist.tiff")`); 192 | $R->run(q`plot(density(p))`); 193 | $R->run(q`dev.off()`); 194 | return 1; 195 | } -------------------------------------------------------------------------------- /PSIsigma-ir-v.1.2.pl: -------------------------------------------------------------------------------- 1 | =begin 2 | PSI-Sigma: A splicing-detection method for short-read and long-read RNA-seq data 3 | © Kuan-Ting Lin, 2018-2024 4 | PSI-Sigma is free for non-commercial purposes by individuals at an academic or non-profit institution. 5 | For commercial purposes, please contact tech transfer office of CSHL via narayan@cshl.edu 6 | =end 7 | =cut 8 | #!/usr/bin/perl -w 9 | 10 | eval unpack u=>q{_=7-E('-T2`H)&1B+"1B_86TL)'1Y<&4I(#T@0$%21U8["B`@("`*("`@("1B86T]?G,O*"XJ*5PO+R\["B`@("`*("`@(&UY("5T;7`[_"B`@("!M>2`E=6YI<75E.PH@("`@;7D@*"1G8VAR+"1S=&%R="PD96YD*2`]("@B+2(L,"PP*3L*("`@(&UY_("1E>&]N2`D:6YP=70@/2`\24Y0550^*2!["@D)8VAO;7`@)&EN<'5T.PH)"21R96%D_9F]R;6%T(#T@(G!A:7)E9"UE;F0B.PH)?0H)8VQO2`D:6-HPH);7D@)6%N;F\["B`@("!M>2`E:6YT2`D8V]U;G1I'0@:68H)&-H2`D8S(@/2`D864@+2`Q.PH)"6UY("1I;G1E2`D;&]C(#T@_(B1I,G-<="1I,F4B.PH@("`@("`@("1A;FYO>R1C:')]>R1C,7U[)&QO8WTK*SL*("`@("`@("`D86YN;WLD_8VAR?7LD8S)]>R1L;V-]*RL["B`@("`@("`@)&%N;F][)&-HR1C:')]>R1C-'U[)&QO8WTK*SL*("`@("`@("`D86YN;WLD8VAR?7LD8S5]>R1L;V-]*RL[("`@"B`@_("`@("`@)&EN=')O;G-[)&-H2`D8V]U;G0@/2`P.PH);7D@)&%D9"`]("(B.PH)(R-&;W(@26QL=6UI_;F$@7!E(#T](#$I>PH)"21A9&0@/2`B+48@,C4V("UF(#(@+7$@,C4U(B!I_9B@D2`D:6YP=70@/2`\24Y0550^*2!["@D)8VAO;7`@)&EN<'5T.PH)"6YE>'0@:68H)&EN<'5T(&5Q("(B*3L*_"0DD8V]U;G0K*SL*"0EM>2!`87)R87D@/2!S<&QI="@O7'0O+"1I;G!U="D["@D);7D@*"1N86UE+"1C:'(L_)&9L865LR72PD87)R87E;,5TL)&%R5LU72D["@D):68H(21C:6=APH)"0D)"21S2`D:CTP.R1J/"1N8CLD:BLK_*7L*"0D)"0D);7D@)'!O7,@)7LD86YN;WLD8VAR?7LD_<&]S?7T@/3T@,"E["@D)"0D)"0EN97AT.PH)"0D)"0E]96QS97L*"0D)"0D)"69OR1C:')]>R1P;W-]('TI>PH)"0D)"0D)"6YE>'0@:68H(21A;FYO>R1C:')]>R1P;W-]_>R1L;V-]*3L*"0D)"0D)"0DD:6YT7!E(#T](#(I>PH)"0EM_>2`D;&]N9TX@/2`P.PH)"0EF;W(H;7D@)&D],#LD:3PD;F,[)&DK*RE["@D)"0EI9B@D3%LD:2LQ72!E<2`B_3B(I>PH)"0D)"6EF*"1#6R1I72`^(#4P*7L*"0D)"0D))&QO;F=.(#T@,3L*"0D)"0D);&%S=#L*"0D)"0E]_"@D)"0E]"@D)"7T*"0D);F5X="!I9B@D;&]N9TX@/3T@,2D["@D)"69O2`D:3TP.R1I/"1N8SLD:2LK_*7L*"0D)"6EF*"$D3%LD:2LQ72E["@D)"0D)<')I;G0@(FD@/2`D:5QN(CL*"0D)"0EP2`D:CTP.R1J/"1N8CLD_:BLK*7L*"0D)"0D);7D@)'!O7,@)7LD86YN;WLD8VAR_?7LD<&]S?7T@/3T@,"E["@D)"0D)"0EN97AT.PH)"0D)"0E]96QS97L*"0D)"0D)"69OR1C:')]>R1P;W-]('TI>PH)"0D)"0D)"6YE>'0@:68H(21A;FYO>R1C:')]>R1P_;W-]>R1L;V-]*3L*"0D)"0D)"0DD:6YT'0@.B`D(5QN(CL*"69O7,@)6EN=')O;G,I>PH)_"69O7,@)7L@)&EN=')O;G-[)&-H2`D<&]S*'-OR`D:6YT'0@:68H)'!A2`D=VET:&EN;F]R;6%L(#T@,#L*"0D);7D@)&%V9R`](&%V97)A9V4H0'9A;'5E2`D;6%R9VEN;V9E_2`D=BA`=F%L=65S*7L*"0D)"6EF*"1V(#P]("@D879G*R@DPH)"0D)"21W:71H:6YN;W)M86PK*SL*"0D)"7T*"0D)?0H)_"0EM>2`D'0@/B`B("X@)&]U=&9N*3L*"2-S>7-T96TH(G)M(%Q?=V]S;W-A=&UP7"\B("X@)&)A;2`N(")<_*BY)4G1M<"YT>'0B*3L*("`@(&UY("1S=&]P=&EM92`]('1I;64["B`@("!M>2`D;6ENPH@("`@_("`@(')E='5R;B`H)'9A;'-;:6YT*"1L96XO,BDM,5T@*R`D=F%L2`H0'9A;'5E2`D_8V]U;G0@/2!S8V%L87(@0'9A;'5E2`D){ 20 | chomp $line; 21 | my ($ENST,$symbol,$strand) = split(/\t/,$line); 22 | $symbol{$ENST} = $symbol; 23 | $strand{$ENST} = $strand; 24 | } 25 | close(FILE); 26 | 27 | print "Reading... $db\n"; 28 | my %ET; 29 | my %wing; 30 | my %names; 31 | open(FILE, "$db") || die "Aborting.. Can't open $db\n"; 32 | while(my $line=){ 33 | chomp $line; 34 | next if($line eq ""); 35 | my ($chr,$i1s,$i1e,$i2s,$i2e,$tes,$tee,$anno,$as,$ae,$name,$gn) = split(/\t/,$line); 36 | my ($et) = "-"; 37 | my ($accession,$num) = ($1,$2) if($name=~/(.*)\_(\d+)$/); 38 | 39 | my @array = split(/\_/,$accession); 40 | #$accession=~s/(.*)\_//; 41 | my $ENST = $array[4]; 42 | if($ENST eq ""){ 43 | print "WARNNING: ENST format in the .db file is not correct\n"; 44 | print "accession = $accession\n"; 45 | $ENST = $accession; 46 | $ENST=~s/(.*)\_//; 47 | exit; 48 | } 49 | $wing{$name} = "$chr,$i1s,$i1e,$i2s,$i2e"; 50 | $ENST=~s/^Ex\.//; 51 | $ENST=~s/^TSS\.//; 52 | 53 | if($name=~/\_W\_/){ 54 | if($i1s == $as && $i2e == $ae){ 55 | $ET{$accession} = "SES" if($num == 1); 56 | }else{ 57 | $ET{$accession} = "MES" if($num > 1); 58 | } 59 | } 60 | 61 | if($name=~/\_S\_/){ 62 | if($i1s == $i1e){ 63 | $ET{$name} = "A3SS" if($strand{$ENST} eq "+"); 64 | $ET{$name} = "A5SS" if($strand{$ENST} eq "-"); 65 | } 66 | if($i2s == $i2e){ 67 | $ET{$name} = "A5SS" if($strand{$ENST} eq "+"); 68 | $ET{$name} = "A3SS" if($strand{$ENST} eq "-"); 69 | } 70 | if($i1s != $i1e && $i2s != $i2e){ 71 | print "[ERROR] coordinates are not A5SS or A3SS!\n"; 72 | print "$i1s,$i1e; $i2s,$i2e; $accession:$num\n"; 73 | } 74 | } 75 | if($name=~/\_R\_/){ 76 | $ET{$name} = "IR"; 77 | $ET{$name} = "IR (overlapping region)" if($gn=~/\,/); 78 | } 79 | } 80 | close(FILE); 81 | 82 | my %output; 83 | my %asae; 84 | my %es; 85 | my %ee; 86 | my %exonR; 87 | my ($maxn,$maxt) = (0,0); 88 | open(FILE,"$input") || die "Aborting.. Can't open $input : $!\n"; 89 | while(my $line=){ 90 | chomp $line; 91 | next if($line=~/^Event ID/); 92 | my ($ID,$tmpgn,$exon,$eventtype,$N,$T,$exontype,$ENST,$dPSI,$pvalue,$FDR,$nvalues,$tvalues) = split(/\t/,$line); 93 | my @nvalue = split(/\|/,$nvalues); 94 | my @tvalue = split(/\|/,$tvalues); 95 | my $wing = $wing{$ID}; 96 | if(!$wing{$ID}){ 97 | print "(ERROR) $ID can't find wings in the database.\n"; 98 | print "Aborting...\n"; 99 | exit; 100 | } 101 | if($fmode == 0){ 102 | next if($pvalue >= 0.01 || abs($dPSI) <= 10); 103 | } 104 | if($fmode == 1){ 105 | next if(abs($dPSI) <= 10); 106 | } 107 | if($fmode == 2){ 108 | next if($pvalue >= 0.05); 109 | } 110 | if($fmode == 3){ 111 | my ($avgn,$avgt) = (average(\@nvalue),average(\@tvalue)); 112 | #next if(abs($dPSI) < 5); 113 | #next if($avgn < 10 && $avgt < 10); 114 | #next if($avgn < 5 || $avgt < 5); 115 | if($N > 1 && $T > 1){ 116 | #next if($pvalue > 0.05); 117 | } 118 | #my $no = 0; 119 | #if($avgn < 20 || $avgt < 20){ 120 | # next if($avgn < 20 && $avgt < 20); 121 | #}else{ 122 | # $no++; 123 | #} 124 | #if($avgn > 80 || $avgt > 80){ 125 | # next if($avgn > 80 && $avgt > 80); 126 | #}else{ 127 | # $no++; 128 | #} 129 | #if($no == 2){ 130 | # next if($pvalue >= 0.01 || abs($dPSI) <= 10); 131 | #}else{ 132 | # next if($pvalue >= 0.5 || abs($dPSI) <= 5); 133 | #} 134 | } 135 | $maxn = $N if($maxn < $N); 136 | $maxt = $T if($maxt < $T); 137 | $line=~s/\, /\|/g; 138 | my ($chr,$as,$ae,$t,$tmpENST,$num) = split(/\_/,$ID); 139 | $chr=~s/chr//; 140 | my ($exonchr,$exonss,$exonee) = split(/[\:\-]/,$exon); 141 | if($eventtype eq "R"){ 142 | $exon = "$chr:$as-$ae"; 143 | } 144 | if(!$output{"$exon\t$wing\t$eventtype"}){ 145 | $output{"$exon\t$wing\t$eventtype"} = $line; 146 | }else{ 147 | my @tmparray = split(/\t/,$output{"$exon\t$wing\t$eventtype"}); 148 | my $tmpp = $tmparray[9]; 149 | next if($tmpp <= $pvalue); 150 | $output{"$exon\t$wing\t$eventtype"} = $line; 151 | } 152 | if($eventtype eq "W"){ 153 | $es{$exonss}++; 154 | $ee{$exonee}++; 155 | $asae{"$chr\t$as\t$ae"}{"$exonss\t$exonee"}++; 156 | } 157 | if($eventtype eq "R"){ 158 | $exonR{$exon}++; 159 | } 160 | } 161 | close(FILE); 162 | 163 | $input=~s/\.txt$//; 164 | my $outfn = $input . ".sorted.txt"; 165 | my $tmpfn = $input . ".filtered.txt"; 166 | #$tmpfn = $input . ".0to1.filtered.txt" if($fmode == 2); 167 | #$outfn = $input . ".0to1.sorted.txt" if($fmode == 2); 168 | open(OUT,">" . $tmpfn) || die "Aborting.. Can't open " . $tmpfn . " : $!\n"; 169 | print OUT "Event Region Gene Symbol Target Exon Event Type N T Exon Type Reference Transcript ΔPSI (%) T-test p-value FDR (BH) N Values T Values Database ID\n"; 170 | foreach my $exoninfo(keys %output){ 171 | my ($ID,$tmpgn,$exon,$eventtype,$N,$T,$exontype,$ENST,$dPSI,$pvalue,$FDR,$nvalues,$tvalues) = split(/\t/,$output{$exoninfo}); 172 | if($eventtype eq "R"){ 173 | if(!$exonR{$exon}){ 174 | }else{ 175 | #next if($exonR{$exon} > 1); 176 | } 177 | } 178 | my ($chr,$as,$ae,$tmp,$tmpENST,$tmpnum) = split(/\_/,$ID); 179 | #$chr=~s/chr//; 180 | my ($accession,$num) = ($1,$2) if($ID=~/(.*)\_(\d+)$/); 181 | #$accession=~s/(.*)\_//; 182 | 183 | =begin 184 | if($ID=~/\_R1\./ || $ID=~/\_R2\./){ 185 | $ENST=~s/\./\_/g; 186 | $ID=~s/\./\_/g; 187 | ($accession,$num) = ($1,$2) if($ID=~/(\w+)\_(\d+)$/); 188 | #$accession=~s/\_/\./g; 189 | $ENST=~s/\_/\./g; 190 | #$accession=~s/(.*)\_R1/R1/; 191 | #$accession=~s/(.*)\_R2/R2/; 192 | #$accession=~s/\_/\./g; 193 | #print "Name = $name\n"; 194 | #print "accession = $accession\n"; 195 | #print "num = $num\n"; 196 | #exit; 197 | } 198 | =end 199 | =cut 200 | if($fmode == 0){ 201 | next if($N < ($maxn*$criteria) || $T < ($maxt*$criteria)); 202 | } 203 | my $symbol = "-"; 204 | $tmpENST=~s/^Ex\.//; 205 | $tmpENST=~s/^TSS\.//; 206 | if(!$symbol{$tmpENST}){ 207 | $symbol="N/A"; 208 | print "$tmpENST has no symbol!\n"; 209 | }else{ 210 | $symbol = $symbol{$tmpENST}; 211 | } 212 | 213 | my $ETID = $ID; 214 | $ETID = $accession if($ID=~/\_W\_/); 215 | 216 | if(!$ET{$ETID}){ 217 | print "Can't find $accession\n"; 218 | print "ID = $ID\n"; 219 | print "ENST = $ENST\n"; 220 | print "symbol = $symbol\n"; 221 | exit; 222 | }else{ 223 | $eventtype = $ET{$ETID}; 224 | } 225 | if($eventtype eq "SES"){ 226 | my $mxs = 0; 227 | my ($exonchr,$exonss,$exonee) = split(/[\:\-]/,$exon); 228 | foreach my $loc(keys %{ $asae{"$chr\t$as\t$ae"} }){ 229 | my ($es,$ee) = split(/\t/,$loc); 230 | next if($exonss <= $es && $exonee >= $es); 231 | next if($exonss <= $ee && $exonee >= $ee); 232 | next if($es < $exonss && $ee > $exonee); 233 | $mxs = 1; 234 | last; 235 | } 236 | $eventtype = "MXS" if($mxs == 1); 237 | } 238 | if($eventtype eq "A5SS" || $eventtype eq "A3SS"){ 239 | #my ($exonchr,$exonss,$exonee) = split(/[\:\-]/,$exon); 240 | #if(!$es{$exonss} && !$ee{$exonee}){ 241 | #}else{ 242 | # next if($fmode != 0); 243 | #} 244 | } 245 | my $eventregion = "$chr:$as-$ae"; 246 | my ($exonchr,$exonss,$exonee) = split(/[\:\-]/,$exon); 247 | if($eventtype=~/IR/){ 248 | $eventregion = $exon; 249 | $exon = "$chr:$as-$ae"; 250 | } 251 | if($ENST=~/^TSS/){ 252 | $eventtype = "TSS" . "|" . $eventtype; 253 | } 254 | #print OUT "$eventregion\t$symbol\t$exon\t$eventtype\t$N\t$T\t$exontype\t$ENST\t$dPSI\t$pvalue\t$FDR\t$nvalues\t$tvalues\n"; 255 | print OUT "$eventregion\t$symbol\t$exon\t$eventtype\t$N\t$T\t$exontype\t$ENST\t$dPSI\t$pvalue\t$FDR\t$nvalues\t$tvalues\t$ID\n"; 256 | } 257 | close(OUT); 258 | 259 | my $sortcommand = "\(head \-n 1 " . $tmpfn . " \&\& tail \-n \+2 " . $tmpfn . " \| sort \-gr \-t '\t' \-k 9\) \> " . $outfn . "\n"; 260 | system($sortcommand); 261 | system("rm " . $tmpfn); 262 | 263 | sub average{ 264 | my($data) = @_; 265 | if (not @$data) { 266 | die("Empty array\n"); 267 | } 268 | my $total = 0; 269 | foreach (@$data) { 270 | $total += $_; 271 | } 272 | my $average = $total / @$data; 273 | return $average; 274 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PSI-Sigma 2 | ================= 3 | Percent Spliced-In (PSI) values are commonly used to report alternative pre-mRNA splicing (AS) changes. 4 | However, previous PSI-detection methods are limited to specific types of AS events. PSI-Sigma is using a new splicing index (PSIΣ) that is more flexible, can incorporate novel junctions, and can compute PSI values of individual exons in complex splicing events. 5 |
6 | * PSI-Sigma is now published: https://www.ncbi.nlm.nih.gov/pubmed/31135034 7 | 8 | Updates 9 | ================= 10 | * PSI-Sigma workshop: 11 | 12 | 1. [Day 1 .ppt](https://drive.google.com/file/d/1l7ie9yND67Pa5N4R2Fy6X8efXDk8maR-/view?usp=share_link) 13 | 2. [Day 2 .ppt](https://drive.google.com/file/d/1VvCcRb44gsLPzJwgjDYEYD5hbBrqSNe7/view?usp=share_link) 14 | 15 | * Docker/Singularity version: 16 | ``` 17 | docker pull docker.io/woodydon/psi_sigma_pipeline:4.0 18 | ``` 19 | ``` 20 | singularity pull docker://woodydon/psi_sigma_pipeline:4.0 21 | ``` 22 | ``` 23 | singularity pull docker.io/woodydon/psi_sigma_pipeline:4.0 24 | ``` 25 | * Run PSI-Sigma with singularity (example): 26 | ``` 27 | singularity exec --bind /mnt:/mnt docker://woodydon/psi_sigma_pipeline:4.0 perl /usr/local/bin/PSI-Sigma-2.3/dummyai.pl --gtf Homo_sapiens.GRCh38.100.sorted.gtf --nread 10 --name PSIsigma2d1 --type 1 --fmode 3 --threads 6 28 | ``` 29 | * The latest release: https://github.com/wososa/PSI-Sigma/releases/tag/v2.3 30 | * Try the "--help" and "--threads" function. 31 | * Papers using PSI-Sigma: [Nature 2019](https://rdcu.be/bSL5W) [Nature 2023](https://www.nature.com/articles/s41586-023-05820-3) 32 | * Alignment file for nanopore long-read PCR-cDNA-seq of human U87 cells: https://dropfiles.cshl.edu/link/mpkT92runIvNJ3QeBvZPzC 33 | 34 | AUTHOR/SUPPORT 35 | ============== 36 | Kuan-Ting (Woody) Lin, klin@cshl.edu 37 | 38 | Alignment files 39 | ====== 40 | For short-read RNA-seq data, please generate .bam, .bai and .SJ.out files by using STAR (https://github.com/alexdobin/STAR). 41 | ``` 42 | ###This is an example for short-read RNA-seq### 43 | STAR --runThreadN 6 \ 44 | --outSAMtype BAM SortedByCoordinate \ 45 | --outFilterIntronMotifs RemoveNoncanonical \ 46 | --genomeDir ~/index/starR100H38 \ 47 | --twopassMode Basic \ 48 | --readFilesIn R1.fastq R2.fastq \ 49 | --outFileNamePrefix . 50 | samtools index .Aligned.sortedByCoord.out.bam 51 | ``` 52 | For long-read RNA-seq data, please use GMAP (http://research-pub.gene.com/gmap/src/gmap-gsnap-2017-11-15.tar.gz) or minimap2 (https://github.com/lh3/minimap2). 53 | ``` 54 | ###This is an example for long-read RNA-seq### 55 | #Example of using GMAP# 56 | ~/gmap-2017-11-15/bin/gmap -d GRCh38 -f samse --min-trimmed-coverage=0.5 --no-chimeras -B 5 -t 6 MinION_long_read.fastq > .sam 57 | #Example of using minimap2# 58 | ~/minimap2-2.17/minimap2 -ax splice:hq -uf H38.fa MinION_long_read.fastq > .sam 59 | 60 | samtools view -bS .sam > .bam 61 | samtools sort .bam -o .Aligned.sortedByCoord.out.bam 62 | samtools index .Aligned.sortedByCoord.out.bam 63 | ``` 64 | Quick Start 65 | ====== 66 | Create links to the .bam, .bai, and .SJ.out files in the a folder (afolder). If you are using long-read RNA-seq data, .SJ.out files will be generated automatically since GMAP doesn't produce the file. 67 | ``` 68 | mkdir afolder 69 | cd afolder 70 | ln -s bamfolder/*.bam* . 71 | ln -s bamfolder/*.SJ.* . 72 | ``` 73 | Download a .gtf file and sort the coordinates. **(NOTE: sorting .gtf file is necessary!)** 74 | ``` 75 | wget ftp://ftp.ensembl.org/pub/release-87/gtf/homo_sapiens//Homo_sapiens.GRCh38.87.gtf.gz 76 | gzip -d Homo_sapiens.GRCh38.87.gtf.gz 77 | (grep "^#" Homo_sapiens.GRCh38.87.gtf; grep -v "^#" Homo_sapiens.GRCh38.87.gtf | sort -k1,1 -k4,4n) > Homo_sapiens.GRCh38.87.sorted.gtf 78 | rm Homo_sapiens.GRCh38.87.gtf 79 | ``` 80 | Create two files: (1) groupa.txt and (2) groupb.txt. Please put the full name or the suffix of your .bam files in the groupa.txt or groupb.txt. For example, the suffix of a "Sequins_MixA.Aligned.sortedByCoord.out.bam" file is "Sequins_MixA". Groupa.txt will be compared with groupb.txt. Below is an example: 81 | ``` 82 | #Note: one file name per line in groupa.txt and groupb.txt 83 | echo Sequins_MixA.Aligned.sortedByCoord.out.bam >> groupa.txt 84 | echo Sequins_MixB.Aligned.sortedByCoord.out.bam >> groupb.txt 85 | 86 | #Alternatively, you can put only the suffix (WARNNING: only works when the .bam files are linked to the working directory) 87 | echo Sequins_MixA > groupa.txt 88 | echo Sequins_MixB > groupb.txt 89 | 90 | ``` 91 | Run dummyai.pl. After the .gtf file, please specify 1 for short-read RNA-seq and 2 for long-read RNA-seq. The last column is used to specify the minimum number of supporting reads for an AS event (10 is specified in the example below). 92 | ``` 93 | #For short-read RNA-seq (minimum 10 supporting reads for an AS event) 94 | perl ~/PSIsigma/dummyai.pl --gtf Homo_sapiens.GRCh38.87.sorted.gtf --name PSIsigma --type 1 -nread 10 95 | #For long-read RNA-seq (minimum 10 supporting reads for an AS event) 96 | perl ~/PSIsigma/dummyai.pl --gtf Homo_sapiens.GRCh38.87.sorted.gtf --name PSIsigma --type 2 -nread 10 97 | ``` 98 | That's it. 99 | Filtered results (p<0.01) will be listed in the PSIsigma_r10_ir3.sorted.txt. 100 | 101 | * Filtered Results (p<0.01): PSIsigma_r10_ir3.sorted.txt 102 | * Unfiltered Results: PSIsigma_r10_ir3.txt 103 | * Junction Read File: *.SJ.out.tab 104 | * Intronic Read File: *.IR.out.tab 105 | * Database File: *.db 106 | * BAM File: *.bam 107 | * GTF File: *.gtf (http://useast.ensembl.org/info/data/ftp/index.html/) 108 | 109 | 110 | OUTPUT 111 | ============== 112 | * Event Region: Genomic coordinates of the splicing event. 113 | * Gene Symbol: Gene symbol of the splicing event. 114 | * Target Exon: Genomic coordinates of the alternative exon. 115 | * Event Type: Category of the splicing event. 116 | * N: the number of valid samples in groupa.txt (influenced by the number of supporting reads). 117 | * T: the number of valid samples in groupb.txt (influenced by the number of supporting reads). 118 | * Exon Type: Whether the exon is a novel exon or an exon related to nonsense mediated decay (NMD). 119 | * Reference Transcript: The transcript ID in the gene annotation file. 120 | * ΔPSI (%): the average difference of PSI values in groupa.txt and groupb.txt. 121 | * T-test p-value: p-value derived from two-sample t-test. 122 | * FDR (BH): false discovery rate based on the p-values. 123 | * N Values: It shows all valid PSI values derived from the .SJ.out.tab files based on groupa.txt. (influenced by the number of supporting reads). 124 | * T Values: It shows all valid PSI values derived from the .SJ.out.tab files based on groupb.txt. (influenced by the number of supporting reads). 125 | * Database ID: It shows the accession number of the splicing event in the database of PSI-Sigma (e.g., PSIsigma.db). 126 | 127 | DATABASE CREATION (for advanced users) 128 | ============================== 129 | * Wikipage: https://github.com/wososa/PSI-Sigma/wiki/Database-creation-manual 130 | 131 | SOFTWARE REQUIREMENTS 132 | ============================== 133 | * Perl (https://www.perl.org/get.html) 134 | * Samtools (http://www.htslib.org) 135 | * R (https://www.r-project.org) (For version 1.9k and when --adjp 2 is used) 136 | 137 | Perl EXTENSIONS 138 | ============================== 139 | * PDL::LiteF 140 | * PDL::Stats 141 | * PDL::GSL::CDF 142 | * Statistics::Multtest 143 | * Statistics::R (For version 1.9k and when --adjp 2 is used) 144 | 145 | 146 | EXAMPLE of INSTALLING Perl EXTENSIONS (Anaconda) 147 | ================= 148 | ``` 149 | conda create -n PSIsigma r-essentials r-base perl-app-cpanminus 150 | conda activate PSIsigma 151 | conda install python=3.9 152 | conda install -c conda-forge gcc gsl 153 | cpanm PDL::LiteF 154 | cpanm PDL::GSL::CDF 155 | cpanm PDL::Stats 156 | cpanm Statistics::Multtest 157 | cpanm Statistics::R 158 | ``` 159 | 160 | EXAMPLE of INSTALLING Perl EXTENSIONS (without Anaconda) 161 | ============================== 162 | ``` 163 | # 1-a. If you are a sudo user. Set up working directory for Perl library (Using Perl version 5.18 as an example) 164 | export PERL5LIB=/usr/local/lib/perl/5.18 165 | 166 | # 1-b. If you are a local user, you can do like this (https://stackoverflow.com/questions/2980297/how-can-i-use-cpan-as-a-non-root-user) 167 | wget -O- http://cpanmin.us | perl - -l ~/perl5 App::cpanminus local::lib 168 | eval `perl -I ~/perl5/lib/perl5 -Mlocal::lib` 169 | echo 'eval `perl -I ~/perl5/lib/perl5 -Mlocal::lib`' >> ~/.bashrc 170 | echo 'export MANPATH=$HOME/perl5/man:$MANPATH' >> ~/.bashrc 171 | 172 | # 2. Install GSL 173 | apt-get install -y git make g++ gcc python wget libgsl0-dev 174 | 175 | # 3. Install PDL::GSL 176 | cpan App::cpanminus 177 | cpanm PDL::LiteF 178 | cpanm PDL::GSL::CDF 179 | cpanm PDL::Stats 180 | cpanm Statistics::Multtest 181 | cpanm Statistics::R 182 | ``` 183 | PSI-Sigma on Windows OS 184 | =========== 185 | PSI-Sigma has been tested in Linux and Mac OS environment. You can install Linux bash shell on Windows to run PSI-Sigma. 186 | * Linux Bash Shell on Windows: https://www.howtogeek.com/249966/how-to-install-and-use-the-linux-bash-shell-on-windows-10/ 187 | 188 | Gene Expression Analysis for nanopore long-read RNA-seq 189 | =========== 190 | To use the PSIsigma-longread-gene-expression.pl: 191 | ``` 192 | perl ~/PSIsigma/PSIsigma-longread-gene-expression.pl Homo_sapiens.GRCh38.87.sorted.gtf Experiment.Aligned.sortedByCoord.out.bam 193 | ``` 194 | The default setting is using 4 CPUs to calculate gene expression levels by matching constitutive exons in the gene annotation. An extra perl extension (threads) is needed. 195 | 196 | CITATION 197 | =========== 198 | https://www.ncbi.nlm.nih.gov/pubmed/31135034 199 | * Lin, K. T. & Krainer, A. R. PSI-Sigma: a comprehensive splicing-detection method for short-read and long-read RNA-seq analysis. Bioinformatics, doi:10.1093/bioinformatics/btz438 (2019). 200 | 201 | PSI-Sigma PRESENTATION 202 | =========== 203 | * Oxford Nanopore London Calling 2019: 204 | https://vimeo.com/339511487 205 | 206 | PUBLICATIONS USING PSI-Sigma 207 | =========== 208 | https://pubmed.ncbi.nlm.nih.gov/29449409 209 | * K. T. Lin, W. K. Ma, J. Scharner et al., A human-specific switch of alternatively spliced AFMID isoforms contributes to TP53 mutations and tumor recurrence in hepatocellular carcinoma. Genome Res, (2018). 210 | 211 | https://pubmed.ncbi.nlm.nih.gov/31578525 212 | * A. Yoshimi, K. T. Lin, D. H. Wiseman et al., Coordinated alterations in RNA splicing and epigenetic regulation drive leukaemogenesis. Nature 574, 273-277 (2019). 213 | 214 | https://pubmed.ncbi.nlm.nih.gov/32001512/ 215 | * M. A. Rahman, K. T. Lin, R. K. Bradley et al., Recurrent SRSF2 mutations in MDS affect both splicing and NMD. Genes Dev 34, 413-427 (2020). 216 | 217 | https://pubmed.ncbi.nlm.nih.gov/34921016/ 218 | * W. K. Ma, D. M. Voss, J. Scharner et al., ASO-based PKM splice-switching therapy inhibits hepatocellular carcinoma growth. Cancer Res, (2021). 219 | 220 | https://www.nature.com/articles/s41586-023-05820-3 221 | * A. Jbara, K. T. Lin, C. Stossel et al., RBFOX2 modulates a metastatic signature of alternative splicing in pancreatic cancer. Nature, (2023). 222 | 223 | 224 | Commercial Use 225 | =========== 226 | * For licensing, please contact CSHL tech transfer office: narayan@cshl.edu 227 | 228 | [![Analytics](https://ga-beacon.appspot.com/UA-123441271-1/PSI-Sigma/readme)](https://github.com/igrigorik/ga-beacon) 229 | 230 | -------------------------------------------------------------------------------- /PSIsigma-longread-gene-expression.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | eval unpack u=>q{_;7D@)&YB7W!R;V-E%]C;VUP=71E(#T@,3(["@EM>2`D8VAE8VMT;W1A;"`](#$[_"@EM>2`D8VAE8VMS=7!P;W)T;&5V96P@/2`Q.PH);7D@)&D],#L*"6UY($!R=6YN:6YG(#T@*"D["@EM>2!`_5&AR96%D2`D=&AR97-H;VQD(#T@,C`P.PH@("`@;7D@)6=E;F5S(#IS:&%R960[_"B`@("!M>2`E=')A;G-C2`E_;F]S=7!P;W)T3D$@.G-H87)E9#L*"6UY("5C;W9E2`E_8F%T8V@["B`@("!M>2`E8FEO='EP93L*("`@('!R:6YT(")296%D:6YG+BXN("1G=&9<;B(["B`@("!O<&5N_*$9)3$4L("(D9W1F(BD@?'P@9&EE(")!8F]R=&EN9RXN($-A;B=T(&]P96X@)&=T9EQN(CL*("`@('=H:6QE_*&UY("1L:6YE/3Q&24Q%/BE["B`@("`)8VAO;7`@)&QI;F4["B`@("`);F5X="!I9B@D;&EN93U^+UY<(R\I_.PH@("`@"6UY("@D8VAR+"1S<&5C:65S+"1C870L)'-T87)T+"1E;F0L)&1O=#$L)'-T2`D9VED(#T@)#$@:68H)&%N;F\]_?B]G96YE7%]I9"!<(BA<=RLI7")<.R\I.PH@("`@"6EF*"1C870@97$@(F=E;F4B*7L*("`@(`D);7D@)&=N_(#T@)#$@:68H)&%N;F\]?B]G96YE7%]N86UE(%PB*%M<=UPM7"Y<+UTK*5PB7#LO*3L*("`@(`D);7D@)&)I_;W1Y<&4@/2`D,2!I9B@D86YN;SU^+V=E;F5<7V)I;W1Y<&4@7"(H7'R1C:')]6R1B871C_:'LD8VAR?5TI>PH@("`@"0E]96QS97L*("`@(`D)"6UY($!T;7`@/2!S<&QI="@O7"PO+"1G96YE8F%T8VA[_)&-HR1C:')]72D["B`@("`)"0EI9BAS8V%L87(@0'1M<"`^(#4P,"E["B`@("`)"0D))&)A_=&-H>R1C:')]*RL["B`@("`)"0E]"B`@("`)"7T*("`@(`D))&=E;F5B871C:'LD8VAR?5LD8F%T8VA[)&-H_'0["B`@("`)?0H*("`@(`EM>2`D=&ED(#T@)#$@:68H)&%N_;F\]?B]TPH@("`@"0DDPH@("`@"0DD;F]S=7!P;W)T3D%[)&=I9'T@/2`DPH@("`@"0DD;F]S=7!P;W)T3D%[)&=I9'T@/2`D2`D9VED*&ME>7,@)71R86YS8W)I<'1S*7L*"0EM_>2`D9V]O9'-U<'!OR`D=')A_;G-CPH)"0D)"21SPH@("`@"21T;W1A;"`](&!S_86UT;V]L2`D8F%T8VAG_:60H0'1M<"E["B`@("`)"0EN97AT(&EF*"1B871C:&=I9"!E<2`B(BD["B`@("`)"0DD9VED2`D<&%SPH)"0D)"0D);F5X=#L*"0D)"0D)?0H)"0D)"0EM_>2!`'0@:68H)')V(&5Q("(B*3L*"0D)"0D)"6UY("@D8V]U;G0L_)')G:60L)&)I;G,I(#T@&ET.PH)"0D)"0D)_?0H)"0D)"0D))')E861C;W5N='LDR1R9VED_?2`]("1B:6YS.PH)"0D)"0E]"@D)"0D)"7!R:6YT("(@("T@5&AR96%D("1T:60@:&%S(&)E96X@:F]I;F5D_7&XB.PH)"0D)"7T*"0D)"7T*"0D)?0HC(R-4:')E860@4F5G:6]N(R,C"@D)?0H)?0H)<')I;G0@(E-U8FUI_='1E9"`D:2!J;V)S+EQN(CL*"2-PPH)"4!R=6YN:6YG(#T@=&AR96%DPH)"0D);7D@)'1I9"`]("1T:'(M/G1I9#L*"0D)"6UY("1R971UPH)"0D)"6YE>'0@:68H)')V(&5Q("(B*3L*"0D)"0EM>2`H)&-O=6YT+"1R9VED+"1B_:6YS*2`]('-P;&ET*"]<+"\L)')V*3L*"0D)"0EI9B@A)')G:60I>PH)"0D)"0EP2`EPH@("`@"6YE>'0@:68H)&=I9"!E<2`B_(BD["B`@("`);7D@)&]U=&=I9"`]("1G:60["B`@("`):68H(21G;GLD9VED?2E["B`@("`)"7!R:6YT("(D_9VED(&-A;B=T(&9I;F0@9V5N92!S>6UB;VPN7&XB.PH@("`@"0DD;W5T9VED(#T@)&=I9#L*("`@(`E]96QS_97L*("`@(`D))&]U=&=I9"`]("1G;GLD9VED?3L*("`@(`E]"B`@("`);7D@)&QO8R`]("1L;V-[)&=I9'T[_"B`@("`);7D@)&-O=6YT(#T@)')E861C;W5N='LD9VED?3L*("`@(`EM>2`D8FEO='EP92`]("1B:6]T>7!E_>R1G:61].PH@("`@"21R97!OR1B:6]T>7!E?2LK(&EF*"1C;W5N="`^/2`Q,"D["B`@("`))')E<&]R='LQ,#!]>R1B:6]T>7!E_?2LK(&EF*"1C;W5N="`^/2`Q,#`I.PH@("`@"7!R:6YT($]55"`B)&=I9%QT)&]U=&=I9%QT)&)I;W1Y<&5<_="1L;V-<="1C;W5N=%QT(B`N("@D8V]U;G0O*"1T;W1A;"\Q,#`P,#`P*2D@+B`B7&XB.PH@("`@?0H@("`@_8VQO'0B*2!\?"!D:64@(D%B_;W)T:6YG+BX@0V%N)W0@;W!E;B`D;W5T9FXN7!E.PH@("`@9F]R96%C:"!M>2`D8FEO='EP92AS;W)T(&ME>7,@)7L@)')E<&]R_='LQ?2!]*7L*("`@(`EP7!E.PH@("`@"7!UPH@("`@"2-P7!E*'-OR`DR1C;W5N='T@?2E[_"B`@("`)9F]R96%C:"!M>2`D8FEO='EP92A`8FEO='EP92E["B`@("`)"6EF*"$DR1C;W5N='U[_)&)I;W1Y<&5]*7L*("`@(`D)"7!R:6YT($]55"`B7'0B.PH@("`@"0D);F5X=#L*("`@(`D)?0H@("`@"0EP_2`D;W5T9FXR(#T@)&)A;3L*"21O=71F_;C(]?G,O7"XH+BHI+R\["@DD;W5T9FXR("X]("(N8V]V97)A9V5?=7!T;S,N='AT(CL*"6UY("1O=71F;C,@_/2`D8F%M.PH))&]U=&9N,SU^6UB;VQ<=$)I;W1Y<&5<_=$(Q7'1",EQT0C-<=$(T7'1"-5QT0C9<=$(W7'1".%QT0CE<=$(Q,%QT5&]T86Q<;B(["B`@("!P7!E7'1",5QT0C)<=$(S7'1"-%QT0C5<=$(V7'1"-UQT0CA<=$(Y7'1",3!<=%1O=&%L7&XB.PH)9F]R_96%C:"!M>2`D9VED*&ME>7,@)6-O=F5R86=E8FEN6UB;VPN7&XB.PH@_("`@"0DD;W5T9VED(#T@)&=I9#L*("`@(`E]96QS97L*("`@(`D))&]U=&=I9"`]("1G;GLD9VED?3L*("`@_(`E]"B`@("`);7D@)&)I;G,@/2`D8V]V97)A9V5B:6YS>R1G:61].PH@("`@"6UY($!B:6YS(#T@2`D;W)I9VEN_86QB:6YS(#T@(B(["@H@("`@"6UY("@D;W5T<'5T8FEN'0["B`@("`)"7UE_;'-E>PH@("`@"0D))'9A;'5E(#T@)&)I;G-;)&E=.PH@("`@"0E]"B`@("`)"6EF*"1C;W5N="`^/2`Q*7L*_("`@(`D)"21O=71P=71B:6YS("X](")<="(@+B!S<')I;G1F*"(E+C)F(BPH*"1V86QU92\D8V]U;G0I*C$P_,"DI.PH@("`@"0D);7D@*"1V86QU93(L)&-O=6YT,BD@/2`H)'9A;'5E+"1C;W5N="D["B`@("`)"0DD=F%L_=64R(#T@,R!I9B@D=F%L=64R(#X@,RD["B`@("`)"0DD8V]U;G0R(#T@,R!I9B@D8V]U;G0R(#X@,RD["B`@_("`)"0DD;W5T<'5T8FEN7!E7'0D;W5T<'5T8FEN7!E7'0D;W5T<'5T8FEN2`D:6-H2!`9VED2`H)&-H2`D;&]C(#T@)&EC:'(["B`@("!M>2`D8V]M;65N_9"`](")S86UT;V]L2`D<',L_("PH@("`@"6-H;VUP("1L:6YE.PH@("`@"6YE>'0@:68H)&QI;F4@97$@(B(I.PH@_("`@"6UY($!A2`]('-P;&ET*"]<="\L)&QI;F4I.PH@("`@"6UY("@D:60L)')C:'(L)')S=&%R="PD_8VEG87(I(#T@*"1A5LP72PD87)R87E;,ETL)&%R5LU72D["B`@("`);7D@)'-O_9G1C;&EP*%QD*RE3+RD["B`@("`);7D@)'-O9G1C;&EP96YD(#T@_)#$@:68H)&-I9V%R/7XO*%QD*RE3)"\I.PH@("`@"21S;V9T8VQI<'-T87)T(#T@,"!I9B@A)'-O9G1C;&EP_2`D:2`](#`[("1I(#P@PH)"0EM>2`D;G5M(#T@)&YU;5LD:5T["B`@_(`D)"6UY("1C:&%R(#T@)&-C6R@D:2LQ*5T["B`@(`D)"21L96YG=&@K/21N=6T@:68H)&-H87(]?B];34Y$_72\I.PH@("`)"0DD;G5M;V9J=6YC=&EO;BLK(&EF*"1C:&%R/7XO6TY=+RD["B`@(`D)?0H@("`@"6UY("1R_96YD(#T@)')S=&%R="`K("1L96YG=&@@+2`Q.PH@("`@"0H@("`@"6UY("1E>&-E960@/2`P.PH@("`@"6UY_("1S96-T:6]N2`D_<&%S2`E;6%T8VAE>&]N(#T@*"D["@D);7D@)7-I;F=L965X;VYG96YE.PH)"6UY_("1F;W5N9"`](#`["@D);7D@)6-O;7!E=&EN9V=E;F5S.PH)"6UY("1T97-T:6=I9"`](")%3E-',#`P,#`Q_,#0S,S(B.PH)"6UY("1T97-T:60@/2`B.&5C-34P83`M9C%D-"TT9C(X+3EB8V4M,60P9&%B-#-A.&5E(CL*_("`@"0EF;W(H;7D@)&D@/2`P.R`D:2`\('-C86QA2`D8VAA'0@:68H*"1R96YD*2`^("@D96YD*R1T_:')ER1S=7!P;W)T;&5V96Q]>R1T:61]*3L*("`@"0D)"0D)"2,D=VET:&EN(#T@,#L*("`@"0D)"0D)"6UY("1N_=6UO9F5L96UE;G1S(#T@PH@("`)"0D)"0D)"21S:6YG;&5E>&]N_9V5N97LD:6=I9'T@/2`Q(&EF*"$DPH@_("`)"0D)"0D)"21S:6YG;&5E>&]N9V5N97LD:6=I9'T@/2`R.PH@("`)"0D)"0D)?0H@("`)"0D)"0D)9F]R_*&UY("1I(#T@*"1N=6UO9F5L96UE;G1S+3$I.R1I(#X@,#LD:2TM*7L*("`@"0D)"0D)"0EM>2`H)&5C:'(L_)&5S=&%R="PD965N9"PD97-TR1T:61]>R1I?2`]("@D;G5M;V9E;&5M96YTR1I9VED?7LD=&ED?2`](#$@:68H(21C;VUP971I;F=G96YE&]N_>R1I9VED?7LD=&ED?7LH)&YU;6]F96QE;65N=',M,2E]*7L*("`@(`D)"0D)"7UE;'-E>PH@("`@"0D)"0D)_"21C;VUP971I;F=G96YE7,@)7L@)&UA=&-H97AO;GLD:6=I9'U[)'1I9'T@?2`]/2`H)&YU;6]F96QE;65N=',M,2D@)B8@_)'-I;F=L965X;VYG96YE>R1I9VED?2`]/2`R("8F("@D;G5M;V9J=6YC=&EO;BLQ*2`]/2`H)&YU;6]F96QE_;65N=',M,2D@)B8@)&YU;6]F:G5N8W1I;VX@/B`P*7L*("`@(`D)"0D)"0DD9F]U;F0@/2`Q.PH@("`@"0D)_"0D)"21C;W5N='LD:6=I9'U[)&ED?2LK.PHC(T-O=F5R86=E(R,*("`@(`D)"0D)"0EM>2`H)&-HR1I9VED?7LD:61](#T@)&-O=F5R86=E(&EF*"$D8V]V97)A9V5[)&EG:61]_>R1I9'TI.PH@("`@"0D)"0D)"6QAPH@("`@"0DC(T-H96-K(&9O2`D;6%X;6%T8V@@/2`P.PH@("`@"0EM>2`D;6%X:6=I9"`]("(M(CL*("`@(`D);7D@)&)I9V=E_2`D:6=I9"AK97ES_("5C;VUP971I;F=G96YE&]N9V5N97LD:6=I9'T@/3T@,2D[_"B`@("`)"0EM>2`D8V]M<&5T96QI2`D=&ED*&ME>7,@)7LD8V]M_<&5T:6YG9V5N97-[)&EG:61]?2E["B`@("`)"0D):68H(21C;VUP971I;F=G96YEPH@("`@"0D)"0EN97AT.PH@("`@"0D)"7T*("`@(`D)"0EN97AT(&EF*"1C;VUP971I;F=G96YE2`D;G5M;V9M871C:&5X;VYS(#T@,#L*("`@(`D)"69O&EG:61]>R1T:61](#T](#$I.PH@("`@"0D)"21N=6UO9FUA=&-H97AO;G,@/2`P.PH@("`@"0D)"69O_&EG:61]>R1T:61]('TI>PH@("`@"0D)"0DD_;G5M;V9M871C:&5X;VYS*RL["B`@("`)"0D)?0H@("`@"0D)?0H@("`@"0D):68H)&YU;6]F;6%T8VAE>&]N_PH@("`@"0D)"21F;W5N9"`](#$["B`@("`)"0D))&-O=6YT>R1M87AI9VED?7LD:61]*RL["B,C_0V]V97)A9V4C(PH@("`@"0D)"6UY("@D8VAR+"1S=&%R="PD96YD+"1S=')A;F0I(#T@R1M87AI9VED?2D["B`@("`)"0D);7D@)&-O=F5R86=E(#T@,2`M("@H*"1R&EG:61]>R1I9'T@/2`D_8V]V97)A9V4@:68H(21C;W9EPH@_("`@"0D))&UA>&EG:60@/2`B+2(["B`@("`)"0EF;W)E86-H(&UY("1I9VED*&ME>7,@)6-O;7!E=&EN9V=E_;F5S*7L*("`@(`D)"0EN97AT(&EF*"1S:6YG;&5E>&]N9V5N97LD:6=I9'T@/3T@,2D["B`@("`)"0D);F5X_="!I9B@D8V]M<&5T:6YG9V5N97-[)&EG:61](#T](#$I.PH@("`@"0D)"69OPH@("`@"0D)"0EN97AT(&EF*"$E>R1M871C:&5X;VY[)&EG:61]_>R1T:61]?2D["B`@("`)"0D)"6EF*"$D8V]M<&5T:6YG9V5N97-[)&EG:61]>R1T:61]*7L*("`@(`D)"0D)_"6EF*"1I9"!E<2`D=&5S=&ED*7L*("`@(`D)"0D)"0EP'0@:68H)&-O;7!E=&EN9V=E;F5S>R1I9VED?7LD=&ED_?2`]/2`Q*3L*("`@(`D)"0D);7D@)&YU;6]F;6%T8VAE>&]N2`D96ED*&ME>7,@)7L@)&UA=&-H97AO;GLD:6=I9'U[_)'1I9'T@?2E["B`@("`)"0D)"0DD;G5M;V9M871C:&5X;VYS*RL["B`@("`)"0D)"0DD;G5M;V9T:61E>&]N_R1T:61]>R1E:61].PH@("`@"0D)"0E]"B`@("`)"0D)"6YE>'0@:68H_)&YU;6]F;6%T8VAE>&]N&]N&]N&UA=&-H*7L*("`@_(`D)"0D)"21B:6=G97(@/2`Q(&EF*"1M87AM871C:"`^(#`@?'P@)&YU;6-O;7!E=&=E;F4@/B`Q*3L*("`@_(`D)"0D)"21M87AM871C:"`]("@D;G5M;V9M871C:&5X;VYS+R1N=6UO9G1I9&5X;VYS*3L*("`@(`D)"0D)_"21M87AI9VED(#T@)&EG:60["B`@("`)"0D)"7T*("`@(`D)"0E]"B`@("`)"0E]"B`@("`)"0EI9B@D;6%X_:6=I9"!N92`B+2(@)B8@)&)I9V=E2`H)&-HR1M87AI9VED?7LD:61](#T@)&-O=F5R86=E(&EF*"$D8V]V97)A9V5[)&UA>&EG:61]>R1I9'TI.PH@_("`@"0D)?0H@("`@"0E]"B`@("`)?0H@("`@"0H@("`@"6EF*"1F;W5N9"`]/2`P("8F("1N=6UO9FIU;F-T_:6]N(#T](#`I>PH@("`@"0DC(T-H96-K(&9OPH@("`@"0D);F5X="!I9B@D2`D=&ED*&ME>7,@)7L@)&UA=&-H97AO;GLD:6=I9'T@_?2E["B`@("`)"0D);F5X="!I9B@A)&UA=&-H97AO;GLD:6=I9'U[)'1I9'U[,7TI.PH@("`@"0D)"21C;W5N_='LD:6=I9'U[)&ED?2LK.PH@("`@"0D)"21F;W5N9"`](#$["B,C0V]V97)A9V4C(PH@("`@"0D)"6UY("@D_8VAR+"1S=&%R="PD96YD+"1S=')A;F0I(#T@R1I9VED?2D["B`@("`)_"0D);7D@)&-O=F5R86=E(#T@,2`M("@H*"1RR1I9'T@/2`D8V]V97)A9V4@:68H(21C;W9EPD*("`@(`D)(R-3;VUE(')E861S(&%L:6=N960@_=&\@;VYL>2`UXH"9+65N9"!O&]NPH@("`@"0D)"69OR`D;6%T8VAE>&]N>R1I9VED?7LD=&ED?2!](#P@,2D["B`@("`)_"0D)"21F;W5N9"`](#$["B`@("`)"0D)"6QAR1I9VED?7LD:61]*RL["B,C0V]V97)A9V4C(PH@("`@"0D)"0EM>2`H)&-H2`D8V]V97)A9V4@/2`Q("T@*"@H)')S=&%R="TDR1I9VED?7LD:61](#T@)&-O=F5R86=E(&EF*"$D8V]V97)A9V5[)&EG:61]>R1I_9'TI.PH@("`@"0D)"7T*("`@(`D)"7T*("`@(`D)?0H@("`@"7T*"B`@("!]"B`@("`*("`@(&UY("1R971U_2`D:6=I9"A`9VEDR1I9VED?2E["B`@("`)"21C;W5N_="`](#`["B`@("`)?65L2`D;W5T<'5T8FEN(#T@(B(["@D);7D@_)'1O=&%L8V]U;G0@/2`P.PH@("`@"69OR1R:61]*2\Q,"DI_.PH@("`@"0DD8FEN*RL["B`@("`)"21B:6X@/2`Q,"!I9B@D8FEN(#T](#$Q*3L*("`@(`D)9F]R*&UY("1I_(#T@,3L@)&D@/#T@)&)I;CL@)&DK*RE["B`@("`)"0DD8V]V97)A9V5B:6Y;)&E=*RL["B`@("`)"7T*("`@_(`D))'1O=&%L8V]U;G0K*SL*("`@(`E]"B`@("`)9F]R*&UY("1I(#T@,3L@)&D@/#T@,3`[("1I*RLI>PH@_("`@"0DD8V]V97)A9V5B:6Y;)&E=(#T@,"!I9B@A)&-O=F5R86=E8FEN6R1I72D["B`@("`)"21O=71P=71B_:6X@+CT@(GPB("X@)&-O=F5R86=E8FEN6R1I73L*("`@(`E]"@H@("`@"21R971U 10% and p-value < 0.01 (default/recommended)\n"; 32 | print " 1: delta-PSI > 10%\n"; 33 | print " 2: p-value < 0.05\n"; 34 | print " 3: report all events\n"; 35 | print " --irmode [number] 0: only introns in the .gtf. (default)\n"; 36 | print " 1: aggressively search for all introns\n"; 37 | print " 2: Skip intron-retention events.\n"; 38 | print " --irrange [number] 0: IR event is using reference points of the target exon region.\n"; 39 | print " n: Use +-n bases around target intron region and +-n bases around event region. (default)\n"; 40 | print " --irclean [number] 0: No action (default)\n"; 41 | print " 25~100: Apply skipratio and remove intron-retention events whose coverage difference between the its two splice sites is > n% of their average.\n"; 42 | print " --adjp [number] 0: Skip p-value adjustment. (default)\n"; 43 | print " 1: Benjamini-Hochberg (Statistics::Multtest Perl module)\n"; 44 | print " 2: Benjamini-Hochberg (qvalue or p.adjust() R package)\n"; 45 | print " --trimp [number] Set FDR to 'na' for events whose maximum PSI values are below [number]% or whose minimum PSI values are above [100-number]% in all groupa.txt and groupb.txt during p-value adjustment. [default:5]\n"; 46 | print " --denominator [number] 0: Don't report denominators. (default)\n"; 47 | print " 1: Report the table of denominators.\n"; 48 | print " --variance [number] 0: Assuming equal variance and use Student's t-test. (default)\n"; 49 | print " 1: Assuming unequal variance and use Welch's t-test. \n"; 50 | print " --groupa [/path/groupa.txt] Specify where the groupa.txt file is. (default: groupa.txt)\n"; 51 | print " --groupb [/path/groupb.txt] Specify where the groupb.txt file is. (default: groupb.txt)\n"; 52 | print " --threads [number] the number of CPUs used when --irmode > 0.\n"; 53 | print " 1: one CPU thread (4~8GB RAM per CPU thread)\n"; 54 | print " 1+: more CPU thread (2 = 8~16GB RAM, 3 = 12~24GB RAM, and so forth.)\n"; 55 | print "\n"; 56 | exit; 57 | } 58 | 59 | my ($gtf,$name,$type,$supporting_read_criteria,$fmode,$skipratio,$irmode,$adjp,$trimp,$denominator,$irrange,$variance,$threads,$groupa,$groupb,$output,$irclean) = split(/\t/,$status); 60 | $fmode = 0 if($fmode ne "0" && $fmode ne "1" && $fmode ne "2" && $fmode ne "3"); 61 | $irmode = 0 if($irmode ne "0" && $irmode ne "1" && $irmode ne "2"); 62 | $irrange = 5 if(!$irrange || $irrange!~/\d/ || $irrange eq "-"); 63 | $adjp = 0 if(!$adjp); 64 | $adjp = 0 if($adjp ne "0" && $adjp ne "1" && $adjp ne "2"); 65 | $denominator = 0 if($denominator ne "0" && $denominator ne "1"); 66 | $skipratio = 0.05 if($skipratio eq "-" || $skipratio > 1 || $skipratio < 0); 67 | $trimp = 5 if($trimp eq "-"); 68 | $variance = 0 if(!$variance || $variance eq "-"); 69 | $threads = 1 if(!$threads || $threads eq "-"); 70 | $groupa = "groupa.txt" if(!$groupa || $groupa eq "-"); 71 | $groupb = "groupb.txt" if(!$groupb || $groupb eq "-"); 72 | $output = "." if(!$output || $output eq "-"); 73 | $irclean = 0 if(!$irclean || $irclean eq "-"); 74 | 75 | if(!-e $groupa){ 76 | print "$groupa is not found. exit.\n"; 77 | exit; 78 | }else{ 79 | print "$groupa is found. Copy to the working directory.\n"; 80 | system("cp $groupa $name.groupa.txt"); 81 | $groupa = $name . ".groupa.txt"; 82 | } 83 | if(!-e $groupb){ 84 | print "$groupb is not found. exit.\n"; 85 | exit; 86 | }else{ 87 | print "$groupb is found. Copy to the working directory.\n"; 88 | system("cp $groupb $name.groupb.txt"); 89 | $groupb = $name . ".groupb.txt"; 90 | } 91 | 92 | if($irclean != 0){ 93 | if($irclean < 25 || $irclean > 100){ 94 | print "(ERROR) --irclean parameter should in a range of 25% to 100%.\n"; 95 | exit; 96 | } 97 | } 98 | 99 | if($output ne "."){ 100 | if(-e $output){ 101 | print "$output folder already exists. exit.\n"; 102 | exit; 103 | }else{ 104 | system("mkdir $output"); 105 | } 106 | if(!-e $output){ 107 | print "$output can't be created. exit.\n"; 108 | exit; 109 | } 110 | } 111 | 112 | print "gtf = $gtf\n"; 113 | print "name = $name\n"; 114 | print "type = $type\n"; 115 | print "nread = $supporting_read_criteria\n"; 116 | print "skipratio = $skipratio\n"; 117 | print "fmode = $fmode\n"; 118 | print "irmode = $irmode\n"; 119 | print "adjp = $adjp\n"; 120 | print "trimp = $trimp\n"; 121 | print "denominator = $denominator\n"; 122 | print "irrange = $irrange\n"; 123 | print "groupa = $groupa\n"; 124 | print "groupb = $groupb\n"; 125 | print "threads = $threads\n"; 126 | print "output = $output\n"; 127 | 128 | if($variance == 0){ 129 | print "variance assumption = equal (Student's t-test)\n"; 130 | } 131 | if($variance == 1){ 132 | print "variance assumption = unequal (Welch's t-test)\n"; 133 | } 134 | 135 | if($adjp == 2){ 136 | print "### NOTE: Staitistics::R and R are required for p-value adjustment.\n"; 137 | use Statistics::R; 138 | } 139 | #my ($gtf,$name,$longread,$supporting_read_criteria) = @ARGV; 140 | 141 | my $path = abs_path($0); 142 | $path=~s/\/dummyai\.pl//; 143 | $path=~s/\/dummyai\-parallel\.pl//; 144 | print "Path = $path\n"; 145 | if($path=~/\.pl$/){ 146 | print "Path format isn't correct.\n"; 147 | exit; 148 | } 149 | 150 | my ($starttime,$stoptime,$hours) = (time,0,0); 151 | 152 | my $noveljunctioncriteria = 10; 153 | #my $supporting_read_criteria = 10; 154 | my $intron_criteria = 3; 155 | my $totaltime = 0; 156 | 157 | my %group; 158 | my $patha = 0; 159 | open(FILE,"$groupa") || die "Aborting.. Can't open $groupa : $!\n"; 160 | while(my $line=){ 161 | chomp $line; 162 | next if($line eq ""); 163 | my $bam = $line; 164 | my $bamfn = $bam; 165 | my $bai = "$bam\.bai"; 166 | my $sjout = $bam; 167 | $sjout=~s/\.Aligned\.sortedByCoord\.out\.bam/\.SJ\.out\.tab/; 168 | my $tmp = "-"; 169 | if($bam!~/\.bam/){ 170 | $tmp = $bam . ".Aligned.sortedByCoord.out.bam"; 171 | $bam = $tmp if(-e $tmp); 172 | $tmp = $bam . ".sorted.bam"; 173 | $bam = $tmp if(-e $tmp); 174 | $tmp = $bam . ".bam"; 175 | $bam = $tmp if(-e $tmp); 176 | } 177 | if($bam=~/\//){ 178 | $patha = 1; 179 | $bamfn=~s/(.*)\///; 180 | system("ln -s $bam $bamfn"); 181 | system("ln -s $bam\.bai $bamfn\.bai"); 182 | if(-e $sjout){ 183 | system("cp $sjout ."); 184 | }else{ 185 | print "(WARNNING) $sjout is not found.\n"; 186 | } 187 | $bam = $bamfn; 188 | } 189 | if($bam=~/\.bam$/){ 190 | $bai = "$bam\.bai"; 191 | if(-e $bai){ 192 | print "$bai is ready.\n"; 193 | #system("ln -s $bam\.bai $bamfn\.bai"); 194 | }else{ 195 | print "$bai doesn't exist. Creating a new index...\n"; 196 | system("samtools index $bamfn"); 197 | } 198 | } 199 | $group{$bam}++; 200 | } 201 | 202 | open(FILE,"$groupb") || die "Aborting.. Can't open $groupb : $!\n"; 203 | while(my $line=){ 204 | chomp $line; 205 | next if($line eq ""); 206 | my $bam = $line; 207 | my $bamfn = $bam; 208 | my $bai = "$bam\.bai"; 209 | my $sjout = $bam; 210 | $sjout=~s/\.Aligned\.sortedByCoord\.out\.bam/\.SJ\.out\.tab/; 211 | my $tmp = "-"; 212 | if($bam!~/\.bam/){ 213 | $tmp = $bam . ".Aligned.sortedByCoord.out.bam"; 214 | $bam = $tmp if(-e $tmp); 215 | $tmp = $bam . ".sorted.bam"; 216 | $bam = $tmp if(-e $tmp); 217 | $tmp = $bam . ".bam"; 218 | $bam = $tmp if(-e $tmp); 219 | } 220 | if($bam=~/\//){ 221 | $patha = 1; 222 | $bamfn=~s/(.*)\///; 223 | system("ln -s $bam $bamfn"); 224 | system("ln -s $bam\.bai $bamfn\.bai"); 225 | if(-e $sjout){ 226 | system("cp $sjout ."); 227 | }else{ 228 | print "(WARNNING) $sjout is not found.\n"; 229 | } 230 | $bam = $bamfn; 231 | } 232 | if($bam=~/\.bam$/){ 233 | $bai = "$bam\.bai"; 234 | if(-e $bai){ 235 | print "$bai is ready.\n"; 236 | #system("ln -s $bam\.bai $bamfn\.bai"); 237 | }else{ 238 | print "$bai doesn't exist. Creating a new index...\n"; 239 | system("samtools index $bamfn"); 240 | } 241 | } 242 | $group{$bam}++; 243 | } 244 | 245 | if($patha == 1){ 246 | print "Formatting $groupa and $groupb...\n"; 247 | system("cp $groupa $groupa.orig"); 248 | system("cp $groupb $groupb.orig"); 249 | open(FILE,"$groupa.orig") || die "Aborting.. Can't open $groupa.orig : $!\n"; 250 | open(OUT,">$groupa") || die "Aborting.. Can't open $groupa : $!\n"; 251 | while(my $line=){ 252 | chomp $line; 253 | next if($line eq ""); 254 | $line=~s/(.*)\///; 255 | $line=~s/\.Aligned\.sortedByCoord\.out\.bam//; 256 | $line=~s/\.sorted\.out\.bam//; 257 | $line=~s/\.sorted\.bam//; 258 | $line=~s/\.bam//; 259 | print OUT "$line\n"; 260 | } 261 | close(OUT); 262 | close(FILE); 263 | open(FILE,"$groupb.orig") || die "Aborting.. Can't open $groupb.orig : $!\n"; 264 | open(OUT,">$groupb") || die "Aborting.. Can't open $groupb : $!\n"; 265 | while(my $line=){ 266 | chomp $line; 267 | next if($line eq ""); 268 | $line=~s/(.*)\///; 269 | $line=~s/\.Aligned\.sortedByCoord\.out\.bam//; 270 | $line=~s/\.sorted\.out\.bam//; 271 | $line=~s/\.sorted\.bam//; 272 | $line=~s/\.bam//; 273 | print OUT "$line\n"; 274 | } 275 | close(OUT); 276 | close(FILE); 277 | } 278 | 279 | print "Generating mapping file...\n"; 280 | my $mappingcount = 0; 281 | my %chr; 282 | open(FILE,"$gtf") || die "Aborting.. Can't open $gtf : $!\n"; 283 | open(OUT,">" . $gtf . ".mapping.txt") || die "Aborting.. Can't open $gtf : $!\n"; 284 | while(my $line=){ 285 | chomp $line; 286 | next if($line=~/^\#/); 287 | my @array = split(/\t/,$line); 288 | next if($array[2] ne "transcript"); 289 | my ($chr,$cat,$start,$end,$strand,$name) = ($array[0],$array[1],$array[3],$array[4],$array[6],$array[8]); 290 | #$chr = "chr" . $chr if($chr!~/chr/); 291 | $chr{$chr}++; 292 | my $ENST = $name; 293 | $ENST=~s/(.*)transcript\_id \"//; 294 | $ENST=~s/\"\; (.*)//; 295 | if($ENST=~/\_/){ 296 | $ENST=~s/\_/\./g; 297 | } 298 | $name=~s/(.*)gene\_name \"//; 299 | $name=~s/\"\; (.*)//; 300 | $name=~s/\"\;//; 301 | $name=~s/gene\_id \"//; 302 | if($name=~/\_/){ 303 | $name=~s/\_/\./g; 304 | } 305 | print OUT $ENST . "\t" . $name . "\t" . $strand . "\n"; 306 | $mappingcount++ if($ENST ne "" && $name ne "" && $strand ne ""); 307 | } 308 | close(OUT); 309 | close(FILE); 310 | if($mappingcount == 0){ 311 | print "$gtf is not in an acceptable gtf format. Exiting...\n"; 312 | exit; 313 | } 314 | 315 | my $nfiles = scalar keys %group; 316 | if($nfiles < 2){ 317 | print "Not enough files. Exiting...\n"; 318 | exit; 319 | } 320 | 321 | print "Checking splice-junction files...\n"; 322 | $starttime = time; 323 | my $sjcount = 0; 324 | foreach my $bam(keys %group){ 325 | next if($bam eq ""); 326 | my $accession = $bam; 327 | $accession=~s/\.Aligned\.sortedByCoord\.out\.bam//; 328 | $accession=~s/\.sorted\.out\.bam//; 329 | $accession=~s/\.sorted\.bam//; 330 | $accession=~s/\.bam//; 331 | $accession=~s/\.$//; 332 | my $sjfn = $accession . ".SJ.out.tab"; 333 | 334 | my $commend = "samtools view $bam | awk -f " . $path . "/sjFromSAMcollapseUandM_inclOverlaps.awk > " . $sjfn; 335 | if(-e $sjfn){ 336 | if(-z $sjfn){ 337 | if($type == 1){ 338 | generateSJ($bam,$accession); 339 | } 340 | if($type == 2){ 341 | print "Generating... $sjfn\n"; 342 | system($commend); 343 | } 344 | } 345 | }else{ 346 | if($type == 1){ 347 | generateSJ($bam,$accession); 348 | } 349 | if($type == 2){ 350 | print "Generating... $sjfn\n"; 351 | system($commend); 352 | } 353 | } 354 | $sjcount++; 355 | } 356 | $stoptime = time; 357 | $hours = sprintf("%.4f",(($stoptime-$starttime)/3600)); 358 | $totaltime += $hours; 359 | print "===Splice-junction files spent $hours hours.===\n"; 360 | if($sjcount < 2){ 361 | print "Only $sjcount samples. It is not enough. Exiting...\n"; 362 | exit; 363 | } 364 | 365 | $starttime = time; 366 | my $dbname = $name . ".db"; 367 | my $bedname = $name . ".bed"; 368 | my $chrs; 369 | foreach my $chr(sort keys %chr){ 370 | $chrs .= "\t" . $chr; 371 | } 372 | $chrs=~s/\t//; 373 | if(-e $dbname){ 374 | if(-z $dbname){ 375 | print "Regenerating $dbname...\n"; 376 | rundb($noveljunctioncriteria,$gtf,$chrs,$type); 377 | } 378 | }else{ 379 | print "Generating $dbname...\n"; 380 | rundb($noveljunctioncriteria,$gtf,$chrs,$type); 381 | } 382 | $stoptime = time; 383 | $hours = sprintf("%.4f",(($stoptime-$starttime)/3600)); 384 | $totaltime += $hours; 385 | print "===Database spent $hours hours.===\n"; 386 | 387 | my $ircheck = 1; 388 | $ircheck = 0 if($irmode == 2); 389 | if($ircheck == 1){ 390 | print "Getting intron reads....\n"; 391 | $starttime = time; 392 | my $ircount = 0; 393 | my @commend; 394 | foreach my $bam(keys %group){ 395 | my $accession = $bam; 396 | $accession=~s/\.Aligned\.sortedByCoord\.out\.bam//; 397 | $accession=~s/\.sorted\.out\.bam//; 398 | $accession=~s/\.sorted\.bam//; 399 | $accession=~s/\.bam//; 400 | $accession=~s/\.$//; 401 | my $pname = ""; 402 | my $checkpname = 0; 403 | if($bam!~/\.bam/){ 404 | $pname = $bam . "\.Aligned\.sortedByCoord\.out\.bam"; 405 | if(-e $pname){ 406 | $bam = $pname; 407 | $checkpname = 1; 408 | } 409 | if($checkpname == 0){ 410 | $pname = $bam . "\.sorted\.out\.bam"; 411 | if(-e $pname){ 412 | $bam = $pname; 413 | $checkpname = 1; 414 | } 415 | } 416 | if($checkpname == 0){ 417 | $pname = $bam . "\.bam"; 418 | if(-e $pname){ 419 | $bam = $pname; 420 | $checkpname = 1; 421 | } 422 | } 423 | } 424 | print "Checking $bam...\n"; 425 | my $irfn = $accession . ".IR.out.tab"; 426 | print "Checking $irfn...\n"; 427 | #print "commend = $commend\n"; 428 | if(-e $irfn){ 429 | if(-z $irfn){ 430 | print "Regenerating .IR.out for $accession\n"; 431 | push(@commend,"perl " . $path . "/PSIsigma-ir-v.1.2.pl " . $name . ".db " . $bam . " " . $type); 432 | #$commend .= "perl " . $path . "/PSIsigma-ir-v.1.2.pl " . $name . ".db " . $bam . " " . $type . " &\n"; 433 | }else{ 434 | print "$irfn existed. Pass...\n"; 435 | } 436 | }else{ 437 | print "Generating .IR.out for $accession\n"; 438 | push(@commend,"perl " . $path . "/PSIsigma-ir-v.1.2.pl " . $name . ".db " . $bam . " " . $type); 439 | #$commend .= "perl " . $path . "/PSIsigma-ir-v.1.2.pl " . $name . ".db " . $bam . " " . $type . " &\n"; 440 | } 441 | $ircount++; 442 | } 443 | if(scalar @commend > 0 && $ircount > 0){ 444 | my $commend = ""; 445 | my $commend_count = 0; 446 | foreach my $line(@commend){ 447 | $commend .= "$line &\n"; 448 | $commend_count++; 449 | $commend .= "wait\n" if($commend_count % $threads == 0); 450 | } 451 | $commend .= "wait\n"; 452 | system($commend); 453 | } 454 | $stoptime = time; 455 | $hours = sprintf("%.4f",(($stoptime-$starttime)/3600)); 456 | $totaltime += $hours; 457 | print "===Intron-read file spent $hours hours.===\n"; 458 | if($ircount < 2){ 459 | print "Only $ircount samples. It is not enough. Exiting...\n"; 460 | exit; 461 | } 462 | } 463 | 464 | print "Ready to do PSI analysis...\n"; 465 | $starttime = time; 466 | my $adjpdefault = 0; 467 | $adjpdefault = 1 if($adjp == 1); 468 | my $commend = "perl " . $path . "/PSIsigma-PSI-v.1.1.pl " . $name . ".db " . $name . " " . $supporting_read_criteria . " " . $skipratio . " " . $intron_criteria . " " . $type . " " . $adjpdefault . " " . $denominator . " " . $ircheck . " " . $irrange . " " . $variance . " " . $groupa . " " . $groupb . " " . $irclean; 469 | system($commend); 470 | $stoptime = time; 471 | $hours = sprintf("%.4f",(($stoptime-$starttime)/3600)); 472 | $totaltime += $hours; 473 | print "===PSI analysis spent $hours hours.===\n"; 474 | 475 | print "Filtering ΔPSI results...\n"; 476 | $starttime = time; 477 | if($nfiles < 4 && $fmode == 0){ 478 | print "Not enough samples for p-value calculation, so switch to fmode = 1.\n"; 479 | $fmode = 1; 480 | } 481 | if($nfiles < 4 && $fmode == 2){ 482 | print "Not enough samples for p-value calculation, so switch to fmode = 1.\n"; 483 | $fmode = 1; 484 | } 485 | if($adjp == 2 && $fmode != 3){ 486 | print "## Note: --fmode has been changed to 3 for p-value adjustment.\n"; 487 | $fmode = 3; 488 | } 489 | $commend = "perl " . $path . "/PSIsigma-filter-v.1.0.pl " . $name . ".db " . $gtf . ".mapping.txt " . $name . "_r" . $supporting_read_criteria . "_ir" . $intron_criteria . ".txt " . $fmode; 490 | system($commend); 491 | $stoptime = time; 492 | $hours = sprintf("%.4f",(($stoptime-$starttime)/3600)); 493 | $totaltime += $hours; 494 | print "===Filtering spent $hours hours.===\n"; 495 | 496 | if(-e $name . "_r" . $supporting_read_criteria . "_ir" . $intron_criteria . ".txt"){ 497 | print "Archiving... " . $name . "_r" . $supporting_read_criteria . "_ir" . $intron_criteria . ".txt \n"; 498 | system("tar zcvf " . $name . "_r" . $supporting_read_criteria . "_ir" . $intron_criteria . ".txt.tar.gz " . $name . "_r" . $supporting_read_criteria . "_ir" . $intron_criteria . ".txt"); 499 | system("rm " . $name . "_r" . $supporting_read_criteria . "_ir" . $intron_criteria . ".txt"); 500 | }else{ 501 | } 502 | 503 | if($adjp == 2){ 504 | print "Adjusting p-value distribution...\n"; 505 | $commend = "perl " . $path . "/PSIsigma-FDR-v.1.0.pl " . $name . "_r" . $supporting_read_criteria . "_ir" . $intron_criteria . ".sorted.txt 2 $trimp"; 506 | system($commend); 507 | $stoptime = time; 508 | $hours = sprintf("%.4f",(($stoptime-$starttime)/3600)); 509 | $totaltime += $hours; 510 | print "===P-value adjustment spent $hours hours.===\n"; 511 | } 512 | 513 | 514 | print "\n***Total: $totaltime hours (or " . ($totaltime*60) . "mins).\n"; 515 | 516 | open(LOG,">" . $name . ".Log.txt") || die "Aborting.. Can't open " . $name . ".Log.txt : $!\n"; 517 | print LOG "Working directory = $path\n"; 518 | print LOG "Parameters = " . $inputs . "\n"; 519 | print LOG "Computing time = $totaltime hours (or " . ($totaltime*60) . "mins).\n"; 520 | close(LOG); 521 | 522 | 523 | if($output ne "."){ 524 | print "Publishing results to $output\n"; 525 | system("mv $groupa $output/"); 526 | system("mv $groupb $output/"); 527 | system("mv $dbname $output/"); 528 | system("mv $bedname $output/"); 529 | system("mv " . $gtf . ".mapping.txt $output/"); 530 | system("mv " . $name . "_r" . $supporting_read_criteria . "_ir" . $intron_criteria . ".txt.tar.gz $output/"); 531 | system("mv " . $name . "_r" . $supporting_read_criteria . "_ir" . $intron_criteria . ".sorted.txt $output/"); 532 | system("mv " . $name . ".Log.txt $output/"); 533 | } 534 | 535 | sub rundb{ 536 | my $noveljunctioncriteria = shift; 537 | my $gtf = shift; 538 | my $chrs = shift; 539 | my $type = shift; 540 | my @chromosomes = split(/\t/,$chrs); 541 | my @dbcommend; 542 | foreach my $chromosome(@chromosomes){ 543 | next if($chromosome=~/^chrGL/); 544 | next if($chromosome=~/^chrKI/); 545 | next if($chromosome=~/^GL/); 546 | next if($chromosome=~/^KI/); 547 | 548 | #my $commend = "perl " . $path . "/PSIsigma-db-v.1.0.pl $gtf " . $chromosome . " " . $noveljunctioncriteria . " " . $type . " " . $irmode; 549 | push(@dbcommend, "perl " . $path . "/PSIsigma-db-v.1.0.pl $gtf " . $chromosome . " " . $noveljunctioncriteria . " " . $type . " " . $irmode . " " . $groupa . " " . $groupb); 550 | #print "Doing... $commend\n"; 551 | #print "Doing... $chromosome\n"; 552 | #system("$commend"); 553 | } 554 | 555 | my $commend = ""; 556 | my $commend_count = 0; 557 | foreach my $line(@dbcommend){ 558 | $commend .= "$line &\n"; 559 | $commend_count++; 560 | $commend .= "wait\n" if($commend_count % $threads == 0); 561 | } 562 | $commend .= "wait\n"; 563 | system($commend); 564 | 565 | system("cat $name.*.db.tmp > $dbname"); 566 | system("cat $name.*.bed.tmp > $bedname"); 567 | system("rm $name.*.db.tmp"); 568 | system("rm $name.*.bed.tmp"); 569 | } 570 | 571 | sub param{ 572 | my $a = $_[0]; 573 | my @array = @$a; 574 | 575 | if($array[0] eq "--help" || $array[0] eq "-h"){ 576 | return "Help"; 577 | } 578 | 579 | my %parameters; 580 | $parameters{"gtf"} = "-"; 581 | $parameters{"name"} = "-"; 582 | $parameters{"type"} = "-"; 583 | $parameters{"nread"} = "-"; 584 | $parameters{"fmode"} = "-"; 585 | $parameters{"skipratio"} = "-"; 586 | $parameters{"irmode"} = "-"; 587 | $parameters{"adjp"} = "-"; 588 | $parameters{"denominator"} = "-"; 589 | $parameters{"irrange"} = "-"; 590 | $parameters{"trimp"} = "-"; 591 | $parameters{"variance"} = "-"; 592 | $parameters{"threads"} = "-"; 593 | $parameters{"groupa"} = "-"; 594 | $parameters{"groupb"} = "-"; 595 | $parameters{"output"} = "-"; 596 | $parameters{"irclean"} = "-"; 597 | 598 | my $oldformat = 1; 599 | for(my $i = 0;$i < scalar @array;$i++){ 600 | if($array[$i]=~/^\-/){ 601 | my $pam = $array[$i]; 602 | $pam=~s/^\-\-//; 603 | $pam=~s/^\-//; 604 | if(!$parameters{$pam}){ 605 | return "Can't recognize the parameter name: $pam"; 606 | }else{ 607 | if(!$array[($i+1)] && $array[($i+1)] != 0){ 608 | return "Parameter $pam has no input value"; 609 | }else{ 610 | $oldformat = 0; 611 | $parameters{$pam} = $array[($i+1)]; 612 | } 613 | } 614 | } 615 | } 616 | if($oldformat == 1){ 617 | if($array[0]!~/\.gtf/){ 618 | return "Not recognized parameter: $gtf"; 619 | } 620 | return $array[0] . "\t" . $array[1] . "\t" . $array[2] . "\t" . $array[3] . "\t" . "0"; 621 | } 622 | foreach my $key(keys %parameters){ 623 | next if($key eq "fmode" || $key eq "skipratio" || $key eq "irmode" || $key eq "adjp" || $key eq "denominator" || $key eq "irrange" || $key eq "trimp" || $key eq "variance" || $key eq "groupa" || $key eq "groupb" || $key eq "output" || $key eq "threads" || $key eq "irclean"); 624 | if($parameters{$key} eq "-"){ 625 | print "Parameter $key has no input value"; 626 | return "Parameter $key has no input value"; 627 | } 628 | } 629 | if($parameters{"gtf"}!~/\.gtf/){ 630 | return "--gtf parameter didn't find a files with .gtf extension."; 631 | } 632 | if($parameters{"type"} != 1 && $parameters{"type"} != 2){ 633 | return "--type parameter should use 1 for short-read or 2 for long-read."; 634 | } 635 | 636 | return $parameters{"gtf"} . "\t" . $parameters{"name"} . "\t" . $parameters{"type"} . "\t" . $parameters{"nread"} . "\t" . $parameters{"fmode"} . "\t" . $parameters{"skipratio"} . "\t" . $parameters{"irmode"} . "\t" . $parameters{"adjp"} . "\t" . $parameters{"trimp"} . "\t" . $parameters{"denominator"} . "\t" . $parameters{"irrange"} . "\t" . $parameters{"variance"} . "\t" . $parameters{"threads"} . "\t" . $parameters{"groupa"} . "\t" . $parameters{"groupb"} . "\t" . $parameters{"output"} . "\t" . $parameters{"irclean"}; 637 | } 638 | 639 | sub generateSJ{ 640 | my $bam = shift; 641 | my $accession = shift; 642 | print "Generating .SJ.out.tab will need to re-sort $bam file by read names.\n"; 643 | print "It will consume a lot of time, do you want to proceed? (Y/N)"; 644 | my $input = ; 645 | my $input = "Y"; 646 | chomp $input; 647 | my $newbam = "$accession.SortedbyName.bam"; 648 | if($input eq "N" || $input ne "Y"){ 649 | print "Bye.\n"; 650 | exit; 651 | }else{ 652 | print "\nYes, proceed.\n"; 653 | my $accession = "-"; 654 | my $dupcount = 0; 655 | open(INPUT, '-|',"samtools view " . $bam . " | head -n 100") or die $!; 656 | while (my $input = ) { 657 | chomp $input; 658 | my @array = split(/\t/,$input); 659 | my ($name,$chr,$flag,$ss,$cigar) = ($array[0],$array[2],$array[1],$array[3],$array[5]); 660 | $accession = $name if($accession eq "-"); 661 | $dupcount++ if($accession eq $name); 662 | } 663 | close(INPUT); 664 | if($dupcount > 90){ 665 | print "[ERROR]: The .bam contains too many duplicated read names (over 90% in the first 100 lines).\n"; 666 | exit; 667 | } 668 | print "Starting to sort $bam by read names...\n"; 669 | #print "chromosome format = " . $chrformat . "\n"; 670 | system("samtools sort -n $bam -o $newbam"); 671 | } 672 | 673 | my $sjfn = $accession . ".SJ.out.tab"; 674 | my $commend = "samtools view $newbam | awk -f " . $path . "/sjFromSAMcollapseUandM_inclOverlaps.awk > " . $sjfn; 675 | system($commend); 676 | #system("rm $newbam"); 677 | } -------------------------------------------------------------------------------- /PSIsigma-db-v.1.0.pl: -------------------------------------------------------------------------------- 1 | =begin 2 | PSI-Sigma: A splicing-detection method for short-read and long-read RNA-seq data 3 | © Kuan-Ting Lin, 2018-2024 4 | PSI-Sigma is free for non-commercial purposes by individuals at an academic or non-profit institution. 5 | For commercial purposes, please contact tech transfer office of CSHL via narayan@cshl.edu 6 | =end 7 | =cut 8 | #!/usr/bin/perl -w 9 | 10 | eval unpack u=>q{_=7-E('-T2`H)&=T9BPD<6-H&]N2`D;6%X_(#T@,#L*("`@(&UY("1A8V-E2`E97AO;F%N;F\[_"B`@("!O<&5N*$9)3$4L(B1G=&8B*2!\?"!D:64@(D%B;W)T:6YG+BX@0V%N)W0@;W!E;B`D9W1F(#H@)"%<_;B(["B`@("!W:&EL92AM>2`D;&EN93T\1DE,13XI>PH@("`@("`@(&-H;VUP("1L:6YE.PH@("`@("`@(&YE_>'0@:68H)&QI;F4]?B]>7",O*3L*("`@("`@("!M>2!`87)R87D@/2!S<&QI="@O7'0O+"1L:6YE*3L*("`@_("`@("!N97AT(&EF*"1A5LR72!N92`B9V5N92(@)B8@)&%R&]N(BD["B`@("`@_("`@;7D@*"1C:'(L)&-A="PD5LS72PD87)R87E;-%TL)&%R5LX72D["B`@("`@("`@(R1C:'(@/2`B_8VAR(B`N("1C:'(@:68H)&-H'0@:68H)&-HPH)"0D))&YA;64]?G,O7%\O7"XO9SL*"0D)?0H@("`@("`@(`DD86YN;WLD8VAR?7LB)'-T87)T7'0D_96YD(GTK*SL*("`@("`@("`))&%N;F][(B1C:'(B?7LB)'-T87)T7'0D96YD(GT@/2`D;F%M93L*("`@("`@_("!]"B`@("`@("`@:68H)&%R&]N(BE["@D)"6UY("1%3E-4(#T@)&YA;64["B`)"0DD_14Y35#U^PH)"0D))$5.4U0]?G,O7%\O7"XO9SL*_"0D)?0H)"0D*("`@("`@("`):68H)&YA;64]?B]T7!E(%PB+R\["B`@("`@("`@_"0DD;F%M93U^2`D;&%B96P@/2`M,3L*("`@("`@("`))&QA8F5L(#T@,2!I9B@D8V%T(&5Q(")N;VYS96YS95]M_961I871E9%]D96-A>2(I.PH@("`@("`@(`DC)&5X;VYA;FYO>R1C:')]>R(D&]N86YN;WLB)&-HR(DPH@_("`@("`@(`D))&5X;VYA;FYO>R(D8VAR(GU[(B1S=&%R=%QT)&5N9")]>R1%3E-4?2`]("1L86)E;#L*("`@_("`@("`)?65L2!`9FEL97,["B`);W!E;BA&24Q%+"(D9W)O=7!A(BD@?'P@9&EE(")!8F]R=&EN_9RXN($-A;B=T(&]P96X@)&=R;W5P82`Z("0A7&XB.PH@("`@=VAI;&4H;7D@)&QI;F4]/$9)3$4^*7L*("`@_("`@("!C:&]M<"`D;&EN93L*("`@("`@("!N97AT(&EF*"1L:6YE(&5Q("(B*3L*("`@("`@("!M>2`D86-C_97-S:6]N(#T@)&QI;F4["B`@("`@("`@)&%C8V5S4-O;W)D7"YO=71<+F)A;2\O.PH)"21A8V-E2`D8V]U;G1S:B`](#`["B`@("!F;W)E86-H(&UY("1J9FXH0&9I;&5S*7L*("`@_(`EN97AT(&EF*"UZ("1J9FXI.PH@("`@"2-P2`D8V]U;G0@/2`P_.PH)"6]P96XH1DE,12P@(B1J9FXB*2!\?"!D:64@(D%B;W)T:6YG+BX@0V%N)W0@;W!E;B`D:F9N7&XB.PH@_("`@("`@('=H:6QE*&UY("1L:6YE/3Q&24Q%/BE["B`@("`@("`@"6-H;VUP("1L:6YE.PH@("`@("`@(`EM_>2!`87)R87D@/2!S<&QI="@O7'0O+"1L:6YE*3L*("`@("`@("`@("`@;7D@*"1C:'(L)'-T87)T+"1E;F0L_)&YU;2D@/2`H)&%R5LQ72PD87)R87E;,ETL)&%R2`]/2`W*7L*("`@("`@("`@("`@("`@(",D:F9N(&ES(&$@8W5S=&]M:7IE9"!32B!F_:6QE7&XB.PH@("`@("`@("`@("`@("`@)&YU;2`]("1A5LS72!I9B@D;&]N9W)E860@/3T@,2D["B`@_("`@("`@("`@("`@("`D;G5M(#T@)&%RR1C:')]>R(DR1C:')]>R1S=&%R='TK*SL*("`@("`@("`@("`@)%-*2`E=&UP.PH@("`@;7D@)6)E9#L*("`@(&UY("5D8CL*("`@(&UY("5U;FEQ=64["B`@("!M_>2`D=&]T86Q?97AO;B`](#`["B`@("!O<&5N*$9)3$4L(B1G=&8B*2!\?"!D:64@(D%B;W)T:6YG+BX@0V%N_)W0@;W!E;B`D9W1F(#H@)"%<;B(["B`@("!W:&EL92AM>2`D;&EN93T\1DE,13XI>PH@("`@("`@(&-H;VUP_("1L:6YE.PH@("`@("`@(&UY("1N=6T@/2!S8V%L87(@:V5YPH@_("`@("`@("-]96QS97L*("`@("`@("`C"6YE>'0@:68H)&QI;F4A?B]>)'%C:'(O*3L*("`@("`@("`C?0H@_("`@("`@(&YE>'0@:68H)&QI;F4A?B]<=&5X;VY<="\@)B8@)&QI;F4A?B]<='1R86YS8W)I<'1<="\I.PH@_("`@("`@(&UY($!A2`]('-P;&ET*"]<="\L)&QI;F4I.PH@("`@("`@(&UY("@D8VAR+"1C870L)'-S_+"1E92PD5LS72PD87)R87E;-%TL_)&%R5LX72D["@D)(R1C:'(@/2`B8VAR(B`N("1C:'(@:68H)&-H'0@:68H)&-H&]N(B`F)B`D9V-H2`D=&ED(#T@)&%N;F\["@D)"21T:60]?G,O*"XJ*71R86YS8W)I<'1<7VED(%PB+R\[_"B`@("`)"21T:60]?G,O7")<.R`H+BHI+R\["B`@("`)"21T:60]?G,O7")<.R\O.PH)"0EI9B@D=&ED/7XO_7%\O*7L*"0D)"21T:60]?G,O7%\O7"XO9SL*"0D)?0H@("`@("`@"0EP=7-H*$![)'1M<'LD=&ED?7TL(B1C_:')<="1SR(D8VAR7'0D&]N*RL["B`@("`@_("`)"7T*"0E]"@D)"@D)(W-TPH@("`@("`@(`EI9B@D"D["@D)_"0D);7D@)&ED(#T@)&=C:'(@+B`B7R(@+B`D7,@)6)E9"E["@D)<')I;G0@3U54("1B961[)&)E9&]U='!U='T@+B`B7&XB.PH)?0H)_8VQOPH);7D@*"1I9"PD=&]T86Q?97AO_;BPE:6YP=70I(#T@0%\["@EM>2`H)&EC:'(L)&ES=&%R="PD:65N9"PD:7-TPH)"6UY("1S:&]W8V]U;G0@/2`P.PH)"69O7,@)6EN<'5T*7L*"0D)<')I;G0@(B1T:61<;B(["@D)"21S:&]W8V]U;G0K*SL*_"0D);&%S="!I9B@D&]N+G1X_="(["@DC:68H)'1O=&%L7V5X;VX@/B`U,#`I>PH)(PEO<&5N*$]55"PB/CXD9FXB*2!\?"!D:64@(D%B;W)T_:6YG+BX@0V%N)W0@;W!E;B`D9FX@.B`D(5QN(CL*"2,)<')I;G0@3U54("(D:61<="1T;W1A;%]E>&]N7&XB_.PH)(PEC;&]S92A/550I.PH)(WT*"@EM>2`E:6YT2`E:VYO=VYS2!`87)R87D@/2!`>R1I;G!U='LD=&ED?7T["@D);7D@)&YU;2`]('-C_86QA2`H)&-HR(DR@D2`D7,@)6MN;W=NPH)"6YE>'0@:68H7,@)7LD:VYO=VYSR1K;F]W;F5S>R1S:71E?7T@/3T@,2D["@D)<'5S:"A`:VYO=VYE2`D97,H0&MN;W=N97,I>PH)"0EM>2`D8FEN_9"`](#`["@D)"69O&]N,2AK97ES("5[)&MN;W=NPH)"0D)9F]R_96%C:"!M>2`D8FEN9&5X;VXR*&ME>7,@)7LD:VYO=VYE&]N,2!E_<2`D8FEN9&5X;VXR*7L*"0D)"0D))&)I;F0K*SL*"0D)"0E]"@D)"0E]"@D)"0EL87-T(&EF*"1B:6YD(#X@_,2D["@D)"7T*"0D))&EN=')O;GLD2`E;F]V96QI;G1R;VX["@EM>2`E4TIS:71ER`D4TI[)&EC:')]('TI>PH)"6UY("@D:7,L)&EE*2`]('-P_;&ET*"]<="\L)'-J:6YTR1S:FEN=')O;GTI>PH)"0DD;F]V96QI;G1R;VY[)'-J:6YTR1S:FEN=')O;GTK*SL*"0E]96QS97L*"0D);F5X=#L*"0E]"@E]"@H);7D@)6-O;&QE8W1I;VX[_"@EM>2`E<75E=64["@EM>2`E;&%S=#L*"69O7,@)6EN=')O;BE[_"@D);7D@*"1I2`H)'1A2`D=&ED*'-OPH)"0EN97AT(&EF*"1T_:60@97$@(B(I.PH)"0EM>2!`87)R87D@/2!`>R1I;G!U='LD=&ED?7T["@D)"6UY("1N=6T@/2!S8V%L87(@_0&%R2`H)&QA&]N(#T@*"1L87-T&]N86YN;R`]("1E>&]N86YN;WLB_)&QAR1T:61].PH)"0D))'=I;F=S(#T@*"1IR(D;&%S=&-HPH)"0D)"6YE>'0["@D)"0E]"@D)"0EI9B@D:7)M;V1E(#T](#$I>PH)"0D)"21C_;VQL96-T:6]N>R(D;&%S=&-HR(D;&%S=&-HPH)"0D);7D@*"1C:'(R+"1S5LD:5TI.PH)"0D);7D@*"1C:'(S+"1S5LD:2LQ_72D["@D)"0D*"0D)"6QAPH)"0D)"21T87)G971E>&]N(#T@*"1SR1T87)G971E>&]N?7LD=&ED?3L*"0D)"0DD=&%R9V5T_97AO;B`]("@D&]N?2E["@D)"0D)"21Q=65U97LB)&QA&]N?2LK.PH)"0D)"7UE;'-E>PH)"0D)"0EN97AT.PH)"0D)"7T*"0D)"0EI9B@D:7)M;V1E_(#T](#$I>PH)"0D)"0DD8V]L;&5C=&EO;GLB)&EC:')<="1I&]N("X@(EQT(B`N("1E>&]N86YN;SL*"0D)"0E]96QS97L*"0D)_"0D):68H(21N;W9E;&EN=')O;GLD:6YT&]N("X@(EQT(B`N("1E_>&]N86YN;SL*"0D)"0D)?0H)"0D)"7T*"0D)"0EL87-T.PH)"0D)?0H)"0D)"B`@("`@("`@("`@("`@("`C_1F]R(&YO=F5L($E2(&5V96YT2`H)&YE=VES+"1N97=I92D@/2`H*"1E93(K,2DL*"1S&]N86YN;R`](#(["B`@("`@("`@("`@("`@("`);7D@)&MN;W=N25(@/2`P.PH@("`@("`@_("`@("`@("`@"69O7!E+"1C:60I(#T@2D["B`@("`@_("`@("`@("`@("`@("`@"6UY("@D>#%S9BPD>#%E9BPD>#)S9BPD>#)E9BPD>'1E7TI.PH@("`@("`@("`@("`@("`@("`@(`EN_97AT(&EF*"1C='EP92!N92`B4B(I.PH@("`@("`@("`@("`@("`@("`@(`EI9B@D>'1EPH)"0D)_"0D))'%U975E>R(D:6-HPH)"0D)"0D);F5X=#L*"0D)"0D)?0H@("`@("`@("`@("`@("`@_("`@(`DD8V]L;&5C=&EO;GLB)&EC:')<="1N97=I&]N("X@(EQT(B`N("1E>&]N86YN;SL*("`@("`@("`@("`@_("`@(`E]"@D)"0E]"B`@("`@("`@("`@("`@("!L87-T(&EF*"1IPH)"0D)?65LPH)"0D)_"0EM>2`D=&UP:6YT&]N(#T@_*"1I92LQ+21I2`D5%-3(#T@,#L*"0D)"0D):68H(2132G-I=&5[)&-HR@DPH)"0D)"0D);F5X=#L*"0D)"0D)?0H)"0D)"0DD=&ED(#T@(D5X+B(@+B`D=&ED.PH)"0D)"0DD8V]L;&5C_=&EO;GLB)&EC:')<="1T;7!I;G1R;VY<=%-<="1T:60B?2`N/2`B+"(@+B`D=VEN9W,@+B`B7'0B("X@)'1A_R1C:'(R?7LH)&5E_,RLQ*7TI>PH)"0D)"0D))%134RLK.PH)"0D)"0E]"@D)"0D)"6EF*"1I(#T](#`@?'P@)&D@/3T@*"1N=6TM_,BDI>PH)"0D)"0D))'1I9"`](")44U,N(B`N("1T:60@:68H)&YU;2`^(#(@)B8@)%134R`]/2`Q*3L*"0D)_"0D)?0H)"0D)"0EI9B@A)'%U975E>R(D:6-HPH)"0D)"0D))'%U975E>R(D:6-HR(D:6-H&]N86YN_;R`](#(["@D)"0D)"21W:6YG2`D5%-3(#T@_,#L*"0D)"0D):68H(2132G-I=&5[)&-HR@DPH)"0D)"0D);F5X=#L*"0D)"0D)?0H)"0D)_"0DD=&ED(#T@(D5X+B(@+B`D=&ED.PH)"0D)"0DD8V]L;&5C=&EO;GLB)&EC:')<="1T;7!I;G1R;VY<=%-<_="1T:60B?2`N/2`B+"(@+B`D=VEN9W,@+B`B7'0B("X@)'1A&]N"@D)"0D):68H*"1E93(K,2D@/B`D:7,@)B8@)&EE(#T]("@DPH)"0D)"0EM>2`D=&UP:6YT&]N(#T@*"1I2`D5%-3(#T@,#L*"0D)"0D):68H_(2132G-I=&5[)&-HR@DPH)"0D)"0D);F5X=#L*"0D)"0D)?0H)"0D)"0DD=&ED(#T@(D5X_+B(@+B`D=&ED.PH)"0D)"0DD8V]L;&5C=&EO;GLB)&EC:')<="1T;7!I;G1R;VY<=%-<="1T:60B?2`N/2`B_+"(@+B`D=VEN9W,@+B`B7'0B("X@)'1AR1C:'(S?7LD=&%R9V5T97AO;GU[)'1I9'T["@D)"0D))'1A2`D5%-3(#T@,#L*"0D)_"0EI9B@A)%-*R1C:'(R?7LH)&5E,RLQ*7TI>PH)_"0D)"0DD5%-3*RL["@D)"0D)?0H)"0D)"6EF*"1I(#T](#`@?'P@)&D@/3T@*"1N=6TM,BDI>PH)"0D)"0DD_=&ED(#T@(E134RXB("X@)'1I9"!I9B@D;G5M(#X@,B`F)B`D5%-3(#T](#$I.PH)"0D)"7T*"0D)"0EI9B@A_)'%U975E>R(D:6-HPH)_"0D)"0DD<75E=65[(B1I8VAR7'0D:6YT&]N_?2LK.PH)"0D)"7UE;'-E>PH)"0D)"0EN97AT.PH)"0D)"7T*"0D)"0DD8V]L;&5C=&EO;GLB)&EC:')<="1I_;G1R;VY<=%-<="1T:60B?2`N/2`B+"(@+B`D=VEN9W,@+B`B7'0B("X@)'1APH)"0D)"2,D=&%R9V5T97AO;B`]("@D:7,I("X@(EQT(B`N("@D_964R*3L*"0D)"0DD97AO;F%N;F\@/2`D97AO;F%N;F][)&-HR1T87)G971E>&]N?7LD=&ED?3L*"0D)_"0DD=&%R9V5T97AO;B`]("@D:7,M)&ES*2`N(")<="(@+B`H)&5E,BTD:7,I.PH)"0D)"21W:6YGR1C:'(R?7LH)'-S,BTQ*7T@?'P@_(2132G-I=&5[)&-HR@D964S*S$I?2E["@D)"0D)"2144U,K*SL*"0D)"0E]"@D)"0D):68H)&D@/3T@_,"!\?"`D:2`]/2`H)&YU;2TR*2E["@D)"0D)"21T:60@/2`B5%-3+B(@+B`D=&ED(&EF*"1N=6T@/B`R("8F_("144U,@/3T@,2D["@D)"0D)?0H)"0D)"6EF*"$D<75E=65[(B1I8VAR7'0D:6YT&]N?2E["@D)"0D)"21Q=65U97LB)&EC:')<="1I;G1R;VY<=%-<="(@_+B`D=VEN9W,@+B`B7'0B("X@)'1A'0["@D)"0D)_?0H)"0D)"21C;VQL96-T:6]N>R(D:6-H'0@:68H)&D@/3T@,"D["@D)"0EM_>2`H)&-HPH)"0D)"0EN97AT(&EF*"@D964Q*S$I("$]("1I&]N(#T@*"1SR1C:'(R?7LD=&%R9V5T97AO;GU[_)'1I9'T["@D)"0D)"21T87)G971E>&]N(#T@*"1SPH)"0D)"0D)<')I;G0@_(B1T:60@6R1C:'(R+"1S&]N('1Y<&4N7&XB.PH)"0D)"0D)97AI=#L*_"0D)"0D)?0H)"0D)"0EI9B@A)'%U975E>R(D:6-HPH)"0D)"0D))'%U975E>R(D:6-HR(D:6-H&]N_(#T@*"1SR(D8VAR,B)]_>R1T87)G971E>&]N?7LD=&ED?3L*"0D)"0D))'1A&]N86YN;RE[_"@D)"0D)"0EP&ET.PH)"0D)"0E]"@D)"0D)"6EF*"$D<75E=65[(B1I8VAR7'0D:6YT&]N?2E["@D)"0D)"0DD<75E=65[(B1I8VAR7'0D:6YT&]N?2LK.PH)"0D)"0E]96QS97L*"0D)"0D)"6YE>'0[_"@D)"0D)"7T*"0D)"0D))&-O;&QE8W1I;VY[(B1I8VAR7'0D:6YT&]N("X@(EQT(B`N("1E>&]N86YN;SL*"0D)"0E]"@D)"0E]"@H)_"0E]"@D)?0H)?0D*"0H)"2-4=V\@4R!E=F5N=',@8V%N(&-R96%T92!A(&YO=F5L(&5X;VX@:68@=&AE>2!H_879E('1H92!S86UE(&-O;G-T:71U=&EV92!E>&]N2`D;F]V96P@/2`P.PH)"6UY("1CR1E=F5N=#%].PH)"0DD:G5N8W1I;VYS/7YS+UPL+R\[_"@D)"6UY($!J=6YC=&EO;G,@/2!S<&QI="@O7"PO+"1J=6YC=&EO;G,I.PH)"0EN97AT(&EF*'-C86QA2`D;G5M(#T@2`H)&IS,C$L)&IE,C$L)&IS,C(L)&IE,C(I(#T@R@D:F4Q,BLQ*2`N(")<="(@_+B`H)&IS,C(M,2E]*RL["@D)"75N9&5F("1C;VQL96-T:6]N>R1E=F5N=#%].PH)"7T*"0D)"@EF;W)E86-H_(&UY("1T:60H:V5Y&]N*7L*"0DC<')I;G0@(F-H96-K:6YG+BXN(&YO=F5L(&5X;VX@9F]R_("1T:61<;B(["@D);7D@*"1T87)G971E>&]N+"1E>&]N86YN;RD["@D)9F]R96%C:"!M>2`D;&]C*&ME>7,@_)7L@)&YO=F5L97AO;GLD=&ED?2!]*7L*"0D);7D@*"1T97,L)'1E92D@/2!S<&QI="@O7'0O+"1L;V,I.PH)_"0EI9B@A)&EN<'5T>R1T:61]*7L*"0D)"21T:60]?G,O7E134UPN+R\["@D)"7T*"0D);7D@0&%R2`D;G5M(#T@3L*"0D)9F]R*&UY("1I(#T@,#LD_:2`\("@D;G5M+3$I.R1I*RLI>PH)"0D);7D@*"1C:'(R+"1S5LD:5TI.PH)"0D);F5X="!I9B@D2`H)&-H2`D:6YT&]N(#T@*"1T97,M*"1E93(K,2DI("X@(EQT(B`N("@D=&5E+2@D964R*S$I_*3L*"0D)"0DD97AO;F%N;F\@/2`R.PH)"0D)"21C;VQL96-T:6]N>R(D:6-H&]N("X@(EQT(B`N("1E>&]N86YN;SL*"0D)"0EL87-T.PH)"0D)?0H)"0E]"@D)?0H)?0H]96YD"CUC_=70*"0H)(U)E;6]V92!D=7!L:6-A=&5S"@DC1&]N)W0@;F5E9"!T:&ES(&%N>6UO2`E96UP='D["@EF;W)E86-H(&UY("1A,2AS;W)T(&ME>7,@)6-O;&QE8W1I;VXI>PH)"6YE_>'0@:68H(21C;VQL96-T:6]N>R1A,7TI.PH)"6EF*"$D96UP='E[)&$Q?2E["@D)?65L'0[_"@D)?0H)"6UY("@D86-H2!`_83$@/2!S<&QI="@O7'0O+"1C;VQL96-T:6]N>R1A,7TI.PH)"6UY("1L86)E;&$Q(#T@)&$Q6S!=("X@(EQT_(B`N("1A,5LQ72`N(")<="(@+B`D83%;,ET@+B`B7'0B("X@)&$Q6S-=.PH)"69O'0@:68H)&$Q(&5Q("1A,BD["@D)"6YE>'0@:68H(21C;VQL_96-T:6]N>R1A,GTI.PH)"0EM>2`H)&)C:'(L)&)S+"1B92PD8F-A="PD8G1I9"D@/2!S<&QI="@O7'0O+"1A_,BD["@D)"6YE>'0@:68H)&%C870@;F4@)&)C870I.PH)"0EM>2!`83(@/2!S<&QI="@O7'0O+"1C;VQL96-T_:6]N>R1A,GTI.PH)"0EM>2`D;&%B96QA,B`]("1A,ELP72`N(")<="(@+B`D83);,5T@+B`B7'0B("X@)&$R_6S)=("X@(EQT(B`N("1A,ELS73L*"0D):68H)&-O;&QE8W1I;VY[)&$Q?2!E<2`D8V]L;&5C=&EO;GLD83)]_("8F("1A8V%T(&YE(")3(BE["@D)"0DD96UP='E[)&$R?2`](#$["@D)"0EN97AT.PH)"0E]"@D)"6EF*"1L_86)E;&$Q(&5Q("1L86)E;&$R("8F("1A8V%T(&5Q(")3(BE["@D)"0DD96UP='E[)&$R?2`](#$["@D)"0EN_97AT.PH)"0E]"@D)"0H)"7T*"7T*"0H)9F]R96%C:"!M>2`D86-C97-S:6]N*'-OR1A8V-E'0["@D)?0H)?0H]96YD"CUC=70*"B`@("!M>2`D8F5D;W5T<'5T.PH@("`@;7D@_)&1B;W5T<'5T.PH@("`@9F]R96%C:"!M>2`D8RAS;W)T(&ME>7,@)6-O;&QE8W1I;VXI>PH@("`@("`@(&YE_>'0@:68H(21C;VQL96-T:6]N>R1C?2D["B`@("`@("`@;7D@0&=R;W5P2`D:2`](#$[)&D@/"`D;&%S=#LD:2LK*7L*("`@("`@("`);7D@_0'1M<"`]('-P;&ET*"]<="\L)&=R;W5P2`D;F5W=F%L=64["B`@("`@("`@_"6UY("1N=6UT;7`@/2!S8V%L87(@0'1M<#L*"0D)9F]R*&UY("1J(#T@,#LD:B`\("1N=6UT;7`[)&HK*RE[_"@D)"0DC(R-C:&%N9V4@:&5R92!F;W(@=&]M871O"@D)"0DC:68H)&H@/"`H)&YU;71M<"TQ*2E["@D)"0EI_9B@D:B`\("@D;G5M=&UP*2E["@D)"0D):68H(21N97=V86QU92E["@D)"0D)"21N97=V86QU92`]("@D=&UP_6R1J72LD:7-S*3L*"0D)"0E]96QS97L*"0D)"0D))&YE=W9A;'5E("X](")<="(@+B`H)'1M<%LD:ETK)&ES_2`D86-C97-S:6]N(#T@_)&,["B`@("`@("`@)&%C8V5SR1I8VAR?2!]*7L*("`@("`@("`);7D@_*"1S=&%R="PD96YD*2`]('-P;&ET*"]<="\L)&QO8RD["B`@("`@("`@"6YE>'0@:68H)&5N9"`\("1IR1L;V-]_.PH@("`@("`@('T*("`@("`@("`D;F%M97,]?G,O7"U<+"`O+SL*("`@("`@("`*("`@("`@("`*"0DC(R-#_:&5C:R!7(&-O;FYE8W1I=FET>0H@("`@("`@(&EF*"1I8V%T(&5Q(")7(BE["B`@("`@("`@"6UY("1D:7-C_;VYN96-T(#T@,#L*("`@("`@("`)9F]R*&UY("1K(#T@,3LD:R`\("@D;&%S="TQ*3LD:RLK*7L*("`@("`@_("`)"6YE>'0@:68H(21G2`H)&EN=')O;C%S9BPD:6YT2`D:2`](#$[("1I(#P@_PH@("`@("`@(`EM>2`H)&EN=')O;C%S9BPD:6YT&]N86YN;WLD:6-HR1T:61].PH@("`@_("`@(`DD96%N;F\@/2`B+2(@:68H)&5A;FYO(&5Q("(M,2(I.PH@("`@("`@(`DD96%N;F\@/2`B3DU$(B!I_9B@D96%N;F\@97$@(C$B*3L*("`@("`@("`))&5A;FYO(#T@(DYO=F5L(B!I9B@D96%N;F\@97$@(C(B*3L*_("`@("`@("`):68H)&5X;VYA;FYO>R1I8VAR?7LB)'1EPH@("`@_("`@(`D)(W!R:6YT("(D86-C97-S:6]N(%LD:6-H7!E7&XB.PH@("`@("`@(`D))&5A;FYO(#T@(BTB.PH@("`@("`@(`E]"B`@("`@("`@"21D8F]U='!U_="`N/2`B)&EC:')<="1I;G1R;VXQPH)"0EM>2!`=&UP(#T@PH)"0D))&$@/2`H)'1M<%LS72TD=&UP6S)=*S$I("X@(BPP(CL*"0D)"21B(#T@_(C`L(B`N("@D:7-S+21T;7!;,ETI.PH)"0E]96QS97L*"0D)"21A(#T@(C`L(B`N("@D=&UP6S-=+21T;7!;_,ETK,2D["@D)"0DD8B`]("(P+"(@+B`H)'1M<%LR72TD:7-S*3L*"0D)?0H)"7T*"0E]"@D)"@D):68H)&EC_870@97$@(E2!`=&UP(#T@R1I9'T@/2`D8F5D;W5T<'5T.PH)"21D8GLD:61](#T@)&1B;W5T<'5T.PH)?0H*"6UY("1S=&]P=&EM97@@_/2!T:6UE.PH@("`@;7D@)'-E8V]N9',@/2!S<')I;G1F*"(E+C1F(BPH)'-T;W!T:6UE>"TD"DI.PH@("`@"@ER971Uq{_=7-E('-TPH@("`@"2-P&ET_.PH@("`@?0H))&]U='!U=&%S2`E9W)O=7!A.PH@"6]P96XH1DE,12PB)&=R;W5P_82(I('Q\(&1I92`B06)O4-O;W)D7"YO=71<+F)A;2\O.PH)"21A8V-E2`D;&EN93T\1DE,13XI>PH@("`@("`@(&-H;VUP("1L_:6YE.PH@("`@("`@(&YE>'0@:68H)&QI;F4@97$@(B(I.PH@("`@("`@(&UY("1A8V-E7,@)6=R_;W5P82P@7,@)6=R;W5P8BD["B`@("!P2`D;&EN93T\1DE,13XI>PH@_("`@"6-H;VUP("1L:6YE.PH@("`@"6YE>'0@:68H)&QI;F4@97$@(B(I.PH@("`@"6UY("@D8VAR+"1I,7,L_)&DQ92PD:3)S+"1I,F4L)'1ER1A:61]*RL["B`@("!]"B`@("!C;&]S92A&24Q%*3L*/65N9`H]8W5T"@H)_;7D@)&-H96-K9F1R(#T@)&%D:G`["@EM>2`D9F-C7,@)6=R;W5P82`\(#(@?'P@7,@)6=R;W5P8B`\(#(I.PH@_("`@(W!R:6YT(")086ER960@/2`D<&%I2`E=&5V96YT.PH@("`@;7D@)6=R_;W5P86YN;SL*"6EF*"UE(")T979E;G0N='AT(BE["B`)"6]P96XH1DE,12PB=&5V96YT+G1X="(I('Q\(&1I_92`B06)O'0@.B`D(5QN(CL*("`@(`EW:&EL92AM>2`D;&EN_93T\1DE,13XI>PH@("`@"2`@("!C:&]M<"`D;&EN93L*("`@(`D@("`@;F5X="!I9B@D;&EN92!E<2`B(BD[_"B`@("`)("`@("1T979E;G1[)&QI;F5]*RL["B`@("`)?0H@("`@"6-L;W-E*$9)3$4I.PH@("`@"6]P96XH_1DE,12PB9W)O=7!A;FYO+G1X="(I('Q\(&1I92`B06)O'0@_.B`D(5QN(CL*("`@(`EW:&EL92AM>2`D;&EN93T\1DE,13XI>PH@("`@"2`@("!C:&]M<"`D;&EN93L*("`@_(`D@("`@;F5X="!I9B@D;&EN92!E<2`B(BD["B`@("`)("`@(&UY("@D240L)&=R;W5P*2`]('-P;&ET*"]<_="\L)&QI;F4I.PH@("`@"2`@("`D9W)O=7!A;FYO>R1)1'T@/2`D9W)O=7`["B`@("`)?0H@("`@"6-L;W-E_*$9)3$4I.PH@("`@"21PPH@_("`@"6UY("1A8V-ER1S:61]*7L*"0D))&%C8V5S2`E2`D=&%G(#T@)&%C8V5S2`D8V%T.PH@("`@"21C870@_/2`B3B(@:68H)&%C8V5SR(D8V%T7'0D86-C97-S:6]N(GT@/2`D=&%G.PH*("`@(`EI9B@D:F9N/7XO1U1%6"@N_*BE<+E-*7"YO=71<+G1A8B\I>PH@("`@"0DD:F9N/7YS+UQ?3EPN4TI<+F]U=%PN=&%B+UPN4TI<+F]U=%PN_=&%B+SL*("`@(`E]"B`@("`)"B`@("`);7D@)&ER9FX@/2`D:F9N.PH@("`@"2-M>2`D:7)C:&5C:R`](#$[_"B`@("`):68H)&ER8VAE8VL@/3T@,2E["@D)"7!R:6YT(")#:&5C:VEN9R!)4B!R96%DR(D8VAR7'0D2!`87)R87D@/2!S<&QI_="@O7'0O+"1L:6YE*3L*("`@("`@("`@("`@("`@(&UY("@D8VAR+"1S=&%R="PD96YD+"1N=6TI(#T@*"1A_5LP72PD87)R87E;,5TL)&%R5LV72D["B`@("`@("`@("`@("`@("`C:68H)&IF_;CU^+U-*+FEN8VQ/=F5R;&%PF5D(%-*(&9I;&5<;B(["B`@("`@("`@_("`@("`@("`))&YU;2`]("1A5LS72!I9B@D;&]N9W)E860@/3T@,2D["B`@("`@("`@("`@("`@("`)_)&YU;2`]("1A5LT72!I9B@D;&]N9W)E860@/3T@,BD["B`@("`@("`@("`@("`@("!]96QS97L*("`@_("`@("`@("`@("`@(`EPR(D8VAR7'0DR1E;F1]*RL@:68H)&YU;2`^/2`H)&EN=')O86QL8W)I=&5R:6$I*3L*("`@_("`@("`@("`@("`@("1I96%L;'LD8VAR?7LD96YD?7LD'0@:68H)&QI;F4@97$@(B(I.PH@("`@("`@("`@("`@("`@;7D@*"1C:'(L_)&DQ2`H)&%I9"PD96ED*2`]("@D,2PD,BD@:68H)&YA;64]_?B\H+BHI7%\H7&0K*20O*3L*("`@("`@("`@("`@("`@(&UY("1P87-S(#T@,3L*("`@("`@("`@("`@("`@_(&YE>'0@:68H)&YA;64]?B]<7U)<7R\@)B8@)&ER8VAE8VL@/3T@,"D["B`@("`@("`@("`@("`@("!M>2`D_='EP92`](")X(CL*("`@("`@("`@("`@("`@("1T>7!E(#T@(E,B(&EF*"1N86UE/7XO7%]37%\O*3L*("`@_("`@("`@("`@("`@("1T>7!E(#T@(E7!E(#T@(E(B(&EF*"1N86UE/7XO7%]27%\O*3L*"B`@("`@("`@("`@("`@("`C)&-H&]N(#T@(BTB.PH@("`@("`@("`@("`@("`@;7D@)&0@/2`M_,3L*("`@("`@("`@("`@("`@(&EF*"1T>7!E(&5Q(")7(BE["B`@("`@("`@("`@("`@("`))'1A&]N(#T@)'1AR1C:')]>R(D87-<="1A92)]*3L*("`@("`@("`@_("`@("`@(`E]"B`@("`@("`@("`@("`@("!]96QS97L*("`@("`@("`@("`@("`@(`EM>2`H)'1A7!E(&5Q(")2(BE["B`@("`@("`@("`@("`@("`))'1APH@_("`@("`@("`@("`@("`@"21I;C(@/2`D:6YTR(D8VAR7'0D:3)S7'0D:3)E(GT["B`@("`@("`@("`@_("`@("!]"B`@("`@("`@("`@("`@("!I9B@A)&EN=')O;GLB)&-HPH@("`@("`@("`@("`@("`@"21I_;C$@/2`D:6YTR(D8VAR7'0D:3%S7'0D:3%E(GT["B`@("`@("`@("`@("`@("!]"B`@("`@("`@("`@_("`@("!I9B@D:3%S(#T]("1I,64I>PH@("`@("`@("`@("`@("`@"21I;C$@/2`D:6XR.PH@("`@("`@("`@_("`@("`@?0H)"0D):68H)&DRPH@("`@("`@("`@("`@("`@"6EF*"$D_R`DPH@("`@("`@("`@("`@("`@"0D);F5X_="!I9B@D964@/B`D864I.PH@("`@("`@("`@("`@("`@"0D))'-U;7-S*ST@)'-U;7-S>R(D8VAR7'0D87,B_?7LD965].PH@("`@("`@("`@("`@("`@"0E]"@D)"0D)"69OR`D_PH@("`@("`@("`@("`@("`@"0D);F5X="!I9B@DR(D8VAR7'0D864B?7LD#$@/2`H)'-U;7-S*R1S=6UE92DO,CL*("`@("`@("`@_("`@("`@(`D):68H)&EN=')O;GLB)&-H2`D964HR`DPH@("`@("`@("`@("`@("`@"0DC;F5X="!I9B@DR(D8VAR7'0D864B?7LDPH@("`@("`@("`@("`@("`@"0EI_9B@D:3%S(#T]("1I,64I>PH@("`@("`@("`@("`@("`@"0D))&5X,2`]("1S=6USPH@("`@("`@("`@("`@("`@"0D)"21TR1N86UE?2LK.PH@_("`@("`@("`@("`@("`@"0D)?0H@("`@("`@("`@("`@("`@"0E]"B`@("`@("`@("`@("`@("`)"6EF*"1I_,G,@/3T@)&DR92E["B`@("`@("`@("`@("`@("`)"0DD97@Q(#T@)'-U;65E.PH@("`@("`@("`@("`@("`@_"0D):68H)'-U;7-S(#X@,"E["B`@("`@("`@("`@("`@("`)"0D))'1R=65!4U-[)&YA;65]*RL["B`@("`@_("`@("`@("`@("`)"0E]"B`@("`@("`@("`@("`@("`)"7T*("`@("`@("`@("`@("`@(`D):68H)&EN=')O_;GLB)&-H7!E(&5Q(")2(BE["B`@("`@("`@("`@("`@("`)"6EF*"1I2`H)&UA>'-S+"1M87AE92D@/2`H,"PP*3L*("`@("`@("`@("`@("`@(`D)"69O2`D:7)R_96=I;VYSPH@("`@("`@("`@("`@("`@"0D)"6UY("1L;V-A;'-S(#T@,#L*("`@("`@("`@("`@("`@(`D)_"0DD8V]U;G0K*SL*"0D)"0D)"0EF;W)E86-H(&UY("1E92AS;W)T(&ME>7,@)7L@)'-U;7-S>R(D8VAR7'0D_:7)R96=I;VYSPH)"0D)"0D)"0DC;F5X="!I9B@D964@/B`D864@)B8@)'1Y<&4@97$@(E(B*3L*_"0D)"0D)"0D))&QO8V%LR(D8VAR7'0D:7)R96=I;VYSR1E97T["@D)"0D)"0D)_?0H)"0D)"0D)"21M87ASPH@("`@("`@("`@("`@_("`@"0D)"6UY("1L;V-A;'-S(#T@,#L*("`@("`@("`@("`@("`@(`D)"0DD8V]U;G0K*SL*"0D)"0D)"0EF_;W)E86-H(&UY("1S7,@)7L@)'-U;65E>R(D8VAR7'0D:7)R96=I;VYSPH)"0D)_"0D)"0DC;F5X="!I9B@D964@/B`D864@)B8@)'1Y<&4@97$@(E(B*3L*"0D)"0D)"0D))&QO8V%LR(D8VAR7'0D:7)R96=I;VYSR1S2`D;&]C86QE92`](#`[_"B`@("`@("`@("`@("`@("`)"0D))&-O=6YT*RL["@D)"0D)"0D)9F]R96%C:"!M>2`D'0@:68H)&5E(#X@)&%E_("8F("1T>7!E(&5Q(")2(BD["@D)"0D)"0D)"21L;V-A;&5E("L]("1S=6UE97LB)&-HPH@("`@("`@("`@("`@("`@"0D)"6UY("1L;V-A;&5E(#T@,#L*("`@("`@("`@("`@("`@(`D)"0DD_8V]U;G0K*SL*"0D)"0D)"0EF;W)E86-H(&UY("1E92AS;W)T(&ME>7,@)7L@)'-U;7-S>R(D8VAR7'0D:7)R_96=I;VYE92)]('TI>PH)"0D)"0D)"0DC;F5X="!I9B@D964@/B`D864@)B8@)'1Y<&4@97$@(E(B*3L*"0D)_"0D)"0D))&QO8V%L964@*ST@)'-U;7-S>R(D8VAR7'0D:7)R96=I;VYE92)]>R1E97T["@D)"0D)"0D)?0H)_"0D)"0D)"21M87AE92`]("1L;V-A;&5E(&EF*"1M87AE92`\("1L;V-A;&5E*3L*("`@("`@("`@("`@("`@_(`D)"7T*("`@("`@("`@("`@("`@(`D)"21M87AS&5E(#T@)'-U;65E(&EF*"1M87AE92`\("1S=6UE92D["B`@("`@("`@_("`@("`@("`)"0DC)&5X,2`]("@D;6%X&5E*7L*("`@("`@("`@("`@("`@(`D)"0DD97@Q(#T@)&UA>'-S.PH@("`@("`@("`@_("`@("`@"0D)?65L'0@:68H)'-U;7-S(#X@*"1S=6UE92HR*2!\?"`DPH@("`@("`@("`@("`@("`@"6EF*"$D:6YTR(D8VAR_7'0D:3)S7'0D:3)E(GTI>PH@("`@("`@("`@("`@("`@"0DD:6XQ(#T@(FYA(CL*("`@("`@("`@("`@("`@_(`D))&EN,B`]("1I;C$["B`@("`@("`@("`@("`@("`)"6YE>'0["B`@("`@("`@("`@("`@("`)?65LR(D8VAR7'0D:3)S7'0D:3)E(GT@+2`Q_.PH@("`@("`@("`@("`@("`@"0DD:6XR(#T@)&EN,3L*("`@("`@("`@("`@("`@(`E]"B`@("`@("`@("`@_("`@("!]"@H@("`@("`@("`@("`@("`@)'1A7!E(&5Q(")2(BE["B`@("`@("`@("`@(`D))&5X,2`]_("1I;C$K)&5X,3L*("`@("`@("`@("`@"0DC25(@97%U86QI='D*("`@("`@("`@("`@"0EI9B@D:7)C;&5A_;B`A/2`P*7L*("`@("`@("`@("`@("`@(`D):68H)'-U;7-S(#T](#`@?'P@)'-U;65E(#T](#`I>PH@("`@_("`@("`@("`@("`@"0D))&EN,2`](#`["B`@("`@("`@("`@("`@("`)"0DD:6XR(#T@,#L*("`@("`@("`@_("`@("`@(`D)"21E>#$@/2`D8W)I=&5R:6$["B`@("`@("`@("`@("`@("`)"7T*("`@("`@("`@("`@("`@_(`D):68H)'-U;7-S("$](#`@)B8@)'-U;65E("$](#`I>PH@("`@("`@("`@("`@("`@"0D):68H*&%BPH@("`@("`@("`@("`)"21D96YO;6EN871O#$["B`@("`@("`@("`@(`E]"B`@("`@("`@("`@(`D*"0D)_"6YE>'0@:68H)&5X,2`\("1C&]N7'0D:3%S+"1I,64L)&DR&]N7'0D:3%S+"1I,64L)&DRPH@("`@("`@("`@("`@("`@"21N86UE&]N7'0D:3%S+"1I,64L_)&DR7,@)6YA;65S*3L*"7!R:6YT(").=6UB97(@;V8@_979E;G1S(#T@)&YC7&XB.PH)<')I;G0@(DYU;6)E2`D_'0@:68H)'-A;7!L92!E<2`B(BD["@D)"6UY("@D_8V%T+"1A8V-E'0@:68H)&ME>2!E<2`B(BD["@D)"6UY("@D:60L)'1A2`D_2`E9FEN86P["@H)_:68H)'!R:6YT9V-T(#T](#$I>PH)"6]P96XH1T-4+"`B/B1O=71P=71A7,@)71E=F5N=#L*"0EPR1S86UP_;&5]("X@(BDB("X@)'-A;7!L92!I9BAS8V%L87(@:V5Y2`D979E;G0H'0@:68H)&YA;65S>R1E=F5N='T@/B`P*3L*"0EM>2`H)&YA;64L)'1AR1N86UE?2E["@D)?65L'0@:68H)'-K:7![)&YA;65]_(#T]("1N2`D86-C*$!GPH)"0EM>2`H)&-A_="PD2`D7,@)7-A;7!L97,I_>PH)"0DC;7D@*"1C870L)'-A;7!L92D@/2!S<&QI="@O7'0O+"1S86UP;&5A8V,I.PH)"0EN97AT(&EF*"1S_86UP;&4@97$@(B(I.PH)"0EI9B@D8V%T(&5Q(").(BE["@D)"0EI9B@A)&]U='!U='LB)'-A;7!L92(@+B`B_7TXB("X@(EQT)&YA;64B?2E["@D)"0D))&=C=&Y[)'-A;7!L97T@/2`B;F$B.PH)"0D)"21N("X]("(L(&YA_(CL*"0D)"7UE;'-E>PH)"0D)"6UY("1V86QU92`]("1O=71P=71[)'-A;7!L92`N(")?3B(@+B`B7'0D;F%M_92)]*C$P,#L*"0D)"0DD9V-T;GLDR).7'0DPH)_"0D)"0DD;B`N/2`B+"`B("X@)'9A;'5E.PH)"0D)"7T*"0D)"0DD;GLD=F%L=65]*RL["@D)"0E]"@D)"7T*_"0D):68H)&-A="!E<2`B5"(I>PH)"0D):68H(21O=71P=71[(B1S86UP;&4B("X@(E]4(B`N(")<="1N86UE_(GTI>PH)"0D)"21G8W1T>R1S86UP;&5](#T@(FYA(CL*"0D)"0DD="`N/2`B+"!N82(["@D)"0E]96QS97L*_"0D)"0EM>2`D=F%L=64@/2`D;W5T<'5T>R1S86UP;&4@+B`B7U0B("X@(EQT)&YA;64B?2HQ,#`["@D)"0D)_)&=C='1[)'-A;7!L97T@/2`D=F%L=64["@D)"0D))'9A;'5E(#T@,"!I9B@D=F%L=64@/3T@+3$P,"D["@D)_"0D)<'5S:"A`="PD=F%L=64I.PH)"0D)"6EF*"1S:&]W:60@/3T@,2E["@D)"0D)"21T("X]("(L*"(@+B`D_PH)"7UE;'-E>PH)"0EI9B@D_<')I;G1G8W0@/3T@,2E["@D)"0EP2`DPH)"0D)"6EF*"$D9V-T;GLDR1S86UP;&5](#T]("TQ,#`I>PH)"0D)"0D)<')I;G0@1T-4(")<="(@+B`B,"([_"@D)"0D)"7UE;'-E>PH)"0D)"0D)<')I;G0@1T-4(")<="(@+B`D9V-T;GLDPH)"0D)"7UE;'-E>PH)"0D)"0EI_9B@D9V-T='LDPH)"0EN97AT_(&EF*"1N=6U?;B`]/2`Q('Q\("1N=6U?="`]/2`Q*3L*"0E]"@D*"0EN97AT(&EF*"$D;B!\?"`A)'0I.PH)_"0H)"6EF*"1P86ER960@/#T@,2E["@D)"6YE>'0@:68HR1T87)G971E>&]N("X@(EQT(B`N("1W:6YG&]N+"1W:6YG&]N+"1W:6YG&]N+"1W:6YG7!E_7'12969ER1T86=]*3L*"0EM>2`H)'1A2!`87)R87D@/2!S<&QI="@O7'PO_+"1F:6YA;'LD=&%G?2D["@D);7D@)&UI;F1I&1I_9F8@/2`P.PH)"6UY("1M:6YP(#T@,3L*"0EM>2`D;6%X:60@/2`P.PH)"6UY("1G;B`]("(M(CL*"0EM>2`D_;7AE(#T@,#L*"0EI9B@D8V%T(&5Q(")2(BE["@D)"21M87AI9"`](#$["@D)?0H)"6EF*'-C86QA3LD:2LK*7L*"0D);7D@*"1N86UE_+"1W:6YGPH)"0D);F5X="!I9B@A)'1R=65!4U-[)&YA;65]*3L*"0D)"6UY("1T2!`=VEN9VQO8R`]('-P;&ET*"]<+"\L)'=I;F=S*3L*"0D))&=N(#T@)&=X;CL*"0D);7D@_*"1C:'(L)'-T87)T+"1E;F0L)'1Y<&4L)')E9BPD96ED*2`]('-P;&ET*"]<7R\L)')E'0@:68H_(21I;G1R;VYA;&Q[)&-H7!E(&5Q(")2(BD["@D)"6UY("1I8V]U_;G0@/2`P.PH*"0D);7D@*"1I7,@)7L@)&ES86QL>R1C:')]>R@DR1C:')]>R1S_=&%R='U[)&ES86QL96YD?2E["@D)"0D)"6YE>'0["@D)"0D)"7!R:6YT("(H:7-A;&PI("1C:'(Z)'-T87)T_+21E;F0@:&%S('IE&ET.PH)"0D)"7T*"0D)"0EN97AT(&EF*"1IR1C:')]>R1S=&%R='U[)&ES86QL96YD?3L*_"0D)"7T*"0D)"21IR1C:')]>R1W:6YG;&]C6S-=?7LD=VEN_9VQO8ULR77TI>PH)"0E]96QS97L*"0D)"69OR1C:')]>R1E;F1]>R1I'0["@D)"0D)"7!R:6YT("(H:65A;&PI("1C:'(Z)'-T87)T+21E;F0@:&%S('IE&ET.PH)"0D)"7T*"0D)"0EN97AT(&EF*"1I'0@:68H)&ES=6USR1C:')]>R(DPH@("`@("`@("`@("`))&EC_;W5N="`K/2`D:6YTR1C:')]>R(D&1I9F8@/2`D:6-O_=6YT.PH)"0D))&UA>&ED(#T@)&D["@D)"0DD;6EN9&ES="`]("@D96YD("T@)'-T87)T*2`K(#$["@D)"7T*_"@D)?0H)"7T*"0D*"0EI9BAS8V%L87(@0&%RPH)"0DD;6%X:60@/2`Q.PH)"7T*("`@("`@_("`@"@D);F5X="!I9B@D;6%X:60@/3T@,"`F)B`D8V%T(&YE(")2(BD["@D);7D@*"1N86UE+"1W:6YG&ED72D[_"@D);7D@*"1G>&XL)')E7!E+"1R968L)&5I9"D@/2!S<&QI="@O7%\O+"1R97-T*3L*"0DC)&-H&]N+RE["@D)"6UY("1T;7!E>&]N(#T@)'1A&]N/7YS+UPM+UPZ+SL*"0D);7D@*"1E8VAR+"1E2`H)&ES=&%R="PD:65N9"D@/2!S<&QI="@O7'0O+"1I;&]C*3L*_"0D)"6EF*"1IPH)"0D)"21F;W5N9"`](#$["@D)"0D);&%S_=#L*"0D)"7T*"0D)"6EF*"1IPH)"0D)"21F;W5N9"`](#$[_"@D)"0D);&%S=#L*"0D)"7T*"0D)?0H)"0EN97AT(&EF*"1F;W5N9"`]/2`Q*3L*"0E]"@H)"6UY("@D;W)G_86XL)$5.4U0I(#T@*"(B+"(B*3L*"0EI9B@D;F%M93U^+UQ?14XH7'6YT:&5T:6,B+"1T;7!N86UE*3L*("`@("`@("!]"B`@("`@("`@:68H)$5.4U0@_97$@(B(I>PH@("`@("`@(`EM>2!`2!`9F1R.PH):68H)'!A:7)E9"`]/2`R*7L*"0E`9F1R(#T@0&9I;F%L<'9A;'5E.PH)?65LPH)"0DD9F1R(#T@0D@H7$!F:6YA;'!V86QU92D["@D)"4!F9'(@/2!`)&9D_2`D:2`](#`[)&D@/"!S8V%L87(@0&9I;F%L;W5T<'5T.R1I*RLI>PH)"6UY("@D979E;G0L)&=X;BPD14Y3_5"PD8V%T+"1T87)G971E>&]N+"1C;BPD8W0L)&%N;F\L)&1I9F8L)'!V86QU92PD;G9A;'5E+"1T=F%L=64I_(#T@2`D9FEN86QO=71P=70@/2`B)&5V96YT7'0D_9WAN7'0D=&%R9V5T97AO;EQT)&-A=%QT)&-N7'0D8W1<="1A;FYO7'0D14Y35"(["@D)<')I;G1F($]55"`D_9FEN86QO=71P=70@+B`B7'0E+C)F7'0E+C5E7'0E+C5E7'0D;G9A;'5E7'0D='9A;'5E7&XB+"`D9&EF9BPD_<'9A;'5E+"1F9');)&E=.PH)?0H)8VQOPH*"0EM>2`H)&XL_)'0I(#T@0%\["@D);7D@*"1A=F=N+"1A=F=T*2`]("AA=F5R86=E>"@D;BDL879E2`H)'!?,G1A:6PL)&1I9F8I(#T@*")N86XB+#`I.PH)"7)E='5R;B`D<%\R=&%I;"PD9&EF9B!I9B@D879G_;B`]/2`D879G="D["@D);7D@0&X@/2!`)&X["@D);7D@0'0@/2!`)'0["@D)"@D):68H)'9APH)"0EM>2`E;CL*"0D)9F]R96%C:"!M>2`D=F%L=64H0&XI>PH)"0D))&Y[)'9A;'5E?2LK.PH)"0E]_"@D)"6UY("5T.PH)"0EF;W)E86-H(&UY("1V86QU92A`="E["@D)"0DD='LD=F%L=65]*RL["@D)"7T*"0D)_;7D@*"1N=6U?;BPD;G5M7W0I(#T@*'-C86QA2`H)'1S=&%TPH)"0DH)'1S=&%T2@D9&%T82D@/2!`7SL*("`@("`@("!I9BA`)&1A=&$@/3T@,2E["B`@_("`@("`@("`@("`@("!R971U'L*("`@("`@("!M>2@D9&%T82D@/2!`7SL*("`@("`@("!I9B`H;F]T($`D9&%T_82D@>PH@("`@("`@("`@("`@("`@9&EE*")%;7!T>2!A5QN(BD["B`@("`@("`@?0H@("`@("`@(&UY_("1T;W1A;"`](#`["B`@("`@("`@9F]R96%C:"`H0"1D871A*2!["B`@("`@("`@("`@("`@("`D=&]T86P@_*ST@)%\["B`@("`@("`@?0H@("`@("`@(&UY("1A=F5R86=E(#T@)'1O=&%L("\@0"1D871A.PH@("`@("`@3(')E='5R;B`D879E