├── bin ├── README ├── Arima │ ├── README.md │ ├── sum_anchor_reads.py │ ├── bam_to_temp_HiC.pl │ ├── remove.blacklist.py │ ├── integrated.r │ ├── get_dist.py │ ├── add.vis.to.cis.2M.pl │ ├── get_corr_factor_by_len.py │ ├── split_chromo.py │ ├── pick.dist.pl │ ├── summary_sorted_trans_frag_loop.pl │ ├── remove_dup_PE_SAM_sorted.pl │ ├── fragdata_to_anchordata.pl │ ├── correct.trans.reads.by.corr.pl │ ├── summary_sorted_frag_loop.pl │ ├── list_full_matrix.pl │ ├── Arima.sh │ ├── get_trans.avg_by_len.pl │ ├── pairing_two_SAM_reads.pl │ ├── reads_2_trans_frag_loop.pl │ ├── get_loop_lambda.pl │ └── merge_sorted_anchor_loop.pl ├── generateReference_lib │ ├── README.md │ ├── remove.blacklist.py │ ├── get_group_range.pl │ ├── get_aveg_frag_length.py │ ├── sites_to_frag.py │ ├── sequence_match.pl │ ├── find_RE_sites.pl │ ├── list_full_matrix.pl │ ├── count_trans_pairs_by_GC.pl │ ├── count_cis_pairs_by_GC.pl │ └── count_cis_2M_pair.pl ├── microC │ ├── README.md │ ├── calculate_vis.py │ ├── remove.blacklist.py │ ├── integrated.r │ ├── get_dist.py │ ├── add.vis.to.cis.2M.pl │ ├── split_chromo.py │ ├── fragdata_to_anchordata.pl │ ├── get_loop_lambda.pl │ ├── list_full_matrix.pl │ ├── get_group_statistics.pl │ └── merge_sorted_anchor_loop.pl ├── HindIII │ ├── sum_frag_reads_2.py │ ├── remove_outlier.py │ ├── get_anchor_pval.r │ ├── get_corr_factor_by_GC.pl │ ├── test_frag_corr.py │ ├── fragdata_to_anchordata.pl │ ├── batch_anchor_by_chrom.pl │ ├── get_loop_lambda_GC_correct.pl │ └── get_cis_avg_by_GC.pl ├── eHiC │ ├── remove_outlier.py │ ├── get_anchor_pval.r │ ├── sum_frag_reads_2.py │ ├── remove_outlier_ELPU.py │ ├── split_list_by_group.pl │ ├── ends_count_to_frag_count.py │ ├── remove_ends_without_HD.py │ ├── get_group_range.pl │ ├── get_corr_factor_by_GC.pl │ ├── fragdata_to_anchordata.pl │ ├── merge_and_resort_end_loop.py │ ├── test_frag_corr_eHiC.py │ ├── batch_anchor_by_chrom.pl │ ├── get_loop_lambda_GC_correct.pl │ ├── get_trans_avg_by_GC.pl │ └── model_fit.r ├── 
select_anchor.sh ├── select_loop.py ├── DPNII │ ├── sum_anchor_reads_DPNII.py │ ├── bam_to_temp_HiC_DPNII.pl │ ├── integrated.r │ ├── remove.blacklist_DPNII.py │ ├── get_dist_DPNII.py │ ├── add.vis.to.cis.2M_DPNII.pl │ ├── get_corr_factor_by_len_DPNII.py │ ├── split_chromo.py │ ├── pick.dist.pl │ ├── summary_sorted_trans_frag_loop_DPNII.pl │ ├── remove_dup_PE_SAM_sorted_DPNII.pl │ ├── fragdata_to_anchordata_DNPII.pl │ ├── correct.trans.reads.by.corr_DPNII.pl │ ├── summary_sorted_frag_loop_DPNII.pl │ ├── list_full_matrix_DPNII.pl │ ├── get_trans.avg_by_len_DPNII.pl │ ├── DPNII.sh │ ├── pairing_two_SAM_reads_DPNII.pl │ ├── reads_2_trans_frag_loop_DPNII.pl │ └── get_loop_lambda_DPNII.pl ├── bam_to_temp_HiC.pl ├── preprocess │ ├── bam_to_temp_HiC.pl │ ├── bam_to_temp.pl │ ├── reformat_fastq.py │ ├── summary_sorted_trans_frag_loop.pl │ ├── remove_dup_PE_SAM_sorted.pl │ ├── resort_by_frag_id.pl │ ├── summary_sorted_frag_loop.pl │ ├── bam_to_frag_loop.sh │ ├── pairing_two_SAM_reads.pl │ ├── reads_2_trans_frag_loop.pl │ └── generate_data_matrix.pl ├── bam_to_temp.pl ├── eHiC-QC │ ├── bam_to_temp.pl │ ├── reform_end_id.py │ ├── end_id_to_original.py │ ├── reformat_fastq.py │ ├── summary_sorted_trans_frag_loop.pl │ ├── remove_dup_PE_ELPU.pl │ ├── resort_by_frag_id.pl │ ├── summary_sorted_frag_loop.pl │ └── eHiC-QC.sh ├── reformat_fastq.py ├── plot.heatmap.r ├── plot.multiple.r ├── summary_sorted_trans_frag_loop.pl ├── draw_heatmap.sh ├── resort_by_frag_id.pl ├── summary_sorted_frag_loop.pl ├── template.r ├── bam_to_frag_loop.sh ├── reads_2_trans_frag_loop.pl └── generate_data_matrix.pl ├── png ├── README.md ├── hg19.full.matrix.PNG ├── hg19.DPNII.blacklist.PNG ├── hg19.DPNII.frag.bed.PNG ├── hg19.dist.401.group.PNG ├── hg19.full.dist.len.stat.PNG ├── hg19.trans.possible.pairs.PNG ├── hg19_DPNII_frag_2_anchor.PNG ├── hg19_anchor_length.groups.PNG ├── hg19_5kb_anchors_blacklist.PNG ├── hg19_DPNII_anchors_avg.bed.PNG ├── hg19.HindIII.chr11_130000000_130800000.expt.matrix.png 
├── hg19.HindIII.chr11_130000000_130800000.raw.matrix.png └── hg19.HindIII.chr11_130000000_130800000.ratio.matrix.png ├── HiCorr_heatmap.sh ├── documents ├── lib │ ├── README.md │ ├── remove.blacklist.py │ ├── get_group_range.pl │ ├── get_aveg_frag_length.py │ ├── find_RE_sites.pl │ ├── sites_to_frag.py │ ├── sequence_match.pl │ ├── list_full_matrix.pl │ ├── count_trans_pairs_by_GC.pl │ ├── count_cis_2M_pair.pl │ └── count_cis_pairs_by_GC.pl ├── HiCorr_micro-C.md ├── HiCorr_insituHi-C.md ├── HiCorr_Arima.md ├── Arima.preprocessing.allValidPairs.sh ├── Generate.reference.5kb_bin.md ├── HiCorr_heatmap.old.md └── old.md.left.md ├── HiCorr ├── HiCorr_micro-C.sh ├── HiCorr_Arima.sh ├── HiCorr_DPNII.sh └── README.md /bin/README: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /png/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /HiCorr_heatmap.sh: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /bin/Arima/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /documents/lib/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /bin/generateReference_lib/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /bin/microC/README.md: -------------------------------------------------------------------------------- 1 | Scripts for 
micro-C HiCorr 2 | -------------------------------------------------------------------------------- /png/hg19.full.matrix.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.full.matrix.PNG -------------------------------------------------------------------------------- /png/hg19.DPNII.blacklist.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.DPNII.blacklist.PNG -------------------------------------------------------------------------------- /png/hg19.DPNII.frag.bed.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.DPNII.frag.bed.PNG -------------------------------------------------------------------------------- /png/hg19.dist.401.group.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.dist.401.group.PNG -------------------------------------------------------------------------------- /png/hg19.full.dist.len.stat.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.full.dist.len.stat.PNG -------------------------------------------------------------------------------- /png/hg19.trans.possible.pairs.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.trans.possible.pairs.PNG -------------------------------------------------------------------------------- /png/hg19_DPNII_frag_2_anchor.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19_DPNII_frag_2_anchor.PNG -------------------------------------------------------------------------------- /png/hg19_anchor_length.groups.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19_anchor_length.groups.PNG -------------------------------------------------------------------------------- /png/hg19_5kb_anchors_blacklist.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19_5kb_anchors_blacklist.PNG -------------------------------------------------------------------------------- /png/hg19_DPNII_anchors_avg.bed.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19_DPNII_anchors_avg.bed.PNG -------------------------------------------------------------------------------- /png/hg19.HindIII.chr11_130000000_130800000.expt.matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.HindIII.chr11_130000000_130800000.expt.matrix.png -------------------------------------------------------------------------------- /png/hg19.HindIII.chr11_130000000_130800000.raw.matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.HindIII.chr11_130000000_130800000.raw.matrix.png -------------------------------------------------------------------------------- /png/hg19.HindIII.chr11_130000000_130800000.ratio.matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.HindIII.chr11_130000000_130800000.ratio.matrix.png 
-------------------------------------------------------------------------------- /bin/HindIII/sum_frag_reads_2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | dic={} 6 | for line in sys.stdin: 7 | frag1,frag2,reads=line.rstrip().split('\t')[0:3] 8 | if frag1 not in dic: 9 | dic[frag1]=0 10 | dic[frag1] += float(reads) 11 | 12 | for frag in dic: 13 | print(frag,str(dic[frag]),sep='\t') 14 | -------------------------------------------------------------------------------- /bin/eHiC/remove_outlier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | dic={} 6 | outlier=open(sys.argv[1]) 7 | for line in outlier.readlines(): 8 | frag=line.rstrip().split('\t')[3] 9 | dic[frag]="" 10 | outlier.close() 11 | 12 | for line in sys.stdin: 13 | frag1,frag2=line.split('\t')[0:2] 14 | if frag1 not in dic and frag2 not in dic: 15 | print line.rstrip() 16 | -------------------------------------------------------------------------------- /bin/HindIII/remove_outlier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | dic={} 6 | outlier=open(sys.argv[1]) 7 | for line in outlier.readlines(): 8 | frag=line.rstrip().split('\t')[3] 9 | dic[frag]="" 10 | outlier.close() 11 | 12 | for line in sys.stdin: 13 | frag1,frag2=line.split('\t')[0:2] 14 | if frag1 not in dic and frag2 not in dic: 15 | print(line.rstrip()) 16 | -------------------------------------------------------------------------------- /bin/eHiC/get_anchor_pval.r: -------------------------------------------------------------------------------- 1 | # NEED TO UPDATE THIS SLOPE VALUE EVERY TIME 2 | slope <- 2 3 | 4 | data <- read.table("FILE", col.names=c("gid1", "gid2", "val", "rand")) 5 | attach(data) 6 | prob <- 1 - 1/slope 7 | r <- rand / (slope - 1) 8 | r[r==0] <- 1e5 9 | data$pval <- 
pnbinom(val - 1, size=r, mu=rand, lower.tail=FALSE) 10 | detach() 11 | write.table(data, file="FILE.p_val", row.names=FALSE, col.names=FALSE, sep="\t", quote=FALSE) 12 | 13 | -------------------------------------------------------------------------------- /bin/HindIII/get_anchor_pval.r: -------------------------------------------------------------------------------- 1 | # NEED TO UPDATE THIS SLOPE VALUE EVERY TIME 2 | slope <- 2 3 | 4 | data <- read.table("FILE", col.names=c("gid1", "gid2", "val", "rand")) 5 | attach(data) 6 | prob <- 1 - 1/slope 7 | r <- rand / (slope - 1) 8 | r[r==0] <- 1e5 9 | data$pval <- pnbinom(val - 1, size=r, mu=rand, lower.tail=FALSE) 10 | detach() 11 | write.table(data, file="FILE.p_val", row.names=FALSE, col.names=FALSE, sep="\t", quote=FALSE) 12 | 13 | -------------------------------------------------------------------------------- /bin/select_anchor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | chr=$1 4 | beg=$2 5 | end=$3 6 | ref=$4 7 | genome=$5 8 | enzyme=$6 9 | 10 | let range=$end-$beg 11 | if [ $range -gt 2000000 ];then 12 | echo "Please enter an region <=2,000,000bp" 13 | else 14 | awk -v chr=$chr -v beg=$beg -v end=$end '{OFS="\t";if($1==chr && $2>=beg && $3<=end)print $1,$2,$3,$4}' $ref/$enzyme/${genome}_${enzyme}_anchors_avg.bed > ${genome}.anchors_${chr}_${beg}_${end}.bed 15 | fi 16 | -------------------------------------------------------------------------------- /bin/microC/calculate_vis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | dic={} 6 | sums=0 7 | count=0 8 | for line in sys.stdin: 9 | if not line: 10 | break 11 | frag1,frag2,reads=line.rstrip().split('\t')[0:3] 12 | if frag1 not in dic: 13 | dic[frag1]=0 14 | count+=1 15 | dic[frag1] += float(reads) 16 | sums+=float(reads) 17 | 18 | 19 | avg=float(sums)/count 20 | 21 | for frag in dic: 22 | # print 
frag+"\t"+str(dic[frag]) 23 | print(frag + '\t' + str(dic[frag]/avg)) 24 | -------------------------------------------------------------------------------- /bin/select_loop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | dic={} 6 | anchor_bed=open(sys.argv[1]) 7 | for line in anchor_bed.readlines(): 8 | chr,start,end,id=line.rstrip().split('\t') 9 | dic[id]="" 10 | anchor_bed.close() 11 | 12 | loop=open(sys.argv[2]) 13 | while True: 14 | line=loop.readline() 15 | if not line: 16 | break 17 | anchor1,anchor2,obs,expt=line.rstrip().split('\t') 18 | if anchor1 in dic and anchor2 in dic: 19 | print(line.rstrip()) 20 | loop.close() 21 | -------------------------------------------------------------------------------- /bin/Arima/sum_anchor_reads.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | file=open(sys.argv[1]) 6 | line=file.readline() 7 | #prev_frag=line.split('\t')[0] 8 | #sum=int(line.split('\t')[2]) 9 | dic={} 10 | while True: 11 | line=file.readline() 12 | if not line: 13 | break 14 | frag1,frag2,reads=line.rstrip().split('\t')[0:3] 15 | if frag1 not in dic: 16 | dic[frag1]=0 17 | dic[frag1] += float(reads) 18 | file.close() 19 | 20 | for frag in dic: 21 | print(frag+"\t"+str(dic[frag])) 22 | -------------------------------------------------------------------------------- /bin/DPNII/sum_anchor_reads_DPNII.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | file=open(sys.argv[1]) 6 | line=file.readline() 7 | #prev_frag=line.split('\t')[0] 8 | #sum=int(line.split('\t')[2]) 9 | dic={} 10 | while True: 11 | line=file.readline() 12 | if not line: 13 | break 14 | frag1,frag2,reads=line.rstrip().split('\t')[0:3] 15 | if frag1 not in dic: 16 | dic[frag1]=0 17 | dic[frag1] += float(reads) 18 | file.close() 19 | 20 | for 
frag in dic: 21 | print(frag, str(dic[frag]),sep='\t') 22 | -------------------------------------------------------------------------------- /bin/bam_to_temp_HiC.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | 4 | my $bed = "-"; 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n"); 6 | while(my $line = ){ 7 | chomp $line; 8 | my $str1="+"; 9 | my $str2="+"; 10 | my @a = split "\t", $line; 11 | if($a[0]&16){ 12 | $str1="-"; 13 | } 14 | if($a[0]&32){ 15 | $str2="-"; 16 | } 17 | if($a[5] eq "="){ 18 | $a[5]=$a[1]; 19 | } 20 | print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2)."\n"; 21 | } 22 | close(IN); 23 | exit; 24 | 25 | -------------------------------------------------------------------------------- /bin/eHiC/sum_frag_reads_2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | #file=open(sys.argv[1]) 6 | #line=file.readline() 7 | #prev_frag=line.split('\t')[0] 8 | #sum=int(line.split('\t')[2]) 9 | dic={} 10 | for line in sys.stdin: 11 | #line=file.readline() 12 | #if not line: 13 | # break 14 | frag1,frag2,reads=line.rstrip().split('\t')[0:3] 15 | if frag1 not in dic: 16 | dic[frag1]=0 17 | dic[frag1] += float(reads) 18 | #file.close() 19 | 20 | for frag in dic: 21 | print frag+"\t"+str(dic[frag]) 22 | -------------------------------------------------------------------------------- /bin/Arima/bam_to_temp_HiC.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | 4 | my $bed = "-"; 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n"); 6 | while(my $line = ){ 7 | chomp $line; 8 | my $str1="+"; 9 | my $str2="+"; 10 | my @a = split "\t", $line; 11 | if($a[0]&16){ 12 | $str1="-"; 13 | } 14 | if($a[0]&32){ 15 | $str2="-"; 16 | } 17 | if($a[5] eq "="){ 18 | $a[5]=$a[1]; 19 | } 20 | print 
join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2)."\n"; 21 | } 22 | close(IN); 23 | exit; 24 | 25 | -------------------------------------------------------------------------------- /bin/eHiC/remove_outlier_ELPU.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | dic={} 6 | outlier=open(sys.argv[1]) 7 | for line in outlier.readlines(): 8 | frag=line.rstrip().split('\t')[3] 9 | dic[frag]="" 10 | outlier.close() 11 | 12 | file=open(sys.argv[2]) 13 | while True: 14 | line=file.readline() 15 | if not line: 16 | break 17 | frag1,frag2=line.split('\t')[0:2] 18 | #frag1=frag1[:-1] 19 | #frag2=frag2[:-1] 20 | if frag1 not in dic and frag2 not in dic: 21 | print line.rstrip() 22 | file.close() 23 | -------------------------------------------------------------------------------- /bin/DPNII/bam_to_temp_HiC_DPNII.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | 4 | my $bed = "-"; 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n"); 6 | while(my $line = ){ 7 | chomp $line; 8 | my $str1="+"; 9 | my $str2="+"; 10 | my @a = split "\t", $line; 11 | if($a[0]&16){ 12 | $str1="-"; 13 | } 14 | if($a[0]&32){ 15 | $str2="-"; 16 | } 17 | if($a[5] eq "="){ 18 | $a[5]=$a[1]; 19 | } 20 | print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2)."\n"; 21 | } 22 | close(IN); 23 | exit; 24 | 25 | -------------------------------------------------------------------------------- /bin/preprocess/bam_to_temp_HiC.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | 4 | my $bed = "-"; 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n"); 6 | while(my $line = ){ 7 | chomp $line; 8 | my $str1="+"; 9 | my $str2="+"; 10 | my @a = split "\t", $line; 11 | if($a[0]&16){ 12 | $str1="-"; 13 | } 14 | if($a[0]&32){ 15 | $str2="-"; 16 | } 17 | if($a[5] eq "="){ 18 | $a[5]=$a[1]; 
19 | } 20 | print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2)."\n"; 21 | } 22 | close(IN); 23 | exit; 24 | 25 | -------------------------------------------------------------------------------- /bin/bam_to_temp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | 4 | my $bed = "-"; 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n"); 6 | while(my $line = ){ 7 | chomp $line; 8 | my $str1="+"; 9 | my $str2="+"; 10 | my @a = split "\t", $line; 11 | if($a[0]&16){ 12 | $str1="-"; 13 | } 14 | if($a[0]&32){ 15 | $str2="-"; 16 | } 17 | if($a[5] eq "="){ 18 | $a[5]=$a[1]; 19 | } 20 | print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2,(substr $a[10],5), (substr $a[11],5))."\n"; 21 | } 22 | close(IN); 23 | exit; 24 | 25 | -------------------------------------------------------------------------------- /bin/eHiC-QC/bam_to_temp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | 4 | my $bed = "-"; 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n"); 6 | while(my $line = ){ 7 | chomp $line; 8 | my $str1="+"; 9 | my $str2="+"; 10 | my @a = split "\t", $line; 11 | if($a[0]&16){ 12 | $str1="-"; 13 | } 14 | if($a[0]&32){ 15 | $str2="-"; 16 | } 17 | if($a[5] eq "="){ 18 | $a[5]=$a[1]; 19 | } 20 | print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2,(substr $a[10],5), (substr $a[11],5))."\n"; 21 | } 22 | close(IN); 23 | exit; 24 | 25 | -------------------------------------------------------------------------------- /bin/preprocess/bam_to_temp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | 4 | my $bed = "-"; 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n"); 6 | while(my $line = ){ 7 | chomp $line; 8 | my $str1="+"; 9 | my $str2="+"; 10 | my @a = split "\t", $line; 11 | if($a[0]&16){ 12 | $str1="-"; 13 | } 14 | if($a[0]&32){ 15 | 
$str2="-"; 16 | } 17 | if($a[5] eq "="){ 18 | $a[5]=$a[1]; 19 | } 20 | print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2,(substr $a[10],5), (substr $a[11],5))."\n"; 21 | } 22 | close(IN); 23 | exit; 24 | 25 | -------------------------------------------------------------------------------- /bin/Arima/remove.blacklist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | my usage= "remove.blacklist.py " 5 | """ 6 | 7 | import sys 8 | 9 | bed=set() 10 | 11 | with open(sys.argv[1],'r') as f: 12 | 13 | for line in f: 14 | 15 | chr, beg, end, id = line.rstrip().split('\t')[0:4] 16 | 17 | bed.add(id) 18 | f.close() 19 | 20 | 21 | 22 | 23 | for line in sys.stdin: 24 | 25 | a1, a2 = line.rstrip().split('\t')[0:2] 26 | 27 | if a1 not in bed and a2 not in bed and a1 != a2: 28 | print(line.rstrip()) 29 | 30 | 31 | f.close() 32 | -------------------------------------------------------------------------------- /bin/microC/remove.blacklist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | my usage= "remove.blacklist.py " 5 | """ 6 | 7 | import sys 8 | 9 | bed=set() 10 | 11 | with open(sys.argv[1],'r') as f: 12 | 13 | for line in f: 14 | 15 | chr, beg, end, id = line.rstrip().split('\t')[0:4] 16 | 17 | bed.add(id) 18 | f.close() 19 | 20 | 21 | 22 | 23 | for line in sys.stdin: 24 | 25 | a1, a2 = line.rstrip().split('\t')[0:2] 26 | 27 | if a1 not in bed and a2 not in bed and a1 != a2: 28 | 29 | print(line.rstrip()) 30 | 31 | 32 | f.close() 33 | -------------------------------------------------------------------------------- /documents/lib/remove.blacklist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | my usage= "remove.blacklist.py " 5 | """ 6 | 7 | import sys 8 | 9 | bed=set() 10 | 11 | with open(sys.argv[1],'r') as f: 12 | 13 | for line in f: 14 | 15 | chr, beg, 
end, id = line.rstrip().split('\t')[0:4] 16 | 17 | bed.add(id) 18 | f.close() 19 | 20 | 21 | 22 | 23 | for line in sys.stdin: 24 | 25 | a1, a2 = line.rstrip().split('\t')[0:2] 26 | 27 | if a1 not in bed and a2 not in bed and a1 != a2: 28 | 29 | print line.rstrip() 30 | 31 | 32 | f.close() 33 | -------------------------------------------------------------------------------- /bin/Arima/integrated.r: -------------------------------------------------------------------------------- 1 | #!/bin/env Rscript 2 | 3 | args = commandArgs(trailingOnly=TRUE) 4 | 5 | full=args[1] 6 | 7 | data=read.table(full,sep='\t',stringsAsFactors=F) 8 | df=read.table('dist.len.stat',sep='\t',stringsAsFactors=F) 9 | 10 | rownames(df)=paste(df[,1],df[,2],df[,3],sep=':') 11 | df$read=df[,4]*df[,5] 12 | rownames(data)=paste(data[,1],data[,2],data[,3],sep=':') 13 | 14 | data$read=0 15 | data[rownames(df),'read']=df$read 16 | data[,5]=data$read/data[,4] 17 | write.table(data[,1:5],'integrated.dist.len.stat',sep='\t',quote=F,row.names=F,col.names=F) 18 | 19 | 20 | -------------------------------------------------------------------------------- /bin/DPNII/integrated.r: -------------------------------------------------------------------------------- 1 | #!/bin/env Rscript 2 | 3 | args = commandArgs(trailingOnly=TRUE) 4 | 5 | full=args[1] 6 | 7 | data=read.table(full,sep='\t',stringsAsFactors=F) 8 | df=read.table('dist.len.stat',sep='\t',stringsAsFactors=F) 9 | 10 | rownames(df)=paste(df[,1],df[,2],df[,3],sep=':') 11 | df$read=df[,4]*df[,5] 12 | rownames(data)=paste(data[,1],data[,2],data[,3],sep=':') 13 | 14 | data$read=0 15 | data[rownames(df),'read']=df$read 16 | data[,5]=data$read/data[,4] 17 | write.table(data[,1:5],'integrated.dist.len.stat',sep='\t',quote=F,row.names=F,col.names=F) 18 | 19 | 20 | -------------------------------------------------------------------------------- /bin/DPNII/remove.blacklist_DPNII.py: -------------------------------------------------------------------------------- 1 
| #!/usr/bin/python 2 | 3 | """ 4 | my usage= "remove.blacklist.py " 5 | """ 6 | 7 | import sys 8 | 9 | bed=set() 10 | 11 | with open(sys.argv[1],'r') as f: 12 | 13 | for line in f: 14 | 15 | chr, beg, end, id = line.rstrip().split('\t')[0:4] 16 | 17 | bed.add(id) 18 | f.close() 19 | 20 | 21 | 22 | 23 | for line in sys.stdin: 24 | 25 | a1, a2 = line.rstrip().split('\t')[0:2] 26 | 27 | if a1 not in bed and a2 not in bed and a1 != a2: 28 | print(line.rstrip()) 29 | 30 | 31 | f.close() 32 | -------------------------------------------------------------------------------- /bin/generateReference_lib/remove.blacklist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | my usage= "remove.blacklist.py " 5 | """ 6 | 7 | import sys 8 | 9 | bed=set() 10 | 11 | with open(sys.argv[1],'r') as f: 12 | 13 | for line in f: 14 | 15 | chr, beg, end, id = line.rstrip().split('\t')[0:4] 16 | 17 | bed.add(id) 18 | f.close() 19 | 20 | 21 | 22 | 23 | for line in sys.stdin: 24 | 25 | a1, a2 = line.rstrip().split('\t')[0:2] 26 | 27 | if a1 not in bed and a2 not in bed and a1 != a2: 28 | 29 | print(line.rstrip()) 30 | 31 | 32 | f.close() 33 | -------------------------------------------------------------------------------- /bin/reformat_fastq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | start=int(sys.argv[1])-1 6 | length=int(sys.argv[2]) 7 | 8 | i=1 9 | for line in sys.stdin: 10 | if i==1: 11 | id,barcode=line.rstrip().split(' ') 12 | barcode=barcode.rstrip().split(':') 13 | print id+':'+barcode[-1][0:6] 14 | if i==2: 15 | line='AAGCTT'+line[start:(start+length)] 16 | print line.rstrip() 17 | if i==3: 18 | print line.rstrip() 19 | if i==4: 20 | line='JJJJJJ'+line[start:(start+length)] 21 | print line.rstrip() 22 | i=i+1 23 | if i>4: 24 | i=1 25 | 26 | -------------------------------------------------------------------------------- 
/bin/eHiC-QC/reform_end_id.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | def reform(id): 6 | if id[-1]=="+": 7 | id=int(id[0:-1])*2-1 8 | elif id[-1]=="-": 9 | id=int(id[0:-1])*2 10 | else: 11 | return -1 12 | return str(id) 13 | 14 | #file=open(sys.argv[1]) 15 | #while True: 16 | #line=file.readline() 17 | #if not line: 18 | # break 19 | for line in sys.stdin: 20 | end1,end2=line.rstrip().split('\t') 21 | end1=end1.split('_')[1] 22 | end2=end2.split('_')[1] 23 | end1="frag_"+reform(end1) 24 | end2="frag_"+reform(end2) 25 | print end1+"\t"+end2+"\t1" 26 | #file.close() 27 | -------------------------------------------------------------------------------- /bin/eHiC-QC/end_id_to_original.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | def reform(id): 6 | id=int(id.split('_')[1]) 7 | if id%2==0: 8 | id="frag_"+str(id/2)+"-" 9 | else: 10 | id="frag_"+str((id+1)/2)+"+" 11 | return id 12 | 13 | #file=open(sys.argv[1]) 14 | #while True: 15 | # line=file.readline() 16 | # if not line: 17 | # break 18 | for line in sys.stdin: 19 | cols=line.rstrip().split('\t') 20 | end1,end2=cols[0],cols[1] 21 | end1=reform(end1) 22 | end2=reform(end2) 23 | print end1+"\t"+end2+"\t"+"\t".join(cols[2:]) 24 | #file.close() 25 | 26 | -------------------------------------------------------------------------------- /bin/eHiC-QC/reformat_fastq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | start=int(sys.argv[1])-1 6 | length=int(sys.argv[2]) 7 | 8 | i=1 9 | for line in sys.stdin: 10 | if i==1: 11 | id,barcode=line.rstrip().split(' ') 12 | barcode=barcode.rstrip().split(':') 13 | print id+':'+barcode[-1][0:6] 14 | if i==2: 15 | line='AAGCTT'+line[start:(start+length)] 16 | print line.rstrip() 17 | if i==3: 18 | print line.rstrip() 19 | if 
i==4: 20 | line='JJJJJJ'+line[start:(start+length)] 21 | print line.rstrip() 22 | i=i+1 23 | if i>4: 24 | i=1 25 | 26 | -------------------------------------------------------------------------------- /bin/preprocess/reformat_fastq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | start=int(sys.argv[1])-1 6 | length=int(sys.argv[2]) 7 | 8 | i=1 9 | for line in sys.stdin: 10 | if i==1: 11 | # id,barcode=line.rstrip().split(' ') 12 | # barcode=barcode.rstrip().split(':') 13 | # print id+':'+barcode[-1][0:6] 14 | print line.rstrip() 15 | if i==2: 16 | line=line[start:(start+length)] 17 | print line.rstrip() 18 | if i==3: 19 | print line.rstrip() 20 | if i==4: 21 | line=line[start:(start+length)] 22 | print line.rstrip() 23 | i=i+1 24 | if i>4: 25 | i=1 26 | 27 | -------------------------------------------------------------------------------- /bin/microC/integrated.r: -------------------------------------------------------------------------------- 1 | #!/bin/env Rscript 2 | 3 | args = commandArgs(trailingOnly=TRUE) 4 | 5 | full=args[1] 6 | 7 | data=read.table(full,sep='\t',stringsAsFactors=F,row.names=1) 8 | df=read.table('dist.stat',sep='\t',stringsAsFactors=F,row.names=1) 9 | 10 | #rownames(df)=paste(df[,1],df[,2],df[,3],sep=':') 11 | #df$read=df[,4]*df[,5] 12 | #rownames(data)=paste(data[,1],data[,2],data[,3],sep=':') 13 | #colnames(df)='read' 14 | df$read=df[,1]*df[,2] 15 | 16 | data$read=0 17 | data[rownames(df),'read']=df$read 18 | data$avg=data$read/data[,1] 19 | 20 | write.table(data[,c(1,5)],'integrated.dist.len.stat',sep='\t',quote=F,col.names=F) 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /bin/eHiC/split_list_by_group.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | 5 | my ($group_file, $list_file) = @ARGV; 6 | 7 | open(IN, $group_file) || 
die("Error: Cannot open $group_file!\n"); 8 | my $i=0; 9 | while(my $line = ){ 10 | $i = $i+1; 11 | chomp $line; 12 | my ($group, $lambda, $fraction) = split "\t", $line; 13 | my $low_lim = $lambda * (1 - $fraction); 14 | my $high_lim = $lambda * (1 + $fraction); 15 | `cat $list_file | cut -f3-4 | awk '{if(\$2>$low_lim && \$2<$high_lim)print \$0}' > data_list.group.$group &` ; 16 | if($i==5){ 17 | sleep 600; 18 | $i = 0; 19 | } 20 | #print "Is speed ok now?[enter to run more]"; 21 | #; 22 | } 23 | close(IN); 24 | exit; 25 | -------------------------------------------------------------------------------- /bin/eHiC/ends_count_to_frag_count.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | dic={} 6 | file=open(sys.argv[1]) 7 | while True: 8 | line=file.readline() 9 | if not line: 10 | break 11 | frag1,frag2,obs,expt=line.rstrip().split('\t') 12 | frag1=frag1[0:-1] 13 | frag2=int(frag2[0:-1].split('_')[1]) 14 | if frag1 not in dic: 15 | for x in dic: 16 | for y in sorted(dic[x]): 17 | print x+"\tfrag_"+str(y)+"\t"+str(dic[x][y][0])+"\t"+str(dic[x][y][1]) 18 | dic={} 19 | dic[frag1]={} 20 | dic[frag1][frag2]=[int(obs),float(expt)] 21 | else: 22 | if frag2 in dic[frag1]: 23 | dic[frag1][frag2][0] += int(obs) 24 | dic[frag1][frag2][1] += float(expt) 25 | else: 26 | dic[frag1][frag2]=[int(obs),float(expt)] 27 | file.close() 28 | -------------------------------------------------------------------------------- /bin/microC/get_dist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | 6 | """ 7 | This code is going to get the distance between anchor pairs 8 | 9 | """ 10 | 11 | bed={} 12 | 13 | with open(sys.argv[1], 'r') as f: 14 | 15 | for line in f: 16 | 17 | chr, beg, end , anchor = line.rstrip().split('\t')[0:4] 18 | 19 | bed[anchor]=map(int, [beg,end]) 20 | 21 | f.close() 22 | 23 | for line in sys.stdin: 24 | 
#!/usr/bin/python
"""Append the distance between the two anchors of each pair.

``sys.argv[1]`` is a tab-separated anchor file whose first four columns are
chromosome, begin, end and anchor id.  Anchor pairs are read from stdin
(anchor ids in the first two columns) and echoed to stdout with one extra
tab-separated column holding the distance.
"""

import sys


def load_anchor_bed(lines):
    """Return ``{anchor_id: (beg, end)}`` from the first four columns of ``lines``."""
    bed = {}
    for line in lines:
        _chrom, beg, end, anchor = line.rstrip().split('\t')[0:4]
        # A tuple (not a lazy ``map`` object) so it can be indexed on Python 3.
        bed[anchor] = (int(beg), int(end))
    return bed


def anchor_distance(bed, a1, a2):
    """Return the gap between anchors ``a1`` and ``a2``.

    The anchor with the larger numeric suffix (the part after ``_``) is
    treated as the downstream one: the result is its begin minus the other
    anchor's end, minus one.
    """
    id1 = int(a1.split('_')[1])
    id2 = int(a2.split('_')[1])
    if id1 > id2:
        return bed[a1][0] - bed[a2][1] - 1
    return bed[a2][0] - bed[a1][1] - 1


def main(argv):
    """Annotate every stdin record with its anchor distance."""
    with open(argv[1], 'r') as handle:
        bed = load_anchor_bed(handle)
    for line in sys.stdin:
        record = line.rstrip()
        a1, a2 = record.split('\t')[0:2]
        # Single tab-joined string, matching the microC variant of this script.
        print(record + '\t' + str(anchor_distance(bed, a1, a2)))


if __name__ == "__main__":
    main(sys.argv)
#!/usr/bin/perl
# Rescale the expected count of every loop by the visibility of its two
# fragments: new_expect = expect * vis(frag1) * vis(frag2).
#
# Arguments:
#   <loop_file>  tab-separated: frag1, frag2, observed, expected
#   <vis_file>   tab-separated: fragment id, visibility value
# Output (stdout): frag1, frag2, observed, rescaled expected.
use strict;
my $usage = "Usage: ./add.vis.to.2M.loop.pl <loop_file> <vis_file>\n";
my ($loop, $vis) = @ARGV;
# Both files are required; the visibility file is read first.
if(not defined $loop or not defined $vis){
	die($usage);
}

# fragment id -> visibility value
my $vis_lis;
open(my $vis_fh, '<', $vis) or die("Error: Cannot open $vis: $!\n");
while(my $line = <$vis_fh>){
	chomp $line;
	my ($frag, $vis_value) = split "\t", $line;
	$vis_lis->{$frag} = $vis_value;
}
close($vis_fh);

open(my $loop_fh, '<', $loop) or die("Error: Cannot open $loop: $!\n");
while(my $line = <$loop_fh>){
	chomp $line;
	my ($frag1, $frag2, $obs, $expect) = split "\t", $line;
	# A fragment missing from the visibility table evaluates to 0 here,
	# so the rescaled expectation becomes 0 (unchanged legacy behaviour).
	my $vis1 = $vis_lis->{$frag1};
	my $vis2 = $vis_lis->{$frag2};
	my $new_expt = $expect * $vis1 * $vis2;
	print join("\t", $frag1, $frag2, $obs, $new_expt)."\n";
}
close($loop_fh);
exit;
#!/usr/bin/python
"""Drop loops that touch an end with zero GC content or zero mappability.

``sys.argv[1]`` is a three-column tab-separated file: end id, GC value,
mappability value.  Loop records are read from stdin (end ids in the first
two columns); a record is printed only when neither end is excluded.
"""

import sys


def load_excluded_ends(lines):
    """Return the set of end ids whose GC value or mappability value is zero."""
    excluded = set()
    for line in lines:
        end, gc, mappable = line.rstrip().split('\t')
        # Both columns are always parsed so a malformed value is reported
        # regardless of the other column.
        no_gc = float(gc) == 0
        unmappable = int(mappable) == 0
        if no_gc or unmappable:
            excluded.add(end)
    return excluded


def filter_loops(lines, excluded):
    """Yield the right-stripped records whose first two columns are both allowed."""
    for line in lines:
        record = line.rstrip()
        cols = record.split('\t')
        if cols[0] not in excluded and cols[1] not in excluded:
            yield record


def main(argv):
    """Filter stdin against the end table named in ``argv[1]``."""
    with open(argv[1]) as gc_file:
        excluded = load_excluded_ends(gc_file)
    for record in filter_loops(sys.stdin, excluded):
        print(record)


if __name__ == "__main__":
    main(sys.argv)
#!/usr/bin/perl
# Print the value range of each of <n> equal-sized groups of a numeric column.
#
# The chosen column is sorted ascending, a -1e10 sentinel is prepended as the
# lower bound of the first group, and one line is printed per group:
#   group index, lower boundary value, upper boundary value.
use strict;
my $usage = "./get_group_range.pl <bed> <col> <n>\n".
	"\tThis program take numeric column <col> of <bed> and print out ranges after breaking into <n> of groups after sorting ascend\n";

my ($bed, $col, $n) = @ARGV;
if(not defined $n){
	die($usage);
}

# Collect the requested (1-based) column from every line.
my @len = ();
open(my $in, '<', $bed) or die("Error: Cannot open $bed: $!\n");
while(my $line = <$in>){
	chomp $line;
	my @x = split "\t", $line;
	push @len, $x[$col - 1];
}
close($in);

my @sort_len = sort {$a <=> $b} @len;
# Sentinel so that index 0 is a lower bound below every real value.
unshift @sort_len, -1e10;
# After the unshift the last index equals the number of real values.
my $total = $#sort_len;
for(my $i=1; $i<=$n; $i++){
	print join("\t", $i, $sort_len[int(($i-1) * $total / $n)], $sort_len[int($i * $total / $n)])."\n";
}

exit;
#!/usr/bin/python
"""Split an anchor-to-anchor loop stream into one file per chromosome.

``sys.argv[1]`` is a tab-separated anchor file (chromosome, begin, end,
anchor id, ...).  Loop records are read from stdin; each record is written
to ``split/anchor_2_anchor.loop.<chromosome>`` according to the chromosome
of its first anchor.
"""

import sys
import os

DIR_NAME = 'split'


def load_anchor_chromosomes(lines):
    """Return ``{anchor_id: chromosome}`` from the first four columns of ``lines``."""
    anchor_chrom = {}
    for line in lines:
        chrom, _beg, _end, anchor = line.rstrip().split('\t')[0:4]
        anchor_chrom[anchor] = chrom
    return anchor_chrom


def split_by_chromosome(anchor_chrom, lines, out_dir=DIR_NAME, first_chrom='chr1'):
    """Write each loop record to the file of its first anchor's chromosome.

    The file for ``first_chrom`` is always created, even when it receives no
    records, as the original script did.  If a chromosome shows up again
    after another one, its file is re-opened in append mode so rows written
    earlier in this run are not truncated.
    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    def _path(chrom):
        return os.path.join(out_dir, 'anchor_2_anchor.loop.' + chrom)

    opened = set([first_chrom])
    prev_chrom = first_chrom
    out = open(_path(prev_chrom), 'w')
    try:
        for line in lines:
            # Unpacking four fields keeps the original "at least four
            # columns" requirement; only the first anchor is used.
            a1, _a2, _obs, _expt = line.rstrip().split('\t')[0:4]
            chrom = anchor_chrom[a1]
            if chrom != prev_chrom:
                out.close()
                out = open(_path(chrom), 'a' if chrom in opened else 'w')
                opened.add(chrom)
                prev_chrom = chrom
            out.write(line)
    finally:
        out.close()


def main(argv):
    """Split stdin using the anchor file named in ``argv[1]``."""
    with open(argv[1], 'r') as handle:
        anchor_chrom = load_anchor_chromosomes(handle)
    split_by_chromosome(anchor_chrom, sys.stdin)


if __name__ == "__main__":
    main(sys.argv)
#!/usr/bin/perl
# Turn per-group-pair averages into correction factors.
#
# Input (tab-separated): group1, group2, pair count, average reads per pair.
# Output (tab-separated) for every group pair 1..20 x 1..20:
#   group1, group2, group average / overall average.

use strict;
my ($group_avg_file) = @ARGV;
if(not defined $group_avg_file){
	die("Usage:./get_corr_factor_by_GC_length.pl <group_avg_file>\n");
}

# Number of groups per dimension that are reported.
my $n_groups = 20;

my $group_avg;
my $total_count = 0;
my $total_reads = 0;
open(my $in, '<', $group_avg_file) or die("Error: Cannot open $group_avg_file: $!\n");
while(my $line = <$in>){
	chomp $line;
	my ($gc1, $gc2, $count, $avg) = split "\t", $line;
	$group_avg->{$gc1}->{$gc2} = $avg;
	$total_count += $count;
	# avg * count recovers the read total of this group pair.
	$total_reads += $avg * $count;
}
close($in);

# Guard the two divisions below instead of dying with "Illegal division by zero".
if($total_count == 0){
	die("Error: no pair counts found in $group_avg_file\n");
}
my $total_avg = $total_reads / $total_count;
if($total_avg == 0){
	die("Error: overall average is zero in $group_avg_file\n");
}

for(my $gc1 = 1; $gc1 <= $n_groups; $gc1 ++){
	for(my $gc2 = 1; $gc2 <= $n_groups; $gc2 ++){
		# A group pair absent from the input gets a factor of 0, as before.
		my $avg = $group_avg->{$gc1}->{$gc2};
		my $correct = (defined $avg ? $avg : 0) / $total_avg;
		print join("\t", $gc1, $gc2, $correct)."\n";
	}
}

exit;
#!/usr/bin/perl
# Turn per-group-pair averages into correction factors.
#
# Input (tab-separated): group1, group2, pair count, average reads per pair.
# Output (tab-separated) for every group pair 1..20 x 1..20:
#   group1, group2, group average / overall average.

use strict;
my ($group_avg_file) = @ARGV;
if(not defined $group_avg_file){
	die("Usage:./get_corr_factor_by_GC_length.pl <group_avg_file>\n");
}

# Number of groups per dimension that are reported.
my $n_groups = 20;

my $group_avg;
my $total_count = 0;
my $total_reads = 0;
open(my $in, '<', $group_avg_file) or die("Error: Cannot open $group_avg_file: $!\n");
while(my $line = <$in>){
	chomp $line;
	my ($gc1, $gc2, $count, $avg) = split "\t", $line;
	$group_avg->{$gc1}->{$gc2} = $avg;
	$total_count += $count;
	# avg * count recovers the read total of this group pair.
	$total_reads += $avg * $count;
}
close($in);

# Guard the two divisions below instead of dying with "Illegal division by zero".
if($total_count == 0){
	die("Error: no pair counts found in $group_avg_file\n");
}
my $total_avg = $total_reads / $total_count;
if($total_avg == 0){
	die("Error: overall average is zero in $group_avg_file\n");
}

for(my $gc1 = 1; $gc1 <= $n_groups; $gc1 ++){
	for(my $gc2 = 1; $gc2 <= $n_groups; $gc2 ++){
		# A group pair absent from the input gets a factor of 0, as before.
		my $avg = $group_avg->{$gc1}->{$gc2};
		my $correct = (defined $avg ? $avg : 0) / $total_avg;
		print join("\t", $gc1, $gc2, $correct)."\n";
	}
}

exit;
#!/usr/bin/python
"""Append the average fragment length to every anchor record.

``sys.argv[1]`` is a two-column tab-separated file mapping fragment id to
anchor id.  ``sys.argv[2]`` is a five-column tab-separated anchor file:
chromosome, begin, end, anchor id, length.  Each anchor record is printed
with one extra column: length divided by the number of distinct fragments
mapped to that anchor.
"""

from __future__ import division

import sys
import os


def load_anchor_fragments(lines):
    """Return ``{anchor_id: set(fragment_ids)}`` from two-column ``lines``."""
    anchor_frags = {}
    for line in lines:
        frag, anchor = line.rstrip().split('\t')
        anchor_frags.setdefault(anchor, set()).add(frag)
    return anchor_frags


def average_fragment_rows(anchor_frags, lines):
    """Yield each anchor record extended with its average fragment length.

    Raises ``KeyError`` (with an explanatory message) when an anchor has no
    fragment in ``anchor_frags``; the average is undefined in that case.
    """
    for line in lines:
        chrom, beg, end, anchor, length = line.rstrip().split('\t')
        if anchor not in anchor_frags:
            raise KeyError("anchor %s has no fragments in the fragment-to-anchor file" % anchor)
        avg = float(length) / len(anchor_frags[anchor])
        yield '\t'.join([chrom, beg, end, anchor, length, str(avg)])


def main(argv):
    """Print the annotated anchor records for the two files named in ``argv``."""
    with open(argv[1], 'r') as handle:
        anchor_frags = load_anchor_fragments(handle)
    with open(argv[2], 'r') as handle:
        for row in average_fragment_rows(anchor_frags, handle):
            print(row)


if __name__ == "__main__":
    main(sys.argv)
#!/usr/bin/python
"""Split an anchor-to-anchor loop stream into one file per chromosome.

``sys.argv[1]`` is a tab-separated anchor file (chromosome, begin, end,
anchor id, ...).  Loop records are read from stdin; each record is written
to ``HiCorr_output/anchor_2_anchor.loop.<chromosome>`` according to the
chromosome of its first anchor.
"""

import sys
import os

DIR_NAME = 'HiCorr_output'


def load_anchor_chromosomes(lines):
    """Return ``{anchor_id: chromosome}`` from the first four columns of ``lines``."""
    anchor_chrom = {}
    for line in lines:
        chrom, _beg, _end, anchor = line.rstrip().split('\t')[0:4]
        anchor_chrom[anchor] = chrom
    return anchor_chrom


def split_by_chromosome(anchor_chrom, lines, out_dir=DIR_NAME, first_chrom='chr1'):
    """Write each loop record to the file of its first anchor's chromosome.

    The file for ``first_chrom`` is always created, even when it receives no
    records, as the original script did.  If a chromosome shows up again
    after another one, its file is re-opened in append mode so rows written
    earlier in this run are not truncated.
    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    def _path(chrom):
        return os.path.join(out_dir, 'anchor_2_anchor.loop.' + chrom)

    opened = set([first_chrom])
    prev_chrom = first_chrom
    out = open(_path(prev_chrom), 'w')
    try:
        for line in lines:
            # Unpacking four fields keeps the original "at least four
            # columns" requirement; only the first anchor is used.
            a1, _a2, _obs, _expt = line.rstrip().split('\t')[0:4]
            chrom = anchor_chrom[a1]
            if chrom != prev_chrom:
                out.close()
                out = open(_path(chrom), 'a' if chrom in opened else 'w')
                opened.add(chrom)
                prev_chrom = chrom
            out.write(line)
    finally:
        out.close()


def main(argv):
    """Split stdin using the anchor file named in ``argv[1]``."""
    with open(argv[1], 'r') as handle:
        anchor_chrom = load_anchor_chromosomes(handle)
    split_by_chromosome(anchor_chrom, sys.stdin)


if __name__ == "__main__":
    main(sys.argv)
#!/usr/bin/python
"""Convert restriction-site positions into consecutive fragments.

``sys.argv[1]`` is a two-column tab-separated file: chromosome, size.
``sys.argv[2]`` is a three-column tab-separated file of sites (chromosome,
begin, end), grouped by chromosome.  One fragment is printed per gap:
chromosome, start, stop, ``frag_<running index>``.
"""

import sys


def load_chrom_sizes(lines):
    """Return ``{chromosome: size}``; sizes are kept as the strings read."""
    chromosize = {}
    for line in lines:
        chrom, size = line.rstrip().split('\t')
        chromosize[chrom] = size
    return chromosize


def sites_to_fragments(chromosize, site_lines):
    """Yield tab-joined fragment rows for the given site records.

    Per chromosome: the first fragment runs from 1 to the first site's
    begin + 1, inner fragments run from the previous site's end - 1 to the
    next site's begin + 1, and the last fragment runs from the final site's
    end - 1 to the chromosome size.  An empty ``site_lines`` yields nothing
    (the previous flat script raised ``KeyError`` on an empty chromosome
    name in that case).
    """
    prev = ""
    prev_end = None
    index = 1
    for line in site_lines:
        chrom, beg, end = line.rstrip().split('\t')
        if prev == chrom:
            yield chrom + '\t' + str(prev_end - 1) + '\t' + str(int(beg) + 1) + '\t' + 'frag_' + str(index)
            index += 1
        else:
            if prev != "":
                # Close the previous chromosome with its trailing fragment.
                yield prev + '\t' + str(prev_end - 1) + '\t' + str(chromosize[prev]) + '\t' + 'frag_' + str(index)
                index += 1
            yield chrom + '\t' + '1' + '\t' + str(int(beg) + 1) + '\t' + 'frag_' + str(index)
            index += 1
        prev = chrom
        prev_end = int(end)
    if prev != "":
        yield prev + '\t' + str(prev_end - 1) + '\t' + str(chromosize[prev]) + '\t' + 'frag_' + str(index)


def main(argv):
    """Print the fragments for the size and site files named in ``argv``."""
    with open(argv[1], 'r') as handle:
        chromosize = load_chrom_sizes(handle)
    with open(argv[2], 'r') as handle:
        for row in sites_to_fragments(chromosize, handle):
            print(row)


if __name__ == "__main__":
    main(sys.argv)
sys 4 | 5 | chromosize={} 6 | 7 | with open(sys.argv[1],'r') as f: 8 | for line in f: 9 | chr,size=line.rstrip().split('\t') 10 | chromosize[chr]=size 11 | f.close() 12 | 13 | prev="" 14 | i=1 15 | with open(sys.argv[2],'r') as f: 16 | for line in f: 17 | chr,beg,end=line.rstrip().split('\t') 18 | if prev=="": 19 | print(chr+'\t'+'1'+'\t'+str(int(beg)+1)+'\t'+'frag_'+str(i)) 20 | i+=1 21 | if prev==chr: 22 | print(chr+'\t'+str(prev_end-1)+'\t'+str(int(beg)+1)+'\t'+'frag_'+str(i)) 23 | i+=1 24 | if prev!=chr and prev!="": 25 | print(prev+'\t'+str(prev_end-1)+'\t'+str(chromosize[prev])+'\t'+'frag_'+str(i)) 26 | i+=1 27 | print(chr+'\t'+'1'+'\t'+str(int(beg)+1)+'\t'+'frag_'+str(i)) 28 | i+=1 29 | prev=chr 30 | prev_end=int(end) 31 | 32 | print(prev+'\t'+str(prev_end-1)+'\t'+str(chromosize[prev])+'\t'+'frag_'+str(i)) 33 | 34 | f.close() 35 | -------------------------------------------------------------------------------- /documents/lib/sequence_match.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | my $usage = "Usage: ./sequence_match.pl