├── bin
    ├── README
    ├── Arima
    │   ├── README.md
    │   ├── sum_anchor_reads.py
    │   ├── bam_to_temp_HiC.pl
    │   ├── remove.blacklist.py
    │   ├── integrated.r
    │   ├── get_dist.py
    │   ├── add.vis.to.cis.2M.pl
    │   ├── get_corr_factor_by_len.py
    │   ├── split_chromo.py
    │   ├── pick.dist.pl
    │   ├── summary_sorted_trans_frag_loop.pl
    │   ├── remove_dup_PE_SAM_sorted.pl
    │   ├── fragdata_to_anchordata.pl
    │   ├── correct.trans.reads.by.corr.pl
    │   ├── summary_sorted_frag_loop.pl
    │   ├── list_full_matrix.pl
    │   ├── Arima.sh
    │   ├── get_trans.avg_by_len.pl
    │   ├── pairing_two_SAM_reads.pl
    │   ├── reads_2_trans_frag_loop.pl
    │   ├── get_loop_lambda.pl
    │   └── merge_sorted_anchor_loop.pl
    ├── generateReference_lib
    │   ├── README.md
    │   ├── remove.blacklist.py
    │   ├── get_group_range.pl
    │   ├── get_aveg_frag_length.py
    │   ├── sites_to_frag.py
    │   ├── sequence_match.pl
    │   ├── find_RE_sites.pl
    │   ├── list_full_matrix.pl
    │   ├── count_trans_pairs_by_GC.pl
    │   ├── count_cis_pairs_by_GC.pl
    │   └── count_cis_2M_pair.pl
    ├── microC
    │   ├── README.md
    │   ├── calculate_vis.py
    │   ├── remove.blacklist.py
    │   ├── integrated.r
    │   ├── get_dist.py
    │   ├── add.vis.to.cis.2M.pl
    │   ├── split_chromo.py
    │   ├── fragdata_to_anchordata.pl
    │   ├── get_loop_lambda.pl
    │   ├── list_full_matrix.pl
    │   ├── get_group_statistics.pl
    │   └── merge_sorted_anchor_loop.pl
    ├── HindIII
    │   ├── sum_frag_reads_2.py
    │   ├── remove_outlier.py
    │   ├── get_anchor_pval.r
    │   ├── get_corr_factor_by_GC.pl
    │   ├── test_frag_corr.py
    │   ├── fragdata_to_anchordata.pl
    │   ├── batch_anchor_by_chrom.pl
    │   ├── get_loop_lambda_GC_correct.pl
    │   └── get_cis_avg_by_GC.pl
    ├── eHiC
    │   ├── remove_outlier.py
    │   ├── get_anchor_pval.r
    │   ├── sum_frag_reads_2.py
    │   ├── remove_outlier_ELPU.py
    │   ├── split_list_by_group.pl
    │   ├── ends_count_to_frag_count.py
    │   ├── remove_ends_without_HD.py
    │   ├── get_group_range.pl
    │   ├── get_corr_factor_by_GC.pl
    │   ├── fragdata_to_anchordata.pl
    │   ├── merge_and_resort_end_loop.py
    │   ├── test_frag_corr_eHiC.py
    │   ├── batch_anchor_by_chrom.pl
    │   ├── get_loop_lambda_GC_correct.pl
    │   ├── get_trans_avg_by_GC.pl
    │   └── model_fit.r
    ├── select_anchor.sh
    ├── select_loop.py
    ├── DPNII
    │   ├── sum_anchor_reads_DPNII.py
    │   ├── bam_to_temp_HiC_DPNII.pl
    │   ├── integrated.r
    │   ├── remove.blacklist_DPNII.py
    │   ├── get_dist_DPNII.py
    │   ├── add.vis.to.cis.2M_DPNII.pl
    │   ├── get_corr_factor_by_len_DPNII.py
    │   ├── split_chromo.py
    │   ├── pick.dist.pl
    │   ├── summary_sorted_trans_frag_loop_DPNII.pl
    │   ├── remove_dup_PE_SAM_sorted_DPNII.pl
    │   ├── fragdata_to_anchordata_DNPII.pl
    │   ├── correct.trans.reads.by.corr_DPNII.pl
    │   ├── summary_sorted_frag_loop_DPNII.pl
    │   ├── list_full_matrix_DPNII.pl
    │   ├── get_trans.avg_by_len_DPNII.pl
    │   ├── DPNII.sh
    │   ├── pairing_two_SAM_reads_DPNII.pl
    │   ├── reads_2_trans_frag_loop_DPNII.pl
    │   └── get_loop_lambda_DPNII.pl
    ├── bam_to_temp_HiC.pl
    ├── preprocess
    │   ├── bam_to_temp_HiC.pl
    │   ├── bam_to_temp.pl
    │   ├── reformat_fastq.py
    │   ├── summary_sorted_trans_frag_loop.pl
    │   ├── remove_dup_PE_SAM_sorted.pl
    │   ├── resort_by_frag_id.pl
    │   ├── summary_sorted_frag_loop.pl
    │   ├── bam_to_frag_loop.sh
    │   ├── pairing_two_SAM_reads.pl
    │   ├── reads_2_trans_frag_loop.pl
    │   └── generate_data_matrix.pl
    ├── bam_to_temp.pl
    ├── eHiC-QC
    │   ├── bam_to_temp.pl
    │   ├── reform_end_id.py
    │   ├── end_id_to_original.py
    │   ├── reformat_fastq.py
    │   ├── summary_sorted_trans_frag_loop.pl
    │   ├── remove_dup_PE_ELPU.pl
    │   ├── resort_by_frag_id.pl
    │   ├── summary_sorted_frag_loop.pl
    │   └── eHiC-QC.sh
    ├── reformat_fastq.py
    ├── plot.heatmap.r
    ├── plot.multiple.r
    ├── summary_sorted_trans_frag_loop.pl
    ├── draw_heatmap.sh
    ├── resort_by_frag_id.pl
    ├── summary_sorted_frag_loop.pl
    ├── template.r
    ├── bam_to_frag_loop.sh
    ├── reads_2_trans_frag_loop.pl
    └── generate_data_matrix.pl
├── png
    ├── README.md
    ├── hg19.full.matrix.PNG
    ├── hg19.DPNII.blacklist.PNG
    ├── hg19.DPNII.frag.bed.PNG
    ├── hg19.dist.401.group.PNG
    ├── hg19.full.dist.len.stat.PNG
    ├── hg19.trans.possible.pairs.PNG
    ├── hg19_DPNII_frag_2_anchor.PNG
    ├── hg19_anchor_length.groups.PNG
    ├── hg19_5kb_anchors_blacklist.PNG
    ├── hg19_DPNII_anchors_avg.bed.PNG
    ├── hg19.HindIII.chr11_130000000_130800000.expt.matrix.png
    ├── hg19.HindIII.chr11_130000000_130800000.raw.matrix.png
    └── hg19.HindIII.chr11_130000000_130800000.ratio.matrix.png
├── HiCorr_heatmap.sh
├── documents
    ├── lib
    │   ├── README.md
    │   ├── remove.blacklist.py
    │   ├── get_group_range.pl
    │   ├── get_aveg_frag_length.py
    │   ├── find_RE_sites.pl
    │   ├── sites_to_frag.py
    │   ├── sequence_match.pl
    │   ├── list_full_matrix.pl
    │   ├── count_trans_pairs_by_GC.pl
    │   ├── count_cis_2M_pair.pl
    │   └── count_cis_pairs_by_GC.pl
    ├── HiCorr_micro-C.md
    ├── HiCorr_insituHi-C.md
    ├── HiCorr_Arima.md
    ├── Arima.preprocessing.allValidPairs.sh
    ├── Generate.reference.5kb_bin.md
    ├── HiCorr_heatmap.old.md
    └── old.md.left.md
├── HiCorr
├── HiCorr_micro-C.sh
├── HiCorr_Arima.sh
├── HiCorr_DPNII.sh
└── README.md


/bin/README:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/png/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/HiCorr_heatmap.sh:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/bin/Arima/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/documents/lib/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/bin/microC/README.md:
--------------------------------------------------------------------------------
1 | Scripts for micro-C HiCorr
2 | 


--------------------------------------------------------------------------------
/png/hg19.full.matrix.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.full.matrix.PNG


--------------------------------------------------------------------------------
/png/hg19.DPNII.blacklist.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.DPNII.blacklist.PNG


--------------------------------------------------------------------------------
/png/hg19.DPNII.frag.bed.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.DPNII.frag.bed.PNG


--------------------------------------------------------------------------------
/png/hg19.dist.401.group.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.dist.401.group.PNG


--------------------------------------------------------------------------------
/png/hg19.full.dist.len.stat.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.full.dist.len.stat.PNG


--------------------------------------------------------------------------------
/png/hg19.trans.possible.pairs.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.trans.possible.pairs.PNG


--------------------------------------------------------------------------------
/png/hg19_DPNII_frag_2_anchor.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19_DPNII_frag_2_anchor.PNG


--------------------------------------------------------------------------------
/png/hg19_anchor_length.groups.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19_anchor_length.groups.PNG


--------------------------------------------------------------------------------
/png/hg19_5kb_anchors_blacklist.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19_5kb_anchors_blacklist.PNG


--------------------------------------------------------------------------------
/png/hg19_DPNII_anchors_avg.bed.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19_DPNII_anchors_avg.bed.PNG


--------------------------------------------------------------------------------
/png/hg19.HindIII.chr11_130000000_130800000.expt.matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.HindIII.chr11_130000000_130800000.expt.matrix.png


--------------------------------------------------------------------------------
/png/hg19.HindIII.chr11_130000000_130800000.raw.matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.HindIII.chr11_130000000_130800000.raw.matrix.png


--------------------------------------------------------------------------------
/png/hg19.HindIII.chr11_130000000_130800000.ratio.matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JinLabBioinfo/HiCorr/HEAD/png/hg19.HindIII.chr11_130000000_130800000.ratio.matrix.png


--------------------------------------------------------------------------------
/bin/HindIII/sum_frag_reads_2.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | dic={}
 6 | for line in sys.stdin:
 7 | 	frag1,frag2,reads=line.rstrip().split('\t')[0:3]
 8 | 	if frag1 not in dic:
 9 | 		dic[frag1]=0
10 | 	dic[frag1] += float(reads)
11 | 
12 | for frag in dic:
13 | 	print(frag,str(dic[frag]),sep='\t')
14 | 


--------------------------------------------------------------------------------
/bin/eHiC/remove_outlier.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | dic={}
 6 | outlier=open(sys.argv[1])
 7 | for line in outlier.readlines():
 8 | 	frag=line.rstrip().split('\t')[3]
 9 | 	dic[frag]=""
10 | outlier.close()
11 | 
12 | for line in sys.stdin:
13 | 	frag1,frag2=line.split('\t')[0:2]
14 | 	if frag1 not in dic and frag2 not in dic:
15 | 		print line.rstrip()
16 | 


--------------------------------------------------------------------------------
/bin/HindIII/remove_outlier.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | dic={}
 6 | outlier=open(sys.argv[1])
 7 | for line in outlier.readlines():
 8 | 	frag=line.rstrip().split('\t')[3]
 9 | 	dic[frag]=""
10 | outlier.close()
11 | 
12 | for line in sys.stdin:
13 | 	frag1,frag2=line.split('\t')[0:2]
14 | 	if frag1 not in dic and frag2 not in dic:
15 |             print(line.rstrip())
16 | 


--------------------------------------------------------------------------------
/bin/eHiC/get_anchor_pval.r:
--------------------------------------------------------------------------------
 1 | # NEED TO UPDATE THIS SLOPE VALUE EVERY TIME
 2 | slope <- 2
 3 | 
 4 | data <- read.table("FILE", col.names=c("gid1", "gid2", "val", "rand"))
 5 | attach(data)
 6 | prob <- 1 - 1/slope
 7 | r <- rand / (slope - 1)
 8 | r[r==0] <- 1e5
 9 | data$pval <- pnbinom(val - 1, size=r, mu=rand, lower.tail=FALSE)
10 | detach()
11 | write.table(data, file="FILE.p_val", row.names=FALSE, col.names=FALSE, sep="\t", quote=FALSE)
12 | 
13 | 


--------------------------------------------------------------------------------
/bin/HindIII/get_anchor_pval.r:
--------------------------------------------------------------------------------
 1 | # NEED TO UPDATE THIS SLOPE VALUE EVERY TIME
 2 | slope <- 2
 3 | 
 4 | data <- read.table("FILE", col.names=c("gid1", "gid2", "val", "rand"))
 5 | attach(data)
 6 | prob <- 1 - 1/slope
 7 | r <- rand / (slope - 1)
 8 | r[r==0] <- 1e5
 9 | data$pval <- pnbinom(val - 1, size=r, mu=rand, lower.tail=FALSE)
10 | detach()
11 | write.table(data, file="FILE.p_val", row.names=FALSE, col.names=FALSE, sep="\t", quote=FALSE)
12 | 
13 | 


--------------------------------------------------------------------------------
/bin/select_anchor.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | chr=$1
 4 | beg=$2
 5 | end=$3
 6 | ref=$4
 7 | genome=$5
 8 | enzyme=$6
 9 | 
10 | let range=$end-$beg
11 | if [ $range -gt 2000000 ];then
12 | 	echo "Please enter an region <=2,000,000bp"
13 | else 
14 | 	awk -v chr=$chr -v beg=$beg -v end=$end '{OFS="\t";if($1==chr && $2>=beg && $3<=end)print $1,$2,$3,$4}' $ref/$enzyme/${genome}_${enzyme}_anchors_avg.bed > ${genome}.anchors_${chr}_${beg}_${end}.bed
15 | fi
16 | 


--------------------------------------------------------------------------------
/bin/microC/calculate_vis.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | dic={}
 6 | sums=0
 7 | count=0
 8 | for line in sys.stdin:
 9 | 	if not line:
10 | 		break
11 | 	frag1,frag2,reads=line.rstrip().split('\t')[0:3]
12 | 	if frag1 not in dic:
13 | 		dic[frag1]=0
14 | 		count+=1
15 | 	dic[frag1] += float(reads)
16 | 	sums+=float(reads)
17 | 
18 | 
19 | avg=float(sums)/count
20 | 
21 | for frag in dic:
22 | #	print frag+"\t"+str(dic[frag])
23 | 	print(frag + '\t' + str(dic[frag]/avg))
24 | 


--------------------------------------------------------------------------------
/bin/select_loop.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | dic={}
 6 | anchor_bed=open(sys.argv[1])
 7 | for line in anchor_bed.readlines():
 8 | 	chr,start,end,id=line.rstrip().split('\t')
 9 | 	dic[id]=""
10 | anchor_bed.close()
11 | 
12 | loop=open(sys.argv[2])
13 | while True:
14 | 	line=loop.readline()
15 | 	if not line:
16 | 		break
17 | 	anchor1,anchor2,obs,expt=line.rstrip().split('\t')
18 | 	if anchor1 in dic and anchor2 in dic:
19 | 		print(line.rstrip())
20 | loop.close()
21 | 


--------------------------------------------------------------------------------
/bin/Arima/sum_anchor_reads.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | file=open(sys.argv[1])
 6 | line=file.readline()
 7 | #prev_frag=line.split('\t')[0]
 8 | #sum=int(line.split('\t')[2])
 9 | dic={}
10 | while True:
11 | 	line=file.readline()
12 | 	if not line:
13 | 		break
14 | 	frag1,frag2,reads=line.rstrip().split('\t')[0:3]
15 | 	if frag1 not in dic:
16 | 		dic[frag1]=0
17 | 	dic[frag1] += float(reads)
18 | file.close()
19 | 
20 | for frag in dic:
21 | 	print(frag+"\t"+str(dic[frag]))
22 | 


--------------------------------------------------------------------------------
/bin/DPNII/sum_anchor_reads_DPNII.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | file=open(sys.argv[1])
 6 | line=file.readline()
 7 | #prev_frag=line.split('\t')[0]
 8 | #sum=int(line.split('\t')[2])
 9 | dic={}
10 | while True:
11 | 	line=file.readline()
12 | 	if not line:
13 | 		break
14 | 	frag1,frag2,reads=line.rstrip().split('\t')[0:3]
15 | 	if frag1 not in dic:
16 | 		dic[frag1]=0
17 | 	dic[frag1] += float(reads)
18 | file.close()
19 | 
20 | for frag in dic:
21 | 	print(frag, str(dic[frag]),sep='\t')
22 | 


--------------------------------------------------------------------------------
/bin/bam_to_temp_HiC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | 
 4 | my $bed = "-";
 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n");
 6 | while(my $line = <IN>){
 7 | 	chomp $line;
 8 | 	my $str1="+";
 9 | 	my $str2="+";
10 | 	my @a = split "\t", $line;
11 | 	if($a[0]&16){
12 | 		$str1="-";
13 | 	}
14 | 	if($a[0]&32){
15 | 		$str2="-";
16 | 	}
17 | 	if($a[5] eq "="){
18 | 		$a[5]=$a[1];
19 | 	}
20 | 	print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2)."\n";        
21 | }
22 | close(IN);
23 | exit;
24 | 
25 | 


--------------------------------------------------------------------------------
/bin/eHiC/sum_frag_reads_2.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | #file=open(sys.argv[1])
 6 | #line=file.readline()
 7 | #prev_frag=line.split('\t')[0]
 8 | #sum=int(line.split('\t')[2])
 9 | dic={}
10 | for line in sys.stdin:
11 | 	#line=file.readline()
12 | 	#if not line:
13 | 	#	break
14 | 	frag1,frag2,reads=line.rstrip().split('\t')[0:3]
15 | 	if frag1 not in dic:
16 | 		dic[frag1]=0
17 | 	dic[frag1] += float(reads)
18 | #file.close()
19 | 
20 | for frag in dic:
21 | 	print frag+"\t"+str(dic[frag])
22 | 


--------------------------------------------------------------------------------
/bin/Arima/bam_to_temp_HiC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | 
 4 | my $bed = "-";
 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n");
 6 | while(my $line = <IN>){
 7 | 	chomp $line;
 8 | 	my $str1="+";
 9 | 	my $str2="+";
10 | 	my @a = split "\t", $line;
11 | 	if($a[0]&16){
12 | 		$str1="-";
13 | 	}
14 | 	if($a[0]&32){
15 | 		$str2="-";
16 | 	}
17 | 	if($a[5] eq "="){
18 | 		$a[5]=$a[1];
19 | 	}
20 | 	print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2)."\n";        
21 | }
22 | close(IN);
23 | exit;
24 | 
25 | 


--------------------------------------------------------------------------------
/bin/eHiC/remove_outlier_ELPU.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | dic={}
 6 | outlier=open(sys.argv[1])
 7 | for line in outlier.readlines():
 8 | 	frag=line.rstrip().split('\t')[3]
 9 | 	dic[frag]=""
10 | outlier.close()
11 | 
12 | file=open(sys.argv[2])
13 | while True:
14 | 	line=file.readline()
15 | 	if not line:
16 | 		break
17 | 	frag1,frag2=line.split('\t')[0:2]
18 | 	#frag1=frag1[:-1]
19 | 	#frag2=frag2[:-1]
20 | 	if frag1 not in dic and frag2 not in dic:
21 | 		print line.rstrip()
22 | file.close()	
23 | 


--------------------------------------------------------------------------------
/bin/DPNII/bam_to_temp_HiC_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | 
 4 | my $bed = "-";
 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n");
 6 | while(my $line = <IN>){
 7 | 	chomp $line;
 8 | 	my $str1="+";
 9 | 	my $str2="+";
10 | 	my @a = split "\t", $line;
11 | 	if($a[0]&16){
12 | 		$str1="-";
13 | 	}
14 | 	if($a[0]&32){
15 | 		$str2="-";
16 | 	}
17 | 	if($a[5] eq "="){
18 | 		$a[5]=$a[1];
19 | 	}
20 | 	print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2)."\n";        
21 | }
22 | close(IN);
23 | exit;
24 | 
25 | 


--------------------------------------------------------------------------------
/bin/preprocess/bam_to_temp_HiC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | 
 4 | my $bed = "-";
 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n");
 6 | while(my $line = <IN>){
 7 | 	chomp $line;
 8 | 	my $str1="+";
 9 | 	my $str2="+";
10 | 	my @a = split "\t", $line;
11 | 	if($a[0]&16){
12 | 		$str1="-";
13 | 	}
14 | 	if($a[0]&32){
15 | 		$str2="-";
16 | 	}
17 | 	if($a[5] eq "="){
18 | 		$a[5]=$a[1];
19 | 	}
20 | 	print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2)."\n";        
21 | }
22 | close(IN);
23 | exit;
24 | 
25 | 


--------------------------------------------------------------------------------
/bin/bam_to_temp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | 
 4 | my $bed = "-";
 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n");
 6 | while(my $line = <IN>){
 7 | 	chomp $line;
 8 | 	my $str1="+";
 9 | 	my $str2="+";
10 | 	my @a = split "\t", $line;
11 | 	if($a[0]&16){
12 | 		$str1="-";
13 | 	}
14 | 	if($a[0]&32){
15 | 		$str2="-";
16 | 	}
17 | 	if($a[5] eq "="){
18 | 		$a[5]=$a[1];
19 | 	}
20 | 	print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2,(substr $a[10],5), (substr $a[11],5))."\n";        
21 | }
22 | close(IN);
23 | exit;
24 | 
25 | 


--------------------------------------------------------------------------------
/bin/eHiC-QC/bam_to_temp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | 
 4 | my $bed = "-";
 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n");
 6 | while(my $line = <IN>){
 7 | 	chomp $line;
 8 | 	my $str1="+";
 9 | 	my $str2="+";
10 | 	my @a = split "\t", $line;
11 | 	if($a[0]&16){
12 | 		$str1="-";
13 | 	}
14 | 	if($a[0]&32){
15 | 		$str2="-";
16 | 	}
17 | 	if($a[5] eq "="){
18 | 		$a[5]=$a[1];
19 | 	}
20 | 	print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2,(substr $a[10],5), (substr $a[11],5))."\n";        
21 | }
22 | close(IN);
23 | exit;
24 | 
25 | 


--------------------------------------------------------------------------------
/bin/preprocess/bam_to_temp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | 
 4 | my $bed = "-";
 5 | open(IN, $bed) || die("Error: Cannot open file $bed!\n");
 6 | while(my $line = <IN>){
 7 | 	chomp $line;
 8 | 	my $str1="+";
 9 | 	my $str2="+";
10 | 	my @a = split "\t", $line;
11 | 	if($a[0]&16){
12 | 		$str1="-";
13 | 	}
14 | 	if($a[0]&32){
15 | 		$str2="-";
16 | 	}
17 | 	if($a[5] eq "="){
18 | 		$a[5]=$a[1];
19 | 	}
20 | 	print join("\t",$a[1],$a[2],$str1,$a[5],$a[6],$str2,(substr $a[10],5), (substr $a[11],5))."\n";        
21 | }
22 | close(IN);
23 | exit;
24 | 
25 | 


--------------------------------------------------------------------------------
/bin/Arima/remove.blacklist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | """
 4 | my usage= "remove.blacklist.py <blacklist.bed> <end_loop>" 
 5 | """
 6 | 
 7 | import sys
 8 | 
 9 | bed=set()
10 | 
11 | with open(sys.argv[1],'r') as f:
12 | 
13 | 	for line in f:
14 | 		
15 | 		chr, beg, end, id = line.rstrip().split('\t')[0:4]
16 | 
17 | 		bed.add(id)
18 | f.close()
19 | 
20 | 
21 | 
22 | 
23 | for line in sys.stdin:
24 | 
25 | 	a1, a2 = line.rstrip().split('\t')[0:2]
26 | 
27 | 	if a1 not in bed and a2 not in bed and a1 != a2:
28 |             print(line.rstrip())
29 | 
30 | 
31 | f.close()
32 | 


--------------------------------------------------------------------------------
/bin/microC/remove.blacklist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | """
 4 | my usage= "remove.blacklist.py <blacklist.bed> <end_loop>" 
 5 | """
 6 | 
 7 | import sys
 8 | 
 9 | bed=set()
10 | 
11 | with open(sys.argv[1],'r') as f:
12 | 
13 | 	for line in f:
14 | 		
15 | 		chr, beg, end, id = line.rstrip().split('\t')[0:4]
16 | 
17 | 		bed.add(id)
18 | f.close()
19 | 
20 | 
21 | 
22 | 
23 | for line in sys.stdin:
24 | 
25 | 	a1, a2 = line.rstrip().split('\t')[0:2]
26 | 
27 | 	if a1 not in bed and a2 not in bed and a1 != a2:
28 | 
29 | 		print(line.rstrip())
30 | 
31 | 
32 | f.close()
33 | 


--------------------------------------------------------------------------------
/documents/lib/remove.blacklist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | """
 4 | my usage= "remove.blacklist.py <blacklist.bed> <end_loop>" 
 5 | """
 6 | 
 7 | import sys
 8 | 
 9 | bed=set()
10 | 
11 | with open(sys.argv[1],'r') as f:
12 | 
13 | 	for line in f:
14 | 		
15 | 		chr, beg, end, id = line.rstrip().split('\t')[0:4]
16 | 
17 | 		bed.add(id)
18 | f.close()
19 | 
20 | 
21 | 
22 | 
23 | for line in sys.stdin:
24 | 
25 | 	a1, a2 = line.rstrip().split('\t')[0:2]
26 | 
27 | 	if a1 not in bed and a2 not in bed and a1 != a2:
28 | 
29 | 		print line.rstrip()
30 | 
31 | 
32 | f.close()
33 | 


--------------------------------------------------------------------------------
/bin/Arima/integrated.r:
--------------------------------------------------------------------------------
 1 | #!/bin/env Rscript
 2 | 
 3 | args = commandArgs(trailingOnly=TRUE)
 4 | 
 5 | full=args[1]
 6 | 
 7 | data=read.table(full,sep='\t',stringsAsFactors=F)
 8 | df=read.table('dist.len.stat',sep='\t',stringsAsFactors=F)
 9 | 
10 | rownames(df)=paste(df[,1],df[,2],df[,3],sep=':')
11 | df$read=df[,4]*df[,5]
12 | rownames(data)=paste(data[,1],data[,2],data[,3],sep=':')
13 | 
14 | data$read=0
15 | data[rownames(df),'read']=df$read
16 | data[,5]=data$read/data[,4]
17 | write.table(data[,1:5],'integrated.dist.len.stat',sep='\t',quote=F,row.names=F,col.names=F)
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/bin/DPNII/integrated.r:
--------------------------------------------------------------------------------
 1 | #!/bin/env Rscript
 2 | 
 3 | args = commandArgs(trailingOnly=TRUE)
 4 | 
 5 | full=args[1]
 6 | 
 7 | data=read.table(full,sep='\t',stringsAsFactors=F)
 8 | df=read.table('dist.len.stat',sep='\t',stringsAsFactors=F)
 9 | 
10 | rownames(df)=paste(df[,1],df[,2],df[,3],sep=':')
11 | df$read=df[,4]*df[,5]
12 | rownames(data)=paste(data[,1],data[,2],data[,3],sep=':')
13 | 
14 | data$read=0
15 | data[rownames(df),'read']=df$read
16 | data[,5]=data$read/data[,4]
17 | write.table(data[,1:5],'integrated.dist.len.stat',sep='\t',quote=F,row.names=F,col.names=F)
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/bin/DPNII/remove.blacklist_DPNII.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | """
 4 | my usage= "remove.blacklist.py <blacklist.bed> <end_loop>" 
 5 | """
 6 | 
 7 | import sys
 8 | 
 9 | bed=set()
10 | 
11 | with open(sys.argv[1],'r') as f:
12 | 
13 | 	for line in f:
14 | 		
15 | 		chr, beg, end, id = line.rstrip().split('\t')[0:4]
16 | 
17 | 		bed.add(id)
18 | f.close()
19 | 
20 | 
21 | 
22 | 
23 | for line in sys.stdin:
24 | 
25 | 	a1, a2 = line.rstrip().split('\t')[0:2]
26 | 
27 | 	if a1 not in bed and a2 not in bed and a1 != a2:
28 |             print(line.rstrip())
29 | 
30 | 
31 | f.close()
32 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/remove.blacklist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | """
 4 | my usage= "remove.blacklist.py <blacklist.bed> <end_loop>" 
 5 | """
 6 | 
 7 | import sys
 8 | 
 9 | bed=set()
10 | 
11 | with open(sys.argv[1],'r') as f:
12 | 
13 | 	for line in f:
14 | 		
15 | 		chr, beg, end, id = line.rstrip().split('\t')[0:4]
16 | 
17 | 		bed.add(id)
18 | f.close()
19 | 
20 | 
21 | 
22 | 
23 | for line in sys.stdin:
24 | 
25 | 	a1, a2 = line.rstrip().split('\t')[0:2]
26 | 
27 | 	if a1 not in bed and a2 not in bed and a1 != a2:
28 | 
29 | 		print(line.rstrip())
30 | 
31 | 
32 | f.close()
33 | 


--------------------------------------------------------------------------------
/bin/reformat_fastq.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | start=int(sys.argv[1])-1
 6 | length=int(sys.argv[2])
 7 | 
 8 | i=1
 9 | for line in sys.stdin:
10 | 	if i==1:
11 | 			id,barcode=line.rstrip().split(' ')
12 | 			barcode=barcode.rstrip().split(':')
13 | 			print id+':'+barcode[-1][0:6]
14 | 	if i==2:
15 | 			line='AAGCTT'+line[start:(start+length)]
16 | 			print line.rstrip()
17 | 	if i==3:
18 |                 print line.rstrip()
19 |         if i==4:
20 | 			line='JJJJJJ'+line[start:(start+length)]
21 | 			print line.rstrip()
22 | 	i=i+1
23 | 	if i>4:
24 | 		i=1
25 | 
26 | 


--------------------------------------------------------------------------------
/bin/eHiC-QC/reform_end_id.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | def reform(id):
 6 | 	if id[-1]=="+":
 7 | 		id=int(id[0:-1])*2-1
 8 | 	elif id[-1]=="-":
 9 | 		id=int(id[0:-1])*2
10 | 	else:
11 | 		return -1
12 | 	return str(id)
13 | 
14 | #file=open(sys.argv[1])
15 | #while True:
16 | 	#line=file.readline()
17 | 	#if not line:
18 | 	#	break
19 | for line in sys.stdin:
20 | 	end1,end2=line.rstrip().split('\t')
21 | 	end1=end1.split('_')[1]
22 | 	end2=end2.split('_')[1]
23 | 	end1="frag_"+reform(end1)
24 | 	end2="frag_"+reform(end2)
25 | 	print end1+"\t"+end2+"\t1"
26 | #file.close()
27 | 


--------------------------------------------------------------------------------
/bin/eHiC-QC/end_id_to_original.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | def reform(id):
 6 | 	id=int(id.split('_')[1])
 7 |         if id%2==0:
 8 |                 id="frag_"+str(id/2)+"-"
 9 | 	else:
10 | 		id="frag_"+str((id+1)/2)+"+"
11 | 	return id
12 | 
13 | #file=open(sys.argv[1])
14 | #while True:
15 | #	line=file.readline()
16 | #	if not line:
17 | #		break
18 | for line in sys.stdin:
19 | 	cols=line.rstrip().split('\t')
20 | 	end1,end2=cols[0],cols[1]
21 | 	end1=reform(end1)
22 | 	end2=reform(end2)
23 | 	print end1+"\t"+end2+"\t"+"\t".join(cols[2:])
24 | #file.close()
25 |         
26 | 


--------------------------------------------------------------------------------
/bin/eHiC-QC/reformat_fastq.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | start=int(sys.argv[1])-1
 6 | length=int(sys.argv[2])
 7 | 
 8 | i=1
 9 | for line in sys.stdin:
10 | 	if i==1:
11 | 			id,barcode=line.rstrip().split(' ')
12 | 			barcode=barcode.rstrip().split(':')
13 | 			print id+':'+barcode[-1][0:6]
14 | 	if i==2:
15 | 			line='AAGCTT'+line[start:(start+length)]
16 | 			print line.rstrip()
17 | 	if i==3:
18 |                 print line.rstrip()
19 |         if i==4:
20 | 			line='JJJJJJ'+line[start:(start+length)]
21 | 			print line.rstrip()
22 | 	i=i+1
23 | 	if i>4:
24 | 		i=1
25 | 
26 | 


--------------------------------------------------------------------------------
/bin/preprocess/reformat_fastq.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | start=int(sys.argv[1])-1
 6 | length=int(sys.argv[2])
 7 | 
 8 | i=1
 9 | for line in sys.stdin:
10 | 	if i==1:
11 | #			id,barcode=line.rstrip().split(' ')
12 | #			barcode=barcode.rstrip().split(':')
13 | #			print id+':'+barcode[-1][0:6]
14 | 			print line.rstrip()
15 | 	if i==2:
16 | 			line=line[start:(start+length)]
17 | 			print line.rstrip()
18 | 	if i==3:
19 |                 print line.rstrip()
20 |         if i==4:
21 | 			line=line[start:(start+length)]
22 | 			print line.rstrip()
23 | 	i=i+1
24 | 	if i>4:
25 | 		i=1
26 | 
27 | 


--------------------------------------------------------------------------------
/bin/microC/integrated.r:
--------------------------------------------------------------------------------
#!/bin/env Rscript

# Merge per-row read totals from 'dist.stat' into the table named by the first
# command-line argument and write the per-row average to
# 'integrated.dist.len.stat' in the current working directory.

args = commandArgs(trailingOnly=TRUE)

# Path of the full table (tab-separated; its first column becomes the row names).
full=args[1]

data=read.table(full,sep='\t',stringsAsFactors=F,row.names=1)
# 'dist.stat' is always read from the current working directory; its first
# column becomes the row names used to match rows of `data`.
df=read.table('dist.stat',sep='\t',stringsAsFactors=F,row.names=1)

#rownames(df)=paste(df[,1],df[,2],df[,3],sep=':')
#df$read=df[,4]*df[,5]
#rownames(data)=paste(data[,1],data[,2],data[,3],sep=':')
#colnames(df)='read'
# Read total per row = product of the first two data columns of dist.stat.
df$read=df[,1]*df[,2]

# Start every row at 0, then fill in the rows that appear in dist.stat.
data$read=0
data[rownames(df),'read']=df$read
# Average = read total divided by the first data column of the full table.
data$avg=data$read/data[,1]

# Writes the row names plus columns 1 and 5, without header or quoting.
# NOTE(review): column 5 is 'avg' only if the input table had three data
# columns besides the row names -- confirm against the input format.
write.table(data[,c(1,5)],'integrated.dist.len.stat',sep='\t',quote=F,col.names=F)
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/bin/eHiC/split_list_by_group.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my ($group_file, $list_file) = @ARGV;
 6 | 
 7 | open(IN, $group_file) || die("Error: Cannot open $group_file!\n");
 8 | my $i=0;
 9 | while(my $line = <IN>){
10 | 	$i = $i+1;
11 |         chomp $line;
12 |         my ($group, $lambda, $fraction) = split "\t", $line;
13 |         my $low_lim = $lambda * (1 - $fraction);
14 |         my $high_lim = $lambda * (1 + $fraction);
15 |         `cat $list_file | cut -f3-4 | awk '{if(\$2>$low_lim && \$2<$high_lim)print \$0}' > data_list.group.$group &` ;
16 | 	if($i==5){
17 | 		sleep 600;
18 | 		$i = 0;
19 | 	}
20 |         #print "Is speed ok now?[enter to run more]";
21 |         #<stdin>;
22 | }
23 | close(IN);
24 | exit;
25 | 


--------------------------------------------------------------------------------
/bin/eHiC/ends_count_to_frag_count.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | dic={}
 6 | file=open(sys.argv[1])
 7 | while True:
 8 | 	line=file.readline()
 9 | 	if not line:
10 | 		break
11 | 	frag1,frag2,obs,expt=line.rstrip().split('\t')
12 | 	frag1=frag1[0:-1]
13 | 	frag2=int(frag2[0:-1].split('_')[1])
14 | 	if frag1 not in dic:
15 | 		for x in dic:
16 | 			for y in sorted(dic[x]):
17 | 				print x+"\tfrag_"+str(y)+"\t"+str(dic[x][y][0])+"\t"+str(dic[x][y][1])
18 | 		dic={}
19 | 		dic[frag1]={}
20 | 		dic[frag1][frag2]=[int(obs),float(expt)]
21 | 	else:
22 | 		if frag2 in dic[frag1]:
23 | 			dic[frag1][frag2][0] += int(obs)
24 | 			dic[frag1][frag2][1] += float(expt)	
25 | 		else:
26 | 			dic[frag1][frag2]=[int(obs),float(expt)]
27 | file.close()
28 | 


--------------------------------------------------------------------------------
/bin/microC/get_dist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | 
 6 | """ 
 7 | This code is going to get the distance between anchor pairs
 8 | 
 9 | """
10 | 
11 | bed={}
12 | 
13 | with open(sys.argv[1], 'r') as f:
14 | 	
15 | 	for line in f:
16 | 
17 | 		chr, beg, end , anchor = line.rstrip().split('\t')[0:4]
18 | 
19 | 		bed[anchor]=map(int, [beg,end])
20 | 
21 | f.close()
22 | 
23 | for line in sys.stdin:
24 | 
25 | 	a1, a2 = line.rstrip().split('\t')[0:2]
26 | 
27 | 	id1, id2 = map(lambda x:int(x.split('_')[1]),[a1, a2])
28 | 
29 | 	if id1 > id2:
30 | 
31 | 		dist = bed[a1][0] - bed[a2][1] - 1
32 | 
33 | 	else:
34 | 
35 | 		dist = bed[a2][0] - bed[a1][1] - 1
36 | 
37 | 	print(line.rstrip() + '\t' + str(dist))
38 | 
39 | 
40 | 
41 | 
42 | 	
43 | 
44 | 


--------------------------------------------------------------------------------
/bin/Arima/get_dist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | 
 6 | """ 
 7 | This code is going to get the distance between anchor pairs
 8 | 
 9 | """
10 | 
11 | bed={}
12 | 
13 | with open(sys.argv[1], 'r') as f:
14 | 	
15 | 	for line in f:
16 | 
17 | 		chr, beg, end , anchor = line.rstrip().split('\t')[0:4]
18 | 
19 | 		bed[anchor]=map(int, [beg,end])
20 | 
21 | f.close()
22 | 
23 | for line in sys.stdin:
24 | 
25 | 	a1, a2 = line.rstrip().split('\t')[0:2]
26 | 
27 | 	id1, id2 = map(lambda x:int(x.split('_')[1]),[a1, a2])
28 | 
29 | 	if id1 > id2:
30 | 
31 | 		dist = bed[a1][0] - bed[a2][1] - 1
32 | 
33 | 	else:
34 | 
35 | 		dist = bed[a2][0] - bed[a1][1] - 1
36 | 
37 | 	print("\t", line.rstrip(), '\t', str(dist))
38 | 
39 | 
40 | 
41 | 
42 | 	
43 | 
44 | 


--------------------------------------------------------------------------------
/bin/DPNII/get_dist_DPNII.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | 
 6 | """ 
 7 | This code is going to get the distance between anchor pairs
 8 | 
 9 | """
10 | 
11 | bed={}
12 | 
13 | with open(sys.argv[1], 'r') as f:
14 | 	
15 | 	for line in f:
16 | 
17 | 		chr, beg, end , anchor = line.rstrip().split('\t')[0:4]
18 | 
19 | 		bed[anchor]=map(int, [beg,end])
20 | 
21 | f.close()
22 | 
23 | for line in sys.stdin:
24 | 
25 | 	a1, a2 = line.rstrip().split('\t')[0:2]
26 | 
27 | 	id1, id2 = map(lambda x:int(x.split('_')[1]),[a1, a2])
28 | 
29 | 	if id1 > id2:
30 | 
31 | 		dist = bed[a1][0] - bed[a2][1] - 1
32 | 
33 | 	else:
34 | 
35 | 		dist = bed[a2][0] - bed[a1][1] - 1
36 | 
37 | 	print("\t", line.rstrip(), '\t', str(dist))
38 | 
39 | 
40 | 
41 | 
42 | 	
43 | 
44 | 


--------------------------------------------------------------------------------
/bin/Arima/add.vis.to.cis.2M.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = "Usage: ./add.vis.to.2M.loop.pl <cis_loop.after.dist.len> <visibility.list> \n";
 4 | my ($loop, $vis) = @ARGV;
 5 | if(not defined $loop){
 6 |         die($usage);
 7 | }
 8 | my $vis_lis;
 9 | open(IN, $vis);
10 | while(my $line= <IN>){
11 |         chomp $line;
12 |         my ($frag,$vis_value)= split "\t", $line;
13 | 		$vis_lis->{$frag}=$vis_value;
14 | }
15 | close(IN);
16 | open(IN, $loop);
17 | while(my $line= <IN>){
18 |         chomp $line;
19 |         my ($frag1,$frag2,$obs,$expect)= split "\t", $line;
20 | 	my $vis1=$vis_lis->{$frag1};
21 | 	my $vis2=$vis_lis->{$frag2};
22 | 	my $new_expt=$expect*$vis1*$vis2;
23 | 	print join("\t",$frag1,$frag2,$obs,$new_expt)."\n";
24 | }
25 | close(IN);
26 | exit;
27 | 


--------------------------------------------------------------------------------
/bin/microC/add.vis.to.cis.2M.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = "Usage: ./add.vis.to.2M.loop.pl <cis_loop.after.dist.len> <visibility.list> \n";
 4 | my ($loop,$vis) = @ARGV;
 5 | #if(not defined $loop){
 6 |  #       die($usage);
 7 | #}
 8 | my $vis_lis;
 9 | open(IN, $vis);
10 | while(my $line= <IN>){
11 |         chomp $line;
12 |         my ($frag,$vis_value)= split "\t", $line;
13 | 		$vis_lis->{$frag}=$vis_value;
14 | }
15 | close(IN);
16 | open(IN, $loop);
17 | while(my $line= <IN>){
18 |         chomp $line;
19 |         my ($frag1,$frag2,$obs,$expect)= split "\t", $line;
20 | 	my $vis1=$vis_lis->{$frag1};
21 | 	my $vis2=$vis_lis->{$frag2};
22 | 	my $new_expt=$expect*$vis1*$vis2;
23 | 	print join("\t",$frag1,$frag2,$obs,$new_expt)."\n";
24 | }
25 | close(IN);
26 | exit;
27 | 


--------------------------------------------------------------------------------
/bin/DPNII/add.vis.to.cis.2M_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = "Usage: ./add.vis.to.2M.loop.pl <cis_loop.after.dist.len> <visibility.list> \n";
 4 | my ($loop, $vis) = @ARGV;
 5 | if(not defined $loop){
 6 |         die($usage);
 7 | }
 8 | my $vis_lis;
 9 | open(IN, $vis);
10 | while(my $line= <IN>){
11 |         chomp $line;
12 |         my ($frag,$vis_value)= split "\t", $line;
13 | 		$vis_lis->{$frag}=$vis_value;
14 | }
15 | close(IN);
16 | open(IN, $loop);
17 | while(my $line= <IN>){
18 |         chomp $line;
19 |         my ($frag1,$frag2,$obs,$expect)= split "\t", $line;
20 | 	my $vis1=$vis_lis->{$frag1};
21 | 	my $vis2=$vis_lis->{$frag2};
22 | 	my $new_expt=$expect*$vis1*$vis2;
23 | 	print join("\t",$frag1,$frag2,$obs,$new_expt)."\n";
24 | }
25 | close(IN);
26 | exit;
27 | 


--------------------------------------------------------------------------------
/bin/eHiC/remove_ends_without_HD.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | #if len(sys.argv)!=3:
 6 | #	print "Usage: ./remove_ends_without_HD.py <frag_GC_map> <frag_loop>"
 7 | #	sys.exit()
 8 |  
 9 | dic={}
10 | mappability={}
11 | GC_file=open(sys.argv[1])
12 | for line in GC_file.readlines():
13 | 	end,GC,map=line.rstrip().split('\t')
14 | 	if float(GC)==0:
15 | 		dic[end]=""
16 | 	if int(map)==0:
17 | 		dic[end]=""
18 | GC_file.close()
19 | 
20 | #loop_file=open(sys.argv[2])
21 | #out=open(sys.argv[3],"w+")
22 | #while True:
23 | for line in sys.stdin:
24 | 	#line=loop_file.readline()
25 | 	#if not line:
26 | 	#	break
27 | 	cols=line.rstrip().split('\t')
28 | 	frag1=cols[0]
29 | 	frag2=cols[1]
30 | 	if frag1 not in dic and frag2 not in dic:
31 | 		print line.rstrip()
32 | #loop_file.close()
33 | 


--------------------------------------------------------------------------------
/bin/eHiC/get_group_range.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = 	"./get_group_range.pl <bed> <col> <group_number>\n".
 4 | 		"\tThis program take numeric column <col> of <bed> and print out ranges after breaking into <group_number> of groups after sorting ascend\n";
 5 | 
 6 | my ($bed, $col, $n) = @ARGV;
 7 | if(not defined $n){
 8 | 	die($usage);
 9 | }
10 | 
11 | my @len = ();
12 | open(IN, $bed);
13 | while(my $line = <IN>){
14 | 	chomp $line;
15 | 	my @x = split "\t", $line;
16 | 	push @len, $x[$col - 1];
17 | }
18 | close(IN);
19 | my @sort_len = sort {$a <=> $b} @len;
20 | unshift @sort_len, -1e10;
21 | my $total = $#sort_len;
22 | my @len_cut;
23 | for(my $i=1; $i<=$n; $i++){
24 | 	 print join("\t", $i, $sort_len[int(($i-1) * $total / $n)], $sort_len[int($i * $total / $n)])."\n";
25 | }
26 | 
27 | exit;
28 | 


--------------------------------------------------------------------------------
/documents/lib/get_group_range.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = 	"./get_group_range.pl <bed> <col> <group_number>\n".
 4 | 		"\tThis program take numeric column <col> of <bed> and print out ranges after breaking into <group_number> of groups after sorting ascend\n";
 5 | 
 6 | my ($bed, $col, $n) = @ARGV;
 7 | if(not defined $n){
 8 | 	die($usage);
 9 | }
10 | 
11 | my @len = ();
12 | open(IN, $bed);
13 | while(my $line = <IN>){
14 | 	chomp $line;
15 | 	my @x = split "\t", $line;
16 | 	push @len, $x[$col - 1];
17 | }
18 | close(IN);
19 | my @sort_len = sort {$a <=> $b} @len;
20 | unshift @sort_len, -1e10;
21 | my $total = $#sort_len;
22 | my @len_cut;
23 | for(my $i=1; $i<=$n; $i++){
24 | 	 print join("\t", $i, $sort_len[int(($i-1) * $total / $n)], $sort_len[int($i * $total / $n)])."\n";
25 | }
26 | 
27 | exit;
28 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/get_group_range.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = 	"./get_group_range.pl <bed> <col> <group_number>\n".
 4 | 		"\tThis program take numeric column <col> of <bed> and print out ranges after breaking into <group_number> of groups after sorting ascend\n";
 5 | 
 6 | my ($bed, $col, $n) = @ARGV;
 7 | if(not defined $n){
 8 | 	die($usage);
 9 | }
10 | 
11 | my @len = ();
12 | open(IN, $bed);
13 | while(my $line = <IN>){
14 | 	chomp $line;
15 | 	my @x = split "\t", $line;
16 | 	push @len, $x[$col - 1];
17 | }
18 | close(IN);
19 | my @sort_len = sort {$a <=> $b} @len;
20 | unshift @sort_len, -1e10;
21 | my $total = $#sort_len;
22 | my @len_cut;
23 | for(my $i=1; $i<=$n; $i++){
24 | 	 print join("\t", $i, $sort_len[int(($i-1) * $total / $n)], $sort_len[int($i * $total / $n)])."\n";
25 | }
26 | 
27 | exit;
28 | 


--------------------------------------------------------------------------------
/bin/microC/split_chromo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import os
 5 | 
 6 | dic={}
 7 | 
 8 | dir_name='split'
 9 | 
10 | os.system('mkdir '+dir_name)
11 | 
12 | 
13 | 
14 | with open(sys.argv[1],'r') as f:
15 | 
16 | 	for line in f:
17 | 
18 | 		chr, beg, end, id = line.rstrip().split('\t')[0:4]
19 | 		dic[id] = chr
20 | 
21 | f.close()
22 | 
23 | 
24 | out=open(os.path.join(dir_name, 'anchor_2_anchor.loop.chr1'),'w')
25 | 
26 | prev_chr='chr1'
27 | 
28 | for line in sys.stdin:
29 | 
30 | 	a1, a2, obs, exp = line.rstrip().split('\t')[0:4]
31 | 		
32 | 	chr = dic[a1]
33 | 		
34 | 	if chr!=prev_chr:
35 | 
36 | 		out.close()
37 | 
38 | 		out=open(os.path.join(dir_name,'anchor_2_anchor.loop.'+chr),'w')
39 | 
40 | 	out.write(line)
41 | 	prev_chr = chr
42 | 	
43 | 
44 | out.close()
45 | 
46 | f.close()
47 | 
48 | 
49 | 
50 | 		
51 | 		
52 | 


--------------------------------------------------------------------------------
/bin/Arima/get_corr_factor_by_len.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($group_avg_file) = @ARGV;
 5 | if(not defined $group_avg_file){
 6 | 	die("Usage:./get_corr_factor_by_len.pl <group avg file>\n");
 7 | }
 8 | 
 9 | my $group_avg;
10 | my $total_count;
11 | my $total_reads;
12 | open(IN, $group_avg_file);
13 | while(my $line = <IN>){
14 |         chomp $line;
15 | 	my ($gc1, $gc2, $count, $avg) = split "\t", $line;
16 | 	$group_avg->{$gc1}->{$gc2} = $avg;
17 | 	$total_count += $count;
18 | 	$total_reads += $avg * $count;
19 | }
20 | close(IN);
21 | 
22 | my $total_avg = $total_reads / $total_count;
23 | for(my $gc1 = 1; $gc1 <= 20; $gc1 ++){
24 | 	for(my $gc2 = 1; $gc2 <= 20; $gc2 ++){
25 | 		my $correct = $group_avg->{$gc1}->{$gc2} / $total_avg;
26 | 		print join("\t", $gc1, $gc2, $correct)."\n";
27 | 	}
28 | }
29 | 
30 | exit;
31 | 
32 | 


--------------------------------------------------------------------------------
/bin/eHiC/get_corr_factor_by_GC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($group_avg_file) = @ARGV;
 5 | if(not defined $group_avg_file){
 6 | 	die("Usage:./get_corr_factor_by_GC_length.pl <group avg file>\n");
 7 | }
 8 | 
 9 | my $group_avg;
10 | my $total_count;
11 | my $total_reads;
12 | open(IN, $group_avg_file);
13 | while(my $line = <IN>){
14 |         chomp $line;
15 | 	my ($gc1, $gc2, $count, $avg) = split "\t", $line;
16 | 	$group_avg->{$gc1}->{$gc2} = $avg;
17 | 	$total_count += $count;
18 | 	$total_reads += $avg * $count;
19 | }
20 | close(IN);
21 | 
22 | my $total_avg = $total_reads / $total_count;
23 | for(my $gc1 = 1; $gc1 <= 20; $gc1 ++){
24 | 	for(my $gc2 = 1; $gc2 <= 20; $gc2 ++){
25 | 		my $correct = $group_avg->{$gc1}->{$gc2} / $total_avg;
26 | 		print join("\t", $gc1, $gc2, $correct)."\n";
27 | 	}
28 | }
29 | 
30 | exit;
31 | 
32 | 


--------------------------------------------------------------------------------
/bin/DPNII/get_corr_factor_by_len_DPNII.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($group_avg_file) = @ARGV;
 5 | if(not defined $group_avg_file){
 6 | 	die("Usage:./get_corr_factor_by_len.pl <group avg file>\n");
 7 | }
 8 | 
 9 | my $group_avg;
10 | my $total_count;
11 | my $total_reads;
12 | open(IN, $group_avg_file);
13 | while(my $line = <IN>){
14 |         chomp $line;
15 | 	my ($gc1, $gc2, $count, $avg) = split "\t", $line;
16 | 	$group_avg->{$gc1}->{$gc2} = $avg;
17 | 	$total_count += $count;
18 | 	$total_reads += $avg * $count;
19 | }
20 | close(IN);
21 | 
22 | my $total_avg = $total_reads / $total_count;
23 | for(my $gc1 = 1; $gc1 <= 20; $gc1 ++){
24 | 	for(my $gc2 = 1; $gc2 <= 20; $gc2 ++){
25 | 		my $correct = $group_avg->{$gc1}->{$gc2} / $total_avg;
26 | 		print join("\t", $gc1, $gc2, $correct)."\n";
27 | 	}
28 | }
29 | 
30 | exit;
31 | 
32 | 


--------------------------------------------------------------------------------
/bin/HindIII/get_corr_factor_by_GC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($group_avg_file) = @ARGV;
 5 | if(not defined $group_avg_file){
 6 | 	die("Usage:./get_corr_factor_by_GC_length.pl <group avg file>\n");
 7 | }
 8 | 
 9 | my $group_avg;
10 | my $total_count;
11 | my $total_reads;
12 | open(IN, $group_avg_file);
13 | while(my $line = <IN>){
14 |         chomp $line;
15 | 	my ($gc1, $gc2, $count, $avg) = split "\t", $line;
16 | 	$group_avg->{$gc1}->{$gc2} = $avg;
17 | 	$total_count += $count;
18 | 	$total_reads += $avg * $count;
19 | }
20 | close(IN);
21 | 
22 | my $total_avg = $total_reads / $total_count;
23 | for(my $gc1 = 1; $gc1 <= 20; $gc1 ++){
24 | 	for(my $gc2 = 1; $gc2 <= 20; $gc2 ++){
25 | 		my $correct = $group_avg->{$gc1}->{$gc2} / $total_avg;
26 | 		print join("\t", $gc1, $gc2, $correct)."\n";
27 | 	}
28 | }
29 | 
30 | exit;
31 | 
32 | 


--------------------------------------------------------------------------------
/documents/lib/get_aveg_frag_length.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | 
 4 | from __future__ import division
 5 | 
 6 | import sys
 7 | import os
 8 | 
 9 | 
10 | """ frag anchor bed """
11 | 
12 | dic={}
13 | 
14 | with open(sys.argv[1],'r') as f:
15 | 
16 | 	for line in f:
17 | 
18 | 		frag, anchor = line.rstrip().split('\t')
19 | 
20 | 		if anchor not in dic:
21 | 
22 | 			dic[anchor] = set()
23 | 
24 | 		dic[anchor].add(frag)
25 | 
26 | f.close()
27 | 
28 | 
29 | """DpnII anchor bed file """
30 | 
31 | with open(sys.argv[2],'r') as f:
32 | 
33 | 	for line in f:
34 | 
35 | 		chr, beg, end, anchor, length = line.rstrip().split('\t')
36 | 
37 | 		frag_count = len(dic[anchor])
38 | 
39 | 		avg = float(length)/frag_count
40 | 	
41 | 		
42 | 
43 | 
44 | 		print '\t'.join([chr, beg, end, anchor, length, str(avg)])
45 | 
46 | 
47 | f.close()
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 	
58 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/get_aveg_frag_length.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | 
 4 | from __future__ import division
 5 | 
 6 | import sys
 7 | import os
 8 | 
 9 | 
10 | """ frag anchor bed """
11 | 
12 | dic={}
13 | 
14 | with open(sys.argv[1],'r') as f:
15 | 
16 | 	for line in f:
17 | 
18 | 		frag, anchor = line.rstrip().split('\t')
19 | 
20 | 		if anchor not in dic:
21 | 
22 | 			dic[anchor] = set()
23 | 
24 | 		dic[anchor].add(frag)
25 | 
26 | f.close()
27 | 
28 | 
29 | """DpnII anchor bed file """
30 | 
31 | with open(sys.argv[2],'r') as f:
32 | 
33 | 	for line in f:
34 | 
35 | 		chr, beg, end, anchor, length = line.rstrip().split('\t')
36 | 
37 | 		frag_count = len(dic[anchor])
38 | 
39 | 		avg = float(length)/frag_count
40 | 	
41 | 		
42 | 
43 | 
44 | 		print('\t'.join([chr, beg, end, anchor, length, str(avg)]))
45 | 
46 | 
47 | f.close()
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 	
58 | 


--------------------------------------------------------------------------------
/bin/Arima/split_chromo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import os
 5 | 
 6 | dic={}
 7 | 
 8 | dir_name='HiCorr_output'
 9 | 
10 | os.system('mkdir '+dir_name)
11 | 
12 | 
13 | 
14 | with open(sys.argv[1],'r') as f:
15 | 
16 |         for line in f:
17 | 
18 |                 chr, beg, end, id = line.rstrip().split('\t')[0:4]
19 |                 dic[id] = chr
20 | 
21 | f.close()
22 | 
23 | 
24 | out=open(os.path.join(dir_name, 'anchor_2_anchor.loop.chr1'),'w')
25 | 
26 | prev_chr='chr1'
27 | 
28 | for line in sys.stdin:
29 | 
30 |         a1, a2, obs, exp = line.rstrip().split('\t')[0:4]
31 | 
32 |         chr = dic[a1]
33 | 
34 |         if chr!=prev_chr:
35 | 
36 |                 out.close()
37 | 
38 |                 out=open(os.path.join(dir_name,'anchor_2_anchor.loop.'+chr),'w')
39 | 
40 |         out.write(line)
41 |         prev_chr = chr
42 | 
43 | 
44 | out.close()
45 | 
46 | f.close()
47 | 
48 | 


--------------------------------------------------------------------------------
/bin/DPNII/split_chromo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import os
 5 | 
 6 | dic={}
 7 | 
 8 | dir_name='HiCorr_output'
 9 | 
10 | os.system('mkdir '+dir_name)
11 | 
12 | 
13 | 
14 | with open(sys.argv[1],'r') as f:
15 | 
16 |         for line in f:
17 | 
18 |                 chr, beg, end, id = line.rstrip().split('\t')[0:4]
19 |                 dic[id] = chr
20 | 
21 | f.close()
22 | 
23 | 
24 | out=open(os.path.join(dir_name, 'anchor_2_anchor.loop.chr1'),'w')
25 | 
26 | prev_chr='chr1'
27 | 
28 | for line in sys.stdin:
29 | 
30 |         a1, a2, obs, exp = line.rstrip().split('\t')[0:4]
31 | 
32 |         chr = dic[a1]
33 | 
34 |         if chr!=prev_chr:
35 | 
36 |                 out.close()
37 | 
38 |                 out=open(os.path.join(dir_name,'anchor_2_anchor.loop.'+chr),'w')
39 | 
40 |         out.write(line)
41 |         prev_chr = chr
42 | 
43 | 
44 | out.close()
45 | 
46 | f.close()
47 | 
48 | 


--------------------------------------------------------------------------------
/documents/lib/find_RE_sites.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = 	"Usage:./find_RE_sites.pl <genome chromSize> <chr_fa_dir> <cutting site>\n".
 6 | 		"\tThis program find <sequence> sites in <genome>\n".
 7 | 		"\tOutput is in 1-system.\n";
 8 | 
 9 | my ($chromSize, $fa_dir,$seq) = @ARGV;
10 | if(not defined $seq){
11 | 	die($usage);
12 | }
13 | 
14 | open(IN, $chromSize);
15 | my $sizeref;
16 | while(my $line = <IN>){
17 | 	my ($chr, $size) = split "\t", $line;
18 | 	if($chr =~ /_/ || $chr eq "chrM"){
19 | 		next;
20 | 	}
21 | 	$sizeref->{$chr} = $size;
22 | }
23 | close(IN);
24 | 
25 | foreach my $chr (sort keys %{$sizeref}){
26 | 	my $size = $sizeref->{$chr};
27 | 	my @pos = `./sequence_match.pl -c $fa_dir/$chr.fa $seq`;
28 | 	chomp @pos;
29 | 	my $len = length($seq);
30 | 	while(my $loc = shift @pos){
31 | 		print join("\t", $chr, $loc, $loc + $len - 1)."\n";
32 | 	}
33 | }
34 | exit;
35 | 


--------------------------------------------------------------------------------
/documents/lib/sites_to_frag.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | chromosize={}
 6 | 
 7 | with open(sys.argv[1],'r') as f:
 8 | 	for line in f:
 9 | 		chr,size=line.rstrip().split('\t')
10 | 		chromosize[chr]=size
11 | f.close()
12 | 
13 | prev=""
14 | i=1
15 | with open(sys.argv[2],'r') as f:
16 | 	for line in f:
17 | 		chr,beg,end=line.rstrip().split('\t')
18 | 		if prev=="":
19 | 			print chr+'\t'+'1'+'\t'+str(int(beg)+1)+'\t'+'frag_'+str(i)
20 | 			i+=1
21 | 		if prev==chr:
22 | 			print chr+'\t'+str(prev_end-1)+'\t'+str(int(beg)+1)+'\t'+'frag_'+str(i)			
23 | 			i+=1
24 | 		if prev!=chr and prev!="":
25 | 			print prev+'\t'+str(prev_end-1)+'\t'+str(chromosize[prev])+'\t'+'frag_'+str(i)
26 | 			i+=1
27 | 			print chr+'\t'+'1'+'\t'+str(int(beg)+1)+'\t'+'frag_'+str(i)
28 | 			i+=1
29 | 		prev=chr
30 | 		prev_end=int(end)
31 | 		
32 | print prev+'\t'+str(prev_end-1)+'\t'+str(chromosize[prev])+'\t'+'frag_'+str(i)
33 | 
34 | f.close()
35 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/sites_to_frag.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | chromosize={}
 6 | 
 7 | with open(sys.argv[1],'r') as f:
 8 | 	for line in f:
 9 | 		chr,size=line.rstrip().split('\t')
10 | 		chromosize[chr]=size
11 | f.close()
12 | 
13 | prev=""
14 | i=1
15 | with open(sys.argv[2],'r') as f:
16 | 	for line in f:
17 | 		chr,beg,end=line.rstrip().split('\t')
18 | 		if prev=="":
19 | 			print(chr+'\t'+'1'+'\t'+str(int(beg)+1)+'\t'+'frag_'+str(i))
20 | 			i+=1
21 | 		if prev==chr:
22 | 			print(chr+'\t'+str(prev_end-1)+'\t'+str(int(beg)+1)+'\t'+'frag_'+str(i))
23 | 			i+=1
24 | 		if prev!=chr and prev!="":
25 | 			print(prev+'\t'+str(prev_end-1)+'\t'+str(chromosize[prev])+'\t'+'frag_'+str(i))
26 | 			i+=1
27 | 			print(chr+'\t'+'1'+'\t'+str(int(beg)+1)+'\t'+'frag_'+str(i))
28 | 			i+=1
29 | 		prev=chr
30 | 		prev_end=int(end)
31 | 		
32 | print(prev+'\t'+str(prev_end-1)+'\t'+str(chromosize[prev])+'\t'+'frag_'+str(i))
33 | 
34 | f.close()
35 | 


--------------------------------------------------------------------------------
/documents/lib/sequence_match.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my $usage = 	"Usage: ./sequence_match.pl <option> <fasta> <query seq>\n".
 5 | 		"\tThis program searches <query seq> in <fasta> file and prints all locations that have a match.\n".
 6 | 		"\t<option>: -c matching is not case specific. Case specific is by default\n";
 7 | 
 8 | my $option;
 9 | if($ARGV[0] =~ /^-/){
10 | 	$option = shift @ARGV;
11 | }
12 | 
13 | my ($fa, $motif) = @ARGV;
14 | if(not defined $motif){
15 | 	die($usage);
16 | }
17 | 
18 | my $case = 1;
19 | if($option =~ /c/){
20 | 	$case = 0;
21 | }
22 | 
23 | open(IN, $fa);
24 | my @lines = <IN>;
25 | close(IN);
26 | 
27 | chomp @lines;
28 | my $seq_name = shift @lines;
29 | my $seq = join("", @lines);
30 | #print "Loading $seq_name finished!\n";
31 | 
32 | if(! $case){
33 | 	$seq =~ tr/a-z/A-Z/;
34 | 	$motif =~ tr/a-z/A-Z/;
35 | }
36 | 
37 | my $pos = index($seq, $motif) + 1;
38 | while($pos > 0){
39 | 	print $pos ."\n";
40 | 	$pos = index($seq, $motif, $pos) + 1;
41 | }
42 | exit;
43 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/sequence_match.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my $usage = 	"Usage: ./sequence_match.pl <option> <fasta> <query seq>\n".
 5 | 		"\tThis program searches <query seq> in <fasta> file and prints all locations that have a match.\n".
 6 | 		"\t<option>: -c matching is not case specific. Case specific is by default\n";
 7 | 
 8 | my $option;
 9 | if($ARGV[0] =~ /^-/){
10 | 	$option = shift @ARGV;
11 | }
12 | 
13 | my ($fa, $motif) = @ARGV;
14 | if(not defined $motif){
15 | 	die($usage);
16 | }
17 | 
18 | my $case = 1;
19 | if($option =~ /c/){
20 | 	$case = 0;
21 | }
22 | 
23 | open(IN, $fa);
24 | my @lines = <IN>;
25 | close(IN);
26 | 
27 | chomp @lines;
28 | my $seq_name = shift @lines;
29 | my $seq = join("", @lines);
30 | #print "Loading $seq_name finished!\n";
31 | 
32 | if(! $case){
33 | 	$seq =~ tr/a-z/A-Z/;
34 | 	$motif =~ tr/a-z/A-Z/;
35 | }
36 | 
37 | my $pos = index($seq, $motif) + 1;
38 | while($pos > 0){
39 | 	print $pos ."\n";
40 | 	$pos = index($seq, $motif, $pos) + 1;
41 | }
42 | exit;
43 | 


--------------------------------------------------------------------------------
/bin/Arima/pick.dist.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = "Usage: pick.distance.of.loop.pl <bed> <anchor_to_anchor> \n";
 4 | my ($bed, $anchor) = @ARGV;
 5 | if(not defined $bed){
 6 |         die($usage);
 7 | }
 8 | my $beg_hash;
 9 | my $end_hash;
10 | open(IN, $bed);
11 | while(my $line= <IN>){
12 |         chomp $line;
13 |         my ($chr,$beg,$end,$id)= split "\t", $line;
14 |                 $beg_hash->{$id}=$beg;
15 |                 $end_hash->{$id}=$end;
16 | }
17 | close(IN);
18 | open(IN, $anchor);
19 | while(my $line= <IN>){
20 |         chomp $line;
21 |         my ($A1,$A2,@res)= split "\t", $line;
22 |         my @str1 = split /\_/, $A1;
23 |         my $a1=@str1[1];
24 |         my @str2 = split /\_/, $A2;
25 |         my $a2=@str2[1];
26 |         my $dist;
27 |         if($a1<$a2){
28 |                 $dist=$beg_hash->{$A2}-$end_hash->{$A1};
29 |         }else{
30 |                 $dist=$beg_hash->{$A1}-$end_hash->{$A2};
31 |         }
32 |         print join("\t",$line,$dist)."\n";
33 | }
34 | close(IN);
35 | exit;
36 | 
37 | 


--------------------------------------------------------------------------------
/bin/DPNII/pick.dist.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = "Usage: pick.distance.of.loop.pl <bed> <anchor_to_anchor> \n";
 4 | my ($bed, $anchor) = @ARGV;
 5 | if(not defined $bed){
 6 |         die($usage);
 7 | }
 8 | my $beg_hash;
 9 | my $end_hash;
10 | open(IN, $bed);
11 | while(my $line= <IN>){
12 |         chomp $line;
13 |         my ($chr,$beg,$end,$id)= split "\t", $line;
14 |                 $beg_hash->{$id}=$beg;
15 |                 $end_hash->{$id}=$end;
16 | }
17 | close(IN);
18 | open(IN, $anchor);
19 | while(my $line= <IN>){
20 |         chomp $line;
21 |         my ($A1,$A2,@res)= split "\t", $line;
22 |         my @str1 = split /\_/, $A1;
23 |         my $a1=@str1[1];
24 |         my @str2 = split /\_/, $A2;
25 |         my $a2=@str2[1];
26 |         my $dist;
27 |         if($a1<$a2){
28 |                 $dist=$beg_hash->{$A2}-$end_hash->{$A1};
29 |         }else{
30 |                 $dist=$beg_hash->{$A1}-$end_hash->{$A2};
31 |         }
32 |         print join("\t",$line,$dist)."\n";
33 | }
34 | close(IN);
35 | exit;
36 | 
37 | 


--------------------------------------------------------------------------------
/bin/plot.heatmap.r:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript
# Render the numeric matrix stored in the file given as first argument as a
# PNG heatmap named "<file>.png".
args = commandArgs(trailingOnly=TRUE)

# Return a matrix whose off-diagonal cells are the mean of [i,j] and [j,i] of
# the input; the diagonal stays 0.  Not called anywhere in this script.
make_symmetric <- function(input_matrix){
	m=matrix(0,nrow(input_matrix),ncol(input_matrix))
	for(i in 1:(nrow(input_matrix)-1)){
		for(j in (i+1):nrow(input_matrix)){
			m[i,j]=0.5*(input_matrix[i,j]+input_matrix[j,i])
			m[j,i]=m[i,j]
		}			
	}
	return(m)
}

file<-args[1]
data<-as.matrix(read.table(file,stringsAsFactors=FALSE))
# Blank the diagonal and clamp negative values to 0 before plotting.
diag(data)<-0
data[which(data<0)]<-0
# Palette of n-1 colours: red channel fixed at 1, green and blue falling
# together from (n-2)/(n-1) to 0, i.e. from pale red to pure red.
n <- 20
col <- rgb(1,(n-2):0/(n-1),(n-2):0/(n-1))
png(paste(file,".png",sep=''))
par(mar=c(0,0,0,0))
# Colour breaks start at 1.001 in both branches.
if(max(data)<2){
	# Small range: evenly spaced breaks from 1.001 up to the maximum.
	breaks <- seq(1.001,max(data),(max(data)-1.001)/19)
}else{
	# Larger range: evenly spaced breaks up to just above the 98th
	# percentile, with the maximum appended as the final break.
	step <- (quantile(data, prob=0.98)-1)/18
	up <- quantile(data, prob=0.98)+0.011
	if(up<2){
		up <- 2
		step <- 0.999/18
	}
	breaks <- c(seq(1.001,up,step),max(data))
}
# Columns are reversed before drawing; axes and labels are suppressed.
image(data[,ncol(data):1], zlim=c(1, 50), col=col, breaks=breaks,xaxt="n",yaxt="n",xlab="",ylab="")
dev.off()
36 | 


--------------------------------------------------------------------------------
/documents/HiCorr_micro-C.md:
--------------------------------------------------------------------------------
 1 | # :point_down:  *HiCorr on micro-C*
 2 | - Download the code from this repository, "bin/microC/" <br/>
 3 | - Download the reference files for micro-C (mm10/hg19 genome build)
 4 | ```
 5 | wget --no-check-certificate https://hiview10.gene.cwru.edu/public/DeepLoop_ref/microC_HiCorr.tar.gz
 6 | # old path: http://hiview.case.edu/ssz20/tmp.HiCorr.ref/microC_HiCorr.tar.gz
 7 | tar -xvf microC_HiCorr.tar.gz
 8 | chmod 775 HiCorr/bin/microC/*
 9 | ```
10 | - Check the [preprocessing for micro-C data (mapping, fragments filter, outs are cis and trans 500bp fragment loops)](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/micro-C%20preprocessing.sh) <br/>
11 | - Run HiCorr on micro-C data:
12 | ```
13 | bash HiCorr_micro-C.sh microC_ref/ bin/microC/ <frag_loop.name.cis> <frag_loop.name.trans> <outputname> <hg19/mm10>
14 |    # specify the path of downloaded unzipped reference file and scripts
15 |    # input two fragment loop files generated from preprocessing step
16 |    # specify outputname prefix
17 |    # specify genome build, the provided reference only includes hg19 and mm10
18 | ```
19 | 


--------------------------------------------------------------------------------
/documents/HiCorr_insituHi-C.md:
--------------------------------------------------------------------------------
 1 | # :point_down:  *HiCorr on in-situ Hi-C or DPNII/MboI enzyme Hi-C*
 2 | - Download the code from this repository, "bin/DPNII/" <br/>
 3 | - Download the reference files for DPNII (mm10/hg19/hg38 genome build)
 4 | ```
 5 | wget --no-check-certificate https://hiview10.gene.cwru.edu/public/DeepLoop_ref/ref/DPNII_HiCorr_ref.tar.gz
 6 | # old path: http://hiview.case.edu/ssz20/tmp.HiCorr.ref/ref/DPNII_HiCorr_ref.tar.gz
 7 | tar -xvf DPNII_HiCorr_ref.tar.gz
 8 | ```
 9 | - Check the [preprocessing for DPNII data (mapping, fragments filter, outs are cis and trans DPNII fragment contacts)](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/DPNII_preprocessing.sh) <br/>
10 | - Run HiCorr on DPNII Hi-C data:
11 | ```
12 | bash HiCorr_DPNIII.sh DPNII_HiCorr_ref/ bin/DPNII/ <frag_loop.name.cis> <frag_loop.name.trans> <outputname> <hg19/mm10/hg38>
13 |    # specify the path of downloaded unzipped reference file and scripts
14 |    # input two fragment loop files generated from preprocessing step
15 |    # specify outputname prefix
16 |    # specify genome build, the provided reference only includes hg19, hg38 and mm10
17 | ```
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/bin/plot.multiple.r:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | # Draw several matrix files as heatmap panels in one PNG, "<out>.plot.png".
 3 | # Usage: plot.multiple.r <out_prefix> <panel_rows> <panel_cols> <matrix_file>...
 4 | cli = commandArgs(trailingOnly=TRUE)
 5 | 
 6 | out_prefix <- as.character(cli[1])
 7 | panel_rows <- as.numeric(cli[2])
 8 | panel_cols <- as.numeric(cli[3])
 9 | matrix_files <- as.character(cli[4:length(cli)])
10 | 
11 | # Plot one matrix file into the current panel as a white-to-red heatmap.
12 | # Negative cells are set to 0 first.
13 | plot_heatmap <- function(file){
14 | 	mat <- as.matrix(read.table(file, stringsAsFactors=FALSE))
15 | 	mat[which(mat<0)] <- 0
16 | 	# 19 colours fading from white to pure red; image() needs 20 break points.
17 | 	n_breaks <- 20
18 | 	fade <- (n_breaks-2):0/(n_breaks-1)
19 | 	shades <- rgb(1, fade, fade)
20 | 	peak <- max(mat)
21 | 	if(peak >= 2){
22 | 		# Break points run up to just above the 98% quantile (at least 2);
23 | 		# the matrix maximum closes the last interval.
24 | 		q98 <- quantile(mat, prob=0.98)
25 | 		step <- (q98-1)/18
26 | 		top <- q98+0.011
27 | 		if(top<2){
28 | 			top <- 2
29 | 			step <- 0.999/18
30 | 		}
31 | 		cuts <- c(seq(1.001, top, step), peak)
32 | 	}else{
33 | 		cuts <- seq(1.001, peak, (peak-1.001)/19)
34 | 	}
35 | 	image(mat[,ncol(mat):1], zlim=c(1, 50), col=shades, breaks=cuts, xaxt="n", yaxt="n", xlab="", ylab="")
36 | }
37 | 
38 | # The canvas gets 150 pixels per panel in each direction.
39 | png(paste(out_prefix, ".plot.png", sep=''), width=150*panel_cols, height=150*panel_rows)
40 | #nf <- layout(matrix(c(1:6),2,3,byrow=TRUE), widths=c(rep(4,3)), heights=c(rep(4,2), TRUE))
41 | par(mar=c(1,1,1,1))
42 | par(mfrow=c(panel_rows, panel_cols))
43 | for(one_file in matrix_files){
44 | 	plot_heatmap(one_file)
45 | }
46 | 
47 | dev.off()
48 | 


--------------------------------------------------------------------------------
/bin/summary_sorted_trans_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Sum the counts of duplicate fragment pairs in a list grouped by first fragment.
 4 | #
 5 | # Input:  tab-separated "frag_<id1>", "frag_<id2>" and an optional count
 6 | #         (default 1); records sharing <id1> must be adjacent. Read from
 7 | #         <loop_list>, or from STDIN when it is omitted or "-".
 8 | # Output: one line per distinct pair on STDOUT, second ids in ascending
 9 | #         numeric order within each first id.
10 | 
11 | use strict;
12 | use warnings;
13 | 
14 | my $usage = "Usage:./summary_sorted_trans_frag_loop.pl <loop_list>\n";
15 | 
16 | my ($loop_list) = @ARGV;
17 | 
18 | if(not defined $loop_list){
19 | 	$loop_list = "-";
20 | }
21 | 
22 | # Three-argument open does not treat "-" specially, so map it to STDIN here.
23 | my $in;
24 | if($loop_list eq "-"){
25 | 	$in = \*STDIN;
26 | }else{
27 | 	open($in, '<', $loop_list) or die "Cannot open $loop_list: $!\n$usage";
28 | }
29 | 
30 | # undef (not 0) marks "no group yet", so a fragment with id 0 is handled
31 | # like any other id instead of being dropped.
32 | my $prev_id;
33 | my %count_of;	# second fragment id -> summed count for the current group
34 | while(my $line = <$in>){
35 | 	chomp $line;
36 | 	my ($fid1, $fid2, $count) = split "\t", $line;
37 | 	next if not defined $fid2;	# blank or truncated line
38 | 	$fid1 =~ s/frag_//;
39 | 	$fid2 =~ s/frag_//;
40 | 
41 | 	if(not defined $count){
42 | 		$count = 1;
43 | 	}
44 | 
45 | 	# A different first fragment closes the group collected so far.
46 | 	if(not defined $prev_id or $fid1 != $prev_id){
47 | 		print_group($prev_id, \%count_of) if defined $prev_id;
48 | 		$prev_id = $fid1;
49 | 	}
50 | 
51 | 	$count_of{$fid2} += $count;
52 | }
53 | close($in);
54 | 
55 | print_group($prev_id, \%count_of) if defined $prev_id;
56 | 
57 | exit;
58 | 
59 | # Print every pair of the finished group, then empty the accumulator.
60 | sub print_group{
61 | 	my ($first_id, $counts) = @_;
62 | 	foreach my $id (sort {$a <=> $b} keys %$counts){
63 | 		print join("\t", "frag_$first_id", "frag_$id", $counts->{$id})."\n";
64 | 	}
65 | 	%$counts = ();
66 | 	return;
67 | }
68 | 


--------------------------------------------------------------------------------
/bin/Arima/summary_sorted_trans_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Sum the counts of duplicate fragment pairs in a list grouped by first fragment.
 4 | #
 5 | # Input:  tab-separated "frag_<id1>", "frag_<id2>" and an optional count
 6 | #         (default 1); records sharing <id1> must be adjacent. Read from
 7 | #         <loop_list>, or from STDIN when it is omitted or "-".
 8 | # Output: one line per distinct pair on STDOUT, second ids in ascending
 9 | #         numeric order within each first id.
10 | 
11 | use strict;
12 | use warnings;
13 | 
14 | my $usage = "Usage:./summary_sorted_trans_frag_loop.pl <loop_list>\n";
15 | 
16 | my ($loop_list) = @ARGV;
17 | 
18 | if(not defined $loop_list){
19 | 	$loop_list = "-";
20 | }
21 | 
22 | # Three-argument open does not treat "-" specially, so map it to STDIN here.
23 | my $in;
24 | if($loop_list eq "-"){
25 | 	$in = \*STDIN;
26 | }else{
27 | 	open($in, '<', $loop_list) or die "Cannot open $loop_list: $!\n$usage";
28 | }
29 | 
30 | # undef (not 0) marks "no group yet", so a fragment with id 0 is handled
31 | # like any other id instead of being dropped.
32 | my $prev_id;
33 | my %count_of;	# second fragment id -> summed count for the current group
34 | while(my $line = <$in>){
35 | 	chomp $line;
36 | 	my ($fid1, $fid2, $count) = split "\t", $line;
37 | 	next if not defined $fid2;	# blank or truncated line
38 | 	$fid1 =~ s/frag_//;
39 | 	$fid2 =~ s/frag_//;
40 | 
41 | 	if(not defined $count){
42 | 		$count = 1;
43 | 	}
44 | 
45 | 	# A different first fragment closes the group collected so far.
46 | 	if(not defined $prev_id or $fid1 != $prev_id){
47 | 		print_group($prev_id, \%count_of) if defined $prev_id;
48 | 		$prev_id = $fid1;
49 | 	}
50 | 
51 | 	$count_of{$fid2} += $count;
52 | }
53 | close($in);
54 | 
55 | print_group($prev_id, \%count_of) if defined $prev_id;
56 | 
57 | exit;
58 | 
59 | # Print every pair of the finished group, then empty the accumulator.
60 | sub print_group{
61 | 	my ($first_id, $counts) = @_;
62 | 	foreach my $id (sort {$a <=> $b} keys %$counts){
63 | 		print join("\t", "frag_$first_id", "frag_$id", $counts->{$id})."\n";
64 | 	}
65 | 	%$counts = ();
66 | 	return;
67 | }
68 | 


--------------------------------------------------------------------------------
/bin/DPNII/summary_sorted_trans_frag_loop_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Sum the counts of duplicate fragment pairs in a list grouped by first fragment.
 4 | #
 5 | # Input:  tab-separated "frag_<id1>", "frag_<id2>" and an optional count
 6 | #         (default 1); records sharing <id1> must be adjacent. Read from
 7 | #         <loop_list>, or from STDIN when it is omitted or "-".
 8 | # Output: one line per distinct pair on STDOUT, second ids in ascending
 9 | #         numeric order within each first id.
10 | 
11 | use strict;
12 | use warnings;
13 | 
14 | my $usage = "Usage:./summary_sorted_trans_frag_loop.pl <loop_list>\n";
15 | 
16 | my ($loop_list) = @ARGV;
17 | 
18 | if(not defined $loop_list){
19 | 	$loop_list = "-";
20 | }
21 | 
22 | # Three-argument open does not treat "-" specially, so map it to STDIN here.
23 | my $in;
24 | if($loop_list eq "-"){
25 | 	$in = \*STDIN;
26 | }else{
27 | 	open($in, '<', $loop_list) or die "Cannot open $loop_list: $!\n$usage";
28 | }
29 | 
30 | # undef (not 0) marks "no group yet", so a fragment with id 0 is handled
31 | # like any other id instead of being dropped.
32 | my $prev_id;
33 | my %count_of;	# second fragment id -> summed count for the current group
34 | while(my $line = <$in>){
35 | 	chomp $line;
36 | 	my ($fid1, $fid2, $count) = split "\t", $line;
37 | 	next if not defined $fid2;	# blank or truncated line
38 | 	$fid1 =~ s/frag_//;
39 | 	$fid2 =~ s/frag_//;
40 | 
41 | 	if(not defined $count){
42 | 		$count = 1;
43 | 	}
44 | 
45 | 	# A different first fragment closes the group collected so far.
46 | 	if(not defined $prev_id or $fid1 != $prev_id){
47 | 		print_group($prev_id, \%count_of) if defined $prev_id;
48 | 		$prev_id = $fid1;
49 | 	}
50 | 
51 | 	$count_of{$fid2} += $count;
52 | }
53 | close($in);
54 | 
55 | print_group($prev_id, \%count_of) if defined $prev_id;
56 | 
57 | exit;
58 | 
59 | # Print every pair of the finished group, then empty the accumulator.
60 | sub print_group{
61 | 	my ($first_id, $counts) = @_;
62 | 	foreach my $id (sort {$a <=> $b} keys %$counts){
63 | 		print join("\t", "frag_$first_id", "frag_$id", $counts->{$id})."\n";
64 | 	}
65 | 	%$counts = ();
66 | 	return;
67 | }
68 | 


--------------------------------------------------------------------------------
/bin/eHiC-QC/summary_sorted_trans_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Sum the counts of duplicate fragment pairs in a list grouped by first fragment.
 4 | #
 5 | # Input:  tab-separated "frag_<id1>", "frag_<id2>" and an optional count
 6 | #         (default 1); records sharing <id1> must be adjacent. Read from
 7 | #         <loop_list>, or from STDIN when it is omitted or "-".
 8 | # Output: one line per distinct pair on STDOUT, second ids in ascending
 9 | #         numeric order within each first id.
10 | 
11 | use strict;
12 | use warnings;
13 | 
14 | my $usage = "Usage:./summary_sorted_trans_frag_loop.pl <loop_list>\n";
15 | 
16 | my ($loop_list) = @ARGV;
17 | 
18 | if(not defined $loop_list){
19 | 	$loop_list = "-";
20 | }
21 | 
22 | # Three-argument open does not treat "-" specially, so map it to STDIN here.
23 | my $in;
24 | if($loop_list eq "-"){
25 | 	$in = \*STDIN;
26 | }else{
27 | 	open($in, '<', $loop_list) or die "Cannot open $loop_list: $!\n$usage";
28 | }
29 | 
30 | # undef (not 0) marks "no group yet", so a fragment with id 0 is handled
31 | # like any other id instead of being dropped.
32 | my $prev_id;
33 | my %count_of;	# second fragment id -> summed count for the current group
34 | while(my $line = <$in>){
35 | 	chomp $line;
36 | 	my ($fid1, $fid2, $count) = split "\t", $line;
37 | 	next if not defined $fid2;	# blank or truncated line
38 | 	$fid1 =~ s/frag_//;
39 | 	$fid2 =~ s/frag_//;
40 | 
41 | 	if(not defined $count){
42 | 		$count = 1;
43 | 	}
44 | 
45 | 	# A different first fragment closes the group collected so far.
46 | 	if(not defined $prev_id or $fid1 != $prev_id){
47 | 		print_group($prev_id, \%count_of) if defined $prev_id;
48 | 		$prev_id = $fid1;
49 | 	}
50 | 
51 | 	$count_of{$fid2} += $count;
52 | }
53 | close($in);
54 | 
55 | print_group($prev_id, \%count_of) if defined $prev_id;
56 | 
57 | exit;
58 | 
59 | # Print every pair of the finished group, then empty the accumulator.
60 | sub print_group{
61 | 	my ($first_id, $counts) = @_;
62 | 	foreach my $id (sort {$a <=> $b} keys %$counts){
63 | 		print join("\t", "frag_$first_id", "frag_$id", $counts->{$id})."\n";
64 | 	}
65 | 	%$counts = ();
66 | 	return;
67 | }
68 | 


--------------------------------------------------------------------------------
/bin/preprocess/summary_sorted_trans_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Sum the counts of duplicate fragment pairs in a list grouped by first fragment.
 4 | #
 5 | # Input:  tab-separated "frag_<id1>", "frag_<id2>" and an optional count
 6 | #         (default 1); records sharing <id1> must be adjacent. Read from
 7 | #         <loop_list>, or from STDIN when it is omitted or "-".
 8 | # Output: one line per distinct pair on STDOUT, second ids in ascending
 9 | #         numeric order within each first id.
10 | 
11 | use strict;
12 | use warnings;
13 | 
14 | my $usage = "Usage:./summary_sorted_trans_frag_loop.pl <loop_list>\n";
15 | 
16 | my ($loop_list) = @ARGV;
17 | 
18 | if(not defined $loop_list){
19 | 	$loop_list = "-";
20 | }
21 | 
22 | # Three-argument open does not treat "-" specially, so map it to STDIN here.
23 | my $in;
24 | if($loop_list eq "-"){
25 | 	$in = \*STDIN;
26 | }else{
27 | 	open($in, '<', $loop_list) or die "Cannot open $loop_list: $!\n$usage";
28 | }
29 | 
30 | # undef (not 0) marks "no group yet", so a fragment with id 0 is handled
31 | # like any other id instead of being dropped.
32 | my $prev_id;
33 | my %count_of;	# second fragment id -> summed count for the current group
34 | while(my $line = <$in>){
35 | 	chomp $line;
36 | 	my ($fid1, $fid2, $count) = split "\t", $line;
37 | 	next if not defined $fid2;	# blank or truncated line
38 | 	$fid1 =~ s/frag_//;
39 | 	$fid2 =~ s/frag_//;
40 | 
41 | 	if(not defined $count){
42 | 		$count = 1;
43 | 	}
44 | 
45 | 	# A different first fragment closes the group collected so far.
46 | 	if(not defined $prev_id or $fid1 != $prev_id){
47 | 		print_group($prev_id, \%count_of) if defined $prev_id;
48 | 		$prev_id = $fid1;
49 | 	}
50 | 
51 | 	$count_of{$fid2} += $count;
52 | }
53 | close($in);
54 | 
55 | print_group($prev_id, \%count_of) if defined $prev_id;
56 | 
57 | exit;
58 | 
59 | # Print every pair of the finished group, then empty the accumulator.
60 | sub print_group{
61 | 	my ($first_id, $counts) = @_;
62 | 	foreach my $id (sort {$a <=> $b} keys %$counts){
63 | 		print join("\t", "frag_$first_id", "frag_$id", $counts->{$id})."\n";
64 | 	}
65 | 	%$counts = ();
66 | 	return;
67 | }
68 | 


--------------------------------------------------------------------------------
/documents/HiCorr_Arima.md:
--------------------------------------------------------------------------------
 1 | # :point_down:  *HiCorr on Arima*
 2 | - Download the code from this repository, "bin/Arima/" <br/>
 3 | - Download the reference files for Arima (mm10/hg19 genome build)
 4 | ```
 5 | wget --no-check-certificate https://hiview10.gene.cwru.edu/public/DeepLoop_ref/ref/Arima_HiCorr_ref.tar.gz
 6 | # old path: http://hiview.case.edu/ssz20/tmp.HiCorr.ref/ref/Arima_HiCorr_ref.tar.gz
 7 | tar -xvf Arima_HiCorr_ref.tar.gz
 8 | ```
 9 | - Check the [preprocessing for Arima data (mapping, fragments filter, outs are cis and trans fragment loops)](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/Arima.preprocessing.sh) <br/>
10 | - One [example log](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/Arima.preprocessing.example.sh) for preprocessing two biological reps of Arima Hi-C
11 | - Run HiCorr on Arima data:
12 | ```
13 | bash HiCorr_Arima.sh Arima_HiCorr_ref/ bin/Arima/ <frag_loop.name.cis> <frag_loop.name.trans> <name> <hg19/mm10>
14 |    # specify the path of downloaded unzipped reference file and scripts
15 |    # input two fragment loop files generated from preprocessing step
16 |    # specify outputname prefix
17 |    # specify genome build, the provided reference only includes hg19 and mm10
18 | ```
19 | 


--------------------------------------------------------------------------------
/HiCorr:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ref=/lab/solexa_weissman/cweng/PlayRoom/Shanshan/test_HiCorr/test_V2/HiCorr/ref
 4 | bin=/lab/solexa_weissman/cweng/PlayRoom/Shanshan/test_HiCorr/test_V2/HiCorr/bin
 5 | 
 6 | if [ $1 = "HindIII" ];then
 7 |   bash $bin/HindIII/HindIII.sh $ref/HindIII/ $bin/HindIII/ $2 $3 $4 $5  # <cis_loop_file> <trans_loop_file> <name_of_your_data> <reference_genome>
 8 | elif [ $1 = "DPNII" ];then
 9 |   bash $bin/DPNII/DPNII.sh $ref/DPNII/ $bin/DPNII/ $2 $3 $4 $5 # <cis_loop_file> <trans_loop_file> <name_of_your_data> <reference_genome>
10 | elif [ $1 = "eHiC" ];then
11 |   bash $bin/eHiC/eHiC.sh $ref/eHiC/ $bin/eHiC/ $2 $3 $4
12 | elif [ $1 = "Heatmap" ];then
13 |   bash $bin/draw_heatmap.sh $2 $3 $4 $5 $ref $bin $6 $7 $8 # <chr> <start> <end> <anchor_loop_file> $ref $bin <reference_genome> <enzyme>
14 | elif [ $1 = "Bam-process-HindIII" ];then
15 |   bash $bin/preprocess/bam_to_frag_loop.sh $2 $3 $4 $ref $bin/preprocess/ $5 HindIII
16 | elif [ $1 = "Bam-process-DpNII" ];then
17 |   bash $bin/preprocess/bam_to_frag_loop.sh $2 $3 $4 $ref $bin/preprocess/ $5 DPNII
18 | elif [ $1 = "eHiC-QC" ];then
19 |   bash $bin/eHiC-QC.sh $bin/eHiC-QC/ $1 $2 $3 $ref/eHiC-QC/
20 | else
21 |   echo "Wrong mode entered. Please refer to manual and re-enter"
22 | fi
23 | 


--------------------------------------------------------------------------------
/bin/draw_heatmap.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | chr=$1
 4 | start=$2
 5 | end=$3
 6 | region=$chr"_"$start"_"$end
 7 | anchor_loop=$4
 8 | ref=$5
 9 | bin=$6
10 | genome=$7
11 | enzyme=$8
12 | option=$9
13 | 
14 | $bin/generate.raw.expt.ratio.matrix.pl <(cat $ref/$enzyme/${genome}_${enzyme}_anchors_avg.bed | awk '{if($1=="'$chr'")print}') $anchor_loop $chr $start $end ${genome}.${enzyme}.${region}
15 | 
16 | #$bin/select_anchor.sh $chr $start $end $ref $genome $enzyme
17 | #$bin/select_loop.py $genome.anchors_$region.bed $anchor_loop >anchor_loop.$region
18 | 
19 | #$bin/generate_data_matrix.pl anchors_$region.bed  anchor_loop.$region grid.$region
20 | 
21 | #name=$region.r
22 | #sed "s/REGION/$region/g" $bin/template.r >$name
23 | #sed -i "s/START/$start/g" $name
24 | #sed -i "s/END/$end/g" $name
25 | if [ $9 == "-raw" ];then
26 | 	$bin/plot.heatmap.r ${genome}.${enzyme}.${region}.raw.matrix
27 | elif [ $9 == "-expected" ];then
28 | 	$bin/plot.heatmap.r ${genome}.${enzyme}.${region}.expt.matrix
29 | elif [ $9 == "-ratio" ];then
30 | 	$bin/plot.heatmap.r ${genome}.${enzyme}.${region}.ratio.matrix
31 | else
32 | 	$bin/plot.heatmap.r ${genome}.${enzyme}.${region}.raw.matrix
33 | 	$bin/plot.heatmap.r ${genome}.${enzyme}.${region}.expt.matrix
34 | 	$bin/plot.heatmap.r ${genome}.${enzyme}.${region}.ratio.matrix
35 | fi
36 | rm -f ${genome}.${enzyme}.${region}.raw.matrix ${genome}.${enzyme}.${region}.expt.matrix ${genome}.${enzyme}.${region}.ratio.matrix
37 | 


--------------------------------------------------------------------------------
/bin/Arima/remove_dup_PE_SAM_sorted.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Drop duplicate read pairs from tab-separated SAM records read on STDIN.
 4 | #
 5 | # Records are buffered while they share the same chromosome and position
 6 | # (columns 3 and 4). Within such a run they are grouped by own strand, mate
 7 | # chromosome, mate position and mate strand; each group keeps only the
 8 | # record(s) whose read id (column 1) sorts first as a string.
 9 | 
10 | use strict;
11 | 
12 | my ($last_chr, $last_loc) = (0, 0);
13 | 
14 | my $kept;	# hash ref: group key -> retained record line(s)
15 | 
16 | while(my $record = <stdin>){
17 | 	chomp $record;
18 | 	my @col = split "\t", $record;
19 | 	my ($flag, $chr1, $loc1, $chr2, $loc2) = @col[1, 2, 3, 6, 7];
20 | 	$chr2 = $chr1 if $chr2 eq "=";
21 | 	# Flag bit 16 selects "-" for this read, bit 32 selects "-" for its mate.
22 | 	my $own_strand = ($flag & 16) ? "-" : "+";
23 | 	my $mate_strand = ($flag & 32) ? "-" : "+";
24 | 	my $key = join(":", $own_strand, $chr2, $loc2, $mate_strand);
25 | 
26 | 	# A new position ends the current run: emit what was kept, then start over.
27 | 	unless(($chr1 eq $last_chr) && ($loc1 eq $last_loc)){
28 | 		flush_kept($kept);
29 | 		($last_chr, $last_loc) = ($chr1, $loc1);
30 | 	}
31 | 	$kept = keep_best($kept, $key, $record);
32 | }
33 | flush_kept($kept);
34 | exit;
35 | 
36 | ##########################################################
37 | # Print every retained entry and remove it from the store.
38 | sub flush_kept{
39 | 	my ($store) = @_;
40 | 	for my $key (keys %$store){
41 | 		print "$store->{$key}\n";
42 | 		delete $store->{$key};
43 | 	}
44 | 	return;
45 | }
46 | 
47 | # Record $record under $key unless the entry already stored there has a
48 | # smaller read id; records with an equal read id are appended on a new line.
49 | # Returns the (possibly newly created) store.
50 | sub keep_best{
51 | 	my ($store, $key, $record) = @_;
52 | 	unless(defined $store->{$key}){
53 | 		$store->{$key} = $record;
54 | 		return $store;
55 | 	}
56 | 	my ($stored_id) = split "\t", $store->{$key};
57 | 	my ($new_id) = split "\t", $record;
58 | 	my $order = $new_id cmp $stored_id;
59 | 	if($order < 0){
60 | 		$store->{$key} = $record;
61 | 	}elsif($order == 0){
62 | 		$store->{$key} = join("\n", $store->{$key}, $record);
63 | 	}
64 | 	return $store;
65 | }
66 | 


--------------------------------------------------------------------------------
/bin/DPNII/remove_dup_PE_SAM_sorted_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Drop duplicate read pairs from tab-separated SAM records read on STDIN.
 4 | #
 5 | # Records are buffered while they share the same chromosome and position
 6 | # (columns 3 and 4). Within such a run they are grouped by own strand, mate
 7 | # chromosome, mate position and mate strand; each group keeps only the
 8 | # record(s) whose read id (column 1) sorts first as a string.
 9 | 
10 | use strict;
11 | 
12 | my ($last_chr, $last_loc) = (0, 0);
13 | 
14 | my $kept;	# hash ref: group key -> retained record line(s)
15 | 
16 | while(my $record = <stdin>){
17 | 	chomp $record;
18 | 	my @col = split "\t", $record;
19 | 	my ($flag, $chr1, $loc1, $chr2, $loc2) = @col[1, 2, 3, 6, 7];
20 | 	$chr2 = $chr1 if $chr2 eq "=";
21 | 	# Flag bit 16 selects "-" for this read, bit 32 selects "-" for its mate.
22 | 	my $own_strand = ($flag & 16) ? "-" : "+";
23 | 	my $mate_strand = ($flag & 32) ? "-" : "+";
24 | 	my $key = join(":", $own_strand, $chr2, $loc2, $mate_strand);
25 | 
26 | 	# A new position ends the current run: emit what was kept, then start over.
27 | 	unless(($chr1 eq $last_chr) && ($loc1 eq $last_loc)){
28 | 		flush_kept($kept);
29 | 		($last_chr, $last_loc) = ($chr1, $loc1);
30 | 	}
31 | 	$kept = keep_best($kept, $key, $record);
32 | }
33 | flush_kept($kept);
34 | exit;
35 | 
36 | ##########################################################
37 | # Print every retained entry and remove it from the store.
38 | sub flush_kept{
39 | 	my ($store) = @_;
40 | 	for my $key (keys %$store){
41 | 		print "$store->{$key}\n";
42 | 		delete $store->{$key};
43 | 	}
44 | 	return;
45 | }
46 | 
47 | # Record $record under $key unless the entry already stored there has a
48 | # smaller read id; records with an equal read id are appended on a new line.
49 | # Returns the (possibly newly created) store.
50 | sub keep_best{
51 | 	my ($store, $key, $record) = @_;
52 | 	unless(defined $store->{$key}){
53 | 		$store->{$key} = $record;
54 | 		return $store;
55 | 	}
56 | 	my ($stored_id) = split "\t", $store->{$key};
57 | 	my ($new_id) = split "\t", $record;
58 | 	my $order = $new_id cmp $stored_id;
59 | 	if($order < 0){
60 | 		$store->{$key} = $record;
61 | 	}elsif($order == 0){
62 | 		$store->{$key} = join("\n", $store->{$key}, $record);
63 | 	}
64 | 	return $store;
65 | }
66 | 


--------------------------------------------------------------------------------
/bin/preprocess/remove_dup_PE_SAM_sorted.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Drop duplicate read pairs from tab-separated SAM records read on STDIN.
 4 | #
 5 | # Records are buffered while they share the same chromosome and position
 6 | # (columns 3 and 4). Within such a run they are grouped by own strand, mate
 7 | # chromosome, mate position and mate strand; each group keeps only the
 8 | # record(s) whose read id (column 1) sorts first as a string.
 9 | 
10 | use strict;
11 | 
12 | my ($last_chr, $last_loc) = (0, 0);
13 | 
14 | my $kept;	# hash ref: group key -> retained record line(s)
15 | 
16 | while(my $record = <stdin>){
17 | 	chomp $record;
18 | 	my @col = split "\t", $record;
19 | 	my ($flag, $chr1, $loc1, $chr2, $loc2) = @col[1, 2, 3, 6, 7];
20 | 	$chr2 = $chr1 if $chr2 eq "=";
21 | 	# Flag bit 16 selects "-" for this read, bit 32 selects "-" for its mate.
22 | 	my $own_strand = ($flag & 16) ? "-" : "+";
23 | 	my $mate_strand = ($flag & 32) ? "-" : "+";
24 | 	my $key = join(":", $own_strand, $chr2, $loc2, $mate_strand);
25 | 
26 | 	# A new position ends the current run: emit what was kept, then start over.
27 | 	unless(($chr1 eq $last_chr) && ($loc1 eq $last_loc)){
28 | 		flush_kept($kept);
29 | 		($last_chr, $last_loc) = ($chr1, $loc1);
30 | 	}
31 | 	$kept = keep_best($kept, $key, $record);
32 | }
33 | flush_kept($kept);
34 | exit;
35 | 
36 | ##########################################################
37 | # Print every retained entry and remove it from the store.
38 | sub flush_kept{
39 | 	my ($store) = @_;
40 | 	for my $key (keys %$store){
41 | 		print "$store->{$key}\n";
42 | 		delete $store->{$key};
43 | 	}
44 | 	return;
45 | }
46 | 
47 | # Record $record under $key unless the entry already stored there has a
48 | # smaller read id; records with an equal read id are appended on a new line.
49 | # Returns the (possibly newly created) store.
50 | sub keep_best{
51 | 	my ($store, $key, $record) = @_;
52 | 	unless(defined $store->{$key}){
53 | 		$store->{$key} = $record;
54 | 		return $store;
55 | 	}
56 | 	my ($stored_id) = split "\t", $store->{$key};
57 | 	my ($new_id) = split "\t", $record;
58 | 	my $order = $new_id cmp $stored_id;
59 | 	if($order < 0){
60 | 		$store->{$key} = $record;
61 | 	}elsif($order == 0){
62 | 		$store->{$key} = join("\n", $store->{$key}, $record);
63 | 	}
64 | 	return $store;
65 | }
66 | 


--------------------------------------------------------------------------------
/bin/HindIII/test_frag_corr.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | #if len(sys.argv)!=4:
 6 | #	print "Usage: ./test_frag_corr.py <read_sum> <frag_GC_mappability> <frag_loop>"
 7 | #	sys.exit()
 8 | 
 9 | reads_count={}
10 | mappability={}
11 | read_sum=open(sys.argv[1])
12 | sum=0
13 | NR=0
14 | for line in read_sum.readlines():
15 | 	frag,read=line.rstrip().split('\t')
16 | 	reads_count[frag]=float(read)
17 | read_sum.close()
18 | 
19 | GC={}
20 | map_file=open(sys.argv[2])
21 | for line in map_file.readlines():
22 | 	frag,gc,map=line.rstrip().split('\t')
23 | 	mappability[frag]=float(map)
24 | 	GC[frag]=float(gc)
25 | 	#NR+=1
26 | map_file.close()
27 | 
28 | dic={}
29 | for frag in mappability:
30 | 	if frag in reads_count and mappability[frag]!=0 and GC[frag]!=0:
31 | 		dic[frag]=reads_count[frag]/mappability[frag]
32 | 		sum+=dic[frag]
33 | 		NR+=1
34 | 	else:
35 | 		dic[frag]=0
36 | 	#sum+=dic[frag]
37 | 	#NR+=1
38 | mean=sum/float(NR)
39 | 
40 | for frag in dic:
41 | 	dic[frag]=dic[frag]/mean
42 | 
43 | #loop=open(sys.argv[3])
44 | #while True:
45 | 	#line=loop.readline()
46 | 	#if not line:
47 | 	#	break
48 | for line in sys.stdin:
49 | 	frag1,frag2,obs,expt=line.rstrip().split('\t')
50 | 	if frag1 in dic:
51 | 		corr1=dic[frag1]
52 | 	else:
53 | 		corr1=0
54 | 	if frag2 in dic:
55 | 		corr2=dic[frag2]
56 | 	else:
57 | 		corr2=0
58 | 	corr=corr1*corr2
59 | 	expt=float(expt)*corr
60 | 	if expt==0:
61 | 		obs="0"
62 | 	print(frag1, frag2, obs, str(expt),sep='\t')
63 | #loop.close()
64 | 


--------------------------------------------------------------------------------
/HiCorr_micro-C.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ref=$1
 4 | bin=$2
 5 | cis=$3
 6 | trans=$4
 7 | name=$5
 8 | genome=$6
 9 | anchorbed=${ref}/$genome.5kb.bed
10 | blacklist=${ref}/$genome.5kb.bed.blacklist
11 | lib=$bin
12 | #
13 | # fragment pairs to anchorpairs 
14 | $lib/fragdata_to_anchordata.pl $cis $ref/$genome.500bp_5kb | $lib/remove.blacklist.py $blacklist | $lib/get_dist.py $anchorbed > end_loop.$name.cis &
15 | $lib/fragdata_to_anchordata.pl $trans $ref/$genome.500bp_5kb | $lib/remove.blacklist.py $blacklist > end_loop.$name.trans &
16 | wait 
17 | 
18 | cat end_loop.$name.cis | awk '{if($4<=2000000) print $1,$2,$3,$4}' OFS='\t' | $lib/split_chromo.py $anchorbed & 
19 | cat end_loop.$name.cis | awk '{if($4<=2000000) print $1,$2,$3,$4}' OFS='\t' | $lib/get_group_statistics.pl - $ref/$genome.dist.5kb.group > dist.stat & 
20 | cat end_loop.$name.cis | awk '{if($4>2000000) print $0}' | cat - end_loop.$name.trans | cut -f1-3 | $lib/calculate_vis.py > anchor.vis.list &
21 | wait
22 | 
23 | Rscript $lib/integrated.r $ref/$genome.full.dist.stat.5kb
24 | 
25 | for file in `ls split/`;do
26 |         chr=${file#anchor_2_anchor.loop.}
27 |         cat $anchorbed | awk '{if($1=="'$chr'")print}' | $lib/list_full_matrix.pl - 2000000 | perl $lib/merge_sorted_anchor_loop.pl - split/$file | $lib/get_loop_lambda.pl $ref/$genome.dist.5kb.group integrated.dist.len.stat | $lib/add.vis.to.cis.2M.pl - anchor.vis.list > temp
28 |         mv temp split/anchor_2_anchor.loop.$chr
29 | done
30 | mv split ${name}.HiCorr_output
31 | wait
32 | exit
33 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/find_RE_sites.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # List every occurrence of a cut-site sequence in the chromosomes of a genome.
 4 | #
 5 | # For each chromosome named in <genome_size> (names containing "_" and "chrM"
 6 | # are skipped) the helper <generateLibPath>/sequence_match.pl is run on
 7 | # <genome_fa>/<chr>.fa; every position it reports is printed on STDOUT as
 8 | # "chr <tab> position <tab> position + length(sequence) - 1".
 9 | 
10 | use strict;
11 | use warnings;
12 | 
13 | my $usage = 	"Usage:./find_RE_sites.pl <genome_fa> <genome_size> <sequence> <generateLibPath>\n".
14 | 		"\tThis program find <sequence> sites in <genome>\n".
15 | 		"\tOutput is in 1-system.\n";
16 | 
17 | my ($genome_fa_dir, $genome_chrom_size, $cutsite,$lib_path) = @ARGV;
18 | if(not defined $lib_path){
19 | 	die($usage);
20 | }
21 | 
22 | # Chromosome name -> size as listed in the chromosome size file.
23 | my %size_of;
24 | open(my $size_fh, '<', $genome_chrom_size) or die "Cannot open $genome_chrom_size: $!\n";
25 | while(my $line = <$size_fh>){
26 | 	chomp $line;	# keeps the newline out of the size and out of one-column names
27 | 	my ($chr, $size) = split "\t", $line;
28 | 	next if not defined $chr or $chr eq "";	# blank line
29 | 	if($chr =~ /_/ || $chr eq "chrM"){
30 | 		next;
31 | 	}
32 | 	$size_of{$chr} = $size;
33 | }
34 | close($size_fh);
35 | 
36 | my $matcher = "$lib_path/sequence_match.pl";
37 | my $len = length($cutsite);
38 | foreach my $chr (sort keys %size_of){
39 | 	# The list form of a pipe open starts the helper without a shell, so the
40 | 	# paths and the cut site reach it exactly as given.
41 | 	open(my $match_fh, '-|', $matcher, '-c', "$genome_fa_dir/$chr.fa", $cutsite)
42 | 		or die "Cannot run $matcher: $!\n";
43 | 	# Positions are streamed line by line; an empty line is skipped instead
44 | 	# of ending the chromosome early.
45 | 	while(my $loc = <$match_fh>){
46 | 		chomp $loc;
47 | 		next if $loc eq "";
48 | 		print join("\t", $chr, $loc, $loc + $len - 1)."\n";
49 | 	}
50 | 	close($match_fh) or warn "$matcher reported a failure for $chr\n";
51 | }
52 | exit;
53 | 


--------------------------------------------------------------------------------
/bin/eHiC-QC/remove_dup_PE_ELPU.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Drop duplicate read pairs from tab-separated SAM records read on STDIN,
 4 | # treating reads with different barcodes as distinct.
 5 | #
 6 | # Records are buffered while they share the same chromosome and position
 7 | # (columns 3 and 4). Within such a run they are grouped by barcode (the last
 8 | # ":"-separated part of the read id), own strand, mate chromosome, mate
 9 | # position and mate strand; each group keeps only the record(s) whose read id
10 | # sorts first as a string.
11 | 
12 | use strict;
13 | 
14 | my ($last_chr, $last_loc) = (0, 0);
15 | 
16 | my $kept;	# hash ref: group key -> retained record line(s)
17 | 
18 | while(my $record = <stdin>){
19 | 	chomp $record;
20 | 	my @col = split "\t", $record;
21 | 	my ($id, $flag, $chr1, $loc1, $chr2, $loc2) = @col[0, 1, 2, 3, 6, 7];
22 | 
23 | 	my $barcode = (split ":", $id)[-1];
24 | 
25 | 	$chr2 = $chr1 if $chr2 eq "=";
26 | 	# Flag bit 16 selects "-" for this read, bit 32 selects "-" for its mate.
27 | 	my $own_strand = ($flag & 16) ? "-" : "+";
28 | 	my $mate_strand = ($flag & 32) ? "-" : "+";
29 | 	my $key = join(":", $barcode, $own_strand, $chr2, $loc2, $mate_strand);
30 | 
31 | 	# A new position ends the current run: emit what was kept, then start over.
32 | 	unless(($chr1 eq $last_chr) && ($loc1 eq $last_loc)){
33 | 		flush_kept($kept);
34 | 		($last_chr, $last_loc) = ($chr1, $loc1);
35 | 	}
36 | 	$kept = keep_best($kept, $key, $record);
37 | }
38 | flush_kept($kept);
39 | exit;
40 | 
41 | ##########################################################
42 | # Print every retained entry and remove it from the store.
43 | sub flush_kept{
44 | 	my ($store) = @_;
45 | 	for my $key (keys %$store){
46 | 		print "$store->{$key}\n";
47 | 		delete $store->{$key};
48 | 	}
49 | 	return;
50 | }
51 | 
52 | # Record $record under $key unless the entry already stored there has a
53 | # smaller read id; records with an equal read id are appended on a new line.
54 | # Returns the (possibly newly created) store.
55 | sub keep_best{
56 | 	my ($store, $key, $record) = @_;
57 | 	unless(defined $store->{$key}){
58 | 		$store->{$key} = $record;
59 | 		return $store;
60 | 	}
61 | 	my ($stored_id) = split "\t", $store->{$key};
62 | 	my ($new_id) = split "\t", $record;
63 | 	my $order = $new_id cmp $stored_id;
64 | 	if($order < 0){
65 | 		$store->{$key} = $record;
66 | 	}elsif($order == 0){
67 | 		$store->{$key} = join("\n", $store->{$key}, $record);
68 | 	}
69 | 	return $store;
70 | }
71 | 


--------------------------------------------------------------------------------
/bin/eHiC-QC/resort_by_frag_id.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Re-sort a loop file by fragment id: split it into one temporary file per
 4 | # chromosome, merge those files with merge_sorted_frag_loop.pl and replace
 5 | # the loop file with the merged result.
 6 | 
 7 | use strict;
 8 | use warnings;
 9 | use File::Path qw(rmtree);
10 | 
11 | my $usage =     "Usage:./resort_by_frag_id.pl <frag_bed> <loop_file_sorted_within_chrom>\n".
12 |                 "\tThis program takes loop file, each chrome should already be sorted, but frag_id not sorted because of unclear order between chrome\n".
13 | 		"\tTherefore this program first split loop file by chrom and then merge them again by fragment id.\n";
14 | 
15 | my ($frag_bed, $loop_file) = @ARGV;
16 | 
17 | if(not defined $loop_file){
18 | 	die($usage);
19 | }
20 | 
21 | my $folder = "tmp_folder.$loop_file";
22 | if(not -d $folder){
23 | 	mkdir($folder) or die "Cannot create $folder: $!\n";
24 | }
25 | 
26 | my @temp_files = ();
27 | 
28 | my %chr_of;	# fragment id -> chromosome
29 | my %fh_of;	# chromosome -> write handle of its temporary file
30 | open(my $bed_fh, '<', $frag_bed) or die "Cannot open $frag_bed: $!\n";
31 | while(my $line = <$bed_fh>){
32 | 	chomp $line;
33 | 	my ($chr, $beg, $end, $id) = split "\t", $line;
34 | 	next if not defined $id;	# blank or truncated line
35 | 	$chr_of{$id} = $chr;
36 | 	if(not defined $fh_of{$chr}){
37 | 		# Temporary files are listed in the order the chromosomes first
38 | 		# appear in the fragment bed file.
39 | 		my $temp = "$folder/$loop_file.$chr";
40 | 		open(my $fh, '>', $temp) or die "Cannot write $temp: $!\n";
41 | 		push @temp_files, $temp;
42 | 		$fh_of{$chr} = $fh;
43 | 	}
44 | }
45 | close($bed_fh);
46 | 
47 | open(my $loop_fh, '<', $loop_file) or die "Cannot open $loop_file: $!\n";
48 | while(my $line = <$loop_fh>){
49 | 	chomp $line;
50 | 	next if $line eq "";
51 | 	my ($id) = split "\t", $line;
52 | 	my $chr = $chr_of{$id};
53 | 	if(not defined $chr){
54 | 		die "Fragment id '$id' of $loop_file is not listed in $frag_bed\n";
55 | 	}
56 | 	print {$fh_of{$chr}} $line."\n";
57 | }
58 | close($loop_fh);
59 | 
60 | # All handles stay open until here, so a chromosome that shows up again later
61 | # in the loop file can still be written; closing flushes them for the merge.
62 | foreach my $chr (keys %fh_of){
63 | 	close($fh_of{$chr}) or die "Cannot finish the temporary file of $chr: $!\n";
64 | }
65 | 
66 | # The merger is started without a shell and its output copied to a temporary
67 | # file; the loop file is replaced only after the merger ended successfully.
68 | my $merger = "$ENV{HOME}/lib/HiC/merge_sorted_frag_loop.pl";
69 | my $merged = "temp.$loop_file";
70 | open(my $merge_fh, '-|', $merger, @temp_files) or die "Cannot run $merger: $!\n";
71 | open(my $out_fh, '>', $merged) or die "Cannot write $merged: $!\n";
72 | while(my $line = <$merge_fh>){
73 | 	print {$out_fh} $line;
74 | }
75 | close($out_fh) or die "Cannot finish $merged: $!\n";
76 | close($merge_fh) or die "$merger failed; $loop_file is left unchanged\n";
77 | 
78 | rename($merged, $loop_file) or die "Cannot replace $loop_file: $!\n";
79 | rmtree($folder);
80 | 
81 | exit;
82 | 


--------------------------------------------------------------------------------
/bin/resort_by_frag_id.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Re-sort a loop file by fragment id: split it into one temporary file per
 4 | # chromosome, merge those files with <bin>/merge_sorted_frag_loop.pl and
 5 | # replace the loop file with the merged result.
 6 | 
 7 | use strict;
 8 | use warnings;
 9 | use File::Path qw(rmtree);
10 | 
11 | my $usage =     "Usage:./resort_by_frag_id.pl <frag_bed> <loop_file_sorted_within_chrom>\n".
12 |                 "\tThis program takes loop file, each chrome should already be sorted, but frag_id not sorted because of unclear order between chrome\n".
13 | 		"\tTherefore this program first split loop file by chrom and then merge them again by fragment id.\n";
14 | 
15 | my ($frag_bed, $loop_file, $bin) = @ARGV;
16 | 
17 | if(not defined $frag_bed){
18 | 	die($usage);
19 | }
20 | if(not defined $loop_file){
21 | 	$loop_file = "-";
22 | }
23 | 
24 | my $folder = "tmp_folder.$loop_file";
25 | if(not -d $folder){
26 | 	mkdir($folder) or die "Cannot create $folder: $!\n";
27 | }
28 | 
29 | my @temp_files = ();
30 | 
31 | my %chr_of;	# fragment id -> chromosome
32 | my %fh_of;	# chromosome -> write handle of its temporary file
33 | open(my $bed_fh, '<', $frag_bed) or die "Cannot open $frag_bed: $!\n";
34 | while(my $line = <$bed_fh>){
35 | 	chomp $line;
36 | 	my ($chr, $beg, $end, $id) = split "\t", $line;
37 | 	next if not defined $id;	# blank or truncated line
38 | 	$chr_of{$id} = $chr;
39 | 	if(not defined $fh_of{$chr}){
40 | 		# Temporary files are listed in the order the chromosomes first
41 | 		# appear in the fragment bed file.
42 | 		my $temp = "$folder/$loop_file.$chr";
43 | 		open(my $fh, '>', $temp) or die "Cannot write $temp: $!\n";
44 | 		push @temp_files, $temp;
45 | 		$fh_of{$chr} = $fh;
46 | 	}
47 | }
48 | close($bed_fh);
49 | 
50 | # "-" means standard input; three-argument open does not special-case it.
51 | # NOTE(review): with "-" the merged result still ends up in a file literally
52 | # named "-" - confirm whether STDOUT was intended.
53 | my $loop_fh;
54 | if($loop_file eq "-"){
55 | 	$loop_fh = \*STDIN;
56 | }else{
57 | 	open($loop_fh, '<', $loop_file) or die "Cannot open $loop_file: $!\n";
58 | }
59 | while(my $line = <$loop_fh>){
60 | 	chomp $line;
61 | 	next if $line eq "";
62 | 	my ($id) = split "\t", $line;
63 | 	my $chr = $chr_of{$id};
64 | 	if(not defined $chr){
65 | 		die "Fragment id '$id' of $loop_file is not listed in $frag_bed\n";
66 | 	}
67 | 	print {$fh_of{$chr}} $line."\n";
68 | }
69 | close($loop_fh);
70 | 
71 | # All handles stay open until here, so a chromosome that shows up again later
72 | # in the loop file can still be written; closing flushes them for the merge.
73 | foreach my $chr (keys %fh_of){
74 | 	close($fh_of{$chr}) or die "Cannot finish the temporary file of $chr: $!\n";
75 | }
76 | 
77 | # Without <bin> the merger is looked up by name; a <bin> that lacks the
78 | # trailing "/" gets one.
79 | $bin = "" if not defined $bin;
80 | $bin .= "/" if $bin ne "" and $bin !~ m{/$};
81 | my $path=$bin."merge_sorted_frag_loop.pl";
82 | 
83 | # The merger is started without a shell and its output copied to a temporary
84 | # file; the loop file is replaced only after the merger ended successfully.
85 | my $merged = "temp.$loop_file";
86 | open(my $merge_fh, '-|', $path, @temp_files) or die "Cannot run $path: $!\n";
87 | open(my $out_fh, '>', $merged) or die "Cannot write $merged: $!\n";
88 | while(my $line = <$merge_fh>){
89 | 	print {$out_fh} $line;
90 | }
91 | close($out_fh) or die "Cannot finish $merged: $!\n";
92 | close($merge_fh) or die "$path failed; $loop_file is left unchanged\n";
93 | 
94 | rename($merged, $loop_file) or die "Cannot replace $loop_file: $!\n";
95 | rmtree($folder);
96 | 
97 | exit;
98 | 


--------------------------------------------------------------------------------
/bin/preprocess/resort_by_frag_id.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my $usage =     "Usage:./resort_by_frag_id.pl <frag_bed> <loop_file_sorted_within_chrom>\n".
 5 |                 "\tThis program takes loop file, each chrome should already be sorted, but frag_id not sorted because of unclear order between chrome\n".
 6 | 		"\tTherefore this program first split loop file by chrom and then merge them again by fragment id.\n";
 7 | 
 8 | my ($frag_bed, $loop_file, $bin) = @ARGV;
 9 | 
10 | if(not defined $loop_file){
11 | 	$loop_file = "-";
12 | }
13 | 
14 | my $folder = "tmp_folder.$loop_file";
15 | `mkdir $folder`;
16 | 
17 | my @temp_files = ();
18 | 
19 | my $frag_chr;
20 | my $file_handles;
21 | open(IN, $frag_bed);
22 | while(my $line = <IN>){
23 |         chomp $line;
24 |         my ($chr, $beg, $end, $id) = split "\t", $line;
25 |         $frag_chr->{$id} = $chr;
26 | 	if(not defined $file_handles->{$chr}){
27 | 		my $fh;
28 | 		open($fh, ">$folder/$loop_file.$chr");
29 | 		push @temp_files, "$folder/$loop_file.$chr";
30 | 		$file_handles->{$chr} = $fh;
31 | 	}
32 | }
33 | close(IN);
34 | 
35 | my $prev_chr = 0;
36 | my $curr_fh;
37 | open(IN, $loop_file);
38 | while(my $line = <IN>){
39 | 	chomp $line;
40 | 	my ($id, @rest) = split "\t", $line;
41 | 	my $chr = $frag_chr->{$id};
42 | 	if($chr ne $prev_chr){
43 | 		if($prev_chr){
44 | 			close($curr_fh);
45 | 		}
46 | 		$curr_fh = $file_handles->{$chr};
47 | 		$prev_chr = $chr;
48 | 	}
49 | 	print $curr_fh $line."\n";
50 | }
51 | close(IN);
52 | 
53 | close($curr_fh);
54 | my $file_list = join(" ", @temp_files);
55 | 
56 | my $path=$bin."/merge_sorted_frag_loop.pl";
57 | `$path $file_list > temp.$loop_file`;
58 | `mv temp.$loop_file $loop_file`;
59 | `rm -r $folder`;
60 | 
61 | exit;
62 | 


--------------------------------------------------------------------------------
/bin/Arima/fragdata_to_anchordata.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./fragdata_to_anchordata.pl <loop_list> <promoter_map>\n";
 6 | 
 7 | my ($loop_list, $map_file) = @ARGV;
 8 | 
 9 | if(not defined $map_file){
10 |         die($usage);
11 | }
12 | 
13 | ##################### Read fragment map #####################
14 | my $map_frag;
15 | my $map_gene;
16 | open(IN, $map_file) || die("Error: Cannot open file $map_file!\n");
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($fid, $gid) = split "\t", $line;
20 |         $map_frag->{$fid} = $gid;
21 | 	push @{$map_gene->{$gid}}, $fid;
22 | }
23 | close(IN);
24 | 
25 | ######################## Bed fragment ########################
26 | my $loop_bed;
27 | #my $random_bed;
28 | open(IN, $loop_list) || die("Error: Cannot open file $loop_list!\n");
29 | while(my $line = <IN>){
30 |         chomp $line;
31 |         my ($fid1, $fid2,$val,$dist) = split "\t", $line;
32 | 	my $gid1 = $map_frag->{$fid1};
33 | 	my $gid2 = $map_frag->{$fid2};
34 | 	if($gid1 ne $gid2){
35 | 		if(not defined $loop_bed->{$gid1}->{$gid2}){
36 | 			$loop_bed->{$gid1}->{$gid2} = 0;
37 | 			#$random_bed->{$gid1}->{$gid2} = $rand;
38 | 		}
39 | 		
40 | 		$loop_bed->{$gid1}->{$gid2} += $val;
41 | 			
42 | 	}
43 | }
44 | close(IN);
45 | 
46 | ####################### Output files ######################
47 | foreach my $gid1 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %$loop_bed){
48 |         foreach my $gid2 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %{$loop_bed->{$gid1}}){
49 |                 #my $rand = $random_bed->{$gid1}->{$gid2};
50 | 		my $val = $loop_bed->{$gid1}->{$gid2};
51 | 		print join("\t", $gid1, $gid2, $val)."\n";
52 |         }
53 | }
54 | 
55 | exit;
56 | 


--------------------------------------------------------------------------------
/bin/microC/fragdata_to_anchordata.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./fragdata_to_anchordata.pl <loop_list> <promoter_map>\n";
 6 | 
 7 | my ($loop_list, $map_file) = @ARGV;
 8 | 
 9 | if(not defined $map_file){
10 |         die($usage);
11 | }
12 | 
13 | ##################### Read fragment map #####################
14 | my $map_frag;
15 | my $map_gene;
16 | open(IN, $map_file) || die("Error: Cannot open file $map_file!\n");
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($fid, $gid) = split "\t", $line;
20 |         $map_frag->{$fid} = $gid;
21 | 	push @{$map_gene->{$gid}}, $fid;
22 | }
23 | close(IN);
24 | 
25 | ######################## Bed fragment ########################
26 | my $loop_bed;
27 | #my $random_bed;
28 | open(IN, $loop_list) || die("Error: Cannot open file $loop_list!\n");
29 | while(my $line = <IN>){
30 |         chomp $line;
31 |         my ($fid1, $fid2,$val,$dist) = split "\t", $line;
32 | 	my $gid1 = $map_frag->{$fid1};
33 | 	my $gid2 = $map_frag->{$fid2};
34 | 	if($gid1 ne $gid2){
35 | 		if(not defined $loop_bed->{$gid1}->{$gid2}){
36 | 			$loop_bed->{$gid1}->{$gid2} = 0;
37 | 			#$random_bed->{$gid1}->{$gid2} = $rand;
38 | 		}
39 | 		
40 | 		$loop_bed->{$gid1}->{$gid2} += $val;
41 | 			
42 | 	}
43 | }
44 | close(IN);
45 | 
46 | ####################### Output files ######################
47 | foreach my $gid1 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %$loop_bed){
48 |         foreach my $gid2 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %{$loop_bed->{$gid1}}){
49 |                 #my $rand = $random_bed->{$gid1}->{$gid2};
50 | 		my $val = $loop_bed->{$gid1}->{$gid2};
51 | 		print join("\t", $gid1, $gid2, $val)."\n";
52 |         }
53 | }
54 | 
55 | exit;
56 | 


--------------------------------------------------------------------------------
/bin/DPNII/fragdata_to_anchordata_DNPII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./fragdata_to_anchordata.pl <loop_list> <promoter_map>\n";
 6 | 
 7 | my ($loop_list, $map_file) = @ARGV;
 8 | 
 9 | if(not defined $map_file){
10 |         die($usage);
11 | }
12 | 
13 | ##################### Read fragment map #####################
14 | my $map_frag;
15 | my $map_gene;
16 | open(IN, $map_file) || die("Error: Cannot open file $map_file!\n");
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($fid, $gid) = split "\t", $line;
20 |         $map_frag->{$fid} = $gid;
21 | 	push @{$map_gene->{$gid}}, $fid;
22 | }
23 | close(IN);
24 | 
25 | ######################## Bed fragment ########################
26 | my $loop_bed;
27 | #my $random_bed;
28 | open(IN, $loop_list) || die("Error: Cannot open file $loop_list!\n");
29 | while(my $line = <IN>){
30 |         chomp $line;
31 |         my ($fid1, $fid2,$val,$dist) = split "\t", $line;
32 | 	my $gid1 = $map_frag->{$fid1};
33 | 	my $gid2 = $map_frag->{$fid2};
34 | 	if($gid1 ne $gid2){
35 | 		if(not defined $loop_bed->{$gid1}->{$gid2}){
36 | 			$loop_bed->{$gid1}->{$gid2} = 0;
37 | 			#$random_bed->{$gid1}->{$gid2} = $rand;
38 | 		}
39 | 		
40 | 		$loop_bed->{$gid1}->{$gid2} += $val;
41 | 			
42 | 	}
43 | }
44 | close(IN);
45 | 
46 | ####################### Output files ######################
47 | foreach my $gid1 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %$loop_bed){
48 |         foreach my $gid2 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %{$loop_bed->{$gid1}}){
49 |                 #my $rand = $random_bed->{$gid1}->{$gid2};
50 | 		my $val = $loop_bed->{$gid1}->{$gid2};
51 | 		print join("\t", $gid1, $gid2, $val)."\n";
52 |         }
53 | }
54 | 
55 | exit;
56 | 


--------------------------------------------------------------------------------
/bin/HindIII/fragdata_to_anchordata.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./fragdata_to_anchordata.pl <loop_list> <promoter_map>\n";
 6 | 
 7 | my ($loop_list, $map_file) = @ARGV;
 8 | 
 9 | if(not defined $map_file){
10 |         die($usage);
11 | }
12 | 
13 | ##################### Read fragment map #####################
14 | my $map_frag;
15 | my $map_gene;
16 | open(IN, $map_file) || die("Error: Cannot open file $map_file!\n");
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($fid, $gid) = split "\t", $line;
20 |         $map_frag->{$fid} = $gid;
21 | 	push @{$map_gene->{$gid}}, $fid;
22 | }
23 | close(IN);
24 | 
25 | ######################## Bed fragment ########################
26 | my $loop_bed;
27 | my $random_bed;
28 | open(IN, $loop_list) || die("Error: Cannot open file $loop_list!\n");
29 | while(my $line = <IN>){
30 |         chomp $line;
31 |         my ($fid1, $fid2, $val, $rand) = split "\t", $line;
32 | 	my $gid1 = $map_frag->{$fid1};
33 | 	my $gid2 = $map_frag->{$fid2};
34 | 	if($gid1 ne $gid2){
35 | 		if(not defined $loop_bed->{$gid1}->{$gid2}){
36 | 			$loop_bed->{$gid1}->{$gid2} = $val;
37 | 			$random_bed->{$gid1}->{$gid2} = $rand;
38 | 		}else{
39 | 			$loop_bed->{$gid1}->{$gid2} += $val;
40 | 			$random_bed->{$gid1}->{$gid2} += $rand;
41 | 		}
42 | 	}
43 | }
44 | close(IN);
45 | 
46 | ####################### Output files ######################
47 | foreach my $gid1 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %$loop_bed){
48 |         foreach my $gid2 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %{$random_bed->{$gid1}}){
49 |                 my $rand = $random_bed->{$gid1}->{$gid2};
50 | 		my $val = $loop_bed->{$gid1}->{$gid2};
51 | 		print join("\t", $gid1, $gid2, $val, $rand)."\n";
52 |         }
53 | }
54 | 
55 | exit;
56 | 


--------------------------------------------------------------------------------
/bin/eHiC/fragdata_to_anchordata.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./fragdata_to_anchordata.pl <loop_list> <promoter_map>\n";
 6 | 
 7 | my ($loop_list, $map_file) = @ARGV;
 8 | 
 9 | if(not defined $map_file){
10 |         die($usage);
11 | }
12 | 
13 | ##################### Read fragment map #####################
14 | my $map_frag;
15 | my $map_gene;
16 | open(IN, $map_file) || die("Error: Cannot open file $map_file!\n");
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($fid, $gid) = split "\t", $line;
20 |         $map_frag->{$fid} = $gid;
21 | 	push @{$map_gene->{$gid}}, $fid;
22 | }
23 | close(IN);
24 | 
25 | ######################## Bed fragment ########################
26 | my $loop_bed;
27 | my $random_bed;
28 | open(IN, $loop_list) || die("Error: Cannot open file $loop_list!\n");
29 | while(my $line = <IN>){
30 |         chomp $line;
31 |         my ($fid1, $fid2, $val, $rand) = split "\t", $line;
32 | 	my $gid1 = $map_frag->{$fid1};
33 | 	my $gid2 = $map_frag->{$fid2};
34 | 	if($gid1 ne $gid2){
35 | 		if(not defined $loop_bed->{$gid1}->{$gid2}){
36 | 			$loop_bed->{$gid1}->{$gid2} = $val;
37 | 			$random_bed->{$gid1}->{$gid2} = $rand;
38 | 		}else{
39 | 			$loop_bed->{$gid1}->{$gid2} += $val;
40 | 			$random_bed->{$gid1}->{$gid2} += $rand;
41 | 		}
42 | 	}
43 | }
44 | close(IN);
45 | 
46 | ####################### Output files ######################
47 | foreach my $gid1 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %$loop_bed){
48 |         foreach my $gid2 (sort {($a=~/.+_([0-9]+)$/)[0] <=> ($b=~/.+_([0-9]+)/)[0]} keys %{$random_bed->{$gid1}}){
49 |                 my $rand = $random_bed->{$gid1}->{$gid2};
50 | 		my $val = $loop_bed->{$gid1}->{$gid2};
51 | 		print join("\t", $gid1, $gid2, $val, $rand)."\n";
52 |         }
53 | }
54 | 
55 | exit;
56 | 


--------------------------------------------------------------------------------
/bin/Arima/correct.trans.reads.by.corr.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my ($trans_loop, $frag_bed, $len_20_group,$len_group_corr) = @ARGV;
 4 | 
 5 | 
 6 | #####################GC ###################################### #################
 7 | 
 8 | my $len_range;
 9 | open(IN, $len_20_group);
10 | while(my $line = <IN>){
11 |         chomp $line;
12 |         my ($id, $min, $max) = split "\t", $line;
13 |         $len_range->{$id} = "$min:$max";
14 | }
15 | close(IN);
16 | my $frag_len_group;
17 | open(IN, $frag_bed);
18 | while(my $line = <IN>){
19 |         chomp $line;
20 |         my ($chr, $beg, $end, $id, $dist, $len) = split "\t", $line;
21 |         $frag_len_group->{$id} = get_id($len, $len_range);
22 | }
23 | close(IN);
24 | 
25 | 
26 | ##################### get the fragment stat information ###########################
27 | my $len_c;
28 | open(IN, $len_group_corr);
29 | while(my $line=<IN>){
30 | 	chomp $line;
31 | 	my ($g1, $g2, $corr) = split "\t", $line;
32 | 	$len_c->{$g1}->{$g2} = $corr;
33 | }
34 | close(IN);
35 | 
36 | ################## Calculate group averages #######################################
37 | open(IN, $trans_loop);
38 | #open(OUT,">./facor.zero");
39 | while(my $line = <IN>){
40 | 	chomp $line;
41 | 	my ($frag1, $frag2, $obs) = split "\t", $line;
42 | 	my $len_g1=$frag_len_group->{$frag1};
43 | 	my $len_g2=$frag_len_group->{$frag2};
44 | 	my $len_corr = $len_c->{$len_g1}->{$len_g2};
45 | 	my $avg=$obs/$len_corr;
46 | 	print join("\t",$frag1, $frag2, $avg)."\n";
47 | 	
48 | }
49 | close(IN);
50 | close(OUT);
51 | ##############################################################
52 | sub get_id{
53 |         my ($val, $range) = @_;
54 |         foreach my $id (keys %{$range}){
55 |                 my ($min, $max) = split ":", $range->{$id};
56 |                 if($val > $min && $val <= $max){
57 |                         return $id;
58 |                 }
59 |         }
60 | #        die("Error: did not find a group\n");
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/bin/DPNII/correct.trans.reads.by.corr_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my ($trans_loop, $frag_bed, $len_20_group,$len_group_corr) = @ARGV;
 4 | 
 5 | 
 6 | #####################GC ###################################### #################
 7 | 
 8 | my $len_range;
 9 | open(IN, $len_20_group);
10 | while(my $line = <IN>){
11 |         chomp $line;
12 |         my ($id, $min, $max) = split "\t", $line;
13 |         $len_range->{$id} = "$min:$max";
14 | }
15 | close(IN);
16 | my $frag_len_group;
17 | open(IN, $frag_bed);
18 | while(my $line = <IN>){
19 |         chomp $line;
20 |         my ($chr, $beg, $end, $id, $dist, $len) = split "\t", $line;
21 |         $frag_len_group->{$id} = get_id($len, $len_range);
22 | }
23 | close(IN);
24 | 
25 | 
26 | ##################### get the fragment stat information ###########################
27 | my $len_c;
28 | open(IN, $len_group_corr);
29 | while(my $line=<IN>){
30 | 	chomp $line;
31 | 	my ($g1, $g2, $corr) = split "\t", $line;
32 | 	$len_c->{$g1}->{$g2} = $corr;
33 | }
34 | close(IN);
35 | 
36 | ################## Calculate group averages #######################################
37 | open(IN, $trans_loop);
38 | #open(OUT,">./facor.zero");
39 | while(my $line = <IN>){
40 | 	chomp $line;
41 | 	my ($frag1, $frag2, $obs) = split "\t", $line;
42 | 	my $len_g1=$frag_len_group->{$frag1};
43 | 	my $len_g2=$frag_len_group->{$frag2};
44 | 	my $len_corr = $len_c->{$len_g1}->{$len_g2};
45 | 	my $avg=$obs/$len_corr;
46 | 	print join("\t",$frag1, $frag2, $avg)."\n";
47 | 	
48 | }
49 | close(IN);
50 | close(OUT);
51 | ##############################################################
52 | sub get_id{
53 |         my ($val, $range) = @_;
54 |         foreach my $id (keys %{$range}){
55 |                 my ($min, $max) = split ":", $range->{$id};
56 |                 if($val > $min && $val <= $max){
57 |                         return $id;
58 |                 }
59 |         }
60 | #        die("Error: did not find a group\n");
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/bin/summary_sorted_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./summary_sorted_frag_loop.pl <frag_bed> <loop_list>\n";
 6 | 
 7 | my ($frag_bed, $loop_list) = @ARGV;
 8 | 
 9 | if(not defined $frag_bed){
10 | 	die($usage);
11 | }
12 | 
13 | if(not defined $loop_list){
14 | 	$loop_list = "-";
15 | }
16 | 
17 | my $frag_loc;
18 | open(IN, $frag_bed);
19 | while(my $line = <IN>){
20 |         chomp $line;
21 |         my ($chr, $beg, $end, $id) = split "\t", $line;
22 | 	$id =~ s/frag_//;
23 |         $frag_loc->{$id} = join(":", $beg, $end);
24 | }
25 | close(IN);
26 | 
27 | open(IN, $loop_list);
28 | my $prev_id = 0;
29 | my $frag_ref;
30 | while(my $line = <IN>){
31 | 	chomp $line;
32 | 	my ($fid1, $fid2, $count) = split "\t", $line;
33 | 	$fid1 =~ s/frag_//;
34 | 	$fid2 =~ s/frag_//;
35 | 	
36 | 	if(not defined $count){
37 | 		$count = 1;
38 | 	}
39 | 	
40 | 	if($fid1 != $prev_id){
41 | 		if($prev_id){
42 | 			my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
43 | 			foreach my $id (sort {$a <=> $b} keys %$frag_ref){
44 | 				my ($beg2, $end2) = split ":", $frag_loc->{$id};
45 | 				my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
46 | #				my $cut = abs($id - $prev_id);
47 | 				print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
48 | 				delete $frag_ref->{$id};
49 | 			}
50 | 		}
51 | 		$prev_id = $fid1;
52 | 	}
53 | 
54 | 	if(not defined $frag_ref->{$fid2}){
55 | 		$frag_ref->{$fid2} = $count;
56 | 	}else{
57 | 		$frag_ref->{$fid2} += $count;
58 | 	}
59 | }
60 | close(IN);
61 | 
62 | if($prev_id){
63 | 	my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
64 | 	foreach my $id (sort {$a <=> $b} keys %$frag_ref){
65 | 		my ($beg2, $end2) = split ":", $frag_loc->{$id};
66 | 		my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
67 | #		my $cut = abs($id - $prev_id);
68 | 		print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
69 | 		delete $frag_ref->{$id};
70 | 	}
71 | }
72 | 
73 | exit;
74 | 


--------------------------------------------------------------------------------
/bin/Arima/summary_sorted_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./summary_sorted_frag_loop.pl <frag_bed> <loop_list>\n";
 6 | 
 7 | my ($frag_bed, $loop_list) = @ARGV;
 8 | 
 9 | if(not defined $frag_bed){
10 | 	die($usage);
11 | }
12 | 
13 | if(not defined $loop_list){
14 | 	$loop_list = "-";
15 | }
16 | 
17 | my $frag_loc;
18 | open(IN, $frag_bed);
19 | while(my $line = <IN>){
20 |         chomp $line;
21 |         my ($chr, $beg, $end, $id) = split "\t", $line;
22 | 	$id =~ s/frag_//;
23 |         $frag_loc->{$id} = join(":", $beg, $end);
24 | }
25 | close(IN);
26 | 
27 | open(IN, $loop_list);
28 | my $prev_id = 0;
29 | my $frag_ref;
30 | while(my $line = <IN>){
31 | 	chomp $line;
32 | 	my ($fid1, $fid2, $count) = split "\t", $line;
33 | 	$fid1 =~ s/frag_//;
34 | 	$fid2 =~ s/frag_//;
35 | 	
36 | 	if(not defined $count){
37 | 		$count = 1;
38 | 	}
39 | 	
40 | 	if($fid1 != $prev_id){
41 | 		if($prev_id){
42 | 			my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
43 | 			foreach my $id (sort {$a <=> $b} keys %$frag_ref){
44 | 				my ($beg2, $end2) = split ":", $frag_loc->{$id};
45 | 				my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
46 | #				my $cut = abs($id - $prev_id);
47 | 				print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
48 | 				delete $frag_ref->{$id};
49 | 			}
50 | 		}
51 | 		$prev_id = $fid1;
52 | 	}
53 | 
54 | 	if(not defined $frag_ref->{$fid2}){
55 | 		$frag_ref->{$fid2} = $count;
56 | 	}else{
57 | 		$frag_ref->{$fid2} += $count;
58 | 	}
59 | }
60 | close(IN);
61 | 
62 | if($prev_id){
63 | 	my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
64 | 	foreach my $id (sort {$a <=> $b} keys %$frag_ref){
65 | 		my ($beg2, $end2) = split ":", $frag_loc->{$id};
66 | 		my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
67 | #		my $cut = abs($id - $prev_id);
68 | 		print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
69 | 		delete $frag_ref->{$id};
70 | 	}
71 | }
72 | 
73 | exit;
74 | 


--------------------------------------------------------------------------------
/bin/eHiC-QC/summary_sorted_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./summary_sorted_frag_loop.pl <frag_bed> <loop_list>\n";
 6 | 
 7 | my ($frag_bed, $loop_list) = @ARGV;
 8 | 
 9 | if(not defined $frag_bed){
10 | 	die($usage);
11 | }
12 | 
13 | if(not defined $loop_list){
14 | 	$loop_list = "-";
15 | }
16 | 
17 | my $frag_loc;
18 | open(IN, $frag_bed);
19 | while(my $line = <IN>){
20 |         chomp $line;
21 |         my ($chr, $beg, $end, $id) = split "\t", $line;
22 | 	$id =~ s/frag_//;
23 |         $frag_loc->{$id} = join(":", $beg, $end);
24 | }
25 | close(IN);
26 | 
27 | open(IN, $loop_list);
28 | my $prev_id = 0;
29 | my $frag_ref;
30 | while(my $line = <IN>){
31 | 	chomp $line;
32 | 	my ($fid1, $fid2, $count) = split "\t", $line;
33 | 	$fid1 =~ s/frag_//;
34 | 	$fid2 =~ s/frag_//;
35 | 	
36 | 	if(not defined $count){
37 | 		$count = 1;
38 | 	}
39 | 	
40 | 	if($fid1 != $prev_id){
41 | 		if($prev_id){
42 | 			my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
43 | 			foreach my $id (sort {$a <=> $b} keys %$frag_ref){
44 | 				my ($beg2, $end2) = split ":", $frag_loc->{$id};
45 | 				my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
46 | #				my $cut = abs($id - $prev_id);
47 | 				print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
48 | 				delete $frag_ref->{$id};
49 | 			}
50 | 		}
51 | 		$prev_id = $fid1;
52 | 	}
53 | 
54 | 	if(not defined $frag_ref->{$fid2}){
55 | 		$frag_ref->{$fid2} = $count;
56 | 	}else{
57 | 		$frag_ref->{$fid2} += $count;
58 | 	}
59 | }
60 | close(IN);
61 | 
62 | if($prev_id){
63 | 	my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
64 | 	foreach my $id (sort {$a <=> $b} keys %$frag_ref){
65 | 		my ($beg2, $end2) = split ":", $frag_loc->{$id};
66 | 		my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
67 | #		my $cut = abs($id - $prev_id);
68 | 		print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
69 | 		delete $frag_ref->{$id};
70 | 	}
71 | }
72 | 
73 | exit;
74 | 


--------------------------------------------------------------------------------
/bin/DPNII/summary_sorted_frag_loop_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./summary_sorted_frag_loop.pl <frag_bed> <loop_list>\n";
 6 | 
 7 | my ($frag_bed, $loop_list) = @ARGV;
 8 | 
 9 | if(not defined $frag_bed){
10 | 	die($usage);
11 | }
12 | 
13 | if(not defined $loop_list){
14 | 	$loop_list = "-";
15 | }
16 | 
17 | my $frag_loc;
18 | open(IN, $frag_bed);
19 | while(my $line = <IN>){
20 |         chomp $line;
21 |         my ($chr, $beg, $end, $id) = split "\t", $line;
22 | 	$id =~ s/frag_//;
23 |         $frag_loc->{$id} = join(":", $beg, $end);
24 | }
25 | close(IN);
26 | 
27 | open(IN, $loop_list);
28 | my $prev_id = 0;
29 | my $frag_ref;
30 | while(my $line = <IN>){
31 | 	chomp $line;
32 | 	my ($fid1, $fid2, $count) = split "\t", $line;
33 | 	$fid1 =~ s/frag_//;
34 | 	$fid2 =~ s/frag_//;
35 | 	
36 | 	if(not defined $count){
37 | 		$count = 1;
38 | 	}
39 | 	
40 | 	if($fid1 != $prev_id){
41 | 		if($prev_id){
42 | 			my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
43 | 			foreach my $id (sort {$a <=> $b} keys %$frag_ref){
44 | 				my ($beg2, $end2) = split ":", $frag_loc->{$id};
45 | 				my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
46 | #				my $cut = abs($id - $prev_id);
47 | 				print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
48 | 				delete $frag_ref->{$id};
49 | 			}
50 | 		}
51 | 		$prev_id = $fid1;
52 | 	}
53 | 
54 | 	if(not defined $frag_ref->{$fid2}){
55 | 		$frag_ref->{$fid2} = $count;
56 | 	}else{
57 | 		$frag_ref->{$fid2} += $count;
58 | 	}
59 | }
60 | close(IN);
61 | 
62 | if($prev_id){
63 | 	my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
64 | 	foreach my $id (sort {$a <=> $b} keys %$frag_ref){
65 | 		my ($beg2, $end2) = split ":", $frag_loc->{$id};
66 | 		my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
67 | #		my $cut = abs($id - $prev_id);
68 | 		print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
69 | 		delete $frag_ref->{$id};
70 | 	}
71 | }
72 | 
73 | exit;
74 | 


--------------------------------------------------------------------------------
/bin/preprocess/summary_sorted_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./summary_sorted_frag_loop.pl <frag_bed> <loop_list>\n";
 6 | 
 7 | my ($frag_bed, $loop_list) = @ARGV;
 8 | 
 9 | if(not defined $frag_bed){
10 | 	die($usage);
11 | }
12 | 
13 | if(not defined $loop_list){
14 | 	$loop_list = "-";
15 | }
16 | 
17 | my $frag_loc;
18 | open(IN, $frag_bed);
19 | while(my $line = <IN>){
20 |         chomp $line;
21 |         my ($chr, $beg, $end, $id) = split "\t", $line;
22 | 	$id =~ s/frag_//;
23 |         $frag_loc->{$id} = join(":", $beg, $end);
24 | }
25 | close(IN);
26 | 
27 | open(IN, $loop_list);
28 | my $prev_id = 0;
29 | my $frag_ref;
30 | while(my $line = <IN>){
31 | 	chomp $line;
32 | 	my ($fid1, $fid2, $count) = split "\t", $line;
33 | 	$fid1 =~ s/frag_//;
34 | 	$fid2 =~ s/frag_//;
35 | 	
36 | 	if(not defined $count){
37 | 		$count = 1;
38 | 	}
39 | 	
40 | 	if($fid1 != $prev_id){
41 | 		if($prev_id){
42 | 			my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
43 | 			foreach my $id (sort {$a <=> $b} keys %$frag_ref){
44 | 				my ($beg2, $end2) = split ":", $frag_loc->{$id};
45 | 				my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
46 | #				my $cut = abs($id - $prev_id);
47 | 				print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
48 | 				delete $frag_ref->{$id};
49 | 			}
50 | 		}
51 | 		$prev_id = $fid1;
52 | 	}
53 | 
54 | 	if(not defined $frag_ref->{$fid2}){
55 | 		$frag_ref->{$fid2} = $count;
56 | 	}else{
57 | 		$frag_ref->{$fid2} += $count;
58 | 	}
59 | }
60 | close(IN);
61 | 
62 | if($prev_id){
63 | 	my ($beg1, $end1) = split ":", $frag_loc->{$prev_id};
64 | 	foreach my $id (sort {$a <=> $b} keys %$frag_ref){
65 | 		my ($beg2, $end2) = split ":", $frag_loc->{$id};
66 | 		my $dist = ($beg1 < $beg2)?($beg2 - $end1 - 1):($beg1 - $end2 -1);
67 | #		my $cut = abs($id - $prev_id);
68 | 		print join("\t", "frag_$prev_id", "frag_$id", $frag_ref->{$id}, $dist)."\n";
69 | 		delete $frag_ref->{$id};
70 | 	}
71 | }
72 | 
73 | exit;
74 | 


--------------------------------------------------------------------------------
/bin/eHiC/merge_and_resort_end_loop.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import os
 5 | 
 6 | if len(sys.argv)<4:
 7 | 	print "Usage: ./merge_and_resort_end_loop.py <end_bed> <name> <end_loop> <full_end_pair>"
 8 | 	sys.exit()
 9 | 
10 | end_bed=open(sys.argv[1])
11 | chrs={}
12 | order={}
13 | i=1
14 | for line in end_bed.readlines():
15 | 	chr,start,end,id,length=line.rstrip().split('\t')
16 | 	chrs[id]=chr
17 | 	order[id]=i
18 | 	i+=1
19 | end_bed.close()
20 | 
21 | dir_name="temp."+sys.argv[2]
22 | os.system("mkdir "+dir_name)
23 | 
24 | chr_list=[]
25 | prev_chr="chr1"
26 | out=open(dir_name+"/end_loop.chr1",'a')
27 | loop=open(sys.argv[3])
28 | while True:
29 | 	line=loop.readline()
30 | 	if not line:
31 | 		break
32 | 	end1,end2,reads,dist=line.rstrip().split('\t')
33 | 	chr=chrs[end1]
34 | 	if chr!=prev_chr:
35 | 		out.close()
36 | 		chr_list.append(prev_chr)
37 | 		prev_chr=chr
38 | 		out=open(dir_name+"/end_loop."+chr,'a')
39 | 	out.write(line)
40 | loop.close()
41 | out.close()
42 | chr_list.append(prev_chr)
43 | 
44 | full_pair=open(sys.argv[4])
45 | prev_chr="chr1"
46 | dic_loop={}
47 | file=open(dir_name+"/end_loop."+prev_chr)
48 | while True:
49 | 	line=file.readline()
50 | 	if not line:
51 | 		break
52 | 	end1,end2,reads,dist=line.rstrip().split('\t')
53 | 	dic_loop[(end1,end2)]=reads
54 | file.close()
55 | 
56 | while True:
57 | 	line=full_pair.readline()
58 | 	if not line:
59 | 		break
60 | 	end1,end2,reads,dist=line.rstrip().split('\t')
61 | 	chr=chrs[end1]
62 | 	if chr!=prev_chr:
63 | 		dic_loop.clear()
64 | 		prev_chr=chr
65 | 		file=open(dir_name+"/end_loop."+prev_chr)
66 | 		while True:
67 |         		line=file.readline()
68 |         		if not line:
69 | 	                	break
70 |         		end1,end2,reads,dist=line.rstrip().split('\t')
71 |         		dic_loop[(end1,end2)]=reads
72 | 		file.close()
73 | 	if (end1,end2) in dic_loop:
74 | 		print end1+"\t"+end2+"\t"+dic_loop[(end1,end2)]+"\t"+dist
75 | 	else:
76 | 		print line.rstrip()
77 | full_pair.close()
78 | 		
79 | #os.system("rm -r "+dir_name)
80 | 


--------------------------------------------------------------------------------
/bin/template.r:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | args = commandArgs(trailingOnly=TRUE)
 3 | file=args[1]
 4 | 
 5 | matrix <- read.table(file)
 6 | grid <- read.table("grid.REGION",header=TRUE)
 7 | attach(grid)
 8 | matrix <- data.matrix(matrix)
 9 | for (i in c(1:sqrt(length(matrix)))){matrix[i,i] <- 0}
10 | ind.matrix <- Index[Loc >= START & Loc <= END]
11 | ind.grid <- c(ind.matrix[1] -1, ind.matrix)
12 | beg <- Loc[ind.grid[1]]
13 | end <- Loc[ind.grid[length(ind.grid)]]
14 | gap <- Loc[ind.matrix]-Loc[ind.matrix][1]
15 | #tmp <- Loc[ind.matrix[2:107]]-Loc[ind.matrix[1:106]]
16 | #y_Loc <- (Loc[1])
17 | #for(x in rev(tmp)){y_Loc <- c(y_Loc,y_Loc[length(y_Loc)]+x)}
18 | #x <- c(Loc[1],Loc[length(Loc)])
19 | #y <- c(y_Loc[length(y_Loc)],y_Loc[1])
20 | x <- c(Loc[1],Loc[length(Loc)])
21 | y <- c(Loc[1],Loc[length(Loc)])
22 | 
23 | rotate <- function(x) t(apply(x, 2, rev))
24 | n <- 20
25 | col <- rgb(1,(n-2):0/(n-1),(n-2):0/(n-1))
# Draw the (rotated) matrix as a heat map into <name>.pdf and <name>.png.
#
#   matrix      numeric matrix to plot
#   name        output file name without extension
#   color_scale upper end of the colour scale; 0 selects an automatic scale
#               based on the matrix maximum / its 98% quantile
#
# Uses the globals Loc, ind.grid, col, x, y and rotate() defined above.
# Returns the value of the final dev.off() call.
draw_heatmap <- function(matrix,name,color_scale){
	matrix <- rotate(matrix)
	top <- max(matrix)

	# 20 break points are needed for the 19 colours in `col`.
	if (color_scale == 0){
		if (top < 2){
			breaks <- seq(1.001, top, (top - 1.001)/19)
		}else{
			q98 <- quantile(matrix, prob=0.98)
			step <- (q98 - 1)/18
			up <- q98 + 0.011
			if (up < 2){
				up <- 2
				step <- 0.999/18
			}
			breaks <- c(seq(1.001, up, step), top)
		}
	}else{
		breaks <- c(seq(1.001, color_scale, (color_scale - 1.001)/18), top)
	}

	# The same plot is rendered once per output device.
	render <- function(){
		par(mar=c(0,0,0,0))
		image(Loc[ind.grid], Loc[ind.grid], matrix, zlim=c(1, 50), col=col, breaks=breaks,xaxt="n",yaxt="n",xlab="",ylab="")
		lines(x,y)
	}

	pdf(paste(name,"pdf",sep="."))
	render()
	dev.off()

	png(paste(name,"png",sep="."))
	render()
	dev.off()
}

draw_heatmap(matrix[ind.matrix, ind.matrix],"REGION",0)
58 | 


--------------------------------------------------------------------------------
/bin/microC/get_loop_lambda.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($dist_group_file,$group_stat_file) = @ARGV;
 5 | if(not defined $dist_group_file){
 6 | 	die("Usage:./get_loop_lambda.pl <frag_loop_data> <frag bed> <frag_stat> <frag_length_group> <frag_dist_group> <group_statistic_file>\n");
 7 | }
 8 | 
 9 | ##################### determine the length group for fragments #################
10 | 
11 | ################### use an array to store group info for differenct distance ######
12 | my @dist_group;
13 | open(IN, $dist_group_file);
14 | while(my $line = <IN>){
15 | 	chomp $line;
16 | 	my ($g, $low, $high) = split "\t", $line;
17 | 	for(my $i = $low + 1; $i <= $high; $i ++){
18 | 		$dist_group[$i] = $g;
19 | 	}
20 | }
21 | close(IN);
22 | 
23 | ################## get group average values #######################################
24 | my $g_avg;
25 | open(IN, $group_stat_file);
26 | while(my $line = <IN>){
27 | 	chomp $line;
28 | 	my ($g1,$count,$avg) = split "\t", $line;
29 | 	$g_avg->{$g1} = $avg;
30 | }
31 | close(IN);
32 | 
33 | 
34 | ################## print new data with group avg ##################################
35 | #open(IN, $regress);
36 | while(my $line = <STDIN>){
37 | 	chomp $line;
38 | 	my ($frag1, $frag2, $count, $dist) = split "\t", $line;
39 | #	my $g1 = $frag_len_group->{$frag1};
40 | #	my $g2 = $frag_len_group->{$frag2};
41 | 	my $g3 = $dist_group[$dist];
42 | 	my $avg = $g_avg->{$g3};
43 | 	#my $map1 = $frag_map->{$frag1};
44 | 	#my $map2 = $frag_map->{$frag2};
45 | 	my $lambda = $avg;
46 | 	if($lambda == 0){
47 | 		$count = 0;
48 | 	}
49 | 	print join("\t", $frag1, $frag2, $count, $lambda)."\n";
50 | }
51 | #close(IN);
52 | 
53 | exit;
54 | 
55 | 
56 | ##############################################################
# Find the group whose range contains a value.
#   $val   - number to classify
#   $range - hash ref: group id => "min:max"; a group matches when
#            min < $val <= max
# Returns the id of a matching group; dies when no group matches.
sub get_id{
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split ":", $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group\n");
}
67 | 
68 | 


--------------------------------------------------------------------------------
/bin/eHiC/test_frag_corr_eHiC.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | 
 5 | if len(sys.argv)!=4:
 6 | 	print "Usage: ./test_frag_corr.py <read_sum> <frag_GC_mappability> <frag_loop>"
 7 | 	sys.exit()
 8 | 
 9 | reads_count={}
10 | mappability={}
11 | read_sum=open(sys.argv[1])
12 | sum=0
13 | NR=0
14 | for line in read_sum.readlines():
15 | 	frag,read=line.rstrip().split('\t')
16 | 	reads_count[frag]=float(read)
17 | read_sum.close()
18 | 
19 | GC={}
20 | map_file=open(sys.argv[2])
21 | for line in map_file.readlines():
22 | 	frag,gc,map=line.rstrip().split('\t')
23 | 	mappability[frag]=float(map)
24 | 	GC[frag]=float(gc)
25 | 	#NR+=1
26 | map_file.close()
27 | 
28 | dic={}
29 | for frag in mappability:
30 | 	if frag in reads_count and mappability[frag]!=0 and GC[frag]!=0:
31 | 		dic[frag]=reads_count[frag]/mappability[frag]
32 | 		sum+=dic[frag]
33 | 		NR+=1
34 | 	else:
35 | 		dic[frag]=0
36 | 	#sum+=dic[frag]
37 | 	#NR+=1
38 | mean=sum/float(NR)
39 | 
40 | for frag in dic:
41 | 	dic[frag]=dic[frag]/mean
42 | 
43 | if sys.argv[3]!="-":
44 | 	loop=open(sys.argv[3])
45 | 	while True:
46 | 		line=loop.readline()
47 | 		if not line:
48 | 			break
49 | 		frag1,frag2,obs,expt=line.rstrip().split('\t')
50 | 		if frag1 in dic:
51 | 			corr1=dic[frag1]
52 | 		else:
53 | 			corr1=0
54 | 		if frag2 in dic:
55 | 			corr2=dic[frag2]
56 | 		else:
57 | 			corr2=0
58 | 		corr=corr1*corr2
59 | 		expt=float(expt)*corr
60 | 		if expt==0:
61 | 			obs="0"
62 | 		print frag1+"\t"+frag2+"\t"+obs+"\t"+str(expt)
63 | 	loop.close()
64 | else:
65 | 	for line in sys.stdin:
66 | 		frag1,frag2,obs,expt=line.rstrip().split('\t')
67 |                 if frag1 in dic:
68 |                         corr1=dic[frag1]
69 |                 else:
70 |                         corr1=0
71 |                 if frag2 in dic:
72 |                         corr2=dic[frag2]
73 |                 else:
74 |                         corr2=0
75 |                 corr=corr1*corr2
76 |                 expt=float(expt)*corr
77 |                 if expt==0:
78 |                         obs="0"
79 |                 print frag1+"\t"+frag2+"\t"+obs+"\t"+str(expt)
80 | 
81 | 


--------------------------------------------------------------------------------
/documents/Arima.preprocessing.allValidPairs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # This is to show process allValidPairs(HiCPro output) to fragment pairs that HiCorr can take as input
 3 | lib=preprocess/lib
 4 | allValidPairsFile=$1 # gz format, if not, remove gunzip in the following command
 5 | name=$2 
 6 | bed=Arima_HiCorr_ref/hg19.Arima.frag.bed
 7 | 
 8 | # 1. map allValidPairs read pairs to fragment pairs, and split to categories
 9 | cat $allValidPairsFile | gunzip | cut -f2-7 | $lib/reads_2_trans_frag_loop.pl $bed 50 $name.loop.trans - & # 50 is read length for mapping
10 | cat $allValidPairsFile | gunzip | cut -f2-7 | $lib/reads_2_cis_frag_loop.pl $bed 50 $name.loop.inward $name.loop.outward $name.loop.samestrand summary.frag_loop.read_count $name - & # 50 is read length for mapping
11 | wait
12 | # 2. sort and filter fragment pairs
13 | for file in `ls *loop* | grep -v trans`;do
14 |         cat $file | $lib/summary_sorted_frag_loop.pl $bed  > temp.$file &
15 | done
16 | cat $name.loop.trans | $lib/summary_sorted_trans_frag_loop.pl - > temp.$file &
17 | wait
18 | for file in `ls temp*loop* | grep -v trans`;do
19 |         $lib/resort_by_frag_id.pl $bed $file &
20 | done
21 | wait
22 | cat temp.$name.loop.inward | awk '{if($4>1000)print $0}' > temp.$name.loop.inward & 
23 | cat temp.$name.loop.outward | awk '{if($4>5000)print $0}' > temp.$name.loop.outward & 
24 | wait 
25 | # 3. merge fragment pairs
26 | $lib/merge_sorted_frag_loop.pl temp.$name.loop.samestrand temp.$name.loop.inward temp.$name.loop.outward > frag_loop.$name.cis &
27 | $lib/merge_sorted_frag_loop.pl temp.$name.loop.trans > frag_loop.$name.trans &
28 | wait
29 | # 4. this step is to make sure both upper and lower triangle pairs are included, which can be used as HiCorr_Arima.sh input
30 | for file in frag_loop.$name.cis frag_loop.$name.trans;do
31 |         cat $file <(cat $file | awk '{print $2 "\t" $1 "\t" $3 "\t" $4}') | sed s/"frag_"//g | sort -k1,2n -k2,2n | awk '{print "frag_"$1 "\t" "frag_"$2 "\t" $3 "\t" $4}' > $file.tmp & 
32 | done
33 | wait 
34 | for file in frag_loop.$name.cis frag_loop.$name.trans;do
35 |   mv $file.tmp $file 
36 | done 
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/bin/Arima/list_full_matrix.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = 	"Usage:./list_frag_pairs.pl <fragment bed> <max_distance>\n".
 4 | 		"\tPrint the list of fragment pairs within a distance.\n";
 5 | 
 6 | my ($fragfile, $maxdist) = @ARGV;
 7 | if(not defined $maxdist){
 8 |         die($usage);
 9 | }
10 | 
11 | my $frag_beg;
12 | my $frag_end;
13 | my $frag_chr;
14 | my @frag_id;
15 | 
16 | open(IN, $fragfile);
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($chr, $beg, $end, $id, $len) = split "\t", $line;
20 | 	
21 | 	push @frag_id, $id;
22 | 	$frag_chr->{$id} = $chr;
23 | 	$frag_beg->{$id} = $beg;
24 | 	$frag_end->{$id} = $end;
25 | }
26 | close(IN);
27 | 
28 | my $ind_up = 0;
29 | my $up_id = $frag_id[$ind_up];
30 | my $up_chr = $frag_chr->{$up_id};
31 | my $up_end = $frag_end->{$up_id};
32 | 
33 | for(my $i = 0; $i <= $#frag_id; $i++){
34 | 	my $curr_id = $frag_id[$i];
35 | 	my $curr_chr = $frag_chr->{$curr_id};
36 | 	my $curr_beg = $frag_beg->{$curr_id};
37 | 	my $curr_end = $frag_end->{$curr_id};
38 | 
39 | 	############ find the first upstream frag within maxdist #######################
40 | 	my $up_dist = $curr_beg - $up_end - 1;
41 | 	if($up_chr ne $curr_chr){
42 | 		$ind_up = $i;
43 | 		$up_chr = $curr_chr;
44 | 		$up_id = $frag_id[$ind_up];
45 | 		$up_end = $frag_end->{$up_id};
46 |                 $up_dist = $curr_beg - $up_end - 1;
47 | 	}
48 | 	while($up_dist > $maxdist && $ind_up < $i){
49 | 		$ind_up ++;
50 | 		$up_id = $frag_id[$ind_up];
51 | 		$up_end = $frag_end->{$up_id};
52 | 		$up_dist = $curr_beg - $up_end - 1;
53 | 	}
54 | 
55 | 	for(my $j = $ind_up; $j < $i; $j++){
56 | 		my $id = $frag_id[$j];
57 | 		my $end = $frag_end->{$id};
58 | 		print join("\t", $curr_id, $id, 0, $curr_beg - $end -1)."\n";
59 | 	}
60 | 
61 | 	my $j = $i + 1;
62 | 	my $id = $frag_id[$j];
63 | 	my $chr = $frag_chr->{$id};
64 | 	my $beg = $frag_beg->{$id};
65 | 	while($chr eq $curr_chr && ($beg - $curr_end - 1) <= $maxdist){
66 | 		print join("\t", $curr_id, $id, 0, $beg - $curr_end - 1)."\n";
67 | 		$j++;
68 | 		$id = $frag_id[$j];
69 | 		$chr = $frag_chr->{$id};
70 | 		$beg = $frag_beg->{$id};
71 | 	}
72 | }
73 | 
74 | exit;
75 | 


--------------------------------------------------------------------------------
/bin/microC/list_full_matrix.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = 	"Usage:./list_frag_pairs.pl <fragment bed> <max_distance>\n".
 4 | 		"\tPrint the list of fragment pairs within a distance.\n";
 5 | 
 6 | my ($fragfile, $maxdist) = @ARGV;
 7 | if(not defined $maxdist){
 8 |         die($usage);
 9 | }
10 | 
11 | my $frag_beg;
12 | my $frag_end;
13 | my $frag_chr;
14 | my @frag_id;
15 | 
16 | open(IN, $fragfile);
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($chr, $beg, $end, $id, $len) = split "\t", $line;
20 | 	
21 | 	push @frag_id, $id;
22 | 	$frag_chr->{$id} = $chr;
23 | 	$frag_beg->{$id} = $beg;
24 | 	$frag_end->{$id} = $end;
25 | }
26 | close(IN);
27 | 
28 | my $ind_up = 0;
29 | my $up_id = $frag_id[$ind_up];
30 | my $up_chr = $frag_chr->{$up_id};
31 | my $up_end = $frag_end->{$up_id};
32 | 
33 | for(my $i = 0; $i <= $#frag_id; $i++){
34 | 	my $curr_id = $frag_id[$i];
35 | 	my $curr_chr = $frag_chr->{$curr_id};
36 | 	my $curr_beg = $frag_beg->{$curr_id};
37 | 	my $curr_end = $frag_end->{$curr_id};
38 | 
39 | 	############ find the first upstream frag within maxdist #######################
40 | 	my $up_dist = $curr_beg - $up_end - 1;
41 | 	if($up_chr ne $curr_chr){
42 | 		$ind_up = $i;
43 | 		$up_chr = $curr_chr;
44 | 		$up_id = $frag_id[$ind_up];
45 | 		$up_end = $frag_end->{$up_id};
46 |                 $up_dist = $curr_beg - $up_end - 1;
47 | 	}
48 | 	while($up_dist > $maxdist && $ind_up < $i){
49 | 		$ind_up ++;
50 | 		$up_id = $frag_id[$ind_up];
51 | 		$up_end = $frag_end->{$up_id};
52 | 		$up_dist = $curr_beg - $up_end - 1;
53 | 	}
54 | 
55 | 	for(my $j = $ind_up; $j < $i; $j++){
56 | 		my $id = $frag_id[$j];
57 | 		my $end = $frag_end->{$id};
58 | 		print join("\t", $curr_id, $id, 0, $curr_beg - $end -1)."\n";
59 | 	}
60 | 
61 | 	my $j = $i + 1;
62 | 	my $id = $frag_id[$j];
63 | 	my $chr = $frag_chr->{$id};
64 | 	my $beg = $frag_beg->{$id};
65 | 	while($chr eq $curr_chr && ($beg - $curr_end - 1) <= $maxdist){
66 | 		print join("\t", $curr_id, $id, 0, $beg - $curr_end - 1)."\n";
67 | 		$j++;
68 | 		$id = $frag_id[$j];
69 | 		$chr = $frag_chr->{$id};
70 | 		$beg = $frag_beg->{$id};
71 | 	}
72 | }
73 | 
74 | exit;
75 | 


--------------------------------------------------------------------------------
/bin/DPNII/list_full_matrix_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = 	"Usage:./list_frag_pairs.pl <fragment bed> <max_distance>\n".
 4 | 		"\tPrint the list of fragment pairs within a distance.\n";
 5 | 
 6 | my ($fragfile, $maxdist) = @ARGV;
 7 | if(not defined $maxdist){
 8 |         die($usage);
 9 | }
10 | 
11 | my $frag_beg;
12 | my $frag_end;
13 | my $frag_chr;
14 | my @frag_id;
15 | 
16 | open(IN, $fragfile);
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($chr, $beg, $end, $id, $len) = split "\t", $line;
20 | 	
21 | 	push @frag_id, $id;
22 | 	$frag_chr->{$id} = $chr;
23 | 	$frag_beg->{$id} = $beg;
24 | 	$frag_end->{$id} = $end;
25 | }
26 | close(IN);
27 | 
28 | my $ind_up = 0;
29 | my $up_id = $frag_id[$ind_up];
30 | my $up_chr = $frag_chr->{$up_id};
31 | my $up_end = $frag_end->{$up_id};
32 | 
33 | for(my $i = 0; $i <= $#frag_id; $i++){
34 | 	my $curr_id = $frag_id[$i];
35 | 	my $curr_chr = $frag_chr->{$curr_id};
36 | 	my $curr_beg = $frag_beg->{$curr_id};
37 | 	my $curr_end = $frag_end->{$curr_id};
38 | 
39 | 	############ find the first upstream frag within maxdist #######################
40 | 	my $up_dist = $curr_beg - $up_end - 1;
41 | 	if($up_chr ne $curr_chr){
42 | 		$ind_up = $i;
43 | 		$up_chr = $curr_chr;
44 | 		$up_id = $frag_id[$ind_up];
45 | 		$up_end = $frag_end->{$up_id};
46 |                 $up_dist = $curr_beg - $up_end - 1;
47 | 	}
48 | 	while($up_dist > $maxdist && $ind_up < $i){
49 | 		$ind_up ++;
50 | 		$up_id = $frag_id[$ind_up];
51 | 		$up_end = $frag_end->{$up_id};
52 | 		$up_dist = $curr_beg - $up_end - 1;
53 | 	}
54 | 
55 | 	for(my $j = $ind_up; $j < $i; $j++){
56 | 		my $id = $frag_id[$j];
57 | 		my $end = $frag_end->{$id};
58 | 		print join("\t", $curr_id, $id, 0, $curr_beg - $end -1)."\n";
59 | 	}
60 | 
61 | 	my $j = $i + 1;
62 | 	my $id = $frag_id[$j];
63 | 	my $chr = $frag_chr->{$id};
64 | 	my $beg = $frag_beg->{$id};
65 | 	while($chr eq $curr_chr && ($beg - $curr_end - 1) <= $maxdist){
66 | 		print join("\t", $curr_id, $id, 0, $beg - $curr_end - 1)."\n";
67 | 		$j++;
68 | 		$id = $frag_id[$j];
69 | 		$chr = $frag_chr->{$id};
70 | 		$beg = $frag_beg->{$id};
71 | 	}
72 | }
73 | 
74 | exit;
75 | 


--------------------------------------------------------------------------------
/bin/eHiC/batch_anchor_by_chrom.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./batch_anchor_by_chrom.pl <frag_bed> <loop_list> <promoter_map>\n";
 6 | 
 7 | my ($frag_bed, $loop_list, $map_file) = @ARGV;
 8 | 
 9 | if(not defined $map_file){
10 |         die($usage);
11 | }
12 | 
13 | my $chr_ref;
14 | ################## Read fragment chrom ##################
15 | my $frag_chrom;
16 | open(IN, $frag_bed);
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($chr, $beg, $end, $id) = split "\t", $line;
20 |         $frag_chrom->{$id} = $chr;
21 | 	$chr_ref->{$chr} = 1;
22 | }
23 | close(IN);
24 | 
25 | my $dir = "temp.by.chrom";
26 | `mkdir $dir`;
27 | 
28 | split_by_chr($frag_bed, $frag_chrom, $dir, $chr_ref, 4);
29 | split_by_chr($map_file, $frag_chrom, $dir, $chr_ref, 1);
30 | split_by_chr($loop_list, $frag_chrom, $dir, $chr_ref, 1);
31 | 
32 | foreach my $chr (keys %$chr_ref){
33 | 	# convert fragment-to-fragment data to anchor-to-anchor data
34 | 	`./fragdata_to_anchordata.pl $dir/$loop_list.$chr $dir/$map_file.$chr > $dir/anchor_2_anchor.loop.$chr`;
35 | 
36 | 	# calculate p value for the achor-to-anchor data
37 | 	open(IN, "get_anchor_pval.r");
38 | 	open(OUT, ">$dir/get_anchor_pval.$chr.r");
39 | 	while(my $line = <IN>){
40 | 		$line =~ s/FILE/$dir\/anchor_2_anchor.loop.$chr/g;
41 | 		print OUT $line;
42 | 	}
43 | 	close(OUT);
44 | 	close(IN);
45 | 	`R --quiet --vanilla < $dir/get_anchor_pval.$chr.r`;
46 | }
47 | 
48 | exit;
49 | 
50 | 
51 | ##########################################################################
# Split a tab-separated file into one file per chromosome.
#   $file       - input file; also the base name of the outputs
#   $frag_chrom - hash ref: fragment id => chromosome
#   $dir        - existing directory receiving the outputs
#   $chr_ref    - hash ref whose keys are the chromosome names
#   $col        - 1-based column of $file holding the fragment id
# Every line is copied unchanged to "$dir/$file.<chr>", where <chr> is the
# chromosome of the line's fragment. An output file is created for every
# chromosome in $chr_ref, even when no line goes into it.
# Dies when a file cannot be opened or a line names a fragment that has no
# chromosome in $chr_ref.
sub split_by_chr{
	my ($file, $frag_chrom, $dir, $chr_ref, $col) = @_;

	my %fh_for;
	foreach my $chr (keys %$chr_ref){
		open($fh_for{$chr}, '>', "$dir/$file.$chr")
			or die("Error: cannot write $dir/$file.$chr: $!\n");
	}

	open(my $in, '<', $file) or die("Error: cannot open $file: $!\n");
	while(my $line = <$in>){
		chomp $line;
		my @vals = split "\t", $line;
		my $fid = $vals[$col - 1];
		my $chr = defined $fid ? $frag_chrom->{$fid} : undef;
		my $fh = defined $chr ? $fh_for{$chr} : undef;
		if(not defined $fh){
			my $shown = defined $fid ? $fid : '';
			die("Error: $file line $.: no chromosome known for fragment '$shown'\n");
		}
		print {$fh} $line."\n";
	}
	close($in);

	foreach my $chr (keys %fh_for){
		close($fh_for{$chr})
			or die("Error: cannot finish $dir/$file.$chr: $!\n");
	}
	return;
}
75 | 


--------------------------------------------------------------------------------
/bin/preprocess/bam_to_frag_loop.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | 
 3 | bam_file=$1
 4 | name=$2
 5 | rdlen=$3	#read length
 6 | ref=$4
 7 | bin=$5
 8 | genome=$6
 9 | enzyme=$7
10 | 
11 | fragbed=$ref/$enzyme/$genome.$enzyme.frag.bed
12 | 
13 | samtools view $bam_file | cut -f2-8 | perl $bin/bam_to_temp_HiC.pl | perl $bin/reads_2_cis_frag_loop.pl $fragbed $rdlen $name.loop.inward $name.loop.outward $name.loop.samestrand $name &
14 | samtools view $bam_file | cut -f2-8 | perl $bin/bam_to_temp_HiC.pl | perl $bin/reads_2_trans_frag_loop.pl $fragbed $rdlen $name.loop.trans &
15 | wait
16 | 
17 | sed -i 's/BIN/$bin/g' $bin/resort_by_frag_id.pl
18 | perl $bin/summary_sorted_frag_loop.pl $fragbed $name.loop.inward >temp.$name.loop.inward &
19 | perl $bin/summary_sorted_frag_loop.pl $fragbed $name.loop.outward >temp.$name.loop.outward &
20 | perl $bin/summary_sorted_frag_loop.pl $fragbed $name.loop.samestrand >temp.$name.loop.samestrand &
21 | perl $bin/summary_sorted_trans_frag_loop.pl $name.loop.trans >temp.$name.loop.trans &
22 | wait
23 | mv temp.$name.loop.inward $name.loop.inward
24 | mv temp.$name.loop.outward $name.loop.outward
25 | mv temp.$name.loop.samestrand $name.loop.samestrand
26 | mv temp.$name.loop.trans $name.trans.frag_loop
27 | 
28 | perl $bin/resort_by_frag_id.pl $fragbed $name.loop.inward $bin &
29 | perl $bin/resort_by_frag_id.pl $fragbed $name.loop.outward $bin &
30 | perl $bin/resort_by_frag_id.pl $fragbed $name.loop.samestrand $bin &
31 | perl $bin/resort_by_frag_id.pl $fragbed $name.loop.trans $bin &
32 | wait
33 | 
34 | awk '{if($4>1000)print $0}' $name.loop.inward >temp.$name.loop.inward &
35 | 
36 | if [[ $enzyme -gt HindIII ]]
37 | then
38 | 	awk '{if($4>25000)print $0}' $name.loop.outward >temp.$name.loop.outward &
39 | elif [[ $enzyme -gt DPNII ]]
40 | then
41 | 	awk '{if($4>5000)print $0}' $name.loop.outward >temp.$name.loop.outward &
42 | fi
43 | wait
44 | 
45 | mv -f temp.$name.loop.inward $name.loop.inward
46 | mv -f temp.$name.loop.outward $name.loop.outward
47 | 
48 | perl $bin/merge_sorted_frag_loop.pl $name.loop.inward $name.loop.outward $name.loop.samestrand >$name.cis.frag_loop
49 | #rm $name.loop.inward $name.loop.outward $name.loop.samestrand
50 | 


--------------------------------------------------------------------------------
/documents/lib/list_full_matrix.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = 	"Usage:./list_frag_pairs.pl <fragment bed> <max_distance>\n".
 4 | 		"\tPrint the list of fragment pairs within a distance.\n";
 5 | 
 6 | my ($fragfile, $maxdist) = @ARGV;
 7 | if(not defined $maxdist){
 8 |         die($usage);
 9 | }
10 | 
11 | my $frag_beg;
12 | my $frag_end;
13 | my $frag_chr;
14 | my @frag_id;
15 | 
16 | open(IN, $fragfile);
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($chr, $beg, $end, $id, $len) = split "\t", $line;
20 | 	
21 | 	push @frag_id, $id;
22 | 	$frag_chr->{$id} = $chr;
23 | 	$frag_beg->{$id} = $beg;
24 | 	$frag_end->{$id} = $end;
25 | }
26 | close(IN);
27 | 
28 | my $ind_up = 0;
29 | my $up_id = $frag_id[$ind_up];
30 | my $up_chr = $frag_chr->{$up_id};
31 | my $up_end = $frag_end->{$up_id};
32 | 
33 | for(my $i = 0; $i <= $#frag_id; $i++){
34 | 	my $curr_id = $frag_id[$i];
35 | 	my $curr_chr = $frag_chr->{$curr_id};
36 | 	my $curr_beg = $frag_beg->{$curr_id};
37 | 	my $curr_end = $frag_end->{$curr_id};
38 | 
39 | 	############ find the first upstream frag within maxdist #######################
40 | 	my $up_dist = $curr_beg - $up_end - 1;
41 | 	if($up_chr ne $curr_chr){
42 | 		$ind_up = $i;
43 | 		$up_chr = $curr_chr;
44 | 		$up_id = $frag_id[$ind_up];
45 | 		$up_end = $frag_end->{$up_id};
46 |                 $up_dist = $curr_beg - $up_end - 1;
47 | 	}
48 | 	while($up_dist > $maxdist && $ind_up < $i){
49 | 		$ind_up ++;
50 | 		$up_id = $frag_id[$ind_up];
51 | 		$up_end = $frag_end->{$up_id};
52 | 		$up_dist = $curr_beg - $up_end - 1;
53 | 	}
54 | 
55 | 	for(my $j = $ind_up; $j < $i; $j++){
56 | 		my $id = $frag_id[$j];
57 | 		my $end = $frag_end->{$id};
58 | 		print join("\t", $curr_id, $id, 0, $curr_beg - $end -1)."\n";
59 | 	}
60 | 
61 | 	my $j = $i + 1;
62 | 	my $id = $frag_id[$j];
63 | 	my $chr = $frag_chr->{$id};
64 | 	my $beg = $frag_beg->{$id};
65 | 	while($chr eq $curr_chr && ($beg - $curr_end - 1) <= $maxdist){
66 | 		print join("\t", $curr_id, $id, 0, $beg - $curr_end - 1)."\n";
67 | 		$j++;
68 | 		$id = $frag_id[$j];
69 | 		$chr = $frag_chr->{$id};
70 | 		$beg = $frag_beg->{$id};
71 | 	}
72 | }
73 | 
74 | exit;
75 | 


--------------------------------------------------------------------------------
/bin/HindIII/batch_anchor_by_chrom.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "Usage:./batch_anchor_by_chrom.pl <frag_bed> <loop_list> <promoter_map>\n";
 6 | 
 7 | my ($frag_bed, $loop_list, $map_file) = @ARGV;
 8 | 
 9 | if(not defined $map_file){
10 |         die($usage);
11 | }
12 | 
13 | my $chr_ref;
14 | ################## Read fragment chrom ##################
15 | my $frag_chrom;
16 | open(IN, $frag_bed);
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($chr, $beg, $end, $id) = split "\t", $line;
20 |         $frag_chrom->{$id} = $chr;
21 | 	$chr_ref->{$chr} = 1;
22 | }
23 | close(IN);
24 | 
25 | my $dir = "temp.by.chrom";
26 | `mkdir $dir`;
27 | 
28 | split_by_chr($frag_bed, $frag_chrom, $dir, $chr_ref, 4);
29 | split_by_chr($map_file, $frag_chrom, $dir, $chr_ref, 1);
30 | split_by_chr($loop_list, $frag_chrom, $dir, $chr_ref, 1);
31 | 
32 | foreach my $chr (keys %$chr_ref){
33 | 	# convert fragment-to-fragment data to anchor-to-anchor data
34 | 	`./fragdata_to_anchordata.pl $dir/$loop_list.$chr $dir/$map_file.$chr > $dir/anchor_2_anchor.loop.$chr`;
35 | 
36 | 	# calculate p value for the achor-to-anchor data
37 | 	open(IN, "get_anchor_pval.r");
38 | 	open(OUT, ">$dir/get_anchor_pval.$chr.r");
39 | 	while(my $line = <IN>){
40 | 		$line =~ s/FILE/$dir\/anchor_2_anchor.loop.$chr/g;
41 | 		print OUT $line;
42 | 	}
43 | 	close(OUT);
44 | 	close(IN);
45 | 	`R --quiet --vanilla < $dir/get_anchor_pval.$chr.r`;
46 | }
47 | 
48 | exit;
49 | 
50 | 
51 | ##########################################################################
# Split a tab-separated file into one file per chromosome.
#   $file       - input file; also the base name of the outputs
#   $frag_chrom - hash ref: fragment id => chromosome
#   $dir        - existing directory receiving the outputs
#   $chr_ref    - hash ref whose keys are the chromosome names
#   $col        - 1-based column of $file holding the fragment id
# Every line is copied unchanged to "$dir/$file.<chr>", where <chr> is the
# chromosome of the line's fragment. An output file is created for every
# chromosome in $chr_ref, even when no line goes into it.
# Dies when a file cannot be opened or a line names a fragment that has no
# chromosome in $chr_ref.
sub split_by_chr{
	my ($file, $frag_chrom, $dir, $chr_ref, $col) = @_;

	my %fh_for;
	foreach my $chr (keys %$chr_ref){
		open($fh_for{$chr}, '>', "$dir/$file.$chr")
			or die("Error: cannot write $dir/$file.$chr: $!\n");
	}

	open(my $in, '<', $file) or die("Error: cannot open $file: $!\n");
	while(my $line = <$in>){
		chomp $line;
		my @vals = split "\t", $line;
		my $fid = $vals[$col - 1];
		my $chr = defined $fid ? $frag_chrom->{$fid} : undef;
		my $fh = defined $chr ? $fh_for{$chr} : undef;
		if(not defined $fh){
			my $shown = defined $fid ? $fid : '';
			die("Error: $file line $.: no chromosome known for fragment '$shown'\n");
		}
		print {$fh} $line."\n";
	}
	close($in);

	foreach my $chr (keys %fh_for){
		close($fh_for{$chr})
			or die("Error: cannot finish $dir/$file.$chr: $!\n");
	}
	return;
}
75 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/list_full_matrix.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | my $usage = 	"Usage:./list_frag_pairs.pl <fragment bed> <max_distance>\n".
 4 | 		"\tPrint the list of fragment pairs within a distance.\n";
 5 | 
 6 | my ($fragfile, $maxdist) = @ARGV;
 7 | if(not defined $maxdist){
 8 |         die($usage);
 9 | }
10 | 
11 | my $frag_beg;
12 | my $frag_end;
13 | my $frag_chr;
14 | my @frag_id;
15 | 
16 | open(IN, $fragfile);
17 | while(my $line = <IN>){
18 |         chomp $line;
19 |         my ($chr, $beg, $end, $id, $len) = split "\t", $line;
20 | 	
21 | 	push @frag_id, $id;
22 | 	$frag_chr->{$id} = $chr;
23 | 	$frag_beg->{$id} = $beg;
24 | 	$frag_end->{$id} = $end;
25 | }
26 | close(IN);
27 | 
28 | my $ind_up = 0;
29 | my $up_id = $frag_id[$ind_up];
30 | my $up_chr = $frag_chr->{$up_id};
31 | my $up_end = $frag_end->{$up_id};
32 | 
33 | for(my $i = 0; $i <= $#frag_id; $i++){
34 | 	my $curr_id = $frag_id[$i];
35 | 	my $curr_chr = $frag_chr->{$curr_id};
36 | 	my $curr_beg = $frag_beg->{$curr_id};
37 | 	my $curr_end = $frag_end->{$curr_id};
38 | 
39 | 	############ find the first upstream frag within maxdist #######################
40 | 	my $up_dist = $curr_beg - $up_end - 1;
41 | 	if($up_chr ne $curr_chr){
42 | 		$ind_up = $i;
43 | 		$up_chr = $curr_chr;
44 | 		$up_id = $frag_id[$ind_up];
45 | 		$up_end = $frag_end->{$up_id};
46 |                 $up_dist = $curr_beg - $up_end - 1;
47 | 	}
48 | 	while($up_dist > $maxdist && $ind_up < $i){
49 | 		$ind_up ++;
50 | 		$up_id = $frag_id[$ind_up];
51 | 		$up_end = $frag_end->{$up_id};
52 | 		$up_dist = $curr_beg - $up_end - 1;
53 | 	}
54 | 
55 | 	for(my $j = $ind_up; $j < $i; $j++){
56 | 		my $id = $frag_id[$j];
57 | 		my $end = $frag_end->{$id};
58 | 		print join("\t", $curr_id, $id, 0, $curr_beg - $end -1)."\n";
59 | 	}
60 | 
61 | 	my $j = $i + 1;
62 | 	my $id = $frag_id[$j];
63 | 	my $chr = $frag_chr->{$id};
64 | 	my $beg = $frag_beg->{$id};
65 | 	while($chr eq $curr_chr && ($beg - $curr_end - 1) <= $maxdist){
66 | 		print join("\t", $curr_id, $id, 0, $beg - $curr_end - 1)."\n";
67 | 		$j++;
68 | 		$id = $frag_id[$j];
69 | 		$chr = $frag_chr->{$id};
70 | 		$beg = $frag_beg->{$id};
71 | 	}
72 | }
73 | 
74 | exit;
75 | 


--------------------------------------------------------------------------------
/documents/lib/count_trans_pairs_by_GC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my ($frag_bed, $frag_stat_file, $GC_group_file) = @ARGV;
 6 | 
 7 | 
 8 | my $frag_chr;
 9 | open(IN, $frag_bed);
10 | while(my $line = <IN>){
11 |         chomp $line;
12 |         my ($chr, $beg, $end, $id) = split "\t", $line;
13 |         $frag_chr->{$id} = $chr;
14 | }
15 | close(IN);
16 | 
17 | ##################### determine the GC group for fragments #################
18 | my $GC_range;
19 | open(IN, $GC_group_file);
20 | while(my $line = <IN>){
21 |         chomp $line;
22 |         my ($id, $min, $max) = split "\t", $line;
23 |         $GC_range->{$id} = "$min:$max";
24 | }
25 | close(IN);
26 | 
27 | my $total_count;
28 | my $chrom_count;
29 | open(IN, $frag_stat_file);
30 | while(my $line=<IN>){
31 |         chomp $line;
32 |         my ($chr,$beg,$end,$id, $dist,$gc) = split "\t", $line;
33 | 	
34 | 	my $gc_group = get_id($gc, $GC_range);
35 | 	my $chr = $frag_chr->{$id};
36 | 	if(not defined $total_count->{$gc_group}){
37 | 		$total_count->{$gc_group} = 0;
38 | 	}
39 | 	$total_count->{$gc_group} ++;
40 | 	if(not defined $chrom_count->{$gc_group}->{$chr}){
41 | 		$chrom_count->{$gc_group}->{$chr} = 0;
42 | 	}
43 | 	$chrom_count->{$gc_group}->{$chr} ++;
44 | }
45 | close(IN);
46 | 
47 | foreach my $g1 (sort {$a<=>$b} keys %$chrom_count){
48 | 	foreach my $g2 (sort {$a<=>$b} keys %$chrom_count){
49 | 		my $count = 0;
50 | 		foreach my $chr1 (keys %{$chrom_count->{$g1}}){
51 | 			foreach my $chr2 (keys %{$chrom_count->{$g2}}){
52 | 				if($chr1 ne $chr2){
53 | 					$count += $chrom_count->{$g1}->{$chr1} * $chrom_count->{$g2}->{$chr2}
54 | 					
55 | 				}
56 | 			}
57 | 		}
58 | 		print join("\t", $g1, $g2, $count)."\n";
59 | 	}
60 | }
61 | 
62 | 
63 | exit;
64 | 
65 | ##############################################################
# Find the group whose range contains a value.
#   $val   - number to classify
#   $range - hash ref: group id => "min:max"; a group matches when
#            min < $val <= max
# Returns the id of a matching group; dies with the value as the message
# when no group matches.
sub get_id{
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split ":", $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("$val");
}
77 | 
78 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/count_trans_pairs_by_GC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my ($frag_bed, $frag_stat_file, $GC_group_file) = @ARGV;
 6 | 
 7 | 
 8 | my $frag_chr;
 9 | open(IN, $frag_bed);
10 | while(my $line = <IN>){
11 |         chomp $line;
12 |         my ($chr, $beg, $end, $id) = split "\t", $line;
13 |         $frag_chr->{$id} = $chr;
14 | }
15 | close(IN);
16 | 
17 | ##################### determine the GC group for fragments #################
18 | my $GC_range;
19 | open(IN, $GC_group_file);
20 | while(my $line = <IN>){
21 |         chomp $line;
22 |         my ($id, $min, $max) = split "\t", $line;
23 |         $GC_range->{$id} = "$min:$max";
24 | }
25 | close(IN);
26 | 
27 | my $total_count;
28 | my $chrom_count;
29 | open(IN, $frag_stat_file);
30 | while(my $line=<IN>){
31 |         chomp $line;
32 |         my ($chr,$beg,$end,$id, $dist,$gc) = split "\t", $line;
33 | 	
34 | 	my $gc_group = get_id($gc, $GC_range);
35 | 	my $chr = $frag_chr->{$id};
36 | 	if(not defined $total_count->{$gc_group}){
37 | 		$total_count->{$gc_group} = 0;
38 | 	}
39 | 	$total_count->{$gc_group} ++;
40 | 	if(not defined $chrom_count->{$gc_group}->{$chr}){
41 | 		$chrom_count->{$gc_group}->{$chr} = 0;
42 | 	}
43 | 	$chrom_count->{$gc_group}->{$chr} ++;
44 | }
45 | close(IN);
46 | 
47 | foreach my $g1 (sort {$a<=>$b} keys %$chrom_count){
48 | 	foreach my $g2 (sort {$a<=>$b} keys %$chrom_count){
49 | 		my $count = 0;
50 | 		foreach my $chr1 (keys %{$chrom_count->{$g1}}){
51 | 			foreach my $chr2 (keys %{$chrom_count->{$g2}}){
52 | 				if($chr1 ne $chr2){
53 | 					$count += $chrom_count->{$g1}->{$chr1} * $chrom_count->{$g2}->{$chr2}
54 | 					
55 | 				}
56 | 			}
57 | 		}
58 | 		print join("\t", $g1, $g2, $count)."\n";
59 | 	}
60 | }
61 | 
62 | 
63 | exit;
64 | 
65 | ##############################################################
# Find the group whose range contains a value.
#   $val   - number to classify
#   $range - hash ref: group id => "min:max"; a group matches when
#            min < $val <= max
# Returns the id of a matching group; dies with the value as the message
# when no group matches.
sub get_id{
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split ":", $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("$val");
}
77 | 
78 | 


--------------------------------------------------------------------------------
/bin/Arima/Arima.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ref=$1
 4 | bin=$2
 5 | cis_loop=$3
 6 | trans_loop=$4
 7 | name=$5
 8 | genome=$6
 9 | anchorbed=${ref}/${genome}_anchors_avg.bed
10 | 
11 | cat $cis_loop | $bin/fragdata_to_anchordata.pl - $ref/${genome}_frag_2_anchor  | ${bin}/remove.blacklist.py $ref/${genome}_5kb_anchors_blacklist |${bin}/pick.dist.pl $anchorbed - | awk '{if($4<=2000000) print $0}' > end_loop.2M.rmbl &
12 | 
13 | cat $cis_loop | $bin/fragdata_to_anchordata.pl - $ref/${genome}_frag_2_anchor  | ${bin}/remove.blacklist.py $ref/${genome}_5kb_anchors_blacklist |${bin}/pick.dist.pl $anchorbed - | awk '{if($4>2000000) print $0}' > end_loop.gt.2M  &
14 | 
15 | cat $trans_loop | $bin/fragdata_to_anchordata.pl - $ref/${genome}_frag_2_anchor  | ${bin}/remove.blacklist.py $ref/${genome}_5kb_anchors_blacklist > end_loop.rmbl.trans &
16 | 
17 | wait
18 | 
19 | cat end_loop.gt.2M end_loop.rmbl.trans | cut -f1-3 > end_loop.merged.trans
20 | 
21 | ${bin}/merge_sorted_anchor_loop.pl $ref/${genome}.full.matrix end_loop.2M.rmbl > end_loop.full &
22 | 
23 | ${bin}/get_trans.avg_by_len.pl end_loop.merged.trans $ref/${genome}_anchor_length.groups $anchorbed $ref/${genome}.trans.possible.pairs > trans.stat &
24 | 
25 | wait
26 | 
27 | ${bin}/get_corr_factor_by_len.py trans.stat > len.factor 
28 | 
29 | $bin/correct.trans.reads.by.corr.pl end_loop.merged.trans $anchorbed $ref/${genome}_anchor_length.groups len.factor > trans.corr.by.all 
30 | 
31 | ${bin}/sum_anchor_reads.py trans.corr.by.all > anchors.sum 
32 | 
33 | avg=`cat anchors.sum | awk '{s+=$2;n++}END{print s/n}'`
34 | 
35 | cat anchors.sum | awk -v avg=$avg '{print $1,$2/avg}' OFS='\t'  > anchor.vis.list
36 | 
37 | ${bin}/get_group_statistics.pl end_loop.full $anchorbed $ref/${genome}_anchor_length.groups $ref/${genome}.dist.401.group > dist.len.stat 
38 | 
39 | ${bin}/get_loop_lambda.pl end_loop.full $anchorbed $ref/${genome}_anchor_length.groups $ref/${genome}.dist.401.group dist.len.stat > end_loop.after.dist.len
40 | 
41 | ${bin}/add.vis.to.cis.2M.pl end_loop.after.dist.len anchor.vis.list > end_loop.after.vis
42 | wait
43 | cat end_loop.after.vis | $bin/split_chromo.py $anchorbed 
44 | 
45 | rm -rf end_loop.2M.rmbl end_loop.gt.2M end_loop.rmbl.trans 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/documents/lib/count_cis_2M_pair.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($frag_bed, $GC_group_file, $frag_stat_file, $min_map) = @ARGV;
 5 | if(not defined $min_map){
 6 |         die("Usage:./count_cis_pairs_by_GC.pl <chr_bin_bed> <bin_cutting_site_group> <bin_stat> <minimum_mappability>");
 7 | }
 8 | 
 9 | my $frag_chr;
10 | open(IN, $frag_bed);
11 | while(my $line = <IN>){
12 |         chomp $line;
13 |         my ($chr, $beg, $end, $id) = split "\t", $line;
14 |         $frag_chr->{$id} = $chr;
15 | }
16 | close(IN);
17 | 
18 | ##################### determine the GC group for fragments #################
19 | my $GC_range;
20 | open(IN, $GC_group_file);
21 | while(my $line = <IN>){
22 |         chomp $line;
23 |         my ($id, $min, $max) = split "\t", $line;
24 |         $GC_range->{$id} = "$min:$max";
25 | }
26 | close(IN);
27 | 
28 | my $total_count;
29 | my $chrom_count;
30 | open(IN, $frag_stat_file);
31 | while(my $line=<IN>){
32 |         chomp $line;
33 |         my ($chr,$beg,$end,$id, $gc) = split "\t", $line;
34 | 	
35 | 	my $gc_group = get_id($gc, $GC_range);
36 | 	my $chr = $frag_chr->{$id};
37 | 	if(not defined $total_count->{$gc_group}){
38 | 		$total_count->{$gc_group} = 0;
39 | 	}
40 | 	$total_count->{$gc_group} ++;
41 | 	if(not defined $chrom_count->{$gc_group}->{$chr}){
42 | 		$chrom_count->{$gc_group}->{$chr} = 0;
43 | 	}
44 | 	$chrom_count->{$gc_group}->{$chr} ++;
45 | }
46 | close(IN);
47 | 
48 | foreach my $g1 (sort {$a<=>$b} keys %$chrom_count){
49 | 	foreach my $g2 (sort {$a<=>$b} keys %$chrom_count){
50 | 		my $count = 0;
51 | 		foreach my $chr (keys %{$chrom_count->{$g1}}){
52 | 			$count += $chrom_count->{$g1}->{$chr} * ($chrom_count->{$g2}->{$chr} - 1);
53 | 		}
54 | 		print join("\t", $g1, $g2, $count)."\n";
55 | 	}
56 | }
57 | 
58 | 
59 | exit;
60 | 
61 | ##############################################################
# Look up the group whose interval contains $val.
#   $val   - numeric value to classify
#   $range - hash ref mapping group id => "min:max"
# A group matches when min < $val <= max. Returns the matching group id;
# dies when no group matches.
sub get_id{
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split /:/, $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group\n");
}
72 | 
73 | 


--------------------------------------------------------------------------------
/documents/lib/count_cis_pairs_by_GC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($frag_bed, $GC_group_file, $frag_stat_file, $min_map) = @ARGV;
 5 | if(not defined $min_map){
 6 |         die("Usage:./count_cis_pairs_by_GC.pl <frag_bed> <frag_GC_group> <frag_stat> <minimum_mappability>");
 7 | }
 8 | 
 9 | my $frag_chr;
10 | open(IN, $frag_bed);
11 | while(my $line = <IN>){
12 |         chomp $line;
13 |         my ($chr, $beg, $end, $id) = split "\t", $line;
14 |         $frag_chr->{$id} = $chr;
15 | }
16 | close(IN);
17 | 
18 | ##################### determine the GC group for fragments #################
19 | my $GC_range;
20 | open(IN, $GC_group_file);
21 | while(my $line = <IN>){
22 |         chomp $line;
23 |         my ($id, $min, $max) = split "\t", $line;
24 |         $GC_range->{$id} = "$min:$max";
25 | }
26 | close(IN);
27 | 
28 | my $total_count;
29 | my $chrom_count;
30 | open(IN, $frag_stat_file);
31 | while(my $line=<IN>){
32 |         chomp $line;
33 |         my ($chr,$beg,$end,$id,$dist,$gc) = split "\t", $line;
34 | 	
35 | 	my $gc_group = get_id($gc, $GC_range);
36 | 	my $chr = $frag_chr->{$id};
37 | 	if(not defined $total_count->{$gc_group}){
38 | 		$total_count->{$gc_group} = 0;
39 | 	}
40 | 	$total_count->{$gc_group} ++;
41 | 	if(not defined $chrom_count->{$gc_group}->{$chr}){
42 | 		$chrom_count->{$gc_group}->{$chr} = 0;
43 | 	}
44 | 	$chrom_count->{$gc_group}->{$chr} ++;
45 | }
46 | close(IN);
47 | 
48 | foreach my $g1 (sort {$a<=>$b} keys %$chrom_count){
49 | 	foreach my $g2 (sort {$a<=>$b} keys %$chrom_count){
50 | 		my $count = 0;
51 | 		foreach my $chr (keys %{$chrom_count->{$g1}}){
52 | 			$count += $chrom_count->{$g1}->{$chr} * ($chrom_count->{$g2}->{$chr} - 1);
53 | 		}
54 | 		print join("\t", $g1, $g2, $count)."\n";
55 | 	}
56 | }
57 | 
58 | 
59 | exit;
60 | 
61 | ##############################################################
# Look up the group whose interval contains $val.
#   $val   - numeric value to classify
#   $range - hash ref mapping group id => "min:max"
# A group matches when min < $val <= max. Returns the matching group id;
# dies when no group matches.
sub get_id{
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split /:/, $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group\n");
}
72 | 
73 | 


--------------------------------------------------------------------------------
/HiCorr_Arima.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ref=$1
 4 | bin=$2
 5 | cis_loop=$3
 6 | trans_loop=$4
 7 | name=$5
 8 | genome=$6
 9 | anchorbed=${ref}/${genome}_Arima_anchors_avg.bed
10 | 
11 | cat $cis_loop | $bin/fragdata_to_anchordata.pl - $ref/${genome}_Arima_frag_2_anchor | ${bin}/remove.blacklist.py $ref/${genome}_5kb_anchors_blacklist |${bin}/pick.dist.pl $anchorbed - | awk '{if($4<=2000000) print $0}' > end_loop.2M.rmbl &
12 | 
13 | cat $cis_loop | $bin/fragdata_to_anchordata.pl - $ref/${genome}_Arima_frag_2_anchor | ${bin}/remove.blacklist.py $ref/${genome}_5kb_anchors_blacklist |${bin}/pick.dist.pl $anchorbed - | awk '{if($4>2000000) print $0}' > end_loop.gt.2M  &
14 | 
15 | cat $trans_loop | $bin/fragdata_to_anchordata.pl - $ref/${genome}_Arima_frag_2_anchor | ${bin}/remove.blacklist.py $ref/${genome}_5kb_anchors_blacklist > end_loop.rmbl.trans &
16 | 
17 | wait
18 | 
19 | cat end_loop.gt.2M end_loop.rmbl.trans | cut -f1-3 > end_loop.merged.trans
20 | 
21 | ${bin}/merge_sorted_anchor_loop.pl $ref/${genome}.full.matrix end_loop.2M.rmbl > end_loop.full &
22 | 
23 | ${bin}/get_trans.avg_by_len.pl end_loop.merged.trans $ref/${genome}_anchor_length.groups $anchorbed $ref/${genome}.trans.possible.pairs > trans.stat &
24 | 
25 | wait
26 | 
27 | ${bin}/get_corr_factor_by_len.py trans.stat > len.factor 
28 | 
29 | $bin/correct.trans.reads.by.corr.pl end_loop.merged.trans $anchorbed $ref/${genome}_anchor_length.groups len.factor > trans.corr.by.all 
30 | 
31 | ${bin}/sum_anchor_reads.py trans.corr.by.all > anchors.sum 
32 | 
33 | avg=`cat anchors.sum | awk '{s+=$2;n++}END{print s/n}'`
34 | 
35 | cat anchors.sum | awk -v avg=$avg '{print $1,$2/avg}' OFS='\t'  > anchor.vis.list
36 | 
37 | ${bin}/get_group_statistics.pl end_loop.full $anchorbed $ref/${genome}_anchor_length.groups $ref/${genome}.dist.401.group > dist.len.stat 
38 | 
39 | ${bin}/get_loop_lambda.pl end_loop.full $anchorbed $ref/${genome}_anchor_length.groups $ref/${genome}.dist.401.group dist.len.stat > end_loop.after.dist.len
40 | 
41 | ${bin}/add.vis.to.cis.2M.pl end_loop.after.dist.len anchor.vis.list > end_loop.after.vis
42 | wait
43 | cat end_loop.after.vis | $bin/split_chromo.py $anchorbed 
44 | 
45 | rm -rf end_loop.2M.rmbl end_loop.gt.2M end_loop.rmbl.trans 
46 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/count_cis_pairs_by_GC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($frag_bed, $GC_group_file, $frag_stat_file, $min_map) = @ARGV;
 5 | if(not defined $min_map){
 6 |         die("Usage:./count_cis_pairs_by_GC.pl <frag_bed> <frag_GC_group> <frag_stat> <minimum_mappability>");
 7 | }
 8 | 
 9 | my $frag_chr;
10 | open(IN, $frag_bed);
11 | while(my $line = <IN>){
12 |         chomp $line;
13 |         my ($chr, $beg, $end, $id) = split "\t", $line;
14 |         $frag_chr->{$id} = $chr;
15 | }
16 | close(IN);
17 | 
18 | ##################### determine the GC group for fragments #################
19 | my $GC_range;
20 | open(IN, $GC_group_file);
21 | while(my $line = <IN>){
22 |         chomp $line;
23 |         my ($id, $min, $max) = split "\t", $line;
24 |         $GC_range->{$id} = "$min:$max";
25 | }
26 | close(IN);
27 | 
28 | my $total_count;
29 | my $chrom_count;
30 | open(IN, $frag_stat_file);
31 | while(my $line=<IN>){
32 |         chomp $line;
33 |         my ($chr,$beg,$end,$id,$dist,$gc) = split "\t", $line;
34 | 	
35 | 	my $gc_group = get_id($gc, $GC_range);
36 | 	my $chr = $frag_chr->{$id};
37 | 	if(not defined $total_count->{$gc_group}){
38 | 		$total_count->{$gc_group} = 0;
39 | 	}
40 | 	$total_count->{$gc_group} ++;
41 | 	if(not defined $chrom_count->{$gc_group}->{$chr}){
42 | 		$chrom_count->{$gc_group}->{$chr} = 0;
43 | 	}
44 | 	$chrom_count->{$gc_group}->{$chr} ++;
45 | }
46 | close(IN);
47 | 
48 | foreach my $g1 (sort {$a<=>$b} keys %$chrom_count){
49 | 	foreach my $g2 (sort {$a<=>$b} keys %$chrom_count){
50 | 		my $count = 0;
51 | 		foreach my $chr (keys %{$chrom_count->{$g1}}){
52 | 			$count += $chrom_count->{$g1}->{$chr} * ($chrom_count->{$g2}->{$chr} - 1);
53 | 		}
54 | 		print join("\t", $g1, $g2, $count)."\n";
55 | 	}
56 | }
57 | 
58 | 
59 | exit;
60 | 
61 | ##############################################################
# Look up the group whose interval contains $val.
#   $val   - numeric value to classify
#   $range - hash ref mapping group id => "min:max"
# A group matches when min < $val <= max. Returns the matching group id;
# dies when no group matches.
sub get_id{
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split /:/, $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group\n");
}
72 | 
73 | 


--------------------------------------------------------------------------------
/bin/generateReference_lib/count_cis_2M_pair.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($frag_bed, $GC_group_file, $frag_stat_file, $min_map) = @ARGV;
 5 | if(not defined $min_map){
 6 |         die("Usage:./count_cis_pairs_by_GC.pl <chr_bin_bed> <bin_cutting_site_group> <bin_stat> <minimum_mappability>");
 7 | }
 8 | 
 9 | my $frag_chr;
10 | open(IN, $frag_bed);
11 | while(my $line = <IN>){
12 |         chomp $line;
13 |         my ($chr, $beg, $end, $id) = split "\t", $line;
14 |         $frag_chr->{$id} = $chr;
15 | }
16 | close(IN);
17 | 
18 | ##################### determine the GC group for fragments #################
19 | my $GC_range;
20 | open(IN, $GC_group_file);
21 | while(my $line = <IN>){
22 |         chomp $line;
23 |         my ($id, $min, $max) = split "\t", $line;
24 |         $GC_range->{$id} = "$min:$max";
25 | }
26 | close(IN);
27 | 
28 | my $total_count;
29 | my $chrom_count;
30 | open(IN, $frag_stat_file);
31 | while(my $line=<IN>){
32 |         chomp $line;
33 |         my ($chr,$beg,$end,$id, $gc) = split "\t", $line;
34 | 	
35 | 	my $gc_group = get_id($gc, $GC_range);
36 | 	my $chr = $frag_chr->{$id};
37 | 	if(not defined $total_count->{$gc_group}){
38 | 		$total_count->{$gc_group} = 0;
39 | 	}
40 | 	$total_count->{$gc_group} ++;
41 | 	if(not defined $chrom_count->{$gc_group}->{$chr}){
42 | 		$chrom_count->{$gc_group}->{$chr} = 0;
43 | 	}
44 | 	$chrom_count->{$gc_group}->{$chr} ++;
45 | }
46 | close(IN);
47 | 
48 | foreach my $g1 (sort {$a<=>$b} keys %$chrom_count){
49 | 	foreach my $g2 (sort {$a<=>$b} keys %$chrom_count){
50 | 		my $count = 0;
51 | 		foreach my $chr (keys %{$chrom_count->{$g1}}){
52 | 			$count += $chrom_count->{$g1}->{$chr} * ($chrom_count->{$g2}->{$chr} - 1);
53 | 		}
54 | 		print join("\t", $g1, $g2, $count)."\n";
55 | 	}
56 | }
57 | 
58 | 
59 | exit;
60 | 
61 | ##############################################################
# Look up the group whose interval contains $val.
#   $val   - numeric value to classify
#   $range - hash ref mapping group id => "min:max"
# A group matches when min < $val <= max. Returns the matching group id;
# dies when no group matches.
sub get_id{
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split /:/, $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group\n");
}
72 | 
73 | 


--------------------------------------------------------------------------------
/bin/bam_to_frag_loop.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | 
 3 | bam_file=$1
 4 | name=$2
 5 | rdlen=$3	#read length
 6 | 
 7 | ref=$4
 8 | bin=$5
 9 | genome=$6
10 | 
11 | samtools view $bam_file | cut -f2-8 | perl $bin/bam_to_temp_HiC.pl | perl $bin/reads_2_cis_frag_loop.pl $ref/HindIII/$genome.HindIII.frag.bed $rdlen $name.loop.inward $name.loop.outward $name.loop.samestrand $name &
12 | samtools view $bam_file | cut -f2-8 | perl $bin/bam_to_temp_HiC.pl | perl $bin/reads_2_trans_frag_loop.pl $ref/HindIII/$genome.HindIII.frag.bed $rdlen $name.loop.trans &
13 | wait
14 | 
15 | sed -i 's/BIN/$bin/g' $bin/resort_by_frag_id.pl
16 | perl $bin/summary_sorted_frag_loop.pl $ref/HindIII/$genome.HindIII.frag.bed $name.loop.inward >temp.$name.loop.inward &
17 | perl $bin/summary_sorted_frag_loop.pl $ref/HindIII/$genome.HindIII.frag.bed $name.loop.outward >temp.$name.loop.outward &
18 | perl $bin/summary_sorted_frag_loop.pl $ref/HindIII/$genome.HindIII.frag.bed $name.loop.samestrand >temp.$name.loop.samestrand &
19 | perl $bin/summary_sorted_trans_frag_loop.pl $name.loop.trans >temp.$name.loop.trans &
20 | wait
21 | mv temp.$name.loop.inward $name.loop.inward
22 | mv temp.$name.loop.outward $name.loop.outward
23 | mv temp.$name.loop.samestrand $name.loop.samestrand
24 | mv temp.$name.loop.trans $name.trans.frag_loop
25 | 
26 | perl $bin/resort_by_frag_id.pl $ref/HindIII/$genome.HindIII.frag.bed $name.loop.inward $bin &
27 | perl $bin/resort_by_frag_id.pl $ref/HindIII/$genome.HindIII.frag.bed $name.loop.outward $bin &
28 | perl $bin/resort_by_frag_id.pl $ref/HindIII/$genome.HindIII.frag.bed $name.loop.samestrand $bin &
29 | perl $bin/resort_by_frag_id.pl $ref/HindIII/$genome.HindIII.frag.bed $name.loop.trans $bin &
30 | wait
31 | 
32 | awk '{if($4>1000)print $0}' $name.loop.inward >temp.$name.loop.inward &
33 | awk '{if($4>25000)print $0}' $name.loop.outward >temp.$name.loop.outward &
34 | 
35 | #awk '{if($4>5000)print $0}' $name.loop.outward > temp.$name.loop.outward
36 | 
37 | wait
38 | mv temp.$name.loop.inward $name.loop.inward
39 | mv temp.$name.loop.outward $name.loop.outward
40 | 
41 | perl $bin/merge_sorted_frag_loop.pl $name.loop.inward $name.loop.outward $name.loop.samestrand >$name.cis.frag_loop
42 | rm $name.loop.inward $name.loop.outward $name.loop.samestrand
43 | 


--------------------------------------------------------------------------------
/documents/Generate.reference.5kb_bin.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # prepare reference file ##########################################################################################
 3 | - 1. Genome build version. e.g. hg38
 4 |   - download chrom.sizes: 
 5 |   - download fa directory: 
 6 |   - download blacklist region: https://github.com/Boyle-Lab/ 
 7 | - 2. We use 500bp windows for micro-C/Dnase-Hi-C "frag", and 5kb as "anchor", generated by bedtools makewindows
 8 | - 3. path to the scripts, <HiCorr_dir>/bin/generateReference_lib/
 9 | ```
10 | wget https://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.chrom.sizes
11 | wget https://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.chromFa.tar.gz
12 | wget http://mitra.stanford.edu/kundaje/akundaje/release/blacklists/hg38-human/hg38.blacklist.bed.gz
13 | gunzip hg38.blacklist.bed.gz
14 | tar -xvf hg38.chromFa.tar.gz
15 | for file in `ls chroms/ | grep _`;do
16 |         rm chroms/$file
17 | done
18 | # reformat hg38.chrom.sizes by the 1st column: chr1, chr2, ... chr22, chrX, chrY
19 | cat <(cat hg38.chrom.sizes | grep -v "_\|M" | sed s/"chr"//g | sort -gk 1 | grep -v 'X\|Y') \
20 |     <(cat hg38.chrom.sizes | grep -v "_\|M" | sed s/"chr"//g | sort -gk 1 | grep 'X\|Y') \
21 |     | awk '{print "chr"$0}' > hg38.chrom.sizes.reformat
22 | 
23 | ## start build references #################################################################################
24 | bedtools makewindows -g hg38.chrom.sizes.reformat -w 500 | awk '{print $1 "\t" $2+1 "\t" $3 "\t" "frag_"NR}' > hg38.500bp.bed
25 | bedtools makewindows -g hg38.chrom.sizes.reformat -w 5000 | awk '{print $1 "\t" $2+1 "\t" $3 "\t" "A_"NR}' > hg38.5kb.bed
26 | bedtools intersect -wa -a hg38.5kb.bed -b hg38.blacklist.bed | cut -f1-4 | sort -u > hg38.5kb.bed.blacklist
27 | bedtools intersect -wa -wb -a hg38.500bp.bed -b hg38.5kb.bed | awk '{print $4 "\t" $8}' > hg38.500bp_5kb
28 | cp ${HiCorr_path}/bin/dist.401.group hg38.dist.5kb.group
29 | ${HiCorr_path}/bin/generateReference_lib/list_full_matrix.pl hg38.5kb.bed 2000000 | python ${HiCorr_path}/bin/generateReference_lib/remove.blacklist.py hg38.5kb.bed.blacklist > hg38.full.filter.matrix
30 | ${HiCorr_path}/bin/microC/get_group_statistics.pl hg38.full.filter.matrix hg38.dist.5kb.group | awk '{print $0,0}' OFS='\t' > hg38.full.dist.stat.5kb
31 | 
32 | ```
33 | 


--------------------------------------------------------------------------------
/documents/HiCorr_heatmap.old.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Heatmap
 3 | Heatmap mode generates Hi-C heatmaps of a region you choose (up to 2,000,000 bp). This mode needs to be run after either the HindIII mode or the eHiC mode, since it takes an anchor-to-anchor looping-pair file as input.
 4 | <br/>
 5 | To run the Heatmap mode: <br/>
 6 |    ```./HiCorr Heatmap <chr> <start> <end> <anchor_loop_file> <reference_genome> <enzyme> [option]``` <br/>
 7 | Example run: <br/>
 8 |    #### Download test dataset for H9 chr11 (restriction enzyme: HindIII; genome build:hg19) from GSE130711
 9 |    ```
10 |    wget --no-check-certificate https://hiview10.gene.cwru.edu/public/DeepLoop_ref/HiCorr_test_data/HiCorr_output.tar.gz
11 |    # old path: http://hiview.case.edu/ssz20/tmp.HiCorr.ref/HiCorr_test_data/HiCorr_output.tar.gz 
12 |    tar -xvf HiCorr_output.tar.gz
13 |    ls
14 |    ls HiCorr_output
15 |    ```
16 |    #### Plot heatmaps
17 |    ```./HiCorr Heatmap chr11 130000000 130800000 HiCorr_output/anchor_2_anchor.loop.chr11 hg19 HindIII``` <br/>
18 |    You will see three png files named as "hg19.HindIII.chr11_130000000_130800000.raw.matrix.png", "hg19.HindIII.chr11_130000000_130800000.expt.matrix.png" and "hg19.HindIII.chr11_130000000_130800000.ratio.matrix.png" <br/>
19 |    <p float="center">
20 |       <img src="https://github.com/JinLabBioinfo/HiCorr/blob/master/png/hg19.HindIII.chr11_130000000_130800000.raw.matrix.png" width="200" />
21 |       <img src="https://github.com/JinLabBioinfo/HiCorr/blob/master/png/hg19.HindIII.chr11_130000000_130800000.expt.matrix.png" width="200" /> 
22 |       <img src="https://github.com/JinLabBioinfo/HiCorr/blob/master/png/hg19.HindIII.chr11_130000000_130800000.ratio.matrix.png" width="200" />
23 |    </p>
24 |    
25 | #### Options
26 | *  _Default_ <br/>
27 |    By default, heatmap mode generates 3 heatmaps for the region you entered: a raw heatmap of observed reads, a heatmap of expected reads, and a heatmap of bias-corrected reads (the ratio of observed reads over expected reads). If you want all 3 of these heatmaps, leave the option blank.
28 | * _-raw_ <br/>
29 |    Only generates a raw heatmap of observed reads
30 | * _-expected_ <br/>
31 |    Only generates a heatmap of expected reads
32 | * _-ratio_ <br/>
33 |    Only generates a bias-corrected heatmap
34 | 


--------------------------------------------------------------------------------
/documents/old.md.left.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Download reference files
 3 | After you run the following commands, you will see "ref/" in the current directory. There are 4 subdirectories under "ref/": "DPNII/  eHiC/  eHiC-QC/  HindIII".
 4 | In each subdirectory, there are reference files for genome build hg19 and mm10. </br>
 5 | [More descriptions for the reference files](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/reference_file_description.md).</br>
 6 | 
 7 | ```
 8 | wget --no-check-certificate https://hiview10.gene.cwru.edu/public/DeepLoop_ref/HiCorr.tar.gz
 9 | # old path: http://hiview.case.edu/ssz20/tmp.HiCorr.ref/HiCorr.tar.gz # download reference files 
10 | # It needs ~103G of space after decompression
11 | tar -xvf HiCorr.tar.gz 
12 | ls
13 | ls ref/
14 | ```
15 | ### Change variables ref and bin in HiCorr file
16 | > In the HiCorr file, you can manually replace "PATH_TO_REF" with the path to your "ref" directory and "PATH_TO_BIN" with the path to your "bin" directory. 
17 | > Or use the command below: 
18 | ```
19 | new_bin=`pwd`"/bin" 
20 | new_ref=`pwd`"/ref" 
21 | sed -i "s|PATH_TO_REF|${new_ref}|" HiCorr
22 | sed -i "s|PATH_TO_BIN|${new_bin}|" HiCorr
23 | ```
24 | 
25 | ## Run HiCorr
26 | Usage:<br/>
27 |    ```./HiCorr <mode> <parameters>```
28 | <br/>
29 | 
30 | **_HiCorr has different modes: Bam-process-HindIII, Bam-process-DPNII, HindIII, DPNII, eHiC-QC, eHiC and Heatmap_**
31 | 
32 | ### Bam-process
33 | Bam-process mode takes a sorted bam file as input, processes and generates two files as outputs. The two output files are the required input files when using the HiCorr HindIII mode. The two output files are intra-chromosome looping fragment-pair file and inter-chromosome looping fragment-pair file. <br/>
34 | This mode currently is only able to process bam file of HindIII Hi-C data. <br/>
35 | To run the Bam-process mode, you need 6 arguments:
36 |    
37 |    ```./HiCorr Bam-process-HindIII <bam_file> <name_of_your_data> <mapped_read_length_in_your_bam_file> <genome> HindIII```
38 |    
39 |    ```./HiCorr Bam-process-DPNII <bam_file> <name_of_your_data> <mapped_read_length_in_your_bam_file> <genome> DPNII```
40 | 
41 | More details about the preprocessing (fastq to bam files to fragment loops) are [here](https://github.com/shanshan950/Hi-C-data-preprocess)
42 | 


--------------------------------------------------------------------------------
/bin/Arima/get_trans.avg_by_len.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($regress, $len_group_file, $anchor_stat_file, $loop_count_file) = @ARGV;
 5 | 
 6 | 
 7 | ##################### determine the avg length group for anchors #################
 8 | my $len_range;
 9 | open(IN, $len_group_file);
10 | while(my $line = <IN>){
11 |         chomp $line;
12 |         my ($id, $min, $max) = split "\t", $line;
13 |         $len_range->{$id} = "$min:$max";
14 | }
15 | close(IN);
16 | 
17 | #my $frag_mappability;
18 | my $frag_len_group;
19 | #my $frag_gc;
20 | open(IN, $anchor_stat_file);
21 | while(my $line=<IN>){
22 | 	chomp $line;
23 | 	my ($chr, $beg, $end, $id, $len, $avg) = split "\t", $line;
24 | 	$frag_len_group->{$id} = get_id($avg, $len_range);
25 | }
26 | close(IN);
27 | 
28 | my $g_count;
29 | open(IN, $loop_count_file);
30 | while(my $line = <IN>){
31 | 	chomp $line;
32 | 	my ($g1, $g2, $count) = split "\t", $line;
33 | 	$g_count->{$g1}->{$g2} = $count;
34 | }
35 | close(IN);
36 | 
37 | 
38 | ################## Calculate group averages #######################################
39 | my $g_sum;
40 | open(IN, $regress);
41 | while(my $line = <IN>){
42 | 	chomp $line;
43 | 	my ($frag1, $frag2, $count) = split "\t", $line;
44 | 	if (not defined $count){
45 | 
46 | 		$count=1;
47 | 	}
48 | 
49 | 	
50 | 	my $g1 = $frag_len_group->{$frag1};
51 | 	my $g2 = $frag_len_group->{$frag2};
52 | 	if(not defined $g_sum->{$g1}->{$g2}){
53 | 		$g_sum->{$g1}->{$g2} = 0;
54 | 	}
55 | 	$g_sum->{$g1}->{$g2} += $count;
56 | }
57 | close(IN);
58 | 
59 | foreach my $g1 (sort {$a<=>$b} keys %{$g_sum}){
60 | 	foreach my $g2 (sort {$a<=>$b} keys %{$g_sum->{$g1}}){
61 | 		my $count = $g_count->{$g1}->{$g2};
62 | 		my $sum = $g_sum->{$g1}->{$g2};
63 | 		my $avg = $sum / $count;
64 | 		print join("\t", $g1, $g2, $count, $avg)."\n";
65 | 	}
66 | }
67 | 
68 | exit;
69 | 
70 | ##############################################################
# Look up the group whose interval contains $val.
#   $val   - numeric value to classify
#   $range - hash ref mapping group id => "min:max"
# A group matches when min < $val <= max. Returns the matching group id.
# Dies when no group matches; the offending value is reported in the error
# message (STDERR) rather than printed to STDOUT, which carries the results.
sub get_id{
        my ($val, $range) = @_;
        foreach my $id (keys %{$range}){
                my ($min, $max) = split ":", $range->{$id};
                if($val > $min && $val <= $max){
                        return $id;
                }
        }
        die("Error: did not find a group for value $val\n");
}
82 | 
83 | 


--------------------------------------------------------------------------------
/bin/DPNII/get_trans.avg_by_len_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($regress, $len_group_file, $anchor_stat_file, $loop_count_file) = @ARGV;
 5 | 
 6 | 
 7 | ##################### determine the avg length group for anchors #################
 8 | my $len_range;
 9 | open(IN, $len_group_file);
10 | while(my $line = <IN>){
11 |         chomp $line;
12 |         my ($id, $min, $max) = split "\t", $line;
13 |         $len_range->{$id} = "$min:$max";
14 | }
15 | close(IN);
16 | 
17 | #my $frag_mappability;
18 | my $frag_len_group;
19 | #my $frag_gc;
20 | open(IN, $anchor_stat_file);
21 | while(my $line=<IN>){
22 | 	chomp $line;
23 | 	my ($chr, $beg, $end, $id, $len, $avg) = split "\t", $line;
24 | 	$frag_len_group->{$id} = get_id($avg, $len_range);
25 | }
26 | close(IN);
27 | 
28 | my $g_count;
29 | open(IN, $loop_count_file);
30 | while(my $line = <IN>){
31 | 	chomp $line;
32 | 	my ($g1, $g2, $count) = split "\t", $line;
33 | 	$g_count->{$g1}->{$g2} = $count;
34 | }
35 | close(IN);
36 | 
37 | 
38 | ################## Calculate group averages #######################################
39 | my $g_sum;
40 | open(IN, $regress);
41 | while(my $line = <IN>){
42 | 	chomp $line;
43 | 	my ($frag1, $frag2, $count) = split "\t", $line;
44 | 	if (not defined $count){
45 | 
46 | 		$count=1;
47 | 	}
48 | 
49 | 	
50 | 	my $g1 = $frag_len_group->{$frag1};
51 | 	my $g2 = $frag_len_group->{$frag2};
52 | 	if(not defined $g_sum->{$g1}->{$g2}){
53 | 		$g_sum->{$g1}->{$g2} = 0;
54 | 	}
55 | 	$g_sum->{$g1}->{$g2} += $count;
56 | }
57 | close(IN);
58 | 
59 | foreach my $g1 (sort {$a<=>$b} keys %{$g_sum}){
60 | 	foreach my $g2 (sort {$a<=>$b} keys %{$g_sum->{$g1}}){
61 | 		my $count = $g_count->{$g1}->{$g2};
62 | 		my $sum = $g_sum->{$g1}->{$g2};
63 | 		my $avg = $sum / $count;
64 | 		print join("\t", $g1, $g2, $count, $avg)."\n";
65 | 	}
66 | }
67 | 
68 | exit;
69 | 
70 | ##############################################################
# Look up the group whose interval contains $val.
#   $val   - numeric value to classify
#   $range - hash ref mapping group id => "min:max"
# A group matches when min < $val <= max. Returns the matching group id.
# Dies when no group matches; the offending value is reported in the error
# message (STDERR) rather than printed to STDOUT, which carries the results.
sub get_id{
        my ($val, $range) = @_;
        foreach my $id (keys %{$range}){
                my ($min, $max) = split ":", $range->{$id};
                if($val > $min && $val <= $max){
                        return $id;
                }
        }
        die("Error: did not find a group for value $val\n");
}
82 | 
83 | 


--------------------------------------------------------------------------------
/bin/HindIII/get_loop_lambda_GC_correct.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($regress, $GC_group_file, $frag_stat_file, $group_stat_file) = @ARGV;
 5 | if(not defined $group_stat_file){
 6 | 	die("Usage:./get_loop_lambda_GC_correct.pl <frag_loop_data> <GC_group_file> <frag_stat_file> <group_correct_factor_file>\n");
 7 | }
 8 | 
 9 | ##################### determine the GC group for fragments #################
10 | my $GC_range;
11 | open(IN, $GC_group_file) || die("Error: Cannot open file $GC_group_file!\n");
12 | while(my $line = <IN>){
13 |         chomp $line;
14 |         my ($id, $min, $max) = split "\t", $line;
15 |         $GC_range->{$id} = "$min:$max";
16 | }
17 | close(IN);
18 | 
19 | my $frag_gc_group;
20 | open(IN, $frag_stat_file);
21 | while(my $line=<IN>){
22 |         chomp $line;
23 |         my ($id, $gc, $map) = split "\t", $line;
24 |         $frag_gc_group->{$id} = get_id($gc, $GC_range);
25 | }
26 | close(IN);
27 | 
28 | ################## get group correction factors ###################################
29 | my $GC_factor;
30 | open(IN, $group_stat_file) || die("Error: Cannot open file $group_stat_file!\n");
31 | while(my $line = <IN>){
32 | 	chomp $line;
33 | 	my ($g1, $g2, $factor) = split "\t", $line;
34 | 	$GC_factor->{$g1}->{$g2} = $factor;
35 | }
36 | close(IN);
37 | 
38 | 
39 | ################## print new loop file with GC corrected lambda #####################
40 | open(IN, $regress) || die("Error: Cannot open file $regress!\n");
41 | while(my $line = <IN>){
42 | 	chomp $line;
43 | 	my ($frag1, $frag2, $count, $lambda) = split "\t", $line;
44 | 	my $g1 = $frag_gc_group->{$frag1};
45 | 	my $g2 = $frag_gc_group->{$frag2};
46 | 	my $factor = $GC_factor->{$g1}->{$g2};
47 | 	if($lambda == 0 || $factor ==0){
48 | 		$count = 0;
49 | 	}
50 | 	print join("\t", $frag1, $frag2, $count, $lambda * $factor)."\n";
51 | }
52 | close(IN);
53 | 
54 | exit;
55 | 
56 | 
57 | ##############################################################
# Look up the group whose interval contains $val.
#   $val   - numeric value to classify
#   $range - hash ref mapping group id => "min:max"
# A group matches when min < $val <= max. Returns the matching group id;
# dies when no group matches.
sub get_id{
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split /:/, $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group!\n");
}
68 | 
69 | 


--------------------------------------------------------------------------------
/bin/eHiC/get_loop_lambda_GC_correct.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($regress, $GC_group_file, $frag_stat_file, $group_stat_file) = @ARGV;
 5 | if(not defined $group_stat_file){
 6 | 	die("Usage:./get_loop_lambda_GC_correct.pl <frag_loop_data> <GC_group_file> <frag_stat_file> <group_correct_factor_file>\n");
 7 | }
 8 | 
 9 | ##################### determine the GC group for fragments #################
10 | my $GC_range;
11 | open(IN, $GC_group_file) || die("Error: Cannot open file $GC_group_file!\n");
12 | while(my $line = <IN>){
13 |         chomp $line;
14 |         my ($id, $min, $max) = split "\t", $line;
15 |         $GC_range->{$id} = "$min:$max";
16 | }
17 | close(IN);
18 | 
19 | my $frag_gc_group;
20 | open(IN, $frag_stat_file);
21 | while(my $line=<IN>){
22 |         chomp $line;
23 |         my ($id, $gc, $map) = split "\t", $line;
24 |         $frag_gc_group->{$id} = get_id($gc, $GC_range);
25 | }
26 | close(IN);
27 | 
28 | ################## get group correction factors ###################################
29 | my $GC_factor;
30 | open(IN, $group_stat_file) || die("Error: Cannot open file $group_stat_file!\n");
31 | while(my $line = <IN>){
32 | 	chomp $line;
33 | 	my ($g1, $g2, $factor) = split "\t", $line;
34 | 	$GC_factor->{$g1}->{$g2} = $factor;
35 | }
36 | close(IN);
37 | 
38 | 
39 | ################## print new loop file with GC corrected lambda #####################
40 | open(IN, $regress) || die("Error: Cannot open file $regress!\n");
41 | while(my $line = <IN>){
42 | 	chomp $line;
43 | 	my ($frag1, $frag2, $count, $lambda) = split "\t", $line;
44 | 	my $g1 = $frag_gc_group->{$frag1};
45 | 	my $g2 = $frag_gc_group->{$frag2};
46 | 	my $factor = $GC_factor->{$g1}->{$g2};
47 | 	if($lambda == 0 || $factor ==0){
48 | 		$count = 0;
49 | 	}
50 | 	print join("\t", $frag1, $frag2, $count, $lambda * $factor)."\n";
51 | }
52 | close(IN);
53 | 
54 | exit;
55 | 
56 | 
57 | ##############################################################
sub get_id{
        # Classify $val into one of the groups described by $range.
        #   $val   - numeric value to look up
        #   $range - hash ref: group id => "min:max"
        # A group matches when min < val <= max.  The groups are scanned in
        # hash order and the first matching id is returned; the sub dies if
        # none matches.
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split /:/, $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group!\n");
}
68 | 
69 | 


--------------------------------------------------------------------------------
/HiCorr_DPNII.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ref=$1
 4 | bin=$2
 5 | cis_loop=$3
 6 | trans_loop=$4
 7 | name=$5
 8 | genome=$6
 9 | anchorbed=${ref}/${genome}_DPNII_anchors_avg.bed
10 | 
11 | cat $cis_loop | $bin/fragdata_to_anchordata_DNPII.pl - $ref/${genome}_DPNII_frag_2_anchor  | ${bin}/remove.blacklist_DPNII.py $ref/${genome}_5kb_anchors_blacklist |${bin}/pick.dist.pl $anchorbed - | awk '{if($4<=2000000) print $0}' > end_loop.2M.rmbl &
12 | 
13 | cat $cis_loop | $bin/fragdata_to_anchordata_DNPII.pl - $ref/${genome}_DPNII_frag_2_anchor  | ${bin}/remove.blacklist_DPNII.py $ref/${genome}_5kb_anchors_blacklist |${bin}/pick.dist.pl $anchorbed - | awk '{if($4>2000000) print $0}' > end_loop.gt.2M  &
14 | 
15 | cat $trans_loop | $bin/fragdata_to_anchordata_DNPII.pl - $ref/${genome}_DPNII_frag_2_anchor  | ${bin}/remove.blacklist_DPNII.py $ref/${genome}_5kb_anchors_blacklist > end_loop.rmbl.trans &
16 | 
17 | wait
18 | 
19 | cat end_loop.gt.2M end_loop.rmbl.trans | cut -f1-3 > end_loop.merged.trans
20 | 
21 | ${bin}/merge_sorted_anchor_loop_DPNII.pl $ref/${genome}.full.matrix end_loop.2M.rmbl > end_loop.full &
22 | 
23 | ${bin}/get_trans.avg_by_len_DPNII.pl end_loop.merged.trans $ref/${genome}_anchor_length.groups $anchorbed $ref/${genome}.trans.possible.pairs > trans.stat &
24 | 
25 | wait
26 | 
27 | ${bin}/get_corr_factor_by_len_DPNII.py trans.stat > len.factor 
28 | 
29 | $bin/correct.trans.reads.by.corr_DPNII.pl end_loop.merged.trans $anchorbed $ref/${genome}_anchor_length.groups len.factor > trans.corr.by.all 
30 | 
31 | python3 ${bin}/sum_anchor_reads_DPNII.py trans.corr.by.all > anchors.sum 
32 | 
33 | avg=`cat anchors.sum | awk '{s+=$2;n++}END{print s/n}'`
34 | 
35 | cat anchors.sum | awk -v avg=$avg '{print $1,$2/avg}' OFS='\t'  > anchor.vis.list
36 | 
37 | ${bin}/get_group_statistics_DPNII.pl end_loop.full $anchorbed $ref/${genome}_anchor_length.groups $ref/${genome}.dist.401.group > dist.len.stat 
38 | 
39 | ${bin}/get_loop_lambda_DPNII.pl end_loop.full $anchorbed $ref/${genome}_anchor_length.groups $ref/${genome}.dist.401.group dist.len.stat > end_loop.after.dist.len
40 | 
41 | ${bin}/add.vis.to.cis.2M_DPNII.pl end_loop.after.dist.len anchor.vis.list > end_loop.after.vis
42 | wait
43 | cat end_loop.after.vis | $bin/split_chromo.py $anchorbed 
44 | 
45 | rm -rf end_loop.2M.rmbl end_loop.gt.2M end_loop.rmbl.trans 
46 | 
47 | 


--------------------------------------------------------------------------------
/bin/DPNII/DPNII.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ref=$1
 4 | bin=$2
 5 | cis_loop=$3
 6 | trans_loop=$4
 7 | name=$5
 8 | genome=$6
 9 | anchorbed=${ref}/${genome}_DPNII_anchors_avg.bed
10 | 
11 | cat $cis_loop | $bin/fragdata_to_anchordata_DNPII.pl - $ref/${genome}_DPNII_frag_2_anchor  | ${bin}/remove.blacklist_DPNII.py $ref/${genome}_5kb_anchors_blacklist |${bin}/pick.dist.pl $anchorbed - | awk '{if($4<=2000000) print $0}' > end_loop.2M.rmbl &
12 | 
13 | cat $cis_loop | $bin/fragdata_to_anchordata_DNPII.pl - $ref/${genome}_DPNII_frag_2_anchor  | ${bin}/remove.blacklist_DPNII.py $ref/${genome}_5kb_anchors_blacklist |${bin}/pick.dist.pl $anchorbed - | awk '{if($4>2000000) print $0}' > end_loop.gt.2M  &
14 | 
15 | cat $trans_loop | $bin/fragdata_to_anchordata_DNPII.pl - $ref/${genome}_DPNII_frag_2_anchor  | ${bin}/remove.blacklist_DPNII.py $ref/${genome}_5kb_anchors_blacklist > end_loop.rmbl.trans &
16 | 
17 | wait
18 | 
19 | cat end_loop.gt.2M end_loop.rmbl.trans | cut -f1-3 > end_loop.merged.trans
20 | 
21 | ${bin}/merge_sorted_anchor_loop_DPNII.pl $ref/${genome}.full.matrix end_loop.2M.rmbl > end_loop.full &
22 | 
23 | ${bin}/get_trans.avg_by_len_DPNII.pl end_loop.merged.trans $ref/${genome}_anchor_length.groups $anchorbed $ref/${genome}.trans.possible.pairs > trans.stat &
24 | 
25 | wait
26 | 
27 | ${bin}/get_corr_factor_by_len_DPNII.py trans.stat > len.factor 
28 | 
29 | $bin/correct.trans.reads.by.corr_DPNII.pl end_loop.merged.trans $anchorbed $ref/${genome}_anchor_length.groups len.factor > trans.corr.by.all 
30 | 
31 | ${bin}/sum_anchor_reads_DPNII.py trans.corr.by.all > anchors.sum 
32 | 
33 | avg=`cat anchors.sum | awk '{s+=$2;n++}END{print s/n}'`
34 | 
35 | cat anchors.sum | awk -v avg=$avg '{print $1,$2/avg}' OFS='\t'  > anchor.vis.list
36 | 
37 | ${bin}/get_group_statistics_DPNII.pl end_loop.full $anchorbed $ref/${genome}_anchor_length.groups $ref/${genome}.dist.401.group > dist.len.stat 
38 | 
39 | ${bin}/get_loop_lambda_DPNII.pl end_loop.full $anchorbed $ref/${genome}_anchor_length.groups $ref/${genome}.dist.401.group dist.len.stat > end_loop.after.dist.len
40 | 
41 | ${bin}/add.vis.to.cis.2M_DPNII.pl end_loop.after.dist.len anchor.vis.list > end_loop.after.vis
42 | wait
43 | cat end_loop.after.vis | $bin/split_chromo.py $anchorbed 
44 | 
45 | rm -rf end_loop.2M.rmbl end_loop.gt.2M end_loop.rmbl.trans 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/bin/microC/get_group_statistics.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($regress,$dist_group_file) = @ARGV;
 5 | if(not defined $dist_group_file){
 6 | 	die("Usage:./get_group_statistics.pl <anchor_loop_data> <anchor bed> <anchor_length_group> <anchor_dist_group>\n");
 7 | }
 8 | 
 9 | ##################### determine the length group for fragments #################
10 | 
11 | 
12 | ################### use an array to store group info for differenct distance ######
13 | my @dist_group;
14 | open(IN, $dist_group_file);
15 | while(my $line = <IN>){
16 | 	chomp $line;
17 | 	my ($g, $low, $high) = split "\t", $line;
18 | 	for(my $i = $low + 1; $i <= $high; $i ++){
19 | 		$dist_group[$i] = $g;
20 | 	}
21 | }
22 | close(IN);
23 | 
24 | ################## Calculate group averages #######################################
25 | my $g_sum;
26 | my $g_count;
27 | open(IN, $regress);
28 | while(my $line = <IN>){
29 | 	chomp $line;
30 | 	my ($anchor1, $anchor2, $count, $dist) = split "\t", $line;
31 | #	my $g1 = $frag_len_group->{$anchor1};
32 | #	my $g2 = $frag_len_group->{$anchor2};
33 | 	my $g3 = $dist_group[$dist];
34 | 	if(not defined $g_sum->{$g3}){
35 | 		$g_sum->{$g3} = 0;
36 | 	}
37 | 	if(not defined $g_count->{$g3}){
38 |                 $g_count->{$g3} = 0;
39 |         }
40 | 	
41 | 	$g_count->{$g3} ++;
42 | 	$g_sum->{$g3} += $count
43 | 	
44 | 
45 | }
46 | close(IN);
47 | 
48 | foreach my $g (sort {$a<=>$b} keys %{$g_sum}){
49 | 	#foreach my $g2 (sort {$a<=>$b} keys %{$g_sum->{$g1}}){
50 | 	#	foreach my $g3 (sort {$a<=>$b} keys %{$g_sum->{$g1}->{$g2}}){
51 | 			my $count = $g_count->{$g};
52 | 			my $sum = $g_sum->{$g};
53 | 			if($count != 0){
54 | 				my $avg = $sum / $count;
55 | 				print join("\t", $g, $count, $avg)."\n";
56 | #				print join("\t", $g, $count, $sum)."\n";
57 | 			}else{
58 | 				print join("\t", $g, $count,"NA")."\n";
59 | 			}
60 | }
61 | 	
62 | 
63 | 
64 | exit;
65 | 
66 | 
67 | ##############################################################
sub get_id{
        # Find the group whose interval contains $val.
        #   $val   - numeric value to classify
        #   $range - hash ref mapping group id => "min:max"
        # A group matches when min < val <= max; groups are scanned in hash
        # order and the first match is returned.  Dies when nothing matches.
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split /:/, $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group\n");
}
78 | 
79 | 


--------------------------------------------------------------------------------
/bin/HindIII/get_cis_avg_by_GC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($regress, $GC_group_file, $frag_stat_file, $loop_count_file, $min_map) = @ARGV;
 5 | if(not defined $min_map){
 6 | 	die("Usage:./get_cis_avg_by_GC.pl <frag_loop_data> <frag_GC_group> <frag_stat> <trans_loop_count> <minimum mappability>\n");
 7 | }
 8 | 
 9 | ##################### determine the GC group for fragments #################
10 | my $GC_range;
11 | open(IN, $GC_group_file);
12 | while(my $line = <IN>){
13 |         chomp $line;
14 |         my ($id, $min, $max) = split "\t", $line;
15 |         $GC_range->{$id} = "$min:$max";
16 | }
17 | close(IN);
18 | 
19 | my $frag_mappability;
20 | my $frag_gc_group;
21 | open(IN, $frag_stat_file);
22 | while(my $line=<IN>){
23 | 	chomp $line;
24 | 	my ($id, $gc, $map) = split "\t", $line;
25 | 	$frag_gc_group->{$id} = get_id($gc, $GC_range);
26 | 	$frag_mappability->{$id} = $map;
27 | }
28 | close(IN);
29 | 
30 | my $g_count;
31 | open(IN, $loop_count_file);
32 | while(my $line = <IN>){
33 | 	chomp $line;
34 | 	my ($g1, $g2, $count) = split "\t", $line;
35 | 	$g_count->{$g1}->{$g2} = $count;
36 | }
37 | close(IN);
38 | 
39 | 
40 | ################## Calculate group averages #######################################
41 | my $g_sum;
42 | open(IN, $regress);
43 | while(my $line = <IN>){
44 | 	chomp $line;
45 | 	my ($frag1, $frag2, $count) = split "\t", $line;
46 | 	if($frag_mappability->{$frag1} < $min_map || $frag_mappability->{$frag2} < $min_map){
47 | 		next;
48 | 	}
49 | 	my $g1 = $frag_gc_group->{$frag1};
50 | 	my $g2 = $frag_gc_group->{$frag2};
51 | 	if(not defined $g_sum->{$g1}->{$g2}){
52 | 		$g_sum->{$g1}->{$g2} = 0;
53 | 	}
54 | 	$g_sum->{$g1}->{$g2} += $count;
55 | }
56 | close(IN);
57 | 
58 | foreach my $g1 (sort {$a<=>$b} keys %{$g_sum}){
59 | 	foreach my $g2 (sort {$a<=>$b} keys %{$g_sum->{$g1}}){
60 | 		my $count = $g_count->{$g1}->{$g2};
61 | 		my $sum = $g_sum->{$g1}->{$g2};
62 | 		my $avg = $sum / $count;
63 | 		print join("\t", $g1, $g2, $count, $avg)."\n";
64 | 	}
65 | }
66 | 
67 | exit;
68 | 
69 | ##############################################################
sub get_id{
        # Find the group whose interval contains $val.
        #   $val   - numeric value to classify
        #   $range - hash ref mapping group id => "min:max"
        # A group matches when min < val <= max; groups are scanned in hash
        # order and the first match is returned.  Dies when nothing matches.
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split /:/, $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group\n");
}
80 | 
81 | 


--------------------------------------------------------------------------------
/bin/eHiC/get_trans_avg_by_GC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($regress, $GC_group_file, $frag_stat_file, $loop_count_file, $min_map) = @ARGV;
 5 | if(not defined $min_map){
 6 | 	die("Usage:./get_trans_avg_by_GC.pl <frag_loop_data> <frag_GC_group> <frag_stat> <trans_loop_count> <minimum mappability>\n");
 7 | }
 8 | 
 9 | ##################### determine the GC group for fragments #################
10 | my $GC_range;
11 | open(IN, $GC_group_file);
12 | while(my $line = <IN>){
13 |         chomp $line;
14 |         my ($id, $min, $max) = split "\t", $line;
15 |         $GC_range->{$id} = "$min:$max";
16 | }
17 | close(IN);
18 | 
19 | my $frag_mappability;
20 | my $frag_gc_group;
21 | open(IN, $frag_stat_file);
22 | while(my $line=<IN>){
23 | 	chomp $line;
24 | 	my ($id, $gc, $map) = split "\t", $line;
25 | 	$frag_gc_group->{$id} = get_id($gc, $GC_range);
26 | 	$frag_mappability->{$id} = $map;
27 | }
28 | close(IN);
29 | 
30 | my $g_count;
31 | open(IN, $loop_count_file);
32 | while(my $line = <IN>){
33 | 	chomp $line;
34 | 	my ($g1, $g2, $count) = split "\t", $line;
35 | 	$g_count->{$g1}->{$g2} = $count;
36 | }
37 | close(IN);
38 | 
39 | 
40 | ################## Calculate group averages #######################################
41 | my $g_sum;
42 | open(IN, $regress);
43 | while(my $line = <IN>){
44 | 	chomp $line;
45 | 	my ($frag1, $frag2, $count) = split "\t", $line;
46 | 	if($frag_mappability->{$frag1} < $min_map || $frag_mappability->{$frag2} < $min_map){
47 | 		next;
48 | 	}
49 | 	my $g1 = $frag_gc_group->{$frag1};
50 | 	my $g2 = $frag_gc_group->{$frag2};
51 | 	if(not defined $g_sum->{$g1}->{$g2}){
52 | 		$g_sum->{$g1}->{$g2} = 0;
53 | 	}
54 | 	$g_sum->{$g1}->{$g2} += $count;
55 | }
56 | close(IN);
57 | 
58 | foreach my $g1 (sort {$a<=>$b} keys %{$g_sum}){
59 | 	foreach my $g2 (sort {$a<=>$b} keys %{$g_sum->{$g1}}){
60 | 		my $count = $g_count->{$g1}->{$g2};
61 | 		my $sum = $g_sum->{$g1}->{$g2};
62 | 		my $avg = $sum / $count;
63 | 		print join("\t", $g1, $g2, $count, $avg)."\n";
64 | 	}
65 | }
66 | 
67 | exit;
68 | 
69 | ##############################################################
sub get_id{
        # Find the group whose interval contains $val.
        #   $val   - numeric value to classify
        #   $range - hash ref mapping group id => "min:max"
        # A group matches when min < val <= max; groups are scanned in hash
        # order and the first match is returned.  Dies when nothing matches.
        my ($val, $range) = @_;
        for my $group (keys %$range){
                my ($lower, $upper) = split /:/, $range->{$group};
                next unless $val > $lower;
                next unless $val <= $upper;
                return $group;
        }
        die("Error: did not find a group\n");
}
80 | 
81 | 


--------------------------------------------------------------------------------
/bin/eHiC/model_fit.r:
--------------------------------------------------------------------------------
 1 | group_list <- read.table("lambda_group.tab", col.names=c("group", "lambda", "range"))
 2 | 
 3 | group <- group_list$group
 4 | lambda <- group_list$lambda
 5 | file <- paste("data_list.group.", group, sep="")
 6 | graph <- paste("hist.group.", group, ".pdf", sep="")
 7 | avg <- c()
 8 | var <- c()
 9 | avg_adj <- c()
10 | var_adj <- c()
11 | 
12 | for(i in 1:length(group)){
13 |         data <- read.table(file[i], col.names=c("count","actual_lambda"))
14 | 	attach(data)
15 |         data$pval <- ppois(count-1, lambda[i], lower.tail=FALSE)
16 |         detach()
17 |         attach(data)
18 |         max <- max(count)
19 |         avg <- c(avg, mean(count))
20 |         var <- c(var, var(count))
21 |         xlim <- max(6, 1+qpois(1e-5, lambda[i], lower.tail=FALSE))
22 | 
23 | 
24 |         avg_adj <- c(avg_adj, mean(count[count < xlim]))
25 |         var_adj <- c(var_adj, var(count[count < xlim]))
26 |         detach()
27 | }
28 | model <- lm(var_adj ~ lambda + 0)
29 | slope <- as.numeric(model$coefficients)
30 | scale <- slope
31 | 
32 | write.table(slope, file="slope_value.tab", col.names=FALSE, row.names=FALSE)
33 | 
34 | pdf("plot.var_2_mean.pdf", width=5, height=5)
35 | plot(lambda, var_adj, xlab="Expected value", ylab="Variance", main="Statistical property of read counts")
36 | lines(c(0,max(lambda)+1), slope*c(0,max(lambda)+1), col="red")
37 | legend("topleft", paste("y = ", as.integer(slope*1000 + 0.5)/1000, "x", sep=""), lty = 1, col="red")
38 | dev.off()
39 | 
40 | for(i in 1:length(group)){
41 | 	data <- read.table(file[i], col.names=c("count","actual_lambda"))
42 | 	attach(data)
43 | 
44 | 	max <- max(count)
45 |         xlim <- max(6, 1+qpois(1e-5, lambda[i], lower.tail=FALSE))
46 | 	
47 | 	shape <- lambda[i] / scale
48 | 	prob <- 1 - 1/slope
49 | 	r <- lambda[i] / (slope - 1)
50 | 
51 | 	
52 | 	pdf(graph[i], width=5, height=5)
53 | 	hist(count - 0.5, breaks=0:max - 0.5, probability=TRUE, main=paste("Distribution of read counts (Exp = ",lambda[i],")", sep=""), xlab="Read counts", ylab="Probability", xlim=c(0,xlim)-0.5)
54 | #	lines(0:xlim, dpois(0:xlim, lambda[i]), col="blue")
55 | #	lines(0:xlim, pgamma(0:xlim + 0.5, shape=shape, scale=scale) - pgamma(0:xlim - 0.5, shape=shape, scale=scale), col="red")
56 | 	lines(0:xlim, dnbinom(0:xlim, size=r, mu=lambda[i]), col="blue")	
57 | 
58 | 	legend("topright", c("Histogram of fraction", "Neg binomial distribution"), bty="n", pch=c(0, NA), lty=c(0,1), pt.cex = 2, col=c("black","blue"))
59 | 	
60 | 	dev.off()
61 | 
62 | 	detach()
63 | }
64 | 
65 | 


--------------------------------------------------------------------------------
/bin/Arima/pairing_two_SAM_reads.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl  
 2 | use strict;
 3 | 
 4 | my $usage = 	"Usage:./pairing_two_SAM_reads.pl <sam1> <sam2>\n".
 5 | 		"\tAfter mapping each end of a PE run independently, this program take the two SAM files and link them into one pair end SAM file\n";
 6 | my ($file1, $file2) = @ARGV;
 7 | if(not defined $file2){
 8 | 	die($usage);
 9 | }
10 | 
11 | open (FILE1, $file1);
12 | open (FILE2, $file2); 
13 | 
14 | while ((my $line1 = <FILE1>) and (my $line2 = <FILE2>)){
15 | 	chomp $line1; chomp $line2;
16 | 	while(substr($line1,0,1) eq '@'){
17 | #		print $line1."\n";
18 | 		$line1 = <FILE1>; chomp $line1;
19 | 	}
20 | 	while(substr($line2,0,1) eq '@'){
21 | #		print $line2."\n";
22 | 		$line2 = <FILE2>; chomp $line2;
23 | 	} 
24 | 	my ($id1, $flag1, $chr1, $pos1, $qal1, $cigar1, $mate_chr1, $mate_pos1, $size1, $seq1, @rest1) = split "\t", $line1;
25 | 	my ($id2, $flag2, $chr2, $pos2, $qal2, $cigar2, $mate_chr2, $mate_pos2, $size2, $seq2, @rest2) = split "\t", $line2;
26 | 	$id1 =~ s/\/.$//; $id2 =~ s/\/.$//;
27 | 		
28 | 
29 | 	if($qal1 < 10 || $qal2 < 10) { next;}	
30 | 	if($id1 ne $id2){
31 | 		die("Error: The two sam files are not pair-end!\n");
32 | 	}
33 | 	$flag1 = $flag1 | 64; 				# read1 flag
34 | 	$flag2 = $flag2 | 128;				# read2 flag
35 | 	$flag1 = $flag1 | 1; $flag2 = $flag2 | 1; 	# flag say both reads are one of a pair
36 | 	if(($flag1 & 4) || ($flag2 & 4)) { next;}       # both reads must have alignment
37 | 	$flag1 = $flag1 | 2; $flag2 = $flag2 | 2;
38 | 	my ($strand1, $strand2) = ("+", "+");
39 | 	my ($beg1, $beg2) = ($pos1, $pos2);
40 | 	if($flag1 & 16) { $flag2 = $flag2 | 32; $strand1 = "-"; }	# mate of read2 is - strand
41 | 	if($flag2 & 16) { $flag1 = $flag1 | 32; $strand2 = "-"; }	# mate of read1 is - strand
42 | 	
43 | 	$mate_pos1 = $pos2; $mate_pos2 = $pos1;
44 | 	if($chr1 eq $chr2){
45 | 		$mate_chr1 = "="; $mate_chr2 = "=";
46 | 		my ($left1, $right1) = ($pos1, $pos1 + length($seq1));
47 | 		my ($left2, $right2) = ($pos2, $pos2 + length($seq2));
48 | 		$size1 = $pos2 - $pos1;
49 | 		if($size1 > 0){
50 | 			$size1 += length($seq2);
51 | 		}else{
52 | 			$size1 -= length($seq1);
53 | 		}
54 | 		$size2 = 0 - $size1;
55 | 	}else{
56 | 		$mate_chr2 = $chr1; $mate_chr1 = $chr2;
57 | 		$size1 = 0; $size2 = 0;
58 | 	}
59 | 	print join("\t", $id1, $flag1, $chr1, $pos1, $qal1, $cigar1, $mate_chr1, $mate_pos1, $size1, $seq1, @rest1)."\n";
60 | 	print join("\t", $id2, $flag2, $chr2, $pos2, $qal2, $cigar2, $mate_chr2, $mate_pos2, $size2, $seq2, @rest2)."\n";
61 | }
62 | 
63 | close FILE1;
64 | close FILE2;
65 | 
66 | exit;
67 | 


--------------------------------------------------------------------------------
/bin/preprocess/pairing_two_SAM_reads.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl  
 2 | use strict;
 3 | 
 4 | my $usage = 	"Usage:./pairing_two_SAM_reads.pl <sam1> <sam2>\n".
 5 | 		"\tAfter mapping each end of a PE run independently, this program take the two SAM files and link them into one pair end SAM file\n";
 6 | my ($file1, $file2) = @ARGV;
 7 | if(not defined $file2){
 8 | 	die($usage);
 9 | }
10 | 
11 | open (FILE1, $file1);
12 | open (FILE2, $file2); 
13 | 
14 | while ((my $line1 = <FILE1>) and (my $line2 = <FILE2>)){
15 | 	chomp $line1; chomp $line2;
16 | 	while(substr($line1,0,1) eq '@'){
17 | #		print $line1."\n";
18 | 		$line1 = <FILE1>; chomp $line1;
19 | 	}
20 | 	while(substr($line2,0,1) eq '@'){
21 | #		print $line2."\n";
22 | 		$line2 = <FILE2>; chomp $line2;
23 | 	} 
24 | 	my ($id1, $flag1, $chr1, $pos1, $qal1, $cigar1, $mate_chr1, $mate_pos1, $size1, $seq1, @rest1) = split "\t", $line1;
25 | 	my ($id2, $flag2, $chr2, $pos2, $qal2, $cigar2, $mate_chr2, $mate_pos2, $size2, $seq2, @rest2) = split "\t", $line2;
26 | 	$id1 =~ s/\/.$//; $id2 =~ s/\/.$//;
27 | 		
28 | 
29 | 	if($qal1 < 10 || $qal2 < 10) { next;}	
30 | 	if($id1 ne $id2){
31 | 		die("Error: The two sam files are not pair-end!\n");
32 | 	}
33 | 	$flag1 = $flag1 | 64; 				# read1 flag
34 | 	$flag2 = $flag2 | 128;				# read2 flag
35 | 	$flag1 = $flag1 | 1; $flag2 = $flag2 | 1; 	# flag say both reads are one of a pair
36 | 	if(($flag1 & 4) || ($flag2 & 4)) { next;}       # both reads must have alignment
37 | 	$flag1 = $flag1 | 2; $flag2 = $flag2 | 2;
38 | 	my ($strand1, $strand2) = ("+", "+");
39 | 	my ($beg1, $beg2) = ($pos1, $pos2);
40 | 	if($flag1 & 16) { $flag2 = $flag2 | 32; $strand1 = "-"; }	# mate of read2 is - strand
41 | 	if($flag2 & 16) { $flag1 = $flag1 | 32; $strand2 = "-"; }	# mate of read1 is - strand
42 | 	
43 | 	$mate_pos1 = $pos2; $mate_pos2 = $pos1;
44 | 	if($chr1 eq $chr2){
45 | 		$mate_chr1 = "="; $mate_chr2 = "=";
46 | 		my ($left1, $right1) = ($pos1, $pos1 + length($seq1));
47 | 		my ($left2, $right2) = ($pos2, $pos2 + length($seq2));
48 | 		$size1 = $pos2 - $pos1;
49 | 		if($size1 > 0){
50 | 			$size1 += length($seq2);
51 | 		}else{
52 | 			$size1 -= length($seq1);
53 | 		}
54 | 		$size2 = 0 - $size1;
55 | 	}else{
56 | 		$mate_chr2 = $chr1; $mate_chr1 = $chr2;
57 | 		$size1 = 0; $size2 = 0;
58 | 	}
59 | 	print join("\t", $id1, $flag1, $chr1, $pos1, $qal1, $cigar1, $mate_chr1, $mate_pos1, $size1, $seq1, @rest1)."\n";
60 | 	print join("\t", $id2, $flag2, $chr2, $pos2, $qal2, $cigar2, $mate_chr2, $mate_pos2, $size2, $seq2, @rest2)."\n";
61 | }
62 | 
63 | close FILE1;
64 | close FILE2;
65 | 
66 | exit;
67 | 


--------------------------------------------------------------------------------
/bin/DPNII/pairing_two_SAM_reads_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl  
 2 | use strict;
 3 | 
 4 | my $usage = 	"Usage:./pairing_two_SAM_reads.pl <sam1> <sam2>\n".
 5 | 		"\tAfter mapping each end of a PE run independently, this program take the two SAM files and link them into one pair end SAM file\n";
 6 | my ($file1, $file2) = @ARGV;
 7 | if(not defined $file2){
 8 | 	die($usage);
 9 | }
10 | 
11 | open (FILE1, $file1);
12 | open (FILE2, $file2); 
13 | 
14 | while ((my $line1 = <FILE1>) and (my $line2 = <FILE2>)){
15 | 	chomp $line1; chomp $line2;
16 | 	while(substr($line1,0,1) eq '@'){
17 | #		print $line1."\n";
18 | 		$line1 = <FILE1>; chomp $line1;
19 | 	}
20 | 	while(substr($line2,0,1) eq '@'){
21 | #		print $line2."\n";
22 | 		$line2 = <FILE2>; chomp $line2;
23 | 	} 
24 | 	my ($id1, $flag1, $chr1, $pos1, $qal1, $cigar1, $mate_chr1, $mate_pos1, $size1, $seq1, @rest1) = split "\t", $line1;
25 | 	my ($id2, $flag2, $chr2, $pos2, $qal2, $cigar2, $mate_chr2, $mate_pos2, $size2, $seq2, @rest2) = split "\t", $line2;
26 | 	$id1 =~ s/\/.$//; $id2 =~ s/\/.$//;
27 | 		
28 | 
29 | 	if($qal1 < 10 || $qal2 < 10) { next;}	
30 | 	if($id1 ne $id2){
31 | 		die("Error: The two sam files are not pair-end!\n");
32 | 	}
33 | 	$flag1 = $flag1 | 64; 				# read1 flag
34 | 	$flag2 = $flag2 | 128;				# read2 flag
35 | 	$flag1 = $flag1 | 1; $flag2 = $flag2 | 1; 	# flag say both reads are one of a pair
36 | 	if(($flag1 & 4) || ($flag2 & 4)) { next;}       # both reads must have alignment
37 | 	$flag1 = $flag1 | 2; $flag2 = $flag2 | 2;
38 | 	my ($strand1, $strand2) = ("+", "+");
39 | 	my ($beg1, $beg2) = ($pos1, $pos2);
40 | 	if($flag1 & 16) { $flag2 = $flag2 | 32; $strand1 = "-"; }	# mate of read2 is - strand
41 | 	if($flag2 & 16) { $flag1 = $flag1 | 32; $strand2 = "-"; }	# mate of read1 is - strand
42 | 	
43 | 	$mate_pos1 = $pos2; $mate_pos2 = $pos1;
44 | 	if($chr1 eq $chr2){
45 | 		$mate_chr1 = "="; $mate_chr2 = "=";
46 | 		my ($left1, $right1) = ($pos1, $pos1 + length($seq1));
47 | 		my ($left2, $right2) = ($pos2, $pos2 + length($seq2));
48 | 		$size1 = $pos2 - $pos1;
49 | 		if($size1 > 0){
50 | 			$size1 += length($seq2);
51 | 		}else{
52 | 			$size1 -= length($seq1);
53 | 		}
54 | 		$size2 = 0 - $size1;
55 | 	}else{
56 | 		$mate_chr2 = $chr1; $mate_chr1 = $chr2;
57 | 		$size1 = 0; $size2 = 0;
58 | 	}
59 | 	print join("\t", $id1, $flag1, $chr1, $pos1, $qal1, $cigar1, $mate_chr1, $mate_pos1, $size1, $seq1, @rest1)."\n";
60 | 	print join("\t", $id2, $flag2, $chr2, $pos2, $qal2, $cigar2, $mate_chr2, $mate_pos2, $size2, $seq2, @rest2)."\n";
61 | }
62 | 
63 | close FILE1;
64 | close FILE2;
65 | 
66 | exit;
67 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # *HiCorr*
 2 | HiCorr is a pipeline designed for bias correction and visualization of multi-platform Hi-C data (in-situ Hi-C, Arima, micro-C). HiCorr focuses on the mapping of chromatin interactions at high resolution, especially sub-TAD enhancer-promoter interactions at ~5kb resolution, which require more rigorous bias correction. It needs to be run in a Unix/Linux environment. It currently includes reference files for the genome builds hg19 and mm10; reference files for other genome builds will be provided upon request. Please contact Shanshan Zhang (ssz20@case.edu) or Fulai Jin (fxj45@case.edu). For a noise-free and enhanced signal, please check [DeepLoop](https://github.com/JinLabBioinfo/DeepLoop), which we recently developed.<br/>
 3 | ## Setup
 4 | ```
 5 | git clone https://github.com/shanshan950/HiCorr.git
 6 | cd HiCorr/
 7 | chmod 755 HiCorr
 8 | chmod -R 755 bin/*
 9 | ```
10 | ## Gateway for different Hi-C data type:
11 | Each section describes reference file downloading, preprocessing (mapping and fragment filtering), and how to run HiCorr.
12 |  ## :point_right:  [*HiCorr on micro-C (beta version)*](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/HiCorr_micro-C.md)
13 |  ## :point_right:  [*HiCorr on Arima (beta version)*](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/HiCorr_Arima.md)
14 |  ## :point_right:  [*HiCorr on HindIII enzyme Hi-C*](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/HiCorr_HindIII.md)
15 |  ## :point_right:  [*HiCorr on eHi-C*](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/HiCorr_eHi-C.md)
16 |  ## :point_right:  [*HiCorr on in-situ Hi-C or DPNII/MboI enzyme Hi-C*](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/HiCorr_insituHi-C.md)
17 |  ## :point_right:  [*Visualize HiCorr contact heatmaps*](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/HiCorr_heatmap.md)
18 |  ## :point_right:  [*Compatible with HiCPro valid pairs*](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/validPairs_2_fragloop.md)
19 |  ## :point_right:  [*Generate reference files for HiCorr*](https://github.com/JinLabBioinfo/HiCorr/blob/master/documents/generate_reference_files.md)
20 |  
21 | ## :eyes: 40 Processed Hi-C datasets by *HiCorr* and *DeepLoop* can be visualized in [website](https://hiview.case.edu/public/DeepLoop/) <br/>
22 | 
23 | ## Citation: <br/>
24 | _Lu,L. et al._ Robust Hi-C Maps of Enhancer-Promoter Interactions Reveal the Function of Non-coding Genome in Neural Development and Diseases. Molecular Cell; doi: https://doi.org/10.1016/j.molcel.2020.06.007
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/bin/reads_2_trans_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my $usage = 	"Usage:./reads_2_trans_frag_loop.pl <frag_bed> <read_length> <trans_outfile> <reads file>\n".
 5 | 		"\tThis program takes reads summary txt file and output the cis- fragment looping files categoried based on inward, outward and samestrand reads.\n";
 6 | 
 7 | my ($frag_bed, $len, $out_trans_file, $reads_file) = @ARGV;
 8 | if(not defined $len){
 9 | 	die($usage);
10 | }
11 | 
12 | if(not defined $reads_file){
13 | 	$reads_file = "-";
14 | }
15 | 
16 | if(not defined $out_trans_file){
17 |         $out_trans_file = "-";
18 | }
19 | 
20 | open(OUT_trans, ">$out_trans_file");
21 | 
22 | my $frag_HASH;
23 | my $frag_loc;
24 | open(IN, $frag_bed);
25 | while(my $line = <IN>){
26 | 	chomp $line;
27 | 	my ($chr, $beg, $end, $id) = split "\t", $line;
28 | 	$frag_loc->{$id} = join(":", $beg, $end);
29 | 	for(my $ind = int($beg/10000); $ind <= int($end/10000); $ind++){
30 | 		push @{$frag_HASH->{$chr}->{$ind}}, $id;
31 | 	}
32 | }
33 | close(IN);
34 | 
35 | open(FH, $reads_file) || die("Error: Cannot open file $reads_file!\n");
36 | while(my $line = <FH>){
37 | 	chomp $line;
38 | 	my ($chr1, $beg1, $str1, $chr2, $beg2, $str2) = split "\t", $line;
39 | 	if($chr1 eq $chr2){
40 | 		next;
41 | 	}else{
42 | 		my $frag1 = find_frag($frag_HASH->{$chr1}, $frag_loc, $beg1, $str1, $len);
43 | 		my $frag2 = find_frag($frag_HASH->{$chr2}, $frag_loc, $beg2, $str2, $len);
44 | 
45 | 		if((!$frag1) || (!$frag2) || ($frag1 eq $frag2)){
46 | 			next;
47 | 		}
48 | 		print OUT_trans join("\t", $frag1, $frag2)."\n";
49 | 	}
50 | }
51 | close(FH);
52 | 
53 | close(OUT_trans);
54 | 
55 | exit;
56 | 
57 | ########################################################################
sub find_frag{
	# Assign a read to the fragment it lies in.
	#   $hash     - hash ref: 10,000-bp bin index => array ref of fragment ids
	#   $frag_loc - hash ref: fragment id => "beg:end"
	#   $beg      - leftmost position of the read
	#   $strand   - "+" or "-" (anything other than "+" is trimmed like "-")
	#   $len      - read length
	# The read interval is shortened by 16 bases on its right side for "+"
	# reads and on its left side otherwise.  A fragment is returned when it
	# contains the shortened interval and the read is close to the matching
	# fragment end: for "+" the left edge is at most 500 before the fragment
	# end, for "-" the right edge is at most 500 after the fragment start.
	# Returns the fragment id, or 0 when no fragment qualifies.
	my ($hash, $frag_loc, $beg, $strand, $len) = @_;
	my $is_plus = ($strand eq "+");
	my ($left, $right) = ($beg, $beg + $len - 1);
	if($is_plus){
		$right -= 16;
	}else{
		$left += 16;
	}

	my $last_bin = int($right/10000);
	for(my $bin = int($left/10000); $bin <= $last_bin; $bin++){
		foreach my $fid (@{$hash->{$bin}}){
			my ($f_beg, $f_end) = split ":", $frag_loc->{$fid};
			next unless ($left >= $f_beg && $right <= $f_end);

			my $near_end = $is_plus
				? ($left >= ($f_end - 500))
				: ($strand eq "-" && $right <= ($f_beg + 500));
			return $fid if $near_end;
		}
	}
	return 0;
}
85 | 


--------------------------------------------------------------------------------
/bin/Arima/reads_2_trans_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my $usage = 	"Usage:./reads_2_trans_frag_loop.pl <frag_bed> <read_length> <trans_outfile> <reads file>\n".
 5 | 		"\tThis program takes reads summary txt file and output the cis- fragment looping files categoried based on inward, outward and samestrand reads.\n";
 6 | 
 7 | my ($frag_bed, $len, $out_trans_file, $reads_file) = @ARGV;
 8 | if(not defined $len){
 9 | 	die($usage);
10 | }
11 | 
12 | if(not defined $reads_file){
13 | 	$reads_file = "-";
14 | }
15 | 
16 | if(not defined $out_trans_file){
17 |         $out_trans_file = "-";
18 | }
19 | 
20 | open(OUT_trans, ">$out_trans_file");
21 | 
22 | my $frag_HASH;
23 | my $frag_loc;
24 | open(IN, $frag_bed);
25 | while(my $line = <IN>){
26 | 	chomp $line;
27 | 	my ($chr, $beg, $end, $id) = split "\t", $line;
28 | 	$frag_loc->{$id} = join(":", $beg, $end);
29 | 	for(my $ind = int($beg/10000); $ind <= int($end/10000); $ind++){
30 | 		push @{$frag_HASH->{$chr}->{$ind}}, $id;
31 | 	}
32 | }
33 | close(IN);
34 | 
35 | open(FH, $reads_file) || die("Error: Cannot open file $reads_file!\n");
36 | while(my $line = <FH>){
37 | 	chomp $line;
38 | 	my ($chr1, $beg1, $str1, $chr2, $beg2, $str2) = split "\t", $line;
39 | 	if($chr1 eq $chr2){
40 | 		next;
41 | 	}else{
42 | 		my $frag1 = find_frag($frag_HASH->{$chr1}, $frag_loc, $beg1, $str1, $len);
43 | 		my $frag2 = find_frag($frag_HASH->{$chr2}, $frag_loc, $beg2, $str2, $len);
44 | 
45 | 		if((!$frag1) || (!$frag2) || ($frag1 eq $frag2)){
46 | 			next;
47 | 		}
48 | 		print OUT_trans join("\t", $frag1, $frag2)."\n";
49 | 	}
50 | }
51 | close(FH);
52 | 
53 | close(OUT_trans);
54 | 
55 | exit;
56 | 
57 | ########################################################################
# find_frag($bins, $frag_loc, $beg, $strand, $len)
#   $bins     - hashref for one chromosome: 10kb bin index -> arrayref of
#               fragment ids overlapping that bin (may be undef when the
#               chromosome has no fragments)
#   $frag_loc - hashref: fragment id -> "beg:end"
#   $beg      - read start coordinate
#   $strand   - "+" or "-"
#   $len      - read length
# Returns the id of the first fragment that fully contains the trimmed read
# and whose relevant end lies within 500 bp of the read ("+": read starts no
# more than 500 bp before the fragment end; "-": read ends no more than
# 500 bp after the fragment start). Returns 0 when nothing qualifies.
# NOTE(review): the 16 bp trim and the 500 bp window are hard-coded; their
# rationale is not visible in this file.
sub find_frag{
	my ($hash, $frag_loc, $beg, $strand, $len) = @_;

	# A chromosome absent from the fragment bed arrives here as undef.
	return 0 if(not defined $hash);

	my $left = $beg;
	my $right = $beg + $len - 1;
	# Trim 16 bp from the far end of the read ("+" reads on the right,
	# everything else on the left) before testing containment.
	if($strand eq "+"){
		$right -= 16;
	}else{
		$left += 16;
	}

	for(my $ind = int($left/10000); $ind <= int($right/10000); $ind++){
		# Look the bin up explicitly: dereferencing a missing bin directly in
		# foreach would autovivify it and grow the shared index on every miss.
		my $bin = $hash->{$ind};
		next if(not defined $bin);

		foreach my $fid (@{$bin}){
			my ($f_beg, $f_end) = split ":", $frag_loc->{$fid};

			# The trimmed read must sit entirely inside the fragment.
			next if($left < $f_beg || $right > $f_end);

			if($strand eq "+"){
				return $fid if($left >= ($f_end - 500));
			}elsif($strand eq "-"){
				return $fid if($right <= ($f_beg + 500));
			}
		}
	}
	return 0;
}
85 | 


--------------------------------------------------------------------------------
/bin/DPNII/reads_2_trans_frag_loop_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my $usage = 	"Usage:./reads_2_trans_frag_loop.pl <frag_bed> <read_length> <trans_outfile> <reads file>\n".
 5 | 		"\tThis program takes reads summary txt file and output the cis- fragment looping files categoried based on inward, outward and samestrand reads.\n";
 6 | 
 7 | my ($frag_bed, $len, $out_trans_file, $reads_file) = @ARGV;
 8 | if(not defined $len){
 9 | 	die($usage);
10 | }
11 | 
12 | if(not defined $reads_file){
13 | 	$reads_file = "-";
14 | }
15 | 
16 | if(not defined $out_trans_file){
17 |         $out_trans_file = "-";
18 | }
19 | 
20 | open(OUT_trans, ">$out_trans_file");
21 | 
22 | my $frag_HASH;
23 | my $frag_loc;
24 | open(IN, $frag_bed);
25 | while(my $line = <IN>){
26 | 	chomp $line;
27 | 	my ($chr, $beg, $end, $id) = split "\t", $line;
28 | 	$frag_loc->{$id} = join(":", $beg, $end);
29 | 	for(my $ind = int($beg/10000); $ind <= int($end/10000); $ind++){
30 | 		push @{$frag_HASH->{$chr}->{$ind}}, $id;
31 | 	}
32 | }
33 | close(IN);
34 | 
35 | open(FH, $reads_file) || die("Error: Cannot open file $reads_file!\n");
36 | while(my $line = <FH>){
37 | 	chomp $line;
38 | 	my ($chr1, $beg1, $str1, $chr2, $beg2, $str2) = split "\t", $line;
39 | 	if($chr1 eq $chr2){
40 | 		next;
41 | 	}else{
42 | 		my $frag1 = find_frag($frag_HASH->{$chr1}, $frag_loc, $beg1, $str1, $len);
43 | 		my $frag2 = find_frag($frag_HASH->{$chr2}, $frag_loc, $beg2, $str2, $len);
44 | 
45 | 		if((!$frag1) || (!$frag2) || ($frag1 eq $frag2)){
46 | 			next;
47 | 		}
48 | 		print OUT_trans join("\t", $frag1, $frag2)."\n";
49 | 	}
50 | }
51 | close(FH);
52 | 
53 | close(OUT_trans);
54 | 
55 | exit;
56 | 
57 | ########################################################################
# find_frag($bins, $frag_loc, $beg, $strand, $len)
#   $bins     - hashref for one chromosome: 10kb bin index -> arrayref of
#               fragment ids overlapping that bin (may be undef when the
#               chromosome has no fragments)
#   $frag_loc - hashref: fragment id -> "beg:end"
#   $beg      - read start coordinate
#   $strand   - "+" or "-"
#   $len      - read length
# Returns the id of the first fragment that fully contains the trimmed read
# and whose relevant end lies within 500 bp of the read ("+": read starts no
# more than 500 bp before the fragment end; "-": read ends no more than
# 500 bp after the fragment start). Returns 0 when nothing qualifies.
# NOTE(review): the 16 bp trim and the 500 bp window are hard-coded; their
# rationale is not visible in this file.
sub find_frag{
	my ($hash, $frag_loc, $beg, $strand, $len) = @_;

	# A chromosome absent from the fragment bed arrives here as undef.
	return 0 if(not defined $hash);

	my $left = $beg;
	my $right = $beg + $len - 1;
	# Trim 16 bp from the far end of the read ("+" reads on the right,
	# everything else on the left) before testing containment.
	if($strand eq "+"){
		$right -= 16;
	}else{
		$left += 16;
	}

	for(my $ind = int($left/10000); $ind <= int($right/10000); $ind++){
		# Look the bin up explicitly: dereferencing a missing bin directly in
		# foreach would autovivify it and grow the shared index on every miss.
		my $bin = $hash->{$ind};
		next if(not defined $bin);

		foreach my $fid (@{$bin}){
			my ($f_beg, $f_end) = split ":", $frag_loc->{$fid};

			# The trimmed read must sit entirely inside the fragment.
			next if($left < $f_beg || $right > $f_end);

			if($strand eq "+"){
				return $fid if($left >= ($f_end - 500));
			}elsif($strand eq "-"){
				return $fid if($right <= ($f_beg + 500));
			}
		}
	}
	return 0;
}
85 | 


--------------------------------------------------------------------------------
/bin/preprocess/reads_2_trans_frag_loop.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my $usage = 	"Usage:./reads_2_trans_frag_loop.pl <frag_bed> <read_length> <trans_outfile> <reads file>\n".
 5 | 		"\tThis program takes reads summary txt file and output the cis- fragment looping files categoried based on inward, outward and samestrand reads.\n";
 6 | 
 7 | my ($frag_bed, $len, $out_trans_file, $reads_file) = @ARGV;
 8 | if(not defined $len){
 9 | 	die($usage);
10 | }
11 | 
12 | if(not defined $reads_file){
13 | 	$reads_file = "-";
14 | }
15 | 
16 | if(not defined $out_trans_file){
17 |         $out_trans_file = "-";
18 | }
19 | 
20 | open(OUT_trans, ">$out_trans_file");
21 | 
22 | my $frag_HASH;
23 | my $frag_loc;
24 | open(IN, $frag_bed);
25 | while(my $line = <IN>){
26 | 	chomp $line;
27 | 	my ($chr, $beg, $end, $id) = split "\t", $line;
28 | 	$frag_loc->{$id} = join(":", $beg, $end);
29 | 	for(my $ind = int($beg/10000); $ind <= int($end/10000); $ind++){
30 | 		push @{$frag_HASH->{$chr}->{$ind}}, $id;
31 | 	}
32 | }
33 | close(IN);
34 | 
35 | open(FH, $reads_file) || die("Error: Cannot open file $reads_file!\n");
36 | while(my $line = <FH>){
37 | 	chomp $line;
38 | 	my ($chr1, $beg1, $str1, $chr2, $beg2, $str2) = split "\t", $line;
39 | 	if($chr1 eq $chr2){
40 | 		next;
41 | 	}else{
42 | 		my $frag1 = find_frag($frag_HASH->{$chr1}, $frag_loc, $beg1, $str1, $len);
43 | 		my $frag2 = find_frag($frag_HASH->{$chr2}, $frag_loc, $beg2, $str2, $len);
44 | 
45 | 		if((!$frag1) || (!$frag2) || ($frag1 eq $frag2)){
46 | 			next;
47 | 		}
48 | 		print OUT_trans join("\t", $frag1, $frag2)."\n";
49 | 	}
50 | }
51 | close(FH);
52 | 
53 | close(OUT_trans);
54 | 
55 | exit;
56 | 
57 | ########################################################################
# find_frag($bins, $frag_loc, $beg, $strand, $len)
#   $bins     - hashref for one chromosome: 10kb bin index -> arrayref of
#               fragment ids overlapping that bin (may be undef when the
#               chromosome has no fragments)
#   $frag_loc - hashref: fragment id -> "beg:end"
#   $beg      - read start coordinate
#   $strand   - "+" or "-"
#   $len      - read length
# Returns the id of the first fragment that fully contains the trimmed read
# and whose relevant end lies within 500 bp of the read ("+": read starts no
# more than 500 bp before the fragment end; "-": read ends no more than
# 500 bp after the fragment start). Returns 0 when nothing qualifies.
# NOTE(review): the 16 bp trim and the 500 bp window are hard-coded; their
# rationale is not visible in this file.
sub find_frag{
	my ($hash, $frag_loc, $beg, $strand, $len) = @_;

	# A chromosome absent from the fragment bed arrives here as undef.
	return 0 if(not defined $hash);

	my $left = $beg;
	my $right = $beg + $len - 1;
	# Trim 16 bp from the far end of the read ("+" reads on the right,
	# everything else on the left) before testing containment.
	if($strand eq "+"){
		$right -= 16;
	}else{
		$left += 16;
	}

	for(my $ind = int($left/10000); $ind <= int($right/10000); $ind++){
		# Look the bin up explicitly: dereferencing a missing bin directly in
		# foreach would autovivify it and grow the shared index on every miss.
		my $bin = $hash->{$ind};
		next if(not defined $bin);

		foreach my $fid (@{$bin}){
			my ($f_beg, $f_end) = split ":", $frag_loc->{$fid};

			# The trimmed read must sit entirely inside the fragment.
			next if($left < $f_beg || $right > $f_end);

			if($strand eq "+"){
				return $fid if($left >= ($f_end - 500));
			}elsif($strand eq "-"){
				return $fid if($right <= ($f_beg + 500));
			}
		}
	}
	return 0;
}
85 | 


--------------------------------------------------------------------------------
/bin/Arima/get_loop_lambda.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($regress, $anchor_bed, $len_group_file, $dist_group_file, $group_stat_file) = @ARGV;
 5 | if(not defined $dist_group_file){
 6 | 	die("Usage:./get_loop_lambda.pl <frag_loop_data> <frag bed> <frag_stat> <frag_length_group> <frag_dist_group> <group_statistic_file>\n");
 7 | }
 8 | 
 9 | ##################### determine the length group for fragments #################
10 | my $len_range;
11 | open(IN, $len_group_file);
12 | while(my $line = <IN>){
13 |         chomp $line;
14 |         my ($id, $min, $max) = split "\t", $line;
15 |         $len_range->{$id} = "$min:$max";
16 | }
17 | close(IN);
18 | 
19 | my $frag_len_group;
20 | open(IN, $anchor_bed);
21 | while(my $line = <IN>){
22 | 	chomp $line;
23 | 	my ($chr, $beg, $end, $id, $dsit, $len) = split "\t", $line;
24 | 	$frag_len_group->{$id} = get_id($len, $len_range);
25 | }
26 | close(IN);
27 | 
28 | 
29 | ################### use an array to store group info for differenct distance ######
30 | my @dist_group;
31 | open(IN, $dist_group_file);
32 | while(my $line = <IN>){
33 | 	chomp $line;
34 | 	my ($g, $low, $high) = split "\t", $line;
35 | 	for(my $i = $low + 1; $i <= $high; $i ++){
36 | 		$dist_group[$i] = $g;
37 | 	}
38 | }
39 | close(IN);
40 | 
41 | ################## get group average values #######################################
42 | my $g_avg;
43 | open(IN, $group_stat_file);
44 | while(my $line = <IN>){
45 | 	chomp $line;
46 | 	my ($g1, $g2, $g3, $count, $avg) = split "\t", $line;
47 | 	$g_avg->{$g1}->{$g2}->{$g3} = $avg;
48 | }
49 | close(IN);
50 | 
51 | 
52 | ################## print new data with group avg ##################################
53 | open(IN, $regress);
54 | while(my $line = <IN>){
55 | 	chomp $line;
56 | 	my ($frag1, $frag2, $count, $dist) = split "\t", $line;
57 | 	my $g1 = $frag_len_group->{$frag1};
58 | 	my $g2 = $frag_len_group->{$frag2};
59 | 	my $g3 = $dist_group[$dist];
60 | 	my $avg = $g_avg->{$g1}->{$g2}->{$g3};
61 | 	#my $map1 = $frag_map->{$frag1};
62 | 	#my $map2 = $frag_map->{$frag2};
63 | 	my $lambda = $avg;
64 | 	if($lambda == 0){
65 | 		$count = 0;
66 | 	}
67 | 	print join("\t", $frag1, $frag2, $count, $lambda)."\n";
68 | }
69 | close(IN);
70 | 
71 | exit;
72 | 
73 | 
74 | ##############################################################
# get_id($val, $range)
#   $range is a hashref: group id -> "min:max".
# Returns the id of a group with min < $val <= max (the first such group in
# the hash's key order); dies when no group contains $val.
sub get_id{
        my ($val, $range) = @_;
        my @matches = grep {
                my ($lower, $upper) = split ":", $range->{$_};
                $val > $lower && $val <= $upper;
        } keys %{$range};
        if(@matches){
                return $matches[0];
        }
        die("Error: did not find a group\n");
}
85 | 
86 | 


--------------------------------------------------------------------------------
/bin/DPNII/get_loop_lambda_DPNII.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my ($regress, $anchor_bed, $len_group_file, $dist_group_file, $group_stat_file) = @ARGV;
 5 | if(not defined $dist_group_file){
 6 | 	die("Usage:./get_loop_lambda.pl <frag_loop_data> <frag bed> <frag_stat> <frag_length_group> <frag_dist_group> <group_statistic_file>\n");
 7 | }
 8 | 
 9 | ##################### determine the length group for fragments #################
10 | my $len_range;
11 | open(IN, $len_group_file);
12 | while(my $line = <IN>){
13 |         chomp $line;
14 |         my ($id, $min, $max) = split "\t", $line;
15 |         $len_range->{$id} = "$min:$max";
16 | }
17 | close(IN);
18 | 
19 | my $frag_len_group;
20 | open(IN, $anchor_bed);
21 | while(my $line = <IN>){
22 | 	chomp $line;
23 | 	my ($chr, $beg, $end, $id, $dsit, $len) = split "\t", $line;
24 | 	$frag_len_group->{$id} = get_id($len, $len_range);
25 | }
26 | close(IN);
27 | 
28 | 
29 | ################### use an array to store group info for differenct distance ######
30 | my @dist_group;
31 | open(IN, $dist_group_file);
32 | while(my $line = <IN>){
33 | 	chomp $line;
34 | 	my ($g, $low, $high) = split "\t", $line;
35 | 	for(my $i = $low + 1; $i <= $high; $i ++){
36 | 		$dist_group[$i] = $g;
37 | 	}
38 | }
39 | close(IN);
40 | 
41 | ################## get group average values #######################################
42 | my $g_avg;
43 | open(IN, $group_stat_file);
44 | while(my $line = <IN>){
45 | 	chomp $line;
46 | 	my ($g1, $g2, $g3, $count, $avg) = split "\t", $line;
47 | 	$g_avg->{$g1}->{$g2}->{$g3} = $avg;
48 | }
49 | close(IN);
50 | 
51 | 
52 | ################## print new data with group avg ##################################
53 | open(IN, $regress);
54 | while(my $line = <IN>){
55 | 	chomp $line;
56 | 	my ($frag1, $frag2, $count, $dist) = split "\t", $line;
57 | 	my $g1 = $frag_len_group->{$frag1};
58 | 	my $g2 = $frag_len_group->{$frag2};
59 | 	my $g3 = $dist_group[$dist];
60 | 	my $avg = $g_avg->{$g1}->{$g2}->{$g3};
61 | 	#my $map1 = $frag_map->{$frag1};
62 | 	#my $map2 = $frag_map->{$frag2};
63 | 	my $lambda = $avg;
64 | 	if($lambda == 0){
65 | 		$count = 0;
66 | 	}
67 | 	print join("\t", $frag1, $frag2, $count, $lambda)."\n";
68 | }
69 | close(IN);
70 | 
71 | exit;
72 | 
73 | 
74 | ##############################################################
# get_id($val, $range)
#   $range is a hashref: group id -> "min:max".
# Returns the id of a group with min < $val <= max (the first such group in
# the hash's key order); dies when no group contains $val.
sub get_id{
        my ($val, $range) = @_;
        my @matches = grep {
                my ($lower, $upper) = split ":", $range->{$_};
                $val > $lower && $val <= $upper;
        } keys %{$range};
        if(@matches){
                return $matches[0];
        }
        die("Error: did not find a group\n");
}
85 | 
86 | 


--------------------------------------------------------------------------------
/bin/preprocess/generate_data_matrix.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "./generate_data_matrix.pl <frag_bed> <frag_loops> <grid lines>";
 6 | 
 7 | my ($frag_bed, $loop_file, $grid_file) = @ARGV;
 8 | 
 9 | if(not defined $loop_file){
10 |         die($usage);
11 | }
12 | 
13 | my @obs_matrix;
14 | my @expt;
15 | my @ratio;
16 | my @frags = ();
17 | my @ends = ();
18 | my $frag_idx;
19 | 
20 | ################## Read fragment chrom ##################
21 | my $frag_chrom;
22 | open(IN, $frag_bed);
23 | my $i = 0;
24 | while(my $line = <IN>){
25 |         chomp $line;
26 |         my ($chr, $beg, $end, $id) = split "\t", $line;
27 |         push @frags, $id;
28 |         push @ends, $end;
29 |         $frag_idx->{$id} = $i;
30 |         $i ++;
31 | }
32 | close(IN);
33 | 
34 | my $len=$#frags +1;
35 | ################### initiate matrix ####################
36 | for(my $i = 0; $i < $len; $i ++){
37 |         my @array = ();
38 |         for(my $j = 0; $j < $len; $j ++){
39 |                 push @array, 0;
40 |         }
41 |         push @obs_matrix, \@array;
42 | 
43 | }
44 | 
45 | for(my $i = 0; $i < $len; $i ++){
46 |         for(my $j = 0; $j < $len; $j ++){
47 |                 $expt[$i][$j] = 0;
48 |         }
49 | }
50 | 
51 | for(my $i = 0; $i < $len; $i ++){
52 |         for(my $j = 0; $j < $len; $j ++){
53 |                 $ratio[$i][$j] = 0;
54 |         }
55 | }
56 | 
57 | #################### Read frag loops  ###################
58 | open(IN, $loop_file);
59 | while(my $line = <IN>){
60 |         chomp $line;
61 |         my ($fid1, $fid2, $obs_count, $expt_count) = split "\t", $line;
62 |         my $i = $frag_idx->{$fid1};
63 |         my $j = $frag_idx->{$fid2};
64 |         $obs_matrix[$i][$j] = $obs_count;
65 |         $obs_matrix[$j][$i] = $obs_count;
66 |         $expt[$i][$j] = $expt_count;
67 |         $expt[$j][$i] = $expt_count;
68 | 	$ratio[$i][$j] = ($obs_matrix[$i][$j]+10)/($expt[$i][$j]+10);
69 | 	$ratio[$j][$i] = ($obs_matrix[$j][$i]+10)/($expt[$j][$i]+10);
70 | }
71 | close(IN);
72 | 
73 | open(OUT,">matrix.obs");
74 | for(my $i = 0; $i < $len; $i++){
75 |         print OUT join("\t", @{$obs_matrix[$i]})."\n";
76 | }
77 | close(OUT);
78 | open(OUT,">matrix.expt");
79 | for(my $i = 0; $i < $len; $i++){
80 |         print OUT join("\t", @{$expt[$i]})."\n";
81 | }
82 | close(OUT);
83 | 
84 | open(OUT,">matrix.ratio");
85 | for(my $i = 0; $i < $len; $i++){
86 |         print OUT join("\t", @{$ratio[$i]})."\n";
87 | }
88 | close(OUT);
89 | 
90 | open(OUT, ">$grid_file");
91 | print OUT "Index\tLoc\n";
92 | for(my $i = 1; $i <= $len; $i ++){
93 |         print OUT join("\t", $i, $ends[$i - 1])."\n";
94 | }
95 | close(OUT);
96 | 
97 | exit;
98 | 
99 | 


--------------------------------------------------------------------------------
/bin/eHiC-QC/eHiC-QC.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | name=$4
 4 | 
 5 | bin=$1
 6 | bowtie_index=$2
 7 | genome=$3
 8 | ref=$5
 9 | 
10 | cat $name.R1.fastq.gz | gunzip | $bin/reformat_fastq.py 6 36 | bowtie -v 3 -m 1 --best --strata --time -p 1 --sam $bowtie_index - $name.R1.sam &
11 | cat $name.R2.fastq.gz | gunzip | $bin/reformat_fastq.py 6 36 | bowtie -v 3 -m 1 --best --strata --time -p 1 --sam $bowtie_index - $name.R2.sam &
12 | 
13 | wait
14 | $bin/pair_two_SAM.pl $name.R1.sam $name.R2.sam $name
15 | 
16 | echo Total Reads count for $name is `grep -v "^@" $name.R1.sam | wc -l` >>summary.total.read_count &
17 | cat $name.mapped.pair | samtools view -bS -t $genome -o - - > $name.bam
18 | let x=`samtools view $name.bam | wc -l`
19 | let x=x/2
20 | echo Uniquely mapped read pairs for $name is $x >> summary.total.read_count &
21 | samtools sort $name.bam | samtools view - | $bin/remove_dup_PE_ELPU.pl | samtools view -bS -t $genome -o - - > $name.sorted.nodup.bam
22 | let x=`samtools view $name.sorted.nodup.bam | wc -l`
23 | let x=x/2
24 | echo Non-redundant mapped read pairs for $name is $x>> summary.total.read_count &
25 | 
26 | samtools view $name.sorted.nodup.bam | cut -f2-13 | $bin/bam_to_temp_HiC.pl| awk '{OFS="\t"; print $1,$2,$3,$4,$5,$6,42,42}'> $name.temp
27 | $bin/reads_to_frag_loop_ELPU.py $ref/eHiC/$genome.HindIII.frag.bed $name.temp $name 0 >>summary.frag_loop.read_count
28 | rm $name.temp $name.bam $name.mapped.pair $name.loop.nofrag $name.loop.samefrag $name.loop.trans.nofrag
29 | 
30 | for file in `ls $name*.loop.* | grep -v trans`;do
31 |         awk '{OFS="\t";print $4$3,$8$7}' $file | $bin/reform_end_id.py | $bin/summary_sorted_frag_loop.pl $ref/$genome.end.transfored-id.bed > temp.$file
32 |         $bin/resort_by_frag_id.pl $ref/$genome.end.transfored-id.bed temp.$file
33 | done &
34 | 
35 | for file in `ls $name*.loop.trans`;do
36 |         awk '{OFS="\t";print $4$3,$8$7}' $file | $bin/reform_end_id.py | $bin/summary_sorted_trans_frag_loop.pl >temp.$file
37 |         $bin/resort_by_frag_id.pl $ref/$genome.end.transfored-id.bed temp.$file
38 | done &
39 | 
40 | wait
41 | $bin/merge_sorted_frag_loop.pl `ls temp*.samestrand` > frag_loop.$name.samestrand &
42 | $bin/merge_sorted_frag_loop.pl `ls temp*.inward` | awk '{if($4>2000)print $0}' > frag_loop.$name.inward &
43 | $bin/merge_sorted_frag_loop.pl `ls temp*.outward` | awk '{if($4>100000)print $0}' > frag_loop.$name.outward &
44 | wait
45 | rm temp*
46 | $bin/merge_sorted_frag_loop.pl `ls temp*.trans` | $bin/end_id_to_original.py >end_loop.$name.trans
47 | $bin/merge_sorted_frag_loop.pl frag_loop.$name.samestrand frag_loop.$name.inward frag_loop.$name.outward | $bin/end_id_to_original.py >end_loop.$name.cis
48 | rm *inward *outward *samestrand
49 | 
50 | 


--------------------------------------------------------------------------------
/bin/generate_data_matrix.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | 
 5 | my $usage = "./generate_data_matrix.pl <frag_bed> <frag_loops> <grid lines>";
 6 | 
 7 | my ($frag_bed, $loop_file, $grid_file) = @ARGV;
 8 | 
 9 | if(not defined $loop_file){
10 |         die($usage);
11 | }
12 | 
13 | my @obs_matrix;
14 | my @expt;
15 | my @ratio;
16 | my @frags = ();
17 | my @ends = ();
18 | my $frag_idx;
19 | 
20 | ################## Read fragment chrom ##################
21 | my $frag_chrom;
22 | open(IN, $frag_bed);
23 | my $i = 0;
24 | while(my $line = <IN>){
25 |         chomp $line;
26 |         my ($chr, $beg, $end, $id) = split "\t", $line;
27 |         push @frags, $id;
28 |         push @ends, $end;
29 |         $frag_idx->{$id} = $i;
30 |         $i ++;
31 | }
32 | close(IN);
33 | 
34 | my $len=$#frags +1;
35 | ################### initiate matrix ####################
36 | for(my $i = 0; $i < $len; $i ++){
37 |         my @array = ();
38 |         for(my $j = 0; $j < $len; $j ++){
39 |                 push @array, 0;
40 |         }
41 |         push @obs_matrix, \@array;
42 | 
43 | }
44 | 
45 | for(my $i = 0; $i < $len; $i ++){
46 |         for(my $j = 0; $j < $len; $j ++){
47 |                 $expt[$i][$j] = 0;
48 |         }
49 | }
50 | 
51 | for(my $i = 0; $i < $len; $i ++){
52 |         for(my $j = 0; $j < $len; $j ++){
53 |                 $ratio[$i][$j] = 0;
54 |         }
55 | }
56 | 
57 | #################### Read frag loops  ###################
58 | open(IN, $loop_file);
59 | while(my $line = <IN>){
60 |         chomp $line;
61 |         my ($fid1, $fid2, $obs_count, $expt_count) = split "\t", $line;
62 |         my $i = $frag_idx->{$fid1};
63 |         my $j = $frag_idx->{$fid2};
64 |         $obs_matrix[$i][$j] = $obs_count;
65 |         $obs_matrix[$j][$i] = $obs_count;
66 |         $expt[$i][$j] = $expt_count;
67 |         $expt[$j][$i] = $expt_count;
68 | 	$ratio[$i][$j] = ($obs_matrix[$i][$j]+10)/($expt[$i][$j]+10);
69 | 	$ratio[$j][$i] = ($obs_matrix[$j][$i]+10)/($expt[$j][$i]+10);
70 | }
71 | close(IN);
72 | 
73 | open(OUT,">$loop_file.matrix.obs");
74 | for(my $i = 0; $i < $len; $i++){
75 |         print OUT join("\t", @{$obs_matrix[$i]})."\n";
76 | }
77 | close(OUT);
78 | open(OUT,">$loop_file.matrix.expt");
79 | for(my $i = 0; $i < $len; $i++){
80 |         print OUT join("\t", @{$expt[$i]})."\n";
81 | }
82 | close(OUT);
83 | 
84 | open(OUT,">$loop_file.matrix.ratio");
85 | for(my $i = 0; $i < $len; $i++){
86 |         print OUT join("\t", @{$ratio[$i]})."\n";
87 | }
88 | close(OUT);
89 | 
90 | open(OUT, ">$grid_file");
91 | print OUT "Index\tLoc\n";
92 | for(my $i = 1; $i <= $len; $i ++){
93 |         print OUT join("\t", $i, $ends[$i - 1])."\n";
94 | }
95 | close(OUT);
96 | 
97 | exit;
98 | 
99 | 


--------------------------------------------------------------------------------
/bin/Arima/merge_sorted_anchor_loop.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl
  2 | 
  3 | use strict;
  4 | 
  5 | my $usage = "Usage:./merge_sorted_A_loop.pl <loop files>\n";
  6 | 
  7 | my @loop_files = @ARGV;
  8 | if(! @loop_files){
  9 | 	die($usage);
 10 | }
 11 | 
 12 | my $file_handles;
 13 | foreach my $file (@loop_files){
 14 | 	my $fh;
 15 | 	open($fh, $file);
 16 | 	$file_handles->{$file} = $fh;
 17 | }
 18 | 
 19 | my $curr_data;
 20 | my $next_id1;
 21 | my $next_id2;
 22 | my $next_count;
 23 | my $next_dist;
 24 | 
 25 | foreach my $file (@loop_files){
 26 | 	my $fh = $file_handles->{$file};
 27 | 	if(my $line = <$fh>){
 28 | 		chomp $line;
 29 | 		($next_id1->{$file}, $next_id2->{$file}, $next_count->{$file}, $next_dist->{$file}) = split "\t", $line;
 30 | 		$next_id1->{$file} =~ s/A_//;
 31 | 		$next_id2->{$file} =~ s/A_//;
 32 | 		if(not defined $next_dist->{$file}){
 33 | 			$next_dist->{$file} = -1;
 34 | 		}
 35 | 	}
 36 | }
 37 | my $curr_id1 = min(values %$next_id1);
 38 | my $curr_dist;
 39 | while($curr_id1){
 40 | 	foreach my $file (keys %$file_handles){
 41 | 		my $fh = $file_handles->{$file};
 42 | 		while($next_id1->{$file} == $curr_id1){
 43 | 			my $id2 = $next_id2->{$file};
 44 | 			if(not defined $curr_data->{$id2}){
 45 | 				$curr_data->{$id2} = 0;
 46 | 			}
 47 | 			$curr_dist->{$id2} = $next_dist->{$file};
 48 | 			$curr_data->{$id2} += $next_count->{$file};
 49 | 			if(my $line = <$fh>){
 50 | 				chomp $line;
 51 | 				($next_id1->{$file}, $next_id2->{$file}, $next_count->{$file}, $next_dist->{$file}) = split "\t", $line;
 52 | 				$next_id1->{$file} =~ s/A_//;
 53 | 				$next_id2->{$file} =~ s/A_//;
 54 | 				if(not defined $next_dist->{$file}){
 55 | 			                $next_dist->{$file} = -1;
 56 |         			}
 57 | 			}else{
 58 | 				close($fh);
 59 | 				delete $file_handles->{$file};
 60 | 				delete $next_id1->{$file};
 61 | 				delete $next_id2->{$file};
 62 | 				delete $next_count->{$file};
 63 | 				delete $next_dist->{$file};
 64 | 			}
 65 | 		}
 66 | 	}
 67 | 	if($curr_id1 > 0){
 68 | 		output($curr_id1, $curr_data, $curr_dist);
 69 | 	}
 70 | 	if(keys %$next_id1){
 71 | 		$curr_id1 = min(values %$next_id1);
 72 | 	}else{
 73 | 		$curr_id1 = 0;
 74 | 	}
 75 | }
 76 | 
 77 | exit;
 78 | 
 79 | ###############################################################
 80 | sub min{
 81 | 	my @array = sort {$a<=>$b} @_;
 82 | 	return $array[0];
 83 | }
 84 | 
 85 | sub output{
 86 | 	my ($id1, $ref, $dist_ref) = @_;
 87 | 	foreach my $id2 (sort {$a<=>$b} keys %$ref){
 88 | 		print join("\t", "A_$id1", "A_$id2", $ref->{$id2});
 89 | 		delete $ref->{$id2};
 90 | 		my $dist = $dist_ref->{$id2};
 91 | 		if($dist < 0){
 92 | 			print "\n";
 93 | 		}else{
 94 | 			print "\t".$dist."\n";
 95 | 		}
 96 | 		delete $dist_ref->{$id2};
 97 | 	}
 98 | 	return 1;
 99 | }
100 | 
101 | 


--------------------------------------------------------------------------------
/bin/microC/merge_sorted_anchor_loop.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl
  2 | 
  3 | use strict;
  4 | 
  5 | my $usage = "Usage:./merge_sorted_A_loop.pl <loop files>\n";
  6 | 
  7 | my @loop_files = @ARGV;
  8 | if(! @loop_files){
  9 | 	die($usage);
 10 | }
 11 | 
 12 | my $file_handles;
 13 | foreach my $file (@loop_files){
 14 | 	my $fh;
 15 | 	open($fh, $file);
 16 | 	$file_handles->{$file} = $fh;
 17 | }
 18 | 
 19 | my $curr_data;
 20 | my $next_id1;
 21 | my $next_id2;
 22 | my $next_count;
 23 | my $next_dist;
 24 | 
 25 | foreach my $file (@loop_files){
 26 | 	my $fh = $file_handles->{$file};
 27 | 	if(my $line = <$fh>){
 28 | 		chomp $line;
 29 | 		($next_id1->{$file}, $next_id2->{$file}, $next_count->{$file}, $next_dist->{$file}) = split "\t", $line;
 30 | 		$next_id1->{$file} =~ s/A_//;
 31 | 		$next_id2->{$file} =~ s/A_//;
 32 | 		if(not defined $next_dist->{$file}){
 33 | 			$next_dist->{$file} = -1;
 34 | 		}
 35 | 	}
 36 | }
 37 | my $curr_id1 = min(values %$next_id1);
 38 | my $curr_dist;
 39 | while($curr_id1){
 40 | 	foreach my $file (keys %$file_handles){
 41 | 		my $fh = $file_handles->{$file};
 42 | 		while($next_id1->{$file} == $curr_id1){
 43 | 			my $id2 = $next_id2->{$file};
 44 | 			if(not defined $curr_data->{$id2}){
 45 | 				$curr_data->{$id2} = 0;
 46 | 			}
 47 | 			$curr_dist->{$id2} = $next_dist->{$file};
 48 | 			$curr_data->{$id2} += $next_count->{$file};
 49 | 			if(my $line = <$fh>){
 50 | 				chomp $line;
 51 | 				($next_id1->{$file}, $next_id2->{$file}, $next_count->{$file}, $next_dist->{$file}) = split "\t", $line;
 52 | 				$next_id1->{$file} =~ s/A_//;
 53 | 				$next_id2->{$file} =~ s/A_//;
 54 | 				if(not defined $next_dist->{$file}){
 55 | 			                $next_dist->{$file} = -1;
 56 |         			}
 57 | 			}else{
 58 | 				close($fh);
 59 | 				delete $file_handles->{$file};
 60 | 				delete $next_id1->{$file};
 61 | 				delete $next_id2->{$file};
 62 | 				delete $next_count->{$file};
 63 | 				delete $next_dist->{$file};
 64 | 			}
 65 | 		}
 66 | 	}
 67 | 	if($curr_id1 > 0){
 68 | 		output($curr_id1, $curr_data, $curr_dist);
 69 | 	}
 70 | 	if(keys %$next_id1){
 71 | 		$curr_id1 = min(values %$next_id1);
 72 | 	}else{
 73 | 		$curr_id1 = 0;
 74 | 	}
 75 | }
 76 | 
 77 | exit;
 78 | 
 79 | ###############################################################
 80 | sub min{
 81 | 	my @array = sort {$a<=>$b} @_;
 82 | 	return $array[0];
 83 | }
 84 | 
 85 | sub output{
 86 | 	my ($id1, $ref, $dist_ref) = @_;
 87 | 	foreach my $id2 (sort {$a<=>$b} keys %$ref){
 88 | 		print join("\t", "A_$id1", "A_$id2", $ref->{$id2});
 89 | 		delete $ref->{$id2};
 90 | 		my $dist = $dist_ref->{$id2};
 91 | 		if($dist < 0){
 92 | 			print "\n";
 93 | 		}else{
 94 | 			print "\t".$dist."\n";
 95 | 		}
 96 | 		delete $dist_ref->{$id2};
 97 | 	}
 98 | 	return 1;
 99 | }
100 | 
101 | 


--------------------------------------------------------------------------------