├── Jazzlib
    ├── __init__.py
    ├── Peak.pyc
    ├── sta.pyc
    ├── FRegion.pyc
    ├── Hotspot.pyc
    ├── bgcount.pyc
    ├── cEM_zip.so
    ├── jazzio.pyc
    ├── kernel.pyc
    ├── region.pyc
    ├── __init__.pyc
    ├── countreads.pyc
    ├── localmax.pyc
    ├── peakcount.pyc
    ├── peaksscan.pyc
    ├── randombg.pyc
    ├── hotspotsscan.pyc
    ├── kernelsmooth.pyc
    ├── readscounter.pyc
    ├── hotspotscount.pyc
    ├── normalize_ratio.pyc
    ├── cEM_zip.cpython-36m-darwin.so
    ├── cEM_zip.cpython-37m-darwin.so
    ├── __pycache__
    │   ├── Peak.cpython-36.pyc
    │   ├── sta.cpython-36.pyc
    │   ├── FRegion.cpython-36.pyc
    │   ├── FRegion.cpython-37.pyc
    │   ├── Hotspot.cpython-36.pyc
    │   ├── jazzio.cpython-36.pyc
    │   ├── kernel.cpython-36.pyc
    │   ├── region.cpython-36.pyc
    │   ├── __init__.cpython-36.pyc
    │   ├── __init__.cpython-37.pyc
    │   ├── localmax.cpython-36.pyc
    │   ├── peaksscan.cpython-36.pyc
    │   ├── randombg.cpython-36.pyc
    │   ├── countreads.cpython-36.pyc
    │   ├── countreads.cpython-37.pyc
    │   ├── hotspotsscan.cpython-36.pyc
    │   ├── kernelsmooth.cpython-36.pyc
    │   └── normalize_ratio.cpython-36.pyc
    ├── cEM_zip.cpython-36m-x86_64-linux-gnu.so
    ├── Peak.py
    ├── Hotspot.py
    ├── kernel.py
    ├── cEM_zip.pyx
    ├── normalize_ratio.py
    ├── normalize_ratio.py.bak
    ├── kernelsmooth.py
    ├── kernelsmooth.py.bak
    ├── region.py
    ├── region.py.bak
    ├── jazzio.py.bak
    ├── jazzio.py
    ├── randombg.py
    ├── randombg.py.bak
    ├── localmax.py
    ├── sta.py
    ├── localmax.py.bak
    ├── sta.py.bak
    ├── FRegion.py.bak
    ├── FRegion.py
    ├── Jazz.py
    ├── Jazz.py.bak
    ├── bgcount.py
    ├── bgcount.py.bak
    ├── hotspotsscan.py
    ├── hotspotsscan.py.bak
    ├── peaksscan.py
    └── peaksscan.py.bak
├── requirements.txt
├── .idea
    ├── .gitignore
    ├── vcs.xml
    ├── inspectionProfiles
    │   └── profiles_settings.xml
    ├── modules.xml
    ├── misc.xml
    └── Jazz.iml
├── .DS_Store
├── setup.py
├── Readme.txt
└── Jazz.py


/Jazzlib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Cython
2 | numpy
3 | pysam
4 | scipy
5 | 


--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | # Default ignored files
3 | /workspace.xml


--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/.DS_Store


--------------------------------------------------------------------------------
/Jazzlib/Peak.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/Peak.pyc


--------------------------------------------------------------------------------
/Jazzlib/sta.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/sta.pyc


--------------------------------------------------------------------------------
/Jazzlib/FRegion.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/FRegion.pyc


--------------------------------------------------------------------------------
/Jazzlib/Hotspot.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/Hotspot.pyc


--------------------------------------------------------------------------------
/Jazzlib/bgcount.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/bgcount.pyc


--------------------------------------------------------------------------------
/Jazzlib/cEM_zip.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/cEM_zip.so


--------------------------------------------------------------------------------
/Jazzlib/jazzio.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/jazzio.pyc


--------------------------------------------------------------------------------
/Jazzlib/kernel.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/kernel.pyc


--------------------------------------------------------------------------------
/Jazzlib/region.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/region.pyc


--------------------------------------------------------------------------------
/Jazzlib/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__init__.pyc


--------------------------------------------------------------------------------
/Jazzlib/countreads.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/countreads.pyc


--------------------------------------------------------------------------------
/Jazzlib/localmax.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/localmax.pyc


--------------------------------------------------------------------------------
/Jazzlib/peakcount.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/peakcount.pyc


--------------------------------------------------------------------------------
/Jazzlib/peaksscan.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/peaksscan.pyc


--------------------------------------------------------------------------------
/Jazzlib/randombg.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/randombg.pyc


--------------------------------------------------------------------------------
/Jazzlib/hotspotsscan.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/hotspotsscan.pyc


--------------------------------------------------------------------------------
/Jazzlib/kernelsmooth.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/kernelsmooth.pyc


--------------------------------------------------------------------------------
/Jazzlib/readscounter.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/readscounter.pyc


--------------------------------------------------------------------------------
/Jazzlib/hotspotscount.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/hotspotscount.pyc


--------------------------------------------------------------------------------
/Jazzlib/normalize_ratio.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/normalize_ratio.pyc


--------------------------------------------------------------------------------
/Jazzlib/cEM_zip.cpython-36m-darwin.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/cEM_zip.cpython-36m-darwin.so


--------------------------------------------------------------------------------
/Jazzlib/cEM_zip.cpython-37m-darwin.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/cEM_zip.cpython-37m-darwin.so


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/Peak.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/Peak.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/sta.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/sta.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/FRegion.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/FRegion.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/FRegion.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/FRegion.cpython-37.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/Hotspot.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/Hotspot.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/jazzio.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/jazzio.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/kernel.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/kernel.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/region.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/region.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/localmax.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/localmax.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/peaksscan.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/peaksscan.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/randombg.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/randombg.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/countreads.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/countreads.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/countreads.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/countreads.cpython-37.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/hotspotsscan.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/hotspotsscan.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/kernelsmooth.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/kernelsmooth.cpython-36.pyc


--------------------------------------------------------------------------------
/Jazzlib/cEM_zip.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/cEM_zip.cpython-36m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/Jazzlib/__pycache__/normalize_ratio.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangtaolab/Jazz/master/Jazzlib/__pycache__/normalize_ratio.cpython-36.pyc


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# Build script for the Cython extension(s) that live under Jazzlib/.
# NOTE(review): distutils was deprecated by PEP 632 and removed from the
# standard library in Python 3.12, so this import needs an older interpreter
# (or a distutils replacement on the path) -- confirm the supported versions.
from distutils.core import setup
from Cython.Build import cythonize

# cythonize() receives a glob, so every .pyx file directly inside Jazzlib/
# is turned into an extension module.
setup(
    name = "cEM_zip",
    ext_modules = cythonize("Jazzlib/*.pyx"),
)

# Build the extension in place (next to its source) with:
#     python setup.py build_ext --inplace
10 | 


--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="InspectionProjectProfileManager">
2 |   <settings>
3 |     <option name="USE_PROJECT_PROFILE" value="false" />
4 |     <version value="1.0" />
5 |   </settings>
6 | </component>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/Jazz.iml" filepath="$PROJECT_DIR$/.idea/Jazz.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="JavaScriptSettings">
4 |     <option name="languageLevel" value="ES6" />
5 |   </component>
6 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (bioconda) (2)" project-jdk-type="Python SDK" />
7 | </project>


--------------------------------------------------------------------------------
/.idea/Jazz.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="jdk" jdkName="Python 3.6 (bioconda) (2)" jdkType="Python SDK" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/Jazzlib/Peak.py:
--------------------------------------------------------------------------------
 1 | class Peak:
 2 |     """
 3 |         Peaks
 4 |     
 5 |     """    
 6 |    
 7 |     def __init__(self, start, end, chromosome, peakpoint, peakid, score, parent=1, fdr=1):
 8 | 
 9 |         self.start = start
10 | 
11 |         self.end = end
12 | 
13 |         self.chromosome = chromosome
14 | 
15 |         self.peakpoint = peakpoint
16 | 
17 |         self.peakid = peakid
18 | 
19 |         self.score = score
20 | 
21 |         self.fdr = fdr
22 | 
23 |         self.parent = parent
24 | 
25 | 


--------------------------------------------------------------------------------
/Jazzlib/Hotspot.py:
--------------------------------------------------------------------------------
 1 | class Hotspot:
 2 |     """
 3 |         Hotspot
 4 | 
 5 |     """
 6 |    
 7 |     def __init__(self, start, end, chromosome, hotspotid, peaks=list(), score=0, fdr=1):
 8 | 
 9 |         self.start = start
10 | 
11 |         self.end = end
12 | 
13 |         self.chromosome = chromosome
14 | 
15 |         self.hotspotid = hotspotid
16 | 
17 |         self.score = score
18 | 
19 |         self.fdr = fdr
20 | 
21 |         self.peaks = peaks
22 | 
23 |     def addpeak(self, peak):
24 | 
25 |         self.peaks.append(peak)
26 | 


--------------------------------------------------------------------------------
/Jazzlib/kernel.py:
--------------------------------------------------------------------------------
 1 | from numpy import *
 2 | #from scipy.ndimage.filters import *
 3 | 
 4 | 
 5 | def kde(z, w, xv):
 6 | 
 7 |     return sum(exp(-0.5*((z-xv)/w)**2)/sqrt(2*pi*w**2))
 8 | 
 9 | 
def smooth_kernel(length):
    """
    Build a Gaussian smoothing kernel as a dict {integer offset: weight}.

    An even ``length`` is bumped to the next odd number so the offsets run
    symmetrically from -(length-1)/2 to +(length-1)/2.  The bandwidth handed
    to kde() is (length-1)/6, i.e. the window covers three bandwidths on
    each side of offset 0.
    """

    if length % 2 == 0:

        length += 1

    half_width = (length - 1)/2

    bandwidth = (length - 1)/6.0

    offsets = linspace(-half_width, half_width, length)

    return {int(offset): kde(offset, bandwidth, 0) for offset in offsets}
25 | 
26 | 
def smooth_kernel_adj(length, minscore):
    """
    Return the smooth_kernel(length) weights, each divided by ``minscore``.

    The result is a dict {integer offset: weight / minscore} with the same
    offsets (and the same even-to-odd length adjustment) as smooth_kernel().
    No check is made for ``minscore == 0``.
    """

    # Delegate to smooth_kernel so the width adjustment and bandwidth rule
    # are defined in one place instead of being copied here.
    base_kernel = smooth_kernel(length)

    return {offset: weight/minscore for offset, weight in base_kernel.items()}
42 | 


--------------------------------------------------------------------------------
/Jazzlib/cEM_zip.pyx:
--------------------------------------------------------------------------------
 1 | def cEM_zip(testdata):
 2 | 
 3 |     cdef float sumzip = sum(testdata)
 4 | 
 5 |     cdef int lengthoflist = len(testdata)
 6 | 
 7 |     cdef float phat = 0.5
 8 | 
 9 |     cdef float phatpre = -1.0
10 | 
11 |     cdef float lhatpre = -1.0
12 | 
13 |     cdef float base
14 | 
15 |     cdef int i = 0
16 | 
17 |     cdef int j = 0
18 | 
19 |     cdef int n
20 | 
21 |     cdef float c
22 | 
23 |     zhat = []
24 | 
25 |     zerolist = []
26 | 
27 |     for i  from 0<=i<lengthoflist:
28 | 
29 |         if testdata[i] == 0:
30 | 
31 |             zerolist.append(i)
32 | 
33 |     for j  from 0<=j<lengthoflist:
34 | 
35 |         zhat.append(0)
36 | 
37 |     lhat = sumzip/lengthoflist
38 | 
39 |     while (i<1000000):
40 | 
41 |         i = i + 1
42 |         base = ((phat + (1-phat) * 2.718281828459045 **(0-lhat)))
43 |         for n in zerolist:
44 | 
45 |             if testdata[n] == 0:
46 | 
47 |                 zhat[n] = phat/base
48 | 
49 |         sumzhat = sum(zhat)
50 | 
51 |         c = (lengthoflist-sumzhat)
52 | 
53 |         lhat = sumzip/c
54 | 
55 |         phat = sumzhat/lengthoflist
56 | 
57 |         if (abs(lhat-lhatpre)<0.000001 and abs(phat-phatpre)<0.000001):
58 | 
59 |             break
60 | 
61 |         else:
62 | 
63 |             phatpre = phat
64 | 
65 |             lhatpre = lhat
66 | 
67 |     return (lhat, phat)
68 | 


--------------------------------------------------------------------------------
/Jazzlib/normalize_ratio.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from .FRegion import *
 3 | import sys
 4 | 
 5 | 
 6 | def normalize_ratio(fregionsdicts):
 7 | 
 8 |     totalreads = dict()
 9 | 
10 |     totalfilterreads = dict()
11 | 
12 |     totalajdreads = dict()
13 | 
14 |     normalizedratio = dict()
15 | 
16 |     for sample in fregionsdicts:
17 | 
18 |         nowFRegion = fregionsdicts[sample]
19 | 
20 |         totalreads[sample] = nowFRegion.totalreads
21 | 
22 |         totalfilterreads[sample] = nowFRegion.filterreadscount
23 | 
24 |         totalajdreads[sample] = totalreads[sample] - totalfilterreads[sample]
25 | 
26 |     mintotal = sorted(totalajdreads.values())[0]
27 | 
28 |     for sample in totalajdreads:
29 | 
30 |         normalizedratio[sample] = totalajdreads[sample] / mintotal
31 | 
32 |     return normalizedratio
33 | 
34 | 
def normalize_ratio_input(fregegion_input, fregion_chip):
    """
    Return the chip-to-input ratio of usable reads.

    Usable reads are ``totalreads - filterreadscount`` on each argument;
    the result is the chip value divided by the input value.
    """

    input_usable = fregegion_input.totalreads - fregegion_input.filterreadscount

    chip_usable = fregion_chip.totalreads - fregion_chip.filterreadscount

    return chip_usable/input_usable
44 | 
45 | 
def normalize_ratio_input2(fregegion_input, fregion_chip):
    """
    Return the ratio of chip read density to input read density.

    Each density is the usable read count (``totalreads -
    filterreadscount``) divided by ``fregion_chip.countgenomeuniqlength``.
    """

    input_usable = fregegion_input.totalreads - fregegion_input.filterreadscount

    chip_usable = fregion_chip.totalreads - fregion_chip.filterreadscount

    # NOTE(review): both densities use the chip region's unique genome
    # length; the input's own countgenomeuniqlength is never read --
    # confirm that this is intended.
    chip_density = chip_usable/fregion_chip.countgenomeuniqlength

    input_density = input_usable/fregion_chip.countgenomeuniqlength

    return chip_density/input_density
55 | 
56 | 


--------------------------------------------------------------------------------
/Jazzlib/normalize_ratio.py.bak:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | from FRegion import *
 3 | import sys
 4 | 
 5 | 
 6 | def normalize_ratio(fregionsdicts):
 7 | 
 8 |     totalreads = dict()
 9 | 
10 |     totalfilterreads = dict()
11 | 
12 |     totalajdreads = dict()
13 | 
14 |     normalizedratio = dict()
15 | 
16 |     for sample in fregionsdicts:
17 | 
18 |         nowFRegion = fregionsdicts[sample]
19 | 
20 |         totalreads[sample] = nowFRegion.totalreads
21 | 
22 |         totalfilterreads[sample] = nowFRegion.filterreadscount
23 | 
24 |         totalajdreads[sample] = totalreads[sample] - totalfilterreads[sample]
25 | 
26 |     mintotal = sorted(totalajdreads.values())[0]
27 | 
28 |     for sample in totalajdreads:
29 | 
30 |         normalizedratio[sample] = totalajdreads[sample] / mintotal
31 | 
32 |     return normalizedratio
33 | 
34 | 
def normalize_ratio_input(fregegion_input, fregion_chip):
    """
    Return the chip-to-input ratio of usable reads.

    Usable reads are ``totalreads - filterreadscount`` on each argument;
    the result is the chip value divided by the input value.
    """

    input_usable = fregegion_input.totalreads - fregegion_input.filterreadscount

    chip_usable = fregion_chip.totalreads - fregion_chip.filterreadscount

    return chip_usable/input_usable
44 | 
45 | 
def normalize_ratio_input2(fregegion_input, fregion_chip):
    """
    Return the ratio of chip read density to input read density.

    Each density is the usable read count (``totalreads -
    filterreadscount``) divided by ``fregion_chip.countgenomeuniqlength``.
    """

    input_usable = fregegion_input.totalreads - fregegion_input.filterreadscount

    chip_usable = fregion_chip.totalreads - fregion_chip.filterreadscount

    # NOTE(review): both densities use the chip region's unique genome
    # length; the input's own countgenomeuniqlength is never read --
    # confirm that this is intended.
    chip_density = chip_usable/fregion_chip.countgenomeuniqlength

    input_density = input_usable/fregion_chip.countgenomeuniqlength

    return chip_density/input_density
55 | 
56 | 


--------------------------------------------------------------------------------
/Readme.txt:
--------------------------------------------------------------------------------
 1 | Jazz 
 2 | 
 3 | Non-Histone protein binding site identification
 4 | 
 5 | Jazz Dependencies:
 6 | samtools http://samtools.sourceforge.net
 7 | pysam http://code.google.com/p/pysam/
 8 | scipy http://www.scipy.org
 9 | numpy http://www.numpy.org
10 | 
11 | Author: Tao Zhang @ Yangzhou University
12 | 
13 | Options:
14 |   --version             show program's version number and exit
15 |   -h, --help            show this help message and exit.
16 |   -d DATAFILE, --data=DATAFILE
17 |                         data file, should be sorted bam format
18 |   -c CONTROLFILE, --control=CONTROLFILE
19 |                         control(input) file, should be sorted bam format
20 |   -n SAMPLENAME, --name=SAMPLENAME
21 |                         NH sample name default=NH_sample
22 |   -b BW, --bandwidth=BW
23 |                         kernel smooth band width, should >20, default==600
24 |   -t THRESHOLD, --threshold=THRESHOLD
25 |                         Hot spots threshold, default=4.0
26 |   -l MINLENGTH, --minlength=MINLENGTH
27 |                         minimum length of hot spots, default=50
28 |   -p PVALUE, --pavlue=PVALUE
29 |                         p-value cutoff for peak identification, default=0.01
30 |   -i INITIAL, --initial=INITIAL
31 |                         Peak's initial length, >5 and <minlength, default=20
32 |   --threads=NTHREADS    threads number or cpu number, default=4
33 |   -w, --wig             whether out put wiggle file, default=False
34 |   -f, --fdr             using FDR instead p-value
35 |   -x EXCLUDECHR, --excludechr=EXCLUDECHR
36 |                         Don't count those DHs, example='-x ChrM,ChrC'
37 |   -g, --gff             whether out put gff file, default=False
38 |   -j JOBTYPE, --jobtype=JOBTYPE
39 |                         job type, such as dh, nhpaired or nhsingle
40 |   -m MAXINSERT, --maxinsert=MAXINSERT
41 |                         when you use paired library, please set the maxinsert
42 |                         size
43 |   --pe                  paired-end reads or single-end reads, default=False
44 |                         (single end)


--------------------------------------------------------------------------------
/Jazzlib/kernelsmooth.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from .countreads import *
 3 | from .kernel import *
 4 | import numpy as np
 5 | 
 6 | 
 7 | class KeyboardInterruptError(Exception):
 8 | 
 9 |     pass
10 | 
11 | 
def regionsmooth(bamfile, jobtype, maxinsert, regionchromosome, regionstart, regionend, chr_length):
    """
    Kernel-smooth the per-position counts of one region.

    The region is widened by ``2 * maxinsert`` on both sides (clamped to
    ``1 .. chr_length``) and handed to midsiteinsersizecounter().  Its
    result is used here as a nested mapping
    ``{insert length: {position: count}}``.  For every insert length the
    counts are laid out along the widened window, correlated with
    ``smooth_kernel(insert length)`` and summed into one score track.

    bamfile, jobtype -- passed through unchanged to midsiteinsersizecounter()
    maxinsert        -- also passed through; sets the padding added per side
    regionchromosome -- chromosome name, echoed in the result
    regionstart, regionend -- inclusive bounds of the positions reported
    chr_length       -- upper clamp for the widened window

    Returns a dict ``{'chromosome': regionchromosome, 'score': {position:
    score}}`` holding only positions inside regionstart..regionend.

    Raises KeyboardInterruptError if a KeyboardInterrupt occurs while
    the region is being processed.
    """

    try:

        # Pad the window, then clamp it to the chromosome.
        renewstart = regionstart - maxinsert*2

        renewend = regionend + maxinsert*2

        if renewstart < 1:

            renewstart = 1

        if renewend > chr_length:

            renewend = chr_length

        insertsize_middle_site_count = midsiteinsersizecounter(bamfile=bamfile, regionchromosome=regionchromosome,
                                                               regionstart=renewstart, regionend=renewend,
                                                               jobtype=jobtype, maxinsert=maxinsert)

        renewlength = renewend - renewstart + 1

        smoothed_score = np.repeat(0, renewlength)

        for insertlen in insertsize_middle_site_count:

            site_counts = insertsize_middle_site_count[insertlen]

            kernelnow = smooth_kernel(insertlen)

            # Kernel weights ordered from the most negative offset upwards.
            kernel_score = [kernelnow[w] for w in sorted(kernelnow)]

            # Dense count vector over the widened window; absent positions are 0.
            readcount_nowinsertsize = [site_counts[n] if n in site_counts else 0
                                       for n in range(renewstart, renewend+1)]

            nowsmoothed = np.correlate(np.array(readcount_nowinsertsize), kernel_score, "same")

            smoothed_score = nowsmoothed + smoothed_score

        outputscore = dict()

        outputscore['chromosome'] = regionchromosome

        outputscore['score'] = dict()

        # Report only the requested region; the padding is dropped here.
        for j in range(0, renewlength):

            nowsite = j + renewstart

            if regionstart <= nowsite <= regionend:

                outputscore['score'][nowsite] = smoothed_score[j]

        return outputscore

    except KeyboardInterrupt:

        # Re-raised as a regular Exception subclass -- presumably so callers
        # that only handle ``Exception`` (e.g. worker pools) still see it;
        # confirm with the caller.  The ``sys.exit(0)`` that used to follow
        # this raise could never execute and has been removed.
        raise KeyboardInterruptError()


--------------------------------------------------------------------------------
/Jazzlib/kernelsmooth.py.bak:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | from countreads import *
 3 | from kernel import *
 4 | import numpy as np
 5 | 
 6 | 
 7 | class KeyboardInterruptError(Exception):
 8 | 
 9 |     pass
10 | 
11 | 
def regionsmooth(bamfile, jobtype, maxinsert, regionchromosome, regionstart, regionend, chr_length):
    """
    Kernel-smooth the per-position counts of one region.

    The region is widened by ``2 * maxinsert`` on both sides (clamped to
    ``1 .. chr_length``) and handed to midsiteinsersizecounter().  Its
    result is used here as a nested mapping
    ``{insert length: {position: count}}``.  For every insert length the
    counts are laid out along the widened window, correlated with
    ``smooth_kernel(insert length)`` and summed into one score track.

    bamfile, jobtype -- passed through unchanged to midsiteinsersizecounter()
    maxinsert        -- also passed through; sets the padding added per side
    regionchromosome -- chromosome name, echoed in the result
    regionstart, regionend -- inclusive bounds of the positions reported
    chr_length       -- upper clamp for the widened window

    Returns a dict ``{'chromosome': regionchromosome, 'score': {position:
    score}}`` holding only positions inside regionstart..regionend.

    Raises KeyboardInterruptError if a KeyboardInterrupt occurs while
    the region is being processed.
    """

    try:

        # Pad the window, then clamp it to the chromosome.
        renewstart = regionstart - maxinsert*2

        renewend = regionend + maxinsert*2

        if renewstart < 1:

            renewstart = 1

        if renewend > chr_length:

            renewend = chr_length

        insertsize_middle_site_count = midsiteinsersizecounter(bamfile=bamfile, regionchromosome=regionchromosome,
                                                               regionstart=renewstart, regionend=renewend,
                                                               jobtype=jobtype, maxinsert=maxinsert)

        renewlength = renewend - renewstart + 1

        smoothed_score = np.repeat(0, renewlength)

        for insertlen in insertsize_middle_site_count:

            site_counts = insertsize_middle_site_count[insertlen]

            kernelnow = smooth_kernel(insertlen)

            # Kernel weights ordered from the most negative offset upwards.
            kernel_score = [kernelnow[w] for w in sorted(kernelnow)]

            # Dense count vector over the widened window; absent positions are 0.
            readcount_nowinsertsize = [site_counts[n] if n in site_counts else 0
                                       for n in range(renewstart, renewend+1)]

            nowsmoothed = np.correlate(np.array(readcount_nowinsertsize), kernel_score, "same")

            smoothed_score = nowsmoothed + smoothed_score

        outputscore = dict()

        outputscore['chromosome'] = regionchromosome

        outputscore['score'] = dict()

        # Report only the requested region; the padding is dropped here.
        for j in range(0, renewlength):

            nowsite = j + renewstart

            if regionstart <= nowsite <= regionend:

                outputscore['score'][nowsite] = smoothed_score[j]

        return outputscore

    except KeyboardInterrupt:

        # Re-raised as a regular Exception subclass -- presumably so callers
        # that only handle ``Exception`` (e.g. worker pools) still see it;
        # confirm with the caller.  The ``sys.exit(0)`` that used to follow
        # this raise could never execute and has been removed.
        raise KeyboardInterruptError()


--------------------------------------------------------------------------------
/Jazzlib/region.py:
--------------------------------------------------------------------------------
  1 | def effectregion(chrlength, windowsize, bw):
  2 | 
  3 |     """
  4 |         count effect region
  5 |         ===================--
  6 |                         --=================--
  7 | 
  8 |     """
  9 |     scare = int(chrlength/windowsize)
 10 | 
 11 |     efregions = dict()
 12 | 
 13 |     for i in range(0, scare+1):
 14 |         efregions[i] = dict()
 15 |         if i == 0:
 16 | 
 17 |             efregions[i]['ctstart'] = 1
 18 |             efregions[i]['ctend'] = int(windowsize + 1.5 * bw)
 19 |             efregions[i]['efstart'] = 1
 20 |             efregions[i]['efend'] = int(windowsize)
 21 |         elif i == scare:
 22 | 
 23 |             efregions[i]['ctstart'] = int(i * windowsize - 1.5 * bw)
 24 |             efregions[i]['ctend'] = int(chrlength)
 25 |             efregions[i]['efstart'] = int(i * windowsize + 1)
 26 |             efregions[i]['efend'] = int(chrlength)
 27 | 
 28 |         else:
 29 | 
 30 |             efregions[i]['ctstart'] = int(i * windowsize - 1.5 * bw)
 31 |             efregions[i]['ctend'] = int((i + 1) * windowsize + 1.5 * bw)
 32 |             efregions[i]['efstart'] = int(i * windowsize + 1)
 33 |             efregions[i]['efend'] = int((i + 1) * windowsize)
 34 | 
 35 |     return efregions
 36 | 
 37 | 
 38 | def continueregion(points, minlength=2):
 39 | 
 40 |     try:
 41 | 
 42 |         points.sort()
 43 | 
 44 |         start_index = 0
 45 | 
 46 |         end_index = 0
 47 | 
 48 |         continue_region = list()
 49 | 
 50 |         for index_now in range(1, len(points)):
 51 | 
 52 |             pre_index = index_now - 1
 53 | 
 54 |             if points[pre_index] + 1 == points[index_now]:
 55 | 
 56 |                 if index_now == len(points) -1:
 57 | 
 58 |                     if points[index_now] - points[start_index] + 1>= minlength :
 59 |                         #print (points[start_index], points[index_now])
 60 |                         region_now = dict()
 61 |                         region_now['start_site'] = points[start_index]
 62 |                         region_now['end_site'] = points[index_now]
 63 |                         continue_region.append(region_now)
 64 | 
 65 |                 else:
 66 | 
 67 |                     end_index = index_now
 68 | 
 69 |             else:
 70 | 
 71 |                 if points[end_index] - points[start_index] + 1 >= minlength :
 72 | 
 73 |                     #print (points[start_index], points[end_index])
 74 |                     region_now = dict()
 75 |                     region_now['start_site'] = points[start_index]
 76 |                     region_now['end_site'] = points[end_index]
 77 |                     continue_region.append(region_now)
 78 | 
 79 |                 start_index = index_now
 80 | 
 81 |                 end_index = index_now
 82 | 
 83 |         return continue_region
 84 | 
 85 |     except Exception as e:
 86 | 
 87 |         print(('got exception in Jazzlib.region.continueregion: %r, terminating the pool' % (e,)))
 88 | 
 89 | 
 90 | def windowregion(chr_length, site, windowsize, chromsome):
 91 | 
 92 |     windowstart = site - int(windowsize/2)
 93 | 
 94 |     windowend = site + int(windowsize/2)
 95 | 
 96 |     if windowstart < 1:
 97 | 
 98 |         windowstart = 1
 99 | 
100 |     if windowend > chr_length:
101 | 
102 |         windowend = chr_length
103 | 
104 |     windowregion = chromsome+":"+str(windowstart)+'-'+str(windowend)
105 | 
106 |     return windowregion
107 | 
108 | 
if __name__ == "__main__":

    # Manual smoke test for continueregion.  The former bare
    # ``except: pass`` hid every failure, so errors now surface normally.
    relist = [1,2,3,4,7,9,10]

    creg = continueregion(relist, 1)

    print (creg)
122 | 
123 | 
124 | 


--------------------------------------------------------------------------------
/Jazzlib/region.py.bak:
--------------------------------------------------------------------------------
  1 | def effectregion(chrlength, windowsize, bw):
  2 | 
  3 |     """
  4 |         count effect region
  5 |         ===================--
  6 |                         --=================--
  7 | 
  8 |     """
  9 |     scare = int(chrlength/windowsize)
 10 | 
 11 |     efregions = dict()
 12 | 
 13 |     for i in range(0, scare+1):
 14 |         efregions[i] = dict()
 15 |         if i == 0:
 16 | 
 17 |             efregions[i]['ctstart'] = 1
 18 |             efregions[i]['ctend'] = int(windowsize + 1.5 * bw)
 19 |             efregions[i]['efstart'] = 1
 20 |             efregions[i]['efend'] = int(windowsize)
 21 |         elif i == scare:
 22 | 
 23 |             efregions[i]['ctstart'] = int(i * windowsize - 1.5 * bw)
 24 |             efregions[i]['ctend'] = int(chrlength)
 25 |             efregions[i]['efstart'] = int(i * windowsize + 1)
 26 |             efregions[i]['efend'] = int(chrlength)
 27 | 
 28 |         else:
 29 | 
 30 |             efregions[i]['ctstart'] = int(i * windowsize - 1.5 * bw)
 31 |             efregions[i]['ctend'] = int((i + 1) * windowsize + 1.5 * bw)
 32 |             efregions[i]['efstart'] = int(i * windowsize + 1)
 33 |             efregions[i]['efend'] = int((i + 1) * windowsize)
 34 | 
 35 |     return efregions
 36 | 
 37 | 
 38 | def continueregion(points, minlength=2):
 39 | 
 40 |     try:
 41 | 
 42 |         points.sort()
 43 | 
 44 |         start_index = 0
 45 | 
 46 |         end_index = 0
 47 | 
 48 |         continue_region = list()
 49 | 
 50 |         for index_now in range(1, len(points)):
 51 | 
 52 |             pre_index = index_now - 1
 53 | 
 54 |             if points[pre_index] + 1 == points[index_now]:
 55 | 
 56 |                 if index_now == len(points) -1:
 57 | 
 58 |                     if points[index_now] - points[start_index] + 1>= minlength :
 59 |                         #print (points[start_index], points[index_now])
 60 |                         region_now = dict()
 61 |                         region_now['start_site'] = points[start_index]
 62 |                         region_now['end_site'] = points[index_now]
 63 |                         continue_region.append(region_now)
 64 | 
 65 |                 else:
 66 | 
 67 |                     end_index = index_now
 68 | 
 69 |             else:
 70 | 
 71 |                 if points[end_index] - points[start_index] + 1 >= minlength :
 72 | 
 73 |                     #print (points[start_index], points[end_index])
 74 |                     region_now = dict()
 75 |                     region_now['start_site'] = points[start_index]
 76 |                     region_now['end_site'] = points[end_index]
 77 |                     continue_region.append(region_now)
 78 | 
 79 |                 start_index = index_now
 80 | 
 81 |                 end_index = index_now
 82 | 
 83 |         return continue_region
 84 | 
 85 |     except Exception, e:
 86 | 
 87 |         print ('got exception in Jazzlib.region.continueregion: %r, terminating the pool' % (e,))
 88 | 
 89 | 
 90 | def windowregion(chr_length, site, windowsize, chromsome):
 91 | 
 92 |     windowstart = site - int(windowsize/2)
 93 | 
 94 |     windowend = site + int(windowsize/2)
 95 | 
 96 |     if windowstart < 1:
 97 | 
 98 |         windowstart = 1
 99 | 
100 |     if windowend > chr_length:
101 | 
102 |         windowend = chr_length
103 | 
104 |     windowregion = chromsome+":"+str(windowstart)+'-'+str(windowend)
105 | 
106 |     return windowregion
107 | 
108 | 
if __name__ == "__main__":

    # Manual smoke test for continueregion.  The bare ``except: pass``
    # that used to wrap it swallowed every error, so it was removed.
    relist = [1,2,3,4,7,9,10]

    creg = continueregion(relist, 1)

    print (creg)
122 | 
123 | 
124 | 


--------------------------------------------------------------------------------
/Jazzlib/jazzio.py.bak:
--------------------------------------------------------------------------------
  1 | import io
  2 | from Peak import *
  3 | from Hotspot import *
  4 | from Peak import *
  5 | from FRegion import *
  6 | 
  7 | def peakbedswriter(samplename, peaks):
  8 | 
  9 |     bedfilename =samplename+ '_' + 'peak' + ".bed"
 10 | 
 11 |     open_bed = io.FileIO(bedfilename, 'w')
 12 | 
 13 |     for peak in peaks:
 14 | 
 15 |         #bedlist = [str(hotspot.chromosome), str(hotspot.start), str(hotspot.end), hotspot.hotspotid]
 16 |         bedlist = [str(peak.chromosome), str(peak.start), str(peak.end),str(peak.peakid),str(peak.score)]
 17 | 
 18 |         linker = "\t"
 19 | 
 20 |         outstring = linker.join(bedlist) + "\n"
 21 | 
 22 |         open_bed.write(outstring)
 23 | 
 24 |     open_bed.close()
 25 | 
 26 | 
 27 | def peakbedgraphswriter(samplename, peaks):
 28 | 
 29 |     bedfilename =samplename+ '_' + 'peak' + ".bedgraph"
 30 | 
 31 |     open_bed = io.FileIO(bedfilename, 'w')
 32 | 
 33 |     for peak in peaks:
 34 | 
 35 |         #bedlist = [str(hotspot.chromosome), str(hotspot.start), str(hotspot.end), hotspot.hotspotid]
 36 |         bedlist = [str(peak.chromosome), str(peak.start), str(peak.end),str(peak.score)]
 37 | 
 38 |         linker = "\t"
 39 | 
 40 |         outstring = linker.join(bedlist) + "\n"
 41 | 
 42 |         open_bed.write(outstring)
 43 | 
 44 |     open_bed.close()
 45 | 
 46 | 
 47 | def hotspotsbedswriter(samplename, hotspots):
 48 | 
 49 |     bedfilename =samplename+ '_' + 'hotspots' + ".bed"
 50 | 
 51 |     open_bed = io.FileIO(bedfilename, 'w')
 52 | 
 53 |     for hotspot in hotspots:
 54 | 
 55 |         #bedlist = [str(hotspot.chromosome), str(hotspot.start), str(hotspot.end), hotspot.hotspotid]
 56 |         bedlist = [str(hotspot.chromosome), str(hotspot.start), str(hotspot.end)]
 57 | 
 58 |         linker = "\t"
 59 | 
 60 |         outstring = linker.join(bedlist) + "\n"
 61 | 
 62 |         open_bed.write(outstring)
 63 | 
 64 |     open_bed.close()
 65 | 
 66 | 
 67 | def hotpeakbedswriter2(samplename, hotspots):
 68 | 
 69 |     bedfilename =samplename+ '_' + 'peaks' + ".bed"
 70 | 
 71 |     open_bed = io.FileIO(bedfilename, 'w')
 72 | 
 73 |     for hotspot in hotspots:
 74 | 
 75 |         for peak in hotspot.peaks:
 76 | 
 77 |             bedlist = [str(peak.chromosome), str(peak.start), str(peak.end),str(peak.peakid),str(peak.score)]
 78 | 
 79 |             linker = "\t"
 80 | 
 81 |             outstring = linker.join(bedlist) + "\n"
 82 | 
 83 |             open_bed.write(outstring)
 84 | 
 85 |     open_bed.close()
 86 | 
 87 | 
 88 | def jazzgffout(samplename, hotspots, peaks, fregion):
 89 | 
 90 |     bedfilename =samplename+ '_' + 'peaks_hotspots' + ".gff3"
 91 | 
 92 |     open_bed = io.FileIO(bedfilename, 'w')
 93 |     linker = "\t"
 94 | 
 95 |     frsite = dict()
 96 | 
 97 |     for fr in fregion.filted_region:
 98 | 
 99 |         (frchrnow,frstartend) = fr.split(":")
100 | 
101 |         (frstart,frend) = frstartend.split("-")
102 | 
103 |         for sitenow in range(int(frstart), int(frend)+1):
104 | 
105 |             if frchrnow in frsite:
106 | 
107 |                 frsite[frchrnow][sitenow] = 1
108 | 
109 |             else:
110 | 
111 |                 frsite[frchrnow] = dict()
112 | 
113 |                 frsite[frchrnow][sitenow] = 1
114 | 
115 |     hotspotsinfr = dict()
116 | 
117 |     for hotspot in hotspots:
118 | 
119 | 
120 | 
121 |         if hotspot.chromosome in frsite:
122 | 
123 |             for nowsite in range(hotspot.start, hotspot.end+1):
124 | 
125 |                 if nowsite in frsite[frchrnow]:
126 | 
127 |                     hotspotanno = "ID="+str(hotspot.hotspotid)+";anno=FREGION"
128 | 
129 |                     hotspotsinfr[hotspot.hotspotid] = 1
130 | 
131 |                 else:
132 | 
133 |                     hotspotanno = "ID="+str(hotspot.hotspotid)
134 | 
135 |         else:
136 | 
137 |             hotspotanno = "ID="+str(hotspot.hotspotid)
138 | 
139 |         hotspotsstr = [str(hotspot.chromosome), "JAZZ", "gene", str(hotspot.start), str(hotspot.end),
140 |                         '.', '.', '.',hotspotanno
141 |                       ]
142 | 
143 |         hotspotstring = linker.join(hotspotsstr) + "\n"
144 | 
145 |         open_bed.write(hotspotstring)
146 | 
147 | 
148 |     for peak in peaks:
149 | 
150 |         # peakanno = "Parent="+str(peak.parent)+";"+"ID="+str(peak.peakid)
151 | 
152 |         if peak.parent in hotspotsinfr:
153 |             peakanno = "Parent="+str(peak.parent)+";"+"ID="+str(peak.peakid)+";anno=FREGION"
154 |         else:
155 |             peakanno = "Parent="+str(peak.parent)+";"+"ID="+str(peak.peakid)
156 | 
157 |         peakstr = [str(peak.chromosome), "JAZZ", "CDS", str(peak.start), str(peak.end),
158 |                                 '.', '.', '.',peakanno]
159 | 
160 |         peakstring = linker.join(peakstr)+"\n"
161 | 
162 |         open_bed.write(peakstring)
163 | 
164 | 
165 |     open_bed.close()
166 | 


--------------------------------------------------------------------------------
/Jazzlib/jazzio.py:
--------------------------------------------------------------------------------
  1 | import io
  2 | from .Peak import *
  3 | from .Hotspot import *
  4 | from .Peak import *
  5 | from .FRegion import *
  6 | 
  7 | def peakbedswriter(samplename, peaks):
  8 | 
  9 |     bedfilename =samplename+ '_' + 'peak' + ".bed"
 10 | 
 11 |     open_bed = io.FileIO(bedfilename, 'w')
 12 | 
 13 |     for peak in peaks:
 14 | 
 15 |         #bedlist = [str(hotspot.chromosome), str(hotspot.start), str(hotspot.end), hotspot.hotspotid]
 16 |         bedlist = [str(peak.chromosome), str(peak.start), str(peak.end),str(peak.peakid),str(peak.score)]
 17 | 
 18 |         linker = "\t"
 19 | 
 20 |         outstring = linker.join(bedlist) + "\n"
 21 | 
 22 |         open_bed.write(bytes(outstring, encoding = 'utf-8'))
 23 | 
 24 |     open_bed.close()
 25 | 
 26 | 
 27 | def peakbedgraphswriter(samplename, peaks):
 28 | 
 29 |     bedfilename =samplename+ '_' + 'peak' + ".bedgraph"
 30 | 
 31 |     open_bed = io.FileIO(bedfilename, 'w')
 32 | 
 33 |     for peak in peaks:
 34 | 
 35 |         #bedlist = [str(hotspot.chromosome), str(hotspot.start), str(hotspot.end), hotspot.hotspotid]
 36 |         bedlist = [str(peak.chromosome), str(peak.start), str(peak.end),str(peak.score)]
 37 | 
 38 |         linker = "\t"
 39 | 
 40 |         outstring = linker.join(bedlist) + "\n"
 41 | 
 42 |         open_bed.write(bytes(outstring, encoding = 'utf-8'))
 43 | 
 44 |     open_bed.close()
 45 | 
 46 | 
 47 | def hotspotsbedswriter(samplename, hotspots):
 48 | 
 49 |     bedfilename =samplename+ '_' + 'hotspots' + ".bed"
 50 | 
 51 |     open_bed = io.FileIO(bedfilename, 'w')
 52 | 
 53 |     for hotspot in hotspots:
 54 | 
 55 |         #bedlist = [str(hotspot.chromosome), str(hotspot.start), str(hotspot.end), hotspot.hotspotid]
 56 |         bedlist = [str(hotspot.chromosome), str(hotspot.start), str(hotspot.end)]
 57 | 
 58 |         linker = "\t"
 59 | 
 60 |         outstring = linker.join(bedlist) + "\n"
 61 | 
 62 |         open_bed.write(bytes(outstring, encoding = 'utf-8'))
 63 | 
 64 |     open_bed.close()
 65 | 
 66 | 
 67 | def hotpeakbedswriter2(samplename, hotspots):
 68 | 
 69 |     bedfilename =samplename+ '_' + 'peaks' + ".bed"
 70 | 
 71 |     open_bed = io.FileIO(bedfilename, 'w')
 72 | 
 73 |     for hotspot in hotspots:
 74 | 
 75 |         for peak in hotspot.peaks:
 76 | 
 77 |             bedlist = [str(peak.chromosome), str(peak.start), str(peak.end),str(peak.peakid),str(peak.score)]
 78 | 
 79 |             linker = "\t"
 80 | 
 81 |             outstring = linker.join(bedlist) + "\n"
 82 | 
 83 |             open_bed.write(bytes(outstring, encoding = 'utf-8'))
 84 | 
 85 |     open_bed.close()
 86 | 
 87 | 
 88 | def jazzgffout(samplename, hotspots, peaks, fregion):
 89 | 
 90 |     bedfilename =samplename+ '_' + 'peaks_hotspots' + ".gff3"
 91 | 
 92 |     open_bed = io.FileIO(bedfilename, 'w')
 93 |     linker = "\t"
 94 | 
 95 |     frsite = dict()
 96 | 
 97 |     for fr in fregion.filted_region:
 98 | 
 99 |         (frchrnow,frstartend) = fr.split(":")
100 | 
101 |         (frstart,frend) = frstartend.split("-")
102 | 
103 |         for sitenow in range(int(frstart), int(frend)+1):
104 | 
105 |             if frchrnow in frsite:
106 | 
107 |                 frsite[frchrnow][sitenow] = 1
108 | 
109 |             else:
110 | 
111 |                 frsite[frchrnow] = dict()
112 | 
113 |                 frsite[frchrnow][sitenow] = 1
114 | 
115 |     hotspotsinfr = dict()
116 | 
117 |     for hotspot in hotspots:
118 | 
119 | 
120 | 
121 |         if hotspot.chromosome in frsite:
122 | 
123 |             for nowsite in range(hotspot.start, hotspot.end+1):
124 | 
125 |                 if nowsite in frsite[frchrnow]:
126 | 
127 |                     hotspotanno = "ID="+str(hotspot.hotspotid)+";anno=FREGION"
128 | 
129 |                     hotspotsinfr[hotspot.hotspotid] = 1
130 | 
131 |                 else:
132 | 
133 |                     hotspotanno = "ID="+str(hotspot.hotspotid)
134 | 
135 |         else:
136 | 
137 |             hotspotanno = "ID="+str(hotspot.hotspotid)
138 | 
139 |         hotspotsstr = [str(hotspot.chromosome), "JAZZ", "gene", str(hotspot.start), str(hotspot.end),
140 |                         '.', '.', '.',hotspotanno
141 |                       ]
142 | 
143 |         hotspotstring = linker.join(hotspotsstr) + "\n"
144 | 
145 |         # open_bed.write(hotspotstring)
146 |         open_bed.write(bytes(hotspotstring, encoding='utf-8'))
147 | 
148 |     for peak in peaks:
149 | 
150 |         # peakanno = "Parent="+str(peak.parent)+";"+"ID="+str(peak.peakid)
151 | 
152 |         if peak.parent in hotspotsinfr:
153 |             peakanno = "Parent="+str(peak.parent)+";"+"ID="+str(peak.peakid)+";anno=FREGION"
154 |         else:
155 |             peakanno = "Parent="+str(peak.parent)+";"+"ID="+str(peak.peakid)
156 | 
157 |         peakstr = [str(peak.chromosome), "JAZZ", "CDS", str(peak.start), str(peak.end),
158 |                                 '.', '.', '.',peakanno]
159 | 
160 |         peakstring = linker.join(peakstr)+"\n"
161 | 
162 |         #open_bed.write(peakstring)
163 |         open_bed.write(bytes(peakstring, encoding='utf-8'))
164 | 
165 | 
166 |     open_bed.close()
167 | 


--------------------------------------------------------------------------------
/Jazzlib/randombg.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | from multiprocessing import Pool
  4 | from .FRegion import *
  5 | import random as rnd
  6 | from .kernel import *
  7 | from numpy import *
  8 | 
  9 | class KeyboardInterruptError(Exception):
 10 | 
 11 |     pass
 12 | 
 13 | 
 14 | def randombg2(fregion, nthreads, maxinsert, randomthreshold=2, runtime=1000, randomwindow=10000):
 15 | 
 16 |     countgenomelength = fregion.countgenomelength
 17 | 
 18 |     adjreads = fregion.adjreads
 19 | 
 20 |     bg = adjreads/countgenomelength
 21 | 
 22 |     return bg
 23 | 
 24 | 
 25 | def randombg(fregion, nthreads, maxinsert, randomthreshold=2, runtime=1000, randomwindow=10000):
 26 | 
 27 |     pool = Pool(nthreads)
 28 | 
 29 |     try:
 30 | 
 31 | 
 32 | 
 33 |         countgenomeuniqlength = fregion.countgenomeuniqlength
 34 | 
 35 |         adjreads = fregion.adjreads
 36 | 
 37 |         countgenomelength = fregion.countgenomelength
 38 | 
 39 |         uniqrate = countgenomeuniqlength/countgenomelength
 40 | 
 41 |         if uniqrate <0.5:
 42 | 
 43 |             uniqrate = uniqrate * 2
 44 | 
 45 |         countreads = int(adjreads/countgenomeuniqlength * randomwindow)+1
 46 | 
 47 |         onekernel = smooth_kernel(length=maxinsert)
 48 | 
 49 |         kernel_score = list()
 50 | 
 51 |         pars = list()
 52 | 
 53 |         for i in sorted(onekernel):
 54 | 
 55 |             kernel_score.append(onekernel[i])
 56 | 
 57 |         for j in range(runtime):
 58 | 
 59 |             par = dict()
 60 | 
 61 |             par['countreads'] = countreads
 62 | 
 63 |             par['kernel_score'] = kernel_score
 64 | 
 65 |             par['uniqrate'] = uniqrate
 66 | 
 67 |             par['randomwindo'] = randomwindow
 68 | 
 69 |             par['randomthreshold'] =randomthreshold
 70 | 
 71 |             # print (par)
 72 | 
 73 |             pars.append(par)
 74 | 
 75 |         randths = pool.map(sim_bg_worker, pars)
 76 | 
 77 |         thsum = 0
 78 | 
 79 |         for randth in randths:
 80 | 
 81 |             thsum = thsum + randth
 82 | 
 83 |         random_th = thsum/runtime
 84 | 
 85 |         pool.close()
 86 | 
 87 |         return random_th
 88 | 
 89 |     except KeyboardInterrupt:
 90 | 
 91 |         pool.terminate()
 92 | 
 93 |         print ("You cancelled the program!")
 94 | 
 95 |         sys.exit(1)
 96 | 
 97 |     except Exception as e:
 98 | 
 99 |         print ('got exception in Jazzlib.randombg.randombg: %r, terminating the pool' % (e,))
100 | 
101 |         pool.terminate()
102 | 
103 |         print ('pool is terminated')
104 | 
105 |     finally:
106 |         # print ('joining pool processes')
107 |         pool.join()
108 | 
109 | 
def sim_bg_worker(par):
    """Simulate one random window and return its score threshold.

    ``par`` is a dict with the keys ``countreads``, ``kernel_score``,
    ``uniqrate``, ``randomwindo`` (sic) and ``randomthreshold``.

    ``int(uniqrate * randomwindow)`` positions of a window of
    ``randomwindow`` sites are sampled without replacement, then
    ``countreads`` reads are dropped on those positions at random.  The
    per-site counts are correlated with ``kernel_score`` (mode "same")
    and the threshold is ``mean + randomthreshold * std`` of the result.

    Returns:
        The threshold, or ``None`` when a non-interrupt exception
        occurred (it is printed).  On ``KeyboardInterrupt`` the process
        exits with status 1.
    """
    # This module never imports ``sys`` itself; import it here so the
    # interrupt handler does not depend on a star-import providing it.
    import sys

    try:

        countreads = par['countreads']

        kernel_score = par['kernel_score']

        uniqrate = par['uniqrate']

        # Key spelling must match what randombg puts into the dict.
        randomwindow = par['randomwindo']

        randomthreshold = par['randomthreshold']

        totaluniqsite = int(uniqrate * randomwindow)

        rand_reads_count = [0] * randomwindow

        region_site = list(range(0, randomwindow))

        sim_uniqsite = rnd.sample(region_site, totaluniqsite)

        for k in range(0, countreads):

            # Same random draw as before, so seeded runs are unchanged.
            rand_number = int(rnd.uniform(0, totaluniqsite))

            rand_reads = sim_uniqsite[rand_number]

            rand_reads_count[rand_reads] = rand_reads_count[rand_reads] + 1

        smoothed_result = correlate(array(rand_reads_count), kernel_score, "same")

        rand_mean = smoothed_result.mean()

        rand_std = smoothed_result.std()

        rand_threshhold = rand_mean + randomthreshold * rand_std

        return rand_threshhold

    except KeyboardInterrupt:

        print ("You cancelled the program!")

        sys.exit(1)

    except Exception as e:

        print ('got exception in Jazzlib.randombg.sim_bg_worker: %r, terminating the pool' % (e,))
172 | 
173 | 
if __name__ == "__main__":

    # ``sys`` is not imported at module level; the interrupt handler
    # below needs it.
    import sys

    try:

        # Manual smoke test: one simulated window with fixed parameters.
        onekernel = smooth_kernel(length=100)

        kernel_score = list()

        for i in sorted(onekernel):

            kernel_score.append(onekernel[i])

        par = dict()

        par['countreads'] = 100000

        par['kernel_score'] = kernel_score

        par['uniqrate'] = 0.3

        par['randomwindo'] = int(1e5)

        par['randomthreshold'] = 3

        th = sim_bg_worker(par)

        print (th)

    except KeyboardInterrupt:
        sys.stderr.write("User interrupt\n")
        sys.exit(0)


--------------------------------------------------------------------------------
/Jazzlib/randombg.py.bak:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from __future__ import print_function
  3 | from multiprocessing import Pool
  4 | from FRegion import *
  5 | import random as rnd
  6 | from kernel import *
  7 | from numpy import *
  8 | 
  9 | class KeyboardInterruptError(Exception):
 10 | 
 11 |     pass
 12 | 
 13 | 
 14 | def randombg2(fregion, nthreads, maxinsert, randomthreshold=2, runtime=1000, randomwindow=10000):
 15 | 
 16 |     countgenomelength = fregion.countgenomelength
 17 | 
 18 |     adjreads = fregion.adjreads
 19 | 
 20 |     bg = adjreads/countgenomelength
 21 | 
 22 |     return bg
 23 | 
 24 | 
 25 | def randombg(fregion, nthreads, maxinsert, randomthreshold=2, runtime=1000, randomwindow=10000):
 26 | 
 27 |     pool = Pool(nthreads)
 28 | 
 29 |     try:
 30 | 
 31 | 
 32 | 
 33 |         countgenomeuniqlength = fregion.countgenomeuniqlength
 34 | 
 35 |         adjreads = fregion.adjreads
 36 | 
 37 |         countgenomelength = fregion.countgenomelength
 38 | 
 39 |         uniqrate = countgenomeuniqlength/countgenomelength
 40 | 
 41 |         if uniqrate <0.5:
 42 | 
 43 |             uniqrate = uniqrate * 2
 44 | 
 45 |         countreads = int(adjreads/countgenomeuniqlength * randomwindow)+1
 46 | 
 47 |         onekernel = smooth_kernel(length=maxinsert)
 48 | 
 49 |         kernel_score = list()
 50 | 
 51 |         pars = list()
 52 | 
 53 |         for i in sorted(onekernel):
 54 | 
 55 |             kernel_score.append(onekernel[i])
 56 | 
 57 |         for j in range(runtime):
 58 | 
 59 |             par = dict()
 60 | 
 61 |             par['countreads'] = countreads
 62 | 
 63 |             par['kernel_score'] = kernel_score
 64 | 
 65 |             par['uniqrate'] = uniqrate
 66 | 
 67 |             par['randomwindo'] = randomwindow
 68 | 
 69 |             par['randomthreshold'] =randomthreshold
 70 | 
 71 |             # print (par)
 72 | 
 73 |             pars.append(par)
 74 | 
 75 |         randths = pool.map(sim_bg_worker, pars)
 76 | 
 77 |         thsum = 0
 78 | 
 79 |         for randth in randths:
 80 | 
 81 |             thsum = thsum + randth
 82 | 
 83 |         random_th = thsum/runtime
 84 | 
 85 |         pool.close()
 86 | 
 87 |         return random_th
 88 | 
 89 |     except KeyboardInterrupt:
 90 | 
 91 |         pool.terminate()
 92 | 
 93 |         print ("You cancelled the program!")
 94 | 
 95 |         sys.exit(1)
 96 | 
 97 |     except Exception, e:
 98 | 
 99 |         print ('got exception in Jazzlib.randombg.randombg: %r, terminating the pool' % (e,))
100 | 
101 |         pool.terminate()
102 | 
103 |         print ('pool is terminated')
104 | 
105 |     finally:
106 |         # print ('joining pool processes')
107 |         pool.join()
108 | 
109 | 
def sim_bg_worker(par):
    """Simulate one random window and return its score threshold.

    ``par`` is a dict with the keys ``countreads``, ``kernel_score``,
    ``uniqrate``, ``randomwindo`` (sic) and ``randomthreshold``.

    ``int(uniqrate * randomwindow)`` positions of the window are sampled
    without replacement and ``countreads`` reads are dropped on them at
    random.  The per-site counts are correlated with ``kernel_score``
    (mode "same"); the threshold is ``mean + randomthreshold * std``.

    Returns:
        The threshold, or ``None`` when a non-interrupt exception
        occurred (it is printed).  On ``KeyboardInterrupt`` the process
        exits with status 1.
    """
    # This module never imports ``sys`` itself; import it here so the
    # interrupt handler does not depend on a star-import providing it.
    import sys

    try:

        countreads = par['countreads']

        kernel_score = par['kernel_score']

        uniqrate = par['uniqrate']

        # Key spelling must match what randombg puts into the dict.
        randomwindow = par['randomwindo']

        randomthreshold = par['randomthreshold']

        totaluniqsite = int(uniqrate * randomwindow)

        rand_reads_count = [0] * randomwindow

        region_site = range(0, randomwindow)

        sim_uniqsite = rnd.sample(region_site, totaluniqsite)

        for k in range(0, countreads):

            # Same random draw as before, so seeded runs are unchanged.
            rand_number = int(rnd.uniform(0, totaluniqsite))

            rand_reads = sim_uniqsite[rand_number]

            rand_reads_count[rand_reads] = rand_reads_count[rand_reads] + 1

        smoothed_result = correlate(array(rand_reads_count), kernel_score, "same")

        rand_mean = smoothed_result.mean()

        rand_std = smoothed_result.std()

        rand_threshhold = rand_mean + randomthreshold * rand_std

        return rand_threshhold

    except KeyboardInterrupt:

        print ("You cancelled the program!")

        sys.exit(1)

    # "except ... as" is accepted by Python 2.6+ and Python 3 alike.
    except Exception as e:

        print ('got exception in Jazzlib.randombg.sim_bg_worker: %r, terminating the pool' % (e,))
172 | 
173 | 
if __name__ == "__main__":

    # ``sys`` is not imported at module level; the interrupt handler
    # below needs it.
    import sys

    try:

        # Manual smoke test: one simulated window with fixed parameters.
        onekernel = smooth_kernel(length=100)

        kernel_score = list()

        for i in sorted(onekernel):

            kernel_score.append(onekernel[i])

        par = dict()

        par['countreads'] = 100000

        par['kernel_score'] = kernel_score

        par['uniqrate'] = 0.3

        par['randomwindo'] = int(1e5)

        par['randomthreshold'] = 3

        th = sim_bg_worker(par)

        print (th)

    except KeyboardInterrupt:
        sys.stderr.write("User interrupt\n")
        sys.exit(0)


--------------------------------------------------------------------------------
/Jazzlib/localmax.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | from .kernelsmooth import *
  4 | from multiprocessing import Pool
  5 | from .kernel import *
  6 | from .FRegion import *
  7 | 
  8 | 
  9 | class KeyboardInterruptError(Exception):
 10 | 
 11 |     pass
 12 | 
 13 | 
 14 | def get_all_localmax(bamfile, jobtype, maxinsert, nthreads, fregion, countchr, rndth):
 15 | 
 16 |     pool = Pool(nthreads)
 17 | 
 18 |     try:
 19 | 
 20 |         pars = list()
 21 | 
 22 |         windowsize = 100000
 23 | 
 24 |         adjreads = fregion.adjreads
 25 | 
 26 |         totallength = 0
 27 | 
 28 |         onesmoothkernel = smooth_kernel(30)
 29 | 
 30 |         kermax = max(onesmoothkernel.values())
 31 |         #
 32 | 
 33 | 
 34 |         for chromosmoe in countchr:
 35 | 
 36 |             chr_length = fregion.chrs_length[chromosmoe]
 37 | 
 38 |             totallength = totallength + chr_length
 39 | 
 40 |             for scare in range(0, int(chr_length/windowsize)+1):
 41 | 
 42 |                 nowstart = scare*windowsize + 1 -200
 43 | 
 44 |                 nowend = (scare+1)*windowsize + 200
 45 | 
 46 |                 if nowend > chr_length:
 47 | 
 48 |                     nowend = chr_length
 49 | 
 50 |                 if nowstart < 1:
 51 | 
 52 |                     nowstart = 1
 53 | 
 54 |                 nowregion = chromosmoe + ":" + str(nowstart) + "-" + str(nowend)
 55 | 
 56 |                 par = dict()
 57 | 
 58 |                 par['region'] = nowregion
 59 | 
 60 |                 par['maxinsert'] = maxinsert
 61 | 
 62 |                 par['bamfile'] = bamfile
 63 | 
 64 |                 par['jobtype'] = jobtype
 65 | 
 66 |                 par['chrlength'] = chr_length
 67 | 
 68 |                 par['regionchromosome'] = chromosmoe
 69 | 
 70 |                 par['regionstart'] = nowstart
 71 | 
 72 |                 par['regionend'] = nowend
 73 | 
 74 |                 par['rndth'] = rndth
 75 | 
 76 |                 pars.append(par)
 77 | 
 78 |         avgcount = adjreads/totallength
 79 | 
 80 |         threshhold = int(avgcount + 1) * kermax
 81 |         ###test threhhold
 82 |         #threshhold = avgcount
 83 | 
 84 |         # print ("threshhold:", threshhold)
 85 | 
 86 |         filted_region = fregion.filted_region
 87 | 
 88 |         filted_site = dict()
 89 | 
 90 |         for fr in filted_region:
 91 | 
 92 |             chromosome, sesite = fr.split(':')
 93 | 
 94 |             startsite, endsite = sesite.split('-')
 95 | 
 96 |             startsite = int(startsite)
 97 | 
 98 |             endsite = int(endsite)
 99 | 
100 |             if chromosome in filted_site:
101 | 
102 |                 for i in range(startsite,endsite):
103 | 
104 |                     filted_site[chromosome][i] = 1
105 | 
106 |             else:
107 | 
108 |                 filted_site[chromosome] = dict()
109 | 
110 |                 for i in range(startsite,endsite):
111 | 
112 |                     filted_site[chromosome][i] = 1
113 | 
114 |         localmax = dict()
115 | 
116 |         localmax_worker_returnres = pool.map(localmax_worker, pars)
117 | 
118 |         for each_worker_res in localmax_worker_returnres:
119 | 
120 |             for chromosome in each_worker_res:
121 | 
122 |                 for site in each_worker_res[chromosome]:
123 | 
124 |                     if chromosome in localmax:
125 | 
126 |                         if each_worker_res[chromosome][site] > threshhold:
127 | 
128 |                             if chromosome in filted_site:
129 | 
130 |                                 if site in filted_site[chromosome]:
131 | 
132 |                                     continue
133 | 
134 |                             localmax[chromosome][site] = each_worker_res[chromosome][site]
135 | 
136 |                     else:
137 | 
138 |                         if each_worker_res[chromosome][site]>threshhold:
139 | 
140 |                             if chromosome in filted_site:
141 | 
142 |                                 if site in filted_site[chromosome]:
143 | 
144 |                                     continue
145 | 
146 |                             localmax[chromosome] = dict()
147 | 
148 |                             localmax[chromosome][site] = each_worker_res[chromosome][site]
149 | 
150 |         pool.close()
151 | 
152 |         # print (localmax)
153 | 
154 |         return localmax
155 | 
156 |     except KeyboardInterrupt:
157 | 
158 |         pool.terminate()
159 | 
160 |         print ("You cancelled the program!")
161 | 
162 |         sys.exit(1)
163 | 
164 |     except Exception as e:
165 | 
166 |         print ('got exception in Jazzlib.localmax.get_all_localmax: %r, terminating the pool' % (e,))
167 | 
168 |         pool.terminate()
169 | 
170 |         print ('pool is terminated')
171 | 
172 |     finally:
173 |         #     print ('joining pool processes')
174 |         pool.join()
175 |             # print ('join complete')
176 | 
177 | 
def localmax_worker(par):
    """Pool worker: smooth one region and return its local maxima.

    ``par`` is a dict with the keys ``region``, ``maxinsert``, ``bamfile``,
    ``jobtype``, ``chrlength``, ``regionchromosome``, ``regionstart``,
    ``regionend`` and ``rndth``.  The result of ``smoothedlocalmax`` is
    returned.  Any ordinary exception is printed and ``None`` is returned;
    a ``KeyboardInterrupt`` is re-raised as ``KeyboardInterruptError``.
    """
    try:
        fields = ('region', 'maxinsert', 'bamfile', 'jobtype', 'chrlength',
                  'regionchromosome', 'regionstart', 'regionend', 'rndth')

        # Every key is looked up, in this order, before any work starts.
        values = dict((field, par[field]) for field in fields)

        smoothed = regionsmooth(bamfile=values['bamfile'],
                                maxinsert=values['maxinsert'],
                                jobtype=values['jobtype'],
                                regionchromosome=values['regionchromosome'],
                                regionstart=values['regionstart'],
                                regionend=values['regionend'],
                                chr_length=values['chrlength'])

        return smoothedlocalmax(smoothed, values['rndth'])

    except KeyboardInterrupt:
        raise KeyboardInterruptError()

    except Exception as e:
        print ('got exception in Jazzlib.localmax.localmax_worker: %r,' % (e,))
219 | 
220 | 
def smoothedlocalmax(smoothedscore, rndth):
    """Return the local maxima of a smoothed score track.

    ``smoothedscore`` is a dict with a ``'chromosome'`` name and a ``'score'``
    mapping of position -> value.  A position ``p`` is reported when its score
    is at least ``rndth`` and
    ``s[p-2] < s[p-1] <= s[p] >= s[p+1] > s[p+2]``.

    Returns ``{chromosome: {position: score}}``.  Any ordinary exception is
    printed and ``None`` is returned; a ``KeyboardInterrupt`` is re-raised as
    ``KeyboardInterruptError``.
    """
    try:
        scores = smoothedscore['score']
        first = min(scores.keys())
        last = max(scores.keys())

        found = dict()
        maxsites = {smoothedscore['chromosome']: found}

        # NOTE(review): the scan stops at last-3, so position last-2 is never
        # tested even though both of its right neighbours exist.
        for pos in range(first + 2, last - 2):
            centre = scores[pos]

            if not centre >= rndth:
                continue

            if scores[pos - 2] < scores[pos - 1] <= centre >= scores[pos + 1] > scores[pos + 2]:
                found[pos] = centre

        return maxsites

    except KeyboardInterrupt:
        raise KeyboardInterruptError()

    except Exception as e:
        print ('got exception in Jazzlib.localmax.smoothedlocalmax: %r,' % (e,))
252 | 
253 |         print ('got exception in Jazzlib.localmax.smoothedlocalmax: %r,' % (e,))
254 | 
255 | 


--------------------------------------------------------------------------------
/Jazzlib/sta.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | from scipy.special import gammaincc
  4 | from scipy import math
  5 | import scipy.stats as stats
  6 | from decimal import Decimal, localcontext
  7 | from .Peak import *
  8 | import sys
  9 | 
 10 | def bionompvalue(x, n, p):
 11 | 
 12 |     bionompvalue = 1 - stats.binom.cdf(x, n, p)
 13 | 
 14 |     return bionompvalue
 15 | 
 16 | 
 17 | def poissonpvalue(x,mu):
 18 | 
 19 |     poissonpvalue = Decimal(1) - Decimal(stats.poisson.cdf(x, mu))
 20 | 
 21 |     return poissonpvalue
 22 | 
 23 | 
 24 | 
 25 | def fdr(pnow, plist, prank):
 26 |     #FDR=length(pvalue)*pvalue/rank(pvalue)
 27 | 
 28 |     rankofplist = prank
 29 | 
 30 |     lengthofplist = len(plist)
 31 | 
 32 |     for i in range(0,lengthofplist):
 33 | 
 34 |         if plist[i] == pnow:
 35 |             now_rank = rankofplist[i]
 36 |             fdr = lengthofplist*pnow/now_rank
 37 |             fdr = min(1,fdr)
 38 |             break
 39 | 
 40 |     return fdr
 41 | 
 42 | 
 43 | def bayesfactor(locallambda, peakscore):
 44 | 
 45 |     try:
 46 | 
 47 |         # bayesfactor = 2 * (math.log((gammaincc(peakscore-1, locallambda)*gamma(peakscore-1)), math.e) - (peakscore-1)*math.log(locallambda, math.e) + locallambda)
 48 |         #
 49 |         # a = (math.log(gammaincc(peakscore-1, locallambda), math.e) )
 50 |         # b = math.lgamma(peakscore-1)
 51 |         # c=(peakscore-1)*math.log(locallambda, math.e)
 52 |         # print (locallambda,peakscore,a,b,c)
 53 |         bayesfactor2 = 2 * (math.log(gammaincc(peakscore-1, locallambda), math.e)+math.lgamma(peakscore-1) - (peakscore-1)*math.log(locallambda, math.e) + locallambda)
 54 | 
 55 |         return bayesfactor2
 56 | 
 57 |     except Exception as e:
 58 | 
 59 |         print ('got exception in Jazzlib.sta.bayesfactor: %r,' % (e,))
 60 | 
 61 |         print (locallambda, peakscore)
 62 | 
 63 |     except KeyboardInterrupt:
 64 | 
 65 |         sys.stderr.write("User interrupt\n")
 66 | 
 67 |         sys.exit(0)
 68 | 
 69 | def fdr_control(chippeaks, inputpeaks, fdr):
 70 | 
 71 |     fdrpeakdict = dict()
 72 | 
 73 |     chipscore = list()
 74 | 
 75 |     inputscore = list()
 76 | 
 77 |     overlaptedpeak = dict()
 78 | 
 79 |     fdrth = -1
 80 | 
 81 |     # print ("check fdr")
 82 | 
 83 |     for inputpeak in inputpeaks:
 84 | 
 85 |         start = inputpeak.start
 86 | 
 87 |         end = inputpeak.end
 88 | 
 89 |         inputscore.append(inputpeak.score)
 90 | 
 91 |         for chippeak in chippeaks:
 92 | 
 93 |             if chippeak.chromosome == inputpeak.chromosome:
 94 | 
 95 |                 if chippeak.peakpoint == inputpeak.peakpoint:
 96 | 
 97 |                     overlaptedpeak[chippeak.peakid] = dict()
 98 | 
 99 |                     overlaptedpeak[chippeak.peakid]['inputscore'] = inputpeak.score
100 | 
101 |                     overlaptedpeak[chippeak.peakid]['chipscore'] = chippeak.score
102 | 
103 |                     chipscore.append(chippeak.score)
104 | 
105 |                     # print(chippeak.chromosome, chippeak.peakpoint, chippeak.score, inputpeak.score, chippeak.peakid)
106 | 
107 |     for i in sorted(chipscore):
108 | 
109 |         # print("score", i)
110 | 
111 |         chippeakcount = 0.0
112 | 
113 |         inputpeakcount = 0.0
114 | 
115 |         for peakid in overlaptedpeak:
116 | 
117 |             if i <= overlaptedpeak[peakid]['inputscore']:
118 | 
119 |                 inputpeakcount = inputpeakcount + 1
120 | 
121 |         for chippeak in chippeaks:
122 | 
123 |             if i <= chippeak.score:
124 | 
125 |                 chippeakcount = chippeakcount + 1
126 | 
127 |         nowfdr = inputpeakcount/chippeakcount
128 | 
129 |         # print (i, chippeakcount, inputpeakcount, nowfdr)
130 | 
131 |         if chippeakcount == 0:
132 | 
133 |             break
134 | 
135 |         for peaknow in chippeaks:
136 | 
137 |             if peaknow.score > i:
138 | 
139 |                 peaknow.fdr = nowfdr
140 | 
141 | 
142 |         # if (inputpeakcount/chippeakcount) < fdr:
143 |         #
144 |         #     fdrth = i
145 |         #
146 |         #     break
147 | 
148 |     return chippeaks
149 | 
150 | 
151 | 
152 | def fdr_control2(chippeaks, inputpeaks, fdr):
153 | 
154 |     fdrpeakdict = dict()
155 | 
156 |     chipscore = list()
157 | 
158 |     inputscore = list()
159 | 
160 |     overlaptedpeak = dict()
161 | 
162 |     fdrth = -1
163 | 
164 |     # print ("check fdr")
165 | 
166 |     for inputpeak in inputpeaks:
167 | 
168 |         start = inputpeak.start
169 | 
170 |         end = inputpeak.end
171 | 
172 |         inputscore.append(inputpeak.score)
173 | 
174 |         for chippeak in chippeaks:
175 | 
176 |             if chippeak.chromosome == inputpeak.chromosome:
177 | 
178 |                 if inputpeak.start <chippeak.peakpoint < inputpeak.end:
179 | 
180 |                     overlaptedpeak[chippeak.peakid] = dict()
181 | 
182 |                     overlaptedpeak[chippeak.peakid]['inputscore'] = inputpeak.score
183 | 
184 |                     overlaptedpeak[chippeak.peakid]['chipscore'] = chippeak.score
185 | 
186 |                     chipscore.append(chippeak.score)
187 | 
188 |                     # print(chippeak.chromosome, chippeak.peakpoint, chippeak.score, inputpeak.score, chippeak.peakid)
189 | 
190 |     for i in sorted(chipscore):
191 | 
192 |         # print("score", i)
193 | 
194 |         chippeakcount = 0.0
195 | 
196 |         inputpeakcount = 0.0
197 | 
198 |         for peakid in overlaptedpeak:
199 | 
200 |             if i <= overlaptedpeak[peakid]['inputscore']:
201 | 
202 |                 inputpeakcount = inputpeakcount + 1
203 | 
204 |         for chippeak in chippeaks:
205 | 
206 |             if i <= chippeak.score:
207 | 
208 |                 chippeakcount = chippeakcount + 1
209 | 
210 |         nowfdr = inputpeakcount/chippeakcount
211 | 
212 |         if chippeakcount == 0:
213 | 
214 |             break
215 | 
216 |         for peaknow in chippeaks:
217 | 
218 |             if peaknow.score > i:
219 | 
220 |                 peaknow.fdr = nowfdr
221 | 
222 |     return chippeaks
223 | 
224 | 
def fdr_bh(peaks):
    """Set ``peak.fdr`` on every peak from its score and return ``peaks``.

    Each score is converted to ``b01 = exp(-score / 2)``.  The values are
    ranked in descending order (ties share the first position) and
    ``peak.fdr = b01 * len(peaks) / rank``.

    NOTE(review): the descending ranking gives rank 1 to the largest ``b01``,
    the opposite of the usual Benjamini-Hochberg rank, and the result is not
    capped at 1.  Both are kept unchanged here; confirm the intent.
    """
    b01s = []

    for peak in peaks:
        try:
            # exp(-x) underflows to 0.0 for large scores, where the former
            # 1 / (e ** x) raised OverflowError.
            b01 = math.exp(-peak.score / 2)
        except OverflowError:
            # Very negative score: treat the value as infinitely large.
            b01 = float('inf')
        b01s.append(b01)

    listlength = len(b01s)

    # value -> 1-based position of its first occurrence in descending order.
    first_rank = {}
    for position, value in enumerate(sorted(b01s, reverse=True)):
        first_rank.setdefault(value, position + 1)

    for peak, b01 in zip(peaks, b01s):
        peak.fdr = b01 * listlength / first_rank.get(b01, 1)

    return peaks
262 | 
263 | 
264 | 
265 | 
266 | 
if __name__ == "__main__":

    # Manual check: print bayesfactor() over a grid of rates and peak scores.
    try:
        for rate in range(100, 2000, 100):
            for score in range(2, 80):
                bs = bayesfactor(locallambda=rate, peakscore=score)
                print ("locallambda:", rate, "peakscore", score, "bs", bs)

    except KeyboardInterrupt:
        sys.stderr.write("User interrupt\n")
        sys.exit(0)


--------------------------------------------------------------------------------
/Jazzlib/localmax.py.bak:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from __future__ import print_function
  3 | from kernelsmooth import *
  4 | from multiprocessing import Pool
  5 | from kernel import *
  6 | from FRegion import *
  7 | 
  8 | 
  9 | class KeyboardInterruptError(Exception):
 10 | 
 11 |     pass
 12 | 
 13 | 
 14 | def get_all_localmax(bamfile, jobtype, maxinsert, nthreads, fregion, countchr, rndth):
 15 | 
 16 |     pool = Pool(nthreads)
 17 | 
 18 |     try:
 19 | 
 20 |         pars = list()
 21 | 
 22 |         windowsize = 100000
 23 | 
 24 |         adjreads = fregion.adjreads
 25 | 
 26 |         totallength = 0
 27 | 
 28 |         onesmoothkernel = smooth_kernel(30)
 29 | 
 30 |         kermax = max(onesmoothkernel.values())
 31 |         #
 32 | 
 33 | 
 34 |         for chromosmoe in countchr:
 35 | 
 36 |             chr_length = fregion.chrs_length[chromosmoe]
 37 | 
 38 |             totallength = totallength + chr_length
 39 | 
 40 |             for scare in range(0, int(chr_length/windowsize)+1):
 41 | 
 42 |                 nowstart = scare*windowsize + 1 -200
 43 | 
 44 |                 nowend = (scare+1)*windowsize + 200
 45 | 
 46 |                 if nowend > chr_length:
 47 | 
 48 |                     nowend = chr_length
 49 | 
 50 |                 if nowstart < 1:
 51 | 
 52 |                     nowstart = 1
 53 | 
 54 |                 nowregion = chromosmoe + ":" + str(nowstart) + "-" + str(nowend)
 55 | 
 56 |                 par = dict()
 57 | 
 58 |                 par['region'] = nowregion
 59 | 
 60 |                 par['maxinsert'] = maxinsert
 61 | 
 62 |                 par['bamfile'] = bamfile
 63 | 
 64 |                 par['jobtype'] = jobtype
 65 | 
 66 |                 par['chrlength'] = chr_length
 67 | 
 68 |                 par['regionchromosome'] = chromosmoe
 69 | 
 70 |                 par['regionstart'] = nowstart
 71 | 
 72 |                 par['regionend'] = nowend
 73 | 
 74 |                 par['rndth'] = rndth
 75 | 
 76 |                 pars.append(par)
 77 | 
 78 |         avgcount = adjreads/totallength
 79 | 
 80 |         threshhold = int(avgcount + 1) * kermax
 81 |         ###test threhhold
 82 |         #threshhold = avgcount
 83 | 
 84 |         # print ("threshhold:", threshhold)
 85 | 
 86 |         filted_region = fregion.filted_region
 87 | 
 88 |         filted_site = dict()
 89 | 
 90 |         for fr in filted_region:
 91 | 
 92 |             chromosome, sesite = fr.split(':')
 93 | 
 94 |             startsite, endsite = sesite.split('-')
 95 | 
 96 |             startsite = int(startsite)
 97 | 
 98 |             endsite = int(endsite)
 99 | 
100 |             if chromosome in filted_site:
101 | 
102 |                 for i in range(startsite,endsite):
103 | 
104 |                     filted_site[chromosome][i] = 1
105 | 
106 |             else:
107 | 
108 |                 filted_site[chromosome] = dict()
109 | 
110 |                 for i in range(startsite,endsite):
111 | 
112 |                     filted_site[chromosome][i] = 1
113 | 
114 |         localmax = dict()
115 | 
116 |         localmax_worker_returnres = pool.map(localmax_worker, pars)
117 | 
118 |         for each_worker_res in localmax_worker_returnres:
119 | 
120 |             for chromosome in each_worker_res:
121 | 
122 |                 for site in each_worker_res[chromosome]:
123 | 
124 |                     if chromosome in localmax:
125 | 
126 |                         if each_worker_res[chromosome][site] > threshhold:
127 | 
128 |                             if chromosome in filted_site:
129 | 
130 |                                 if site in filted_site[chromosome]:
131 | 
132 |                                     continue
133 | 
134 |                             localmax[chromosome][site] = each_worker_res[chromosome][site]
135 | 
136 |                     else:
137 | 
138 |                         if each_worker_res[chromosome][site]>threshhold:
139 | 
140 |                             if chromosome in filted_site:
141 | 
142 |                                 if site in filted_site[chromosome]:
143 | 
144 |                                     continue
145 | 
146 |                             localmax[chromosome] = dict()
147 | 
148 |                             localmax[chromosome][site] = each_worker_res[chromosome][site]
149 | 
150 |         pool.close()
151 | 
152 |         # print (localmax)
153 | 
154 |         return localmax
155 | 
156 |     except KeyboardInterrupt:
157 | 
158 |         pool.terminate()
159 | 
160 |         print ("You cancelled the program!")
161 | 
162 |         sys.exit(1)
163 | 
164 |     except Exception, e:
165 | 
166 |         print ('got exception in Jazzlib.localmax.get_all_localmax: %r, terminating the pool' % (e,))
167 | 
168 |         pool.terminate()
169 | 
170 |         print ('pool is terminated')
171 | 
172 |     finally:
173 |         #     print ('joining pool processes')
174 |         pool.join()
175 |             # print ('join complete')
176 | 
177 | 
def localmax_worker(par):
    """Pool worker: smooth one region and return its local maxima.

    ``par`` is a dict with the keys ``region``, ``maxinsert``, ``bamfile``,
    ``jobtype``, ``chrlength``, ``regionchromosome``, ``regionstart``,
    ``regionend`` and ``rndth``.  The result of ``smoothedlocalmax`` is
    returned.  Any ordinary exception is printed and ``None`` is returned;
    a ``KeyboardInterrupt`` is re-raised as ``KeyboardInterruptError``.
    """
    try:
        fields = ('region', 'maxinsert', 'bamfile', 'jobtype', 'chrlength',
                  'regionchromosome', 'regionstart', 'regionend', 'rndth')

        # Every key is looked up, in this order, before any work starts.
        values = dict((field, par[field]) for field in fields)

        smoothed = regionsmooth(bamfile=values['bamfile'],
                                maxinsert=values['maxinsert'],
                                jobtype=values['jobtype'],
                                regionchromosome=values['regionchromosome'],
                                regionstart=values['regionstart'],
                                regionend=values['regionend'],
                                chr_length=values['chrlength'])

        return smoothedlocalmax(smoothed, values['rndth'])

    except KeyboardInterrupt:
        raise KeyboardInterruptError()

    # ``as`` form: valid on Python 2.6+ and Python 3, unlike ``except Exception, e``.
    except Exception as e:
        print ('got exception in Jazzlib.localmax.localmax_worker: %r,' % (e,))
219 | 
220 | 
def smoothedlocalmax(smoothedscore, rndth):
    """Return the local maxima of a smoothed score track.

    ``smoothedscore`` is a dict with a ``'chromosome'`` name and a ``'score'``
    mapping of position -> value.  A position ``p`` is reported when its score
    is at least ``rndth`` and
    ``s[p-2] < s[p-1] <= s[p] >= s[p+1] > s[p+2]``.

    Returns ``{chromosome: {position: score}}``.  Any ordinary exception is
    printed and ``None`` is returned; a ``KeyboardInterrupt`` is re-raised as
    ``KeyboardInterruptError``.
    """
    try:
        scores = smoothedscore['score']
        first = min(scores.keys())
        last = max(scores.keys())

        found = dict()
        maxsites = {smoothedscore['chromosome']: found}

        # NOTE(review): the scan stops at last-3, so position last-2 is never
        # tested even though both of its right neighbours exist.
        for pos in range(first + 2, last - 2):
            centre = scores[pos]

            if not centre >= rndth:
                continue

            if scores[pos - 2] < scores[pos - 1] <= centre >= scores[pos + 1] > scores[pos + 2]:
                found[pos] = centre

        return maxsites

    except KeyboardInterrupt:
        raise KeyboardInterruptError()

    # ``as`` form: valid on Python 2.6+ and Python 3, unlike ``except Exception, e``.
    except Exception as e:
        print ('got exception in Jazzlib.localmax.smoothedlocalmax: %r,' % (e,))
252 | 
253 |         print ('got exception in Jazzlib.localmax.smoothedlocalmax: %r,' % (e,))
254 | 
255 | 


--------------------------------------------------------------------------------
/Jazzlib/sta.py.bak:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from __future__ import print_function
  3 | from scipy.special import gammaincc
  4 | from scipy import math
  5 | import scipy.stats as stats
  6 | from decimal import Decimal, localcontext
  7 | from Peak import *
  8 | import sys
  9 | 
 10 | def bionompvalue(x, n, p):
 11 | 
 12 |     bionompvalue = 1 - stats.binom.cdf(x, n, p)
 13 | 
 14 |     return bionompvalue
 15 | 
 16 | 
 17 | def poissonpvalue(x,mu):
 18 | 
 19 |     poissonpvalue = Decimal(1) - Decimal(stats.poisson.cdf(x, mu))
 20 | 
 21 |     return poissonpvalue
 22 | 
 23 | 
 24 | 
 25 | def fdr(pnow, plist, prank):
 26 |     #FDR=length(pvalue)*pvalue/rank(pvalue)
 27 | 
 28 |     rankofplist = prank
 29 | 
 30 |     lengthofplist = len(plist)
 31 | 
 32 |     for i in range(0,lengthofplist):
 33 | 
 34 |         if plist[i] == pnow:
 35 |             now_rank = rankofplist[i]
 36 |             fdr = lengthofplist*pnow/now_rank
 37 |             fdr = min(1,fdr)
 38 |             break
 39 | 
 40 |     return fdr
 41 | 
 42 | 
 43 | def bayesfactor(locallambda, peakscore):
 44 | 
 45 |     try:
 46 | 
 47 |         # bayesfactor = 2 * (math.log((gammaincc(peakscore-1, locallambda)*gamma(peakscore-1)), math.e) - (peakscore-1)*math.log(locallambda, math.e) + locallambda)
 48 |         #
 49 |         # a = (math.log(gammaincc(peakscore-1, locallambda), math.e) )
 50 |         # b = math.lgamma(peakscore-1)
 51 |         # c=(peakscore-1)*math.log(locallambda, math.e)
 52 |         # print (locallambda,peakscore,a,b,c)
 53 |         bayesfactor2 = 2 * (math.log(gammaincc(peakscore-1, locallambda), math.e)+math.lgamma(peakscore-1) - (peakscore-1)*math.log(locallambda, math.e) + locallambda)
 54 | 
 55 |         return bayesfactor2
 56 | 
 57 |     except Exception, e:
 58 | 
 59 |         print ('got exception in Jazzlib.sta.bayesfactor: %r,' % (e,))
 60 | 
 61 |         print (locallambda, peakscore)
 62 | 
 63 |     except KeyboardInterrupt:
 64 | 
 65 |         sys.stderr.write("User interrupt\n")
 66 | 
 67 |         sys.exit(0)
 68 | 
 69 | def fdr_control(chippeaks, inputpeaks, fdr):
 70 | 
 71 |     fdrpeakdict = dict()
 72 | 
 73 |     chipscore = list()
 74 | 
 75 |     inputscore = list()
 76 | 
 77 |     overlaptedpeak = dict()
 78 | 
 79 |     fdrth = -1
 80 | 
 81 |     # print ("check fdr")
 82 | 
 83 |     for inputpeak in inputpeaks:
 84 | 
 85 |         start = inputpeak.start
 86 | 
 87 |         end = inputpeak.end
 88 | 
 89 |         inputscore.append(inputpeak.score)
 90 | 
 91 |         for chippeak in chippeaks:
 92 | 
 93 |             if chippeak.chromosome == inputpeak.chromosome:
 94 | 
 95 |                 if chippeak.peakpoint == inputpeak.peakpoint:
 96 | 
 97 |                     overlaptedpeak[chippeak.peakid] = dict()
 98 | 
 99 |                     overlaptedpeak[chippeak.peakid]['inputscore'] = inputpeak.score
100 | 
101 |                     overlaptedpeak[chippeak.peakid]['chipscore'] = chippeak.score
102 | 
103 |                     chipscore.append(chippeak.score)
104 | 
105 |                     # print(chippeak.chromosome, chippeak.peakpoint, chippeak.score, inputpeak.score, chippeak.peakid)
106 | 
107 |     for i in sorted(chipscore):
108 | 
109 |         # print("score", i)
110 | 
111 |         chippeakcount = 0.0
112 | 
113 |         inputpeakcount = 0.0
114 | 
115 |         for peakid in overlaptedpeak:
116 | 
117 |             if i <= overlaptedpeak[peakid]['inputscore']:
118 | 
119 |                 inputpeakcount = inputpeakcount + 1
120 | 
121 |         for chippeak in chippeaks:
122 | 
123 |             if i <= chippeak.score:
124 | 
125 |                 chippeakcount = chippeakcount + 1
126 | 
127 |         nowfdr = inputpeakcount/chippeakcount
128 | 
129 |         # print (i, chippeakcount, inputpeakcount, nowfdr)
130 | 
131 |         if chippeakcount == 0:
132 | 
133 |             break
134 | 
135 |         for peaknow in chippeaks:
136 | 
137 |             if peaknow.score > i:
138 | 
139 |                 peaknow.fdr = nowfdr
140 | 
141 | 
142 |         # if (inputpeakcount/chippeakcount) < fdr:
143 |         #
144 |         #     fdrth = i
145 |         #
146 |         #     break
147 | 
148 |     return chippeaks
149 | 
150 | 
151 | 
152 | def fdr_control2(chippeaks, inputpeaks, fdr):
153 | 
154 |     fdrpeakdict = dict()
155 | 
156 |     chipscore = list()
157 | 
158 |     inputscore = list()
159 | 
160 |     overlaptedpeak = dict()
161 | 
162 |     fdrth = -1
163 | 
164 |     # print ("check fdr")
165 | 
166 |     for inputpeak in inputpeaks:
167 | 
168 |         start = inputpeak.start
169 | 
170 |         end = inputpeak.end
171 | 
172 |         inputscore.append(inputpeak.score)
173 | 
174 |         for chippeak in chippeaks:
175 | 
176 |             if chippeak.chromosome == inputpeak.chromosome:
177 | 
178 |                 if inputpeak.start <chippeak.peakpoint < inputpeak.end:
179 | 
180 |                     overlaptedpeak[chippeak.peakid] = dict()
181 | 
182 |                     overlaptedpeak[chippeak.peakid]['inputscore'] = inputpeak.score
183 | 
184 |                     overlaptedpeak[chippeak.peakid]['chipscore'] = chippeak.score
185 | 
186 |                     chipscore.append(chippeak.score)
187 | 
188 |                     # print(chippeak.chromosome, chippeak.peakpoint, chippeak.score, inputpeak.score, chippeak.peakid)
189 | 
190 |     for i in sorted(chipscore):
191 | 
192 |         # print("score", i)
193 | 
194 |         chippeakcount = 0.0
195 | 
196 |         inputpeakcount = 0.0
197 | 
198 |         for peakid in overlaptedpeak:
199 | 
200 |             if i <= overlaptedpeak[peakid]['inputscore']:
201 | 
202 |                 inputpeakcount = inputpeakcount + 1
203 | 
204 |         for chippeak in chippeaks:
205 | 
206 |             if i <= chippeak.score:
207 | 
208 |                 chippeakcount = chippeakcount + 1
209 | 
210 |         nowfdr = inputpeakcount/chippeakcount
211 | 
212 |         if chippeakcount == 0:
213 | 
214 |             break
215 | 
216 |         for peaknow in chippeaks:
217 | 
218 |             if peaknow.score > i:
219 | 
220 |                 peaknow.fdr = nowfdr
221 | 
222 |     return chippeaks
223 | 
224 | 
def fdr_bh(peaks):
    """Set ``peak.fdr`` on every peak from its score and return ``peaks``.

    Each score is converted to ``b01 = exp(-score / 2)``.  The values are
    ranked in descending order (ties share the first position) and
    ``peak.fdr = b01 * len(peaks) / rank``.

    NOTE(review): the descending ranking gives rank 1 to the largest ``b01``,
    the opposite of the usual Benjamini-Hochberg rank, and the result is not
    capped at 1.  Both are kept unchanged here; confirm the intent.
    """
    b01s = []

    for peak in peaks:
        try:
            # exp(-x) underflows to 0.0 for large scores, where the former
            # 1 / (e ** x) raised OverflowError.
            b01 = math.exp(-peak.score / 2)
        except OverflowError:
            # Very negative score: treat the value as infinitely large.
            b01 = float('inf')
        b01s.append(b01)

    listlength = len(b01s)

    # value -> 1-based position of its first occurrence in descending order.
    first_rank = {}
    for position, value in enumerate(sorted(b01s, reverse=True)):
        first_rank.setdefault(value, position + 1)

    for peak, b01 in zip(peaks, b01s):
        peak.fdr = b01 * listlength / first_rank.get(b01, 1)

    return peaks
262 | 
263 | 
264 | 
265 | 
266 | 
if __name__ == "__main__":

    # Manual check: print bayesfactor() over a grid of rates and peak scores.
    try:
        for rate in range(100, 2000, 100):
            for score in range(2, 80):
                bs = bayesfactor(locallambda=rate, peakscore=score)
                print ("locallambda:", rate, "peakscore", score, "bs", bs)

    except KeyboardInterrupt:
        sys.stderr.write("User interrupt\n")
        sys.exit(0)


--------------------------------------------------------------------------------
/Jazzlib/FRegion.py.bak:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from __future__ import print_function
  3 | from numpy import *
  4 | from countreads import *
  5 | from multiprocessing import Pool
  6 | from countreads import *
  7 | import timeit
  8 | import sys
  9 | 
 10 | 
 11 | class KeyboardInterruptError(Exception):
 12 | 
 13 |     pass
 14 | 
 15 | 
 16 | class FRegion:
 17 | 
 18 |     def __init__(self, bamfile, nthreads, maxinsert, jobtype, countchr=[]):
 19 | 
 20 |         self.bamfile = bamfile
 21 | 
 22 |         self.count_chr = countchr
 23 | 
 24 |         self.nthreads = nthreads
 25 | 
 26 |         self.maxinsert = maxinsert
 27 | 
 28 |         self.jobtype = jobtype
 29 | 
 30 |         self.__filte_region()
 31 | 
 32 |     def filte_region(self):
 33 | 
 34 |         bam_file = self.bamfile
 35 | 
 36 |         count_chr = self.count_chr
 37 | 
 38 |         nthreads = self.nthreads
 39 | 
 40 |         jobtype = self.jobtype
 41 | 
 42 |         maxinsert = self.maxinsert
 43 | 
 44 |         res = filter_region(bamfile=bam_file, count_chr=count_chr, nthreads=nthreads, maxinsert=maxinsert,
 45 |                             jobtype=jobtype)
 46 | 
 47 |         filted_region = res['filted_region']
 48 | 
 49 |         thresh_hold = res['thresh_hold']
 50 | 
 51 |         scare_std = res['region_std']
 52 | 
 53 |         scare_mean = res['region_mean']
 54 | 
 55 |         chr_total_reads = res['chr_total_reads']
 56 | 
 57 |         chrs_length = res['chrs_length']
 58 | 
 59 |         chrsfrcount = res['chrfrcount']
 60 | 
 61 |         filterreadscount = res['filterreadscount']
 62 | 
 63 |         totalreads = res['totalreads']
 64 | 
 65 |         chruniqlength = res['chruniqlength']
 66 | 
 67 |         readlengthmean = res['readlengthmean']
 68 | 
 69 |         adjreads = totalreads - filterreadscount
 70 | 
 71 |         countgenomelength = 0
 72 | 
 73 |         countgenomeuniqlength = 0
 74 | 
 75 |         for chromosome in count_chr:
 76 | 
 77 |             countgenomelength = countgenomelength + int(chrs_length[chromosome])
 78 | 
 79 |             countgenomeuniqlength = countgenomeuniqlength + int(chruniqlength[chromosome])
 80 | 
 81 |         self.countgenomelength = countgenomelength
 82 | 
 83 |         self.filted_region = filted_region
 84 | 
 85 |         self.thresh_hold = thresh_hold
 86 | 
 87 |         self.region_std = scare_std
 88 | 
 89 |         self.region_mean = scare_mean
 90 | 
 91 |         self.chr_total_reads = chr_total_reads
 92 | 
 93 |         self.chrs_length = chrs_length
 94 | 
 95 |         self.chrsfcount = chrsfrcount
 96 | 
 97 |         self.totalreads = totalreads
 98 | 
 99 |         self.filterreadscount = filterreadscount
100 | 
101 |         self.adjreads = adjreads
102 | 
103 |         self.chruniqlength = chruniqlength
104 | 
105 |         self.countgenomeuniqlength = countgenomeuniqlength
106 | 
107 |         self.readlengthmean = readlengthmean
108 | 
109 |     __filte_region = filte_region
110 | 
111 | 
def filter_region(bamfile, count_chr, nthreads, maxinsert, jobtype):
    """Count reads per 1000-bp window and flag windows with extreme coverage.

    For every chromosome of ``count_chr`` that is present in the BAM header,
    the total read count is taken with ``windowcounter`` in this process.
    Per-window counts, unique-site counts and mean read lengths are computed
    by ``chrwindow_counter`` in a pool of ``nthreads`` worker processes.
    Windows whose count is at least ``mean + 10 * std`` of all window counts
    are reported as filtered regions.

    Parameters
    ----------
    bamfile : path of the BAM file.
    count_chr : iterable of chromosome names to analyse.
    nthreads : size of the worker pool.
    maxinsert, jobtype : passed through unchanged to the counters.

    Returns
    -------
    dict with the keys ``filted_region`` (list of "chr:start-end" strings),
    ``thresh_hold``, ``region_std``, ``region_mean``, ``chr_total_reads``,
    ``chrs_length``, ``chrfrcount`` (always 0), ``filterreadscount``,
    ``totalreads``, ``chruniqlength`` and ``readlengthmean`` (mean of the
    per-chromosome mean read lengths).

    Raises
    ------
    ValueError
        If none of the chromosomes in ``count_chr`` is in the BAM header.
    Exception
        Any error raised while counting is re-raised after the pool has been
        terminated.
    SystemExit
        With status 1 when interrupted by the user.
    """
    windowsize = 1000

    pool = Pool(nthreads)

    try:
        # Only header information is needed here, so the handle is released
        # as soon as it has been read.
        samfile = pysam.Samfile(bamfile)
        try:
            ref_length = dict(zip(samfile.references, samfile.lengths))
        finally:
            samfile.close()

        chrs_length = dict()
        chr_total_reads = dict()
        totalreads = 0

        for chromosome in count_chr:
            if chromosome not in ref_length:
                continue

            chr_length = ref_length[chromosome]
            chrs_length[chromosome] = chr_length

            chrcount = windowcounter(bamfile=bamfile, regionchromosome=chromosome,
                                     regionstart=1, regionend=int(chr_length),
                                     maxinsert=maxinsert,
                                     jobtype=jobtype)

            chr_total_reads[chromosome] = chrcount
            totalreads = chrcount + totalreads

        if not chrs_length:
            # Without this check the function would only fail much later with
            # a ZeroDivisionError while averaging the read lengths.
            raise ValueError("none of the requested chromosomes is present in %s" % (bamfile,))

        # The key 'chrmosome' is spelled this way because chrwindow_counter
        # reads it under exactly that name.
        pars = [{'chrmosome': chromosome,
                 'windowsize': windowsize,
                 'chr_length': chr_length,
                 'bamfile': bamfile,
                 'maxinsert': maxinsert,
                 'jobtype': jobtype}
                for chromosome, chr_length in chrs_length.items()]

        chruniqlength = dict()
        chrreadlengthmean = dict()
        windowcountlist = list()
        windowregionlist = list()

        for nowchrcount in pool.map(chrwindow_counter, pars):
            nowchromosome = str(nowchrcount['chromosome'])
            nowwindowcount = nowchrcount['windowcount']

            chrreadlengthmean[nowchromosome] = nowchrcount['readlengthmean']
            chruniqlength[nowchromosome] = nowchrcount['uniqcount']

            chr_end = chrs_length[nowchromosome]

            for nowscare in nowwindowcount:
                nowstart = nowscare * windowsize + 1
                # The last window of a chromosome is clipped to its length.
                nowend = min((nowscare + 1) * windowsize, chr_end)

                windowcountlist.append(nowwindowcount[nowscare])
                windowregionlist.append(nowchromosome + ":" + str(nowstart) + "-" + str(nowend))

        scare_mean = mean(windowcountlist)
        scare_std = std(windowcountlist)

        print("mean:", scare_mean, "std", scare_std)

        thresh_hold = scare_mean + 10 * scare_std

        filted_region = list()
        filterreadscount = 0

        for region, count in zip(windowregionlist, windowcountlist):
            if count >= thresh_hold:
                filted_region.append(region)
                filterreadscount = filterreadscount + count

        counted_means = [chrreadlengthmean[chromosome] for chromosome in count_chr
                         if chromosome in chrreadlengthmean]

        res = dict()
        res['filted_region'] = filted_region
        res['thresh_hold'] = thresh_hold
        res['region_std'] = scare_std
        res['region_mean'] = scare_mean
        res['chr_total_reads'] = chr_total_reads
        res['chrs_length'] = chrs_length
        # Never updated by this function; kept because callers read the key.
        res['chrfrcount'] = 0
        res['filterreadscount'] = filterreadscount
        res['totalreads'] = totalreads
        res['chruniqlength'] = chruniqlength
        res['readlengthmean'] = sum(counted_means) / len(counted_means)

        pool.close()

        return res

    except KeyboardInterrupt:
        pool.terminate()
        print("You cancelled the program!")
        sys.exit(1)

    except Exception as e:
        print('got exception in Jazzlib.FRegion.filter_region: %r, terminating the pool' % (e,))
        pool.terminate()
        print('pool is terminated')
        # Re-raise: silently returning None only moves the failure to the
        # caller, where the original cause is lost.
        raise

    finally:
        # Runs after close() on success and after terminate() on failure.
        pool.join()
303 | 
304 | 
def chrwindow_counter(par):
    """Pool worker: collect the per-chromosome statistics for one job.

    ``par`` is a dict with the keys 'chrmosome', 'windowsize', 'chr_length',
    'bamfile', 'maxinsert' and 'jobtype'.  The whole chromosome (position 1
    to 'chr_length') is handed to ``windowscarecounter``, ``uniqsitecount``
    and ``readslengthmean``; their results are returned in a dict under
    'windowcount', 'uniqcount' and 'readlengthmean', together with the
    chromosome name under 'chromosome'.

    A KeyboardInterrupt prints a message and exits the process with status 1.
    """
    try:
        fields = ('chrmosome', 'windowsize', 'chr_length', 'bamfile', 'maxinsert', 'jobtype')
        chromosome, windowsize, chr_length, bamfile, maxinsert, jobtype = [par[name] for name in fields]

        # Arguments shared by all three counters.
        whole_chromosome = dict(bamfile=bamfile, regionchromosome=chromosome,
                                regionstart=1, regionend=chr_length,
                                maxinsert=maxinsert, jobtype=jobtype)

        windowcount = windowscarecounter(windowsize=windowsize, **whole_chromosome)
        uniqcount = uniqsitecount(**whole_chromosome)
        readlengthmean = readslengthmean(**whole_chromosome)

        return {
            'windowcount': windowcount,
            'chromosome': chromosome,
            'uniqcount': uniqcount,
            'readlengthmean': readlengthmean,
        }

    except KeyboardInterrupt:
        print("You cancelled the program!")
        sys.exit(1)
350 | 


--------------------------------------------------------------------------------
/Jazzlib/FRegion.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | from numpy import *
  4 | from .countreads import *
  5 | from multiprocessing import Pool
  6 | from .countreads import *
  7 | import timeit
  8 | import sys
  9 | 
 10 | 
 11 | class KeyboardInterruptError(Exception):
 12 | 
 13 |     pass
 14 | 
 15 | 
 16 | class FRegion:
 17 | 
 18 |     def __init__(self, bamfile, nthreads, maxinsert, jobtype, countchr=[]):
 19 | 
 20 |         self.bamfile = bamfile
 21 | 
 22 |         self.count_chr = countchr
 23 | 
 24 |         self.nthreads = nthreads
 25 | 
 26 |         self.maxinsert = maxinsert
 27 | 
 28 |         self.jobtype = jobtype
 29 | 
 30 |         self.__filte_region()
 31 | 
 32 |     def filte_region(self):
 33 | 
 34 |         bam_file = self.bamfile
 35 | 
 36 |         count_chr = self.count_chr
 37 | 
 38 |         nthreads = self.nthreads
 39 | 
 40 |         jobtype = self.jobtype
 41 | 
 42 |         maxinsert = self.maxinsert
 43 | 
 44 |         res = filter_region(bamfile=bam_file, count_chr=count_chr, nthreads=nthreads, maxinsert=maxinsert,
 45 |                             jobtype=jobtype)
 46 | 
 47 |         filted_region = res['filted_region']
 48 | 
 49 |         thresh_hold = res['thresh_hold']
 50 | 
 51 |         scare_std = res['region_std']
 52 | 
 53 |         scare_mean = res['region_mean']
 54 | 
 55 |         chr_total_reads = res['chr_total_reads']
 56 | 
 57 |         chrs_length = res['chrs_length']
 58 | 
 59 |         chrsfrcount = res['chrfrcount']
 60 | 
 61 |         filterreadscount = res['filterreadscount']
 62 | 
 63 |         totalreads = res['totalreads']
 64 | 
 65 |         chruniqlength = res['chruniqlength']
 66 | 
 67 |         readlengthmean = res['readlengthmean']
 68 | 
 69 |         adjreads = totalreads - filterreadscount
 70 | 
 71 |         countgenomelength = 0
 72 | 
 73 |         countgenomeuniqlength = 0
 74 | 
 75 |         for chromosome in count_chr:
 76 | 
 77 |             countgenomelength = countgenomelength + int(chrs_length[chromosome])
 78 | 
 79 |             countgenomeuniqlength = countgenomeuniqlength + int(chruniqlength[chromosome])
 80 | 
 81 |         self.countgenomelength = countgenomelength
 82 | 
 83 |         self.filted_region = filted_region
 84 | 
 85 |         self.thresh_hold = thresh_hold
 86 | 
 87 |         self.region_std = scare_std
 88 | 
 89 |         self.region_mean = scare_mean
 90 | 
 91 |         self.chr_total_reads = chr_total_reads
 92 | 
 93 |         self.chrs_length = chrs_length
 94 | 
 95 |         self.chrsfcount = chrsfrcount
 96 | 
 97 |         self.totalreads = totalreads
 98 | 
 99 |         self.filterreadscount = filterreadscount
100 | 
101 |         self.adjreads = adjreads
102 | 
103 |         self.chruniqlength = chruniqlength
104 | 
105 |         self.countgenomeuniqlength = countgenomeuniqlength
106 | 
107 |         self.readlengthmean = readlengthmean
108 | 
109 |     __filte_region = filte_region
110 | 
111 | 
def filter_region(bamfile, count_chr, nthreads, maxinsert, jobtype):
    """Count reads per 1000-bp window and flag windows with extreme coverage.

    For every chromosome of ``count_chr`` that is present in the BAM header,
    the total read count is taken with ``windowcounter`` in this process.
    Per-window counts, unique-site counts and mean read lengths are computed
    by ``chrwindow_counter`` in a pool of ``nthreads`` worker processes.
    Windows whose count is at least ``mean + 10 * std`` of all window counts
    are reported as filtered regions.

    Parameters
    ----------
    bamfile : path of the BAM file.
    count_chr : iterable of chromosome names to analyse.
    nthreads : size of the worker pool.
    maxinsert, jobtype : passed through unchanged to the counters.

    Returns
    -------
    dict with the keys ``filted_region`` (list of "chr:start-end" strings),
    ``thresh_hold``, ``region_std``, ``region_mean``, ``chr_total_reads``,
    ``chrs_length``, ``chrfrcount`` (always 0), ``filterreadscount``,
    ``totalreads``, ``chruniqlength`` and ``readlengthmean`` (mean of the
    per-chromosome mean read lengths).

    Raises
    ------
    ValueError
        If none of the chromosomes in ``count_chr`` is in the BAM header.
    Exception
        Any error raised while counting is re-raised after the pool has been
        terminated.
    SystemExit
        With status 1 when interrupted by the user.
    """
    windowsize = 1000

    pool = Pool(nthreads)

    try:
        # Only header information is needed here, so the handle is released
        # as soon as it has been read.
        samfile = pysam.Samfile(bamfile)
        try:
            ref_length = dict(zip(samfile.references, samfile.lengths))
        finally:
            samfile.close()

        chrs_length = dict()
        chr_total_reads = dict()
        totalreads = 0

        for chromosome in count_chr:
            if chromosome not in ref_length:
                continue

            chr_length = ref_length[chromosome]
            chrs_length[chromosome] = chr_length

            chrcount = windowcounter(bamfile=bamfile, regionchromosome=chromosome,
                                     regionstart=1, regionend=int(chr_length),
                                     maxinsert=maxinsert,
                                     jobtype=jobtype)

            chr_total_reads[chromosome] = chrcount
            totalreads = chrcount + totalreads

        if not chrs_length:
            # Without this check the function would only fail much later with
            # a ZeroDivisionError while averaging the read lengths.
            raise ValueError("none of the requested chromosomes is present in %s" % (bamfile,))

        # The key 'chrmosome' is spelled this way because chrwindow_counter
        # reads it under exactly that name.
        pars = [{'chrmosome': chromosome,
                 'windowsize': windowsize,
                 'chr_length': chr_length,
                 'bamfile': bamfile,
                 'maxinsert': maxinsert,
                 'jobtype': jobtype}
                for chromosome, chr_length in chrs_length.items()]

        chruniqlength = dict()
        chrreadlengthmean = dict()
        windowcountlist = list()
        windowregionlist = list()

        for nowchrcount in pool.map(chrwindow_counter, pars):
            nowchromosome = str(nowchrcount['chromosome'])
            nowwindowcount = nowchrcount['windowcount']
            nowreadslengthmean = nowchrcount['readlengthmean']

            print(nowchromosome, nowreadslengthmean)

            chrreadlengthmean[nowchromosome] = nowreadslengthmean
            chruniqlength[nowchromosome] = nowchrcount['uniqcount']

            chr_end = chrs_length[nowchromosome]

            for nowscare in nowwindowcount:
                nowstart = nowscare * windowsize + 1
                # The last window of a chromosome is clipped to its length.
                nowend = min((nowscare + 1) * windowsize, chr_end)

                windowcountlist.append(nowwindowcount[nowscare])
                windowregionlist.append(nowchromosome + ":" + str(nowstart) + "-" + str(nowend))

        scare_mean = mean(windowcountlist)
        scare_std = std(windowcountlist)

        print("mean:", scare_mean, "std", scare_std)

        thresh_hold = scare_mean + 10 * scare_std

        filted_region = list()
        filterreadscount = 0

        for region, count in zip(windowregionlist, windowcountlist):
            if count >= thresh_hold:
                filted_region.append(region)
                filterreadscount = filterreadscount + count

        counted_means = [chrreadlengthmean[chromosome] for chromosome in count_chr
                         if chromosome in chrreadlengthmean]

        res = dict()
        res['filted_region'] = filted_region
        res['thresh_hold'] = thresh_hold
        res['region_std'] = scare_std
        res['region_mean'] = scare_mean
        res['chr_total_reads'] = chr_total_reads
        res['chrs_length'] = chrs_length
        # Never updated by this function; kept because callers read the key.
        res['chrfrcount'] = 0
        res['filterreadscount'] = filterreadscount
        res['totalreads'] = totalreads
        res['chruniqlength'] = chruniqlength
        res['readlengthmean'] = sum(counted_means) / len(counted_means)

        pool.close()

        return res

    except KeyboardInterrupt:
        pool.terminate()
        print("You cancelled the program!")
        sys.exit(1)

    except Exception as e:
        print('got exception in Jazzlib.FRegion.filter_region: %r, terminating the pool' % (e,))
        pool.terminate()
        print('pool is terminated')
        # Re-raise: silently returning None only moves the failure to the
        # caller, where the original cause is lost.
        raise

    finally:
        # Runs after close() on success and after terminate() on failure.
        pool.join()
305 | 
306 | 
def chrwindow_counter(par):
    """Pool worker: collect the per-chromosome statistics for one job.

    ``par`` is a dict with the keys 'chrmosome', 'windowsize', 'chr_length',
    'bamfile', 'maxinsert' and 'jobtype'.  The whole chromosome (position 1
    to 'chr_length') is handed to ``windowscarecounter``, ``uniqsitecount``
    and ``readslengthmean``; their results are returned in a dict under
    'windowcount', 'uniqcount' and 'readlengthmean', together with the
    chromosome name under 'chromosome'.

    A KeyboardInterrupt prints a message and exits the process with status 1.
    """
    try:
        fields = ('chrmosome', 'windowsize', 'chr_length', 'bamfile', 'maxinsert', 'jobtype')
        chromosome, windowsize, chr_length, bamfile, maxinsert, jobtype = [par[name] for name in fields]

        # Arguments shared by all three counters.
        whole_chromosome = dict(bamfile=bamfile, regionchromosome=chromosome,
                                regionstart=1, regionend=chr_length,
                                maxinsert=maxinsert, jobtype=jobtype)

        windowcount = windowscarecounter(windowsize=windowsize, **whole_chromosome)
        uniqcount = uniqsitecount(**whole_chromosome)
        readlengthmean = readslengthmean(**whole_chromosome)

        chrwindowcount = {
            'windowcount': windowcount,
            'chromosome': chromosome,
            'uniqcount': uniqcount,
            'readlengthmean': readlengthmean,
        }

        # for debug
        print("in chrwindow_counter", readlengthmean)

        return chrwindowcount

    except KeyboardInterrupt:
        print("You cancelled the program!")
        sys.exit(1)
355 | 


--------------------------------------------------------------------------------
/Jazzlib/Jazz.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | import os
  4 | import sys
  5 | from optparse import OptionParser
  6 | import logging
  7 | from Jazzlib.FRegion import *
  8 | from Jazzlib.localmax import *
  9 | from Jazzlib.normalize_ratio import *
 10 | from Jazzlib.countreads import *
 11 | from Jazzlib.Peak import *
 12 | from Jazzlib.sta import *
 13 | from Jazzlib.jazzio import *
 14 | from Jazzlib.randombg import *
 15 | from Jazzlib.hotspotsscan import *
 16 | from Jazzlib.Hotspot import *
 17 | 
 18 | 
 19 | def main():
 20 | 
 21 |     opt = opt_check(get_optparser())
 22 | 
 23 |     if opt.controlfile == "no":
 24 | 
 25 |         nocontrol(opt)
 26 | 
 27 |     else:
 28 | 
 29 |         withcontrol(opt)
 30 | 
 31 | 
 32 | def withcontrol(opt):
 33 | 
 34 |     try:
 35 | 
 36 |         datafile = opt.datafile
 37 | 
 38 |         inputfile = opt.controlfile
 39 | 
 40 |         jobtype = opt.jobtype
 41 | 
 42 |         count_chr = opt.countchr
 43 | 
 44 |         maxinsert = opt.maxinsert
 45 | 
 46 |         nthreads = opt.nthreads
 47 | 
 48 |         bayesfactorthreshold = opt.threshold
 49 | 
 50 |         # bayesfactorthreshold = 10
 51 | 
 52 |         samplename = opt.samplename
 53 | 
 54 |         fdr = opt.fdr
 55 | 
 56 |         chipfregion = FRegion(bamfile=datafile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads, maxinsert=maxinsert)
 57 | 
 58 |         inputfregion = FRegion(bamfile=inputfile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads, maxinsert=maxinsert)
 59 | 
 60 |         rndth = randombg(fregion=chipfregion, nthreads=nthreads, maxinsert=maxinsert)
 61 | 
 62 |         localmax = get_all_localmax(bamfile=datafile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads,
 63 |                                     maxinsert=maxinsert, fregion=chipfregion, rndth=rndth)
 64 | 
 65 |         ratio = normalize_ratio_input2(fregegion_input=inputfregion, fregion_chip=chipfregion)
 66 | 
 67 |         inputadjreads = inputfregion.totalreads - inputfregion.filterreadscount
 68 | 
 69 |         genomelength = inputfregion.countgenomelength
 70 | 
 71 |         gloablumbda = inputadjreads/genomelength
 72 | 
 73 |         windowscare = 1000000
 74 | 
 75 |         # print (ratio, inputadjreads, genomelength,gloablumbda)
 76 | 
 77 |         # peaks = peakscan_control(localmax=localmax,
 78 |         #                          datafile=datafile,
 79 |         #                          inputfile=inputfile,
 80 |         #                          maxinsert=maxinsert,
 81 |         #                          windowscare=windowscare,
 82 |         #                          gloablumbda=gloablumbda,
 83 |         #                          ratio=ratio,
 84 |         #                          bayesfactorthreshold=bayesfactorthreshold,
 85 |         #                          nthreads=nthreads,
 86 |         #                          inputfregion=inputfregion,
 87 |         #                          chipfregion=chipfregion,
 88 |         #                          jobtype=jobtype)
 89 | 
 90 |         # peakbedgraphswriter(samplename, peaks)
 91 | 
 92 |     except KeyboardInterrupt:
 93 | 
 94 |         sys.stderr.write("User interrupt\n")
 95 | 
 96 |         sys.exit(0)
 97 | 
 98 | 
 99 | def nocontrol(opt):
100 | 
101 |     try:
102 | 
103 |         datafile = opt.datafile
104 | 
105 |         jobtype = opt.jobtype
106 | 
107 |         count_chr = opt.countchr
108 | 
109 |         maxinsert = opt.maxinsert
110 | 
111 |         print ("maxinsert",maxinsert)
112 | 
113 |         nthreads = opt.nthreads
114 | 
115 |         bayesfactorthreshold = opt.threshold
116 | 
117 |         fdr = opt.fdr
118 | 
119 |         samplename = opt.samplename
120 | 
121 |         chipfregion = FRegion(bamfile=datafile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads, maxinsert=maxinsert)
122 | 
123 |         # rndth = randombg2(fregion=chipfregion, nthreads=nthreads, maxinsert=maxinsert)
124 | 
125 | 
126 | 
127 |         hotspots = hotspotsscan_withoutcontrol(file=datafile, maxinsert=maxinsert, windowscare=100000, countchr=count_chr,
128 |                                                bayesfactorthreshold=bayesfactorthreshold, nthreads=nthreads,
129 |                                                fregion=chipfregion, jobtype=jobtype)
130 | 
131 |         hotspotsbedswriter(hotspots=hotspots, samplename=samplename)
132 | 
133 |         # print (rndth)
134 |         #
135 |         # localmax = get_all_localmax(bamfile=datafile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads,
136 |         #                             maxinsert=maxinsert, fregion=chipfregion, rndth=rndth)
137 |         #
138 |         # ratio = 1
139 |         #
140 |         # inputadjreads = chipfregion.totalreads - chipfregion.filterreadscount
141 |         #
142 |         # genomelength = chipfregion.countgenomelength
143 |         #
144 |         # gloablumbda = inputadjreads/genomelength*maxinsert
145 |         #
146 |         # print ("gloablumbda",gloablumbda)
147 |         #
148 |         # windowscare = 1000000
149 |         #
150 |         # peaks = peakscan_withoutcontrol(localmax=localmax,
151 |         #                          file=datafile,
152 |         #                          maxinsert=maxinsert,
153 |         #                          windowscare=windowscare,
154 |         #                          gloablumbda=gloablumbda,
155 |         #                          ratio=ratio,
156 |         #                          bayesfactorthreshold=bayesfactorthreshold,
157 |         #                          nthreads=nthreads,
158 |         #                          fregion=chipfregion,
159 |         #                          jobtype=jobtype)
160 |         #
161 |         #
162 |         # peakbedgraphswriter(samplename, peaks)
163 | 
164 |     except KeyboardInterrupt:
165 | 
166 |         sys.stderr.write("User interrupt\n")
167 | 
168 |         sys.exit(0)
169 | 
170 | 
def get_optparser():
    """Build the command-line parser of the program.

    Returns
    -------
    optparse.OptionParser
        Parser with the options -d/--data, -c/--control (default "no"),
        -n/--name, -t/--threshold, --threads, -w/--wig, -f/--fdr,
        -x/--excludechr, -g/--gff, -j/--jobtype, -m/--maxinsert and --pe.
        The built-in help option is replaced by an explicit -h/--help.
    """
    # The example uses -d: the parser defines no -i option.
    usage = """usage: %prog <-d datafile> [-n name] [options]
    Example %prog -d nh_sample1.bam -n sample1
    """

    description = "%prog Non-Histone protein banding site identification"

    jazzopt = OptionParser(version="%prog 0.1 20140521", description=description, usage=usage, add_help_option=False)

    jazzopt.add_option("-h", "--help", action="help", help="show this help message and exit.")

    jazzopt.add_option("-d", "--data", dest="datafile", type="string", help='data file, should be sorted bam format')

    jazzopt.add_option("-c", "--control", dest="controlfile", type="string", help='control(input) file, should be sorted bam format', default="no")

    # The help text states the default that is actually applied.
    jazzopt.add_option("-n", "--name", dest="samplename", help="NH sample name default=DH_sample", type="string" , default="DH_sample")

    jazzopt.add_option("-t", "--threshold", dest="threshold", type="float", help="peak threshold, default=6.0", default=6.0)

    jazzopt.add_option("--threads", dest="nthreads", type="int", help="threads number or cpu number, default=4", default=4)

    jazzopt.add_option("-w", "--wig", action="store_true", help="whether out put wiggle file, default=False", default=False)

    jazzopt.add_option("-f","--fdr", dest="fdr", type="float",help="using FDR as threshold", default=0.1)

    jazzopt.add_option("-x", "--excludechr", dest="excludechr", help="Don't count those chromosome, strongly suggest skip mitochondrion and chloroplast, example='-x ChrM,ChrC'")

    jazzopt.add_option("-g", "--gff", action="store_true", help="whether out put gff file, default=False", default=False)

    jazzopt.add_option("-j","--jobtype",dest="jobtype",type="string",help="job type, such as nhpaired or nhsingle")

    jazzopt.add_option("-m","--maxinsert",dest="maxinsert",type="int",help="when you use paired library, please set the maxinsert size",default=80)

    jazzopt.add_option("--pe", dest="pe", action="store_true", help="paired-end reads or single-end reads, default=False (single end)", default=False)

    return jazzopt
208 | 
209 | 
def opt_check(jazzopt):
    """Parse the command line with ``jazzopt`` and validate the options.

    Checks, in this order: a data file was given; the data file and its
    ``.bai`` index exist; the control file (if any) and its index exist; the
    thread count is positive; the job type is 'nhsingle' or 'nhpaired' and
    ``maxinsert`` is not negative.  Any failure is logged and ends the
    program with status 1.

    On success ``opt.countchr`` is set to the reference names of the data
    file minus the comma-separated names given with ``excludechr``.  Naming a
    chromosome to exclude that is not in the data file also ends the program
    with status 1.

    Parameters
    ----------
    jazzopt : optparse.OptionParser
        Parser providing the options datafile, controlfile, nthreads,
        jobtype, maxinsert and excludechr.

    Returns
    -------
    The parsed options object with ``countchr`` added.
    """
    (opt, args) = jazzopt.parse_args()

    if not opt.datafile:
        logging.error("you need input a bam file, '-d nh_sample1.bam -j nhsingle'")
        jazzopt.print_help()
        sys.exit(1)

    # The data file is checked first, then the control file when one is given.
    bamfiles = [opt.datafile]
    if opt.controlfile != "no":
        bamfiles.append(opt.controlfile)

    for bamfile in bamfiles:
        if not os.path.isfile(bamfile):
            logging.error("No such file: %s" % bamfile)
            sys.exit(1)

        indexfile = bamfile + '.bai'

        if not os.path.isfile(indexfile):
            logging.error("Missing bam index file: %s" % indexfile)
            sys.exit(1)

    if not (opt.nthreads > 0):
        logging.error("threads number should >=1")
        jazzopt.print_help()
        sys.exit(1)

    if opt.jobtype not in ('nhsingle', 'nhpaired'):
        logging.error("missing or wrong jobtype")
        jazzopt.print_help()
        sys.exit(1)

    if opt.maxinsert < 0:
        logging.error("maxinsert size error")
        jazzopt.print_help()
        sys.exit(1)

    # Only the header is needed; release the file handle right away.
    samfile = pysam.Samfile(opt.datafile)
    try:
        sam_ref = samfile.references
    finally:
        samfile.close()

    opt.countchr = list(sam_ref)

    if opt.excludechr:
        excludchr = opt.excludechr.split(',')

        for chri in excludchr:
            if chri not in sam_ref:
                print(chri, 'not in the %s file' % opt.datafile)
                print("try to selcet exclude Chr from", end=" : ")
                # Join explicitly: ``sep`` has no effect on a single argument,
                # so print(sam_ref, sep=",") showed the tuple repr instead.
                print(",".join(sam_ref))
                jazzopt.print_help()
                sys.exit(1)

        # Build a new list rather than deleting from the list being iterated.
        excluded = set(excludchr)
        opt.countchr = [name for name in opt.countchr if name not in excluded]

    return opt
343 | 
# Script entry point: run main() when the file is executed directly.
# A Ctrl-C that reaches this level is reported on stderr and the program
# exits with status 0.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("User interrupt\n")
        sys.exit(0)
355 | 
356 | 


--------------------------------------------------------------------------------
/Jazzlib/Jazz.py.bak:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from __future__ import print_function
  3 | import os
  4 | import sys
  5 | from optparse import OptionParser
  6 | import logging
  7 | from Jazzlib.FRegion import *
  8 | from Jazzlib.localmax import *
  9 | from Jazzlib.normalize_ratio import *
 10 | from Jazzlib.countreads import *
 11 | from Jazzlib.Peak import *
 12 | from Jazzlib.sta import *
 13 | from Jazzlib.jazzio import *
 14 | from Jazzlib.randombg import *
 15 | from Jazzlib.hotspotsscan import *
 16 | from Jazzlib.Hotspot import *
 17 | 
 18 | 
 19 | def main():
 20 | 
 21 |     opt = opt_check(get_optparser())
 22 | 
 23 |     if opt.controlfile == "no":
 24 | 
 25 |         nocontrol(opt)
 26 | 
 27 |     else:
 28 | 
 29 |         withcontrol(opt)
 30 | 
 31 | 
 32 | def withcontrol(opt):
 33 | 
 34 |     try:
 35 | 
 36 |         datafile = opt.datafile
 37 | 
 38 |         inputfile = opt.controlfile
 39 | 
 40 |         jobtype = opt.jobtype
 41 | 
 42 |         count_chr = opt.countchr
 43 | 
 44 |         maxinsert = opt.maxinsert
 45 | 
 46 |         nthreads = opt.nthreads
 47 | 
 48 |         bayesfactorthreshold = opt.threshold
 49 | 
 50 |         # bayesfactorthreshold = 10
 51 | 
 52 |         samplename = opt.samplename
 53 | 
 54 |         fdr = opt.fdr
 55 | 
 56 |         chipfregion = FRegion(bamfile=datafile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads, maxinsert=maxinsert)
 57 | 
 58 |         inputfregion = FRegion(bamfile=inputfile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads, maxinsert=maxinsert)
 59 | 
 60 |         rndth = randombg(fregion=chipfregion, nthreads=nthreads, maxinsert=maxinsert)
 61 | 
 62 |         localmax = get_all_localmax(bamfile=datafile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads,
 63 |                                     maxinsert=maxinsert, fregion=chipfregion, rndth=rndth)
 64 | 
 65 |         ratio = normalize_ratio_input2(fregegion_input=inputfregion, fregion_chip=chipfregion)
 66 | 
 67 |         inputadjreads = inputfregion.totalreads - inputfregion.filterreadscount
 68 | 
 69 |         genomelength = inputfregion.countgenomelength
 70 | 
 71 |         gloablumbda = inputadjreads/genomelength
 72 | 
 73 |         windowscare = 1000000
 74 | 
 75 |         # print (ratio, inputadjreads, genomelength,gloablumbda)
 76 | 
 77 |         # peaks = peakscan_control(localmax=localmax,
 78 |         #                          datafile=datafile,
 79 |         #                          inputfile=inputfile,
 80 |         #                          maxinsert=maxinsert,
 81 |         #                          windowscare=windowscare,
 82 |         #                          gloablumbda=gloablumbda,
 83 |         #                          ratio=ratio,
 84 |         #                          bayesfactorthreshold=bayesfactorthreshold,
 85 |         #                          nthreads=nthreads,
 86 |         #                          inputfregion=inputfregion,
 87 |         #                          chipfregion=chipfregion,
 88 |         #                          jobtype=jobtype)
 89 | 
 90 |         # peakbedgraphswriter(samplename, peaks)
 91 | 
 92 |     except KeyboardInterrupt:
 93 | 
 94 |         sys.stderr.write("User interrupt\n")
 95 | 
 96 |         sys.exit(0)
 97 | 
 98 | 
 99 | def nocontrol(opt):
100 | 
101 |     try:
102 | 
103 |         datafile = opt.datafile
104 | 
105 |         jobtype = opt.jobtype
106 | 
107 |         count_chr = opt.countchr
108 | 
109 |         maxinsert = opt.maxinsert
110 | 
111 |         print ("maxinsert",maxinsert)
112 | 
113 |         nthreads = opt.nthreads
114 | 
115 |         bayesfactorthreshold = opt.threshold
116 | 
117 |         fdr = opt.fdr
118 | 
119 |         samplename = opt.samplename
120 | 
121 |         chipfregion = FRegion(bamfile=datafile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads, maxinsert=maxinsert)
122 | 
123 |         # rndth = randombg2(fregion=chipfregion, nthreads=nthreads, maxinsert=maxinsert)
124 | 
125 | 
126 | 
127 |         hotspots = hotspotsscan_withoutcontrol(file=datafile, maxinsert=maxinsert, windowscare=100000, countchr=count_chr,
128 |                                                bayesfactorthreshold=bayesfactorthreshold, nthreads=nthreads,
129 |                                                fregion=chipfregion, jobtype=jobtype)
130 | 
131 |         hotspotsbedswriter(hotspots=hotspots, samplename=samplename)
132 | 
133 |         # print (rndth)
134 |         #
135 |         # localmax = get_all_localmax(bamfile=datafile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads,
136 |         #                             maxinsert=maxinsert, fregion=chipfregion, rndth=rndth)
137 |         #
138 |         # ratio = 1
139 |         #
140 |         # inputadjreads = chipfregion.totalreads - chipfregion.filterreadscount
141 |         #
142 |         # genomelength = chipfregion.countgenomelength
143 |         #
144 |         # gloablumbda = inputadjreads/genomelength*maxinsert
145 |         #
146 |         # print ("gloablumbda",gloablumbda)
147 |         #
148 |         # windowscare = 1000000
149 |         #
150 |         # peaks = peakscan_withoutcontrol(localmax=localmax,
151 |         #                          file=datafile,
152 |         #                          maxinsert=maxinsert,
153 |         #                          windowscare=windowscare,
154 |         #                          gloablumbda=gloablumbda,
155 |         #                          ratio=ratio,
156 |         #                          bayesfactorthreshold=bayesfactorthreshold,
157 |         #                          nthreads=nthreads,
158 |         #                          fregion=chipfregion,
159 |         #                          jobtype=jobtype)
160 |         #
161 |         #
162 |         # peakbedgraphswriter(samplename, peaks)
163 | 
164 |     except KeyboardInterrupt:
165 | 
166 |         sys.stderr.write("User interrupt\n")
167 | 
168 |         sys.exit(0)
169 | 
170 | 
def get_optparser():
    """Build the command-line parser for the Jazz pipeline.

    Returns an ``optparse.OptionParser``; nothing is parsed here. Validation
    of the parsed values is done separately by ``opt_check``.

    The usage example shows the flags that ``opt_check`` actually requires
    (``-d`` and ``-j``), and the ``-n`` help text states the default that is
    really applied.
    """
    usage = """usage: %prog <-d datafile> <-j jobtype> [-n name] [options]
    Example %prog -d nh_sample1.bam -j nhsingle -n sample1
    """

    description = "%prog Non-Histone protein binding site identification"

    # The built-in -h is disabled so that the help text can be customised below.
    jazzopt = OptionParser(version="%prog 0.1 20140521", description=description,
                           usage=usage, add_help_option=False)

    jazzopt.add_option("-h", "--help", action="help",
                       help="show this help message and exit.")

    jazzopt.add_option("-d", "--data", dest="datafile", type="string",
                       help='data file, should be sorted bam format')

    # "no" is the sentinel the rest of the program uses for "no control file".
    jazzopt.add_option("-c", "--control", dest="controlfile", type="string",
                       help='control(input) file, should be sorted bam format',
                       default="no")

    jazzopt.add_option("-n", "--name", dest="samplename",
                       help="NH sample name default=DH_sample", type="string",
                       default="DH_sample")

    jazzopt.add_option("-t", "--threshold", dest="threshold", type="float",
                       help="peak threshold, default=6.0", default=6.0)

    jazzopt.add_option("--threads", dest="nthreads", type="int",
                       help="threads number or cpu number, default=4", default=4)

    # No explicit dest: optparse derives "wig" from the long option name.
    jazzopt.add_option("-w", "--wig", action="store_true",
                       help="whether out put wiggle file, default=False",
                       default=False)

    jazzopt.add_option("-f", "--fdr", dest="fdr", type="float",
                       help="using FDR as threshold", default=0.1)

    jazzopt.add_option("-x", "--excludechr", dest="excludechr",
                       help="Don't count those chromosome, strongly suggest skip "
                            "mitochondrion and chloroplast, example='-x ChrM,ChrC'")

    # No explicit dest: optparse derives "gff" from the long option name.
    jazzopt.add_option("-g", "--gff", action="store_true",
                       help="whether out put gff file, default=False",
                       default=False)

    jazzopt.add_option("-j", "--jobtype", dest="jobtype", type="string",
                       help="job type, such as nhpaired or nhsingle")

    jazzopt.add_option("-m", "--maxinsert", dest="maxinsert", type="int",
                       help="when you use paired library, please set the maxinsert size",
                       default=80)

    jazzopt.add_option("--pe", dest="pe", action="store_true",
                       help="paired-end reads or single-end reads, default=False (single end)",
                       default=False)

    return jazzopt
208 | 
209 | 
def opt_check(jazzopt):
    """Parse ``sys.argv`` with *jazzopt* and validate the result.

    Checks, in order: a data BAM was given and exists together with its
    ``.bai`` index; the control BAM (if any) and its index exist; the thread
    count is positive; the job type is ``nhsingle`` or ``nhpaired``; and
    ``maxinsert`` is not negative. Any failure is logged and ends the process
    with exit status 1.

    On success ``opt.countchr`` is set to the reference names of the data BAM
    minus those listed in ``--excludechr``, and the option object is returned.
    """
    (opt, _args) = jazzopt.parse_args()

    if not opt.datafile:
        logging.error("you need input a bam file, '-d nh_sample1.bam -j nhsingle'")
        jazzopt.print_help()
        sys.exit(1)

    if not os.path.isfile(opt.datafile):
        logging.error("No such file: %s" % opt.datafile)
        sys.exit(1)

    dataindexfile = opt.datafile + '.bai'
    if not os.path.isfile(dataindexfile):
        logging.error("Missing bam index file: %s" % dataindexfile)
        sys.exit(1)

    # "no" is the parser default meaning "run without a control sample".
    if opt.controlfile != "no":
        if not os.path.isfile(opt.controlfile):
            logging.error("No such file: %s" % opt.controlfile)
            sys.exit(1)

        controlindexfile = opt.controlfile + '.bai'
        if not os.path.isfile(controlindexfile):
            logging.error("Missing bam index file: %s" % controlindexfile)
            sys.exit(1)

    if not (opt.nthreads > 0):
        logging.error("threads number should >=1")
        jazzopt.print_help()
        sys.exit(1)

    # Both job types share the same maxinsert rule, so validate them together.
    if opt.jobtype not in ('nhsingle', 'nhpaired'):
        logging.error("missing or wrong jobtype")
        jazzopt.print_help()
        sys.exit(1)

    if opt.maxinsert < 0:
        logging.error("maxinsert size error")
        jazzopt.print_help()
        sys.exit(1)

    # Only the header is needed; close the handle as soon as it has been read.
    samfile = pysam.Samfile(opt.datafile)
    try:
        sam_ref = samfile.references
    finally:
        samfile.close()

    opt.countchr = list(sam_ref)

    if opt.excludechr:
        excludchr = opt.excludechr.split(',')

        for chri in excludchr:
            if chri not in sam_ref:
                print(chri, 'not in the %s file' % opt.datafile)
                print("try to selcet exclude Chr from", end=" : ")
                print(sam_ref, sep=",")
                jazzopt.print_help()
                sys.exit(1)

        # Build a filtered copy instead of deleting from the list while
        # iterating over it, which skips the element after each deletion.
        excluded = set(excludchr)
        opt.countchr = [name for name in opt.countchr if name not in excluded]

    return opt
343 | 
if __name__ == "__main__":

    # Script entry point: run the pipeline and turn Ctrl-C into a short
    # message on stderr with exit status 0 instead of a traceback.
    try:

        main()

    except KeyboardInterrupt:

        sys.stderr.write("User interrupt\n")

        sys.exit(0)
355 | 
356 | 


--------------------------------------------------------------------------------
/Jazzlib/bgcount.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | import pysam
  4 | from numpy import *
  5 | from multiprocessing import Pool
  6 | import random as rnd
  7 | from .kernel import *
  8 | import sys
  9 | from . import readscounter
 10 | 
 11 | 
 12 | class KeyboardInterruptError(Exception):
 13 |     
 14 |     pass
 15 | 
 16 | 
 17 | def nhnoncontrol(uniqueratio, threshold, kernellength, nthreads=4):
 18 |     
 19 |     bgscore = sim_replicate_nthreads(run_times=200, uniqueratio=uniqueratio,
 20 |                                      nthreads=nthreads, kernellength=kernellength, threshold=threshold)
 21 |     
 22 |     cutoff = bgscore['mean'] + bgscore['std'] * threshold
 23 |     
 24 |     return cutoff
 25 | 
 26 | 
 27 | def dhnoncontrol(uniqueratio, threshold, kernellength, nthreads=4):
 28 |     
 29 |     bgscore = sim_replicate_nthreads(run_times=200, uniqueratio=uniqueratio,
 30 |                                      nthreads=nthreads, kernellength=kernellength, threshold=threshold)
 31 |     
 32 |     cutoff = bgscore['mean'] + bgscore['std'] * threshold
 33 | 
 34 |     return cutoff
 35 | 
 36 | 
 37 | def nhcontrol(bamfile, chromosome, paired, chrlength, ultratio, filted_region,maxinsert, kernellength = 600, threshold = 4):
 38 |     
 39 |     """
 40 |         region: chr:start-end
 41 |         ultraio = chrlength * uniqratio / chr_total_reads
 42 |         filter region
 43 |         
 44 |     """
 45 |     
 46 |     region = chromosome + ':' + str(1) + '-' + str(chrlength)
 47 |     
 48 |     readscount = readscounter.nhreadscounter(bamfile, region, paired,  maxinsert=maxinsert)
 49 |     
 50 |     kernel = smooth_kernel(kernellength)
 51 |     
 52 |     kernel_score = list()
 53 | 
 54 |     for i in sorted(kernel):
 55 |         
 56 |         kernel_score.append(kernel[i])
 57 | 
 58 |     threshold = filted_region.threshold/100
 59 | 
 60 |     for site in readscount:
 61 |         
 62 |         if readscount[site] > threshold:
 63 |             
 64 |             readscount[site] = threshold
 65 | 
 66 |     smoothed_result = correlate(array(readscount), kernel_score)
 67 | 
 68 |     ultratiolist = list()
 69 | 
 70 |     ultratiolist.append(ultratio)
 71 | 
 72 |     smoothed_result = correlate(smoothed_result, ultratiolist)
 73 | 
 74 |     #scores = list()
 75 |     
 76 |     bg_mean = smoothed_result.mean()
 77 |     
 78 |     bg_std = smoothed_result.std()
 79 |     
 80 |     bg_threshold = bg_mean + threshold * bg_std
 81 |     
 82 |     #bgscore['rand_mean'] = bg_mean
 83 |     
 84 |     #bgscore['rand_std'] = bg_std
 85 |     
 86 |     cutoff = bg_threshold
 87 | 
 88 |     return cutoff
 89 | 
 90 | 
 91 | def nhuniquerate(bamfile, chromosome, paired, fregion, regionstart=1, regionend = -1, maxinsert = 100000):
 92 |     
 93 |     samfile = pysam.Samfile(bamfile)
 94 | 
 95 |     ref_lengths = samfile.lengths
 96 | 
 97 |     sam_ref = samfile.references
 98 | 
 99 |     refere_ncenumber = samfile.nreferences
100 | 
101 |     if regionend == -1:
102 |         
103 |         for i in range(refere_ncenumber):
104 |             
105 |             if sam_ref[i] == chromosome:
106 |                 
107 |                 regionend = ref_lengths[i]
108 | 
109 |     region = chromosome + ':' + str(regionstart) + '-' + str(regionend)
110 |     
111 |     region_length = regionend - regionstart
112 |         
113 |     nhreadscount = readscounter.nhreadscounter(bamfile, region, paired, maxinsert=maxinsert)
114 | 
115 |     totaluniq = len(nhreadscount)  + 0.0
116 |     
117 |     uniquerate = totaluniq/region_length
118 |     
119 |     return uniquerate
120 | 
121 | 
def dhuniquerate(bamfile, chromosome, regionstart=1, regionend=-1):
    """Return the number of distinct counted sites per base in a region (DH).

    regionend: -1 (default) means "to the end of *chromosome*", looked up in
        the BAM header.

    Raises ValueError when the chromosome end has to be looked up and
    *chromosome* is not a reference of *bamfile*.
    """
    if regionend == -1:
        samfile = pysam.Samfile(bamfile)
        try:
            chrom_lengths = dict(zip(samfile.references, samfile.lengths))
        finally:
            # Only the header is needed; do not leak the file handle.
            samfile.close()

        if chromosome not in chrom_lengths:
            raise ValueError("chromosome %s not found in %s" % (chromosome, bamfile))

        regionend = chrom_lengths[chromosome]

    region = chromosome + ':' + str(regionstart) + '-' + str(regionend)

    # NOTE(review): get_bpc measures spans as end - start + 1; confirm whether
    # the missing +1 here is intentional.
    region_length = regionend - regionstart

    dhreadscount = readscounter.dhreadscounter(bamfile, region)

    return float(len(dhreadscount))/region_length
156 | 
157 | 
def ultratio(chrlength, uniqueratio, chrtotalreads, frcount):
    """Return ``chrlength * uniqueratio / (chrtotalreads - frcount)``.

    *frcount* is subtracted from the chromosome's read total before the
    division, so the denominator is the number of reads left after that
    subtraction.
    """
    usable_reads = chrtotalreads - frcount

    return chrlength * uniqueratio / usable_reads
165 | 
166 | 
def sim_replicate_nthreads(run_times=1000, uniqueratio=1, kernellength = 600, threshold = 4, nthreads = 2):
    """Run *run_times* background simulations in a process pool.

    Every replicate is produced by ``sim_bg_thread_worker``. Returns a dict
    with 'mean' (average of the per-replicate means) and 'std' (average of
    the per-replicate standard deviations).

    A ``KeyboardInterrupt`` terminates the pool and exits with status 1. Any
    other failure terminates the pool and is re-raised. Previously it was only
    printed, so the function returned ``None`` and callers then failed on
    ``bgscore['mean']``.
    """
    pars = [
        {'uniqueratio': uniqueratio, 'kernellength': kernellength, 'threshold': threshold}
        for _ in range(run_times)
    ]

    pool = Pool(nthreads)

    try:
        randomthresh = pool.map(sim_bg_thread_worker, pars)
        pool.close()

    except KeyboardInterrupt:
        pool.terminate()
        print ("You cancelled the program!")
        sys.exit(1)

    except Exception as e:
        print ('got exception: %r, terminating the pool' % (e,))
        pool.terminate()
        print ('pool is terminated')
        raise

    finally:
        # Valid after both close() and terminate(); reaps the workers.
        pool.join()

    summean = sum((score['rand_mean'] for score in randomthresh), 0.0)
    sumstd = sum((score['rand_std'] for score in randomthresh), 0.0)

    return {'mean': summean/run_times, 'std': sumstd/run_times}
241 | 
242 | 
def sim_bg_thread_worker(par):
    """Simulate one random background and return its smoothed mean and std.

    par: dict with 'uniqueratio' and 'kernellength' ('threshold' may be
        present but is not needed for the returned statistics).

    On a simulated genome of 100000 positions, ``genome * uniqueratio``
    distinct sites are drawn, the same number of reads is scattered over
    those sites with replacement, and the per-position counts are smoothed
    with the kernel using numpy ``correlate`` in its default mode.

    Returns {'rand_mean': ..., 'rand_std': ...}. A ``KeyboardInterrupt`` is
    re-raised as ``KeyboardInterruptError``.
    """
    try:
        uniqueratio = par['uniqueratio']
        kernellength = par['kernellength']

        kernel = smooth_kernel(length=kernellength)
        kernel_score = [kernel[i] for i in sorted(kernel)]

        sim_genome_size = int(1e5)
        total_reads = int(sim_genome_size * uniqueratio)

        sim_uniqsite = rnd.sample(range(sim_genome_size), total_reads)

        rand_reads_count = [0.0] * sim_genome_size

        for _ in range(total_reads):
            # choice() never indexes past the end, unlike
            # int(uniform(0, n)), whose upper bound can be hit through
            # floating-point rounding.
            rand_reads_count[rnd.choice(sim_uniqsite)] += 1.0

        smoothed_result = correlate(array(rand_reads_count), kernel_score)

        return {'rand_mean': smoothed_result.mean(), 'rand_std': smoothed_result.std()}

    except KeyboardInterrupt:
        raise KeyboardInterruptError()
319 | 
320 | 
def get_bpc(bamfile,  hotspots, jobtype, filted_region, nthreads, maxinsert = 100000):
    """Return the average read count per base pair over *hotspots*.

    Each hotspot is counted by ``bpc_runner`` in a process pool; the summed
    reads are divided by the summed hotspot lengths (``end - start + 1``).

    A ``KeyboardInterrupt`` terminates the pool and exits with status 1. Any
    other failure terminates the pool and is re-raised. Previously it was only
    printed and the function returned ``None``. An empty *hotspots* therefore
    raises ``ZeroDivisionError``.
    """
    pars = list()
    total_length = 0

    for hotspot_now in hotspots:
        pars.append({
            'bamfile': bamfile,
            'hotspot': hotspot_now,
            'jobtype': jobtype,
            'filted_region': filted_region,
            'maxinsert': maxinsert,
        })
        total_length += hotspot_now.end - hotspot_now.start + 1

    pool = Pool(nthreads)

    try:
        reads_count = pool.map(bpc_runner, pars)
        pool.close()

    except KeyboardInterrupt:
        pool.terminate()
        print ("You cancelled the program!")
        sys.exit(1)

    except Exception as e:
        print ('got exception: %r, terminating the pool' % (e,))
        pool.terminate()
        print ('pool is terminated')
        raise

    finally:
        # Valid after both close() and terminate(); reaps the workers.
        pool.join()

    return sum(reads_count, 0.0)/total_length
392 | 
393 | 
def bpc_runner(par):
    """Count the reads inside one hotspot (pool worker for ``get_bpc``).

    par: dict with 'bamfile', 'hotspot' (object with ``start``, ``end`` and
        ``chromosome``), 'jobtype', 'filted_region' and 'maxinsert'.

    Returns 0 when any 100-bp bin touched by the hotspot is listed in
    ``filted_region[chromosome]``. Otherwise returns the sum of the per-site
    counts from the read counter that matches *jobtype*.

    Raises ValueError for an unknown job type. Calling ``sys.exit`` here would
    only kill the worker process and leave the parent's ``pool.map`` waiting.
    A ``KeyboardInterrupt`` is re-raised as ``KeyboardInterruptError``.
    """
    try:
        bamfile = par['bamfile']
        hotspot = par['hotspot']
        jobtype = par['jobtype']
        filted_region = par['filted_region']
        maxinsert = par['maxinsert']

        start_site = hotspot.start
        end_site = hotspot.end
        chromosome = hotspot.chromosome

        # Visit each 100-bp bin once instead of recomputing it for every base.
        if start_site <= end_site and chromosome in filted_region:
            filtered_bins = filted_region[chromosome]
            first_bin = int(start_site/100)
            last_bin = int(end_site/100)
            for bin_index in range(first_bin, last_bin + 1):
                if bin_index in filtered_bins:
                    return 0

        hotspotregio = chromosome + ':' + str(start_site) + '-' + str(end_site)

        if jobtype == 'nhsingle':
            readscount = readscounter.nhreadscounter(bamfile = bamfile, region=hotspotregio, paired=False, maxinsert=maxinsert)
        elif jobtype == 'nhpaired':
            readscount = readscounter.nhreadscounter(bamfile = bamfile, region=hotspotregio, paired=True, maxinsert=maxinsert)
        elif jobtype == 'dh':
            readscount = readscounter.dhreadscounter(bamfile = bamfile, region = hotspotregio)
        else:
            raise ValueError("%s count type error!!!!" % jobtype)

        return sum(readscount[site] for site in readscount)

    except KeyboardInterrupt:
        raise KeyboardInterruptError()


--------------------------------------------------------------------------------
/Jazzlib/bgcount.py.bak:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from __future__ import print_function
  3 | import pysam
  4 | from numpy import *
  5 | from multiprocessing import Pool
  6 | import random as rnd
  7 | from kernel import *
  8 | import sys
  9 | import readscounter
 10 | 
 11 | 
 12 | class KeyboardInterruptError(Exception):
 13 |     
 14 |     pass
 15 | 
 16 | 
 17 | def nhnoncontrol(uniqueratio, threshold, kernellength, nthreads=4):
 18 |     
 19 |     bgscore = sim_replicate_nthreads(run_times=200, uniqueratio=uniqueratio,
 20 |                                      nthreads=nthreads, kernellength=kernellength, threshold=threshold)
 21 |     
 22 |     cutoff = bgscore['mean'] + bgscore['std'] * threshold
 23 |     
 24 |     return cutoff
 25 | 
 26 | 
 27 | def dhnoncontrol(uniqueratio, threshold, kernellength, nthreads=4):
 28 |     
 29 |     bgscore = sim_replicate_nthreads(run_times=200, uniqueratio=uniqueratio,
 30 |                                      nthreads=nthreads, kernellength=kernellength, threshold=threshold)
 31 |     
 32 |     cutoff = bgscore['mean'] + bgscore['std'] * threshold
 33 | 
 34 |     return cutoff
 35 | 
 36 | 
 37 | def nhcontrol(bamfile, chromosome, paired, chrlength, ultratio, filted_region,maxinsert, kernellength = 600, threshold = 4):
 38 |     
 39 |     """
 40 |         region: chr:start-end
 41 |         ultraio = chrlength * uniqratio / chr_total_reads
 42 |         filter region
 43 |         
 44 |     """
 45 |     
 46 |     region = chromosome + ':' + str(1) + '-' + str(chrlength)
 47 |     
 48 |     readscount = readscounter.nhreadscounter(bamfile, region, paired,  maxinsert=maxinsert)
 49 |     
 50 |     kernel = smooth_kernel(kernellength)
 51 |     
 52 |     kernel_score = list()
 53 | 
 54 |     for i in sorted(kernel):
 55 |         
 56 |         kernel_score.append(kernel[i])
 57 | 
 58 |     threshold = filted_region.threshold/100
 59 | 
 60 |     for site in readscount:
 61 |         
 62 |         if readscount[site] > threshold:
 63 |             
 64 |             readscount[site] = threshold
 65 | 
 66 |     smoothed_result = correlate(array(readscount), kernel_score)
 67 | 
 68 |     ultratiolist = list()
 69 | 
 70 |     ultratiolist.append(ultratio)
 71 | 
 72 |     smoothed_result = correlate(smoothed_result, ultratiolist)
 73 | 
 74 |     #scores = list()
 75 |     
 76 |     bg_mean = smoothed_result.mean()
 77 |     
 78 |     bg_std = smoothed_result.std()
 79 |     
 80 |     bg_threshold = bg_mean + threshold * bg_std
 81 |     
 82 |     #bgscore['rand_mean'] = bg_mean
 83 |     
 84 |     #bgscore['rand_std'] = bg_std
 85 |     
 86 |     cutoff = bg_threshold
 87 | 
 88 |     return cutoff
 89 | 
 90 | 
 91 | def nhuniquerate(bamfile, chromosome, paired, fregion, regionstart=1, regionend = -1, maxinsert = 100000):
 92 |     
 93 |     samfile = pysam.Samfile(bamfile)
 94 | 
 95 |     ref_lengths = samfile.lengths
 96 | 
 97 |     sam_ref = samfile.references
 98 | 
 99 |     refere_ncenumber = samfile.nreferences
100 | 
101 |     if regionend == -1:
102 |         
103 |         for i in range(refere_ncenumber):
104 |             
105 |             if sam_ref[i] == chromosome:
106 |                 
107 |                 regionend = ref_lengths[i]
108 | 
109 |     region = chromosome + ':' + str(regionstart) + '-' + str(regionend)
110 |     
111 |     region_length = regionend - regionstart
112 |         
113 |     nhreadscount = readscounter.nhreadscounter(bamfile, region, paired, maxinsert=maxinsert)
114 | 
115 |     totaluniq = len(nhreadscount)  + 0.0
116 |     
117 |     uniquerate = totaluniq/region_length
118 |     
119 |     return uniquerate
120 | 
121 | 
def dhuniquerate(bamfile, chromosome, regionstart=1, regionend=-1):
    """Return the number of distinct counted sites per base in a region (DH).

    regionend: -1 (default) means "to the end of *chromosome*", looked up in
        the BAM header.

    Raises ValueError when the chromosome end has to be looked up and
    *chromosome* is not a reference of *bamfile*.
    """
    if regionend == -1:
        samfile = pysam.Samfile(bamfile)
        try:
            chrom_lengths = dict(zip(samfile.references, samfile.lengths))
        finally:
            # Only the header is needed; do not leak the file handle.
            samfile.close()

        if chromosome not in chrom_lengths:
            raise ValueError("chromosome %s not found in %s" % (chromosome, bamfile))

        regionend = chrom_lengths[chromosome]

    region = chromosome + ':' + str(regionstart) + '-' + str(regionend)

    # NOTE(review): get_bpc measures spans as end - start + 1; confirm whether
    # the missing +1 here is intentional.
    region_length = regionend - regionstart

    dhreadscount = readscounter.dhreadscounter(bamfile, region)

    return float(len(dhreadscount))/region_length
156 | 
157 | 
def ultratio(chrlength, uniqueratio, chrtotalreads, frcount):
    """Return ``chrlength * uniqueratio / (chrtotalreads - frcount)``.

    *frcount* is subtracted from the chromosome's read total before the
    division, so the denominator is the number of reads left after that
    subtraction.
    """
    usable_reads = chrtotalreads - frcount

    return chrlength * uniqueratio / usable_reads
165 | 
166 | 
def sim_replicate_nthreads(run_times=1000, uniqueratio=1, kernellength = 600, threshold = 4, nthreads = 2):
    """Run *run_times* background simulations in a process pool.

    Every replicate is produced by ``sim_bg_thread_worker``. Returns a dict
    with 'mean' (average of the per-replicate means) and 'std' (average of
    the per-replicate standard deviations).

    A ``KeyboardInterrupt`` terminates the pool and exits with status 1. Any
    other failure terminates the pool and is re-raised. Previously it was only
    printed, so the function returned ``None`` and callers then failed on
    ``bgscore['mean']``.
    """
    pars = [
        {'uniqueratio': uniqueratio, 'kernellength': kernellength, 'threshold': threshold}
        for _ in range(run_times)
    ]

    pool = Pool(nthreads)

    try:
        randomthresh = pool.map(sim_bg_thread_worker, pars)
        pool.close()

    except KeyboardInterrupt:
        pool.terminate()
        print ("You cancelled the program!")
        sys.exit(1)

    # "as" form is accepted by Python 2.6+ and Python 3 alike.
    except Exception as e:
        print ('got exception: %r, terminating the pool' % (e,))
        pool.terminate()
        print ('pool is terminated')
        raise

    finally:
        # Valid after both close() and terminate(); reaps the workers.
        pool.join()

    summean = sum((score['rand_mean'] for score in randomthresh), 0.0)
    sumstd = sum((score['rand_std'] for score in randomthresh), 0.0)

    return {'mean': summean/run_times, 'std': sumstd/run_times}
241 | 
242 | 
def sim_bg_thread_worker(par):
    """Simulate one random background and return its smoothed mean and std.

    par: dict with 'uniqueratio' and 'kernellength' ('threshold' may be
        present but is not needed for the returned statistics).

    On a simulated genome of 100000 positions, ``genome * uniqueratio``
    distinct sites are drawn, the same number of reads is scattered over
    those sites with replacement, and the per-position counts are smoothed
    with the kernel using numpy ``correlate`` in its default mode.

    Returns {'rand_mean': ..., 'rand_std': ...}. A ``KeyboardInterrupt`` is
    re-raised as ``KeyboardInterruptError``.
    """
    try:
        uniqueratio = par['uniqueratio']
        kernellength = par['kernellength']

        kernel = smooth_kernel(length=kernellength)
        kernel_score = [kernel[i] for i in sorted(kernel)]

        sim_genome_size = int(1e5)
        total_reads = int(sim_genome_size * uniqueratio)

        sim_uniqsite = rnd.sample(range(sim_genome_size), total_reads)

        rand_reads_count = [0.0] * sim_genome_size

        for _ in range(total_reads):
            # choice() never indexes past the end, unlike
            # int(uniform(0, n)), whose upper bound can be hit through
            # floating-point rounding.
            rand_reads_count[rnd.choice(sim_uniqsite)] += 1.0

        smoothed_result = correlate(array(rand_reads_count), kernel_score)

        return {'rand_mean': smoothed_result.mean(), 'rand_std': smoothed_result.std()}

    except KeyboardInterrupt:
        raise KeyboardInterruptError()
319 | 
320 | 
def get_bpc(bamfile,  hotspots, jobtype, filted_region, nthreads, maxinsert = 100000):
    """Return the average read count per base pair (bpc) over ``hotspots``.

    One ``bpc_runner`` task is built per hotspot and mapped over a process
    pool. The per-hotspot read counts are summed and divided by the total
    hotspot length (``end - start + 1`` for each hotspot).

    Args:
        bamfile: forwarded to every worker as ``par['bamfile']``.
        hotspots: iterable of objects exposing ``start`` and ``end``.
        jobtype: forwarded to every worker as ``par['jobtype']``.
        filted_region: forwarded to every worker as ``par['filted_region']``.
        nthreads: number of pool processes.
        maxinsert: forwarded to every worker as ``par['maxinsert']``.

    Returns:
        The bpc as a float. If any other exception occurs (including a
        division by zero when ``hotspots`` is empty) it is printed, the pool
        is terminated, and ``None`` is returned implicitly.

    Exits:
        Calls ``sys.exit(1)`` on KeyboardInterrupt.
    """
    total_length = 0
    pars = list()

    for hotspot_now in hotspots:
        pars.append({
            'bamfile': bamfile,
            'hotspot': hotspot_now,
            'jobtype': jobtype,
            'filted_region': filted_region,
            'maxinsert': maxinsert,
        })
        total_length = hotspot_now.end - hotspot_now.start + 1 + total_length

    pool = Pool(nthreads)

    try:
        reads_count = pool.map(bpc_runner, pars)

        total_reads = 0.0
        for count_now in reads_count:
            total_reads = total_reads + count_now

        bpc = (total_reads + 0.0) / total_length

        pool.close()

        return bpc

    except KeyboardInterrupt:
        pool.terminate()
        print ("You cancelled the program!")
        sys.exit(1)

    # "except Exception, e" is Python 2-only syntax; "as" is valid on
    # Python 2.6+ and Python 3.
    except Exception as e:
        print ('got exception: %r, terminating the pool' % (e,))
        pool.terminate()
        print ('pool is terminated')

    finally:
        pool.join()
392 | 
393 | 
def bpc_runner(par):
    """Count the reads of one hotspot, or return 0 if it hits a filtered bin.

    Args:
        par: dict with the keys ``'bamfile'``, ``'hotspot'`` (an object with
            ``start``, ``end`` and ``chromosome``), ``'jobtype'``
            (``'nhsingle'``, ``'nhpaired'`` or ``'dh'``), ``'filted_region'``
            (a mapping chromosome -> container of 100-bp bin indices) and
            ``'maxinsert'``.

    Returns:
        The sum of the values returned by the matching ``readscounter``
        function for the hotspot region. Returns 0 when any 100-bp bin
        touched by the hotspot is listed in ``filted_region`` for its
        chromosome.

    Raises:
        KeyboardInterruptError: when the worker receives a KeyboardInterrupt.

    Exits:
        Calls ``sys.exit(1)`` for an unknown ``jobtype``.
    """
    try:
        bamfile = par['bamfile']
        hotspot = par['hotspot']
        jobtype = par['jobtype']
        filted_region = par['filted_region']
        maxinsert = par['maxinsert']

        start_site = hotspot.start
        end_site = hotspot.end
        chromosome = hotspot.chromosome

        hotspotregio = chromosome + ':' + str(start_site) + '-' + str(end_site)

        hotspotreads = 0

        # Check each 100-bp bin once instead of every base. int(i/100) is
        # monotone in i, so the bins touched by start..end are exactly the
        # integers between the first and the last bin.
        whether_in_fr = False
        if start_site <= end_site and chromosome in filted_region:
            chr_filtered = filted_region[chromosome]
            first_bin = int(start_site / 100)
            last_bin = int(end_site / 100)
            whether_in_fr = any(bin_index in chr_filtered
                                for bin_index in range(first_bin, last_bin + 1))

        if not whether_in_fr:
            if jobtype == 'nhsingle':
                readscount = readscounter.nhreadscounter(bamfile = bamfile, region=hotspotregio, paired=False, maxinsert=maxinsert)
            elif jobtype == 'nhpaired':
                readscount = readscounter.nhreadscounter(bamfile = bamfile, region=hotspotregio, paired=True, maxinsert=maxinsert)
            elif jobtype == 'dh':
                readscount = readscounter.dhreadscounter(bamfile = bamfile, region = hotspotregio)
            else:
                print ("%s count type error!!!!" % jobtype)
                sys.exit(1)

            for site in readscount:
                hotspotreads = hotspotreads + readscount[site]

        return hotspotreads

    except KeyboardInterrupt:
        # Re-raised as a regular Exception subclass.
        raise KeyboardInterruptError()


--------------------------------------------------------------------------------
/Jazz.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | import os
  4 | import sys
  5 | from optparse import OptionParser
  6 | import logging
  7 | from Jazzlib.FRegion import *
  8 | from Jazzlib.localmax import *
  9 | from Jazzlib.normalize_ratio import *
 10 | from Jazzlib.countreads import *
 11 | from Jazzlib.Peak import *
 12 | from Jazzlib.sta import *
 13 | from Jazzlib.jazzio import *
 14 | from Jazzlib.randombg import *
 15 | from Jazzlib.hotspotsscan import *
 16 | from Jazzlib.Hotspot import *
 17 | from Jazzlib.peaksscan import *
 18 | 
 19 | def main():
 20 | 
 21 |     opt = opt_check(get_optparser())
 22 | 
 23 |     if opt.controlfile == "no":
 24 | 
 25 |         nocontrol(opt)
 26 | 
 27 |     else:
 28 | 
 29 |         withcontrol(opt)
 30 | 
 31 | 
 32 | def withcontrol(opt):
 33 | 
 34 |     try:
 35 | 
 36 |         datafile = opt.datafile
 37 | 
 38 |         inputfile = opt.controlfile
 39 | 
 40 |         jobtype = opt.jobtype
 41 | 
 42 |         count_chr = opt.countchr
 43 | 
 44 |         maxinsert = opt.maxinsert
 45 | 
 46 |         nthreads = opt.nthreads
 47 | 
 48 |         bayesfactorthreshold = opt.threshold
 49 | 
 50 |         # bayesfactorthreshold = 10
 51 | 
 52 |         samplename = opt.samplename
 53 | 
 54 |         fdr = opt.fdr
 55 | 
 56 | 
 57 |         chipfregion = FRegion(bamfile=datafile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads, maxinsert=maxinsert)
 58 | 
 59 |         inputfregion = FRegion(bamfile=inputfile, jobtype=jobtype, countchr=count_chr, nthreads=nthreads, maxinsert=maxinsert)
 60 | 
 61 |         ratio = normalize_ratio_input2(fregegion_input=inputfregion, fregion_chip=chipfregion)
 62 | 
 63 |         if opt.genomesize:
 64 | 
 65 |             print("###chipfregion.adjreads * chipfregion.readlengthmean/chipfregion.countgenomelength,",
 66 |                   chipfregion.adjreads, chipfregion.readlengthmean, opt.genomesize)
 67 | 
 68 |             gloablumbda = chipfregion.adjreads * chipfregion.readlengthmean / opt.genomesize
 69 | 
 70 |         else:
 71 | 
 72 |             print("###inputfregion.adjreads,inputfregion.readlengthmean,inputfregion.countgenomelength",
 73 |                   inputfregion.adjreads , inputfregion.readlengthmean,inputfregion.countgenomelength)
 74 | 
 75 |             gloablumbda = inputfregion.adjreads * inputfregion.readlengthmean/inputfregion.countgenomelength
 76 | 
 77 |         windowscare=100000
 78 | 
 79 |         hotspots = hotspotsscan_withcontrol(chipfile=datafile,maxinsert=maxinsert, windowscare=windowscare,
 80 |                                             countchr=count_chr, inputgloablumbda=gloablumbda,
 81 |                                             bayesfactorthreshold=bayesfactorthreshold, nthreads=nthreads,
 82 |                                             chipfregion=chipfregion, jobtype=jobtype, ratio=ratio, inputfile=inputfile,
 83 |                                             inputfregion=inputfregion)
 84 | 
 85 |         peaks = peakscan_control(datafile=datafile,maxinsert=maxinsert, bayesfactorthreshold=bayesfactorthreshold,
 86 |                                  nthreads=nthreads,chipfregion=chipfregion, jobtype=jobtype, hotspots=hotspots,
 87 |                                  gloablumbda=gloablumbda,inputfile=inputfile,ratio=ratio,inputfregion=inputfregion)
 88 | 
 89 |         if opt.hotonly:
 90 | 
 91 |             hotspotsbedswriter(hotspots=hotspots, samplename=samplename)
 92 | 
 93 |         else:
 94 | 
 95 |             hotspotsenrich = hotspotsfilter(hotspots=hotspots, peaks=peaks)
 96 | 
 97 |             hotspotsbedswriter(hotspots=hotspotsenrich, samplename=samplename)
 98 | 
 99 |             peakbedswriter(samplename=samplename,peaks=peaks)
100 | 
101 |             jazzgffout(samplename=samplename, hotspots=hotspotsenrich, peaks=peaks, fregion=chipfregion)
102 | 
103 |     except KeyboardInterrupt:
104 | 
105 |         sys.stderr.write("User interrupt\n")
106 | 
107 |         sys.exit(0)
108 | 
109 | 
def nocontrol(opt):
    """Run hotspot and peak calling for a sample without a control.

    Builds an ``FRegion`` for the data file, derives the global lambda from
    it, scans for hotspots and peaks, and writes the results.

    Args:
        opt: validated options object as returned by ``opt_check``.

    Exits:
        Writes "User interrupt" to stderr and exits with status 0 on
        KeyboardInterrupt.
    """
    try:
        bam_path = opt.datafile
        jobtype = opt.jobtype
        count_chr = opt.countchr
        maxinsert = opt.maxinsert

        print("maxinsert", maxinsert)

        nthreads = opt.nthreads
        bayesfactorthreshold = opt.threshold
        samplename = opt.samplename

        chipfregion = FRegion(bamfile=bam_path, jobtype=jobtype, countchr=count_chr,
                              nthreads=nthreads, maxinsert=maxinsert)

        # Both branches print the same label; only the denominator differs.
        label = "###chipfregion.adjreads * chipfregion.readlengthmean/chipfregion.countgenomelength,"

        if opt.genomesize:
            print(label, chipfregion.adjreads, chipfregion.readlengthmean, opt.genomesize)
            gloablumbda = chipfregion.adjreads * chipfregion.readlengthmean / opt.genomesize
        else:
            print(label, chipfregion.adjreads, chipfregion.readlengthmean,
                  chipfregion.countgenomelength)
            gloablumbda = chipfregion.adjreads * chipfregion.readlengthmean / chipfregion.countgenomelength

        for fregions in chipfregion.filted_region:
            print(fregions)

        hotspots = hotspotsscan_withoutcontrol(
            file=bam_path, maxinsert=maxinsert, windowscare=100000,
            countchr=count_chr, bayesfactorthreshold=bayesfactorthreshold,
            nthreads=nthreads, fregion=chipfregion, jobtype=jobtype,
            gloablumbda=gloablumbda)

        peaks = peakscan_without_control(
            datafile=bam_path, maxinsert=maxinsert,
            bayesfactorthreshold=bayesfactorthreshold, nthreads=nthreads,
            fregion=chipfregion, jobtype=jobtype,
            hotspots=hotspots, gloablumbda=gloablumbda)

        if opt.hotonly:
            # Only the raw hotspots are written; peaks are not written.
            hotspotsbedswriter(hotspots=hotspots, samplename=samplename)
        else:
            hotspotsenrich = hotspotsfilter(hotspots=hotspots, peaks=peaks)
            hotspotsbedswriter(hotspots=hotspotsenrich, samplename=samplename)
            peakbedswriter(samplename=samplename, peaks=peaks)
            jazzgffout(samplename=samplename, hotspots=hotspotsenrich, peaks=peaks,
                       fregion=chipfregion)

    except KeyboardInterrupt:
        sys.stderr.write("User interrupt\n")
        sys.exit(0)
179 | 
180 | 
def get_optparser():
    """Build the command-line option parser for Jazz.

    Returns:
        An ``optparse.OptionParser`` with all Jazz options registered. The
        built-in help option is disabled and re-added explicitly as
        ``-h/--help``.
    """
    # The example uses -d (there is no -i option) and includes -j, which
    # opt_check treats as mandatory.
    usage = """usage: %prog <-d datafile> <-j jobtype> [-n name] [options]
    Example %prog -d nh_sample1.bam -j nhsingle -n sample1
    """

    description = "%prog Non-Histone protein binding site identification"

    jazzopt = OptionParser(version="%prog 0.1 20140521", description=description, usage=usage, add_help_option=False)

    jazzopt.add_option("-h", "--help", action="help", help="show this help message and exit.")

    jazzopt.add_option("-d", "--data", dest="datafile", type="string", help='data file, should be sorted bam format')

    jazzopt.add_option("-c", "--control", dest="controlfile", type="string", help='control(input) file, should be sorted bam format', default="no")

    # The help text now states the default that is actually configured.
    jazzopt.add_option("-n", "--name", dest="samplename", help="NH sample name default=DH_sample", type="string" , default="DH_sample")

    jazzopt.add_option("-t", "--threshold", dest="threshold", type="float", help="peak threshold, default=6.0", default=6.0)

    jazzopt.add_option("--threads", dest="nthreads", type="int", help="threads number or cpu number, default=4", default=4)

    jazzopt.add_option("-w", "--wig", action="store_true", help="whether to output wiggle file, default=False", default=False)

    jazzopt.add_option("-f","--fdr", dest="fdr", type="float",help="using FDR as threshold", default=0.1)

    jazzopt.add_option("-x", "--excludechr", dest="excludechr", help="Don't count those chromosome, strongly suggest skip mitochondrion and chloroplast, example='-x ChrM,ChrC'")

    jazzopt.add_option("-g", "--gff", action="store_true", help="whether to output gff file, default=False", default=False)

    jazzopt.add_option("-j","--jobtype",dest="jobtype",type="string",help="job type, such as nhpaired or nhsingle")

    jazzopt.add_option("-m","--maxinsert",dest="maxinsert",type="int",help="when you use paired library, please set the maxinsert size",default=130)

    jazzopt.add_option("--pe", dest="pe", action="store_true", help="paired-end reads or single-end reads, default=False (single end)", default=False)

    jazzopt.add_option("--genomesize", dest="genomesize", type="int",
                       help="Set genome size", default=False)

    jazzopt.add_option("--hotonly", dest="hotonly", action="store_true", default=False, help="calculate hotspots only.")

    return jazzopt
223 | 
224 | 
def opt_check(jazzopt):
    """Parse ``sys.argv`` with ``jazzopt`` and validate the resulting options.

    Checks that the data file (and the control file, if one is given) exists
    and has a ``.bai`` or ``.csi`` index next to it, that the thread count is
    positive, that the job type is ``nhsingle`` or ``nhpaired``, and that
    ``maxinsert`` is not negative. It then sets ``opt.countchr`` to the
    reference names of the data file minus those listed in ``--excludechr``.

    Args:
        jazzopt: the ``OptionParser`` returned by ``get_optparser``.

    Returns:
        The parsed options object, with ``countchr`` added.

    Exits:
        Calls ``sys.exit(1)`` after logging or printing the reason when any
        check fails.
    """
    (opt, args) = jazzopt.parse_args()

    if not opt.datafile:
        logging.error("you need input a bam file, '-d nh_sample1.bam -j nhsingle'")
        jazzopt.print_help()
        sys.exit(1)

    if not os.path.isfile(opt.datafile):
        logging.error("No such file: %s" % opt.datafile)
        sys.exit(1)

    dataindexfile1 = opt.datafile + '.bai'
    dataindexfile2 = opt.datafile + '.csi'

    if not (os.path.isfile(dataindexfile1) or os.path.isfile(dataindexfile2)):
        logging.error("Missing bam index file: %s or %s" % (dataindexfile1, dataindexfile2))
        sys.exit(1)

    # "no" is the parser's default and means that no control was supplied.
    if opt.controlfile != "no":
        if not os.path.isfile(opt.controlfile):
            logging.error("No such file: %s" % opt.controlfile)
            sys.exit(1)

        controlindexfile1 = opt.controlfile + '.bai'
        controlindexfile2 = opt.controlfile + '.csi'

        if not (os.path.isfile(controlindexfile1) or os.path.isfile(controlindexfile2)):
            logging.error("Missing bam index file: %s or %s" % (controlindexfile1, controlindexfile2))
            sys.exit(1)

    if not (opt.nthreads > 0):
        logging.error("threads number should >=1")
        jazzopt.print_help()
        sys.exit(1)

    # A missing job type (None) is rejected the same way as an unknown one.
    if opt.jobtype not in ('nhsingle', 'nhpaired'):
        logging.error("missing or wrong jobtype")
        jazzopt.print_help()
        sys.exit(1)

    if opt.maxinsert < 0:
        logging.error("maxinsert size error")
        jazzopt.print_help()
        sys.exit(1)

    samfile = pysam.Samfile(opt.datafile)
    try:
        sam_ref = samfile.references
    finally:
        # The handle is only needed for the reference names.
        samfile.close()

    opt.countchr = list(sam_ref)

    if opt.excludechr:
        excludchr = opt.excludechr.split(',')

        for chri in excludchr:
            if chri not in sam_ref:
                print (chri,'not in the %s file' % opt.datafile)
                print ("try to selcet exclude Chr from", end =" : ")
                # Unpack so that sep="," actually joins the names.
                print (*sam_ref, sep=",")
                jazzopt.print_help()
                sys.exit(1)

        # Build a new list instead of deleting from the list being iterated.
        opt.countchr = [name for name in opt.countchr if name not in excludchr]

    return opt
362 | 
if __name__ == "__main__":
    # Script entry point: a Ctrl-C that reaches this level is reported on
    # stderr and ends the process with status 0.
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("User interrupt\n")
        raise SystemExit(0)
374 | 
375 | 


--------------------------------------------------------------------------------
/Jazzlib/hotspotsscan.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | from .countreads import *
  4 | from .cEM_zip import *
  5 | from .FRegion import *
  6 | from multiprocessing import Pool
  7 | from .Hotspot import *
  8 | from .sta import *
  9 | from .region import *
 10 | from .Peak import *
 11 | 
 12 | 
 13 | class KeyboardInterruptError(Exception):
 14 | 
 15 |     pass
 16 | 
 17 | 
 18 | def hotspotsscan_withoutcontrol(file, maxinsert, windowscare,countchr,gloablumbda,
 19 |                                 bayesfactorthreshold, nthreads, fregion, jobtype):
 20 | 
 21 |     pool = Pool(nthreads)
 22 | 
 23 |     try:
 24 | 
 25 |         pars = list()
 26 | 
 27 |         hotspots = list()
 28 | 
 29 |         print ("gloablumbda",gloablumbda , "readlengthmean", fregion.readlengthmean)
 30 | 
 31 |         bayesfactorthresholdcount = 2
 32 | 
 33 |         i = 2
 34 | 
 35 |         while True:
 36 | 
 37 |             nowbayesfactor = bayesfactor(gloablumbda, i)
 38 | 
 39 |             if nowbayesfactor > bayesfactorthreshold:
 40 | 
 41 |                 break
 42 | 
 43 |             bayesfactorthresholdcount = i
 44 | 
 45 |             i = i + 1
 46 | 
 47 |         print ("bayesfactorthresholdcount", bayesfactorthresholdcount)
 48 | 
 49 |         windowsize = 100000
 50 | 
 51 |         for chromosmoe in countchr:
 52 | 
 53 |             chr_length = fregion.chrs_length[chromosmoe]
 54 | 
 55 |             for scare in range(0, int(chr_length/windowsize)+1):
 56 | 
 57 |                 nowstart = scare*windowsize + 1 -200
 58 | 
 59 |                 nowend = (scare+1)*windowsize + 200
 60 | 
 61 |                 if nowend > chr_length:
 62 | 
 63 |                     nowend = chr_length
 64 | 
 65 |                 if nowstart < 1:
 66 | 
 67 |                     nowstart = 1
 68 | 
 69 |                 nowregion = chromosmoe + ":" + str(nowstart) + "-" + str(nowend)
 70 | 
 71 |                 par = dict()
 72 | 
 73 |                 par['region'] = nowregion
 74 | 
 75 |                 par['maxinsert'] = maxinsert
 76 | 
 77 |                 par['bamfile'] = file
 78 | 
 79 |                 par['jobtype'] = jobtype
 80 | 
 81 |                 par['chrlength'] = chr_length
 82 | 
 83 |                 par['regionchromosome'] = chromosmoe
 84 | 
 85 |                 par['regionstart'] = nowstart
 86 | 
 87 |                 par['regionend'] = nowend
 88 | 
 89 |                 # par['bayesfactordic'] = bayesfactordic
 90 | 
 91 |                 par['bayesfactorcount'] = bayesfactorthresholdcount
 92 | 
 93 |                 par['readlengthmean'] = fregion.readlengthmean
 94 | 
 95 |                 pars.append(par)
 96 | 
 97 |         enrichedinthreads = pool.map(hotspot_withoutcontrol_worker, pars)
 98 | 
 99 |         chrenrichedpotin = dict()
100 | 
101 |         for enrichedinthread in enrichedinthreads:
102 | 
103 |             nowchr = enrichedinthread['chromosome']
104 | 
105 |             if nowchr in chrenrichedpotin:
106 | 
107 |                 chrenrichedpotin[nowchr].append(enrichedinthread['list'])
108 | 
109 |             else:
110 | 
111 |                 chrenrichedpotin[nowchr] = list()
112 | 
113 |                 chrenrichedpotin[nowchr].append(enrichedinthread['list'])
114 | 
115 |         chrhotpars = list()
116 | 
117 |         for nowchr in chrenrichedpotin:
118 | 
119 |             hotpar = dict()
120 | 
121 |             hotpar['chromosome'] = nowchr
122 | 
123 |             hotpar['preregion'] = chrenrichedpotin[nowchr]
124 | 
125 |             hotpar['chr_length'] = fregion.chrs_length[chromosmoe]
126 | 
127 |             hotpar['fregion'] = fregion
128 | 
129 |             chrhotpars.append(hotpar)
130 | 
131 |         hotsptosinthreads = pool.map(hotspots_chromsome_merge,chrhotpars)
132 | 
133 |         for hotinth in hotsptosinthreads:
134 | 
135 |             for hotspotnow in hotinth:
136 | 
137 |                 hotspots.append(hotspotnow)
138 | 
139 |         pool.close()
140 | 
141 |         return hotspots
142 | 
143 |     except KeyboardInterrupt:
144 | 
145 |         pool.terminate()
146 | 
147 |         print ("You cancelled the program!")
148 | 
149 |         sys.exit(1)
150 | 
151 |     except Exception as e:
152 | 
153 |         print ('got exception in Jazzlib.hotspotsscan.hotspotsscan_withoutcontrol: %r, terminating the pool' % (e,))
154 | 
155 |         pool.terminate()
156 | 
157 |         print ('pool is terminated')
158 | 
159 |     finally:
160 | 
161 |         pool.join()
162 | 
163 | 
def hotspot_withoutcontrol_worker(par):
    """Collect the sites of one window whose depth reaches the cutoff.

    Args:
        par: dict with the keys ``'maxinsert'``, ``'bamfile'``,
            ``'jobtype'``, ``'regionchromosome'``, ``'regionstart'``,
            ``'regionend'``, ``'bayesfactorcount'`` and ``'readlengthmean'``.

    Returns:
        dict with ``'chromosome'`` and ``'list'`` (the sites whose value in
        the ``extenddepthcount`` result is >= ``bayesfactorcount``). If any
        exception occurs it is printed and ``None`` is returned implicitly.

    Exits:
        Calls ``sys.exit(1)`` on KeyboardInterrupt.
    """
    try:
        insert_limit = par['maxinsert']
        bam_path = par['bamfile']
        job = par['jobtype']
        chrom = par['regionchromosome']
        window_start = par['regionstart']
        window_end = par['regionend']
        depth_cutoff = par['bayesfactorcount']
        mean_read_length = par['readlengthmean']

        depth_by_site = extenddepthcount(bamfile=bam_path, regionchromosome=chrom,
                                         regionstart=window_start, regionend=window_end,
                                         maxinsert=insert_limit, jobtype=job,
                                         readlengthmean=mean_read_length)

        enriched_sites = [site for site in depth_by_site
                          if depth_by_site[site] >= depth_cutoff]

        return {'chromosome': chrom, 'list': enriched_sites}

    except Exception as e:
        print ('got exception in Jazzlib.hotspotsscan.hotspot_withoutcontrol_worker: %r, terminating the pool' % (e,))
        print ('pool is terminated')

    except KeyboardInterrupt:
        print ("You cancelled the program!")
        sys.exit(1)
213 | 
214 | 
def hotspotsscan_withcontrol(chipfile, maxinsert, windowscare,countchr,inputgloablumbda,
                             bayesfactorthreshold, nthreads, chipfregion, jobtype, ratio,
                             inputfile, inputfregion):
    """Scan the ChIP sample for hotspots using control-derived statistics.

    The largest depth ``i`` (starting at 2) whose
    ``bayesfactor(inputgloablumbda, i)`` does not exceed
    ``bayesfactorthreshold`` becomes the depth cutoff. Every chromosome in
    ``countchr`` is cut into 100000-bp windows, each padded by 200 bp and
    clamped to ``[1, chr_length]``. The windows are scanned in a process
    pool; the enriched sites are then grouped per chromosome and merged into
    hotspots in a second pool pass.

    Args:
        chipfile: ChIP BAM file path handed to the workers.
        maxinsert: forwarded to the workers.
        windowscare: accepted for interface compatibility; the window size
            used here is fixed at 100000.
        countchr: iterable of chromosome names to scan.
        inputgloablumbda: value passed as first argument to ``bayesfactor``.
        bayesfactorthreshold: Bayes-factor cutoff.
        nthreads: number of pool processes.
        chipfregion: object providing ``readlengthmean`` and ``chrs_length``
            for the ChIP sample.
        jobtype: forwarded to the workers.
        ratio: scaling factor the workers apply to the ChIP depth.
        inputfile: accepted for interface compatibility; not used here.
        inputfregion: only its ``readlengthmean`` is read, for the log line.

    Returns:
        List of the hotspot objects produced by ``hotspots_chromsome_merge``.
        If any other exception occurs it is printed, the pool is terminated,
        and ``None`` is returned implicitly.

    Exits:
        Calls ``sys.exit(1)`` on KeyboardInterrupt.
    """
    pool = Pool(nthreads)

    try:
        pars = list()
        hotspots = list()

        print ("gloablumbda",inputgloablumbda , "readlengthmean", inputfregion.readlengthmean)

        bayesfactorthresholdcount = 2
        i = 2

        while True:
            if bayesfactor(inputgloablumbda, i) > bayesfactorthreshold:
                break
            bayesfactorthresholdcount = i
            i = i + 1

        print ("bayesfactorthresholdcount", bayesfactorthresholdcount)

        windowsize = 100000

        for chromosmoe in countchr:
            chr_length = chipfregion.chrs_length[chromosmoe]

            for scare in range(0, int(chr_length/windowsize)+1):
                # Pad each window by 200 bp and clamp it to the chromosome.
                nowstart = max(scare*windowsize + 1 - 200, 1)
                nowend = min((scare+1)*windowsize + 200, chr_length)

                pars.append({
                    'region': chromosmoe + ":" + str(nowstart) + "-" + str(nowend),
                    'maxinsert': maxinsert,
                    'bamfile': chipfile,
                    'jobtype': jobtype,
                    'chrlength': chr_length,
                    'regionchromosome': chromosmoe,
                    'regionstart': nowstart,
                    'regionend': nowend,
                    'ratio': ratio,
                    'bayesfactorcount': bayesfactorthresholdcount,
                    'readlengthmean': chipfregion.readlengthmean,
                })

        enrichedinthreads = pool.map(hotspot_control_worker, pars)

        # chromosome -> list of per-window enriched-site lists
        chrenrichedpotin = dict()

        for enrichedinthread in enrichedinthreads:
            nowchr = enrichedinthread['chromosome']
            chrenrichedpotin.setdefault(nowchr, list()).append(enrichedinthread['list'])

        chrhotpars = list()

        for nowchr in chrenrichedpotin:
            chrhotpars.append({
                'chromosome': nowchr,
                'preregion': chrenrichedpotin[nowchr],
                # Length of this chromosome, not of whichever chromosome the
                # window loop above happened to end on.
                'chr_length': chipfregion.chrs_length[nowchr],
                'fregion': chipfregion,
            })

        hotsptosinthreads = pool.map(hotspots_chromsome_merge, chrhotpars)

        for hotinth in hotsptosinthreads:
            for hotspotnow in hotinth:
                hotspots.append(hotspotnow)

        pool.close()

        return hotspots

    except KeyboardInterrupt:
        pool.terminate()
        print ("You cancelled the program!")
        sys.exit(1)

    except Exception as e:
        print ('got exception in Jazzlib.hotspotsscan.hotspotsscan_withcontrol: %r, terminating the pool' % (e,))
        pool.terminate()
        print ('pool is terminated')

    finally:
        pool.join()
364 | 
365 | 
def hotspot_control_worker(par):
    """Collect the sites of one window whose scaled depth reaches the cutoff.

    Args:
        par: dict with the keys ``'maxinsert'``, ``'bamfile'``,
            ``'jobtype'``, ``'regionchromosome'``, ``'regionstart'``,
            ``'regionend'``, ``'bayesfactorcount'``, ``'readlengthmean'``
            and ``'ratio'``.

    Returns:
        dict with ``'chromosome'`` and ``'list'`` (the sites whose value in
        the ``extenddepthcount`` result, multiplied by ``ratio``, is >=
        ``bayesfactorcount``). If any exception occurs it is printed and
        ``None`` is returned implicitly.

    Exits:
        Calls ``sys.exit(1)`` on KeyboardInterrupt.
    """
    try:
        maxinsert = par['maxinsert']
        bamfile = par['bamfile']
        jobtype = par['jobtype']
        chromosome = par['regionchromosome']
        nowstart = par['regionstart']
        nowend = par['regionend']
        bayesfactorcount = par['bayesfactorcount']
        readlengthmean = par['readlengthmean']
        ratio = par['ratio']

        datacount = extenddepthcount(bamfile=bamfile, regionchromosome=chromosome, regionstart=nowstart,
                                     regionend=nowend, maxinsert=maxinsert, jobtype=jobtype,
                                     readlengthmean=readlengthmean)

        enrichedlist = dict()
        enrichedlist['chromosome'] = chromosome
        enrichedlist['list'] = [site for site in datacount
                                if datacount[site]*ratio >= bayesfactorcount]

        return enrichedlist

    except Exception as e:
        # The message names this function; it previously carried the name of
        # hotspot_withoutcontrol_worker.
        print ('got exception in Jazzlib.hotspotsscan.hotspot_control_worker: %r, terminating the pool' % (e,))
        print ('pool is terminated')

    except KeyboardInterrupt:
        print ("You cancelled the program!")
        sys.exit(1)
417 | 
418 | 
419 | 
def hotspotsfilter(hotspots, peaks):
    """Keep only the hotspots that are the parent of at least one peak.

    Args:
        hotspots: iterable of objects exposing ``hotspotid``.
        peaks: iterable of objects exposing ``parent``.

    Returns:
        List of the hotspots, in their original order, whose ``hotspotid``
        equals the ``parent`` of some peak.
    """
    parent_ids = {peak.parent for peak in peaks}

    return [candidate for candidate in hotspots if candidate.hotspotid in parent_ids]
439 | 
440 | 
def hotspots_chromsome_merge(par):
    """Merge the enriched sites of one chromosome into ``Hotspot`` objects.

    The per-window site lists are de-duplicated, passed to
    ``continueregion(sites, 2)``, and each returned span is turned into a
    ``Hotspot``. Spans whose ``end_site - start_site`` is smaller than half
    of ``fregion.readlengthmean`` are dropped.

    Args:
        par: dict with the keys ``'chromosome'``, ``'preregion'`` (list of
            site lists), ``'chr_length'`` (read but not used) and
            ``'fregion'``.

    Returns:
        List of ``Hotspot`` objects with ids of the form ``chr:start-end``.
        If any exception occurs, the exception and ``par`` are printed and
        ``None`` is returned implicitly.

    Exits:
        Calls ``sys.exit(1)`` on KeyboardInterrupt.
    """
    try:
        chrom = par['chromosome']
        window_site_lists = par['preregion']
        chr_length = par['chr_length']
        fregion = par['fregion']

        # De-duplicate while keeping first-seen order.
        unique_sites = list(dict.fromkeys(
            site for window_sites in window_site_lists for site in window_sites))

        merged = list()

        for span in continueregion(unique_sites, 2):
            span_start = span['start_site']
            span_end = span['end_site']

            if span_end - span_start < fregion.readlengthmean/2:
                continue

            span_id = "%s:%s-%s" % (chrom, span_start, span_end)

            merged.append(Hotspot(start=span_start, end=span_end,
                                  chromosome=chrom, hotspotid=span_id))

        return merged

    except Exception as e:
        print ('got exception in Jazzlib.hotspotsscan.hotspots_chromsome_merge: %r, terminating the pool' % (e,))
        print (par)
        print ('pool is terminated')

    except KeyboardInterrupt:
        print ("You cancelled the program!")
        sys.exit(1)


--------------------------------------------------------------------------------
/Jazzlib/hotspotsscan.py.bak:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from __future__ import print_function
  3 | from countreads import *
  4 | from cEM_zip import *
  5 | from FRegion import *
  6 | from multiprocessing import Pool
  7 | from Hotspot import *
  8 | from sta import *
  9 | from region import *
 10 | from Peak import *
 11 | 
 12 | 
 13 | class KeyboardInterruptError(Exception):
 14 | 
 15 |     pass
 16 | 
 17 | 
 18 | def hotspotsscan_withoutcontrol(file, maxinsert, windowscare,countchr,gloablumbda,
 19 |                                 bayesfactorthreshold, nthreads, fregion, jobtype):
 20 | 
 21 |     pool = Pool(nthreads)
 22 | 
 23 |     try:
 24 | 
 25 |         pars = list()
 26 | 
 27 |         hotspots = list()
 28 | 
 29 |         print ("gloablumbda",gloablumbda , "readlengthmean", fregion.readlengthmean)
 30 | 
 31 |         bayesfactorthresholdcount = 2
 32 | 
 33 |         i = 2
 34 | 
 35 |         while True:
 36 | 
 37 |             nowbayesfactor = bayesfactor(gloablumbda, i)
 38 | 
 39 |             if nowbayesfactor > bayesfactorthreshold:
 40 | 
 41 |                 break
 42 | 
 43 |             bayesfactorthresholdcount = i
 44 | 
 45 |             i = i + 1
 46 | 
 47 |         print ("bayesfactorthresholdcount", bayesfactorthresholdcount)
 48 | 
 49 |         windowsize = 100000
 50 | 
 51 |         for chromosmoe in countchr:
 52 | 
 53 |             chr_length = fregion.chrs_length[chromosmoe]
 54 | 
 55 |             for scare in range(0, int(chr_length/windowsize)+1):
 56 | 
 57 |                 nowstart = scare*windowsize + 1 -200
 58 | 
 59 |                 nowend = (scare+1)*windowsize + 200
 60 | 
 61 |                 if nowend > chr_length:
 62 | 
 63 |                     nowend = chr_length
 64 | 
 65 |                 if nowstart < 1:
 66 | 
 67 |                     nowstart = 1
 68 | 
 69 |                 nowregion = chromosmoe + ":" + str(nowstart) + "-" + str(nowend)
 70 | 
 71 |                 par = dict()
 72 | 
 73 |                 par['region'] = nowregion
 74 | 
 75 |                 par['maxinsert'] = maxinsert
 76 | 
 77 |                 par['bamfile'] = file
 78 | 
 79 |                 par['jobtype'] = jobtype
 80 | 
 81 |                 par['chrlength'] = chr_length
 82 | 
 83 |                 par['regionchromosome'] = chromosmoe
 84 | 
 85 |                 par['regionstart'] = nowstart
 86 | 
 87 |                 par['regionend'] = nowend
 88 | 
 89 |                 # par['bayesfactordic'] = bayesfactordic
 90 | 
 91 |                 par['bayesfactorcount'] = bayesfactorthresholdcount
 92 | 
 93 |                 par['readlengthmean'] = fregion.readlengthmean
 94 | 
 95 |                 pars.append(par)
 96 | 
 97 |         enrichedinthreads = pool.map(hotspot_withoutcontrol_worker, pars)
 98 | 
 99 |         chrenrichedpotin = dict()
100 | 
101 |         for enrichedinthread in enrichedinthreads:
102 | 
103 |             nowchr = enrichedinthread['chromosome']
104 | 
105 |             if nowchr in chrenrichedpotin:
106 | 
107 |                 chrenrichedpotin[nowchr].append(enrichedinthread['list'])
108 | 
109 |             else:
110 | 
111 |                 chrenrichedpotin[nowchr] = list()
112 | 
113 |                 chrenrichedpotin[nowchr].append(enrichedinthread['list'])
114 | 
115 |         chrhotpars = list()
116 | 
117 |         for nowchr in chrenrichedpotin:
118 | 
119 |             hotpar = dict()
120 | 
121 |             hotpar['chromosome'] = nowchr
122 | 
123 |             hotpar['preregion'] = chrenrichedpotin[nowchr]
124 | 
125 |             hotpar['chr_length'] = fregion.chrs_length[chromosmoe]
126 | 
127 |             hotpar['fregion'] = fregion
128 | 
129 |             chrhotpars.append(hotpar)
130 | 
131 |         hotsptosinthreads = pool.map(hotspots_chromsome_merge,chrhotpars)
132 | 
133 |         for hotinth in hotsptosinthreads:
134 | 
135 |             for hotspotnow in hotinth:
136 | 
137 |                 hotspots.append(hotspotnow)
138 | 
139 |         pool.close()
140 | 
141 |         return hotspots
142 | 
143 |     except KeyboardInterrupt:
144 | 
145 |         pool.terminate()
146 | 
147 |         print ("You cancelled the program!")
148 | 
149 |         sys.exit(1)
150 | 
151 |     except Exception, e:
152 | 
153 |         print ('got exception in Jazzlib.hotspotsscan.hotspotsscan_withoutcontrol: %r, terminating the pool' % (e,))
154 | 
155 |         pool.terminate()
156 | 
157 |         print ('pool is terminated')
158 | 
159 |     finally:
160 | 
161 |         pool.join()
162 | 
163 | 
def hotspot_withoutcontrol_worker(par):
    """List the sites of one window whose depth reaches the count cutoff.

    Pool worker.  *par* is a dict providing ``'maxinsert'``, ``'bamfile'``,
    ``'jobtype'``, ``'regionchromosome'``, ``'regionstart'``,
    ``'regionend'``, ``'bayesfactorcount'`` and ``'readlengthmean'``.  The
    window is counted with ``extenddepthcount`` and every site whose count
    is ``>= par['bayesfactorcount']`` is kept.

    :return: ``{'chromosome': <name>, 'list': [<site>, ...]}``, or ``None``
        when an exception was caught and reported on stdout.
    """
    try:
        wanted = ('maxinsert', 'bamfile', 'jobtype', 'regionchromosome',
                  'regionstart', 'regionend', 'bayesfactorcount', 'readlengthmean')

        (maxinsert, bamfile, jobtype, chrom,
         first, last, mincount, readlengthmean) = [par[key] for key in wanted]

        depth = extenddepthcount(bamfile=bamfile, regionchromosome=chrom, regionstart=first,
                                 regionend=last, maxinsert=maxinsert, jobtype=jobtype,
                                 readlengthmean=readlengthmean)

        return {
            'chromosome': chrom,
            'list': [site for site in depth if depth[site] >= mincount],
        }

    except Exception as e:
        print ('got exception in Jazzlib.hotspotsscan.hotspot_withoutcontrol_worker: %r, terminating the pool' % (e,))
        print ('pool is terminated')

    except KeyboardInterrupt:
        print ("You cancelled the program!")
        sys.exit(1)
213 | 
214 | 
def hotspotsscan_withcontrol(chipfile, maxinsert, windowscare,countchr,inputgloablumbda,
                             bayesfactorthreshold, nthreads, chipfregion, jobtype, ratio,
                             inputfile, inputfregion):
    """Find hotspots in a ChIP sample that has a control, using a worker pool.

    Steps, as implemented below:

    1. Derive a per-site count cutoff: the largest ``i >= 2`` for which
       ``bayesfactor(inputgloablumbda, i)`` does not exceed
       *bayesfactorthreshold* (2 when even ``i == 2`` exceeds it).  The
       search only stops once the Bayes factor exceeds the threshold.
    2. Cut every chromosome of *countchr* into 100000-bp windows, padded by
       200 bp on each side and clipped to ``[1, chr_length]``, and map
       ``hotspot_control_worker`` over them; the workers receive *ratio*.
    3. Group the per-window site lists by chromosome and map
       ``hotspots_chromsome_merge`` over the chromosomes.

    :param chipfile: BAM file handed to the workers as ``'bamfile'``.
    :param maxinsert: forwarded to the workers unchanged.
    :param windowscare: accepted for interface compatibility; not used here.
    :param countchr: iterable of chromosome names to scan.
    :param inputgloablumbda: lambda passed to ``bayesfactor`` for the cutoff.
    :param bayesfactorthreshold: Bayes-factor threshold for the cutoff.
    :param nthreads: number of pool processes.
    :param chipfregion: object providing ``chrs_length`` (mapping indexed by
        chromosome name) and ``readlengthmean``.
    :param jobtype: forwarded to the workers unchanged.
    :param ratio: forwarded to the workers as ``'ratio'``.
    :param inputfile: accepted for interface compatibility; not used here.
    :param inputfregion: only its ``readlengthmean`` is read, for the
        progress message.
    :return: list of hotspots; ``None`` when a non-interrupt exception was
        caught and reported.  ``KeyboardInterrupt`` exits with status 1.
    """
    pool = Pool(nthreads)

    try:
        print ("gloablumbda",inputgloablumbda , "readlengthmean", inputfregion.readlengthmean)

        bayesfactorthresholdcount = 2
        i = 2
        while True:
            if bayesfactor(inputgloablumbda, i) > bayesfactorthreshold:
                break
            bayesfactorthresholdcount = i
            i = i + 1

        print ("bayesfactorthresholdcount", bayesfactorthresholdcount)

        windowsize = 100000
        pars = list()

        for chromosome in countchr:
            chr_length = chipfregion.chrs_length[chromosome]

            for scare in range(0, int(chr_length/windowsize)+1):
                # 200-bp padding on both sides, clipped to the chromosome.
                nowstart = scare*windowsize + 1 - 200
                if nowstart < 1:
                    nowstart = 1

                nowend = (scare+1)*windowsize + 200
                if nowend > chr_length:
                    nowend = chr_length

                pars.append({
                    'region': chromosome + ":" + str(nowstart) + "-" + str(nowend),
                    'maxinsert': maxinsert,
                    'bamfile': chipfile,
                    'jobtype': jobtype,
                    'chrlength': chr_length,
                    'regionchromosome': chromosome,
                    'regionstart': nowstart,
                    'regionend': nowend,
                    'ratio': ratio,
                    'bayesfactorcount': bayesfactorthresholdcount,
                    'readlengthmean': chipfregion.readlengthmean,
                })

        enrichedinthreads = pool.map(hotspot_control_worker, pars)

        # Collect the per-window site lists under their chromosome.
        chrenrichedpotin = dict()
        for enrichedinthread in enrichedinthreads:
            nowchr = enrichedinthread['chromosome']
            chrenrichedpotin.setdefault(nowchr, list()).append(enrichedinthread['list'])

        chrhotpars = list()
        for nowchr in chrenrichedpotin:
            chrhotpars.append({
                'chromosome': nowchr,
                'preregion': chrenrichedpotin[nowchr],
                # Length of *this* chromosome (previously the stale loop
                # variable of the window loop was used here).
                'chr_length': chipfregion.chrs_length[nowchr],
                'fregion': chipfregion,
            })

        hotspots = list()
        for hotinth in pool.map(hotspots_chromsome_merge, chrhotpars):
            for hotspotnow in hotinth:
                hotspots.append(hotspotnow)

        pool.close()

        return hotspots

    except KeyboardInterrupt:
        pool.terminate()
        print ("You cancelled the program!")
        sys.exit(1)

    except Exception as e:
        print ('got exception in Jazzlib.hotspotsscan.hotspotsscan_withcontrol: %r, terminating the pool' % (e,))
        pool.terminate()
        print ('pool is terminated')

    finally:
        pool.join()
364 | 
365 | 
def hotspot_control_worker(par):
    """List the sites of one window whose scaled depth reaches the cutoff.

    Pool worker.  *par* is a dict providing ``'maxinsert'``, ``'bamfile'``,
    ``'jobtype'``, ``'regionchromosome'``, ``'regionstart'``,
    ``'regionend'``, ``'bayesfactorcount'``, ``'readlengthmean'`` and
    ``'ratio'``.  The window is counted with ``extenddepthcount`` and every
    site whose ``count * ratio`` is ``>= par['bayesfactorcount']`` is kept.

    :return: ``{'chromosome': <name>, 'list': [<site>, ...]}``, or ``None``
        when an exception was caught and reported on stdout.
    """
    try:
        maxinsert = par['maxinsert']
        bamfile = par['bamfile']
        jobtype = par['jobtype']
        chromosome = par['regionchromosome']
        nowstart = par['regionstart']
        nowend = par['regionend']
        bayesfactorcount = par['bayesfactorcount']
        readlengthmean = par['readlengthmean']
        ratio = par['ratio']

        datacount = extenddepthcount(bamfile=bamfile, regionchromosome=chromosome, regionstart=nowstart,
                                     regionend=nowend, maxinsert=maxinsert, jobtype=jobtype,
                                     readlengthmean=readlengthmean)

        enrichedlist = dict()
        enrichedlist['chromosome'] = chromosome
        enrichedlist['list'] = [site for site in datacount
                                if datacount[site]*ratio >= bayesfactorcount]

        return enrichedlist

    except Exception as e:
        # The message now names this function; it used to name the
        # without-control worker, which sent debugging to the wrong place.
        print ('got exception in Jazzlib.hotspotsscan.hotspot_control_worker: %r, terminating the pool' % (e,))
        print ('pool is terminated')

    except KeyboardInterrupt:
        print ("You cancelled the program!")
        sys.exit(1)
417 | 
418 | 
419 | 
def hotspotsfilter(hotspots, peaks):
    """Return the hotspots that at least one peak names as its parent.

    A hotspot passes the filter when its ``hotspotid`` is equal to the
    ``parent`` attribute of some element of *peaks*.  Input order is kept.

    :param hotspots: iterable of objects exposing ``hotspotid``.
    :param peaks: iterable of objects exposing a hashable ``parent``.
    :return: a new list holding the hotspots that passed.
    """
    wanted = set(peak.parent for peak in peaks)

    kept = list()
    for hotspot in hotspots:
        if hotspot.hotspotid in wanted:
            kept.append(hotspot)

    return kept
439 | 
440 | 
def hotspots_chromsome_merge(par):
    """Turn the enriched sites of one chromosome into ``Hotspot`` objects.

    Pool worker.  *par* is a dict with the keys:

    * ``'chromosome'`` -- chromosome name, used to build the hotspot ids;
    * ``'preregion'``  -- iterable of iterables of enriched sites;
    * ``'chr_length'`` -- looked up but not otherwise used here;
    * ``'fregion'``    -- object whose ``readlengthmean`` sets the minimum span.

    The distinct sites are passed to ``continueregion(sites, 2)``.  Every
    returned region whose ``end_site - start_site`` is not smaller than
    ``fregion.readlengthmean / 2`` becomes a ``Hotspot`` whose id is
    ``"<chromosome>:<start>-<end>"``.

    :return: list of ``Hotspot`` objects, or ``None`` when an exception was
        caught and reported on stdout.
    """
    try:
        chrom = par['chromosome']
        site_groups = par['preregion']
        par['chr_length']  # lookup kept so a missing key is still reported
        fregion = par['fregion']

        # Dict used as a set: one entry per distinct site.
        seen = dict()
        for group in site_groups:
            for site in group:
                seen.setdefault(site, 1)

        merged = list()
        for span in continueregion(seen.keys(), 2):
            first = span['start_site']
            last = span['end_site']

            # Regions shorter than half the mean read length are dropped.
            if last - first < fregion.readlengthmean/2:
                continue

            name = str(chrom) + ":" + str(first) + "-" + str(last)
            merged.append(Hotspot(start=first, end=last, chromosome=chrom, hotspotid=name))

        return merged

    except Exception as e:
        print ('got exception in Jazzlib.hotspotsscan.hotspots_chromsome_merge: %r, terminating the pool' % (e,))
        print (par)
        print ('pool is terminated')

    except KeyboardInterrupt:
        print ("You cancelled the program!")
        sys.exit(1)


--------------------------------------------------------------------------------
/Jazzlib/peaksscan.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from .countreads import *
  3 | from .cEM_zip import *
  4 | from .FRegion import *
  5 | from multiprocessing import Pool
  6 | from .Peak import *
  7 | from .sta import *
  8 | from .region import *
  9 | from .Hotspot import *
 10 | 
 11 | 
 12 | class KeyboardInterruptError(Exception):
 13 | 
 14 |     pass
 15 | 
 16 | 
 17 | def peakscan_without_control(datafile, maxinsert, bayesfactorthreshold, nthreads, fregion,
 18 |                              jobtype, hotspots, gloablumbda):
 19 | 
 20 |     pool = Pool(nthreads)
 21 | 
 22 |     try:
 23 | 
 24 |         pars = list()
 25 | 
 26 |         for hotspot in hotspots:
 27 | 
 28 |             par = dict()
 29 | 
 30 |             par['hotspot'] = hotspot
 31 | 
 32 |             par['datafile'] = datafile
 33 | 
 34 |             par['maxinsert'] = maxinsert
 35 | 
 36 |             par['bayesfactorthreashold'] = bayesfactorthreshold
 37 | 
 38 |             par['jobtype'] = jobtype
 39 | 
 40 |             par['gloablumbda'] = gloablumbda
 41 | 
 42 |             par['ratio'] = 1
 43 | 
 44 |             par['fregion'] = fregion
 45 | 
 46 |             pars.append(par)
 47 | 
 48 |         peaksinthreads = pool.map(peakscan_withoutcontrol_worker, pars)
 49 | 
 50 |         peaks = list()
 51 | 
 52 |         for hotspotnow in peaksinthreads:
 53 | 
 54 |             for peaknow in hotspotnow:
 55 | 
 56 |                 print((peaknow.peakid))
 57 | 
 58 |                 peaks.append(peaknow)
 59 | 
 60 |         pool.close()
 61 | 
 62 |         return peaks
 63 | 
 64 |     except KeyboardInterrupt:
 65 | 
 66 |         pool.terminate()
 67 | 
 68 |         print ("You cancelled the program!")
 69 | 
 70 |         sys.exit(1)
 71 | 
 72 |     except Exception as e:
 73 | 
 74 |         print(('got exception in Jazzlib.peaksscan.peakscan_without_control: %r, terminating the pool' % (e,)))
 75 | 
 76 |         pool.terminate()
 77 | 
 78 |         print ('pool is terminated')
 79 | 
 80 |     finally:
 81 |         #     print ('joining pool processes')
 82 |         pool.join()
 83 |             # print ('join complete')
 84 | 
 85 | 
 86 | def peakscan_withoutcontrol_worker(par):
 87 | 
 88 |     try:
 89 |         peaks = list()
 90 | 
 91 |         hotspot = par['hotspot']
 92 | 
 93 |         datafile = par['datafile']
 94 | 
 95 |         maxinsert = par['maxinsert']
 96 | 
 97 |         bayesfactorthreshold = par['bayesfactorthreashold']
 98 | 
 99 |         jobtype = par['jobtype']
100 | 
101 |         gloablumbda = par['gloablumbda']
102 | 
103 |         ratio = par['ratio']
104 | 
105 |         fregion = par['fregion']
106 | 
107 |         start = hotspot.start
108 | 
109 |         end = hotspot.end
110 | 
111 |         chromosome = hotspot.chromosome
112 | 
113 |         chrlength = fregion.chrs_length[chromosome]
114 | 
115 |         regionstart = start - 5100
116 | 
117 |         regionend = end + 5100
118 | 
119 |         if regionstart < 1:
120 | 
121 |             regionstart = 1
122 | 
123 |         if regionend > chrlength:
124 | 
125 |             regionend = chrlength
126 | 
127 |         datacount = depthcount(bamfile=datafile, regionchromosome=chromosome, regionstart=regionstart,
128 |                                 regionend=regionend, maxinsert=maxinsert, jobtype=jobtype)
129 | 
130 |         # datacount = midsitecount(bamfile=datafile, regionchromosome=chromosome, regionstart=regionstart,
131 |         #                         regionend=regionend, maxinsert=maxinsert, jobtype=jobtype)
132 | 
133 |         enrichedsite = dict()
134 | 
135 |         bayesfactorscore = dict()
136 | 
137 |         inputwindow5k = list()
138 | 
139 |         inputwindow10k = list()
140 | 
141 |         for sitenow in range(start-5000,end+5000):
142 | 
143 |             nowcount = 0
144 | 
145 |             if sitenow < 0:
146 | 
147 |                 continue
148 | 
149 |             if sitenow > chrlength:
150 | 
151 |                 continue
152 | 
153 |             if sitenow in datacount:
154 | 
155 |                 nowcount = datacount[sitenow]
156 | 
157 |             inputwindow10k.append(nowcount)
158 | 
159 |         for sitenow in range(start-2500,end+2500):
160 | 
161 |             nowcount = 0
162 | 
163 |             if sitenow < 0:
164 | 
165 |                 continue
166 | 
167 |             if sitenow > chrlength:
168 | 
169 |                 continue
170 | 
171 |             if sitenow in datacount:
172 | 
173 |                 nowcount = datacount[sitenow]
174 | 
175 |             inputwindow5k.append(nowcount)
176 | 
177 | 
178 |         (window5klhat, window5kphat) = cEM_zip(inputwindow5k)
179 | 
180 |         (window10klhat, window10kphat) = cEM_zip(inputwindow10k)
181 | 
182 |         maxlhat = max(window5klhat, window10klhat, gloablumbda)
183 | 
184 |         if maxlhat > 400:
185 | 
186 |             maxlhat = gloablumbda * 5
187 | 
188 |         for wsite in range(start-1, end+1):
189 | 
190 |             if wsite in datacount:
191 | 
192 |                 nowcount = datacount[wsite]
193 | 
194 |                 if nowcount < 2:
195 | 
196 |                     nowbayesfactor = bayesfactor(locallambda=maxlhat, peakscore=2)
197 | 
198 |                 else:
199 | 
200 |                     nowbayesfactor = bayesfactor(locallambda=maxlhat, peakscore=nowcount)
201 | 
202 |                 bayesfactorscore[wsite] = nowbayesfactor
203 | 
204 |                 if nowbayesfactor > bayesfactorthreshold:
205 | 
206 |                     enrichedsite[wsite] = 1
207 | 
208 |         regionlist = list(enrichedsite.keys())
209 | 
210 |         tmppeaks = continueregion(points=regionlist, minlength=1)
211 | 
212 |         iniid = 1
213 | 
214 |         for tmppeak in tmppeaks:
215 | 
216 |             tmppeakstart = tmppeak['start_site']
217 | 
218 |             tmppeakend = tmppeak['end_site']
219 | 
220 |             totalbayesscore = 0
221 | 
222 |             maxscore = 0
223 | 
224 |             maxsite = 0
225 | 
226 |             for site in range(tmppeakstart, tmppeakend+1):
227 | 
228 |                 score = bayesfactorscore[site]
229 | 
230 |                 totalbayesscore = totalbayesscore + score
231 | 
232 |                 if score > maxscore:
233 | 
234 |                     score = maxscore
235 | 
236 |                     maxsite = site
237 | 
238 |             avgbayescore = totalbayesscore/(tmppeakend - tmppeakstart + 1)
239 | 
240 |             peakid = hotspot.hotspotid+'.'+str(iniid)
241 | 
242 |             peak = Peak(start=tmppeakstart, end=tmppeakend, chromosome=chromosome, peakpoint=maxsite, peakid=peakid,
243 |                         score=avgbayescore, parent=hotspot.hotspotid)
244 | 
245 |             iniid= iniid +1
246 | 
247 |             peaks.append(peak)
248 | 
249 |         return peaks
250 | 
251 |     except Exception as e:
252 | 
253 |         print(('got exception in Jazzlib.peaksscan.peakscan_without_control: %r, terminating the pool' % (e,)))
254 | 
255 |         print((par['hotspot'].chromosome, par['hotspot'].start,par['hotspot'].end))
256 | 
257 | 
258 |     except KeyboardInterrupt:
259 | 
260 |         print ("You cancelled the program!")
261 | 
262 |         sys.exit(1)
263 | 
264 | 
265 | 
266 | 
def peakscan_control(datafile, maxinsert, bayesfactorthreshold, nthreads, chipfregion,
                     jobtype, hotspots, gloablumbda, inputfile, ratio, inputfregion):
    """Call peaks inside every hotspot of a ChIP sample that has a control.

    One job per hotspot is mapped over a pool of *nthreads* processes.  The
    id of every peak found is printed as the results are gathered.

    :param datafile: forwarded to the worker as ``'datafile'``.
    :param maxinsert: forwarded to the worker unchanged.
    :param bayesfactorthreshold: forwarded under the key
        ``'bayesfactorthreashold'`` (spelling expected by the worker).
    :param nthreads: number of pool processes.
    :param chipfregion: accepted for interface compatibility; not used here.
    :param jobtype: forwarded to the worker unchanged.
    :param hotspots: iterable of hotspots, one job each.
    :param gloablumbda: forwarded to the worker unchanged.
    :param inputfile: forwarded to the worker as ``'inputfile'``.
    :param ratio: forwarded to the worker as ``'ratio'``.
    :param inputfregion: forwarded to the worker as ``'fregion'``.
    :return: flat list of peaks; ``None`` when a non-interrupt exception was
        caught and reported.  ``KeyboardInterrupt`` exits with status 1.
    """
    pool = Pool(nthreads)

    try:
        pars = [
            {
                'hotspot': hotspot,
                'datafile': datafile,
                'maxinsert': maxinsert,
                'bayesfactorthreashold': bayesfactorthreshold,
                'jobtype': jobtype,
                'gloablumbda': gloablumbda,
                'ratio': ratio,
                'inputfile': inputfile,
                'fregion': inputfregion,
            }
            for hotspot in hotspots
        ]

        # NOTE(review): the jobs carry 'inputfile' and 'ratio', which only
        # peakscan_control_worker consumes, yet the without-control worker
        # is mapped here, so the control data never influences the peaks.
        # This looks like a copy/paste slip, but switching workers changes
        # the results -- confirm the intent before changing it.
        peaksinthreads = pool.map(peakscan_withoutcontrol_worker, pars)

        peaks = list()
        for hotspotnow in peaksinthreads:
            for peaknow in hotspotnow:
                print((peaknow.peakid))
                peaks.append(peaknow)

        pool.close()

        return peaks

    except KeyboardInterrupt:
        pool.terminate()
        print ("You cancelled the program!")
        sys.exit(1)

    except Exception as e:
        # The message now names this function; it used to name
        # peakscan_without_control.
        print(('got exception in Jazzlib.peaksscan.peakscan_control: %r, terminating the pool' % (e,)))
        pool.terminate()
        print ('pool is terminated')

    finally:
        # Runs on every path, after close() or terminate() above.
        pool.join()
336 | 
337 | 
def peakscan_control_worker(par):
    """Call peaks inside one hotspot using the control depth as background.

    Pool worker.  *par* is a dict providing ``'hotspot'`` (with ``start``,
    ``end``, ``chromosome`` and ``hotspotid``), ``'datafile'``,
    ``'inputfile'``, ``'maxinsert'``, ``'bayesfactorthreashold'``,
    ``'jobtype'``, ``'gloablumbda'``, ``'ratio'`` and ``'fregion'`` (with
    ``chrs_length``).

    Steps, as implemented below:

    1. Count depth with ``depthcount`` for both files over the hotspot
       extended by 5100 bp on each side, clipped to
       ``[1, chromosome length]``.
    2. Run ``cEM_zip`` on the *control* depth of the hotspot extended by
       500, 2500 and 5000 bp; the local lambda is the maximum of the three
       first return values and ``gloablumbda``.  When that maximum exceeds
       400 it is replaced by ``gloablumbda * 5``.
    3. Score each covered sample site from ``start - 1`` to ``end`` with
       ``bayesfactor`` on ``int(count * ratio)`` (values below 2 are scored
       as 2) and keep the sites whose score exceeds the threshold.
    4. Join kept sites with ``continueregion(points=..., minlength=1)`` and
       build one ``Peak`` per region.  Its score is the mean site score and
       its ``peakpoint`` is the first site carrying the highest score.

    :return: list of ``Peak`` objects, or ``None`` when an exception was
        caught and reported on stdout (callers that iterate the result must
        be prepared for that).
    """
    try:
        peaks = list()

        hotspot = par['hotspot']
        datafile = par['datafile']
        inputfile = par['inputfile']
        maxinsert = par['maxinsert']
        bayesfactorthreshold = par['bayesfactorthreashold']
        jobtype = par['jobtype']
        gloablumbda = par['gloablumbda']
        ratio = par['ratio']
        fregion = par['fregion']

        start = hotspot.start
        end = hotspot.end
        chromosome = hotspot.chromosome
        chrlength = fregion.chrs_length[chromosome]

        regionstart = start - 5100
        if regionstart < 1:
            regionstart = 1

        regionend = end + 5100
        if regionend > chrlength:
            regionend = chrlength

        datacount = depthcount(bamfile=datafile, regionchromosome=chromosome, regionstart=regionstart,
                               regionend=regionend, maxinsert=maxinsert, jobtype=jobtype)

        inputcount = depthcount(bamfile=inputfile, regionchromosome=chromosome, regionstart=regionstart,
                                regionend=regionend, maxinsert=maxinsert, jobtype=jobtype)

        def input_window(flank):
            # Control depth at every site of the hotspot widened by *flank*
            # on both sides; sites outside [0, chrlength] are skipped and
            # sites without coverage count as 0.
            return [inputcount[site] if site in inputcount else 0
                    for site in range(start - flank, end + flank)
                    if 0 <= site <= chrlength]

        inputwindow10k = input_window(5000)
        inputwindow5k = input_window(2500)
        inputwindow1k = input_window(500)

        (window5klhat, _window5kphat) = cEM_zip(inputwindow5k)
        (window10klhat, _window10kphat) = cEM_zip(inputwindow10k)
        (window1klhat, _window1kphat) = cEM_zip(inputwindow1k)

        maxlhat = max(window5klhat, window10klhat, window1klhat, gloablumbda)
        if maxlhat > 400:
            maxlhat = gloablumbda * 5

        enrichedsite = dict()
        bayesfactorscore = dict()

        for wsite in range(start-1, end+1):
            if wsite not in datacount:
                continue

            nowcount = int(datacount[wsite]*ratio)
            if nowcount < 2:
                nowcount = 2

            nowbayesfactor = bayesfactor(locallambda=maxlhat, peakscore=nowcount)
            bayesfactorscore[wsite] = nowbayesfactor

            if nowbayesfactor > bayesfactorthreshold:
                enrichedsite[wsite] = 1

        tmppeaks = continueregion(points=list(enrichedsite.keys()), minlength=1)

        for iniid, tmppeak in enumerate(tmppeaks, 1):
            tmppeakstart = tmppeak['start_site']
            tmppeakend = tmppeak['end_site']

            totalbayesscore = 0
            maxscore = 0
            maxsite = 0

            for site in range(tmppeakstart, tmppeakend+1):
                score = bayesfactorscore[site]
                totalbayesscore = totalbayesscore + score

                if score > maxscore:
                    # Track the running maximum.  The assignment used to be
                    # reversed, so the summit was never updated correctly.
                    maxscore = score
                    maxsite = site

            avgbayescore = totalbayesscore/(tmppeakend - tmppeakstart + 1)
            peakid = hotspot.hotspotid+'.'+str(iniid)

            peaks.append(Peak(start=tmppeakstart, end=tmppeakend, chromosome=chromosome,
                              peakpoint=maxsite, peakid=peakid,
                              score=avgbayescore, parent=hotspot.hotspotid))

        return peaks

    except Exception as e:
        # The message now names this function; it used to name
        # peakscan_without_control.
        print(('got exception in Jazzlib.peaksscan.peakscan_control_worker: %r, terminating the pool' % (e,)))
        print((par['hotspot'].chromosome, par['hotspot'].start,par['hotspot'].end))

    except KeyboardInterrupt:
        print ("You cancelled the program!")
        sys.exit(1)


--------------------------------------------------------------------------------
/Jazzlib/peaksscan.py.bak:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from countreads import *
  3 | from cEM_zip import *
  4 | from FRegion import *
  5 | from multiprocessing import Pool
  6 | from Peak import *
  7 | from sta import *
  8 | from region import *
  9 | from Hotspot import *
 10 | 
 11 | 
 12 | class KeyboardInterruptError(Exception):
 13 | 
 14 |     pass
 15 | 
 16 | 
 17 | def peakscan_without_control(datafile, maxinsert, bayesfactorthreshold, nthreads, fregion,
 18 |                              jobtype, hotspots, gloablumbda):
 19 | 
 20 |     pool = Pool(nthreads)
 21 | 
 22 |     try:
 23 | 
 24 |         pars = list()
 25 | 
 26 |         for hotspot in hotspots:
 27 | 
 28 |             par = dict()
 29 | 
 30 |             par['hotspot'] = hotspot
 31 | 
 32 |             par['datafile'] = datafile
 33 | 
 34 |             par['maxinsert'] = maxinsert
 35 | 
 36 |             par['bayesfactorthreashold'] = bayesfactorthreshold
 37 | 
 38 |             par['jobtype'] = jobtype
 39 | 
 40 |             par['gloablumbda'] = gloablumbda
 41 | 
 42 |             par['ratio'] = 1
 43 | 
 44 |             par['fregion'] = fregion
 45 | 
 46 |             pars.append(par)
 47 | 
 48 |         peaksinthreads = pool.map(peakscan_withoutcontrol_worker, pars)
 49 | 
 50 |         peaks = list()
 51 | 
 52 |         for hotspotnow in peaksinthreads:
 53 | 
 54 |             for peaknow in hotspotnow:
 55 | 
 56 |                 print (peaknow.peakid)
 57 | 
 58 |                 peaks.append(peaknow)
 59 | 
 60 |         pool.close()
 61 | 
 62 |         return peaks
 63 | 
 64 |     except KeyboardInterrupt:
 65 | 
 66 |         pool.terminate()
 67 | 
 68 |         print ("You cancelled the program!")
 69 | 
 70 |         sys.exit(1)
 71 | 
 72 |     except Exception, e:
 73 | 
 74 |         print ('got exception in Jazzlib.peaksscan.peakscan_without_control: %r, terminating the pool' % (e,))
 75 | 
 76 |         pool.terminate()
 77 | 
 78 |         print ('pool is terminated')
 79 | 
 80 |     finally:
 81 |         #     print ('joining pool processes')
 82 |         pool.join()
 83 |             # print ('join complete')
 84 | 
 85 | 
 86 | def peakscan_withoutcontrol_worker(par):
 87 | 
 88 |     try:
 89 |         peaks = list()
 90 | 
 91 |         hotspot = par['hotspot']
 92 | 
 93 |         datafile = par['datafile']
 94 | 
 95 |         maxinsert = par['maxinsert']
 96 | 
 97 |         bayesfactorthreshold = par['bayesfactorthreashold']
 98 | 
 99 |         jobtype = par['jobtype']
100 | 
101 |         gloablumbda = par['gloablumbda']
102 | 
103 |         ratio = par['ratio']
104 | 
105 |         fregion = par['fregion']
106 | 
107 |         start = hotspot.start
108 | 
109 |         end = hotspot.end
110 | 
111 |         chromosome = hotspot.chromosome
112 | 
113 |         chrlength = fregion.chrs_length[chromosome]
114 | 
115 |         regionstart = start - 5100
116 | 
117 |         regionend = end + 5100
118 | 
119 |         if regionstart < 1:
120 | 
121 |             regionstart = 1
122 | 
123 |         if regionend > chrlength:
124 | 
125 |             regionend = chrlength
126 | 
127 |         datacount = depthcount(bamfile=datafile, regionchromosome=chromosome, regionstart=regionstart,
128 |                                 regionend=regionend, maxinsert=maxinsert, jobtype=jobtype)
129 | 
130 |         # datacount = midsitecount(bamfile=datafile, regionchromosome=chromosome, regionstart=regionstart,
131 |         #                         regionend=regionend, maxinsert=maxinsert, jobtype=jobtype)
132 | 
133 |         enrichedsite = dict()
134 | 
135 |         bayesfactorscore = dict()
136 | 
137 |         inputwindow5k = list()
138 | 
139 |         inputwindow10k = list()
140 | 
141 |         for sitenow in range(start-5000,end+5000):
142 | 
143 |             nowcount = 0
144 | 
145 |             if sitenow < 0:
146 | 
147 |                 continue
148 | 
149 |             if sitenow > chrlength:
150 | 
151 |                 continue
152 | 
153 |             if sitenow in datacount:
154 | 
155 |                 nowcount = datacount[sitenow]
156 | 
157 |             inputwindow10k.append(nowcount)
158 | 
159 |         for sitenow in range(start-2500,end+2500):
160 | 
161 |             nowcount = 0
162 | 
163 |             if sitenow < 0:
164 | 
165 |                 continue
166 | 
167 |             if sitenow > chrlength:
168 | 
169 |                 continue
170 | 
171 |             if sitenow in datacount:
172 | 
173 |                 nowcount = datacount[sitenow]
174 | 
175 |             inputwindow5k.append(nowcount)
176 | 
177 | 
178 |         (window5klhat, window5kphat) = cEM_zip(inputwindow5k)
179 | 
180 |         (window10klhat, window10kphat) = cEM_zip(inputwindow10k)
181 | 
182 |         maxlhat = max(window5klhat, window10klhat, gloablumbda)
183 | 
184 |         if maxlhat > 400:
185 | 
186 |             maxlhat = gloablumbda * 5
187 | 
188 |         for wsite in range(start-1, end+1):
189 | 
190 |             if wsite in datacount:
191 | 
192 |                 nowcount = datacount[wsite]
193 | 
194 |                 if nowcount < 2:
195 | 
196 |                     nowbayesfactor = bayesfactor(locallambda=maxlhat, peakscore=2)
197 | 
198 |                 else:
199 | 
200 |                     nowbayesfactor = bayesfactor(locallambda=maxlhat, peakscore=nowcount)
201 | 
202 |                 bayesfactorscore[wsite] = nowbayesfactor
203 | 
204 |                 if nowbayesfactor > bayesfactorthreshold:
205 | 
206 |                     enrichedsite[wsite] = 1
207 | 
208 |         regionlist = enrichedsite.keys()
209 | 
210 |         tmppeaks = continueregion(points=regionlist, minlength=1)
211 | 
212 |         iniid = 1
213 | 
214 |         for tmppeak in tmppeaks:
215 | 
216 |             tmppeakstart = tmppeak['start_site']
217 | 
218 |             tmppeakend = tmppeak['end_site']
219 | 
220 |             totalbayesscore = 0
221 | 
222 |             maxscore = 0
223 | 
224 |             maxsite = 0
225 | 
226 |             for site in range(tmppeakstart, tmppeakend+1):
227 | 
228 |                 score = bayesfactorscore[site]
229 | 
230 |                 totalbayesscore = totalbayesscore + score
231 | 
232 |                 if score > maxscore:
233 | 
234 |                     score = maxscore
235 | 
236 |                     maxsite = site
237 | 
238 |             avgbayescore = totalbayesscore/(tmppeakend - tmppeakstart + 1)
239 | 
240 |             peakid = hotspot.hotspotid+'.'+str(iniid)
241 | 
242 |             peak = Peak(start=tmppeakstart, end=tmppeakend, chromosome=chromosome, peakpoint=maxsite, peakid=peakid,
243 |                         score=avgbayescore, parent=hotspot.hotspotid)
244 | 
245 |             iniid= iniid +1
246 | 
247 |             peaks.append(peak)
248 | 
249 |         return peaks
250 | 
251 |     except Exception, e:
252 | 
253 |         print ('got exception in Jazzlib.peaksscan.peakscan_without_control: %r, terminating the pool' % (e,))
254 | 
255 |         print (par['hotspot'].chromosome, par['hotspot'].start,par['hotspot'].end)
256 | 
257 | 
258 |     except KeyboardInterrupt:
259 | 
260 |         print ("You cancelled the program!")
261 | 
262 |         sys.exit(1)
263 | 
264 | 
265 | 
266 | 
def peakscan_control(datafile, maxinsert, bayesfactorthreshold, nthreads, chipfregion,
                     jobtype, hotspots, gloablumbda, inputfile, ratio, inputfregion):
    """Scan every hotspot for peaks in parallel, using a control (input) sample.

    One parameter dict is built per hotspot and mapped over
    ``peakscan_control_worker`` in a ``multiprocessing.Pool`` of ``nthreads``
    processes.

    Args:
        datafile: passed to the worker as ``par['datafile']``.
        maxinsert: passed to the worker as ``par['maxinsert']``.
        bayesfactorthreshold: passed to the worker under the (misspelled,
            but shared with the worker) key ``'bayesfactorthreashold'``.
        nthreads: number of pool processes.
        chipfregion: accepted for interface compatibility; not used here.
        jobtype: passed to the worker as ``par['jobtype']``.
        hotspots: iterable of hotspot objects, one task per hotspot.
        gloablumbda: passed to the worker as ``par['gloablumbda']``.
        inputfile: control file, passed to the worker as ``par['inputfile']``.
        ratio: passed to the worker as ``par['ratio']``.
        inputfregion: passed to the worker as ``par['fregion']``.

    Returns:
        A flat list with the peaks of all hotspots, in hotspot order.
        ``None`` if a non-interrupt exception was caught (it is printed and
        the pool is terminated).

    A ``KeyboardInterrupt`` terminates the pool and exits the process with
    status 1.
    """
    # Imported locally so the function does not depend on ``sys`` leaking
    # into the module namespace through one of the star imports.
    import sys

    pool = Pool(nthreads)

    try:

        pars = [
            {
                'hotspot': hotspot,
                'datafile': datafile,
                'maxinsert': maxinsert,
                'bayesfactorthreashold': bayesfactorthreshold,
                'jobtype': jobtype,
                'gloablumbda': gloablumbda,
                'ratio': ratio,
                'inputfile': inputfile,
                'fregion': inputfregion,
            }
            for hotspot in hotspots
        ]

        # Dispatch to the control-aware worker. The original mapped over
        # peakscan_withoutcontrol_worker, which never reads 'inputfile' and
        # ignores 'ratio', so the control sample had no effect.
        peaksinthreads = pool.map(peakscan_control_worker, pars)

        peaks = list()

        # Flatten the per-hotspot peak lists into one list.
        for hotspotnow in peaksinthreads:

            for peaknow in hotspotnow:

                print (peaknow.peakid)

                peaks.append(peaknow)

        pool.close()

        return peaks

    except KeyboardInterrupt:

        pool.terminate()

        print ("You cancelled the program!")

        sys.exit(1)

    # ``as`` form is valid on Python 2.6+ and Python 3, unlike ``except X, e``.
    except Exception as e:

        print ('got exception in Jazzlib.peaksscan.peakscan_control: %r, terminating the pool' % (e,))

        pool.terminate()

        print ('pool is terminated')

    finally:
        # Runs after close() on success and after terminate() on failure.
        pool.join()
336 | 
337 | 
def peakscan_control_worker(par):
    """Call peaks inside one hotspot, estimating background from a control file.

    Args:
        par: dict with the keys ``'hotspot'``, ``'datafile'``,
            ``'inputfile'``, ``'maxinsert'``, ``'bayesfactorthreashold'``,
            ``'jobtype'``, ``'gloablumbda'``, ``'ratio'`` and ``'fregion'``.

    Returns:
        A list of ``Peak`` objects found in the hotspot (possibly empty).
        ``None`` if an exception was caught; the exception and the hotspot
        coordinates are printed in that case.

    Steps:
        1. Read depth is fetched with ``depthcount`` from both ``datafile``
           and ``inputfile`` for the hotspot extended by 5100 on each side,
           clipped to ``[1, chrlength]``.
        2. Control depth vectors for the hotspot extended by 500, 2500 and
           5000 on each side are fed to ``cEM_zip``; the largest of the
           three returned lambda estimates and ``gloablumbda`` is the local
           background. If that exceeds 400, ``gloablumbda * 5`` is used.
        3. Every site in ``range(start - 1, end + 1)`` covered in the data
           file is scored with ``bayesfactor`` on ``int(depth * ratio)``
           (values below 2 are scored as 2); sites scoring above the
           threshold are grouped by ``continueregion``.
        4. Each group becomes a ``Peak`` whose score is the mean site score
           and whose ``peakpoint`` is the first site with the highest score.
    """
    # Imported locally so the function does not depend on ``sys`` leaking
    # into the module namespace through one of the star imports.
    import sys

    try:
        peaks = list()

        hotspot = par['hotspot']

        datafile = par['datafile']

        inputfile = par['inputfile']

        maxinsert = par['maxinsert']

        bayesfactorthreshold = par['bayesfactorthreashold']

        jobtype = par['jobtype']

        gloablumbda = par['gloablumbda']

        ratio = par['ratio']

        fregion = par['fregion']

        start = hotspot.start

        end = hotspot.end

        chromosome = hotspot.chromosome

        chrlength = fregion.chrs_length[chromosome]

        # Fetch a slightly wider region than the widest background window.
        regionstart = max(start - 5100, 1)

        regionend = min(end + 5100, chrlength)

        datacount = depthcount(bamfile=datafile, regionchromosome=chromosome, regionstart=regionstart,
                                regionend=regionend, maxinsert=maxinsert, jobtype=jobtype)

        inputcount = depthcount(bamfile=inputfile, regionchromosome=chromosome, regionstart=regionstart,
                                regionend=regionend, maxinsert=maxinsert, jobtype=jobtype)

        def windowcounts(flank):
            # Control depth per site for the hotspot extended by ``flank``
            # on both sides; sites outside [0, chrlength] are dropped,
            # uncovered sites count as 0.
            return [inputcount[sitenow] if sitenow in inputcount else 0
                    for sitenow in range(start - flank, end + flank)
                    if 0 <= sitenow <= chrlength]

        inputwindow10k = windowcounts(5000)

        inputwindow5k = windowcounts(2500)

        inputwindow1k = windowcounts(500)

        (window5klhat, _window5kphat) = cEM_zip(inputwindow5k)

        (window10klhat, _window10kphat) = cEM_zip(inputwindow10k)

        (window1klhat, _window1kphat) = cEM_zip(inputwindow1k)

        maxlhat = max(window5klhat, window10klhat, window1klhat, gloablumbda)

        # Cap implausibly high local estimates.
        if maxlhat > 400:

            maxlhat = gloablumbda * 5

        bayesfactorscore = dict()

        # Collected in ascending site order because the loop below is ascending.
        enrichedsites = list()

        for wsite in range(start - 1, end + 1):

            if wsite not in datacount:

                continue

            # Scale the sample depth by ``ratio`` before scoring; scaled
            # values below 2 are scored as if they were 2.
            nowcount = int(datacount[wsite]*ratio)

            nowbayesfactor = bayesfactor(locallambda=maxlhat, peakscore=max(nowcount, 2))

            bayesfactorscore[wsite] = nowbayesfactor

            if nowbayesfactor > bayesfactorthreshold:

                enrichedsites.append(wsite)

        tmppeaks = continueregion(points=enrichedsites, minlength=1)

        for iniid, tmppeak in enumerate(tmppeaks, 1):

            tmppeakstart = tmppeak['start_site']

            tmppeakend = tmppeak['end_site']

            totalbayesscore = 0

            maxscore = None

            maxsite = tmppeakstart

            for site in range(tmppeakstart, tmppeakend + 1):

                score = bayesfactorscore[site]

                totalbayesscore = totalbayesscore + score

                if maxscore is None or score > maxscore:

                    # Track the running maximum; the original assigned the
                    # other way round (score = maxscore), so the summit was
                    # never the best-scoring site.
                    maxscore = score

                    maxsite = site

            # True division: the module has ``from __future__ import division``.
            avgbayescore = totalbayesscore/(tmppeakend - tmppeakstart + 1)

            peakid = hotspot.hotspotid+'.'+str(iniid)

            peak = Peak(start=tmppeakstart, end=tmppeakend, chromosome=chromosome, peakpoint=maxsite, peakid=peakid,
                        score=avgbayescore, parent=hotspot.hotspotid)

            peaks.append(peak)

        return peaks

    # ``as`` form is valid on Python 2.6+ and Python 3, unlike ``except X, e``.
    except Exception as e:

        print ('got exception in Jazzlib.peaksscan.peakscan_control_worker: %r, terminating the pool' % (e,))

        # Double parentheses: prints the tuple the same way on Python 2 and 3.
        print ((par['hotspot'].chromosome, par['hotspot'].start, par['hotspot'].end))

        return None

    except KeyboardInterrupt:

        print ("You cancelled the program!")

        sys.exit(1)


--------------------------------------------------------------------------------