├── .environmentLinux.yaml
├── .github
    └── workflows
    │   ├── build.yml
    │   └── pypi.yml
├── .gitignore
├── .gitmodules
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── libBigWig
    ├── LICENSE
    ├── README.md
    ├── bigWig.h
    ├── bigWigIO.h
    ├── bwCommon.h
    ├── bwRead.c
    ├── bwStats.c
    ├── bwValues.c
    ├── bwValues.h
    ├── bwWrite.c
    └── io.c
├── pyBigWig.c
├── pyBigWig.h
├── pyBigWigTest
    ├── __init__.py
    ├── test.bigBed
    ├── test.bw
    └── test.py
├── pyproject.toml
├── setup.cfg
└── setup.py


/.environmentLinux.yaml:
--------------------------------------------------------------------------------
 1 | name: foo  # must match activate-environment in .github/workflows/build.yml
 2 | channels:
 3 |   - conda-forge
 4 |   - bioconda
 5 |   - defaults  # conda's built-in channel set is named "defaults", not "default"
 6 | dependencies:
 7 |   - gcc_linux-64  # C compiler used to build the extension
 8 |   - curl  # libcurl headers/libraries (see README requirements)
 9 |   - zlib  # zlib headers/libraries (see README requirements)
10 |   - python = 3.9
11 |   - pip
12 |   - numpy
13 |   - pytest


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | on: 
 3 |   pull_request:
 4 |   push:
 5 | 
 6 | jobs:
 7 |   testLinux:
 8 |     name: Test Conda Linux
 9 |     runs-on: "ubuntu-latest"
10 |     defaults:
11 |       run:
12 |         shell: bash -l {0}  # login shell so the conda environment set up below is activated
13 |     steps:
14 |       - uses: actions/checkout@v3
15 |       - uses: conda-incubator/setup-miniconda@v2
16 |         with:
17 |           activate-environment: foo  # environment name declared in .environmentLinux.yaml
18 |           environment-file: .environmentLinux.yaml
19 |           python-version: '3.9'  # quoted so YAML keeps it a string (3.10 would otherwise become 3.1)
20 |           auto-activate-base: false
21 |       - run: |
22 |           pip install .
23 |           pytest pyBigWigTest/test.py
24 | 
25 |   test-builds:
26 |     runs-on: ubuntu-latest
27 | 
28 |     steps:
29 |     - uses: actions/checkout@v3
30 |       with:
31 |         fetch-depth: 0  # fetch full history rather than a shallow clone
32 |     - name: Set up Python
33 |       uses: actions/setup-python@v4
34 |       with:
35 |         python-version: '3.9'
36 |     - name: Install build prerequisites
37 |       run: |
38 |         python -m pip install --upgrade build numpy
39 |     - name: Install cibuildwheel
40 |       run: |
41 |         python -m pip install --upgrade cibuildwheel
42 |     - name: Build wheel(s)
43 |       run: |
44 |         python -m cibuildwheel --output-dir wheelhouse
45 |     - name: Build sdist
46 |       run: |
47 |         python -m build --sdist
48 |     - uses: actions/upload-artifact@v4  # v3 is deprecated and no longer accepted on github.com
49 |       with:
50 |         name: pyBigWig-build
51 |         path: |
52 |           wheelhouse/*
53 |           dist/pyBigWig*.tar.gz
54 | 


--------------------------------------------------------------------------------
/.github/workflows/pypi.yml:
--------------------------------------------------------------------------------
 1 | name: pypi
 2 | on: [push]  # runs on every push; the build/upload steps below are gated to tag pushes
 3 | jobs:
 4 |   pypi:
 5 |     name: upload to pypi
 6 |     runs-on: ubuntu-latest
 7 |     steps:
 8 |     - uses: actions/checkout@v3
 9 |       with:
10 |         fetch-depth: 0  # fetch full history rather than a shallow clone
11 |     - name: Set up Python
12 |       uses: actions/setup-python@v4
13 |       with:
14 |         python-version: '3.9'
15 |     - name: Install build prerequisites
16 |       if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')  # tag pushes only
17 |       run: |
18 |         python -m pip install --upgrade twine build cibuildwheel numpy 
19 |     - name: sdist
20 |       if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')  # tag pushes only
21 |       run: |
22 |         python -m build --sdist
23 |     - name: wheel
24 |       if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')  # tag pushes only
25 |       run: |
26 |         python -m cibuildwheel --output-dir wheelhouse
27 |     - name: upload
28 |       if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')  # tag pushes only
29 |       env:
30 |         TWINE_USERNAME: "__token__"  # literal user name used with PyPI API tokens
31 |         TWINE_PASSWORD: ${{ secrets.pypi_password }}  # read from the repository secret "pypi_password"
32 |       run: |
33 |         twine upload dist/*
34 |         twine upload wheelhouse/*
35 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | 
25 | # PyInstaller
26 | #  Usually these files are written by a python script from a template
27 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 | 
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 | 
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .coverage.*
40 | .cache
41 | nosetests.xml
42 | coverage.xml
43 | *,cover
44 | 
45 | # Translations
46 | *.mo
47 | *.pot
48 | 
49 | # Django stuff:
50 | *.log
51 | 
52 | # Sphinx documentation
53 | docs/_build/
54 | 
55 | # PyBuilder
56 | target/
57 | 
58 | *.o
59 | #./setup.py sdist creates this
60 | MANIFEST
61 | 
62 | *.swp
63 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeptools/pyBigWig/7300b0a4599e7f72085c3c27c19b375e3a2c2cc0/.gitmodules


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Devon Ryan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.h
2 | include **/*.h
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![PyPI version](https://badge.fury.io/py/pyBigWig.svg)](https://badge.fury.io/py/pyBigWig) [![Travis-CI status](https://travis-ci.org/deeptools/pyBigWig.svg?branch=master)](https://travis-ci.org/dpryan79/pyBigWig.svg?branch=master) [![bioconda-badge](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io) [![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.45238.svg)](http://dx.doi.org/10.5281/zenodo.45238)
  2 | 
  3 | # pyBigWig
  4 | A python extension, written in C, for quick access to bigBed files and access to and creation of bigWig files. This extension uses [libBigWig](https://github.com/dpryan79/libBigWig) for local and remote file access.
  5 | 
  6 | Table of Contents
  7 | =================
  8 | 
  9 |   * [Installation](#installation)
 10 |     * [Requirements](#requirements)
 11 |   * [Usage](#usage)
 12 |     * [Load the extension](#load-the-extension)
 13 |     * [Open a bigWig or bigBed file](#open-a-bigwig-or-bigbed-file)
 14 |     * [Determining the file type](#determining-the-file-type)
 15 |     * [Access the list of chromosomes and their lengths](#access-the-list-of-chromosomes-and-their-lengths)
 16 |     * [Print the header](#print-the-header)
 17 |     * [Compute summary information on a range](#compute-summary-information-on-a-range)
 18 |       * [A note on statistics and zoom levels](#a-note-on-statistics-and-zoom-levels)
 19 |     * [Retrieve values for individual bases in a range](#retrieve-values-for-individual-bases-in-a-range)
 20 |     * [Retrieve all intervals in a range](#retrieve-all-intervals-in-a-range)
 21 |     * [Retrieving bigBed entries](#retrieving-bigbed-entries)
 22 |     * [Add a header to a bigWig file](#add-a-header-to-a-bigwig-file)
 23 |     * [Adding entries to a bigWig file](#adding-entries-to-a-bigwig-file)
 24 |     * [Close a bigWig or bigBed file](#close-a-bigwig-or-bigbed-file)
 25 |   * [Numpy](#numpy)
 26 |   * [Remote file access](#remote-file-access)
 27 |   * [Empty files](#empty-files)
 28 |   * [A note on coordinates](#a-note-on-coordinates)
 29 |   * [Galaxy](#galaxy)
 30 | 
 31 | # Installation
 32 | You can install this extension directly from PyPI with:
 33 | 
 34 |     pip install pyBigWig
 35 | 
 36 | or with conda
 37 | 
 38 |     conda install pybigwig -c conda-forge -c bioconda
 39 | 
 40 | ## Requirements
 41 | 
 42 | The following non-python requirements must be installed:
 43 | 
 44 |  - libcurl (and the `curl-config` config)
 45 |  - zlib
 46 | 
 47 | The headers and libraries for these are required.
 48 | 
 49 | # Usage
 50 | Basic usage is as follows:
 51 | 
 52 | ## Load the extension
 53 | 
 54 |     >>> import pyBigWig
 55 | 
 56 | ## Open a bigWig or bigBed file
 57 | 
 58 | This will work if your working directory is the pyBigWig source code directory.
 59 | 
 60 |     >>> bw = pyBigWig.open("test/test.bw")
 61 | 
 62 | Note that if the file doesn't exist you'll see an error message and `None` will be returned. By default, all files are opened for reading and not writing. You can alter this by passing a mode containing `w`:
 63 | 
 64 |     >>> bw = pyBigWig.open("test/output.bw", "w")
 65 | 
 66 | Note that a file opened for writing can't be queried for its intervals or statistics, it can *only* be written to. If you open a file for writing then you will next need to add a header (see the section on this below).
 67 | 
 68 | Local and remote bigBed read access is also supported:
 69 | 
 70 |     >>> bb = pyBigWig.open("https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed")
 71 | 
 72 | While you can specify a mode for bigBed files, it is ignored. The object returned by `pyBigWig.open()` is the same regardless of whether you're opening a bigWig or bigBed file.
 73 | 
 74 | ## Determining the file type
 75 | 
 76 | Since bigWig and bigBed files can both be opened, it may be necessary to determine whether a given `bigWigFile` object points to a bigWig or bigBed file. To that end, one can use the `isBigWig()` and `isBigBed()` functions:
 77 | 
 78 |     >>> bw = pyBigWig.open("test/test.bw")
 79 |     >>> bw.isBigWig()
 80 |     True
 81 |     >>> bw.isBigBed()
 82 |     False
 83 | 
 84 | ## Access the list of chromosomes and their lengths
 85 | 
 86 | `bigWigFile` objects contain a dictionary holding the chromosome lengths, which can be accessed with the `chroms()` accessor.
 87 | 
 88 |     >>> bw.chroms()
 89 |     dict_proxy({'1': 195471971L, '10': 130694993L})
 90 | 
 91 | You can also directly query a particular chromosome.
 92 | 
 93 |     >>> bw.chroms("1")
 94 |     195471971L
 95 | 
 96 | The lengths are stored as the "long" integer type, which is why there's an `L` suffix. If you specify a non-existent chromosome then nothing is output.
 97 | 
 98 |     >>> bw.chroms("c")
 99 |     >>> 
100 | 
101 | ## Print the header
102 | 
103 | It's sometimes useful to print a bigWig's header. This is presented here as a python dictionary containing: the version (typically `4`), the number of zoom levels (`nLevels`), the number of bases described (`nBasesCovered`), the minimum value (`minVal`), the maximum value (`maxVal`), the sum of all values (`sumData`), and the sum of all squared values (`sumSquared`). The last two of these are needed for determining the mean and standard deviation.
104 | 
105 |     >>> bw.header()
106 |     {'maxVal': 2L, 'sumData': 272L, 'minVal': 0L, 'version': 4L, 'sumSquared': 500L, 'nLevels': 1L, 'nBasesCovered': 154L}
107 | 
108 | Note that this is also possible for bigBed files and the same dictionary keys will be present. Entries such as `maxVal`, `sumData`, `minVal`, and `sumSquared` are then largely not meaningful.
109 | 
110 | ## Compute summary information on a range
111 | 
112 | bigWig files are used to store values associated with positions and ranges of them. Typically we want to quickly access the average value over a range, which is very simple:
113 | 
114 |     >>> bw.stats("1", 0, 3)
115 |     [0.2000000054637591]
116 | 
117 | Suppose instead of the mean value, we instead wanted the maximum value:
118 | 
119 |     >>> bw.stats("1", 0, 3, type="max")
120 |     [0.30000001192092896]
121 | 
122 | Other options are "min" (the minimum value), "coverage" (the fraction of bases covered), and "std" (the standard deviation of the values).
123 | 
124 | It's often the case that we would instead like to compute values of some number of evenly spaced bins in a given interval, which is also simple:
125 | 
126 |     >>> bw.stats("1",99, 200, type="max", nBins=2)
127 |     [1.399999976158142, 1.5]
128 | 
129 | `nBins` defaults to 1, just as `type` defaults to `mean`.
130 | 
131 | If the start and end positions are omitted then the entire chromosome is used:
132 | 
133 |     >>> bw.stats("1")
134 |     [1.3351851569281683]
135 | 
136 | ### A note on statistics and zoom levels
137 | 
138 | > A note to the lay reader: This section is rather technical and included only for the sake of completeness. The summary is that if you require exact mean/max/etc. summary values for an interval or intervals, and a small trade-off in speed is acceptable, you should use the `exact=True` option in the `stats()` function.
139 | 
140 | By default, there are some unintuitive aspects to computing statistics on ranges in a bigWig file. The bigWig format was originally created in the context of genome browsers. There, computing exact summary statistics for a given interval is less important than quickly being able to compute an approximate statistic (after all, browsers need to be able to quickly display a number of contiguous intervals and support scrolling/zooming). Because of this, bigWig files contain not only interval-value associations, but also `sum of values`/`sum of squared values`/`minimum value`/`maximum value`/`number of bases covered` for equally sized bins of various sizes. These different sizes are referred to as "zoom levels". The smallest zoom level has bins that are 16 times the mean interval size in the file and each subsequent zoom level has bins 4 times larger than the previous. This methodology is used in Kent's tools and, therefore, likely used in almost every currently existing bigWig file.
141 | 
142 | When a bigWig file is queried for a summary statistic, the size of the interval is used to determine whether to use a zoom level and, if so, which one. The optimal zoom level is that which has the largest bins no more than half the width of the desired interval. If no such zoom level exists, the original intervals are instead used for the calculation.
143 | 
144 | For the sake of consistency with other tools, pyBigWig adopts this same methodology. However, since this is (A) unintuitive and (B) undesirable in some applications, pyBigWig enables computation of exact summary statistics regardless of the interval size (i.e., it allows ignoring the zoom levels). This was originally proposed [here](https://github.com/dpryan79/pyBigWig/issues/12) and an example is below:
145 | 
146 |     >>> import pyBigWig
147 |     >>> from numpy import mean
148 |     >>> bw = pyBigWig.open("http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeCrgMapabilityAlign75mer.bigWig")
149 |     >>> bw.stats('chr1', 89294, 91629)
150 |     [0.20120902053804418]
151 |     >>> mean(bw.values('chr1', 89294, 91629))
152 |     0.22213841940688142
153 |     >>> bw.stats('chr1', 89294, 91629, exact=True)
154 |     [0.22213841940688142]
155 | 
156 | ## Retrieve values for individual bases in a range
157 | 
158 | While the `stats()` method **can** be used to retrieve the original values for each base (e.g., by setting `nBins` to the number of bases), it's preferable to instead use the `values()` accessor.
159 | 
160 |     >>> bw.values("1", 0, 3)
161 |     [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]
162 | 
163 | The list produced will always contain one value for every base in the range specified. If a particular base has no associated value in the bigWig file then the returned value will be `nan`.
164 | 
165 |     >>> bw.values("1", 0, 4)
166 |     [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, nan]
167 | 
168 | ## Retrieve all intervals in a range
169 | 
170 | Sometimes it's convenient to retrieve all entries overlapping some range. This can be done with the `intervals()` function:
171 | 
172 |     >>> bw.intervals("1", 0, 3)
173 |     ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896))
174 | 
175 | What's returned is a list of tuples containing: the start position, the end position, and the value. Thus, the example above has values of `0.1`, `0.2`, and `0.3` at positions `0`, `1`, and `2`, respectively.
176 | 
177 | If the start and end position are omitted then all intervals on the chromosome specified are returned:
178 | 
179 |     >>> bw.intervals("1")
180 |     ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896), (100, 150, 1.399999976158142), (150, 151, 1.5))
181 | 
182 | ## Retrieving bigBed entries
183 | 
184 | As opposed to bigWig files, bigBed files hold entries, which are intervals with an associated string. You can access these entries using the `entries()` function:
185 | 
186 |     >>> bb = pyBigWig.open("https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed")
187 |     >>> bb.entries('chr1', 10000000, 10020000)
188 |     [(10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'), (10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'), (10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399')]
189 | 
190 | The output is a list of entry tuples. The tuple elements are the `start` and `end` position of each entry, followed by its associated `string`. The string is returned exactly as it's held in the bigBed file, so parsing it is left to you. To determine what the various fields are in these strings, consult the SQL string:
191 | 
192 |     >>> bb.SQL()
193 |     table RnaElements
194 |     "BED6 + 3 scores for RNA Elements data"
195 |         (
196 |         string chrom;      "Reference sequence chromosome or scaffold"
197 |         uint   chromStart; "Start position in chromosome"
198 |         uint   chromEnd;   "End position in chromosome"
199 |         string name;       "Name of item"
200 |         uint   score;      "Normalized score from 0-1000"
201 |         char[1] strand;    "+ or - or . for unknown"
202 |         float level;       "Expression level such as RPKM or FPKM. Set to -1 for no data."
203 |         float signif;      "Statistical significance such as IDR. Set to -1 for no data."
204 |         uint score2;       "Additional measurement/count e.g. number of reads. Set to 0 for no data."
205 |         )
206 | 
207 | Note that the first three entries in the SQL string are not part of the string.
208 | 
209 | If you only need to know where entries are and not their associated values, you can save memory by additionally specifying `withString=False` in `entries()`:
210 | 
211 |     >>> bb.entries('chr1', 10000000, 10020000, withString=False)
212 |     [(10009333, 10009640), (10014007, 10014289), (10014373, 10024307)]
213 | 
214 | ## Add a header to a bigWig file
215 | 
216 | If you've opened a file for writing then you'll need to give it a header before you can add any entries. The header contains all of the chromosomes, **in order**, and their sizes. If your genome has two chromosomes, chr1 and chr2, of lengths 1 and 1.5 million bases, then the following would add an appropriate header:
217 | 
218 |     >>> bw.addHeader([("chr1", 1000000), ("chr2", 1500000)])
219 | 
220 | bigWig headers are case-sensitive, so `chr1` and `Chr1` are different. Likewise, `1` and `chr1` are not the same, so you can't mix Ensembl and UCSC chromosome names. After adding a header, you can then add entries.
221 | 
222 | By default, up to 10 "zoom levels" are constructed for bigWig files. You can change this default number with the `maxZooms` optional argument. A common use of this is to create a bigWig file that simply holds intervals and no zoom levels:
223 | 
224 |     >>> bw.addHeader([("chr1", 1000000), ("chr2", 1500000)], maxZooms=0)
225 | 
226 | If you set `maxZooms=0`, please note that IGV and many other tools WILL NOT WORK as they assume that at least one zoom level will be present. You are advised to use the default unless you do not expect the bigWig files to be used by other packages.
227 | 
228 | ## Adding entries to a bigWig file
229 | 
230 | Assuming you've opened a file for writing and added a header, you can then add entries. Note that the entries **must** be added in order, as bigWig files always contain ordered intervals. There are three formats that bigWig files can use internally to store entries. The most commonly observed format is identical to a [bedGraph](https://genome.ucsc.edu/goldenpath/help/bedgraph.html) file:
231 | 
232 |     chr1	0	100	0.0
233 |     chr1	100	120	1.0
234 |     chr1	125	126	200.0
235 | 
236 | These entries would be added as follows:
237 | 
238 |     >>> bw.addEntries(["chr1", "chr1", "chr1"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0])
239 | 
240 | Each entry occupies 12 bytes before compression.
241 | 
242 | The second format uses a fixed span, but a variable step size between entries. These can be represented in a [wiggle](http://genome.ucsc.edu/goldenpath/help/wiggle.html) file as:
243 | 
244 |     variableStep chrom=chr1 span=20
245 |     500	-2.0
246 |     600	150.0
247 |     635	25.0
248 | 
249 | The above entries describe (1-based) positions 501-520, 601-620 and 636-655. These would be added as follows:
250 | 
251 |     >>> bw.addEntries("chr1", [500, 600, 635], values=[-2.0, 150.0, 25.0], span=20)
252 | 
253 | Each entry of this type occupies 8 bytes before compression.
254 | 
255 | The final format uses a fixed step and span for each entry, corresponding to the fixedStep [wiggle format](http://genome.ucsc.edu/goldenpath/help/wiggle.html):
256 | 
257 |     fixedStep chrom=chr1 step=30 span=20
258 |     -5.0
259 |     -20.0
260 |     25.0
261 | 
262 | The above entries describe (1-based) bases 901-920, 931-950 and 961-980 and would be added as follows:
263 | 
264 |     >>> bw.addEntries("chr1", 900, values=[-5.0, -20.0, 25.0], span=20, step=30)
265 | 
266 | Each entry of this type occupies 4 bytes.
267 | 
268 | Note that pyBigWig will try to prevent you from adding entries in an incorrect order. This, however, requires additional over-head. Should that not be acceptable, you can simply specify `validate=False` when adding entries:
269 | 
270 |     >>> bw.addEntries(["chr1", "chr1", "chr1"], [100, 0, 125], ends=[120, 5, 126], values=[0.0, 1.0, 200.0], validate=False)
271 | 
272 | You're obviously then responsible for ensuring that you **do not** add entries out of order. The resulting files would otherwise largely not be usable.
273 | 
274 | ## Close a bigWig or bigBed file
275 | 
276 | A file can be closed with a simple `bw.close()`, as is commonly done with other file types. For files opened for writing, closing a file writes any buffered entries to disk, constructs and writes the file index, and constructs zoom levels. Consequently, this can take a bit of time.
277 | 
278 | # Numpy
279 | 
280 | As of version 0.3.0, pyBigWig supports input of coordinates using numpy integers and vectors in some functions **if numpy was installed prior to installing pyBigWig**. You can determine whether pyBigWig was installed with numpy support by checking the `numpy` accessor:
281 | 
282 |     >>> import pyBigWig
283 |     >>> pyBigWig.numpy
284 |     1
285 | 
286 | If `pyBigWig.numpy` is `1`, then pyBigWig was compiled with numpy support. This means that `addEntries()` can accept numpy coordinates:
287 | 
288 |     >>> import pyBigWig
289 |     >>> import numpy as np
290 |     >>> bw = pyBigWig.open("/tmp/delete.bw", "w")
291 |     >>> bw.addHeader([("1", 1000)], maxZooms=0)
292 |     >>> chroms = np.array(["1"] * 10)
293 |     >>> starts = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=np.int64)
294 |     >>> ends = np.array([5, 15, 25, 35, 45, 55, 65, 75, 85, 95], dtype=np.int64)
295 |     >>> values0 = np.array(np.random.random_sample(10), dtype=np.float64)
296 |     >>> bw.addEntries(chroms, starts, ends=ends, values=values0)
297 |     >>> bw.close()
298 | 
299 | Additionally, `values()` can directly output a numpy vector:
300 | 
301 |     >>> bw = pyBigWig.open("/tmp/delete.bw")
302 |     >>> bw.values('1', 0, 10, numpy=True)
303 |     [ 0.74336642  0.74336642  0.74336642  0.74336642  0.74336642         nan
304 |          nan         nan         nan         nan]
305 |     >>> type(bw.values('1', 0, 10, numpy=True))
306 |     <type 'numpy.ndarray'>
307 | 
308 | # Remote file access
309 | 
310 | If you do not have curl installed, pyBigWig will be installed without the ability to access remote files. You can determine if you will be able to access remote files with `pyBigWig.remote`. If that returns 1, then you can access remote files. If it returns 0 then you can't.
311 | 
312 | # Empty files
313 | 
314 | As of version 0.3.5, pyBigWig is able to read and write bigWig files lacking entries. Please note that such files are generally not compatible with other programs, since there's no definition of how a bigWig file with no entries should look. For such a file, the `intervals()` accessor will return `None`, the `stats()` function will return a list of `None` of the desired length, and `values()` will return `[]` (an empty list). This should generally allow programs utilizing pyBigWig to continue without issue.
315 | 
316 | For those wishing to mimic the functionality of pyBigWig/libBigWig in this regard, please note that it looks at the number of bases covered (as reported in the file header) to check for "empty" files.
317 | 
318 | # A note on coordinates
319 | 
320 | Wiggle, bigWig, and bigBed files use 0-based half-open coordinates, which are also used by this extension. So to access the value for the first base on `chr1`, one would specify the starting position as `0` and the end position as `1`. Similarly, bases 100 to 115 would have a start of `99` and an end of `115`. This is simply for the sake of consistency with the underlying bigWig file and may change in the future.
321 | 
322 | # Galaxy
323 | 
324 | pyBigWig is also available as a package in [Galaxy](http://www.usegalaxy.org). You can find it in the toolshed and the [IUC](https://wiki.galaxyproject.org/IUC) is currently hosting the XML definition of this on [github](https://github.com/galaxyproject/tools-iuc/tree/master/packages/package_python_2_7_10_pybigwig_0_2_8).
325 | 


--------------------------------------------------------------------------------
/libBigWig/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Devon Ryan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/libBigWig/README.md:
--------------------------------------------------------------------------------
  1 | ![Master build status](https://travis-ci.org/dpryan79/libBigWig.svg?branch=master) [![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.45278.svg)](http://dx.doi.org/10.5281/zenodo.45278)
  2 | 
  3 | A C library for reading/parsing local and remote bigWig and bigBed files. While Kent's source code is free to use for these purposes, it's really inappropriate as library code since it has the unfortunate habit of calling `exit()` whenever there's an error. If that's then used inside of something like python then the python interpreter gets killed. This library is aimed at resolving these sorts of issues and should also use more standard things like curl and has a friendlier license to boot.
  4 | 
  5 | Documentation is automatically generated by doxygen and can be found under `docs/html` or online [here](https://cdn.rawgit.com/dpryan79/libBigWig/master/docs/html/index.html).
  6 | 
  7 | # Example
  8 | 
  9 | The only functions and structures that end users need to care about are in "bigWig.h". Below is a commented example. You can see the files under `test/` for further examples.
 10 | 
 11 |     #include "bigWig.h"
 12 |     int main(int argc, char *argv[]) {
 13 |         bigWigFile_t *fp = NULL;
 14 |         bwOverlappingIntervals_t *intervals = NULL;
 15 |         double *stats = NULL;
 16 |         if(argc != 2) {
 17 |             fprintf(stderr, "Usage: %s {file.bw|URL://path/file.bw}\n", argv[0]);
 18 |             return 1;
 19 |         }
 20 | 
 21 |         //Initialize enough space to hold 128KiB (1<<17) of data at a time
 22 |         if(bwInit(1<<17) != 0) {
 23 |             fprintf(stderr, "Received an error in bwInit\n");
 24 |             return 1;
 25 |         }
 26 | 
 27 |         //Open the local/remote file
 28 |         fp = bwOpen(argv[1], NULL, "r");
 29 |         if(!fp) {
 30 |             fprintf(stderr, "An error occurred while opening %s\n", argv[1]);
 31 |             return 1;
 32 |         }
 33 | 
 34 |         //Get values in a range (0-based, half open) without NAs
 35 |         intervals = bwGetValues(fp, "chr1", 10000000, 10000100, 0);
 36 |         bwDestroyOverlappingIntervals(intervals); //Free allocated memory
 37 | 
 38 |         //Get values in a range (0-based, half open) with NAs
 39 |         intervals = bwGetValues(fp, "chr1", 10000000, 10000100, 1);
 40 |         bwDestroyOverlappingIntervals(intervals); //Free allocated memory
 41 | 
 42 |         //Get the full intervals that overlap
 43 |         intervals = bwGetOverlappingIntervals(fp, "chr1", 10000000, 10000100);
 44 |         bwDestroyOverlappingIntervals(intervals); //Free allocated memory
 45 | 
 46 |         //Get an example statistic - standard deviation
 47 |         //We want ~4 bins in the range
 48 |         stats = bwStats(fp, "chr1", 10000000, 10000100, 4, dev);
 49 |         if(stats) {
 50 |             printf("chr1:10000000-10000100 std. dev.: %f %f %f %f\n", stats[0], stats[1], stats[2], stats[3]);
 51 |             free(stats); //The array returned by bwStats is released by the caller
 52 |         }
 53 | 
 54 |         bwClose(fp); //Close the file and free its memory
 55 |         bwCleanup(); //Counterpart to bwInit
 56 |         return 0;
 57 |     }
 58 | 
 59 | ## Writing example
 60 | 
 61 | N.B., creation of bigBed files is not supported (there are no plans to change this).
 62 | 
 63 | Below is an example of how to write bigWig files. You can also find this file under `test/exampleWrite.c`. Unlike with Kent's tools, you can create bigWig files entry by entry without needing an intermediate wiggle or bedGraph file. Entries in bigWig files are stored in blocks with each entry in a block referring to the same chromosome and having the same type, of which there are three (see the [wiggle specification](http://genome.ucsc.edu/goldenpath/help/wiggle.html) for more information on this).
 64 | 
 65 |     #include "bigWig.h"
 66 |     
 67 |     int main(int argc, char *argv[]) {
 68 |         bigWigFile_t *fp = NULL;
 69 |         char *chroms[] = {"1", "2"};
 70 |         char *chromsUse[] = {"1", "1", "1"};
 71 |         uint32_t chrLens[] = {1000000, 1500000};
 72 |         uint32_t starts[] = {0, 100, 125,
 73 |                              200, 220, 230,
 74 |                              500, 600, 625,
 75 |                              700, 800, 850};
 76 |         uint32_t ends[] = {5, 120, 126,
 77 |                            205, 226, 231};
 78 |         float values[] = {0.0f, 1.0f, 200.0f,
 79 |                           -2.0f, 150.0f, 25.0f,
 80 |                           0.0f, 1.0f, 200.0f,
 81 |                           -2.0f, 150.0f, 25.0f,
 82 |                           -5.0f, -20.0f, 25.0f,
 83 |                           -5.0f, -20.0f, 25.0f};
 84 |         
 85 |         if(bwInit(1<<17) != 0) {
 86 |             fprintf(stderr, "Received an error in bwInit\n");
 87 |             return 1;
 88 |         }
 89 |     
 90 |         fp = bwOpen("example_output.bw", NULL, "w");
 91 |         if(!fp) {
 92 |             fprintf(stderr, "An error occurred while opening example_output.bw for writing\n");
 93 |             return 1;
 94 |         }
 95 |     
 96 |         //Allow up to 10 zoom levels, though fewer will be used in practice
 97 |         if(bwCreateHdr(fp, 10)) goto error;
 98 |     
 99 |         //Create the chromosome lists
100 |         fp->cl = bwCreateChromList(chroms, chrLens, 2);
101 |         if(!fp->cl) goto error;
102 |     
103 |         //Write the header
104 |         if(bwWriteHdr(fp)) goto error;
105 |     
106 |         //Some example bedGraph-like entries
107 |         if(bwAddIntervals(fp, chromsUse, starts, ends, values, 3)) goto error;
108 |         //We can continue appending similarly formatted entries
109 |         //N.B. you can't append a different chromosome (those always go into different blocks)
110 |         if(bwAppendIntervals(fp, starts+3, ends+3, values+3, 3)) goto error;
111 |     
112 |         //Add a new block of entries with a span. Since bwAdd/AppendIntervals was just used we MUST create a new block
113 |         if(bwAddIntervalSpans(fp, "1", starts+6, 20, values+6, 3)) goto error;
114 |         //We can continue appending similarly formatted entries
115 |         if(bwAppendIntervalSpans(fp, starts+9, values+9, 3)) goto error;
116 |     
117 |         //Add a new block of fixed-step entries
118 |         if(bwAddIntervalSpanSteps(fp, "1", 900, 20, 30, values+12, 3)) goto error;
119 |         //The start is then 990 (900 + 3 steps of 30), since that's where the previous step ended
120 |         if(bwAppendIntervalSpanSteps(fp, values+15, 3)) goto error;
121 | 
122 |         //Add a new chromosome
123 |         chromsUse[0] = "2";
124 |         chromsUse[1] = "2";
125 |         chromsUse[2] = "2";
126 |         if(bwAddIntervals(fp, chromsUse, starts, ends, values, 3)) goto error;
127 |     
128 |         //Closing the file causes the zoom levels to be created
129 |         bwClose(fp);
130 |         bwCleanup();
131 |     
132 |         return 0;
133 |     
134 |     error:
135 |         fprintf(stderr, "Received an error somewhere!\n");
136 |         bwClose(fp);
137 |         bwCleanup();
138 |         return 1;
139 |     }
140 | 
141 | # Testing file types
142 | 
143 | As of version 0.3.0, this library supports accessing bigBed files, which are related to bigWig files. Applications that need to support both bigWig and bigBed input can use the `bwIsBigWig` and `bbIsBigBed` functions to determine if their inputs are bigWig/bigBed files:
144 | 
145 |     ...code...
146 |     if(bwIsBigWig(input_file_name, NULL)) {
147 |         //do something
148 |     } else if(bbIsBigBed(input_file_name, NULL)) {
149 |         //do something else
150 |     } else {
151 |         //handle unknown input
152 |     }
153 | 
154 | Note that these two functions rely on the "magic number" at the beginning of each file, which differs between bigWig and bigBed files.
155 | 
156 | # bigBed support
157 | 
158 | Support for accessing bigBed files was added in version 0.3.0. The function names used for accessing bigBed files are similar to those used for bigWig files.
159 | 
160 |     Function | Use
161 |     --- | ---
162 |     bbOpen | Opens a bigBed file
163 |     bbGetSQL | Returns the SQL string (if it exists) in a bigBed file
164 |     bbGetOverlappingEntries | Returns all entries overlapping an interval (either with or without their associated strings)
165 |     bbDestroyOverlappingEntries | Free memory allocated by the above command
166 | 
167 | Other functions, such as `bwClose` and `bwInit`, are shared between bigWig and bigBed files. See `test/testBigBed.c` for a full example.
168 | 
169 | # A note on bigBed entries
170 | 
171 | Inside bigBed files, entries are stored as chromosome, start, and end coordinates with an (optional) associated string. For example, a "bedRNAElements" file from Encode has name, score, strand, "level", "significance", and "score2" values associated with each entry. These are stored inside the bigBed files as a single tab-separated character vector (char \*), which makes parsing difficult. The names of the various fields inside of bigBed files are stored as an SQL string, for example:
172 | 
173 |     table RnaElements 
174 |     "BED6 + 3 scores for RNA Elements data "
175 |         (
176 |         string chrom;      "Reference sequence chromosome or scaffold"
177 |         uint   chromStart; "Start position in chromosome"
178 |         uint   chromEnd;   "End position in chromosome"
179 |         string name;       "Name of item"
180 |         uint   score;      "Normalized score from 0-1000"
181 |         char[1] strand;    "+ or - or . for unknown"
182 |         float level;       "Expression level such as RPKM or FPKM. Set to -1 for no data."
183 |         float signif;      "Statistical significance such as IDR. Set to -1 for no data."
184 |         uint score2;       "Additional measurement/count e.g. number of reads. Set to 0 for no data."
185 |         )
186 | 
187 | Entries will then be of the form (one per line):
188 | 
189 |     59426	115	-	0.021	0.48	218
190 |     51	209	+	0.071	0.74	130
191 |     52	170	+	0.045	0.61	171
192 |     59433	178	-	0.049	0.34	296
193 |     53	156	+	0.038	0.19	593
194 |     59436	186	-	0.054	0.15	1010
195 |     59437	506	-	1.560	0.00	430611
196 | 
197 | Note that chromosome and start/end intervals are stored separately, so there's no need to parse them out of the string. libBigWig can return these entries, either with or without the above associated strings. Parsing these strings is left to the application requiring them and is currently outside the scope of this library.
198 | 
199 | # Interval/Entry iterators
200 | 
201 | Sometimes it is desirable to request a large number of intervals from a bigWig file or entries from a bigBed file, but not hold them all in memory at once (e.g., to save memory). To support this, libBigWig (since version 0.3.0) supports two kinds of iterators. The general process of using iterators is: (1) iterator creation, (2) traversal, and finally (3) iterator destruction. Only iterator creation differs between bigWig and bigBed files.
202 | 
203 | Importantly, iterators return results by one or more blocks. This is for convenience, since bigWig intervals and bigBed entries are stored together in fixed-size groups, called blocks. The number of blocks of entries returned, therefore, is an option that can be specified to balance performance and memory usage.
204 | 
205 | ## Iterator creation
206 | 
207 | For bigWig files, iterators are created with the `bwOverlappingIntervalsIterator()` function. This function takes chromosomal bounds (chromosome name, start, and end position) as well as a number of blocks. The equivalent function for bigBed files is `bbOverlappingEntriesIterator()`, which additionally takes a `withString` argument, which dictates whether the returned entries include the associated string values or not.
208 | 
209 | Each of the aforementioned functions returns a pointer to a `bwOverlapIterator_t` object. The only important parts of this structure for end users are the following members: `entries`, `intervals`, and `data`. `entries` is a pointer to a `bbOverlappingEntries_t` object, or `NULL` if a bigWig file is being used. Likewise, `intervals` is a pointer to a `bwOverlappingIntervals_t` object, or `NULL` if a bigBed file is being used. `data` is a special pointer, used to signify the end of iteration. Thus, when `data` is a `NULL` pointer, iteration has ended.
210 | 
211 | ## Iterator traversal
212 | 
213 | Regardless of whether a bigWig or bigBed file is being used, the `bwIteratorNext()` function will free currently used memory and load the appropriate intervals or entries for the next block(s). On error, this will return a NULL pointer (memory is already internally freed in this case).
214 | 
215 | ## Iterator destruction
216 | 
217 | `bwOverlapIterator_t` objects MUST be destroyed after use. This can be done with the `bwIteratorDestroy()` function.
218 | 
219 | ## Example
220 | 
221 | A full example is provided in `test/testIterator.c`, but a small example of iterating over all bigWig intervals in `chr1:0-10000000` in chunks of 5 blocks follows:
222 | 
223 |     iter = bwOverlappingIntervalsIterator(fp, "chr1", 0, 10000000, 5);
224 |     while(iter && iter->data) { //iter is NULL if creation or bwIteratorNext() failed
225 |         //Do stuff with iter->intervals
226 |         iter = bwIteratorNext(iter);
227 |     }
228 |     bwIteratorDestroy(iter);
229 | 
230 | # A note on bigWig statistics
231 | 
232 | The results of `min`, `max`, and `mean` should be the same as those from `BigWigSummary`. `stdev` and `coverage`, however, may differ due to Kent's tools producing incorrect results (at least for `coverage`, though the same appears to be the case for `stdev`).
233 | 
234 | # Python interface
235 | 
236 | There are currently two python interfaces that make use of libBigWig: [pyBigWig](https://github.com/dpryan79/pyBigWig) by me and [bw-python](https://github.com/brentp/bw-python) by Brent Pedersen. Those interested are encouraged to give both a try!
237 | 


--------------------------------------------------------------------------------
/libBigWig/bigWig.h:
--------------------------------------------------------------------------------
  1 | #ifndef LIBBIGWIG_H
  2 | #define LIBBIGWIG_H
  3 | 
  4 | #include "bigWigIO.h"
  5 | #include "bwValues.h"
  6 | #include <inttypes.h>
  7 | #include <zlib.h>
  8 | 
  9 | #ifdef __cplusplus
 10 | extern "C" {
 11 | #endif
 12 | 
 13 | /*! \mainpage libBigWig
 14 |  *
 15 |  * \section Introduction
 16 |  *
 17 |  * libBigWig is a C library for parsing local/remote bigWig and bigBed files. This is similar to Kent's library from UCSC, except 
 18 |  *  * The license is much more liberal
 19 |  *  * This code doesn't call `exit()` on error, thereby killing the calling application.
 20 |  *
 21 |  * External files are accessed using [curl](http://curl.haxx.se/).
 22 |  *
 23 |  * Please submit issues and pull requests [here](https://github.com/dpryan79/libBigWig).
 24 |  *
 25 |  * \section Compilation
 26 |  *
 27 |  * Assuming you already have the curl libraries installed (not just the curl binary!):
 28 |  *
 29 |  *     make install prefix=/some/path
 30 |  *
 31 |  * \section Writing bigWig files
 32 |  *
 33 |  * There are three methods for storing values in a bigWig file, further described in the [wiggle format](http://genome.ucsc.edu/goldenpath/help/wiggle.html). The entries within the file are grouped into "blocks" and each such block is limited to storing entries of a single type. So, it is unwise to use a single bedGraph-like entry followed by a single fixed-step entry followed by a variable-step entry, as that would require three separate blocks, with additional space required for each.
 34 |  *
 35 |  * \section Testing file types
 36 |  *
 37 |  * As of version 0.3.0, libBigWig supports reading bigBed files. If an application needs to support both bigBed and bigWig input, then the `bwIsBigWig` and `bbIsBigBed` functions can be used to determine the file type. These both use the "magic" number at the beginning of the file to determine the file type.
 38 |  *
 39 |  * \section Interval and entry iterators
 40 |  *
 41 |  * As of version 0.3.0, libBigWig supports iterating over intervals in bigWig files and entries in bigBed files. The number of intervals/entries returned with each iteration can be controlled by setting the number of blocks processed in each iteration (intervals and entries are grouped inside of bigWig and bigBed files into blocks of entries). See `test/testIterator.c` for an example.
 42 |  *
 43 |  * \section Examples
 44 |  * 
 45 |  * Please see [README.md](README.md) and the files under `test/` for examples.
 46 |  */
 47 |  
 48 | 
 49 | /*! \file bigWig.h
 50 |  *
 51 |  * These are the functions and structures that should be used by external users. While I don't particularly recommend dealing with some of the structures (e.g., a bigWigHdr_t), they're described here in case you need them.
 52 |  *
 53 |  * BTW, this library doesn't switch endianness as appropriate, since I kind of assume that there's only one type produced these days.
 54 |  */
 55 | 
 56 | /*!
 57 |  * The library version number
 58 |  */
 59 | #define LIBBIGWIG_VERSION 0.4.8
 60 | 
 61 | /*!
 62 |  * If 1, then this library was compiled with remote file support.
 63 |  */
 64 | #ifdef NOCURL
 65 | #define LIBBIGWIG_CURL 0
 66 | #ifndef CURLTYPE_DEFINED
 67 | #define CURLTYPE_DEFINED
 68 | typedef int CURLcode;
 69 | typedef void CURL;
 70 | #endif
 71 | #else
 72 | #define LIBBIGWIG_CURL 1
 73 | #endif
 74 | 
 75 | /*!
 76 |  * The magic number of a bigWig file.
 77 |  */
 78 | #define BIGWIG_MAGIC 0x888FFC26
 79 | /*!
 80 |  * The magic number of a bigBed file.
 81 |  */
 82 | #define BIGBED_MAGIC 0x8789F2EB
 83 | /*!
 84 |  * The magic number of a "cirTree" block in a file.
 85 |  */
 86 | #define CIRTREE_MAGIC 0x78ca8c91
 87 | /*!
 88 |  * The magic number of an index block in a file.
 89 |  */
 90 | #define IDX_MAGIC 0x2468ace0
 91 | /*!
 92 |  * The default number of children per block.
 93 |  */
 94 | #define DEFAULT_nCHILDREN 64
 95 | /*!
 96 |  * The default decompression buffer size in bytes. This is used to determin
 97 |  */
 98 | #define DEFAULT_BLOCKSIZE 32768
 99 | 
100 | /*!
101 |  * An enum that dictates the type of statistic to fetch for a given interval
102 |  */
103 | enum bwStatsType {
104 |     doesNotExist = -1, /*!< This does nothing */
105 |     mean = 0, /*!< The mean value */
106 |     average = 0, /*!< The mean value */
107 |     stdev = 1, /*!< The standard deviation of the values */
108 |     dev = 1, /*!< The standard deviation of the values */
109 |     max = 2, /*!< The maximum value */
110 |     min = 3, /*!< The minimum value */
111 |     cov = 4, /*!< The number of bases covered */
112 |     coverage = 4, /*!<The number of bases covered */ 
113 |     sum = 5 /*!< The sum of per-base values */
114 | };
115 | 
116 | //Should hide this from end users
117 | /*!
118 |  * @brief BigWig files have multiple "zoom" levels, each of which has its own header. This holds those headers.
119 |  *
120 |  * N.B., there's 4 bytes of padding in the on-disk representation between level and dataOffset.
121 |  */
122 | typedef struct {
123 |     uint32_t *level; /**<The zoom level, which is an integer starting with 0.*/
124 |     //There's 4 bytes of padding between these
125 |     uint64_t *dataOffset; /**<The offset to the on-disk start of the data. This isn't used currently.*/
126 |     uint64_t *indexOffset; /**<The offset to the on-disk start of the index. This *is* used.*/
127 |     bwRTree_t **idx; /**<Index for each zoom level. Represented as a tree*/
128 | } bwZoomHdr_t;
129 | 
130 | /*!
131 |  * @brief The header section of a bigWig file.
132 |  *
133 |  * Some of the values aren't currently used for anything. Others may optionally not exist.
134 |  */
135 | typedef struct {
136 |     uint16_t version; /**<The version information of the file.*/
137 |     uint16_t nLevels; /**<The number of "zoom" levels.*/
138 |     uint64_t ctOffset; /**<The offset to the on-disk chromosome tree list.*/
139 |     uint64_t dataOffset; /**<The on-disk offset to the first block of data.*/
140 |     uint64_t indexOffset; /**<The on-disk offset to the data index.*/
141 |     uint16_t fieldCount; /**<Total number of fields.*/
142 |     uint16_t definedFieldCount; /**<Number of fixed-format BED fields.*/
143 |     uint64_t sqlOffset; /**<The on-disk offset to an SQL string. This is unused.*/
144 |     uint64_t summaryOffset; /**<If there's a summary, this is the offset to it on the disk.*/
145 |     uint32_t bufSize; /**<The compression buffer size (if the data is compressed).*/
146 |     uint64_t extensionOffset; /**<Unused*/
147 |     bwZoomHdr_t *zoomHdrs; /**<Pointers to the header for each zoom level.*/
148 |     //total Summary
149 |     uint64_t nBasesCovered; /**<The total bases covered in the file.*/
150 |     double minVal; /**<The minimum value in the file.*/
151 |     double maxVal; /**<The maximum value in the file.*/
152 |     double sumData; /**<The sum of all values in the file.*/
153 |     double sumSquared; /**<The sum of the squared values in the file.*/
154 | } bigWigHdr_t;
155 | 
156 | //Should probably replace this with a hash
157 | /*!
158 |  * @brief Holds the chromosomes and their lengths
159 |  */
160 | typedef struct {
161 |     int64_t nKeys; /**<The number of chromosomes */
162 |     char **chrom; /**<A list of null terminated chromosomes */
163 |     uint32_t *len; /**<The lengths of each chromosome */
164 | } chromList_t;
165 | 
166 | //TODO remove from bigWig.h
167 | /// @cond SKIP
168 | typedef struct bwLL bwLL;
169 | struct bwLL {
170 |     bwRTreeNode_t *node;
171 |     struct bwLL *next;
172 | };
173 | typedef struct bwZoomBuffer_t bwZoomBuffer_t;
174 | struct bwZoomBuffer_t { //each individual entry takes 32 bytes
175 |     void *p;
176 |     uint32_t l, m;
177 |     struct bwZoomBuffer_t *next;
178 | };
179 | /// @endcond
180 | 
181 | /*!
182 |  * @brief This is only needed for writing bigWig files (and won't be created otherwise)
183 |  * This should be removed from bigWig.h
184 |  */
185 | typedef struct {
186 |     uint64_t nBlocks; /**<The number of blocks written*/
187 |     uint32_t blockSize; /**<The maximum number of children*/
188 |     uint64_t nEntries; /**<The number of entries processed. This is used for the first contig and determining how the zoom levels are computed*/
189 |     uint64_t runningWidthSum; /**<The running sum of the entry widths for the first contig (again, used for the first contig and computing zoom levels)*/
190 |     uint32_t tid; /**<The current TID that's being processed*/
191 |     uint32_t start; /**<The start position of the block*/
192 |     uint32_t end; /**<The end position of the block*/
193 |     uint32_t span; /**<The span of each entry, if applicable*/
194 |     uint32_t step; /**<The step size, if applicable*/
195 |     uint8_t ltype; /**<The type of the last entry added*/
196 |     uint32_t l; /**<The current size of p. This and the type determine the number of items held*/
197 |     void *p; /**<A buffer of size hdr->bufSize*/
198 |     bwLL *firstIndexNode; /**<The first index node in the linked list*/
199 |     bwLL *currentIndexNode; /**<The last index node in a linked list*/
200 |     bwZoomBuffer_t **firstZoomBuffer; /**<The first node in a linked list of leaf nodes*/
201 |     bwZoomBuffer_t **lastZoomBuffer; /**<The last node in a linked list of leaf nodes*/
202 |     uint64_t *nNodes; /**<The number of leaf nodes per zoom level, useful for determining duplicate levels*/
203 |     uLongf compressPsz; /**<The size of the compression buffer*/
204 |     void *compressP; /**<A compressed buffer of size compressPsz*/
205 | } bwWriteBuffer_t;
206 | 
207 | /*!
208 |  * @brief A structure that holds everything needed to access a bigWig file.
209 |  */
210 | typedef struct {
211 |     URL_t *URL; /**<A pointer that can handle both local and remote files (including a buffer if needed).*/
212 |     bigWigHdr_t *hdr; /**<The file header.*/
213 |     chromList_t *cl; /**<A list of chromosome names (the order is the ID).*/
214 |     bwRTree_t *idx; /**<The index for the full dataset.*/
215 |     bwWriteBuffer_t *writeBuffer; /**<The buffer used for writing.*/
216 |     int isWrite; /**<0: Opened for reading, 1: Opened for writing.*/
217 |     int type; /**<0: bigWig, 1: bigBed.*/
218 | } bigWigFile_t;
219 | 
220 | /*!
221 |  * @brief Holds interval:value associations
222 |  */
223 | typedef struct {
224 |     uint32_t l; /**<Number of intervals held*/
225 |     uint32_t m; /**<Maximum number of values/intervals the struct can hold*/
226 |     uint32_t *start; /**<The start positions (0-based half open)*/
227 |     uint32_t *end; /**<The end positions (0-based half open)*/
228 |     float *value; /**<The value associated with each position*/
229 | } bwOverlappingIntervals_t;
230 | 
231 | /*!
232 |  * @brief Holds interval:str associations
233 |  */
234 | typedef struct {
235 |     uint32_t l; /**<Number of intervals held*/
236 |     uint32_t m; /**<Maximum number of values/intervals the struct can hold*/
237 |     uint32_t *start; /**<The start positions (0-based half open)*/
238 |     uint32_t *end; /**<The end positions (0-based half open)*/
239 |     char **str; /**<The strings associated with a given entry.*/
240 | } bbOverlappingEntries_t;
241 | 
242 | /*!
243 |  * @brief A structure to hold iterations
244 |  * One of intervals and entries should be used to access records from bigWig or bigBed files, respectively.
245 |  */
246 | typedef struct {
247 |     bigWigFile_t *bw; /**<Pointer to the bigWig/bigBed file.*/
248 |     uint32_t tid; /**<The contig/chromosome ID.*/
249 |     uint32_t start; /**<Start position of the query interval.*/
250 |     uint32_t end; /**<End position of the query interval.*/
251 |     uint64_t offset; /**<Offset into the blocks.*/
252 |     uint32_t blocksPerIteration; /**<Number of blocks to use per iteration.*/
253 |     int withString; /**<For bigBed entries, whether to return the string with the entries.*/
254 |     void *blocks; /**<Overlapping blocks.*/
255 |     bwOverlappingIntervals_t *intervals; /**<Overlapping intervals (or NULL).*/
256 |     bbOverlappingEntries_t *entries; /**<Overlapping entries (or NULL).*/
257 |     void *data; /**<Points to either intervals or entries. If there are no further intervals/entries, then this is NULL. Use this to test for whether to continue iterating.*/
258 | } bwOverlapIterator_t;
259 | 
260 | /*!
261 |  * @brief Initializes curl and global variables. This *MUST* be called before other functions (at least if you want to connect to remote files).
262 |  * For remote files, curl must be initialized and regions of a file read into an internal buffer. If the buffer is too small then an excessive number of connections will be made. If the buffer is too large then more data than required is fetched. 128KiB is likely sufficient for most needs.
263 |  * @param bufSize The internal buffer size used for remote connection.
264 |  * @see bwCleanup
265 |  * @return 0 on success and 1 on error.
266 |  */
267 | int bwInit(size_t bufSize);
268 | 
269 | /*!
270 |  * @brief The counterpart to bwInit, this cleans up curl.
271 |  * @see bwInit
272 |  */
273 | void bwCleanup(void);
274 | 
275 | /*!
276 |  * @brief Determine if a file is a bigWig file.
277 |  * This function will quickly check either local or remote files to determine if they appear to be valid bigWig files. This can be determined by reading the first 4 bytes of the file.
278 |  * @param fname The file name or URL (http, https, and ftp are supported)
279 |  * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
280 |  * @return 1 if the file appears to be bigWig, otherwise 0.
281 |  */
282 | int bwIsBigWig(const char *fname, CURLcode (*callBack)(CURL*));
283 | 
284 | /*!
285 |  * @brief Determine if a file is a bigBed file.
286 |  * This function will quickly check either local or remote files to determine if they appear to be valid bigBed files. This can be determined by reading the first 4 bytes of the file.
287 |  * @param fname The file name or URL (http, https, and ftp are supported)
288 |  * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
289 |  * @return 1 if the file appears to be bigBed, otherwise 0.
290 |  */
291 | int bbIsBigBed(const char *fname, CURLcode (*callBack)(CURL*));
292 | 
293 | /*!
294 |  * @brief Opens a local or remote bigWig file.
295 |  * This will open a local or remote bigWig file. Writing of local bigWig files is also supported.
296 |  * @param fname The file name or URL (http, https, and ftp are supported)
297 |  * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
298 |  * @param mode The mode, by default "r". Both local and remote files can be read, but only local files can be written. For files being written the callback function is ignored. If and only if the mode contains "w" will the file be opened for writing (in all other cases the file will be opened for reading).
299 |  * @return A bigWigFile_t * on success and NULL on error.
300 |  */
301 | bigWigFile_t *bwOpen(const char *fname, CURLcode (*callBack)(CURL*), const char* mode);
302 | 
303 | /*!
304 |  * @brief Opens a local or remote bigBed file.
305 |  * This will open a local or remote bigBed file. Note that this file format can only be read and NOT written!
306 |  * @param fname The file name or URL (http, https, and ftp are supported)
307 |  * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
308 |  * @return A bigWigFile_t * on success and NULL on error.
309 |  */
310 | bigWigFile_t *bbOpen(const char *fname, CURLcode (*callBack)(CURL*));
311 | 
312 | /*!
313 |  * @brief Returns a string containing the SQL entry (or NULL).
314 |  * The "auto SQL" field contains the names and value types of the entries in
315 |  * each bigBed entry. If you need to parse a particular value out of each entry,
316 |  * then you'll need to first parse this.
317 |  * @param fp The file pointer to a valid bigWigFile_t
318 |  * @return A char *, which you MUST free!
319 |  */
320 | char *bbGetSQL(bigWigFile_t *fp);
321 | 
322 | /*!
323 |  * @brief Closes a bigWigFile_t and frees up allocated memory
324 |  * This closes both bigWig and bigBed files.
325 |  * @param fp The file pointer.
326 |  */
327 | void bwClose(bigWigFile_t *fp);
328 | 
329 | /*******************************************************************************
330 | *
331 | * The following are in bwStats.c
332 | *
333 | *******************************************************************************/
334 | 
335 | /*!
336 |  * @brief Converts between chromosome name and ID
337 |  *
338 |  * @param fp A valid bigWigFile_t pointer
339 |  * @param chrom A chromosome name
340 |  * @return An ID, -1 will be returned on error (note that this is an unsigned value, so that's ~4 billion. bigWig/bigBed files can't store that many chromosomes anyway).
341 |  */
342 | uint32_t bwGetTid(const bigWigFile_t *fp, const char *chrom);
343 | 
344 | /*!
345 |  * @brief Frees space allocated by `bwGetOverlappingIntervals`
346 |  * @param o A valid `bwOverlappingIntervals_t` pointer.
347 |  * @see bwGetOverlappingIntervals
348 |  */
349 | void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o);
350 | 
351 | /*!
352 |  * @brief Frees space allocated by `bbGetOverlappingEntries`
353 |  * @param o A valid `bbOverlappingEntries_t` pointer.
354 |  * @see bbGetOverlappingEntries
355 |  */
356 | void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o);
357 | 
358 | /*!
359 |  * @brief Return bigWig entries overlapping an interval.
360 |  * Find all bigWig entries overlapping a range and returns them, including their associated values.
361 |  * @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file!
362 |  * @param chrom A valid chromosome name.
363 |  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
364 |  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
365 |  * @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and intervals.
366 |  * @see bwOverlappingIntervals_t
367 |  * @see bwDestroyOverlappingIntervals
368 |  * @see bwGetValues
369 |  */
370 | bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end);
371 | 
372 | /*!
373 |  * @brief Return bigBed entries overlapping an interval.
374 |  * Find all bigBed entries overlapping a range and returns them.
375 |  * @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file!
376 |  * @param chrom A valid chromosome name.
377 |  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
378 |  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
379 |  * @param withString If not 0, return the string associated with each entry in the output. If 0, there are no associated strings returned. This is useful if the only information needed are the locations of the entries, which require significantly less memory.
380 |  * @return NULL on error or no overlapping values, otherwise a `bbOverlappingEntries_t *` holding the intervals and (optionally) the associated string.
381 |  * @see bbOverlappingEntries_t
382 |  * @see bbDestroyOverlappingEntries
383 |  */
384 | bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, int withString);
385 | 
386 | /*!
387 |  * @brief Creates an iterator over intervals in a bigWig file
388 |  * Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`.
389 |  * Intervals are in the `intervals` member and `data` can be used to determine when to end iteration.
390 |  * @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file!
391 |  * @param chrom A valid chromosome name.
392 |  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
393 |  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
394 |  * @param blocksPerIteration The number of blocks (internal groupings of intervals in bigWig files) to return per iteration.
395 |  * @return NULL on error, otherwise a bwOverlapIterator_t pointer
396 |  * @see bwOverlapIterator_t
397 |  * @see bwIteratorNext
398 |  * @see bwIteratorDestroy
399 |  */ 
400 | bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration);
401 | 
402 | /*!
403 |  * @brief Creates an iterator over entries in a bigBed file
404 |  * Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`.
405 |  * Entries are in the `entries` member and `data` can be used to determine when to end iteration.
406 |  * @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file!
407 |  * @param chrom A valid chromosome name.
408 |  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
409 |  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
410 |  * @param withString Whether the returned entries should include their associated strings.
411 |  * @param blocksPerIteration The number of blocks (internal groupings of entries in bigBed files) to return per iteration.
412 |  * @return NULL on error, otherwise a bwOverlapIterator_t pointer
413 |  * @see bbGetOverlappingEntries
414 |  * @see bwOverlapIterator_t
415 |  * @see bwIteratorNext
416 |  * @see bwIteratorDestroy
417 |  */ 
418 | bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration);
419 | 
420 | /*!
421 |  * @brief Traverses to the entries/intervals in the next group of blocks.
422 |  * @param iter A bwOverlapIterator_t pointer that is updated (or destroyed on error)
423 |  * @return NULL on error, otherwise a bwOverlapIterator_t pointer with the intervals or entries from the next set of blocks.
424 |  * @see bwOverlapIterator_t
425 |  * @see bwIteratorDestroy
426 |  */ 
427 | bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter);
428 | 
429 | /*!
430 |  * @brief Destroys a bwOverlapIterator_t
431 |  * @param iter The bwOverlapIterator_t that should be destroyed
432 |  */
433 | void bwIteratorDestroy(bwOverlapIterator_t *iter);
434 | 
435 | /*!
436 |  * @brief Return all per-base bigWig values in a given interval.
437 |  * Given an interval (e.g., chr1:0-100), return the value at each position in a bigWig file. Positions without associated values are suppressed by default, but may be returned if `includeNA` is not 0.
438 |  * @param fp A valid bigWigFile_t pointer.
439 |  * @param chrom A valid chromosome name.
440 |  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
441 |  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
442 |  * @param includeNA If not 0, report NA values as well (as NA).
443 |  * @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and positions.
444 |  * @see bwOverlappingIntervals_t
445 |  * @see bwDestroyOverlappingIntervals
446 |  * @see bwGetOverlappingIntervals
447 |  */
448 | bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, int includeNA);
449 | 
450 | /*!
451 |  * @brief Determines per-interval bigWig statistics
452 |  * Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals.
453 |  * @param fp The file from which to extract statistics.
454 |  * @param chrom A valid chromosome name.
455 |  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
456 |  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
457 |  * @param nBins The number of bins within the interval to calculate statistics for.
458 |  * @param type The type of statistic.
459 |  * @see bwStatsType
460 |  * @return A pointer to an array of double precision floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows.
461 |  */
462 | double *bwStats(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
463 | 
464 | /*!
465 |  * @brief Determines per-interval bigWig statistics
466 |  * Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals. The difference with bwStats is that zoom levels are never used.
467 |  * @param fp The file from which to extract statistics.
468 |  * @param chrom A valid chromosome name.
469 |  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
470 |  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
471 |  * @param nBins The number of bins within the interval to calculate statistics for.
472 |  * @param type The type of statistic.
473 |  * @see bwStatsType
474 |  * @return A pointer to an array of double precision floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows.
475 | */
476 | double *bwStatsFromFull(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
477 | 
478 | //Writer functions
479 | 
480 | /*!
481 |  * @brief Create a largely empty bigWig header
482 |  * Every bigWig file has a header, this creates the template for one. It also takes care of space allocation in the output write buffer.
483 |  * @param fp The bigWigFile_t* that you want to write to.
484 |  * @param maxZooms The maximum number of zoom levels. If you specify 0 then there will be no zoom levels. A value <0 or > 65535 will result in a maximum of 10.
485 |  * @return 0 on success.
486 |  */
487 | int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms);
488 | 
489 | /*!
490 |  * @brief Take a list of chromosome names and lengths and return a pointer to a chromList_t
491 |  * This MUST be run before `bwWriteHdr()`. Note that the input is NOT free()d!
492 |  * @param chroms A list of chromosomes.
493 |  * @param lengths The length of each chromosome.
494 |  * @param n The number of chromosomes (thus, the length of `chroms` and `lengths`)
495 |  * @return A pointer to a chromList_t or NULL on error.
496 |  */
497 | chromList_t *bwCreateChromList(const char* const* chroms, const uint32_t *lengths, int64_t n);
498 | 
499 | /*!
500 |  * @brief Write the header to a bigWig file.
501 |  * You must have already opened the output file, created a header and a chromosome list.
502 |  * @param bw The output bigWigFile_t pointer.
503 |  * @see bwCreateHdr
504 |  * @see bwCreateChromList
505 |  */
506 | int bwWriteHdr(bigWigFile_t *bw);
507 | 
508 | /*!
509 |  * @brief Write a new block of bedGraph-like intervals to a bigWig file
510 |  * Adds entries of the form:
511 |  * chromosome	start	end	value
512 |  * to the file. These will always be added in a new block, so you may have previously used a different storage type.
513 |  * 
514 |  * In general it's more efficient to use the bwAppend* functions, but then you MUST know that the previously written block is of the same type. In other words, you can only use bwAppendIntervals() after bwAddIntervals() or a previous bwAppendIntervals().
515 |  * @param fp The output file pointer.
516 |  * @param chrom A list of chromosomes, of length `n`.
517 |  * @param start A list of start positions of length`n`.
518 |  * @param end A list of end positions of length`n`.
519 |  * @param values A list of values of length`n`.
520 |  * @param n The length of the aforementioned lists.
521 |  * @return 0 on success and another value on error.
522 |  * @see bwAppendIntervals
523 |  */
524 | int bwAddIntervals(bigWigFile_t *fp, const char* const* chrom, const uint32_t *start, const uint32_t *end, const float *values, uint32_t n);
525 | 
526 | /*!
527 |  * @brief Append bedGraph-like intervals to a previous block of bedGraph-like intervals in a bigWig file.
528 |  * If you have previously used bwAddIntervals() then this will append additional entries into the previous block (or start a new one if needed).
529 |  * @param fp The output file pointer.
530 |  * @param start A list of start positions of length`n`.
531 |  * @param end A list of end positions of length`n`.
532 |  * @param values A list of values of length`n`.
533 |  * @param n The length of the aforementioned lists.
534 |  * @return 0 on success and another value on error.
535 |  * @warning Do NOT use this after `bwAddIntervalSpans()`, `bwAppendIntervalSpans()`, `bwAddIntervalSpanSteps()`, or `bwAppendIntervalSpanSteps()`.
536 |  * @see bwAddIntervals
537 |  */
538 | int bwAppendIntervals(bigWigFile_t *fp, const uint32_t *start, const uint32_t *end, const float *values, uint32_t n);
539 | 
540 | /*!
541 |  * @brief Add a new block of variable-step entries to a bigWig file
542 |  * Adds entries of the form
543 |  * chromosome	start	value
544 |  * to the file. Each block of such entries has an associated "span", so each value describes the region chromosome:start-(start+span)
545 |  *
546 |  * This will always start a new block of values.
547 |  * @param fp The output file pointer.
548 |  * @param chrom The chromosome that the entries describe.
549 |  * @param start A list of start positions of length`n`.
550 |  * @param span The span of each entry (they must all be the same).
551 |  * @param values A list of values of length`n`.
552 |  * @param n The length of the aforementioned lists.
553 |  * @return 0 on success and another value on error.
554 |  * @see bwAppendIntervalSpans
555 |  */
556 | int bwAddIntervalSpans(bigWigFile_t *fp, const char *chrom, const uint32_t *start, uint32_t span, const float *values, uint32_t n);
557 | 
558 | /*!
559 |  * @brief Append to a previous block of variable-step entries.
560 |  * If you previously used `bwAddIntervalSpans()`, this will continue appending more values to the block(s) it created.
561 |  * @param fp The output file pointer.
562 |  * @param start A list of start positions of length`n`.
563 |  * @param values A list of values of length`n`.
564 |  * @param n The length of the aforementioned lists.
565 |  * @return 0 on success and another value on error.
566 |  * @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpanSteps()` or `bwAppendIntervalSpanSteps()`
567 |  * @see bwAddIntervalSpans
568 |  */
569 | int bwAppendIntervalSpans(bigWigFile_t *fp, const uint32_t *start, const float *values, uint32_t n);
570 | 
571 | /*!
572 |  * @brief Add a new block of fixed-step entries to a bigWig file
573 |  * Adds entries of the form
574 |  * value
575 |  * to the file. Each block of such entries has an associated "span", "step", chromosome and start position. See the wiggle format for more details.
576 |  *
577 |  * This will always start a new block of values.
578 |  * @param fp The output file pointer.
579 |  * @param chrom The chromosome that the entries describe.
580 |  * @param start The starting position of the block of entries.
581 |  * @param span The span of each entry (i.e., the number of bases it describes).
582 |  * @param step The step between entry start positions.
583 |  * @param values A list of values of length`n`.
584 |  * @param n The length of the aforementioned lists.
585 |  * @return 0 on success and another value on error.
586 |  * @see bwAppendIntervalSpanSteps
587 |  */
588 | int bwAddIntervalSpanSteps(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t span, uint32_t step, const float *values, uint32_t n);
589 | 
590 | /*!
591 |  * @brief Append to a previous block of fixed-step entries.
592 |  * If you previously used `bwAddIntervalSpanSteps()`, this will continue appending more values to the block(s) it created.
593 |  * @param fp The output file pointer.
594 |  * @param values A list of values of length`n`.
595 |  * @param n The length of the aforementioned lists.
596 |  * @return 0 on success and another value on error.
597 |  * @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpans()` or `bwAppendIntervalSpans()`
598 |  * @see bwAddIntervalSpanSteps
599 |  */
600 | int bwAppendIntervalSpanSteps(bigWigFile_t *fp, const float *values, uint32_t n);
601 | 
602 | #ifdef __cplusplus
603 | }
604 | #endif
605 | 
606 | #endif // LIBBIGWIG_H
607 | 


--------------------------------------------------------------------------------
/libBigWig/bigWigIO.h:
--------------------------------------------------------------------------------
  1 | #ifndef LIBBIGWIG_IO_H
  2 | #define LIBBIGWIG_IO_H
  3 | 
  4 | #ifndef NOCURL
  5 | #include <curl/curl.h>
  6 | #else
  7 | #include <stdio.h>
  8 | #ifndef CURLTYPE_DEFINED
  9 | #define CURLTYPE_DEFINED
 10 | typedef int CURLcode;
 11 | typedef void CURL;
 12 | #endif
 13 | #define CURLE_OK 0
 14 | #define CURLE_FAILED_INIT 1
 15 | #endif
 16 | /*! \file bigWigIO.h
 17 |  * These are (typically internal) IO functions, so there's generally no need for you to directly use them!
 18 |  */
 19 | 
 20 | /*!
 21 |  * The size of the buffer used for remote files.
 22 |  */
 23 | extern size_t GLOBAL_DEFAULTBUFFERSIZE;
 24 | 
 25 | /*!
 26 |  * The enumerated values that indicate the connection type used to access a file.
 27 |  */
 28 | enum bigWigFile_type_enum {
 29 |     BWG_FILE = 0, /**<A local file, accessed through a FILE *.*/
 30 |     BWG_HTTP = 1, /**<A remote file accessed over http.*/
 31 |     BWG_HTTPS = 2, /**<A remote file accessed over https.*/
 32 |     BWG_FTP = 3 /**<A remote file accessed over ftp.*/
 33 | };
 34 | 
 35 | /*!
 36 |  * @brief This structure holds the file pointers and buffers needed for raw access to local and remote files.
 37 |  */
 38 | typedef struct {
 39 |     union {
 40 | #ifndef NOCURL
 41 |         CURL *curl; /**<The CURL * file pointer for remote files.*/
 42 | #endif
 43 |         FILE *fp; /**<The FILE * file pointer for local files.*/
 44 |     } x; /**<A union holding curl and fp.*/
 45 |     void *memBuf; /**<A void * pointing to memory of size bufSize.*/
 46 |     size_t filePos; /**<Current position inside the file.*/
 47 |     size_t bufPos; /**<Current position inside the buffer.*/
 48 |     size_t bufSize; /**<The size of the buffer.*/
 49 |     size_t bufLen; /**<The actual size of the buffer used.*/
 50 |     enum bigWigFile_type_enum type; /**<The connection type*/
 51 |     int isCompressed; /**<1 if the file is compressed, otherwise 0*/
 52 |     const char *fname; /**<Only needed for remote connections. The original URL/filename requested, since we need to make multiple connections.*/
 53 | } URL_t;
 54 | 
 55 | /*!
 56 |  *  @brief Reads data into the given buffer.
 57 |  *
 58 |  *  This function will store bufSize data into buf for both local and remote files. For remote files an internal buffer is used to store a (typically larger) segment of the remote file.
 59 |  *
 60 |  *  @param URL A URL_t * pointing to a valid opened file or remote URL.
 61 |  *  @param buf The buffer in memory that you would like filled. It must be able to hold bufSize bytes!
 62 |  *  @param bufSize The number of bytes to transfer to buf.
 63 |  *
 64 |  *  @return Returns the number of bytes stored in buf, which should be bufSize on success and something else on error.
 65 |  *
 66 |  *  @warning Note that on error, URL for remote files is left in an unusable state. You can get around this by running urlSeek() to a position outside of the range held by the internal buffer.
 67 |  */
 68 | size_t urlRead(URL_t *URL, void *buf, size_t bufSize);
 69 | 
 70 | /*!
 71 |  *  @brief Seeks to a given position in a local or remote file.
 72 |  * 
 73 |  *  For local files, this will set the file position indicator for the file pointer to the desired position. For remote files, it sets the position to start downloading data for the next urlRead(). Note that for remote files, running urlSeek() with a pos within the current buffer will simply modify the internal offset.
 74 |  *
 75 |  *  @param URL A URL_t * pointing to a valid opened file or remote URL.
 76 |  *  @param pos The position to seek to.
 77 |  *
 78 |  *  @return CURLE_OK on success and a different CURLE_XXX on error. For local files, the error return value is always CURLE_FAILED_INIT
 79 |  */
 80 | CURLcode urlSeek(URL_t *URL, size_t pos);
 81 | 
 82 | /*!
 83 |  *  @brief Open a local or remote file
 84 |  *
 85 |  *  Opens a local or remote file. Currently, http, https, and ftp are the only supported protocols and the URL must then begin with "http://", "https://", or "ftp://" as appropriate.
 86 |  *
 87 |  *  For remote files, an internal buffer is used to hold file contents, to avoid downloading entire files before starting. The size of this buffer and various variables related to connection timeout are set with bwInit().
 88 |  *
 89 |  *  Note that you **must** run urlClose() on this when finished. However, you would typically just use bwOpen() rather than directly calling this function.
 90 |  *
 91 |  * @param fname The file name or URL to open.
 92 |  * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information.
 93 |  * @param mode "r", "w" or NULL. If and only if the mode contains the character "w" will the file be opened for writing.
 94 |  *
 95 |  *  @return A URL_t * or NULL on error.
 96 |  */
 97 | URL_t *urlOpen(const char *fname, CURLcode (*callBack)(CURL*), const char* mode);
 98 | 
 99 | /*!
100 |  *  @brief Close a local/remote file
101 |  *
102 |  *  This will perform the cleanup required on a URL_t*, releasing memory as needed.
103 |  *
104 |  *  @param URL A URL_t * pointing to a valid opened file or remote URL.
105 |  *
106 |  *  @warning URL will no longer point to a valid location in memory!
107 |  */
108 | void urlClose(URL_t *URL);
109 | 
110 | #endif // LIBBIGWIG_IO_H
111 | 


--------------------------------------------------------------------------------
/libBigWig/bwCommon.h:
--------------------------------------------------------------------------------
 1 | /*! \file bwCommon.h
 2 |  *
 3 |  * You have no reason to use these functions. They may change without warning because there's no reason for them to be used outside of libBigWig's internals.
 4 |  *
 5 |  * These are structures and functions from a variety of files that are used across files internally but don't need to be seen by libBigWig users.
 6 |  */
 7 | 
 8 | /*!
 9 |  * @brief Like fsetpos, but for local or remote bigWig files.
10 |  * This will set the file position indicator to the specified point. For local files this literally is `fsetpos`, while for remote files it fills a memory buffer with data starting at the desired position.
11 |  * @param fp A valid opened bigWigFile_t.
12 |  * @param pos The position within the file to seek to.
13 |  * @return 0 on success and -1 on error.
14 |  */
15 | int bwSetPos(bigWigFile_t *fp, size_t pos);
16 | 
17 | /*!
18 |  * @brief A local/remote version of `fread`.
19 |  * Reads data from either local or remote bigWig files.
20 |  * @param data An allocated memory block big enough to hold the data.
21 |  * @param sz The size of each member that should be copied.
22 |  * @param nmemb The number of members to copy.
23 |  * @param fp The bigWigFile_t * from which to copy the data.
24 |  * @see bwSetPos
25 |  * @return The number of members fully copied: `nmemb` on success and something less on error (this is equivalent to `fread`).
26 |  */
27 | size_t bwRead(void *data, size_t sz, size_t nmemb, bigWigFile_t *fp);
28 | 
29 | /*!
30 |  * @brief Determine what the file position indicator says.
31 |  * This is equivalent to `ftell` for local or remote files.
32 |  * @param fp The file.
33 |  * @return The position in the file.
34 |  */
35 | long bwTell(bigWigFile_t *fp);
36 | 
37 | /*!
38 |  * @brief Reads a data index (either full data or a zoom level) from a bigWig file.
39 |  * There is little reason for end users to use this function. This must be freed with `bwDestroyIndex`
40 |  * @param fp A valid bigWigFile_t pointer
41 |  * @param offset The file offset where the index begins
42 |  * @return A bwRTree_t pointer or NULL on error.
43 |  */
44 | bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset);
45 | 
46 | /*!
47 |  * @brief Destroy a bwRTreeNode_t and all of its children.
48 |  * @param node The node to destroy.
49 |  */
50 | void bwDestroyIndexNode(bwRTreeNode_t *node);
51 | 
52 | /*!
53 |  * @brief Frees space allocated by `bwReadIndex`
54 |  * There is generally little reason to use this, since end users should typically not need to run `bwReadIndex` themselves.
55 |  * @param idx A bwRTree_t pointer allocated by `bwReadIndex`.
56 |  */
57 | void bwDestroyIndex(bwRTree_t *idx);
58 | 
59 | /// @cond SKIP
60 | bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end);
61 | void destroyBWOverlapBlock(bwOverlapBlock_t *b);
62 | /// @endcond
63 | 
64 | /*!
65 |  * @brief Finishes what's needed to write a bigWigFile
66 |  * Flushes the buffer, converts the index linked list to a tree, writes that to disk, handles zoom level stuff, writes magic at the end
67 |  * @param fp A valid bigWigFile_t pointer
68 |  * @return 0 on success
69 |  */
70 | int bwFinalize(bigWigFile_t *fp);
71 | 
72 | /// @cond SKIP
73 | char *bwStrdup(const char *s);
74 | /// @endcond
75 | 


--------------------------------------------------------------------------------
/libBigWig/bwRead.c:
--------------------------------------------------------------------------------
  1 | #include "bigWig.h"
  2 | #include "bwCommon.h"
  3 | #include <stdlib.h>
  4 | #include <math.h>
  5 | #include <string.h>
  6 | #include <stdio.h>
  7 | 
  8 | static uint64_t readChromBlock(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize);
  9 | 
 10 | //Report the current position: ftell() for local files, otherwise the file position plus the position inside the buffer
 11 | long bwTell(bigWigFile_t *fp) {
 12 |     URL_t *url = fp->URL;
 13 |     return (url->type == BWG_FILE) ? ftell(url->x.fp) : (long) (url->filePos + url->bufPos);
 14 | }
 15 | 
 16 | //Seek to a given position, always measured from the beginning of the file
 17 | //Returns 0 on success and -1 on error
 18 | //To do, use the return code of urlSeek() in a more useful way.
 19 | int bwSetPos(bigWigFile_t *fp, size_t pos) {
 20 |     //urlSeek() reports a CURLcode; anything other than CURLE_OK
 21 |     //is collapsed into a plain -1 for the caller
 22 |     return (urlSeek(fp->URL, pos) == CURLE_OK) ? 0 : -1;
 23 | }
 24 | 
 25 | //returns the number of full members read (nmemb on success, something less on error)
 26 | size_t bwRead(void *data, size_t sz, size_t nmemb, bigWigFile_t *fp) {
 27 |     size_t i, rv;
 28 |     for(i=0; i<nmemb; i++) {
 29 |         rv = urlRead(fp->URL, data+i*sz, sz);
 30 |         if(rv != sz) return i;
 31 |     }
 32 |     return nmemb;
 33 | }
 34 | 
 35 | //Records the default buffer size and initializes curl (unless built with NOCURL)
 36 | //Returns 0 on success and 1 on error
 37 | //This should be called only once and bwCleanup() must be called when finished.
 38 | int bwInit(size_t defaultBufSize) {
 39 |     //remember the size of the buffer
 40 |     //that is used for remote files
 41 |     GLOBAL_DEFAULTBUFFERSIZE = defaultBufSize;
 42 | 
 43 | #ifndef NOCURL
 44 |     //set up curl's global state, reporting any failure as 1
 45 |     if(curl_global_init(CURL_GLOBAL_ALL) != CURLE_OK) return 1;
 46 | #endif
 47 | 
 48 |     return 0;
 49 | }
 50 | 
 51 | //This should be called before quitting, to release memory acquired by curl
 52 | void bwCleanup() {
 53 | #ifndef NOCURL
 54 |     curl_global_cleanup(); //compiled out entirely when built with NOCURL
 55 | #endif
 56 | }
 57 | 
 58 | static bwZoomHdr_t *bwReadZoomHdrs(bigWigFile_t *bw) {
 59 |     if(bw->isWrite) return NULL;
 60 |     uint16_t i;
 61 |     bwZoomHdr_t *zhdr = malloc(sizeof(bwZoomHdr_t));
 62 |     if(!zhdr) return NULL;
 63 |     uint32_t *level = malloc(bw->hdr->nLevels * sizeof(uint64_t));
 64 |     if(!level) {
 65 |         free(zhdr);
 66 |         return NULL;
 67 |     }
 68 |     uint32_t padding = 0;
 69 |     uint64_t *dataOffset = malloc(sizeof(uint64_t) * bw->hdr->nLevels);
 70 |     if(!dataOffset) {
 71 |         free(zhdr);
 72 |         free(level);
 73 |         return NULL;
 74 |     }
 75 |     uint64_t *indexOffset = malloc(sizeof(uint64_t) * bw->hdr->nLevels);
 76 |     if(!indexOffset) {
 77 |         free(zhdr);
 78 |         free(level);
 79 |         free(dataOffset);
 80 |         return NULL;
 81 |     }
 82 | 
 83 |     for(i=0; i<bw->hdr->nLevels; i++) {
 84 |         if(bwRead((void*) &(level[i]), sizeof(uint32_t), 1, bw) != 1) goto error;
 85 |         if(bwRead((void*) &padding, sizeof(uint32_t), 1, bw) != 1) goto error;
 86 |         if(bwRead((void*) &(dataOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
 87 |         if(bwRead((void*) &(indexOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
 88 |     }
 89 | 
 90 |     zhdr->level = level;
 91 |     zhdr->dataOffset = dataOffset;
 92 |     zhdr->indexOffset = indexOffset;
 93 |     zhdr->idx = calloc(bw->hdr->nLevels, sizeof(bwRTree_t*));
 94 |     if(!zhdr->idx) goto error;
 95 | 
 96 |     return zhdr;
 97 | 
 98 | error:
 99 |     for(i=0; i<bw->hdr->nLevels; i++) {
100 |         if(zhdr->idx[i]) bwDestroyIndex(zhdr->idx[i]);
101 |     }
102 |     free(zhdr);
103 |     free(level);
104 |     free(dataOffset);
105 |     free(indexOffset);
106 |     return NULL;
107 | }
108 | 
109 | static void bwHdrDestroy(bigWigHdr_t *hdr) { //frees hdr, its zoom headers (if any) and every zoom index that was loaded
110 |     bwZoomHdr_t *zoom = hdr->zoomHdrs;
111 |     if(zoom) {
112 |         for(int lvl=0; lvl<hdr->nLevels; lvl++) {
113 |             if(zoom->idx[lvl]) bwDestroyIndex(zoom->idx[lvl]); //entries stay NULL until an index is loaded
114 |         }
115 |         free(zoom->idx);
116 |         free(zoom->indexOffset);
117 |         free(zoom->dataOffset);
118 |         free(zoom->level);
119 |         free(zoom);
120 |     }
121 |     free(hdr);
122 | }
123 | 
124 | static void bwHdrRead(bigWigFile_t *bw) { //reads the file header into bw->hdr; bw->hdr is set to NULL on error
125 |     uint32_t magic;
126 |     if(bw->isWrite) return;
127 |     bw->hdr = calloc(1, sizeof(bigWigHdr_t));
128 |     if(!bw->hdr) return;
129 |     //The first 4 bytes must be the bigWig or the bigBed magic number
130 |     if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error; //0x0
131 |     if(magic != BIGWIG_MAGIC && magic != BIGBED_MAGIC) goto error;
132 |     //Fixed-size header fields, read in file order (the trailing comments give the offset of each field)
133 |     if(bwRead((void*) &(bw->hdr->version), sizeof(uint16_t), 1, bw) != 1) goto error; //0x4
134 |     if(bwRead((void*) &(bw->hdr->nLevels), sizeof(uint16_t), 1, bw) != 1) goto error; //0x6
135 |     if(bwRead((void*) &(bw->hdr->ctOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x8
136 |     if(bwRead((void*) &(bw->hdr->dataOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x10
137 |     if(bwRead((void*) &(bw->hdr->indexOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x18
138 |     if(bwRead((void*) &(bw->hdr->fieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x20
139 |     if(bwRead((void*) &(bw->hdr->definedFieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x22
140 |     if(bwRead((void*) &(bw->hdr->sqlOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x24
141 |     if(bwRead((void*) &(bw->hdr->summaryOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x2c
142 |     if(bwRead((void*) &(bw->hdr->bufSize), sizeof(uint32_t), 1, bw) != 1) goto error; //0x34
143 |     if(bwRead((void*) &(bw->hdr->extensionOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x38
144 | 
145 |     //zoom headers: read from the current position, i.e. directly after the fixed-size fields, and only when nLevels > 0
146 |     if(bw->hdr->nLevels) {
147 |         if(!(bw->hdr->zoomHdrs = bwReadZoomHdrs(bw))) goto error;
148 |     }
149 | 
150 |     //File summary information: skipped when summaryOffset is 0, otherwise five 8-byte values are read from that offset
151 |     if(bw->hdr->summaryOffset) {
152 |         if(urlSeek(bw->URL, bw->hdr->summaryOffset) != CURLE_OK) goto error;
153 |         if(bwRead((void*) &(bw->hdr->nBasesCovered), sizeof(uint64_t), 1, bw) != 1) goto error;
154 |         if(bwRead((void*) &(bw->hdr->minVal), sizeof(uint64_t), 1, bw) != 1) goto error;
155 |         if(bwRead((void*) &(bw->hdr->maxVal), sizeof(uint64_t), 1, bw) != 1) goto error;
156 |         if(bwRead((void*) &(bw->hdr->sumData), sizeof(uint64_t), 1, bw) != 1) goto error;
157 |         if(bwRead((void*) &(bw->hdr->sumSquared), sizeof(uint64_t), 1, bw) != 1) goto error;
158 |     }
159 | 
160 |     //In case of uncompressed remote files, let the IO functions know to request larger chunks
161 |     bw->URL->isCompressed = (bw->hdr->bufSize > 0)?1:0; //a non-zero bufSize is what marks the file as compressed
162 | 
163 |     return;
164 | 
165 | error:
166 |     bwHdrDestroy(bw->hdr); //also releases any zoom headers that were already attached
167 |     fprintf(stderr, "[bwHdrRead] There was an error while reading in the header!\n");
168 |     bw->hdr = NULL;
169 | }
170 | 
171 | //Frees a chromosome list together with every name it holds; a NULL list is accepted
172 | static void destroyChromList(chromList_t *cl) {
173 |     uint32_t k;
174 |     if(!cl) return;
175 |     //free(NULL) is a no-op, so names that were never filled in need no separate check
176 |     for(k=0; cl->chrom && k<cl->nKeys; k++) {
177 |         free(cl->chrom[k]);
178 |     }
179 |     free(cl->chrom);
180 |     free(cl->len);
181 |     free(cl);
182 | }
183 | 
184 | static uint64_t readChromLeaf(bigWigFile_t *bw, chromList_t *cl, uint32_t valueSize) {
185 |     uint16_t nVals, i;
186 |     uint32_t idx;
187 |     char *chrom = NULL;
188 | 
189 |     if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
190 |     chrom = calloc(valueSize+1, sizeof(char));
191 |     if(!chrom) return -1;
192 | 
193 |     for(i=0; i<nVals; i++) {
194 |         if(bwRead((void*) chrom, sizeof(char), valueSize, bw) != valueSize) goto error;
195 |         if(bwRead((void*) &idx, sizeof(uint32_t), 1, bw) != 1) goto error;
196 |         if(bwRead((void*) &(cl->len[idx]), sizeof(uint32_t), 1, bw) != 1) goto error;
197 |         cl->chrom[idx] = bwStrdup(chrom);
198 |         if(!(cl->chrom[idx])) goto error;
199 |     }
200 | 
201 |     free(chrom);
202 |     return nVals;
203 | 
204 | error:
205 |     free(chrom);
206 |     return -1;
207 | }
208 | 
209 | static uint64_t readChromNonLeaf(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize) {
210 |     uint64_t offset , rv = 0, previous;
211 |     uint16_t nVals, i;
212 | 
213 |     if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
214 | 
215 |     previous = bwTell(bw) + keySize;
216 |     for(i=0; i<nVals; i++) {
217 |         if(bwSetPos(bw, previous)) return -1;
218 |         if(bwRead((void*) &offset, sizeof(uint64_t), 1, bw) != 1) return -1;
219 |         if(bwSetPos(bw, offset)) return -1;
220 |         rv += readChromBlock(bw, cl, keySize);
221 |         previous += 8 + keySize;
222 |     }
223 | 
224 |     return rv;
225 | }
226 | 
227 | static uint64_t readChromBlock(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize) {
228 |     uint8_t isLeaf, padding;
229 | 
230 |     if(bwRead((void*) &isLeaf, sizeof(uint8_t), 1, bw) != 1) return -1;
231 |     if(bwRead((void*) &padding, sizeof(uint8_t), 1, bw) != 1) return -1;
232 | 
233 |     if(isLeaf) {
234 |         return readChromLeaf(bw, cl, keySize);
235 |     } else { //I've never actually observed one of these, which is good since they're pointless
236 |         return readChromNonLeaf(bw, cl, keySize);
237 |     }
238 | }
239 | 
240 | static chromList_t *bwReadChromList(bigWigFile_t *bw) {
241 |     chromList_t *cl = NULL;
242 |     uint32_t magic, keySize, valueSize, itemsPerBlock;
243 |     uint64_t rv, itemCount;
244 |     if(bw->isWrite) return NULL;
245 |     if(bwSetPos(bw, bw->hdr->ctOffset)) return NULL;
246 | 
247 |     cl = calloc(1, sizeof(chromList_t));
248 |     if(!cl) return NULL;
249 | 
250 |     if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
251 |     if(magic != CIRTREE_MAGIC) goto error;
252 | 
253 |     if(bwRead((void*) &itemsPerBlock, sizeof(uint32_t), 1, bw) != 1) goto error;
254 |     if(bwRead((void*) &keySize, sizeof(uint32_t), 1, bw) != 1) goto error;
255 |     if(bwRead((void*) &valueSize, sizeof(uint32_t), 1, bw) != 1) goto error;
256 |     if(bwRead((void*) &itemCount, sizeof(uint64_t), 1, bw) != 1) goto error;
257 | 
258 |     cl->nKeys = itemCount;
259 |     cl->chrom = calloc(itemCount, sizeof(char*));
260 |     cl->len = calloc(itemCount, sizeof(uint32_t));
261 |     if(!cl->chrom) goto error;
262 |     if(!cl->len) goto error;
263 | 
264 |     if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
265 |     if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
266 | 
267 |     //Read in the blocks
268 |     rv = readChromBlock(bw, cl, keySize);
269 |     if(rv == (uint64_t) -1) goto error;
270 |     if(rv != itemCount) goto error;
271 | 
272 |     return cl;
273 | 
274 | error:
275 |     destroyChromList(cl);
276 |     return NULL;
277 | }
278 | 
279 | //This is here mostly for convenience
280 | static void bwDestroyWriteBuffer(bwWriteBuffer_t *wb) {
281 |     if(wb->p) free(wb->p);
282 |     if(wb->compressP) free(wb->compressP);
283 |     if(wb->firstZoomBuffer) free(wb->firstZoomBuffer);
284 |     if(wb->lastZoomBuffer) free(wb->lastZoomBuffer);
285 |     if(wb->nNodes) free(wb->nNodes);
286 |     free(wb);
287 | }
288 | 
289 | void bwClose(bigWigFile_t *fp) {
290 |     if(!fp) return;
291 |     if(bwFinalize(fp)) {
292 |         fprintf(stderr, "[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.\n");
293 |     }
294 |     if(fp->URL) urlClose(fp->URL);
295 |     if(fp->hdr) bwHdrDestroy(fp->hdr);
296 |     if(fp->cl) destroyChromList(fp->cl);
297 |     if(fp->idx) bwDestroyIndex(fp->idx);
298 |     if(fp->writeBuffer) bwDestroyWriteBuffer(fp->writeBuffer);
299 |     free(fp);
300 | }
301 | 
302 | int bwIsBigWig(const char *fname, CURLcode (*callBack) (CURL*)) {
303 |     uint32_t magic = 0;
304 |     URL_t *URL = NULL;
305 | 
306 |     URL = urlOpen(fname, *callBack, NULL);
307 | 
308 |     if(!URL) return 0;
309 |     if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
310 |     urlClose(URL);
311 |     if(magic == BIGWIG_MAGIC) return 1;
312 |     return 0;
313 | }
314 | 
315 | char *bbGetSQL(bigWigFile_t *fp) {
316 |     char *o = NULL;
317 |     uint64_t len;
318 |     if(!fp->hdr->sqlOffset) return NULL;
319 |     len = fp->hdr->summaryOffset - fp->hdr->sqlOffset; //This includes the NULL terminator
320 |     o = malloc(sizeof(char) * len);
321 |     if(!o) goto error;
322 |     if(bwSetPos(fp, fp->hdr->sqlOffset)) goto error;
323 |     if(bwRead((void*) o, len, 1, fp) != 1) goto error;
324 |     return o;
325 | 
326 | error:
327 |     if(o) free(o);
328 |     printf("Got an error in bbGetSQL!\n");
329 |     return NULL;
330 | }
331 | 
332 | int bbIsBigBed(const char *fname, CURLcode (*callBack) (CURL*)) {
333 |     uint32_t magic = 0;
334 |     URL_t *URL = NULL;
335 | 
336 |     URL = urlOpen(fname, *callBack, NULL);
337 | 
338 |     if(!URL) return 0;
339 |     if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
340 |     urlClose(URL);
341 |     if(magic == BIGBED_MAGIC) return 1;
342 |     return 0;
343 | }
344 | 
345 | bigWigFile_t *bwOpen(const char *fname, CURLcode (*callBack) (CURL*), const char *mode) {
346 |     bigWigFile_t *bwg = calloc(1, sizeof(bigWigFile_t));
347 |     if(!bwg) {
348 |         fprintf(stderr, "[bwOpen] Couldn't allocate space to create the output object!\n");
349 |         return NULL;
350 |     }
351 |     if((!mode) || (strchr(mode, 'w') == NULL)) {
352 |         bwg->isWrite = 0;
353 |         bwg->URL = urlOpen(fname, *callBack, NULL);
354 |         if(!bwg->URL) {
355 |             fprintf(stderr, "[bwOpen] urlOpen is NULL!\n");
356 |             goto error;
357 |         }
358 | 
359 |         //Attempt to read in the fixed header
360 |         bwHdrRead(bwg);
361 |         if(!bwg->hdr) {
362 |             fprintf(stderr, "[bwOpen] bwg->hdr is NULL!\n");
363 |             goto error;
364 |         }
365 | 
366 |         //Read in the chromosome list
367 |         bwg->cl = bwReadChromList(bwg);
368 |         if(!bwg->cl) {
369 |             fprintf(stderr, "[bwOpen] bwg->cl is NULL (%s)!\n", fname);
370 |             goto error;
371 |         }
372 | 
373 |         //Read in the index
374 |         if(bwg->hdr->indexOffset) {
375 |             bwg->idx = bwReadIndex(bwg, 0);
376 |             if(!bwg->idx) {
377 |                 fprintf(stderr, "[bwOpen] bwg->idx is NULL bwg->hdr->dataOffset 0x%"PRIx64"!\n", bwg->hdr->dataOffset);
378 |                 goto error;
379 |             }
380 |         }
381 |     } else {
382 |         bwg->isWrite = 1;
383 |         bwg->URL = urlOpen(fname, NULL, "w+");
384 |         if(!bwg->URL) goto error;
385 |         bwg->writeBuffer = calloc(1,sizeof(bwWriteBuffer_t));
386 |         if(!bwg->writeBuffer) goto error;
387 |         bwg->writeBuffer->l = 24;
388 |     }
389 | 
390 |     return bwg;
391 | 
392 | error:
393 |     bwClose(bwg);
394 |     return NULL;
395 | }
396 | 
397 | bigWigFile_t *bbOpen(const char *fname, CURLcode (*callBack) (CURL*)) {
398 |     bigWigFile_t *bb = calloc(1, sizeof(bigWigFile_t));
399 |     if(!bb) {
400 |         fprintf(stderr, "[bbOpen] Couldn't allocate space to create the output object!\n");
401 |         return NULL;
402 |     }
403 | 
404 |     //Set the type to 1 for bigBed
405 |     bb->type = 1;
406 | 
407 |     bb->URL = urlOpen(fname, *callBack, NULL);
408 |     if(!bb->URL) goto error;
409 | 
410 |     //Attempt to read in the fixed header
411 |     bwHdrRead(bb);
412 |     if(!bb->hdr) goto error;
413 | 
414 |     //Read in the chromosome list
415 |     bb->cl = bwReadChromList(bb);
416 |     if(!bb->cl) goto error;
417 | 
418 |     //Read in the index
419 |     bb->idx = bwReadIndex(bb, 0);
420 |     if(!bb->idx) goto error;
421 | 
422 |     return bb;
423 | 
424 | error:
425 |     bwClose(bb);
426 |     return NULL;
427 | }
428 | 
429 | 
//Implementation taken from musl:
//https://git.musl-libc.org/cgit/musl/tree/src/string/strdup.c
//License: https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
//Returns a malloc()ed copy of the NUL-terminated string s, or NULL if the
//allocation fails. The caller owns the copy and must free() it.
char* bwStrdup(const char *s) {
	size_t nBytes = strlen(s) + 1; //length including the terminating NUL
	char *copy = malloc(nBytes);
	if (copy != NULL) memcpy(copy, s, nBytes);
	return copy;
}
439 | 


--------------------------------------------------------------------------------
/libBigWig/bwStats.c:
--------------------------------------------------------------------------------
  1 | #include "bigWig.h"
  2 | #include "bwCommon.h"
  3 | #include <errno.h>
  4 | #include <stdlib.h>
  5 | #include <zlib.h>
  6 | #include <math.h>
  7 | #include <string.h>
  8 | 
  9 | //Returns -1 if there are no applicable levels, otherwise an integer indicating the most appropriate level.
 10 | //Like Kent's library, this divides the desired bin size by 2 to minimize the effect of blocks overlapping multiple bins
 11 | static int32_t determineZoomLevel(const bigWigFile_t *fp, int basesPerBin) {
 12 |     int32_t out = -1;
 13 |     int64_t diff;
 14 |     uint32_t bestDiff = -1;
 15 |     uint16_t i;
 16 | 
 17 |     basesPerBin/=2;
 18 |     for(i=0; i<fp->hdr->nLevels; i++) {
 19 |         diff = basesPerBin - (int64_t) fp->hdr->zoomHdrs->level[i];
 20 |         if(diff >= 0 && diff < bestDiff) {
 21 |             bestDiff = diff;
 22 |             out = i;
 23 |         }
 24 |     }
 25 |     return out;
 26 | }
 27 | 
 28 | /// @cond SKIP
 29 | struct val_t {
 30 |     uint32_t nBases;
 31 |     float min, max, sum, sumsq;
 32 |     double scalar;
 33 | };
 34 | 
 35 | struct vals_t {
 36 |     uint32_t n;
 37 |     struct val_t **vals;
 38 | };
 39 | /// @endcond
 40 | 
 41 | void destroyVals_t(struct vals_t *v) {
 42 |     uint32_t i;
 43 |     if(!v) return;
 44 |     for(i=0; i<v->n; i++) free(v->vals[i]);
 45 |     if(v->vals) free(v->vals);
 46 |     free(v);
 47 | }
 48 | 
 49 | //Determine the base-pair overlap between an interval and a block
 50 | double getScalar(uint32_t i_start, uint32_t i_end, uint32_t b_start, uint32_t b_end) {
 51 |     double rv = 0.0;
 52 |     if(b_start <= i_start) {
 53 |         if(b_end > i_start) rv = ((double)(b_end - i_start))/(b_end-b_start);
 54 |     } else if(b_start < i_end) {
 55 |         if(b_end < i_end) rv = ((double)(b_end - b_start))/(b_end-b_start);
 56 |         else rv = ((double)(i_end - b_start))/(b_end-b_start);
 57 |     }
 58 | 
 59 |     return rv;
 60 | }
 61 | 
 62 | //Returns NULL on error
 63 | static struct vals_t *getVals(bigWigFile_t *fp, bwOverlapBlock_t *o, int i, uint32_t tid, uint32_t start, uint32_t end) {
 64 |     void *buf = NULL, *compBuf = NULL;
 65 |     uLongf sz = fp->hdr->bufSize;
 66 |     int compressed = 0, rv;
 67 |     uint32_t *p, vtid, vstart, vend;
 68 |     struct vals_t *vals = NULL;
 69 |     struct val_t *v = NULL;
 70 | 
 71 |     if(sz) {
 72 |         compressed = 1;
 73 |         buf = malloc(sz);
 74 |     }
 75 |     sz = 0; //This is now the size of the compressed buffer
 76 | 
 77 |     if(bwSetPos(fp, o->offset[i])) goto error;
 78 | 
 79 |     vals = calloc(1,sizeof(struct vals_t));
 80 |     if(!vals) goto error;
 81 | 
 82 |     v = malloc(sizeof(struct val_t));
 83 |     if(!v) goto error;
 84 | 
 85 |     if(sz < o->size[i]) compBuf = malloc(o->size[i]);
 86 |     if(!compBuf) goto error;
 87 | 
 88 |     if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
 89 |     if(compressed) {
 90 |         sz = fp->hdr->bufSize;
 91 |         rv = uncompress(buf, &sz, compBuf, o->size[i]);
 92 |         if(rv != Z_OK) goto error;
 93 |     } else {
 94 |         buf = compBuf;
 95 |         sz = o->size[i];
 96 |     }
 97 | 
 98 |     p = buf;
 99 |     while(((uLongf) ((char*)p - (char*)buf)) < sz) {
100 |         vtid = p[0];
101 |         vstart = p[1];
102 |         vend = p[2];
103 |         v->nBases = p[3];
104 |         v->min = ((float*) p)[4];
105 |         v->max = ((float*) p)[5];
106 |         v->sum = ((float*) p)[6];
107 |         v->sumsq = ((float*) p)[7];
108 |         v->scalar = getScalar(start, end, vstart, vend);
109 | 
110 |         if(tid == vtid) {
111 |             if((start <= vstart && end > vstart) || (start < vend && start >= vstart)) {
112 |                 vals->vals = realloc(vals->vals, sizeof(struct val_t*)*(vals->n+1));
113 |                 if(!vals->vals) goto error;
114 |                 vals->vals[vals->n++] = v;
115 |                 v = malloc(sizeof(struct val_t));
116 |                 if(!v) goto error;
117 |             }
118 |             if(vstart > end) break;
119 |         } else if(vtid > tid) {
120 |             break;
121 |         }
122 |         p+=8;
123 |     }
124 | 
125 |     free(v);
126 |     free(buf);
127 |     if(compressed) free(compBuf);
128 |     return vals;
129 | 
130 | error:
131 |     if(buf) free(buf);
132 |     if(compBuf && compressed) free(compBuf);
133 |     if(v) free(v);
134 |     destroyVals_t(vals);
135 |     return NULL;
136 | }
137 | 
138 | //On error, errno is set to ENOMEM and NaN is returned (though NaN can be returned normally)
139 | static double blockMean(bigWigFile_t *fp, bwOverlapBlock_t *blocks, uint32_t tid, uint32_t start, uint32_t end) {
140 |     uint32_t i, j;
141 |     double output = 0.0, coverage = 0.0;
142 |     struct vals_t *v = NULL;
143 | 
144 |     if(!blocks->n) return strtod("NaN", NULL);
145 | 
146 |     //Iterate over the blocks
147 |     for(i=0; i<blocks->n; i++) {
148 |         v = getVals(fp, blocks, i, tid, start, end);
149 |         if(!v) goto error;
150 |         for(j=0; j<v->n; j++) {
151 |             output += v->vals[j]->sum * v->vals[j]->scalar;
152 |             coverage += v->vals[j]->nBases * v->vals[j]->scalar;
153 |         }
154 |         destroyVals_t(v);
155 |     }
156 | 
157 | 
158 |     if(!coverage) return strtod("NaN", NULL);
159 | 
160 |     return output/coverage;
161 | 
162 | error:
163 |     if(v) free(v);
164 |     errno = ENOMEM;
165 |     return strtod("NaN", NULL);
166 | }
167 | 
168 | static double intMean(bwOverlappingIntervals_t* ints, uint32_t start, uint32_t end) {
169 |     double sum = 0.0;
170 |     uint32_t nBases = 0, i, start_use, end_use;
171 | 
172 |     if(!ints->l) return strtod("NaN", NULL);
173 | 
174 |     for(i=0; i<ints->l; i++) {
175 |         start_use = ints->start[i];
176 |         end_use = ints->end[i];
177 |         if(ints->start[i] < start) start_use = start;
178 |         if(ints->end[i] > end) end_use = end;
179 |         nBases += end_use-start_use;
180 |         sum += (end_use-start_use)*((double) ints->value[i]);
181 |     }
182 | 
183 |     return sum/nBases;
184 | }
185 | 
186 | //Does UCSC compensate for partial block/range overlap?
187 | static double blockDev(bigWigFile_t *fp, bwOverlapBlock_t *blocks, uint32_t tid, uint32_t start, uint32_t end) {
188 |     uint32_t i, j;
189 |     double mean = 0.0, ssq = 0.0, coverage = 0.0, diff;
190 |     struct vals_t *v = NULL;
191 | 
192 |     if(!blocks->n) return strtod("NaN", NULL);
193 | 
194 |     //Iterate over the blocks
195 |     for(i=0; i<blocks->n; i++) {
196 |         v = getVals(fp, blocks, i, tid, start, end);
197 |         if(!v) goto error;
198 |         for(j=0; j<v->n; j++) {
199 |             coverage += v->vals[j]->nBases * v->vals[j]->scalar;
200 |             mean += v->vals[j]->sum * v->vals[j]->scalar;
201 |             ssq += v->vals[j]->sumsq * v->vals[j]->scalar;
202 |         }
203 |         destroyVals_t(v);
204 |         v = NULL;
205 |     }
206 | 
207 |     if(coverage<=1.0) return strtod("NaN", NULL);
208 |     diff = ssq-mean*mean/coverage;
209 |     if(coverage > 1.0) diff /= coverage-1;
210 |     if(fabs(diff) > 1e-8) { //Ignore floating point differences
211 |         return sqrt(diff);
212 |     } else {
213 |         return 0.0;
214 |     }
215 | 
216 | error:
217 |     if(v) destroyVals_t(v);
218 |     errno = ENOMEM;
219 |     return strtod("NaN", NULL);
220 | }
221 | 
222 | //This uses compensated summation to account for finite precision math
223 | static double intDev(bwOverlappingIntervals_t* ints, uint32_t start, uint32_t end) {
224 |     double v1 = 0.0, mean, rv;
225 |     uint32_t nBases = 0, i, start_use, end_use;
226 | 
227 |     if(!ints->l) return strtod("NaN", NULL);
228 |     mean = intMean(ints, start, end);
229 | 
230 |     for(i=0; i<ints->l; i++) {
231 |         start_use = ints->start[i];
232 |         end_use = ints->end[i];
233 |         if(ints->start[i] < start) start_use = start;
234 |         if(ints->end[i] > end) end_use = end;
235 |         nBases += end_use-start_use;
236 |         v1 += (end_use-start_use) * pow(ints->value[i]-mean, 2.0); //running sum of squared difference
237 |     }
238 | 
239 |     if(nBases>=2) rv = sqrt(v1/(nBases-1));
240 |     else if(nBases==1) rv = sqrt(v1);
241 |     else rv = strtod("NaN", NULL);
242 | 
243 |     return rv;
244 | }
245 | 
246 | static double blockMax(bigWigFile_t *fp, bwOverlapBlock_t *blocks, uint32_t tid, uint32_t start, uint32_t end) {
247 |     uint32_t i, j, isNA = 1;
248 |     double o = strtod("NaN", NULL);
249 |     struct vals_t *v = NULL;
250 | 
251 |     if(!blocks->n) return o;
252 | 
253 |     //Iterate the blocks
254 |     for(i=0; i<blocks->n; i++) {
255 |         v = getVals(fp, blocks, i, tid, start, end);
256 |         if(!v) goto error;
257 |         for(j=0; j<v->n; j++) {
258 |             if(isNA) {
259 |                 o = v->vals[j]->max;
260 |                 isNA = 0;
261 |             } else if(v->vals[j]->max > o) {
262 |                 o = v->vals[j]->max;
263 |             }
264 |         }
265 |         destroyVals_t(v);
266 |     }
267 | 
268 |     return o;
269 | 
270 | error:
271 |     destroyVals_t(v);
272 |     errno = ENOMEM;
273 |     return strtod("NaN", NULL);
274 | }
275 | 
276 | static double intMax(bwOverlappingIntervals_t* ints) {
277 |     uint32_t i;
278 |     double o;
279 | 
280 |     if(ints->l < 1) return strtod("NaN", NULL);
281 | 
282 |     o = ints->value[0];
283 |     for(i=1; i<ints->l; i++) {
284 |         if(ints->value[i] > o) o = ints->value[i];
285 |     }
286 | 
287 |     return o;
288 | }
289 | 
290 | static double blockMin(bigWigFile_t *fp, bwOverlapBlock_t *blocks, uint32_t tid, uint32_t start, uint32_t end) {
291 |     uint32_t i, j, isNA = 1;
292 |     double o = strtod("NaN", NULL);
293 |     struct vals_t *v = NULL;
294 | 
295 |     if(!blocks->n) return o;
296 | 
297 |     //Iterate the blocks
298 |     for(i=0; i<blocks->n; i++) {
299 |         v = getVals(fp, blocks, i, tid, start, end);
300 |         if(!v) goto error;
301 |         for(j=0; j<v->n; j++) {
302 |             if(isNA) {
303 |                 o = v->vals[j]->min;
304 |                 isNA = 0;
305 |             } else if(v->vals[j]->min < o) o = v->vals[j]->min;
306 |         }
307 |         destroyVals_t(v);
308 |     }
309 | 
310 |     return o;
311 | 
312 | error:
313 |     destroyVals_t(v);
314 |     errno = ENOMEM;
315 |     return strtod("NaN", NULL);
316 | }
317 | 
318 | static double intMin(bwOverlappingIntervals_t* ints) {
319 |     uint32_t i;
320 |     double o;
321 | 
322 |     if(ints->l < 1) return strtod("NaN", NULL);
323 | 
324 |     o = ints->value[0];
325 |     for(i=1; i<ints->l; i++) {
326 |         if(ints->value[i] < o) o = ints->value[i];
327 |     }
328 | 
329 |     return o;
330 | }
331 | 
332 | //Does UCSC compensate for only partial block/interval overlap?
333 | static double blockCoverage(bigWigFile_t *fp, bwOverlapBlock_t *blocks, uint32_t tid, uint32_t start, uint32_t end) {
334 |     uint32_t i, j;
335 |     double o = 0.0;
336 |     struct vals_t *v = NULL;
337 | 
338 |     if(!blocks->n) return strtod("NaN", NULL);
339 | 
340 |     //Iterate over the blocks
341 |     for(i=0; i<blocks->n; i++) {
342 |         v = getVals(fp, blocks, i, tid, start, end);
343 |         if(!v) goto error;
344 |         for(j=0; j<v->n; j++) {
345 |             o+= v->vals[j]->nBases * v->vals[j]->scalar;
346 |         }
347 |         destroyVals_t(v);
348 |     }
349 | 
350 |     if(o == 0.0) return strtod("NaN", NULL);
351 |     return o;
352 | 
353 | error:
354 |     destroyVals_t(v);
355 |     errno = ENOMEM;
356 |     return strtod("NaN", NULL);
357 | }
358 | 
359 | static double intCoverage(bwOverlappingIntervals_t* ints, uint32_t start, uint32_t end) {
360 |     uint32_t i, start_use, end_use;
361 |     double o = 0.0;
362 | 
363 |     if(!ints->l) return strtod("NaN", NULL);
364 | 
365 |     for(i=0; i<ints->l; i++) {
366 |         start_use = ints->start[i];
367 |         end_use = ints->end[i];
368 |         if(start_use < start) start_use = start;
369 |         if(end_use > end) end_use = end;
370 |         o += end_use - start_use;
371 |     }
372 | 
373 |     return o/(end-start);
374 | }
375 | 
376 | static double blockSum(bigWigFile_t *fp, bwOverlapBlock_t *blocks, uint32_t tid, uint32_t start, uint32_t end) {
377 |     uint32_t i, j, sizeUse;
378 |     double o = 0.0;
379 |     struct vals_t *v = NULL;
380 | 
381 |     if(!blocks->n) return strtod("NaN", NULL);
382 | 
383 |     //Iterate over the blocks
384 |     for(i=0; i<blocks->n; i++) {
385 |         v = getVals(fp, blocks, i, tid, start, end);
386 |         if(!v) goto error;
387 |         for(j=0; j<v->n; j++) {
388 |             //Multiply the block average by min(bases covered, block overlap with interval)
389 |             sizeUse = v->vals[j]->scalar;
390 |             if(sizeUse > v->vals[j]->nBases) sizeUse = v->vals[j]->nBases;
391 |             o+= (v->vals[j]->sum * sizeUse) / v->vals[j]->nBases;
392 |         }
393 |         destroyVals_t(v);
394 |     }
395 | 
396 |     if(o == 0.0) return strtod("NaN", NULL);
397 |     return o;
398 | 
399 | error:
400 |     destroyVals_t(v);
401 |     errno = ENOMEM;
402 |     return strtod("NaN", NULL);
403 | }
404 | 
405 | static double intSum(bwOverlappingIntervals_t* ints, uint32_t start, uint32_t end) {
406 |     uint32_t i, start_use, end_use;
407 |     double o = 0.0;
408 | 
409 |     if(!ints->l) return strtod("NaN", NULL);
410 | 
411 |     for(i=0; i<ints->l; i++) {
412 |         start_use = ints->start[i];
413 |         end_use = ints->end[i];
414 |         if(start_use < start) start_use = start;
415 |         if(end_use > end) end_use = end;
416 |         o += (end_use - start_use) * ints->value[i];
417 |     }
418 | 
419 |     return o;
420 | }
421 | 
422 | //Returns NULL on error, otherwise a double* that needs to be free()d
423 | static double *bwStatsFromZoom(bigWigFile_t *fp, int32_t level, uint32_t tid, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type) {
424 |     bwOverlapBlock_t *blocks = NULL;
425 |     double *output = NULL;
426 |     uint32_t pos = start, i, end2;
427 | 
428 |     if(!fp->hdr->zoomHdrs->idx[level]) {
429 |         fp->hdr->zoomHdrs->idx[level] = bwReadIndex(fp, fp->hdr->zoomHdrs->indexOffset[level]);
430 |         if(!fp->hdr->zoomHdrs->idx[level]) return NULL;
431 |     }
432 |     errno = 0; //Sometimes libCurls sets and then doesn't unset errno on errors
433 | 
434 |     output = malloc(sizeof(double)*nBins);
435 |     if(!output) return NULL;
436 | 
437 |     for(i=0, pos=start; i<nBins; i++) {
438 |         end2 = start + ((double)(end-start)*(i+1))/((int) nBins);
439 |         blocks = walkRTreeNodes(fp, fp->hdr->zoomHdrs->idx[level]->root, tid, pos, end2);
440 |         if(!blocks) goto error;
441 | 
442 |         switch(type) {
443 |         case 0:
444 |             //mean
445 |             output[i] = blockMean(fp, blocks, tid, pos, end2);
446 |             break;
447 |         case 1:
448 |             //stdev
449 |             output[i] = blockDev(fp, blocks, tid, pos, end2);
450 |             break;
451 |         case 2:
452 |             //max
453 |             output[i] = blockMax(fp, blocks, tid, pos, end2);
454 |             break;
455 |         case 3:
456 |             //min
457 |             output[i] = blockMin(fp, blocks, tid, pos, end2);
458 |             break;
459 |         case 4:
460 |             //cov
461 |             output[i] = blockCoverage(fp, blocks, tid, pos, end2)/(end2-pos);
462 |             break;
463 |         case 5:
464 |             //sum
465 |             output[i] = blockSum(fp, blocks, tid, pos, end2);
466 |             break;
467 |         default:
468 |             goto error;
469 |             break;
470 |         }
471 |         if(errno) goto error;
472 |         destroyBWOverlapBlock(blocks);
473 |         pos = end2;
474 |     }
475 | 
476 |     return output;
477 | 
478 | error:
479 |     fprintf(stderr, "got an error in bwStatsFromZoom in the range %"PRIu32"-%"PRIu32": %s\n", pos, end2, strerror(errno));
480 |     if(blocks) destroyBWOverlapBlock(blocks);
481 |     if(output) free(output);
482 |     return NULL;
483 | }
484 | 
485 | double *bwStatsFromFull(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type) {
486 |     bwOverlappingIntervals_t *ints = NULL;
487 |     double *output = malloc(sizeof(double)*nBins);
488 |     uint32_t i, pos = start, end2;
489 |     if(!output) return NULL;
490 | 
491 |     for(i=0; i<nBins; i++) {
492 |         end2 = start + ((double)(end-start)*(i+1))/((int) nBins);
493 |         ints = bwGetOverlappingIntervals(fp, chrom, pos, end2);
494 | 
495 |         if(!ints) {
496 |             output[i] = strtod("NaN", NULL);
497 |             continue;
498 |         }
499 | 
500 |         switch(type) {
501 |         default :
502 |         case 0:
503 |             output[i] = intMean(ints, pos, end2);
504 |             break;
505 |         case 1:
506 |             output[i] = intDev(ints, pos, end2);
507 |             break;
508 |         case 2:
509 |             output[i] = intMax(ints);
510 |             break;
511 |         case 3:
512 |             output[i] = intMin(ints);
513 |             break;
514 |         case 4:
515 |             output[i] = intCoverage(ints, pos, end2);
516 |             break;
517 |         case 5:
518 |             output[i] = intSum(ints, pos, end2);
519 |             break;
520 |         }
521 |         bwDestroyOverlappingIntervals(ints);
522 |         pos = end2;
523 |     }
524 | 
525 |     return output;
526 | }
527 | 
528 | //Returns a list of floats of length nBins that must be free()d
529 | //On error, NULL is returned
530 | double *bwStats(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type) {
531 |     int32_t level = determineZoomLevel(fp, ((double)(end-start))/((int) nBins));
532 |     uint32_t tid = bwGetTid(fp, chrom);
533 |     if(tid == (uint32_t) -1) return NULL;
534 | 
535 |     if(level == -1) return bwStatsFromFull(fp, chrom, start, end, nBins, type);
536 |     return bwStatsFromZoom(fp, level, tid, start, end, nBins, type);
537 | }
538 | 


--------------------------------------------------------------------------------
/libBigWig/bwValues.c:
--------------------------------------------------------------------------------
  1 | #include "bigWig.h"
  2 | #include "bwCommon.h"
  3 | #include <stdlib.h>
  4 | #include <math.h>
  5 | #include <string.h>
  6 | #include <zlib.h>
  7 | #include <errno.h>
  8 | 
  9 | static uint32_t roundup(uint32_t v) {
 10 |     v--;
 11 |     v |= v >> 1;
 12 |     v |= v >> 2;
 13 |     v |= v >> 4;
 14 |     v |= v >> 8;
 15 |     v |= v >> 16;
 16 |     v++;
 17 |     return v;
 18 | }
 19 | 
 20 | //Returns the root node on success and NULL on error
 21 | static bwRTree_t *readRTreeIdx(bigWigFile_t *fp, uint64_t offset) {
 22 |     uint32_t magic;
 23 |     bwRTree_t *node;
 24 | 
 25 |     if(!offset) {
 26 |         if(bwSetPos(fp, fp->hdr->indexOffset)) return NULL;
 27 |     } else {
 28 |         if(bwSetPos(fp, offset)) return NULL;
 29 |     }
 30 | 
 31 |     if(bwRead(&magic, sizeof(uint32_t), 1, fp) != 1) return NULL;
 32 |     if(magic != IDX_MAGIC) {
 33 |         fprintf(stderr, "[readRTreeIdx] Mismatch in the magic number!\n");
 34 |         return NULL;
 35 |     }
 36 | 
 37 |     node = calloc(1, sizeof(bwRTree_t));
 38 |     if(!node) return NULL;
 39 | 
 40 |     if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
 41 |     if(bwRead(&(node->nItems), sizeof(uint64_t), 1, fp) != 1) goto error;
 42 |     if(bwRead(&(node->chrIdxStart), sizeof(uint32_t), 1, fp) != 1) goto error;
 43 |     if(bwRead(&(node->baseStart), sizeof(uint32_t), 1, fp) != 1) goto error;
 44 |     if(bwRead(&(node->chrIdxEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
 45 |     if(bwRead(&(node->baseEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
 46 |     if(bwRead(&(node->idxSize), sizeof(uint64_t), 1, fp) != 1) goto error;
 47 |     if(bwRead(&(node->nItemsPerSlot), sizeof(uint32_t), 1, fp) != 1) goto error;
 48 |     //Padding
 49 |     if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
 50 |     node->rootOffset = bwTell(fp);
 51 | 
 52 |     //For remote files, libCurl sometimes sets errno to 115 and doesn't clear it
 53 |     errno = 0;
 54 | 
 55 |     return node;
 56 | 
 57 | error:
 58 |     free(node);
 59 |     return NULL;
 60 | }
 61 | 
 62 | //Returns a bwRTreeNode_t on success and NULL on an error
 63 | //For the root node, set offset to 0
 64 | static bwRTreeNode_t *bwGetRTreeNode(bigWigFile_t *fp, uint64_t offset) {
 65 |     bwRTreeNode_t *node = NULL;
 66 |     uint8_t padding;
 67 |     uint16_t i;
 68 |     if(offset) {
 69 |         if(bwSetPos(fp, offset)) return NULL;
 70 |     } else {
 71 |         //seek
 72 |         if(bwSetPos(fp, fp->idx->rootOffset)) return NULL;
 73 |     }
 74 | 
 75 |     node = calloc(1, sizeof(bwRTreeNode_t));
 76 |     if(!node) return NULL;
 77 | 
 78 |     if(bwRead(&(node->isLeaf), sizeof(uint8_t), 1, fp) != 1) goto error;
 79 |     if(bwRead(&padding, sizeof(uint8_t), 1, fp) != 1) goto error;
 80 |     if(bwRead(&(node->nChildren), sizeof(uint16_t), 1, fp) != 1) goto error;
 81 | 
 82 |     node->chrIdxStart = malloc(sizeof(uint32_t)*(node->nChildren));
 83 |     if(!node->chrIdxStart) goto error;
 84 |     node->baseStart = malloc(sizeof(uint32_t)*(node->nChildren));
 85 |     if(!node->baseStart) goto error;
 86 |     node->chrIdxEnd = malloc(sizeof(uint32_t)*(node->nChildren));
 87 |     if(!node->chrIdxEnd) goto error;
 88 |     node->baseEnd = malloc(sizeof(uint32_t)*(node->nChildren));
 89 |     if(!node->baseEnd) goto error;
 90 |     node->dataOffset = malloc(sizeof(uint64_t)*(node->nChildren));
 91 |     if(!node->dataOffset) goto error;
 92 |     if(node->isLeaf) {
 93 |         node->x.size = malloc(node->nChildren * sizeof(uint64_t));
 94 |         if(!node->x.size) goto error;
 95 |     } else {
 96 |         node->x.child = calloc(node->nChildren, sizeof(struct bwRTreeNode_t *));
 97 |         if(!node->x.child) goto error;
 98 |     }
 99 |     for(i=0; i<node->nChildren; i++) {
100 |         if(bwRead(&(node->chrIdxStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
101 |         if(bwRead(&(node->baseStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
102 |         if(bwRead(&(node->chrIdxEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
103 |         if(bwRead(&(node->baseEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
104 |         if(bwRead(&(node->dataOffset[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
105 |         if(node->isLeaf) {
106 |             if(bwRead(&(node->x.size[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
107 |         }
108 |     }
109 | 
110 |     return node;
111 | 
112 | error:
113 |     if(node->chrIdxStart) free(node->chrIdxStart);
114 |     if(node->baseStart) free(node->baseStart);
115 |     if(node->chrIdxEnd) free(node->chrIdxEnd);
116 |     if(node->baseEnd) free(node->baseEnd);
117 |     if(node->dataOffset) free(node->dataOffset);
118 |     if(node->isLeaf && node->x.size) free(node->x.size);
119 |     else if((!node->isLeaf) && node->x.child) free(node->x.child);
120 |     free(node);
121 |     return NULL;
122 | }
123 | 
124 | void destroyBWOverlapBlock(bwOverlapBlock_t *b) {
125 |     if(!b) return;
126 |     if(b->size) free(b->size);
127 |     if(b->offset) free(b->offset);
128 |     free(b);
129 | }
130 | 
131 | //Returns a bwOverlapBlock_t * object or NULL on error.
132 | static bwOverlapBlock_t *overlapsLeaf(bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
133 |     uint16_t i, idx = 0;
134 |     bwOverlapBlock_t *o = calloc(1, sizeof(bwOverlapBlock_t));
135 |     if(!o) return NULL;
136 | 
137 |     for(i=0; i<node->nChildren; i++) {
138 |         if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
139 | 
140 |         /*
141 |           The individual blocks can theoretically span multiple contigs.
142 |           So if we treat the first/last contig in the range as special
143 |           but anything in the middle is a guaranteed match
144 |         */
145 |         if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
146 |             if(tid == node->chrIdxStart[i]) {
147 |                 if(node->baseStart[i] >= end) break;
148 |             } else if(tid == node->chrIdxEnd[i]) {
149 |                 if(node->baseEnd[i] <= start) continue;
150 |             }
151 |         } else {
152 |             if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
153 |         }
154 |         o->n++;
155 |     }
156 | 
157 |     if(o->n) {
158 |         o->offset = malloc(sizeof(uint64_t) * (o->n));
159 |         if(!o->offset) goto error;
160 |         o->size = malloc(sizeof(uint64_t) * (o->n));
161 |         if(!o->size) goto error;
162 | 
163 |         for(i=0; i<node->nChildren; i++) {
164 |             if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
165 |             if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
166 |                 if(tid == node->chrIdxStart[i]) {
167 |                     if(node->baseStart[i] >= end) continue;
168 |                 } else if(tid == node->chrIdxEnd[i]) {
169 |                     if(node->baseEnd[i] <= start) continue;
170 |                 }
171 |             } else {
172 |                 if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
173 |             }
174 |             o->offset[idx] = node->dataOffset[i];
175 |             o->size[idx++] = node->x.size[i];
176 |             if(idx >= o->n) break;
177 |         }
178 |     }
179 | 
180 |     if(idx != o->n) { //This should never happen
181 |         fprintf(stderr, "[overlapsLeaf] Mismatch between number of overlaps calculated and found!\n");
182 |         goto error;
183 |     }
184 | 
185 |     return o;
186 | 
187 | error:
188 |     if(o) destroyBWOverlapBlock(o);
189 |     return NULL;
190 | }
191 | 
192 | //This will free l2 unless there's an error!
193 | //Returns NULL on error, otherwise the merged lists
194 | static bwOverlapBlock_t *mergeOverlapBlocks(bwOverlapBlock_t *b1, bwOverlapBlock_t *b2) {
195 |     uint64_t i,j;
196 |     if(!b2) return b1;
197 |     if(!b2->n) {
198 |         destroyBWOverlapBlock(b2);
199 |         return b1;
200 |     }
201 |     if(!b1->n) {
202 |         destroyBWOverlapBlock(b1);
203 |         return b2;
204 |     }
205 |     j = b1->n;
206 |     b1->n += b2->n;
207 |     b1->offset = realloc(b1->offset, sizeof(uint64_t) * (b1->n+b2->n));
208 |     if(!b1->offset) goto error;
209 |     b1->size = realloc(b1->size, sizeof(uint64_t) * (b1->n+b2->n));
210 |     if(!b1->size) goto error;
211 | 
212 |     for(i=0; i<b2->n; i++) {
213 |         b1->offset[j+i] = b2->offset[i];
214 |         b1->size[j+i] = b2->size[i];
215 |     }
216 |     destroyBWOverlapBlock(b2);
217 |     return b1;
218 | 
219 | error:
220 |     destroyBWOverlapBlock(b1);
221 |     return NULL;
222 | }
223 | 
224 | //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
225 | //The output needs to be free()d if not NULL (likewise with *sizes)
226 | static bwOverlapBlock_t *overlapsNonLeaf(bigWigFile_t *fp, bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
227 |     uint16_t i;
228 |     bwOverlapBlock_t *nodeBlocks, *output = calloc(1, sizeof(bwOverlapBlock_t));
229 |     if(!output) return NULL;
230 | 
231 |     for(i=0; i<node->nChildren; i++) {
232 |         if(tid < node->chrIdxStart[i]) break;
233 |         if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
234 |         if(node->chrIdxStart[i] != node->chrIdxEnd[i]) { //child spans contigs
235 |             if(tid == node->chrIdxStart[i]) {
236 |                 if(node->baseStart[i] >= end) continue;
237 |             } else if(tid == node->chrIdxEnd[i]) {
238 |                 if(node->baseEnd[i] <= start) continue;
239 |             }
240 |         } else {
241 |             if(end <= node->baseStart[i] || start >= node->baseEnd[i]) continue;
242 |         }
243 | 
244 |         //We have an overlap!
245 |         if(!node->x.child[i])
246 |           node->x.child[i] = bwGetRTreeNode(fp, node->dataOffset[i]);
247 |         if(!node->x.child[i]) goto error;
248 | 
249 |         if(node->x.child[i]->isLeaf) { //leaf
250 |             nodeBlocks = overlapsLeaf(node->x.child[i], tid, start, end);
251 |         } else { //non-leaf
252 |             nodeBlocks = overlapsNonLeaf(fp, node->x.child[i], tid, start, end);
253 |         }
254 | 
255 |         //The output is processed the same regardless of leaf/non-leaf
256 |         if(!nodeBlocks) goto error;
257 |         else {
258 |             output = mergeOverlapBlocks(output, nodeBlocks);
259 |             if(!output) {
260 |                 destroyBWOverlapBlock(nodeBlocks);
261 |                 goto error;
262 |             }
263 |         }
264 |     }
265 | 
266 |     return output;
267 | 
268 | error:
269 |     destroyBWOverlapBlock(output);
270 |     return NULL;
271 | }
272 | 
273 | //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
274 | //The output must be free()d
275 | bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end) {
276 |     if(root->isLeaf) return overlapsLeaf(root, tid, start, end);
277 |     return overlapsNonLeaf(bw, root, tid, start, end);
278 | }
279 | 
280 | //In reality, a hash or some sort of tree structure is probably faster...
281 | //Return -1 (AKA 0xFFFFFFFF...) on "not there", so we can hold (2^32)-1 items.
282 | uint32_t bwGetTid(const bigWigFile_t *fp, const char *chrom) {
283 |     uint32_t i;
284 |     if(!chrom) return -1;
285 |     for(i=0; i<fp->cl->nKeys; i++) {
286 |         if(strcmp(chrom, fp->cl->chrom[i]) == 0) return i;
287 |     }
288 |     return -1;
289 | }
290 | 
291 | static bwOverlapBlock_t *bwGetOverlappingBlocks(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end) {
292 |     uint32_t tid = bwGetTid(fp, chrom);
293 | 
294 |     if(tid == (uint32_t) -1) {
295 |         fprintf(stderr, "[bwGetOverlappingBlocks] Non-existent contig: %s\n", chrom);
296 |         return NULL;
297 |     }
298 | 
299 |     //Get the info if needed
300 |     if(!fp->idx) {
301 |         fp->idx = readRTreeIdx(fp, fp->hdr->indexOffset);
302 |         if(!fp->idx) {
303 |             return NULL;
304 |         }
305 |     }
306 | 
307 |     if(!fp->idx->root) fp->idx->root = bwGetRTreeNode(fp, 0);
308 |     if(!fp->idx->root) return NULL;
309 | 
310 |     return walkRTreeNodes(fp, fp->idx->root, tid, start, end);
311 | }
312 | 
313 | void bwFillDataHdr(bwDataHeader_t *hdr, void *b) {
314 |     hdr->tid = ((uint32_t*)b)[0];
315 |     hdr->start = ((uint32_t*)b)[1];
316 |     hdr->end = ((uint32_t*)b)[2];
317 |     hdr->step = ((uint32_t*)b)[3];
318 |     hdr->span = ((uint32_t*)b)[4];
319 |     hdr->type = ((uint8_t*)b)[20];
320 |     hdr->nItems = ((uint16_t*)b)[11];
321 | }
322 | 
323 | void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o) {
324 |     if(!o) return;
325 |     if(o->start) free(o->start);
326 |     if(o->end) free(o->end);
327 |     if(o->value) free(o->value);
328 |     free(o);
329 | }
330 | 
331 | void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o) {
332 |     uint32_t i;
333 |     if(!o) return;
334 |     if(o->start) free(o->start);
335 |     if(o->end) free(o->end);
336 |     if(o->str) {
337 |         for(i=0; i<o->l; i++) {
338 |             if(o->str[i]) free(o->str[i]);
339 |         }
340 |         free(o->str);
341 |     }
342 |     free(o);
343 | }
344 | 
345 | //Returns NULL on error, in which case o has been free()d
346 | static bwOverlappingIntervals_t *pushIntervals(bwOverlappingIntervals_t *o, uint32_t start, uint32_t end, float value) {
347 |     if(o->l+1 >= o->m) {
348 |         o->m = roundup(o->l+1);
349 |         o->start = realloc(o->start, o->m * sizeof(uint32_t));
350 |         if(!o->start) goto error;
351 |         o->end = realloc(o->end, o->m * sizeof(uint32_t));
352 |         if(!o->end) goto error;
353 |         o->value = realloc(o->value, o->m * sizeof(float));
354 |         if(!o->value) goto error;
355 |     }
356 |     o->start[o->l] = start;
357 |     o->end[o->l] = end;
358 |     o->value[o->l++] = value;
359 |     return o;
360 | 
361 | error:
362 |     bwDestroyOverlappingIntervals(o);
363 |     return NULL;
364 | }
365 | 
366 | static bbOverlappingEntries_t *pushBBIntervals(bbOverlappingEntries_t *o, uint32_t start, uint32_t end, char *str, int withString) {
367 |     if(o->l+1 >= o->m) {
368 |         o->m = roundup(o->l+1);
369 |         o->start = realloc(o->start, o->m * sizeof(uint32_t));
370 |         if(!o->start) goto error;
371 |         o->end = realloc(o->end, o->m * sizeof(uint32_t));
372 |         if(!o->end) goto error;
373 |         if(withString) {
374 |             o->str = realloc(o->str, o->m * sizeof(char**));
375 |             if(!o->str) goto error;
376 |         }
377 |     }
378 |     o->start[o->l] = start;
379 |     o->end[o->l] = end;
380 |     if(withString) o->str[o->l] = bwStrdup(str);
381 |     o->l++;
382 |     return o;
383 | 
384 | error:
385 |     bbDestroyOverlappingEntries(o);
386 |     return NULL;
387 | }
388 | 
389 | //Returns NULL on error
390 | bwOverlappingIntervals_t *bwGetOverlappingIntervalsCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend) {
391 |     uint64_t i;
392 |     uint16_t j;
393 |     int compressed = 0, rv;
394 |     uLongf sz = fp->hdr->bufSize, tmp;
395 |     void *buf = NULL, *compBuf = NULL;
396 |     uint32_t start = 0, end , *p;
397 |     float value;
398 |     bwDataHeader_t hdr;
399 |     bwOverlappingIntervals_t *output = calloc(1, sizeof(bwOverlappingIntervals_t));
400 | 
401 |     if(!output) goto error;
402 | 
403 |     if(!o) return output;
404 |     if(!o->n) return output;
405 | 
406 |     if(sz) {
407 |         compressed = 1;
408 |         buf = malloc(sz);
409 |     }
410 |     sz = 0; //This is now the size of the compressed buffer
411 | 
412 |     for(i=0; i<o->n; i++) {
413 |         if(bwSetPos(fp, o->offset[i])) goto error;
414 | 
415 |         if(sz < o->size[i]) {
416 |             compBuf = realloc(compBuf, o->size[i]);
417 |             sz = o->size[i];
418 |         }
419 |         if(!compBuf) goto error;
420 | 
421 |         if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
422 |         if(compressed) {
423 |             tmp = fp->hdr->bufSize; //This gets over-written by uncompress
424 |             rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
425 |             if(rv != Z_OK) goto error;
426 |         } else {
427 |             buf = compBuf;
428 |         }
429 | 
430 |         //TODO: ensure that tmp is large enough!
431 |         bwFillDataHdr(&hdr, buf);
432 | 
433 |         p = ((uint32_t*) buf);
434 |         p += 6;
435 |         if(hdr.tid != tid) continue;
436 | 
437 |         if(hdr.type == 3) start = hdr.start - hdr.step;
438 | 
439 |         //FIXME: We should ensure that sz is large enough to hold nItems of the given type
440 |         for(j=0; j<hdr.nItems; j++) {
441 |             switch(hdr.type) {
442 |             case 1:
443 |                 start = *p;
444 |                 p++;
445 |                 end = *p;
446 |                 p++;
447 |                 value = *((float *)p);
448 |                 p++;
449 |                 break;
450 |             case 2:
451 |                 start = *p;
452 |                 p++;
453 |                 end = start + hdr.span;
454 |                 value = *((float *)p);
455 |                 p++;
456 |                 break;
457 |             case 3:
458 |                 start += hdr.step;
459 |                 end = start+hdr.span;
460 |                 value = *((float *)p);
461 |                 p++;
462 |                 break;
463 |             default :
464 |                 goto error;
465 |                 break;
466 |             }
467 | 
468 |             if(end <= ostart || start >= oend) continue;
469 |             //Push the overlap
470 |             if(!pushIntervals(output, start, end, value)) goto error;
471 |         }
472 |     }
473 | 
474 |     if(compressed && buf) free(buf);
475 |     if(compBuf) free(compBuf);
476 |     return output;
477 | 
478 | error:
479 |     fprintf(stderr, "[bwGetOverlappingIntervalsCore] Got an error\n");
480 |     if(output) bwDestroyOverlappingIntervals(output);
481 |     if(compressed && buf) free(buf);
482 |     if(compBuf) free(compBuf);
483 |     return NULL;
484 | }
485 | 
486 | bbOverlappingEntries_t *bbGetOverlappingEntriesCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend, int withString) {
487 |     uint64_t i;
488 |     int compressed = 0, rv, slen;
489 |     uLongf sz = fp->hdr->bufSize, tmp = 0;
490 |     void *buf = NULL, *bufEnd = NULL, *compBuf = NULL;
491 |     uint32_t entryTid = 0, start = 0, end;
492 |     char *str;
493 |     bbOverlappingEntries_t *output = calloc(1, sizeof(bbOverlappingEntries_t));
494 | 
495 |     if(!output) goto error;
496 | 
497 |     if(!o) return output;
498 |     if(!o->n) return output;
499 | 
500 |     if(sz) {
501 |         compressed = 1;
502 |         buf = malloc(sz);
503 |     }
504 |     sz = 0; //This is now the size of the compressed buffer
505 | 
506 |     for(i=0; i<o->n; i++) {
507 |         if(bwSetPos(fp, o->offset[i])) goto error;
508 | 
509 |         if(sz < o->size[i]) {
510 |             compBuf = realloc(compBuf, o->size[i]);
511 |             sz = o->size[i];
512 |         }
513 |         if(!compBuf) goto error;
514 | 
515 |         if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
516 |         if(compressed) {
517 |             tmp = fp->hdr->bufSize; //This gets over-written by uncompress
518 |             rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
519 |             if(rv != Z_OK) goto error;
520 |         } else {
521 |             buf = compBuf;
522 |             tmp = o->size[i]; //TODO: Is this correct? Do non-gzipped bigBeds exist?
523 |         }
524 | 
525 |         bufEnd = (char*)buf + tmp;
526 |         while(buf < bufEnd) {
527 |             entryTid = ((uint32_t*)buf)[0];
528 |             start = ((uint32_t*)buf)[1];
529 |             end = ((uint32_t*)buf)[2];
530 |             buf = (char*)buf + 12;
531 |             str = (char*)buf;
532 |             slen = strlen(str) + 1;
533 |             buf = (char*)buf + slen;
534 | 
535 |             if(entryTid < tid) continue;
536 |             if(entryTid > tid) break;
537 |             if(end <= ostart) continue;
538 |             if(start >= oend) break;
539 | 
540 |             //Push the overlap
541 |             if(!pushBBIntervals(output, start, end, str, withString)) goto error;
542 |         }
543 | 
544 |         buf = (char*)bufEnd - tmp; //reset the buffer pointer
545 |     }
546 | 
547 |     if(compressed && buf) free(buf);
548 |     if(compBuf) free(compBuf);
549 |     return output;
550 | 
551 | error:
552 |     fprintf(stderr, "[bbGetOverlappingEntriesCore] Got an error\n");
553 |     buf = (char*)bufEnd - tmp;
554 |     if(output) bbDestroyOverlappingEntries(output);
555 |     if(compressed && buf) free(buf);
556 |     if(compBuf) free(compBuf);
557 |     return NULL;
558 | }
559 | 
560 | //Returns NULL on error OR no intervals, which is a bad design...
561 | bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end) {
562 |     bwOverlappingIntervals_t *output;
563 |     uint32_t tid = bwGetTid(fp, chrom);
564 |     if(tid == (uint32_t) -1) return NULL;
565 |     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
566 |     if(!blocks) return NULL;
567 |     output = bwGetOverlappingIntervalsCore(fp, blocks, tid, start, end);
568 |     destroyBWOverlapBlock(blocks);
569 |     return output;
570 | }
571 | 
572 | //Like above, but for bigBed files
573 | bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, int withString) {
574 |     bbOverlappingEntries_t *output;
575 |     uint32_t tid = bwGetTid(fp, chrom);
576 |     if(tid == (uint32_t) -1) return NULL;
577 |     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
578 |     if(!blocks) return NULL;
579 |     output = bbGetOverlappingEntriesCore(fp, blocks, tid, start, end, withString);
580 |     destroyBWOverlapBlock(blocks);
581 |     return output;
582 | }
583 | 
584 | //Returns NULL on error
585 | bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration) {
586 |     bwOverlapIterator_t *output = NULL;
587 |     uint64_t n;
588 |     uint32_t tid = bwGetTid(fp, chrom);
589 |     if(tid == (uint32_t) -1) return output;
590 |     output = calloc(1, sizeof(bwOverlapIterator_t));
591 |     if(!output) return output;
592 |     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
593 | 
594 |     output->bw = fp;
595 |     output->tid = tid;
596 |     output->start = start;
597 |     output->end = end;
598 |     output->blocks = blocks;
599 |     output->blocksPerIteration = blocksPerIteration;
600 | 
601 |     if(blocks) {
602 |         n = blocks->n;
603 |         if(n>blocksPerIteration) blocks->n = blocksPerIteration;
604 |         output->intervals = bwGetOverlappingIntervalsCore(fp, blocks,tid, start, end);
605 |         blocks->n = n;
606 |         output->offset = blocksPerIteration;
607 |     }
608 |     output->data = output->intervals;
609 |     return output;
610 | }
611 | 
612 | //Returns NULL on error
613 | bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration) {
614 |     bwOverlapIterator_t *output = NULL;
615 |     uint64_t n;
616 |     uint32_t tid = bwGetTid(fp, chrom);
617 |     if(tid == (uint32_t) -1) return output;
618 |     output = calloc(1, sizeof(bwOverlapIterator_t));
619 |     if(!output) return output;
620 |     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
621 | 
622 |     output->bw = fp;
623 |     output->tid = tid;
624 |     output->start = start;
625 |     output->end = end;
626 |     output->blocks = blocks;
627 |     output->blocksPerIteration = blocksPerIteration;
628 |     output->withString = withString;
629 | 
630 |     if(blocks) {
631 |         n = blocks->n;
632 |         if(n>blocksPerIteration) blocks->n = blocksPerIteration;
633 |         output->entries = bbGetOverlappingEntriesCore(fp, blocks,tid, start, end, withString);
634 |         blocks->n = n;
635 |         output->offset = blocksPerIteration;
636 |     }
637 |     output->data = output->entries;
638 |     return output;
639 | }
640 | 
641 | void bwIteratorDestroy(bwOverlapIterator_t *iter) {
642 |     if(!iter) return;
643 |     if(iter->blocks) destroyBWOverlapBlock((bwOverlapBlock_t*) iter->blocks);
644 |     if(iter->intervals) bwDestroyOverlappingIntervals(iter->intervals);
645 |     if(iter->entries) bbDestroyOverlappingEntries(iter->entries);
646 |     free(iter);
647 | }
648 | 
649 | //On error, points to NULL and destroys the input
650 | bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter) {
651 |     uint64_t n, *offset, *size;
652 |     bwOverlapBlock_t *blocks = iter->blocks;
653 | 
654 |     if(iter->intervals) {
655 |         bwDestroyOverlappingIntervals(iter->intervals);
656 |         iter->intervals = NULL;
657 |     }
658 |     if(iter->entries) {
659 |         bbDestroyOverlappingEntries(iter->entries);
660 |         iter->entries = NULL;
661 |     }
662 |     iter->data = NULL;
663 | 
664 |     if(iter->offset < blocks->n) {
665 |         //store the previous values
666 |         n = blocks->n;
667 |         offset = blocks->offset;
668 |         size = blocks->size;
669 | 
670 |         //Move the start of the blocks
671 |         blocks->offset += iter->offset;
672 |         blocks->size += iter->offset;
673 |         if(iter->offset + iter->blocksPerIteration > n) {
674 |             blocks->n = blocks->n - iter->offset;
675 |         } else {
676 |             blocks->n = iter->blocksPerIteration;
677 |         }
678 | 
679 |         //Get the intervals or entries, as appropriate
680 |         if(iter->bw->type == 0) {
681 |             //bigWig
682 |             iter->intervals = bwGetOverlappingIntervalsCore(iter->bw, blocks, iter->tid, iter->start, iter->end);
683 |             iter->data = iter->intervals;
684 |         } else {
685 |             //bigBed
686 |             iter->entries = bbGetOverlappingEntriesCore(iter->bw, blocks, iter->tid, iter->start, iter->end, iter->withString);
687 |             iter->data = iter->entries;
688 |         }
689 |         iter->offset += iter->blocksPerIteration;
690 | 
691 |         //reset the values in iter->blocks
692 |         blocks->n = n;
693 |         blocks->offset = offset;
694 |         blocks->size = size;
695 | 
696 |         //Check for error
697 |         if(!iter->intervals && !iter->entries) goto error;
698 |     }
699 | 
700 |     return iter;
701 | 
702 | error:
703 |     bwIteratorDestroy(iter);
704 |     return NULL;
705 | }
706 | 
707 | //This is like bwGetOverlappingIntervals, except it returns 1 base windows. If includeNA is not 0, then a value will be returned for every position in the range (defaulting to NAN).
708 | //The ->end member is NULL
709 | //If includeNA is not 0 then ->start is also NULL, since it's implied
710 | //Note that bwDestroyOverlappingIntervals() will work in either case
711 | bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, int includeNA) {
712 |     uint32_t i, j, n;
713 |     bwOverlappingIntervals_t *output = NULL;
714 |     bwOverlappingIntervals_t *intermediate = bwGetOverlappingIntervals(fp, chrom, start, end);
715 |     if(!intermediate) return NULL;
716 | 
717 |     output = calloc(1, sizeof(bwOverlappingIntervals_t));
718 |     if(!output) goto error;
719 |     if(includeNA) {
720 |         output->l = end-start;
721 |         output->value = malloc(output->l*sizeof(float));
722 |         if(!output->value) goto error;
723 |         for(i=0; i<output->l; i++) output->value[i] = NAN;
724 |         for(i=0; i<intermediate->l; i++) {
725 |             for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
726 |                 if(j < start || j >= end) continue;
727 |                 output->value[j-start] = intermediate->value[i];
728 |             }
729 |         }
730 |     } else {
731 |         n = 0;
732 |         for(i=0; i<intermediate->l; i++) {
733 |             if(intermediate->start[i] < start) intermediate->start[i] = start;
734 |             if(intermediate->end[i] > end) intermediate->end[i] = end;
735 |             n += intermediate->end[i]-intermediate->start[i];
736 |         }
737 |         output->l = n;
738 |         output->start = malloc(sizeof(uint32_t)*n);
739 |         if(!output->start) goto error;
740 |         output->value = malloc(sizeof(float)*n);
741 |         if(!output->value) goto error;
742 |         n = 0; //this is now the index
743 |         for(i=0; i<intermediate->l; i++) {
744 |             for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
745 |                 if(j < start || j >= end) continue;
746 |                 output->start[n] = j;
747 |                 output->value[n++] = intermediate->value[i];
748 |             }
749 |         }
750 |     }
751 | 
752 |     bwDestroyOverlappingIntervals(intermediate);
753 |     return output;
754 | 
755 | error:
756 |     if(intermediate) bwDestroyOverlappingIntervals(intermediate);
757 |     if(output) bwDestroyOverlappingIntervals(output);
758 |     return NULL;
759 | }
760 | 
761 | void bwDestroyIndexNode(bwRTreeNode_t *node) {
762 |     uint16_t i;
763 | 
764 |     if(!node) return;
765 | 
766 |     free(node->chrIdxStart);
767 |     free(node->baseStart);
768 |     free(node->chrIdxEnd);
769 |     free(node->baseEnd);
770 |     free(node->dataOffset);
771 |     if(!node->isLeaf) {
772 |         for(i=0; i<node->nChildren; i++) {
773 |             bwDestroyIndexNode(node->x.child[i]);
774 |         }
775 |         free(node->x.child);
776 |     } else {
777 |         free(node->x.size);
778 |     }
779 |     free(node);
780 | }
781 | 
782 | void bwDestroyIndex(bwRTree_t *idx) {
783 |     bwDestroyIndexNode(idx->root);
784 |     free(idx);
785 | }
786 | 
787 | //Returns a pointer to the requested index (@offset, unless it's 0, in which case the index for the values is returned
788 | //Returns NULL on error
789 | bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset) {
790 |     bwRTree_t *idx = readRTreeIdx(fp, offset);
791 |     if(!idx) return NULL;
792 | 
793 |     //Read in the root node
794 |     idx->root = bwGetRTreeNode(fp, idx->rootOffset);
795 | 
796 |     if(!idx->root) {
797 |         bwDestroyIndex(idx);
798 |         return NULL;
799 |     }
800 |     return idx;
801 | }
802 | 


--------------------------------------------------------------------------------
/libBigWig/bwValues.h:
--------------------------------------------------------------------------------
 1 | #ifndef LIBBIGWIG_VALUES_H
 2 | #define LIBBIGWIG_VALUES_H
 3 | 
 4 | #include <inttypes.h>
 5 | /*! \file bwValues.h
 6 |  *
 7 |  * You should not directly use functions and structures defined here. They're really meant for internal use only.
 8 |  *
 9 |  * All of the structures here need to be destroyed or you'll leak memory! There are methods available to destroy anything that you need to take care of yourself.
10 |  */
11 | 
12 | //N.B., coordinates are still 0-based half open!
/*!
 * @brief A node within an R-tree holding the index for data.
 *
 * Note that there are two types of nodes: leaf and twig. Leaf nodes point to where data actually is. Twig nodes point to additional index nodes, which may or may not be leaves. Each of these nodes has additional children, which may span multiple chromosomes/contigs.
 *
 * With the start/end position, these positions refer specifically to the chromosomes specified in chrIdxStart/chrIdxEnd. Any chromosomes between these are completely spanned by a given child.
 *
 * All of the per-child lists are indexed in parallel: entry i of each list describes child i. Which member of the union `x` is valid is determined by `isLeaf`.
 */
typedef struct bwRTreeNode_t {
    uint8_t isLeaf; /**<Is this node a leaf? Non-zero selects x.size, zero selects x.child.*/
    //1 byte of padding
    uint16_t nChildren; /**<The number of children of this node, all lists have this length.*/
    uint32_t *chrIdxStart; /**<A list of the starting chromosome indices of each child.*/
    uint32_t *baseStart; /**<A list of the start position of each child.*/
    uint32_t *chrIdxEnd; /**<A list of the end chromosome indices of each child.*/
    uint32_t *baseEnd; /**<A list of the end position of each child.*/
    uint64_t *dataOffset; /**<For leaves, the offset to the on-disk data. For twigs, the offset to the child node.*/
    union {
        uint64_t *size; /**<Leaves only: The size of the data block.*/
        struct bwRTreeNode_t **child; /**<Twigs only: The child node(s).*/
    } x; /**<A union holding either size or child*/
} bwRTreeNode_t;
34 | 
/*!
 * @brief A header and index that points to an R-tree that in turn points to data blocks.
 *
 * The `root` member is the in-memory tree; the remaining members describe the index as stored in the file.
 */
//TODO: rootOffset is pointless, it's 48 bytes after the indexOffset
typedef struct {
    uint32_t blockSize; /**<The maximum number of children a node can have*/
    uint64_t nItems; /**<The total number of data blocks pointed to by the tree. This is completely redundant.*/
    uint32_t chrIdxStart; /**<The index to the first chromosome described.*/
    uint32_t baseStart; /**<The first position on chrIdxStart with a value.*/
    uint32_t chrIdxEnd; /**<The index of the last chromosome with an entry.*/
    uint32_t baseEnd; /**<The last position on chrIdxEnd with an entry.*/
    uint64_t idxSize; /**<This is actually the offset of the index rather than the size?!? Yes, it's completely redundant.*/
    uint32_t nItemsPerSlot; /**<This is always 1!*/
    //There's 4 bytes of padding in the file here
    uint64_t rootOffset; /**<The offset to the root node of the R-Tree (on disk). Yes, this is redundant.*/
    bwRTreeNode_t *root; /**<A pointer to the root node.*/
} bwRTree_t;
52 | 
/*!
 * @brief This structure holds the data blocks that overlap a given interval.
 *
 * `offset` and `size` are parallel arrays of length `n`: entry i of each describes block i.
 */
typedef struct {
    uint64_t n; /**<The number of blocks that overlap. This *MAY* be 0!*/
    uint64_t *offset; /**<The offset to the on-disk position of each block.*/
    uint64_t *size; /**<The size of each block on disk (in bytes).*/
} bwOverlapBlock_t;
61 | 
/*!
 * @brief The header section of a given data block.
 *
 * There are 3 types of data blocks in bigWig files, each with slightly different needs. This is all taken care of internally.
 *
 * Which members are meaningful depends on `type`: `step` is only used by fixed step blocks and `span` by variable and fixed step blocks, whereas bedGraph blocks store a start and end with every value.
 */
typedef struct {
    uint32_t tid; /**<The chromosome ID.*/
    uint32_t start; /**<The start position of a block*/
    uint32_t end; /**<The end position of a block*/
    uint32_t step; /**<The step size of the values*/
    uint32_t span; /**<The span of each data value*/
    uint8_t type; /**<The block type: 1, bedGraph; 2, variable step; 3, fixed step.*/
    uint16_t nItems; /**<The number of values in a given block.*/
} bwDataHeader_t;
76 | 
77 | #endif // LIBBIGWIG_VALUES_H
78 | 


--------------------------------------------------------------------------------
/libBigWig/io.c:
--------------------------------------------------------------------------------
  1 | #ifndef NOCURL
  2 | #include <curl/curl.h>
  3 | #endif
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | #include <string.h>
  7 | #include <unistd.h>
  8 | #include "bigWigIO.h"
  9 | #include <inttypes.h>
 10 | #include <errno.h>
 11 | 
 12 | size_t GLOBAL_DEFAULTBUFFERSIZE;
 13 | 
 14 | #ifndef NOCURL
 15 | uint64_t getContentLength(const URL_t *URL) {
 16 |     double size;
 17 |     if(curl_easy_getinfo(URL->x.curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &size) != CURLE_OK) {
 18 |         return 0;
 19 |     }
 20 |     if(size== -1.0) return 0;
 21 |     return (uint64_t) size;
 22 | }
 23 | 
 24 | //Fill the buffer, note that URL may be left in an unusable state on error!
 25 | CURLcode urlFetchData(URL_t *URL, unsigned long bufSize) {
 26 |     CURLcode rv;
 27 |     char range[1024];
 28 | 
 29 |     if(URL->filePos != (size_t) -1) URL->filePos += URL->bufLen;
 30 |     else URL->filePos = 0;
 31 | 
 32 |     URL->bufPos = URL->bufLen = 0; //Otherwise, we can't copy anything into the buffer!
 33 |     sprintf(range,"%lu-%lu", URL->filePos, URL->filePos+bufSize-1);
 34 |     rv = curl_easy_setopt(URL->x.curl, CURLOPT_RANGE, range);
 35 |     if(rv != CURLE_OK) {
 36 |         fprintf(stderr, "[urlFetchData] Couldn't set the range (%s)\n", range);
 37 |         return rv;
 38 |     }
 39 | 
 40 |     rv = curl_easy_perform(URL->x.curl);
 41 |     errno = 0; //Sometimes curl_easy_perform leaves a random errno remnant
 42 |     return rv;
 43 | }
 44 | 
 45 | //Read obufSize bytes of a remote file into obuf, ideally from the buffer already in memory.
 46 | //Returns obufSize on success, 0 if curl fails, or the smaller number of bytes copied if no more data arrives.
 47 | size_t url_fread(void *obuf, size_t obufSize, URL_t *URL) {
 48 |     size_t remaining = obufSize, fetchSize, avail;
 49 |     unsigned char *p = obuf; //A byte pointer, since arithmetic on void* is a compiler extension rather than standard C
 50 |     CURLcode rv;
 51 | 
 52 |     while(remaining) {
 53 |         if(!URL->bufLen) { //(A) The buffer is empty, so fill it
 54 |             rv = urlFetchData(URL, URL->bufSize);
 55 |             if(rv != CURLE_OK) {
 56 |                 fprintf(stderr, "[url_fread] urlFetchData (A) returned %s\n", curl_easy_strerror(rv));
 57 |                 return 0;
 58 |             }
 59 |             //A successful fetch that delivered nothing (e.g., a read past the end of the file) would otherwise
 60 |             //re-request the same range forever, so stop and report the short read instead.
 61 |             if(!URL->bufLen) break;
 62 |         } else if(URL->bufLen < URL->bufPos + remaining) { //(B) Copy the remaining buffer and reload the buffer as needed
 63 |             avail = URL->bufLen - URL->bufPos;
 64 |             memcpy(p, (unsigned char*) URL->memBuf + URL->bufPos, avail);
 65 |             p += avail;
 66 |             remaining -= avail; //Still > 0 here, since this branch requires remaining > avail
 67 | 
 68 |             //Uncompressed data: refill the whole buffer. Compressed data: fetch no more than is still needed.
 69 |             fetchSize = URL->bufSize;
 70 |             if(URL->isCompressed && remaining < URL->bufSize) fetchSize = remaining;
 71 |             rv = urlFetchData(URL, fetchSize);
 72 |             if(rv != CURLE_OK) {
 73 |                 fprintf(stderr, "[url_fread] urlFetchData (B) returned %s\n", curl_easy_strerror(rv));
 74 |                 return 0;
 75 |             }
 76 |         } else { //(C) Everything still needed is already in the buffer
 77 |             memcpy(p, (unsigned char*) URL->memBuf + URL->bufPos, remaining);
 78 |             URL->bufPos += remaining;
 79 |             remaining = 0;
 80 |         }
 81 |     }
 82 | 
 83 |     return obufSize - remaining;
 84 | }
 85 | #endif
 86 | 
 87 | //Returns the number of bytes requested or a smaller number on error
 88 | //Note that in the case of remote files, the actual amount read may be less than the return value!
 89 | size_t urlRead(URL_t *URL, void *buf, size_t bufSize) {
 90 | #ifndef NOCURL
 91 |     if(URL->type==0) {
 92 |         return fread(buf, bufSize, 1, URL->x.fp)*bufSize;
 93 |     } else {
 94 |         return url_fread(buf, bufSize, URL);
 95 |     }
 96 | #else
 97 |     return fread(buf, bufSize, 1, URL->x.fp)*bufSize;
 98 | #endif
 99 | }
100 | 
101 | size_t bwFillBuffer(const void *inBuf, size_t l, size_t nmemb, void *pURL) {
102 |     //libcurl write callback: appends the l*nmemb bytes at inBuf to the memory buffer of the URL_t passed as
103 |     //pURL and returns the number of bytes stored (0 if there is no buffer).
104 |     URL_t *URL = (URL_t*) pURL;
105 |     unsigned char *p = URL->memBuf; //A byte pointer, since arithmetic on void* is a compiler extension
106 |     size_t copied = l*nmemb, space;
107 |     if(!p) return 0; //signal error
108 | 
109 |     //Clamp to the space left after the bytes already stored (bufLen). Measuring it from bufPos, which is 0
110 |     //while a fetch is running, let a transfer delivered in several chunks run past the end of memBuf.
111 |     space = (URL->bufLen < URL->bufSize) ? URL->bufSize - URL->bufLen : 0;
112 |     if(copied > space) copied = space; //We received more than we can store! (a short count makes libcurl abort the transfer)
113 |     memcpy(p + URL->bufLen, inBuf, copied);
114 |     URL->bufLen += copied;
115 |     return copied;
116 | }
117 | 
118 | //Seek to an arbitrary location, returning a CURLcode. A remote seek outside the buffered range refills the buffer from pos.
119 | //Note that a local file returns CURLE_OK on success or CURLE_FAILED_INIT on any error;
120 | CURLcode urlSeek(URL_t *URL, size_t pos) {
121 | #ifndef NOCURL
122 |     char range[1024];
123 |     CURLcode rv;
124 | 
125 |     if(URL->type == BWG_FILE) {
126 | #endif
127 |         if(fseek(URL->x.fp, pos, SEEK_SET) == 0) {
128 |             errno = 0;
129 |             return CURLE_OK;
130 |         } else {
131 |             return CURLE_FAILED_INIT; //This is arbitrary
132 |         }
133 | #ifndef NOCURL
134 |     } else {
135 |         //If the location is covered by the buffer then don't seek!
136 |         if(pos < URL->filePos || pos >= URL->filePos+URL->bufLen) {
137 |             URL->filePos = pos;
138 |             URL->bufLen = 0; //Otherwise, filePos will get incremented on the next read!
139 |             URL->bufPos = 0;
140 |             //Maybe this works for FTP? The offsets are size_t, hence %zu rather than %lu, and snprintf bounds the write.
141 |             snprintf(range, sizeof(range), "%zu-%zu", pos, (size_t) (pos+URL->bufSize-1));
142 |             rv = curl_easy_setopt(URL->x.curl, CURLOPT_RANGE, range);
143 |             if(rv != CURLE_OK) {
144 |                 fprintf(stderr, "[urlSeek] Couldn't set the range (%s)\n", range);
145 |                 return rv;
146 |             }
147 |             rv = curl_easy_perform(URL->x.curl); //Refill the buffer starting at pos
148 |             if(rv != CURLE_OK) {
149 |                 fprintf(stderr, "[urlSeek] curl_easy_perform received an error!\n");
150 |             }
151 |             errno = 0;  //Don't propagate remnant resolved libCurl errors
152 |             return rv;
153 |         } else {
154 |             URL->bufPos = pos-URL->filePos; //Just move the read position within the buffer
155 |             return CURLE_OK;
156 |         }
157 |     }
158 | #endif
159 | }
160 | 
161 | //Open fname, either as a local file or (for reading only) as an http://, https:// or ftp:// URL.
162 | //callBack, if given, may further configure the curl handle; a mode containing 'w' always opens a local file.
163 | //Returns a URL_t to hand to urlClose(), or NULL on error (a message is then printed to stderr).
164 | URL_t *urlOpen(const char *fname, CURLcode (*callBack)(CURL*), const char *mode) {
165 |     URL_t *URL = calloc(1, sizeof(URL_t));
166 |     if(!URL) return NULL;
167 | #ifndef NOCURL
168 |     CURLcode code;
169 |     char range[1024];
170 | #endif
171 | 
172 |     URL->fname = fname;
173 | 
174 |     if((!mode) || (strchr(mode, 'w') == 0)) {
175 |         //Set the protocol
176 | #ifndef NOCURL
177 |         if(strncmp(fname, "http://", 7) == 0) URL->type = BWG_HTTP;
178 |         else if(strncmp(fname, "https://", 8) == 0) URL->type = BWG_HTTPS;
179 |         else if(strncmp(fname, "ftp://", 6) == 0) URL->type = BWG_FTP;
180 |         else URL->type = BWG_FILE;
181 | #else
182 |         URL->type = BWG_FILE;
183 | #endif
184 | 
185 |         //local file?
186 |         if(URL->type == BWG_FILE) {
187 |             URL->filePos = -1; //This signals that nothing has been read
188 |             URL->x.fp = fopen(fname, "rb");
189 |             if(!(URL->x.fp)) {
190 |                 free(URL);
191 |                 fprintf(stderr, "[urlOpen] Couldn't open %s for reading\n", fname);
192 |                 return NULL;
193 |             }
194 | #ifndef NOCURL
195 |         } else {
196 |             //Remote file, set up the memory buffer and get CURL ready
197 |             URL->memBuf = malloc(GLOBAL_DEFAULTBUFFERSIZE);
198 |             if(!(URL->memBuf)) {
199 |                 free(URL);
200 |                 fprintf(stderr, "[urlOpen] Couldn't allocate enough space for the file buffer!\n");
201 |                 return NULL;
202 |             }
203 |             URL->bufSize = GLOBAL_DEFAULTBUFFERSIZE;
204 |             URL->x.curl = curl_easy_init();
205 |             if(!(URL->x.curl)) {
206 |                 fprintf(stderr, "[urlOpen] curl_easy_init() failed!\n");
207 |                 goto error;
208 |             }
209 |             //Negotiate a reasonable HTTP authentication method
210 |             if(curl_easy_setopt(URL->x.curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY) != CURLE_OK) {
211 |                 fprintf(stderr, "[urlOpen] Failed instructing curl to use any HTTP authentication it finds to be suitable!\n");
212 |                 goto error;
213 |             }
214 |             //Follow redirects
215 |             if(curl_easy_setopt(URL->x.curl, CURLOPT_FOLLOWLOCATION, 1L) != CURLE_OK) {
216 |                 fprintf(stderr, "[urlOpen] Failed instructing curl to follow redirects!\n");
217 |                 goto error;
218 |             }
219 |             //Set the URL
220 |             if(curl_easy_setopt(URL->x.curl, CURLOPT_URL, fname) != CURLE_OK) {
221 |                 fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_URL!\n");
222 |                 goto error;
223 |             }
224 |             //Set the range of the initial fetch: the first bufSize bytes (%zu, as the size is formatted as a size_t)
225 |             snprintf(range, sizeof(range), "0-%zu", (size_t) (URL->bufSize-1));
226 |             if(curl_easy_setopt(URL->x.curl, CURLOPT_RANGE, range) != CURLE_OK) {
227 |                 fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_RANGE (%s)!\n", range);
228 |                 goto error;
229 |             }
230 |             //Set the callback info, which means we no longer need to directly deal with sockets and header!
231 |             if(curl_easy_setopt(URL->x.curl, CURLOPT_WRITEFUNCTION, bwFillBuffer) != CURLE_OK) {
232 |                 fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_WRITEFUNCTION!\n");
233 |                 goto error;
234 |             }
235 |             if(curl_easy_setopt(URL->x.curl, CURLOPT_WRITEDATA, (void*)URL) != CURLE_OK) {
236 |                 fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_WRITEDATA!\n");
237 |                 goto error;
238 |             }
239 |             //Ignore certificate errors with https, libcurl just isn't reliable enough with conda. NOTE(review): this turns off TLS peer and host verification for every https URL. Both options take a long, hence 0L.
240 |             if(curl_easy_setopt(URL->x.curl, CURLOPT_SSL_VERIFYPEER, 0L) != CURLE_OK) {
241 |                 fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_SSL_VERIFYPEER to 0!\n");
242 |                 goto error;
243 |             }
244 |             if(curl_easy_setopt(URL->x.curl, CURLOPT_SSL_VERIFYHOST, 0L) != CURLE_OK) {
245 |                 fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_SSL_VERIFYHOST to 0!\n");
246 |                 goto error;
247 |             }
248 |             if(callBack) {
249 |                 code = callBack(URL->x.curl);
250 |                 if(code != CURLE_OK) {
251 |                     fprintf(stderr, "[urlOpen] The user-supplied call back function returned an error: %s\n", curl_easy_strerror(code));
252 |                     goto error;
253 |                 }
254 |             }
255 |             code = curl_easy_perform(URL->x.curl);
256 |             errno = 0; //Sometimes curl_easy_perform leaves a random errno remnant
257 |             if(code != CURLE_OK) {
258 |                 fprintf(stderr, "[urlOpen] curl_easy_perform received an error: %s\n", curl_easy_strerror(code));
259 |                 goto error;
260 |             }
261 | #endif
262 |         }
263 |     } else {
264 |         URL->type = BWG_FILE;
265 |         URL->x.fp = fopen(fname, mode);
266 |         if(!(URL->x.fp)) {
267 |             free(URL);
268 |             fprintf(stderr, "[urlOpen] Couldn't open %s for writing\n", fname);
269 |             return NULL;
270 |         }
271 |     }
272 |     return URL;
273 | 
274 | #ifndef NOCURL
275 | error:
276 |     //Only the remote branch jumps here, so memBuf has been allocated. x.curl is NULL if curl_easy_init()
277 |     //failed, which curl_easy_cleanup() accepts (it then does nothing).
278 |     free(URL->memBuf);
279 |     curl_easy_cleanup(URL->x.curl);
280 |     free(URL);
281 |     return NULL;
282 | #endif
283 | }
284 | 
285 | //Release everything owned by URL: the FILE* of a local file, or the memory buffer and curl handle of a remote one
286 | void urlClose(URL_t *URL) {
287 |     const int isLocal = (URL->type == BWG_FILE);
288 |     if(isLocal) fclose(URL->x.fp);
289 | #ifndef NOCURL
290 |     if(!isLocal) {
291 |         free(URL->memBuf);
292 |         curl_easy_cleanup(URL->x.curl);
293 |     }
294 | #endif
295 |     free(URL); //URL itself is gone after this, so the caller must not use it again
296 | }
297 | 


--------------------------------------------------------------------------------
/pyBigWig.h:
--------------------------------------------------------------------------------
  1 | #include <Python.h>
  2 | #include <structmember.h>
  3 | #include "bigWig.h"
  4 | 
  5 | #define pyBigWigVersion "0.3.24"
  6 | 
  7 | typedef struct { //Instance layout of bigWigFile objects (its size is the tp_basicsize of the bigWigFile type below)
  8 |     PyObject_HEAD
  9 |     bigWigFile_t *bw; //The libBigWig file that this object wraps
 10 |     int32_t lastTid; //The TID of the last written entry (or -1)
 11 |     uint32_t lastSpan; //The span of the last written entry (if applicable)
 12 |     uint32_t lastStep; //The step of the last written entry (if applicable)
 13 |     uint32_t lastStart; //The next start position (if applicable)
 14 |     int lastType; //The type of the last written entry; NOTE(review): presumably one of the three addEntries() formats, confirm in pyBigWig.c
 15 | } pyBigWigFile_t;
 16 | 
 17 | static PyObject *pyBwOpen(PyObject *self, PyObject *pyFname); //pyBigWig.open()
 18 | static PyObject *pyBwEnter(pyBigWigFile_t *self, PyObject *args); //bigWigFile.__enter__()
 19 | static PyObject *pyBwClose(pyBigWigFile_t *pybw, PyObject *args); //bigWigFile.close() and bigWigFile.__exit__()
 20 | static PyObject *pyBwGetChroms(pyBigWigFile_t *pybw, PyObject *args); //bigWigFile.chroms()
 21 | static PyObject *pyIsBigWig(pyBigWigFile_t *pybw, PyObject *args); //bigWigFile.isBigWig()
 22 | static PyObject *pyIsBigBed(pyBigWigFile_t *pybw, PyObject *args); //bigWigFile.isBigBed()
 23 | static PyObject *pyBwGetStats(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); //bigWigFile.stats()
 24 | #ifdef WITHNUMPY
 25 | static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); //bigWigFile.values(), which takes keywords only in numpy builds
 26 | #else
 27 | static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args); //bigWigFile.values()
 28 | #endif
 29 | static PyObject *pyBwGetIntervals(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); //bigWigFile.intervals()
 30 | static PyObject *pyBBGetEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); //bigWigFile.entries()
 31 | static PyObject *pyBBGetSQL(pyBigWigFile_t *pybw, PyObject *args); //bigWigFile.SQL()
 32 | static PyObject *pyBwGetHeader(pyBigWigFile_t *pybw, PyObject *args); //bigWigFile.header()
 33 | static PyObject *pyBwAddHeader(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); //bigWigFile.addHeader()
 34 | static PyObject *pyBwAddEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); //bigWigFile.addEntries()
 35 | static void pyBwDealloc(pyBigWigFile_t *pybw); //tp_dealloc of the bigWigFile type
 36 | 
 37 | //The module-level functions of pyBigWig. The function types aren't actually correct (hence the PyCFunction casts)...
 38 | static PyMethodDef bwMethods[] = {
 39 |     {"open", (PyCFunction)pyBwOpen, METH_VARARGS,
 40 | "Open a bigWig or bigBed file. For remote files, give a URL starting with HTTP,\n\
 41 | FTP, or HTTPS.\n\
 42 | \n\
 43 | Optional arguments:\n\
 44 |     mode: An optional mode. The default is 'r', which opens a file for reading.\n\
 45 |           If you specify a mode containing 'w' then you'll instead open a file\n\
 46 |           for writing. Note that you then need to add an appropriate header\n\
 47 |           before use. For bigBed files, only reading is supported.\n\
 48 | \n\
 49 | Returns:\n\
 50 |    A bigWigFile object on success, otherwise None.\n\
 51 | \n\
 52 | Arguments:\n\
 53 |     file: The name of a bigWig file.\n\
 54 | \n\
 55 | >>> import pyBigWig\n\
 56 | >>> bw = pyBigWig.open(\"some_file.bw\")\n"},
 57 |     {NULL, NULL, 0, NULL} //Sentinel entry that ends the table
 58 | };
 59 | 
 60 | static PyMethodDef bwObjMethods[] = { //Methods of bigWigFile objects (the tp_methods of the bigWigFile type); each string is that method's docstring
 61 |     {"header", (PyCFunction)pyBwGetHeader, METH_VARARGS,
 62 | "Returns the header of a bigWig file. This contains information such as: \n\
 63 |   * The version number of the file ('version').\n\
 64 |   * The number of zoom levels ('nLevels').\n\
 65 |   * The number of bases covered ('nBasesCovered').\n\
 66 |   * The minimum value ('minVal').\n\
 67 |   * The maximum value ('maxVal').\n\
 68 |   * The sum of all values ('sumData').\n\
 69 |   * The sum of the square of all values ('sumSquared').\n\
 70 | These are returned as a dictionary.\n\
 71 | \n\
 72 | >>> import pyBigWig\n\
 73 | >>> bw = pyBigWig.open(\"some_file.bw\")\n\
 74 | >>> bw.header()\n\
 75 | {'maxVal': 2L, 'sumData': 272L, 'minVal': 0L, 'version': 4L,\n\
 76 | 'sumSquared': 500L, 'nLevels': 1L, 'nBasesCovered': 154L}\n\
 77 | >>> bw.close()\n"},
 78 |     {"close", (PyCFunction)pyBwClose, METH_VARARGS,
 79 | "Close a bigWig file.\n\
 80 | \n\
 81 | >>> import pyBigWig\n\
 82 | >>> bw = pyBigWig.open(\"some_file.bw\")\n\
 83 | >>> bw.close()\n"},
 84 |     {"isBigWig", (PyCFunction)pyIsBigWig, METH_VARARGS,
 85 | "Returns True if the object is a bigWig file (otherwise False).\n\
 86 | >>> import pyBigWig\n\
 87 | >>> bw = pyBigWig.open(\"some_file.bigWig\")\n\
 88 | >>> bw.isBigWig()\n\
 89 | True\n\
 90 | >>> bw.isBigBed()\n\
 91 | False\n"},
 92 |     {"isBigBed", (PyCFunction)pyIsBigBed, METH_VARARGS,
 93 | "Returns True if the object is a bigBed file (otherwise False).\n\
 94 | >>> import pyBigWig\n\
 95 | >>> bw = pyBigWig.open(\"some_file.bigBed\")\n\
 96 | >>> bw.isBigWig()\n\
 97 | False\n\
 98 | >>> bw.isBigBed()\n\
 99 | True\n"},
100 |     {"chroms", (PyCFunction)pyBwGetChroms, METH_VARARGS,
101 | "Return a chromosome: length dictionary. The order is typically not\n\
102 | alphabetical and the lengths are long (thus the 'L' suffix).\n\
103 | \n\
104 | Optional arguments:\n\
105 |     chrom: An optional chromosome name\n\
106 | \n\
107 | Returns:\n\
108 |     A dictionary of chromosome lengths, or the length of the given chromosome.\n\
109 | \n\
110 | >>> import pyBigWig\n\
111 | >>> bw = pyBigWig.open(\"test/test.bw\")\n\
112 | >>> bw.chroms()\n\
113 | {'1': 195471971L, '10': 130694993L}\n\
114 | \n\
115 | Note that you may optionally supply a specific chromosome:\n\
116 | \n\
117 | >>> bw.chroms(\"1\")\n\
118 | 195471971L\n\
119 | \n\
120 | If you specify a non-existent chromosome then no output is produced:\n\
121 | \n\
122 | >>> bw.chroms(\"foo\")\n\
123 | >>>\n"},
124 |     {"stats", (PyCFunction)pyBwGetStats, METH_VARARGS|METH_KEYWORDS,
125 | "Return summary statistics for a given range. On error, this function throws a\n\
126 | runtime exception.\n\
127 | \n\
128 | Positional arguments:\n\
129 |     chr:   Chromosome name\n\
130 | \n\
131 | Keyword arguments:\n\
132 |     start: Starting position\n\
133 |     end:   Ending position\n\
134 |     type:  Summary type (mean, min, max, coverage, std, sum), default 'mean'.\n\
135 |     nBins: Number of bins into which the range should be divided before\n\
136 |            computing summary statistics. The default is 1.\n\
137 |     exact: By default, pyBigWig uses the same method as Kent's tools from UCSC\n\
138 |            for computing statistics. This means that 'zoom levels' may be\n\
139 |            used, rather than actual values (please see the pyBigWig repository\n\
140 |            on github for further information on this). To avoid this behaviour,\n\
141 |            simply specify 'exact=True'. Note that values returned will then\n\
142 |            differ from what UCSC, IGV, and similar other tools will report.\n\
143 | \n\
144 | >>> import pyBigWig\n\
145 | >>> bw = pyBigWig.open(\"test/test.bw\")\n\
146 | >>> bw.stats(\"1\", 0, 3)\n\
147 | [0.2000000054637591]\n\
148 | \n\
149 | This is the mean value over the range 1:1-3 (in 1-based coordinates). If\n\
150 | the start and end positions aren't given the entire chromosome is used.\n\
151 | There are additional optional parameters 'type' and 'nBins'. 'type'\n\
152 | specifies the type of summary information to calculate, which is 'mean'\n\
153 | by default. Other possibilities for 'type' are: 'min' (minimum value),\n\
154 | 'max' (maximum value), 'coverage' (number of covered bases), and 'std'\n\
155 |  (standard deviation). 'nBins' defines how many bins the region will be\n\
156 |  divided into and defaults to 1.\n\
157 | \n\
158 | >>> bw.stats(\"1\", 0, 3, type=\"min\")\n\
159 | [0.10000000149011612]\n\
160 | >>> bw.stats(\"1\", 0, 3, type=\"max\")\n\
161 | [0.30000001192092896]\n\
162 | >>> bw.stats(\"1\", 0, 10, type=\"coverage\")\n\
163 | [0.30000000000000004]\n\
164 | >>> bw.stats(\"1\", 0, 3, type=\"std\")\n\
165 | [0.10000000521540645]\n\
166 | >>> bw.stats(\"1\",99,200, type=\"max\", nBins=2)\n\
167 | [1.399999976158142, 1.5]\n"},
168 | #ifdef WITHNUMPY
169 |     {"values", (PyCFunction)pyBwGetValues, METH_VARARGS|METH_KEYWORDS,
170 | "Retrieve the value stored for each position (or None). On error, a runtime\n\
171 | exception is thrown.\n\
172 | \n\
173 | Positional arguments:\n\
174 |     chr:   Chromosome name\n\
175 |     start: Starting position\n\
176 |     end:   Ending position\n\
177 | \n\
178 | Optional arguments:\n\
179 |     numpy: If True, return a numpy array rather than a list of values. This\n\
180 |            is generally more memory efficient. Note that this option is only\n\
181 |            available if pyBigWig was installed with numpy support (check the\n\
182 |            pyBigWig.numpy() function).\n\
183 | \n\
184 | >>> import pyBigWig\n\
185 | >>> bw = pyBigWig.open(\"test/test.bw\")\n\
186 | >>> bw.values(\"1\", 0, 3)\n\
187 | [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\
188 | \n\
189 | The length of the returned list will always match the length of the\n\
190 | range. Any uncovered bases will have a value of None.\n\
191 | \n\
192 | >>> bw.values(\"1\", 0, 4)\n\
193 | [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\
194 | \n"},
195 | #else
196 |     {"values", (PyCFunction)pyBwGetValues, METH_VARARGS,
197 | "Retrieve the value stored for each position (or None). On error, a runtime\n\
198 | exception is thrown.\n\
199 | \n\
200 | Positional arguments:\n\
201 |     chr:   Chromosome name\n\
202 |     start: Starting position\n\
203 |     end:   Ending position\n\
204 | \n\
205 | >>> import pyBigWig\n\
206 | >>> bw = pyBigWig.open(\"test/test.bw\")\n\
207 | >>> bw.values(\"1\", 0, 3)\n\
208 | [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\
209 | \n\
210 | The length of the returned list will always match the length of the\n\
211 | range. Any uncovered bases will have a value of None.\n\
212 | \n\
213 | >>> bw.values(\"1\", 0, 4)\n\
214 | [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\
215 | \n"},
216 | #endif
217 |     {"intervals", (PyCFunction)pyBwGetIntervals, METH_VARARGS|METH_KEYWORDS,
218 | "Retrieve each interval covering a part of a chromosome/region. On error, a\n\
219 | runtime exception is thrown.\n\
220 | \n\
221 | Positional arguments:\n\
222 |     chr:   Chromosome name\n\
223 | \n\
224 | Keyword arguments:\n\
225 |     start: Starting position\n\
226 |     end:   Ending position\n\
227 | \n\
228 | If start and end aren't specified, the entire chromosome is returned.\n\
229 | The returned object is a tuple containing the starting position, end\n\
230 | position, and value of each interval in the file. As with all bigWig\n\
231 | positions, those returned are 0-based half-open (e.g., a start of 0 and\n\
232 | end of 10 specifies the first 10 positions).\n\
233 | \n\
234 | >>> import pyBigWig\n\
235 | >>> bw = pyBigWig.open(\"test/test.bw\")\n\
236 | >>> bw.intervals(\"1\", 0, 3)\n\
237 | ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224),\n\
238 |  (2, 3, 0.30000001192092896))\n\
239 | >>> bw.close()"},
240 |     {"entries", (PyCFunction) pyBBGetEntries, METH_VARARGS|METH_KEYWORDS,
241 | "Retrieves entries from a bigBed file. These can optionally contain the string\n\
242 | associated with each entry.\n\
243 | \n\
244 | Positional arguments:\n\
245 |     chr:   Chromosome name\n\
246 | \n\
247 | Keyword arguments:\n\
248 |     start: Starting position\n\
249 |     end:   Ending position\n\
250 |     withString: If True, return the string associated with each entry.\n\
251 |            Default True.\n\
252 | \n\
253 | The output is a list of tuples, with members \"start\", \"end\", and \"string\"\n\
254 | (assuming \"withString=True\"). If there are no overlapping entries, then None\n\
255 | is returned.\n\
256 | \n\
257 | >>> import pyBigWig\n\
258 | >>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\
259 | >>> print(bb.entries('chr1',10000000,10020000))\n\
260 | [(10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'),\n\
261 | (10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'),\n\
262 | (10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399')]\n\
263 | >>> print(bb.entries(\"chr1\", 10000000, 10020000, withString=False))\n\
264 | [(10009333, 10009640), (10014007, 10014289), (10014373, 10024307)]\n\
265 | \n"},
266 |     {"SQL", (PyCFunction) pyBBGetSQL, METH_VARARGS,
267 | "Returns the SQL string associated with the file. This is typically useful for\n\
268 | bigBed files, where this determines what is held in each column of the text\n\
269 | string associated with entries.\n\
270 | \n\
271 | If there is no SQL string, then None is returned.\n\
272 | \n\
273 | >>> import pyBigWig\n\
274 | >>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\
275 | >>> print(bb.SQL())\n\
276 | table RnaElements\n\
277 | \"BED6 + 3 scores for RNA Elements data \"\n\
278 |     (\n\
279 |     string chrom;      \"Reference sequence chromosome or scaffold\"\n\
280 |     uint   chromStart; \"Start position in chromosome\"\n\
281 |     uint   chromEnd;   \"End position in chromosome\"\n\
282 |     string name;       \"Name of item\"\n\
283 |     uint   score;      \"Normalized score from 0-1000\"\n\
284 |     char[1] strand;    \"+ or - or . for unknown\"\n\
285 |     float level;       \"Expression level such as RPKM or FPKM. Set to -1 for no data.\"\n\
286 |     float signif;      \"Statistical significance such as IDR. Set to -1 for no data.\"\n\
287 |     uint score2;       \"Additional measurement/count e.g. number of reads. Set to 0 for no data.\"\n\
288 |     )\n\
289 | \n\
290 | \n"},
291 |     {"addHeader", (PyCFunction)pyBwAddHeader, METH_VARARGS|METH_KEYWORDS,
292 | "Adds a header to a file opened for writing. This MUST be called before adding\n\
293 | any entries. On error, a runtime exception is thrown.\n\
294 | \n\
295 | Positional arguments:\n\
296 |     cl:    A chromosome list, of the form (('chr1', 1000), ('chr2', 2000), ...).\n\
297 |            In other words, each element of the list is a tuple containing a\n\
298 |            chromosome name and its associated length.\n\
299 | \n\
300 | Keyword arguments:\n\
301 |     maxZooms:  The maximum number of zoom levels. The value must be >=0. The\n\
302 |                default is 10.\n\
303 | \n\
304 | >>> import pyBigWig\n\
305 | >>> import tempfile\n\
306 | >>> import os\n\
307 | >>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\
308 | >>> oname = ofile.name\n\
309 | >>> ofile.close()\n\
310 | >>> bw = pyBigWig.open(oname, 'w')\n\
311 | >>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)], maxZooms=0)\n\
312 | >>> bw.close()\n\
313 | >>> os.remove(oname)"},
314 |     {"addEntries", (PyCFunction)pyBwAddEntries, METH_VARARGS|METH_KEYWORDS,
315 | "Adds one or more entries to a bigWig file. This returns nothing, but throws a\n\
316 | runtime exception on error.\n\
317 | \n\
318 | This function always accepts an optional 'validate' option. If set to 'True',\n\
319 | which is the default, the input entries are checked to ensure that they come\n\
320 | after previously entered entries. This comes with significant overhead, so if\n\
321 | this is instead 'False' then this validation is not performed.\n\
322 | \n\
323 | There are three manners in which entries can be stored in bigWig files.\n\
324 | \n\
325 | \n\
326 | bedGraph-like entries (12 bytes each):\n\
327 | \n\
328 | Positional arguments:\n\
329 |     chrom:  A list of chromosomes. These MUST match those added with addHeader().\n\
330 |     starts: A list of start positions. These are 0-based.\n\
331 | \n\
332 | Keyword arguments:\n\
333 |     ends:   A list of end positions. These are 0-based half open, so a start of\n\
334 |             0 and end of 10 specifies the first 10 bases.\n\
335 |     values: A list of values.\n\
336 | \n\
337 | \n\
338 | Variable-step entries (8 bytes each):\n\
339 | \n\
340 | Positional arguments:\n\
341 |     chrom:  A chromosome name. This MUST match one added with addHeader().\n\
342 |     starts: A list of start positions. These are 0-based.\n\
343 | \n\
344 | Keyword arguments:\n\
345 |     values: A list of values.\n\
346 |     span:   A span width. This is an integer value and specifies how many bases\n\
347 |             each entry describes. An entry with a start position of 0 and a span\n\
348 |             of 10 describes the first 10 bases.\n\
349 | \n\
350 | \n\
351 | Fixed-step entries (4 bytes each):\n\
352 | \n\
353 | Positional arguments:\n\
354 |     chrom:  A chromosome name. This MUST match one added with addHeader().\n\
355 |     starts: A start position. These are 0-based. The start position of each\n\
356 |             entry starts 'step' after the previous and describes 'span' bases.\n\
357 | \n\
358 | Keyword arguments:\n\
359 |     values: A list of values.\n\
360 |     span:   A span width. This is an integer value and specifies how many bases\n\
361 |             each entry describes. An entry with a start position of 0 and a span\n\
362 |             of 10 describes the first 10 bases.\n\
363 |     step:   A step width. Each subsequent entry begins this number of bases\n\
364 |             after the previous. So if the first entry has a start of 0 and step\n\
365 |             of 30, the second entry will start at 30.\n\
366 | \n\
367 | >>> import pyBigWig\n\
368 | >>> import tempfile\n\
369 | >>> import os\n\
370 | >>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\
371 | >>> oname = ofile.name\n\
372 | >>> ofile.close()\n\
373 | >>> bw = pyBigWig.open(oname, 'w')\n\
374 | >>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)])\n\
375 | >>> #Add some bedGraph-like entries\n\
376 | >>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0])\n\
377 | >>> #Variable-step entries, the span 500-520, 600-620, and 635-655\n\
378 | >>> bw.addEntries(\"1\", [500, 600, 635], values=[-2.0, 150.0, 25.0], span=20)\n\
379 | >>> #Fixed-step entries, the bases described are 900-920, 930-950, and 960-980\n\
380 | >>> bw.addEntries(\"1\", 900, values=[-5.0, -20.0, 25.0], span=20, step=30)\n\
381 | >>> #This only works due to using validate=False. Obviously the file is then corrupt.\n\
382 | >>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0], validate=False)\n\
383 | >>> bw.close()\n\
384 | >>> os.remove(oname)"},
385 |     {"__enter__", (PyCFunction)pyBwEnter, METH_NOARGS, NULL}, //Context-manager entry, so files can be used in a with statement
386 |     {"__exit__", (PyCFunction)pyBwClose, METH_VARARGS, NULL}, //Leaving the with block runs the same function as close()
387 |     {NULL, NULL, 0, NULL} //Sentinel entry that ends the table
388 | };
389 | 
390 | #if PY_MAJOR_VERSION >= 3
391 | struct pyBigWigmodule_state { //Per-module state layout for Python 3
392 |     PyObject *error;
393 | };
394 | //Fetch the state struct of module m
395 | #define GETSTATE(m) ((struct pyBigWigmodule_state*)PyModule_GetState(m))
396 | //The Python 3 module definition; the module-level functions come from bwMethods
397 | static PyModuleDef pyBigWigmodule = {
398 |     PyModuleDef_HEAD_INIT,
399 |     "pyBigWig", //m_name
400 |     "A python module for bigWig file access", //m_doc
401 |     -1, //m_size; NOTE(review): with -1 CPython allocates no per-module state, so GETSTATE() would presumably yield NULL - confirm before using it
402 |     bwMethods, //m_methods
403 |     NULL, NULL, NULL, NULL //The remaining PyModuleDef members are unused
404 | };
405 | #endif
406 | 
407 | //The Python type of bigWigFile objects. Only tp_dealloc, the generic attribute hooks, tp_flags, tp_doc and tp_methods are set here; tp_print, tp_repr, tp_str and tp_members are left empty
408 | static PyTypeObject bigWigFile = {
409 | #if PY_MAJOR_VERSION >= 3
410 |     PyVarObject_HEAD_INIT(NULL, 0)
411 | #else
412 |     PyObject_HEAD_INIT(NULL)
413 |     0,              /*ob_size*/
414 | #endif
415 |     "pyBigWig.bigWigFile",     /*tp_name*/
416 |     sizeof(pyBigWigFile_t),      /*tp_basicsize*/
417 |     0,                         /*tp_itemsize*/
418 |     (destructor)pyBwDealloc,     /*tp_dealloc*/
419 |     0,                         /*tp_print*/
420 |     0,                         /*tp_getattr*/
421 |     0,                         /*tp_setattr*/
422 |     0,                         /*tp_compare*/
423 |     0,                         /*tp_repr*/
424 |     0,                         /*tp_as_number*/
425 |     0,                         /*tp_as_sequence*/
426 |     0,                         /*tp_as_mapping*/
427 |     0,                         /*tp_hash*/
428 |     0,                         /*tp_call*/
429 |     0,                         /*tp_str*/
430 |     PyObject_GenericGetAttr, /*tp_getattro*/
431 |     PyObject_GenericSetAttr, /*tp_setattro*/
432 |     0,                         /*tp_as_buffer*/
433 | #if PY_MAJOR_VERSION >= 3
434 |     Py_TPFLAGS_DEFAULT,        /*tp_flags*/
435 | #else
436 |     Py_TPFLAGS_HAVE_CLASS,     /*tp_flags*/
437 | #endif
438 |     "bigWig File",             /*tp_doc*/
439 |     0,                         /*tp_traverse*/
440 |     0,                         /*tp_clear*/
441 |     0,                         /*tp_richcompare*/
442 |     0,                         /*tp_weaklistoffset*/
443 |     0,                         /*tp_iter*/
444 |     0,                         /*tp_iternext*/
445 |     bwObjMethods,                 /*tp_methods*/
446 |     0,                         /*tp_members*/
447 |     0,                         /*tp_getset*/
448 |     0,                         /*tp_base*/
449 |     0,                         /*tp_dict*/
450 |     0,                         /*tp_descr_get*/
451 |     0,                         /*tp_descr_set*/
452 |     0,                         /*tp_dictoffset*/
453 |     0,                         /*tp_init*/
454 |     0,                         /*tp_alloc*/
455 |     0,                         /*tp_new*/
456 |     0,0,0,0,0,0 /*the slots that follow tp_new are left zeroed*/
457 | };
458 | 


--------------------------------------------------------------------------------
/pyBigWigTest/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeptools/pyBigWig/7300b0a4599e7f72085c3c27c19b375e3a2c2cc0/pyBigWigTest/__init__.py


--------------------------------------------------------------------------------
/pyBigWigTest/test.bigBed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeptools/pyBigWig/7300b0a4599e7f72085c3c27c19b375e3a2c2cc0/pyBigWigTest/test.bigBed


--------------------------------------------------------------------------------
/pyBigWigTest/test.bw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeptools/pyBigWig/7300b0a4599e7f72085c3c27c19b375e3a2c2cc0/pyBigWigTest/test.bw


--------------------------------------------------------------------------------
/pyBigWigTest/test.py:
--------------------------------------------------------------------------------
  1 | import pyBigWig
  2 | import tempfile
  3 | import os
  4 | import sys
  5 | import hashlib
  6 | import numpy as np
  7 | 
  8 | class TestRemote():
  9 |     fname = "http://raw.githubusercontent.com/dpryan79/pyBigWig/master/pyBigWigTest/test.bw"
 10 | 
 11 |     def doOpen(self):
 12 |         bw = pyBigWig.open(self.fname)
 13 |         assert(bw is not None)
 14 |         return bw
 15 | 
 16 |     def doOpenWith(self):
 17 |         with pyBigWig.open(self.fname) as bw:
 18 |             assert(bw.chroms() == {'1': 195471971, '10': 130694993})
 19 | 
 20 |     def doChroms(self, bw):
 21 |         assert(bw.chroms() == {'1': 195471971, '10': 130694993})
 22 |         assert(bw.chroms("1") == 195471971)
 23 |         assert(bw.chroms("c") is None)
 24 | 
 25 |     def doHeader(self, bw):
 26 |         assert(bw.header() == {'maxVal': 2, 'sumData': 272, 'minVal': 0, 'version': 4, 'sumSquared': 500, 'nLevels': 1, 'nBasesCovered': 154})
 27 | 
 28 |     def doStats(self, bw):
 29 |         assert(bw.stats("1", 0, 3) == [0.2000000054637591])
 30 |         assert(bw.stats("1", 0, 3, type="max") == [0.30000001192092896])
 31 |         assert(bw.stats("1",99,200, type="max", nBins=2) == [1.399999976158142, 1.5])
 32 |         assert(bw.stats("1",np.int64(99), np.int64(200), type="max", nBins=2) == [1.399999976158142, 1.5])
 33 |         assert(bw.stats("1") == [1.3351851569281683])
 34 | 
 35 |     def doValues(self, bw):
 36 |         assert(bw.values("1", 0, 3) == [0.10000000149011612, 0.20000000298023224, 0.30000001192092896])
 37 |         assert(bw.values("1", np.int64(0), np.int64(3)) == [0.10000000149011612, 0.20000000298023224, 0.30000001192092896])
 38 |         #assert(bw.values("1", 0, 4) == [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, 'nan'])
 39 | 
 40 |     def doIntervals(self, bw):
 41 |         assert(bw.intervals("1", 0, 3) == ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896)))
 42 |         assert(bw.intervals("1", np.int64(0), np.int64(3)) == ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896)))
 43 |         assert(bw.intervals("1") == ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896), (100, 150, 1.399999976158142), (150, 151, 1.5)))
 44 | 
 45 |     def doSum(self, bw):
 46 |         assert(bw.stats("1", 100, 151, type="sum", nBins=2) == [35.0, 36.5])
 47 | 
 48 |     def doWrite(self, bw):
 49 |         ofile = tempfile.NamedTemporaryFile(delete=False)
 50 |         oname = ofile.name
 51 |         ofile.close()
 52 |         bw2 = pyBigWig.open(oname, "w")
 53 |         assert(bw2 is not None)
 54 |         #Since this is an unordered dict(), iterating over the items can swap the order!
 55 |         chroms = [("1", bw.chroms("1")), ("10", bw.chroms("10"))]
 56 |         assert(len(bw.chroms()) == 2)
 57 |         bw2.addHeader(chroms, maxZooms=1)
 58 |         #Copy the input file
 59 |         for c in chroms:
 60 |             ints = bw.intervals(c[0])
 61 |             chroms2 = []
 62 |             starts = []
 63 |             ends = []
 64 |             values = []
 65 |             for entry in ints:
 66 |                 chroms2.append(c[0])
 67 |                 starts.append(entry[0])
 68 |                 ends.append(entry[1])
 69 |                 values.append(entry[2])
 70 |             bw2.addEntries(chroms2, starts, ends=ends, values=values)
 71 |         bw2.close()
 72 |         #Ensure that the copied file has the same entries and max/min/etc.
 73 |         bw2 = pyBigWig.open(oname)
 74 |         assert(bw.header() == bw2.header())
 75 |         assert(bw.chroms() == bw2.chroms())
 76 |         for c in chroms:
 77 |             ints1 = bw.intervals(c[0])
 78 |             ints2 = bw2.intervals(c[0])
 79 |             assert(ints1 == ints2)
 80 |         bw.close()
 81 |         bw2.close()
 82 |         #Clean up
 83 |         os.remove(oname)
 84 | 
 85 |     def doWrite2(self):
 86 |         '''
 87 |         Test all three modes of storing entries. Also test to ensure that we get error messages when doing something silly
 88 | 
 89 |         This is a modified version of the writing example from libBigWig
 90 |         '''
 91 |         chroms = ["1"]*6
 92 |         starts = [0, 100, 125, 200, 220, 230, 500, 600, 625, 700, 800, 850]
 93 |         ends = [5, 120, 126, 205, 226, 231]
 94 |         values = [0.0, 1.0, 200.0, -2.0, 150.0, 25.0, 0.0, 1.0, 200.0, -2.0, 150.0, 25.0, -5.0, -20.0, 25.0, -5.0, -20.0, 25.0]
 95 |         ofile = tempfile.NamedTemporaryFile(delete=False)
 96 |         oname = ofile.name
 97 |         ofile.close()
 98 |         bw = pyBigWig.open(oname, "w")
 99 |         bw.addHeader([("1", 1000000), ("2", 1500000)])
100 | 
101 |         #Intervals
102 |         bw.addEntries(chroms[0:3], starts[0:3], ends=ends[0:3], values=values[0:3])
103 |         bw.addEntries(chroms[3:6], starts[3:6], ends=ends[3:6], values=values[3:6])
104 | 
105 |         #IntervalSpans
106 |         bw.addEntries("1", starts[6:9], values=values[6:9], span=20)
107 |         bw.addEntries("1", starts[9:12], values=values[9:12], span=20)
108 | 
109 |         #IntervalSpanSteps, this should instead take an int
110 |         bw.addEntries("1", 900, values=values[12:15], span=20, step=30)
111 |         bw.addEntries("1", 990, values=values[15:18], span=20, step=30)
112 | 
113 |         #Attempt to add incorrect values. These MUST raise an exception
114 |         try:
115 |             bw.addEntries(chroms[0:3], starts[0:3], ends=ends[0:3], values=values[0:3])
116 |             assert(1==0)
117 |         except RuntimeError:
118 |             pass
119 |         try:
120 |             bw.addEntries("1", starts[6:9], values=values[6:9], span=20)
121 |             assert(1==0)
122 |         except RuntimeError:
123 |             pass
124 |         try:
125 |             bw.addEntries("3", starts[6:9], values=values[6:9], span=20)
126 |             assert(1==0)
127 |         except RuntimeError:
128 |             pass
129 |         try:
130 |             bw.addEntries("1", 900, values=values[12:15], span=20, step=30)
131 |             assert(1==0)
132 |         except RuntimeError:
133 |             pass
134 | 
135 |         #Add a few intervals on a new chromosome
136 |         bw.addEntries(["2"]*3, starts[0:3], ends=ends[0:3], values=values[0:3])
137 |         bw.close()
138 |         #check md5sum, this is the simplest method to check correctness
139 |         h = hashlib.md5(open(oname, "rb").read()).hexdigest()
140 |         assert(h=="ef104f198c6ce8310acc149d0377fc16")
141 |         #Clean up
142 |         os.remove(oname)
143 | 
144 |     def doWriteEmpty(self):
145 |         ofile = tempfile.NamedTemporaryFile(delete=False)
146 |         oname = ofile.name
147 |         ofile.close()
148 |         bw = pyBigWig.open(oname, "w")
149 |         bw.addHeader([("1", 1000000), ("2", 1500000)])
150 |         bw.close()
151 | 
152 |         #check md5sum
153 |         h = hashlib.md5(open(oname, "rb").read()).hexdigest()
154 |         assert(h=="361c600e5badf0b45d819552a7822937")
155 | 
156 |         #Ensure we can open and get reasonable results
157 |         bw = pyBigWig.open(oname)
158 |         assert(bw.chroms() == {'1': 1000000, '2': 1500000})
159 |         assert(bw.intervals("1") == None)
160 |         assert(bw.values("1", 0, 1000000) == [])
161 |         assert(bw.stats("1", 0, 1000000, nBins=2) == [None, None])
162 |         bw.close()
163 | 
164 |         #Clean up
165 |         os.remove(oname)
166 | 
167 |     def doWriteNumpy(self):
168 |         ofile = tempfile.NamedTemporaryFile(delete=False)
169 |         oname = ofile.name
170 |         ofile.close()
171 |         bw = pyBigWig.open(oname, "w")
172 |         bw.addHeader([("chr1", 100), ("chr2", 150), ("chr3", 200), ("chr4", 250)])
173 |         chroms = np.array(["chr1"] * 2 + ["chr2"] * 2 + ["chr3"] * 2 + ["chr4"] * 2)
174 |         starts = np.array([0, 10, 40, 50, 60, 70, 80, 90], dtype=np.int64)
175 |         ends = np.array([5, 15, 45, 55, 65, 75, 85, 95], dtype=np.int64)
176 |         values0 = np.array(np.random.random_sample(8), dtype=np.float64)
177 |         bw.addEntries(chroms, starts, ends=ends, values=values0)
178 |         bw.close()
179 | 
180 |         vals = [(x, y, z) for x, y, z in zip(starts, ends, values0)]
181 |         bw = pyBigWig.open(oname)
182 |         assert(bw.chroms() == {'chr1': 100, 'chr2': 150, 'chr3': 200, 'chr4': 250})
183 |         for idx1, chrom in enumerate(["chr1", "chr2", "chr3", "chr4"]):
184 |             for idx2, tup in enumerate(bw.intervals(chrom)):
185 |                 assert(tup[0] == starts[2 * idx1 + idx2])
186 |                 assert(tup[1] == ends[2 * idx1 + idx2])
187 |                 assert(np.isclose(tup[2], values0[2 * idx1 + idx2]))
188 |         bw.close()
189 | 
190 |         #Clean up
191 |         os.remove(oname)
192 | 
193 |     def testAll(self):
194 |         bw = self.doOpen()
195 |         self.doChroms(bw)
196 |         if not self.fname.startswith("http"):
197 |             self.doHeader(bw)
198 |             self.doStats(bw)
199 |             self.doSum(bw)
200 |             self.doValues(bw)
201 |             self.doIntervals(bw)
202 |             self.doWrite(bw)
203 |             self.doOpenWith()
204 |             self.doWrite2()
205 |             self.doWriteEmpty()
206 |             self.doWriteNumpy()
207 |         bw.close()
208 | 
class TestLocal():
    """Runs the TestRemote checks against the test.bw file located beside the pyBigWig module."""
    def testFoo(self):
        # Path is built relative to the directory containing the loaded pyBigWig module
        localPath = os.path.dirname(pyBigWig.__file__) + "/pyBigWigTest/test.bw"
        runner = TestRemote()
        runner.fname = localPath
        runner.testAll()
214 | 
class TestBigBed():
    """Read checks against the test.bigBed file located beside the pyBigWig module."""
    def testBigBed(self):
        bigBedPath = os.path.dirname(pyBigWig.__file__) + "/pyBigWigTest/test.bigBed"
        bb = pyBigWig.open(bigBedPath)
        assert(bb is not None)
        assert(bb.isBigWig() == 0)
        assert(bb.isBigBed() == 1)

        # Expected text from bb.SQL(), one literal per line.
        # The first line ends with a space before the newline.
        expectedSQL = "".join([
            'table RnaElements \n',
            '"BED6 + 3 scores for RNA Elements data "\n',
            '    (\n',
            '    string chrom;      "Reference sequence chromosome or scaffold"\n',
            '    uint   chromStart; "Start position in chromosome"\n',
            '    uint   chromEnd;   "End position in chromosome"\n',
            '    string name;       "Name of item"\n',
            '    uint   score;      "Normalized score from 0-1000"\n',
            '    char[1] strand;    "+ or - or . for unknown"\n',
            '    float level;       "Expression level such as RPKM or FPKM. Set to -1 for no data."\n',
            '    float signif;      "Statistical significance such as IDR. Set to -1 for no data."\n',
            '    uint score2;       "Additional measurement/count e.g. number of reads. Set to 0 for no data."\n',
            '    )\n',
        ])
        sqlText = bb.SQL()
        if isinstance(sqlText, bytes):
            sqlText = sqlText.decode('ASCII')
        assert(sqlText == expectedSQL)

        # The same region is queried with plain ints and with numpy integers
        expected = [
            (10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'),
            (10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'),
            (10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399'),
        ]
        found = bb.entries('chr1', 10000000, 10020000)
        assert(found == expected)
        found = bb.entries('chr1', np.int64(10000000), np.int64(10020000))
        assert(found == expected)
        bb.close()
246 | 
class TestNumpy():
    """Checks for numpy input and output; both tests do nothing when pyBigWig.numpy is 0."""
    def testNumpy(self):
        """Write entries in all three addEntries() forms from numpy data and read them back."""
        import os
        import tempfile
        if pyBigWig.numpy == 0:
            # Bare return: pytest rejects test functions that return a value
            return
        import numpy as np

        # Use a unique temporary file instead of a fixed path under /tmp so the
        # test is portable and cannot collide with a concurrent run.
        ofile = tempfile.NamedTemporaryFile(delete=False)
        oname = ofile.name
        ofile.close()

        bw = pyBigWig.open(oname, "w")
        bw.addHeader([("1", 1000)], maxZooms=0)
        # Type 0
        chroms = np.array(["1"] * 10)
        starts = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=np.int64)
        ends = np.array([5, 15, 25, 35, 45, 55, 65, 75, 85, 95], dtype=np.int64)
        values0 = np.array(np.random.random_sample(10), dtype=np.float64)
        bw.addEntries(chroms, starts, ends=ends, values=values0)

        starts = np.array([100, 110, 120, 130, 140, 150, 160, 170, 180, 190], dtype=np.int64)
        ends = np.array([105, 115, 125, 135, 145, 155, 165, 175, 185, 195], dtype=np.int64)
        values1 = np.array(np.random.random_sample(10), dtype=np.float64)
        bw.addEntries(chroms, starts, ends=ends, values=values1)

        # Type 1, single chrom, multiple starts/values, single span
        starts = np.array([200, 210, 220, 230, 240, 250, 260, 270, 280, 290], dtype=np.int64)
        values2 = np.array(np.random.random_sample(10), dtype=np.float64)
        bw.addEntries("1", starts, span=np.int64(8), values=values2)

        starts = np.array([300, 310, 320, 330, 340, 350, 360, 370, 380, 390], dtype=np.int64)
        values3 = np.array(np.random.random_sample(10), dtype=np.float64)
        bw.addEntries("1", starts, span=np.int64(8), values=values3)

        # Type 2, single chrom/start/span/step, multiple values
        values4 = np.array(np.random.random_sample(10), dtype=np.float64)
        bw.addEntries("1", np.int64(400), span=np.int64(8), step=np.int64(2), values=values4)

        values5 = np.array(np.random.random_sample(10), dtype=np.float64)
        bw.addEntries("1", np.int64(500), span=np.int64(8), step=np.int64(2), values=values5)

        bw.close()

        bw = pyBigWig.open(oname)
        assert(bw is not None)

        def compy(start, v2):
            """Compare the values stored in [start, start + 100) with the array that was written."""
            v = []
            for t in bw.intervals("1", start, start + 100):
                v.append(t[2])
            v = np.array(v)
            assert(np.all(np.abs(v - v2) < 1e-5))

        compy(0, values0)
        compy(100, values1)
        compy(200, values2)
        compy(300, values3)
        compy(400, values4)
        compy(500, values5)

        # Get values as a numpy array
        foo = bw.values("1", 0, 100, numpy=False)
        assert(isinstance(foo, list))
        foo = bw.values("1", 0, 100, numpy=True)
        assert(isinstance(foo, np.ndarray))

        bw.close()
        os.remove(oname)

    def testNumpyValues(self):
        """Check that numpy=True output of values() and stats() matches the list output."""
        if pyBigWig.numpy == 0:
            # Bare return: pytest rejects test functions that return a value
            return
        import numpy as np

        fname = "http://raw.githubusercontent.com/dpryan79/pyBigWig/master/pyBigWigTest/test.bw"
        bw = pyBigWig.open(fname, "r")

        assert np.allclose(
            bw.values("1", 0, 20, numpy=True),
            np.array(bw.values("1", 0, 20), dtype=np.float32),
            equal_nan=True
        )

        assert np.allclose(
            bw.stats("1", 0, 20, "mean", 5, numpy=True),
            np.array(bw.stats("1", 0, 20, "mean", 5), dtype=np.float64),
            equal_nan=True
        )

        # Release the handle; the original left it open
        bw.close()
331 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | build-backend = "setuptools.build_meta"
 3 | requires = ["numpy >= 2.0.0", "setuptools", "setuptools-scm"]
 4 | 
 5 | [project]
 6 | authors = [{name = "Devon P. Ryan", email = "dryan79@gmail.com"}]
 7 | classifiers = [
 8 |   "Development Status :: 5 - Production/Stable",
 9 |   "Intended Audience :: Developers",
10 |   "License :: OSI Approved",
11 |   "Programming Language :: C",
12 |   "Programming Language :: Python",
13 |   "Programming Language :: Python :: 3",
14 |   "Programming Language :: Python :: 3.9",
15 |   "Programming Language :: Python :: Implementation :: CPython",
16 |   "Operating System :: POSIX",
17 |   "Operating System :: Unix",
18 |   "Operating System :: MacOS",
19 | ]
20 | description = "A package for accessing bigWig files using libBigWig"
21 | keywords = ["bioinformatics", "bigWig", "bigBed"]
22 | name = "pyBigWig"
23 | version = "0.3.24"
24 | readme = "README.md"
25 | requires-python = ">=3.9"
26 | 
27 | [project.license]
28 | text = "MIT"
29 | 
30 | [project.urls]
31 | "Bug Tracker" = "https://github.com/deeptools/pyBigWig/issues"
32 | "Download" = "https://pypi.python.org/pypi/pyBigWig"
33 | "Homepage" = "https://github.com/deeptools/pyBigWig"
34 | 
35 | [tool.setuptools]
36 | # Override setuptools autodiscovery algorithm
37 | # Only include package test data/source for wheel distribution
38 | include-package-data = true
39 | packages = ["pyBigWigTest"]
40 | 
41 | # Enable version inference from scm
42 | [tool.setuptools_scm]
43 | 
44 | # Target only minimum CPython version 3.9 on linux for wheel build
45 | [tool.cibuildwheel]
46 | skip = "pp* cp38-* *-manylinux_i686 *_ppc64le *_s390x *-musllinux_x86_64 *-musllinux_i686"
47 | 
48 | [tool.cibuildwheel.linux]
49 | manylinux-x86_64-image = "manylinux2014"
50 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | # This is required for setuptools to name the wheel with the correct
2 | # minimum python abi version
3 | # Commenting this out, since this ends up breaking wheels on anything except python 3.7
4 | #[bdist_wheel]
5 | #py-limited-api = cp37
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from setuptools import setup, Extension
 3 | from distutils import sysconfig
 4 | from pathlib import Path
 5 | import subprocess
 6 | import glob
 7 | import sys
 8 | 
 9 | srcs = [x for x in 
10 |     glob.glob("libBigWig/*.c")]
11 | srcs.append("pyBigWig.c")
12 | 
13 | libs=["m", "z"]
14 | 
15 | # do not link to python on mac, see https://github.com/deeptools/pyBigWig/issues/58
16 | if 'dynamic_lookup' not in (sysconfig.get_config_var('LDSHARED') or ''):
17 |     if sysconfig.get_config_vars('BLDLIBRARY') is not None:
18 |         #Note the "-l" prefix!
19 |         for e in sysconfig.get_config_vars('BLDLIBRARY')[0].split():
20 |             if e[0:2] == "-l":
21 |                 libs.append(e[2:])
22 |     elif sys.version_info[0] >= 3 and sys.version_info[1] >= 3:
23 |         libs.append("python%i.%im" % (sys.version_info[0], sys.version_info[1]))
24 |     else:
25 |         libs.append("python%i.%i" % (sys.version_info[0], sys.version_info[1]))
26 | 
27 | additional_libs = [sysconfig.get_config_var("LIBDIR"), sysconfig.get_config_var("LIBPL")]
28 | 
29 | defines = []
30 | try:
31 |     foo, _ = subprocess.Popen(['curl-config', '--libs'], stdout=subprocess.PIPE).communicate()
32 |     libs.append("curl")
33 |     foo = foo.decode().strip().split()
34 | except:
35 |     foo = []
36 |     defines.append(('NOCURL', None))
37 |     sys.stderr.write("Either libcurl isn't installed, it didn't come with curl-config, or curl-config isn't in your $PATH. pyBigWig will be installed without support for remote files.\n")
38 | 
39 | for v in foo:
40 |     if v[0:2] == '-L':
41 |         additional_libs.append(v[2:])
42 | 
43 | include_dirs = ['libBigWig', sysconfig.get_config_var("INCLUDEPY")]
44 | 
45 | # Add numpy build information if numpy is installed as a package
46 | try:
47 |     import numpy
48 |     defines.extend([('WITHNUMPY', None), ('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION')])
49 | 
50 |     # Ref: https://numpy.org/doc/stable/reference/c-api/coremath.html#linking-against-the-core-math-library-in-an-extension
51 |     numpy_include_dir = numpy.get_include()
52 |     numpy_library_dir = str(Path(numpy_include_dir) / '..' / 'lib')
53 | 
54 |     include_dirs.append(numpy_include_dir)
55 |     additional_libs.append(numpy_library_dir)
56 |     libs.append('npymath')
57 | # Silently ignore a failed import of numpy
58 | except ImportError:
59 |     pass
60 | 
61 | module1 = Extension('pyBigWig',
62 |                     sources = srcs,
63 |                     libraries = libs,
64 |                     library_dirs = additional_libs, 
65 |                     define_macros = defines,
66 |                     include_dirs = include_dirs)
67 | 
68 | setup(
69 |     ext_modules=[module1]
70 | )
71 | 


--------------------------------------------------------------------------------