├── .gitignore ├── .travis.yml ├── CHANGES.md ├── LICENSE ├── README.md ├── bigwig.nimble ├── docs ├── bigwig.html └── lib.html ├── scripts └── ci-tests.sh ├── src ├── bigwig.nim └── bigwigpkg │ ├── cli.nim │ ├── lib.nim │ ├── utils.nim │ └── version.nim └── tests ├── all.nim ├── ex.bb ├── nim.cfg ├── test.bw ├── test_read.nim └── test_write.nim /.gitignore: -------------------------------------------------------------------------------- 1 | nimcache/ 2 | tests/test_read 3 | tests/test_write 4 | tests/all 5 | tests/writer.bw 6 | bigwig 7 | src/bigwigpkg/cli 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | services: docker 3 | before_install: 4 | - docker pull brentp/musl-hts-nim 5 | script: 6 | - docker run -w /test -v `pwd`:/test brentp/musl-hts-nim scripts/ci-tests.sh 7 | branches: 8 | except: 9 | - gh-pages 10 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # v0.0.3 2 | 3 | + support bed file of regions where previously only a single chrom:start-stop region 4 | was allowed. 
5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Brent Pedersen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bigwig for nim 2 | 3 | [![Build Status](https://travis-ci.com/brentp/bigwig-nim.svg?branch=master)](https://travis-ci.com/brentp/bigwig-nim) 4 | [![badge](https://img.shields.io/badge/docs-latest-blue.svg)](https://brentp.github.io/bigwig-nim/lib.html) 5 | 6 | ## Command Line 7 | 8 | bigwig-nim includes a command-line tool distributed as a static binary [here](https://github.com/brentp/bigwig-nim/releases/latest). 
9 | It supports converting bed to bigwig and bigwig to bed and extracting stats (mean, coverage, etc) for regions in a bigwig. 10 | 11 | There are other tools to do this, including [kentTools](https://hgwdev.gi.ucsc.edu/~kent/src/) which has a more restrictive license and does not support (b)gzipped input and [bwtool](https://github.com/CRG-Barcelona/bwtool) which seems to provide similar functionality (but I am not able to build it). 12 | 13 | 14 | ### view 15 | 16 | To convert a bed with the value in the 4th column to bigwig, use: 17 | 18 | ```Shell 19 | bigwig view $bed_in --value-column 4 --chrom-sizes $fai -O bigwig -o $bigwig_out 20 | ``` 21 | `bigwig` will automatically determine the best data format for each block (fixed span and step or per-base); most of the 22 | CPU time is spent parsing the input bed file. 23 | 24 | ### stats 25 | 26 | To get the mean value for a given region (in this case on chromosome 22) 27 | 28 | ```Shell 29 | bigwig stats --stat mean $bigwig 22:145000-155000 30 | # or a bed file of regions 31 | bigwig stats --stat mean $bigwig $bed 32 | ``` 33 | 34 | Output is tab-delimited `chrom`, `start`, `stop`, `stat` for each row in the bed (or just once for the region). 35 | 36 | The supported stats are `mean`, `min`, `max`, `coverage`, `sum` with a special-case for the stat of `header` which 37 | shows the chromosomes, lengths and mean coverages for each chromosome in the bigwig file. 38 | 39 | 40 | ## Reading 41 | 42 | ```Nim 43 | var bw: BigWig 44 | bw.open(path, fmRead) 45 | 46 | # avoid allocating when possible 47 | var values: seq[float32] 48 | bw.values(values, "chr1", 0, 2222) 49 | 50 | for iv in bw.intervals("chr2", 999, 88888): # iterator. 51 | # tuple[start: int, stop: int, value: float32] 52 | 53 | # for bigbed 54 | for iv in bw.entries("chr2", 999, 88888): # iterator. 55 | # tuple[start: int, stop: int, value: cstring] 56 | # value contains "SQL" for bigbed entry. 
57 | 58 | # single value 59 | var m: seq[float32] = bw.stats("chr2", 999, 9999, stat=Stat.mean) 60 | 61 | # multiple bins: 62 | var L: seq[float32] = bw.stats("chr2", 999, 9999, stat=Stat.min, nBins=10) 63 | 64 | echo bw.header # @[(name: "1", length: 195471971, tid: 0'u32), (name: "10", length: 130694993, tid: 1'u32)] 65 | 66 | bw.close 67 | ``` 68 | 69 | ## Writing 70 | 71 | ```Nim 72 | var wtr:BigWig 73 | doAssert wtr.open("tests/writer.bw", fmWrite) 74 | wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)]) 75 | wtr.writeHeader 76 | 77 | # add intervals with tuples 78 | wtr.add("chr1", @[(start: 22, stop: 33, value: 0.01'f32), (start: 44, stop: 55, value: 155'f32)]) 79 | 80 | # or with, for example a span of 15 bases: 81 | wtr.add("chr1", 15, @[(start: 20, value: 0.01'f32), (start: 30, value: 155'f32)]) 82 | 83 | # or an array of values with a given span and step: 84 | var values = @[0.1'f32, 0.2, 0.3, 0.4] 85 | wtr.add("chr1", 100, values, span=100, step=200) # 100-200 is 0.1, 300-400 is 0.2 ... 86 | wtr.close() 87 | 88 | ``` 89 | -------------------------------------------------------------------------------- /bigwig.nimble: -------------------------------------------------------------------------------- 1 | import ospaths 2 | template thisModuleFile: string = instantiationInfo(fullPaths = true).filename 3 | 4 | when fileExists(thisModuleFile.parentDir / "src/bigwig.nim"): 5 | # In the git repository the Nimble sources are in a ``src`` directory. 6 | import src/bigwigpkg/version as _ 7 | else: 8 | # When the package is installed, the ``src`` directory disappears. 
9 | import bigwigpkg/version as _ 10 | 11 | # Package 12 | 13 | version = bigwigVersion 14 | author = "Brent Pedersen" 15 | description = "ergonomic wrapper for libbigwig" 16 | license = "MIT" 17 | 18 | 19 | # Dependencies 20 | 21 | requires "nimbigwig", "argparse", "hts >= 0.2.20" 22 | srcDir = "src" 23 | installExt = @["nim"] 24 | 25 | bin = @["bigwig"] 26 | 27 | skipDirs = @["tests"] 28 | 29 | import ospaths,strutils 30 | 31 | task test, "run the tests": 32 | exec "nim c --lineDir:on --debuginfo -r --threads:on tests/all" 33 | 34 | task docs, "Builds documentation": 35 | mkDir("docs"/"bigwig") 36 | for file in @["src/bigwig.nim", "src/bigwigpkg/lib.nim"]: 37 | var f = file.changefileext("html").split("/") 38 | var fn = f[f.high] 39 | exec "nim doc2 --verbosity:0 --hints:off -o:" & "docs" /../ fn.changefileext("html") & " " & file 40 | 41 | -------------------------------------------------------------------------------- /docs/bigwig.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | bigwig 20 | 786 | 787 | 788 | 789 | 802 | 803 | 804 | 805 |
806 |
807 |

bigwig

808 |
809 |
810 | 814 |
815 | Search: 817 |
818 |
819 | Group by: 820 | 824 |
825 |
    826 |
  • 827 | Imports 828 |
      829 | 830 |
    831 |
  • 832 |
  • 833 | Procs 834 |
      835 |
    • main
    • 837 | 838 |
    839 |
  • 840 |
  • 841 | Exports 842 |
      843 | 844 |
    845 |
  • 846 | 847 |
848 | 849 |
850 |
851 |
852 | 853 |

854 |
855 |

Imports

856 |
857 | lib, version, cli 858 |
859 |
860 |

Procs

861 |
862 | 863 |
proc main() {...}{.raises: [IOError, ValueError, Exception, KeyError],
864 |             tags: [ReadIOEffect, WriteIOEffect, RootEffect].}
865 |
866 | 867 | 868 | 869 |
870 | 871 |
872 | 877 | 878 |
879 |
880 | 881 |
882 | 887 |
888 |
889 |
890 | 891 | 892 | 893 | -------------------------------------------------------------------------------- /docs/lib.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | lib 20 | 786 | 787 | 788 | 789 | 802 | 803 | 804 | 805 |
806 |
807 |

lib

808 |
809 |
810 | 814 |
815 | Search: 817 |
818 |
819 | Group by: 820 | 824 |
825 | 913 | 914 |
915 |
916 |
917 | 918 |

919 |
920 |

Imports

921 |
922 | version 923 |
924 |
925 |

Types

926 |
927 | 928 |
BigWig = ref object
 929 |   bw: ptr bigWigFile_t
 930 |   path: string
 931 |   isBigBed: bool
 932 |   starts: seq[uint32]
 933 |   stops: seq[uint32]
 934 |   values: seq[float32]
 935 |   cs: cstringArray
 936 | 
937 |
938 | 939 | 940 | 941 |
942 | 943 |
BigWigHeader = seq[tuple[name: string, length: int, tid: uint32]]
944 |
945 | 946 | 947 | 948 |
949 | 950 |
Stat {...}{.pure.} = enum
 951 |   mean = 0, stdev = 1, max = 2, min = 3, coverage = 4, sum = 5
952 |
953 | 954 | 955 | 956 |
957 | 958 |
959 |
960 |

Procs

961 |
962 | 963 |
proc close(bw: BigWig) {...}{.raises: [], tags: [].}
964 |
965 | 966 | close the file and free up resources 967 | 968 |
969 | 970 |
proc open(bw: var BigWig; path: string; mode: FileMode = fmRead; maxZooms: int = 8): bool {...}{.
 971 |     raises: [], tags: [].}
972 |
973 | 974 | open the bigwig file. maxZooms is only used when opening in write mode. 975 | 976 |
977 | 978 |
proc SQL(bw: BigWig): string {...}{.raises: [], tags: [].}
979 |
980 | 981 | 982 | 983 |
984 | 985 |
proc header(bw: var BigWig): BigWigHeader {...}{.raises: [], tags: [].}
986 |
987 | 988 | 989 | 990 |
991 | 992 |
proc values(bw: var BigWig; values: var seq[float32]; chrom: string; start: int = 0;
 993 |            stop: int = -1; includeNA: bool = true) {...}{.raises: [KeyError], tags: [].}
994 |
995 | 996 | extract values for the given range into values 997 | 998 |
999 | 1000 |
proc stats(bw: var BigWig; chrom: string; start: int = 0; stop: int = -1;
1001 |           stat: Stat = Stat.mean; nBins = 1): seq[float64] {...}{.raises: [KeyError], tags: [].}
1002 |
1003 | 1004 | 1005 | 1006 |
1007 | 1008 |
proc setHeader(bw: BigWig; header: BigWigHeader) {...}{.raises: [], tags: [].}
1009 |
1010 | 1011 | set the header of a bigwig file opened for writing 1012 | 1013 |
1014 | 1015 |
proc writeHeader(bw: BigWig) {...}{.raises: [], tags: [].}
1016 |
1017 | 1018 | write the header (which must have been added in setHeader) to file. 1019 | 1020 |
1021 | 1022 |
proc add[T: int | uint32 | uint64 | int32 | int64](bw: BigWig; chrom: string;
1023 |     intervals: seq[tuple[start: T, stop: T, value: float32]])
1024 |
1025 | 1026 | add intervals to the bigwig. 1027 | 1028 |
1029 | 1030 |
proc add[T: int | uint32 | uint64 | int32 | int64; U: int | uint32 | uint64 | int32 | int64](
1031 |     bw: BigWig; chrom: string; span: U;
1032 |     intervals: seq[tuple[start: T, value: float32]])
1033 |
1034 | 1035 | add spans to the bigwig. this adds fixed-length (span) intervals starting at the given positions. 1036 | 1037 |
1038 | 1039 |
proc add(bw: BigWig; chrom: string; start: uint32; values: var seq[float32];
1040 |         step: uint32 = 1; span: uint32 = 1) {...}{.raises: [], tags: [].}
1041 |
1042 | 1043 | add values to the bigwig starting at start and stepping by step. this is the most efficient way (space and performance) to add to a bigwig file if your intervals match this format. 1044 | 1045 |
1046 | 1047 |
1048 |
1049 |

Iterators

1050 |
1051 | 1052 |
iterator entries(bw: var BigWig; chrom: string; start: int = 0; stop: int = -1): tuple[
1053 |     start: int, stop: int, value: cstring] {...}{.raises: [KeyError], tags: [].}
1054 |
1055 | 1056 | yield bigbed entries. any value is returned as a string 1057 | 1058 |
1059 | 1060 |
iterator intervals(bw: var BigWig; chrom: string; start: int = 0; stop: int = -1): tuple[
1061 |     start: int, stop: int, value: float32] {...}{.raises: [KeyError, ValueError], tags: [].}
1062 |
1063 | 1064 | iterate over the values in the given region 1065 | 1066 |
1067 | 1068 |
1069 |
1070 |

Templates

1071 |
1072 | 1073 |
template isBigBed(b: BigWig): bool
1074 |
1075 | 1076 | indicate whether file is BigBed (true) or BigWig (false) 1077 | 1078 |
1079 | 1080 |
1081 |
1082 |

Exports

1083 |
1084 | bigwigGitCommit, version, bigwigVersion 1085 |
1086 | 1087 |
1088 |
1089 | 1090 |
1091 | 1096 |
1097 |
1098 |
1099 | 1100 | 1101 | 1102 | -------------------------------------------------------------------------------- /scripts/ci-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -euo pipefail 3 | nimble install -y 4 | nimble test 5 | -------------------------------------------------------------------------------- /src/bigwig.nim: -------------------------------------------------------------------------------- 1 | import ./bigwigpkg/lib 2 | import ./bigwigpkg/version 3 | import ./bigwigpkg/cli 4 | import tables 5 | import strformat 6 | import os 7 | export lib 8 | 9 | proc main*() = 10 | type pair = object 11 | f: proc() 12 | description: string 13 | 14 | var dispatcher = { 15 | "view": pair(f:view_main, description:"view and convert bigwig"), 16 | "stats": pair(f:stats_main, description:"extract stats from a bigwig"), 17 | }.toOrderedTable 18 | 19 | var args = commandLineParams() 20 | 21 | if len(args) == 0 or not (args[0] in dispatcher): 22 | stderr.write_line &"version: {bigwigVersion}\n\nCommands:" 23 | for k, v in dispatcher: 24 | echo &" {k:<13}: {v.description}" 25 | if len(args) > 0 and (args[0] notin dispatcher) and args[0] notin @["-h", "-help"]: 26 | echo &"unknown program '{args[0]}'" 27 | quit "" 28 | 29 | dispatcher[args[0]].f() 30 | 31 | when isMainModule: 32 | main() 33 | 34 | -------------------------------------------------------------------------------- /src/bigwigpkg/cli.nim: -------------------------------------------------------------------------------- 1 | import strutils 2 | import tables 3 | import strformat 4 | import ./lib 5 | import hts/files 6 | import argparse 7 | 8 | type region = tuple[chrom: string, start: int, stop: int] 9 | 10 | proc isdigit2(s:string): bool {.inline.} = 11 | for c in s: 12 | if c < '0' or c > '9': return false 13 | return true 14 | 15 | proc looks_like_region_file(f:string): bool = 16 | if ':' in f and '-' in f: return false 17 | if not f.fileExists: return 
false 18 | var fh:HTSFile 19 | if not open(fh, f): 20 | stderr.write_line &"[slivar] tried '{f}' as a region file but couldn't open. Trying as an actual region" 21 | return false 22 | defer: 23 | fh.close() 24 | for l in fh.lines: 25 | if l[0] == '#' or l.strip().len == 0: continue 26 | var toks = l.strip().split("\t") 27 | if toks.len >= 3 and toks[1].isdigit2 and toks[2].isdigit2: return true 28 | stderr.write_line &"[slivar] tried '{f}' as a region file but it did not have proper format. Trying as an actual region" 29 | return false 30 | 31 | proc parse_colon_region(reg: string): region {.inline.} = 32 | let chrom_rest = reg.split(':', maxsplit=1) 33 | if chrom_rest.len == 1: 34 | return (chrom_rest[0], 0, -1) 35 | doAssert chrom_rest.len == 2, ("[bigwig] invalid region:" & reg) 36 | var ss = chrom_rest[1].split('-') 37 | result.chrom = chrom_rest[0] 38 | result.start = max(0, parseInt(ss[0]) - 1) 39 | result.stop = parseInt(ss[1]) 40 | if result.stop < result.start: 41 | quit ("[bigwig] ERROR. invalid region:" & reg) 42 | 43 | proc parse_one_region(reg:string): region {.inline.} = 44 | if reg == "": return ("", 0, -1) 45 | let chrom_rest = reg.rsplit('\t', maxsplit=4) 46 | if chrom_rest.len == 1: 47 | return parse_colon_region(reg) 48 | result.chrom = chrom_rest[0] 49 | result.start = max(0, parseInt(chrom_rest[1])) 50 | result.stop = parseInt(chrom_rest[2]) 51 | if result.stop < result.start: 52 | quit ("[bigwig] ERROR. 
invalid region:" & reg) 53 | 54 | iterator parse_region(reg_or_bed:string): region {.inline.} = 55 | if reg_or_bed.looks_like_region_file: 56 | for l in reg_or_bed.hts_lines: 57 | yield parse_one_region(l.strip(leading=false, chars={'\n', '\r'})) 58 | else: 59 | yield parse_one_region(reg_or_bed) 60 | 61 | 62 | proc from_fai(path: string): BigWigHeader = 63 | ## create a bigwig header from an fai (fasta index) or a genome file 64 | for l in path.lines: 65 | let vals = l.strip().split('\t') 66 | result.add((name: vals[0], length: parseInt(vals[1]), tid: result.len.uint32)) 67 | 68 | proc ffloat(f:float, precision:int=5): string {.inline.} = 69 | result = format_float(f, ffDecimal, precision=precision) 70 | result = result.strip(leading=false, chars={'0'}) 71 | if result[result.high] == '.': result.setLen(result.high) 72 | 73 | proc write_region_from(ofh:File, bw:var BigWig, reg:region) = 74 | for iv in bw.intervals(reg.chrom, reg.start, reg.stop): 75 | var v = ffloat(iv.value, precision=5) 76 | ofh.write_line(&"{reg.chrom}\t{iv.start}\t{iv.stop}\t{v}") 77 | 78 | type chunk = seq[tuple[start: int, stop:int, value:float32]] 79 | 80 | iterator chunks(bw: var BigWig, reg: region, n:int=2048): chunk = 81 | var cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n) 82 | for iv in bw.intervals(reg.chrom, reg.start, reg.stop): 83 | cache.add(iv) 84 | if cache.len == n: 85 | yield cache 86 | cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n) 87 | 88 | if cache.len != 0: 89 | yield cache 90 | 91 | proc make_interval(toks: seq[string], col: int): tuple[start: int, stop: int, value: float32] = 92 | return (parseInt(toks[1]), parseInt(toks[2]), parseFloat(toks[col]).float32) 93 | 94 | iterator chunks(bed_path: string, chrom: var string, n:int=2048, value_column: int= 4): chunk = 95 | let col = value_column - 1 96 | 97 | var cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n) 98 | for l in bed_path.hts_lines: 99 | let toks = 
l.strip.split('\t') 100 | if toks[0] != chrom and cache.len > 0: 101 | yield cache 102 | cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n) 103 | 104 | chrom = toks[0] 105 | var iv = make_interval(toks, col) 106 | # split on large chunks of 0 bases. 107 | if iv.value == 0 and iv.stop - iv.start > 100 and (cache.len == 0 or iv.stop - iv.start != cache[cache.high].stop - cache[cache.high].start): 108 | if cache.len > 0: 109 | yield cache 110 | cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n) 111 | yield @[iv] 112 | continue 113 | 114 | cache.add(iv) 115 | if cache.len == n: 116 | yield cache 117 | cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n) 118 | 119 | if cache.len != 0: 120 | yield cache 121 | 122 | proc looks_like_single_base(chunk: chunk): bool = 123 | var n = chunk.len.float32 124 | if chunk.len < 2: return false 125 | var nsmall = 0 126 | var nskip = 0 127 | var last_stop = chunk[0].start 128 | var total_bases = 0 129 | for c in chunk: 130 | nsmall += int(c.stop - c.start < 8) 131 | if last_stop > c.start: return false 132 | nskip += c.start - last_stop 133 | last_stop = c.stop 134 | total_bases += c.stop - c.start 135 | 136 | return nsmall.float32 / n > 0.95 and nskip == 0 137 | 138 | proc looks_like_fixed_span(chunk: chunk): bool = 139 | if chunk.len < 2: return false 140 | var sp = chunk[0].stop - chunk[0].start 141 | result = true 142 | for i, c in chunk: 143 | if likely(i < chunk.high) and c.stop - c.start != sp: return false 144 | 145 | proc write_fixed_span(ofh: var BigWig, chunk:chunk, chrom: string, span:int) = 146 | var values = newSeqOfCap[float32](chunk.len) 147 | 148 | # check for end of chrom 149 | let end_of_chrom = chunk.len > 1 and chunk[chunk.high].stop - chunk[chunk.high].start < span 150 | 151 | 152 | for c in chunk: 153 | for s in countup(c.start, c.stop - 1, span): 154 | values.add(c.value) 155 | 156 | let eoc_val = values[values.high] 157 | let eoc_start = chunk[chunk.high].start 158 
| let eoc_stop = chunk[chunk.high].stop 159 | 160 | if end_of_chrom: 161 | values.setLen(values.high) 162 | ofh.add(chrom, chunk[0].start.uint32, values, span=span.uint32, step=span.uint32) 163 | if end_of_chrom: 164 | # intervals: seq[tuple[start:T, stop: T, value: float32]] 165 | ofh.add(chrom, @[(start: eoc_start.uint32, stop: eoc_stop.uint32, value: eoc_val)]) 166 | 167 | proc write_single_base(ofh: var BigWig, chunk:chunk, chrom: string) = 168 | ofh.write_fixed_span(chunk, chrom, 1) 169 | 170 | proc write_region_from(ofh:var BigWig, bw:var BigWig, reg:region, chunksize:int) = 171 | ## read from bw and write to ofh. try to do this efficiently 172 | ## read a chunk of a particular size and guess what the best bigwig 173 | ## representation might be 174 | for chunk in bw.chunks(reg, chunksize): 175 | if chunk.looks_like_single_base: 176 | ofh.write_single_base(chunk, reg.chrom) 177 | elif chunk.looks_like_fixed_span: 178 | ofh.write_fixed_span(chunk, reg.chrom, chunk[0].stop - chunk[0].start) 179 | else: 180 | ofh.add(reg.chrom, chunk) 181 | 182 | proc write_from(ofh:var BigWig, bed_path: string, value_column: int, chunksize:int) = 183 | ## read from bw and write to ofh. try to do this efficiently 184 | ## read a chunk of a particular size and guess what the best bigwig 185 | ## representation might be 186 | var chrom: string 187 | for chunk in bed_path.chunks(chrom, n=chunksize, value_column=value_column): 188 | #echo chunk[0.. 
0 and args[0] == "stats": 209 | args = args[1..args.high] 210 | if len(args) == 0: args = @["--help"] 211 | 212 | try: 213 | discard p.parse(args) 214 | except UsageError: 215 | echo p.help 216 | echo "error:", getCurrentExceptionMsg() 217 | echo "specify a dummy region, even for --stat header" 218 | quit 1 219 | let opts = p.parse(args) 220 | if opts.help: 221 | quit 0 222 | 223 | var bw: BigWig 224 | if not bw.open(opts.input): 225 | quit "[bigwig] unable to open input file" 226 | defer: 227 | bw.close 228 | 229 | if opts.stat == "header": 230 | echo "#chrom\tlength\tmean_depth\tcoverage" 231 | for h in bw.header: 232 | var m = bw.stats(h.name, 0, h.length, stat=Stat.mean, nBins=1) 233 | var c = bw.stats(h.name, 0, h.length, stat=Stat.coverage, nBins=1) 234 | echo &"{h.name}\t{h.length}\t{ffloat(m[0])}\t{ffloat(c[0], 8)}" 235 | quit 0 236 | 237 | if opts.region == "": 238 | echo p.help 239 | quit "error: region is required" 240 | 241 | var L = {"mean": Stat.mean, "coverage": Stat.coverage, "min": Stat.min, "max": Stat.max, "sum": Stat.sum}.toTable 242 | var stat = L[opts.stat] 243 | var bins = parseInt(opts.bins) 244 | 245 | try: 246 | for region in opts.region.parse_region: 247 | var st = bw.stats(region.chrom, region.start, region.stop, stat=stat, nBins=bins) 248 | for v in st: 249 | echo &"{region.chrom}\t{region.start}\t{region.stop}\t{ffloat(v)}" 250 | except: 251 | echo "error:", getCurrentExceptionMsg() 252 | quit 1 253 | 254 | proc view_main*() = 255 | 256 | var p = newParser("bigwig view"): 257 | option("-r", "--region", help="optional chromosome, or chrom:start-stop region to view") 258 | option("-c", "--chrom-sizes", help="file indicating chromosome sizes (can be .fai), only used for converting BED->BigWig") 259 | option("-i", "--value-column", help="column-number (1-based) of the value to encode in to BigWig, only used for encoding BED->BigWig", default="4") 260 | option("-O", "--output-fmt", choices= @["bed", "bigwig"], default="bed", help="output 
format") 261 | option("-o", "--output-file", default="/dev/stdout", help="output bed or bigwig file") 262 | arg("input", nargs=1) 263 | 264 | var args = commandLineParams() 265 | if len(args) > 0 and args[0] == "view": 266 | args = args[1..args.high] 267 | if len(args) == 0: args = @["--help"] 268 | 269 | let opts = p.parse(args) 270 | if opts.help: 271 | quit 0 272 | if opts.input == "": 273 | # TODO: check for stdin (can't get libbigwig to open stdin) 274 | echo p.help 275 | echo "[bigwig] input file is required" 276 | quit 2 277 | 278 | let chunksize = 512 279 | if opts.input.isBig: 280 | 281 | var bw:BigWig 282 | if not bw.open(opts.input): 283 | quit "[bigwig] couldn't open file:" & opts.input 284 | 285 | if opts.output_fmt == "bed": 286 | ##################### 287 | ### BigWig To BED ### 288 | ##################### 289 | var ofh: File 290 | if not ofh.open(opts.output_file, fmWrite): 291 | quit "[bigwig] couldn't open output file:" & opts.output_file 292 | 293 | if opts.region == "": 294 | for chrom in bw.header: 295 | var reg: region = (chrom.name, 0, chrom.length) 296 | ofh.write_region_from(bw, reg) 297 | 298 | else: 299 | for region in opts.region.parse_region: 300 | ofh.write_region_from(bw, region) 301 | 302 | ofh.close 303 | 304 | elif opts.output_fmt == "bigwig": 305 | ######################## 306 | ### BigWig To BigWig ### 307 | ######################## 308 | var ofh: BigWig 309 | if not ofh.open(opts.output_file, fmWrite): 310 | quit "[bigwig] couldn't open output bigwig file:" & opts.output_file 311 | ofh.setHeader(bw.header) 312 | ofh.writeHeader 313 | 314 | if opts.region == "": 315 | for chrom in bw.header: 316 | var reg: region = (chrom.name, 0, chrom.length) 317 | ofh.write_region_from(bw, reg, chunksize) 318 | 319 | else: 320 | for region in opts.region.parse_region: 321 | ofh.write_region_from(bw, region, chunksize) 322 | 323 | ofh.close 324 | bw.close 325 | else: 326 | if opts.chrom_sizes == "": 327 | quit "[bigwig] --chrom-sizes is 
required when input is not bigwig." 328 | if opts.region != "": 329 | quit "[bigwig] --region is not supported for BED input" 330 | var h = opts.chrom_sizes.from_fai 331 | var ofh: BigWig 332 | if not ofh.open(opts.output_file, fmWrite): 333 | quit "[bigwig] couldn't open output bigwig file:" & opts.output_file 334 | ofh.setHeader(h) 335 | ofh.writeHeader 336 | ofh.write_from(opts.input, parseInt(opts.value_column), chunksize) 337 | ofh.close 338 | -------------------------------------------------------------------------------- /src/bigwigpkg/lib.nim: -------------------------------------------------------------------------------- 1 | import nimbigwig/bigWig 2 | export bbIsBigBed, bwIsBigWig 3 | 4 | import ./version 5 | export version 6 | 7 | type BigWig* = ref object 8 | bw : ptr bigWigFile_t 9 | path: string 10 | isBigBed: bool 11 | 12 | # these are internal, re-used cache before sending 13 | # to get data in format for bw from more common format 14 | starts: seq[uint32] 15 | stops: seq[uint32] 16 | values: seq[float32] 17 | cs: cstringArray 18 | 19 | proc c_free(p: pointer) {. 20 | importc: "free", header: "".} 21 | 22 | type BigWigHeader* = seq[tuple[name: string, length: int, tid: uint32]] 23 | 24 | type Stat* {.pure.} = enum 25 | #doesNotExist = -1 #!< This does nothing */ 26 | mean = 0 27 | stdev = 1 28 | max = 2 29 | min = 3 30 | # The number of bases covered 31 | coverage = 4 32 | # The sum of per-base values */ 33 | sum = 5 34 | 35 | template isBigBed*(b:BigWig): bool = 36 | ## indicate wether file is BigBed (true) or BigWig (false) 37 | b.isBigBed 38 | 39 | proc close*(bw: BigWig) = 40 | ## close the file and free up resources 41 | if bw.bw != nil: 42 | bwClose(bw.bw) 43 | bw.bw = nil 44 | if bw.cs != nil: 45 | deallocCStringArray(bw.cs) 46 | bw.cs = nil 47 | 48 | proc destroy_bigwig(bw: BigWig) = 49 | bw.close 50 | 51 | proc open*(bw: var BigWig, path: string, mode: FileMode=fmRead, maxZooms:int=8): bool = 52 | ## open the bigwig file. 
maxZooms is only used when opening in write mode. 53 | var fmode: string 54 | new(bw, destroy_bigwig) 55 | if mode == fmRead: fmode = "r" elif mode == fmWrite: fmode = "w" elif mode == fmAppend: fmode = "a" 56 | if mode == fmRead and bbIsBigBed(path, nil) == 1: 57 | bw = BigWig(bw: bbOpen(path, nil), path: path, isBigBed: true) 58 | else: 59 | bw = BigWig(bw: bwOpen(path, nil, fmode), path: path) 60 | if bw.bw == nil: return false 61 | result = true 62 | if mode == fmWrite: 63 | result = 0 == bw.bw.bwCreateHdr(maxZooms.int32) 64 | bw.cs = allocCStringArray(@[""]) 65 | 66 | type CPtr[T] = ptr UncheckedArray[T] 67 | 68 | proc SQL*(bw: BigWig): string = 69 | # return any SQL associated with a bigbed file; this can be used to parse the 70 | # extra columns in bigbed 71 | var cs = bbGetSQL(bw.bw) 72 | result = $cs 73 | c_free(cs) 74 | 75 | proc get_stop(bw: var BigWig, chrom: string, stop:int): int {.inline.} = 76 | if stop >= 0 : return stop 77 | let tid = bw.bw.bwGetTid(chrom) 78 | if tid == uint32.high: 79 | raise newException(KeyError, "[bigwig] unknown chromosome:" & chrom) 80 | result = cast[CPtr[uint32]](bw.bw.cl.len)[tid].int 81 | 82 | proc header*(bw: var BigWig): BigWigHeader = 83 | result = newSeq[tuple[name: string, length: int, tid:uint32]](bw.bw.cl.nKeys) 84 | var lens = cast[CPtr[uint32]](bw.bw.cl.len) 85 | var names = cast[cstringArray](bw.bw.cl.chrom) 86 | for i in 0.. 1: 176 | doAssert 0 == bw.bw.bwAppendIntervals(bw.starts[1].addr, bw.stops[1].addr, bw.values[1].addr, intervals.high.uint32), "[bigwig] error appending intervals" 177 | 178 | 179 | proc add*[T: int|uint32|uint64|int32|int64, U: int|uint32|uint64|int32|int64](bw:BigWig, chrom: string, span: U, intervals: seq[tuple[start:T, value: float32]]) = 180 | ## add spans to the bigwig. this adds fixed-length (span) intervals starting at the given positions. 
181 | if intervals.len == 0: return 182 | bw.setLens(intervals.len) 183 | 184 | for i, iv in intervals: 185 | bw.starts[i] = iv.start.uint32 186 | bw.values[i] = iv.value 187 | 188 | doAssert 0 == bw.bw.bwAddIntervalSpans(chrom.cstring, bw.starts[0].addr, span.uint32, bw.values[0].addr, 1'u32), "[bigwig] error adding interval spans" 189 | if intervals.len > 1: 190 | doAssert 0 == bw.bw.bwAppendIntervalSpans(bw.starts[1].addr, bw.values[1].addr, intervals.high.uint32), "[bigwig] error appending interval spans" 191 | 192 | proc add*(bw:BigWig, chrom:string, start: uint32, values: var seq[float32], step:uint32=1, span:uint32=1) = 193 | ## add values to the bigwig starting at start and stepping by step. 194 | ## this is the most efficient way (space and performance) to add to a bigwig file if your intervals match this format. 195 | if values.len == 0: return 196 | doAssert 0 == bw.bw.bwAddIntervalSpanSteps(chrom, start, span, step, values[0].addr, 1) 197 | if values.len > 1: 198 | doAssert 0 == bw.bw.bwAppendIntervalSpanSteps(values[1].addr, values.high.uint32) 199 | -------------------------------------------------------------------------------- /src/bigwigpkg/utils.nim: -------------------------------------------------------------------------------- 1 | import strutils 2 | -------------------------------------------------------------------------------- /src/bigwigpkg/version.nim: -------------------------------------------------------------------------------- 1 | const bigwigVersion* = "0.0.3" 2 | const bigwigGitCommit* = staticExec("git rev-parse --verify HEAD") 3 | -------------------------------------------------------------------------------- /tests/all.nim: -------------------------------------------------------------------------------- 1 | import ./test_read 2 | import ./test_write 3 | -------------------------------------------------------------------------------- /tests/ex.bb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/brentp/bigwig-nim/ce1cebfb34145fc30f685492c6024b4e38ddef72/tests/ex.bb -------------------------------------------------------------------------------- /tests/nim.cfg: -------------------------------------------------------------------------------- 1 | path = "$projectPath/../src" 2 | -------------------------------------------------------------------------------- /tests/test.bw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brentp/bigwig-nim/ce1cebfb34145fc30f685492c6024b4e38ddef72/tests/test.bw -------------------------------------------------------------------------------- /tests/test_read.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import bigwig 3 | import math 4 | 5 | proc `~~`[T: float32|float64](a: T, b: T): bool = 6 | return abs(a - b) < 1e-4 7 | 8 | suite "test reading": 9 | test "that missing file returns false": 10 | var bw: BigWig 11 | check false == open(bw, "xxxxxx.bw") 12 | bw.close 13 | 14 | test "that reading values works": 15 | var bw: BigWig 16 | check true == open(bw, "tests/test.bw") 17 | 18 | var values: seq[float32] 19 | bw.values(values, "1", 0, 50) 20 | check values[0] ~~ 0.1 21 | check values[1] ~~ 0.2 22 | check values[2] ~~ 0.3 23 | check $values[3] == "nan" 24 | bw.close 25 | 26 | test "that interval iteration works": 27 | var bw: BigWig 28 | check true == open(bw, "tests/test.bw") 29 | var expect : seq[tuple[start:int, stop:int, value:float32]] = @[ 30 | (start: 0, stop: 1, value: 0.1'f32), 31 | (start: 1, stop: 2, value: 0.2'f32), 32 | (start: 2, stop: 3, value: 0.3'f32), 33 | (start: 100, stop: 150, value: 1.4'f32), 34 | (start: 150, stop: 151, value: 1.5'f32)] 35 | 36 | var i = 0 37 | for iv in bw.intervals("1"): 38 | check iv == expect[i] 39 | i += 1 40 | bw.close 41 | 42 | test "that isBedBed is false for bigwig": 43 | var bw: BigWig 44 | check true == open(bw, 
"tests/test.bw") 45 | check not bw.isBigBed 46 | #bw.close 47 | 48 | test "that stats work": 49 | var bw: BigWig 50 | check true == open(bw, "tests/test.bw") 51 | 52 | var mean = bw.stats("1", 0, 3) 53 | check mean.len == 1 54 | check 0.2 ~~ mean[0] 55 | 56 | var mins = bw.stats("1", 0, 4, Stat.min, 4) 57 | check mins[0] ~~ 0.1 58 | check mins[1] ~~ 0.2 59 | check mins[2] ~~ 0.3 60 | check $mins[3] == "nan" 61 | bw.close 62 | 63 | test "header": 64 | var bw: BigWig 65 | check true == open(bw, "tests/test.bw") 66 | check bw.header == @[(name: "1", length: 195471971, tid: 0'u32), (name: "10", length: 130694993, tid: 1'u32)] 67 | check bw.SQL.len == 0 68 | 69 | suite "bigbed suite": 70 | 71 | test "that bigbed reading works": 72 | var bw: BigWig 73 | check true == open(bw, "tests/ex.bb") 74 | check bw.header.len == 1 75 | 76 | for iv in bw.entries("chr21"): 77 | check iv.start > 0 78 | 79 | echo bw.SQL 80 | 81 | check bw.isBigBed 82 | 83 | -------------------------------------------------------------------------------- /tests/test_write.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import bigwig 3 | import math 4 | 5 | proc `~~`[T: float32|float64](a: T, b: T): bool = 6 | return abs(a - b) < 1e-4 7 | 8 | suite "test writing": 9 | test "that setting header works": 10 | var bw: BigWig 11 | check true == open(bw, "tests/test.bw") 12 | 13 | var hdr = bw.header 14 | bw.close 15 | 16 | var wtr:BigWig 17 | check true == open(wtr, "tests/writer.bw", fmWrite) 18 | wtr.setHeader(hdr) 19 | check wtr.header == hdr 20 | 21 | wtr.setHeader(hdr[0..<1]) 22 | check wtr.header == hdr[0..<1] 23 | 24 | test "that adding intervals works": 25 | var wtr:BigWig 26 | check true == open(wtr, "tests/writer.bw", fmWrite) 27 | wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)]) 28 | wtr.writeHeader 29 | 30 | wtr.add("chr1", @[(start: 22, stop: 33, value: 0.01'f32), (start: 44, stop: 55, value: 155'f32)]) 31 | wtr.close 32 | 33 
| var rdr: BigWig 34 | check true == open(rdr, "tests/writer.bw") 35 | var i = 0 36 | for iv in rdr.intervals("chr1"): 37 | if i == 0: check iv == (start: 22, stop: 33, value: 0.01'f32) 38 | if i == 1: check iv == (start: 44, stop: 55, value: 155'f32) 39 | i.inc 40 | rdr.close 41 | 42 | test "that adding spans works": 43 | var wtr:BigWig 44 | check true == open(wtr, "tests/writer.bw", fmWrite) 45 | wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)]) 46 | wtr.writeHeader 47 | 48 | # add intervals with span of 100 49 | wtr.add("chr1", 100, @[(start: 22, value: 0.01'f32), (start: 44, value: 155'f32)]) 50 | wtr.close 51 | 52 | var rdr: BigWig 53 | check true == open(rdr, "tests/writer.bw") 54 | var i = 0 55 | for iv in rdr.intervals("chr1"): 56 | if i == 0: check iv == (start: 22, stop: 122, value: 0.01'f32) 57 | if i == 1: check iv == (start: 44, stop: 144, value: 155'f32) 58 | i.inc 59 | rdr.close 60 | 61 | 62 | test "that add span step works": 63 | 64 | var values = @[1'f32, 2222.2'f32, 555.5'f32, 666.6'f32] 65 | 66 | var wtr:BigWig 67 | check true == open(wtr, "tests/writer.bw", fmWrite) 68 | wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)]) 69 | wtr.writeHeader 70 | 71 | # add 1-base intervals starting at 100 72 | wtr.add("chr1", 100, values) 73 | wtr.close 74 | 75 | var rdr: BigWig 76 | check true == open(rdr, "tests/writer.bw") 77 | var i = 0 78 | for iv in rdr.intervals("chr1"): 79 | if i == 0: check iv == (start: 100, stop: 101, value: 1'f32) 80 | if i == 1: check iv == (start: 101, stop: 102, value: 2222.2'f32) 81 | check iv.value == values[i] 82 | check iv.start == 100 + i 83 | check iv.stop == 100 + i + 1 84 | i.inc 85 | 86 | test "that add span step works with span and step": 87 | 88 | var values = @[1'f32, 2222.2'f32, 555.5'f32, 666.6'f32] 89 | 90 | var wtr:BigWig 91 | check true == open(wtr, "tests/writer.bw", fmWrite) 92 | wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)]) 93 | wtr.writeHeader 94 | let span = 200'u32 95 | let step = 
33'u32 96 | 97 | # add intervals with start of 100 98 | wtr.add("chr1", 100, values, span=span, step=step) 99 | wtr.close 100 | 101 | var rdr: BigWig 102 | check true == open(rdr, "tests/writer.bw") 103 | var i = 0 104 | for iv in rdr.intervals("chr1"): 105 | check iv.start == 100 + i * step.int 106 | check iv.stop == 100 + i * step.int + span.int 107 | check iv.value == values[i] 108 | i.inc 109 | --------------------------------------------------------------------------------