├── .gitignore
├── .travis.yml
├── CHANGES.md
├── LICENSE
├── README.md
├── bigwig.nimble
├── docs
├── bigwig.html
└── lib.html
├── scripts
└── ci-tests.sh
├── src
├── bigwig.nim
└── bigwigpkg
│ ├── cli.nim
│ ├── lib.nim
│ ├── utils.nim
│ └── version.nim
└── tests
├── all.nim
├── ex.bb
├── nim.cfg
├── test.bw
├── test_read.nim
└── test_write.nim
/.gitignore:
--------------------------------------------------------------------------------
1 | nimcache/
2 | tests/test_read
3 | tests/test_write
4 | tests/all
5 | tests/writer.bw
6 | bigwig
7 | src/bigwigpkg/cli
8 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 | services: docker
3 | before_install:
4 | - docker pull brentp/musl-hts-nim
5 | script:
6 | - docker run -w /test -v `pwd`:/test brentp/musl-hts-nim scripts/ci-tests.sh
7 | branches:
8 | except:
9 | - gh-pages
10 |
--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
1 | # v0.0.3
2 |
3 | + support bed file of regions where previously only a single chrom:start-stop region
4 | was allowed.
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Brent Pedersen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # bigwig for nim
2 |
3 | [](https://travis-ci.com/brentp/bigwig-nim)
4 | [](https://brentp.github.io/bigwig-nim/lib.html)
5 |
6 | ## Command Line
7 |
8 | bigwig-nim includes a command-line tool distributed as a static binary [here](https://github.com/brentp/bigwig-nim/releases/latest).
9 | It supports converting bed to bigwig and bigwig to bed and extracting stats (mean, coverage, etc) for regions in a bigwig.
10 |
11 | There are other tools to do this, including [kentTools](https://hgwdev.gi.ucsc.edu/~kent/src/), which has a more restrictive license and does not support (b)gzipped input, and [bwtool](https://github.com/CRG-Barcelona/bwtool), which seems to provide similar functionality (but I am not able to build it).
12 |
13 |
14 | ### view
15 |
16 | To convert a bed with the value in the 4th column to bigwig, use:
17 |
18 | ```Shell
19 | bigwig view $bed_in --value-column 4 --chrom-sizes $fai -O bigwig -o $bigwig_out
20 | ```
21 | `bigwig` will automatically determine the best data format for each block (fixed span and step, or per-base). Most of the
22 | CPU time is spent parsing the input bed file.
23 |
24 | ### stats
25 |
26 | To get the mean value for a given region (in this case on chromosome 22)
27 |
28 | ```Shell
29 | bigwig stats --stat mean $bigwig 22:145000-155000
30 | # or a bed file of regions
31 | bigwig stats --stat mean $bigwig $bed
32 | ```
33 |
34 | Output is tab-delimited `chrom`, `start`, `stop`, `stat` for each row in the bed (or just once for the region).
35 |
36 | The supported stats are `mean`, `min`, `max`, `coverage`, `sum` with a special-case for the stat of `header` which
37 | shows the chromosomes, lengths and mean coverages for each chromosome in the bigwig file.
38 |
39 |
40 | ## Reading
41 |
42 | ```Nim
43 | var bw: BigWig
44 | bw.open(path, fmRead)
45 |
46 | # avoid allocating when possible
47 | var values: seq[float32]
48 | bw.values(values, "chr1", 0, 2222)
49 |
50 | for iv in bw.intervals("chr2", 999, 88888): # iterator.
51 | # tuple[start: int, stop: int, value: float32]
52 |
53 | # for bigbed
54 | for iv in bw.entries("chr2", 999, 88888): # iterator.
55 | # tuple[start: int, stop: int, value: cstring]
56 | # value contains "SQL" for bigbed entry.
57 |
58 | # single value
59 | var m: seq[float64] = bw.stats("chr2", 999, 9999, stat=Stat.mean)
60 |
61 | # multiple bins:
62 | var L: seq[float64] = bw.stats("chr2", 999, 9999, stat=Stat.min, nBins=10)
63 |
64 | echo bw.header # @[(name: "1", length: 195471971, tid: 0'u32), (name: "10", length: 130694993, tid: 1'u32)]
65 |
66 | bw.close
67 | ```
68 |
69 | ## Writing
70 |
71 | ```Nim
72 | var wtr:BigWig
73 | doAssert wtr.open("tests/writer.bw", fmWrite)
74 | wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)])
75 | wtr.writeHeader
76 |
77 | # add intervals with tuples
78 | wtr.add("chr1", @[(start: 22, stop: 33, value: 0.01'f32), (start: 44, stop: 55, value: 155'f32)])
79 |
80 | # or with, for example a span of 15 bases:
81 | wtr.add("chr1", 15, @[(start: 20, value: 0.01'f32), (start: 30, value: 155'f32)])
82 |
83 | # or an array of values with a given span and step:
84 | var values = @[0.1'f32, 0.2, 0.3, 0.4]
85 | wtr.add("chr1", 100, values, span=100, step=200) # 100-200 is 0.1, 300-400 is 0.2 ...
86 | wtr.close()
87 |
88 | ```
89 |
--------------------------------------------------------------------------------
/bigwig.nimble:
--------------------------------------------------------------------------------
1 | import ospaths
2 | template thisModuleFile: string = instantiationInfo(fullPaths = true).filename
3 |
4 | when fileExists(thisModuleFile.parentDir / "src/bigwig.nim"):
5 | # In the git repository the Nimble sources are in a ``src`` directory.
6 | import src/bigwigpkg/version as _
7 | else:
8 | # When the package is installed, the ``src`` directory disappears.
9 | import bigwigpkg/version as _
10 |
11 | # Package
12 |
13 | version = bigwigVersion
14 | author = "Brent Pedersen"
15 | description = "ergonomic wrapper for libbigwig"
16 | license = "MIT"
17 |
18 |
19 | # Dependencies
20 |
21 | requires "nimbigwig", "argparse", "hts >= 0.2.20"
22 | srcDir = "src"
23 | installExt = @["nim"]
24 |
25 | bin = @["bigwig"]
26 |
27 | skipDirs = @["tests"]
28 |
29 | import ospaths,strutils
30 |
# `nimble test`: compile tests/all.nim with line directives, debug info and
# thread support enabled, then run the resulting binary (-r).
task test, "run the tests":
  exec "nim c --lineDir:on --debuginfo -r --threads:on tests/all"
33 |
# `nimble docs`: generate the HTML API documentation for the CLI module and
# the library module.
task docs, "Builds documentation":
  # NOTE(review): this creates docs/bigwig, but the -o: path built below does
  # not appear to point into it -- confirm whether the directory is needed.
  mkDir("docs"/"bigwig")
  for file in @["src/bigwig.nim", "src/bigwigpkg/lib.nim"]:
    # reduce the source path to its last component with an .html extension
    var f = file.changefileext("html").split("/")
    var fn = f[f.high]
    exec "nim doc2 --verbosity:0 --hints:off -o:" & "docs" /../ fn.changefileext("html") & " " & file
40 |
41 |
--------------------------------------------------------------------------------
/docs/bigwig.html:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | bigwig
20 |
786 |
787 |
788 |
789 |
802 |
803 |
804 |
805 |
806 |
807 |
bigwig
808 |
809 |
810 |
814 |
815 | Search:
817 |
818 |
819 | Group by:
820 |
821 | Section
822 | Type
823 |
824 |
825 |
826 |
827 | Imports
828 |
831 |
832 |
833 | Procs
834 |
839 |
840 |
841 | Exports
842 |
845 |
846 |
847 |
848 |
849 |
850 |
851 |
852 |
853 |
854 |
859 |
860 |
861 |
862 |
863 | proc main ( ) { ... } {. raises : [ IOError , ValueError , Exception , KeyError ] ,
864 | tags : [ ReadIOEffect , WriteIOEffect , RootEffect ] .}
865 |
866 |
867 |
868 |
869 |
870 |
871 |
872 |
873 |
874 |
875 | header , version , setHeader , BigWigHeader , SQL , stats , Stat , open , add , isBigBed , close , add , lib , BigWig , values , writeHeader , add , entries , bigwigVersion , intervals , bigwigGitCommit
876 |
877 |
878 |
879 |
880 |
881 |
882 |
883 |
884 |
885 | Made with Nim. Generated: 2019-08-17 21:14:55 UTC
886 |
887 |
888 |
889 |
890 |
891 |
892 |
893 |
--------------------------------------------------------------------------------
/docs/lib.html:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | lib
20 |
786 |
787 |
788 |
789 |
802 |
803 |
804 |
805 |
806 |
807 |
lib
808 |
809 |
810 |
814 |
815 | Search:
817 |
818 |
819 | Group by:
820 |
821 | Section
822 | Type
823 |
824 |
825 |
913 |
914 |
915 |
916 |
917 |
918 |
919 |
924 |
925 |
926 |
927 |
928 | BigWig = ref object
929 | bw : ptr bigWigFile_t
930 | path : string
931 | isBigBed : bool
932 | starts : seq [ uint32 ]
933 | stops : seq [ uint32 ]
934 | values : seq [ float32 ]
935 | cs : cstringArray
936 |
937 |
938 |
939 |
940 |
941 |
942 |
943 | BigWigHeader = seq [ tuple [ name : string , length : int , tid : uint32 ] ]
944 |
945 |
946 |
947 |
948 |
949 |
950 | Stat { ... } {. pure .} = enum
951 | mean = 0 , stdev = 1 , max = 2 , min = 3 , coverage = 4 , sum = 5
952 |
953 |
954 |
955 |
956 |
957 |
958 |
959 |
960 |
961 |
962 |
963 | proc close ( bw : BigWig ) { ... } {. raises : [ ] , tags : [ ] .}
964 |
965 |
966 | close the file and free up resources
967 |
968 |
969 |
970 | proc open ( bw : var BigWig ; path : string ; mode : FileMode = fmRead ; maxZooms : int = 8 ) : bool { ... } {.
971 | raises : [ ] , tags : [ ] .}
972 |
973 |
974 | open the bigwig file. maxZooms is only used when opening in write mode.
975 |
976 |
977 |
978 | proc SQL ( bw : BigWig ) : string { ... } {. raises : [ ] , tags : [ ] .}
979 |
980 |
981 |
982 |
983 |
984 |
985 | proc header ( bw : var BigWig ) : BigWigHeader { ... } {. raises : [ ] , tags : [ ] .}
986 |
987 |
988 |
989 |
990 |
991 |
992 | proc values ( bw : var BigWig ; values : var seq [ float32 ] ; chrom : string ; start : int = 0 ;
993 | stop : int = - 1 ; includeNA : bool = true ) { ... } {. raises : [ KeyError ] , tags : [ ] .}
994 |
995 |
996 | extract values for the given range into values
997 |
998 |
999 |
1000 | proc stats ( bw : var BigWig ; chrom : string ; start : int = 0 ; stop : int = - 1 ;
1001 | stat : Stat = Stat . mean ; nBins = 1 ) : seq [ float64 ] { ... } {. raises : [ KeyError ] , tags : [ ] .}
1002 |
1003 |
1004 |
1005 |
1006 |
1007 |
1008 | proc setHeader ( bw : BigWig ; header : BigWigHeader ) { ... } {. raises : [ ] , tags : [ ] .}
1009 |
1010 |
1011 | set the header of a bigwig file opened for writing
1012 |
1013 |
1014 |
1015 | proc writeHeader ( bw : BigWig ) { ... } {. raises : [ ] , tags : [ ] .}
1016 |
1017 |
1018 | write the header (which must have been added in setHeader) to file.
1019 |
1020 |
1021 |
1022 | proc add [ T : int | uint32 | uint64 | int32 | int64 ] ( bw : BigWig ; chrom : string ;
1023 | intervals : seq [ tuple [ start : T , stop : T , value : float32 ] ] )
1024 |
1025 |
1026 | add intervals to the bigwig.
1027 |
1028 |
1029 |
1030 | proc add [ T : int | uint32 | uint64 | int32 | int64 ; U : int | uint32 | uint64 | int32 | int64 ] (
1031 | bw : BigWig ; chrom : string ; span : U ;
1032 | intervals : seq [ tuple [ start : T , value : float32 ] ] )
1033 |
1034 |
1035 | add spans to the bigwig. this adds fixed-length (span) intervals starting at the given positions.
1036 |
1037 |
1038 |
1039 | proc add ( bw : BigWig ; chrom : string ; start : uint32 ; values : var seq [ float32 ] ;
1040 | step : uint32 = 1 ; span : uint32 = 1 ) { ... } {. raises : [ ] , tags : [ ] .}
1041 |
1042 |
1043 | add values to the bigwig starting at start and stepping by step. this is the most efficient way (space and performance) to add to a bigwig file if your intervals match this format.
1044 |
1045 |
1046 |
1047 |
1048 |
1049 |
1050 |
1051 |
1052 | iterator entries ( bw : var BigWig ; chrom : string ; start : int = 0 ; stop : int = - 1 ) : tuple [
1053 | start : int , stop : int , value : cstring ] { ... } {. raises : [ KeyError ] , tags : [ ] .}
1054 |
1055 |
1056 | yield bigbed entries. any value is returned as a string
1057 |
1058 |
1059 |
1060 | iterator intervals ( bw : var BigWig ; chrom : string ; start : int = 0 ; stop : int = - 1 ) : tuple [
1061 | start : int , stop : int , value : float32 ] { ... } {. raises : [ KeyError , ValueError ] , tags : [ ] .}
1062 |
1063 |
1064 | iterate over the values in the given region
1065 |
1066 |
1067 |
1068 |
1069 |
1070 |
1071 |
1072 |
1073 | template isBigBed ( b : BigWig ) : bool
1074 |
1075 |
1076 | indicate whether file is BigBed (true) or BigWig (false)
1077 |
1078 |
1079 |
1080 |
1081 |
1086 |
1087 |
1088 |
1089 |
1090 |
1091 |
1092 |
1093 |
1094 | Made with Nim. Generated: 2019-08-17 21:14:56 UTC
1095 |
1096 |
1097 |
1098 |
1099 |
1100 |
1101 |
1102 |
--------------------------------------------------------------------------------
/scripts/ci-tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -euo pipefail
3 | nimble install -y
4 | nimble test
5 |
--------------------------------------------------------------------------------
/src/bigwig.nim:
--------------------------------------------------------------------------------
1 | import ./bigwigpkg/lib
2 | import ./bigwigpkg/version
3 | import ./bigwigpkg/cli
4 | import tables
5 | import strformat
6 | import os
7 | export lib
8 |
proc main*() =
  ## Command-line entry point: dispatch to the sub-command named by the first
  ## argument (`view` or `stats`).
  ##
  ## With no arguments or an unknown first argument, the version and the list
  ## of sub-commands are printed and the process exits with a failure status.
  ## An explicit help flag (`-h`, `-help`, `--help`) prints the same listing
  ## but exits with status 0 and is not reported as an unknown program.
  type pair = object
    f: proc()
    description: string

  var dispatcher = {
    "view": pair(f:view_main, description:"view and convert bigwig"),
    "stats": pair(f:stats_main, description:"extract stats from a bigwig"),
    }.toOrderedTable

  let args = commandLineParams()
  const helpFlags = ["-h", "-help", "--help"]

  if len(args) == 0 or args[0] notin dispatcher:
    stderr.write_line &"version: {bigwigVersion}\n\nCommands:"
    for k, v in dispatcher:
      echo &" {k:<13}: {v.description}"
    if len(args) > 0:
      if args[0] in helpFlags:
        # asking for help is not an error
        quit 0
      echo &"unknown program '{args[0]}'"
    quit ""

  # the sub-command parses the remaining arguments itself
  dispatcher[args[0]].f()
30 |
31 | when isMainModule:
32 | main()
33 |
34 |
--------------------------------------------------------------------------------
/src/bigwigpkg/cli.nim:
--------------------------------------------------------------------------------
1 | import strutils
2 | import tables
3 | import strformat
4 | import ./lib
5 | import hts/files
6 | import argparse
7 |
# A genomic interval as produced by the region parsers below; a `stop` of -1
# is what those parsers return when only a chromosome name was given.
type region = tuple[chrom: string, start: int, stop: int]
9 |
proc isdigit2(s:string): bool {.inline.} =
  ## Return true when `s` is non-empty and consists only of ASCII digits.
  ## An empty string is not a number, so it yields false.
  if s.len == 0: return false
  for c in s:
    if c notin {'0'..'9'}: return false
  return true
14 |
proc looks_like_region_file(f:string): bool =
  ## Decide whether `f` names a BED-like file of regions rather than a single
  ## `chrom:start-stop` region.
  ##
  ## Returns false when `f` contains both ':' and '-', when no such file
  ## exists, or when it cannot be opened. Otherwise the first line that is
  ## neither blank nor a '#' comment decides: it must have at least 3
  ## tab-separated columns with numeric 2nd and 3rd columns.
  if ':' in f and '-' in f: return false
  if not f.fileExists: return false
  var fh:HTSFile
  if not open(fh, f):
    stderr.write_line &"[bigwig] tried '{f}' as a region file but couldn't open. Trying as an actual region"
    return false
  defer:
    fh.close()
  for l in fh.lines:
    # test the length first: indexing an empty line is an error
    if l.len == 0 or l[0] == '#': continue
    let line = l.strip()
    if line.len == 0: continue
    let toks = line.split("\t")
    if toks.len >= 3 and toks[1].isdigit2 and toks[2].isdigit2: return true
    stderr.write_line &"[bigwig] tried '{f}' as a region file but it did not have proper format. Trying as an actual region"
    return false
30 |
proc parse_colon_region(reg: string): region {.inline.} =
  ## Parse `chrom` or `chrom:start-stop` into a region.
  ##
  ## A bare chromosome yields `(chrom, 0, -1)`. For `chrom:start-stop` the
  ## start is shifted down by one (never below 0) and the stop is used as
  ## given. A region without a `start-stop` pair, or with stop < start, ends
  ## the program with an error message; non-numeric coordinates raise the
  ## ValueError of `parseInt`.
  let chrom_rest = reg.split(':', maxsplit=1)
  if chrom_rest.len == 1:
    return (chrom_rest[0], 0, -1)
  let ss = chrom_rest[1].split('-')
  if ss.len < 2:
    # e.g. "chr1:100" -- there is no stop coordinate to read
    quit ("[bigwig] ERROR. invalid region:" & reg)
  result.chrom = chrom_rest[0]
  result.start = max(0, parseInt(ss[0]) - 1)
  result.stop = parseInt(ss[1])
  if result.stop < result.start:
    quit ("[bigwig] ERROR. invalid region:" & reg)
42 |
proc parse_one_region(reg:string): region {.inline.} =
  ## Parse either a tab-separated BED row (`chrom<TAB>start<TAB>stop[...]`)
  ## or, when there is no tab, a `chrom[:start-stop]` region string.
  ##
  ## An empty string yields `("", 0, -1)`. BED coordinates are used as given
  ## (start clamped to >= 0). Rows with fewer than 3 columns, or with
  ## stop < start, end the program with an error message.
  if reg == "": return ("", 0, -1)
  # split from the left so extra trailing columns can never be merged into
  # the chromosome field; everything after `stop` lands in the 4th piece.
  let toks = reg.split('\t', maxsplit=3)
  if toks.len == 1:
    return parse_colon_region(reg)
  if toks.len < 3:
    quit ("[bigwig] ERROR. invalid region:" & reg)
  result.chrom = toks[0]
  result.start = max(0, parseInt(toks[1]))
  result.stop = parseInt(toks[2])
  if result.stop < result.start:
    quit ("[bigwig] ERROR. invalid region:" & reg)
53 |
iterator parse_region(reg_or_bed:string): region {.inline.} =
  ## Yield the region(s) described by `reg_or_bed`.
  ##
  ## When the argument looks like a region file, one region is yielded per
  ## data line; blank lines and '#' comment lines are skipped, matching what
  ## the format sniffing ignores. Otherwise the argument itself is parsed as
  ## a single region.
  if reg_or_bed.looks_like_region_file:
    for l in reg_or_bed.hts_lines:
      let line = l.strip(leading=false, chars={'\n', '\r'})
      if line.len == 0 or line[0] == '#' or line.strip().len == 0: continue
      yield parse_one_region(line)
  else:
    yield parse_one_region(reg_or_bed)
60 |
61 |
proc from_fai(path: string): BigWigHeader =
  ## create a bigwig header from an fai (fasta index) or a genome file
  ##
  ## Each non-blank line must have at least two tab-separated columns: the
  ## chromosome name and its length. `tid` is assigned in file order starting
  ## at 0. Raises ValueError for a line with fewer than two columns or a
  ## non-numeric length.
  for l in path.lines:
    let line = l.strip()
    if line.len == 0: continue
    let vals = line.split('\t')
    if vals.len < 2:
      raise newException(ValueError, "[bigwig] expected name and length columns in chrom-sizes file, got:" & l)
    result.add((name: vals[0], length: parseInt(vals[1]), tid: result.len.uint32))
67 |
proc ffloat(f:float, precision:int=5): string {.inline.} =
  ## Format `f` in decimal notation with at most `precision` fractional
  ## digits, then drop trailing fractional zeros and a dangling '.'.
  ## e.g. 1.50000 -> "1.5", 2.00000 -> "2".
  result = format_float(f, ffDecimal, precision=precision)
  # only zeros after a decimal point are insignificant; without this guard
  # an integer rendering such as "100" would be cut down to "1".
  if '.' in result:
    result = result.strip(leading=false, chars={'0'})
    if result[result.high] == '.': result.setLen(result.high)
72 |
proc write_region_from(ofh:File, bw:var BigWig, reg:region) =
  ## Write every interval of `bw` that falls in `reg` to `ofh` as one
  ## tab-separated line: chrom, start, stop, value (at most 5 decimals).
  for iv in intervals(bw, reg.chrom, reg.start, reg.stop):
    let v = ffloat(iv.value, precision=5)
    writeLine(ofh, &"{reg.chrom}\t{iv.start}\t{iv.stop}\t{v}")
77 |
# A batch of (start, stop, value) intervals that is classified and written
# to a bigwig as one unit.
type chunk = seq[tuple[start: int, stop:int, value:float32]]
79 |
iterator chunks(bw: var BigWig, reg: region, n:int=2048): chunk =
  ## Yield the intervals of `bw` inside `reg` in batches of exactly `n`
  ## intervals; a final, shorter batch is yielded when intervals remain.
  var pending = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n)
  for iv in bw.intervals(reg.chrom, reg.start, reg.stop):
    pending.add(iv)
    if pending.len != n: continue
    # batch is full: hand it out and start a fresh one
    yield pending
    pending = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n)

  if pending.len > 0:
    yield pending
90 |
proc make_interval(toks: seq[string], col: int): tuple[start: int, stop: int, value: float32] =
  ## Build an interval from the tokens of a BED row: columns 2 and 3
  ## (indexes 1 and 2) are the integer start and stop, and `toks[col]`
  ## (0-based index) is parsed as the float value.
  result.start = parseInt(toks[1])
  result.stop = parseInt(toks[2])
  result.value = float32(parseFloat(toks[col]))
93 |
iterator chunks(bed_path: string, chrom: var string, n:int=2048, value_column: int= 4): chunk =
  ## Yield batches of intervals read from the BED file at `bed_path`.
  ##
  ## `value_column` is the 1-based column holding the value. A batch never
  ## spans two chromosomes: the pending batch is yielded *before* `chrom` is
  ## updated, so at every yield `chrom` names the chromosome of the yielded
  ## intervals. Batches hold at most `n` intervals. A zero-valued interval
  ## longer than 100 bases whose length differs from the previous interval
  ## is yielded as a batch of its own.
  ##
  ## Blank lines and '#' comment lines are skipped. A data line with too few
  ## columns raises ValueError.
  let col = value_column - 1

  var cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n)
  for l in bed_path.hts_lines:
    let line = l.strip
    if line.len == 0 or line[0] == '#': continue
    let toks = line.split('\t')
    if col < 0 or toks.len <= max(col, 2):
      raise newException(ValueError, "[bigwig] too few columns in BED line:" & line)
    if toks[0] != chrom and cache.len > 0:
      # chromosome changed: flush while `chrom` still names the old one
      yield cache
      cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n)

    chrom = toks[0]
    var iv = make_interval(toks, col)
    # split on large chunks of 0 bases.
    if iv.value == 0 and iv.stop - iv.start > 100 and (cache.len == 0 or iv.stop - iv.start != cache[cache.high].stop - cache[cache.high].start):
      if cache.len > 0:
        yield cache
        cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n)
      yield @[iv]
      continue

    cache.add(iv)
    if cache.len == n:
      yield cache
      cache = newSeqOfCap[tuple[start: int, stop:int, value:float32]](n)

  if cache.len != 0:
    yield cache
121 |
proc looks_like_single_base(chunk: chunk): bool =
  ## True when the chunk is suited to per-base encoding: it has at least two
  ## intervals, they are sorted and contiguous (no overlap, no gap), and more
  ## than 95% of them are shorter than 8 bases.
  if chunk.len < 2: return false
  var small_count = 0
  var gap_bases = 0
  var prev_stop = chunk[0].start
  for iv in chunk:
    if iv.stop - iv.start < 8: inc small_count
    # an interval starting before the previous stop means overlap/unsorted
    if prev_stop > iv.start: return false
    gap_bases += iv.start - prev_stop
    prev_stop = iv.stop

  result = gap_bases == 0 and small_count.float32 / chunk.len.float32 > 0.95
137 |
proc looks_like_fixed_span(chunk: chunk): bool =
  ## True when the chunk can be written as fixed span/step data starting at
  ## `chunk[0].start` with step == span, i.e.:
  ##  * it has at least two intervals and a positive span,
  ##  * every interval starts exactly where the previous one stopped,
  ##  * every interval but the last has the same length as the first,
  ##  * the last interval is non-empty and no longer than the span (a short
  ##    last interval is written separately by the fixed-span writer).
  if chunk.len < 2: return false
  let sp = chunk[0].stop - chunk[0].start
  if sp <= 0: return false
  var expected_start = chunk[0].start
  for i, c in chunk:
    # a gap or overlap would shift every following value when the data is
    # written with a constant step.
    if c.start != expected_start: return false
    let length = c.stop - c.start
    if likely(i < chunk.high):
      if length != sp: return false
    elif length <= 0 or length > sp:
      return false
    expected_start = c.stop
  result = true
144 |
proc write_fixed_span(ofh: var BigWig, chunk:chunk, chrom: string, span:int) =
  ## Write `chunk` to `ofh` as fixed span/step data beginning at
  ## `chunk[0].start`, using `span` for both span and step. Each interval
  ## contributes one value per `span` bases it covers.
  ##
  ## When the last interval is shorter than `span`, its value is left out of
  ## the fixed-span block and written afterwards as an explicit interval.
  let last = chunk[chunk.high]
  # check for end of chrom
  let short_tail = chunk.len > 1 and last.stop - last.start < span

  var values = newSeqOfCap[float32](chunk.len)
  for iv in chunk:
    for _ in countup(iv.start, iv.stop - 1, span):
      values.add(iv.value)

  # remember the final value before it is possibly trimmed off
  let tail_value = values[values.high]
  if short_tail:
    values.setLen(values.high)

  ofh.add(chrom, chunk[0].start.uint32, values, span=span.uint32, step=span.uint32)
  if short_tail:
    ofh.add(chrom, @[(start: last.start.uint32, stop: last.stop.uint32, value: tail_value)])
166 |
proc write_single_base(ofh: var BigWig, chunk:chunk, chrom: string) =
  ## Write `chunk` with one value per base: the fixed-span writer with a
  ## span of 1.
  write_fixed_span(ofh, chunk, chrom, 1)
169 |
proc write_region_from(ofh:var BigWig, bw:var BigWig, reg:region, chunksize:int) =
  ## Copy the intervals of `reg` from `bw` into `ofh`.
  ## Intervals are read in batches of `chunksize`; for each batch the most
  ## compact representation that fits is chosen: per-base, fixed span, or
  ## plain intervals as the fallback.
  for chunk in bw.chunks(reg, chunksize):
    if looks_like_single_base(chunk):
      write_single_base(ofh, chunk, reg.chrom)
    elif looks_like_fixed_span(chunk):
      let span = chunk[0].stop - chunk[0].start
      write_fixed_span(ofh, chunk, reg.chrom, span)
    else:
      add(ofh, reg.chrom, chunk)
181 |
182 | proc write_from(ofh:var BigWig, bed_path: string, value_column: int, chunksize:int) =
183 | ## read from bw and write to ofh. try to do this efficiently
184 | ## read a chunk of a particular size and guess what the best bigwig
185 | ## representation might be
186 | var chrom: string
187 | for chunk in bed_path.chunks(chrom, n=chunksize, value_column=value_column):
188 | #echo chunk[0.. 0 and args[0] == "stats":
209 | args = args[1..args.high]
210 | if len(args) == 0: args = @["--help"]
211 |
212 | try:
213 | discard p.parse(args)
214 | except UsageError:
215 | echo p.help
216 | echo "error:", getCurrentExceptionMsg()
217 | echo "specify a dummy region, even for --stat header"
218 | quit 1
219 | let opts = p.parse(args)
220 | if opts.help:
221 | quit 0
222 |
223 | var bw: BigWig
224 | if not bw.open(opts.input):
225 | quit "[bigwig] unable to open input file"
226 | defer:
227 | bw.close
228 |
229 | if opts.stat == "header":
230 | echo "#chrom\tlength\tmean_depth\tcoverage"
231 | for h in bw.header:
232 | var m = bw.stats(h.name, 0, h.length, stat=Stat.mean, nBins=1)
233 | var c = bw.stats(h.name, 0, h.length, stat=Stat.coverage, nBins=1)
234 | echo &"{h.name}\t{h.length}\t{ffloat(m[0])}\t{ffloat(c[0], 8)}"
235 | quit 0
236 |
237 | if opts.region == "":
238 | echo p.help
239 | quit "error: region is required"
240 |
241 | var L = {"mean": Stat.mean, "coverage": Stat.coverage, "min": Stat.min, "max": Stat.max, "sum": Stat.sum}.toTable
242 | var stat = L[opts.stat]
243 | var bins = parseInt(opts.bins)
244 |
245 | try:
246 | for region in opts.region.parse_region:
247 | var st = bw.stats(region.chrom, region.start, region.stop, stat=stat, nBins=bins)
248 | for v in st:
249 | echo &"{region.chrom}\t{region.start}\t{region.stop}\t{ffloat(v)}"
250 | except:
251 | echo "error:", getCurrentExceptionMsg()
252 | quit 1
253 |
proc view_main*() =
  ## Entry point of the `bigwig view` sub-command.
  ##
  ## Depending on the input and `--output-fmt` this converts:
  ##  * big input -> BED text (default),
  ##  * big input -> BigWig,
  ##  * any other input -> BigWig, treating the input as BED
  ##    (requires `--chrom-sizes`; `--region` is rejected).
  ## Errors end the process via `quit`.

  var p = newParser("bigwig view"):
    option("-r", "--region", help="optional chromosome, or chrom:start-stop region to view")
    option("-c", "--chrom-sizes", help="file indicating chromosome sizes (can be .fai), only used for converting BED->BigWig")
    option("-i", "--value-column", help="column-number (1-based) of the value to encode in to BigWig, only used for encoding BED->BigWig", default="4")
    option("-O", "--output-fmt", choices= @["bed", "bigwig"], default="bed", help="output format")
    option("-o", "--output-file", default="/dev/stdout", help="output bed or bigwig file")
    arg("input", nargs=1)

  # drop the sub-command name when invoked through the dispatcher; show the
  # help when nothing else was given.
  var args = commandLineParams()
  if len(args) > 0 and args[0] == "view":
    args = args[1..args.high]
  if len(args) == 0: args = @["--help"]

  let opts = p.parse(args)
  if opts.help:
    quit 0
  if opts.input == "":
    # TODO: check for stdin (can't get libbigwig to open stdin)
    echo p.help
    echo "[bigwig] input file is required"
    quit 2

  # number of intervals read per batch when re-encoding
  let chunksize = 512
  # NOTE(review): `isBig` is defined outside this view; presumably it tests
  # whether the path is a bigwig/bigbed file -- confirm.
  if opts.input.isBig:

    var bw:BigWig
    if not bw.open(opts.input):
      quit "[bigwig] couldn't open file:" & opts.input

    if opts.output_fmt == "bed":
      #####################
      ### BigWig To BED ###
      #####################
      var ofh: File
      if not ofh.open(opts.output_file, fmWrite):
        quit "[bigwig] couldn't open output file:" & opts.output_file

      if opts.region == "":
        # no region requested: dump every chromosome in header order
        for chrom in bw.header:
          var reg: region = (chrom.name, 0, chrom.length)
          ofh.write_region_from(bw, reg)

      else:
        for region in opts.region.parse_region:
          ofh.write_region_from(bw, region)

      ofh.close

    elif opts.output_fmt == "bigwig":
      ########################
      ### BigWig To BigWig ###
      ########################
      var ofh: BigWig
      if not ofh.open(opts.output_file, fmWrite):
        quit "[bigwig] couldn't open output bigwig file:" & opts.output_file
      # the output uses the same chromosome list as the input
      ofh.setHeader(bw.header)
      ofh.writeHeader

      if opts.region == "":
        for chrom in bw.header:
          var reg: region = (chrom.name, 0, chrom.length)
          ofh.write_region_from(bw, reg, chunksize)

      else:
        for region in opts.region.parse_region:
          ofh.write_region_from(bw, region, chunksize)

      ofh.close
    bw.close
  else:
    # input is not a big file: treat it as BED and encode it as BigWig
    if opts.chrom_sizes == "":
      quit "[bigwig] --chrom-sizes is required when input is not bigwig."
    if opts.region != "":
      quit "[bigwig] --region is not supported for BED input"
    var h = opts.chrom_sizes.from_fai
    var ofh: BigWig
    if not ofh.open(opts.output_file, fmWrite):
      quit "[bigwig] couldn't open output bigwig file:" & opts.output_file
    ofh.setHeader(h)
    ofh.writeHeader
    ofh.write_from(opts.input, parseInt(opts.value_column), chunksize)
    ofh.close
338 |
--------------------------------------------------------------------------------
/src/bigwigpkg/lib.nim:
--------------------------------------------------------------------------------
1 | import nimbigwig/bigWig
2 | export bbIsBigBed, bwIsBigWig
3 |
4 | import ./version
5 | export version
6 |
# Handle for an open bigwig or bigbed file.
type BigWig* = ref object
  # underlying libBigWig file handle; set to nil once closed
  bw : ptr bigWigFile_t
  # path the file was opened with
  path: string
  # true when the file was opened as a bigbed
  isBigBed: bool

  # these are internal, re-used cache before sending
  # to get data in format for bw from more common format
  starts: seq[uint32]
  stops: seq[uint32]
  values: seq[float32]
  # C string array allocated in `open` and released in `close`
  cs: cstringArray
18 |
# Binding to the C library's `free`; used to release memory that was
# allocated on the C side (see `SQL`).
proc c_free(p: pointer) {.
  importc: "free", header: "<stdlib.h>".}
21 |
# The chromosomes of a file: one (name, length, tid) entry per chromosome.
type BigWigHeader* = seq[tuple[name: string, length: int, tid: uint32]]
23 |
# Summary statistic to request from `stats`.
# NOTE(review): the numeric values presumably mirror the corresponding
# libBigWig enum -- confirm before changing them.
type Stat* {.pure.} = enum
  #doesNotExist = -1 #!< This does nothing
  mean = 0
  stdev = 1
  max = 2
  min = 3
  # The number of bases covered
  coverage = 4
  # The sum of per-base values
  sum = 5
34 |
template isBigBed*(b:BigWig): bool =
  ## indicate whether file is BigBed (true) or BigWig (false)
  b.isBigBed
38 |
proc close*(bw: BigWig) =
  ## close the file and free up resources
  ##
  ## Safe to call more than once, and a no-op on a handle that was never
  ## opened (nil).
  if bw == nil: return
  if bw.bw != nil:
    bwClose(bw.bw)
    bw.bw = nil
  if bw.cs != nil:
    deallocCStringArray(bw.cs)
    bw.cs = nil
47 |
proc destroy_bigwig(bw: BigWig) =
  ## Finalizer registered in `open`: releases the file by closing it.
  close(bw)
50 |
proc open*(bw: var BigWig, path: string, mode: FileMode=fmRead, maxZooms:int=8): bool =
  ## open the bigwig file. maxZooms is only used when opening in write mode.
  ##
  ## In read mode a bigbed file is detected and opened as such. Returns false
  ## when the file could not be opened or, in write mode, when the header
  ## could not be created. `bw` is always set to a fresh handle, so calling
  ## `close` on it is valid even after a failed open.
  var fmode: string
  if mode == fmRead: fmode = "r" elif mode == fmWrite: fmode = "w" elif mode == fmAppend: fmode = "a"
  # allocate the handle once, with its finalizer, and fill it in place
  # (previously this object was discarded and a second one constructed).
  new(bw, destroy_bigwig)
  bw.path = path
  if mode == fmRead and bbIsBigBed(path, nil) == 1:
    bw.bw = bbOpen(path, nil)
    bw.isBigBed = true
  else:
    bw.bw = bwOpen(path, nil, fmode)
  if bw.bw == nil: return false
  result = true
  if mode == fmWrite:
    result = 0 == bw.bw.bwCreateHdr(maxZooms.int32)
  # one-element C string array; released again in `close`
  bw.cs = allocCStringArray(@[""])
65 |
# Pointer to a C array of T, indexable without bounds checks; used to read
# arrays owned by the C library.
type CPtr[T] = ptr UncheckedArray[T]
67 |
proc SQL*(bw: BigWig): string =
  ## return any SQL associated with a bigbed file; this can be used to parse
  ## the extra columns in bigbed
  let raw = bbGetSQL(bw.bw)
  # copy into a Nim string first, then release the C-side buffer
  result = $raw
  c_free(raw)
74 |
proc get_stop(bw: var BigWig, chrom: string, stop:int): int {.inline.} =
  ## Resolve the end coordinate of a query: a non-negative `stop` is returned
  ## unchanged, a negative one is replaced by the length of `chrom`.
  ## Raises KeyError when `chrom` is not present in the file.
  if stop >= 0: return stop
  let tid = bwGetTid(bw.bw, chrom)
  # the largest uint32 is the "not found" marker
  if tid == high(uint32):
    raise newException(KeyError, "[bigwig] unknown chromosome:" & chrom)
  let lengths = cast[CPtr[uint32]](bw.bw.cl.len)
  result = int(lengths[tid])
81 |
82 | proc header*(bw: var BigWig): BigWigHeader =
83 | result = newSeq[tuple[name: string, length: int, tid:uint32]](bw.bw.cl.nKeys)
84 | var lens = cast[CPtr[uint32]](bw.bw.cl.len)
85 | var names = cast[cstringArray](bw.bw.cl.chrom)
86 | for i in 0.. 1:
176 | doAssert 0 == bw.bw.bwAppendIntervals(bw.starts[1].addr, bw.stops[1].addr, bw.values[1].addr, intervals.high.uint32), "[bigwig] error appending intervals"
177 |
178 |
proc add*[T: int|uint32|uint64|int32|int64, U: int|uint32|uint64|int32|int64](bw:BigWig, chrom: string, span: U, intervals: seq[tuple[start:T, value: float32]]) =
  ## add spans to the bigwig. this adds fixed-length (span) intervals starting at the given positions.
  ## An empty `intervals` is a no-op.
  if intervals.len == 0: return
  # size the re-used buffers, then copy starts/values into them
  bw.setLens(intervals.len)

  for i in 0 .. intervals.high:
    bw.starts[i] = intervals[i].start.uint32
    bw.values[i] = intervals[i].value

  # the first entry carries the chromosome and span; the rest are appended
  doAssert 0 == bw.bw.bwAddIntervalSpans(chrom.cstring, bw.starts[0].addr, span.uint32, bw.values[0].addr, 1'u32), "[bigwig] error adding interval spans"
  if intervals.len > 1:
    doAssert 0 == bw.bw.bwAppendIntervalSpans(bw.starts[1].addr, bw.values[1].addr, intervals.high.uint32), "[bigwig] error appending interval spans"
191 |
proc add*(bw:BigWig, chrom:string, start: uint32, values: var seq[float32], step:uint32=1, span:uint32=1) =
  ## add values to the bigwig starting at start and stepping by step.
  ## this is the most efficient way (space and performance) to add to a bigwig file if your intervals match this format.
  ## An empty `values` is a no-op. A failing write triggers an assertion
  ## with a descriptive message, like the other `add` procs.
  if values.len == 0: return
  # the first value carries chromosome/start/span/step; the rest are appended
  doAssert 0 == bw.bw.bwAddIntervalSpanSteps(chrom, start, span, step, values[0].addr, 1), "[bigwig] error adding interval span steps"
  if values.len > 1:
    doAssert 0 == bw.bw.bwAppendIntervalSpanSteps(values[1].addr, values.high.uint32), "[bigwig] error appending interval span steps"
199 |
--------------------------------------------------------------------------------
/src/bigwigpkg/utils.nim:
--------------------------------------------------------------------------------
1 | import strutils
2 |
--------------------------------------------------------------------------------
/src/bigwigpkg/version.nim:
--------------------------------------------------------------------------------
1 | const bigwigVersion* = "0.0.3"
2 | const bigwigGitCommit* = staticExec("git rev-parse --verify HEAD")
3 |
--------------------------------------------------------------------------------
/tests/all.nim:
--------------------------------------------------------------------------------
1 | import ./test_read
2 | import ./test_write
3 |
--------------------------------------------------------------------------------
/tests/ex.bb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brentp/bigwig-nim/ce1cebfb34145fc30f685492c6024b4e38ddef72/tests/ex.bb
--------------------------------------------------------------------------------
/tests/nim.cfg:
--------------------------------------------------------------------------------
1 | path = "$projectPath/../src"
2 |
--------------------------------------------------------------------------------
/tests/test.bw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brentp/bigwig-nim/ce1cebfb34145fc30f685492c6024b4e38ddef72/tests/test.bw
--------------------------------------------------------------------------------
/tests/test_read.nim:
--------------------------------------------------------------------------------
1 | import unittest
2 | import bigwig
3 | import math
4 |
proc `~~`[T: float32|float64](a: T, b: T): bool =
  ## approximate equality: true when `a` and `b` differ by less than 1e-4.
  let delta = abs(a - b)
  result = delta < 1e-4
7 |
# read-side tests: opening files, per-base values, interval iteration, stats and header.
suite "test reading":
  test "that missing file returns false":
    var bw: BigWig
    check false == open(bw, "xxxxxx.bw")
    bw.close

  test "that reading values works":
    var bw: BigWig
    check true == open(bw, "tests/test.bw")

    var values: seq[float32]
    bw.values(values, "1", 0, 50)
    check values[0] ~~ 0.1
    check values[1] ~~ 0.2
    check values[2] ~~ 0.3
    # positions without data come back as NaN
    check $values[3] == "nan"
    bw.close

  test "that interval iteration works":
    var bw: BigWig
    check true == open(bw, "tests/test.bw")
    var expect : seq[tuple[start:int, stop:int, value:float32]] = @[
      (start: 0, stop: 1, value: 0.1'f32),
      (start: 1, stop: 2, value: 0.2'f32),
      (start: 2, stop: 3, value: 0.3'f32),
      (start: 100, stop: 150, value: 1.4'f32),
      (start: 150, stop: 151, value: 1.5'f32)]

    var i = 0
    for iv in bw.intervals("1"):
      check iv == expect[i]
      i += 1
    # without this the test would pass even if the iterator yielded nothing
    check i == expect.len
    bw.close

  test "that isBigBed is false for bigwig":
    var bw: BigWig
    check true == open(bw, "tests/test.bw")
    check not bw.isBigBed
    #bw.close

  test "that stats work":
    var bw: BigWig
    check true == open(bw, "tests/test.bw")

    # single-bin summary over the first three bases
    var mean = bw.stats("1", 0, 3)
    check mean.len == 1
    check 0.2 ~~ mean[0]

    # four bins over four bases: the last base has no data
    var mins = bw.stats("1", 0, 4, Stat.min, 4)
    check mins[0] ~~ 0.1
    check mins[1] ~~ 0.2
    check mins[2] ~~ 0.3
    check $mins[3] == "nan"
    bw.close

  test "header":
    var bw: BigWig
    check true == open(bw, "tests/test.bw")
    check bw.header == @[(name: "1", length: 195471971, tid: 0'u32), (name: "10", length: 130694993, tid: 1'u32)]
    check bw.SQL.len == 0
68 |
# bigbed files are opened through the same BigWig type.
suite "bigbed suite":

  test "that bigbed reading works":
    var bb: BigWig
    check true == open(bb, "tests/ex.bb")
    # the example file has a single chromosome in its header
    check bb.header.len == 1

    for entry in bb.entries("chr21"):
      check entry.start > 0

    # print the SQL string stored in the file
    echo bb.SQL

    check bb.isBigBed
82 |
83 |
--------------------------------------------------------------------------------
/tests/test_write.nim:
--------------------------------------------------------------------------------
1 | import unittest
2 | import bigwig
3 | import math
4 |
proc `~~`[T: float32|float64](a: T, b: T): bool =
  ## approximate equality: true when `a` and `b` differ by less than 1e-4.
  let delta = abs(a - b)
  result = delta < 1e-4
7 |
# write-side tests: each test writes tests/writer.bw, then re-opens it and checks what comes back.
suite "test writing":
  test "that setting header works":
    var bw: BigWig
    check true == open(bw, "tests/test.bw")

    var hdr = bw.header
    bw.close

    var wtr:BigWig
    check true == open(wtr, "tests/writer.bw", fmWrite)
    wtr.setHeader(hdr)
    check wtr.header == hdr

    # setting the header again replaces the previous one
    wtr.setHeader(hdr[0..<1])
    check wtr.header == hdr[0..<1]

  test "that adding intervals works":
    var wtr:BigWig
    check true == open(wtr, "tests/writer.bw", fmWrite)
    wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)])
    wtr.writeHeader

    wtr.add("chr1", @[(start: 22, stop: 33, value: 0.01'f32), (start: 44, stop: 55, value: 155'f32)])
    wtr.close

    var rdr: BigWig
    check true == open(rdr, "tests/writer.bw")
    var i = 0
    for iv in rdr.intervals("chr1"):
      if i == 0: check iv == (start: 22, stop: 33, value: 0.01'f32)
      if i == 1: check iv == (start: 44, stop: 55, value: 155'f32)
      i.inc
    # make sure both intervals were actually read back
    check i == 2
    rdr.close

  test "that adding spans works":
    var wtr:BigWig
    check true == open(wtr, "tests/writer.bw", fmWrite)
    wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)])
    wtr.writeHeader

    # add intervals with span of 100
    wtr.add("chr1", 100, @[(start: 22, value: 0.01'f32), (start: 44, value: 155'f32)])
    wtr.close

    var rdr: BigWig
    check true == open(rdr, "tests/writer.bw")
    var i = 0
    for iv in rdr.intervals("chr1"):
      if i == 0: check iv == (start: 22, stop: 122, value: 0.01'f32)
      if i == 1: check iv == (start: 44, stop: 144, value: 155'f32)
      i.inc
    # make sure both spans were actually read back
    check i == 2
    rdr.close


  test "that add span step works":

    var values = @[1'f32, 2222.2'f32, 555.5'f32, 666.6'f32]

    var wtr:BigWig
    check true == open(wtr, "tests/writer.bw", fmWrite)
    wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)])
    wtr.writeHeader

    # add 1-base intervals starting at 100
    wtr.add("chr1", 100, values)
    wtr.close

    var rdr: BigWig
    check true == open(rdr, "tests/writer.bw")
    var i = 0
    for iv in rdr.intervals("chr1"):
      if i == 0: check iv == (start: 100, stop: 101, value: 1'f32)
      if i == 1: check iv == (start: 101, stop: 102, value: 2222.2'f32)
      check iv.value == values[i]
      check iv.start == 100 + i
      check iv.stop == 100 + i + 1
      i.inc
    # every written value must come back exactly once
    check i == values.len
    rdr.close

  test "that add span step works with span and step":

    var values = @[1'f32, 2222.2'f32, 555.5'f32, 666.6'f32]

    var wtr:BigWig
    check true == open(wtr, "tests/writer.bw", fmWrite)
    wtr.setHeader(@[(name:"chr1", length: 2000, tid: 0'u32)])
    wtr.writeHeader
    let span = 200'u32
    let step = 33'u32

    # add intervals with start of 100
    wtr.add("chr1", 100, values, span=span, step=step)
    wtr.close

    var rdr: BigWig
    check true == open(rdr, "tests/writer.bw")
    var i = 0
    for iv in rdr.intervals("chr1"):
      check iv.start == 100 + i * step.int
      check iv.stop == 100 + i * step.int + span.int
      check iv.value == values[i]
      i.inc
    # every written value must come back exactly once
    check i == values.len
    rdr.close
109 |
--------------------------------------------------------------------------------