├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── bench.nim ├── docs ├── .nojekyll └── index.html ├── example.nim ├── lapper.nimble ├── nim.cfg └── src └── lapper.nim /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | nim-version: ['1.6.20', '2.0.4', 'stable'] 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Setup Nim 20 | uses: jiro4989/setup-nim-action@v2 21 | with: 22 | nim-version: ${{ matrix.nim-version }} 23 | repo-token: ${{ secrets.GITHUB_TOKEN }} 24 | 25 | - name: Run example 26 | run: nim c -r example.nim -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | nimcache 2 | src/lapper 3 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - 5 | family-names: Pedersen 6 | given-names: Brent S. 7 | email: bpederse@gmail.com 8 | 9 | title: "nim-lapper: fast, simple interval overlapping" 10 | version: 0.1.7 11 | date-released: 2021-01-01 12 | license: MIT 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Brent S. 
Pedersen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | simple, fast interval searches for nim 2 | 3 | This uses a binary search in a sorted list of intervals along with knowledge of the longest interval. 4 | It works when the size of the largest interval is smaller than the average distance between intervals. 5 | As that ratio of largest-size::mean-distance increases, the performance decreases. 6 | On realistic (for my use-case) data, this is 1000 times faster to query results and >5000 7 | times faster to check for presence than a brute-force method. 8 | 9 | Lapper also has a special case `seek` method when we know that the queries will be in order. 
10 | This method uses a cursor to indicate the start of the last search and does a linear search 11 | from that cursor to find matching intervals. This gives an additional 2-fold speedup over 12 | the `find` method. 13 | 14 | API docs and examples in `nim-doc` format are available [here](https://brentp.github.io/nim-lapper/index.html) 15 | 16 | See the `Performance` section for how large the intervals can be and still get a performance 17 | benefit. 18 | 19 | To use this, it's simply required that your type have a `start(m) int` and `stop(m) int` method to satisfy 20 | the [concept](https://nim-lang.org/docs/manual.html#generics-concepts) used by `Lapper` 21 | 22 | You can install this with `nimble install lapper`. 23 | 24 | ## Example 25 | 26 | ```nim 27 | import lapper 28 | import strutils 29 | 30 | # define an appropriate data-type. it must have a `start(m) int` and `stop(m) int` method. 31 | #type myinterval = tuple[start:int, stop:int, val:int] 32 | # if we want to modify the result, then we have to use a ref object type 33 | type myinterval = ref object 34 | start: int 35 | stop: int 36 | val: int 37 | 38 | proc start(m: myinterval): int {.inline.} = return m.start 39 | proc stop(m: myinterval): int {.inline.} = return m.stop 40 | proc `$`(m:myinterval): string = return "(start:$#, stop:$#, val:$#)" % [$m.start, $m.stop, $m.val] 41 | 42 | # create some fake data 43 | var ivs = new_seq[myinterval]() 44 | for i in countup(0, 100, 10): 45 | ivs.add(myinterval(start:i, stop:i + 15, val:0)) 46 | 47 | # make the Lapper "data-structure" 48 | var l = lapify(ivs) 49 | var empty:seq[myinterval] 50 | 51 | assert l.find(10, 20, empty) 52 | var notfound = not l.find(200, 300, empty) 53 | assert notfound 54 | 55 | var res = new_seq[myinterval]() 56 | 57 | # find is the more general case, l.seek gives a speed benefit when consecutive queries are in order. 
58 | echo l.find(50, 70, res) 59 | echo res 60 | # @[(start: 40, stop: 55, val:0), (start: 50, stop: 65, val: 0), (start: 60, stop: 75, val: 0)] 61 | for r in res: 62 | r.val += 1 63 | 64 | # or we can do a function on each overlapping interval 65 | l.each_seek(50, 60, proc(a:myinterval) = inc(a.val)) 66 | # or 67 | l.each_find(50, 60, proc(a:myinterval) = a.val += 10) 68 | 69 | discard l.seek(50, 70, res) 70 | echo res 71 | #@[(start:40, stop:55, val:12), (start:50, stop:65, val:12), (start:60, stop:75, val:1)] 72 | 73 | ``` 74 | 75 | 76 | ## Performance 77 | 78 | The output of running `bench.nim` (with -d:release) which generates *200K intervals* 79 | with positions ranging from 0 to 50 million and max lengths from 10 to 1M is: 80 | 81 | | max interval size | lapper time | lapper seek time | brute-force time | speedup | seek speedup | each-seek speedup | 82 | | ----------------- | ----------- | ---------------- | --------------- | ------- | ------------ | ----------------- | 83 | |10|0.06|0.04|387.44|6983.81|9873.11|9681.66| 84 | |100|0.05|0.04|384.92|7344.32|10412.97|15200.84| 85 | |1000|0.06|0.05|375.37|6250.23|7942.50|15703.24| 86 | |10000|0.15|0.14|377.29|2554.61|2702.13|15942.76| 87 | |100000|0.99|0.99|377.88|383.36|381.37|16241.61| 88 | |1000000|12.52|12.53|425.61|34.01|33.96|17762.58| 89 | 90 | Note that this is a worst-case scenario as we could also 91 | simulate a case where there are few long intervals instead of 92 | many large ones as in this case. Even so, we get a 34X speedup with `lapper`. 93 | 94 | Also note that testing for presence will be even faster than 95 | the above comparisons as it returns true as soon as an overlap is found. 
96 | -------------------------------------------------------------------------------- /bench.nim: -------------------------------------------------------------------------------- 1 | import lapper 2 | import algorithm 3 | import math 4 | import strutils 5 | import random 6 | import times 7 | 8 | #type myinterval = tuple[start:int, stop:int] 9 | #proc start(m: myinterval): int {.inline.} = return m.start 10 | #proc stop(m: myinterval): int {.inline.} = return m.stop 11 | 12 | # define an appropriate data-type. it must have a `start(m) int` and `stop(m) int` method. 13 | #type myinterval = tuple[start:int, stop:int, val:int] 14 | # if we want to modify the result, then we have to use a ref object type 15 | type myinterval = ref object 16 | start: int 17 | stop: int 18 | val: int 19 | 20 | 21 | proc start(m: myinterval): int {.inline.} = return m.start 22 | proc stop(m: myinterval): int {.inline.} = return m.stop 23 | proc `$`(m:myinterval): string = return "(start:$#, stop:$#, val:$#)" % [$m.start, $m.stop, $m.val] 24 | 25 | proc randomi(imin:int, imax:int): int = 26 | return imin + random(imax - imin) 27 | 28 | proc brute_force(ivs: seq[Interval], start:int, stop:int, res: var seq[Interval]) = 29 | if res.len != 0: res.set_len(0) 30 | for i in ivs: 31 | if i.start <= stop and i.stop >= start: res.add(i) 32 | 33 | proc make_random(n:int, range_max:int, size_min:int, size_max:int): seq[myinterval] = 34 | result = new_seq[myinterval](n) 35 | for i in 0.. 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Module lapper 20 | 1165 | 1166 | 1167 | 1168 | 1193 | 1194 | 1195 | 1196 |
1197 |
1198 |

Module lapper

1199 |
1200 |
1201 |
1202 | Search: 1204 |
1205 |
1206 | Group by: 1207 | 1211 |
1212 | 1256 | 1257 |
1258 |
1259 |
1260 |

This module provides a simple data-structure for fast interval searches. It does not use an interval tree, instead, it operates on the assumption that most intervals are of similar length; or, more exactly, that the longest interval in the set is not long compared to the average distance between intervals. On any dataset where that is not the case, this method will not perform well. For cases where this holds true (as it often does with genomic data), we can sort by start and use binary search on the starts, accounting for the length of the longest interval. The advantage of this approach is simplicity of implementation and speed. In realistic tests queries returning the overlapping intervals are 1000 times faster than brute force and queries that merely check for the overlaps are > 5000 times faster.

1261 |

The main methods are find and seek where the latter uses a cursor and is very fast for cases when the queries are sorted. This is another innovation in this library that allows an additional ~50% speed improvement when consecutive queries are known to be in sort order.

1262 |

For both find and seek, if the given intervals parameter is nil, the function will return a boolean indicating if any intervals in the set overlap the query. This is much faster than modifying the intervals.

1263 |

The example below shows off most of the API of Lapper.

1264 |
import lapper
1265 | type myinterval = ref object
1266 |    start: int
1267 |    stop: int
1268 |    val: int
1269 |  
1270 |  proc start(m: myinterval): int {.inline.} = return m.start
1271 |  proc stop(m: myinterval): int {.inline.} = return m.stop
1272 |  proc `$`(m:myinterval): string = return "(start:$#, stop:$#, val:$#)" % [$m.start, $m.stop, $m.val]

create some fake data

1273 |
var ivs = new_seq[myinterval]()
1274 | for i in countup(0, 100, 10):
1275 |   ivs.add(myinterval(start:i, stop:i + 15, val:0))
make the Lapper "data-structure"
l = lapify(ivs)
1276 | empty:seq[myinterval]
l.find(10, 20, empty)
1277 | notfound = not l.find(200, 300, empty)
1278 | assert notfound
res = new_seq[myinterval]()
find is the more general case, l.seek gives a speed benefit when consecutive queries are in order.
echo l.find(50, 70, res)
1279 | echo res
1280 | # @[(start: 40, stop: 55, val:0), (start: 50, stop: 65, val: 0), (start: 60, stop: 75, val: 0)]
1281 | for r in res:
1282 |    r.val += 1
or we can do a function on each overlapping interval
l.each_seek(50, 60, proc(a:myinterval) = inc(a.val))
or
l.each_find(50, 60, proc(a:myinterval) = a.val += 10)
discard l.seek(50, 70, res)
1283 | echo res
1284 | # @[(start:40, stop:55, val:12), (start:50, stop:65, val:12), (start:60, stop:75, val:1)]

1285 |
1286 |

Imports

1287 |
1288 | algorithm, random, times, strutils 1289 |
1290 |
1291 |

Types

1292 |
1293 |
Interval = concept i
1294 |     start(i) is int
1295 |     stop(i) is int
1296 |
1297 | An object/tuple must implement these 2 methods to use this module 1298 | 1299 |
1300 |
Lapper[T] = object
1301 |   intervals: seq[T]
1302 |   max_len: int
1303 |   cursor: int                  ## `cursor` is used internally by ordered find
1304 |   
1305 |
1306 | Lapper enables fast interval searches 1307 | 1308 |
1309 | 1310 |
1311 |
1312 |

Procs

1313 |
1314 |
proc overlap[T: Interval](a: T; start: int; stop: int): bool {.
inline
.}
1315 |
1316 | overlap returns true if half-open intervals overlap 1317 | 1318 |
1319 |
proc lapify[T: Interval](ivs: var seq[T]): Lapper[T]
1320 |
1321 | create a new Lapper object; ivs will be sorted. 1322 | 1323 |
1324 |
proc len[T: Interval](L: Lapper[T]): int
1325 |
1326 | len returns the number of intervals in the Lapper 1327 | 1328 |
1329 |
proc find[T: Interval](L: Lapper[T]; start: int; stop: int; ivs: var seq[T]): bool
1330 |
1331 | fill ivs with all intervals in L that overlap start .. stop. if ivs is nil, then this will just return true if it finds an interval and false otherwise 1332 | 1333 |
1334 |
proc each_find[T: Interval](L: Lapper[T]; start: int; stop: int; fn: proc (v: T))
1335 |
1336 | call fn(x) for each interval x in L that overlaps start..stop 1337 | 1338 |
1339 |
proc seek[T: Interval](L: var Lapper[T]; start: int; stop: int; ivs: var seq[T]): bool
1340 |
1341 | fill ivs with all intervals in L that overlap start .. stop inclusive. this method will work when queries to this lapper are in sorted (start) order it uses a linear search from the last query instead of a binary search. if ivs is nil, then this will just return true if it finds an interval and false otherwise 1342 | 1343 |
1344 |
proc each_seek[T: Interval](L: var Lapper[T]; start: int; stop: int; fn: proc (v: T)) {.
1345 | inline
.}
1346 |
1347 | call fn(x) for each interval x in L that overlaps start..stop this assumes that subsequent calls to this function will be in sorted order 1348 | 1349 |
1350 | 1351 |
1352 | 1353 |
1354 |
1355 | 1356 |
1357 | 1362 |
1363 |
1364 |
1365 | 1366 | 1367 | 1368 | -------------------------------------------------------------------------------- /example.nim: -------------------------------------------------------------------------------- 1 | import lapper 2 | import strutils 3 | 4 | # define an appropriate data-type. it must have a `start(m) int` and `stop(m) int` method. 5 | #type myinterval = tuple[start:int, stop:int, val:int] 6 | # if we want to modify the result, then we have to use a ref object type 7 | type myinterval = ref object 8 | start: int 9 | stop: int 10 | val: int 11 | 12 | proc start(m: myinterval): int {.inline.} = return m.start 13 | proc stop(m: myinterval): int {.inline.} = return m.stop 14 | proc `$`(m:myinterval): string = return "(start:$#, stop:$#, val:$#)" % [$m.start, $m.stop, $m.val] 15 | 16 | # create some fake data 17 | var ivs = new_seq[myinterval]() 18 | for i in countup(0, 100, 10): 19 | ivs.add(myinterval(start:i, stop:i + 15, val:0)) 20 | 21 | # make the Lapper "data-structure" 22 | var l = lapify(ivs) 23 | var empty:seq[myinterval] 24 | 25 | assert l.find(10, 20, empty) 26 | var notfound = not l.find(200, 300, empty) 27 | assert notfound 28 | 29 | var res = new_seq[myinterval]() 30 | 31 | # find is the more general case, l.seek gives a speed benefit when consecutive queries are in order. 
32 | echo l.find(50, 70, res) 33 | echo res 34 | # @[(start: 40, stop: 55, val:0), (start: 50, stop: 65, val: 0), (start: 60, stop: 75, val: 0), (start: 70, stop: 85, val: 0)] 35 | for r in res: 36 | r.val += 1 37 | 38 | # or we can do a function on each overlapping interval 39 | l.each_seek(50, 60, proc(a:myinterval) = inc(a.val)) 40 | # or 41 | l.each_find(50, 60, proc(a:myinterval) = a.val += 10) 42 | 43 | discard l.seek(50, 70, res) 44 | echo res 45 | #@[(start:40, stop:55, val:12), (start:50, stop:65, val:12), (start:60, stop:75, val:1)] 46 | -------------------------------------------------------------------------------- /lapper.nimble: -------------------------------------------------------------------------------- 1 | # Package 2 | 3 | version = "0.1.8" 4 | author = "Brent Pedersen" 5 | description = "fast, simple interval overlaps with binary search" 6 | license = "MIT" 7 | 8 | # Dependencies 9 | requires "nim >= 0.19.2" #, "nim-lang/c2nim>=0.9.13" 10 | srcDir = "src" 11 | 12 | skipFiles = @["bench.nim", "example.nim"] 13 | 14 | skipDirs = @["tests"] 15 | 16 | task test, "run the tests": 17 | exec "nim c -d:release --lineDir:on -r src/lapper" 18 | 19 | task docs, "make docs": 20 | exec "nim doc2 src/lapper; mkdir -p docs; mv lapper.html docs/index.html" 21 | -------------------------------------------------------------------------------- /nim.cfg: -------------------------------------------------------------------------------- 1 | path = "$projectPath/src" 2 | -------------------------------------------------------------------------------- /src/lapper.nim: -------------------------------------------------------------------------------- 1 | ## This module provides a simple data-structure for fast interval searches. 
It does not use an interval tree, 2 | ## instead, it operates on the assumption that most intervals are of similar length; or, more exactly, that the 3 | ## longest interval in the set is not long compared to the average distance between intervals. On any dataset 4 | ## where that is not the case, this method will not perform well. For cases where this holds true (as it often 5 | ## does with genomic data), we can sort by start and use binary search on the starts, accounting for the length 6 | ## of the longest interval. The advantage of this approach is simplicity of implementation and speed. In realistic 7 | ## tests queries returning the overlapping intervals are 1000 times faster than brute force and queries that merely 8 | ## check for the overlaps are > 5000 times faster. 9 | ## 10 | ## The main methods are `find` and `seek` where the latter uses a cursor and is very fast for cases when the queries 11 | ## are sorted. This is another innovation in this library that allows an addition ~50% speed improvement when 12 | ## consecutive queries are known to be in sort order. 13 | ## 14 | ## For both find and seek, if the given intervals parameter is nil, the function will return a boolean indicating if 15 | ## any intervals in the set overlap the query. This is much faster than modifying the 16 | ## intervals. 17 | ## 18 | ## The example below shows off most of the API of `Lapper`. 19 | ## 20 | ## .. code-block:: nim 21 | ## import lapper 22 | ## type myinterval = ref object 23 | ## start: int 24 | ## stop: int 25 | ## val: int 26 | ## 27 | ## proc start(m: myinterval): int {.inline.} = return m.start 28 | ## proc stop(m: myinterval): int {.inline.} = return m.stop 29 | ## proc `$`(m:myinterval): string = return "(start:$#, stop:$#, val:$#)" % [$m.start, $m.stop, $m.val] 30 | ## 31 | ## create some fake data 32 | ## .. 
code-block:: nim 33 | ## var ivs = new_seq[myinterval]() 34 | ## for i in countup(0, 100, 10): 35 | ## ivs.add(myinterval(start:i, stop:i + 15, val:0)) 36 | 37 | ## make the Lapper "data-structure" 38 | 39 | ## .. code-block:: nim 40 | ## l = lapify(ivs) 41 | ## empty:seq[myinterval] 42 | 43 | ## .. code-block:: nim 44 | ## l.find(10, 20, empty) 45 | ## notfound = not l.find(200, 300, empty) 46 | ## assert notfound 47 | 48 | ## .. code-block:: nim 49 | ## res = new_seq[myinterval]() 50 | 51 | ## find is the more general case, l.seek gives a speed benefit when consecutive queries are in order. 52 | 53 | ## .. code-block:: nim 54 | ## echo l.find(50, 70, res) 55 | ## echo res 56 | ## # @[(start: 40, stop: 55, val:0), (start: 50, stop: 65, val: 0), (start: 60, stop: 75, val: 0), (start: 70, stop: 85, val: 0)] 57 | ## for r in res: 58 | ## r.val += 1 59 | 60 | ## or we can do a function on each overlapping interval 61 | 62 | ## .. code-block:: nim 63 | ## l.each_seek(50, 60, proc(a:myinterval) = inc(a.val)) 64 | 65 | ## or 66 | 67 | ## .. code-block:: nim 68 | ## l.each_find(50, 60, proc(a:myinterval) = a.val += 10) 69 | 70 | ## .. 
## .. code-block:: nim
##   discard l.seek(50, 70, res)
##   echo res
##   # @[(start:40, stop:55, val:12), (start:50, stop:65, val:12), (start:60, stop:75, val:1)]
import algorithm

type

  Interval* = concept i
    ## An object/tuple must implement these 2 methods to use this module
    start(i) is int
    stop(i) is int

  Lapper*[T] = object
    ## Lapper enables fast interval searches
    intervals: seq[T] ## copy of the input intervals, sorted by (start, stop) in `lapify`
    max_len*: int ## largest `stop - start` over all intervals; widens the binary-search window
    cursor: int ## `cursor` is used internally by ordered find

template overlap*[T:Interval](a: T, start:int, stop:int): bool =
  ## overlap returns true if the half-open interval `a` overlaps the half-open
  ## query `start ..< stop`; intervals that merely touch do not overlap.
  #return a.start < stop and a.stop > start
  a.stop > start and a.start < stop


proc iv_cmp[T:Interval](a, b: T): int =
  ## comparison used for sorting: order by start, ties broken by stop.
  if a.start < b.start: return -1
  if b.start < a.start: return 1
  return cmp(a.stop, b.stop)


proc lapify*[T:Interval](ivs:var seq[T]): Lapper[T] =
  ## create a new Lapper object; ivs will be sorted.
  sort(ivs, iv_cmp)
  result = Lapper[T](max_len: 0, intervals:ivs)
  # remember the longest interval so that searches know how far to look back
  for iv in ivs:
    if iv.stop - iv.start > result.max_len:
      result.max_len = iv.stop - iv.start

proc lowerBound[T:Interval](a: var seq[T], start: int): int =
  ## binary search on the (sorted) starts: returns the index of the first
  ## element whose start is >= `start`, or `a.len` when there is none.
  result = a.low
  var count = a.high - a.low + 1
  var step, pos: int
  while count != 0:
    step = count div 2
    pos = result + step
    if a[pos].start < start:
      result = pos + 1
      count -= step + 1
    else:
      count = step

proc len*[T:Interval](L:Lapper[T]): int {.inline.} =
  ## len returns the number of intervals in the Lapper
  L.intervals.len

proc empty*[T:Interval](L:Lapper[T]): bool {.inline.} =
  ## empty returns true if the Lapper holds no intervals
  return L.intervals.len == 0

iterator find*[T:Interval](L:var Lapper[T], start:int, stop:int): T =
  ## yield each interval in L that overlaps start .. stop.
  # an overlapping interval can start at most `max_len` before the query start
  let off = lowerBound(L.intervals, start - L.max_len)
  for i in off..L.intervals.high:
    let x = L.intervals[i]
    if likely(x.overlap(start, stop)):
      yield x
    elif x.start >= stop: break

proc find*[T:Interval](L:var Lapper[T], start:int, stop:int, ivs:var seq[T]): bool =
  ## fill ivs with all intervals in L that overlap start .. stop.
  ## previous contents of ivs are overwritten and ivs is truncated to the number
  ## of hits. returns true if at least one interval overlaps.
  let off = lowerBound(L.intervals, start - L.max_len)
  var n = 0
  for i in off..L.intervals.high:
    let x = L.intervals[i]
    if x.overlap(start, stop):
      # re-use existing slots before growing the seq
      if n < ivs.len:
        ivs[n] = x
      else:
        ivs.add(x)
      n += 1
    elif x.start >= stop: break
  if ivs.len > n:
    ivs.setLen(n)
  return len(ivs) > 0

proc count*[T:Interval](L:var Lapper[T], start:int, stop:int): int =
  ## return the number of intervals in L that overlap start .. stop.
  let off = lowerBound(L.intervals, start - L.max_len)
  for i in off..L.intervals.high:
    let x = L.intervals[i]
    if x.overlap(start, stop):
      result.inc
    elif x.start >= stop: break

proc each_find*[T:Interval](L:var Lapper[T], start:int, stop:int, fn: proc (v:T)) =
  ## call fn(x) for each interval x in L that overlaps start..stop
  let off = lowerBound(L.intervals, start - L.max_len)
  for i in off..L.intervals.high:
    let x = L.intervals[i]
    if x.overlap(start, stop):
      fn(x)
    elif x.start >= stop: break

proc seek_cursor[T:Interval](L:var Lapper[T], start:int) {.inline.} =
  ## position `L.cursor` for an ordered query that begins at `start`.
  ## shared by the `seek` iterator, the `seek` proc and `each_seek` so that all
  ## three apply the same bounds check and keep the advanced cursor.
  # fall back to binary search when the cursor is unset, sits at/after the last
  # interval (lowerBound may have returned `len` for a query past the end, which
  # must not be used as an index), or is already beyond the query start.
  if L.cursor == 0 or L.cursor >= L.intervals.high or L.intervals[L.cursor].start > start:
    L.cursor = lowerBound(L.intervals, start - L.max_len)
  # skip intervals that start too early to possibly reach `start`
  while (L.cursor + 1) < L.intervals.high and L.intervals[L.cursor + 1].start < (start - L.max_len):
    L.cursor += 1

iterator seek*[T:Interval](L:var Lapper[T], start:int, stop:int): T =
  ## yield each interval in L that overlaps start .. stop.
  ## this assumes that consecutive queries are in sorted (start) order.
  seek_cursor(L, start)
  let old_cursor = L.cursor
  for i in L.cursor..L.intervals.high:
    let x = L.intervals[i]
    if x.overlap(start, stop):
      yield x
    elif x.start >= stop: break
  # restore in case the loop body of the caller moved the cursor
  L.cursor = old_cursor

proc seek*[T:Interval](L:var Lapper[T], start:int, stop:int, ivs:var seq[T]): bool =
  ## fill ivs with all intervals in L that overlap start .. stop.
  ## this method will work when queries to this lapper are in sorted (start) order
  ## it uses a linear search from the last query instead of a binary search.
  ## ivs is emptied first; returns true if at least one interval overlaps.
  if ivs.len != 0: ivs.set_len(0)
  seek_cursor(L, start)
  # captured after the cursor was advanced so that the progress is kept
  let old_cursor = L.cursor
  for i in L.cursor..L.intervals.high:
    let x = L.intervals[i]
    if x.overlap(start, stop):
      ivs.add(x)
    elif x.start >= stop: break
  L.cursor = old_cursor
  return ivs.len != 0

proc each_seek*[T:Interval](L:var Lapper[T], start:int, stop:int, fn:proc (v:T)) {.inline.} =
  ## call fn(x) for each interval x in L that overlaps start..stop
  ## this assumes that subsequent calls to this function will be in sorted order
  seek_cursor(L, start)
  let old_cursor = L.cursor
  for i in L.cursor..L.intervals.high:
    let x = L.intervals[i]
    if x.start >= stop: break
    elif x.stop > start:
      fn(x)
  # restore in case `fn` moved the cursor
  L.cursor = old_cursor

iterator items*[T:Interval](L: Lapper[T]): T =
  ## yield every interval held by the Lapper, in sorted order
  for i in L.intervals: yield i

when isMainModule:

  import random
  import times
import strutils 230 | 231 | proc randomi(imin:int, imax:int): int = 232 | return imin + rand(imax - imin) 233 | 234 | proc brute_force(ivs: seq[Interval], start:int, stop:int, res: var seq[Interval]) = 235 | if res.len != 0: res.set_len(0) 236 | for i in ivs: 237 | if i.overlap(start, stop): res.add(i) 238 | 239 | # example implementation 240 | type myinterval = tuple[start:int, stop:int, val:int] 241 | proc start(m: myinterval): int {.inline.} = return m.start 242 | proc stop(m: myinterval): int {.inline.} = return m.stop 243 | 244 | proc make_random(n:int, range_max:int, size_min:int, size_max:int): seq[myinterval] = 245 | result = new_seq[myinterval](n) 246 | for i in 0..