├── .gitignore ├── .hgtags ├── Cargo.toml ├── Changelog.txt ├── Readme.md ├── flake.lock ├── flake.nix └── src └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target* 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 8700ee93b433f00a161a3f6262c2d773f82a5e24 master 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nested_intervals" 3 | version = "0.6.0" 4 | authors = ["Florian Finkernagel "] 5 | edition = "2018" 6 | description = "nested & overlapping interval set functions, overlap, union, etc" 7 | repository = "https://github.com/TyberiusPrime/nested_intervals" 8 | readme="Readme.md" 9 | keywords=["intervals"] 10 | categories = ["data-structures"] 11 | license = "MIT" 12 | 13 | [dependencies] 14 | superslice="1.0.0" 15 | itertools = "0.12.0" 16 | num = "0.4.1" 17 | -------------------------------------------------------------------------------- /Changelog.txt: -------------------------------------------------------------------------------- 1 | nested_intervals Changelog 2 | ===================================== 3 | 4 | 5 | 0.2.2 - fixed a bug in merge_split where outer intervals being split in "three" would not be processed correctly. 6 | 0.2.1 - removed feature(nll) 7 | 0.2.0 - switched from 'panic' input error handling to Result 8 | 0.1.9 - introduced overlap_status 9 | 0.1.8 - fixed a panic in merge_split with repeated ids 10 | 0.1.7 - refactoring only 11 | 0.1.6 - find_closest now correctly looks both directions 12 | 0.1.5 - has_overlap return false on empty (x..y is x>=y) intervals 13 | 0.1.4 - fixed a bug in merge_hull which would sometimes ommit the last interval 14 | 0.1.3 - added merge_connected 15 | 0.1.2 - ids in merge_hull now sorted like in merge_split 16 | 0.1.1 - fixed new_with_ids to sort ids correctly 17 | 0.1.0 - initial release 18 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | nested_intervals 2 | ----------------- 3 | [![Crates.io](https://img.shields.io/crates/d/nested_intervals.svg)](https://crates.io/crates/nested_intervals) 4 | [![docs.rs](https://docs.rs/nested_intervals/badge.svg)](https://docs.rs/nested_intervals/badge.svg) 5 | 6 | 7 | This crate deals with interval sets which are lists of 8 | [Ranges](https://doc.rust-lang.org/std/ops/struct.Range.html) that may be both 9 | overlapping and nested. 10 | 11 | The implementation is based on nested containment lists as proposed 12 | by [Alekseyenko et al. 2007](https://www.ncbi.nlm.nih.gov/pubmed/17234640), 13 | which offers the same big-O complexity s interval trees (O(n * log(n)) construction, 14 | O(n + m) queries). The construction of the query data structure is lazy and only happens 15 | the first time a method relying on it is called. 16 | 17 | Each interval has a vec of u32 ids attached, which allows linking back the results to 18 | other data structures. 19 | 20 | Full [documentation at docs.rs](https://docs.rs/nested_intervals) 21 | Source at [GitHub](https://github.com/TyberiusPrime/nested_intervals) 22 | 23 | 24 | Example 25 | -------- 26 | 27 | Code example: 28 | ``` 29 | fn test_example() { 30 | let intervals = vec![0..20, 15..30, 50..100]; 31 | let mut interval_set = IntervalSet::new(&intervals); 32 | assert_eq!(interval_set.ids, vec![vec![0], vec![1], vec![2]]); // automatic ids, use new_with_ids otherwise 33 | let hits = interval_set.query_overlapping(10..16); 34 | assert_eq!(hits.intervals, [0..20, 15..30]); 35 | let merged = hits.merge_hull(); 36 | assert_eq!(merged.intervals, [0..30]); 37 | assert_eq!(merged.ids, vec![vec![0,1]]); 38 | } 39 | ``` 40 | 41 | 42 | Functionality 43 | ------------- 44 | 45 | - [x] check for overlap with a query range -> 46 | [has_overlap](http://docs.rs/nested_intervals/struct.IntervalSet.html#method.has_overlap) 47 | - [x] query for overlapping with a query range -> 48 | [query_overlapping](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.query_overlapping) 49 | - [x] query for overlapping with a query set -> 50 | [filter_to_overlapping_multiple](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.filter_to_overlapping_multiple) 51 | - [x] query for non-overlapping with a query set -> 52 | [filter_to_non_overlapping](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.filter_to_non_overlapping) 53 | - [x] check for internally overlapping -> 54 | [any_overlapping](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.any_overlapping) 55 | - [x] check for internally nested -> 56 | [any_nested](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.any_nested) 57 | - [x] remove duplicate intervals (start&stop!)-> 58 | [remove_duplicates](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.remove_duplicates) 59 | - [x] remove duplicate intervals and complain about non-duplicate overlapping -> 60 | [remove_duplictes 61 | ](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.remove_duplictes 62 | & 63 | [any_overlapping](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.any_overlapping) 64 | - [x] remove empty intervals -> 65 | [remove_empty](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.remove_empty) 66 | - [x] merge internally overlapping by joining them -> 67 | [merge_hull](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.merge_hull) 68 | - [x] merge internally overlapping by dropping them -> 69 | [merge_drop](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.merge_drop) 70 | - [x] split internally overlapping into non-overlapping subsets (ie. [10..100, 20..30] becomes 71 | [10..20, 20..30, 30..100] -> 72 | [merge_split](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.merge_split) 73 | - [x] invert an interval set (given outer borders) -> 74 | [invert](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.invert) 75 | - [x] find the interval with the closest start -> 76 | [find_closest_start](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.find_closest_start) 77 | - [x] find the interval with the closest start to the left of a point -> 78 | [find_closest_start_left](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.find_closest_start_left) 79 | - [x] find the interval with the closest start to the right of a point -> 80 | [find_closest_start_right](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.find_closest_start_right) 81 | 82 | - [x] calculate the units covered by the intervals -> 83 | [covered_units](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.covered_units) 84 | - [x] find the mean size of the intervals -> 85 | [mean_interval_size](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.mean_interval_size) 86 | - [x] build the union of n interval sets -> union 87 | - [x] substract two interval sets -> 88 | [filter_to_non_overlapping](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.filter_to_non_overlapping) 89 | - [x] keep only those overlapping with n other sets -> 90 | [filter_to_overlapping_k_others](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.filter_to_overlapping_k_others) 91 | - [x] remove those overlapping with more than n other sets -> 92 | [filter_to_non_overlapping_k_others](https://docs.rs/nested_intervals/struct.IntervalSet.html#method.filter_to_non_overlapping_k_others) 93 | 94 | Not (yet) supported 95 | -------------------- 96 | We currently can not 97 | - [ ] find the interval with the closest end 98 | - [ ] find the interval with the closest end to the left of a point //going be expensive O(n/2) 99 | - [ ] find the interval with the closest end to the right of a point //going be 100 | expensiv O(n/2) 101 | 102 | - [ ] intersect two interval sects (ie. covered units in both sets) 103 | - [ ] intersect more than two interval sects (ie. covered units in multiple sets, possibly 104 | applying a 'k' threshold) 105 | 106 | - [ ] merge internally overlapping by intersecting them? What does than even mean 107 | for nested sets? 108 | 109 | 110 | 111 | # Changelog 112 | 113 | - 0.6.0 - Fixed a bug with has_overlap, which would produce false negatives if intervals with identical starts were in the dataset. 114 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "inputs": { 5 | "systems": "systems" 6 | }, 7 | "locked": { 8 | "lastModified": 1681202837, 9 | "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", 10 | "owner": "numtide", 11 | "repo": "flake-utils", 12 | "rev": "cfacdce06f30d2b68473a46042957675eebb3401", 13 | "type": "github" 14 | }, 15 | "original": { 16 | "owner": "numtide", 17 | "repo": "flake-utils", 18 | "type": "github" 19 | } 20 | }, 21 | "nixpkgs": { 22 | "locked": { 23 | "lastModified": 1696860751, 24 | "narHash": "sha256-/msth8BXFnhBfFAbf0V7U7qnLQ7k+eihdL9CIMpdmv8=", 25 | "owner": "NixOS", 26 | "repo": "nixpkgs", 27 | "rev": "235aaea29ad2d58623679fd1409bcf01c19502be", 28 | "type": "github" 29 | }, 30 | "original": { 31 | "owner": "NixOS", 32 | "repo": "nixpkgs", 33 | "rev": "235aaea29ad2d58623679fd1409bcf01c19502be", 34 | "type": "github" 35 | } 36 | }, 37 | "root": { 38 | "inputs": { 39 | "nixpkgs": "nixpkgs", 40 | "rust-overlay": "rust-overlay", 41 | "utils": "utils" 42 | } 43 | }, 44 | "rust-overlay": { 45 | "inputs": { 46 | "flake-utils": "flake-utils", 47 | "nixpkgs": [ 48 | "nixpkgs" 49 | ] 50 | }, 51 | "locked": { 52 | "lastModified": 1696817516, 53 | "narHash": "sha256-Xt9OY4Wnk9/vuUfA0OHFtmSlaen5GyiS9msgwOz3okI=", 54 | "owner": "oxalica", 55 | "repo": "rust-overlay", 56 | "rev": "c0df7f2a856b5ff27a3ce314f6d7aacf5fda546f", 57 | "type": "github" 58 | }, 59 | "original": { 60 | "owner": "oxalica", 61 | "repo": "rust-overlay", 62 | "type": "github" 63 | } 64 | }, 65 | "systems": { 66 | "locked": { 67 | "lastModified": 1681028828, 68 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 69 | "owner": "nix-systems", 70 | "repo": "default", 71 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 72 | "type": "github" 73 | }, 74 | "original": { 75 | "owner": "nix-systems", 76 | "repo": "default", 77 | "type": "github" 78 | } 79 | }, 80 | "systems_2": { 81 | "locked": { 82 | "lastModified": 1681028828, 83 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 84 | "owner": "nix-systems", 85 | "repo": "default", 86 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 87 | "type": "github" 88 | }, 89 | "original": { 90 | "owner": "nix-systems", 91 | "repo": "default", 92 | "type": "github" 93 | } 94 | }, 95 | "utils": { 96 | "inputs": { 97 | "systems": "systems_2" 98 | }, 99 | "locked": { 100 | "lastModified": 1694529238, 101 | "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", 102 | "owner": "numtide", 103 | "repo": "flake-utils", 104 | "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", 105 | "type": "github" 106 | }, 107 | "original": { 108 | "owner": "numtide", 109 | "repo": "flake-utils", 110 | "type": "github" 111 | } 112 | } 113 | }, 114 | "root": "root", 115 | "version": 7 116 | } 117 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | inputs = { 3 | nixpkgs.url = "github:NixOS/nixpkgs?rev=235aaea29ad2d58623679fd1409bcf01c19502be"; # that's 23.05 4 | utils.url = "github:numtide/flake-utils"; 5 | utils.inputs.nixpkgs.follows = "nixpkgs"; 6 | rust-overlay.url = "github:oxalica/rust-overlay"; 7 | rust-overlay.inputs.nixpkgs.follows = "nixpkgs"; 8 | #mozillapkgs = { 9 | #url = "github:mozilla/nixpkgs-mozilla"; 10 | #flake = false; 11 | #}; 12 | }; 13 | 14 | outputs = { 15 | self, 16 | nixpkgs, 17 | utils, 18 | rust-overlay, 19 | }: 20 | utils.lib.eachDefaultSystem (system: let 21 | #pkgs = nixpkgs.legacyPackages."${system}"; 22 | overlays = [(import rust-overlay)]; 23 | pkgs = import nixpkgs {inherit system overlays;}; 24 | rust = pkgs.rust-bin.stable."1.72.0".default.override { 25 | targets = ["x86_64-unknown-linux-musl"]; 26 | }; 27 | 28 | bacon = pkgs.bacon; 29 | in rec { 30 | # `nix build` 31 | # no binary... 32 | 33 | # `nix develop` 34 | devShell = pkgs.mkShell { 35 | # supply the specific rust version 36 | nativeBuildInputs = [ 37 | rust 38 | pkgs.rust-analyzer 39 | pkgs.git 40 | pkgs.cargo-udeps 41 | pkgs.cargo-crev 42 | pkgs.cargo-vet 43 | pkgs.cargo-outdated 44 | pkgs.cargo-audit 45 | bacon 46 | ]; 47 | }; 48 | }); 49 | } 50 | # { 51 | 52 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | use std::cell::RefCell; 3 | use std::cmp::{max, min, Ordering}; 4 | use std::collections::HashMap; 5 | use std::ops::Range; 6 | use superslice::*; 7 | 8 | trait FilterByBools { 9 | fn filter_by_bools(&self, keep: &[bool]) -> Vec; 10 | } 11 | 12 | impl FilterByBools for Vec 13 | where 14 | T: Clone, 15 | { 16 | fn filter_by_bools(&self, keep: &[bool]) -> Vec { 17 | if self.len() != keep.len() { 18 | panic!("v and keep had unequal length"); 19 | } 20 | self.iter() 21 | .enumerate() 22 | .filter(|(idx, _value)| keep[*idx]) 23 | .map(|(_idx, value)| value.clone()) 24 | .collect() 25 | } 26 | } 27 | 28 | /// the integer type for the interval's ranges. 29 | /// e.g. u32, u64 30 | pub trait Rangable: num::PrimInt {} 31 | 32 | impl Rangable for u16 {} 33 | impl Rangable for u32 {} 34 | impl Rangable for u64 {} 35 | impl Rangable for u128 {} 36 | 37 | impl Rangable for i16 {} 38 | impl Rangable for i32 {} 39 | impl Rangable for i64 {} 40 | impl Rangable for i128 {} 41 | 42 | /// IntervalSetGeneric 43 | /// 44 | /// A collection of Range and associated ids (u32). 45 | /// 46 | /// If no ids are supplied, default ones will be provided. 47 | /// Merging functions return sorted ids 48 | /// Little assumption is placed on the intervals, they may 49 | /// overlap and nest. They must be start <= end though. 50 | /// 51 | /// Our ranges are like Rust ranges, left closed, right open. 52 | /// 53 | /// Internal storage is sorted by (start, -end), which is enforced 54 | /// at construction time. 55 | #[derive(Debug)] 56 | pub struct IntervalSetGeneric { 57 | intervals: Vec>, 58 | ids: Vec>, 59 | root: RefCell>, 60 | } 61 | 62 | /// An IntervalSetGeneric 63 | pub type IntervalSet = IntervalSetGeneric; 64 | 65 | /// IntervalSetEntry 66 | /// 67 | /// Used internally to build the nested containment list 68 | /// Note that we do not reference the Intervals directly 69 | /// but hide them behind an index into IntervalSet.intervals (and .ids) 70 | /// thus avoiding all lifetime discussions but incuring a bit of 71 | /// runtime overhead. On the otherhand, we'd need two pointers 72 | /// to the .intervals and the .ids, or replace those by tuples 73 | /// 74 | #[derive(Debug)] 75 | struct IntervalSetEntry { 76 | no: i32, 77 | children: Vec, 78 | } 79 | 80 | impl Clone for IntervalSetGeneric { 81 | fn clone(&self) -> IntervalSetGeneric { 82 | IntervalSetGeneric { 83 | intervals: self.intervals.clone(), 84 | ids: self.ids.clone(), 85 | root: self.root.clone(), 86 | } 87 | } 88 | } 89 | 90 | impl Clone for IntervalSetEntry { 91 | fn clone(&self) -> IntervalSetEntry { 92 | IntervalSetEntry { 93 | no: self.no, 94 | children: self.children.clone(), 95 | } 96 | } 97 | } 98 | 99 | trait IntervalCollector { 100 | fn collect(&mut self, iset: &IntervalSetGeneric, no: u32); 101 | } 102 | 103 | struct VecIntervalCollector { 104 | intervals: Vec>, 105 | ids: Vec>, 106 | } 107 | 108 | impl VecIntervalCollector { 109 | fn new() -> VecIntervalCollector { 110 | VecIntervalCollector { 111 | intervals: Vec::new(), 112 | ids: Vec::new(), 113 | } 114 | } 115 | } 116 | 117 | impl IntervalCollector for VecIntervalCollector { 118 | fn collect(&mut self, iset: &IntervalSetGeneric, no: u32) { 119 | self.intervals.push(iset.intervals[no as usize].clone()); 120 | self.ids.push(iset.ids[no as usize].clone()); 121 | } 122 | } 123 | struct TagIntervalCollector { 124 | hit: Vec, 125 | } 126 | 127 | impl TagIntervalCollector { 128 | fn new(iset: &IntervalSetGeneric) -> TagIntervalCollector { 129 | TagIntervalCollector { 130 | hit: vec![false; iset.len()], 131 | } 132 | } 133 | } 134 | 135 | impl IntervalCollector for TagIntervalCollector { 136 | fn collect(&mut self, _iset: &IntervalSetGeneric, no: u32) { 137 | self.hit[no as usize] = true; 138 | } 139 | } 140 | 141 | /// nclists are based on sorting the intervals by (start, -end) 142 | #[allow(clippy::needless_return)] 143 | fn nclist_range_sort(a: &Range, b: &Range) -> Ordering { 144 | if a.start < b.start { 145 | return Ordering::Less; 146 | } else if a.start > b.start { 147 | return Ordering::Greater; 148 | } else if a.end > b.end { 149 | return Ordering::Less; // the magic trick to get contained intervals 150 | } else if a.end < b.end { 151 | return Ordering::Greater; 152 | } else { 153 | return Ordering::Equal; 154 | } 155 | } 156 | 157 | #[derive(Debug)] 158 | pub enum NestedIntervalError { 159 | ///a negative interval was passed, ie. stop < start 160 | NegativeInterval, 161 | ///intervals and ids had differing lengths 162 | IntervalIdMisMatch, 163 | } 164 | 165 | impl IntervalSetGeneric { 166 | /// Create an IntervalSet without supplying ids 167 | /// 168 | /// ids will be 0..n in the order of the *sorted* intervals 169 | pub fn new(intervals: &[Range]) -> Result, NestedIntervalError> { 170 | for r in intervals { 171 | if r.start >= r.end { 172 | return Err(NestedIntervalError::NegativeInterval); 173 | } 174 | } 175 | let mut iv = intervals.to_vec(); 176 | iv.sort_unstable_by(nclist_range_sort); 177 | let count = iv.len(); 178 | Ok(IntervalSetGeneric { 179 | intervals: iv, 180 | ids: (0..count).map(|x| vec![x as u32]).collect(), 181 | root: RefCell::new(None), 182 | }) 183 | } 184 | 185 | /// Create an IntervalSet 186 | /// 187 | /// Ids may be non-unique 188 | /// This consumes both the intervals and ids 189 | /// which should safe an allocation in the most common use case 190 | pub fn new_with_ids( 191 | intervals: &[Range], 192 | ids: &[u32], 193 | ) -> Result, NestedIntervalError> { 194 | let vec_ids = ids.iter().map(|x| vec![*x]).collect::>>(); 195 | Self::new_with_ids_multiple(intervals, vec_ids) 196 | } 197 | pub fn new_with_ids_multiple( 198 | intervals: &[Range], 199 | ids: Vec>, 200 | ) -> Result, NestedIntervalError> { 201 | for r in intervals { 202 | if r.start >= r.end { 203 | return Err(NestedIntervalError::NegativeInterval); 204 | } 205 | } 206 | if intervals.len() != ids.len() { 207 | return Err(NestedIntervalError::IntervalIdMisMatch); 208 | } 209 | let mut idx: Vec = (0..intervals.len()).collect(); 210 | idx.sort_unstable_by(|idx_a, idx_b| { 211 | nclist_range_sort(&intervals[*idx_a], &intervals[*idx_b]) 212 | }); 213 | let mut out_iv: Vec> = Vec::with_capacity(intervals.len()); 214 | let mut out_ids: Vec> = Vec::with_capacity(intervals.len()); 215 | for ii in 0..idx.len() { 216 | out_iv.push(intervals[idx[ii]].clone()); 217 | out_ids.push(ids[idx[ii]].clone()); 218 | } 219 | Ok(IntervalSetGeneric { 220 | intervals: out_iv, 221 | ids: out_ids, 222 | root: RefCell::new(None), 223 | }) 224 | } 225 | 226 | /// filter this interval set by a bool vec, true are kept 227 | fn new_filtered(&self, keep: &[bool]) -> IntervalSetGeneric { 228 | IntervalSetGeneric { 229 | intervals: self.intervals.filter_by_bools(keep), 230 | ids: self.ids.filter_by_bools(keep), 231 | root: RefCell::new(None), 232 | } 233 | } 234 | 235 | /// used by the merge functions to bypass the sorting and checking 236 | /// on already sorted & checked intervals 237 | fn new_presorted(intervals: Vec>, ids: Vec>) -> IntervalSetGeneric { 238 | IntervalSetGeneric { 239 | intervals, 240 | ids, 241 | root: RefCell::new(None), 242 | } 243 | } 244 | 245 | /// How many intervals are there? 246 | pub fn len(&self) -> usize { 247 | self.intervals.len() 248 | } 249 | 250 | /// Is the number of intervals 0? 251 | pub fn is_empty(&self) -> bool { 252 | self.len() == 0 253 | } 254 | 255 | /// internal build-the-nested-containment-list-tree 256 | fn build_tree( 257 | &self, 258 | parent: &mut IntervalSetEntry, 259 | it: &mut std::iter::Peekable< 260 | std::iter::Enumerate>>, 261 | >, 262 | ) { 263 | loop { 264 | match it.peek() { 265 | Some((_, next)) => { 266 | if (parent.no != -1) && (next.end > self.intervals[parent.no as usize].end) { 267 | return; 268 | } 269 | let (ii, r) = it.next().unwrap(); 270 | if r.start > r.end { 271 | //should be handled by the constructors 272 | panic!("invalid interval end < start"); 273 | } 274 | let entry = IntervalSetEntry { 275 | no: ii as i32, 276 | children: Vec::new(), 277 | }; 278 | parent.children.push(entry); 279 | self.build_tree(parent.children.last_mut().unwrap(), it); 280 | } 281 | None => { 282 | return; 283 | } 284 | } 285 | } 286 | } 287 | 288 | /// create the nclist if we don't have one yet 289 | fn ensure_nclist(&self) { 290 | let mut root = self.root.borrow_mut(); 291 | if root.is_none() { 292 | let mut new_root = IntervalSetEntry { 293 | no: -1, 294 | children: Vec::new(), 295 | }; 296 | self.build_tree( 297 | &mut new_root, 298 | &mut self.intervals.iter().enumerate().peekable(), 299 | ); 300 | *root = Some(new_root); 301 | } 302 | } 303 | 304 | fn depth_first_search>( 305 | &self, 306 | node: &IntervalSetEntry, 307 | query: &Range, 308 | collector: &mut S, 309 | ) { 310 | let children = &node.children[..]; 311 | //find the first interval that has a stop > query.start 312 | //this is also the left most interval in terms of start with such a stop 313 | 314 | ////Todo: is this the correct algorithm from the paper?! 315 | let first = children 316 | .upper_bound_by_key(&query.start, |entry| self.intervals[entry.no as usize].end); 317 | if first == children.len() { 318 | return; 319 | } 320 | for next in &children[first..] { 321 | let next_iv = &self.intervals[next.no as usize]; 322 | if !next_iv.overlaps(query) { 323 | return; 324 | } 325 | collector.collect(self, next.no as u32); 326 | if !next.children.is_empty() { 327 | self.depth_first_search(next, query, collector); 328 | } 329 | } 330 | } 331 | /// Is there any interval overlapping with the query? 332 | pub fn has_overlap(&self, query: &Range) -> Result { 333 | if query.start > query.end { 334 | return Err(NestedIntervalError::NegativeInterval); 335 | } 336 | self.ensure_nclist(); 337 | //has overlap is easy because all we have to do is scan the first level 338 | let binding = self.root.borrow(); 339 | let root = binding.as_ref(); 340 | let children = &root.unwrap().children[..]; 341 | //find the first interval that has a stop > query.start 342 | //this is also the left most interval in terms of start with such a stop 343 | let first = children 344 | .upper_bound_by_key(&query.start, |entry| self.intervals[entry.no as usize].end); 345 | if first == children.len() { 346 | dbg!("no entry larger"); 347 | // ie no entry larger... 348 | return Ok(false); 349 | } 350 | let next = &self.intervals[children[first].no as usize]; 351 | //no need to descend - 352 | Ok(next.overlaps(query)) 353 | } 354 | 355 | /// create an iterator over ```(Range, &vec![id])``` tuples. 356 | pub fn iter( 357 | &self, 358 | ) -> std::iter::Zip>, std::slice::Iter<'_, Vec>> 359 | { 360 | self.intervals.iter().zip(self.ids.iter()) 361 | } 362 | 363 | /// retrieve a new IntervalSet with all intervals overlapping the query 364 | pub fn query_overlapping(&self, query: &Range) -> IntervalSetGeneric { 365 | self.ensure_nclist(); 366 | let mut collector = VecIntervalCollector::new(); 367 | self.depth_first_search(self.root.borrow().as_ref().unwrap(), query, &mut collector); 368 | IntervalSetGeneric::new_presorted(collector.intervals, collector.ids) 369 | } 370 | 371 | /// does this IntervalSet contain overlapping intervals? 372 | pub fn any_overlapping(&self) -> bool { 373 | for (next, last) in self.intervals.iter().skip(1).zip(self.intervals.iter()) { 374 | if last.overlaps(next) { 375 | return true; 376 | } 377 | } 378 | false 379 | } 380 | /// which intervals are overlapping? 381 | /// 382 | /// Result is a Vec 383 | pub fn overlap_status(&self) -> Vec { 384 | let mut result = vec![false; self.intervals.len()]; 385 | for (ii, (next, last)) in self 386 | .intervals 387 | .iter() 388 | .skip(1) 389 | .zip(self.intervals.iter()) 390 | .enumerate() 391 | { 392 | if last.overlaps(next) { 393 | result[ii] = true; // ii starts at 0 394 | result[ii + 1] = true; 395 | } 396 | } 397 | result 398 | } 399 | 400 | /// does this IntervalSet contain nested intervals? 401 | pub fn any_nested(&self) -> bool { 402 | self.ensure_nclist(); 403 | for entry in self.root.borrow().as_ref().unwrap().children.iter() { 404 | if !entry.children.is_empty() { 405 | return true; 406 | } 407 | } 408 | false 409 | } 410 | 411 | /// remove intervals that have the same coordinates 412 | /// 413 | /// Ids are **not** merged, the first set is being kept 414 | pub fn remove_duplicates(&self) -> IntervalSetGeneric { 415 | let mut keep = vec![false; self.len()]; 416 | keep[0] = true; 417 | for (ix, (v1, v2)) in self 418 | .intervals 419 | .iter() 420 | .skip(1) 421 | .zip(self.intervals.iter()) 422 | .enumerate() 423 | { 424 | keep[ix + 1] = v1 != v2; 425 | } 426 | self.new_filtered(&keep) 427 | } 428 | 429 | /// remove empty intervals, ie. those with start == end 430 | pub fn remove_empty(&self) -> IntervalSetGeneric { 431 | let keep: Vec = self.intervals.iter().map(|r| r.start != r.end).collect(); 432 | self.new_filtered(&keep) 433 | } 434 | 435 | /// Merge overlapping & nested intervals to their outer bounds 436 | /// 437 | /// Examples: 438 | /// - 0..15, 10..20 -> 0..20 439 | /// - 0..20, 3..5 -> 0..20 440 | pub fn merge_hull(&self) -> IntervalSetGeneric { 441 | let mut new_intervals: Vec> = Vec::new(); 442 | let mut new_ids: Vec> = Vec::new(); 443 | let mut it = self.intervals.iter().zip(self.ids.iter()).peekable(); 444 | while let Some(this_element) = it.next() { 445 | let mut this_iv = this_element.0.start..this_element.0.end; 446 | let mut this_ids = this_element.1.clone(); 447 | while let Some(next) = it.peek() { 448 | if next.0.start < this_iv.end { 449 | if next.0.end > this_iv.end { 450 | this_iv.end = next.0.end; 451 | } 452 | this_ids.extend_from_slice(next.1); 453 | it.next(); // consume that one! 454 | } else { 455 | break; 456 | } 457 | } 458 | new_intervals.push(this_iv); 459 | this_ids.sort(); 460 | new_ids.push(this_ids) 461 | } 462 | IntervalSetGeneric::new_presorted(new_intervals, new_ids) 463 | } 464 | 465 | /// Merge intervals that are butted up against each other 466 | /// 467 | ///This first induces a merge_hull()! 468 | /// 469 | /// Examples: 470 | /// - 0..15, 15..20 -> 0..20 471 | /// - 0..15, 16..20, 20..30 > 0..15, 16..30 472 | pub fn merge_connected(&self) -> IntervalSetGeneric { 473 | let hull = self.merge_hull(); 474 | let mut new_intervals: Vec> = Vec::new(); 475 | let mut new_ids: Vec> = Vec::new(); 476 | let mut it = hull.intervals.iter().zip(hull.ids.iter()).peekable(); 477 | while let Some(this_element) = it.next() { 478 | let mut this_iv = this_element.0.start..this_element.0.end; 479 | let mut this_ids = this_element.1.clone(); 480 | while let Some(next) = it.peek() { 481 | if next.0.start == this_iv.end { 482 | if next.0.end > this_iv.end { 483 | this_iv.end = next.0.end; 484 | } 485 | this_ids.extend_from_slice(next.1); 486 | it.next(); // consume that one! 487 | } else { 488 | break; 489 | } 490 | } 491 | new_intervals.push(this_iv); 492 | this_ids.sort(); 493 | new_ids.push(this_ids) 494 | } 495 | IntervalSetGeneric::new_presorted(new_intervals, new_ids) 496 | } 497 | 498 | /// Remove all intervals that are overlapping or nested 499 | /// by simply dropping them. 500 | /// 501 | /// Examples: 502 | /// - 0..20, 10..15, 20..35, 30..40, 40..50 -> 40..50 503 | /// 504 | /// Ids of the remaining intervals are unchanged 505 | pub fn merge_drop(&self) -> IntervalSetGeneric { 506 | let mut keep = vec![true; self.len()]; 507 | let mut last_stop = T::zero(); 508 | for ii in 0..self.len() { 509 | if self.intervals[ii].start < last_stop { 510 | keep[ii] = false; 511 | keep[ii - 1] = false; 512 | } 513 | if self.intervals[ii].end > last_stop { 514 | last_stop = self.intervals[ii].end; 515 | } 516 | } 517 | self.new_filtered(&keep) 518 | } 519 | 520 | /// Create fully disjoint intervals by splitting 521 | /// the existing ones based on their overlap. 522 | /// 523 | /// Example: 524 | /// - 0..20, 10..30 -> 0..10, 10..20, 20..30 525 | /// 526 | /// Ids are merged, ie. in the above example, 527 | /// if the input ids are ```[[0], [1]]```, the output ids are 528 | /// ```[[0], [0,1], [1]]``` 529 | /// 530 | /// merge_split on an already disjoint set is a no-op 531 | pub fn merge_split(&self) -> IntervalSetGeneric { 532 | #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] 533 | enum SiteKind { 534 | End, 535 | Start, 536 | } 537 | #[derive(Debug)] 538 | struct Site { 539 | pos: T, 540 | kind: SiteKind, 541 | id: Vec, 542 | } 543 | let mut sites: Vec> = Vec::new(); 544 | for (iv, id) in self.intervals.iter().zip(self.ids.iter()) { 545 | sites.push(Site { 546 | pos: iv.start, 547 | kind: SiteKind::Start, 548 | id: id.clone(), 549 | }); 550 | sites.push(Site { 551 | pos: iv.end, 552 | kind: SiteKind::End, 553 | id: id.clone(), 554 | }); 555 | } 556 | sites.sort_by_key(|x| (x.pos, x.kind)); 557 | let mut new_intervals = Vec::new(); 558 | let mut new_ids: Vec> = Vec::new(); 559 | if !sites.is_empty() { 560 | let mut it = sites.iter_mut(); 561 | let mut last = it.next().unwrap(); 562 | let mut last_ids: HashMap = HashMap::new(); 563 | for id in &last.id { 564 | last_ids.insert(*id, 1); 565 | } 566 | for next in it { 567 | //if (last.kind == SiteKind::Start) 568 | // || ((last.kind == SiteKind::End) && (next.kind == SiteKind::End)) 569 | { 570 | if last.pos != next.pos && !last_ids.is_empty() { 571 | new_intervals.push(last.pos..next.pos); 572 | let mut ids_here: Vec = last_ids.keys().cloned().collect(); 573 | ids_here.sort(); 574 | new_ids.push(ids_here); 575 | } 576 | } 577 | match next.kind { 578 | SiteKind::Start => { 579 | for id in &next.id { 580 | *last_ids.entry(*id).or_insert(0) += 1; 581 | } 582 | } 583 | SiteKind::End => { 584 | for id in &next.id { 585 | *last_ids.get_mut(id).unwrap() -= 1; 586 | } 587 | last_ids.retain(|_k, v| (*v > 0)); 588 | } 589 | } 590 | last = next; 591 | } 592 | } 593 | IntervalSetGeneric { 594 | intervals: new_intervals, 595 | ids: new_ids, 596 | root: RefCell::new(None), 597 | } 598 | } 599 | 600 | /// find the interval with the closest start to the left of pos 601 | /// None if there are no intervals to the left of pos 602 | pub fn find_closest_start_left(&self, pos: T) -> Option<(Range, Vec)> { 603 | let first = self.intervals.upper_bound_by_key(&pos, |entry| entry.start); 604 | if first == 0 { 605 | return None; 606 | } 607 | let prev = first - 1; 608 | Some((self.intervals[prev].clone(), self.ids[prev].clone())) 609 | } 610 | 611 | /// find the interval with the closest start to the right of pos 612 | /// None if there are no intervals to the right of pos 613 | pub fn find_closest_start_right(&self, pos: T) -> Option<(Range, Vec)> { 614 | let first = self 615 | .intervals 616 | .upper_bound_by_key(&pos, |entry| entry.start + T::one()); 617 | // since this the first element strictly greater, we have to do -1 618 | if first == self.len() { 619 | return None; 620 | } 621 | Some((self.intervals[first].clone(), self.ids[first].clone())) 622 | } 623 | 624 | /// find the interval with the closest start to pos 625 | /// 626 | /// None if the IntervalSet is empty 627 | /// On a tie, the left interval wins. 628 | pub fn find_closest_start(&self, pos: T) -> Option<(Range, Vec)> { 629 | let left = self.find_closest_start_left(pos); 630 | let right = self.find_closest_start_right(pos); 631 | match (left, right) { 632 | (None, None) => None, 633 | (Some(l), None) => Some(l), 634 | (None, Some(r)) => Some(r), 635 | (Some(l), Some(r)) => { 636 | /* let distance_left = (i64::from(l.0.start) - i64::from(pos)).abs(); 637 | let distance_right = (i64::from(r.0.start) - i64::from(pos)).abs(); */ 638 | let distance_left = if l.0.start > pos { 639 | l.0.start - pos 640 | } else { 641 | pos - l.0.start 642 | }; 643 | let distance_right = if r.0.start > pos { 644 | r.0.start - pos 645 | } else { 646 | pos - r.0.start 647 | }; 648 | 649 | if distance_left <= distance_right { 650 | Some(l) 651 | } else { 652 | Some(r) 653 | } 654 | } 655 | } 656 | } 657 | 658 | /// how many units in interval space does this IntervalSet cover 659 | pub fn covered_units(&self) -> T { 660 | let merged = self.merge_hull(); 661 | let mut total = T::zero(); 662 | for iv in merged.intervals.iter() { 663 | total = total + iv.end - iv.start; 664 | } 665 | total 666 | } 667 | 668 | /// What is the mean size of the intervals 669 | #[allow(clippy::cast_lossless)] 670 | pub fn mean_interval_size(&self) -> f64 { 671 | let mut total = T::zero(); 672 | for iv in self.intervals.iter() { 673 | total = total + iv.end - iv.start; 674 | } 675 | let total: f64 = total.to_f64().unwrap(); 676 | total / self.len() as f64 677 | } 678 | 679 | /// Invert the intervals in this set 680 | /// 681 | /// Actual applied lower_bound is min(lower_bound, first_interval_start) 682 | /// Actual applied upper_bound is max(upper_bound, last_interval_end) 683 | /// 684 | /// Examples 685 | /// - invert([15..20], 0, 30) -> [0..15, 20..30] 686 | /// - invert([15..20], 20, 30) -> [0..15, 20..30] 687 | /// 688 | /// Ids are lost 689 | pub fn invert(&self, lower_bound: T, upper_bound: T) -> IntervalSetGeneric { 690 | let mut new_intervals: Vec> = Vec::new(); 691 | let mut new_ids: Vec> = Vec::new(); 692 | 693 | if self.is_empty() { 694 | new_intervals.push(lower_bound..upper_bound); 695 | new_ids.push(vec![0]); 696 | } else { 697 | let lower = min(lower_bound, self.intervals[0].start); 698 | let upper = max(upper_bound, self._highest_end().unwrap()); 699 | let n = self.merge_hull(); 700 | let mut paired = vec![lower]; 701 | for iv in n.intervals { 702 | paired.push(iv.start); 703 | paired.push(iv.end); 704 | } 705 | paired.push(upper); 706 | new_intervals.extend( 707 | paired 708 | .chunks(2) 709 | .filter(|se| se[0] != se[1]) 710 | .map(|se| se[0]..se[1]), 711 | ); 712 | new_ids.extend((0..new_intervals.len()).map(|x| vec![x as u32])); 713 | } 714 | IntervalSetGeneric::new_presorted(new_intervals, new_ids) 715 | } 716 | 717 | /// Filter to those intervals that have an overlap in other. 718 | pub fn filter_to_overlapping( 719 | &self, 720 | other: &IntervalSetGeneric, 721 | ) -> IntervalSetGeneric { 722 | //I'm not certain this is the fastest way to do this - but it does work 723 | //depending on the number of queries, it might be faster 724 | //to do it the other way around, or do full scan of all entries here 725 | //(we have to visit them any how to check the keep tags) 726 | self.ensure_nclist(); 727 | let mut collector = TagIntervalCollector::new(self); 728 | for q in other.intervals.iter() { 729 | self.depth_first_search(self.root.borrow().as_ref().unwrap(), q, &mut collector); 730 | } 731 | self.new_filtered(&collector.hit) 732 | } 733 | 734 | /// Filter to those intervals having an overlap in other, and split/truncate them 735 | /// so the resulting intervals are fully contained within the intervals covered by other. 736 | /// ids are being kept. 737 | /// 738 | /// so 10..20 vs 5..11, 15..17, 18..30 -> 10..11, 15..17, 18..20 739 | pub fn filter_to_overlapping_and_split( 740 | &self, 741 | other: &IntervalSetGeneric, 742 | ) -> IntervalSetGeneric { 743 | let mut out_ivs = Vec::new(); 744 | let mut out_ids = Vec::new(); 745 | 746 | let other = other.merge_connected(); 747 | for (ii, iv) in self.intervals.iter().enumerate() { 748 | let id = &self.ids[ii]; 749 | let overlapping = other.query_overlapping(iv).merge_connected(); 750 | for (new_iv, _) in overlapping.iter() { 751 | //remember, these are disjoint 752 | let start = max(iv.start, new_iv.start); 753 | let stop = min(iv.end, new_iv.end); 754 | out_ivs.push(start..stop); 755 | out_ids.push(id.clone()); 756 | } 757 | } 758 | 759 | IntervalSetGeneric::new_with_ids_multiple(&out_ivs, out_ids) 760 | .expect("Unexpected failure in rebuilding intervals") 761 | } 762 | 763 | /// Filter to those intervals that have no overlap in other. 764 | pub fn filter_to_non_overlapping( 765 | &self, 766 | other: &IntervalSetGeneric, 767 | ) -> IntervalSetGeneric { 768 | //I'm not certain this is the fastest way to do this - but it does work 769 | //depending on the number of queries, it might be faster 770 | //to do it the other way around, or do full scan of all entries here 771 | 772 | let keep: Result, NestedIntervalError> = self 773 | .intervals 774 | .iter() 775 | .enumerate() 776 | .map(|(_ii, iv)| other.has_overlap(iv)) 777 | .map_ok(|x| !x) 778 | .collect(); 779 | match keep { 780 | Ok(keep) => self.new_filtered(&keep), 781 | Err(_) => panic!( 782 | "Negative intervals encountered inside IntervalSets - check input sanity code" 783 | ), 784 | } 785 | } 786 | 787 | /// Filter to those intervals that have an overlap in at least k others. 788 | pub fn filter_to_overlapping_k_others( 789 | &self, 790 | others: &[&IntervalSetGeneric], 791 | min_k: u32, 792 | ) -> IntervalSetGeneric { 793 | //I'm not certain this is the fastest way to do this - but it does work 794 | //depending on the number of queries, it might be faster 795 | //to do it the other way around, or do full scan of all entries here 796 | //(we have to visit them any how to check the keep tags) 797 | 798 | let counts = self._count_overlapping(others); 799 | let mut keep = vec![false; self.len()]; 800 | for (ii, value) in counts.iter().enumerate() { 801 | if *value >= min_k { 802 | keep[ii] = true; 803 | } 804 | } 805 | self.new_filtered(&keep) 806 | } 807 | /// Filter to those intervals that have an overlap in no more than k others 808 | pub fn filter_to_non_overlapping_k_others( 809 | &self, 810 | others: &[&IntervalSetGeneric], 811 | max_k: u32, 812 | ) -> IntervalSetGeneric { 813 | //I'm not certain this is the fastest way to do this - but it does work 814 | //depending on the number of queries, it might be faster 815 | //to do it the other way around, or do full scan of all entries here 816 | //(we have to visit them any how to check the keep tags) 817 | let counts = self._count_overlapping(others); 818 | let mut keep = vec![false; self.len()]; 819 | for (ii, value) in counts.iter().enumerate() { 820 | if *value <= max_k { 821 | keep[ii] = true; 822 | } 823 | } 824 | self.new_filtered(&keep) 825 | } 826 | 827 | /// Build the union of two IntervalSets 828 | /// 829 | /// No merging is performed 830 | pub fn union(&self, others: &[&IntervalSetGeneric]) -> IntervalSetGeneric { 831 | let mut new_intervals: Vec> = Vec::new(); 832 | new_intervals.extend_from_slice(&self.intervals); 833 | for o in others.iter() { 834 | new_intervals.extend_from_slice(&o.intervals); 835 | } 836 | IntervalSetGeneric::new(&new_intervals[..]).unwrap() 837 | } 838 | 839 | /// find the highest interval.end in our data set 840 | fn _highest_end(&self) -> Option { 841 | match self.root.borrow().as_ref() { 842 | // if we have a nclist we can just look at those intervals. 843 | Some(root) => root 844 | .children 845 | .iter() 846 | .map(|entry| self.intervals[entry.no as usize].end) 847 | .max(), 848 | //and if we don't we have to brutforce, I believe, since our list is sorted by start 849 | None => self.intervals.iter().map(|range| range.end).max(), 850 | } 851 | } 852 | 853 | fn _count_overlapping(&self, others: &[&IntervalSetGeneric]) -> Vec { 854 | self.ensure_nclist(); 855 | let mut counts: Vec = vec![0; self.len()]; 856 | for o in others { 857 | let mut collector = TagIntervalCollector::new(self); 858 | for q in o.intervals.iter() { 859 | self.depth_first_search(self.root.borrow().as_ref().unwrap(), q, &mut collector); 860 | } 861 | for (ii, value) in collector.hit.iter().enumerate() { 862 | if *value { 863 | counts[ii] += 1; 864 | } 865 | } 866 | } 867 | counts 868 | } 869 | } 870 | 871 | impl Eq for IntervalSetGeneric {} 872 | impl PartialEq for IntervalSetGeneric { 873 | fn eq(&self, other: &IntervalSetGeneric) -> bool { 874 | (self.intervals == other.intervals) && (self.ids == other.ids) 875 | } 876 | } 877 | 878 | /// Extend Range functionality with some often used bool functions 879 | pub trait RangePlus { 880 | /// Does this interval overlap the other one? 881 | fn overlaps(&self, other: &Range) -> bool; 882 | } 883 | 884 | impl RangePlus for Range { 885 | fn overlaps(&self, other: &Range) -> bool { 886 | self.start < other.end && other.start < self.end && other.start < other.end 887 | } 888 | } 889 | 890 | #[cfg(test)] 891 | #[allow(dead_code)] 892 | #[allow(clippy::single_range_in_vec_init)] 893 | mod tests { 894 | use crate::{IntervalSet, IntervalSetGeneric}; 895 | use std::ops::Range; 896 | #[test] 897 | fn test_has_overlap() { 898 | let r = vec![0..5, 10..15]; 899 | let n = IntervalSet::new(&r).unwrap(); 900 | assert!(n.has_overlap(&(3..4)).unwrap()); 901 | assert!(n.has_overlap(&(5..20)).unwrap()); 902 | assert!(!n.has_overlap(&(6..10)).unwrap()); 903 | assert!(!n.has_overlap(&(100..110)).unwrap()); 904 | assert!(!n.has_overlap(&(3..3)).unwrap()); 905 | 906 | let r2 = vec![0..15, 0..6]; 907 | let n = IntervalSet::new(&r2).unwrap(); 908 | assert!(n.has_overlap(&(3..4)).unwrap()); 909 | assert!(n.has_overlap(&(5..20)).unwrap()); 910 | assert!(n.has_overlap(&(6..10)).unwrap()); 911 | assert!(!n.has_overlap(&(20..30)).unwrap()); 912 | 913 | let r2 = vec![100..150, 30..40, 200..400]; 914 | let n = IntervalSet::new(&r2).unwrap(); 915 | assert!(n.has_overlap(&(101..102)).unwrap()); 916 | assert!(n.has_overlap(&(149..150)).unwrap()); 917 | assert!(n.has_overlap(&(39..99)).unwrap()); 918 | assert!(n.has_overlap(&(29..99)).unwrap()); 919 | assert!(n.has_overlap(&(19..99)).unwrap()); 920 | assert!(!n.has_overlap(&(0..5)).unwrap()); 921 | assert!(!n.has_overlap(&(0..29)).unwrap()); 922 | assert!(!n.has_overlap(&(0..30)).unwrap()); 923 | assert!(n.has_overlap(&(0..31)).unwrap()); 924 | assert!(!n.has_overlap(&(40..41)).unwrap()); 925 | assert!(!n.has_overlap(&(40..99)).unwrap()); 926 | assert!(!n.has_overlap(&(40..100)).unwrap()); 927 | assert!(n.has_overlap(&(40..101)).unwrap()); 928 | assert!(n.has_overlap(&(399..400)).unwrap()); 929 | assert!(!n.has_overlap(&(400..4000)).unwrap()); 930 | } 931 | 932 | #[test] 933 | fn test_iter() { 934 | let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap(); 935 | let c: Vec<(&Range, &Vec)> = n.iter().collect(); 936 | assert!(!c.is_empty()); 937 | let c: Vec> = c 938 | .iter() 939 | .map(|(interval, _id)| (*interval).clone()) 940 | .collect(); 941 | dbg!(&c); 942 | assert!(c == vec![30..40, 100..150, 200..400, 250..300]); 943 | } 944 | 945 | #[test] 946 | fn test_query() { 947 | let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap(); 948 | let c = n.query_overlapping(&(0..5)); 949 | assert!(c.is_empty()); 950 | let c = n.query_overlapping(&(0..31)); 951 | assert_eq!(c.intervals, vec![30..40]); 952 | let c = n.query_overlapping(&(200..250)); 953 | assert_eq!(c.intervals, vec![200..400]); 954 | let c = n.query_overlapping(&(200..251)); 955 | assert_eq!(c.intervals, vec![200..400, 250..300]); 956 | let c = n.query_overlapping(&(0..1000)); 957 | dbg!(&c); 958 | assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]); 959 | let c = n.query_overlapping(&(401..1000)); 960 | assert!(c.is_empty()); 961 | } 962 | #[test] 963 | fn test_query_multiple() { 964 | let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap(); 965 | let c = n.filter_to_overlapping(&IntervalSet::new(&[0..5, 0..105]).unwrap()); 966 | assert_eq!(c.intervals, vec![30..40, 100..150]); 967 | let c = n.filter_to_overlapping(&IntervalSet::new(&[500..600, 550..700]).unwrap()); 968 | assert!(c.is_empty()); 969 | let c = n.filter_to_overlapping(&IntervalSet::new(&[45..230]).unwrap()); 970 | assert_eq!(c.intervals, vec![100..150, 200..400]); 971 | let c = n.filter_to_overlapping(&IntervalSet::new(&[45..101, 101..230]).unwrap()); 972 | assert_eq!(c.intervals, vec![100..150, 200..400]); 973 | } 974 | 975 | #[test] 976 | fn test_query_multiple_non_overlapping() { 977 | let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap(); 978 | let c = n.filter_to_non_overlapping(&IntervalSet::new(&[0..5, 0..105]).unwrap()); 979 | assert_eq!(c.intervals, vec![200..400, 250..300]); 980 | let c = n.filter_to_non_overlapping(&IntervalSet::new(&[500..600, 550..700]).unwrap()); 981 | assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]); 982 | let c = n.filter_to_non_overlapping(&IntervalSet::new(&[0..600]).unwrap()); 983 | assert!(c.is_empty()); 984 | let c = n.filter_to_non_overlapping(&IntervalSet::new(&[45..230]).unwrap()); 985 | assert_eq!(c.intervals, vec![30..40, 250..300]); 986 | let c = n.filter_to_non_overlapping(&IntervalSet::new(&[45..101, 101..230]).unwrap()); 987 | assert_eq!(c.intervals, vec![30..40, 250..300]); 988 | } 989 | 990 | #[test] 991 | fn test_any_overlapping() { 992 | let n = IntervalSet::new(&[100..150]).unwrap(); 993 | assert!(!n.any_overlapping()); 994 | let n = IntervalSet::new(&[100..150, 200..300]).unwrap(); 995 | assert!(!n.any_overlapping()); 996 | let n = IntervalSet::new(&[100..150, 150..300]).unwrap(); 997 | assert!(!n.any_overlapping()); 998 | let n = IntervalSet::new(&[100..151, 150..300]).unwrap(); 999 | assert!(n.any_overlapping()); 1000 | let n = IntervalSet::new(&[100..151, 105..110]).unwrap(); 1001 | assert!(n.any_overlapping()); 1002 | let n = IntervalSet::new(&[100..151, 105..110, 0..1000]).unwrap(); 1003 | assert!(n.any_overlapping()); 1004 | let n = IntervalSet::new(&[100..150, 150..210, 0..1000]).unwrap(); 1005 | assert!(n.any_overlapping()); 1006 | let n = IntervalSet::new(&[100..150, 150..210, 0..130]).unwrap(); 1007 | assert!(n.any_overlapping()); 1008 | let n = IntervalSet::new(&[100..150, 150..210, 150..250]).unwrap(); 1009 | assert!(n.any_overlapping()); 1010 | let n = IntervalSet::new(&[100..150, 150..210, 149..250]).unwrap(); 1011 | assert!(n.any_overlapping()); 1012 | let n = IntervalSet::new(&[100..150, 150..210, 209..250]).unwrap(); 1013 | assert!(n.any_overlapping()); 1014 | } 1015 | 1016 | #[test] 1017 | fn test_any_nested() { 1018 | assert!(!IntervalSet::new(&[]).unwrap().any_nested()); 1019 | assert!(!IntervalSet::new(&[100..150]).unwrap().any_nested()); 1020 | assert!(!IntervalSet::new(&[100..150, 150..300]) 1021 | .unwrap() 1022 | .any_nested()); 1023 | assert!(!IntervalSet::new(&[100..151, 150..300]) 1024 | .unwrap() 1025 | .any_nested()); 1026 | assert!(IntervalSet::new(&[100..151, 150..300, 100..130]) 1027 | .unwrap() 1028 | .any_nested()); 1029 | assert!(IntervalSet::new(&[100..151, 150..300, 0..1000]) 1030 | .unwrap() 1031 | .any_nested()); 1032 | } 1033 | 1034 | #[test] 1035 | fn test_remove_duplicates() { 1036 | let n = IntervalSet::new(&[100..150]).unwrap().remove_duplicates(); 1037 | assert!(!n.any_overlapping()); 1038 | assert_eq!(n.len(), 1); 1039 | 1040 | let n = IntervalSet::new(&[30..40, 30..40, 100..150]) 1041 | .unwrap() 1042 | .remove_duplicates(); 1043 | assert!(!n.any_overlapping()); 1044 | assert_eq!(n.len(), 2); 1045 | let n = IntervalSet::new(&[30..40, 30..40, 35..150]) 1046 | .unwrap() 1047 | .remove_duplicates(); 1048 | assert_eq!(n.len(), 2); 1049 | let n = IntervalSet::new_with_ids( 1050 | &[30..40, 30..40, 35..150, 35..150, 36..38], 1051 | &[55, 56, 57, 58, 59], 1052 | ) 1053 | .unwrap() 1054 | .remove_duplicates(); 1055 | assert_eq!(n.len(), 3); 1056 | dbg!(&n); 1057 | assert_eq!(n.ids, vec![vec![55], vec![57], vec![59]]); 1058 | } 1059 | 1060 | #[test] 1061 | fn test_merge_hull() { 1062 | let n = IntervalSet::new(&[100..150, 120..180, 110..115, 200..201]) 1063 | .unwrap() 1064 | .merge_hull(); 1065 | assert_eq!(n.intervals, vec![100..180, 200..201]); 1066 | assert_eq!(n.ids, vec![vec![0, 1, 2], vec![3]]); 1067 | assert!(!n.any_overlapping()); 1068 | 1069 | let n = IntervalSet::new(&[100..150]).unwrap().merge_hull(); 1070 | assert_eq!(n.len(), 1); 1071 | assert!(!n.any_overlapping()); 1072 | 1073 | let n = IntervalSet::new(&[100..150, 120..180]) 1074 | .unwrap() 1075 | .merge_hull(); 1076 | assert_eq!(n.len(), 1); 1077 | assert!(!n.any_overlapping()); 1078 | assert_eq!(n.intervals, vec![100..180]); 1079 | assert_eq!(n.ids, vec![vec![0, 1]]); 1080 | 1081 | let n = IntervalSet::new(&[100..150, 120..180, 110..115]) 1082 | .unwrap() 1083 | .merge_hull(); 1084 | assert!(n.len() == 1); 1085 | assert!(!n.any_overlapping()); 1086 | assert_eq!(n.intervals, vec![100..180]); 1087 | assert_eq!(n.ids, vec![vec![0, 1, 2]]); 1088 | 1089 | let n = 1090 | IntervalSet::new_with_ids(&[300..400, 400..450, 450..500, 510..520], &[20, 10, 30, 40]) 1091 | .unwrap(); 1092 | assert_eq!(n.intervals, vec![300..400, 400..450, 450..500, 510..520]); 1093 | } 1094 | 1095 | #[test] 1096 | fn test_merge_drop() { 1097 | let n = IntervalSet::new(&[]).unwrap().merge_drop(); 1098 | assert_eq!(n.len(), 0); 1099 | assert!(!n.any_overlapping()); 1100 | 1101 | let n = IntervalSet::new(&[100..150]).unwrap().merge_drop(); 1102 | assert_eq!(n.len(), 1); 1103 | assert!(!n.any_overlapping()); 1104 | 1105 | let n = IntervalSet::new(&[100..150, 120..180]) 1106 | .unwrap() 1107 | .merge_drop(); 1108 | assert_eq!(n.len(), 0); 1109 | assert!(!n.any_overlapping()); 1110 | assert_eq!(n.intervals, vec![]); 1111 | assert_eq!(n.ids, Vec::>::new()); 1112 | 1113 | let n = IntervalSet::new(&[100..150, 120..180, 200..250]) 1114 | .unwrap() 1115 | .merge_drop(); 1116 | assert_eq!(n.len(), 1); 1117 | assert!(!n.any_overlapping()); 1118 | assert_eq!(n.intervals, vec![200..250]); 1119 | assert_eq!(n.ids, vec![vec![2]]); 1120 | 1121 | let n = IntervalSet::new(&[100..150, 120..180, 200..250, 106..110]) 1122 | .unwrap() 1123 | .merge_drop(); 1124 | assert_eq!(n.len(), 1); 1125 | assert!(!n.any_overlapping()); 1126 | assert_eq!(n.intervals, vec![200..250]); 1127 | assert_eq!(n.ids, vec![vec![3]]); 1128 | 1129 | let n = IntervalSet::new(&[100..150, 120..180, 200..250, 106..110, 80..105]) 1130 | .unwrap() 1131 | .merge_drop(); 1132 | assert_eq!(n.len(), 1); 1133 | assert!(!n.any_overlapping()); 1134 | assert_eq!(n.intervals, vec![200..250]); 1135 | assert_eq!(n.ids, vec![vec![4]]); 1136 | 1137 | let n = IntervalSet::new(&[100..150, 120..180, 200..250, 106..110, 80..105, 30..40]) 1138 | .unwrap() 1139 | .merge_drop(); 1140 | assert_eq!(n.len(), 2); 1141 | assert!(!n.any_overlapping()); 1142 | assert_eq!(n.intervals, vec![30..40, 200..250]); 1143 | assert_eq!(n.ids, vec![vec![0], vec![5]]); 1144 | 1145 | let n = IntervalSet::new(&[ 1146 | 100..150, 1147 | 120..180, 1148 | 200..250, 1149 | 106..110, 1150 | 80..105, 1151 | 30..40, 1152 | 400..405, 1153 | ]) 1154 | .unwrap() 1155 | .merge_drop(); 1156 | assert_eq!(n.len(), 3); 1157 | assert!(!n.any_overlapping()); 1158 | assert_eq!(n.intervals, vec![30..40, 200..250, 400..405]); 1159 | assert_eq!(n.ids, vec![vec![0], vec![5], vec![6]]); 1160 | let n = IntervalSet::new(&[0..20, 10..15, 20..35, 30..40, 40..50]) 1161 | .unwrap() 1162 | .merge_drop(); 1163 | assert_eq!(n.intervals, vec![40..50]); 1164 | } 1165 | 1166 | #[test] 1167 | fn test_find_closest_start_left() { 1168 | let n = IntervalSet::new(&[ 1169 | 30..40, 1170 | 80..105, 1171 | 100..150, 1172 | 106..110, 1173 | 106..120, 1174 | 107..125, 1175 | 120..180, 1176 | 200..250, 1177 | 400..405, 1178 | ]) 1179 | .unwrap(); 1180 | //find the first range that has an end to the left of this 1181 | assert!(n.find_closest_start_left(29).is_none()); 1182 | assert_eq!(n.find_closest_start_left(100).unwrap(), (100..150, vec![2])); 1183 | assert_eq!(n.find_closest_start_left(105).unwrap(), (100..150, vec![2])); 1184 | assert_eq!(n.find_closest_start_left(106).unwrap(), (106..110, vec![4])); 1185 | assert_eq!(n.find_closest_start_left(109).unwrap(), (107..125, vec![5])); 1186 | assert_eq!(n.find_closest_start_left(110).unwrap(), (107..125, vec![5])); 1187 | assert_eq!(n.find_closest_start_left(111).unwrap(), (107..125, vec![5])); 1188 | assert_eq!(n.find_closest_start_left(120).unwrap(), (120..180, vec![6])); 1189 | assert_eq!(n.find_closest_start_left(121).unwrap(), (120..180, vec![6])); 1190 | assert_eq!(n.find_closest_start_left(125).unwrap(), (120..180, vec![6])); 1191 | assert_eq!(n.find_closest_start_left(127).unwrap(), (120..180, vec![6])); 1192 | assert_eq!( 1193 | n.find_closest_start_left(121000).unwrap(), 1194 | (400..405, vec![8]) 1195 | ); 1196 | let n = IntervalSet::new(&[]).unwrap(); 1197 | assert!(n.find_closest_start_left(29).is_none()); 1198 | } 1199 | 1200 | #[test] 1201 | fn test_find_closest_start_right() { 1202 | let n = IntervalSet::new(&[ 1203 | 30..40, 1204 | 80..105, 1205 | 100..150, 1206 | 106..110, 1207 | 106..120, 1208 | 107..125, 1209 | 120..180, 1210 | 200..250, 1211 | 400..405, 1212 | ]) 1213 | .unwrap(); 1214 | //find the first range that has an end to the right of this 1215 | assert_eq!(n.find_closest_start_right(10).unwrap(), (30..40, vec![0])); 1216 | assert_eq!(n.find_closest_start_right(29).unwrap(), (30..40, vec![0])); 1217 | assert_eq!(n.find_closest_start_right(30).unwrap(), (30..40, vec![0])); 1218 | assert_eq!(n.find_closest_start_right(31).unwrap(), (80..105, vec![1])); 1219 | assert_eq!(n.find_closest_start_right(99).unwrap(), (100..150, vec![2])); 1220 | assert_eq!( 1221 | n.find_closest_start_right(100).unwrap(), 1222 | (100..150, vec![2]) 1223 | ); 1224 | assert_eq!( 1225 | n.find_closest_start_right(101).unwrap(), 1226 | (106..120, vec![3]) 1227 | ); 1228 | assert_eq!( 1229 | n.find_closest_start_right(107).unwrap(), 1230 | (107..125, vec![5]) 1231 | ); 1232 | assert_eq!( 1233 | n.find_closest_start_right(110).unwrap(), 1234 | (120..180, vec![6]) 1235 | ); 1236 | assert_eq!( 1237 | n.find_closest_start_right(111).unwrap(), 1238 | (120..180, vec![6]) 1239 | ); 1240 | assert_eq!( 1241 | n.find_closest_start_right(120).unwrap(), 1242 | (120..180, vec![6]) 1243 | ); 1244 | assert_eq!( 1245 | n.find_closest_start_right(121).unwrap(), 1246 | (200..250, vec![7]) 1247 | ); 1248 | assert_eq!( 1249 | n.find_closest_start_right(125).unwrap(), 1250 | (200..250, vec![7]) 1251 | ); 1252 | assert_eq!( 1253 | n.find_closest_start_right(127).unwrap(), 1254 | (200..250, vec![7]) 1255 | ); 1256 | assert!(n.find_closest_start_right(121000).is_none()); 1257 | let n = IntervalSet::new(&[]).unwrap(); 1258 | assert!(n.find_closest_start_right(29).is_none()); 1259 | } 1260 | 1261 | #[test] 1262 | fn test_find_closest_start() { 1263 | let n = IntervalSet::new(&[]).unwrap(); 1264 | assert!(n.find_closest_start(100).is_none()); 1265 | let n = IntervalSet::new(&[100..110, 200..300]).unwrap(); 1266 | assert_eq!(n.find_closest_start(0).unwrap(), (100..110, vec![0])); 1267 | assert_eq!(n.find_closest_start(100).unwrap(), (100..110, vec![0])); 1268 | assert_eq!(n.find_closest_start(149).unwrap(), (100..110, vec![0])); 1269 | assert_eq!(n.find_closest_start(150).unwrap(), (100..110, vec![0])); 1270 | assert_eq!(n.find_closest_start(151).unwrap(), (200..300, vec![1])); 1271 | assert_eq!(n.find_closest_start(251).unwrap(), (200..300, vec![1])); 1272 | assert_eq!(n.find_closest_start(351).unwrap(), (200..300, vec![1])); 1273 | 1274 | let n = IntervalSet::new(&[10..11, 1000..1110]).unwrap(); 1275 | assert_eq!(n.find_closest_start(5).unwrap(), (10..11, vec![0])); 1276 | let n = IntervalSet::new(&[ 1277 | 566564..667063, 1278 | 569592..570304, 1279 | 713866..714288, 1280 | 935162..937142, 1281 | 1051311..1052403, 1282 | 1279151..1281233, 1283 | 1282803..1283631, 1284 | 1310387..1311060, 1285 | 1337193..1337881, 1286 | 1447089..1447626, 1287 | ]) 1288 | .unwrap(); 1289 | 1290 | assert_eq!( 1291 | n.find_closest_start(570000).unwrap(), 1292 | (569592..570304, vec![1]) 1293 | ); 1294 | } 1295 | 1296 | #[test] 1297 | fn test_covered_units() { 1298 | let n = IntervalSet::new(&[]).unwrap(); 1299 | assert_eq!(n.covered_units(), 0); 1300 | let n = IntervalSet::new(&[10..100]).unwrap(); 1301 | assert_eq!(n.covered_units(), 90); 1302 | let n = IntervalSet::new(&[10..100, 200..300]).unwrap(); 1303 | assert_eq!(n.covered_units(), 90 + 100); 1304 | let n = IntervalSet::new(&[10..100, 200..300, 15..99]).unwrap(); 1305 | assert_eq!(n.covered_units(), 90 + 100); 1306 | let n = IntervalSet::new(&[10..100, 200..300, 15..99, 15..105]).unwrap(); 1307 | assert_eq!(n.covered_units(), 90 + 100 + 5); 1308 | } 1309 | 1310 | #[test] 1311 | fn test_mean_interval_size() { 1312 | let n = IntervalSet::new(&[]).unwrap(); 1313 | assert!(n.mean_interval_size().is_nan()); 1314 | let n = IntervalSet::new(&[10..100]).unwrap(); 1315 | assert_eq!(n.mean_interval_size(), 90.); 1316 | let n = IntervalSet::new(&[10..100, 200..300]).unwrap(); 1317 | assert_eq!(n.mean_interval_size(), (90 + 100) as f64 / 2.0); 1318 | let n = IntervalSet::new(&[10..100, 200..300, 15..99]).unwrap(); 1319 | assert_eq!(n.mean_interval_size(), (90 + 100 + (99 - 15)) as f64 / 3.0); 1320 | let n = IntervalSet::new(&[10..100, 200..300, 15..99, 15..105]).unwrap(); 1321 | assert_eq!( 1322 | n.mean_interval_size(), 1323 | (((100 - 10) + (300 - 200) + (99 - 15) + (105 - 15)) as f64 / 4.0) 1324 | ); 1325 | } 1326 | 1327 | #[test] 1328 | fn test_invert() { 1329 | let n = IntervalSet::new(&[]).unwrap().invert(0, 100); 1330 | assert_eq!(n.intervals, vec![0..100,]); 1331 | assert_eq!(n.ids, vec![vec![0]]); 1332 | let n = IntervalSet::new(&[30..40]).unwrap().invert(0, 100); 1333 | assert_eq!(n.intervals, vec![0..30, 40..100,]); 1334 | assert_eq!(n.ids, vec![vec![0], vec![1]]); 1335 | let n = IntervalSet::new(&[30..40, 35..38]).unwrap().invert(0, 100); 1336 | assert_eq!(n.intervals, vec![0..30, 40..100,]); 1337 | assert_eq!(n.ids, vec![vec![0], vec![1]]); 1338 | let n = IntervalSet::new(&[30..40, 35..38, 35..50]) 1339 | .unwrap() 1340 | .invert(0, 100); 1341 | assert_eq!(n.intervals, vec![0..30, 50..100,]); 1342 | assert_eq!(n.ids, vec![vec![0], vec![1]]); 1343 | let n = IntervalSet::new(&[30..40, 35..38, 35..50]) 1344 | .unwrap() 1345 | .invert(40, 100); 1346 | assert_eq!(n.intervals, vec![50..100,]); 1347 | assert_eq!(n.ids, vec![vec![0]]); 1348 | let n = IntervalSet::new(&[30..40, 35..38, 35..50, 55..60]) 1349 | .unwrap() 1350 | .invert(40, 40); 1351 | assert_eq!(n.intervals, vec![50..55]); 1352 | assert_eq!(n.ids, vec![vec![0]]); 1353 | let n = IntervalSet::new(&[30..40, 35..38, 35..50]) 1354 | .unwrap() 1355 | .invert(40, 40); 1356 | 1357 | assert!(n.intervals.is_empty()); 1358 | assert!(n.ids.is_empty()); 1359 | 1360 | // this of course only works for distinct intervals 1361 | let n = IntervalSet::new(&[10..20, 35..38, 40..50]).unwrap(); 1362 | let ni = n.invert(0, 50); 1363 | let nii = ni.invert(0, 50); 1364 | assert_eq!(n.intervals, nii.intervals); 1365 | 1366 | let n = IntervalSet::new(&[10..36, 35..38, 40..50]).unwrap(); 1367 | let ni = n.invert(0, 100); 1368 | let n2 = n.union(&[&ni]).merge_connected(); 1369 | assert_eq!(n2.intervals, vec![0..100]); 1370 | 1371 | let n = IntervalSet::new(&[]).unwrap(); 1372 | let ni = n.invert(0, 100); 1373 | assert_eq!(ni.intervals, vec![0..100]); 1374 | } 1375 | 1376 | #[test] 1377 | fn test_union() { 1378 | let n = IntervalSet::new(&[]) 1379 | .unwrap() 1380 | .union(&[&IntervalSet::new(&[0..100]).unwrap()]); 1381 | assert_eq!(n.intervals, vec![0..100]); 1382 | 1383 | let n = IntervalSet::new(&[0..10]) 1384 | .unwrap() 1385 | .union(&[&IntervalSet::new(&[0..100]).unwrap()]); 1386 | assert_eq!(n.intervals, vec![0..100, 0..10]); 1387 | 1388 | let n = IntervalSet::new(&[0..10]) 1389 | .unwrap() 1390 | .union(&[&IntervalSet::new(&[0..100, 200..300]).unwrap()]); 1391 | assert_eq!(n.intervals, vec![0..100, 0..10, 200..300]); 1392 | assert_eq!(n.ids, vec![vec![0], vec![1], vec![2]]); 1393 | 1394 | let n = IntervalSet::new(&[0..10]) 1395 | .unwrap() 1396 | .union(&[&IntervalSet::new(&[]).unwrap()]); 1397 | assert_eq!(n.intervals, vec![0..10]); 1398 | let n = IntervalSet::new(&[0..10]).unwrap().union(&[ 1399 | &IntervalSet::new(&[0..100]).unwrap(), 1400 | &IntervalSet::new(&[200..300]).unwrap(), 1401 | ]); 1402 | assert_eq!(n.intervals, vec![0..100, 0..10, 200..300]); 1403 | assert_eq!(n.ids, vec![vec![0], vec![1], vec![2]]); 1404 | } 1405 | 1406 | #[test] 1407 | fn test_substract() { 1408 | let n = IntervalSet::new(&[]) 1409 | .unwrap() 1410 | .filter_to_non_overlapping(&IntervalSet::new(&[0..100]).unwrap()); 1411 | assert!(n.intervals.is_empty()); 1412 | 1413 | let n = IntervalSet::new(&[0..10]) 1414 | .unwrap() 1415 | .filter_to_non_overlapping(&IntervalSet::new(&[0..100]).unwrap()); 1416 | assert!(n.intervals.is_empty()); 1417 | 1418 | let n = IntervalSet::new(&[0..10, 100..150]) 1419 | .unwrap() 1420 | .filter_to_non_overlapping(&IntervalSet::new(&[0..100]).unwrap()); 1421 | assert_eq!(n.intervals, vec![100..150]); 1422 | 1423 | let n = IntervalSet::new(&[0..10, 100..150, 150..300]) 1424 | .unwrap() 1425 | .filter_to_non_overlapping(&IntervalSet::new(&[55..101]).unwrap()); 1426 | assert_eq!(n.intervals, vec![0..10, 150..300]); 1427 | assert_eq!(n.ids, vec![vec![0], vec![2]]); 1428 | 1429 | let n = IntervalSet::new(&[0..10, 5..6, 100..150, 150..300]) 1430 | .unwrap() 1431 | .filter_to_non_overlapping(&IntervalSet::new(&[55..101]).unwrap()); 1432 | assert_eq!(n.intervals, vec![0..10, 5..6, 150..300]); 1433 | assert_eq!(n.ids, vec![vec![0], vec![1], vec![3]]); 1434 | } 1435 | 1436 | #[test] 1437 | fn test_filter_overlapping_multiples() { 1438 | let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap(); 1439 | let c = n.filter_to_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 1); 1440 | assert_eq!(c.intervals, vec![30..40, 100..150]); 1441 | let c = n.filter_to_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 0); 1442 | assert_eq!(c, n); 1443 | let c = n.filter_to_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 2); 1444 | assert!(c.is_empty()); 1445 | 1446 | let c = n.filter_to_overlapping_k_others( 1447 | &[ 1448 | &IntervalSet::new(&[0..35]).unwrap(), 1449 | &IntervalSet::new(&[0..160]).unwrap(), 1450 | ], 1451 | 2, 1452 | ); 1453 | assert_eq!(c.intervals, vec![30..40,]); 1454 | let c = n.filter_to_overlapping_k_others( 1455 | &[ 1456 | &IntervalSet::new(&[0..35]).unwrap(), 1457 | &IntervalSet::new(&[0..160]).unwrap(), 1458 | ], 1459 | 1, 1460 | ); 1461 | assert_eq!(c.intervals, vec![30..40, 100..150]); 1462 | } 1463 | 1464 | #[test] 1465 | fn test_filter_non_overlapping_multiples() { 1466 | let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap(); 1467 | let c = 1468 | n.filter_to_non_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 1); 1469 | assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]); 1470 | let c = 1471 | n.filter_to_non_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 0); 1472 | assert_eq!(c.intervals, vec![200..400, 250..300]); 1473 | let c = 1474 | n.filter_to_non_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 2); 1475 | assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]); 1476 | 1477 | let c = n.filter_to_non_overlapping_k_others( 1478 | &[ 1479 | &IntervalSet::new(&[0..35]).unwrap(), 1480 | &IntervalSet::new(&[0..160]).unwrap(), 1481 | ], 1482 | 2, 1483 | ); 1484 | assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]); 1485 | let c = n.filter_to_non_overlapping_k_others( 1486 | &[ 1487 | &IntervalSet::new(&[0..35]).unwrap(), 1488 | &IntervalSet::new(&[0..160]).unwrap(), 1489 | ], 1490 | 1, 1491 | ); 1492 | assert_eq!(c.intervals, vec![100..150, 200..400, 250..300]); 1493 | } 1494 | 1495 | #[test] 1496 | fn test_split() { 1497 | let n = IntervalSet::new(&[0..100, 20..30]).unwrap(); 1498 | let c = n.merge_split(); 1499 | assert_eq!(c.intervals, [0..20, 20..30, 30..100]); 1500 | assert_eq!(c.ids, vec![vec![0], vec![0, 1,], vec![0]]); 1501 | 1502 | let n = IntervalSet::new(&[0..100, 0..90, 70..95, 110..150]).unwrap(); 1503 | let c = n.merge_split(); 1504 | assert_eq!(c.intervals, [0..70, 70..90, 90..95, 95..100, 110..150]); 1505 | assert_eq!( 1506 | c.ids, 1507 | vec![vec![0, 1], vec![0, 1, 2], vec![0, 2], vec![0], vec![3]] 1508 | ); 1509 | let n = 1510 | IntervalSet::new_with_ids(&[0..100, 0..90, 70..95, 110..150], &[100, 200, 300, 400]) 1511 | .unwrap(); 1512 | let c = n.merge_split(); 1513 | assert_eq!(c.intervals, [0..70, 70..90, 90..95, 95..100, 110..150]); 1514 | assert_eq!( 1515 | c.ids, 1516 | vec![ 1517 | vec![100, 200], 1518 | vec![100, 200, 300], 1519 | vec![100, 300], 1520 | vec![100], 1521 | vec![400] 1522 | ] 1523 | ); 1524 | let d = c.merge_split(); 1525 | assert_eq!(c, d); 1526 | 1527 | let n = IntervalSet::new_with_ids(&[0..100, 5..10, 15..20], &[100, 200, 300]).unwrap(); 1528 | let c = n.merge_split(); 1529 | assert_eq!(c.intervals, [0..5, 5..10, 10..15, 15..20, 20..100]); 1530 | assert_eq!( 1531 | c.ids, 1532 | vec![ 1533 | vec![100], 1534 | vec![100, 200], 1535 | vec![100], 1536 | vec![100, 300], 1537 | vec![100] 1538 | ] 1539 | ); 1540 | 1541 | let n = IntervalSet::new_with_ids(&[0..100, 5..50, 10..15, 25..75], &[100, 200, 300, 400]) 1542 | .unwrap(); 1543 | let c = n.merge_split(); 1544 | assert_eq!( 1545 | c.intervals, 1546 | [0..5, 5..10, 10..15, 15..25, 25..50, 50..75, 75..100] 1547 | ); 1548 | assert_eq!( 1549 | c.ids, 1550 | vec![ 1551 | vec![100], // 0..5 1552 | vec![100, 200], // 5..10 1553 | vec![100, 200, 300], //10..15 1554 | vec![100, 200], //15..25 1555 | vec![100, 200, 400], //25..50 1556 | vec![100, 400], //50..75 1557 | vec![100] //75..100 1558 | ] 1559 | ); 1560 | } 1561 | 1562 | #[test] 1563 | fn test_example() { 1564 | let intervals = vec![0..20, 15..30, 50..100]; 1565 | let interval_set = IntervalSet::new(&intervals).unwrap(); 1566 | assert_eq!(interval_set.ids, vec![vec![0], vec![1], vec![2]]); // automatic ids, use new_with_ids otherwise 1567 | let hits = interval_set.query_overlapping(&(10..16)); 1568 | assert_eq!(hits.intervals, [0..20, 15..30]); 1569 | let merged = hits.merge_hull(); 1570 | assert_eq!(merged.intervals, [0..30]); 1571 | assert_eq!(merged.ids, vec![vec![0, 1]]); 1572 | } 1573 | 1574 | #[test] 1575 | fn test_new_with_ids_sorting() { 1576 | let n = IntervalSet::new_with_ids(&[300..400, 30..40], &[20, 10]).unwrap(); 1577 | assert_eq!(n.intervals, [30..40, 300..400]); 1578 | assert_eq!(n.ids, vec![vec![10], vec![20]]); 1579 | } 1580 | 1581 | #[test] 1582 | fn test_merge_connected() { 1583 | let n = IntervalSet::new_with_ids(&[300..400, 400..450, 450..500], &[20, 10, 30]).unwrap(); 1584 | assert_eq!(n.merge_hull().intervals, vec![300..400, 400..450, 450..500]); 1585 | let n = n.merge_connected(); 1586 | assert_eq!(n.intervals, [300..500]); 1587 | assert_eq!(n.ids, vec![vec![10, 20, 30],]); 1588 | 1589 | let n = IntervalSet::new_with_ids(&[300..400, 400..450, 451..500], &[20, 10, 30]) 1590 | .unwrap() 1591 | .merge_connected(); 1592 | assert_eq!(n.intervals, [300..450, 451..500]); 1593 | assert_eq!(n.ids, vec![vec![10, 20], vec![30]]); 1594 | let n = 1595 | IntervalSet::new_with_ids(&[300..400, 400..450, 451..500, 350..355], &[20, 10, 30, 40]) 1596 | .unwrap() 1597 | .merge_connected(); 1598 | assert_eq!(n.intervals, [300..450, 451..500]); 1599 | assert_eq!(n.ids, vec![vec![10, 20, 40], vec![30]]); 1600 | let n = 1601 | IntervalSet::new_with_ids(&[300..400, 400..450, 450..500, 510..520], &[20, 10, 30, 40]) 1602 | .unwrap() 1603 | .merge_connected(); 1604 | assert_eq!(n.intervals, vec![300..500, 510..520]); 1605 | } 1606 | 1607 | #[test] 1608 | fn test_clone() { 1609 | let n = IntervalSet::new_with_ids(&[300..400, 400..450, 450..500], &[20, 10, 30]).unwrap(); 1610 | let n2 = n.clone(); 1611 | assert_eq!(n.intervals, n2.intervals); 1612 | assert_eq!(n.ids, n2.ids); 1613 | assert!(n2.root.borrow().is_none()); 1614 | n.has_overlap(&(0..1)).unwrap(); 1615 | assert!(n2.root.borrow().is_none()); 1616 | let n2 = n.clone(); 1617 | assert!(n2.root.borrow().is_some()); 1618 | } 1619 | 1620 | #[test] 1621 | fn test_split_repeated_ids() { 1622 | let intervals = vec![ 1623 | 10736170..10736283, 1624 | 10939387..10939423, 1625 | 10940596..10940707, 1626 | 10941690..10941780, 1627 | 10944966..10945053, 1628 | 10947303..10947418, 1629 | 10949211..10949269, 1630 | 10950048..10950174, 1631 | 10959066..10959136, 1632 | 10961282..10961338, 1633 | 10939423..10940596, 1634 | 10940707..10941690, 1635 | 10941780..10944966, 1636 | 10945053..10947303, 1637 | 10947418..10949211, 1638 | 10949269..10950048, 1639 | 10950174..10959066, 1640 | 10959136..10961282, 1641 | 11066417..11066515, 1642 | 11067984..11068174, 1643 | 11066515..11067984, 1644 | 11124336..11124379, 1645 | 11124507..11125705, 1646 | 11124379..11124507, 1647 | 11249808..11249959, 1648 | 12602465..12602527, 1649 | 12604669..12604739, 1650 | 12615408..12615534, 1651 | 12616313..12616371, 1652 | 12618170..12618289, 1653 | 12620837..12620938, 1654 | 12624241..12624331, 1655 | 12625316..12625428, 1656 | 12626606..12626642, 1657 | 12602527..12604669, 1658 | 12604739..12615408, 1659 | 12615534..12616313, 1660 | 12616371..12618170, 1661 | 12618289..12620837, 1662 | 12620938..12624241, 1663 | 12624331..12625316, 1664 | 12625428..12626606, 1665 | 15273854..15273961, 1666 | 15282556..15288670, 1667 | 15290717..15290836, 1668 | 15290994..15291024, 1669 | 15295331..15295410, 1670 | 15295729..15295832, 1671 | 15297156..15297196, 1672 | 15290836..15290994, 1673 | 15291024..15295331, 1674 | 15295410..15295729, 1675 | 15295832..15297156, 1676 | 15298377..15304556, 1677 | 15326036..15327756, 1678 | 15342359..15342426, 1679 | 15342944..15343065, 1680 | 15327756..15342359, 1681 | 15342426..15342944, 1682 | 15349466..15350043, 1683 | 15489989..15490131, 1684 | 15490871..15490947, 1685 | 15492485..15492564, 1686 | 15490131..15490871, 1687 | 15490947..15492485, 1688 | 15489989..15490131, 1689 | 15490871..15490947, 1690 | 15492485..15492564, 1691 | 15541435..15541607, 1692 | 15558445..15558626, 1693 | 15575611..15575786, 1694 | 15577966..15578006, 1695 | 15541607..15558445, 1696 | 15558626..15575611, 1697 | 15575786..15577966, 1698 | 15550903..15552887, 1699 | 15553210..15553586, 1700 | 15553690..15553838, 1701 | 15552887..15553210, 1702 | 15553586..15553690, 1703 | 15557576..15558591, 1704 | 15560155..15560264, 1705 | 15560524..15560694, 1706 | 15558591..15560155, 1707 | 15560264..15560524, 1708 | 15562016..15564276, 1709 | 15572088..15573265, 1710 | 15588360..15588478, 1711 | 15600907..15602514, 1712 | 15604051..15604133, 1713 | 15604841..15604882, 1714 | 15602514..15604051, 1715 | 15604133..15604841, 1716 | 15611758..15613096, 1717 | 15615401..15615578, 1718 | 15622600..15622712, 1719 | 15623986..15624071, 1720 | 15624538..15624674, 1721 | 15624955..15625094, 1722 | ]; 1723 | let ids = vec![ 1724 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10, 5, 5, 9, 5, 5, 9, 6, 5, 1725 | 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 5, 6, 5, 5, 5, 5, 5, 9, 9, 9, 9, 6, 6, 1726 | 6, 6, 10, 10, 5, 5, 5, 5, 9, 9, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 6, 6, 6, 10, 10, 6, 6, 6, 1727 | 10, 10, 6, 6, 5, 6, 6, 6, 10, 10, 6, 6, 6, 6, 6, 6, 1728 | ]; 1729 | 1730 | let n = IntervalSet::new_with_ids(&intervals, &ids).unwrap(); 1731 | n.merge_split(); 1732 | } 1733 | 1734 | #[test] 1735 | fn test_merge_split_non_remains_overlapping() { 1736 | let intervals = vec![ 1737 | 15489989..15490131, 1738 | 15490871..15490947, 1739 | 15492485..15492564, 1740 | 15490131..15490871, 1741 | 15490947..15492485, 1742 | 15489989..15490131, 1743 | 15490871..15490947, 1744 | 15492485..15492564, 1745 | 15541435..15541607, 1746 | 15558445..15558626, 1747 | ]; 1748 | let ids = vec![5, 5, 5, 9, 9, 5, 5, 5, 5, 5]; 1749 | let n = IntervalSet::new_with_ids(&intervals, &ids) 1750 | .unwrap() 1751 | .merge_split(); 1752 | assert!(!n.any_overlapping()); 1753 | } 1754 | 1755 | #[test] 1756 | fn test_overlap_status() { 1757 | let intervals = vec![ 1758 | 0..100, 1759 | 50..150, 1760 | 150..200, 1761 | 201..230, 1762 | 230..250, 1763 | 249..500, 1764 | 550..600, 1765 | ]; 1766 | let n = IntervalSet::new(&intervals).unwrap(); 1767 | assert_eq!( 1768 | n.overlap_status(), 1769 | vec![true, true, false, false, true, true, false] 1770 | ); 1771 | } 1772 | 1773 | #[test] 1774 | fn test_has_overlap_u64() { 1775 | let r = vec![0..5, 10..15]; 1776 | let n = IntervalSetGeneric::::new(&r).unwrap(); 1777 | assert!(n.has_overlap(&(3..4)).unwrap()); 1778 | assert!(n.has_overlap(&(5..20)).unwrap()); 1779 | assert!(!n.has_overlap(&(6..10)).unwrap()); 1780 | assert!(!n.has_overlap(&(100..110)).unwrap()); 1781 | assert!(!n.has_overlap(&(3..3)).unwrap()); 1782 | } 1783 | 1784 | #[test] 1785 | fn test_has_overlap_i128() { 1786 | let r = vec![-50..-40, 10..15]; 1787 | let n = IntervalSetGeneric::::new(&r).unwrap(); 1788 | assert!(n.has_overlap(&(-45..46)).unwrap()); 1789 | assert!(n.has_overlap(&(-50..-49)).unwrap()); 1790 | assert!(!n.has_overlap(&(-39..-38)).unwrap()); 1791 | assert!(!n.has_overlap(&(-40..-39)).unwrap()); 1792 | assert!(n.has_overlap(&(5..20)).unwrap()); 1793 | assert!(!n.has_overlap(&(6..10)).unwrap()); 1794 | assert!(!n.has_overlap(&(100..110)).unwrap()); 1795 | assert!(!n.has_overlap(&(3..3)).unwrap()); 1796 | } 1797 | 1798 | #[test] 1799 | fn test_filter_overlapping_split_basic() { 1800 | let a = vec![0..100]; 1801 | let ids = [1]; 1802 | let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap(); 1803 | let b = vec![0..100]; 1804 | let other = IntervalSet::new(&b).unwrap(); 1805 | let c = iv_a.filter_to_overlapping_and_split(&other); 1806 | assert_eq!(c.intervals, vec![0..100]); 1807 | assert_eq!(c.ids, vec![vec![1]]); 1808 | } 1809 | 1810 | #[test] 1811 | fn test_filter_overlapping_split_basic2() { 1812 | let a = vec![0..100]; 1813 | let ids = [1]; 1814 | let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap(); 1815 | let b = vec![25..50]; 1816 | let other = IntervalSet::new(&b).unwrap(); 1817 | let c = iv_a.filter_to_overlapping_and_split(&other); 1818 | assert_eq!(c.intervals, vec![25..50]); 1819 | assert_eq!(c.ids, vec![vec![1]]); 1820 | } 1821 | #[test] 1822 | fn test_filter_overlapping_split() { 1823 | let a = vec![0..100, 200..300, 300..450, 490..510]; 1824 | let ids = [1, 2, 3, 4]; 1825 | let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap(); 1826 | let b = vec![0..50, 50..60, 75..99, 350..500]; 1827 | let other = IntervalSet::new(&b).unwrap(); 1828 | let c = iv_a.filter_to_overlapping_and_split(&other); 1829 | assert_eq!(c.intervals, vec![0..60, 75..99, 350..450, 490..500]); 1830 | assert_eq!( 1831 | c.ids, 1832 | vec![vec![1], vec![1], vec![3], vec![4]] 1833 | ); 1834 | } 1835 | 1836 | #[test] 1837 | fn test_filter_overlapping_split_2() { 1838 | let a = vec![0..100, 100..300, 300..450, 490..510]; 1839 | let ids = [1, 2, 3, 4]; 1840 | let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap(); 1841 | let b = vec![0..50, 50..60, 75..110, 350..500]; 1842 | let other = IntervalSet::new(&b).unwrap(); 1843 | let c = iv_a.filter_to_overlapping_and_split(&other); 1844 | assert_eq!(c.intervals, vec![0..60, 75..100, 100..110, 350..450, 490..500]); 1845 | assert_eq!( 1846 | c.ids, 1847 | vec![vec![1], vec![1], vec![2], vec![3], vec![4]] 1848 | ); 1849 | } 1850 | 1851 | #[test] 1852 | fn test_filter_overlapping_split_doc() { 1853 | let a = vec![10..20]; 1854 | let ids = [99]; 1855 | let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap(); 1856 | let b = vec![5..11, 15..17, 18..30]; 1857 | let other = IntervalSet::new(&b).unwrap(); 1858 | let c = iv_a.filter_to_overlapping_and_split(&other); 1859 | assert_eq!(c.intervals, vec![10..11, 15..17, 18..20]); 1860 | assert_eq!( 1861 | c.ids, 1862 | vec![vec![99], vec![99], vec![99]] 1863 | ); 1864 | } 1865 | #[test] 1866 | fn test_identical_starts () { 1867 | 1868 | let a = vec![ 1869 | (1.. 10), 1870 | (20.. 30), 1871 | (20.. 40), 1872 | (50.. 100), 1873 | ]; 1874 | let iv_a = IntervalSet::new(&a).unwrap(); 1875 | for range in a { 1876 | println!("{:?}", range); 1877 | println!("{}",iv_a.has_overlap(&(range.start..range.end)).unwrap()); 1878 | assert!(iv_a.has_overlap(&(range.start..range.end)).unwrap()); 1879 | } 1880 | 1881 | assert!(iv_a.query_overlapping(&(1.. 10)).intervals.len() == 1); 1882 | assert!(iv_a.query_overlapping(&(20.. 30)).intervals.len() == 2); 1883 | assert!(iv_a.query_overlapping(&(20.. 40)).intervals.len() == 2); 1884 | assert!(iv_a.query_overlapping(&(30.. 32)).intervals.len() == 1); 1885 | assert!(iv_a.query_overlapping(&(29.. 32)).intervals.len() == 2); 1886 | } 1887 | 1888 | #[test] 1889 | fn test_identical_intervals () { 1890 | let a = vec![ 1891 | (266854.. 266855), 1892 | (268816.. 268818), 1893 | (268816.. 268818), 1894 | (297502.. 297503), 1895 | ]; 1896 | let iv_a = IntervalSet::new(&a).unwrap(); 1897 | for range in a { 1898 | dbg!(&range); 1899 | assert!(iv_a.has_overlap(&(range.start..range.end)).unwrap()); 1900 | } 1901 | assert!(iv_a.query_overlapping(&(266854.. 266855)).intervals.len() == 1); 1902 | assert!(iv_a.query_overlapping(&(268816.. 268818)).intervals.len() == 2); 1903 | assert!(iv_a.query_overlapping(&(297502.. 297503)).intervals.len() == 1); 1904 | 1905 | } 1906 | 1907 | #[test] 1908 | fn test_nested () { 1909 | let a = vec![ 1910 | (10.. 30), 1911 | (15.. 25), 1912 | (25.. 28), 1913 | ]; 1914 | let iv_a = IntervalSet::new(&a).unwrap(); 1915 | for range in a { 1916 | assert!(iv_a.has_overlap(&(range.start..range.end)).unwrap()); 1917 | } 1918 | } 1919 | 1920 | 1921 | } 1922 | --------------------------------------------------------------------------------