├── .editorconfig
├── .gitattributes
├── .github
    └── workflows
    │   ├── build.yml
    │   ├── codecov.yml
    │   └── publish.yml
├── .gitignore
├── CHANGELOG.md
├── Cargo.toml
├── LICENSE
├── README.md
├── benchmarks
    ├── implementations.md
    ├── linkr.md
    ├── linkr
    │   ├── links.blast.tsv
    │   ├── links.lastz.tsv
    │   ├── run.sh
    │   └── sort.clean.tsv
    ├── musl.md
    ├── musl
    │   ├── .gitignore
    │   └── run.sh
    ├── rgr.md
    ├── spanr.md
    └── spanr
    │   ├── chr.sizes
    │   ├── dust.yml
    │   ├── paralog.yml
    │   ├── run.sh
    │   └── sep-gene.yml
├── doc
    ├── intspans.pdf
    ├── intspans.png
    ├── intspans.tex
    ├── ranges.pdf
    ├── ranges.png
    └── ranges.tex
├── examples
    ├── benchmark.rs
    ├── file.rs
    └── test.rs
├── release.toml
├── src
    ├── cmd_linkr
    │   ├── circos.rs
    │   ├── clean.rs
    │   ├── connect.rs
    │   ├── filter.rs
    │   ├── mod.rs
    │   └── sort.rs
    ├── cmd_rgr
    │   ├── count.rs
    │   ├── dedup.rs
    │   ├── field.rs
    │   ├── filter.rs
    │   ├── keep.rs
    │   ├── md.rs
    │   ├── merge.rs
    │   ├── mod.rs
    │   ├── pl_2rmp.rs
    │   ├── prop.rs
    │   ├── replace.rs
    │   ├── runlist.rs
    │   ├── select.rs
    │   ├── sort.rs
    │   └── span.rs
    ├── cmd_spanr
    │   ├── combine.rs
    │   ├── compare.rs
    │   ├── convert.rs
    │   ├── cover.rs
    │   ├── coverage.rs
    │   ├── genome.rs
    │   ├── gff.rs
    │   ├── merge.rs
    │   ├── mod.rs
    │   ├── some.rs
    │   ├── span.rs
    │   ├── split.rs
    │   ├── stat.rs
    │   └── statop.rs
    ├── lib.rs
    ├── libs
    │   ├── coverage.rs
    │   ├── intspan.rs
    │   ├── linalg.rs
    │   ├── matrix.rs
    │   ├── mod.rs
    │   └── range.rs
    ├── linkr.rs
    ├── rgr.rs
    ├── spanr.rs
    └── utils.rs
└── tests
    ├── Atha
        ├── chr.sizes
        ├── links.blast.tsv.gz
        └── links.lastz.tsv.gz
    ├── S288c
        ├── chr.sizes
        ├── links.blast.tsv
        └── links.lastz.tsv
    ├── cli_linkr.rs
    ├── cli_rgr.rs
    ├── cli_rgr_tsv.rs
    ├── cli_spanr.rs
    ├── fasr
        ├── NC_000932.fa
        └── NC_000932.fa.fai
    ├── linkr
        ├── II.clean.tsv
        ├── II.connect.tsv
        ├── II.links.tsv
        ├── II.merge.tsv
        └── II.sort.tsv
    ├── rgr
        ├── 1_4.ovlp.tsv
        ├── 1_4.replace.tsv
        ├── II.links.tsv
        ├── S288c.rg
        ├── ctg.range.tsv
        ├── ctg.tsv
        ├── ctg_2_1_.gc.tsv
        ├── intergenic.json
        ├── intergenic.yml
        ├── ranges.tsv.gz
        └── tn.tsv
    └── spanr
        ├── Atha.chr.sizes
        ├── Atha.json
        ├── Atha.list
        ├── I.II.json
        ├── I.json
        ├── II.json
        ├── II.other.json
        ├── NC_007942.gff
        ├── NC_007942.rm.gff
        ├── S288c.chr.sizes
        ├── S288c.rg
        ├── brca2.json
        ├── dazzname.rg
        ├── intergenic.json
        ├── paralog.json
        └── repeat.json


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # EditorConfig is awesome: http://EditorConfig.org
 2 | 
 3 | # top-most EditorConfig file
 4 | root = true
 5 | 
 6 | [*]
 7 | charset = utf-8
 8 | indent_style = space
 9 | indent_size = 4
10 | end_of_line = lf
11 | trim_trailing_whitespace = true
12 | insert_final_newline = true
13 | 
14 | [*.json]
15 | indent_size = 2
16 | 
17 | [*.yml]
18 | indent_size = 2
19 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Detect text files automatically
2 | * text=auto
3 | 
4 | # Force Unix-style line endings on these files
5 | * eol=lf
6 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Build
 2 | 
 3 | on: [push]  # trigger on every push
 4 | 
 5 | jobs:
 6 |   build:
 7 |     name: Build ${{ matrix.rust }} on ${{ matrix.os }}
 8 |     runs-on: ${{ matrix.os }}
 9 |     strategy:
10 |       matrix:  # 3 operating systems x 2 toolchains = 6 job combinations
11 |         os: [ubuntu-latest, windows-latest, macOS-latest]
12 |         rust: [stable, nightly]
13 | 
14 |     steps:
15 |       - uses: hecrj/setup-rust-action@v1  # install the toolchain chosen by the matrix
16 |         with:
17 |           rust-version: ${{ matrix.rust }}
18 |       - uses: actions/checkout@v4
19 |         with:
20 |           persist-credentials: false  # do not leave the token in the checked-out git config
21 |       - name: Build
22 |         run: cargo build --verbose
23 |       - name: Run tests
24 |         run: cargo test --verbose
25 | 


--------------------------------------------------------------------------------
/.github/workflows/codecov.yml:
--------------------------------------------------------------------------------
 1 | name: Code Coverage
 2 | 
 3 | on: [push]  # trigger on every push
 4 | 
 5 | env:
 6 |   CARGO_TERM_COLOR: always  # keep cargo's colored output in the CI log
 7 | 
 8 | jobs:
 9 |   check:
10 |     name: codecov
11 |     runs-on: ubuntu-latest
12 |     container:  # all steps run inside the tarpaulin image below
13 |       image: xd009642/tarpaulin:develop-nightly
14 |       options: --security-opt seccomp=unconfined
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |         with:
18 |           persist-credentials: false  # do not leave the token in the checked-out git config
19 | 
20 |       - name: Generate code coverage  # bins, integration tests and doctests; test harness limited to one thread
21 |         run: |
22 |           cargo +nightly tarpaulin --bins --tests --doc --follow-exec --engine llvm --out xml -- --test-threads 1
23 | 
24 |       - name: Upload to codecov.io
25 |         uses: codecov/codecov-action@v4
26 |         with:
27 |           fail_ci_if_error: true  # a failed upload fails the job instead of passing silently
28 |           token: ${{secrets.CODECOV_TOKEN}}
29 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
  1 | name: Publish
  2 | 
  3 | on:
  4 |   push:
  5 |     tags:
  6 |       - '*'  # any pushed tag starts a release build; quoted because a bare * is a YAML alias sigil
  7 | 
  8 | # We need this to be able to create releases.
  9 | permissions:
 10 |   contents: write
 11 | 
 12 | jobs:
 13 |   linux-gnu:
 14 |     runs-on: ubuntu-latest
 15 |     steps:
 16 |       - uses: hecrj/setup-rust-action@v1
 17 |         with:
 18 |           rust-version: stable
 19 |           targets: x86_64-unknown-linux-gnu
 20 |       - uses: goto-bus-stop/setup-zig@v2  # zig toolchain required by cargo-zigbuild
 21 |       - name: Install cargo-zigbuild
 22 |         run: cargo install cargo-zigbuild
 23 |       - uses: actions/checkout@v4
 24 |         with:
 25 |           persist-credentials: false
 26 |       - name: Build  # the ".2.17" target suffix builds the linux-gnu binary against GLIBC 2.17
 27 |         run: cargo zigbuild --verbose --release --target x86_64-unknown-linux-gnu.2.17
 28 |       - name: Create tarball  # --transform strips path prefixes so the binaries sit at the archive root
 29 |         run: tar cvfz intspan.tar.gz --transform 's|.*/||' target/x86_64-unknown-linux-gnu/release/spanr target/x86_64-unknown-linux-gnu/release/rgr target/x86_64-unknown-linux-gnu/release/linkr
 30 |         shell: bash
 31 |       - name: Upload binaries to releases
 32 |         uses: svenstaro/upload-release-action@v2
 33 |         with:
 34 |           repo_token: ${{ secrets.GITHUB_TOKEN }}
 35 |           file: intspan.tar.gz
 36 |           asset_name: intspan-x86_64-unknown-linux-gnu.tar.gz
 37 |           tag: ${{ github.ref }}  # the tag ref that triggered this workflow
 38 |           overwrite: true
 39 |   linux-musl:
 40 |     runs-on: ubuntu-latest
 41 |     steps:
 42 |       - uses: hecrj/setup-rust-action@v1
 43 |         with:
 44 |           rust-version: stable
 45 |           targets: x86_64-unknown-linux-musl  # install the musl target alongside the host toolchain
 46 |       - uses: actions/checkout@v4
 47 |         with:
 48 |           persist-credentials: false
 49 |       - name: Build
 50 |         run: cargo build --verbose --release --target x86_64-unknown-linux-musl
 51 |       - name: Create tarball  # --transform strips path prefixes so the binaries sit at the archive root
 52 |         run: tar cvfz intspan.tar.gz --transform 's|.*/||' target/x86_64-unknown-linux-musl/release/spanr target/x86_64-unknown-linux-musl/release/rgr target/x86_64-unknown-linux-musl/release/linkr
 53 |         shell: bash
 54 |       - name: Upload binaries to releases
 55 |         uses: svenstaro/upload-release-action@v2
 56 |         with:
 57 |           repo_token: ${{ secrets.GITHUB_TOKEN }}
 58 |           file: intspan.tar.gz
 59 |           asset_name: intspan-x86_64-unknown-linux-musl.tar.gz
 60 |           tag: ${{ github.ref }}  # the tag ref that triggered this workflow
 61 |           overwrite: true
 62 |   macos:
 63 |     runs-on: macOS-latest
 64 |     steps:
 65 |       - uses: hecrj/setup-rust-action@v1
 66 |         with:
 67 |           rust-version: stable
 68 |       - uses: actions/checkout@v4
 69 |         with:
 70 |           persist-credentials: false
 71 |       - name: Build
 72 |         env:
 73 |           MACOSX_DEPLOYMENT_TARGET: "10.12"  # quoted: a bare 10.12 is typed as a YAML float, not a version string
 74 |         run: cargo build --verbose --release
 75 |       - name: Create tarball  # archive from inside target/release so entries carry no path prefix
 76 |         run: |
 77 |           cd target/release
 78 |           tar cvfz ../../intspan.tar.gz spanr rgr linkr
 79 |         shell: bash
 80 |       - name: Upload binaries to releases
 81 |         uses: svenstaro/upload-release-action@v2
 82 |         with:
 83 |           repo_token: ${{ secrets.GITHUB_TOKEN }}
 84 |           file: intspan.tar.gz
 85 |           asset_name: intspan-x86_64-apple-darwin.tar.gz  # NOTE(review): no --target is passed; confirm the macOS-latest runner really builds x86_64
 86 |           tag: ${{ github.ref }}
 87 |           overwrite: true
 88 |   windows:
 89 |     runs-on: windows-latest
 90 |     steps:
 91 |       - uses: hecrj/setup-rust-action@v1
 92 |         with:
 93 |           rust-version: stable
 94 |       - uses: actions/checkout@v4
 95 |         with:
 96 |           persist-credentials: false
 97 |       - name: Enable static CRT linkage  # written to config.toml, Cargo's preferred name; the bare "config" file is deprecated
 98 |         run: |
 99 |           mkdir .cargo
100 |           echo '[target.x86_64-pc-windows-msvc]' >> .cargo/config.toml
101 |           echo 'rustflags = ["-Ctarget-feature=+crt-static"]' >> .cargo/config.toml
102 |       - name: Build
103 |         run: cargo build --verbose --release
104 |       - name: Create zip  # bundles the three release executables into one archive
105 |         run: Compress-Archive -DestinationPath ./intspan.zip -Path ./target/release/spanr.exe,./target/release/rgr.exe,./target/release/linkr.exe
106 |         shell: powershell
107 |       - name: Upload binaries to releases
108 |         uses: svenstaro/upload-release-action@v2
109 |         with:
110 |           repo_token: ${{ secrets.GITHUB_TOKEN }}
111 |           file: intspan.zip
112 |           asset_name: intspan-x86_64-pc-windows-msvc.zip
113 |           tag: ${{ github.ref }}
114 |           overwrite: true
115 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | #----------------------------#
  2 | # IDE
  3 | #----------------------------#
  4 | .idea
  5 | 
  6 | #----------------------------#
  7 | # macOS
  8 | #----------------------------#
  9 | 
 10 | # General
 11 | .DS_Store
 12 | .AppleDouble
 13 | .LSOverride
 14 | 
 15 | # Icon must end with two \r
 16 | Icon
 17 | 
 18 | # Thumbnails
 19 | ._*
 20 | 
 21 | # Files that might appear in the root of a volume
 22 | .DocumentRevisions-V100
 23 | .fseventsd
 24 | .Spotlight-V100
 25 | .TemporaryItems
 26 | .Trashes
 27 | .VolumeIcon.icns
 28 | .com.apple.timemachine.donotpresent
 29 | 
 30 | # Directories potentially created on remote AFP share
 31 | .AppleDB
 32 | .AppleDesktop
 33 | Network Trash Folder
 34 | Temporary Items
 35 | .apdisk
 36 | 
 37 | #----------------------------#
 38 | # Linux
 39 | #----------------------------#
 40 | 
 41 | *~
 42 | 
 43 | # temporary files which can be created if a process still has a handle open of a deleted file
 44 | .fuse_hidden*
 45 | 
 46 | # KDE directory preferences
 47 | .directory
 48 | 
 49 | # Linux trash folder which might appear on any partition or disk
 50 | .Trash-*
 51 | 
 52 | # .nfs files are created when an open file is removed but is still being accessed
 53 | .nfs*
 54 | 
 55 | #----------------------------#
 56 | # Windows
 57 | #----------------------------#
 58 | 
 59 | # Windows thumbnail cache files
 60 | Thumbs.db
 61 | ehthumbs.db
 62 | ehthumbs_vista.db
 63 | 
 64 | # Dump file
 65 | *.stackdump
 66 | 
 67 | # Folder config file
 68 | [Dd]esktop.ini
 69 | 
 70 | # Recycle Bin used on file shares
 71 | $RECYCLE.BIN/
 72 | 
 73 | # Windows Installer files
 74 | *.cab
 75 | *.msi
 76 | *.msix
 77 | *.msm
 78 | *.msp
 79 | 
 80 | # Windows shortcuts
 81 | *.lnk
 82 | 
 83 | #----------------------------#
 84 | # LaTeX
 85 | #----------------------------#
 86 | 
 87 | # LaTeX tmp
 88 | *.toc
 89 | *.aux
 90 | *.fdb_latexmk
 91 | *.fls
 92 | *.nav
 93 | *.out
 94 | *.snm
 95 | *.vrb
 96 | *(busy)
 97 | *.synctex.gz
 98 | 
 99 | # bibtex tmp
100 | *.bbl
101 | *.bcf
102 | *.blg
103 | *-blx.bib
104 | *.run.xml
105 | 
106 | # nomencl tmp
107 | *.ilg
108 | *.nlo
109 | *.nls
110 | 
111 | # latexindent backup
112 | *.bak*
113 | indent.log
114 | 
115 | #----------------------------#
116 | # This project
117 | #----------------------------#
118 | 
119 | # Generated by Cargo
120 | # will have compiled files and executables
121 | /target/
122 | 
123 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
124 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
125 | Cargo.lock
126 | 
127 | # These are backup files generated by rustfmt
128 | **/*.rs.bk
129 | 
130 | *.tmp
131 | tests/S288c/
132 | tests/Atha/
133 | *.sizes
134 | *.xlsx
135 | PL-*/
136 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Change Log
  2 | 
  3 | ## Unreleased - ReleaseDate
  4 | 
  5 | ## 0.8.7 - 2025-04-06
  6 | 
  7 | * Add linear algebra functions
  8 | * Add matrix operations
  9 |     * Add `ScoringMatrix` for pairwise scores with missing values
 10 |     * Add `NamedMatrix` for complete distance matrices in PHYLIP format
 11 |     * Support PHYLIP format input
 12 | * Remove AppVeyor CI
 13 | 
 14 | ## 0.8.6 - 2025-04-02
 15 | 
 16 | * Add `--ff-eq` and `--ff-ne` to `rgr filter`
 17 | * Improve code quality
 18 |     * Use `into_iter()` and `collect()` to simplify code
 19 |     * Refactor `IntSpan::to_vec()`, `spans()`, `ranges()`, `runs()` and `intses()`
 20 | * Improve CI/CD
 21 |     * Use `cargo-zigbuild` to build linux-gnu binary with GLIBC 2.17
 22 |     * Simplify tar archives by removing path prefixes
 23 | 
 24 | ## 0.8.4 - 2024-12-30
 25 | 
 26 | * Add `--lines` and `--delete` to `rgr keep`
 27 | 
 28 | * Refactor the code in `rgr` to make it cleaner and more maintainable
 29 | 
 30 | ## 0.8.3 - 2024-12-29
 31 | 
 32 | * Add `rgr span`
 33 | * Add `rgr keep`
 34 | 
 35 | * Add numeric comparisons to `rgr filter`
 36 | * Add operations to `Range`
 37 | * Remove --fields from `rgr field`
 38 | 
 39 | ## 0.8.2 - 2024-12-21
 40 | 
 41 | * Add `IntSpan::valid()`
 42 | 
 43 | * Add `rgr filter`
 44 | * Add `rgr select`
 45 | 
 46 | ## 0.8.0 - 2024-11-30
 47 | 
 48 | * Move `fasr` to `hnsm`
 49 | 
 50 | * Add `--longest` to `spanr convert`
 51 | * Format Markdown tables in `rgr md`
 52 | 
 53 | ## 0.7.9 - 2024-11-15
 54 | 
 55 | * Add `rgr dedup`
 56 | 
 57 | * Add --fmt to `rgr md`
 58 | 
 59 | ## 0.7.8 - 2024-11-04
 60 | 
 61 | * Add `rgr md`
 62 | 
 63 | ## 0.7.7 - 2024-07-19
 64 | 
 65 | * Move `ovlpr` to `anchr`
 66 | 
 67 | * Add `rgr pl-2rmp`
 68 | * Add `--group` to `rgr sort`
 69 | * Use `MultiGzDecoder` in intspan::reader()
 70 | 
 71 | ## 0.7.3 - 2023-07-18
 72 | 
 73 | * Add `utils::get_seq_faidx()`
 74 | * Add `IntSpan.find_islands_n()` and `IntSpan.find_islands_ints()`
 75 | 
 76 | * Use json to replace yaml in `spanr`
 77 | 
 78 | * Bump versions of deps
 79 |     * clap v4
 80 |     * Use anyhow
 81 | 
 82 | ## 0.7.1 - 2022-06-14
 83 | 
 84 | * Store `IntSpan.edges` in VecDeque
 85 | * Switch to `clap` v3.2
 86 | 
 87 | ## 0.7.0 - 2022-05-23
 88 | 
 89 | * Add `rgr sort`
 90 | * Add `rgr prop`
 91 | 
 92 | * Add --fields to `rgr field`
 93 | * Add --header, --sharp, --field to `rgr count` and `rgr runlist`
 94 | 
 95 | ## 0.6.9 - 2022-05-15
 96 | 
 97 | * Add `rgr field`
 98 | 
 99 | * Move `spanr range` to `rgr runlist`
100 | * Move `spanr count` to `rgr count`
101 | 
102 | * Rename .ranges to .rg
103 | 
104 | ## 0.6.8 - 2022-05-14
105 | 
106 | * New binary `rgr`
107 | 
108 | * Move `ovlpr replace` to `rgr replace`
109 | * Move `linkr merge` to `rgr merge`
110 | 
111 | ## 0.6.7 - 2022-04-24
112 | 
113 | * Add `spanr count`
114 | * Add `--detailed` to `spanr coverage`
115 | * Use `Box<dyn std::error::Error>`
116 | 
117 | ## 0.6.5 - 2022-04-22
118 | 
119 | * Use rust_lapper as an intermediate layer instead of intspan::Coverage
120 |     * Greatly improves the speed of `spanr coverage`
121 | 
122 | ## 0.6.4 - 2022-04-21
123 | 
124 | * Move `far` out
125 | * `spanr stat` use i64 in the `all` lines
126 | * Update GitHub Actions
127 |     * Use a container with GLIBC 2.17 to build linux-gnu binary
128 |     * Codecov with cargo-tarpaulin
129 | 
130 | ## 0.6.0 - 2022-02-22
131 | 
132 | * Move `nwr` out
133 | 
134 | ## 0.4.16 - 2022-02-12
135 | 
136 | * Switch to `clap` v3
137 | 
138 | ## 0.4.15 - 2021-08-19
139 | 
140 | * Add `far some`
141 | * Add `slice()` to `IntSpan`
142 | * `ovlpr replace` now processes any .tsv files
143 | 
144 | ## 0.4.14 - 2020-05-15
145 | 
146 | ## 0.4.13 - 2020-05-15
147 | 
148 | * New binary `far`
149 | 
150 | ## 0.4.12 - 2020-03-05
151 | 
152 | * Split `spanr cover` into `cover` and `coverage`
153 | 
154 | ## 0.4.11 - 2020-02-15
155 | 
156 | * Add `--all` to `spanr merge`
157 | 
158 | ## 0.4.10 - 2020-02-15
159 | 
160 | * Add `--op` to `spanr combine`
161 | 
162 | ## 0.4.9 - 2019-12-09
163 | 
164 | * Add `ovlpr replace`
165 | * Add `ovlpr restrict`
166 | 
167 | * GitHub Actions publish.yml
168 | 
169 | ## 0.4.1 - 2019-09-10
170 | 
171 | * Add benchmarks.md
172 | * Add `ovlpr paf2ovlp`
173 | 
174 | * Binary releases by GitHub Actions
175 | 
176 | ## 0.4.0 - 2019-09-07
177 | 
178 | * New binary `ovlpr`
179 | * Struct `Overlap`
180 | 
181 | * Move libraries to libs/
182 | * Passing `&str` when calling methods
183 | * Add `new_len()` and `uniq_tiers()` to `Coverage`
184 | * Add `from_pair()` to `IntSpan`
185 | * Wrap IO functions in utils.rs with Result
186 | * Satisfy clippy
187 | 
188 | ## 0.3.3 - 2019-09-04
189 | 
190 | * `spanr merge`: take the first part of filename
191 | * `spanr compare`: compare more than two infiles
192 | 
193 | ## 0.3.2 - 2019-09-03
194 | 
195 | * Add `--suffix` to `spanr split`
196 | 
197 | ## 0.3.1 - 2019-09-03
198 | 
199 | * Detailed benchmarks on `linkr`
200 | 
201 | * Make POS_INF, NEG_INF and EMPTY_STRING as lazy_static
202 | * About 10-20% faster
203 | 
204 | ## 0.3.0 - 2019-09-03
205 | 
206 | * New binary `linkr` for commands ported from `App::Rangeops` and `jrange`
207 | * Illustrations for some concepts
208 |     * IntSpans
209 |     * Ranges
210 | 
211 | * Rename binary `intspan` to `spanr`
212 | 
213 | ## 0.2.0 - 2019-08-24
214 | 
215 | * Ported all commands from `App::RL` and `jrunlist`
216 | * Struct `Range`
217 | * Struct `Coverage`
218 | * Adopt `cargo release`
219 | 
220 | ## 0.1.0 - 2019-08-13
221 | 
222 | * Struct `IntSpan`
223 | * Examples
224 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "intspan"
 3 | version = "0.8.7"
 4 | authors = ["wang-q <wang-q@outlook.com>"]
 5 | description = "Command line tools for IntSpan related bioinformatics operations"
 6 | documentation = "https://github.com/wang-q/intspan"
 7 | homepage = "https://github.com/wang-q/intspan"
 8 | repository = "https://github.com/wang-q/intspan"
 9 | readme = "README.md"
10 | categories = ["command-line-utilities", "science"]
11 | license = "MIT"
12 | edition = "2018"
13 | 
14 | [lib]  # library target; no path given, so Cargo's default src/lib.rs applies
15 | name = "intspan"
16 | 
17 | [[bin]]  # first of three command-line binaries: spanr, linkr, rgr
18 | name = "spanr"
19 | path = "src/spanr.rs"
20 | 
21 | [[bin]]
22 | name = "linkr"
23 | path = "src/linkr.rs"
24 | 
25 | [[bin]]
26 | name = "rgr"
27 | path = "src/rgr.rs"
28 | 
29 | [[example]]  # examples/test.rs
30 | name = "test"
31 | 
32 | [[example]]  # examples/benchmark.rs
33 | name = "benchmark"
34 | 
35 | [[example]]  # examples/file.rs
36 | name = "file"
37 | 
38 | [dependencies]
39 | clap = { version = "4.3.12", features = ["cargo"] }
40 | serde = "1.0.171"
41 | serde_json = "1.0.103"
42 | anyhow = "1.0.72"
43 | regex = "1.9.1"
44 | lazy_static = "1.4.0"
45 | flate2 = "1.0.26"
46 | itertools = "0.11.0"
47 | 
48 | petgraph = "0.6.3"
49 | indexmap = "2.0.0"
50 | rust-lapper = "1.1.0"
51 | bio = "0.30.1"
52 | crossbeam = "0.8.2"
53 | rust_xlsxwriter = "0.43.0"
54 | 
55 | csv = "1.3.1"
56 | xxhash-rust = { version = "0.8.15", features = ["xxh3"] }
57 | markdown-table-formatter = "0.3.0"
58 | 
59 | which = "4.4.0"
60 | cmd_lib = "1.9.4"
61 | tempfile = "3.6.0"
62 | path-clean = "1.0.1"
63 | 
64 | [build-dependencies]  # none declared
65 | 
66 | [dev-dependencies]  # only needed by the tests
67 | assert_cmd = "2.0.12"
68 | predicates = "3.0.3"
69 | 
70 | [profile.release]
71 | lto = true  # link-time optimization for release builds
72 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Qiang Wang
 4 | 
 5 | Permission is hereby granted, free of charge, to any
 6 | person obtaining a copy of this software and associated
 7 | documentation files (the "Software"), to deal in the
 8 | Software without restriction, including without
 9 | limitation the rights to use, copy, modify, merge,
10 | publish, distribute, sublicense, and/or sell copies of
11 | the Software, and to permit persons to whom the Software
12 | is furnished to do so, subject to the following
13 | conditions:
14 | 
15 | The above copyright notice and this permission notice
16 | shall be included in all copies or substantial portions
17 | of the Software.
18 | 
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
20 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
21 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
22 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
23 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
26 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 | DEALINGS IN THE SOFTWARE.
28 | 


--------------------------------------------------------------------------------
/benchmarks/linkr/run.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | #----------------------------#
  4 | # Colors in term
  5 | #----------------------------#
  6 | # http://stackoverflow.com/questions/5947742/how-to-change-the-output-color-of-echo-in-linux
  7 | GREEN=
  8 | RED=
  9 | NC=
 10 | if [[ -t 2 ]]; then # the log_* helpers write to stderr, so test fd 2 (not stdout) for a terminal
 11 |     GREEN='\033[0;32m'
 12 |     RED='\033[0;31m'
 13 |     NC='\033[0m' # No Color
 14 | fi
 15 | 
 16 | log_warn () { # print "==> msg <==" to stderr, wrapped in ${RED}/${NC}
 17 |     echo >&2 -e "${RED}==> $* <==${NC}" # $* joins all arguments into one word inside the quoted string
 18 | }
 19 | 
 20 | log_info () { # print "==> msg" to stderr, wrapped in ${GREEN}/${NC}
 21 |     echo >&2 -e "${GREEN}==> $*${NC}" # $* joins all arguments into one word inside the quoted string
 22 | }
 23 | 
 24 | log_debug () { # print "==> msg" to stderr without any color
 25 |     echo >&2 -e "==> $*" # $* joins all arguments into one word inside the quoted string
 26 | }
 27 | 
 28 | #----------------------------#
 29 | # Prepare
 30 | #----------------------------#
 31 | COMMAND_TIME="command time -v"
 32 | if [[ "$(uname)" == 'Darwin' ]]; then # on macOS, time is given -l instead of -v
 33 |     COMMAND_TIME="command time -l"
 34 | fi
 35 | 
 36 | # enter BASE_DIR
 37 | BASE_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
 38 | cd "${BASE_DIR}" || exit 1 # quoted for paths with spaces; abort rather than benchmark in the wrong directory
 39 | 
 40 | #----------------------------#
 41 | # Run
 42 | #----------------------------#
 43 | log_warn "merge"
 44 | log_info "jrange merge lastz blast"
 45 | ${COMMAND_TIME} jrange \
 46 |     merge \
 47 |     -o stdout -c 0.95 \
 48 |     links.lastz.tsv \
 49 |     links.blast.tsv |
 50 |     sort \
 51 |     > jmerge.tsv.tmp
 52 | 
 53 | log_info "rgr merge lastz blast"
 54 | ${COMMAND_TIME} rgr \
 55 |     merge \
 56 |     -o stdout -c 0.95 \
 57 |     links.lastz.tsv \
 58 |     links.blast.tsv |
 59 |     sort \
 60 |     > rmerge.tsv.tmp
 61 | 
 62 | log_info "rangeops merge lastz blast"
 63 | ${COMMAND_TIME} rangeops \
 64 |     merge \
 65 |     -o stdout -c 0.95 -p 8 \
 66 |     links.lastz.tsv \
 67 |     links.blast.tsv |
 68 |     sort \
 69 |     > pmerge.tsv.tmp
 70 | echo >&2
 71 | 
 72 | log_warn "clean"
 73 | log_info "jrange clean sort.clean"
 74 | ${COMMAND_TIME} jrange \
 75 |     clean \
 76 |     -o stdout \
 77 |     sort.clean.tsv \
 78 |     > jclean.tsv.tmp
 79 | 
 80 | log_info "linkr clean sort.clean"
 81 | ${COMMAND_TIME} linkr \
 82 |     clean \
 83 |     -o stdout \
 84 |     sort.clean.tsv \
 85 |     > rclean.tsv.tmp
 86 | 
 87 | log_info "rangeops clean sort.clean"
 88 | ${COMMAND_TIME} rangeops \
 89 |     clean \
 90 |     -o stdout \
 91 |     sort.clean.tsv \
 92 |     > pclean.tsv.tmp
 93 | echo >&2
 94 | 
 95 | log_warn "clean bundle"
 96 | log_info "jrange clean bundle sort.clean"
 97 | ${COMMAND_TIME} jrange \
 98 |     clean \
 99 |     -o stdout \
100 |     --bundle 500 \
101 |     sort.clean.tsv \
102 |     > jbundle.tsv.tmp
103 | 
104 | log_info "linkr clean bundle sort.clean"
105 | ${COMMAND_TIME} linkr \
106 |     clean \
107 |     -o stdout \
108 |     --bundle 500 \
109 |     sort.clean.tsv \
110 |     > rbundle.tsv.tmp
111 | 
112 | log_info "rangeops clean bundle sort.clean"
113 | ${COMMAND_TIME} rangeops \
114 |     clean \
115 |     -o stdout \
116 |     --bundle 500 \
117 |     sort.clean.tsv \
118 |     > pbundle.tsv.tmp
119 | echo >&2
120 | 


--------------------------------------------------------------------------------
/benchmarks/musl.md:
--------------------------------------------------------------------------------
  1 | # `gcc` vs `musl`
  2 | 
  3 | * Ubuntu 14.04 E5-2690 v3
  4 |     * rustc 1.40
  5 |     * gcc with lto
  6 |     * musl with lto
  7 | * Ryzen 7 5800 Windows 11 WSL
  8 |     * rustc 1.60.0
  9 | * i7-12700T Windows 11 WSL
 10 |     * rustc 1.82.0
 11 | 
 12 | ## `bash benchmarks/musl/run.sh`
 13 | 
 14 | ```shell
 15 | # cargo install --path ~/Scripts/rust/intspan --force
 16 | 
 17 | bash ~/Scripts/intspan/benchmarks/musl/run.sh
 18 | 
 19 | find ~/Scripts/intspan/benchmarks/musl/* |
 20 |     grep -v "run.sh" |
 21 |     grep -v ".gitignore" |
 22 |     xargs rm -fr
 23 | 
 24 | ```
 25 | 
 26 | ## Results
 27 | 
 28 | * Above - E5-2690 v3
 29 | * Mid - Ryzen 7 5800
 30 | * Below - i5-12500H
 31 | 
 32 | * sort
 33 | 
 34 | | Command |    Mean [ms] | Min [ms] | Max [ms] | Relative |
 35 | |:--------|-------------:|---------:|---------:|---------:|
 36 | | cargo   | 127.9 ± 22.7 |     97.3 |    147.7 |      1.2 |
 37 | | gcc     | 107.9 ± 21.6 |     92.2 |    142.5 |      1.0 |
 38 | | musl    | 132.9 ± 23.2 |    102.2 |    151.4 |      1.2 |
 39 | 
 40 | | Command |  Mean [ms] | Min [ms] | Max [ms] |    Relative |
 41 | |:--------|-----------:|---------:|---------:|------------:|
 42 | | `cargo` | 36.6 ± 0.5 |     35.8 |     38.0 |        1.00 |
 43 | | `gcc`   | 68.0 ± 1.3 |     63.3 |     71.4 | 1.86 ± 0.04 |
 44 | | `musl`  | 74.6 ± 1.9 |     70.1 |     77.6 | 2.04 ± 0.06 |
 45 | 
 46 | | Command  |    Mean [ms] | Min [ms] | Max [ms] |    Relative |
 47 | |:---------|-------------:|---------:|---------:|------------:|
 48 | | `cargo`  |   43.0 ± 1.7 |     39.9 |     47.8 |        1.00 |
 49 | | `gcc`    | 137.9 ± 21.0 |    116.4 |    171.2 | 3.21 ± 0.51 |
 50 | | `musl`   | 180.8 ± 20.1 |    139.7 |    204.2 | 4.21 ± 0.50 |
 51 | | `zig cc` | 138.8 ± 23.9 |    109.6 |    174.3 | 3.23 ± 0.57 |
 52 | 
 53 | * clean
 54 | 
 55 | | Command |      Mean [s] | Min [s] | Max [s] | Relative |
 56 | |:--------|--------------:|--------:|--------:|---------:|
 57 | | cargo   | 4.266 ± 0.075 |   4.224 |   4.478 |      1.0 |
 58 | | gcc     | 6.090 ± 2.789 |   3.824 |   9.361 |      1.4 |
 59 | | musl    | 7.869 ± 3.102 |   4.839 |  11.355 |      1.8 |
 60 | 
 61 | | Command |      Mean [s] | Min [s] | Max [s] |    Relative |
 62 | |:--------|--------------:|--------:|--------:|------------:|
 63 | | `cargo` | 1.446 ± 0.344 |   1.279 |   2.351 | 1.06 ± 0.25 |
 64 | | `gcc`   | 1.361 ± 0.029 |   1.334 |   1.420 |        1.00 |
 65 | | `musl`  | 2.624 ± 0.029 |   2.586 |   2.697 | 1.93 ± 0.05 |
 66 | 
 67 | | Command  |      Mean [s] | Min [s] | Max [s] |    Relative |
 68 | |:---------|--------------:|--------:|--------:|------------:|
 69 | | `cargo`  | 1.545 ± 0.058 |   1.476 |   1.631 |        1.00 |
 70 | | `gcc`    | 1.587 ± 0.050 |   1.531 |   1.700 | 1.03 ± 0.05 |
 71 | | `musl`   | 3.142 ± 0.052 |   3.079 |   3.232 | 2.03 ± 0.08 |
 72 | | `zig cc` | 1.560 ± 0.036 |   1.521 |   1.644 | 1.01 ± 0.04 |
 73 | 
 74 | * merge
 75 | 
 76 | | Command |      Mean [s] | Min [s] | Max [s] | Relative |
 77 | |:--------|--------------:|--------:|--------:|---------:|
 78 | | cargo   | 2.991 ± 0.006 |   2.976 |   2.999 |      1.1 |
 79 | | gcc     | 2.712 ± 0.003 |   2.707 |   2.716 |      1.0 |
 80 | | musl    | 4.527 ± 0.086 |   4.492 |   4.770 |      1.7 |
 81 | 
 82 | | Command |      Mean [s] | Min [s] | Max [s] |    Relative |
 83 | |:--------|--------------:|--------:|--------:|------------:|
 84 | | `cargo` | 1.251 ± 0.041 |   1.218 |   1.335 |        1.00 |
 85 | | `gcc`   | 1.253 ± 0.022 |   1.228 |   1.289 | 1.00 ± 0.04 |
 86 | | `musl`  | 2.791 ± 0.027 |   2.766 |   2.833 | 2.23 ± 0.08 |
 87 | 
 88 | | Command  |      Mean [s] | Min [s] | Max [s] |    Relative |
 89 | |:---------|--------------:|--------:|--------:|------------:|
 90 | | `cargo`  | 1.560 ± 0.081 |   1.468 |   1.756 |        1.00 |
 91 | | `gcc`    | 1.633 ± 0.031 |   1.593 |   1.688 | 1.05 ± 0.06 |
 92 | | `musl`   | 2.974 ± 0.112 |   2.887 |   3.252 | 1.91 ± 0.12 |
 93 | | `zig cc` | 1.663 ± 0.062 |   1.603 |   1.807 | 1.07 ± 0.07 |
 94 | 
 95 | * clean2
 96 | 
 97 | | Command |      Mean [s] | Min [s] | Max [s] | Relative |
 98 | |:--------|--------------:|--------:|--------:|---------:|
 99 | | cargo   | 5.152 ± 0.026 |   5.132 |   5.221 |      1.1 |
100 | | gcc     | 4.821 ± 0.003 |   4.817 |   4.826 |      1.0 |
101 | | musl    | 5.983 ± 0.924 |   3.370 |   6.544 |      1.2 |
102 | 
103 | | Command |      Mean [ms] | Min [ms] | Max [ms] |    Relative |
104 | |:--------|---------------:|---------:|---------:|------------:|
105 | | `cargo` |    828.6 ± 6.9 |    817.6 |    838.1 |        1.00 |
106 | | `gcc`   |   873.9 ± 15.7 |    857.5 |    915.8 | 1.05 ± 0.02 |
107 | | `musl`  | 2198.2 ± 650.3 |   1468.9 |   3210.7 | 2.65 ± 0.79 |
108 | 
109 | | Command  |     Mean [ms] | Min [ms] | Max [ms] |    Relative |
110 | |:---------|--------------:|---------:|---------:|------------:|
111 | | `cargo`  |  945.5 ± 48.9 |    890.1 |   1037.9 |        1.00 |
112 | | `gcc`    | 1014.3 ± 30.7 |    960.1 |   1057.8 | 1.07 ± 0.06 |
113 | | `musl`   | 1669.3 ± 19.1 |   1642.6 |   1695.9 | 1.77 ± 0.09 |
114 | | `zig cc` |  991.3 ± 43.3 |    942.7 |   1068.4 | 1.05 ± 0.07 |
115 | 
116 | * connect
117 | 
118 | | Command |    Mean [ms] | Min [ms] | Max [ms] | Relative |
119 | |:--------|-------------:|---------:|---------:|---------:|
120 | | cargo   |  387.0 ± 0.7 |    386.2 |    387.8 |      1.0 |
121 | | gcc     | 374.5 ± 74.3 |    247.8 |    532.1 |      1.0 |
122 | | musl    |  383.0 ± 0.6 |    382.0 |    384.2 |      1.0 |
123 | 
124 | | Command |   Mean [ms] | Min [ms] | Max [ms] |    Relative |
125 | |:--------|------------:|---------:|---------:|------------:|
126 | | `cargo` |  83.8 ± 4.2 |     79.0 |    102.0 |        1.00 |
127 | | `gcc`   | 118.5 ± 7.9 |    107.4 |    135.2 | 1.41 ± 0.12 |
128 | | `musl`  | 131.6 ± 3.9 |    125.5 |    138.9 | 1.57 ± 0.09 |
129 | 
130 | | Command  |    Mean [ms] | Min [ms] | Max [ms] |    Relative |
131 | |:---------|-------------:|---------:|---------:|------------:|
132 | | `cargo`  |   89.2 ± 3.1 |     84.4 |     99.8 |        1.00 |
133 | | `gcc`    | 187.5 ± 20.5 |    160.7 |    216.1 | 2.10 ± 0.24 |
134 | | `musl`   | 256.1 ± 20.2 |    235.0 |    290.3 | 2.87 ± 0.25 |
135 | | `zig cc` | 196.9 ± 31.0 |    170.0 |    303.5 | 2.21 ± 0.36 |
136 | 
137 | * filter
138 | 
139 | | Command |   Mean [ms] | Min [ms] | Max [ms] | Relative |
140 | |:--------|------------:|---------:|---------:|---------:|
141 | | cargo   |  59.2 ± 1.1 |     58.4 |     66.2 |      1.1 |
142 | | gcc     |  54.7 ± 0.3 |     54.2 |     55.7 |      1.0 |
143 | | musl    | 55.6 ± 22.3 |     30.9 |    126.5 |      1.0 |
144 | 
145 | | Command |  Mean [ms] | Min [ms] | Max [ms] |    Relative |
146 | |:--------|-----------:|---------:|---------:|------------:|
147 | | `cargo` | 14.8 ± 0.9 |     13.4 |     20.4 |        1.00 |
148 | | `gcc`   | 48.7 ± 2.6 |     41.5 |     56.6 | 3.29 ± 0.27 |
149 | | `musl`  | 49.9 ± 2.6 |     44.9 |     61.0 | 3.37 ± 0.28 |
150 | 
151 | | Command  |    Mean [ms] | Min [ms] | Max [ms] |    Relative |
152 | |:---------|-------------:|---------:|---------:|------------:|
153 | | `cargo`  |   19.3 ± 1.7 |     16.5 |     33.0 |        1.00 |
154 | | `gcc`    | 128.0 ± 19.0 |     92.5 |    151.2 | 6.63 ± 1.14 |
155 | | `musl`   | 141.4 ± 17.5 |    116.7 |    184.3 | 7.32 ± 1.11 |
156 | | `zig cc` | 138.0 ± 14.4 |    103.6 |    162.2 | 7.15 ± 0.97 |
157 | 


--------------------------------------------------------------------------------
/benchmarks/musl/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | 


--------------------------------------------------------------------------------
/benchmarks/musl/run.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | #----------------------------#
  4 | # Colors in term
  5 | #----------------------------#
  6 | # http://stackoverflow.com/questions/5947742/how-to-change-the-output-color-of-echo-in-linux
  7 | GREEN=
  8 | RED=
  9 | NC=
 10 | if tty -s < /dev/fd/1 2> /dev/null; then
 11 |     GREEN='\033[0;32m'
 12 |     RED='\033[0;31m'
 13 |     NC='\033[0m' # No Color
 14 | fi
 15 | 
 16 | log_warn () {
 17 |     echo >&2 -e "${RED}==> $@ <==${NC}"
 18 | }
 19 | 
 20 | log_info () {
 21 |     echo >&2 -e "${GREEN}==> $@${NC}"
 22 | }
 23 | 
 24 | log_debug () {
 25 |     echo >&2 -e "==> $@"
 26 | }
 27 | 
 28 | #----------------------------#
 29 | # Prepare
 30 | #----------------------------#
 31 | COMMAND_TIME="command time -v"
 32 | if [[ `uname` == 'Darwin' ]]; then
 33 |     COMMAND_TIME="command time -l"
 34 | fi
 35 | 
 36 | # enter BASE_DIR
 37 | BASE_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
 38 | cd ${BASE_DIR}
 39 | 
 40 | if [[ ! -e intspan-x86_64-unknown-linux-gnu.tar.gz ]]; then
 41 |     curl -LO https://github.com/wang-q/intspan/releases/download/v0.8.0/intspan-x86_64-unknown-linux-gnu.tar.gz
 42 | fi
 43 | 
 44 | if [[ ! -e intspan-x86_64-unknown-linux-musl.tar.gz ]]; then
 45 |     curl -LO https://github.com/wang-q/intspan/releases/download/v0.8.0/intspan-x86_64-unknown-linux-musl.tar.gz
 46 | fi
 47 | 
 48 | if [[ ! -e intspan.x86_64-unknown-linux-gnu.tar.gz ]]; then
 49 |     curl -LO https://github.com/wang-q/builds/raw/refs/heads/master/tar/intspan.x86_64-unknown-linux-gnu.tar.gz
 50 | fi
 51 | 
 52 | tar xvfz intspan-x86_64-unknown-linux-gnu.tar.gz
 53 | tar xvfz intspan-x86_64-unknown-linux-musl.tar.gz
 54 | tar xvfz intspan.x86_64-unknown-linux-gnu.tar.gz
 55 | 
 56 | #----------------------------#
 57 | # Run
 58 | #----------------------------#
 59 | log_info "sort"
 60 | hyperfine --warmup 1 --export-markdown sort.md \
 61 |     -n cargo \
 62 |     -n gcc \
 63 |     -n musl \
 64 |     -n "zig cc" \
 65 |     'gzip -dcf ../../tests/Atha/links.lastz.tsv.gz tests/Atha/links.blast.tsv.gz | ~/.cargo/bin/linkr                             sort stdin -o /dev/null' \
 66 |     'gzip -dcf ../../tests/Atha/links.lastz.tsv.gz tests/Atha/links.blast.tsv.gz | target/release/linkr                           sort stdin -o /dev/null' \
 67 |     'gzip -dcf ../../tests/Atha/links.lastz.tsv.gz tests/Atha/links.blast.tsv.gz | target/x86_64-unknown-linux-musl/release/linkr sort stdin -o /dev/null' \
 68 |     'gzip -dcf ../../tests/Atha/links.lastz.tsv.gz tests/Atha/links.blast.tsv.gz | ./linkr                                        sort stdin -o /dev/null'
 69 | 
 70 | echo >&2
 71 | 
 72 | log_info "clean"
 73 | hyperfine --warmup 1 --export-markdown clean.md \
 74 |     -n cargo \
 75 |     -n gcc \
 76 |     -n musl \
 77 |     -n "zig cc" \
 78 |     '~/.cargo/bin/linkr                             clean ../../tests/Atha/sort.tsv -o /dev/null' \
 79 |     'target/release/linkr                           clean ../../tests/Atha/sort.tsv -o /dev/null' \
 80 |     'target/x86_64-unknown-linux-musl/release/linkr clean ../../tests/Atha/sort.tsv -o /dev/null' \
 81 |     './linkr                                        clean ../../tests/Atha/sort.tsv -o /dev/null'
 82 | 
 83 | echo >&2
 84 | 
 85 | log_info "merge"
 86 | hyperfine --warmup 1 --export-markdown merge.md \
 87 |     -n cargo \
 88 |     -n gcc \
 89 |     -n musl \
 90 |     -n "zig cc" \
 91 |     '~/.cargo/bin/rgr                             merge ../../tests/Atha/sort.clean.tsv -c 0.95 -o /dev/null' \
 92 |     'target/release/rgr                           merge ../../tests/Atha/sort.clean.tsv -c 0.95 -o /dev/null' \
 93 |     'target/x86_64-unknown-linux-musl/release/rgr merge ../../tests/Atha/sort.clean.tsv -c 0.95 -o /dev/null' \
 94 |     './rgr                                        merge ../../tests/Atha/sort.clean.tsv -c 0.95 -o /dev/null'
 95 | 
 96 | echo >&2
 97 | 
 98 | log_info "clean2"
 99 | hyperfine --warmup 1 --export-markdown clean2.md \
100 |     -n cargo \
101 |     -n gcc \
102 |     -n musl \
103 |     -n "zig cc" \
104 |     '~/.cargo/bin/linkr                             clean ../../tests/Atha/sort.clean.tsv -r ../../tests/Atha/merge.tsv --bundle 500 -o /dev/null' \
105 |     'target/release/linkr                           clean ../../tests/Atha/sort.clean.tsv -r ../../tests/Atha/merge.tsv --bundle 500 -o /dev/null' \
106 |     'target/x86_64-unknown-linux-musl/release/linkr clean ../../tests/Atha/sort.clean.tsv -r ../../tests/Atha/merge.tsv --bundle 500 -o /dev/null' \
107 |     './linkr                                        clean ../../tests/Atha/sort.clean.tsv -r ../../tests/Atha/merge.tsv --bundle 500 -o /dev/null'
108 | 
109 | echo >&2
110 | 
111 | log_info "connect"
112 | hyperfine --warmup 1 --export-markdown connect.md \
113 |     -n cargo \
114 |     -n gcc \
115 |     -n musl \
116 |     -n "zig cc" \
117 |     '~/.cargo/bin/linkr                             connect ../../tests/Atha/clean.tsv -o /dev/null' \
118 |     'target/release/linkr                           connect ../../tests/Atha/clean.tsv -o /dev/null' \
119 |     'target/x86_64-unknown-linux-musl/release/linkr connect ../../tests/Atha/clean.tsv -o /dev/null' \
120 |     './linkr                                        connect ../../tests/Atha/clean.tsv -o /dev/null'
121 | 
122 | echo >&2
123 | 
124 | log_info "filter"
125 | hyperfine --warmup 1 --export-markdown filter.md \
126 |     -n cargo \
127 |     -n gcc \
128 |     -n musl \
129 |     -n "zig cc" \
130 |     '~/.cargo/bin/linkr                             filter ../../tests/Atha/connect.tsv -r 0.8 -o /dev/null' \
131 |     'target/release/linkr                           filter ../../tests/Atha/connect.tsv -r 0.8 -o /dev/null' \
132 |     'target/x86_64-unknown-linux-musl/release/linkr filter ../../tests/Atha/connect.tsv -r 0.8 -o /dev/null' \
133 |     './linkr                                        filter ../../tests/Atha/connect.tsv -r 0.8 -o /dev/null'
134 | 
135 | echo >&2
136 | 


--------------------------------------------------------------------------------
/benchmarks/rgr.md:
--------------------------------------------------------------------------------
  1 | # `rgr`
  2 | 
  3 | ## Test materials
  4 | 
  5 | ```shell
  6 | cd ~/gars
  7 | 
  8 | redis-server &
  9 | 
 10 | gars env
 11 | 
 12 | gars status drop
 13 | gars gen genome/genome.fa.gz --piece 500000
 14 | 
 15 | gars range features/T-DNA.CSHL.rg
 16 | gars range features/T-DNA.FLAG.rg
 17 | gars range features/T-DNA.MX.rg
 18 | gars range features/T-DNA.RATM.rg
 19 | 
 20 | gars tsv -s "range:*" | gzip -9 > ranges.tsv.gz
 21 | 
 22 | gzip -dcf ranges.tsv.gz | wc -l
 23 | #102973
 24 | 
 25 | mv ranges.tsv.gz ~/Scripts/intspan/tests/rgr/
 26 | 
 27 | ```
 28 | 
 29 | ## `rgr sort`
 30 | 
 31 | ```shell
 32 | cd ~/Scripts/intspan/
 33 | 
 34 | hyperfine --warmup 1  \
 35 |     -n 'sort' \
 36 |     '
 37 |     rgr sort -H tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz
 38 |     ' \
 39 |     -n 'sort -f' \
 40 |     '
 41 |     rgr sort -H -f 5 tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz
 42 |     ' \
 43 |     -n 'sort -g' \
 44 |     '
 45 |     rgr sort -H -f 5 -g 6 tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz
 46 |     ' \
 47 |     --export-markdown rgr.sort.md.tmp
 48 | 
 49 | cat rgr.sort.md.tmp
 50 | 
 51 | ```
 52 | 
 53 | | Command   |    Mean [ms] | Min [ms] | Max [ms] |    Relative |
 54 | |:----------|-------------:|---------:|---------:|------------:|
 55 | | `sort`    |  621.0 ± 3.3 |    617.1 |    626.3 | 1.01 ± 0.08 |
 56 | | `sort -f` |  629.4 ± 3.0 |    625.3 |    635.0 | 1.02 ± 0.08 |
 57 | | `sort -g` | 615.0 ± 50.4 |    471.9 |    636.2 |        1.00 |
 58 | 
 59 | ## `rgr filter`
 60 | 
 61 | ```shell
 62 | cd ~/Scripts/intspan/
 63 | 
 64 | hyperfine --warmup 1 \
 65 |     -n 'rgr filter' \
 66 |     '
 67 |     rgr filter tests/rgr/ctg_2_1_.gc.tsv --str-eq 3:1 > /dev/null
 68 |     ' \
 69 |     -n 'tsv-filter' \
 70 |     '
 71 |     tsv-filter tests/rgr/ctg_2_1_.gc.tsv --str-eq 3:1 > /dev/null
 72 |     ' \
 73 |     --export-markdown rgr.filter.md.tmp
 74 | 
 75 | cat rgr.filter.md.tmp
 76 | 
 77 | ```
 78 | 
 79 | | Command      |  Mean [ms] | Min [ms] | Max [ms] |    Relative |
 80 | |:-------------|-----------:|---------:|---------:|------------:|
 81 | | `rgr filter` | 10.5 ± 1.1 |      9.3 |     14.6 | 2.25 ± 0.91 |
 82 | | `tsv-filter` |  4.7 ± 1.8 |      2.3 |      7.9 |        1.00 |
 83 | 
 84 | ## `rgr select`
 85 | 
 86 | ```shell
 87 | cd ~/Scripts/intspan/
 88 | 
hyperfine --warmup 1 \
    -n 'rgr select' \
    '
    rgr select tests/rgr/ctg_2_1_.gc.tsv -f 1,3 > /dev/null
    ' \
    -n 'tsv-select' \
    '
    tsv-select tests/rgr/ctg_2_1_.gc.tsv -f 1,3 > /dev/null
    ' \
    -n 'rgr select -H' \
    '
    rgr select tests/rgr/ctg_2_1_.gc.tsv -H -f "#range,signal" > /dev/null
    ' \
    -n 'tsv-select -H' \
    '
    tsv-select tests/rgr/ctg_2_1_.gc.tsv -H -f "#range,signal" > /dev/null
    ' \
    --export-markdown rgr.select.md.tmp

cat rgr.select.md.tmp

```

| Command         |  Mean [ms] | Min [ms] | Max [ms] |    Relative |
|:----------------|-----------:|---------:|---------:|------------:|
| `rgr select`    | 13.9 ± 0.7 |     12.9 |     17.8 | 2.54 ± 0.86 |
| `tsv-select`    |  5.6 ± 1.9 |      3.7 |     10.1 | 1.01 ± 0.49 |
| `rgr select -H` | 13.9 ± 0.8 |     12.9 |     17.5 | 2.53 ± 0.86 |
| `tsv-select -H` |  5.5 ± 1.8 |      3.6 |     10.1 |        1.00 |
118 | 
119 | ## Sampling
120 | 
121 | ```shell
122 | cd ~/Scripts/intspan/
123 | 
124 | hyperfine --warmup 1 \
125 |     -n 'tsv-sample' \
126 |     '
127 |     tsv-sample tests/rgr/ctg_2_1_.gc.tsv --prob 0.4 > /dev/null
128 |     ' \
129 |     -n 'qsv sample' \
130 |     '
131 |     qsv sample 0.4 tests/rgr/ctg_2_1_.gc.tsv > /dev/null
132 |     ' \
133 |     --export-markdown rgr.sample.md.tmp
134 | 
135 | cat rgr.sample.md.tmp
136 | 
137 | 
138 | ```
139 | 
140 | | Command      |    Mean [ms] | Min [ms] | Max [ms] |    Relative |
141 | |:-------------|-------------:|---------:|---------:|------------:|
142 | | `tsv-sample` |   14.0 ± 1.4 |     10.8 |     19.8 |        1.00 |
143 | | `qsv sample` | 127.4 ± 14.5 |    111.5 |    165.6 | 9.13 ± 1.38 |
144 | 


--------------------------------------------------------------------------------
/benchmarks/spanr/chr.sizes:
--------------------------------------------------------------------------------
1 | 1	30427671
2 | 2	19698289
3 | 3	23459830
4 | 4	18585056
5 | 5	26975502
6 | 


--------------------------------------------------------------------------------
/benchmarks/spanr/run.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | #----------------------------#
 4 | # Colors in term
 5 | #----------------------------#
 6 | # http://stackoverflow.com/questions/5947742/how-to-change-the-output-color-of-echo-in-linux
 7 | GREEN=
 8 | RED=
 9 | NC=
10 | if tty -s < /dev/fd/1 2> /dev/null; then
11 |     GREEN='\033[0;32m'
12 |     RED='\033[0;31m'
13 |     NC='\033[0m' # No Color
14 | fi
15 | 
# Print a warning banner to stderr, wrapped in the RED/NC escape sequences.
# All arguments are joined into one message.
log_warn () {
    # "$*" keeps the message a single word; "$@" inside a longer string splits
    # it across several echo arguments (ShellCheck SC2145).
    echo >&2 -e "${RED}==> $* <==${NC}"
}
19 | 
# Print an informational message to stderr, wrapped in the GREEN/NC escape
# sequences. All arguments are joined into one message.
log_info () {
    # "$*" keeps the message a single word; "$@" inside a longer string splits
    # it across several echo arguments (ShellCheck SC2145).
    echo >&2 -e "${GREEN}==> $*${NC}"
}
23 | 
# Print an uncoloured debug message to stderr. All arguments are joined into
# one message.
log_debug () {
    # "$*" keeps the message a single word; "$@" inside a longer string splits
    # it across several echo arguments (ShellCheck SC2145).
    echo >&2 -e "==> $*"
}
27 | 
28 | #----------------------------#
29 | # Prepare
30 | #----------------------------#
# Timing wrapper, held as an array so it expands to separate words without
# relying on unquoted word splitting. `-v` on Linux, `-l` on Darwin.
COMMAND_TIME=(command time -v)
if [[ $(uname) == 'Darwin' ]]; then
    COMMAND_TIME=(command time -l)
fi

# enter BASE_DIR
BASE_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
# The inputs below are relative paths; abort if we cannot reach them.
cd "${BASE_DIR}" || exit 1

#----------------------------#
# Run
#----------------------------#
# The same intersect statistics are computed by three tools. Each tool's CSV
# goes to its own *.tmp file; the timing report is written by `time` itself.
log_info "jrunlist"
"${COMMAND_TIME[@]}" jrunlist \
    statop \
    chr.sizes sep-gene.yml paralog.yml \
    --op intersect --all \
    -o stdout \
    > jstatop.csv.tmp

log_info "spanr"
"${COMMAND_TIME[@]}" spanr \
    statop \
    chr.sizes sep-gene.yml paralog.yml \
    --op intersect --all \
    -o stdout \
    > rstatop.csv.tmp

log_info "App::RL"
"${COMMAND_TIME[@]}" runlist \
    stat2 \
    -s chr.sizes sep-gene.yml paralog.yml \
    --op intersect --all --mk \
    -o stdout \
    > pstatop.csv.tmp
echo >&2
67 | 


--------------------------------------------------------------------------------
/doc/intspans.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/doc/intspans.pdf


--------------------------------------------------------------------------------
/doc/intspans.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/doc/intspans.png


--------------------------------------------------------------------------------
/doc/intspans.tex:
--------------------------------------------------------------------------------
 1 | % !TEX TS-program = arara
 2 | % arara: xelatex: { shell: yes }
 3 | % arara: indent: { overwrite: yes }
 4 | % arara: clean: {extensions: [aux, bak, bbl, bcf, blg, idx, ilg, ind, ist, log, nlo, nls, out, run.xml, synctex.gz,]}
 5 | 
 6 | \documentclass[
 7 |     convert,
 8 |     outext=.png,
 9 |     border=2bp,
10 |     tikz,
11 | ]{standalone}
12 | 
13 | \usepackage{fontspec}
14 | \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
15 | \setmainfont[BoldFont={Fira Sans}]{Fira Sans Light}
16 | \setmonofont{Fira Mono}
17 | 
18 | \usepackage{tikz}
19 | \usetikzlibrary{arrows,positioning}
20 | \usetikzlibrary{shapes.misc}
21 | \usetikzlibrary{graphs}
22 | \usepackage{color}
23 | 
24 | \begin{document}
25 | 
% Shared TikZ styles for the syntax (railroad) diagram drawn in this file.
\tikzset{
% Picture-wide defaults: stealth' arrow tips, thick black!50 lines, black text.
>=stealth', black!50, text=black, thick,
% Appearance of edges created with -> and -- inside \graph.
every new ->/.style       = {thick, shorten <=0pt, shorten >=1pt, color=black!70},
every new --/.style       = {thick, color=black!70},
graphs/every graph/.style = {edges=rounded corners},
% skip loop=<dy>: leave the source vertically by <dy>, then run horizontally
% and drop vertically (-|) into the target. Used for bypass and repeat edges.
skip loop/.style          = {to path={-- ++(0,#1) -| (\tikztotarget)}},
% hv path: horizontal first, then vertical; vh path: vertical first, then horizontal.
hv path/.style            = {to path={-| (\tikztotarget)}},
vh path/.style            = {to path={|- (\tikztotarget)}},
% Rectangular, italic box; not used by the graph in this file.
nonterminal/.style={
        rectangle, minimum size=6mm, very thick, draw=red!50!black!50,
        top color=white, bottom color=red!50!black!20,
        font=\itshape, text height=1.5ex,text depth=.25ex},
% Rounded, monospaced box for literal input symbols.
terminal/.style={
        rounded rectangle, minimum size=6mm, very thick, draw=black!50,
        top color=white, bottom color=black!20,
        font=\ttfamily, text height=1.5ex, text depth=.25ex},
% Zero-size circle; not used by the graph in this file.
invisible/.style={
        draw, circle, minimum size=0mm,
        inner sep=0pt, outer sep=0pt},
% Default node shape; presumably so the unstyled pN junction nodes take no
% space - TODO confirm.
shape = coordinate
}
47 | 
48 | %-99--10,1-10,19,45-48
49 | 
% Syntax diagram of a runlist such as -99--10,1-10,19,45-48:
% comma-separated spans, each an optionally negative integer that may be
% followed by "-" and a second, optionally negative, integer.
\begin{tikzpicture}
    \graph[
    grow right sep,
    branch down=7mm,
    simple,
    ]{
    % Main left-to-right chain. The pN nodes carry no style of their own and
    % serve as junctions for the bypass and repeat edges listed further down.
    / --
    p1 --
    p2 ->
    start_negative[as={-}, terminal] --
    p3 --
    p4 ->
    start_digit[as={digit}, terminal] --
    p5 --
    p6 ->
    hyphen[as={-}, terminal] --
    p7 ->
    end_negative[as={-}, terminal] --
    p8 --
    p9 ->
    end_digit[as={digit}, terminal] --
    p10 --
    p11 --
    p12 ->
    "," [terminal] --
    % -!- creates no edge: after a comma the only way on is the loop back to p1.
    p13 -!-
    p14 --
    / [coordinate];

    % Bypass edges (positive offset, drawn above the chain) mark optional parts:
    % the leading minus, the whole "-end" part, the minus of the end value,
    % and the trailing comma.
    p2 -- [skip loop=5mm] p3;
    p6 -- [skip loop=9mm] p11;
    p7 -- [skip loop=5mm] p8;
    p12 -- [skip loop=5mm] p14;
    % Repeat edges (negative offset, drawn below the chain): further digits of
    % the start value, further digits of the end value, and the next span
    % after a comma.
    p5 -> [skip loop=-5mm] p4;
    p10 -> [skip loop=-5mm] p9;
    p13 -> [skip loop=-9mm] p1;
    };
\end{tikzpicture}
88 | 
89 | \end{document}
90 | 


--------------------------------------------------------------------------------
/doc/ranges.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/doc/ranges.pdf


--------------------------------------------------------------------------------
/doc/ranges.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/doc/ranges.png


--------------------------------------------------------------------------------
/doc/ranges.tex:
--------------------------------------------------------------------------------
 1 | % !TEX TS-program = arara
 2 | % arara: xelatex: { shell: yes }
 3 | % arara: indent: { overwrite: yes }
 4 | % arara: clean: {extensions: [aux, bak, bbl, bcf, blg, idx, ilg, ind, ist, log, nlo, nls, out, run.xml, synctex.gz,]}
 5 | 
 6 | \documentclass[
 7 |     convert,
 8 |     outext=.png,
 9 |     border=2bp,
10 |     tikz,
11 | ]{standalone}
12 | 
13 | \usepackage{fontspec}
14 | \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
15 | \setmainfont[BoldFont={Fira Sans}]{Fira Sans Light}
16 | \setmonofont{Fira Mono}
17 | 
18 | \usepackage{tikz}
19 | \usetikzlibrary{arrows,positioning}
20 | \usetikzlibrary{shapes.misc}
21 | \usetikzlibrary{graphs}
22 | \usepackage{color}
23 | 
24 | \begin{document}
25 | 
% Shared TikZ styles for the syntax (railroad) diagram drawn in this file.
\tikzset{
% Picture-wide defaults: stealth' arrow tips, thick black!50 lines, black text.
>=stealth', black!50, text=black, thick,
% Appearance of edges created with -> and -- inside \graph.
every new ->/.style       = {thick, shorten <=0pt, shorten >=1pt, color=black!70},
every new --/.style       = {thick, color=black!70},
graphs/every graph/.style = {edges=rounded corners},
% skip loop=<dy>: leave the source vertically by <dy>, then run horizontally
% and drop vertically (-|) into the target. Used for the bypass edges.
skip loop/.style          = {to path={-- ++(0,#1) -| (\tikztotarget)}},
% hv path: horizontal first, then vertical; vh path: vertical first, then horizontal.
hv path/.style            = {to path={-| (\tikztotarget)}},
vh path/.style            = {to path={|- (\tikztotarget)}},
% Rectangular, italic box for named parts (species, chromosome, start, end).
nonterminal/.style={
        rectangle, minimum size=6mm, very thick, draw=red!50!black!50,
        top color=white, bottom color=red!50!black!20,
        font=\itshape, text height=1.5ex,text depth=.25ex},
% Rounded, monospaced box for literal input symbols.
terminal/.style={
        rounded rectangle, minimum size=6mm, very thick, draw=black!50,
        top color=white, bottom color=black!20,
        font=\ttfamily, text height=1.5ex, text depth=.25ex},
% Default node shape; presumably so the unstyled pN/qN junction nodes take no
% space - TODO confirm.
shape = coordinate
}
44 | 
45 | %species.chromosome(strand):start-end
46 | %--------^^^^^^^^^^--------^^^^^^----
47 | 
% Syntax diagram of a range written as species.chromosome(strand):start-end.
% Only chromosome and ":start" lie on a path that cannot be bypassed.
\begin{tikzpicture}
    \graph[
    grow right sep,
    branch down=7mm,
    simple,
    ]{
    % Main left-to-right chain. The pN/qN nodes carry no style of their own
    % and serve as junctions for the branch and bypass edges.
    / --
    p1 ->
    species [nonterminal] ->
    "." [terminal] --
    p2 ->
    chromosome [nonterminal] --
    p4 ->
    "(" [terminal] --
    % The strand is a choice between "+" and "-": q1 fans out to both nodes
    % (vertical-then-horizontal) and both rejoin at q3 (horizontal-then-vertical).
    q1 -> [vh path]
    {[nodes={yshift=3.5mm}]
    strand_positive[as={+}, terminal], strand_negative[as={-}, terminal]
    } -- [hv path]
    q3 ->
    ")" [terminal] --
    p5 ->
    ":" [terminal] ->
    start [nonterminal] --
    p6 ->
    "-" [terminal] ->
    end [nonterminal] --
    p7 --
    / [coordinate];

    % Bypass edges mark the optional parts: "species.", "(strand)" and "-end".
    p1 -- [skip loop=5mm] p2;
    p4 -- [skip loop=9mm] p5;
    p6 -- [skip loop=5mm] p7;
    };
\end{tikzpicture}
82 | 
83 | \end{document}
84 | 


--------------------------------------------------------------------------------
/examples/benchmark.rs:
--------------------------------------------------------------------------------
 1 | use intspan::IntSpan;
 2 | use std::env;
 3 | use std::time::Instant;
 4 | 
 5 | fn run_benchmark() {
 6 |     for step in 2..7 {
 7 |         println!("step {}", step);
 8 |         let start = Instant::now();
 9 | 
10 |         test_add_range(step);
11 | 
12 |         let elapsed = start.elapsed();
13 |         println!(
14 |             "duration: {} s",
15 |             (elapsed.as_nanos() as f64) / 1000.0 / 1000.0 / 1000.0
16 |         );
17 |     }
18 | 
19 |     fn test_add_range(step: i32) {
20 |         let vec1 = vec![
21 |             1, 30, 32, 149, 153, 155, 159, 247, 250, 250, 253, 464, 516, 518, 520, 523, 582, 585,
22 |             595, 600, 622, 1679,
23 |         ];
24 |         let vec2 = vec![100, 1_000_000];
25 | 
26 |         for _i in 1..=50000 {
27 |             let mut set = IntSpan::new();
28 | 
29 |             if step >= 2 {
30 |                 set.add_ranges(&vec1);
31 |             }
32 |             if step >= 3 {
33 |                 set.add_ranges(&vec2);
34 |             }
35 |             if step >= 4 {
36 |                 set.to_string();
37 |             }
38 |             if step >= 5 {
39 |                 for j in 1..=200 {
40 |                     set.add_pair(j, j);
41 |                 }
42 |             }
43 |             if step >= 6 {
44 |                 for j in 1..=200 {
45 |                     set.add_pair(j * 5, j * 10);
46 |                 }
47 |             }
48 |         }
49 |     }
50 | }
51 | 
52 | fn main() {
53 |     let args: Vec<String> = env::args().collect();
54 |     println!("{:?}", args);
55 | 
56 |     run_benchmark();
57 | }
58 | 


--------------------------------------------------------------------------------
/examples/test.rs:
--------------------------------------------------------------------------------
 1 | use intspan::IntSpan;
 2 | use std::env;
 3 | 
 4 | fn run_test() {
 5 |     let mut intspan = IntSpan::new();
 6 |     intspan.add_pair(1, 9);
 7 |     intspan.add_pair(20, 39);
 8 | 
 9 |     println!("{}", intspan);
10 |     println!("is_empty {}", intspan.is_empty());
11 |     println!("edge_size {}", intspan.edge_size());
12 |     println!("ranges {:?}", intspan.ranges());
13 |     println!("cardinality {}", intspan.cardinality());
14 | 
15 |     for n in &[-5, 29, 40] {
16 |         println!("val {} is contained {}", n, intspan.contains(*n));
17 |     }
18 | 
19 |     intspan.add_ranges(&[60, 70, 80, 90]);
20 |     println!("{}", intspan);
21 | 
22 |     intspan.add_ranges(&[68, 75]);
23 |     println!("{}", intspan);
24 | 
25 |     intspan.add_n(99);
26 |     println!("{}", intspan);
27 | 
28 |     intspan.add_vec(&[77, 79]);
29 |     println!("{}", intspan);
30 | 
31 |     intspan.invert();
32 |     println!("{}", intspan);
33 | 
34 |     intspan.invert();
35 |     println!("{}", intspan);
36 | 
37 |     intspan.remove_pair(66, 71);
38 |     println!("{}", intspan);
39 | 
40 |     intspan.remove_n(85);
41 |     println!("{}", intspan);
42 | 
43 |     intspan.remove_vec(&[87, 88]);
44 |     println!("{}", intspan);
45 | 
46 |     intspan.add_runlist("-30--10");
47 |     println!("{}", intspan);
48 | 
49 |     intspan.remove_runlist("62-78");
50 |     println!("{}", intspan);
51 | 
52 |     let mut other = IntSpan::new();
53 |     other.add_runlist("-15-5");
54 |     println!("{}", other);
55 | 
56 |     intspan.merge(&other);
57 |     println!("{}", intspan);
58 | 
59 |     other.clear();
60 |     println!("{}", other);
61 |     other.add_runlist("-20--5");
62 |     println!("{}", other);
63 |     intspan.subtract(&other);
64 |     println!("{}", intspan);
65 | 
66 |     //    -30--21,-4-9,20-39,60-61,79-84,86,89-90,99
67 | }
68 | 
69 | fn main() {
70 |     let args: Vec<String> = env::args().collect();
71 |     println!("{:?}", args);
72 | 
73 |     run_test();
74 | }
75 | 


--------------------------------------------------------------------------------
/release.toml:
--------------------------------------------------------------------------------
1 | pre-release-replacements = [
2 |   {file="README.md", search="Current release: [a-z0-9\\.-]+", replace="Current release: {{version}}"} ,
3 |   {file="CHANGELOG.md", search="Unreleased", replace="{{version}}"},
4 |   {file="CHANGELOG.md", search="ReleaseDate", replace="{{date}}"},
5 |   {file="CHANGELOG.md", search="Change Log", replace="Change Log\n\n## Unreleased - ReleaseDate"}
6 | ]
7 | 


--------------------------------------------------------------------------------
/src/cmd_linkr/circos.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use intspan::*;
  3 | use std::io::BufRead;
  4 | 
  5 | // Create clap subcommand arguments
  6 | pub fn make_subcommand() -> Command {
  7 |     Command::new("circos")
  8 |         .about("Convert links to circos links or highlights")
  9 |         .after_help(
 10 |             r###"
 11 | * It's assumed that all ranges in input files are valid
 12 | 
 13 | "###,
 14 |         )
 15 |         .arg(
 16 |             Arg::new("infiles")
 17 |                 .required(true)
 18 |                 .num_args(1..)
 19 |                 .index(1)
 20 |                 .help("Set the input files to use"),
 21 |         )
 22 |         .arg(
 23 |             Arg::new("highlight")
 24 |                 .long("highlight")
 25 |                 .action(ArgAction::SetTrue)
 26 |                 .help("Create highlights instead of links"),
 27 |         )
 28 |         .arg(
 29 |             Arg::new("outfile")
 30 |                 .long("outfile")
 31 |                 .short('o')
 32 |                 .num_args(1)
 33 |                 .default_value("stdout")
 34 |                 .help("Output filename. [stdout] for screen"),
 35 |         )
 36 | }
 37 | 
 38 | // command implementation
 39 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 40 |     //----------------------------
 41 |     // Loading
 42 |     //----------------------------
 43 |     let mut writer = writer(args.get_one::<String>("outfile").unwrap());
 44 |     let is_highlight = args.get_flag("highlight");
 45 | 
 46 |     let mut colors = (1..=12)
 47 |         .map(|n| format!("paired-12-qual-{}", n))
 48 |         .collect::<Vec<String>>();
 49 |     colors.reverse();
 50 |     let mut color_idx = 0;
 51 | 
 52 |     for infile in args.get_many::<String>("infiles").unwrap() {
 53 |         let reader = reader(infile);
 54 |         for line in reader.lines().map_while(Result::ok) {
 55 |             let parts: Vec<&str> = line.split('\t').collect();
 56 | 
 57 |             if is_highlight {
 58 |                 for part in parts {
 59 |                     let range = Range::from_str(part);
 60 |                     if !range.is_valid() {
 61 |                         continue;
 62 |                     }
 63 | 
 64 |                     //----------------------------
 65 |                     // Output
 66 |                     //----------------------------
 67 |                     writer.write_all(
 68 |                         format!(
 69 |                             "{} {} {} fill_color={}\n",
 70 |                             range.chr(),
 71 |                             range.start(),
 72 |                             range.end(),
 73 |                             colors[color_idx]
 74 |                         )
 75 |                         .as_ref(),
 76 |                     )?;
 77 |                 }
 78 | 
 79 |                 // rotate color
 80 |                 color_idx += 1;
 81 |                 if color_idx > 11 {
 82 |                     color_idx = 0;
 83 |                 }
 84 |             } else {
 85 |                 let count = parts.len();
 86 | 
 87 |                 // 2-combinations of parts forms a pair
 88 |                 for i in 0..count {
 89 |                     'PAIR: for j in i + 1..count {
 90 |                         let mut fields: Vec<String> = vec![];
 91 |                         for idx in &[i, j] {
 92 |                             let range = Range::from_str(parts[*idx]);
 93 |                             if !range.is_valid() {
 94 |                                 continue 'PAIR;
 95 |                             }
 96 | 
 97 |                             fields.push(range.chr().to_string());
 98 |                             if range.strand() == "-" {
 99 |                                 fields.push(range.end().to_string());
100 |                                 fields.push(range.start().to_string());
101 |                             } else {
102 |                                 fields.push(range.start().to_string());
103 |                                 fields.push(range.end().to_string());
104 |                             }
105 |                         }
106 | 
107 |                         //----------------------------
108 |                         // Output
109 |                         //----------------------------
110 |                         writer.write_all(format!("{}\n", fields.join(" ")).as_ref())?;
111 |                     }
112 |                 }
113 |             }
114 |         } // end of line
115 |     }
116 | 
117 |     Ok(())
118 | }
119 | 


--------------------------------------------------------------------------------
/src/cmd_linkr/filter.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use intspan::*;
  3 | use std::io::BufRead;
  4 | 
  5 | // Create clap subcommand arguments
  6 | pub fn make_subcommand() -> Command {
  7 |     Command::new("filter")
  8 |         .about("Filter links by numbers of ranges or length differences")
  9 |         .after_help(
 10 |             r###"
 11 | * It's assumed that all ranges in input files are valid
 12 | * Inputs should not contain hit strands
 13 | 
 14 | "###,
 15 |         )
 16 |         .arg(
 17 |             Arg::new("infiles")
 18 |                 .required(true)
 19 |                 .num_args(1..)
 20 |                 .index(1)
 21 |                 .help("Set the input files to use"),
 22 |         )
 23 |         .arg(
 24 |             Arg::new("number")
 25 |                 .long("number")
 26 |                 .short('n')
 27 |                 .num_args(1)
 28 |                 .help("Numbers of ranges, an IntSpan like [2-10]"),
 29 |         )
 30 |         .arg(
 31 |             Arg::new("ratio")
 32 |                 .long("ratio")
 33 |                 .short('r')
 34 |                 .num_args(1)
 35 |                 .value_parser(value_parser!(f32))
 36 |                 .help("Ratio of lengths differences. The suggested value is [0.8]"),
 37 |         )
 38 |         .arg(
 39 |             Arg::new("outfile")
 40 |                 .long("outfile")
 41 |                 .short('o')
 42 |                 .num_args(1)
 43 |                 .default_value("stdout")
 44 |                 .help("Output filename. [stdout] for screen"),
 45 |         )
 46 | }
 47 | 
 48 | // command implementation
 49 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 50 |     //----------------------------
 51 |     // Loading
 52 |     //----------------------------
 53 |     let mut writer = writer(args.get_one::<String>("outfile").unwrap());
 54 | 
 55 |     let numbers = if args.contains_id("number") {
 56 |         IntSpan::from(args.get_one::<String>("number").unwrap())
 57 |     } else {
 58 |         IntSpan::new()
 59 |     };
 60 |     let ratio = if args.contains_id("ratio") {
 61 |         *args.get_one::<f32>("ratio").unwrap()
 62 |     } else {
 63 |         -1.0
 64 |     };
 65 | 
 66 |     for infile in args.get_many::<String>("infiles").unwrap() {
 67 |         let reader = reader(infile);
 68 |         for line in reader.lines().map_while(Result::ok) {
 69 |             let parts: Vec<&str> = line.split('\t').collect();
 70 | 
 71 |             if !numbers.is_empty() && !numbers.contains(parts.len() as i32) {
 72 |                 continue;
 73 |             }
 74 | 
 75 |             if ratio > 0.0 {
 76 |                 let mut lengths: Vec<i32> = vec![];
 77 | 
 78 |                 for part in &parts {
 79 |                     let range = Range::from_str(part);
 80 |                     if !range.is_valid() {
 81 |                         continue;
 82 |                     }
 83 |                     lengths.push(range.intspan().cardinality());
 84 |                 }
 85 | 
 86 |                 let min = lengths.iter().min().unwrap();
 87 |                 let max = lengths.iter().max().unwrap();
 88 |                 let diff_ratio = *min as f32 / *max as f32;
 89 | 
 90 |                 if diff_ratio < ratio {
 91 |                     continue;
 92 |                 }
 93 |             }
 94 | 
 95 |             //----------------------------
 96 |             // Output
 97 |             //----------------------------
 98 |             writer.write_all(format!("{}\n", line).as_ref())?;
 99 |         } // end of line
100 |     }
101 | 
102 |     Ok(())
103 | }
104 | 


--------------------------------------------------------------------------------
/src/cmd_linkr/mod.rs:
--------------------------------------------------------------------------------
1 | //! Subcommand modules for the `linkr` binary.
2 | 
3 | pub mod circos;
4 | pub mod clean;
5 | pub mod connect;
6 | pub mod filter;
7 | pub mod sort;
8 | 


--------------------------------------------------------------------------------
/src/cmd_linkr/sort.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use intspan::*;
 3 | use std::collections::BTreeSet;
 4 | use std::io::BufRead;
 5 | 
 6 | // Create clap subcommand arguments
 7 | pub fn make_subcommand() -> Command {
 8 |     Command::new("sort")
 9 |         .about("Sort links and ranges within links")
10 |         .arg(
11 |             Arg::new("infiles")
12 |                 .required(true)
13 |                 .num_args(1..)
14 |                 .index(1)
15 |                 .help("Set the input files to use"),
16 |         )
17 |         .arg(
18 |             Arg::new("outfile")
19 |                 .long("outfile")
20 |                 .short('o')
21 |                 .num_args(1)
22 |                 .default_value("stdout")
23 |                 .help("Output filename. [stdout] for screen"),
24 |         )
25 | }
26 | 
27 | // command implementation
28 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
29 |     //----------------------------
30 |     // Loading
31 |     //----------------------------
32 |     let mut line_set: BTreeSet<String> = BTreeSet::new();
33 | 
34 |     for infile in args.get_many::<String>("infiles").unwrap() {
35 |         let reader = reader(infile);
36 |         'LINE: for line in reader.lines().map_while(Result::ok) {
37 |             let parts: Vec<&str> = line.split('\t').collect();
38 | 
39 |             for part in parts {
40 |                 let range = Range::from_str(part);
41 |                 if range.is_valid() {
42 |                     line_set.insert(line.clone());
43 |                     continue 'LINE;
44 |                 }
45 |             }
46 |         } // end of line
47 |     }
48 | 
49 |     //----------------------------
50 |     // Sorting
51 |     //----------------------------
52 |     let mut lines = line_set.into_iter().collect::<Vec<String>>();
53 |     lines = sort_links(&lines);
54 | 
55 |     //----------------------------
56 |     // Output
57 |     //----------------------------
58 |     write_lines(args.get_one::<String>("outfile").unwrap(), &lines)?;
59 | 
60 |     Ok(())
61 | }
62 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/count.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use rust_lapper::{Interval, Lapper};
  3 | use std::collections::BTreeMap;
  4 | use std::io::BufRead;
  5 | 
// Interval: represents a half-open range [start, stop) carrying a `val` payload.
// Here both the coordinate type and the payload type are `u32`.
type Iv = Interval<u32, u32>; // the first type should be Unsigned
  8 | 
  9 | // Create clap subcommand arguments
 10 | pub fn make_subcommand() -> Command {
 11 |     Command::new("count")
 12 |         .about("Count overlaps between ranges in a target file and other range files")
 13 |         .after_help(
 14 |             r###"
 15 | * Lines without a valid range will not be output
 16 | 
 17 | Example:
 18 | 
 19 |     # Count overlaps between two .rg files
 20 |     rgr count tests/rgr/S288c.rg tests/rgr/S288c.rg
 21 | 
 22 |     # Count overlaps in a .tsv file with headers
 23 |     rgr count tests/rgr/ctg.range.tsv tests/rgr/S288c.rg -H -f 3
 24 | 
 25 |     # For large .rg files, pre-sorting may improve perfermonce.
 26 |     cat *.rg | rgr sort stdin | rgr count target.rg stdin
 27 | 
 28 | "###,
 29 |         )
 30 |         .arg(
 31 |             Arg::new("target")
 32 |                 .required(true)
 33 |                 .index(1)
 34 |                 .num_args(1)
 35 |                 .help("Target .rg/.tsv file"),
 36 |         )
 37 |         .arg(
 38 |             Arg::new("infiles")
 39 |                 .required(true)
 40 |                 .index(2)
 41 |                 .num_args(1..)
 42 |                 .help("Input .rg files to count overlaps with"),
 43 |         )
 44 |         .arg(
 45 |             Arg::new("header")
 46 |                 .long("header")
 47 |                 .short('H')
 48 |                 .action(ArgAction::SetTrue)
 49 |                 .help("Treat the first line of each file as a header"),
 50 |         )
 51 |         .arg(
 52 |             Arg::new("sharp")
 53 |                 .long("sharp")
 54 |                 .short('s')
 55 |                 .action(ArgAction::SetTrue)
 56 |                 .help("Include lines starting with `#` without changes (default: ignore them)"),
 57 |         )
 58 |         .arg(
 59 |             Arg::new("field")
 60 |                 .long("field")
 61 |                 .short('f')
 62 |                 .value_parser(value_parser!(usize))
 63 |                 .num_args(1)
 64 |                 .help("Index of the range field. If not set, the first valid range will be used"),
 65 |         )
 66 |         .arg(
 67 |             Arg::new("outfile")
 68 |                 .long("outfile")
 69 |                 .short('o')
 70 |                 .num_args(1)
 71 |                 .default_value("stdout")
 72 |                 .help("Output filename. [stdout] for screen"),
 73 |         )
 74 | }
 75 | 
 76 | // command implementation
 77 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 78 |     //----------------------------
 79 |     // Options
 80 |     //----------------------------
 81 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
 82 | 
 83 |     let is_sharp = args.get_flag("sharp");
 84 |     let is_header = args.get_flag("header");
 85 | 
 86 |     let opt_idx_range = args.get_one::<usize>("field").copied().unwrap_or(0);
 87 | 
 88 |     //----------------------------
 89 |     // Loading
 90 |     //----------------------------
 91 |     // seq_name => Vector of Intervals
 92 |     let mut iv_of: BTreeMap<String, Vec<Iv>> = BTreeMap::new();
 93 | 
 94 |     for infile in args.get_many::<String>("infiles").unwrap() {
 95 |         let reader = intspan::reader(infile);
 96 |         for line in reader.lines().map_while(Result::ok) {
 97 |             if line.starts_with('#') {
 98 |                 continue;
 99 |             }
100 | 
101 |             let range = intspan::Range::from_str(&line);
102 |             if !range.is_valid() {
103 |                 continue;
104 |             }
105 | 
106 |             let iv = Iv {
107 |                 start: *range.start() as u32,
108 |                 stop: *range.end() as u32 + 1,
109 |                 val: 0,
110 |             };
111 |             let chr = range.chr();
112 |             iv_of.entry(chr.to_string()).or_default().push(iv);
113 |         }
114 |     }
115 | 
116 |     // seq_name => Lapper
117 |     let mut lapper_of = BTreeMap::new();
118 |     for (chr, ivs) in iv_of {
119 |         let lapper = Lapper::new(ivs);
120 |         lapper_of.insert(chr, lapper);
121 |     }
122 | 
123 |     //----------------------------
124 |     // Operating
125 |     //----------------------------
126 |     let reader = intspan::reader(args.get_one::<String>("target").unwrap());
127 |     'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() {
128 |         // Handle the header line
129 |         if is_header && i == 0 {
130 |             writer.write_fmt(format_args!("{}\t{}\n", line, "count"))?;
131 |             continue 'LINE;
132 |         }
133 | 
134 |         // Handle lines starting with '#'
135 |         if line.starts_with('#') {
136 |             if is_sharp {
137 |                 writer.write_fmt(format_args!("{}\n", line))?;
138 |             }
139 |             continue 'LINE;
140 |         }
141 | 
142 |         let rg = match intspan::extract_rg(&line, opt_idx_range) {
143 |             // Extract the range
144 |             Some(range) => range,
145 |             // Skip lines without a valid range
146 |             None => continue 'LINE,
147 |         };
148 | 
149 |         let mut count = 0;
150 |         if lapper_of.contains_key(rg.chr()) {
151 |             let lapper = lapper_of.get(rg.chr()).unwrap();
152 |             count = lapper.count(*rg.start() as u32, *rg.end() as u32 + 1);
153 |         }
154 | 
155 |         //----------------------------
156 |         // Output
157 |         //----------------------------
158 |         writer.write_all(format!("{}\t{}\n", line, count).as_ref())?;
159 |     }
160 | 
161 |     Ok(())
162 | }
163 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/dedup.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use std::collections::HashSet;
 3 | use std::io::{BufRead, Write};
 4 | 
 5 | // Create clap subcommand arguments
 6 | pub fn make_subcommand() -> Command {
 7 |     Command::new("dedup")
 8 |         .about("Deduplicate lines in .tsv file(s) based on specified fields or the entire line")
 9 |         .after_help(
10 |             r###"
11 | This command removes duplicate lines from .tsv file(s) in a single pass without sorting.
12 | Each line consumes 8 bytes (u64) of memory for hashing, making it memory-efficient.
13 | As a trade-off, this program cannot count the occurrences of duplicates.
14 | 
15 | * If no fields are specified, the entire line is used as the key for deduplication.
16 | * If fields are specified, only the selected fields are used as the key.
17 | 
18 | Examples:
19 |     # Deduplicates lines in file1.tsv and file2.tsv, writing the result to output.tsv
20 |     rgr dedup file1.tsv file2.tsv -o output.tsv
21 | 
22 |     # Deduplicates lines in file1.tsv based on the 1st and 3rd fields, printing the result to stdout
23 |     rgr dedup file1.tsv -f 1,3
24 | 
25 | "###,
26 |         )
27 |         .arg(
28 |             Arg::new("infiles")
29 |                 .required(true)
30 |                 .num_args(1..)
31 |                 .index(1)
32 |                 .help("Input file(s) to process"),
33 |         )
34 |         .arg(
35 |             Arg::new("fields")
36 |                 .long("fields")
37 |                 .short('f')
38 |                 .num_args(1)
39 |                 .help("Fields to use as the key"),
40 |         )
41 |         .arg(
42 |             Arg::new("outfile")
43 |                 .long("outfile")
44 |                 .short('o')
45 |                 .num_args(1)
46 |                 .default_value("stdout")
47 |                 .help("Output filename. [stdout] for screen"),
48 |         )
49 | }
50 | 
51 | // command implementation
52 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
53 |     //----------------------------
54 |     // Args
55 |     //----------------------------
56 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
57 | 
58 |     let opt_fields: intspan::IntSpan = if args.contains_id("fields") {
59 |         intspan::fields_to_ints(args.get_one::<String>("fields").unwrap())
60 |     } else {
61 |         intspan::IntSpan::new()
62 |     };
63 | 
64 |     //----------------------------
65 |     // Ops
66 |     //----------------------------
67 |     let mut subject_set: HashSet<u64> = HashSet::new();
68 | 
69 |     for infile in args.get_many::<String>("infiles").unwrap() {
70 |         let reader = intspan::reader(infile);
71 | 
72 |         for line in reader.lines().map_while(Result::ok) {
73 |             let subject = if opt_fields.is_empty() {
74 |                 // whole line
75 |                 xxhash_rust::xxh3::xxh3_64(&line.clone().into_bytes())
76 |             } else {
77 |                 // Get elements at specified indices
78 |                 let fields: Vec<&str> = line.split('\t').collect();
79 |                 let subset: Vec<&str> = opt_fields
80 |                     .elements()
81 |                     .iter()
82 |                     .filter_map(|&i| fields.get(i as usize - 1))
83 |                     .copied()
84 |                     .collect();
85 |                 let concat = subset.join("\t");
86 |                 xxhash_rust::xxh3::xxh3_64(&concat.into_bytes())
87 |             };
88 | 
89 |             if !subject_set.contains(&subject) {
90 |                 writer.write_fmt(format_args!("{}\n", line))?;
91 |                 subject_set.insert(subject);
92 |             }
93 |         }
94 |     }
95 | 
96 |     Ok(())
97 | }
98 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/field.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use std::io::BufRead;
  3 | 
  4 | // Create clap subcommand arguments
  5 | pub fn make_subcommand() -> Command {
  6 |     Command::new("field")
  7 |         .about("Create/append ranges from fields")
  8 |         .after_help(
  9 |             r###"
 10 | Examples:
 11 | 
 12 | 1. Create ranges from a chromosome size file:
 13 |     rgr field tests/Atha/chr.sizes --chr 1 --start 2 -a -s
 14 | 
 15 | 2. Create ranges from a GFF file:
 16 |     rgr field tests/spanr/NC_007942.gff -H --chr 1 --start 4 --end 5 --strand 7
 17 | 
 18 | 3. Create ranges from a .tsv file:
 19 |     rgr field tests/rgr/ctg.tsv --chr 2 --start 3 --end 4 -H
 20 | 
 21 | "###,
 22 |         )
 23 |         .arg(
 24 |             Arg::new("infiles")
 25 |                 .required(true)
 26 |                 .num_args(1..)
 27 |                 .index(1)
 28 |                 .help("Input files to process"),
 29 |         )
 30 |         .arg(
 31 |             Arg::new("header")
 32 |                 .long("header")
 33 |                 .short('H')
 34 |                 .action(ArgAction::SetTrue)
 35 |                 .help("Treat the first line of each file as a header"),
 36 |         )
 37 |         .arg(
 38 |             Arg::new("sharp")
 39 |                 .long("sharp")
 40 |                 .short('s')
 41 |                 .action(ArgAction::SetTrue)
 42 |                 .help("Preserve lines starting with a `#` without changes. The default is to ignore them"),
 43 |         )
 44 |         .arg(
 45 |             Arg::new("chr")
 46 |                 .long("chr")
 47 |                 .num_args(1)
 48 |                 .required(true)
 49 |                 .value_parser(value_parser!(usize))
 50 |                 .help("Field index for chr"),
 51 |         )
 52 |         .arg(
 53 |             Arg::new("strand")
 54 |                 .long("strand")
 55 |                 .num_args(1)
 56 |                 .value_parser(value_parser!(usize))
 57 |                 .help("Optional field index for strand"),
 58 |         )
 59 |         .arg(
 60 |             Arg::new("start")
 61 |                 .long("start")
 62 |                 .num_args(1)
 63 |                 .required(true)
 64 |                 .value_parser(value_parser!(usize))
 65 |                 .help("Field index for start"),
 66 |         )
 67 |         .arg(
 68 |             Arg::new("end")
 69 |                 .long("end")
 70 |                 .num_args(1)
 71 |                 .value_parser(value_parser!(usize))
 72 |                 .help("Optional field index for end"),
 73 |         )
 74 |         .arg(
 75 |             Arg::new("append")
 76 |                 .long("append")
 77 |                 .short('a')
 78 |                 .action(ArgAction::SetTrue)
 79 |                 .help("Append a field for the range (default: only write the range)"),
 80 |         )
 81 |         .arg(
 82 |             Arg::new("outfile")
 83 |                 .long("outfile")
 84 |                 .short('o')
 85 |                 .num_args(1)
 86 |                 .default_value("stdout")
 87 |                 .help("Output filename. [stdout] for screen"),
 88 |         )
 89 | }
 90 | 
 91 | // command implementation
 92 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 93 |     //----------------------------
 94 |     // Args
 95 |     //----------------------------
 96 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
 97 | 
 98 |     let is_header = args.get_flag("header");
 99 |     let is_sharp = args.get_flag("sharp");
100 | 
101 |     let opt_idx_chr = *args.get_one::<usize>("chr").unwrap();
102 |     let opt_idx_strand = args.get_one::<usize>("strand").copied().unwrap_or(0);
103 |     let opt_idx_start = *args.get_one::<usize>("start").unwrap();
104 |     let opt_idx_end = args.get_one::<usize>("end").copied().unwrap_or(0);
105 | 
106 |     let is_append = args.get_flag("append");
107 | 
108 |     //----------------------------
109 |     // Ops
110 |     //----------------------------
111 |     for infile in args.get_many::<String>("infiles").unwrap() {
112 |         let reader = intspan::reader(infile);
113 |         'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() {
114 |             let parts: Vec<&str> = line.split('\t').collect();
115 | 
116 |             // Handle the header line
117 |             if is_header && i == 0 {
118 |                 if is_append {
119 |                     writer.write_fmt(format_args!("{}\t{}\n", line, "range"))?;
120 |                 } else {
121 |                     writer.write_fmt(format_args!("{}\n", "range"))?;
122 |                 }
123 |                 continue 'LINE;
124 |             }
125 | 
126 |             // Handle lines starting with '#'
127 |             if line.starts_with('#') {
128 |                 if is_sharp {
129 |                     writer.write_fmt(format_args!("{}\n", line))?;
130 |                 }
131 |                 continue 'LINE;
132 |             }
133 | 
134 |             // Build ranges
135 |             let chr = parts.get(opt_idx_chr - 1).unwrap();
136 |             let strand = if opt_idx_strand == 0 {
137 |                 ""
138 |             } else {
139 |                 parts.get(opt_idx_strand - 1).unwrap()
140 |             };
141 |             let start = parts
142 |                 .get(opt_idx_start - 1)
143 |                 .unwrap()
144 |                 .parse::<i32>()
145 |                 .unwrap();
146 |             let end = if opt_idx_end == 0 {
147 |                 start
148 |             } else {
149 |                 parts.get(opt_idx_end - 1).unwrap().parse::<i32>().unwrap()
150 |             };
151 | 
152 |             let rg = intspan::Range {
153 |                 name: "".to_string(),
154 |                 chr: chr.to_string(),
155 |                 strand: strand.to_string(),
156 |                 start,
157 |                 end,
158 |             };
159 | 
160 |             //----------------------------
161 |             // Output
162 |             //----------------------------
163 |             let new_line: String = if is_append {
164 |                 format!("{}\t{}", parts.join("\t"), rg)
165 |             } else {
166 |                 rg.to_string()
167 |             };
168 | 
169 |             writer.write_fmt(format_args!("{}\n", new_line))?;
170 |         }
171 |     }
172 | 
173 |     Ok(())
174 | }
175 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/keep.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use std::io::{BufRead, Write};
  3 | 
  4 | // Create clap subcommand arguments
  5 | pub fn make_subcommand() -> Command {
  6 |     Command::new("keep")
  7 |         .about("Keep the the initial header line(s)")
  8 |         .after_help(
  9 |             r###"
 10 | The first N lines of each file is treated as a header and the one of first file is output unchanged.
 11 | Subsequent lines are sent to the specified command via stdin, excluding headers from other files.
 12 | The output from the command is appended to the initial header.
 13 | 
 14 | * Use a double hyphen (--) to separate the command from the file arguments.
 15 | 
 16 | Examples:
 17 |     # Keeps the first 2 lines of file1.txt as headers, processes the rest with `wc -l`
 18 |     rgr keep -l 2 file1.txt file2.txt -- wc -l
 19 | 
 20 |     # Skips headers and processes all lines with `sort`
 21 |     rgr keep --delete file1.txt file2.txt -- sort
 22 | 
 23 | "###,
 24 |         )
 25 |         .arg(
 26 |             Arg::new("infiles")
 27 |                 .required(true)
 28 |                 .num_args(1..)
 29 |                 .help("Input file(s) to process"),
 30 |         )
 31 |         .arg(
 32 |             Arg::new("lines")
 33 |                 .long("lines")
 34 |                 .short('l')
 35 |                 .num_args(1)
 36 |                 .default_value("1")
 37 |                 .value_parser(value_parser!(usize))
 38 |                 .help("Number of header lines to keep"),
 39 |         )
 40 |         .arg(
 41 |             Arg::new("delete")
 42 |                 .long("delete")
 43 |                 .short('d')
 44 |                 .action(ArgAction::SetTrue)
 45 |                 .help("Skip writing headers"),
 46 |         )
 47 |         .arg(
 48 |             Arg::new("commands")
 49 |                 .required(true)
 50 |                 .num_args(1..)
 51 |                 .last(true)
 52 |                 .value_parser(value_parser!(String))
 53 |                 .help("Command to process subsequent lines"),
 54 |         )
 55 | }
 56 | 
 57 | // command implementation
 58 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 59 |     //----------------------------
 60 |     // Args
 61 |     //----------------------------
 62 |     let infiles = args
 63 |         .get_many::<String>("infiles")
 64 |         .map(|vals| vals.collect::<Vec<_>>())
 65 |         .unwrap_or_default();
 66 | 
 67 |     let opt_lines = *args.get_one::<usize>("lines").unwrap();
 68 |     let is_delete = args.get_flag("delete");
 69 | 
 70 |     let commands = args
 71 |         .get_many::<String>("commands")
 72 |         .map(|vals| vals.collect::<Vec<_>>())
 73 |         .unwrap_or_default();
 74 | 
 75 |     //----------------------------
 76 |     // Ops
 77 |     //----------------------------
 78 |     let mut child = std::process::Command::new(commands[0])
 79 |         .args(&commands[1..])
 80 |         .stdin(std::process::Stdio::piped())
 81 |         .stdout(std::process::Stdio::inherit())
 82 |         .stderr(std::process::Stdio::inherit())
 83 |         .spawn()?;
 84 |     let stdin = child.stdin.as_mut().expect("Failed to open child stdin");
 85 | 
 86 |     let mut first_file = true; // Track if we are processing the first file
 87 |     for infile in infiles {
 88 |         let reader = intspan::reader(infile);
 89 |         let mut header_written = 0;
 90 |         let mut lines = reader.lines();
 91 | 
 92 |         while let Some(Ok(line)) = lines.next() {
 93 |             if header_written < opt_lines {
 94 |                 if first_file && !is_delete {
 95 |                     // Only print headers from the first file
 96 |                     println!("{}", line);
 97 |                 }
 98 |                 header_written += 1;
 99 |             } else {
100 |                 // Send subsequent lines to the command
101 |                 writeln!(stdin, "{}", line)?;
102 |             }
103 |         }
104 | 
105 |         // After processing the first file, set first_file to false
106 |         first_file = false;
107 |     }
108 | 
109 |     stdin.flush()?;
110 |     child.wait()?;
111 |     Ok(())
112 | }
113 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/md.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use std::io::{BufRead, Write};
  3 | 
  4 | // Create clap subcommand arguments
  5 | pub fn make_subcommand() -> Command {
  6 |     Command::new("md")
  7 |         .about("Convert a .tsv file to a Markdown table")
  8 |         .after_help(
  9 |             r###"
 10 | You can customize the alignment of columns and format numeric values.
 11 | 
 12 | Examples:
 13 |     # right-align numeric columns, and center-align the 2nd column
 14 |     rgr md tests/rgr/ctg.range.tsv --num --center 2
 15 | 
 16 |     # right-align numeric columns and format them to 2 decimal places
 17 |     rgr md input.tsv --right 2 --fmt --digits 2
 18 | 
 19 | "###,
 20 |         )
 21 |         .arg(
 22 |             Arg::new("infile")
 23 |                 .required(true)
 24 |                 .num_args(1)
 25 |                 .index(1)
 26 |                 .help("Input file to process"),
 27 |         )
 28 |         .arg(
 29 |             Arg::new("center")
 30 |                 .long("center")
 31 |                 .short('c')
 32 |                 .num_args(1)
 33 |                 .help("List of columns to center-align (e.g., `1,3-5`)"),
 34 |         )
 35 |         .arg(
 36 |             Arg::new("right")
 37 |                 .long("right")
 38 |                 .short('r')
 39 |                 .num_args(1)
 40 |                 .help("Columns to right-align"),
 41 |         )
 42 |         .arg(
 43 |             Arg::new("num")
 44 |                 .long("num")
 45 |                 .action(ArgAction::SetTrue)
 46 |                 .help("Automatically right-align numeric columns"),
 47 |         )
 48 |         .arg(
 49 |             Arg::new("fmt")
 50 |                 .long("fmt")
 51 |                 .action(ArgAction::SetTrue)
 52 |                 .help("Format numeric columns and enable the `--num` option"),
 53 |         )
 54 |         .arg(
 55 |             Arg::new("digits")
 56 |                 .long("digits")
 57 |                 .num_args(1)
 58 |                 .default_value("0")
 59 |                 .value_parser(value_parser!(usize))
 60 |                 .help("Number of decimal digits"),
 61 |         )
 62 |         .arg(
 63 |             Arg::new("outfile")
 64 |                 .long("outfile")
 65 |                 .short('o')
 66 |                 .num_args(1)
 67 |                 .default_value("stdout")
 68 |                 .help("Output filename. [stdout] for screen"),
 69 |         )
 70 | }
 71 | 
 72 | // command implementation
 73 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 74 |     //----------------------------
 75 |     // Loading
 76 |     //----------------------------
 77 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
 78 |     let reader = intspan::reader(args.get_one::<String>("infile").unwrap());
 79 | 
 80 |     let mut opt_center: intspan::IntSpan = if args.contains_id("center") {
 81 |         intspan::fields_to_ints(args.get_one::<String>("center").unwrap())
 82 |     } else {
 83 |         intspan::IntSpan::new()
 84 |     };
 85 |     let mut opt_right: intspan::IntSpan = if args.contains_id("right") {
 86 |         intspan::fields_to_ints(args.get_one::<String>("right").unwrap())
 87 |     } else {
 88 |         intspan::IntSpan::new()
 89 |     };
 90 |     let mut is_num = args.get_flag("num");
 91 |     let is_fmt = args.get_flag("fmt");
 92 |     if is_fmt {
 93 |         is_num = true;
 94 |     }
 95 |     let opt_digits: usize = *args.get_one("digits").unwrap();
 96 | 
 97 |     //----------------------------
 98 |     // Output
 99 |     //----------------------------
100 |     let mut is_numeric_column = vec![];
101 | 
102 |     let mut data: Vec<Vec<String>> = Vec::new();
103 |     for line in reader.lines().map_while(Result::ok) {
104 |         let fields: Vec<String> = line.split('\t').map(|s| s.to_string()).collect();
105 |         data.push(fields);
106 |     }
107 | 
108 |     let mut table = String::new();
109 |     if !data.is_empty() {
110 |         let num_columns = data[0].len();
111 |         if is_num {
112 |             // Determine if each column is numeric
113 |             is_numeric_column = vec![true; num_columns];
114 | 
115 |             for row in data.iter().skip(1) {
116 |                 // Skip the header row
117 |                 for (i, value) in row.iter().enumerate() {
118 |                     if is_numeric_column[i] && value.parse::<f64>().is_err() {
119 |                         is_numeric_column[i] = false;
120 |                     }
121 |                 }
122 |             }
123 | 
124 |             for (i, &flag) in is_numeric_column.iter().enumerate().take(num_columns) {
125 |                 if flag {
126 |                     opt_center.remove_n((i + 1) as i32);
127 |                     opt_right.add_n((i + 1) as i32);
128 |                 }
129 |             }
130 |         }
131 | 
132 |         // Print the Markdown table
133 |         for (i, row) in data.iter().enumerate() {
134 |             let formatted_row: Vec<String> = row
135 |                 .iter()
136 |                 .enumerate()
137 |                 .map(|(j, value)| {
138 |                     // Don't touch first row
139 |                     if i == 0 {
140 |                         value.to_string()
141 |                     } else if is_fmt && is_numeric_column[j] {
142 |                         let num = value.parse::<f64>().unwrap();
143 |                         let v = intspan::format_number(num, opt_digits);
144 |                         v.to_string()
145 |                     } else {
146 |                         value.to_string()
147 |                     }
148 |                 })
149 |                 .collect();
150 |             table += format!("| {} |\n", formatted_row.join(" | ")).as_str();
151 | 
152 |             // Print the header separator
153 |             if i == 0 {
154 |                 let separator: Vec<String> = (0..num_columns)
155 |                     .collect::<Vec<_>>()
156 |                     .iter()
157 |                     .map(|&j| {
158 |                         if opt_right.contains((j + 1) as i32) {
159 |                             "---:".to_string()
160 |                         } else if opt_center.contains((j + 1) as i32) {
161 |                             ":---:".to_string()
162 |                         } else {
163 |                             "---".to_string()
164 |                         }
165 |                     })
166 |                     .collect();
167 |                 table += format!("| {} |\n", separator.join(" | ")).as_str();
168 |             }
169 |         }
170 |     }
171 | 
172 |     if !table.is_empty() {
173 |         writer.write_fmt(format_args!(
174 |             "{}",
175 |             markdown_table_formatter::format_tables(table)
176 |         ))?;
177 |     }
178 | 
179 |     Ok(())
180 | }
181 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/merge.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use intspan::*;
  3 | use petgraph::prelude::NodeIndex;
  4 | use petgraph::*;
  5 | use std::collections::{HashMap, HashSet};
  6 | use std::io::BufRead;
  7 | 
  8 | // Create clap subcommand arguments
  9 | pub fn make_subcommand() -> Command {
 10 |     Command::new("merge")
 11 |         .about("Merge overlapped ranges via overlapping graph")
 12 |         .after_help(
 13 |             r###"
 14 | This command merges overlapping ranges from input files based on a specified coverage threshold.
 15 | It builds an overlapping graph for each chromosome and merges ranges that meet the coverage criteria.
 16 | 
 17 | Examples:
 18 | 
 19 |     # Merge all ranges in the .tsv file with a coverage threshold of 0.98
 20 |     rgr merge tests/rgr/II.links.tsv --coverage 0.98
 21 | 
 22 |     # Enable verbose mode to see detailed processing information
 23 |     rgr merge input1.rg input2.rg --coverage 0.95 --verbose
 24 | 
 25 | "###,
 26 |         )
 27 |         .arg(
 28 |             Arg::new("infiles")
 29 |                 .required(true)
 30 |                 .num_args(1..)
 31 |                 .index(1)
 32 |                 .help("Input files to process. Multiple files can be specified"),
 33 |         )
 34 |         .arg(
 35 |             Arg::new("coverage")
 36 |                 .long("coverage")
 37 |                 .short('c')
 38 |                 .num_args(1)
 39 |                 .default_value("0.95")
 40 |                 .value_parser(value_parser!(f32))
 41 |                 .help("Ranges with coverage larger than this value will be merged"),
 42 |         )
 43 |         .arg(
 44 |             Arg::new("verbose")
 45 |                 .long("verbose")
 46 |                 .short('v')
 47 |                 .action(ArgAction::SetTrue)
 48 |                 .help("Enable verbose mode"),
 49 |         )
 50 |         .arg(
 51 |             Arg::new("outfile")
 52 |                 .long("outfile")
 53 |                 .short('o')
 54 |                 .num_args(1)
 55 |                 .default_value("stdout")
 56 |                 .help("Output filename. [stdout] for screen"),
 57 |         )
 58 | }
 59 | 
 60 | // command implementation
 61 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 62 |     //----------------------------
 63 |     // Loading
 64 |     //----------------------------
 65 |     let opt_coverage = *args.get_one::<f32>("coverage").unwrap();
 66 |     let is_verbose = args.get_flag("verbose");
 67 | 
 68 |     // store graph separately by chromosomes
 69 |     // petgraph use NodeIndex to store and identify nodes
 70 |     let mut graph_of_chr: HashMap<String, Graph<String, (), Undirected>> = HashMap::new();
 71 | 
 72 |     // cache ranges
 73 |     let mut range_of_part: HashMap<String, Range> = HashMap::new();
 74 |     // cache node indices
 75 |     let mut idx_of_part: HashMap<String, NodeIndex> = HashMap::new();
 76 | 
 77 |     // all chromosomes
 78 |     let mut chrs: HashSet<String> = HashSet::new();
 79 | 
 80 |     // Load ranges from input files
 81 |     for infile in args.get_many::<String>("infiles").unwrap() {
 82 |         let reader = reader(infile);
 83 |         for line in reader.lines().map_while(Result::ok) {
 84 |             for part in line.split('\t') {
 85 |                 let range = Range::from_str(part);
 86 |                 if !range.is_valid() {
 87 |                     continue;
 88 |                 }
 89 | 
 90 |                 if range_of_part.contains_key(part) {
 91 |                     continue;
 92 |                 }
 93 | 
 94 |                 let chr = range.chr();
 95 |                 graph_of_chr
 96 |                     .entry(chr.to_string())
 97 |                     .or_insert_with(Graph::new_undirected);
 98 |                 chrs.insert(chr.to_string());
 99 | 
100 |                 let idx = graph_of_chr
101 |                     .get_mut(chr)
102 |                     .unwrap()
103 |                     .add_node(part.to_string());
104 |                 idx_of_part.insert(part.to_string(), idx);
105 | 
106 |                 range_of_part.insert(part.to_string(), range);
107 |             }
108 |         } // end of line
109 |     } // end of file
110 |     let mut chrs = chrs.into_iter().collect::<Vec<String>>();
111 |     chrs.sort();
112 | 
113 |     //----------------------------
114 |     // Checking coverages
115 |     //----------------------------
116 |     for chr in &chrs {
117 |         if is_verbose {
118 |             eprintln!("Chromosome {}", chr);
119 |         }
120 | 
121 |         let graph = graph_of_chr.get_mut(chr).unwrap();
122 |         let indices = graph.node_indices().collect::<Vec<NodeIndex>>();
123 | 
124 |         for i in 0..indices.len() {
125 |             let node_i = graph.node_weight(indices[i]).unwrap();
126 |             let intspan_i = range_of_part[node_i].intspan();
127 |             if is_verbose {
128 |                 eprintln!("    Range {}/{}\t{}", i, indices.len(), node_i);
129 |             }
130 | 
131 |             for j in i + 1..indices.len() {
132 |                 let node_j = graph.node_weight(indices[j]).unwrap();
133 |                 let intspan_j = range_of_part[node_j].intspan();
134 | 
135 |                 let intersect = intspan_i.intersect(&intspan_j);
136 |                 if !intersect.is_empty() {
137 |                     let coverage_i =
138 |                         intersect.cardinality() as f32 / intspan_i.cardinality() as f32;
139 |                     let coverage_j =
140 |                         intersect.cardinality() as f32 / intspan_j.cardinality() as f32;
141 | 
142 |                     if coverage_i >= opt_coverage && coverage_j >= opt_coverage {
143 |                         if is_verbose {
144 |                             eprintln!(
145 |                                 "        Merge with Range {}/{}\t{}",
146 |                                 j,
147 |                                 indices.len(),
148 |                                 node_j
149 |                             );
150 |                         }
151 |                         graph.add_edge(indices[i], indices[j], ());
152 |                     }
153 |                 }
154 |             }
155 |         }
156 |     }
157 | 
158 |     //----------------------------
159 |     // Merging
160 |     //----------------------------
161 |     let mut out_lines: Vec<String> = Vec::new();
162 |     for chr in &chrs {
163 |         let graph = graph_of_chr.get(chr).unwrap();
164 | 
165 |         let scc: Vec<Vec<NodeIndex>> = petgraph::algo::tarjan_scc(graph);
166 |         for cc_indices in &scc {
167 |             if cc_indices.len() < 2 {
168 |                 continue;
169 |             }
170 | 
171 |             if is_verbose {
172 |                 eprintln!("Chromosome {}: Merge {} ranges", chr, cc_indices.len());
173 |             }
174 | 
175 |             // connected ranges
176 |             let mut part_list = cc_indices
177 |                 .iter()
178 |                 .map(|idx| graph.node_weight(*idx).unwrap().clone())
179 |                 .collect::<Vec<String>>();
180 |             part_list.sort();
181 | 
182 |             // collect info for merged range
183 |             let mut intspan = IntSpan::new();
184 |             for part in &part_list {
185 |                 let range = range_of_part.get(part).unwrap();
186 |                 intspan.merge(&range.intspan());
187 |             }
188 | 
189 |             // create merged range
190 |             let merged: String = format!("{}(+):{}", chr, intspan);
191 | 
192 |             for part in &part_list {
193 |                 if *part == merged {
194 |                     continue;
195 |                 }
196 | 
197 |                 let out_line = format!("{}\t{}", part, merged);
198 |                 if is_verbose {
199 |                     eprintln!("{}", out_line);
200 |                 }
201 |                 out_lines.push(out_line);
202 |             }
203 |         }
204 |     }
205 | 
206 |     //----------------------------
207 |     // Output
208 |     //----------------------------
209 |     write_lines(args.get_one::<String>("outfile").unwrap(), &out_lines)?;
210 | 
211 |     Ok(())
212 | }
213 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Subcommand modules for the `rgr` binary.
 2 | 
 3 | pub mod count;
 4 | pub mod dedup;
 5 | pub mod field;
 6 | pub mod filter;
 7 | pub mod keep;
 8 | pub mod md;
 9 | pub mod merge;
10 | pub mod pl_2rmp;
11 | pub mod prop;
12 | pub mod replace;
13 | pub mod runlist;
14 | pub mod select;
15 | pub mod sort;
16 | pub mod span;
17 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/prop.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use std::ffi::OsStr;
  3 | use std::io::BufRead;
  4 | use std::path::Path;
  5 | 
  6 | // Create clap subcommand arguments
  7 | pub fn make_subcommand() -> Command {
  8 |     Command::new("prop")
  9 |         .about("Proportion of the ranges intersecting a runlist file")
 10 |         .after_help(
 11 |             r###"
 12 | * Lines without a valid range will not be output
 13 | * Appended fields
 14 |     * `prop`
 15 |     * `length`: length of the range (if `--full` is set)
 16 |     * `size`: size of the intersection (if `--full` is set)
 17 | 
 18 | Example:
 19 | 
 20 |     rgr prop tests/rgr/intergenic.json tests/rgr/S288c.rg
 21 | 
 22 |     rgr prop tests/rgr/intergenic.json tests/rgr/ctg.range.tsv -H -f 3 --prefix --full
 23 | 
 24 | "###,
 25 |         )
 26 |         .arg(
 27 |             Arg::new("runlist")
 28 |                 .required(true)
 29 |                 .index(1)
 30 |                 .num_args(1)
 31 |                 .help("Runlist file to calculate intersections against"),
 32 |         )
 33 |         .arg(
 34 |             Arg::new("infiles")
 35 |                 .required(true)
 36 |                 .index(2)
 37 |                 .num_args(1..)
 38 |                 .help("Input files to process. Multiple files can be specified"),
 39 |         )
 40 |         .arg(
 41 |             Arg::new("header")
 42 |                 .long("header")
 43 |                 .short('H')
 44 |                 .action(ArgAction::SetTrue)
 45 |                 .help("Treat the first line of each file as a header"),
 46 |         )
 47 |         .arg(
 48 |             Arg::new("sharp")
 49 |                 .long("sharp")
 50 |                 .short('s')
 51 |                 .action(ArgAction::SetTrue)
 52 |                 .help("Include lines starting with `#` without changes (default: ignore them)"),
 53 |         )
 54 |         .arg(
 55 |             Arg::new("field")
 56 |                 .long("field")
 57 |                 .short('f')
 58 |                 .num_args(1)
 59 |                 .value_parser(value_parser!(usize))
 60 |                 .help("Index of the range field. If not set, the first valid range will be used"),
 61 |         )
 62 |         .arg(
 63 |             Arg::new("full")
 64 |                 .long("full")
 65 |                 .action(ArgAction::SetTrue)
 66 |                 .help("Also append `length` and `size` fields"),
 67 |         )
 68 |         .arg(
 69 |             Arg::new("prefix")
 70 |                 .long("prefix")
 71 |                 .action(ArgAction::SetTrue)
 72 |                 .help("Prefix the basename of the runlist file if `--header` is set"),
 73 |         )
 74 |         .arg(
 75 |             Arg::new("outfile")
 76 |                 .long("outfile")
 77 |                 .short('o')
 78 |                 .num_args(1)
 79 |                 .default_value("stdout")
 80 |                 .help("Output filename. [stdout] for screen"),
 81 |         )
 82 | }
 83 | 
 84 | // command implementation
 85 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 86 |     //----------------------------
 87 |     // Args
 88 |     //----------------------------
 89 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
 90 | 
 91 |     let is_sharp = args.get_flag("sharp");
 92 |     let is_header = args.get_flag("header");
 93 | 
 94 |     let opt_idx_range = args.get_one::<usize>("field").copied().unwrap_or(0);
 95 | 
 96 |     let is_full = args.get_flag("full");
 97 |     let is_prefix = args.get_flag("prefix");
 98 | 
 99 |     //----------------------------
100 |     // Loading
101 |     //----------------------------
102 |     let json = intspan::read_json(args.get_one::<String>("runlist").unwrap());
103 |     let set = intspan::json2set(&json);
104 | 
105 |     //----------------------------
106 |     // Ops
107 |     //----------------------------
108 |     for infile in args.get_many::<String>("infiles").unwrap() {
109 |         let reader = intspan::reader(infile);
110 |         'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() {
111 |             // Handle the header line
112 |             if is_header && i == 0 {
113 |                 if is_prefix {
114 |                     let prefix = Path::new(args.get_one::<String>("runlist").unwrap())
115 |                         .file_stem()
116 |                         .and_then(OsStr::to_str)
117 |                         .unwrap()
118 |                         .split('.')
119 |                         .next()
120 |                         .unwrap()
121 |                         .to_string();
122 |                     if is_full {
123 |                         writer.write_fmt(format_args!(
124 |                             "{}\t{}{}\t{}{}\t{}{}\n",
125 |                             line, prefix, "Prop", prefix, "Length", prefix, "Size"
126 |                         ))?;
127 |                     } else {
128 |                         writer.write_fmt(format_args!("{}\t{}{}\n", line, prefix, "Prop"))?;
129 |                     }
130 |                 } else if is_full {
131 |                     writer.write_fmt(format_args!(
132 |                         "{}\t{}\t{}\t{}\n",
133 |                         line, "prop", "length", "size"
134 |                     ))?;
135 |                 } else {
136 |                     writer.write_fmt(format_args!("{}\t{}\n", line, "prop"))?;
137 |                 }
138 | 
139 |                 continue 'LINE;
140 |             }
141 | 
142 |             // Handle lines starting with '#'
143 |             if line.starts_with('#') {
144 |                 if is_sharp {
145 |                     writer.write_fmt(format_args!("{}\n", line))?;
146 |                 }
147 |                 continue 'LINE;
148 |             }
149 | 
150 |             let rg = match intspan::extract_rg(&line, opt_idx_range) {
151 |                 // Extract the range
152 |                 Some(range) => range,
153 |                 // Skip lines without a valid range
154 |                 None => continue 'LINE,
155 |             };
156 | 
157 |             // Calculate intersection
158 |             let chr = rg.chr();
159 |             let mut intspan = intspan::IntSpan::new();
160 |             intspan.add_pair(*rg.start(), *rg.end());
161 | 
162 |             let (prop, length, size) = if set.contains_key(chr) {
163 |                 let intxn = set.get(chr).unwrap().intersect(&intspan);
164 |                 let prop = intxn.cardinality() as f32 / intspan.cardinality() as f32;
165 |                 (prop, intspan.cardinality(), intxn.cardinality())
166 |             } else {
167 |                 (0.0, intspan.cardinality(), 0)
168 |             };
169 | 
170 |             //----------------------------
171 |             // Output
172 |             //----------------------------
173 |             if is_full {
174 |                 writer.write_fmt(format_args!(
175 |                     "{}\t{:.4}\t{}\t{}\n",
176 |                     line, prop, length, size
177 |                 ))?;
178 |             } else {
179 |                 writer.write_fmt(format_args!("{}\t{:.4}\n", line, prop))?;
180 |             }
181 |         }
182 |     }
183 | 
184 |     Ok(())
185 | }
186 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/replace.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use std::collections::HashMap;
 3 | use std::io::BufRead;
 4 | 
 5 | // Create clap subcommand arguments
 6 | pub fn make_subcommand() -> Command {
 7 |     Command::new("replace")
 8 |         .about("Replace fields in a .tsv file using a replacement map")
 9 |         .after_help(
10 |             r###"
11 | Examples:
12 | 
13 |     # Replace fields
14 |     rgr replace tests/rgr/1_4.ovlp.tsv tests/rgr/1_4.replace.tsv
15 | 
16 |     # Reverse the replacement map (To--From instead of From--To)
17 |     rgr replace tests/rgr/1_4.ovlp.tsv tests/rgr/1_4.replace.tsv -r
18 | 
19 | "###,
20 |         )
21 |         .arg(
22 |             Arg::new("infile")
23 |                 .required(true)
24 |                 .num_args(1)
25 |                 .index(1)
26 |                 .help("Input file to process"),
27 |         )
28 |         .arg(
29 |             Arg::new("replace")
30 |                 .required(true)
31 |                 .num_args(1)
32 |                 .index(2)
33 |                 .help("Replacement map file with two columns: From and To"),
34 |         )
35 |         .arg(
36 |             Arg::new("reverse")
37 |                 .long("reverse")
38 |                 .short('r')
39 |                 .action(ArgAction::SetTrue)
40 |                 .help("Use the replacement map in reverse order (To--From instead of From--To)"),
41 |         )
42 |         .arg(
43 |             Arg::new("outfile")
44 |                 .long("outfile")
45 |                 .short('o')
46 |                 .num_args(1)
47 |                 .default_value("stdout")
48 |                 .help("Output filename. [stdout] for screen"),
49 |         )
50 | }
51 | 
52 | // command implementation
53 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
54 |     //----------------------------
55 |     // Args
56 |     //----------------------------
57 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
58 |     let reader = intspan::reader(args.get_one::<String>("infile").unwrap());
59 | 
60 |     //----------------------------
61 |     // Load replacements
62 |     //----------------------------
63 |     let mut replaces: HashMap<String, String> = HashMap::new();
64 |     for line in intspan::read_lines(args.get_one::<String>("replace").unwrap()) {
65 |         let parts: Vec<&str> = line.split('\t').collect();
66 |         if parts.len() == 2 {
67 |             if args.get_flag("reverse") {
68 |                 replaces.insert(parts[1].to_string(), parts[0].to_string());
69 |             } else {
70 |                 replaces.insert(parts[0].to_string(), parts[1].to_string());
71 |             }
72 |         }
73 |     }
74 | 
75 |     //----------------------------
76 |     // Output
77 |     //----------------------------
78 |     for line in reader.lines().map_while(Result::ok) {
79 |         let fields: Vec<&str> = line.split('\t').collect();
80 |         let mut out: Vec<&str> = vec![];
81 | 
82 |         for f in fields {
83 |             if let Some(replacement) = replaces.get(f) {
84 |                 out.push(replacement);
85 |             } else {
86 |                 out.push(f);
87 |             }
88 |         }
89 | 
90 |         writer.write_all((out.join("\t") + "\n").as_ref())?;
91 |     }
92 | 
93 |     Ok(())
94 | }
95 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/runlist.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use std::io::BufRead;
  3 | 
  4 | // Create clap subcommand arguments
  5 | pub fn make_subcommand() -> Command {
  6 |     Command::new("runlist")
  7 |         .about("Filter .rg and .tsv files by comparing with a runlist file")
  8 |         .after_help(
  9 |             r###"
 10 | * Lines without a valid range will not be output
 11 | 
 12 | Example:
 13 | 
 14 |     # Filter lines that overlap with the runlist
 15 |     rgr runlist tests/rgr/intergenic.json tests/rgr/S288c.rg --op overlap
 16 | 
 17 |     # # Filter lines that overlap with the runlist in a TSV file with headers
 18 |     rgr runlist tests/rgr/intergenic.json tests/rgr/ctg.range.tsv --op overlap -H -f 3
 19 | 
 20 | "###,
 21 |         )
 22 |         .arg(
 23 |             Arg::new("runlist")
 24 |                 .required(true)
 25 |                 .index(1)
 26 |                 .num_args(1)
 27 |                 .help("Set the runlist file to use"),
 28 |         )
 29 |         .arg(
 30 |             Arg::new("infiles")
 31 |                 .required(true)
 32 |                 .index(2)
 33 |                 .num_args(1..)
 34 |                 .help("Input files to process. Multiple files can be specified"),
 35 |         )
 36 |         .arg(
 37 |             Arg::new("header")
 38 |                 .long("header")
 39 |                 .short('H')
 40 |                 .action(ArgAction::SetTrue)
 41 |                 .help("Treat the first line of each file as a header"),
 42 |         )
 43 |         .arg(
 44 |             Arg::new("sharp")
 45 |                 .long("sharp")
 46 |                 .short('s')
 47 |                 .action(ArgAction::SetTrue)
 48 |                 .help("Preserve lines starting with a `#` without changes. The default is to ignore them"),
 49 |         )
 50 |         .arg(
 51 |             Arg::new("field")
 52 |                 .long("field")
 53 |                 .short('f')
 54 |                 .num_args(1)
 55 |                 .value_parser(value_parser!(usize))
 56 |                 .help("Index of the range field. If not set, the first valid range will be used"),
 57 |         )
 58 |         .arg(
 59 |             Arg::new("op")
 60 |                 .long("op")
 61 |                 .num_args(1)
 62 |                 .action(ArgAction::Set)
 63 |                 .value_parser([
 64 |                     builder::PossibleValue::new("overlap"),
 65 |                     builder::PossibleValue::new("non-overlap"),
 66 |                     builder::PossibleValue::new("superset"),
 67 |                 ])
 68 |                 .default_value("overlap")
 69 |                 .help("Filter operation: overlap, non-overlap or superset"),
 70 |         )
 71 |         .arg(
 72 |             Arg::new("outfile")
 73 |                 .long("outfile")
 74 |                 .short('o')
 75 |                 .num_args(1)
 76 |                 .default_value("stdout")
 77 |                 .help("Output filename. [stdout] for screen"),
 78 |         )
 79 | }
 80 | 
 81 | // command implementation
 82 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 83 |     //----------------------------
 84 |     // Args
 85 |     //----------------------------
 86 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
 87 | 
 88 |     let opt_op = args.get_one::<String>("op").unwrap().as_str();
 89 | 
 90 |     let is_sharp = args.get_flag("sharp");
 91 |     let is_header = args.get_flag("header");
 92 | 
 93 |     let opt_idx_range = args.get_one::<usize>("field").copied().unwrap_or(0);
 94 | 
 95 |     //----------------------------
 96 |     // Loading
 97 |     //----------------------------
 98 |     let json = intspan::read_json(args.get_one::<String>("runlist").unwrap());
 99 |     let set = intspan::json2set(&json);
100 | 
101 |     //----------------------------
102 |     // Ops
103 |     //----------------------------
104 |     for infile in args.get_many::<String>("infiles").unwrap() {
105 |         let reader = intspan::reader(infile);
106 |         'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() {
107 |             // Handle the header line
108 |             if is_header && i == 0 {
109 |                 writer.write_fmt(format_args!("{}\n", line))?;
110 |                 continue 'LINE;
111 |             }
112 | 
113 |             // Handle lines starting with '#'
114 |             if line.starts_with('#') {
115 |                 if is_sharp {
116 |                     writer.write_fmt(format_args!("{}\n", line))?;
117 |                 }
118 |                 continue 'LINE;
119 |             }
120 | 
121 |             let rg = match intspan::extract_rg(&line, opt_idx_range) {
122 |                 // Extract the range
123 |                 Some(range) => range,
124 |                 // Skip lines without a valid range
125 |                 None => continue 'LINE,
126 |             };
127 | 
128 |             // Prepare the range for comparison
129 |             let chr = rg.chr();
130 |             let mut intspan = intspan::IntSpan::new();
131 |             intspan.add_pair(*rg.start(), *rg.end());
132 | 
133 |             //----------------------------
134 |             // Output
135 |             //----------------------------
136 |             match opt_op {
137 |                 "overlap" => {
138 |                     if set.contains_key(chr)
139 |                         && !set.get(chr).unwrap().intersect(&intspan).is_empty()
140 |                     {
141 |                         writer.write_fmt(format_args!("{}\n", line))?;
142 |                     }
143 |                 }
144 |                 "non-overlap" => {
145 |                     if set.contains_key(chr) {
146 |                         if set.get(chr).unwrap().intersect(&intspan).is_empty() {
147 |                             writer.write_fmt(format_args!("{}\n", line))?;
148 |                         }
149 |                     } else {
150 |                         writer.write_fmt(format_args!("{}\n", line))?;
151 |                     }
152 |                 }
153 |                 "superset" => {
154 |                     if set.contains_key(chr) && set.get(chr).unwrap().superset(&intspan) {
155 |                         writer.write_fmt(format_args!("{}\n", line))?;
156 |                     }
157 |                 }
158 |                 _ => unreachable!("Invalid operation: {}", opt_op),
159 |             };
160 |         }
161 |     }
162 | 
163 |     Ok(())
164 | }
165 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/select.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use std::collections::HashMap;
  3 | use std::io::BufRead;
  4 | 
  5 | // Create clap subcommand arguments
  6 | pub fn make_subcommand() -> Command {
  7 |     Command::new("select")
  8 |         .about("Select fields in the order listed")
  9 |         .after_help(
 10 |             r###"
 11 | * Fields can be specified by field number or field name.
 12 | * Field names must not be specified as a valid IntSpan runlist.
 13 |   For example, avoid using formats like `1`, `2-6`, or `-`.
 14 | 
 15 | Examples:
 16 |     # Selects fields 6 and 1 from the input file, treating the first line as a header
 17 |     rgr select tests/rgr/ctg.tsv -H -f 6,1
 18 | 
 19 |     # Selects fields `ID` and `length` by names
 20 |     rgr select tests/rgr/ctg.tsv -H -f ID,length
 21 | 
 22 | "###,
 23 |         )
 24 |         .arg(
 25 |             Arg::new("infiles")
 26 |                 .required(true)
 27 |                 .num_args(1..)
 28 |                 .index(1)
 29 |                 .help("Input file to process"),
 30 |         )
 31 |         .arg(
 32 |             Arg::new("header")
 33 |                 .long("header")
 34 |                 .short('H')
 35 |                 .action(ArgAction::SetTrue)
 36 |                 .help("Treat the first line of each file as a header"),
 37 |         )
 38 |         .arg(
 39 |             Arg::new("sharp")
 40 |                 .long("sharp")
 41 |                 .short('s')
 42 |                 .action(ArgAction::SetTrue)
 43 |                 .help("Preserve lines starting with a `#` without changes. The default is to ignore them"),
 44 |         )
 45 |         .arg(
 46 |             Arg::new("fields")
 47 |                 .long("fields")
 48 |                 .short('f')
 49 |                 .num_args(1)
 50 |                 .help("Writes selected fields and the generated range field, in the order listed"),
 51 |         )
 52 |         .arg(
 53 |             Arg::new("outfile")
 54 |                 .long("outfile")
 55 |                 .short('o')
 56 |                 .num_args(1)
 57 |                 .default_value("stdout")
 58 |                 .help("Output filename. [stdout] for screen"),
 59 |         )
 60 | }
 61 | 
 62 | // command implementation
 63 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 64 |     //----------------------------
 65 |     // Args
 66 |     //----------------------------
 67 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
 68 | 
 69 |     let is_header = args.get_flag("header");
 70 |     let is_sharp = args.get_flag("sharp");
 71 | 
 72 |     //----------------------------
 73 |     // Ops
 74 |     //----------------------------
 75 |     for infile in args.get_many::<String>("infiles").unwrap() {
 76 |         let reader = intspan::reader(infile);
 77 |         let mut fields: Vec<usize> = vec![];
 78 | 
 79 |         'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() {
 80 |             let parts: Vec<&str> = line.split('\t').collect();
 81 | 
 82 |             // Handle the header line
 83 |             if i == 0 {
 84 |                 if is_header {
 85 |                     let idx_of: HashMap<String, usize> = parts
 86 |                         .iter()
 87 |                         .enumerate()
 88 |                         .map(|(i, field)| (field.to_string(), i + 1))
 89 |                         .collect();
 90 | 
 91 |                     if args.contains_id("fields") {
 92 |                         fields = intspan::named_field_to_idx(
 93 |                             args.get_one::<String>("fields").unwrap(),
 94 |                             &idx_of,
 95 |                         )
 96 |                         .unwrap()
 97 |                     };
 98 |                 } else if args.contains_id("fields") {
 99 |                     fields = intspan::ints_to_idx(args.get_one::<String>("fields").unwrap());
100 |                 }
101 | 
102 |                 if fields.is_empty() {
103 |                     writer.write_fmt(format_args!("{}\n", line))?;
104 |                 } else {
105 |                     let selected: Vec<String> = fields
106 |                         .iter()
107 |                         .map(|e| parts.get(*e - 1).unwrap().to_string())
108 |                         .collect();
109 | 
110 |                     writer.write_fmt(format_args!("{}\n", selected.join("\t")))?;
111 |                 }
112 |                 continue 'LINE;
113 |             }
114 | 
115 |             if line.starts_with('#') {
116 |                 if is_sharp {
117 |                     writer.write_fmt(format_args!("{}\n", line))?;
118 |                 }
119 |                 continue 'LINE;
120 |             }
121 | 
122 |             //----------------------------
123 |             // Output
124 |             //----------------------------
125 |             let new_line: String = if fields.is_empty() {
126 |                 parts.join("\t").to_string()
127 |             } else {
128 |                 let selected: Vec<String> = fields
129 |                     .iter()
130 |                     .map(|e| parts.get(*e - 1).unwrap().to_string())
131 |                     .collect();
132 | 
133 |                 selected.join("\t")
134 |             };
135 | 
136 |             writer.write_fmt(format_args!("{}\n", new_line))?;
137 |         }
138 |     }
139 | 
140 |     Ok(())
141 | }
142 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/sort.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use itertools::Itertools;
  3 | use std::collections::BTreeMap;
  4 | use std::io::BufRead;
  5 | 
  6 | // Create clap subcommand arguments
  7 | pub fn make_subcommand() -> Command {
  8 |     Command::new("sort")
  9 |         .about("Sort .rg and .tsv files by a range field")
 10 |         .after_help(
 11 |             r###"
 12 | * If no part of the line is a valid range, the line will be written to to the end of the output
 13 | 
 14 | * Using `--group` can improve performance on large datasets by grouping rows before sorting.
 15 |     * The group_key can be chr_id, ctg_id, etc.
 16 | 
 17 | Example:
 18 | 
 19 |     # Sort a .rg file
 20 |     rgr sort tests/rgr/S288c.rg
 21 | 
 22 |     # Sort a .tsv file by the first valid range
 23 |     rgr sort tests/rgr/ctg.range.tsv
 24 | 
 25 |     # Sort a .tsv file by a specific range field and treat the first line as a header
 26 |     rgr sort tests/rgr/ctg.range.tsv -H -f 3
 27 | 
 28 | "###,
 29 |         )
 30 |         .arg(
 31 |             Arg::new("infiles")
 32 |                 .required(true)
 33 |                 .num_args(1..)
 34 |                 .index(1)
 35 |                 .help("Input files to process. Multiple files can be specified"),
 36 |         )
 37 |         .arg(
 38 |             Arg::new("header")
 39 |                 .long("header")
 40 |                 .short('H')
 41 |                 .action(ArgAction::SetTrue)
 42 |                 .help("Treat the first line of each file as a header"),
 43 |         )
 44 |         .arg(
 45 |             Arg::new("field")
 46 |                 .long("field")
 47 |                 .short('f')
 48 |                 .num_args(1)
 49 |                 .value_parser(value_parser!(usize))
 50 |                 .help("Index of the range field. If not set, the first valid range will be used"),
 51 |         )
 52 |         .arg(
 53 |             Arg::new("group")
 54 |                 .long("group")
 55 |                 .short('g')
 56 |                 .num_args(1)
 57 |                 .value_parser(value_parser!(usize))
 58 |                 .help("Group the rows by this field and then sort within each group"),
 59 |         )
 60 |         .arg(
 61 |             Arg::new("outfile")
 62 |                 .long("outfile")
 63 |                 .short('o')
 64 |                 .num_args(1)
 65 |                 .default_value("stdout")
 66 |                 .help("Output filename. [stdout] for screen"),
 67 |         )
 68 | }
 69 | 
 70 | // command implementation
 71 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 72 |     //----------------------------
 73 |     // Options
 74 |     //----------------------------
 75 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
 76 | 
 77 |     let is_header = args.get_flag("header");
 78 | 
 79 |     let opt_idx_range = args.get_one::<usize>("field").copied().unwrap_or(0);
 80 |     let opt_idx_group = args.get_one::<usize>("group").copied().unwrap_or(0);
 81 | 
 82 |     //----------------------------
 83 |     // Loading
 84 |     //----------------------------
 85 |     let mut line_to_rg: BTreeMap<String, intspan::Range> = BTreeMap::new();
 86 |     let mut invalids: Vec<String> = vec![];
 87 | 
 88 |     for infile in args.get_many::<String>("infiles").unwrap() {
 89 |         let reader = intspan::reader(infile);
 90 |         'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() {
 91 |             // Handle the header line
 92 |             if is_header && i == 0 {
 93 |                 writer.write_fmt(format_args!("{}\n", line))?;
 94 |                 continue 'LINE;
 95 |             }
 96 | 
 97 |             // Extract the range
 98 |             if let Some(range) = intspan::extract_rg(&line, opt_idx_range) {
 99 |                 // Store the line and its range
100 |                 line_to_rg.insert(line.clone(), range);
101 |             } else {
102 |                 // No valid range found
103 |                 invalids.push(line.clone());
104 |             }
105 |         }
106 |     }
107 | 
108 |     //----------------------------
109 |     // Sorting
110 |     //----------------------------
111 |     let mut sorted: Vec<String> = vec![];
112 | 
113 |     if opt_idx_group == 0 {
114 |         // Sort all lines together
115 |         sorted = line_to_rg.keys().map(|e| e.to_string()).collect();
116 | 
117 |         sorted.sort_by_cached_key(|k| {
118 |             let range = line_to_rg.get(k).unwrap();
119 |             (range.chr().clone(), range.start(), range.strand().clone())
120 |         });
121 |     } else {
122 |         // Group lines by the specified field, then sort within each group
123 |         let mut lines_of: BTreeMap<String, Vec<String>> = BTreeMap::new();
124 | 
125 |         for line in line_to_rg.keys() {
126 |             let parts: Vec<&str> = line.split('\t').collect();
127 | 
128 |             let group_key = parts.get(opt_idx_group - 1).unwrap();
129 |             lines_of
130 |                 .entry(group_key.to_string())
131 |                 .or_default()
132 |                 .push(line.clone());
133 |         }
134 | 
135 |         for group_key in lines_of.keys().sorted() {
136 |             let mut lines = lines_of.get(group_key).unwrap().clone();
137 | 
138 |             lines.sort_by_cached_key(|k| {
139 |                 let range = line_to_rg.get(k).unwrap();
140 |                 (range.chr().clone(), range.start(), range.strand().clone())
141 |             });
142 |             sorted.extend(lines);
143 |         }
144 |     }
145 | 
146 |     //----------------------------
147 |     // Output
148 |     //----------------------------
149 |     for line in &sorted {
150 |         writer.write_fmt(format_args!("{}\n", line))?;
151 |     }
152 |     for line in &invalids {
153 |         writer.write_fmt(format_args!("{}\n", line))?;
154 |     }
155 | 
156 |     Ok(())
157 | }
158 | 


--------------------------------------------------------------------------------
/src/cmd_rgr/span.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use std::io::{BufRead, Write};
  3 | 
  4 | // Create clap subcommand arguments
  5 | pub fn make_subcommand() -> Command {
  6 |     Command::new("span")
  7 |         .about("Operate spans in .tsv/.rg file")
  8 |         .after_help(
  9 |             r###"
 10 | This command is similar to `spanr span`, but the <infiles> represent chromosome ranges.
 11 | 
 12 | List of Operations
 13 | 
 14 | * General Ops (both, 5p, or 3p)
 15 |     * trim: Remove `N` integers from the ends of the range.
 16 |     * pad: Add `N` integers to the ends of the range.
 17 | * Directional Ops (5p or 3p)
 18 |     * shift: Shift a range by N toward the 5p or 3p end.
 19 |     * flank: Retrieve flank regions of size `N` from the range.
 20 | * Size-based Ops
 21 |     * excise: Remove any ranges that are smaller than `N`.
 22 | 
 23 | "###,
 24 |         )
 25 |         .arg(
 26 |             Arg::new("infiles")
 27 |                 .required(true)
 28 |                 .num_args(1..)
 29 |                 .index(1)
 30 |                 .help("Input files to process. Multiple files can be specified."),
 31 |         )
 32 |         .arg(
 33 |             Arg::new("header")
 34 |                 .long("header")
 35 |                 .short('H')
 36 |                 .action(ArgAction::SetTrue)
 37 |                 .help("Treat the first line of each file as a header"),
 38 |         )
 39 |         .arg(
 40 |             Arg::new("sharp")
 41 |                 .long("sharp")
 42 |                 .short('s')
 43 |                 .action(ArgAction::SetTrue)
 44 |                 .help("Include lines starting with `#` without changes (default: ignore them)"),
 45 |         )
 46 |         .arg(
 47 |             Arg::new("field")
 48 |                 .long("field")
 49 |                 .short('f')
 50 |                 .value_parser(value_parser!(usize))
 51 |                 .num_args(1)
 52 |                 .help("Index of the range field. If not set, the first valid range will be used"),
 53 |         )
 54 |         .arg(
 55 |             Arg::new("op")
 56 |                 .long("op")
 57 |                 .num_args(1)
 58 |                 .action(ArgAction::Set)
 59 |                 .value_parser([
 60 |                     builder::PossibleValue::new("trim"),
 61 |                     builder::PossibleValue::new("pad"),
 62 |                     builder::PossibleValue::new("shift"),
 63 |                     builder::PossibleValue::new("flank"),
 64 |                     builder::PossibleValue::new("excise"),
 65 |                 ])
 66 |                 .default_value("trim")
 67 |                 .help("Select the operation to perform"),
 68 |         )
 69 |         .arg(
 70 |             Arg::new("mode")
 71 |                 .long("mode")
 72 |                 .short('m')
 73 |                 .num_args(1)
 74 |                 .action(ArgAction::Set)
 75 |                 .value_parser([
 76 |                     builder::PossibleValue::new("both"),
 77 |                     builder::PossibleValue::new("5p"),
 78 |                     builder::PossibleValue::new("3p"),
 79 |                 ])
 80 |                 .default_value("both")
 81 |                 .help("Mode of the operation"),
 82 |         )
 83 |         .arg(
 84 |             Arg::new("number")
 85 |                 .long("number")
 86 |                 .short('n')
 87 |                 .num_args(1)
 88 |                 .value_parser(value_parser!(i32))
 89 |                 .default_value("0")
 90 |                 .help("Number of integers to trim, pad, shift, or flank"),
 91 |         )
 92 |         .arg(
 93 |             Arg::new("append")
 94 |                 .long("append")
 95 |                 .short('a')
 96 |                 .action(ArgAction::SetTrue)
 97 |                 .help("Append a field for the new range (default: only write the new range)"),
 98 |         )
 99 |         .arg(
100 |             Arg::new("outfile")
101 |                 .long("outfile")
102 |                 .short('o')
103 |                 .num_args(1)
104 |                 .default_value("stdout")
105 |                 .help("Output filename. [stdout] for screen"),
106 |         )
107 | }
108 | 
109 | // command implementation
110 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
111 |     //----------------------------
112 |     // Args
113 |     //----------------------------
114 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
115 | 
116 |     let is_header = args.get_flag("header");
117 |     let is_sharp = args.get_flag("sharp");
118 | 
119 |     let opt_idx_range = args.get_one::<usize>("field").copied().unwrap_or(0);
120 | 
121 |     let opt_op = args.get_one::<String>("op").unwrap().as_str();
122 |     let opt_mode = args.get_one::<String>("mode").unwrap().as_str();
123 |     let opt_number = *args.get_one::<i32>("number").unwrap();
124 | 
125 |     let is_append = args.get_flag("append");
126 | 
127 |     //----------------------------
128 |     // Ops
129 |     //----------------------------
130 |     for infile in args.get_many::<String>("infiles").unwrap() {
131 |         let reader = intspan::reader(infile);
132 |         'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() {
133 |             // Handle the header line
134 |             if is_header && i == 0 {
135 |                 if is_append {
136 |                     writer.write_fmt(format_args!("{}\t{}\n", line, "rg"))?;
137 |                 } else {
138 |                     writer.write_fmt(format_args!("{}\n", "rg"))?;
139 |                 }
140 |                 continue 'LINE;
141 |             }
142 | 
143 |             // Handle lines starting with '#'
144 |             if line.starts_with('#') {
145 |                 if is_sharp {
146 |                     writer.write_fmt(format_args!("{}\n", line))?;
147 |                 }
148 |                 continue 'LINE;
149 |             }
150 | 
151 |             let rg = match intspan::extract_rg(&line, opt_idx_range) {
152 |                 // Extract the range
153 |                 Some(range) => range,
154 |                 // Skip lines without a valid range
155 |                 None => continue 'LINE,
156 |             };
157 | 
158 |             let new = match opt_op {
159 |                 "trim" => match opt_mode {
160 |                     "5p" => rg.trim_5p(opt_number),
161 |                     "3p" => rg.trim_3p(opt_number),
162 |                     _ => rg.trim(opt_number),
163 |                 },
164 |                 "pad" => match opt_mode {
165 |                     "5p" => rg.trim_5p(-opt_number),
166 |                     "3p" => rg.trim_3p(-opt_number),
167 |                     _ => rg.trim(-opt_number),
168 |                 },
169 |                 "shift" => match opt_mode {
170 |                     "5p" => rg.shift_5p(opt_number),
171 |                     "3p" => rg.shift_3p(opt_number),
172 |                     _ => unreachable!("Invalid mode for shift operation"),
173 |                 },
174 |                 "flank" => match opt_mode {
175 |                     "5p" => rg.flank_5p(opt_number),
176 |                     "3p" => rg.flank_3p(opt_number),
177 |                     _ => unreachable!("Invalid mode for flank operation"),
178 |                 },
179 |                 "excise" => {
180 |                     if rg.intspan().size() >= opt_number {
181 |                         rg.clone()
182 |                     } else {
183 |                         intspan::Range::new()
184 |                     }
185 |                 }
186 |                 _ => unreachable!("Invalid Op"),
187 |             };
188 | 
189 |             //----------------------------
190 |             // Output
191 |             //----------------------------
192 |             let new_line: String = if is_append {
193 |                 format!("{}\t{}", line, new)
194 |             } else {
195 |                 new.to_string()
196 |             };
197 | 
198 |             writer.write_fmt(format_args!("{}\n", new_line))?;
199 |         }
200 |     }
201 | 
202 |     Ok(())
203 | }
204 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/combine.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use intspan::*;
 3 | use serde_json::Value;
 4 | use std::collections::BTreeMap;
 5 | 
 6 | // Create clap subcommand arguments
 7 | pub fn make_subcommand() -> Command {
 8 |     Command::new("combine")
 9 |         .about("Combine multiple sets of runlists in a json file")
10 |         .after_help(
11 |             r###"
12 | It's expected that the JSON file contains multiple sets of runlists,
13 | otherwise this command will make no effects
14 | 
15 | "###,
16 |         )
17 |         .arg(
18 |             Arg::new("infile")
19 |                 .required(true)
20 |                 .index(1)
21 |                 .help("Sets the input file to use"),
22 |         )
23 |         .arg(
24 |             Arg::new("op")
25 |                 .long("op")
26 |                 .num_args(1)
27 |                 .default_value("union")
28 |                 .value_parser(clap::builder::NonEmptyStringValueParser::new())
29 |                 .help("Operations: intersect, union, diff or xor"),
30 |         )
31 |         .arg(
32 |             Arg::new("outfile")
33 |                 .long("outfile")
34 |                 .short('o')
35 |                 .num_args(1)
36 |                 .default_value("stdout")
37 |                 .help("Output filename. [stdout] for screen"),
38 |         )
39 | }
40 | 
41 | // command implementation
42 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
43 |     //----------------------------
44 |     // Loading
45 |     //----------------------------
46 |     let json: BTreeMap<String, Value> = read_json(args.get_one::<String>("infile").unwrap());
47 |     let s_of = json2set_m(&json);
48 |     let chrs = chrs_in_sets(&s_of);
49 | 
50 |     let op = args.get_one::<String>("op").unwrap().as_str();
51 | 
52 |     //----------------------------
53 |     // Operating
54 |     //----------------------------
55 |     let mut res: BTreeMap<String, IntSpan> = BTreeMap::new();
56 |     fill_up_s(&mut res, &chrs);
57 | 
58 |     let names: Vec<_> = s_of.keys().cloned().collect();
59 |     let first = names[0].clone();
60 | 
61 |     for name in names {
62 |         let set = s_of.get(name.as_str()).unwrap();
63 |         for chr in set.keys() {
64 |             if name == first {
65 |                 let intspan = set.get(chr).unwrap();
66 |                 res.entry(chr.to_string()).and_modify(|e| e.merge(intspan));
67 |             } else {
68 |                 let mut intspan_op = res.get(chr).unwrap().copy();
69 |                 intspan_op = match op {
70 |                     "intersect" => intspan_op.intersect(set.get(chr).unwrap()),
71 |                     "diff" => intspan_op.diff(set.get(chr).unwrap()),
72 |                     "union" => intspan_op.union(set.get(chr).unwrap()),
73 |                     "xor" => intspan_op.xor(set.get(chr).unwrap()),
74 |                     _ => panic!("Invalid IntSpan Op"),
75 |                 };
76 |                 //                eprintln!("Op {}: {}", op, intspan_op.to_string());
77 |                 res.insert(chr.into(), intspan_op);
78 |             }
79 |         }
80 |     }
81 | 
82 |     //----------------------------
83 |     // Output
84 |     //----------------------------
85 |     let out_json = set2json(&res);
86 |     write_json(args.get_one::<String>("outfile").unwrap(), &out_json)?;
87 | 
88 |     Ok(())
89 | }
90 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/compare.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use intspan::*;
  3 | use serde_json::Value;
  4 | use std::collections::BTreeMap;
  5 | 
  6 | // Create clap subcommand arguments
  7 | pub fn make_subcommand() -> Command {
  8 |     Command::new("compare")
  9 |         .about("Compare one JSON file against others")
 10 |         .after_help("Only the *first* file can contain multiple sets of runlists")
 11 |         .arg(
 12 |             Arg::new("infile")
 13 |                 .required(true)
 14 |                 .index(1)
 15 |                 .help("Sets the input file to use"),
 16 |         )
 17 |         .arg(
 18 |             Arg::new("infiles")
 19 |                 .required(true)
 20 |                 .index(2)
 21 |                 .num_args(1..)
 22 |                 .help("Sets the input file to use"),
 23 |         )
 24 |         .arg(
 25 |             Arg::new("op")
 26 |                 .long("op")
 27 |                 .num_args(1)
 28 |                 .default_value("intersect")
 29 |                 .value_parser(clap::builder::NonEmptyStringValueParser::new())
 30 |                 .help("Operations: intersect, union, diff or xor"),
 31 |         )
 32 |         .arg(
 33 |             Arg::new("outfile")
 34 |                 .long("outfile")
 35 |                 .short('o')
 36 |                 .num_args(1)
 37 |                 .default_value("stdout")
 38 |                 .help("Output filename. [stdout] for screen"),
 39 |         )
 40 | }
 41 | 
 42 | // command implementation
 43 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 44 |     //----------------------------
 45 |     // Loading
 46 |     //----------------------------
 47 |     // first file
 48 |     let json: BTreeMap<String, Value> = read_json(args.get_one::<String>("infile").unwrap());
 49 |     let is_multi: bool = json.values().next().unwrap().is_object();
 50 |     let mut s1_of = json2set_m(&json);
 51 | 
 52 |     // second file or more
 53 |     let mut s2s = vec![];
 54 | 
 55 |     for infile in args.get_many::<String>("infiles").unwrap() {
 56 |         let json_s = read_json(infile);
 57 |         let s2 = json2set(&json_s);
 58 |         s2s.push(s2);
 59 |     }
 60 | 
 61 |     let op = args.get_one::<String>("op").unwrap().as_str();
 62 | 
 63 |     //----------------------------
 64 |     // Operating
 65 |     //----------------------------
 66 |     // give empty intspan to non-existed chrs
 67 |     let mut chrs = chrs_in_sets(&s1_of);
 68 |     for s2 in &s2s {
 69 |         for chr in s2.keys() {
 70 |             chrs.insert(chr.to_string());
 71 |         }
 72 |     }
 73 |     fill_up_m(&mut s1_of, &chrs);
 74 | 
 75 |     for s2 in s2s.iter_mut() {
 76 |         fill_up_s(s2, &chrs);
 77 |     }
 78 | 
 79 |     let mut res_of: BTreeMap<String, BTreeMap<String, IntSpan>> = BTreeMap::new();
 80 |     for (name, s1) in &s1_of {
 81 |         let mut res: BTreeMap<String, IntSpan> = BTreeMap::new();
 82 |         for chr in s1.keys() {
 83 |             let mut intspan_op = s1.get(chr).unwrap().copy();
 84 |             for s2 in s2s.iter() {
 85 |                 intspan_op = match op {
 86 |                     "intersect" => intspan_op.intersect(s2.get(chr).unwrap()),
 87 |                     "diff" => intspan_op.diff(s2.get(chr).unwrap()),
 88 |                     "union" => intspan_op.union(s2.get(chr).unwrap()),
 89 |                     "xor" => intspan_op.xor(s2.get(chr).unwrap()),
 90 |                     _ => panic!("Invalid IntSpan Op"),
 91 |                 };
 92 |                 //                eprintln!("Op {}: {}", op, intspan_op.to_string());
 93 |             }
 94 |             res.insert(chr.into(), intspan_op);
 95 |         }
 96 |         res_of.insert(name.into(), res);
 97 |     }
 98 | 
 99 |     //----------------------------
100 |     // Output
101 |     //----------------------------
102 |     let out_json = if is_multi {
103 |         set2json_m(&res_of)
104 |     } else {
105 |         set2json(res_of.get("__single").unwrap())
106 |     };
107 |     write_json(args.get_one::<String>("outfile").unwrap(), &out_json)?;
108 | 
109 |     Ok(())
110 | }
111 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/convert.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | 
 3 | // Create clap subcommand arguments
 4 | pub fn make_subcommand() -> Command {
 5 |     Command::new("convert")
 6 |         .about("Convert runlist file to ranges file")
 7 |         .arg(
 8 |             Arg::new("infiles")
 9 |                 .required(true)
10 |                 .num_args(1..)
11 |                 .index(1)
12 |                 .help("Set the input files to use"),
13 |         )
14 |         .arg(
15 |             Arg::new("longest")
16 |                 .long("longest")
17 |                 .action(ArgAction::SetTrue)
18 |                 .help("Only keep the longest range"),
19 |         )
20 |         .arg(
21 |             Arg::new("outfile")
22 |                 .long("outfile")
23 |                 .short('o')
24 |                 .num_args(1)
25 |                 .default_value("stdout")
26 |                 .help("Output filename. [stdout] for screen"),
27 |         )
28 | }
29 | 
30 | // command implementation
31 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
32 |     //----------------------------
33 |     // Args
34 |     //----------------------------
35 |     let is_longest = args.get_flag("longest");
36 |     let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap());
37 | 
38 |     //----------------------------
39 |     // Ops
40 |     //----------------------------
41 |     for infile in args.get_many::<String>("infiles").unwrap() {
42 |         let json = intspan::read_json(infile);
43 |         let set_of = intspan::json2set_m(&json);
44 | 
45 |         for set in set_of.values() {
46 |             for chr in set.keys() {
47 |                 let ints = set.get(chr).unwrap();
48 |                 let mut intses = ints.intses();
49 | 
50 |                 //----------------------------
51 |                 // Output
52 |                 //----------------------------
53 |                 if is_longest {
54 |                     if !intses.is_empty() {
55 |                         // Negate the value for descending order
56 |                         intses.sort_by_cached_key(|e| -e.size());
57 |                         let longest = intses.first().unwrap();
58 |                         writer.write_all(format!("{}:{}\n", chr, longest).as_ref())?;
59 |                     }
60 |                 } else {
61 |                     for sub in &intses {
62 |                         writer.write_all(format!("{}:{}\n", chr, sub).as_ref())?;
63 |                     }
64 |                 }
65 |             }
66 |         }
67 |     }
68 | 
69 |     Ok(())
70 | }
71 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/cover.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use intspan::*;
 3 | use std::collections::BTreeMap;
 4 | use std::io::BufRead;
 5 | 
 6 | // Create clap subcommand arguments
 7 | pub fn make_subcommand() -> Command {
 8 |     Command::new("cover")
 9 |         .about("Output covers on chromosomes")
10 |         .after_help(
11 |             r###"
12 | Like command `combine`, but <infiles> are chromosome ranges
13 | 
14 |     I:1-100
15 |     I(+):90-150             # Strand will be omitted
16 |     S288c.I(-):190-200      # Species name will be omitted
17 | 
18 | "###,
19 |         )
20 |         .arg(
21 |             Arg::new("infiles")
22 |                 .required(true)
23 |                 .num_args(1..)
24 |                 .index(1)
25 |                 .help("Set the input files to use"),
26 |         )
27 |         .arg(
28 |             Arg::new("outfile")
29 |                 .long("outfile")
30 |                 .short('o')
31 |                 .num_args(1)
32 |                 .default_value("stdout")
33 |                 .help("Output filename. [stdout] for screen"),
34 |         )
35 | }
36 | 
37 | // command implementation
38 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
39 |     //----------------------------
40 |     // Loading
41 |     //----------------------------
42 | 
43 |     // seq_name => IntSpan
44 |     let mut set: BTreeMap<String, IntSpan> = BTreeMap::new();
45 | 
46 |     for infile in args.get_many::<String>("infiles").unwrap() {
47 |         let reader = reader(infile);
48 |         for line in reader.lines().map_while(Result::ok) {
49 |             let range = Range::from_str(&line);
50 |             if !range.is_valid() {
51 |                 continue;
52 |             }
53 |             let chr = range.chr();
54 |             if !set.contains_key(chr) {
55 |                 set.insert(chr.clone(), IntSpan::new());
56 |             }
57 | 
58 |             set.entry(chr.to_string())
59 |                 .and_modify(|e| e.add_pair(*range.start(), *range.end()));
60 |         }
61 |     }
62 | 
63 |     //----------------------------
64 |     // Output
65 |     //----------------------------
66 |     let out_json = set2json(&set);
67 |     write_json(args.get_one::<String>("outfile").unwrap(), &out_json)?;
68 | 
69 |     Ok(())
70 | }
71 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/coverage.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use intspan::*;
  3 | use rust_lapper::{Interval, Lapper};
  4 | use std::collections::BTreeMap;
  5 | use std::io::BufRead;
  6 | 
  7 | // Interval: represent a range from [start, stop), carrying val
  8 | type Iv = Interval<u32, u32>; // the first type should be Unsigned
  9 | 
 10 | // Create clap subcommand arguments
 11 | pub fn make_subcommand() -> Command {
 12 |     Command::new("coverage")
 13 |         .about("Output minimum or detailed depth of coverage on chromosomes")
 14 |         .arg(
 15 |             Arg::new("infiles")
 16 |                 .required(true)
 17 |                 .num_args(1..)
 18 |                 .index(1)
 19 |                 .help("Set the input file to use"),
 20 |         )
 21 |         .arg(
 22 |             Arg::new("minimum")
 23 |                 .long("minimum")
 24 |                 .short('m')
 25 |                 .value_parser(value_parser!(i32))
 26 |                 .num_args(1)
 27 |                 .default_value("1")
 28 |                 .help("Set the minimum depth of coverage"),
 29 |         )
 30 |         .arg(
 31 |             Arg::new("detailed")
 32 |                 .long("detailed")
 33 |                 .short('d')
 34 |                 .action(ArgAction::SetTrue)
 35 |                 .help("Output detailed depth"),
 36 |         )
 37 |         .arg(
 38 |             Arg::new("outfile")
 39 |                 .long("outfile")
 40 |                 .short('o')
 41 |                 .num_args(1)
 42 |                 .default_value("stdout")
 43 |                 .help("Output filename. [stdout] for screen"),
 44 |         )
 45 | }
 46 | 
 47 | // command implementation
 48 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 49 |     //----------------------------
 50 |     // Loading
 51 |     //----------------------------
 52 |     let minimum = *args.get_one::<i32>("minimum").unwrap();
 53 |     let is_detailed = args.get_flag("detailed");
 54 | 
 55 |     // seq_name => Vector of Intervals
 56 |     let mut iv_of: BTreeMap<String, Vec<Iv>> = BTreeMap::new();
 57 | 
 58 |     for infile in args.get_many::<String>("infiles").unwrap() {
 59 |         let reader = reader(infile);
 60 |         for line in reader.lines().map_while(Result::ok) {
 61 |             if line.starts_with('#') {
 62 |                 continue;
 63 |             }
 64 |             let range = Range::from_str(&line);
 65 |             if !range.is_valid() {
 66 |                 continue;
 67 |             }
 68 |             let chr = range.chr();
 69 |             if !iv_of.contains_key(chr) {
 70 |                 let ivs: Vec<Iv> = vec![];
 71 |                 iv_of.insert(chr.clone(), ivs);
 72 |             }
 73 | 
 74 |             let iv = Iv {
 75 |                 start: *range.start() as u32,
 76 |                 stop: *range.end() as u32 + 1,
 77 |                 val: 0,
 78 |             };
 79 | 
 80 |             iv_of.entry(chr.to_string()).and_modify(|e| e.push(iv));
 81 |         }
 82 |     }
 83 | 
 84 |     //----------------------------
 85 |     // Output
 86 |     //----------------------------
 87 |     if is_detailed {
 88 |         // Multi
 89 |         let mut set_of: BTreeMap<String, BTreeMap<String, IntSpan>> = BTreeMap::new();
 90 | 
 91 |         for chr in iv_of.keys() {
 92 |             let lapper = Lapper::new(iv_of.get(chr).unwrap().to_owned());
 93 |             let ivs = lapper.depth().collect::<Vec<Interval<u32, u32>>>();
 94 | 
 95 |             // depth => IntSpan
 96 |             let mut intspan_of: BTreeMap<String, IntSpan> = BTreeMap::new();
 97 | 
 98 |             for iv in ivs {
 99 |                 let depth = iv.val as i32;
100 |                 if depth < minimum {
101 |                     continue;
102 |                 }
103 | 
104 |                 let depth = format!("{}", depth);
105 | 
106 |                 if !set_of.contains_key(&depth) {
107 |                     set_of.insert(depth.clone(), BTreeMap::new());
108 |                 }
109 | 
110 |                 if !intspan_of.contains_key(&depth) {
111 |                     intspan_of.insert(depth.clone(), IntSpan::new());
112 |                 }
113 | 
114 |                 intspan_of
115 |                     .entry(depth)
116 |                     .and_modify(|e| e.add_pair(iv.start as i32, iv.stop as i32 - 1));
117 |             }
118 | 
119 |             for depth in intspan_of.keys() {
120 |                 set_of
121 |                     .get_mut(depth)
122 |                     .unwrap()
123 |                     .insert(chr.clone(), intspan_of.get(depth).unwrap().clone());
124 |             }
125 |         }
126 | 
127 |         let out_json = set2json_m(&set_of);
128 |         write_json(args.get_one::<String>("outfile").unwrap(), &out_json)?;
129 |     } else {
130 |         // Single
131 |         // chr => IntSpan
132 |         let mut set: BTreeMap<String, IntSpan> = BTreeMap::new();
133 | 
134 |         for chr in iv_of.keys() {
135 |             let lapper = Lapper::new(iv_of.get(chr).unwrap().to_owned());
136 |             let ivs = lapper.depth().collect::<Vec<Interval<u32, u32>>>();
137 | 
138 |             let mut intspan = IntSpan::new();
139 |             for iv in ivs {
140 |                 let depth = iv.val as i32;
141 |                 if depth < minimum {
142 |                     continue;
143 |                 }
144 | 
145 |                 intspan.add_pair(iv.start as i32, iv.stop as i32 - 1);
146 |             }
147 | 
148 |             set.insert(chr.to_string(), intspan);
149 |         }
150 | 
151 |         let out_json = set2json(&set);
152 |         write_json(args.get_one::<String>("outfile").unwrap(), &out_json)?;
153 |     }
154 | 
155 |     Ok(())
156 | }
157 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/genome.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use intspan::*;
 3 | use serde_json::Value;
 4 | use std::collections::BTreeMap;
 5 | 
 6 | // Create clap subcommand arguments
 7 | pub fn make_subcommand() -> Command {
 8 |     Command::new("genome")
 9 |         .about("Convert chr.size to runlists")
10 |         .arg(
11 |             Arg::new("infile")
12 |                 .required(true)
13 |                 .index(1)
14 |                 .help("Sets the input file to use"),
15 |         )
16 |         .arg(
17 |             Arg::new("outfile")
18 |                 .long("outfile")
19 |                 .short('o')
20 |                 .num_args(1)
21 |                 .default_value("stdout")
22 |                 .help("Output filename. [stdout] for screen"),
23 |         )
24 | }
25 | 
26 | // command implementation
27 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
28 |     //----------------------------
29 |     // Loading
30 |     //----------------------------
31 |     let sizes = read_sizes(args.get_one::<String>("infile").unwrap());
32 | 
33 |     //----------------------------
34 |     // Operating
35 |     //----------------------------
36 |     let mut json: BTreeMap<String, Value> = BTreeMap::new();
37 |     for (key, value) in sizes {
38 |         let mut intspan = IntSpan::new();
39 |         intspan.add_pair(1, value);
40 |         json.insert(key, intspan.to_string().into());
41 |     }
42 | 
43 |     //----------------------------
44 |     // Output
45 |     //----------------------------
46 |     write_json(args.get_one::<String>("outfile").unwrap(), &json)?;
47 | 
48 |     Ok(())
49 | }
50 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/gff.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use intspan::*;
 3 | use std::collections::BTreeMap;
 4 | use std::io::BufRead;
 5 | 
 6 | // Create clap subcommand arguments
 7 | pub fn make_subcommand() -> Command {
 8 |     Command::new("gff")
 9 |         .about("Convert gff3 to covers on chromosomes")
10 |         .arg(
11 |             Arg::new("infiles")
12 |                 .required(true)
13 |                 .num_args(1..)
14 |                 .index(1)
15 |                 .help("Set the input files to use"),
16 |         )
17 |         .arg(
18 |             Arg::new("tag")
19 |                 .long("tag")
20 |                 .num_args(1)
21 |                 .help("primary tag (the third field)"),
22 |         )
23 |         .arg(
24 |             Arg::new("outfile")
25 |                 .long("outfile")
26 |                 .short('o')
27 |                 .num_args(1)
28 |                 .default_value("stdout")
29 |                 .help("Output filename. [stdout] for screen"),
30 |         )
31 | }
32 | 
33 | // command implementation
34 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
35 |     //----------------------------
36 |     // Loading
37 |     //----------------------------
38 |     let mut res: BTreeMap<String, IntSpan> = BTreeMap::new();
39 |     let tag = if args.contains_id("tag") {
40 |         args.get_one::<String>("tag").unwrap().as_str()
41 |     } else {
42 |         ""
43 |     };
44 | 
45 |     for infile in args.get_many::<String>("infiles").unwrap() {
46 |         let reader = reader(infile);
47 |         for line in reader.lines().map_while(Result::ok) {
48 |             if line.starts_with('#') {
49 |                 continue;
50 |             }
51 | 
52 |             let fields: Vec<&str> = line.split('\t').collect();
53 |             if fields.len() < 8 {
54 |                 continue;
55 |             }
56 | 
57 |             let feature = fields[2];
58 |             if !tag.is_empty() && feature != tag {
59 |                 continue;
60 |             }
61 | 
62 |             let chr = fields[0];
63 |             let start = fields[3].parse::<i32>().unwrap();
64 |             let end = fields[4].parse::<i32>().unwrap();
65 | 
66 |             if !res.contains_key(chr) {
67 |                 let intspan = IntSpan::new();
68 |                 res.insert(chr.to_string(), intspan);
69 |             }
70 |             res.entry(chr.to_string())
71 |                 .and_modify(|e| e.add_pair(start, end));
72 |         }
73 |     }
74 | 
75 |     //----------------------------
76 |     // Output
77 |     //----------------------------
78 |     let out_json = set2json(&res);
79 |     write_json(args.get_one::<String>("outfile").unwrap(), &out_json)?;
80 | 
81 |     Ok(())
82 | }
83 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/merge.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use intspan::*;
 3 | use serde_json::Value;
 4 | use std::collections::BTreeMap;
 5 | use std::ffi::OsStr;
 6 | use std::path::Path;
 7 | 
 8 | // Create clap subcommand arguments
 9 | pub fn make_subcommand() -> Command {
10 |     Command::new("merge")
11 |         .about("Merge runlist json files")
12 |         .arg(
13 |             Arg::new("infiles")
14 |                 .required(true)
15 |                 .num_args(1..)
16 |                 .index(1)
17 |                 .help("Set the input files to use"),
18 |         )
19 |         .arg(
20 |             Arg::new("all")
21 |                 .long("all")
22 |                 .action(ArgAction::SetTrue)
23 |                 .help("All parts of file_stem (aka basename), except the last one"),
24 |         )
25 |         .arg(
26 |             Arg::new("outfile")
27 |                 .long("outfile")
28 |                 .short('o')
29 |                 .num_args(1)
30 |                 .default_value("stdout")
31 |                 .help("Output filename. [stdout] for screen"),
32 |         )
33 | }
34 | 
35 | // command implementation
36 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
37 |     //----------------------------
38 |     // Loading
39 |     //----------------------------
40 |     let mut out_json: BTreeMap<String, Value> = BTreeMap::new();
41 | 
42 |     let is_all = args.get_flag("all");
43 | 
44 |     for infile in args.get_many::<String>("infiles").unwrap() {
45 |         let json = read_json(infile);
46 | 
47 |         let key = if is_all {
48 |             Path::new(infile)
49 |                 .file_stem()
50 |                 .and_then(OsStr::to_str)
51 |                 .unwrap()
52 |                 .to_string()
53 |         } else {
54 |             Path::new(infile)
55 |                 .file_stem()
56 |                 .and_then(OsStr::to_str)
57 |                 .unwrap()
58 |                 .split('.')
59 |                 .next()
60 |                 .unwrap()
61 |                 .to_string()
62 |         };
63 |         out_json.insert(key, serde_json::to_value(json).unwrap());
64 |     }
65 | 
66 |     //----------------------------
67 |     // Output
68 |     //----------------------------
69 |     write_json(args.get_one::<String>("outfile").unwrap(), &out_json)?;
70 | 
71 |     Ok(())
72 | }
73 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Subcommand modules for the `spanr` binary.
 2 | 
 3 | pub mod combine;
 4 | pub mod compare;
 5 | pub mod convert;
 6 | pub mod cover;
 7 | pub mod coverage;
 8 | pub mod genome;
 9 | pub mod gff;
10 | pub mod merge;
11 | pub mod some;
12 | pub mod span;
13 | pub mod split;
14 | pub mod stat;
15 | pub mod statop;
16 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/some.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use intspan::*;
 3 | use serde_json::Value;
 4 | use std::collections::{BTreeMap, BTreeSet};
 5 | 
 6 | // Create clap subcommand arguments
 7 | pub fn make_subcommand() -> Command {
 8 |     Command::new("some")
 9 |         .about("Extract some records from a runlist json file")
10 |         .arg(
11 |             Arg::new("infile")
12 |                 .required(true)
13 |                 .num_args(1)
14 |                 .index(1)
15 |                 .help("Sets the input file to use"),
16 |         )
17 |         .arg(
18 |             Arg::new("list")
19 |                 .required(true)
20 |                 .num_args(1)
21 |                 .index(2)
22 |                 .help("Sets the input file to use"),
23 |         )
24 |         .arg(
25 |             Arg::new("outfile")
26 |                 .long("outfile")
27 |                 .short('o')
28 |                 .num_args(1)
29 |                 .default_value("stdout")
30 |                 .help("Output filename. [stdout] for screen"),
31 |         )
32 | }
33 | 
34 | // command implementation
35 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
36 |     //----------------------------
37 |     // Loading
38 |     //----------------------------
39 |     let json: BTreeMap<String, Value> = read_json(args.get_one::<String>("infile").unwrap());
40 | 
41 |     let mut names: BTreeSet<String> = BTreeSet::new();
42 |     for line in read_lines(args.get_one::<String>("list").unwrap()) {
43 |         names.insert(line);
44 |     }
45 | 
46 |     //----------------------------
47 |     // Operating
48 |     //----------------------------
49 |     let mut out_json: BTreeMap<String, Value> = BTreeMap::new();
50 |     for (key, value) in &json {
51 |         if names.contains(key) {
52 |             out_json.insert(key.into(), value.clone());
53 |         }
54 |     }
55 | 
56 |     //----------------------------
57 |     // Output
58 |     //----------------------------
59 |     write_json(args.get_one::<String>("outfile").unwrap(), &out_json)?;
60 | 
61 |     Ok(())
62 | }
63 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/span.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use intspan::*;
  3 | use serde_json::Value;
  4 | use std::collections::BTreeMap;
  5 | 
  6 | // Create clap subcommand arguments
  7 | pub fn make_subcommand() -> Command {
  8 |     Command::new("span")
  9 |         .about("Operate spans in a JSON file")
 10 |         .after_help(
 11 |             r###"
 12 | List of operations
 13 | 
 14 | * cover:  a single span from min to max
 15 | * holes:  all the holes in runlist
 16 | * trim:   remove N integers from each end of each span of runlist
 17 | * pad:    add N integers from each end of each span of runlist
 18 | * excise: remove all spans smaller than N
 19 | * fill:   fill in all holes smaller than or equals to N
 20 | 
 21 | "###,
 22 |         )
 23 |         .arg(
 24 |             Arg::new("infile")
 25 |                 .required(true)
 26 |                 .index(1)
 27 |                 .help("Sets the input file to use"),
 28 |         )
 29 |         .arg(
 30 |             Arg::new("op")
 31 |                 .long("op")
 32 |                 .num_args(1)
 33 |                 .action(ArgAction::Set)
 34 |                 .value_parser([
 35 |                     builder::PossibleValue::new("cover"),
 36 |                     builder::PossibleValue::new("holes"),
 37 |                     builder::PossibleValue::new("trim"),
 38 |                     builder::PossibleValue::new("pad"),
 39 |                     builder::PossibleValue::new("excise"),
 40 |                     builder::PossibleValue::new("fill"),
 41 |                 ])
 42 |                 .default_value("cover")
 43 |                 .help("Operations"),
 44 |         )
 45 |         .arg(
 46 |             Arg::new("number")
 47 |                 .long("number")
 48 |                 .short('n')
 49 |                 .num_args(1)
 50 |                 .value_parser(value_parser!(i32))
 51 |                 .default_value("0"),
 52 |         )
 53 |         .arg(
 54 |             Arg::new("outfile")
 55 |                 .long("outfile")
 56 |                 .short('o')
 57 |                 .num_args(1)
 58 |                 .default_value("stdout")
 59 |                 .help("Output filename. [stdout] for screen"),
 60 |         )
 61 | }
 62 | 
 63 | // command implementation
 64 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 65 |     //----------------------------
 66 |     // Args
 67 |     //----------------------------
 68 |     let json: BTreeMap<String, Value> = read_json(args.get_one::<String>("infile").unwrap());
 69 |     let is_multi: bool = json.values().next().unwrap().is_object();
 70 |     let set_of = json2set_m(&json);
 71 | 
 72 |     let opt_op = args.get_one::<String>("op").unwrap().as_str();
 73 |     let opt_number = *args.get_one::<i32>("number").unwrap();
 74 | 
 75 |     //----------------------------
 76 |     // Ops
 77 |     //----------------------------
 78 |     let mut res_of: BTreeMap<String, BTreeMap<String, IntSpan>> = BTreeMap::new();
 79 |     for (name, set) in &set_of {
 80 |         let mut res: BTreeMap<String, IntSpan> = BTreeMap::new();
 81 |         for chr in set.keys() {
 82 |             let intspan = match opt_op {
 83 |                 "cover" => set.get(chr).unwrap().cover(),
 84 |                 "holes" => set.get(chr).unwrap().holes(),
 85 |                 "trim" => set.get(chr).unwrap().trim(opt_number),
 86 |                 "pad" => set.get(chr).unwrap().pad(opt_number),
 87 |                 "excise" => set.get(chr).unwrap().excise(opt_number),
 88 |                 "fill" => set.get(chr).unwrap().fill(opt_number),
 89 |                 _ => unreachable!("Invalid IntSpan Op"),
 90 |             };
 91 |             //            println!("Op {}: {}", op, op_intspan.to_string());
 92 |             res.insert(chr.into(), intspan);
 93 |         }
 94 |         res_of.insert(name.into(), res);
 95 |     }
 96 | 
 97 |     //----------------------------
 98 |     // Output
 99 |     //----------------------------
100 |     let out_json = if is_multi {
101 |         set2json_m(&res_of)
102 |     } else {
103 |         set2json(res_of.get("__single").unwrap())
104 |     };
105 |     write_json(args.get_one::<String>("outfile").unwrap(), &out_json)?;
106 | 
107 |     Ok(())
108 | }
109 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/split.rs:
--------------------------------------------------------------------------------
 1 | use clap::*;
 2 | use intspan::*;
 3 | use serde_json::Value;
 4 | use std::collections::BTreeMap;
 5 | use std::fs;
 6 | use std::path::Path;
 7 | 
 8 | // Create clap subcommand arguments
 9 | pub fn make_subcommand() -> Command {
10 |     Command::new("split")
11 |         .about("Split a runlist json file")
12 |         .arg(
13 |             Arg::new("infile")
14 |                 .help("Sets the input file to use")
15 |                 .required(true)
16 |                 .index(1),
17 |         )
18 |         .arg(
19 |             Arg::new("suffix")
20 |                 .long("suffix")
21 |                 .short('s')
22 |                 .num_args(1)
23 |                 .default_value(".json")
24 |                 .value_parser(clap::builder::NonEmptyStringValueParser::new())
25 |                 .help("Extensions of output files"),
26 |         )
27 |         .arg(
28 |             Arg::new("outdir")
29 |                 .short('o')
30 |                 .long("outdir")
31 |                 .num_args(1)
32 |                 .default_value("stdout")
33 |                 .value_parser(clap::builder::NonEmptyStringValueParser::new())
34 |                 .help("Output location. [stdout] for screen"),
35 |         )
36 | }
37 | 
38 | // command implementation
39 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
40 |     //----------------------------
41 |     // Loading
42 |     //----------------------------
43 |     let json: BTreeMap<String, Value> = read_json(args.get_one::<String>("infile").unwrap());
44 | 
45 |     let outdir = args.get_one::<String>("outdir").unwrap();
46 |     if outdir != "stdout" {
47 |         fs::create_dir_all(outdir)?;
48 |     }
49 | 
50 |     let suffix = args.get_one::<String>("suffix").unwrap();
51 | 
52 |     //----------------------------
53 |     // Operating
54 |     //----------------------------
55 |     for (key, value) in &json {
56 |         if !value.is_object() {
57 |             panic!("Not a valid multi-key runlist json file");
58 |         }
59 | 
60 |         let string = serde_json::to_string(value).unwrap();
61 | 
62 |         //----------------------------
63 |         // Output
64 |         //----------------------------
65 |         if outdir == "stdout" {
66 |             write_lines("stdout", &vec![string])?;
67 |         } else {
68 |             let path = Path::new(outdir).join(key.to_owned() + suffix);
69 |             fs::write(path, string + "\n")?;
70 |         }
71 |     }
72 | 
73 |     Ok(())
74 | }
75 | 


--------------------------------------------------------------------------------
/src/cmd_spanr/stat.rs:
--------------------------------------------------------------------------------
  1 | use clap::*;
  2 | use intspan::*;
  3 | use serde_json::Value;
  4 | use std::collections::BTreeMap;
  5 | 
  6 | // Create clap subcommand arguments
  7 | pub fn make_subcommand() -> Command {
  8 |     Command::new("stat")
  9 |         .about("Coverage on chromosomes for runlists")
 10 |         .arg(
 11 |             Arg::new("chr.sizes")
 12 |                 .required(true)
 13 |                 .index(1)
 14 |                 .help("Sets the input file to use"),
 15 |         )
 16 |         .arg(
 17 |             Arg::new("infile")
 18 |                 .required(true)
 19 |                 .index(2)
 20 |                 .help("Sets the input file to use"),
 21 |         )
 22 |         .arg(
 23 |             Arg::new("all")
 24 |                 .long("all")
 25 |                 .action(ArgAction::SetTrue)
 26 |                 .help("Only write whole genome stats"),
 27 |         )
 28 |         .arg(
 29 |             Arg::new("outfile")
 30 |                 .long("outfile")
 31 |                 .short('o')
 32 |                 .num_args(1)
 33 |                 .default_value("stdout")
 34 |                 .help("Output filename. [stdout] for screen"),
 35 |         )
 36 | }
 37 | 
 38 | // command implementation
 39 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
 40 |     //----------------------------
 41 |     // Loading
 42 |     //----------------------------
 43 |     let sizes = read_sizes(args.get_one::<String>("chr.sizes").unwrap());
 44 | 
 45 |     let json: BTreeMap<String, Value> = read_json(args.get_one::<String>("infile").unwrap());
 46 |     let is_multi: bool = json.values().next().unwrap().is_object();
 47 | 
 48 |     let set_of = json2set_m(&json);
 49 | 
 50 |     let is_all = args.get_flag("all");
 51 | 
 52 |     //----------------------------
 53 |     // Operating
 54 |     //----------------------------
 55 |     let mut lines: Vec<String> = Vec::new(); // Avoid lifetime problems
 56 |     let mut header = "key,chr,chrLength,size,coverage".to_string();
 57 | 
 58 |     if is_multi {
 59 |         if is_all {
 60 |             header = header.replace("chr,", "");
 61 |         }
 62 |         lines.push(header);
 63 | 
 64 |         for (name, set) in &set_of {
 65 |             let key_lines = csv_lines(set, &sizes, is_all, Some(name));
 66 |             lines.push(key_lines);
 67 |         }
 68 |     } else {
 69 |         header = header.replace("key,", "");
 70 |         if is_all {
 71 |             header = header.replace("chr,", "");
 72 |         }
 73 |         lines.push(header);
 74 | 
 75 |         let key_lines = csv_lines(set_of.get("__single").unwrap(), &sizes, is_all, None);
 76 |         lines.push(key_lines);
 77 |     }
 78 | 
 79 |     //----------------------------
 80 |     // Output
 81 |     //----------------------------
 82 |     write_lines(args.get_one::<String>("outfile").unwrap(), &lines)?;
 83 | 
 84 |     Ok(())
 85 | }
 86 | 
 87 | fn csv_lines(
 88 |     set: &BTreeMap<String, IntSpan>,
 89 |     sizes: &BTreeMap<String, i32>,
 90 |     is_all: bool,
 91 |     prefix: Option<&str>,
 92 | ) -> String {
 93 |     let mut lines = String::new();
 94 | 
 95 |     let mut all_length: i64 = 0;
 96 |     let mut all_size: i64 = 0;
 97 |     for chr in set.keys() {
 98 |         let length = *sizes.get(chr).unwrap();
 99 |         let size = set.get(chr).unwrap().cardinality();
100 |         let line = format!(
101 |             "{},{},{},{:.4}\n",
102 |             chr,
103 |             length,
104 |             size,
105 |             size as f32 / length as f32
106 |         );
107 |         if let Some(s) = prefix {
108 |             lines.push_str(format!("{},", s).as_str())
109 |         };
110 |         lines.push_str(line.as_str());
111 | 
112 |         all_length += length as i64;
113 |         all_size += size as i64;
114 |     }
115 | 
116 |     let mut all_line = format!(
117 |         "{},{},{},{:.4}\n",
118 |         "all",
119 |         all_length,
120 |         all_size,
121 |         all_size as f64 / all_length as f64
122 |     );
123 |     // only keep whole genome
124 |     if is_all {
125 |         lines = String::new();
126 |         all_line = all_line.replace("all,", "");
127 |     }
128 |     if let Some(s) = prefix {
129 |         all_line.insert_str(0, format!("{},", s).as_str())
130 |     };
131 |     lines.push_str(all_line.as_str());
132 | 
133 |     // Remove last LF, as write_lines will append one
134 |     lines.trim_end().to_string()
135 | }
136 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | #[macro_use]
 2 | extern crate lazy_static;
 3 | 
 4 | mod libs;
 5 | mod utils;
 6 | 
 7 | pub use crate::libs::coverage::*;
 8 | pub use crate::libs::intspan::*;
 9 | pub use crate::libs::linalg::*;
10 | pub use crate::libs::matrix::*;
11 | pub use crate::libs::range::*;
12 | 
13 | pub use crate::utils::*;
14 | 


--------------------------------------------------------------------------------
/src/libs/coverage.rs:
--------------------------------------------------------------------------------
  1 | use crate::IntSpan;
  2 | use std::collections::BTreeMap;
  3 | 
  4 | #[derive(Default, Clone)]
  5 | pub struct Coverage {
  6 |     max: i32,
  7 |     tiers: BTreeMap<i32, IntSpan>,
  8 | }
  9 | 
 10 | impl Coverage {
 11 |     pub fn max(&self) -> &i32 {
 12 |         &self.max
 13 |     }
 14 |     pub fn tiers(&self) -> &BTreeMap<i32, IntSpan> {
 15 |         &self.tiers
 16 |     }
 17 | 
 18 |     pub fn new(max: i32) -> Self {
 19 |         Self::new_len(max, 1_000_000_000)
 20 |     }
 21 | 
 22 |     pub fn new_len(max: i32, len: i32) -> Self {
 23 |         let mut tiers: BTreeMap<i32, IntSpan> = BTreeMap::new();
 24 |         tiers.insert(-1, IntSpan::from_pair(1, len));
 25 |         tiers.insert(0, IntSpan::from_pair(1, len));
 26 | 
 27 |         for i in 1..=max {
 28 |             tiers.insert(i, IntSpan::new());
 29 |         }
 30 | 
 31 |         Self { max, tiers }
 32 |     }
 33 | 
 34 |     fn begin_end(begin: i32, end: i32) -> (i32, i32) {
 35 |         let mut tup = (begin.min(end), begin.max(end));
 36 | 
 37 |         if tup.0 == 0 {
 38 |             tup.0 = 1;
 39 |         }
 40 | 
 41 |         tup
 42 |     }
 43 | 
 44 |     /// ```
 45 |     /// # use intspan::Coverage;
 46 |     /// let mut cover = Coverage::new(1);
 47 |     /// cover.bump(1, 100);
 48 |     /// cover.bump(90, 150);
 49 |     /// assert_eq!(cover.tiers().get(&1).unwrap().to_string(), "1-150");
 50 |     /// # assert_eq!(cover.tiers().get(&0).unwrap().to_string(), "151-1000000000");
 51 |     ///
 52 |     /// let mut cover = Coverage::new_len(1, 500);
 53 |     /// cover.bump(1, 100);
 54 |     /// cover.bump(90, 150);
 55 |     /// assert_eq!(cover.tiers().get(&1).unwrap().to_string(), "1-150");
 56 |     /// # assert_eq!(cover.tiers().get(&0).unwrap().to_string(), "151-500");
 57 |     /// # assert_eq!(cover.tiers().get(&-1).unwrap().to_string(), "1-500");
 58 |     /// ```
 59 |     pub fn bump(&mut self, begin: i32, end: i32) {
 60 |         let tup = Self::begin_end(begin, end);
 61 |         let mut intspan = IntSpan::from_pair(tup.0, tup.1);
 62 | 
 63 |         // reach max coverage in full sequence
 64 |         if self
 65 |             .tiers
 66 |             .get(&-1)
 67 |             .unwrap()
 68 |             .equals(self.tiers.get(&self.max).unwrap())
 69 |         {
 70 |             return;
 71 |         }
 72 | 
 73 |         // remove intspan from uncovered regions
 74 |         self.tiers.entry(0).and_modify(|e| e.subtract(&intspan));
 75 | 
 76 |         for i in 1..=self.max {
 77 |             let intersect = self.tiers.get(&i).unwrap().intersect(&intspan);
 78 |             self.tiers.entry(i).and_modify(|e| e.merge(&intspan));
 79 | 
 80 |             if i + 1 > self.max {
 81 |                 break;
 82 |             }
 83 | 
 84 |             intspan = intersect.copy();
 85 |         }
 86 |     }
 87 | 
 88 |     /// ```
 89 |     /// # use intspan::Coverage;
 90 |     /// let mut cover = Coverage::new(2);
 91 |     /// cover.bump(1, 100);
 92 |     /// cover.bump(90, 150);
 93 |     /// assert_eq!(cover.max_tier().to_string(), "90-100");
 94 |     ///
 95 |     /// let mut cover = Coverage::new(5);
 96 |     /// cover.bump(1, 100);
 97 |     /// cover.bump(90, 150);
 98 |     /// assert_eq!(cover.max_tier().to_string(), "-");
 99 |     /// ```
100 |     pub fn max_tier(&self) -> IntSpan {
101 |         self.tiers().get(self.max()).unwrap().copy()
102 |     }
103 | 
104 |     /// ```
105 |     /// # use intspan::Coverage;
106 |     /// let mut cover = Coverage::new(2);
107 |     /// cover.bump(1, 100);
108 |     /// cover.bump(90, 150);
109 |     ///
110 |     /// assert_eq!(cover.uniq_tiers().get(&2).unwrap().to_string(), "90-100");
111 |     ///
112 |     /// assert_eq!(cover.tiers().get(&1).unwrap().to_string(), "1-150");
113 |     /// assert_eq!(cover.uniq_tiers().get(&1).unwrap().to_string(), "1-89,101-150");
114 |     /// ```
115 |     pub fn uniq_tiers(&self) -> BTreeMap<i32, IntSpan> {
116 |         let mut tiers = self.tiers.clone();
117 | 
118 |         for i in 1..self.max {
119 |             let intspan_next = tiers[&(i + 1)].copy();
120 |             tiers.entry(i).and_modify(|e| e.subtract(&intspan_next));
121 |         }
122 | 
123 |         tiers
124 |     }
125 | }
126 | 


--------------------------------------------------------------------------------
/src/libs/linalg.rs:
--------------------------------------------------------------------------------
  1 | /// Computes the sum of all elements in a vector `a`.
  2 | ///
  3 | /// # Arguments
  4 | /// * `a` - The vector.
  5 | ///
  6 | /// # Returns
  7 | /// The sum of all elements in `a`.
  8 | ///
  9 | /// # Examples
 10 | /// ```
 11 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
 12 | /// let sum_value = intspan::sum(&a);
 13 | /// assert_eq!(sum_value, 55.0);
 14 | /// ```
 15 | pub fn sum(a: &[f32]) -> f32 {
 16 |     a.iter().sum()
 17 | }
 18 | 
 19 | /// Computes the arithmetic mean of the slice `a`.
 20 | ///
 21 | /// # Arguments
 22 | /// * `a` - The values to average.
 23 | ///
 24 | /// # Returns
 25 | /// The sum of `a` divided by its length. An empty slice is not special-cased:
 26 | /// the division is then zero by zero, which yields `NaN`.
 27 | ///
 28 | /// # Examples
 29 | /// ```
 30 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
 31 | /// let mean_value = intspan::mean(&a);
 32 | /// assert_eq!(mean_value, 5.5);
 33 | /// ```
 34 | pub fn mean(a: &[f32]) -> f32 {
 35 |     let total = sum(a);
 36 |     let count = a.len() as f32;
 37 |     total / count
 38 | }
 36 | 
 37 | /// Computes the Pearson correlation coefficient of two vectors `a` and `b`.
 38 | ///
 39 | /// Two equivalent formulas:
 40 | ///
 41 | /// 1. Using deviations from mean (implemented here for better numerical stability):
 42 | /// `$r = \frac{\sum(x - \bar{x})(y - \bar{y})}{\sqrt{\sum(x - \bar{x})^2\sum(y - \bar{y})^2}}$`
 43 | ///
 44 | /// 2. Direct computation:
 45 | /// `$r = \frac{n\sum xy - \sum x\sum y}{\sqrt{(n\sum x^2 - (\sum x)^2)(n\sum y^2 - (\sum y)^2)}}$`
 46 | ///
 47 | /// where `$\bar{x}$` and `$\bar{y}$` are the means of vectors `$x$` and `$y$` respectively,
 48 | /// and `$n$` is the length of the vectors.
 49 | ///
 50 | /// Note: Formula 1 is used in this implementation because it:
 51 | /// * Reduces the risk of numerical overflow by centering the data
 52 | /// * Provides better numerical stability for large values
 53 | ///
 54 | /// # Arguments
 55 | /// * `a` - The first vector.
 56 | /// * `b` - The second vector.
 57 | ///
 58 | /// # Returns
 59 | /// The Pearson correlation coefficient between `a` and `b`.
 60 | /// Returns `NaN` when the vectors are empty or differ in length. Input with
 61 | /// zero spread is not special-cased, so a zero denominator is divided by as-is.
 62 | ///
 63 | /// # Examples
 64 | /// ```
 65 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
 66 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0];
 67 | /// let correlation = intspan::pearson_correlation(&a, &b);
 68 | /// assert_eq!(format!("{:.4}", correlation), "-1.0000".to_string()); // Perfect negative correlation
 69 | ///
 70 | /// let empty: [f32; 0] = [];
 71 | /// assert!(intspan::pearson_correlation(&empty, &empty).is_nan()); // Check handling of empty vectors
 72 | /// ```
 73 | pub fn pearson_correlation(a: &[f32], b: &[f32]) -> f32 {
 74 |     // Nothing meaningful can be computed for empty or mismatched input.
 75 |     if a.is_empty() || a.len() != b.len() {
 76 |         return f32::NAN;
 77 |     }
 78 | 
 79 |     let (mean_a, mean_b) = (mean(a), mean(b));
 80 | 
 81 |     // Square root of the summed squared deviations of `values` from `center`.
 82 |     let spread = |values: &[f32], center: f32| -> f32 {
 83 |         values
 84 |             .iter()
 85 |             .map(|v| (v - center).powi(2))
 86 |             .sum::<f32>()
 87 |             .sqrt()
 88 |     };
 89 | 
 90 |     // Sum of the products of paired deviations (the numerator of formula 1).
 91 |     let co_deviation: f32 = a
 92 |         .iter()
 93 |         .zip(b)
 94 |         .map(|(x, y)| (x - mean_a) * (y - mean_b))
 95 |         .sum();
 96 | 
 97 |     co_deviation / (spread(a, mean_a) * spread(b, mean_b))
 98 | }
 93 | 
 94 | /// Computes the Jaccard intersection of two vectors `a` and `b`:
 95 | /// the sum, over paired positions, of the smaller of the two elements.
 96 | ///
 97 | /// Elements are paired position by position; if the slices differ in length,
 98 | /// the surplus elements of the longer one are ignored.
 99 | ///
100 | /// # Arguments
101 | /// * `a` - The first vector.
102 | /// * `b` - The second vector.
103 | ///
104 | /// # Returns
105 | /// The Jaccard intersection of `a` and `b`.
106 | ///
107 | /// # Examples
108 | /// ```
109 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
110 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0];
111 | /// let intersection = intspan::jaccard_intersection(&a, &b);
112 | /// assert_eq!(intersection, 30.0);
113 | /// ```
114 | pub fn jaccard_intersection(a: &[f32], b: &[f32]) -> f32 {
115 |     a.iter().zip(b).map(|(&lhs, &rhs)| lhs.min(rhs)).sum()
116 | }
114 | 
115 | /// Computes the Jaccard union of two vectors `a` and `b`:
116 | /// the sum, over paired positions, of the larger of the two elements.
117 | ///
118 | /// Elements are paired position by position; if the slices differ in length,
119 | /// the surplus elements of the longer one are ignored.
120 | ///
121 | /// # Arguments
122 | /// * `a` - The first vector.
123 | /// * `b` - The second vector.
124 | ///
125 | /// # Returns
126 | /// The Jaccard union of `a` and `b`.
127 | ///
128 | /// # Examples
129 | /// ```
130 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
131 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0];
132 | /// let union = intspan::jaccard_union(&a, &b);
133 | /// assert_eq!(union, 80.0);
134 | /// ```
135 | pub fn jaccard_union(a: &[f32], b: &[f32]) -> f32 {
136 |     a.iter().zip(b).map(|(&lhs, &rhs)| lhs.max(rhs)).sum()
137 | }
135 | 
136 | /// Computes the weighted Jaccard similarity of two vectors `a` and `b`:
137 | /// `jaccard_intersection(a, b)` divided by `jaccard_union(a, b)`.
138 | ///
139 | /// # Arguments
140 | /// * `a` - The first vector.
141 | /// * `b` - The second vector.
142 | ///
143 | /// # Returns
144 | /// The ratio of the Jaccard intersection to the Jaccard union, or `0.0` when
145 | /// the union is zero (which avoids a division by zero).
146 | pub fn weighted_jaccard_similarity(a: &[f32], b: &[f32]) -> f32 {
147 |     let union = jaccard_union(a, b);
148 |     if union == 0.0 {
149 |         return 0.0;
150 |     }
151 | 
152 |     jaccard_intersection(a, b) / union
153 | }
146 | 
147 | /// Computes the dot product of two vectors `a` and `b`: the sum of the
148 | /// products of paired elements.
149 | ///
150 | /// Elements are paired position by position; if the slices differ in length,
151 | /// the surplus elements of the longer one are ignored.
152 | ///
153 | /// # Arguments
154 | /// * `a` - The first vector.
155 | /// * `b` - The second vector.
156 | ///
157 | /// # Returns
158 | /// The dot product of `a` and `b`.
159 | ///
160 | /// # Examples
161 | /// ```
162 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
163 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0];
164 | /// let dot = intspan::dot_product(&a, &b);
165 | /// assert_eq!(dot, 220.0);
166 | /// ```
167 | pub fn dot_product(a: &[f32], b: &[f32]) -> f32 {
168 |     a.iter().zip(b).map(|(&lhs, &rhs)| lhs * rhs).sum()
169 | }
166 | 
167 | /// Computes the L2 (Euclidean) norm of a vector `a`, i.e. the square root of
168 | /// its squared L2 norm.
169 | ///
170 | /// # Arguments
171 | /// * `a` - The vector.
172 | ///
173 | /// # Returns
174 | /// The L2 norm of `a`.
175 | ///
176 | /// # Examples
177 | /// ```
178 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
179 | /// let norm = intspan::norm_l2(&a);
180 | /// assert_eq!(format!("{:.4}", norm), "19.6214".to_string());
181 | /// ```
182 | #[inline]
183 | pub fn norm_l2(a: &[f32]) -> f32 {
184 |     let squared = norm_l2_sq(a);
185 |     squared.sqrt()
186 | }
185 | 
186 | /// Computes the squared L2 norm of a vector `a`: the sum of the squares of
187 | /// its elements.
188 | ///
189 | /// # Arguments
190 | /// * `a` - The vector.
191 | ///
192 | /// # Returns
193 | /// The squared L2 norm of `a`.
194 | ///
195 | /// # Examples
196 | /// ```
197 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
198 | /// let norm_sq = intspan::norm_l2_sq(&a);
199 | /// assert_eq!(norm_sq, 385.0);
200 | /// ```
201 | pub fn norm_l2_sq(a: &[f32]) -> f32 {
202 |     a.iter().map(|&value| value * value).sum()
203 | }
203 | 
204 | /// Computes the cosine similarity of two vectors `a` and `b`: their dot
205 | /// product divided by the product of their L2 norms.
206 | ///
207 | /// # Arguments
208 | /// * `a` - The first vector.
209 | /// * `b` - The second vector.
210 | ///
211 | /// # Returns
212 | /// The cosine similarity of `a` and `b`, or `0.0` when the product of the two
213 | /// norms is zero (which avoids a division by zero).
214 | pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
215 |     let scale = norm_l2(a) * norm_l2(b);
216 |     if scale == 0.0 {
217 |         return 0.0;
218 |     }
219 | 
220 |     dot_product(a, b) / scale
221 | }
214 | 
215 | /// Computes the Euclidean distance between two vectors `a` and `b`: the
216 | /// square root of the summed squared differences of paired elements.
217 | ///
218 | /// Elements are paired position by position; if the slices differ in length,
219 | /// the surplus elements of the longer one are ignored.
220 | ///
221 | /// # Arguments
222 | /// * `a` - The first vector.
223 | /// * `b` - The second vector.
224 | ///
225 | /// # Returns
226 | /// The Euclidean distance between `a` and `b`.
227 | ///
228 | /// # Examples
229 | /// ```
230 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
231 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0];
232 | /// let distance = intspan::euclidean_distance(&a, &b);
233 | /// assert_eq!(format!("{:.4}", distance), "18.1659".to_string());
234 | /// ```
235 | pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
236 |     let squared_total: f32 = a
237 |         .iter()
238 |         .zip(b)
239 |         .map(|(&lhs, &rhs)| {
240 |             let gap = lhs - rhs;
241 |             gap * gap
242 |         })
243 |         .sum();
244 | 
245 |     squared_total.sqrt()
246 | }
241 | 


--------------------------------------------------------------------------------
/src/libs/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod coverage;
2 | pub mod intspan;
3 | pub mod linalg;
4 | pub mod matrix;
5 | pub mod range;
6 | 


--------------------------------------------------------------------------------
/src/linkr.rs:
--------------------------------------------------------------------------------
 1 | extern crate clap;
 2 | use clap::*;
 3 | 
 4 | mod cmd_linkr;
 5 | 
 6 | /// Entry point of the `linkr` command-line tool.
 7 | ///
 8 | /// Registers the subcommands with clap, dispatches to the one selected on the
 9 | /// command line, and returns its error (if any) to the caller of `main`
10 | /// instead of panicking on it.
11 | fn main() -> anyhow::Result<()> {
12 |     let app = Command::new("linkr")
13 |         .version(crate_version!())
14 |         .author(crate_authors!())
15 |         .about("`linkr` operates ranges on chromosomes and links of ranges")
16 |         .propagate_version(true)
17 |         .arg_required_else_help(true)
18 |         .color(ColorChoice::Auto)
19 |         .subcommand(cmd_linkr::circos::make_subcommand())
20 |         .subcommand(cmd_linkr::sort::make_subcommand())
21 |         .subcommand(cmd_linkr::filter::make_subcommand())
22 |         .subcommand(cmd_linkr::clean::make_subcommand())
23 |         .subcommand(cmd_linkr::connect::make_subcommand());
24 | 
25 |     // Check which subcommand the user ran and propagate its result with `?`,
26 |     // so a failing subcommand is reported as an error rather than as a panic.
27 |     match app.get_matches().subcommand() {
28 |         Some(("circos", sub_matches)) => cmd_linkr::circos::execute(sub_matches),
29 |         Some(("sort", sub_matches)) => cmd_linkr::sort::execute(sub_matches),
30 |         Some(("filter", sub_matches)) => cmd_linkr::filter::execute(sub_matches),
31 |         Some(("clean", sub_matches)) => cmd_linkr::clean::execute(sub_matches),
32 |         Some(("connect", sub_matches)) => cmd_linkr::connect::execute(sub_matches),
33 |         // Every registered subcommand is handled above.
34 |         _ => unreachable!(),
35 |     }?;
36 | 
37 |     Ok(())
38 | }
33 | 
34 | // Variable naming conventions
35 | // range: a Range object
36 | // rg: String
37 | // rgs: Vec<String>
38 | // rg_of: BTreeMap<String, String>
39 | 
40 | // TODO: lav2link
41 | // TODO: paf2link
42 | 


--------------------------------------------------------------------------------
/src/rgr.rs:
--------------------------------------------------------------------------------
 1 | extern crate clap;
 2 | 
 3 | use clap::*;
 4 | 
 5 | mod cmd_rgr;
 6 | 
 7 | /// Entry point of the `rgr` command-line tool.
 8 | ///
 9 | /// Registers the subcommands with clap, dispatches to the one selected on the
10 | /// command line, and returns its error (if any) to the caller of `main`
11 | /// instead of panicking on it.
12 | fn main() -> anyhow::Result<()> {
13 |     let app = Command::new("rgr")
14 |         .version(crate_version!())
15 |         .author(crate_authors!())
16 |         .about("`rgr` operates ranges in .rg and .tsv files")
17 |         .propagate_version(true)
18 |         .arg_required_else_help(true)
19 |         .color(ColorChoice::Auto)
20 |         .subcommand(cmd_rgr::count::make_subcommand())
21 |         .subcommand(cmd_rgr::dedup::make_subcommand())
22 |         .subcommand(cmd_rgr::field::make_subcommand())
23 |         .subcommand(cmd_rgr::filter::make_subcommand())
24 |         .subcommand(cmd_rgr::keep::make_subcommand())
25 |         .subcommand(cmd_rgr::md::make_subcommand())
26 |         .subcommand(cmd_rgr::merge::make_subcommand())
27 |         .subcommand(cmd_rgr::pl_2rmp::make_subcommand())
28 |         .subcommand(cmd_rgr::prop::make_subcommand())
29 |         .subcommand(cmd_rgr::replace::make_subcommand())
30 |         .subcommand(cmd_rgr::runlist::make_subcommand())
31 |         .subcommand(cmd_rgr::select::make_subcommand())
32 |         .subcommand(cmd_rgr::sort::make_subcommand())
33 |         .subcommand(cmd_rgr::span::make_subcommand())
34 |         .after_help(
35 |             r###"
36 | File formats
37 | 
38 | * .rg files are single-column .tsv
39 | * Field numbers in the TSV file start at 1
40 | 
41 | Subcommand groups:
42 | 
43 | * Generic .tsv
44 |     * dedup / keep / md / replace / filter / select
45 | * Single range field
46 |     * field / sort / count / prop / span / runlist
47 | * Multiple range fields
48 |     * merge / pl-2rmp
49 | 
50 | "###,
51 |         );
52 | 
53 |     // Check which subcommand the user ran and propagate its result with `?`,
54 |     // so a failing subcommand is reported as an error rather than as a panic.
55 |     match app.get_matches().subcommand() {
56 |         // Generic .tsv
57 |         Some(("dedup", sub_matches)) => cmd_rgr::dedup::execute(sub_matches),
58 |         Some(("keep", sub_matches)) => cmd_rgr::keep::execute(sub_matches),
59 |         Some(("md", sub_matches)) => cmd_rgr::md::execute(sub_matches),
60 |         Some(("replace", sub_matches)) => cmd_rgr::replace::execute(sub_matches),
61 |         Some(("filter", sub_matches)) => cmd_rgr::filter::execute(sub_matches),
62 |         Some(("select", sub_matches)) => cmd_rgr::select::execute(sub_matches),
63 |         // Single range field
64 |         Some(("field", sub_matches)) => cmd_rgr::field::execute(sub_matches),
65 |         Some(("sort", sub_matches)) => cmd_rgr::sort::execute(sub_matches),
66 |         Some(("count", sub_matches)) => cmd_rgr::count::execute(sub_matches),
67 |         Some(("prop", sub_matches)) => cmd_rgr::prop::execute(sub_matches),
68 |         Some(("span", sub_matches)) => cmd_rgr::span::execute(sub_matches),
69 |         Some(("runlist", sub_matches)) => cmd_rgr::runlist::execute(sub_matches),
70 |         // Multiple range fields
71 |         Some(("merge", sub_matches)) => cmd_rgr::merge::execute(sub_matches),
72 |         Some(("pl-2rmp", sub_matches)) => cmd_rgr::pl_2rmp::execute(sub_matches),
73 |         // Every registered subcommand is handled above.
74 |         _ => unreachable!(),
75 |     }?;
76 | 
77 |     Ok(())
78 | }
73 | 
74 | // TODO: --bed for `rgr field`
75 | 


--------------------------------------------------------------------------------
/src/spanr.rs:
--------------------------------------------------------------------------------
 1 | extern crate clap;
 2 | use clap::*;
 3 | 
 4 | mod cmd_spanr;
 5 | 
 6 | /// Entry point of the `spanr` command-line tool.
 7 | ///
 8 | /// Registers the subcommands with clap, dispatches to the one selected on the
 9 | /// command line, and returns its error (if any) to the caller of `main`
10 | /// instead of panicking on it.
11 | fn main() -> anyhow::Result<()> {
12 |     let app = Command::new("spanr")
13 |         .version(crate_version!())
14 |         .author(crate_authors!())
15 |         .about("`spanr` operates chromosome IntSpan files")
16 |         .propagate_version(true)
17 |         .arg_required_else_help(true)
18 |         .color(ColorChoice::Auto)
19 |         .subcommand(cmd_spanr::genome::make_subcommand())
20 |         .subcommand(cmd_spanr::some::make_subcommand())
21 |         .subcommand(cmd_spanr::merge::make_subcommand())
22 |         .subcommand(cmd_spanr::split::make_subcommand())
23 |         .subcommand(cmd_spanr::stat::make_subcommand())
24 |         .subcommand(cmd_spanr::statop::make_subcommand())
25 |         .subcommand(cmd_spanr::combine::make_subcommand())
26 |         .subcommand(cmd_spanr::compare::make_subcommand())
27 |         .subcommand(cmd_spanr::span::make_subcommand())
28 |         .subcommand(cmd_spanr::cover::make_subcommand())
29 |         .subcommand(cmd_spanr::coverage::make_subcommand())
30 |         .subcommand(cmd_spanr::gff::make_subcommand())
31 |         .subcommand(cmd_spanr::convert::make_subcommand());
32 | 
33 |     // Check which subcommand the user ran and propagate its result with `?`,
34 |     // so a failing subcommand is reported as an error rather than as a panic.
35 |     match app.get_matches().subcommand() {
36 |         Some(("genome", sub_matches)) => cmd_spanr::genome::execute(sub_matches),
37 |         Some(("some", sub_matches)) => cmd_spanr::some::execute(sub_matches),
38 |         Some(("merge", sub_matches)) => cmd_spanr::merge::execute(sub_matches),
39 |         Some(("split", sub_matches)) => cmd_spanr::split::execute(sub_matches),
40 |         Some(("stat", sub_matches)) => cmd_spanr::stat::execute(sub_matches),
41 |         Some(("statop", sub_matches)) => cmd_spanr::statop::execute(sub_matches),
42 |         Some(("combine", sub_matches)) => cmd_spanr::combine::execute(sub_matches),
43 |         Some(("compare", sub_matches)) => cmd_spanr::compare::execute(sub_matches),
44 |         Some(("span", sub_matches)) => cmd_spanr::span::execute(sub_matches),
45 |         Some(("cover", sub_matches)) => cmd_spanr::cover::execute(sub_matches),
46 |         Some(("coverage", sub_matches)) => cmd_spanr::coverage::execute(sub_matches),
47 |         Some(("gff", sub_matches)) => cmd_spanr::gff::execute(sub_matches),
48 |         Some(("convert", sub_matches)) => cmd_spanr::convert::execute(sub_matches),
49 |         // Every registered subcommand is handled above.
50 |         _ => unreachable!(),
51 |     }?;
52 | 
53 |     Ok(())
54 | }
49 | 
50 | // Variable naming conventions
51 | // ints: an IntSpan object
52 | // set, runlists: single name IntSpan set or runlists
53 | //      set is a set of IntSpans
54 | //      set: BTreeMap<String, IntSpan>
55 | //      runlists: BTreeMap<String, String>
56 | // s_of, r_of: multiple names IntSpan or runlist
57 | //      name ==> chr ==> IntSpan
58 | //      name ==> chr ==> String
59 | // json: BTreeMap<String, Value>, single or multiple json
60 | // res: result, single name IntSpan set
61 | //      BTreeMap<String, IntSpan>
62 | // res_of: BTreeMap<String, BTreeMap<String, IntSpan>>
63 | // sizes: chr.sizes, BTreeMap<String, i32>
64 | // iv_of: BTreeMap<String, Vec<Iv>>
65 | 


--------------------------------------------------------------------------------
/tests/Atha/chr.sizes:
--------------------------------------------------------------------------------
1 | 1	30427671
2 | 2	19698289
3 | 3	23459830
4 | 4	18585056
5 | 5	26975502
6 | 


--------------------------------------------------------------------------------
/tests/Atha/links.blast.tsv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/tests/Atha/links.blast.tsv.gz


--------------------------------------------------------------------------------
/tests/Atha/links.lastz.tsv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/tests/Atha/links.lastz.tsv.gz


--------------------------------------------------------------------------------
/tests/S288c/chr.sizes:
--------------------------------------------------------------------------------
 1 | I	230218
 2 | II	813184
 3 | III	316620
 4 | IV	1531933
 5 | IX	439888
 6 | V	576874
 7 | VI	270161
 8 | VII	1090940
 9 | VIII	562643
10 | X	745751
11 | XI	666816
12 | XII	1078177
13 | XIII	924431
14 | XIV	784333
15 | XV	1091291
16 | XVI	948066
17 | 


--------------------------------------------------------------------------------
/tests/cli_linkr.rs:
--------------------------------------------------------------------------------
  1 | use assert_cmd::prelude::*; // Add methods on commands
  2 | use predicates::prelude::*; // Used for writing assertions
  3 | use std::process::Command; // Run programs
  4 | 
  5 | #[test]
  6 | fn command_invalid() -> anyhow::Result<()> {
  7 |     // An unknown subcommand must fail and be reported on stderr.
  8 |     Command::cargo_bin("linkr")?
  9 |         .arg("foobar")
 10 |         .assert()
 11 |         .failure()
 12 |         .stderr(predicate::str::contains("recognized"));
 13 | 
 14 |     Ok(())
 15 | }
 15 | 
 16 | #[test]
 17 | fn command_circos() -> anyhow::Result<()> {
 18 |     // Run `linkr circos` on the connect fixture and capture what it prints.
 19 |     let output = Command::cargo_bin("linkr")?
 20 |         .args(["circos", "tests/linkr/II.connect.tsv"])
 21 |         .output()
 22 |         .unwrap();
 23 |     let printed = String::from_utf8(output.stdout).unwrap();
 24 | 
 25 |     let line_total = printed.lines().count();
 26 |     assert_eq!(line_total, 10);
 27 |     assert!(printed.contains("XIII 7947 6395"), "negative strand");
 28 |     assert!(!printed.contains("fill_color"), "links");
 29 | 
 30 |     Ok(())
 31 | }
 32 | 
 33 | #[test]
 34 | fn command_circos_highlight() -> anyhow::Result<()> {
 35 |     // Same fixture as `command_circos`, but with `--highlight` switched on.
 36 |     let output = Command::cargo_bin("linkr")?
 37 |         .args(["circos", "tests/linkr/II.connect.tsv", "--highlight"])
 38 |         .output()
 39 |         .unwrap();
 40 |     let printed = String::from_utf8(output.stdout).unwrap();
 41 | 
 42 |     let line_total = printed.lines().count();
 43 |     assert_eq!(line_total, 14);
 44 |     assert!(printed.contains("fill_color"), "highlights");
 45 | 
 46 |     Ok(())
 47 | }
 49 | 
 50 | #[test]
 51 | fn command_sort() -> anyhow::Result<()> {
 52 |     // Run `linkr sort` on the unsorted links fixture and capture stdout.
 53 |     let output = Command::cargo_bin("linkr")?
 54 |         .args(["sort", "tests/linkr/II.links.tsv"])
 55 |         .output()
 56 |         .unwrap();
 57 |     let printed = String::from_utf8(output.stdout).unwrap();
 58 | 
 59 |     let line_total = printed.lines().count();
 60 |     assert_eq!(line_total, 15);
 61 |     assert!(!printed.contains("\nVI"), "chromosome II first");
 62 | 
 63 |     Ok(())
 64 | }
 65 | 
 66 | #[test]
 67 | fn command_filter() -> anyhow::Result<()> {
 68 |     // Run `linkr filter -n 2` on the connect fixture and capture stdout.
 69 |     let output = Command::cargo_bin("linkr")?
 70 |         .args(["filter", "tests/linkr/II.connect.tsv", "-n", "2"])
 71 |         .output()
 72 |         .unwrap();
 73 |     let printed = String::from_utf8(output.stdout).unwrap();
 74 | 
 75 |     let line_total = printed.lines().count();
 76 |     assert_eq!(line_total, 4);
 77 | 
 78 |     Ok(())
 79 | }
 82 | 
 83 | #[test]
 84 | fn command_filter_3() -> anyhow::Result<()> {
 85 |     // Run `linkr filter -n 3 -r 0.99` on the connect fixture and capture stdout.
 86 |     let output = Command::cargo_bin("linkr")?
 87 |         .args(["filter", "tests/linkr/II.connect.tsv"])
 88 |         .args(["-n", "3"])
 89 |         .args(["-r", "0.99"])
 90 |         .output()
 91 |         .unwrap();
 92 |     let printed = String::from_utf8(output.stdout).unwrap();
 93 | 
 94 |     let line_total = printed.lines().count();
 95 |     assert_eq!(line_total, 1);
 96 |     assert!(!printed.contains("VI("), "filtered links");
 97 | 
 98 |     Ok(())
 99 | }
102 | 
103 | #[test]
104 | fn command_clean() -> anyhow::Result<()> {
105 |     // Run `linkr clean` on the sorted links fixture and capture stdout.
106 |     let output = Command::cargo_bin("linkr")?
107 |         .args(["clean", "tests/linkr/II.sort.tsv"])
108 |         .output()
109 |         .unwrap();
110 |     let printed = String::from_utf8(output.stdout).unwrap();
111 | 
112 |     let line_total = printed.lines().count();
113 |     assert_eq!(line_total, 11);
114 |     assert!(printed.contains("892-4684"), "range exists");
115 | 
116 |     Ok(())
117 | }
118 | 
119 | #[test]
120 | fn command_clean_bundle() -> anyhow::Result<()> {
121 |     // Run `linkr clean --bundle 500` on the sorted links fixture.
122 |     let output = Command::cargo_bin("linkr")?
123 |         .args(["clean", "tests/linkr/II.sort.tsv"])
124 |         .args(["--bundle", "500"])
125 |         .output()
126 |         .unwrap();
127 |     let printed = String::from_utf8(output.stdout).unwrap();
128 | 
129 |     let line_total = printed.lines().count();
130 |     assert_eq!(line_total, 10);
131 |     assert!(!printed.contains("892-4684"), "original");
132 |     assert!(printed.contains("892-4685"), "bundled");
133 | 
134 |     Ok(())
135 | }
137 | 
138 | #[test]
139 | fn command_clean_merge() -> anyhow::Result<()> {
140 |     // Run `linkr clean` with the merge fixture passed via `-r`, in verbose mode.
141 |     let output = Command::cargo_bin("linkr")?
142 |         .args(["clean", "tests/linkr/II.sort.tsv"])
143 |         .args(["-r", "tests/linkr/II.merge.tsv"])
144 |         .arg("--verbose")
145 |         .output()
146 |         .unwrap();
147 |     let printed = String::from_utf8(output.stdout).unwrap();
148 | 
149 |     let line_total = printed.lines().count();
150 |     assert_eq!(line_total, 8);
151 |     assert!(!printed.contains("892-4684"), "original");
152 |     assert!(printed.contains("892-4685"), "merged");
153 | 
154 |     Ok(())
155 | }
157 | 
158 | #[test]
159 | fn command_connect() -> anyhow::Result<()> {
160 |     // Run `linkr connect --verbose` on the cleaned links fixture.
161 |     let output = Command::cargo_bin("linkr")?
162 |         .args(["connect", "tests/linkr/II.clean.tsv", "--verbose"])
163 |         .output()
164 |         .unwrap();
165 |     let printed = String::from_utf8(output.stdout).unwrap();
166 | 
167 |     let line_total = printed.lines().count();
168 |     assert_eq!(line_total, 6);
169 | 
170 |     // The first output line is expected to hold three tab-separated fields.
171 |     let first_line = printed.lines().next().unwrap();
172 |     let field_total = first_line.split('\t').count();
173 |     assert_eq!(field_total, 3, "multilateral links");
174 | 
175 |     Ok(())
176 | }
178 | 


--------------------------------------------------------------------------------
/tests/fasr/NC_000932.fa.fai:
--------------------------------------------------------------------------------
1 | NC_000932	154478	11	50	51
2 | 


--------------------------------------------------------------------------------
/tests/linkr/II.clean.tsv:
--------------------------------------------------------------------------------
1 | II(+):1-2018	XII(+):204-2215
2 | II(+):1990-5850	XII(+):7326-11200
3 | II(+):1990-5850	VI(+):892-4685
4 | II(+):300165-301260	IV(+):471852-472948
5 | II(+):477671-479048	XVI(+):700594-701971
6 | II(+):804880-813096	VII(+):1076129-1084340
7 | II(+):810776-812328	XIII(-):6395-7947
8 | II(+):810776-812328	XIV(-):7479-9033
9 | 


--------------------------------------------------------------------------------
/tests/linkr/II.connect.tsv:
--------------------------------------------------------------------------------
1 | II(+):1990-5850	VI(+):892-4685	XII(+):7326-11200
2 | II(+):810776-812328	XIII(-):6395-7947	XIV(-):7479-9033
3 | II(+):1-2018	XII(+):204-2215
4 | II(+):300165-301260	IV(+):471852-472948
5 | II(+):477671-479048	XVI(+):700594-701971
6 | II(+):804880-813096	VII(+):1076129-1084340
7 | 


--------------------------------------------------------------------------------
/tests/linkr/II.links.tsv:
--------------------------------------------------------------------------------
 1 | II(+):1-2018	XII(+):204-2215	+
 2 | II(+):144228-145732	II(-):144228-145732	-
 3 | II(+):1990-5850	II(+):2026-5850	+
 4 | II(+):1990-5850	XII(+):7326-11200	+
 5 | II(+):2026-5850	II(+):1990-5850	+
 6 | II(+):2026-5850	VI(+):892-4684	+
 7 | II(+):2026-5850	XII(+):7326-11200	+
 8 | II(+):300165-301260	IV(+):471852-472948	+
 9 | II(+):429496-430989	II(+):429504-430965	+
10 | II(+):429504-430965	II(+):429496-430989	+
11 | II(+):477671-479048	XVI(+):700594-701971	+
12 | II(+):658738-662234	II(-):658738-662234	-
13 | II(+):804880-813096	VII(+):1076129-1084340	+
14 | II(+):806179-808955	VII(+):1077427-1080204	+
15 | II(+):810776-812328	XIII(-):6395-7947	+
16 | II(+):810776-812328	XIV(-):7479-9033	+
17 | II(-):144228-145732	II(+):144228-145732	-
18 | II(-):658738-662234	II(+):658738-662234	-
19 | IV(+):471852-472948	II(+):300165-301260	+
20 | VI(+):892-4684	II(+):2026-5850	+
21 | VI(+):893-4685	II(+):2026-5850	+
22 | VII(+):1076129-1084340	II(+):804880-813096	+
23 | VII(+):1077427-1080204	II(+):806179-808955	+
24 | XII(+):204-2215	II(+):1-2018	+
25 | XII(+):7326-11200	II(+):1990-5850	+
26 | XII(+):7326-11200	II(+):2026-5850	+
27 | XIII(-):6395-7947	II(+):810776-812328	+
28 | XIV(-):7479-9033	II(+):810776-812328	+
29 | XVI(+):700594-701971	II(+):477671-479048	+
30 | 


--------------------------------------------------------------------------------
/tests/linkr/II.merge.tsv:
--------------------------------------------------------------------------------
1 | II(+):2026-5850	II(+):1990-5850
2 | II(+):429504-430965	II(+):429496-430989
3 | II(-):144228-145732	II(+):144228-145732
4 | II(-):658738-662234	II(+):658738-662234
5 | VI(+):893-4685	VI(+):892-4685
6 | VI(+):892-4684	VI(+):892-4685
7 | 


--------------------------------------------------------------------------------
/tests/linkr/II.sort.tsv:
--------------------------------------------------------------------------------
 1 | II(+):1-2018	XII(+):204-2215	+
 2 | II(+):1990-5850	II(+):2026-5850	+
 3 | II(+):1990-5850	XII(+):7326-11200	+
 4 | II(+):2026-5850	VI(+):892-4684	+
 5 | II(+):2026-5850	VI(+):893-4685	+
 6 | II(+):2026-5850	XII(+):7326-11200	+
 7 | II(+):144228-145732	II(-):144228-145732	-
 8 | II(+):300165-301260	IV(+):471852-472948	+
 9 | II(+):429496-430989	II(+):429504-430965	+
10 | II(+):477671-479048	XVI(+):700594-701971	+
11 | II(+):658738-662234	II(-):658738-662234	-
12 | II(+):804880-813096	VII(+):1076129-1084340	+
13 | II(+):806179-808955	VII(+):1077427-1080204	+
14 | II(+):810776-812328	XIII(-):6395-7947	+
15 | II(+):810776-812328	XIV(-):7479-9033	+
16 | 


--------------------------------------------------------------------------------
/tests/rgr/1_4.ovlp.tsv:
--------------------------------------------------------------------------------
 1 | anchor148_9124	pac7556_20928	8327	0.890	0	797	9124	9124	0	0	8581	20928	overlap
 2 | anchor148_9124	pac7443_11454	2665	0.894	0	6459	9124	9124	0	0	2742	11454	overlap
 3 | anchor236_6430	pac7556_20928	6430	0.869	0	0	6430	6430	0	8601	15361	20928	contained
 4 | anchor236_6430	pac8852_20444	1099	0.839	0	5331	6430	6430	0	0	1183	20444	overlap
 5 | anchor236_6430	pac7443_11454	6430	0.889	0	0	6430	6430	0	2759	9226	11454	contained
 6 | anchor575_1626	pac7556_20928	1626	0.864	0	0	1626	1626	0	15343	17070	20928	contained
 7 | anchor575_1626	pac8852_20444	1626	0.859	0	0	1626	1626	0	1166	2724	20444	contained
 8 | anchor575_1626	pac8559_13190	1626	0.884	0	0	1626	1626	0	329	2089	13190	contained
 9 | anchor575_1626	pac4710_11440	1626	0.841	0	0	1626	1626	0	410	2217	11440	contained
10 | anchor575_1626	pac1461_9030	1493	0.910	0	133	1626	1626	0	0	1581	9030	overlap
11 | anchor575_1626	pac4745_7148	1626	0.857	0	0	1626	1626	0	4459	6199	7148	contained
12 | anchor575_1626	pac6425_4471	1080	0.888	0	546	1626	1626	0	0	1136	4471	overlap
13 | anchor576_1624	pac7556_20928	1624	0.867	0	0	1624	1624	0	17033	18740	20928	contained
14 | anchor576_1624	pac8852_20444	1624	0.888	0	0	1624	1624	0	2685	4271	20444	contained
15 | anchor576_1624	pac8559_13190	1624	0.864	0	0	1624	1624	0	2043	3821	13190	contained
16 | anchor576_1624	pac4710_11440	1624	0.846	0	0	1624	1624	0	2174	3956	11440	contained
17 | anchor576_1624	pac1461_9030	1624	0.904	0	0	1624	1624	0	1537	3236	9030	contained
18 | anchor576_1624	pac6425_4471	1624	0.854	0	0	1624	1624	0	1094	2892	4471	contained
19 | pac7556_20928	anchor148_9124	8581	0.890	0	0	8581	20928	0	797	9124	9124	overlap
20 | pac7556_20928	anchor236_6430	6760	0.869	0	8601	15361	20928	0	0	6430	6430	contains
21 | pac7556_20928	anchor575_1626	1727	0.864	0	15343	17070	20928	0	0	1626	1626	contains
22 | pac7556_20928	anchor576_1624	1707	0.867	0	17033	18740	20928	0	0	1624	1624	contains
23 | pac8852_20444	anchor236_6430	1183	0.839	0	0	1183	20444	0	5331	6430	6430	overlap
24 | pac8852_20444	anchor575_1626	1558	0.859	0	1166	2724	20444	0	0	1626	1626	contains
25 | pac8852_20444	anchor576_1624	1586	0.888	0	2685	4271	20444	0	0	1624	1624	contains
26 | pac8852_20444	pac1461_9030	8217	0.794	0	1295	9512	20444	0	0	9030	9030	contains
27 | pac8852_20444	pac4745_7148	3617	0.759	0	0	3617	20444	0	3366	7148	7148	overlap
28 | pac8559_13190	anchor575_1626	1760	0.884	0	329	2089	13190	0	0	1626	1626	contains
29 | pac8559_13190	anchor576_1624	1778	0.864	0	2043	3821	13190	0	0	1624	1624	contains
30 | pac8559_13190	pac4745_7148	3100	0.778	0	0	3100	13190	0	4141	7148	7148	overlap
31 | pac7443_11454	anchor148_9124	2742	0.894	0	0	2742	11454	0	6459	9124	9124	overlap
32 | pac7443_11454	anchor236_6430	6467	0.889	0	2759	9226	11454	0	0	6430	6430	contains
33 | pac4710_11440	anchor575_1626	1807	0.841	0	410	2217	11440	0	0	1626	1626	contains
34 | pac4710_11440	anchor576_1624	1782	0.846	0	2174	3956	11440	0	0	1624	1624	contains
35 | pac4710_11440	pac1461_9030	9108	0.800	0	543	9651	11440	0	0	9030	9030	contains
36 | pac4710_11440	pac4745_7148	3231	0.761	0	0	3231	11440	0	4106	7148	7148	overlap
37 | pac1461_9030	anchor575_1626	1581	0.910	0	0	1581	9030	0	133	1626	1626	overlap
38 | pac1461_9030	anchor576_1624	1699	0.904	0	1537	3236	9030	0	0	1624	1624	contains
39 | pac1461_9030	pac8852_20444	9030	0.794	0	0	9030	9030	0	1295	9512	20444	contained
40 | pac1461_9030	pac4710_11440	9030	0.800	0	0	9030	9030	0	543	9651	11440	contained
41 | pac1461_9030	pac4745_7148	2535	0.808	0	0	2535	9030	0	4599	7148	7148	overlap
42 | pac4745_7148	anchor575_1626	1740	0.857	0	4459	6199	7148	0	0	1626	1626	contains
43 | pac4745_7148	pac8852_20444	3782	0.759	0	3366	7148	7148	0	0	3617	20444	overlap
44 | pac4745_7148	pac8559_13190	3007	0.778	0	4141	7148	7148	0	0	3100	13190	overlap
45 | pac4745_7148	pac4710_11440	3042	0.761	0	4106	7148	7148	0	0	3231	11440	overlap
46 | pac4745_7148	pac1461_9030	2549	0.808	0	4599	7148	7148	0	0	2535	9030	overlap
47 | pac4745_7148	pac6425_4471	2097	0.785	0	5051	7148	7148	0	0	2151	4471	overlap
48 | pac6425_4471	anchor575_1626	1136	0.888	0	0	1136	4471	0	546	1626	1626	overlap
49 | pac6425_4471	anchor576_1624	1798	0.854	0	1094	2892	4471	0	0	1624	1624	contains
50 | pac6425_4471	pac4745_7148	2151	0.785	0	0	2151	4471	0	5051	7148	7148	overlap
51 | 


--------------------------------------------------------------------------------
/tests/rgr/1_4.replace.tsv:
--------------------------------------------------------------------------------
 1 | falcon_read/1/0_9124	anchor148_9124
 2 | falcon_read/2/0_6430	anchor236_6430
 3 | falcon_read/3/0_1626	anchor575_1626
 4 | falcon_read/4/0_1624	anchor576_1624
 5 | falcon_read/5/0_20928	pac7556_20928
 6 | falcon_read/6/0_20444	pac8852_20444
 7 | falcon_read/7/0_13190	pac8559_13190
 8 | falcon_read/8/0_11454	pac7443_11454
 9 | falcon_read/9/0_11440	pac4710_11440
10 | falcon_read/10/0_9030	pac1461_9030
11 | falcon_read/11/0_7148	pac4745_7148
12 | falcon_read/12/0_4471	pac6425_4471
13 | 


--------------------------------------------------------------------------------
/tests/rgr/II.links.tsv:
--------------------------------------------------------------------------------
 1 | II(+):1-2018	XII(+):204-2215	+
 2 | II(+):144228-145732	II(-):144228-145732	-
 3 | II(+):1990-5850	II(+):2026-5850	+
 4 | II(+):1990-5850	XII(+):7326-11200	+
 5 | II(+):2026-5850	II(+):1990-5850	+
 6 | II(+):2026-5850	VI(+):892-4684	+
 7 | II(+):2026-5850	XII(+):7326-11200	+
 8 | II(+):300165-301260	IV(+):471852-472948	+
 9 | II(+):429496-430989	II(+):429504-430965	+
10 | II(+):429504-430965	II(+):429496-430989	+
11 | II(+):477671-479048	XVI(+):700594-701971	+
12 | II(+):658738-662234	II(-):658738-662234	-
13 | II(+):804880-813096	VII(+):1076129-1084340	+
14 | II(+):806179-808955	VII(+):1077427-1080204	+
15 | II(+):810776-812328	XIII(-):6395-7947	+
16 | II(+):810776-812328	XIV(-):7479-9033	+
17 | II(-):144228-145732	II(+):144228-145732	-
18 | II(-):658738-662234	II(+):658738-662234	-
19 | IV(+):471852-472948	II(+):300165-301260	+
20 | VI(+):892-4684	II(+):2026-5850	+
21 | VI(+):893-4685	II(+):2026-5850	+
22 | VII(+):1076129-1084340	II(+):804880-813096	+
23 | VII(+):1077427-1080204	II(+):806179-808955	+
24 | XII(+):204-2215	II(+):1-2018	+
25 | XII(+):7326-11200	II(+):1990-5850	+
26 | XII(+):7326-11200	II(+):2026-5850	+
27 | XIII(-):6395-7947	II(+):810776-812328	+
28 | XIV(-):7479-9033	II(+):810776-812328	+
29 | XVI(+):700594-701971	II(+):477671-479048	+
30 | 


--------------------------------------------------------------------------------
/tests/rgr/S288c.rg:
--------------------------------------------------------------------------------
1 | I:1-100
2 | I(+):90-150
3 | S288c.I(-):190-200
4 | II:21294-22075
5 | II:23537-24097
6 | S288c.I(-):190-200|Species=Yeast
7 | 


--------------------------------------------------------------------------------
/tests/rgr/ctg.range.tsv:
--------------------------------------------------------------------------------
1 | length	ID	range
2 | 100000	ctg:I:1	I:1-100000
3 | 85779	ctg:Mito:1	Mito:1-85779
4 | 130218	ctg:I:2	I:100001-230218
5 | 


--------------------------------------------------------------------------------
/tests/rgr/ctg.tsv:
--------------------------------------------------------------------------------
1 | ID	chr_id	chr_start	chr_end	chr_strand	length
2 | ctg:I:1	I	1	100000	+	100000
3 | ctg:Mito:1	Mito	1	85779	+	85779
4 | ctg:I:2	I	100001	230218	+	130218
5 | 


--------------------------------------------------------------------------------
/tests/rgr/ranges.tsv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/tests/rgr/ranges.tsv.gz


--------------------------------------------------------------------------------
/tests/rgr/tn.tsv:
--------------------------------------------------------------------------------
 1 | IS10L-AF162223	IS10L-AF162223	0.0000	1.0000	1.0000
 2 | IS10L-AF162223	Tn10-AF162223	0.1999	0.1408	1.0000
 3 | IS10L-AF162223	Tn5393.11-CP000602.1	0.2784	0.0767	1.0000
 4 | IS10L-AF162223	Tn5393.2-CP030921.1	0.2753	0.0785	0.9615
 5 | IS10L-AF162223	Tn7241-KY437729	0.2592	0.0887	0.9077
 6 | IS10_p-MH626558	IS10L-AF162223	0.1303	0.2513	1.0000
 7 | IS10L-AF162223	Tn10-AF162223	0.1999	0.1408	1.0000
 8 | IS10_p-MH626558	Tn10-AF162223	0.3833	0.0354	1.0000
 9 | Tn10-AF162223	Tn10-AF162223	0.0000	1.0000	1.0000
10 | Tn10-AF162223	Tn5393.11-CP000602.1	0.0499	0.5447	1.0000
11 | Tn10-AF162223	Tn5393.2-CP030921.1	0.0449	0.5750	0.9928
12 | Tn10_p-MH626558	Tn10-AF162223	0.0337	0.6525	0.9929
13 | Tn10_p2-CP027411.1	Tn10-AF162223	0.0301	0.6808	0.9891
14 | 


--------------------------------------------------------------------------------
/tests/spanr/Atha.chr.sizes:
--------------------------------------------------------------------------------
1 | 1	30427671
2 | 2	19698289
3 | 3	23459830
4 | 4	18585056
5 | 5	26975502
6 | 


--------------------------------------------------------------------------------
/tests/spanr/Atha.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "AT1G01010.1": {
 3 |     "1": "3631-3913,3996-4276,4486-4605,4706-5095,5174-5326,5439-5899"
 4 |   },
 5 |   "AT1G01020.1": {
 6 |     "1": "5928-6263,6437-7069,7157-7232,7384-7450,7564-7649,7762-7835,7942-7987,8236-8325,8417-8464,8571-8737"
 7 |   },
 8 |   "AT1G01020.2": {
 9 |     "1": "6790-7069,7157-7450,7564-7649,7762-7835,7942-7987,8236-8325,8417-8464,8571-8737"
10 |   },
11 |   "AT2G01008.1": {
12 |     "2": "1025-1272,1458-1510,1873-2810,3706-5513,5782-5945"
13 |   },
14 |   "AT2G01021.1": {
15 |     "2": "6571-6672"
16 |   }
17 | }
18 | 


--------------------------------------------------------------------------------
/tests/spanr/Atha.list:
--------------------------------------------------------------------------------
1 | AT1G01010.1
2 | AT1G01020.1
3 | AT2G01008.1
4 | 


--------------------------------------------------------------------------------
/tests/spanr/I.II.json:
--------------------------------------------------------------------------------
1 | {
2 |   "I": {
3 |     "I": "13744-17133,20043-21352,27969-29557,32941-33447,45023-45898,67521-68715,69526-71785"
4 |   },
5 |   "II": {
6 |     "II": "21294-22075,23537-24097,28547-29194,36489-36988,44919-45977,63330-63875,71128-71865"
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/spanr/I.json:
--------------------------------------------------------------------------------
1 | {
2 |   "I": "13744-17133,20043-21352,27969-29557,32941-33447,45023-45898,67521-68715,69526-71785"
3 | }
4 | 


--------------------------------------------------------------------------------
/tests/spanr/II.json:
--------------------------------------------------------------------------------
1 | {
2 |   "II": "21294-22075,23537-24097,28547-29194,36489-36988,44919-45977,63330-63875,71128-71865"
3 | }
4 | 


--------------------------------------------------------------------------------
/tests/spanr/II.other.json:
--------------------------------------------------------------------------------
1 | {
2 |   "II": "21294-22075,23537-24097,28547-29194,36489-36988,44919-45977,63330-63875,71128-71865"
3 | }
4 | 


--------------------------------------------------------------------------------
/tests/spanr/NC_007942.rm.gff:
--------------------------------------------------------------------------------
  1 | ##gff-version 3
  2 | ##sequence-region NC_007942 1 152218
  3 | NC_007942	RepeatMasker	dispersed_repeat	177	240	15	+	.	Target=A-rich 1 63
  4 | NC_007942	RepeatMasker	dispersed_repeat	4502	4542	15	+	.	Target=(TTATAA)n 1 39
  5 | NC_007942	RepeatMasker	dispersed_repeat	5159	5195	38	+	.	Target=(AT)n 1 37
  6 | NC_007942	RepeatMasker	dispersed_repeat	6960	7013	13	+	.	Target=(AATTTTT)n 1 55
  7 | NC_007942	RepeatMasker	dispersed_repeat	10659	10700	14	+	.	Target=(AATTTAT)n 1 43
  8 | NC_007942	RepeatMasker	dispersed_repeat	10775	10823	12	+	.	Target=(ATTTCTA)n 1 48
  9 | NC_007942	RepeatMasker	dispersed_repeat	13065	13138	345	-	.	Target=tRNA-Tyr-TAC 1 74
 10 | NC_007942	RepeatMasker	dispersed_repeat	14917	14976	183	+	.	Target=MamSINE1 9 69
 11 | NC_007942	RepeatMasker	dispersed_repeat	17926	17963	13	+	.	Target=(GATATAT)n 1 41
 12 | NC_007942	RepeatMasker	dispersed_repeat	18422	18468	17	+	.	Target=(TATC)n 1 47
 13 | NC_007942	RepeatMasker	dispersed_repeat	18815	18857	12	+	.	Target=(AATTA)n 1 43
 14 | NC_007942	RepeatMasker	dispersed_repeat	24588	24615	16	+	.	Target=(T)n 1 28
 15 | NC_007942	RepeatMasker	dispersed_repeat	25609	25646	13	+	.	Target=(AATAGT)n 1 39
 16 | NC_007942	RepeatMasker	dispersed_repeat	26216	26274	15	+	.	Target=A-rich 1 59
 17 | NC_007942	RepeatMasker	dispersed_repeat	28592	28658	30	+	.	Target=(TTA)n 1 67
 18 | NC_007942	RepeatMasker	dispersed_repeat	29618	29639	13	+	.	Target=(AAT)n 1 24
 19 | NC_007942	RepeatMasker	dispersed_repeat	30851	30918	200	+	.	Target=MamSINE1 3 72
 20 | NC_007942	RepeatMasker	dispersed_repeat	31910	31959	13	+	.	Target=(TATAA)n 1 50
 21 | NC_007942	RepeatMasker	dispersed_repeat	32788	32847	27	+	.	Target=(AT)n 1 65
 22 | NC_007942	RepeatMasker	dispersed_repeat	33368	33400	15	+	.	Target=(AT)n 1 32
 23 | NC_007942	RepeatMasker	dispersed_repeat	33401	33407	13	+	.	Target=(ATAGAT)n 1 39
 24 | NC_007942	RepeatMasker	dispersed_repeat	38374	38397	12	+	.	Target=(TTTAA)n 1 26
 25 | NC_007942	RepeatMasker	dispersed_repeat	38485	38529	12	+	.	Target=(ATAA)n 1 46
 26 | NC_007942	RepeatMasker	dispersed_repeat	42812	42844	16	+	.	Target=A-rich 1 33
 27 | NC_007942	RepeatMasker	dispersed_repeat	44787	44832	13	+	.	Target=(ATA)n 1 46
 28 | NC_007942	RepeatMasker	dispersed_repeat	45788	45831	15	+	.	Target=A-rich 1 47
 29 | NC_007942	RepeatMasker	dispersed_repeat	45872	45908	14	+	.	Target=A-rich 1 37
 30 | NC_007942	RepeatMasker	dispersed_repeat	46793	46826	14	+	.	Target=(ATCTAT)n 1 32
 31 | NC_007942	RepeatMasker	dispersed_repeat	46816	46849	14	+	.	Target=(TAGA)n 1 38
 32 | NC_007942	RepeatMasker	dispersed_repeat	46895	46919	16	+	.	Target=(A)n 1 25
 33 | NC_007942	RepeatMasker	dispersed_repeat	48397	48455	29	+	.	Target=(AT)n 1 61
 34 | NC_007942	RepeatMasker	dispersed_repeat	49560	49584	13	+	.	Target=(AATTT)n 1 26
 35 | NC_007942	RepeatMasker	dispersed_repeat	51482	51555	22	+	.	Target=A-rich 1 74
 36 | NC_007942	RepeatMasker	dispersed_repeat	51694	51718	16	+	.	Target=(T)n 1 25
 37 | NC_007942	RepeatMasker	dispersed_repeat	51755	51792	19	+	.	Target=(ATTCTAT)n 1 37
 38 | NC_007942	RepeatMasker	dispersed_repeat	52808	52849	13	+	.	Target=(AATATTC)n 1 38
 39 | NC_007942	RepeatMasker	dispersed_repeat	53026	53131	25	+	.	Target=A-rich 1 105
 40 | NC_007942	RepeatMasker	dispersed_repeat	53217	53290	308	+	.	Target=tRNA-Ser-AGY 4 72
 41 | NC_007942	RepeatMasker	dispersed_repeat	53328	53365	12	+	.	Target=(TTGT)n 1 38
 42 | NC_007942	RepeatMasker	dispersed_repeat	54243	54300	21	+	.	Target=(TTATTT)n 1 58
 43 | NC_007942	RepeatMasker	dispersed_repeat	54299	54378	26	+	.	Target=A-rich 1 76
 44 | NC_007942	RepeatMasker	dispersed_repeat	54530	54557	13	+	.	Target=(TAGA)n 1 30
 45 | NC_007942	RepeatMasker	dispersed_repeat	56554	56614	14	+	.	Target=A-rich 1 64
 46 | NC_007942	RepeatMasker	dispersed_repeat	58332	58372	19	+	.	Target=A-rich 1 41
 47 | NC_007942	RepeatMasker	dispersed_repeat	63709	63745	12	+	.	Target=(AATTGA)n 1 39
 48 | NC_007942	RepeatMasker	dispersed_repeat	63753	63795	13	+	.	Target=(TTATTT)n 1 45
 49 | NC_007942	RepeatMasker	dispersed_repeat	64767	64806	19	+	.	Target=(TA)n 1 39
 50 | NC_007942	RepeatMasker	dispersed_repeat	64866	64913	13	+	.	Target=(ATATA)n 1 43
 51 | NC_007942	RepeatMasker	dispersed_repeat	65068	65098	21	+	.	Target=(AT)n 1 32
 52 | NC_007942	RepeatMasker	dispersed_repeat	65451	65486	11	+	.	Target=(ATTAAGT)n 1 36
 53 | NC_007942	RepeatMasker	dispersed_repeat	65884	65913	12	+	.	Target=(TTTTCT)n 1 31
 54 | NC_007942	RepeatMasker	dispersed_repeat	67087	67128	13	+	.	Target=(TATATAC)n 1 39
 55 | NC_007942	RepeatMasker	dispersed_repeat	73941	74006	14	+	.	Target=(AAT)n 1 66
 56 | NC_007942	RepeatMasker	dispersed_repeat	76532	76550	15	+	.	Target=(A)n 1 19
 57 | NC_007942	RepeatMasker	dispersed_repeat	79464	79492	15	+	.	Target=(T)n 1 29
 58 | NC_007942	RepeatMasker	dispersed_repeat	79499	79539	24	+	.	Target=(TA)n 1 43
 59 | NC_007942	RepeatMasker	dispersed_repeat	80695	80751	15	+	.	Target=(ATAT)n 1 62
 60 | NC_007942	RepeatMasker	dispersed_repeat	81023	81055	12	+	.	Target=A-rich 1 35
 61 | NC_007942	RepeatMasker	dispersed_repeat	81611	81634	15	+	.	Target=(T)n 1 24
 62 | NC_007942	RepeatMasker	dispersed_repeat	82638	82665	16	+	.	Target=(T)n 1 28
 63 | NC_007942	RepeatMasker	dispersed_repeat	82672	82718	22	+	.	Target=(ATAT)n 1 47
 64 | NC_007942	RepeatMasker	dispersed_repeat	83338	83362	12	+	.	Target=(TTC)n 1 24
 65 | NC_007942	RepeatMasker	dispersed_repeat	83812	83903	29	+	.	Target=(GATATA)n 1 90
 66 | NC_007942	RepeatMasker	dispersed_repeat	90781	90820	27	+	.	Target=(GATAGTGAC)n 1 40
 67 | NC_007942	RepeatMasker	dispersed_repeat	93250	93284	15	+	.	Target=(ATATAT)n 1 36
 68 | NC_007942	RepeatMasker	dispersed_repeat	96095	96127	13	+	.	Target=(TTTTTG)n 1 33
 69 | NC_007942	RepeatMasker	dispersed_repeat	98387	98437	13	+	.	Target=(TTCTAT)n 1 51
 70 | NC_007942	RepeatMasker	dispersed_repeat	100312	100963	390	+	.	Target=SSU-rRNA_Hsa 1031 1715
 71 | NC_007942	RepeatMasker	dispersed_repeat	103687	103779	261	+	.	Target=LSU-rRNA_Hsa 21 113
 72 | NC_007942	RepeatMasker	dispersed_repeat	103960	104056	277	+	.	Target=LSU-rRNA_Hsa 319 415
 73 | NC_007942	RepeatMasker	dispersed_repeat	104206	104360	279	+	.	Target=LSU-rRNA_Hsa 1503 1658
 74 | NC_007942	RepeatMasker	dispersed_repeat	105430	105538	259	+	.	Target=LSU-rRNA_Hsa 3714 3822
 75 | NC_007942	RepeatMasker	dispersed_repeat	105767	106148	408	+	.	Target=LSU-rRNA_Hsa 4137 4519
 76 | NC_007942	RepeatMasker	dispersed_repeat	108220	108261	13	+	.	Target=A-rich 1 42
 77 | NC_007942	RepeatMasker	dispersed_repeat	108939	108973	12	+	.	Target=(TTAT)n 1 35
 78 | NC_007942	RepeatMasker	dispersed_repeat	109741	109774	15	+	.	Target=(TAA)n 1 34
 79 | NC_007942	RepeatMasker	dispersed_repeat	109920	109966	16	+	.	Target=A-rich 1 47
 80 | NC_007942	RepeatMasker	dispersed_repeat	112616	112675	15	+	.	Target=(AATGAA)n 1 66
 81 | NC_007942	RepeatMasker	dispersed_repeat	113267	113310	12	+	.	Target=(ACAAGA)n 1 48
 82 | NC_007942	RepeatMasker	dispersed_repeat	113674	113751	19	+	.	Target=(ATAT)n 1 79
 83 | NC_007942	RepeatMasker	dispersed_repeat	116626	116676	13	+	.	Target=(ATAA)n 1 50
 84 | NC_007942	RepeatMasker	dispersed_repeat	122126	122154	15	+	.	Target=A-rich 1 29
 85 | NC_007942	RepeatMasker	dispersed_repeat	123642	123793	43	+	.	Target=(ATAT)n 1 156
 86 | NC_007942	RepeatMasker	dispersed_repeat	126573	126632	17	+	.	Target=(TATATTT)n 1 63
 87 | NC_007942	RepeatMasker	dispersed_repeat	126633	126641	13	+	.	Target=(TATAT)n 1 71
 88 | NC_007942	RepeatMasker	dispersed_repeat	129246	129627	408	-	.	Target=LSU-rRNA_Hsa 4137 4519
 89 | NC_007942	RepeatMasker	dispersed_repeat	129856	129964	259	-	.	Target=LSU-rRNA_Hsa 3714 3822
 90 | NC_007942	RepeatMasker	dispersed_repeat	131034	131188	279	-	.	Target=LSU-rRNA_Hsa 1503 1658
 91 | NC_007942	RepeatMasker	dispersed_repeat	131338	131434	277	-	.	Target=LSU-rRNA_Hsa 319 415
 92 | NC_007942	RepeatMasker	dispersed_repeat	131615	131707	261	-	.	Target=LSU-rRNA_Hsa 21 113
 93 | NC_007942	RepeatMasker	dispersed_repeat	134729	135082	366	-	.	Target=SSU-rRNA_Hsa 1031 1392
 94 | NC_007942	RepeatMasker	dispersed_repeat	136957	137007	13	+	.	Target=(AATAGA)n 1 51
 95 | NC_007942	RepeatMasker	dispersed_repeat	139267	139299	13	+	.	Target=A-rich 1 33
 96 | NC_007942	RepeatMasker	dispersed_repeat	142105	142143	13	+	.	Target=(ATAGAT)n 1 39
 97 | NC_007942	RepeatMasker	dispersed_repeat	144574	144613	27	+	.	Target=(TATCGTCAC)n 1 40
 98 | NC_007942	RepeatMasker	dispersed_repeat	151491	151499	29	+	.	Target=(TATATC)n 1 86
 99 | NC_007942	RepeatMasker	dispersed_repeat	151500	151578	23	+	.	Target=(TATC)n 1 82
100 | NC_007942	RepeatMasker	dispersed_repeat	152032	152056	12	+	.	Target=(GAA)n 1 24
101 | 


--------------------------------------------------------------------------------
/tests/spanr/S288c.chr.sizes:
--------------------------------------------------------------------------------
 1 | I	230218
 2 | II	813184
 3 | III	316620
 4 | IV	1531933
 5 | IX	439888
 6 | V	576874
 7 | VI	270161
 8 | VII	1090940
 9 | VIII	562643
10 | X	745751
11 | XI	666816
12 | XII	1078177
13 | XIII	924431
14 | XIV	784333
15 | XV	1091291
16 | XVI	948066
17 | 


--------------------------------------------------------------------------------
/tests/spanr/S288c.rg:
--------------------------------------------------------------------------------
1 | I:1-100
2 | I(+):90-150
3 | S288c.I(-):190-200
4 | II:21294-22075
5 | II:23537-24097
6 | S288c.I(-):190-200|Species=Yeast
7 | 


--------------------------------------------------------------------------------
/tests/spanr/brca2.json:
--------------------------------------------------------------------------------
1 | {
2 |   "13": "32316461-32316527,32319077-32319325,32325076-32325184,32326101-32326150,32326242-32326282,32326499-32326613,32329443-32329492,32330919-32331030,32332272-32333387,32336265-32341196,32344558-32344653,32346827-32346896,32354861-32355288,32356428-32356609,32357742-32357929,32362523-32362693,32363179-32363533,32370402-32370557,32370956-32371100,32376670-32376791,32379317-32379515,32379750-32379913,32380007-32380145,32394689-32394933,32396898-32397044,32398162-32398770"
3 | }
4 | 


--------------------------------------------------------------------------------
/tests/spanr/dazzname.rg:
--------------------------------------------------------------------------------
  1 | infile_0/1/0_514:19-25
  2 | infile_0/1/0_514:26-37
  3 | infile_0/1/0_514:38-50
  4 | infile_0/1/0_514:51-55
  5 | infile_0/1/0_514:56-61
  6 | infile_0/1/0_514:62-62
  7 | infile_0/1/0_514:63-82
  8 | infile_0/1/0_514:83-92
  9 | infile_0/1/0_514:93-94
 10 | infile_0/1/0_514:95-98
 11 | infile_0/1/0_514:99-105
 12 | infile_0/1/0_514:106-111
 13 | infile_0/1/0_514:112-112
 14 | infile_0/1/0_514:113-113
 15 | infile_0/1/0_514:114-114
 16 | infile_0/1/0_514:115-119
 17 | infile_0/1/0_514:120-125
 18 | infile_0/1/0_514:126-128
 19 | infile_0/1/0_514:129-132
 20 | infile_0/1/0_514:133-142
 21 | infile_0/1/0_514:143-151
 22 | infile_0/1/0_514:152-152
 23 | infile_0/1/0_514:153-153
 24 | infile_0/1/0_514:154-168
 25 | infile_0/1/0_514:169-169
 26 | infile_0/1/0_514:170-171
 27 | infile_0/1/0_514:172-176
 28 | infile_0/1/0_514:177-182
 29 | infile_0/1/0_514:183-186
 30 | infile_0/1/0_514:187-188
 31 | infile_0/1/0_514:189-189
 32 | infile_0/1/0_514:190-192
 33 | infile_0/1/0_514:193-193
 34 | infile_0/1/0_514:194-196
 35 | infile_0/1/0_514:197-198
 36 | infile_0/1/0_514:199-201
 37 | infile_0/1/0_514:202-205
 38 | infile_0/1/0_514:206-206
 39 | infile_0/1/0_514:207-208
 40 | infile_0/1/0_514:209-212
 41 | infile_0/1/0_514:213-213
 42 | infile_0/1/0_514:214-224
 43 | infile_0/1/0_514:225-233
 44 | infile_0/1/0_514:234-242
 45 | infile_0/1/0_514:243-243
 46 | infile_0/1/0_514:244-244
 47 | infile_0/1/0_514:245-249
 48 | infile_0/1/0_514:250-254
 49 | infile_0/1/0_514:255-256
 50 | infile_0/1/0_514:257-257
 51 | infile_0/1/0_514:258-259
 52 | infile_0/1/0_514:260-262
 53 | infile_0/1/0_514:263-263
 54 | infile_0/1/0_514:264-264
 55 | infile_0/1/0_514:265-269
 56 | infile_0/1/0_514:270-270
 57 | infile_0/1/0_514:271-276
 58 | infile_0/1/0_514:277-278
 59 | infile_0/1/0_514:279-279
 60 | infile_0/1/0_514:280-282
 61 | infile_0/1/0_514:283-283
 62 | infile_0/1/0_514:284-293
 63 | infile_0/1/0_514:294-300
 64 | infile_0/1/0_514:301-303
 65 | infile_0/1/0_514:304-304
 66 | infile_0/1/0_514:305-307
 67 | infile_0/1/0_514:308-312
 68 | infile_0/1/0_514:313-319
 69 | infile_0/1/0_514:320-322
 70 | infile_0/1/0_514:323-324
 71 | infile_0/1/0_514:325-328
 72 | infile_0/1/0_514:329-333
 73 | infile_0/1/0_514:334-337
 74 | infile_0/1/0_514:338-340
 75 | infile_0/1/0_514:341-343
 76 | infile_0/1/0_514:344-344
 77 | infile_0/1/0_514:345-345
 78 | infile_0/1/0_514:346-347
 79 | infile_0/1/0_514:348-348
 80 | infile_0/1/0_514:349-349
 81 | infile_0/1/0_514:350-356
 82 | infile_0/1/0_514:357-359
 83 | infile_0/1/0_514:360-363
 84 | infile_0/1/0_514:364-375
 85 | infile_0/1/0_514:376-393
 86 | infile_0/1/0_514:394-394
 87 | infile_0/1/0_514:395-395
 88 | infile_0/1/0_514:396-396
 89 | infile_0/1/0_514:397-405
 90 | infile_0/1/0_514:406-408
 91 | infile_0/1/0_514:409-410
 92 | infile_0/1/0_514:411-416
 93 | infile_0/1/0_514:417-420
 94 | infile_0/1/0_514:421-429
 95 | infile_0/1/0_514:430-433
 96 | infile_0/1/0_514:434-451
 97 | infile_0/1/0_514:452-458
 98 | infile_0/1/0_514:459-463
 99 | infile_0/1/0_514:464-475
100 | infile_0/1/0_514:476-479
101 | infile_0/1/0_514:480-496
102 | infile_0/1/0_514:497-499
103 | 


--------------------------------------------------------------------------------
/tests/spanr/repeat.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "I": "-",
 3 |   "II": "327069-327703",
 4 |   "III": "-",
 5 |   "IV": "512988-513590,757572-759779,802895-805654,981142-987119,1017673-1018183,1175134-1175738,1307621-1308556,1504223-1504728",
 6 |   "IX": "-",
 7 |   "V": "354135-354917",
 8 |   "VI": "-",
 9 |   "VII": "778784-779515,878539-879235",
10 |   "VIII": "116405-117059,133581-134226",
11 |   "X": "366757-367499,712641-713226",
12 |   "XI": "162831-163399",
13 |   "XII": "64067-65208,91960-92481,451418-455181,455933-457732,460517-464318,465070-466869,489753-490545,817840-818474",
14 |   "XIII": "609100-609861",
15 |   "XIV": "-",
16 |   "XV": "437522-438484",
17 |   "XVI": "560481-561065"
18 | }
19 | 


--------------------------------------------------------------------------------