├── .editorconfig ├── .gitattributes ├── .github └── workflows │ ├── build.yml │ ├── codecov.yml │ └── publish.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benchmarks ├── implementations.md ├── linkr.md ├── linkr │ ├── links.blast.tsv │ ├── links.lastz.tsv │ ├── run.sh │ └── sort.clean.tsv ├── musl.md ├── musl │ ├── .gitignore │ └── run.sh ├── rgr.md ├── spanr.md └── spanr │ ├── chr.sizes │ ├── dust.yml │ ├── paralog.yml │ ├── run.sh │ └── sep-gene.yml ├── doc ├── intspans.pdf ├── intspans.png ├── intspans.tex ├── ranges.pdf ├── ranges.png └── ranges.tex ├── examples ├── benchmark.rs ├── file.rs └── test.rs ├── release.toml ├── src ├── cmd_linkr │ ├── circos.rs │ ├── clean.rs │ ├── connect.rs │ ├── filter.rs │ ├── mod.rs │ └── sort.rs ├── cmd_rgr │ ├── count.rs │ ├── dedup.rs │ ├── field.rs │ ├── filter.rs │ ├── keep.rs │ ├── md.rs │ ├── merge.rs │ ├── mod.rs │ ├── pl_2rmp.rs │ ├── prop.rs │ ├── replace.rs │ ├── runlist.rs │ ├── select.rs │ ├── sort.rs │ └── span.rs ├── cmd_spanr │ ├── combine.rs │ ├── compare.rs │ ├── convert.rs │ ├── cover.rs │ ├── coverage.rs │ ├── genome.rs │ ├── gff.rs │ ├── merge.rs │ ├── mod.rs │ ├── some.rs │ ├── span.rs │ ├── split.rs │ ├── stat.rs │ └── statop.rs ├── lib.rs ├── libs │ ├── coverage.rs │ ├── intspan.rs │ ├── linalg.rs │ ├── matrix.rs │ ├── mod.rs │ └── range.rs ├── linkr.rs ├── rgr.rs ├── spanr.rs └── utils.rs └── tests ├── Atha ├── chr.sizes ├── links.blast.tsv.gz └── links.lastz.tsv.gz ├── S288c ├── chr.sizes ├── links.blast.tsv └── links.lastz.tsv ├── cli_linkr.rs ├── cli_rgr.rs ├── cli_rgr_tsv.rs ├── cli_spanr.rs ├── fasr ├── NC_000932.fa └── NC_000932.fa.fai ├── linkr ├── II.clean.tsv ├── II.connect.tsv ├── II.links.tsv ├── II.merge.tsv └── II.sort.tsv ├── rgr ├── 1_4.ovlp.tsv ├── 1_4.replace.tsv ├── II.links.tsv ├── S288c.rg ├── ctg.range.tsv ├── ctg.tsv ├── ctg_2_1_.gc.tsv ├── intergenic.json ├── intergenic.yml ├── ranges.tsv.gz └── tn.tsv └── spanr ├── Atha.chr.sizes ├── Atha.json 
├── Atha.list ├── I.II.json ├── I.json ├── II.json ├── II.other.json ├── NC_007942.gff ├── NC_007942.rm.gff ├── S288c.chr.sizes ├── S288c.rg ├── brca2.json ├── dazzname.rg ├── intergenic.json ├── paralog.json └── repeat.json /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | charset = utf-8 8 | indent_style = space 9 | indent_size = 4 10 | end_of_line = lf 11 | trim_trailing_whitespace = true 12 | insert_final_newline = true 13 | 14 | [*.json] 15 | indent_size = 2 16 | 17 | [*.yml] 18 | indent_size = 2 19 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Detect text files automatically 2 | * text=auto 3 | 4 | # Force Unix-style line endings on these files 5 | * eol=lf 6 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | name: Build ${{ matrix.rust }} on ${{ matrix.os }} 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | matrix: 11 | os: [ubuntu-latest, windows-latest, macOS-latest] 12 | rust: [stable, nightly] 13 | 14 | steps: 15 | - uses: hecrj/setup-rust-action@v1 16 | with: 17 | rust-version: ${{ matrix.rust }} 18 | - uses: actions/checkout@v4 19 | with: 20 | persist-credentials: false 21 | - name: Build 22 | run: cargo build --verbose 23 | - name: Run tests 24 | run: cargo test --verbose 25 | -------------------------------------------------------------------------------- /.github/workflows/codecov.yml: -------------------------------------------------------------------------------- 1 | name: Code Coverage 2 | 3 | on: [push] 4 | 5 | env: 6 | CARGO_TERM_COLOR: 
always 7 | 8 | jobs: 9 | check: 10 | name: codecov 11 | runs-on: ubuntu-latest 12 | container: 13 | image: xd009642/tarpaulin:develop-nightly 14 | options: --security-opt seccomp=unconfined 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | persist-credentials: false 19 | 20 | - name: Generate code coverage 21 | run: | 22 | cargo +nightly tarpaulin --bins --tests --doc --follow-exec --engine llvm --out xml -- --test-threads 1 23 | 24 | - name: Upload to codecov.io 25 | uses: codecov/codecov-action@v4 26 | with: 27 | fail_ci_if_error: true 28 | token: ${{secrets.CODECOV_TOKEN}} 29 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | # We need this to be able to create releases. 9 | permissions: 10 | contents: write 11 | 12 | jobs: 13 | linux-gnu: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: hecrj/setup-rust-action@v1 17 | with: 18 | rust-version: stable 19 | targets: x86_64-unknown-linux-gnu 20 | - uses: goto-bus-stop/setup-zig@v2 21 | - name: Install cargo-zigbuild 22 | run: cargo install cargo-zigbuild 23 | - uses: actions/checkout@v4 24 | with: 25 | persist-credentials: false 26 | - name: Build 27 | run: cargo zigbuild --verbose --release --target x86_64-unknown-linux-gnu.2.17 28 | - name: Create tarball 29 | run: tar cvfz intspan.tar.gz --transform 's|.*/||' target/x86_64-unknown-linux-gnu/release/spanr target/x86_64-unknown-linux-gnu/release/rgr target/x86_64-unknown-linux-gnu/release/linkr 30 | shell: bash 31 | - name: Upload binaries to releases 32 | uses: svenstaro/upload-release-action@v2 33 | with: 34 | repo_token: ${{ secrets.GITHUB_TOKEN }} 35 | file: intspan.tar.gz 36 | asset_name: intspan-x86_64-unknown-linux-gnu.tar.gz 37 | tag: ${{ github.ref }} 38 | overwrite: true 39 | linux-musl: 40 | runs-on: ubuntu-latest 41 | steps: 42 
| - uses: hecrj/setup-rust-action@v1 43 | with: 44 | rust-version: stable 45 | targets: x86_64-unknown-linux-musl 46 | - uses: actions/checkout@v4 47 | with: 48 | persist-credentials: false 49 | - name: Build 50 | run: cargo build --verbose --release --target x86_64-unknown-linux-musl 51 | - name: Create tarball 52 | run: tar cvfz intspan.tar.gz --transform 's|.*/||' target/x86_64-unknown-linux-musl/release/spanr target/x86_64-unknown-linux-musl/release/rgr target/x86_64-unknown-linux-musl/release/linkr 53 | shell: bash 54 | - name: Upload binaries to releases 55 | uses: svenstaro/upload-release-action@v2 56 | with: 57 | repo_token: ${{ secrets.GITHUB_TOKEN }} 58 | file: intspan.tar.gz 59 | asset_name: intspan-x86_64-unknown-linux-musl.tar.gz 60 | tag: ${{ github.ref }} 61 | overwrite: true 62 | macos: 63 | runs-on: macOS-latest 64 | steps: 65 | - uses: hecrj/setup-rust-action@v1 66 | with: 67 | rust-version: stable 68 | - uses: actions/checkout@v4 69 | with: 70 | persist-credentials: false 71 | - name: Build 72 | env: 73 | MACOSX_DEPLOYMENT_TARGET: 10.12 74 | run: cargo build --verbose --release 75 | - name: Create tarball 76 | run: | 77 | cd target/release 78 | tar cvfz ../../intspan.tar.gz spanr rgr linkr 79 | shell: bash 80 | - name: Upload binaries to releases 81 | uses: svenstaro/upload-release-action@v2 82 | with: 83 | repo_token: ${{ secrets.GITHUB_TOKEN }} 84 | file: intspan.tar.gz 85 | asset_name: intspan-x86_64-apple-darwin.tar.gz 86 | tag: ${{ github.ref }} 87 | overwrite: true 88 | windows: 89 | runs-on: windows-latest 90 | steps: 91 | - uses: hecrj/setup-rust-action@v1 92 | with: 93 | rust-version: stable 94 | - uses: actions/checkout@v4 95 | with: 96 | persist-credentials: false 97 | - name: Enable static CRT linkage 98 | run: | 99 | mkdir .cargo 100 | echo '[target.x86_64-pc-windows-msvc]' >> .cargo/config.toml 101 | echo 'rustflags = ["-Ctarget-feature=+crt-static"]' >> .cargo/config.toml 102 | - name: Build 103 | run: cargo build --verbose --release 104 | - 
name: Create zip 105 | run: Compress-Archive -DestinationPath ./intspan.zip -Path ./target/release/spanr.exe,./target/release/rgr.exe,./target/release/linkr.exe 106 | shell: powershell 107 | - name: Upload binaries to releases 108 | uses: svenstaro/upload-release-action@v2 109 | with: 110 | repo_token: ${{ secrets.GITHUB_TOKEN }} 111 | file: intspan.zip 112 | asset_name: intspan-x86_64-pc-windows-msvc.zip 113 | tag: ${{ github.ref }} 114 | overwrite: true 115 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #----------------------------# 2 | # IDE 3 | #----------------------------# 4 | .idea 5 | 6 | #----------------------------# 7 | # macOS 8 | #----------------------------# 9 | 10 | # General 11 | .DS_Store 12 | .AppleDouble 13 | .LSOverride 14 | 15 | # Icon must end with two \r 16 | Icon 17 | 18 | # Thumbnails 19 | ._* 20 | 21 | # Files that might appear in the root of a volume 22 | .DocumentRevisions-V100 23 | .fseventsd 24 | .Spotlight-V100 25 | .TemporaryItems 26 | .Trashes 27 | .VolumeIcon.icns 28 | .com.apple.timemachine.donotpresent 29 | 30 | # Directories potentially created on remote AFP share 31 | .AppleDB 32 | .AppleDesktop 33 | Network Trash Folder 34 | Temporary Items 35 | .apdisk 36 | 37 | #----------------------------# 38 | # Linux 39 | #----------------------------# 40 | 41 | *~ 42 | 43 | # temporary files which can be created if a process still has a handle open of a deleted file 44 | .fuse_hidden* 45 | 46 | # KDE directory preferences 47 | .directory 48 | 49 | # Linux trash folder which might appear on any partition or disk 50 | .Trash-* 51 | 52 | # .nfs files are created when an open file is removed but is still being accessed 53 | .nfs* 54 | 55 | #----------------------------# 56 | # Windows 57 | #----------------------------# 58 | 59 | # Windows thumbnail cache files 60 | Thumbs.db 61 | ehthumbs.db 62 | 
ehthumbs_vista.db 63 | 64 | # Dump file 65 | *.stackdump 66 | 67 | # Folder config file 68 | [Dd]esktop.ini 69 | 70 | # Recycle Bin used on file shares 71 | $RECYCLE.BIN/ 72 | 73 | # Windows Installer files 74 | *.cab 75 | *.msi 76 | *.msix 77 | *.msm 78 | *.msp 79 | 80 | # Windows shortcuts 81 | *.lnk 82 | 83 | #----------------------------# 84 | # LaTeX 85 | #----------------------------# 86 | 87 | # LaTeX tmp 88 | *.toc 89 | *.aux 90 | *.fdb_latexmk 91 | *.fls 92 | *.nav 93 | *.out 94 | *.snm 95 | *.vrb 96 | *(busy) 97 | *.synctex.gz 98 | 99 | # bibtex tmp 100 | *.bbl 101 | *.bcf 102 | *.blg 103 | *-blx.bib 104 | *.run.xml 105 | 106 | # nomencl tmp 107 | *.ilg 108 | *.nlo 109 | *.nls 110 | 111 | # latexindent backup 112 | *.bak* 113 | indent.log 114 | 115 | #----------------------------# 116 | # This project 117 | #----------------------------# 118 | 119 | # Generated by Cargo 120 | # will have compiled files and executables 121 | /target/ 122 | 123 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 124 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 125 | Cargo.lock 126 | 127 | # These are backup files generated by rustfmt 128 | **/*.rs.bk 129 | 130 | *.tmp 131 | tests/S288c/ 132 | tests/Atha/ 133 | *.sizes 134 | *.xlsx 135 | PL-*/ 136 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## Unreleased - ReleaseDate 4 | 5 | ## 0.8.7 - 2025-04-06 6 | 7 | * Add linear algebra functions 8 | * Add matrix operations 9 | * Add `ScoringMatrix` for pairwise scores with missing values 10 | * Add `NamedMatrix` for complete distance matrices in PHYLIP format 11 | * Support PHYLIP format input 12 | * Remove AppVeyor CI 13 | 14 | ## 0.8.6 - 2025-04-02 15 | 16 | * Add `--ff-eq` and `--ff-ne` to `rgr filter` 17 | * Improve code quality 18 | * Use 
`into_iter()` and `collect()` to simplify code 19 | * Refactor `IntSpan::to_vec()`, `spans()`, `ranges()`, `runs()` and `intses()` 20 | * Improve CI/CD 21 | * Use `cargo-zigbuild` to build linux-gnu binary with GLIBC 2.17 22 | * Simplify tar archives by removing path prefixes 23 | 24 | ## 0.8.4 - 2024-12-30 25 | 26 | * Add `--lines` and `--delete` to `rgr keep` 27 | 28 | * Refactor the code in `rgr` to make it cleaner and more maintainable 29 | 30 | ## 0.8.3 - 2024-12-29 31 | 32 | * Add `rgr span` 33 | * Add `rgr keep` 34 | 35 | * Add numeric comparisons to `rgr filter` 36 | * Add operations to `Range` 37 | * Remove --fields from `rgr field` 38 | 39 | ## 0.8.2 - 2024-12-21 40 | 41 | * Add `IntSpan::valid()` 42 | 43 | * Add `rgr filter` 44 | * Add `rgr select` 45 | 46 | ## 0.8.0 - 2024-11-30 47 | 48 | * Move `fasr` to `hnsm` 49 | 50 | * Add `--longest` to `spanr convert` 51 | * Format Markdown tables in `rgr md` 52 | 53 | ## 0.7.9 - 2024-11-15 54 | 55 | * Add `rgr dedup` 56 | 57 | * Add --fmt to `rgr md` 58 | 59 | ## 0.7.8 - 2024-11-04 60 | 61 | * Add `rgr md` 62 | 63 | ## 0.7.7 - 2024-07-19 64 | 65 | * Move `ovlpr` to `anchr` 66 | 67 | * Add `rgr pl-2rmp` 68 | * Add `--group` to `rgr sort` 69 | * Use `MultiGzDecoder` in intspan::reader() 70 | 71 | ## 0.7.3 - 2023-07-18 72 | 73 | * Add `utils::get_seq_faidx()` 74 | * Add `IntSpan.find_islands_n()` and `IntSpan.find_islands_ints()` 75 | 76 | * Use json to replace yaml in `spanr` 77 | 78 | * Bump versions of deps 79 | * clap v4 80 | * Use anyhow 81 | 82 | ## 0.7.1 - 2022-06-14 83 | 84 | * Store `IntSpan.edges` in VecDeque 85 | * Switch to `clap` v3.2 86 | 87 | ## 0.7.0 - 2022-05-23 88 | 89 | * Add `rgr sort` 90 | * Add `rgr prop` 91 | 92 | * Add --fields to `rgr field` 93 | * Add --header, --sharp, --field to `rgr count` and `rgr runlist` 94 | 95 | ## 0.6.9 - 2022-05-15 96 | 97 | * Add `rgr field` 98 | 99 | * Move `spanr range` to `rgr runlist` 100 | * Move `spanr count` to `rgr count` 101 | 102 | * Rename .ranges to 
.rg 103 | 104 | ## 0.6.8 - 2022-05-14 105 | 106 | * New binary `rgr` 107 | 108 | * Move `ovlpr replace` to `rgr replace` 109 | * Move `linkr merge` to `rgr merge` 110 | 111 | ## 0.6.7 - 2022-04-24 112 | 113 | * Add `spanr count` 114 | * Add `--detailed` to `spanr coverage` 115 | * Use `Box` 116 | 117 | ## 0.6.5 - 2022-04-22 118 | 119 | * Use rust_lapper as an intermediate layer instead of intspan::Coverage 120 | * Greatly improves the speed of `spanr coverage` 121 | 122 | ## 0.6.4 - 2022-04-21 123 | 124 | * Move `far` out 125 | * `spanr stat` use i64 in the `all` lines 126 | * Update Github actions 127 | * Use a container with GLIBC 2.17 to build linux-gnu binary 128 | * Codecov with cargo-tarpaulin 129 | 130 | ## 0.6.0 - 2022-02-22 131 | 132 | * Move `nwr` out 133 | 134 | ## 0.4.16 - 2022-02-12 135 | 136 | * Switch to `clap` v3 137 | 138 | ## 0.4.15 - 2021-08-19 139 | 140 | * Add `far some` 141 | * Add `slice()` to `IntSpan` 142 | * `ovlpr replace` now processes any .tsv files 143 | 144 | ## 0.4.14 - 2020-05-15 145 | 146 | ## 0.4.13 - 2020-05-15 147 | 148 | * New binary `far` 149 | 150 | ## 0.4.12 - 2020-03-05 151 | 152 | * Split `spanr cover` into `cover` and `coverage` 153 | 154 | ## 0.4.11 - 2020-02-15 155 | 156 | * Add `--all` to `spanr merge` 157 | 158 | ## 0.4.10 - 2020-02-15 159 | 160 | * Add `--op` to `spanr combine` 161 | 162 | ## 0.4.9 - 2019-12-09 163 | 164 | * Add `ovlpr replace` 165 | * Add `ovlpr restrict` 166 | 167 | * Github Actions publish.yml 168 | 169 | ## 0.4.1 - 2019-09-10 170 | 171 | * Add benchmarks.md 172 | * Add `ovlpr paf2ovlp` 173 | 174 | * Binary releases by Github Actions 175 | 176 | ## 0.4.0 - 2019-09-07 177 | 178 | * New binary `ovlpr` 179 | * Struct `Overlap` 180 | 181 | * Move libraries to libs/ 182 | * Passing `&str` when calling methods 183 | * Add `new_len()` and `uniq_tiers()` to `Coverage` 184 | * Add `from_pair()` to `IntSpan` 185 | * Wrap IO functions in utils.rs with Result 186 | * Satisfy clippy 187 | 188 | ## 0.3.3 - 
2019-09-04 189 | 190 | * `spanr merge`: take the first part of filename 191 | * `spanr compare`: compare more than two infiles 192 | 193 | ## 0.3.2 - 2019-09-03 194 | 195 | * Add `--suffix` to `spanr split` 196 | 197 | ## 0.3.1 - 2019-09-03 198 | 199 | * Detailed benchmarks on `linkr` 200 | 201 | * Make POS_INF, NEG_INF and EMPTY_STRING as lazy_static 202 | * About 10-20% faster 203 | 204 | ## 0.3.0 - 2019-09-03 205 | 206 | * New binary `linkr` for commands ported from `App::Rangeops` and `jrange` 207 | * Illustrations for some concepts 208 | * IntSpans 209 | * Ranges 210 | 211 | * Rename binary `intspan` to `spanr` 212 | 213 | ## 0.2.0 - 2019-08-24 214 | 215 | * Ported all commands from `App::RL` and `jrunlist` 216 | * Struct `Range` 217 | * Struct `Coverage` 218 | * Adopt `cargo release` 219 | 220 | ## 0.1.0 - 2019-08-13 221 | 222 | * Struct `IntSpan` 223 | * Examples 224 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "intspan" 3 | version = "0.8.7" 4 | authors = ["wang-q "] 5 | description = "Command line tools for IntSpan related bioinformatics operations" 6 | documentation = "https://github.com/wang-q/intspan" 7 | homepage = "https://github.com/wang-q/intspan" 8 | repository = "https://github.com/wang-q/intspan" 9 | readme = "README.md" 10 | categories = ["command-line-utilities", "science"] 11 | license = "MIT" 12 | edition = "2018" 13 | 14 | [lib] 15 | name = "intspan" 16 | 17 | [[bin]] 18 | name = "spanr" 19 | path = "src/spanr.rs" 20 | 21 | [[bin]] 22 | name = "linkr" 23 | path = "src/linkr.rs" 24 | 25 | [[bin]] 26 | name = "rgr" 27 | path = "src/rgr.rs" 28 | 29 | [[example]] 30 | name = "test" 31 | 32 | [[example]] 33 | name = "benchmark" 34 | 35 | [[example]] 36 | name = "file" 37 | 38 | [dependencies] 39 | clap = { version = "4.3.12", features = ["cargo"] } 40 | serde = "1.0.171" 41 | serde_json = 
"1.0.103" 42 | anyhow = "1.0.72" 43 | regex = "1.9.1" 44 | lazy_static = "1.4.0" 45 | flate2 = "1.0.26" 46 | itertools = "0.11.0" 47 | 48 | petgraph = "0.6.3" 49 | indexmap = "2.0.0" 50 | rust-lapper = "1.1.0" 51 | bio = "0.30.1" 52 | crossbeam = "0.8.2" 53 | rust_xlsxwriter = "0.43.0" 54 | 55 | csv = "1.3.1" 56 | xxhash-rust = { version = "0.8.15", features = ["xxh3"] } 57 | markdown-table-formatter = "0.3.0" 58 | 59 | which = "4.4.0" 60 | cmd_lib = "1.9.4" 61 | tempfile = "3.6.0" 62 | path-clean = "1.0.1" 63 | 64 | [build-dependencies] 65 | 66 | [dev-dependencies] 67 | assert_cmd = "2.0.12" 68 | predicates = "3.0.3" 69 | 70 | [profile.release] 71 | lto = true 72 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Qiang Wang 4 | 5 | Permission is hereby granted, free of charge, to any 6 | person obtaining a copy of this software and associated 7 | documentation files (the "Software"), to deal in the 8 | Software without restriction, including without 9 | limitation the rights to use, copy, modify, merge, 10 | publish, distribute, sublicense, and/or sell copies of 11 | the Software, and to permit persons to whom the Software 12 | is furnished to do so, subject to the following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice 16 | shall be included in all copies or substantial portions 17 | of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 20 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 21 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 22 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 23 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 25 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 26 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 27 | DEALINGS IN THE SOFTWARE. 28 | -------------------------------------------------------------------------------- /benchmarks/linkr/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #----------------------------# 4 | # Colors in term 5 | #----------------------------# 6 | # http://stackoverflow.com/questions/5947742/how-to-change-the-output-color-of-echo-in-linux 7 | GREEN= 8 | RED= 9 | NC= 10 | if tty -s < /dev/fd/1 2> /dev/null; then 11 | GREEN='\033[0;32m' 12 | RED='\033[0;31m' 13 | NC='\033[0m' # No Color 14 | fi 15 | 16 | log_warn () { 17 | echo >&2 -e "${RED}==> $@ <==${NC}" 18 | } 19 | 20 | log_info () { 21 | echo >&2 -e "${GREEN}==> $@${NC}" 22 | } 23 | 24 | log_debug () { 25 | echo >&2 -e "==> $@" 26 | } 27 | 28 | #----------------------------# 29 | # Prepare 30 | #----------------------------# 31 | COMMAND_TIME="command time -v" 32 | if [[ `uname` == 'Darwin' ]]; then 33 | COMMAND_TIME="command time -l" 34 | fi 35 | 36 | # enter BASE_DIR 37 | BASE_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 38 | cd ${BASE_DIR} 39 | 40 | #----------------------------# 41 | # Run 42 | #----------------------------# 43 | log_warn "merge" 44 | log_info "jrange merge lastz blast" 45 | ${COMMAND_TIME} jrange \ 46 | merge \ 47 | -o stdout -c 0.95 \ 48 | links.lastz.tsv \ 49 | links.blast.tsv | 50 | sort \ 51 | > jmerge.tsv.tmp 52 | 53 | log_info "rgr merge lastz blast" 54 | ${COMMAND_TIME} rgr \ 55 | merge \ 56 | -o stdout -c 0.95 \ 57 | links.lastz.tsv \ 58 | links.blast.tsv | 59 | sort \ 60 | > rmerge.tsv.tmp 61 | 62 | log_info "rangeops merge lastz blast" 63 | ${COMMAND_TIME} rangeops \ 64 | merge \ 65 | -o stdout -c 0.95 -p 8 \ 66 | 
links.lastz.tsv \ 67 | links.blast.tsv | 68 | sort \ 69 | > pmerge.tsv.tmp 70 | echo >&2 71 | 72 | log_warn "clean" 73 | log_info "jrange clean sort.clean" 74 | ${COMMAND_TIME} jrange \ 75 | clean \ 76 | -o stdout \ 77 | sort.clean.tsv \ 78 | > jclean.tsv.tmp 79 | 80 | log_info "linkr clean sort.clean" 81 | ${COMMAND_TIME} linkr \ 82 | clean \ 83 | -o stdout \ 84 | sort.clean.tsv \ 85 | > rclean.tsv.tmp 86 | 87 | log_info "rangeops clean sort.clean" 88 | ${COMMAND_TIME} rangeops \ 89 | clean \ 90 | -o stdout \ 91 | sort.clean.tsv \ 92 | > pclean.tsv.tmp 93 | echo >&2 94 | 95 | log_warn "clean bundle" 96 | log_info "jrange clean bundle sort.clean" 97 | ${COMMAND_TIME} jrange \ 98 | clean \ 99 | -o stdout \ 100 | --bundle 500 \ 101 | sort.clean.tsv \ 102 | > jbundle.tsv.tmp 103 | 104 | log_info "linkr clean bundle sort.clean" 105 | ${COMMAND_TIME} linkr \ 106 | clean \ 107 | -o stdout \ 108 | --bundle 500 \ 109 | sort.clean.tsv \ 110 | > rbundle.tsv.tmp 111 | 112 | log_info "rangeops clean bundle sort.clean" 113 | ${COMMAND_TIME} rangeops \ 114 | clean \ 115 | -o stdout \ 116 | --bundle 500 \ 117 | sort.clean.tsv \ 118 | > pbundle.tsv.tmp 119 | echo >&2 120 | -------------------------------------------------------------------------------- /benchmarks/musl.md: -------------------------------------------------------------------------------- 1 | # `gcc` vs `musl` 2 | 3 | * Ubuntu 14.04 E5-2690 v3 4 | * rustc 1.40 5 | * gcc with lto 6 | * musl with lto 7 | * Ryzen 7 5800 Windows 11 WSL 8 | * rustc 1.60.0 9 | * i7-12700T Windows 11 WSL 10 | * rustc 1.82.0 11 | 12 | ## `bash benchmarks/musl/run.sh` 13 | 14 | ```shell 15 | # cargo install --path ~/Scripts/rust/intspan --force 16 | 17 | bash ~/Scripts/intspan/benchmarks/musl/run.sh 18 | 19 | find ~/Scripts/intspan/benchmarks/musl/* | 20 | grep -v "run.sh" | 21 | grep -v ".gitignore" | 22 | xargs rm -fr 23 | 24 | ``` 25 | 26 | ## Results 27 | 28 | * Above - E5-2690 v3 29 | * Mid - Ryzen 7 5800 30 | * Below - i5-12500H 31 | 32 
| * sort 33 | 34 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 35 | |:--------|-------------:|---------:|---------:|---------:| 36 | | cargo | 127.9 ± 22.7 | 97.3 | 147.7 | 1.2 | 37 | | gcc | 107.9 ± 21.6 | 92.2 | 142.5 | 1.0 | 38 | | musl | 132.9 ± 23.2 | 102.2 | 151.4 | 1.2 | 39 | 40 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 41 | |:--------|-----------:|---------:|---------:|------------:| 42 | | `cargo` | 36.6 ± 0.5 | 35.8 | 38.0 | 1.00 | 43 | | `gcc` | 68.0 ± 1.3 | 63.3 | 71.4 | 1.86 ± 0.04 | 44 | | `musl` | 74.6 ± 1.9 | 70.1 | 77.6 | 2.04 ± 0.06 | 45 | 46 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 47 | |:---------|-------------:|---------:|---------:|------------:| 48 | | `cargo` | 43.0 ± 1.7 | 39.9 | 47.8 | 1.00 | 49 | | `gcc` | 137.9 ± 21.0 | 116.4 | 171.2 | 3.21 ± 0.51 | 50 | | `musl` | 180.8 ± 20.1 | 139.7 | 204.2 | 4.21 ± 0.50 | 51 | | `zig cc` | 138.8 ± 23.9 | 109.6 | 174.3 | 3.23 ± 0.57 | 52 | 53 | * clean 54 | 55 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 56 | |:--------|--------------:|--------:|--------:|---------:| 57 | | cargo | 4.266 ± 0.075 | 4.224 | 4.478 | 1.0 | 58 | | gcc | 6.090 ± 2.789 | 3.824 | 9.361 | 1.4 | 59 | | musl | 7.869 ± 3.102 | 4.839 | 11.355 | 1.8 | 60 | 61 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 62 | |:--------|--------------:|--------:|--------:|------------:| 63 | | `cargo` | 1.446 ± 0.344 | 1.279 | 2.351 | 1.06 ± 0.25 | 64 | | `gcc` | 1.361 ± 0.029 | 1.334 | 1.420 | 1.00 | 65 | | `musl` | 2.624 ± 0.029 | 2.586 | 2.697 | 1.93 ± 0.05 | 66 | 67 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 68 | |:---------|--------------:|--------:|--------:|------------:| 69 | | `cargo` | 1.545 ± 0.058 | 1.476 | 1.631 | 1.00 | 70 | | `gcc` | 1.587 ± 0.050 | 1.531 | 1.700 | 1.03 ± 0.05 | 71 | | `musl` | 3.142 ± 0.052 | 3.079 | 3.232 | 2.03 ± 0.08 | 72 | | `zig cc` | 1.560 ± 0.036 | 1.521 | 1.644 | 1.01 ± 0.04 | 73 | 74 | * merge 75 | 76 | | Command | Mean [s] | 
Min [s] | Max [s] | Relative | 77 | |:--------|--------------:|--------:|--------:|---------:| 78 | | cargo | 2.991 ± 0.006 | 2.976 | 2.999 | 1.1 | 79 | | gcc | 2.712 ± 0.003 | 2.707 | 2.716 | 1.0 | 80 | | musl | 4.527 ± 0.086 | 4.492 | 4.770 | 1.7 | 81 | 82 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 83 | |:--------|--------------:|--------:|--------:|------------:| 84 | | `cargo` | 1.251 ± 0.041 | 1.218 | 1.335 | 1.00 | 85 | | `gcc` | 1.253 ± 0.022 | 1.228 | 1.289 | 1.00 ± 0.04 | 86 | | `musl` | 2.791 ± 0.027 | 2.766 | 2.833 | 2.23 ± 0.08 | 87 | 88 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 89 | |:---------|--------------:|--------:|--------:|------------:| 90 | | `cargo` | 1.560 ± 0.081 | 1.468 | 1.756 | 1.00 | 91 | | `gcc` | 1.633 ± 0.031 | 1.593 | 1.688 | 1.05 ± 0.06 | 92 | | `musl` | 2.974 ± 0.112 | 2.887 | 3.252 | 1.91 ± 0.12 | 93 | | `zig cc` | 1.663 ± 0.062 | 1.603 | 1.807 | 1.07 ± 0.07 | 94 | 95 | * clean2 96 | 97 | | Command | Mean [s] | Min [s] | Max [s] | Relative | 98 | |:--------|--------------:|--------:|--------:|---------:| 99 | | cargo | 5.152 ± 0.026 | 5.132 | 5.221 | 1.1 | 100 | | gcc | 4.821 ± 0.003 | 4.817 | 4.826 | 1.0 | 101 | | musl | 5.983 ± 0.924 | 3.370 | 6.544 | 1.2 | 102 | 103 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 104 | |:--------|---------------:|---------:|---------:|------------:| 105 | | `cargo` | 828.6 ± 6.9 | 817.6 | 838.1 | 1.00 | 106 | | `gcc` | 873.9 ± 15.7 | 857.5 | 915.8 | 1.05 ± 0.02 | 107 | | `musl` | 2198.2 ± 650.3 | 1468.9 | 3210.7 | 2.65 ± 0.79 | 108 | 109 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 110 | |:---------|--------------:|---------:|---------:|------------:| 111 | | `cargo` | 945.5 ± 48.9 | 890.1 | 1037.9 | 1.00 | 112 | | `gcc` | 1014.3 ± 30.7 | 960.1 | 1057.8 | 1.07 ± 0.06 | 113 | | `musl` | 1669.3 ± 19.1 | 1642.6 | 1695.9 | 1.77 ± 0.09 | 114 | | `zig cc` | 991.3 ± 43.3 | 942.7 | 1068.4 | 1.05 ± 0.07 | 115 | 116 | * connect 117 | 118 | | Command | 
Mean [ms] | Min [ms] | Max [ms] | Relative | 119 | |:--------|-------------:|---------:|---------:|---------:| 120 | | cargo | 387.0 ± 0.7 | 386.2 | 387.8 | 1.0 | 121 | | gcc | 374.5 ± 74.3 | 247.8 | 532.1 | 1.0 | 122 | | musl | 383.0 ± 0.6 | 382.0 | 384.2 | 1.0 | 123 | 124 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 125 | |:--------|------------:|---------:|---------:|------------:| 126 | | `cargo` | 83.8 ± 4.2 | 79.0 | 102.0 | 1.00 | 127 | | `gcc` | 118.5 ± 7.9 | 107.4 | 135.2 | 1.41 ± 0.12 | 128 | | `musl` | 131.6 ± 3.9 | 125.5 | 138.9 | 1.57 ± 0.09 | 129 | 130 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 131 | |:---------|-------------:|---------:|---------:|------------:| 132 | | `cargo` | 89.2 ± 3.1 | 84.4 | 99.8 | 1.00 | 133 | | `gcc` | 187.5 ± 20.5 | 160.7 | 216.1 | 2.10 ± 0.24 | 134 | | `musl` | 256.1 ± 20.2 | 235.0 | 290.3 | 2.87 ± 0.25 | 135 | | `zig cc` | 196.9 ± 31.0 | 170.0 | 303.5 | 2.21 ± 0.36 | 136 | 137 | * filter 138 | 139 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 140 | |:--------|------------:|---------:|---------:|---------:| 141 | | cargo | 59.2 ± 1.1 | 58.4 | 66.2 | 1.1 | 142 | | gcc | 54.7 ± 0.3 | 54.2 | 55.7 | 1.0 | 143 | | musl | 55.6 ± 22.3 | 30.9 | 126.5 | 1.0 | 144 | 145 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 146 | |:--------|-----------:|---------:|---------:|------------:| 147 | | `cargo` | 14.8 ± 0.9 | 13.4 | 20.4 | 1.00 | 148 | | `gcc` | 48.7 ± 2.6 | 41.5 | 56.6 | 3.29 ± 0.27 | 149 | | `musl` | 49.9 ± 2.6 | 44.9 | 61.0 | 3.37 ± 0.28 | 150 | 151 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 152 | |:---------|-------------:|---------:|---------:|------------:| 153 | | `cargo` | 19.3 ± 1.7 | 16.5 | 33.0 | 1.00 | 154 | | `gcc` | 128.0 ± 19.0 | 92.5 | 151.2 | 6.63 ± 1.14 | 155 | | `musl` | 141.4 ± 17.5 | 116.7 | 184.3 | 7.32 ± 1.11 | 156 | | `zig cc` | 138.0 ± 14.4 | 103.6 | 162.2 | 7.15 ± 0.97 | 157 | 
-------------------------------------------------------------------------------- /benchmarks/musl/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /benchmarks/musl/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #----------------------------# 4 | # Colors in term 5 | #----------------------------# 6 | # http://stackoverflow.com/questions/5947742/how-to-change-the-output-color-of-echo-in-linux 7 | GREEN= 8 | RED= 9 | NC= 10 | if tty -s < /dev/fd/1 2> /dev/null; then 11 | GREEN='\033[0;32m' 12 | RED='\033[0;31m' 13 | NC='\033[0m' # No Color 14 | fi 15 | 16 | log_warn () { 17 | echo >&2 -e "${RED}==> $@ <==${NC}" 18 | } 19 | 20 | log_info () { 21 | echo >&2 -e "${GREEN}==> $@${NC}" 22 | } 23 | 24 | log_debug () { 25 | echo >&2 -e "==> $@" 26 | } 27 | 28 | #----------------------------# 29 | # Prepare 30 | #----------------------------# 31 | COMMAND_TIME="command time -v" 32 | if [[ `uname` == 'Darwin' ]]; then 33 | COMMAND_TIME="command time -l" 34 | fi 35 | 36 | # enter BASE_DIR 37 | BASE_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 38 | cd ${BASE_DIR} 39 | 40 | if [[ ! -e intspan-x86_64-unknown-linux-gnu.tar.gz ]]; then 41 | curl -LO https://github.com/wang-q/intspan/releases/download/v0.8.0/intspan-x86_64-unknown-linux-gnu.tar.gz 42 | fi 43 | 44 | if [[ ! -e intspan-x86_64-unknown-linux-musl.tar.gz ]]; then 45 | curl -LO https://github.com/wang-q/intspan/releases/download/v0.8.0/intspan-x86_64-unknown-linux-musl.tar.gz 46 | fi 47 | 48 | if [[ ! 
-e intspan.x86_64-unknown-linux-gnu.tar.gz ]]; then 49 | curl -LO https://github.com/wang-q/builds/raw/refs/heads/master/tar/intspan.x86_64-unknown-linux-gnu.tar.gz 50 | fi 51 | 52 | tar xvfz intspan-x86_64-unknown-linux-gnu.tar.gz 53 | tar xvfz intspan-x86_64-unknown-linux-musl.tar.gz 54 | tar xvfz intspan.x86_64-unknown-linux-gnu.tar.gz 55 | 56 | #----------------------------# 57 | # Run 58 | #----------------------------# 59 | log_info "sort" 60 | hyperfine --warmup 1 --export-markdown sort.md \ 61 | -n cargo \ 62 | -n gcc \ 63 | -n musl \ 64 | -n "zig cc" \ 65 | 'gzip -dcf ../../tests/Atha/links.lastz.tsv.gz ../../tests/Atha/links.blast.tsv.gz | ~/.cargo/bin/linkr sort stdin -o /dev/null' \ 66 | 'gzip -dcf ../../tests/Atha/links.lastz.tsv.gz ../../tests/Atha/links.blast.tsv.gz | target/release/linkr sort stdin -o /dev/null' \ 67 | 'gzip -dcf ../../tests/Atha/links.lastz.tsv.gz ../../tests/Atha/links.blast.tsv.gz | target/x86_64-unknown-linux-musl/release/linkr sort stdin -o /dev/null' \ 68 | 'gzip -dcf ../../tests/Atha/links.lastz.tsv.gz ../../tests/Atha/links.blast.tsv.gz | ./linkr sort stdin -o /dev/null' 69 | 70 | echo >&2 71 | 72 | log_info "clean" 73 | hyperfine --warmup 1 --export-markdown clean.md \ 74 | -n cargo \ 75 | -n gcc \ 76 | -n musl \ 77 | -n "zig cc" \ 78 | '~/.cargo/bin/linkr clean ../../tests/Atha/sort.tsv -o /dev/null' \ 79 | 'target/release/linkr clean ../../tests/Atha/sort.tsv -o /dev/null' \ 80 | 'target/x86_64-unknown-linux-musl/release/linkr clean ../../tests/Atha/sort.tsv -o /dev/null' \ 81 | './linkr clean ../../tests/Atha/sort.tsv -o /dev/null' 82 | 83 | echo >&2 84 | 85 | log_info "merge" 86 | hyperfine --warmup 1 --export-markdown merge.md \ 87 | -n cargo \ 88 | -n gcc \ 89 | -n musl \ 90 | -n "zig cc" \ 91 | '~/.cargo/bin/rgr merge ../../tests/Atha/sort.clean.tsv -c 0.95 -o /dev/null' \ 92 | 'target/release/rgr merge ../../tests/Atha/sort.clean.tsv -c 0.95 -o /dev/null' \ 93 | 'target/x86_64-unknown-linux-musl/release/rgr merge 
../../tests/Atha/sort.clean.tsv -c 0.95 -o /dev/null' \ 94 | './rgr merge ../../tests/Atha/sort.clean.tsv -c 0.95 -o /dev/null' 95 | 96 | echo >&2 97 | 98 | log_info "clean2" 99 | hyperfine --warmup 1 --export-markdown clean2.md \ 100 | -n cargo \ 101 | -n gcc \ 102 | -n musl \ 103 | -n "zig cc" \ 104 | '~/.cargo/bin/linkr clean ../../tests/Atha/sort.clean.tsv -r ../../tests/Atha/merge.tsv --bundle 500 -o /dev/null' \ 105 | 'target/release/linkr clean ../../tests/Atha/sort.clean.tsv -r ../../tests/Atha/merge.tsv --bundle 500 -o /dev/null' \ 106 | 'target/x86_64-unknown-linux-musl/release/linkr clean ../../tests/Atha/sort.clean.tsv -r ../../tests/Atha/merge.tsv --bundle 500 -o /dev/null' \ 107 | './linkr clean ../../tests/Atha/sort.clean.tsv -r ../../tests/Atha/merge.tsv --bundle 500 -o /dev/null' 108 | 109 | echo >&2 110 | 111 | log_info "connect" 112 | hyperfine --warmup 1 --export-markdown connect.md \ 113 | -n cargo \ 114 | -n gcc \ 115 | -n musl \ 116 | -n "zig cc" \ 117 | '~/.cargo/bin/linkr connect ../../tests/Atha/clean.tsv -o /dev/null' \ 118 | 'target/release/linkr connect ../../tests/Atha/clean.tsv -o /dev/null' \ 119 | 'target/x86_64-unknown-linux-musl/release/linkr connect ../../tests/Atha/clean.tsv -o /dev/null' \ 120 | './linkr connect ../../tests/Atha/clean.tsv -o /dev/null' 121 | 122 | echo >&2 123 | 124 | log_info "filter" 125 | hyperfine --warmup 1 --export-markdown filter.md \ 126 | -n cargo \ 127 | -n gcc \ 128 | -n musl \ 129 | -n "zig cc" \ 130 | '~/.cargo/bin/linkr filter ../../tests/Atha/connect.tsv -r 0.8 -o /dev/null' \ 131 | 'target/release/linkr filter ../../tests/Atha/connect.tsv -r 0.8 -o /dev/null' \ 132 | 'target/x86_64-unknown-linux-musl/release/linkr filter ../../tests/Atha/connect.tsv -r 0.8 -o /dev/null' \ 133 | './linkr filter ../../tests/Atha/connect.tsv -r 0.8 -o /dev/null' 134 | 135 | echo >&2 136 | -------------------------------------------------------------------------------- /benchmarks/rgr.md: 
-------------------------------------------------------------------------------- 1 | # `rgr` 2 | 3 | ## Test materials 4 | 5 | ```shell 6 | cd ~/gars 7 | 8 | redis-server & 9 | 10 | gars env 11 | 12 | gars status drop 13 | gars gen genome/genome.fa.gz --piece 500000 14 | 15 | gars range features/T-DNA.CSHL.rg 16 | gars range features/T-DNA.FLAG.rg 17 | gars range features/T-DNA.MX.rg 18 | gars range features/T-DNA.RATM.rg 19 | 20 | gars tsv -s "range:*" | gzip -9 > ranges.tsv.gz 21 | 22 | gzip -dcf ranges.tsv.gz | wc -l 23 | #102973 24 | 25 | mv ranges.tsv.gz ~/Scripts/intspan/tests/rgr/ 26 | 27 | ``` 28 | 29 | ## `rgr sort` 30 | 31 | ```shell 32 | cd ~/Scripts/intspan/ 33 | 34 | hyperfine --warmup 1 \ 35 | -n 'sort' \ 36 | ' 37 | rgr sort -H tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz 38 | ' \ 39 | -n 'sort -f' \ 40 | ' 41 | rgr sort -H -f 5 tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz 42 | ' \ 43 | -n 'sort -g' \ 44 | ' 45 | rgr sort -H -f 5 -g 6 tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz tests/rgr/ranges.tsv.gz 46 | ' \ 47 | --export-markdown rgr.sort.md.tmp 48 | 49 | cat rgr.sort.md.tmp 50 | 51 | ``` 52 | 53 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 54 | |:----------|-------------:|---------:|---------:|------------:| 55 | | `sort` | 621.0 ± 3.3 | 617.1 | 626.3 | 1.01 ± 0.08 | 56 | | `sort -f` | 629.4 ± 3.0 | 625.3 | 635.0 | 1.02 ± 0.08 | 57 | | `sort -g` | 615.0 ± 50.4 | 471.9 | 636.2 | 1.00 | 58 | 59 | ## `rgr filter` 60 | 61 | ```shell 62 | cd ~/Scripts/intspan/ 63 | 64 | hyperfine --warmup 1 \ 65 | -n 'rgr filter' \ 66 | ' 67 | rgr filter tests/rgr/ctg_2_1_.gc.tsv --str-eq 3:1 > /dev/null 68 | ' \ 69 | -n 'tsv-filter' \ 70 | ' 71 | tsv-filter tests/rgr/ctg_2_1_.gc.tsv --str-eq 3:1 > /dev/null 72 | ' \ 73 | --export-markdown rgr.filter.md.tmp 74 | 75 | cat rgr.filter.md.tmp 76 | 77 | ``` 78 | 79 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 80 | 
|:-------------|-----------:|---------:|---------:|------------:| 81 | | `rgr filter` | 10.5 ± 1.1 | 9.3 | 14.6 | 2.25 ± 0.91 | 82 | | `tsv-filter` | 4.7 ± 1.8 | 2.3 | 7.9 | 1.00 | 83 | 84 | ## `rgr select` 85 | 86 | ```shell 87 | cd ~/Scripts/intspan/ 88 | 89 | hyperfine --warmup 1 \ 90 | -n 'rgr select' \ 91 | ' 92 | rgr select tests/rgr/ctg_2_1_.gc.tsv -f 1,3 > /dev/null 93 | ' \ 94 | -n 'tsv-select' \ 95 | ' 96 | tsv-select tests/rgr/ctg_2_1_.gc.tsv -f 1,3 > /dev/null 97 | ' \ 98 | -n 'rgr select -H' \ 99 | ' 100 | rgr select tests/rgr/ctg_2_1_.gc.tsv -H -f "#range,signal" > /dev/null 101 | ' \ 102 | -n 'tsv-select -H' \ 103 | ' 104 | tsv-select tests/rgr/ctg_2_1_.gc.tsv -H -f "#range,signal" > /dev/null 105 | ' \ 106 | --export-markdown rgr.select.md.tmp 107 | 108 | cat rgr.select.md.tmp 109 | 110 | ``` 111 | 112 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 113 | |:----------------|-----------:|---------:|---------:|------------:| 114 | | `rgr select` | 13.9 ± 0.7 | 12.9 | 17.8 | 2.54 ± 0.86 | 115 | | `tsv-select` | 5.6 ± 1.9 | 3.7 | 10.1 | 1.01 ± 0.49 | 116 | | `rgr select -H` | 13.9 ± 0.8 | 12.9 | 17.5 | 2.53 ± 0.86 | 117 | | `tsv-select -H` | 5.5 ± 1.8 | 3.6 | 10.1 | 1.00 | 118 | 119 | ## Sampling 120 | 121 | ```shell 122 | cd ~/Scripts/intspan/ 123 | 124 | hyperfine --warmup 1 \ 125 | -n 'tsv-sample' \ 126 | ' 127 | tsv-sample tests/rgr/ctg_2_1_.gc.tsv --prob 0.4 > /dev/null 128 | ' \ 129 | -n 'qsv sample' \ 130 | ' 131 | qsv sample 0.4 tests/rgr/ctg_2_1_.gc.tsv > /dev/null 132 | ' \ 133 | --export-markdown rgr.sample.md.tmp 134 | 135 | cat rgr.sample.md.tmp 136 | 137 | 138 | ``` 139 | 140 | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | 141 | |:-------------|-------------:|---------:|---------:|------------:| 142 | | `tsv-sample` | 14.0 ± 1.4 | 10.8 | 19.8 | 1.00 | 143 | | `qsv sample` | 127.4 ± 14.5 | 111.5 | 165.6 | 9.13 ± 1.38 | 144 | --------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Time the same "statop ... --op intersect --all" job with three tools
# (jrunlist, spanr, runlist) and keep each tool's output in a *.csv.tmp file
# next to this script.

#----------------------------#
# Colors in term
#----------------------------#
# http://stackoverflow.com/questions/5947742/how-to-change-the-output-color-of-echo-in-linux
GREEN=
RED=
NC=
# Only emit color escapes when stdout is a terminal
if tty -s < /dev/fd/1 2> /dev/null; then
    GREEN='\033[0;32m'
    RED='\033[0;31m'
    NC='\033[0m' # No Color
fi

# All log helpers write to stderr; "$*" joins the arguments into one message.
log_warn () {
    echo >&2 -e "${RED}==> $* <==${NC}"
}

log_info () {
    echo >&2 -e "${GREEN}==> $*${NC}"
}

log_debug () {
    echo >&2 -e "==> $*"
}

#----------------------------#
# Prepare
#----------------------------#
# `time` takes -v on Linux and -l on macOS for the verbose resource report
COMMAND_TIME="command time -v"
if [[ "$(uname)" == 'Darwin' ]]; then
    COMMAND_TIME="command time -l"
fi

# enter BASE_DIR; abort rather than write the *.tmp outputs somewhere else
BASE_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
cd "${BASE_DIR}" || exit 1

#----------------------------#
# Run
#----------------------------#
# ${COMMAND_TIME} is deliberately left unquoted so it splits into
# `command`, `time` and the flag.
log_info "jrunlist"
${COMMAND_TIME} jrunlist \
    statop \
    chr.sizes sep-gene.yml paralog.yml \
    --op intersect --all \
    -o stdout \
    > jstatop.csv.tmp

log_info "spanr"
${COMMAND_TIME} spanr \
    statop \
    chr.sizes sep-gene.yml paralog.yml \
    --op intersect --all \
    -o stdout \
    > rstatop.csv.tmp

log_info "App::RL"
${COMMAND_TIME} runlist \
    stat2 \
    -s chr.sizes sep-gene.yml paralog.yml \
    --op intersect --all --mk \
    -o stdout \
    > pstatop.csv.tmp
echo >&2
% !TEX TS-program = arara
% arara: xelatex: { shell: yes }
% arara: indent: { overwrite: yes }
% arara: clean: {extensions: [aux, bak, bbl, bcf, blg, idx, ilg, ind, ist, log, nlo, nls, out, run.xml, synctex.gz,]}

% Railroad (syntax) diagram drawn with the TikZ `graphs` library.
% The `standalone` class options below request conversion of the output to .png.
\documentclass[
    convert,
    outext=.png,
    border=2bp,
    tikz,
]{standalone}

\usepackage{fontspec}
\defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
\setmainfont[BoldFont={Fira Sans}]{Fira Sans Light}
\setmonofont{Fira Mono}

\usepackage{tikz}
\usetikzlibrary{arrows,positioning}
\usetikzlibrary{shapes.misc}
\usetikzlibrary{graphs}
\usepackage{color}

\begin{document}

% Shared styles:
%   skip loop = <dy> : route an edge vertically by <dy>, then across to the target
%   hv path / vh path : horizontal-then-vertical / vertical-then-horizontal routing
%   nonterminal       : italic text in a red-tinted rectangle
%   terminal          : monospace text in a grey rounded rectangle
%   invisible         : zero-size circle (defined here; not used by the graph below)
% The trailing `shape = coordinate` makes plain nodes (p1, p2, ...) coordinates.
\tikzset{
    >=stealth', black!50, text=black, thick,
    every new ->/.style = {thick, shorten <=0pt, shorten >=1pt, color=black!70},
    every new --/.style = {thick, color=black!70},
    graphs/every graph/.style = {edges=rounded corners},
    skip loop/.style = {to path={-- ++(0,#1) -| (\tikztotarget)}},
    hv path/.style = {to path={-| (\tikztotarget)}},
    vh path/.style = {to path={|- (\tikztotarget)}},
    nonterminal/.style={
        rectangle, minimum size=6mm, very thick, draw=red!50!black!50,
        top color=white, bottom color=red!50!black!20,
        font=\itshape, text height=1.5ex,text depth=.25ex},
    terminal/.style={
        rounded rectangle, minimum size=6mm, very thick, draw=black!50,
        top color=white, bottom color=black!20,
        font=\ttfamily, text height=1.5ex, text depth=.25ex},
    invisible/.style={
        draw, circle, minimum size=0mm,
        inner sep=0pt, outer sep=0pt},
    shape = coordinate
}

% Example of the text the diagram describes:
%-99--10,1-10,19,45-48

\begin{tikzpicture}
    \graph[
        grow right sep,
        branch down=7mm,
        simple,
    ]{
        % Main track, left to right:
        %   [-] digit... [ - [-] digit... ] [ , ]
        / --
        p1 --
        p2 ->
        start_negative[as={-}, terminal] --
        p3 --
        p4 ->
        start_digit[as={digit}, terminal] --
        p5 --
        p6 ->
        hyphen[as={-}, terminal] --
        p7 ->
        end_negative[as={-}, terminal] --
        p8 --
        p9 ->
        end_digit[as={digit}, terminal] --
        p10 --
        p11 --
        p12 ->
        "," [terminal] --
        p13 -!-
        p14 --
        / [coordinate];

        % Bypass edges (the node between the two endpoints is optional):
        %   p2-p3   skips the leading '-' of the start value
        %   p6-p11  skips the whole '-' end part
        %   p7-p8   skips the leading '-' of the end value
        %   p12-p14 skips the ','
        p2 -- [skip loop=5mm] p3;
        p6 -- [skip loop=9mm] p11;
        p7 -- [skip loop=5mm] p8;
        p12 -- [skip loop=5mm] p14;
        % Backward edges (repetition):
        %   p5->p4   another start digit
        %   p10->p9  another end digit
        %   p13->p1  after ',' return to the beginning
        p5 -> [skip loop=-5mm] p4;
        p10 -> [skip loop=-5mm] p9;
        p13 -> [skip loop=-9mm] p1;
    };
\end{tikzpicture}

\end{document}
-------------------------------------------------------------------------------- 1 | % !TEX TS-program = arara 2 | % arara: xelatex: { shell: yes } 3 | % arara: indent: { overwrite: yes } 4 | % arara: clean: {extensions: [aux, bak, bbl, bcf, blg, idx, ilg, ind, ist, log, nlo, nls, out, run.xml, synctex.gz,]} 5 | 6 | \documentclass[ 7 | convert, 8 | outext=.png, 9 | border=2bp, 10 | tikz, 11 | ]{standalone} 12 | 13 | \usepackage{fontspec} 14 | \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} 15 | \setmainfont[BoldFont={Fira Sans}]{Fira Sans Light} 16 | \setmonofont{Fira Mono} 17 | 18 | \usepackage{tikz} 19 | \usetikzlibrary{arrows,positioning} 20 | \usetikzlibrary{shapes.misc} 21 | \usetikzlibrary{graphs} 22 | \usepackage{color} 23 | 24 | \begin{document} 25 | 26 | \tikzset{ 27 | >=stealth', black!50, text=black, thick, 28 | every new ->/.style = {thick, shorten <=0pt, shorten >=1pt, color=black!70}, 29 | every new --/.style = {thick, color=black!70}, 30 | graphs/every graph/.style = {edges=rounded corners}, 31 | skip loop/.style = {to path={-- ++(0,#1) -| (\tikztotarget)}}, 32 | hv path/.style = {to path={-| (\tikztotarget)}}, 33 | vh path/.style = {to path={|- (\tikztotarget)}}, 34 | nonterminal/.style={ 35 | rectangle, minimum size=6mm, very thick, draw=red!50!black!50, 36 | top color=white, bottom color=red!50!black!20, 37 | font=\itshape, text height=1.5ex,text depth=.25ex}, 38 | terminal/.style={ 39 | rounded rectangle, minimum size=6mm, very thick, draw=black!50, 40 | top color=white, bottom color=black!20, 41 | font=\ttfamily, text height=1.5ex, text depth=.25ex}, 42 | shape = coordinate 43 | } 44 | 45 | %species.chromosome(strand):start-end 46 | %--------^^^^^^^^^^--------^^^^^^---- 47 | 48 | \begin{tikzpicture} 49 | \graph[ 50 | grow right sep, 51 | branch down=7mm, 52 | simple, 53 | ]{ 54 | / -- 55 | p1 -> 56 | species [nonterminal] -> 57 | "." 
[terminal] -- 58 | p2 -> 59 | chromosome [nonterminal] -- 60 | p4 -> 61 | "(" [terminal] -- 62 | q1 -> [vh path] 63 | {[nodes={yshift=3.5mm}] 64 | strand_positive[as={+}, terminal], strand_negative[as={-}, terminal] 65 | } -- [hv path] 66 | q3 -> 67 | ")" [terminal] -- 68 | p5 -> 69 | ":" [terminal] -> 70 | start [nonterminal] -- 71 | p6 -> 72 | "-" [terminal] -> 73 | end [nonterminal] -- 74 | p7 -- 75 | / [coordinate]; 76 | 77 | p1 -- [skip loop=5mm] p2; 78 | p4 -- [skip loop=9mm] p5; 79 | p6 -- [skip loop=5mm] p7; 80 | }; 81 | \end{tikzpicture} 82 | 83 | \end{document} 84 | -------------------------------------------------------------------------------- /examples/benchmark.rs: -------------------------------------------------------------------------------- 1 | use intspan::IntSpan; 2 | use std::env; 3 | use std::time::Instant; 4 | 5 | fn run_benchmark() { 6 | for step in 2..7 { 7 | println!("step {}", step); 8 | let start = Instant::now(); 9 | 10 | test_add_range(step); 11 | 12 | let elapsed = start.elapsed(); 13 | println!( 14 | "duration: {} s", 15 | (elapsed.as_nanos() as f64) / 1000.0 / 1000.0 / 1000.0 16 | ); 17 | } 18 | 19 | fn test_add_range(step: i32) { 20 | let vec1 = vec![ 21 | 1, 30, 32, 149, 153, 155, 159, 247, 250, 250, 253, 464, 516, 518, 520, 523, 582, 585, 22 | 595, 600, 622, 1679, 23 | ]; 24 | let vec2 = vec![100, 1_000_000]; 25 | 26 | for _i in 1..=50000 { 27 | let mut set = IntSpan::new(); 28 | 29 | if step >= 2 { 30 | set.add_ranges(&vec1); 31 | } 32 | if step >= 3 { 33 | set.add_ranges(&vec2); 34 | } 35 | if step >= 4 { 36 | set.to_string(); 37 | } 38 | if step >= 5 { 39 | for j in 1..=200 { 40 | set.add_pair(j, j); 41 | } 42 | } 43 | if step >= 6 { 44 | for j in 1..=200 { 45 | set.add_pair(j * 5, j * 10); 46 | } 47 | } 48 | } 49 | } 50 | } 51 | 52 | fn main() { 53 | let args: Vec = env::args().collect(); 54 | println!("{:?}", args); 55 | 56 | run_benchmark(); 57 | } 58 | 
-------------------------------------------------------------------------------- /examples/test.rs: -------------------------------------------------------------------------------- 1 | use intspan::IntSpan; 2 | use std::env; 3 | 4 | fn run_test() { 5 | let mut intspan = IntSpan::new(); 6 | intspan.add_pair(1, 9); 7 | intspan.add_pair(20, 39); 8 | 9 | println!("{}", intspan); 10 | println!("is_empty {}", intspan.is_empty()); 11 | println!("edge_size {}", intspan.edge_size()); 12 | println!("ranges {:?}", intspan.ranges()); 13 | println!("cardinality {}", intspan.cardinality()); 14 | 15 | for n in &[-5, 29, 40] { 16 | println!("val {} is contained {}", n, intspan.contains(*n)); 17 | } 18 | 19 | intspan.add_ranges(&[60, 70, 80, 90]); 20 | println!("{}", intspan); 21 | 22 | intspan.add_ranges(&[68, 75]); 23 | println!("{}", intspan); 24 | 25 | intspan.add_n(99); 26 | println!("{}", intspan); 27 | 28 | intspan.add_vec(&[77, 79]); 29 | println!("{}", intspan); 30 | 31 | intspan.invert(); 32 | println!("{}", intspan); 33 | 34 | intspan.invert(); 35 | println!("{}", intspan); 36 | 37 | intspan.remove_pair(66, 71); 38 | println!("{}", intspan); 39 | 40 | intspan.remove_n(85); 41 | println!("{}", intspan); 42 | 43 | intspan.remove_vec(&[87, 88]); 44 | println!("{}", intspan); 45 | 46 | intspan.add_runlist("-30--10"); 47 | println!("{}", intspan); 48 | 49 | intspan.remove_runlist("62-78"); 50 | println!("{}", intspan); 51 | 52 | let mut other = IntSpan::new(); 53 | other.add_runlist("-15-5"); 54 | println!("{}", other); 55 | 56 | intspan.merge(&other); 57 | println!("{}", intspan); 58 | 59 | other.clear(); 60 | println!("{}", other); 61 | other.add_runlist("-20--5"); 62 | println!("{}", other); 63 | intspan.subtract(&other); 64 | println!("{}", intspan); 65 | 66 | // -30--21,-4-9,20-39,60-61,79-84,86,89-90,99 67 | } 68 | 69 | fn main() { 70 | let args: Vec = env::args().collect(); 71 | println!("{:?}", args); 72 | 73 | run_test(); 74 | } 75 | 
-------------------------------------------------------------------------------- /release.toml: -------------------------------------------------------------------------------- 1 | pre-release-replacements = [ 2 | {file="README.md", search="Current release: [a-z0-9\\.-]+", replace="Current release: {{version}}"} , 3 | {file="CHANGELOG.md", search="Unreleased", replace="{{version}}"}, 4 | {file="CHANGELOG.md", search="ReleaseDate", replace="{{date}}"}, 5 | {file="CHANGELOG.md", search="Change Log", replace="Change Log\n\n## Unreleased - ReleaseDate"} 6 | ] 7 | -------------------------------------------------------------------------------- /src/cmd_linkr/circos.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use std::io::BufRead; 4 | 5 | // Create clap subcommand arguments 6 | pub fn make_subcommand() -> Command { 7 | Command::new("circos") 8 | .about("Convert links to circos links or highlights") 9 | .after_help( 10 | r###" 11 | * It's assumed that all ranges in input files are valid 12 | 13 | "###, 14 | ) 15 | .arg( 16 | Arg::new("infiles") 17 | .required(true) 18 | .num_args(1..) 19 | .index(1) 20 | .help("Set the input files to use"), 21 | ) 22 | .arg( 23 | Arg::new("highlight") 24 | .long("highlight") 25 | .action(ArgAction::SetTrue) 26 | .help("Create highlights instead of links"), 27 | ) 28 | .arg( 29 | Arg::new("outfile") 30 | .long("outfile") 31 | .short('o') 32 | .num_args(1) 33 | .default_value("stdout") 34 | .help("Output filename. 
[stdout] for screen"), 35 | ) 36 | } 37 | 38 | // command implementation 39 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 40 | //---------------------------- 41 | // Loading 42 | //---------------------------- 43 | let mut writer = writer(args.get_one::("outfile").unwrap()); 44 | let is_highlight = args.get_flag("highlight"); 45 | 46 | let mut colors = (1..=12) 47 | .map(|n| format!("paired-12-qual-{}", n)) 48 | .collect::>(); 49 | colors.reverse(); 50 | let mut color_idx = 0; 51 | 52 | for infile in args.get_many::("infiles").unwrap() { 53 | let reader = reader(infile); 54 | for line in reader.lines().map_while(Result::ok) { 55 | let parts: Vec<&str> = line.split('\t').collect(); 56 | 57 | if is_highlight { 58 | for part in parts { 59 | let range = Range::from_str(part); 60 | if !range.is_valid() { 61 | continue; 62 | } 63 | 64 | //---------------------------- 65 | // Output 66 | //---------------------------- 67 | writer.write_all( 68 | format!( 69 | "{} {} {} fill_color={}\n", 70 | range.chr(), 71 | range.start(), 72 | range.end(), 73 | colors[color_idx] 74 | ) 75 | .as_ref(), 76 | )?; 77 | } 78 | 79 | // rotate color 80 | color_idx += 1; 81 | if color_idx > 11 { 82 | color_idx = 0; 83 | } 84 | } else { 85 | let count = parts.len(); 86 | 87 | // 2-combinations of parts forms a pair 88 | for i in 0..count { 89 | 'PAIR: for j in i + 1..count { 90 | let mut fields: Vec = vec![]; 91 | for idx in &[i, j] { 92 | let range = Range::from_str(parts[*idx]); 93 | if !range.is_valid() { 94 | continue 'PAIR; 95 | } 96 | 97 | fields.push(range.chr().to_string()); 98 | if range.strand() == "-" { 99 | fields.push(range.end().to_string()); 100 | fields.push(range.start().to_string()); 101 | } else { 102 | fields.push(range.start().to_string()); 103 | fields.push(range.end().to_string()); 104 | } 105 | } 106 | 107 | //---------------------------- 108 | // Output 109 | //---------------------------- 110 | writer.write_all(format!("{}\n", fields.join(" ")).as_ref())?; 
111 | } 112 | } 113 | } 114 | } // end of line 115 | } 116 | 117 | Ok(()) 118 | } 119 | -------------------------------------------------------------------------------- /src/cmd_linkr/filter.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use std::io::BufRead; 4 | 5 | // Create clap subcommand arguments 6 | pub fn make_subcommand() -> Command { 7 | Command::new("filter") 8 | .about("Filter links by numbers of ranges or length differences") 9 | .after_help( 10 | r###" 11 | * It's assumed that all ranges in input files are valid 12 | * Inputs should not contain hit strands 13 | 14 | "###, 15 | ) 16 | .arg( 17 | Arg::new("infiles") 18 | .required(true) 19 | .num_args(1..) 20 | .index(1) 21 | .help("Set the input files to use"), 22 | ) 23 | .arg( 24 | Arg::new("number") 25 | .long("number") 26 | .short('n') 27 | .num_args(1) 28 | .help("Numbers of ranges, an IntSpan like [2-10]"), 29 | ) 30 | .arg( 31 | Arg::new("ratio") 32 | .long("ratio") 33 | .short('r') 34 | .num_args(1) 35 | .value_parser(value_parser!(f32)) 36 | .help("Ratio of lengths differences. The suggested value is [0.8]"), 37 | ) 38 | .arg( 39 | Arg::new("outfile") 40 | .long("outfile") 41 | .short('o') 42 | .num_args(1) 43 | .default_value("stdout") 44 | .help("Output filename. 
[stdout] for screen"), 45 | ) 46 | } 47 | 48 | // command implementation 49 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 50 | //---------------------------- 51 | // Loading 52 | //---------------------------- 53 | let mut writer = writer(args.get_one::("outfile").unwrap()); 54 | 55 | let numbers = if args.contains_id("number") { 56 | IntSpan::from(args.get_one::("number").unwrap()) 57 | } else { 58 | IntSpan::new() 59 | }; 60 | let ratio = if args.contains_id("ratio") { 61 | *args.get_one::("ratio").unwrap() 62 | } else { 63 | -1.0 64 | }; 65 | 66 | for infile in args.get_many::("infiles").unwrap() { 67 | let reader = reader(infile); 68 | for line in reader.lines().map_while(Result::ok) { 69 | let parts: Vec<&str> = line.split('\t').collect(); 70 | 71 | if !numbers.is_empty() && !numbers.contains(parts.len() as i32) { 72 | continue; 73 | } 74 | 75 | if ratio > 0.0 { 76 | let mut lengths: Vec = vec![]; 77 | 78 | for part in &parts { 79 | let range = Range::from_str(part); 80 | if !range.is_valid() { 81 | continue; 82 | } 83 | lengths.push(range.intspan().cardinality()); 84 | } 85 | 86 | let min = lengths.iter().min().unwrap(); 87 | let max = lengths.iter().max().unwrap(); 88 | let diff_ratio = *min as f32 / *max as f32; 89 | 90 | if diff_ratio < ratio { 91 | continue; 92 | } 93 | } 94 | 95 | //---------------------------- 96 | // Output 97 | //---------------------------- 98 | writer.write_all(format!("{}\n", line).as_ref())?; 99 | } // end of line 100 | } 101 | 102 | Ok(()) 103 | } 104 | -------------------------------------------------------------------------------- /src/cmd_linkr/mod.rs: -------------------------------------------------------------------------------- 1 | //! Subcommand modules for the `linkr` binary. 
2 | 3 | pub mod circos; 4 | pub mod clean; 5 | pub mod connect; 6 | pub mod filter; 7 | pub mod sort; 8 | -------------------------------------------------------------------------------- /src/cmd_linkr/sort.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use std::collections::BTreeSet; 4 | use std::io::BufRead; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("sort") 9 | .about("Sort links and ranges within links") 10 | .arg( 11 | Arg::new("infiles") 12 | .required(true) 13 | .num_args(1..) 14 | .index(1) 15 | .help("Set the input files to use"), 16 | ) 17 | .arg( 18 | Arg::new("outfile") 19 | .long("outfile") 20 | .short('o') 21 | .num_args(1) 22 | .default_value("stdout") 23 | .help("Output filename. [stdout] for screen"), 24 | ) 25 | } 26 | 27 | // command implementation 28 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 29 | //---------------------------- 30 | // Loading 31 | //---------------------------- 32 | let mut line_set: BTreeSet = BTreeSet::new(); 33 | 34 | for infile in args.get_many::("infiles").unwrap() { 35 | let reader = reader(infile); 36 | 'LINE: for line in reader.lines().map_while(Result::ok) { 37 | let parts: Vec<&str> = line.split('\t').collect(); 38 | 39 | for part in parts { 40 | let range = Range::from_str(part); 41 | if range.is_valid() { 42 | line_set.insert(line.clone()); 43 | continue 'LINE; 44 | } 45 | } 46 | } // end of line 47 | } 48 | 49 | //---------------------------- 50 | // Sorting 51 | //---------------------------- 52 | let mut lines = line_set.into_iter().collect::>(); 53 | lines = sort_links(&lines); 54 | 55 | //---------------------------- 56 | // Output 57 | //---------------------------- 58 | write_lines(args.get_one::("outfile").unwrap(), &lines)?; 59 | 60 | Ok(()) 61 | } 62 | -------------------------------------------------------------------------------- 
/src/cmd_rgr/count.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use rust_lapper::{Interval, Lapper}; 3 | use std::collections::BTreeMap; 4 | use std::io::BufRead; 5 | 6 | // Interval: represent a range from [start, stop), carrying val 7 | type Iv = Interval; // the first type should be Unsigned 8 | 9 | // Create clap subcommand arguments 10 | pub fn make_subcommand() -> Command { 11 | Command::new("count") 12 | .about("Count overlaps between ranges in a target file and other range files") 13 | .after_help( 14 | r###" 15 | * Lines without a valid range will not be output 16 | 17 | Example: 18 | 19 | # Count overlaps between two .rg files 20 | rgr count tests/rgr/S288c.rg tests/rgr/S288c.rg 21 | 22 | # Count overlaps in a .tsv file with headers 23 | rgr count tests/rgr/ctg.range.tsv tests/rgr/S288c.rg -H -f 3 24 | 25 | # For large .rg files, pre-sorting may improve perfermonce. 26 | cat *.rg | rgr sort stdin | rgr count target.rg stdin 27 | 28 | "###, 29 | ) 30 | .arg( 31 | Arg::new("target") 32 | .required(true) 33 | .index(1) 34 | .num_args(1) 35 | .help("Target .rg/.tsv file"), 36 | ) 37 | .arg( 38 | Arg::new("infiles") 39 | .required(true) 40 | .index(2) 41 | .num_args(1..) 42 | .help("Input .rg files to count overlaps with"), 43 | ) 44 | .arg( 45 | Arg::new("header") 46 | .long("header") 47 | .short('H') 48 | .action(ArgAction::SetTrue) 49 | .help("Treat the first line of each file as a header"), 50 | ) 51 | .arg( 52 | Arg::new("sharp") 53 | .long("sharp") 54 | .short('s') 55 | .action(ArgAction::SetTrue) 56 | .help("Include lines starting with `#` without changes (default: ignore them)"), 57 | ) 58 | .arg( 59 | Arg::new("field") 60 | .long("field") 61 | .short('f') 62 | .value_parser(value_parser!(usize)) 63 | .num_args(1) 64 | .help("Index of the range field. 
If not set, the first valid range will be used"), 65 | ) 66 | .arg( 67 | Arg::new("outfile") 68 | .long("outfile") 69 | .short('o') 70 | .num_args(1) 71 | .default_value("stdout") 72 | .help("Output filename. [stdout] for screen"), 73 | ) 74 | } 75 | 76 | // command implementation 77 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 78 | //---------------------------- 79 | // Options 80 | //---------------------------- 81 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 82 | 83 | let is_sharp = args.get_flag("sharp"); 84 | let is_header = args.get_flag("header"); 85 | 86 | let opt_idx_range = args.get_one::("field").copied().unwrap_or(0); 87 | 88 | //---------------------------- 89 | // Loading 90 | //---------------------------- 91 | // seq_name => Vector of Intervals 92 | let mut iv_of: BTreeMap> = BTreeMap::new(); 93 | 94 | for infile in args.get_many::("infiles").unwrap() { 95 | let reader = intspan::reader(infile); 96 | for line in reader.lines().map_while(Result::ok) { 97 | if line.starts_with('#') { 98 | continue; 99 | } 100 | 101 | let range = intspan::Range::from_str(&line); 102 | if !range.is_valid() { 103 | continue; 104 | } 105 | 106 | let iv = Iv { 107 | start: *range.start() as u32, 108 | stop: *range.end() as u32 + 1, 109 | val: 0, 110 | }; 111 | let chr = range.chr(); 112 | iv_of.entry(chr.to_string()).or_default().push(iv); 113 | } 114 | } 115 | 116 | // seq_name => Lapper 117 | let mut lapper_of = BTreeMap::new(); 118 | for (chr, ivs) in iv_of { 119 | let lapper = Lapper::new(ivs); 120 | lapper_of.insert(chr, lapper); 121 | } 122 | 123 | //---------------------------- 124 | // Operating 125 | //---------------------------- 126 | let reader = intspan::reader(args.get_one::("target").unwrap()); 127 | 'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() { 128 | // Handle the header line 129 | if is_header && i == 0 { 130 | writer.write_fmt(format_args!("{}\t{}\n", line, "count"))?; 131 | continue 
'LINE; 132 | } 133 | 134 | // Handle lines starting with '#' 135 | if line.starts_with('#') { 136 | if is_sharp { 137 | writer.write_fmt(format_args!("{}\n", line))?; 138 | } 139 | continue 'LINE; 140 | } 141 | 142 | let rg = match intspan::extract_rg(&line, opt_idx_range) { 143 | // Extract the range 144 | Some(range) => range, 145 | // Skip lines without a valid range 146 | None => continue 'LINE, 147 | }; 148 | 149 | let mut count = 0; 150 | if lapper_of.contains_key(rg.chr()) { 151 | let lapper = lapper_of.get(rg.chr()).unwrap(); 152 | count = lapper.count(*rg.start() as u32, *rg.end() as u32 + 1); 153 | } 154 | 155 | //---------------------------- 156 | // Output 157 | //---------------------------- 158 | writer.write_all(format!("{}\t{}\n", line, count).as_ref())?; 159 | } 160 | 161 | Ok(()) 162 | } 163 | -------------------------------------------------------------------------------- /src/cmd_rgr/dedup.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use std::collections::HashSet; 3 | use std::io::{BufRead, Write}; 4 | 5 | // Create clap subcommand arguments 6 | pub fn make_subcommand() -> Command { 7 | Command::new("dedup") 8 | .about("Deduplicate lines in .tsv file(s) based on specified fields or the entire line") 9 | .after_help( 10 | r###" 11 | This command removes duplicate lines from .tsv file(s) in a single pass without sorting. 12 | Each line consumes 8 bytes (u64) of memory for hashing, making it memory-efficient. 13 | As a trade-off, this program cannot count the occurrences of duplicates. 14 | 15 | * If no fields are specified, the entire line is used as the key for deduplication. 16 | * If fields are specified, only the selected fields are used as the key. 
17 | 18 | Examples: 19 | # Deduplicates lines in file1.tsv and file2.tsv, writing the result to output.tsv 20 | rgr dedup file1.tsv file2.tsv -o output.tsv 21 | 22 | # Deduplicates lines in file1.tsv based on the 1st and 3rd fields, printing the result to stdout 23 | rgr dedup file1.tsv -f 1,3 24 | 25 | "###, 26 | ) 27 | .arg( 28 | Arg::new("infiles") 29 | .required(true) 30 | .num_args(1..) 31 | .index(1) 32 | .help("Input file(s) to process"), 33 | ) 34 | .arg( 35 | Arg::new("fields") 36 | .long("fields") 37 | .short('f') 38 | .num_args(1) 39 | .help("Fields to use as the key"), 40 | ) 41 | .arg( 42 | Arg::new("outfile") 43 | .long("outfile") 44 | .short('o') 45 | .num_args(1) 46 | .default_value("stdout") 47 | .help("Output filename. [stdout] for screen"), 48 | ) 49 | } 50 | 51 | // command implementation 52 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 53 | //---------------------------- 54 | // Args 55 | //---------------------------- 56 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 57 | 58 | let opt_fields: intspan::IntSpan = if args.contains_id("fields") { 59 | intspan::fields_to_ints(args.get_one::("fields").unwrap()) 60 | } else { 61 | intspan::IntSpan::new() 62 | }; 63 | 64 | //---------------------------- 65 | // Ops 66 | //---------------------------- 67 | let mut subject_set: HashSet = HashSet::new(); 68 | 69 | for infile in args.get_many::("infiles").unwrap() { 70 | let reader = intspan::reader(infile); 71 | 72 | for line in reader.lines().map_while(Result::ok) { 73 | let subject = if opt_fields.is_empty() { 74 | // whole line 75 | xxhash_rust::xxh3::xxh3_64(&line.clone().into_bytes()) 76 | } else { 77 | // Get elements at specified indices 78 | let fields: Vec<&str> = line.split('\t').collect(); 79 | let subset: Vec<&str> = opt_fields 80 | .elements() 81 | .iter() 82 | .filter_map(|&i| fields.get(i as usize - 1)) 83 | .copied() 84 | .collect(); 85 | let concat = subset.join("\t"); 86 | 
xxhash_rust::xxh3::xxh3_64(&concat.into_bytes()) 87 | }; 88 | 89 | if !subject_set.contains(&subject) { 90 | writer.write_fmt(format_args!("{}\n", line))?; 91 | subject_set.insert(subject); 92 | } 93 | } 94 | } 95 | 96 | Ok(()) 97 | } 98 | -------------------------------------------------------------------------------- /src/cmd_rgr/field.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use std::io::BufRead; 3 | 4 | // Create clap subcommand arguments 5 | pub fn make_subcommand() -> Command { 6 | Command::new("field") 7 | .about("Create/append ranges from fields") 8 | .after_help( 9 | r###" 10 | Examples: 11 | 12 | 1. Create ranges from a chromosome size file: 13 | rgr field tests/Atha/chr.sizes --chr 1 --start 2 -a -s 14 | 15 | 2. Create ranges from a GFF file: 16 | rgr field tests/spanr/NC_007942.gff -H --chr 1 --start 4 --end 5 --strand 7 17 | 18 | 3. Create ranges from a .tsv file: 19 | rgr field tests/rgr/ctg.tsv --chr 2 --start 3 --end 4 -H 20 | 21 | "###, 22 | ) 23 | .arg( 24 | Arg::new("infiles") 25 | .required(true) 26 | .num_args(1..) 27 | .index(1) 28 | .help("Input files to process"), 29 | ) 30 | .arg( 31 | Arg::new("header") 32 | .long("header") 33 | .short('H') 34 | .action(ArgAction::SetTrue) 35 | .help("Treat the first line of each file as a header"), 36 | ) 37 | .arg( 38 | Arg::new("sharp") 39 | .long("sharp") 40 | .short('s') 41 | .action(ArgAction::SetTrue) 42 | .help("Preserve lines starting with a `#` without changes. 
The default is to ignore them"), 43 | ) 44 | .arg( 45 | Arg::new("chr") 46 | .long("chr") 47 | .num_args(1) 48 | .required(true) 49 | .value_parser(value_parser!(usize)) 50 | .help("Field index for chr"), 51 | ) 52 | .arg( 53 | Arg::new("strand") 54 | .long("strand") 55 | .num_args(1) 56 | .value_parser(value_parser!(usize)) 57 | .help("Optional field index for strand"), 58 | ) 59 | .arg( 60 | Arg::new("start") 61 | .long("start") 62 | .num_args(1) 63 | .required(true) 64 | .value_parser(value_parser!(usize)) 65 | .help("Field index for start"), 66 | ) 67 | .arg( 68 | Arg::new("end") 69 | .long("end") 70 | .num_args(1) 71 | .value_parser(value_parser!(usize)) 72 | .help("Optional field index for end"), 73 | ) 74 | .arg( 75 | Arg::new("append") 76 | .long("append") 77 | .short('a') 78 | .action(ArgAction::SetTrue) 79 | .help("Append a field for the range (default: only write the range)"), 80 | ) 81 | .arg( 82 | Arg::new("outfile") 83 | .long("outfile") 84 | .short('o') 85 | .num_args(1) 86 | .default_value("stdout") 87 | .help("Output filename. 
[stdout] for screen"), 88 | ) 89 | } 90 | 91 | // command implementation 92 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 93 | //---------------------------- 94 | // Args 95 | //---------------------------- 96 | let mut writer = intspan::writer(args.get_one::<String>("outfile").unwrap()); 97 | 98 | let is_header = args.get_flag("header"); 99 | let is_sharp = args.get_flag("sharp"); 100 | 101 | let opt_idx_chr = *args.get_one::<usize>("chr").unwrap(); 102 | let opt_idx_strand = args.get_one::<usize>("strand").copied().unwrap_or(0); 103 | let opt_idx_start = *args.get_one::<usize>("start").unwrap(); 104 | let opt_idx_end = args.get_one::<usize>("end").copied().unwrap_or(0); 105 | 106 | let is_append = args.get_flag("append"); 107 | 108 | //---------------------------- 109 | // Ops 110 | //---------------------------- 111 | for infile in args.get_many::<String>("infiles").unwrap() { 112 | let reader = intspan::reader(infile); 113 | 'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() { 114 | let parts: Vec<&str> = line.split('\t').collect(); 115 | 116 | // Handle the header line 117 | if is_header && i == 0 { 118 | if is_append { 119 | writer.write_fmt(format_args!("{}\t{}\n", line, "range"))?; 120 | } else { 121 | writer.write_fmt(format_args!("{}\n", "range"))?; 122 | } 123 | continue 'LINE; 124 | } 125 | 126 | // Handle lines starting with '#' 127 | if line.starts_with('#') { 128 | if is_sharp { 129 | writer.write_fmt(format_args!("{}\n", line))?; 130 | } 131 | continue 'LINE; 132 | } 133 | 134 | // Build ranges 135 | let chr = parts.get(opt_idx_chr - 1).unwrap(); 136 | let strand = if opt_idx_strand == 0 { 137 | "" 138 | } else { 139 | parts.get(opt_idx_strand - 1).unwrap() 140 | }; 141 | let start = parts 142 | .get(opt_idx_start - 1) 143 | .unwrap() 144 | .parse::<i32>() 145 | .unwrap(); 146 | let end = if opt_idx_end == 0 { 147 | start 148 | } else { 149 | parts.get(opt_idx_end - 1).unwrap().parse::<i32>().unwrap() 150 | }; 151 | 152 | let rg = intspan::Range { 153 | name: 
"".to_string(), 154 | chr: chr.to_string(), 155 | strand: strand.to_string(), 156 | start, 157 | end, 158 | }; 159 | 160 | //---------------------------- 161 | // Output 162 | //---------------------------- 163 | let new_line: String = if is_append { 164 | format!("{}\t{}", parts.join("\t"), rg) 165 | } else { 166 | rg.to_string() 167 | }; 168 | 169 | writer.write_fmt(format_args!("{}\n", new_line))?; 170 | } 171 | } 172 | 173 | Ok(()) 174 | } 175 | -------------------------------------------------------------------------------- /src/cmd_rgr/keep.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use std::io::{BufRead, Write}; 3 | 4 | // Create clap subcommand arguments 5 | pub fn make_subcommand() -> Command { 6 | Command::new("keep") 7 | .about("Keep the initial header line(s)") 8 | .after_help( 9 | r###" 10 | The first N lines of each file are treated as a header, and only the header of the first file is output unchanged. 11 | Subsequent lines are sent to the specified command via stdin, excluding headers from other files. 12 | The output from the command is appended to the initial header. 13 | 14 | * Use a double hyphen (--) to separate the command from the file arguments. 15 | 16 | Examples: 17 | # Keeps the first 2 lines of file1.txt as headers, processes the rest with `wc -l` 18 | rgr keep -l 2 file1.txt file2.txt -- wc -l 19 | 20 | # Skips headers and processes all lines with `sort` 21 | rgr keep --delete file1.txt file2.txt -- sort 22 | 23 | "###, 24 | ) 25 | .arg( 26 | Arg::new("infiles") 27 | .required(true) 28 | .num_args(1..) 
29 | .help("Input file(s) to process"), 30 | ) 31 | .arg( 32 | Arg::new("lines") 33 | .long("lines") 34 | .short('l') 35 | .num_args(1) 36 | .default_value("1") 37 | .value_parser(value_parser!(usize)) 38 | .help("Number of header lines to keep"), 39 | ) 40 | .arg( 41 | Arg::new("delete") 42 | .long("delete") 43 | .short('d') 44 | .action(ArgAction::SetTrue) 45 | .help("Skip writing headers"), 46 | ) 47 | .arg( 48 | Arg::new("commands") 49 | .required(true) 50 | .num_args(1..) 51 | .last(true) 52 | .value_parser(value_parser!(String)) 53 | .help("Command to process subsequent lines"), 54 | ) 55 | } 56 | 57 | // command implementation 58 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 59 | //---------------------------- 60 | // Args 61 | //---------------------------- 62 | let infiles = args 63 | .get_many::<String>("infiles") 64 | .map(|vals| vals.collect::<Vec<_>>()) 65 | .unwrap_or_default(); 66 | 67 | let opt_lines = *args.get_one::<usize>("lines").unwrap(); 68 | let is_delete = args.get_flag("delete"); 69 | 70 | let commands = args 71 | .get_many::<String>("commands") 72 | .map(|vals| vals.collect::<Vec<_>>()) 73 | .unwrap_or_default(); 74 | 75 | //---------------------------- 76 | // Ops 77 | //---------------------------- 78 | let mut child = std::process::Command::new(commands[0]) 79 | .args(&commands[1..]) 80 | .stdin(std::process::Stdio::piped()) 81 | .stdout(std::process::Stdio::inherit()) 82 | .stderr(std::process::Stdio::inherit()) 83 | .spawn()?; 84 | let stdin = child.stdin.as_mut().expect("Failed to open child stdin"); 85 | 86 | let mut first_file = true; // Track if we are processing the first file 87 | for infile in infiles { 88 | let reader = intspan::reader(infile); 89 | let mut header_written = 0; 90 | let mut lines = reader.lines(); 91 | 92 | while let Some(Ok(line)) = lines.next() { 93 | if header_written < opt_lines { 94 | if first_file && !is_delete { 95 | // Only print headers from the first file 96 | println!("{}", line); 97 | } 98 | header_written += 1; 99 | } 
else { 100 | // Send subsequent lines to the command 101 | writeln!(stdin, "{}", line)?; 102 | } 103 | } 104 | 105 | // After processing the first file, set first_file to false 106 | first_file = false; 107 | } 108 | 109 | stdin.flush()?; 110 | child.wait()?; 111 | Ok(()) 112 | } 113 | -------------------------------------------------------------------------------- /src/cmd_rgr/md.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use std::io::{BufRead, Write}; 3 | 4 | // Create clap subcommand arguments 5 | pub fn make_subcommand() -> Command { 6 | Command::new("md") 7 | .about("Convert a .tsv file to a Markdown table") 8 | .after_help( 9 | r###" 10 | You can customize the alignment of columns and format numeric values. 11 | 12 | Examples: 13 | # right-align numeric columns, and center-align the 2nd column 14 | rgr md tests/rgr/ctg.range.tsv --num --center 2 15 | 16 | # right-align numeric columns and format them to 2 decimal places 17 | rgr md input.tsv --right 2 --fmt --digits 2 18 | 19 | "###, 20 | ) 21 | .arg( 22 | Arg::new("infile") 23 | .required(true) 24 | .num_args(1) 25 | .index(1) 26 | .help("Input file to process"), 27 | ) 28 | .arg( 29 | Arg::new("center") 30 | .long("center") 31 | .short('c') 32 | .num_args(1) 33 | .help("List of columns to center-align (e.g., `1,3-5`)"), 34 | ) 35 | .arg( 36 | Arg::new("right") 37 | .long("right") 38 | .short('r') 39 | .num_args(1) 40 | .help("Columns to right-align"), 41 | ) 42 | .arg( 43 | Arg::new("num") 44 | .long("num") 45 | .action(ArgAction::SetTrue) 46 | .help("Automatically right-align numeric columns"), 47 | ) 48 | .arg( 49 | Arg::new("fmt") 50 | .long("fmt") 51 | .action(ArgAction::SetTrue) 52 | .help("Format numeric columns and enable the `--num` option"), 53 | ) 54 | .arg( 55 | Arg::new("digits") 56 | .long("digits") 57 | .num_args(1) 58 | .default_value("0") 59 | .value_parser(value_parser!(usize)) 60 | .help("Number of decimal digits"), 61 | ) 62 | 
.arg( 63 | Arg::new("outfile") 64 | .long("outfile") 65 | .short('o') 66 | .num_args(1) 67 | .default_value("stdout") 68 | .help("Output filename. [stdout] for screen"), 69 | ) 70 | } 71 | 72 | // command implementation 73 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 74 | //---------------------------- 75 | // Loading 76 | //---------------------------- 77 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 78 | let reader = intspan::reader(args.get_one::("infile").unwrap()); 79 | 80 | let mut opt_center: intspan::IntSpan = if args.contains_id("center") { 81 | intspan::fields_to_ints(args.get_one::("center").unwrap()) 82 | } else { 83 | intspan::IntSpan::new() 84 | }; 85 | let mut opt_right: intspan::IntSpan = if args.contains_id("right") { 86 | intspan::fields_to_ints(args.get_one::("right").unwrap()) 87 | } else { 88 | intspan::IntSpan::new() 89 | }; 90 | let mut is_num = args.get_flag("num"); 91 | let is_fmt = args.get_flag("fmt"); 92 | if is_fmt { 93 | is_num = true; 94 | } 95 | let opt_digits: usize = *args.get_one("digits").unwrap(); 96 | 97 | //---------------------------- 98 | // Output 99 | //---------------------------- 100 | let mut is_numeric_column = vec![]; 101 | 102 | let mut data: Vec> = Vec::new(); 103 | for line in reader.lines().map_while(Result::ok) { 104 | let fields: Vec = line.split('\t').map(|s| s.to_string()).collect(); 105 | data.push(fields); 106 | } 107 | 108 | let mut table = String::new(); 109 | if !data.is_empty() { 110 | let num_columns = data[0].len(); 111 | if is_num { 112 | // Determine if each column is numeric 113 | is_numeric_column = vec![true; num_columns]; 114 | 115 | for row in data.iter().skip(1) { 116 | // Skip the header row 117 | for (i, value) in row.iter().enumerate() { 118 | if is_numeric_column[i] && value.parse::().is_err() { 119 | is_numeric_column[i] = false; 120 | } 121 | } 122 | } 123 | 124 | for (i, &flag) in is_numeric_column.iter().enumerate().take(num_columns) { 125 | if 
flag { 126 | opt_center.remove_n((i + 1) as i32); 127 | opt_right.add_n((i + 1) as i32); 128 | } 129 | } 130 | } 131 | 132 | // Print the Markdown table 133 | for (i, row) in data.iter().enumerate() { 134 | let formatted_row: Vec = row 135 | .iter() 136 | .enumerate() 137 | .map(|(j, value)| { 138 | // Don't touch first row 139 | if i == 0 { 140 | value.to_string() 141 | } else if is_fmt && is_numeric_column[j] { 142 | let num = value.parse::().unwrap(); 143 | let v = intspan::format_number(num, opt_digits); 144 | v.to_string() 145 | } else { 146 | value.to_string() 147 | } 148 | }) 149 | .collect(); 150 | table += format!("| {} |\n", formatted_row.join(" | ")).as_str(); 151 | 152 | // Print the header separator 153 | if i == 0 { 154 | let separator: Vec = (0..num_columns) 155 | .collect::>() 156 | .iter() 157 | .map(|&j| { 158 | if opt_right.contains((j + 1) as i32) { 159 | "---:".to_string() 160 | } else if opt_center.contains((j + 1) as i32) { 161 | ":---:".to_string() 162 | } else { 163 | "---".to_string() 164 | } 165 | }) 166 | .collect(); 167 | table += format!("| {} |\n", separator.join(" | ")).as_str(); 168 | } 169 | } 170 | } 171 | 172 | if !table.is_empty() { 173 | writer.write_fmt(format_args!( 174 | "{}", 175 | markdown_table_formatter::format_tables(table) 176 | ))?; 177 | } 178 | 179 | Ok(()) 180 | } 181 | -------------------------------------------------------------------------------- /src/cmd_rgr/merge.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use petgraph::prelude::NodeIndex; 4 | use petgraph::*; 5 | use std::collections::{HashMap, HashSet}; 6 | use std::io::BufRead; 7 | 8 | // Create clap subcommand arguments 9 | pub fn make_subcommand() -> Command { 10 | Command::new("merge") 11 | .about("Merge overlapped ranges via overlapping graph") 12 | .after_help( 13 | r###" 14 | This command merges overlapping ranges from input files based on a specified coverage threshold. 
15 | It builds an overlapping graph for each chromosome and merges ranges that meet the coverage criteria. 16 | 17 | Examples: 18 | 19 | # Merge all ranges in the .tsv file with a coverage threshold of 0.98 20 | rgr merge tests/rgr/II.links.tsv --coverage 0.98 21 | 22 | # Enable verbose mode to see detailed processing information 23 | rgr merge input1.rg input2.rg --coverage 0.95 --verbose 24 | 25 | "###, 26 | ) 27 | .arg( 28 | Arg::new("infiles") 29 | .required(true) 30 | .num_args(1..) 31 | .index(1) 32 | .help("Input files to process. Multiple files can be specified"), 33 | ) 34 | .arg( 35 | Arg::new("coverage") 36 | .long("coverage") 37 | .short('c') 38 | .num_args(1) 39 | .default_value("0.95") 40 | .value_parser(value_parser!(f32)) 41 | .help("Ranges with coverage larger than this value will be merged"), 42 | ) 43 | .arg( 44 | Arg::new("verbose") 45 | .long("verbose") 46 | .short('v') 47 | .action(ArgAction::SetTrue) 48 | .help("Enable verbose mode"), 49 | ) 50 | .arg( 51 | Arg::new("outfile") 52 | .long("outfile") 53 | .short('o') 54 | .num_args(1) 55 | .default_value("stdout") 56 | .help("Output filename. 
[stdout] for screen"), 57 | ) 58 | } 59 | 60 | // command implementation 61 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 62 | //---------------------------- 63 | // Loading 64 | //---------------------------- 65 | let opt_coverage = *args.get_one::("coverage").unwrap(); 66 | let is_verbose = args.get_flag("verbose"); 67 | 68 | // store graph separately by chromosomes 69 | // petgraph use NodeIndex to store and identify nodes 70 | let mut graph_of_chr: HashMap> = HashMap::new(); 71 | 72 | // cache ranges 73 | let mut range_of_part: HashMap = HashMap::new(); 74 | // cache node indices 75 | let mut idx_of_part: HashMap = HashMap::new(); 76 | 77 | // all chromosomes 78 | let mut chrs: HashSet = HashSet::new(); 79 | 80 | // Load ranges from input files 81 | for infile in args.get_many::("infiles").unwrap() { 82 | let reader = reader(infile); 83 | for line in reader.lines().map_while(Result::ok) { 84 | for part in line.split('\t') { 85 | let range = Range::from_str(part); 86 | if !range.is_valid() { 87 | continue; 88 | } 89 | 90 | if range_of_part.contains_key(part) { 91 | continue; 92 | } 93 | 94 | let chr = range.chr(); 95 | graph_of_chr 96 | .entry(chr.to_string()) 97 | .or_insert_with(Graph::new_undirected); 98 | chrs.insert(chr.to_string()); 99 | 100 | let idx = graph_of_chr 101 | .get_mut(chr) 102 | .unwrap() 103 | .add_node(part.to_string()); 104 | idx_of_part.insert(part.to_string(), idx); 105 | 106 | range_of_part.insert(part.to_string(), range); 107 | } 108 | } // end of line 109 | } // end of file 110 | let mut chrs = chrs.into_iter().collect::>(); 111 | chrs.sort(); 112 | 113 | //---------------------------- 114 | // Checking coverages 115 | //---------------------------- 116 | for chr in &chrs { 117 | if is_verbose { 118 | eprintln!("Chromosome {}", chr); 119 | } 120 | 121 | let graph = graph_of_chr.get_mut(chr).unwrap(); 122 | let indices = graph.node_indices().collect::>(); 123 | 124 | for i in 0..indices.len() { 125 | let node_i = 
graph.node_weight(indices[i]).unwrap(); 126 | let intspan_i = range_of_part[node_i].intspan(); 127 | if is_verbose { 128 | eprintln!(" Range {}/{}\t{}", i, indices.len(), node_i); 129 | } 130 | 131 | for j in i + 1..indices.len() { 132 | let node_j = graph.node_weight(indices[j]).unwrap(); 133 | let intspan_j = range_of_part[node_j].intspan(); 134 | 135 | let intersect = intspan_i.intersect(&intspan_j); 136 | if !intersect.is_empty() { 137 | let coverage_i = 138 | intersect.cardinality() as f32 / intspan_i.cardinality() as f32; 139 | let coverage_j = 140 | intersect.cardinality() as f32 / intspan_j.cardinality() as f32; 141 | 142 | if coverage_i >= opt_coverage && coverage_j >= opt_coverage { 143 | if is_verbose { 144 | eprintln!( 145 | " Merge with Range {}/{}\t{}", 146 | j, 147 | indices.len(), 148 | node_j 149 | ); 150 | } 151 | graph.add_edge(indices[i], indices[j], ()); 152 | } 153 | } 154 | } 155 | } 156 | } 157 | 158 | //---------------------------- 159 | // Merging 160 | //---------------------------- 161 | let mut out_lines: Vec = Vec::new(); 162 | for chr in &chrs { 163 | let graph = graph_of_chr.get(chr).unwrap(); 164 | 165 | let scc: Vec> = petgraph::algo::tarjan_scc(graph); 166 | for cc_indices in &scc { 167 | if cc_indices.len() < 2 { 168 | continue; 169 | } 170 | 171 | if is_verbose { 172 | eprintln!("Chromosome {}: Merge {} ranges", chr, cc_indices.len()); 173 | } 174 | 175 | // connected ranges 176 | let mut part_list = cc_indices 177 | .iter() 178 | .map(|idx| graph.node_weight(*idx).unwrap().clone()) 179 | .collect::>(); 180 | part_list.sort(); 181 | 182 | // collect info for merged range 183 | let mut intspan = IntSpan::new(); 184 | for part in &part_list { 185 | let range = range_of_part.get(part).unwrap(); 186 | intspan.merge(&range.intspan()); 187 | } 188 | 189 | // create merged range 190 | let merged: String = format!("{}(+):{}", chr, intspan); 191 | 192 | for part in &part_list { 193 | if *part == merged { 194 | continue; 195 | } 196 | 197 
| let out_line = format!("{}\t{}", part, merged); 198 | if is_verbose { 199 | eprintln!("{}", out_line); 200 | } 201 | out_lines.push(out_line); 202 | } 203 | } 204 | } 205 | 206 | //---------------------------- 207 | // Output 208 | //---------------------------- 209 | write_lines(args.get_one::("outfile").unwrap(), &out_lines)?; 210 | 211 | Ok(()) 212 | } 213 | -------------------------------------------------------------------------------- /src/cmd_rgr/mod.rs: -------------------------------------------------------------------------------- 1 | //! Subcommand modules for the `rgr` binary. 2 | 3 | pub mod count; 4 | pub mod dedup; 5 | pub mod field; 6 | pub mod filter; 7 | pub mod keep; 8 | pub mod md; 9 | pub mod merge; 10 | pub mod pl_2rmp; 11 | pub mod prop; 12 | pub mod replace; 13 | pub mod runlist; 14 | pub mod select; 15 | pub mod sort; 16 | pub mod span; 17 | -------------------------------------------------------------------------------- /src/cmd_rgr/prop.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use std::ffi::OsStr; 3 | use std::io::BufRead; 4 | use std::path::Path; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("prop") 9 | .about("Proportion of the ranges intersecting a runlist file") 10 | .after_help( 11 | r###" 12 | * Lines without a valid range will not be output 13 | * Appended fields 14 | * `prop` 15 | * `length`: length of the range (if `--full` is set) 16 | * `size`: size of the intersection (if `--full` is set) 17 | 18 | Example: 19 | 20 | rgr prop tests/rgr/intergenic.json tests/rgr/S288c.rg 21 | 22 | rgr prop tests/rgr/intergenic.json tests/rgr/ctg.range.tsv -H -f 3 --prefix --full 23 | 24 | "###, 25 | ) 26 | .arg( 27 | Arg::new("runlist") 28 | .required(true) 29 | .index(1) 30 | .num_args(1) 31 | .help("Runlist file to calculate intersections against"), 32 | ) 33 | .arg( 34 | Arg::new("infiles") 35 | .required(true) 36 | 
.index(2) 37 | .num_args(1..) 38 | .help("Input files to process. Multiple files can be specified"), 39 | ) 40 | .arg( 41 | Arg::new("header") 42 | .long("header") 43 | .short('H') 44 | .action(ArgAction::SetTrue) 45 | .help("Treat the first line of each file as a header"), 46 | ) 47 | .arg( 48 | Arg::new("sharp") 49 | .long("sharp") 50 | .short('s') 51 | .action(ArgAction::SetTrue) 52 | .help("Include lines starting with `#` without changes (default: ignore them)"), 53 | ) 54 | .arg( 55 | Arg::new("field") 56 | .long("field") 57 | .short('f') 58 | .num_args(1) 59 | .value_parser(value_parser!(usize)) 60 | .help("Index of the range field. If not set, the first valid range will be used"), 61 | ) 62 | .arg( 63 | Arg::new("full") 64 | .long("full") 65 | .action(ArgAction::SetTrue) 66 | .help("Also append `length` and `size` fields"), 67 | ) 68 | .arg( 69 | Arg::new("prefix") 70 | .long("prefix") 71 | .action(ArgAction::SetTrue) 72 | .help("Prefix the basename of the runlist file if `--header` is set"), 73 | ) 74 | .arg( 75 | Arg::new("outfile") 76 | .long("outfile") 77 | .short('o') 78 | .num_args(1) 79 | .default_value("stdout") 80 | .help("Output filename. 
[stdout] for screen"), 81 | ) 82 | } 83 | 84 | // command implementation 85 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 86 | //---------------------------- 87 | // Args 88 | //---------------------------- 89 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 90 | 91 | let is_sharp = args.get_flag("sharp"); 92 | let is_header = args.get_flag("header"); 93 | 94 | let opt_idx_range = args.get_one::("field").copied().unwrap_or(0); 95 | 96 | let is_full = args.get_flag("full"); 97 | let is_prefix = args.get_flag("prefix"); 98 | 99 | //---------------------------- 100 | // Loading 101 | //---------------------------- 102 | let json = intspan::read_json(args.get_one::("runlist").unwrap()); 103 | let set = intspan::json2set(&json); 104 | 105 | //---------------------------- 106 | // Ops 107 | //---------------------------- 108 | for infile in args.get_many::("infiles").unwrap() { 109 | let reader = intspan::reader(infile); 110 | 'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() { 111 | // Handle the header line 112 | if is_header && i == 0 { 113 | if is_prefix { 114 | let prefix = Path::new(args.get_one::("runlist").unwrap()) 115 | .file_stem() 116 | .and_then(OsStr::to_str) 117 | .unwrap() 118 | .split('.') 119 | .next() 120 | .unwrap() 121 | .to_string(); 122 | if is_full { 123 | writer.write_fmt(format_args!( 124 | "{}\t{}{}\t{}{}\t{}{}\n", 125 | line, prefix, "Prop", prefix, "Length", prefix, "Size" 126 | ))?; 127 | } else { 128 | writer.write_fmt(format_args!("{}\t{}{}\n", line, prefix, "Prop"))?; 129 | } 130 | } else if is_full { 131 | writer.write_fmt(format_args!( 132 | "{}\t{}\t{}\t{}\n", 133 | line, "prop", "length", "size" 134 | ))?; 135 | } else { 136 | writer.write_fmt(format_args!("{}\t{}\n", line, "prop"))?; 137 | } 138 | 139 | continue 'LINE; 140 | } 141 | 142 | // Handle lines starting with '#' 143 | if line.starts_with('#') { 144 | if is_sharp { 145 | writer.write_fmt(format_args!("{}\n", 
line))?; 146 | } 147 | continue 'LINE; 148 | } 149 | 150 | let rg = match intspan::extract_rg(&line, opt_idx_range) { 151 | // Extract the range 152 | Some(range) => range, 153 | // Skip lines without a valid range 154 | None => continue 'LINE, 155 | }; 156 | 157 | // Calculate intersection 158 | let chr = rg.chr(); 159 | let mut intspan = intspan::IntSpan::new(); 160 | intspan.add_pair(*rg.start(), *rg.end()); 161 | 162 | let (prop, length, size) = if set.contains_key(chr) { 163 | let intxn = set.get(chr).unwrap().intersect(&intspan); 164 | let prop = intxn.cardinality() as f32 / intspan.cardinality() as f32; 165 | (prop, intspan.cardinality(), intxn.cardinality()) 166 | } else { 167 | (0.0, intspan.cardinality(), 0) 168 | }; 169 | 170 | //---------------------------- 171 | // Output 172 | //---------------------------- 173 | if is_full { 174 | writer.write_fmt(format_args!( 175 | "{}\t{:.4}\t{}\t{}\n", 176 | line, prop, length, size 177 | ))?; 178 | } else { 179 | writer.write_fmt(format_args!("{}\t{:.4}\n", line, prop))?; 180 | } 181 | } 182 | } 183 | 184 | Ok(()) 185 | } 186 | -------------------------------------------------------------------------------- /src/cmd_rgr/replace.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use std::collections::HashMap; 3 | use std::io::BufRead; 4 | 5 | // Create clap subcommand arguments 6 | pub fn make_subcommand() -> Command { 7 | Command::new("replace") 8 | .about("Replace fields in a .tsv file using a replacement map") 9 | .after_help( 10 | r###" 11 | Examples: 12 | 13 | # Replace fields 14 | rgr replace tests/rgr/1_4.ovlp.tsv tests/rgr/1_4.replace.tsv 15 | 16 | # Reverse the replacement map (To--From instead of From--To) 17 | rgr replace tests/rgr/1_4.ovlp.tsv tests/rgr/1_4.replace.tsv -r 18 | 19 | "###, 20 | ) 21 | .arg( 22 | Arg::new("infile") 23 | .required(true) 24 | .num_args(1) 25 | .index(1) 26 | .help("Input file to process"), 27 | ) 28 | .arg( 29 | 
Arg::new("replace") 30 | .required(true) 31 | .num_args(1) 32 | .index(2) 33 | .help("Replacement map file with two columns: From and To"), 34 | ) 35 | .arg( 36 | Arg::new("reverse") 37 | .long("reverse") 38 | .short('r') 39 | .action(ArgAction::SetTrue) 40 | .help("Use the replacement map in reverse order (To--From instead of From--To)"), 41 | ) 42 | .arg( 43 | Arg::new("outfile") 44 | .long("outfile") 45 | .short('o') 46 | .num_args(1) 47 | .default_value("stdout") 48 | .help("Output filename. [stdout] for screen"), 49 | ) 50 | } 51 | 52 | // command implementation 53 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 54 | //---------------------------- 55 | // Args 56 | //---------------------------- 57 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 58 | let reader = intspan::reader(args.get_one::("infile").unwrap()); 59 | 60 | //---------------------------- 61 | // Load replacements 62 | //---------------------------- 63 | let mut replaces: HashMap = HashMap::new(); 64 | for line in intspan::read_lines(args.get_one::("replace").unwrap()) { 65 | let parts: Vec<&str> = line.split('\t').collect(); 66 | if parts.len() == 2 { 67 | if args.get_flag("reverse") { 68 | replaces.insert(parts[1].to_string(), parts[0].to_string()); 69 | } else { 70 | replaces.insert(parts[0].to_string(), parts[1].to_string()); 71 | } 72 | } 73 | } 74 | 75 | //---------------------------- 76 | // Output 77 | //---------------------------- 78 | for line in reader.lines().map_while(Result::ok) { 79 | let fields: Vec<&str> = line.split('\t').collect(); 80 | let mut out: Vec<&str> = vec![]; 81 | 82 | for f in fields { 83 | if let Some(replacement) = replaces.get(f) { 84 | out.push(replacement); 85 | } else { 86 | out.push(f); 87 | } 88 | } 89 | 90 | writer.write_all((out.join("\t") + "\n").as_ref())?; 91 | } 92 | 93 | Ok(()) 94 | } 95 | -------------------------------------------------------------------------------- /src/cmd_rgr/runlist.rs: 
-------------------------------------------------------------------------------- 1 | use clap::*; 2 | use std::io::BufRead; 3 | 4 | // Create clap subcommand arguments 5 | pub fn make_subcommand() -> Command { 6 | Command::new("runlist") 7 | .about("Filter .rg and .tsv files by comparing with a runlist file") 8 | .after_help( 9 | r###" 10 | * Lines without a valid range will not be output 11 | 12 | Examples: 13 | 14 | # Filter lines that overlap with the runlist 15 | rgr runlist tests/rgr/intergenic.json tests/rgr/S288c.rg --op overlap 16 | 17 | # Filter lines that overlap with the runlist in a TSV file with headers 18 | rgr runlist tests/rgr/intergenic.json tests/rgr/ctg.range.tsv --op overlap -H -f 3 19 | 20 | "###, 21 | ) 22 | .arg( 23 | Arg::new("runlist") 24 | .required(true) 25 | .index(1) 26 | .num_args(1) 27 | .help("Set the runlist file to use"), 28 | ) 29 | .arg( 30 | Arg::new("infiles") 31 | .required(true) 32 | .index(2) 33 | .num_args(1..) 34 | .help("Input files to process. Multiple files can be specified"), 35 | ) 36 | .arg( 37 | Arg::new("header") 38 | .long("header") 39 | .short('H') 40 | .action(ArgAction::SetTrue) 41 | .help("Treat the first line of each file as a header"), 42 | ) 43 | .arg( 44 | Arg::new("sharp") 45 | .long("sharp") 46 | .short('s') 47 | .action(ArgAction::SetTrue) 48 | .help("Preserve lines starting with a `#` without changes. The default is to ignore them"), 49 | ) 50 | .arg( 51 | Arg::new("field") 52 | .long("field") 53 | .short('f') 54 | .num_args(1) 55 | .value_parser(value_parser!(usize)) 56 | .help("Index of the range field. 
If not set, the first valid range will be used"), 57 | ) 58 | .arg( 59 | Arg::new("op") 60 | .long("op") 61 | .num_args(1) 62 | .action(ArgAction::Set) 63 | .value_parser([ 64 | builder::PossibleValue::new("overlap"), 65 | builder::PossibleValue::new("non-overlap"), 66 | builder::PossibleValue::new("superset"), 67 | ]) 68 | .default_value("overlap") 69 | .help("Filter operation: overlap, non-overlap or superset"), 70 | ) 71 | .arg( 72 | Arg::new("outfile") 73 | .long("outfile") 74 | .short('o') 75 | .num_args(1) 76 | .default_value("stdout") 77 | .help("Output filename. [stdout] for screen"), 78 | ) 79 | } 80 | 81 | // command implementation 82 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 83 | //---------------------------- 84 | // Args 85 | //---------------------------- 86 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 87 | 88 | let opt_op = args.get_one::("op").unwrap().as_str(); 89 | 90 | let is_sharp = args.get_flag("sharp"); 91 | let is_header = args.get_flag("header"); 92 | 93 | let opt_idx_range = args.get_one::("field").copied().unwrap_or(0); 94 | 95 | //---------------------------- 96 | // Loading 97 | //---------------------------- 98 | let json = intspan::read_json(args.get_one::("runlist").unwrap()); 99 | let set = intspan::json2set(&json); 100 | 101 | //---------------------------- 102 | // Ops 103 | //---------------------------- 104 | for infile in args.get_many::("infiles").unwrap() { 105 | let reader = intspan::reader(infile); 106 | 'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() { 107 | // Handle the header line 108 | if is_header && i == 0 { 109 | writer.write_fmt(format_args!("{}\n", line))?; 110 | continue 'LINE; 111 | } 112 | 113 | // Handle lines starting with '#' 114 | if line.starts_with('#') { 115 | if is_sharp { 116 | writer.write_fmt(format_args!("{}\n", line))?; 117 | } 118 | continue 'LINE; 119 | } 120 | 121 | let rg = match intspan::extract_rg(&line, opt_idx_range) { 
122 | // Extract the range 123 | Some(range) => range, 124 | // Skip lines without a valid range 125 | None => continue 'LINE, 126 | }; 127 | 128 | // Prepare the range for comparison 129 | let chr = rg.chr(); 130 | let mut intspan = intspan::IntSpan::new(); 131 | intspan.add_pair(*rg.start(), *rg.end()); 132 | 133 | //---------------------------- 134 | // Output 135 | //---------------------------- 136 | match opt_op { 137 | "overlap" => { 138 | if set.contains_key(chr) 139 | && !set.get(chr).unwrap().intersect(&intspan).is_empty() 140 | { 141 | writer.write_fmt(format_args!("{}\n", line))?; 142 | } 143 | } 144 | "non-overlap" => { 145 | if set.contains_key(chr) { 146 | if set.get(chr).unwrap().intersect(&intspan).is_empty() { 147 | writer.write_fmt(format_args!("{}\n", line))?; 148 | } 149 | } else { 150 | writer.write_fmt(format_args!("{}\n", line))?; 151 | } 152 | } 153 | "superset" => { 154 | if set.contains_key(chr) && set.get(chr).unwrap().superset(&intspan) { 155 | writer.write_fmt(format_args!("{}\n", line))?; 156 | } 157 | } 158 | _ => unreachable!("Invalid operation: {}", opt_op), 159 | }; 160 | } 161 | } 162 | 163 | Ok(()) 164 | } 165 | -------------------------------------------------------------------------------- /src/cmd_rgr/select.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use std::collections::HashMap; 3 | use std::io::BufRead; 4 | 5 | // Create clap subcommand arguments 6 | pub fn make_subcommand() -> Command { 7 | Command::new("select") 8 | .about("Select fields in the order listed") 9 | .after_help( 10 | r###" 11 | * Fields can be specified by field number or field name. 12 | * Field names must not be specified as a valid IntSpan runlist. 13 | For example, avoid using formats like `1`, `2-6`, or `-`. 
14 | 15 | Examples: 16 | # Selects fields 6 and 1 from the input file, treating the first line as a header 17 | rgr select tests/rgr/ctg.tsv -H -f 6,1 18 | 19 | # Selects fields `ID` and `length` by names 20 | rgr select tests/rgr/ctg.tsv -H -f ID,length 21 | 22 | "###, 23 | ) 24 | .arg( 25 | Arg::new("infiles") 26 | .required(true) 27 | .num_args(1..) 28 | .index(1) 29 | .help("Input file to process"), 30 | ) 31 | .arg( 32 | Arg::new("header") 33 | .long("header") 34 | .short('H') 35 | .action(ArgAction::SetTrue) 36 | .help("Treat the first line of each file as a header"), 37 | ) 38 | .arg( 39 | Arg::new("sharp") 40 | .long("sharp") 41 | .short('s') 42 | .action(ArgAction::SetTrue) 43 | .help("Preserve lines starting with a `#` without changes. The default is to ignore them"), 44 | ) 45 | .arg( 46 | Arg::new("fields") 47 | .long("fields") 48 | .short('f') 49 | .num_args(1) 50 | .help("Writes selected fields and the generated range field, in the order listed"), 51 | ) 52 | .arg( 53 | Arg::new("outfile") 54 | .long("outfile") 55 | .short('o') 56 | .num_args(1) 57 | .default_value("stdout") 58 | .help("Output filename. 
[stdout] for screen"), 59 | ) 60 | } 61 | 62 | // command implementation 63 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 64 | //---------------------------- 65 | // Args 66 | //---------------------------- 67 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 68 | 69 | let is_header = args.get_flag("header"); 70 | let is_sharp = args.get_flag("sharp"); 71 | 72 | //---------------------------- 73 | // Ops 74 | //---------------------------- 75 | for infile in args.get_many::("infiles").unwrap() { 76 | let reader = intspan::reader(infile); 77 | let mut fields: Vec = vec![]; 78 | 79 | 'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() { 80 | let parts: Vec<&str> = line.split('\t').collect(); 81 | 82 | // Handle the header line 83 | if i == 0 { 84 | if is_header { 85 | let idx_of: HashMap = parts 86 | .iter() 87 | .enumerate() 88 | .map(|(i, field)| (field.to_string(), i + 1)) 89 | .collect(); 90 | 91 | if args.contains_id("fields") { 92 | fields = intspan::named_field_to_idx( 93 | args.get_one::("fields").unwrap(), 94 | &idx_of, 95 | ) 96 | .unwrap() 97 | }; 98 | } else if args.contains_id("fields") { 99 | fields = intspan::ints_to_idx(args.get_one::("fields").unwrap()); 100 | } 101 | 102 | if fields.is_empty() { 103 | writer.write_fmt(format_args!("{}\n", line))?; 104 | } else { 105 | let selected: Vec = fields 106 | .iter() 107 | .map(|e| parts.get(*e - 1).unwrap().to_string()) 108 | .collect(); 109 | 110 | writer.write_fmt(format_args!("{}\n", selected.join("\t")))?; 111 | } 112 | continue 'LINE; 113 | } 114 | 115 | if line.starts_with('#') { 116 | if is_sharp { 117 | writer.write_fmt(format_args!("{}\n", line))?; 118 | } 119 | continue 'LINE; 120 | } 121 | 122 | //---------------------------- 123 | // Output 124 | //---------------------------- 125 | let new_line: String = if fields.is_empty() { 126 | parts.join("\t").to_string() 127 | } else { 128 | let selected: Vec = fields 129 | .iter() 130 | .map(|e| 
parts.get(*e - 1).unwrap().to_string()) 131 | .collect(); 132 | 133 | selected.join("\t") 134 | }; 135 | 136 | writer.write_fmt(format_args!("{}\n", new_line))?; 137 | } 138 | } 139 | 140 | Ok(()) 141 | } 142 | -------------------------------------------------------------------------------- /src/cmd_rgr/sort.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use itertools::Itertools; 3 | use std::collections::BTreeMap; 4 | use std::io::BufRead; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("sort") 9 | .about("Sort .rg and .tsv files by a range field") 10 | .after_help( 11 | r###" 12 | * If no part of the line is a valid range, the line will be written to to the end of the output 13 | 14 | * Using `--group` can improve performance on large datasets by grouping rows before sorting. 15 | * The group_key can be chr_id, ctg_id, etc. 16 | 17 | Example: 18 | 19 | # Sort a .rg file 20 | rgr sort tests/rgr/S288c.rg 21 | 22 | # Sort a .tsv file by the first valid range 23 | rgr sort tests/rgr/ctg.range.tsv 24 | 25 | # Sort a .tsv file by a specific range field and treat the first line as a header 26 | rgr sort tests/rgr/ctg.range.tsv -H -f 3 27 | 28 | "###, 29 | ) 30 | .arg( 31 | Arg::new("infiles") 32 | .required(true) 33 | .num_args(1..) 34 | .index(1) 35 | .help("Input files to process. Multiple files can be specified"), 36 | ) 37 | .arg( 38 | Arg::new("header") 39 | .long("header") 40 | .short('H') 41 | .action(ArgAction::SetTrue) 42 | .help("Treat the first line of each file as a header"), 43 | ) 44 | .arg( 45 | Arg::new("field") 46 | .long("field") 47 | .short('f') 48 | .num_args(1) 49 | .value_parser(value_parser!(usize)) 50 | .help("Index of the range field. 
If not set, the first valid range will be used"), 51 | ) 52 | .arg( 53 | Arg::new("group") 54 | .long("group") 55 | .short('g') 56 | .num_args(1) 57 | .value_parser(value_parser!(usize)) 58 | .help("Group the rows by this field and then sort within each group"), 59 | ) 60 | .arg( 61 | Arg::new("outfile") 62 | .long("outfile") 63 | .short('o') 64 | .num_args(1) 65 | .default_value("stdout") 66 | .help("Output filename. [stdout] for screen"), 67 | ) 68 | } 69 | 70 | // command implementation 71 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 72 | //---------------------------- 73 | // Options 74 | //---------------------------- 75 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 76 | 77 | let is_header = args.get_flag("header"); 78 | 79 | let opt_idx_range = args.get_one::("field").copied().unwrap_or(0); 80 | let opt_idx_group = args.get_one::("group").copied().unwrap_or(0); 81 | 82 | //---------------------------- 83 | // Loading 84 | //---------------------------- 85 | let mut line_to_rg: BTreeMap = BTreeMap::new(); 86 | let mut invalids: Vec = vec![]; 87 | 88 | for infile in args.get_many::("infiles").unwrap() { 89 | let reader = intspan::reader(infile); 90 | 'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() { 91 | // Handle the header line 92 | if is_header && i == 0 { 93 | writer.write_fmt(format_args!("{}\n", line))?; 94 | continue 'LINE; 95 | } 96 | 97 | // Extract the range 98 | if let Some(range) = intspan::extract_rg(&line, opt_idx_range) { 99 | // Store the line and its range 100 | line_to_rg.insert(line.clone(), range); 101 | } else { 102 | // No valid range found 103 | invalids.push(line.clone()); 104 | } 105 | } 106 | } 107 | 108 | //---------------------------- 109 | // Sorting 110 | //---------------------------- 111 | let mut sorted: Vec = vec![]; 112 | 113 | if opt_idx_group == 0 { 114 | // Sort all lines together 115 | sorted = line_to_rg.keys().map(|e| e.to_string()).collect(); 116 | 117 | 
sorted.sort_by_cached_key(|k| { 118 | let range = line_to_rg.get(k).unwrap(); 119 | (range.chr().clone(), range.start(), range.strand().clone()) 120 | }); 121 | } else { 122 | // Group lines by the specified field, then sort within each group 123 | let mut lines_of: BTreeMap> = BTreeMap::new(); 124 | 125 | for line in line_to_rg.keys() { 126 | let parts: Vec<&str> = line.split('\t').collect(); 127 | 128 | let group_key = parts.get(opt_idx_group - 1).unwrap(); 129 | lines_of 130 | .entry(group_key.to_string()) 131 | .or_default() 132 | .push(line.clone()); 133 | } 134 | 135 | for group_key in lines_of.keys().sorted() { 136 | let mut lines = lines_of.get(group_key).unwrap().clone(); 137 | 138 | lines.sort_by_cached_key(|k| { 139 | let range = line_to_rg.get(k).unwrap(); 140 | (range.chr().clone(), range.start(), range.strand().clone()) 141 | }); 142 | sorted.extend(lines); 143 | } 144 | } 145 | 146 | //---------------------------- 147 | // Output 148 | //---------------------------- 149 | for line in &sorted { 150 | writer.write_fmt(format_args!("{}\n", line))?; 151 | } 152 | for line in &invalids { 153 | writer.write_fmt(format_args!("{}\n", line))?; 154 | } 155 | 156 | Ok(()) 157 | } 158 | -------------------------------------------------------------------------------- /src/cmd_rgr/span.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use std::io::{BufRead, Write}; 3 | 4 | // Create clap subcommand arguments 5 | pub fn make_subcommand() -> Command { 6 | Command::new("span") 7 | .about("Operate spans in .tsv/.rg file") 8 | .after_help( 9 | r###" 10 | This command is similar to `spanr span`, but the represent chromosome ranges. 11 | 12 | List of Operations 13 | 14 | * General Ops (both, 5p, or 3p) 15 | * trim: Remove `N` integers from the ends of the range. 16 | * pad: Add `N` integers to the ends of the range. 17 | * Directional Ops (5p or 3p) 18 | * shift: Shift a range by N toward the 5p or 3p end. 
19 | * flank: Retrieve flank regions of size `N` from the range. 20 | * Size-based Ops 21 | * excise: Remove any ranges that are smaller than `N`. 22 | 23 | "###, 24 | ) 25 | .arg( 26 | Arg::new("infiles") 27 | .required(true) 28 | .num_args(1..) 29 | .index(1) 30 | .help("Input files to process. Multiple files can be specified."), 31 | ) 32 | .arg( 33 | Arg::new("header") 34 | .long("header") 35 | .short('H') 36 | .action(ArgAction::SetTrue) 37 | .help("Treat the first line of each file as a header"), 38 | ) 39 | .arg( 40 | Arg::new("sharp") 41 | .long("sharp") 42 | .short('s') 43 | .action(ArgAction::SetTrue) 44 | .help("Include lines starting with `#` without changes (default: ignore them)"), 45 | ) 46 | .arg( 47 | Arg::new("field") 48 | .long("field") 49 | .short('f') 50 | .value_parser(value_parser!(usize)) 51 | .num_args(1) 52 | .help("Index of the range field. If not set, the first valid range will be used"), 53 | ) 54 | .arg( 55 | Arg::new("op") 56 | .long("op") 57 | .num_args(1) 58 | .action(ArgAction::Set) 59 | .value_parser([ 60 | builder::PossibleValue::new("trim"), 61 | builder::PossibleValue::new("pad"), 62 | builder::PossibleValue::new("shift"), 63 | builder::PossibleValue::new("flank"), 64 | builder::PossibleValue::new("excise"), 65 | ]) 66 | .default_value("trim") 67 | .help("Select the operation to perform"), 68 | ) 69 | .arg( 70 | Arg::new("mode") 71 | .long("mode") 72 | .short('m') 73 | .num_args(1) 74 | .action(ArgAction::Set) 75 | .value_parser([ 76 | builder::PossibleValue::new("both"), 77 | builder::PossibleValue::new("5p"), 78 | builder::PossibleValue::new("3p"), 79 | ]) 80 | .default_value("both") 81 | .help("Mode of the operation"), 82 | ) 83 | .arg( 84 | Arg::new("number") 85 | .long("number") 86 | .short('n') 87 | .num_args(1) 88 | .value_parser(value_parser!(i32)) 89 | .default_value("0") 90 | .help("Number of integers to trim, pad, shift, or flank"), 91 | ) 92 | .arg( 93 | Arg::new("append") 94 | .long("append") 95 | .short('a') 96 | 
.action(ArgAction::SetTrue) 97 | .help("Append a field for the new range (default: only write the new range)"), 98 | ) 99 | .arg( 100 | Arg::new("outfile") 101 | .long("outfile") 102 | .short('o') 103 | .num_args(1) 104 | .default_value("stdout") 105 | .help("Output filename. [stdout] for screen"), 106 | ) 107 | } 108 | 109 | // command implementation 110 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 111 | //---------------------------- 112 | // Args 113 | //---------------------------- 114 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 115 | 116 | let is_header = args.get_flag("header"); 117 | let is_sharp = args.get_flag("sharp"); 118 | 119 | let opt_idx_range = args.get_one::("field").copied().unwrap_or(0); 120 | 121 | let opt_op = args.get_one::("op").unwrap().as_str(); 122 | let opt_mode = args.get_one::("mode").unwrap().as_str(); 123 | let opt_number = *args.get_one::("number").unwrap(); 124 | 125 | let is_append = args.get_flag("append"); 126 | 127 | //---------------------------- 128 | // Ops 129 | //---------------------------- 130 | for infile in args.get_many::("infiles").unwrap() { 131 | let reader = intspan::reader(infile); 132 | 'LINE: for (i, line) in reader.lines().map_while(Result::ok).enumerate() { 133 | // Handle the header line 134 | if is_header && i == 0 { 135 | if is_append { 136 | writer.write_fmt(format_args!("{}\t{}\n", line, "rg"))?; 137 | } else { 138 | writer.write_fmt(format_args!("{}\n", "rg"))?; 139 | } 140 | continue 'LINE; 141 | } 142 | 143 | // Handle lines starting with '#' 144 | if line.starts_with('#') { 145 | if is_sharp { 146 | writer.write_fmt(format_args!("{}\n", line))?; 147 | } 148 | continue 'LINE; 149 | } 150 | 151 | let rg = match intspan::extract_rg(&line, opt_idx_range) { 152 | // Extract the range 153 | Some(range) => range, 154 | // Skip lines without a valid range 155 | None => continue 'LINE, 156 | }; 157 | 158 | let new = match opt_op { 159 | "trim" => match opt_mode { 160 
| "5p" => rg.trim_5p(opt_number), 161 | "3p" => rg.trim_3p(opt_number), 162 | _ => rg.trim(opt_number), 163 | }, 164 | "pad" => match opt_mode { 165 | "5p" => rg.trim_5p(-opt_number), 166 | "3p" => rg.trim_3p(-opt_number), 167 | _ => rg.trim(-opt_number), 168 | }, 169 | "shift" => match opt_mode { 170 | "5p" => rg.shift_5p(opt_number), 171 | "3p" => rg.shift_3p(opt_number), 172 | _ => unreachable!("Invalid mode for shift operation"), 173 | }, 174 | "flank" => match opt_mode { 175 | "5p" => rg.flank_5p(opt_number), 176 | "3p" => rg.flank_3p(opt_number), 177 | _ => unreachable!("Invalid mode for flank operation"), 178 | }, 179 | "excise" => { 180 | if rg.intspan().size() >= opt_number { 181 | rg.clone() 182 | } else { 183 | intspan::Range::new() 184 | } 185 | } 186 | _ => unreachable!("Invalid Op"), 187 | }; 188 | 189 | //---------------------------- 190 | // Output 191 | //---------------------------- 192 | let new_line: String = if is_append { 193 | format!("{}\t{}", line, new) 194 | } else { 195 | new.to_string() 196 | }; 197 | 198 | writer.write_fmt(format_args!("{}\n", new_line))?; 199 | } 200 | } 201 | 202 | Ok(()) 203 | } 204 | -------------------------------------------------------------------------------- /src/cmd_spanr/combine.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use serde_json::Value; 4 | use std::collections::BTreeMap; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("combine") 9 | .about("Combine multiple sets of runlists in a json file") 10 | .after_help( 11 | r###" 12 | It's expected that the JSON file contains multiple sets of runlists, 13 | otherwise this command will make no effects 14 | 15 | "###, 16 | ) 17 | .arg( 18 | Arg::new("infile") 19 | .required(true) 20 | .index(1) 21 | .help("Sets the input file to use"), 22 | ) 23 | .arg( 24 | Arg::new("op") 25 | .long("op") 26 | .num_args(1) 27 | 
.default_value("union") 28 | .value_parser(clap::builder::NonEmptyStringValueParser::new()) 29 | .help("Operations: intersect, union, diff or xor"), 30 | ) 31 | .arg( 32 | Arg::new("outfile") 33 | .long("outfile") 34 | .short('o') 35 | .num_args(1) 36 | .default_value("stdout") 37 | .help("Output filename. [stdout] for screen"), 38 | ) 39 | } 40 | 41 | // command implementation 42 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 43 | //---------------------------- 44 | // Loading 45 | //---------------------------- 46 | let json: BTreeMap = read_json(args.get_one::("infile").unwrap()); 47 | let s_of = json2set_m(&json); 48 | let chrs = chrs_in_sets(&s_of); 49 | 50 | let op = args.get_one::("op").unwrap().as_str(); 51 | 52 | //---------------------------- 53 | // Operating 54 | //---------------------------- 55 | let mut res: BTreeMap = BTreeMap::new(); 56 | fill_up_s(&mut res, &chrs); 57 | 58 | let names: Vec<_> = s_of.keys().cloned().collect(); 59 | let first = names[0].clone(); 60 | 61 | for name in names { 62 | let set = s_of.get(name.as_str()).unwrap(); 63 | for chr in set.keys() { 64 | if name == first { 65 | let intspan = set.get(chr).unwrap(); 66 | res.entry(chr.to_string()).and_modify(|e| e.merge(intspan)); 67 | } else { 68 | let mut intspan_op = res.get(chr).unwrap().copy(); 69 | intspan_op = match op { 70 | "intersect" => intspan_op.intersect(set.get(chr).unwrap()), 71 | "diff" => intspan_op.diff(set.get(chr).unwrap()), 72 | "union" => intspan_op.union(set.get(chr).unwrap()), 73 | "xor" => intspan_op.xor(set.get(chr).unwrap()), 74 | _ => panic!("Invalid IntSpan Op"), 75 | }; 76 | // eprintln!("Op {}: {}", op, intspan_op.to_string()); 77 | res.insert(chr.into(), intspan_op); 78 | } 79 | } 80 | } 81 | 82 | //---------------------------- 83 | // Output 84 | //---------------------------- 85 | let out_json = set2json(&res); 86 | write_json(args.get_one::("outfile").unwrap(), &out_json)?; 87 | 88 | Ok(()) 89 | } 90 | 
-------------------------------------------------------------------------------- /src/cmd_spanr/compare.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use serde_json::Value; 4 | use std::collections::BTreeMap; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("compare") 9 | .about("Compare one JSON file against others") 10 | .after_help("Only the *first* file can contain multiple sets of runlists") 11 | .arg( 12 | Arg::new("infile") 13 | .required(true) 14 | .index(1) 15 | .help("Sets the input file to use"), 16 | ) 17 | .arg( 18 | Arg::new("infiles") 19 | .required(true) 20 | .index(2) 21 | .num_args(1..) 22 | .help("Sets the input file to use"), 23 | ) 24 | .arg( 25 | Arg::new("op") 26 | .long("op") 27 | .num_args(1) 28 | .default_value("intersect") 29 | .value_parser(clap::builder::NonEmptyStringValueParser::new()) 30 | .help("Operations: intersect, union, diff or xor"), 31 | ) 32 | .arg( 33 | Arg::new("outfile") 34 | .long("outfile") 35 | .short('o') 36 | .num_args(1) 37 | .default_value("stdout") 38 | .help("Output filename. 
[stdout] for screen"), 39 | ) 40 | } 41 | 42 | // command implementation 43 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 44 | //---------------------------- 45 | // Loading 46 | //---------------------------- 47 | // first file 48 | let json: BTreeMap = read_json(args.get_one::("infile").unwrap()); 49 | let is_multi: bool = json.values().next().unwrap().is_object(); 50 | let mut s1_of = json2set_m(&json); 51 | 52 | // second file or more 53 | let mut s2s = vec![]; 54 | 55 | for infile in args.get_many::("infiles").unwrap() { 56 | let json_s = read_json(infile); 57 | let s2 = json2set(&json_s); 58 | s2s.push(s2); 59 | } 60 | 61 | let op = args.get_one::("op").unwrap().as_str(); 62 | 63 | //---------------------------- 64 | // Operating 65 | //---------------------------- 66 | // give empty intspan to non-existed chrs 67 | let mut chrs = chrs_in_sets(&s1_of); 68 | for s2 in &s2s { 69 | for chr in s2.keys() { 70 | chrs.insert(chr.to_string()); 71 | } 72 | } 73 | fill_up_m(&mut s1_of, &chrs); 74 | 75 | for s2 in s2s.iter_mut() { 76 | fill_up_s(s2, &chrs); 77 | } 78 | 79 | let mut res_of: BTreeMap> = BTreeMap::new(); 80 | for (name, s1) in &s1_of { 81 | let mut res: BTreeMap = BTreeMap::new(); 82 | for chr in s1.keys() { 83 | let mut intspan_op = s1.get(chr).unwrap().copy(); 84 | for s2 in s2s.iter() { 85 | intspan_op = match op { 86 | "intersect" => intspan_op.intersect(s2.get(chr).unwrap()), 87 | "diff" => intspan_op.diff(s2.get(chr).unwrap()), 88 | "union" => intspan_op.union(s2.get(chr).unwrap()), 89 | "xor" => intspan_op.xor(s2.get(chr).unwrap()), 90 | _ => panic!("Invalid IntSpan Op"), 91 | }; 92 | // eprintln!("Op {}: {}", op, intspan_op.to_string()); 93 | } 94 | res.insert(chr.into(), intspan_op); 95 | } 96 | res_of.insert(name.into(), res); 97 | } 98 | 99 | //---------------------------- 100 | // Output 101 | //---------------------------- 102 | let out_json = if is_multi { 103 | set2json_m(&res_of) 104 | } else { 105 | 
set2json(res_of.get("__single").unwrap()) 106 | }; 107 | write_json(args.get_one::("outfile").unwrap(), &out_json)?; 108 | 109 | Ok(()) 110 | } 111 | -------------------------------------------------------------------------------- /src/cmd_spanr/convert.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | 3 | // Create clap subcommand arguments 4 | pub fn make_subcommand() -> Command { 5 | Command::new("convert") 6 | .about("Convert runlist file to ranges file") 7 | .arg( 8 | Arg::new("infiles") 9 | .required(true) 10 | .num_args(1..) 11 | .index(1) 12 | .help("Set the input files to use"), 13 | ) 14 | .arg( 15 | Arg::new("longest") 16 | .long("longest") 17 | .action(ArgAction::SetTrue) 18 | .help("Only keep the longest range"), 19 | ) 20 | .arg( 21 | Arg::new("outfile") 22 | .long("outfile") 23 | .short('o') 24 | .num_args(1) 25 | .default_value("stdout") 26 | .help("Output filename. [stdout] for screen"), 27 | ) 28 | } 29 | 30 | // command implementation 31 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 32 | //---------------------------- 33 | // Args 34 | //---------------------------- 35 | let is_longest = args.get_flag("longest"); 36 | let mut writer = intspan::writer(args.get_one::("outfile").unwrap()); 37 | 38 | //---------------------------- 39 | // Ops 40 | //---------------------------- 41 | for infile in args.get_many::("infiles").unwrap() { 42 | let json = intspan::read_json(infile); 43 | let set_of = intspan::json2set_m(&json); 44 | 45 | for set in set_of.values() { 46 | for chr in set.keys() { 47 | let ints = set.get(chr).unwrap(); 48 | let mut intses = ints.intses(); 49 | 50 | //---------------------------- 51 | // Output 52 | //---------------------------- 53 | if is_longest { 54 | if !intses.is_empty() { 55 | // Negate the value for descending order 56 | intses.sort_by_cached_key(|e| -e.size()); 57 | let longest = intses.first().unwrap(); 58 | writer.write_all(format!("{}:{}\n", chr, 
longest).as_ref())?; 59 | } 60 | } else { 61 | for sub in &intses { 62 | writer.write_all(format!("{}:{}\n", chr, sub).as_ref())?; 63 | } 64 | } 65 | } 66 | } 67 | } 68 | 69 | Ok(()) 70 | } 71 | -------------------------------------------------------------------------------- /src/cmd_spanr/cover.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use std::collections::BTreeMap; 4 | use std::io::BufRead; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("cover") 9 | .about("Output covers on chromosomes") 10 | .after_help( 11 | r###" 12 | Like command `combine`, but are chromosome ranges 13 | 14 | I:1-100 15 | I(+):90-150 # Strand will be omitted 16 | S288c.I(-):190-200 # Species name will be omitted 17 | 18 | "###, 19 | ) 20 | .arg( 21 | Arg::new("infiles") 22 | .required(true) 23 | .num_args(1..) 24 | .index(1) 25 | .help("Set the input files to use"), 26 | ) 27 | .arg( 28 | Arg::new("outfile") 29 | .long("outfile") 30 | .short('o') 31 | .num_args(1) 32 | .default_value("stdout") 33 | .help("Output filename. 
[stdout] for screen"), 34 | ) 35 | } 36 | 37 | // command implementation 38 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 39 | //---------------------------- 40 | // Loading 41 | //---------------------------- 42 | 43 | // seq_name => IntSpan 44 | let mut set: BTreeMap = BTreeMap::new(); 45 | 46 | for infile in args.get_many::("infiles").unwrap() { 47 | let reader = reader(infile); 48 | for line in reader.lines().map_while(Result::ok) { 49 | let range = Range::from_str(&line); 50 | if !range.is_valid() { 51 | continue; 52 | } 53 | let chr = range.chr(); 54 | if !set.contains_key(chr) { 55 | set.insert(chr.clone(), IntSpan::new()); 56 | } 57 | 58 | set.entry(chr.to_string()) 59 | .and_modify(|e| e.add_pair(*range.start(), *range.end())); 60 | } 61 | } 62 | 63 | //---------------------------- 64 | // Output 65 | //---------------------------- 66 | let out_json = set2json(&set); 67 | write_json(args.get_one::("outfile").unwrap(), &out_json)?; 68 | 69 | Ok(()) 70 | } 71 | -------------------------------------------------------------------------------- /src/cmd_spanr/coverage.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use rust_lapper::{Interval, Lapper}; 4 | use std::collections::BTreeMap; 5 | use std::io::BufRead; 6 | 7 | // Interval: represent a range from [start, stop), carrying val 8 | type Iv = Interval; // the first type should be Unsigned 9 | 10 | // Create clap subcommand arguments 11 | pub fn make_subcommand() -> Command { 12 | Command::new("coverage") 13 | .about("Output minimum or detailed depth of coverage on chromosomes") 14 | .arg( 15 | Arg::new("infiles") 16 | .required(true) 17 | .num_args(1..) 
18 | .index(1) 19 | .help("Set the input file to use"), 20 | ) 21 | .arg( 22 | Arg::new("minimum") 23 | .long("minimum") 24 | .short('m') 25 | .value_parser(value_parser!(i32)) 26 | .num_args(1) 27 | .default_value("1") 28 | .help("Set the minimum depth of coverage"), 29 | ) 30 | .arg( 31 | Arg::new("detailed") 32 | .long("detailed") 33 | .short('d') 34 | .action(ArgAction::SetTrue) 35 | .help("Output detailed depth"), 36 | ) 37 | .arg( 38 | Arg::new("outfile") 39 | .long("outfile") 40 | .short('o') 41 | .num_args(1) 42 | .default_value("stdout") 43 | .help("Output filename. [stdout] for screen"), 44 | ) 45 | } 46 | 47 | // command implementation 48 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 49 | //---------------------------- 50 | // Loading 51 | //---------------------------- 52 | let minimum = *args.get_one::("minimum").unwrap(); 53 | let is_detailed = args.get_flag("detailed"); 54 | 55 | // seq_name => Vector of Intervals 56 | let mut iv_of: BTreeMap> = BTreeMap::new(); 57 | 58 | for infile in args.get_many::("infiles").unwrap() { 59 | let reader = reader(infile); 60 | for line in reader.lines().map_while(Result::ok) { 61 | if line.starts_with('#') { 62 | continue; 63 | } 64 | let range = Range::from_str(&line); 65 | if !range.is_valid() { 66 | continue; 67 | } 68 | let chr = range.chr(); 69 | if !iv_of.contains_key(chr) { 70 | let ivs: Vec = vec![]; 71 | iv_of.insert(chr.clone(), ivs); 72 | } 73 | 74 | let iv = Iv { 75 | start: *range.start() as u32, 76 | stop: *range.end() as u32 + 1, 77 | val: 0, 78 | }; 79 | 80 | iv_of.entry(chr.to_string()).and_modify(|e| e.push(iv)); 81 | } 82 | } 83 | 84 | //---------------------------- 85 | // Output 86 | //---------------------------- 87 | if is_detailed { 88 | // Multi 89 | let mut set_of: BTreeMap> = BTreeMap::new(); 90 | 91 | for chr in iv_of.keys() { 92 | let lapper = Lapper::new(iv_of.get(chr).unwrap().to_owned()); 93 | let ivs = lapper.depth().collect::>>(); 94 | 95 | // depth => IntSpan 96 | let 
mut intspan_of: BTreeMap = BTreeMap::new(); 97 | 98 | for iv in ivs { 99 | let depth = iv.val as i32; 100 | if depth < minimum { 101 | continue; 102 | } 103 | 104 | let depth = format!("{}", depth); 105 | 106 | if !set_of.contains_key(&depth) { 107 | set_of.insert(depth.clone(), BTreeMap::new()); 108 | } 109 | 110 | if !intspan_of.contains_key(&depth) { 111 | intspan_of.insert(depth.clone(), IntSpan::new()); 112 | } 113 | 114 | intspan_of 115 | .entry(depth) 116 | .and_modify(|e| e.add_pair(iv.start as i32, iv.stop as i32 - 1)); 117 | } 118 | 119 | for depth in intspan_of.keys() { 120 | set_of 121 | .get_mut(depth) 122 | .unwrap() 123 | .insert(chr.clone(), intspan_of.get(depth).unwrap().clone()); 124 | } 125 | } 126 | 127 | let out_json = set2json_m(&set_of); 128 | write_json(args.get_one::("outfile").unwrap(), &out_json)?; 129 | } else { 130 | // Single 131 | // chr => IntSpan 132 | let mut set: BTreeMap = BTreeMap::new(); 133 | 134 | for chr in iv_of.keys() { 135 | let lapper = Lapper::new(iv_of.get(chr).unwrap().to_owned()); 136 | let ivs = lapper.depth().collect::>>(); 137 | 138 | let mut intspan = IntSpan::new(); 139 | for iv in ivs { 140 | let depth = iv.val as i32; 141 | if depth < minimum { 142 | continue; 143 | } 144 | 145 | intspan.add_pair(iv.start as i32, iv.stop as i32 - 1); 146 | } 147 | 148 | set.insert(chr.to_string(), intspan); 149 | } 150 | 151 | let out_json = set2json(&set); 152 | write_json(args.get_one::("outfile").unwrap(), &out_json)?; 153 | } 154 | 155 | Ok(()) 156 | } 157 | -------------------------------------------------------------------------------- /src/cmd_spanr/genome.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use serde_json::Value; 4 | use std::collections::BTreeMap; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("genome") 9 | .about("Convert chr.size to runlists") 10 | .arg( 11 | 
Arg::new("infile") 12 | .required(true) 13 | .index(1) 14 | .help("Sets the input file to use"), 15 | ) 16 | .arg( 17 | Arg::new("outfile") 18 | .long("outfile") 19 | .short('o') 20 | .num_args(1) 21 | .default_value("stdout") 22 | .help("Output filename. [stdout] for screen"), 23 | ) 24 | } 25 | 26 | // command implementation 27 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 28 | //---------------------------- 29 | // Loading 30 | //---------------------------- 31 | let sizes = read_sizes(args.get_one::("infile").unwrap()); 32 | 33 | //---------------------------- 34 | // Operating 35 | //---------------------------- 36 | let mut json: BTreeMap = BTreeMap::new(); 37 | for (key, value) in sizes { 38 | let mut intspan = IntSpan::new(); 39 | intspan.add_pair(1, value); 40 | json.insert(key, intspan.to_string().into()); 41 | } 42 | 43 | //---------------------------- 44 | // Output 45 | //---------------------------- 46 | write_json(args.get_one::("outfile").unwrap(), &json)?; 47 | 48 | Ok(()) 49 | } 50 | -------------------------------------------------------------------------------- /src/cmd_spanr/gff.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use std::collections::BTreeMap; 4 | use std::io::BufRead; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("gff") 9 | .about("Convert gff3 to covers on chromosomes") 10 | .arg( 11 | Arg::new("infiles") 12 | .required(true) 13 | .num_args(1..) 14 | .index(1) 15 | .help("Set the input files to use"), 16 | ) 17 | .arg( 18 | Arg::new("tag") 19 | .long("tag") 20 | .num_args(1) 21 | .help("primary tag (the third field)"), 22 | ) 23 | .arg( 24 | Arg::new("outfile") 25 | .long("outfile") 26 | .short('o') 27 | .num_args(1) 28 | .default_value("stdout") 29 | .help("Output filename. 
[stdout] for screen"), 30 | ) 31 | } 32 | 33 | // command implementation 34 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 35 | //---------------------------- 36 | // Loading 37 | //---------------------------- 38 | let mut res: BTreeMap = BTreeMap::new(); 39 | let tag = if args.contains_id("tag") { 40 | args.get_one::("tag").unwrap().as_str() 41 | } else { 42 | "" 43 | }; 44 | 45 | for infile in args.get_many::("infiles").unwrap() { 46 | let reader = reader(infile); 47 | for line in reader.lines().map_while(Result::ok) { 48 | if line.starts_with('#') { 49 | continue; 50 | } 51 | 52 | let fields: Vec<&str> = line.split('\t').collect(); 53 | if fields.len() < 8 { 54 | continue; 55 | } 56 | 57 | let feature = fields[2]; 58 | if !tag.is_empty() && feature != tag { 59 | continue; 60 | } 61 | 62 | let chr = fields[0]; 63 | let start = fields[3].parse::().unwrap(); 64 | let end = fields[4].parse::().unwrap(); 65 | 66 | if !res.contains_key(chr) { 67 | let intspan = IntSpan::new(); 68 | res.insert(chr.to_string(), intspan); 69 | } 70 | res.entry(chr.to_string()) 71 | .and_modify(|e| e.add_pair(start, end)); 72 | } 73 | } 74 | 75 | //---------------------------- 76 | // Output 77 | //---------------------------- 78 | let out_json = set2json(&res); 79 | write_json(args.get_one::("outfile").unwrap(), &out_json)?; 80 | 81 | Ok(()) 82 | } 83 | -------------------------------------------------------------------------------- /src/cmd_spanr/merge.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use serde_json::Value; 4 | use std::collections::BTreeMap; 5 | use std::ffi::OsStr; 6 | use std::path::Path; 7 | 8 | // Create clap subcommand arguments 9 | pub fn make_subcommand() -> Command { 10 | Command::new("merge") 11 | .about("Merge runlist json files") 12 | .arg( 13 | Arg::new("infiles") 14 | .required(true) 15 | .num_args(1..) 
16 | .index(1) 17 | .help("Set the input files to use"), 18 | ) 19 | .arg( 20 | Arg::new("all") 21 | .long("all") 22 | .action(ArgAction::SetTrue) 23 | .help("All parts of file_stem (aka basename), except the last one"), 24 | ) 25 | .arg( 26 | Arg::new("outfile") 27 | .long("outfile") 28 | .short('o') 29 | .num_args(1) 30 | .default_value("stdout") 31 | .help("Output filename. [stdout] for screen"), 32 | ) 33 | } 34 | 35 | // command implementation 36 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 37 | //---------------------------- 38 | // Loading 39 | //---------------------------- 40 | let mut out_json: BTreeMap = BTreeMap::new(); 41 | 42 | let is_all = args.get_flag("all"); 43 | 44 | for infile in args.get_many::("infiles").unwrap() { 45 | let json = read_json(infile); 46 | 47 | let key = if is_all { 48 | Path::new(infile) 49 | .file_stem() 50 | .and_then(OsStr::to_str) 51 | .unwrap() 52 | .to_string() 53 | } else { 54 | Path::new(infile) 55 | .file_stem() 56 | .and_then(OsStr::to_str) 57 | .unwrap() 58 | .split('.') 59 | .next() 60 | .unwrap() 61 | .to_string() 62 | }; 63 | out_json.insert(key, serde_json::to_value(json).unwrap()); 64 | } 65 | 66 | //---------------------------- 67 | // Output 68 | //---------------------------- 69 | write_json(args.get_one::("outfile").unwrap(), &out_json)?; 70 | 71 | Ok(()) 72 | } 73 | -------------------------------------------------------------------------------- /src/cmd_spanr/mod.rs: -------------------------------------------------------------------------------- 1 | //! Subcommand modules for the `spanr` binary. 
2 | 3 | pub mod combine; 4 | pub mod compare; 5 | pub mod convert; 6 | pub mod cover; 7 | pub mod coverage; 8 | pub mod genome; 9 | pub mod gff; 10 | pub mod merge; 11 | pub mod some; 12 | pub mod span; 13 | pub mod split; 14 | pub mod stat; 15 | pub mod statop; 16 | -------------------------------------------------------------------------------- /src/cmd_spanr/some.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use serde_json::Value; 4 | use std::collections::{BTreeMap, BTreeSet}; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("some") 9 | .about("Extract some records from a runlist json file") 10 | .arg( 11 | Arg::new("infile") 12 | .required(true) 13 | .num_args(1) 14 | .index(1) 15 | .help("Sets the input file to use"), 16 | ) 17 | .arg( 18 | Arg::new("list") 19 | .required(true) 20 | .num_args(1) 21 | .index(2) 22 | .help("Sets the input file to use"), 23 | ) 24 | .arg( 25 | Arg::new("outfile") 26 | .long("outfile") 27 | .short('o') 28 | .num_args(1) 29 | .default_value("stdout") 30 | .help("Output filename. 
[stdout] for screen"), 31 | ) 32 | } 33 | 34 | // command implementation 35 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 36 | //---------------------------- 37 | // Loading 38 | //---------------------------- 39 | let json: BTreeMap = read_json(args.get_one::("infile").unwrap()); 40 | 41 | let mut names: BTreeSet = BTreeSet::new(); 42 | for line in read_lines(args.get_one::("list").unwrap()) { 43 | names.insert(line); 44 | } 45 | 46 | //---------------------------- 47 | // Operating 48 | //---------------------------- 49 | let mut out_json: BTreeMap = BTreeMap::new(); 50 | for (key, value) in &json { 51 | if names.contains(key) { 52 | out_json.insert(key.into(), value.clone()); 53 | } 54 | } 55 | 56 | //---------------------------- 57 | // Output 58 | //---------------------------- 59 | write_json(args.get_one::("outfile").unwrap(), &out_json)?; 60 | 61 | Ok(()) 62 | } 63 | -------------------------------------------------------------------------------- /src/cmd_spanr/span.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use serde_json::Value; 4 | use std::collections::BTreeMap; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("span") 9 | .about("Operate spans in a JSON file") 10 | .after_help( 11 | r###" 12 | List of operations 13 | 14 | * cover: a single span from min to max 15 | * holes: all the holes in runlist 16 | * trim: remove N integers from each end of each span of runlist 17 | * pad: add N integers from each end of each span of runlist 18 | * excise: remove all spans smaller than N 19 | * fill: fill in all holes smaller than or equals to N 20 | 21 | "###, 22 | ) 23 | .arg( 24 | Arg::new("infile") 25 | .required(true) 26 | .index(1) 27 | .help("Sets the input file to use"), 28 | ) 29 | .arg( 30 | Arg::new("op") 31 | .long("op") 32 | .num_args(1) 33 | .action(ArgAction::Set) 34 | .value_parser([ 35 | 
builder::PossibleValue::new("cover"), 36 | builder::PossibleValue::new("holes"), 37 | builder::PossibleValue::new("trim"), 38 | builder::PossibleValue::new("pad"), 39 | builder::PossibleValue::new("excise"), 40 | builder::PossibleValue::new("fill"), 41 | ]) 42 | .default_value("cover") 43 | .help("Operations"), 44 | ) 45 | .arg( 46 | Arg::new("number") 47 | .long("number") 48 | .short('n') 49 | .num_args(1) 50 | .value_parser(value_parser!(i32)) 51 | .default_value("0"), 52 | ) 53 | .arg( 54 | Arg::new("outfile") 55 | .long("outfile") 56 | .short('o') 57 | .num_args(1) 58 | .default_value("stdout") 59 | .help("Output filename. [stdout] for screen"), 60 | ) 61 | } 62 | 63 | // command implementation 64 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 65 | //---------------------------- 66 | // Args 67 | //---------------------------- 68 | let json: BTreeMap = read_json(args.get_one::("infile").unwrap()); 69 | let is_multi: bool = json.values().next().unwrap().is_object(); 70 | let set_of = json2set_m(&json); 71 | 72 | let opt_op = args.get_one::("op").unwrap().as_str(); 73 | let opt_number = *args.get_one::("number").unwrap(); 74 | 75 | //---------------------------- 76 | // Ops 77 | //---------------------------- 78 | let mut res_of: BTreeMap> = BTreeMap::new(); 79 | for (name, set) in &set_of { 80 | let mut res: BTreeMap = BTreeMap::new(); 81 | for chr in set.keys() { 82 | let intspan = match opt_op { 83 | "cover" => set.get(chr).unwrap().cover(), 84 | "holes" => set.get(chr).unwrap().holes(), 85 | "trim" => set.get(chr).unwrap().trim(opt_number), 86 | "pad" => set.get(chr).unwrap().pad(opt_number), 87 | "excise" => set.get(chr).unwrap().excise(opt_number), 88 | "fill" => set.get(chr).unwrap().fill(opt_number), 89 | _ => unreachable!("Invalid IntSpan Op"), 90 | }; 91 | // println!("Op {}: {}", op, op_intspan.to_string()); 92 | res.insert(chr.into(), intspan); 93 | } 94 | res_of.insert(name.into(), res); 95 | } 96 | 97 | //---------------------------- 98 
| // Output 99 | //---------------------------- 100 | let out_json = if is_multi { 101 | set2json_m(&res_of) 102 | } else { 103 | set2json(res_of.get("__single").unwrap()) 104 | }; 105 | write_json(args.get_one::("outfile").unwrap(), &out_json)?; 106 | 107 | Ok(()) 108 | } 109 | -------------------------------------------------------------------------------- /src/cmd_spanr/split.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use serde_json::Value; 4 | use std::collections::BTreeMap; 5 | use std::fs; 6 | use std::path::Path; 7 | 8 | // Create clap subcommand arguments 9 | pub fn make_subcommand() -> Command { 10 | Command::new("split") 11 | .about("Split a runlist json file") 12 | .arg( 13 | Arg::new("infile") 14 | .help("Sets the input file to use") 15 | .required(true) 16 | .index(1), 17 | ) 18 | .arg( 19 | Arg::new("suffix") 20 | .long("suffix") 21 | .short('s') 22 | .num_args(1) 23 | .default_value(".json") 24 | .value_parser(clap::builder::NonEmptyStringValueParser::new()) 25 | .help("Extensions of output files"), 26 | ) 27 | .arg( 28 | Arg::new("outdir") 29 | .short('o') 30 | .long("outdir") 31 | .num_args(1) 32 | .default_value("stdout") 33 | .value_parser(clap::builder::NonEmptyStringValueParser::new()) 34 | .help("Output location. 
[stdout] for screen"), 35 | ) 36 | } 37 | 38 | // command implementation 39 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 40 | //---------------------------- 41 | // Loading 42 | //---------------------------- 43 | let json: BTreeMap = read_json(args.get_one::("infile").unwrap()); 44 | 45 | let outdir = args.get_one::("outdir").unwrap(); 46 | if outdir != "stdout" { 47 | fs::create_dir_all(outdir)?; 48 | } 49 | 50 | let suffix = args.get_one::("suffix").unwrap(); 51 | 52 | //---------------------------- 53 | // Operating 54 | //---------------------------- 55 | for (key, value) in &json { 56 | if !value.is_object() { 57 | panic!("Not a valid multi-key runlist json file"); 58 | } 59 | 60 | let string = serde_json::to_string(value).unwrap(); 61 | 62 | //---------------------------- 63 | // Output 64 | //---------------------------- 65 | if outdir == "stdout" { 66 | write_lines("stdout", &vec![string])?; 67 | } else { 68 | let path = Path::new(outdir).join(key.to_owned() + suffix); 69 | fs::write(path, string + "\n")?; 70 | } 71 | } 72 | 73 | Ok(()) 74 | } 75 | -------------------------------------------------------------------------------- /src/cmd_spanr/stat.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use intspan::*; 3 | use serde_json::Value; 4 | use std::collections::BTreeMap; 5 | 6 | // Create clap subcommand arguments 7 | pub fn make_subcommand() -> Command { 8 | Command::new("stat") 9 | .about("Coverage on chromosomes for runlists") 10 | .arg( 11 | Arg::new("chr.sizes") 12 | .required(true) 13 | .index(1) 14 | .help("Sets the input file to use"), 15 | ) 16 | .arg( 17 | Arg::new("infile") 18 | .required(true) 19 | .index(2) 20 | .help("Sets the input file to use"), 21 | ) 22 | .arg( 23 | Arg::new("all") 24 | .long("all") 25 | .action(ArgAction::SetTrue) 26 | .help("Only write whole genome stats"), 27 | ) 28 | .arg( 29 | Arg::new("outfile") 30 | .long("outfile") 31 | .short('o') 32 | 
.num_args(1) 33 | .default_value("stdout") 34 | .help("Output filename. [stdout] for screen"), 35 | ) 36 | } 37 | 38 | // command implementation 39 | pub fn execute(args: &ArgMatches) -> anyhow::Result<()> { 40 | //---------------------------- 41 | // Loading 42 | //---------------------------- 43 | let sizes = read_sizes(args.get_one::("chr.sizes").unwrap()); 44 | 45 | let json: BTreeMap = read_json(args.get_one::("infile").unwrap()); 46 | let is_multi: bool = json.values().next().unwrap().is_object(); 47 | 48 | let set_of = json2set_m(&json); 49 | 50 | let is_all = args.get_flag("all"); 51 | 52 | //---------------------------- 53 | // Operating 54 | //---------------------------- 55 | let mut lines: Vec = Vec::new(); // Avoid lifetime problems 56 | let mut header = "key,chr,chrLength,size,coverage".to_string(); 57 | 58 | if is_multi { 59 | if is_all { 60 | header = header.replace("chr,", ""); 61 | } 62 | lines.push(header); 63 | 64 | for (name, set) in &set_of { 65 | let key_lines = csv_lines(set, &sizes, is_all, Some(name)); 66 | lines.push(key_lines); 67 | } 68 | } else { 69 | header = header.replace("key,", ""); 70 | if is_all { 71 | header = header.replace("chr,", ""); 72 | } 73 | lines.push(header); 74 | 75 | let key_lines = csv_lines(set_of.get("__single").unwrap(), &sizes, is_all, None); 76 | lines.push(key_lines); 77 | } 78 | 79 | //---------------------------- 80 | // Output 81 | //---------------------------- 82 | write_lines(args.get_one::("outfile").unwrap(), &lines)?; 83 | 84 | Ok(()) 85 | } 86 | 87 | fn csv_lines( 88 | set: &BTreeMap, 89 | sizes: &BTreeMap, 90 | is_all: bool, 91 | prefix: Option<&str>, 92 | ) -> String { 93 | let mut lines = String::new(); 94 | 95 | let mut all_length: i64 = 0; 96 | let mut all_size: i64 = 0; 97 | for chr in set.keys() { 98 | let length = *sizes.get(chr).unwrap(); 99 | let size = set.get(chr).unwrap().cardinality(); 100 | let line = format!( 101 | "{},{},{},{:.4}\n", 102 | chr, 103 | length, 104 | size, 105 | size 
as f32 / length as f32 106 | ); 107 | if let Some(s) = prefix { 108 | lines.push_str(format!("{},", s).as_str()) 109 | }; 110 | lines.push_str(line.as_str()); 111 | 112 | all_length += length as i64; 113 | all_size += size as i64; 114 | } 115 | 116 | let mut all_line = format!( 117 | "{},{},{},{:.4}\n", 118 | "all", 119 | all_length, 120 | all_size, 121 | all_size as f64 / all_length as f64 122 | ); 123 | // only keep whole genome 124 | if is_all { 125 | lines = String::new(); 126 | all_line = all_line.replace("all,", ""); 127 | } 128 | if let Some(s) = prefix { 129 | all_line.insert_str(0, format!("{},", s).as_str()) 130 | }; 131 | lines.push_str(all_line.as_str()); 132 | 133 | // Remove last LF, as write_lines will append one 134 | lines.trim_end().to_string() 135 | } 136 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate lazy_static; 3 | 4 | mod libs; 5 | mod utils; 6 | 7 | pub use crate::libs::coverage::*; 8 | pub use crate::libs::intspan::*; 9 | pub use crate::libs::linalg::*; 10 | pub use crate::libs::matrix::*; 11 | pub use crate::libs::range::*; 12 | 13 | pub use crate::utils::*; 14 | -------------------------------------------------------------------------------- /src/libs/coverage.rs: -------------------------------------------------------------------------------- 1 | use crate::IntSpan; 2 | use std::collections::BTreeMap; 3 | 4 | #[derive(Default, Clone)] 5 | pub struct Coverage { 6 | max: i32, 7 | tiers: BTreeMap, 8 | } 9 | 10 | impl Coverage { 11 | pub fn max(&self) -> &i32 { 12 | &self.max 13 | } 14 | pub fn tiers(&self) -> &BTreeMap { 15 | &self.tiers 16 | } 17 | 18 | pub fn new(max: i32) -> Self { 19 | Self::new_len(max, 1_000_000_000) 20 | } 21 | 22 | pub fn new_len(max: i32, len: i32) -> Self { 23 | let mut tiers: BTreeMap = BTreeMap::new(); 24 | tiers.insert(-1, IntSpan::from_pair(1, 
len)); 25 | tiers.insert(0, IntSpan::from_pair(1, len)); 26 | 27 | for i in 1..=max { 28 | tiers.insert(i, IntSpan::new()); 29 | } 30 | 31 | Self { max, tiers } 32 | } 33 | 34 | fn begin_end(begin: i32, end: i32) -> (i32, i32) { 35 | let mut tup = (begin.min(end), begin.max(end)); 36 | 37 | if tup.0 == 0 { 38 | tup.0 = 1; 39 | } 40 | 41 | tup 42 | } 43 | 44 | /// ``` 45 | /// # use intspan::Coverage; 46 | /// let mut cover = Coverage::new(1); 47 | /// cover.bump(1, 100); 48 | /// cover.bump(90, 150); 49 | /// assert_eq!(cover.tiers().get(&1).unwrap().to_string(), "1-150"); 50 | /// # assert_eq!(cover.tiers().get(&0).unwrap().to_string(), "151-1000000000"); 51 | /// 52 | /// let mut cover = Coverage::new_len(1, 500); 53 | /// cover.bump(1, 100); 54 | /// cover.bump(90, 150); 55 | /// assert_eq!(cover.tiers().get(&1).unwrap().to_string(), "1-150"); 56 | /// # assert_eq!(cover.tiers().get(&0).unwrap().to_string(), "151-500"); 57 | /// # assert_eq!(cover.tiers().get(&-1).unwrap().to_string(), "1-500"); 58 | /// ``` 59 | pub fn bump(&mut self, begin: i32, end: i32) { 60 | let tup = Self::begin_end(begin, end); 61 | let mut intspan = IntSpan::from_pair(tup.0, tup.1); 62 | 63 | // reach max coverage in full sequence 64 | if self 65 | .tiers 66 | .get(&-1) 67 | .unwrap() 68 | .equals(self.tiers.get(&self.max).unwrap()) 69 | { 70 | return; 71 | } 72 | 73 | // remove intspan from uncovered regions 74 | self.tiers.entry(0).and_modify(|e| e.subtract(&intspan)); 75 | 76 | for i in 1..=self.max { 77 | let intersect = self.tiers.get(&i).unwrap().intersect(&intspan); 78 | self.tiers.entry(i).and_modify(|e| e.merge(&intspan)); 79 | 80 | if i + 1 > self.max { 81 | break; 82 | } 83 | 84 | intspan = intersect.copy(); 85 | } 86 | } 87 | 88 | /// ``` 89 | /// # use intspan::Coverage; 90 | /// let mut cover = Coverage::new(2); 91 | /// cover.bump(1, 100); 92 | /// cover.bump(90, 150); 93 | /// assert_eq!(cover.max_tier().to_string(), "90-100"); 94 | /// 95 | /// let mut cover = 
Coverage::new(5); 96 | /// cover.bump(1, 100); 97 | /// cover.bump(90, 150); 98 | /// assert_eq!(cover.max_tier().to_string(), "-"); 99 | /// ``` 100 | pub fn max_tier(&self) -> IntSpan { 101 | self.tiers().get(self.max()).unwrap().copy() 102 | } 103 | 104 | /// ``` 105 | /// # use intspan::Coverage; 106 | /// let mut cover = Coverage::new(2); 107 | /// cover.bump(1, 100); 108 | /// cover.bump(90, 150); 109 | /// 110 | /// assert_eq!(cover.uniq_tiers().get(&2).unwrap().to_string(), "90-100"); 111 | /// 112 | /// assert_eq!(cover.tiers().get(&1).unwrap().to_string(), "1-150"); 113 | /// assert_eq!(cover.uniq_tiers().get(&1).unwrap().to_string(), "1-89,101-150"); 114 | /// ``` 115 | pub fn uniq_tiers(&self) -> BTreeMap { 116 | let mut tiers = self.tiers.clone(); 117 | 118 | for i in 1..self.max { 119 | let intspan_next = tiers[&(i + 1)].copy(); 120 | tiers.entry(i).and_modify(|e| e.subtract(&intspan_next)); 121 | } 122 | 123 | tiers 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/libs/linalg.rs: -------------------------------------------------------------------------------- 1 | /// Computes the sum of all elements in a vector `a`. 2 | /// 3 | /// # Arguments 4 | /// * `a` - The vector. 5 | /// 6 | /// # Returns 7 | /// The sum of all elements in `a`. 8 | /// 9 | /// # Examples 10 | /// ``` 11 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 12 | /// let sum_value = intspan::sum(&a); 13 | /// assert_eq!(sum_value, 55.0); 14 | /// ``` 15 | pub fn sum(a: &[f32]) -> f32 { 16 | a.iter().sum() 17 | } 18 | 19 | /// Computes the mean (average) of a vector `a`. 20 | /// 21 | /// # Arguments 22 | /// * `a` - The vector. 23 | /// 24 | /// # Returns 25 | /// The mean of the vector `a`. 
26 | /// 27 | /// # Examples 28 | /// ``` 29 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 30 | /// let mean_value = intspan::mean(&a); 31 | /// assert_eq!(mean_value, 5.5); 32 | /// ``` 33 | pub fn mean(a: &[f32]) -> f32 { 34 | sum(a) / a.len() as f32 35 | } 36 | 37 | /// Computes the Pearson correlation coefficient between two vectors `a` and `b`. 38 | /// 39 | /// Two equivalent formulas: 40 | /// 41 | /// 1. Using deviations from mean (implemented here for better numerical stability): 42 | /// `$r = \frac{\sum(x - \bar{x})(y - \bar{y})}{\sqrt{\sum(x - \bar{x})^2\sum(y - \bar{y})^2}}$` 43 | /// 44 | /// 2. Direct computation: 45 | /// `$r = \frac{n\sum xy - \sum x\sum y}{\sqrt{(n\sum x^2 - (\sum x)^2)(n\sum y^2 - (\sum y)^2)}}$` 46 | /// 47 | /// where `$\bar{x}$` and `$\bar{y}$` are the means of vectors `$x$` and `$y$` respectively, 48 | /// and `$n$` is the length of the vectors. 49 | /// 50 | /// Note: Formula 1 is used in this implementation because it: 51 | /// * Reduces the risk of numerical overflow by centering the data 52 | /// * Provides better numerical stability for large values 53 | /// 54 | /// # Arguments 55 | /// * `a` - The first vector. 56 | /// * `b` - The second vector. 57 | /// 58 | /// # Returns 59 | /// The Pearson correlation coefficient between `a` and `b`. 60 | /// If either vector is empty or their lengths do not match, returns `NaN`. 
61 | /// 62 | /// # Examples 63 | /// ``` 64 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 65 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]; 66 | /// let correlation = intspan::pearson_correlation(&a, &b); 67 | /// assert_eq!(format!("{:.4}", correlation), "-1.0000".to_string()); // Perfect negative correlation 68 | /// 69 | /// let empty: [f32; 0] = []; 70 | /// assert!(intspan::pearson_correlation(&empty, &empty).is_nan()); // Check handling of empty vectors 71 | /// ``` 72 | pub fn pearson_correlation(a: &[f32], b: &[f32]) -> f32 { 73 | if a.len() != b.len() || a.is_empty() { 74 | return f32::NAN; // Return NaN if lengths do not match or vectors are empty 75 | } 76 | 77 | // Compute means of a and b 78 | let mean_a = mean(a); 79 | let mean_b = mean(b); 80 | 81 | let numerator = a 82 | .iter() 83 | .zip(b.iter()) 84 | .map(|(a, b)| (a - mean_a) * (b - mean_b)) 85 | .sum::(); 86 | 87 | let denom1 = a.iter().map(|a| (a - mean_a).powi(2)).sum::().sqrt(); 88 | 89 | let denom2 = b.iter().map(|b| (b - mean_b).powi(2)).sum::().sqrt(); 90 | 91 | numerator / (denom1 * denom2) 92 | } 93 | 94 | /// Computes the Jaccard intersection of two vectors `a` and `b`. 95 | /// The Jaccard intersection is the sum of the minimum values of corresponding elements. 96 | /// 97 | /// # Arguments 98 | /// * `a` - The first vector. 99 | /// * `b` - The second vector. 100 | /// 101 | /// # Returns 102 | /// The Jaccard intersection of `a` and `b`. 
103 | /// 104 | /// # Examples 105 | /// ``` 106 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 107 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]; 108 | /// let intersection = intspan::jaccard_intersection(&a, &b); 109 | /// assert_eq!(intersection, 30.0); 110 | /// ``` 111 | pub fn jaccard_intersection(a: &[f32], b: &[f32]) -> f32 { 112 | a.iter().zip(b.iter()).map(|(x, y)| f32::min(*x, *y)).sum() 113 | } 114 | 115 | /// Computes the Jaccard union of two vectors `a` and `b`. 116 | /// The Jaccard union is the sum of the maximum values of corresponding elements. 117 | /// 118 | /// # Arguments 119 | /// * `a` - The first vector. 120 | /// * `b` - The second vector. 121 | /// 122 | /// # Returns 123 | /// The Jaccard union of `a` and `b`. 124 | /// 125 | /// # Examples 126 | /// ``` 127 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 128 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]; 129 | /// let union = intspan::jaccard_union(&a, &b); 130 | /// assert_eq!(union, 80.0); 131 | /// ``` 132 | pub fn jaccard_union(a: &[f32], b: &[f32]) -> f32 { 133 | a.iter().zip(b.iter()).map(|(x, y)| f32::max(*x, *y)).sum() 134 | } 135 | 136 | pub fn weighted_jaccard_similarity(a: &[f32], b: &[f32]) -> f32 { 137 | let numerator = jaccard_intersection(a, b); 138 | let denominator = jaccard_union(a, b); 139 | 140 | if denominator == 0.0 { 141 | 0.0 142 | } else { 143 | numerator / denominator 144 | } 145 | } 146 | 147 | /// Computes the dot product of two vectors `a` and `b`. 148 | /// 149 | /// # Arguments 150 | /// * `a` - The first vector. 151 | /// * `b` - The second vector. 152 | /// 153 | /// # Returns 154 | /// The dot product of `a` and `b`. 
155 | /// 156 | /// # Examples 157 | /// ``` 158 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 159 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]; 160 | /// let dot = intspan::dot_product(&a, &b); 161 | /// assert_eq!(dot, 220.0); 162 | /// ``` 163 | pub fn dot_product(a: &[f32], b: &[f32]) -> f32 { 164 | a.iter().zip(b.iter()).map(|(x, y)| x * y).sum() 165 | } 166 | 167 | /// Computes the L2 norm (Euclidean norm) of a vector `a`. 168 | /// 169 | /// # Arguments 170 | /// * `a` - The vector. 171 | /// 172 | /// # Returns 173 | /// The L2 norm of `a`. 174 | /// 175 | /// # Examples 176 | /// ``` 177 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 178 | /// let norm = intspan::norm_l2(&a); 179 | /// assert_eq!(format!("{:.4}", norm), "19.6214".to_string()); 180 | /// ``` 181 | #[inline] 182 | pub fn norm_l2(a: &[f32]) -> f32 { 183 | norm_l2_sq(a).sqrt() 184 | } 185 | 186 | /// Computes the squared L2 norm of a vector `a`. 187 | /// 188 | /// # Arguments 189 | /// * `a` - The vector. 190 | /// 191 | /// # Returns 192 | /// The squared L2 norm of `a`. 193 | /// 194 | /// # Examples 195 | /// ``` 196 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 197 | /// let norm_sq = intspan::norm_l2_sq(&a); 198 | /// assert_eq!(norm_sq, 385.0); 199 | /// ``` 200 | pub fn norm_l2_sq(a: &[f32]) -> f32 { 201 | a.iter().map(|x| x * x).sum() 202 | } 203 | 204 | pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { 205 | let dot_product = dot_product(a, b); 206 | let denominator = norm_l2(a) * norm_l2(b); 207 | 208 | if denominator == 0.0 { 209 | 0.0 210 | } else { 211 | dot_product / denominator 212 | } 213 | } 214 | 215 | /// Computes the Euclidean distance between two vectors `a` and `b`. 216 | /// 217 | /// # Arguments 218 | /// * `a` - The first vector. 219 | /// * `b` - The second vector. 220 | /// 221 | /// # Returns 222 | /// The Euclidean distance between `a` and `b`. 
223 | /// 224 | /// # Examples 225 | /// ``` 226 | /// let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 227 | /// let b = [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]; 228 | /// let distance = intspan::euclidean_distance(&a, &b); 229 | /// assert_eq!(format!("{:.4}", distance), "18.1659".to_string()); 230 | /// ``` 231 | pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 { 232 | a.iter() 233 | .zip(b.iter()) 234 | .map(|(x, y)| { 235 | let diff = x - y; 236 | diff * diff 237 | }) 238 | .sum::() 239 | .sqrt() 240 | } 241 | -------------------------------------------------------------------------------- /src/libs/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod coverage; 2 | pub mod intspan; 3 | pub mod linalg; 4 | pub mod matrix; 5 | pub mod range; 6 | -------------------------------------------------------------------------------- /src/linkr.rs: -------------------------------------------------------------------------------- 1 | extern crate clap; 2 | use clap::*; 3 | 4 | mod cmd_linkr; 5 | 6 | fn main() -> anyhow::Result<()> { 7 | let app = Command::new("linkr") 8 | .version(crate_version!()) 9 | .author(crate_authors!()) 10 | .about("`linkr` operates ranges on chromosomes and links of ranges") 11 | .propagate_version(true) 12 | .arg_required_else_help(true) 13 | .color(ColorChoice::Auto) 14 | .subcommand(cmd_linkr::circos::make_subcommand()) 15 | .subcommand(cmd_linkr::sort::make_subcommand()) 16 | .subcommand(cmd_linkr::filter::make_subcommand()) 17 | .subcommand(cmd_linkr::clean::make_subcommand()) 18 | .subcommand(cmd_linkr::connect::make_subcommand()); 19 | 20 | // Check which subcommand the user ran...
21 | match app.get_matches().subcommand() { 22 | Some(("circos", sub_matches)) => cmd_linkr::circos::execute(sub_matches), 23 | Some(("sort", sub_matches)) => cmd_linkr::sort::execute(sub_matches), 24 | Some(("filter", sub_matches)) => cmd_linkr::filter::execute(sub_matches), 25 | Some(("clean", sub_matches)) => cmd_linkr::clean::execute(sub_matches), 26 | Some(("connect", sub_matches)) => cmd_linkr::connect::execute(sub_matches), 27 | _ => unreachable!(), 28 | } 29 | .unwrap(); 30 | 31 | Ok(()) 32 | } 33 | 34 | // Variable naming conventions 35 | // range: a Range object 36 | // rg: String 37 | // rgs: Vec 38 | // rg_of: BTreeMap 39 | 40 | // TODO: lav2link 41 | // TODO: paf2link 42 | -------------------------------------------------------------------------------- /src/rgr.rs: -------------------------------------------------------------------------------- 1 | extern crate clap; 2 | 3 | use clap::*; 4 | 5 | mod cmd_rgr; 6 | 7 | fn main() -> anyhow::Result<()> { 8 | let app = Command::new("rgr") 9 | .version(crate_version!()) 10 | .author(crate_authors!()) 11 | .about("`rgr` operates ranges in .rg and .tsv files") 12 | .propagate_version(true) 13 | .arg_required_else_help(true) 14 | .color(ColorChoice::Auto) 15 | .subcommand(cmd_rgr::count::make_subcommand()) 16 | .subcommand(cmd_rgr::dedup::make_subcommand()) 17 | .subcommand(cmd_rgr::field::make_subcommand()) 18 | .subcommand(cmd_rgr::filter::make_subcommand()) 19 | .subcommand(cmd_rgr::keep::make_subcommand()) 20 | .subcommand(cmd_rgr::md::make_subcommand()) 21 | .subcommand(cmd_rgr::merge::make_subcommand()) 22 | .subcommand(cmd_rgr::pl_2rmp::make_subcommand()) 23 | .subcommand(cmd_rgr::prop::make_subcommand()) 24 | .subcommand(cmd_rgr::replace::make_subcommand()) 25 | .subcommand(cmd_rgr::runlist::make_subcommand()) 26 | .subcommand(cmd_rgr::select::make_subcommand()) 27 | .subcommand(cmd_rgr::sort::make_subcommand()) 28 | .subcommand(cmd_rgr::span::make_subcommand()) 29 | .after_help( 30 | r###" 31 | File 
formats 32 | 33 | * .rg files are single-column .tsv 34 | * Field numbers in the TSV file start at 1 35 | 36 | Subcommand groups: 37 | 38 | * Generic .tsv 39 | * dedup / keep / md / replace / filter / select 40 | * Single range field 41 | * field / sort / count / prop / span / runlist 42 | * Multiple range fields 43 | * merge / pl-2rmp 44 | 45 | "###, 46 | ); 47 | 48 | // Check which subcommand the user ran... 49 | match app.get_matches().subcommand() { 50 | // Generic .tsv 51 | Some(("dedup", sub_matches)) => cmd_rgr::dedup::execute(sub_matches), 52 | Some(("keep", sub_matches)) => cmd_rgr::keep::execute(sub_matches), 53 | Some(("md", sub_matches)) => cmd_rgr::md::execute(sub_matches), 54 | Some(("replace", sub_matches)) => cmd_rgr::replace::execute(sub_matches), 55 | Some(("filter", sub_matches)) => cmd_rgr::filter::execute(sub_matches), 56 | Some(("select", sub_matches)) => cmd_rgr::select::execute(sub_matches), 57 | // Single range field 58 | Some(("field", sub_matches)) => cmd_rgr::field::execute(sub_matches), 59 | Some(("sort", sub_matches)) => cmd_rgr::sort::execute(sub_matches), 60 | Some(("count", sub_matches)) => cmd_rgr::count::execute(sub_matches), 61 | Some(("prop", sub_matches)) => cmd_rgr::prop::execute(sub_matches), 62 | Some(("span", sub_matches)) => cmd_rgr::span::execute(sub_matches), 63 | Some(("runlist", sub_matches)) => cmd_rgr::runlist::execute(sub_matches), 64 | // Multiple range fields 65 | Some(("merge", sub_matches)) => cmd_rgr::merge::execute(sub_matches), 66 | Some(("pl-2rmp", sub_matches)) => cmd_rgr::pl_2rmp::execute(sub_matches), 67 | _ => unreachable!(), 68 | } 69 | .unwrap(); 70 | 71 | Ok(()) 72 | } 73 | 74 | // TODO: --bed for `rgr field` 75 | -------------------------------------------------------------------------------- /src/spanr.rs: -------------------------------------------------------------------------------- 1 | extern crate clap; 2 | use clap::*; 3 | 4 | mod cmd_spanr; 5 | 6 | fn main() -> anyhow::Result<()> { 7 | let
app = Command::new("spanr") 8 | .version(crate_version!()) 9 | .author(crate_authors!()) 10 | .about("`spanr` operates chromosome IntSpan files") 11 | .propagate_version(true) 12 | .arg_required_else_help(true) 13 | .color(ColorChoice::Auto) 14 | .subcommand(cmd_spanr::genome::make_subcommand()) 15 | .subcommand(cmd_spanr::some::make_subcommand()) 16 | .subcommand(cmd_spanr::merge::make_subcommand()) 17 | .subcommand(cmd_spanr::split::make_subcommand()) 18 | .subcommand(cmd_spanr::stat::make_subcommand()) 19 | .subcommand(cmd_spanr::statop::make_subcommand()) 20 | .subcommand(cmd_spanr::combine::make_subcommand()) 21 | .subcommand(cmd_spanr::compare::make_subcommand()) 22 | .subcommand(cmd_spanr::span::make_subcommand()) 23 | .subcommand(cmd_spanr::cover::make_subcommand()) 24 | .subcommand(cmd_spanr::coverage::make_subcommand()) 25 | .subcommand(cmd_spanr::gff::make_subcommand()) 26 | .subcommand(cmd_spanr::convert::make_subcommand()); 27 | 28 | // Check which subcommand the user ran...
29 | match app.get_matches().subcommand() { 30 | Some(("genome", sub_matches)) => cmd_spanr::genome::execute(sub_matches), 31 | Some(("some", sub_matches)) => cmd_spanr::some::execute(sub_matches), 32 | Some(("merge", sub_matches)) => cmd_spanr::merge::execute(sub_matches), 33 | Some(("split", sub_matches)) => cmd_spanr::split::execute(sub_matches), 34 | Some(("stat", sub_matches)) => cmd_spanr::stat::execute(sub_matches), 35 | Some(("statop", sub_matches)) => cmd_spanr::statop::execute(sub_matches), 36 | Some(("combine", sub_matches)) => cmd_spanr::combine::execute(sub_matches), 37 | Some(("compare", sub_matches)) => cmd_spanr::compare::execute(sub_matches), 38 | Some(("span", sub_matches)) => cmd_spanr::span::execute(sub_matches), 39 | Some(("cover", sub_matches)) => cmd_spanr::cover::execute(sub_matches), 40 | Some(("coverage", sub_matches)) => cmd_spanr::coverage::execute(sub_matches), 41 | Some(("gff", sub_matches)) => cmd_spanr::gff::execute(sub_matches), 42 | Some(("convert", sub_matches)) => cmd_spanr::convert::execute(sub_matches), 43 | _ => unreachable!(), 44 | } 45 | .unwrap(); 46 | 47 | Ok(()) 48 | } 49 | 50 | // Variable naming conventions 51 | // ints: an IntSpan object 52 | // set, runlists: single name IntSpan set or runlists 53 | // set is a set of IntSpans 54 | // set: BTreeMap 55 | // runlists: BTreeMap 56 | // s_of, r_of: multiple names IntSpan or runlist 57 | // name ==> chr ==> IntSpan 58 | // name ==> chr ==> String 59 | // json: BTreeMap, single or multiple json 60 | // res: result, single name IntSpan set 61 | // BTreeMap 62 | // res_of: BTreeMap> 63 | // sizes: chr.sizes, BTreeMap 64 | // iv_of: BTreeMap> 65 | -------------------------------------------------------------------------------- /tests/Atha/chr.sizes: -------------------------------------------------------------------------------- 1 | 1 30427671 2 | 2 19698289 3 | 3 23459830 4 | 4 18585056 5 | 5 26975502 6 | 
-------------------------------------------------------------------------------- /tests/Atha/links.blast.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/tests/Atha/links.blast.tsv.gz -------------------------------------------------------------------------------- /tests/Atha/links.lastz.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/tests/Atha/links.lastz.tsv.gz -------------------------------------------------------------------------------- /tests/S288c/chr.sizes: -------------------------------------------------------------------------------- 1 | I 230218 2 | II 813184 3 | III 316620 4 | IV 1531933 5 | IX 439888 6 | V 576874 7 | VI 270161 8 | VII 1090940 9 | VIII 562643 10 | X 745751 11 | XI 666816 12 | XII 1078177 13 | XIII 924431 14 | XIV 784333 15 | XV 1091291 16 | XVI 948066 17 | -------------------------------------------------------------------------------- /tests/cli_linkr.rs: -------------------------------------------------------------------------------- 1 | use assert_cmd::prelude::*; // Add methods on commands 2 | use predicates::prelude::*; // Used for writing assertions 3 | use std::process::Command; // Run programs 4 | 5 | #[test] 6 | fn command_invalid() -> anyhow::Result<()> { 7 | let mut cmd = Command::cargo_bin("linkr")?; 8 | cmd.arg("foobar"); 9 | cmd.assert() 10 | .failure() 11 | .stderr(predicate::str::contains("recognized")); 12 | 13 | Ok(()) 14 | } 15 | 16 | #[test] 17 | fn command_circos() -> anyhow::Result<()> { 18 | let mut cmd = Command::cargo_bin("linkr")?; 19 | let output = cmd 20 | .arg("circos") 21 | .arg("tests/linkr/II.connect.tsv") 22 | .output() 23 | .unwrap(); 24 | let stdout = String::from_utf8(output.stdout).unwrap(); 25 | 26 | assert_eq!(stdout.lines().count(), 10); 
27 | assert!(stdout.contains("XIII 7947 6395"), "negative strand"); 28 | assert!(!stdout.contains("fill_color"), "links"); 29 | 30 | Ok(()) 31 | } 32 | 33 | #[test] 34 | fn command_circos_highlight() -> anyhow::Result<()> { 35 | let mut cmd = Command::cargo_bin("linkr")?; 36 | let output = cmd 37 | .arg("circos") 38 | .arg("tests/linkr/II.connect.tsv") 39 | .arg("--highlight") 40 | .output() 41 | .unwrap(); 42 | let stdout = String::from_utf8(output.stdout).unwrap(); 43 | 44 | assert_eq!(stdout.lines().count(), 14); 45 | assert!(stdout.contains("fill_color"), "highlights"); 46 | 47 | Ok(()) 48 | } 49 | 50 | #[test] 51 | fn command_sort() -> anyhow::Result<()> { 52 | let mut cmd = Command::cargo_bin("linkr")?; 53 | let output = cmd 54 | .arg("sort") 55 | .arg("tests/linkr/II.links.tsv") 56 | .output() 57 | .unwrap(); 58 | let stdout = String::from_utf8(output.stdout).unwrap(); 59 | 60 | assert_eq!(stdout.lines().count(), 15); 61 | assert!(!stdout.contains("\nVI"), "chromosome II first"); 62 | 63 | Ok(()) 64 | } 65 | 66 | #[test] 67 | fn command_filter() -> anyhow::Result<()> { 68 | let mut cmd = Command::cargo_bin("linkr")?; 69 | let output = cmd 70 | .arg("filter") 71 | .arg("tests/linkr/II.connect.tsv") 72 | .arg("-n") 73 | .arg("2") 74 | .output() 75 | .unwrap(); 76 | let stdout = String::from_utf8(output.stdout).unwrap(); 77 | 78 | assert_eq!(stdout.lines().count(), 4); 79 | 80 | Ok(()) 81 | } 82 | 83 | #[test] 84 | fn command_filter_3() -> anyhow::Result<()> { 85 | let mut cmd = Command::cargo_bin("linkr")?; 86 | let output = cmd 87 | .arg("filter") 88 | .arg("tests/linkr/II.connect.tsv") 89 | .arg("-n") 90 | .arg("3") 91 | .arg("-r") 92 | .arg("0.99") 93 | .output() 94 | .unwrap(); 95 | let stdout = String::from_utf8(output.stdout).unwrap(); 96 | 97 | assert_eq!(stdout.lines().count(), 1); 98 | assert!(!stdout.contains("VI("), "filtered links"); 99 | 100 | Ok(()) 101 | } 102 | 103 | #[test] 104 | fn command_clean() -> anyhow::Result<()> { 105 | let mut cmd = 
Command::cargo_bin("linkr")?; 106 | let output = cmd 107 | .arg("clean") 108 | .arg("tests/linkr/II.sort.tsv") 109 | .output() 110 | .unwrap(); 111 | let stdout = String::from_utf8(output.stdout).unwrap(); 112 | 113 | assert_eq!(stdout.lines().count(), 11); 114 | assert!(stdout.contains("892-4684"), "range exists"); 115 | 116 | Ok(()) 117 | } 118 | 119 | #[test] 120 | fn command_clean_bundle() -> anyhow::Result<()> { 121 | let mut cmd = Command::cargo_bin("linkr")?; 122 | let output = cmd 123 | .arg("clean") 124 | .arg("tests/linkr/II.sort.tsv") 125 | .arg("--bundle") 126 | .arg("500") 127 | .output() 128 | .unwrap(); 129 | let stdout = String::from_utf8(output.stdout).unwrap(); 130 | 131 | assert_eq!(stdout.lines().count(), 10); 132 | assert!(!stdout.contains("892-4684"), "original"); 133 | assert!(stdout.contains("892-4685"), "bundled"); 134 | 135 | Ok(()) 136 | } 137 | 138 | #[test] 139 | fn command_clean_merge() -> anyhow::Result<()> { 140 | let mut cmd = Command::cargo_bin("linkr")?; 141 | let output = cmd 142 | .arg("clean") 143 | .arg("tests/linkr/II.sort.tsv") 144 | .arg("-r") 145 | .arg("tests/linkr/II.merge.tsv") 146 | .arg("--verbose") 147 | .output() 148 | .unwrap(); 149 | let stdout = String::from_utf8(output.stdout).unwrap(); 150 | 151 | assert_eq!(stdout.lines().count(), 8); 152 | assert!(!stdout.contains("892-4684"), "original"); 153 | assert!(stdout.contains("892-4685"), "merged"); 154 | 155 | Ok(()) 156 | } 157 | 158 | #[test] 159 | fn command_connect() -> anyhow::Result<()> { 160 | let mut cmd = Command::cargo_bin("linkr")?; 161 | let output = cmd 162 | .arg("connect") 163 | .arg("tests/linkr/II.clean.tsv") 164 | .arg("--verbose") 165 | .output() 166 | .unwrap(); 167 | let stdout = String::from_utf8(output.stdout).unwrap(); 168 | 169 | assert_eq!(stdout.lines().count(), 6); 170 | assert_eq!( 171 | stdout.lines().next().unwrap().split('\t').count(), 172 | 3, 173 | "multilateral links" 174 | ); 175 | 176 | Ok(()) 177 | } 178 | 
-------------------------------------------------------------------------------- /tests/fasr/NC_000932.fa.fai: -------------------------------------------------------------------------------- 1 | NC_000932 154478 11 50 51 2 | -------------------------------------------------------------------------------- /tests/linkr/II.clean.tsv: -------------------------------------------------------------------------------- 1 | II(+):1-2018 XII(+):204-2215 2 | II(+):1990-5850 XII(+):7326-11200 3 | II(+):1990-5850 VI(+):892-4685 4 | II(+):300165-301260 IV(+):471852-472948 5 | II(+):477671-479048 XVI(+):700594-701971 6 | II(+):804880-813096 VII(+):1076129-1084340 7 | II(+):810776-812328 XIII(-):6395-7947 8 | II(+):810776-812328 XIV(-):7479-9033 9 | -------------------------------------------------------------------------------- /tests/linkr/II.connect.tsv: -------------------------------------------------------------------------------- 1 | II(+):1990-5850 VI(+):892-4685 XII(+):7326-11200 2 | II(+):810776-812328 XIII(-):6395-7947 XIV(-):7479-9033 3 | II(+):1-2018 XII(+):204-2215 4 | II(+):300165-301260 IV(+):471852-472948 5 | II(+):477671-479048 XVI(+):700594-701971 6 | II(+):804880-813096 VII(+):1076129-1084340 7 | -------------------------------------------------------------------------------- /tests/linkr/II.links.tsv: -------------------------------------------------------------------------------- 1 | II(+):1-2018 XII(+):204-2215 + 2 | II(+):144228-145732 II(-):144228-145732 - 3 | II(+):1990-5850 II(+):2026-5850 + 4 | II(+):1990-5850 XII(+):7326-11200 + 5 | II(+):2026-5850 II(+):1990-5850 + 6 | II(+):2026-5850 VI(+):892-4684 + 7 | II(+):2026-5850 XII(+):7326-11200 + 8 | II(+):300165-301260 IV(+):471852-472948 + 9 | II(+):429496-430989 II(+):429504-430965 + 10 | II(+):429504-430965 II(+):429496-430989 + 11 | II(+):477671-479048 XVI(+):700594-701971 + 12 | II(+):658738-662234 II(-):658738-662234 - 13 | II(+):804880-813096 VII(+):1076129-1084340 + 14 | II(+):806179-808955 
VII(+):1077427-1080204 + 15 | II(+):810776-812328 XIII(-):6395-7947 + 16 | II(+):810776-812328 XIV(-):7479-9033 + 17 | II(-):144228-145732 II(+):144228-145732 - 18 | II(-):658738-662234 II(+):658738-662234 - 19 | IV(+):471852-472948 II(+):300165-301260 + 20 | VI(+):892-4684 II(+):2026-5850 + 21 | VI(+):893-4685 II(+):2026-5850 + 22 | VII(+):1076129-1084340 II(+):804880-813096 + 23 | VII(+):1077427-1080204 II(+):806179-808955 + 24 | XII(+):204-2215 II(+):1-2018 + 25 | XII(+):7326-11200 II(+):1990-5850 + 26 | XII(+):7326-11200 II(+):2026-5850 + 27 | XIII(-):6395-7947 II(+):810776-812328 + 28 | XIV(-):7479-9033 II(+):810776-812328 + 29 | XVI(+):700594-701971 II(+):477671-479048 + 30 | -------------------------------------------------------------------------------- /tests/linkr/II.merge.tsv: -------------------------------------------------------------------------------- 1 | II(+):2026-5850 II(+):1990-5850 2 | II(+):429504-430965 II(+):429496-430989 3 | II(-):144228-145732 II(+):144228-145732 4 | II(-):658738-662234 II(+):658738-662234 5 | VI(+):893-4685 VI(+):892-4685 6 | VI(+):892-4684 VI(+):892-4685 7 | -------------------------------------------------------------------------------- /tests/linkr/II.sort.tsv: -------------------------------------------------------------------------------- 1 | II(+):1-2018 XII(+):204-2215 + 2 | II(+):1990-5850 II(+):2026-5850 + 3 | II(+):1990-5850 XII(+):7326-11200 + 4 | II(+):2026-5850 VI(+):892-4684 + 5 | II(+):2026-5850 VI(+):893-4685 + 6 | II(+):2026-5850 XII(+):7326-11200 + 7 | II(+):144228-145732 II(-):144228-145732 - 8 | II(+):300165-301260 IV(+):471852-472948 + 9 | II(+):429496-430989 II(+):429504-430965 + 10 | II(+):477671-479048 XVI(+):700594-701971 + 11 | II(+):658738-662234 II(-):658738-662234 - 12 | II(+):804880-813096 VII(+):1076129-1084340 + 13 | II(+):806179-808955 VII(+):1077427-1080204 + 14 | II(+):810776-812328 XIII(-):6395-7947 + 15 | II(+):810776-812328 XIV(-):7479-9033 + 16 | 
-------------------------------------------------------------------------------- /tests/rgr/1_4.ovlp.tsv: -------------------------------------------------------------------------------- 1 | anchor148_9124 pac7556_20928 8327 0.890 0 797 9124 9124 0 0 8581 20928 overlap 2 | anchor148_9124 pac7443_11454 2665 0.894 0 6459 9124 9124 0 0 2742 11454 overlap 3 | anchor236_6430 pac7556_20928 6430 0.869 0 0 6430 6430 0 8601 15361 20928 contained 4 | anchor236_6430 pac8852_20444 1099 0.839 0 5331 6430 6430 0 0 1183 20444 overlap 5 | anchor236_6430 pac7443_11454 6430 0.889 0 0 6430 6430 0 2759 9226 11454 contained 6 | anchor575_1626 pac7556_20928 1626 0.864 0 0 1626 1626 0 15343 17070 20928 contained 7 | anchor575_1626 pac8852_20444 1626 0.859 0 0 1626 1626 0 1166 2724 20444 contained 8 | anchor575_1626 pac8559_13190 1626 0.884 0 0 1626 1626 0 329 2089 13190 contained 9 | anchor575_1626 pac4710_11440 1626 0.841 0 0 1626 1626 0 410 2217 11440 contained 10 | anchor575_1626 pac1461_9030 1493 0.910 0 133 1626 1626 0 0 1581 9030 overlap 11 | anchor575_1626 pac4745_7148 1626 0.857 0 0 1626 1626 0 4459 6199 7148 contained 12 | anchor575_1626 pac6425_4471 1080 0.888 0 546 1626 1626 0 0 1136 4471 overlap 13 | anchor576_1624 pac7556_20928 1624 0.867 0 0 1624 1624 0 17033 18740 20928 contained 14 | anchor576_1624 pac8852_20444 1624 0.888 0 0 1624 1624 0 2685 4271 20444 contained 15 | anchor576_1624 pac8559_13190 1624 0.864 0 0 1624 1624 0 2043 3821 13190 contained 16 | anchor576_1624 pac4710_11440 1624 0.846 0 0 1624 1624 0 2174 3956 11440 contained 17 | anchor576_1624 pac1461_9030 1624 0.904 0 0 1624 1624 0 1537 3236 9030 contained 18 | anchor576_1624 pac6425_4471 1624 0.854 0 0 1624 1624 0 1094 2892 4471 contained 19 | pac7556_20928 anchor148_9124 8581 0.890 0 0 8581 20928 0 797 9124 9124 overlap 20 | pac7556_20928 anchor236_6430 6760 0.869 0 8601 15361 20928 0 0 6430 6430 contains 21 | pac7556_20928 anchor575_1626 1727 0.864 0 15343 17070 20928 0 0 1626 1626 contains 22 | 
pac7556_20928 anchor576_1624 1707 0.867 0 17033 18740 20928 0 0 1624 1624 contains 23 | pac8852_20444 anchor236_6430 1183 0.839 0 0 1183 20444 0 5331 6430 6430 overlap 24 | pac8852_20444 anchor575_1626 1558 0.859 0 1166 2724 20444 0 0 1626 1626 contains 25 | pac8852_20444 anchor576_1624 1586 0.888 0 2685 4271 20444 0 0 1624 1624 contains 26 | pac8852_20444 pac1461_9030 8217 0.794 0 1295 9512 20444 0 0 9030 9030 contains 27 | pac8852_20444 pac4745_7148 3617 0.759 0 0 3617 20444 0 3366 7148 7148 overlap 28 | pac8559_13190 anchor575_1626 1760 0.884 0 329 2089 13190 0 0 1626 1626 contains 29 | pac8559_13190 anchor576_1624 1778 0.864 0 2043 3821 13190 0 0 1624 1624 contains 30 | pac8559_13190 pac4745_7148 3100 0.778 0 0 3100 13190 0 4141 7148 7148 overlap 31 | pac7443_11454 anchor148_9124 2742 0.894 0 0 2742 11454 0 6459 9124 9124 overlap 32 | pac7443_11454 anchor236_6430 6467 0.889 0 2759 9226 11454 0 0 6430 6430 contains 33 | pac4710_11440 anchor575_1626 1807 0.841 0 410 2217 11440 0 0 1626 1626 contains 34 | pac4710_11440 anchor576_1624 1782 0.846 0 2174 3956 11440 0 0 1624 1624 contains 35 | pac4710_11440 pac1461_9030 9108 0.800 0 543 9651 11440 0 0 9030 9030 contains 36 | pac4710_11440 pac4745_7148 3231 0.761 0 0 3231 11440 0 4106 7148 7148 overlap 37 | pac1461_9030 anchor575_1626 1581 0.910 0 0 1581 9030 0 133 1626 1626 overlap 38 | pac1461_9030 anchor576_1624 1699 0.904 0 1537 3236 9030 0 0 1624 1624 contains 39 | pac1461_9030 pac8852_20444 9030 0.794 0 0 9030 9030 0 1295 9512 20444 contained 40 | pac1461_9030 pac4710_11440 9030 0.800 0 0 9030 9030 0 543 9651 11440 contained 41 | pac1461_9030 pac4745_7148 2535 0.808 0 0 2535 9030 0 4599 7148 7148 overlap 42 | pac4745_7148 anchor575_1626 1740 0.857 0 4459 6199 7148 0 0 1626 1626 contains 43 | pac4745_7148 pac8852_20444 3782 0.759 0 3366 7148 7148 0 0 3617 20444 overlap 44 | pac4745_7148 pac8559_13190 3007 0.778 0 4141 7148 7148 0 0 3100 13190 overlap 45 | pac4745_7148 pac4710_11440 3042 0.761 0 4106 7148 7148 0 0 
3231 11440 overlap 46 | pac4745_7148 pac1461_9030 2549 0.808 0 4599 7148 7148 0 0 2535 9030 overlap 47 | pac4745_7148 pac6425_4471 2097 0.785 0 5051 7148 7148 0 0 2151 4471 overlap 48 | pac6425_4471 anchor575_1626 1136 0.888 0 0 1136 4471 0 546 1626 1626 overlap 49 | pac6425_4471 anchor576_1624 1798 0.854 0 1094 2892 4471 0 0 1624 1624 contains 50 | pac6425_4471 pac4745_7148 2151 0.785 0 0 2151 4471 0 5051 7148 7148 overlap 51 | -------------------------------------------------------------------------------- /tests/rgr/1_4.replace.tsv: -------------------------------------------------------------------------------- 1 | falcon_read/1/0_9124 anchor148_9124 2 | falcon_read/2/0_6430 anchor236_6430 3 | falcon_read/3/0_1626 anchor575_1626 4 | falcon_read/4/0_1624 anchor576_1624 5 | falcon_read/5/0_20928 pac7556_20928 6 | falcon_read/6/0_20444 pac8852_20444 7 | falcon_read/7/0_13190 pac8559_13190 8 | falcon_read/8/0_11454 pac7443_11454 9 | falcon_read/9/0_11440 pac4710_11440 10 | falcon_read/10/0_9030 pac1461_9030 11 | falcon_read/11/0_7148 pac4745_7148 12 | falcon_read/12/0_4471 pac6425_4471 13 | -------------------------------------------------------------------------------- /tests/rgr/II.links.tsv: -------------------------------------------------------------------------------- 1 | II(+):1-2018 XII(+):204-2215 + 2 | II(+):144228-145732 II(-):144228-145732 - 3 | II(+):1990-5850 II(+):2026-5850 + 4 | II(+):1990-5850 XII(+):7326-11200 + 5 | II(+):2026-5850 II(+):1990-5850 + 6 | II(+):2026-5850 VI(+):892-4684 + 7 | II(+):2026-5850 XII(+):7326-11200 + 8 | II(+):300165-301260 IV(+):471852-472948 + 9 | II(+):429496-430989 II(+):429504-430965 + 10 | II(+):429504-430965 II(+):429496-430989 + 11 | II(+):477671-479048 XVI(+):700594-701971 + 12 | II(+):658738-662234 II(-):658738-662234 - 13 | II(+):804880-813096 VII(+):1076129-1084340 + 14 | II(+):806179-808955 VII(+):1077427-1080204 + 15 | II(+):810776-812328 XIII(-):6395-7947 + 16 | II(+):810776-812328 XIV(-):7479-9033 + 17 | 
II(-):144228-145732 II(+):144228-145732 - 18 | II(-):658738-662234 II(+):658738-662234 - 19 | IV(+):471852-472948 II(+):300165-301260 + 20 | VI(+):892-4684 II(+):2026-5850 + 21 | VI(+):893-4685 II(+):2026-5850 + 22 | VII(+):1076129-1084340 II(+):804880-813096 + 23 | VII(+):1077427-1080204 II(+):806179-808955 + 24 | XII(+):204-2215 II(+):1-2018 + 25 | XII(+):7326-11200 II(+):1990-5850 + 26 | XII(+):7326-11200 II(+):2026-5850 + 27 | XIII(-):6395-7947 II(+):810776-812328 + 28 | XIV(-):7479-9033 II(+):810776-812328 + 29 | XVI(+):700594-701971 II(+):477671-479048 + 30 | -------------------------------------------------------------------------------- /tests/rgr/S288c.rg: -------------------------------------------------------------------------------- 1 | I:1-100 2 | I(+):90-150 3 | S288c.I(-):190-200 4 | II:21294-22075 5 | II:23537-24097 6 | S288c.I(-):190-200|Species=Yeast 7 | -------------------------------------------------------------------------------- /tests/rgr/ctg.range.tsv: -------------------------------------------------------------------------------- 1 | length ID range 2 | 100000 ctg:I:1 I:1-100000 3 | 85779 ctg:Mito:1 Mito:1-85779 4 | 130218 ctg:I:2 I:100001-230218 5 | -------------------------------------------------------------------------------- /tests/rgr/ctg.tsv: -------------------------------------------------------------------------------- 1 | ID chr_id chr_start chr_end chr_strand length 2 | ctg:I:1 I 1 100000 + 100000 3 | ctg:Mito:1 Mito 1 85779 + 85779 4 | ctg:I:2 I 100001 230218 + 130218 5 | -------------------------------------------------------------------------------- /tests/rgr/ranges.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wang-q/intspan/0d4e5153268bf51eb824f7cc93a51415a255a3c2/tests/rgr/ranges.tsv.gz -------------------------------------------------------------------------------- /tests/rgr/tn.tsv: 
-------------------------------------------------------------------------------- 1 | IS10L-AF162223 IS10L-AF162223 0.0000 1.0000 1.0000 2 | IS10L-AF162223 Tn10-AF162223 0.1999 0.1408 1.0000 3 | IS10L-AF162223 Tn5393.11-CP000602.1 0.2784 0.0767 1.0000 4 | IS10L-AF162223 Tn5393.2-CP030921.1 0.2753 0.0785 0.9615 5 | IS10L-AF162223 Tn7241-KY437729 0.2592 0.0887 0.9077 6 | IS10_p-MH626558 IS10L-AF162223 0.1303 0.2513 1.0000 7 | IS10L-AF162223 Tn10-AF162223 0.1999 0.1408 1.0000 8 | IS10_p-MH626558 Tn10-AF162223 0.3833 0.0354 1.0000 9 | Tn10-AF162223 Tn10-AF162223 0.0000 1.0000 1.0000 10 | Tn10-AF162223 Tn5393.11-CP000602.1 0.0499 0.5447 1.0000 11 | Tn10-AF162223 Tn5393.2-CP030921.1 0.0449 0.5750 0.9928 12 | Tn10_p-MH626558 Tn10-AF162223 0.0337 0.6525 0.9929 13 | Tn10_p2-CP027411.1 Tn10-AF162223 0.0301 0.6808 0.9891 14 | -------------------------------------------------------------------------------- /tests/spanr/Atha.chr.sizes: -------------------------------------------------------------------------------- 1 | 1 30427671 2 | 2 19698289 3 | 3 23459830 4 | 4 18585056 5 | 5 26975502 6 | -------------------------------------------------------------------------------- /tests/spanr/Atha.json: -------------------------------------------------------------------------------- 1 | { 2 | "AT1G01010.1": { 3 | "1": "3631-3913,3996-4276,4486-4605,4706-5095,5174-5326,5439-5899" 4 | }, 5 | "AT1G01020.1": { 6 | "1": "5928-6263,6437-7069,7157-7232,7384-7450,7564-7649,7762-7835,7942-7987,8236-8325,8417-8464,8571-8737" 7 | }, 8 | "AT1G01020.2": { 9 | "1": "6790-7069,7157-7450,7564-7649,7762-7835,7942-7987,8236-8325,8417-8464,8571-8737" 10 | }, 11 | "AT2G01008.1": { 12 | "2": "1025-1272,1458-1510,1873-2810,3706-5513,5782-5945" 13 | }, 14 | "AT2G01021.1": { 15 | "2": "6571-6672" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /tests/spanr/Atha.list: -------------------------------------------------------------------------------- 1 | 
AT1G01010.1 2 | AT1G01020.1 3 | AT2G01008.1 4 | -------------------------------------------------------------------------------- /tests/spanr/I.II.json: -------------------------------------------------------------------------------- 1 | { 2 | "I": { 3 | "I": "13744-17133,20043-21352,27969-29557,32941-33447,45023-45898,67521-68715,69526-71785" 4 | }, 5 | "II": { 6 | "II": "21294-22075,23537-24097,28547-29194,36489-36988,44919-45977,63330-63875,71128-71865" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /tests/spanr/I.json: -------------------------------------------------------------------------------- 1 | { 2 | "I": "13744-17133,20043-21352,27969-29557,32941-33447,45023-45898,67521-68715,69526-71785" 3 | } 4 | -------------------------------------------------------------------------------- /tests/spanr/II.json: -------------------------------------------------------------------------------- 1 | { 2 | "II": "21294-22075,23537-24097,28547-29194,36489-36988,44919-45977,63330-63875,71128-71865" 3 | } 4 | -------------------------------------------------------------------------------- /tests/spanr/II.other.json: -------------------------------------------------------------------------------- 1 | { 2 | "II": "21294-22075,23537-24097,28547-29194,36489-36988,44919-45977,63330-63875,71128-71865" 3 | } 4 | -------------------------------------------------------------------------------- /tests/spanr/NC_007942.rm.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region NC_007942 1 152218 3 | NC_007942 RepeatMasker dispersed_repeat 177 240 15 + . Target=A-rich 1 63 4 | NC_007942 RepeatMasker dispersed_repeat 4502 4542 15 + . Target=(TTATAA)n 1 39 5 | NC_007942 RepeatMasker dispersed_repeat 5159 5195 38 + . Target=(AT)n 1 37 6 | NC_007942 RepeatMasker dispersed_repeat 6960 7013 13 + . 
Target=(AATTTTT)n 1 55 7 | NC_007942 RepeatMasker dispersed_repeat 10659 10700 14 + . Target=(AATTTAT)n 1 43 8 | NC_007942 RepeatMasker dispersed_repeat 10775 10823 12 + . Target=(ATTTCTA)n 1 48 9 | NC_007942 RepeatMasker dispersed_repeat 13065 13138 345 - . Target=tRNA-Tyr-TAC 1 74 10 | NC_007942 RepeatMasker dispersed_repeat 14917 14976 183 + . Target=MamSINE1 9 69 11 | NC_007942 RepeatMasker dispersed_repeat 17926 17963 13 + . Target=(GATATAT)n 1 41 12 | NC_007942 RepeatMasker dispersed_repeat 18422 18468 17 + . Target=(TATC)n 1 47 13 | NC_007942 RepeatMasker dispersed_repeat 18815 18857 12 + . Target=(AATTA)n 1 43 14 | NC_007942 RepeatMasker dispersed_repeat 24588 24615 16 + . Target=(T)n 1 28 15 | NC_007942 RepeatMasker dispersed_repeat 25609 25646 13 + . Target=(AATAGT)n 1 39 16 | NC_007942 RepeatMasker dispersed_repeat 26216 26274 15 + . Target=A-rich 1 59 17 | NC_007942 RepeatMasker dispersed_repeat 28592 28658 30 + . Target=(TTA)n 1 67 18 | NC_007942 RepeatMasker dispersed_repeat 29618 29639 13 + . Target=(AAT)n 1 24 19 | NC_007942 RepeatMasker dispersed_repeat 30851 30918 200 + . Target=MamSINE1 3 72 20 | NC_007942 RepeatMasker dispersed_repeat 31910 31959 13 + . Target=(TATAA)n 1 50 21 | NC_007942 RepeatMasker dispersed_repeat 32788 32847 27 + . Target=(AT)n 1 65 22 | NC_007942 RepeatMasker dispersed_repeat 33368 33400 15 + . Target=(AT)n 1 32 23 | NC_007942 RepeatMasker dispersed_repeat 33401 33407 13 + . Target=(ATAGAT)n 1 39 24 | NC_007942 RepeatMasker dispersed_repeat 38374 38397 12 + . Target=(TTTAA)n 1 26 25 | NC_007942 RepeatMasker dispersed_repeat 38485 38529 12 + . Target=(ATAA)n 1 46 26 | NC_007942 RepeatMasker dispersed_repeat 42812 42844 16 + . Target=A-rich 1 33 27 | NC_007942 RepeatMasker dispersed_repeat 44787 44832 13 + . Target=(ATA)n 1 46 28 | NC_007942 RepeatMasker dispersed_repeat 45788 45831 15 + . Target=A-rich 1 47 29 | NC_007942 RepeatMasker dispersed_repeat 45872 45908 14 + . 
Target=A-rich 1 37 30 | NC_007942 RepeatMasker dispersed_repeat 46793 46826 14 + . Target=(ATCTAT)n 1 32 31 | NC_007942 RepeatMasker dispersed_repeat 46816 46849 14 + . Target=(TAGA)n 1 38 32 | NC_007942 RepeatMasker dispersed_repeat 46895 46919 16 + . Target=(A)n 1 25 33 | NC_007942 RepeatMasker dispersed_repeat 48397 48455 29 + . Target=(AT)n 1 61 34 | NC_007942 RepeatMasker dispersed_repeat 49560 49584 13 + . Target=(AATTT)n 1 26 35 | NC_007942 RepeatMasker dispersed_repeat 51482 51555 22 + . Target=A-rich 1 74 36 | NC_007942 RepeatMasker dispersed_repeat 51694 51718 16 + . Target=(T)n 1 25 37 | NC_007942 RepeatMasker dispersed_repeat 51755 51792 19 + . Target=(ATTCTAT)n 1 37 38 | NC_007942 RepeatMasker dispersed_repeat 52808 52849 13 + . Target=(AATATTC)n 1 38 39 | NC_007942 RepeatMasker dispersed_repeat 53026 53131 25 + . Target=A-rich 1 105 40 | NC_007942 RepeatMasker dispersed_repeat 53217 53290 308 + . Target=tRNA-Ser-AGY 4 72 41 | NC_007942 RepeatMasker dispersed_repeat 53328 53365 12 + . Target=(TTGT)n 1 38 42 | NC_007942 RepeatMasker dispersed_repeat 54243 54300 21 + . Target=(TTATTT)n 1 58 43 | NC_007942 RepeatMasker dispersed_repeat 54299 54378 26 + . Target=A-rich 1 76 44 | NC_007942 RepeatMasker dispersed_repeat 54530 54557 13 + . Target=(TAGA)n 1 30 45 | NC_007942 RepeatMasker dispersed_repeat 56554 56614 14 + . Target=A-rich 1 64 46 | NC_007942 RepeatMasker dispersed_repeat 58332 58372 19 + . Target=A-rich 1 41 47 | NC_007942 RepeatMasker dispersed_repeat 63709 63745 12 + . Target=(AATTGA)n 1 39 48 | NC_007942 RepeatMasker dispersed_repeat 63753 63795 13 + . Target=(TTATTT)n 1 45 49 | NC_007942 RepeatMasker dispersed_repeat 64767 64806 19 + . Target=(TA)n 1 39 50 | NC_007942 RepeatMasker dispersed_repeat 64866 64913 13 + . Target=(ATATA)n 1 43 51 | NC_007942 RepeatMasker dispersed_repeat 65068 65098 21 + . Target=(AT)n 1 32 52 | NC_007942 RepeatMasker dispersed_repeat 65451 65486 11 + . 
Target=(ATTAAGT)n 1 36 53 | NC_007942 RepeatMasker dispersed_repeat 65884 65913 12 + . Target=(TTTTCT)n 1 31 54 | NC_007942 RepeatMasker dispersed_repeat 67087 67128 13 + . Target=(TATATAC)n 1 39 55 | NC_007942 RepeatMasker dispersed_repeat 73941 74006 14 + . Target=(AAT)n 1 66 56 | NC_007942 RepeatMasker dispersed_repeat 76532 76550 15 + . Target=(A)n 1 19 57 | NC_007942 RepeatMasker dispersed_repeat 79464 79492 15 + . Target=(T)n 1 29 58 | NC_007942 RepeatMasker dispersed_repeat 79499 79539 24 + . Target=(TA)n 1 43 59 | NC_007942 RepeatMasker dispersed_repeat 80695 80751 15 + . Target=(ATAT)n 1 62 60 | NC_007942 RepeatMasker dispersed_repeat 81023 81055 12 + . Target=A-rich 1 35 61 | NC_007942 RepeatMasker dispersed_repeat 81611 81634 15 + . Target=(T)n 1 24 62 | NC_007942 RepeatMasker dispersed_repeat 82638 82665 16 + . Target=(T)n 1 28 63 | NC_007942 RepeatMasker dispersed_repeat 82672 82718 22 + . Target=(ATAT)n 1 47 64 | NC_007942 RepeatMasker dispersed_repeat 83338 83362 12 + . Target=(TTC)n 1 24 65 | NC_007942 RepeatMasker dispersed_repeat 83812 83903 29 + . Target=(GATATA)n 1 90 66 | NC_007942 RepeatMasker dispersed_repeat 90781 90820 27 + . Target=(GATAGTGAC)n 1 40 67 | NC_007942 RepeatMasker dispersed_repeat 93250 93284 15 + . Target=(ATATAT)n 1 36 68 | NC_007942 RepeatMasker dispersed_repeat 96095 96127 13 + . Target=(TTTTTG)n 1 33 69 | NC_007942 RepeatMasker dispersed_repeat 98387 98437 13 + . Target=(TTCTAT)n 1 51 70 | NC_007942 RepeatMasker dispersed_repeat 100312 100963 390 + . Target=SSU-rRNA_Hsa 1031 1715 71 | NC_007942 RepeatMasker dispersed_repeat 103687 103779 261 + . Target=LSU-rRNA_Hsa 21 113 72 | NC_007942 RepeatMasker dispersed_repeat 103960 104056 277 + . Target=LSU-rRNA_Hsa 319 415 73 | NC_007942 RepeatMasker dispersed_repeat 104206 104360 279 + . Target=LSU-rRNA_Hsa 1503 1658 74 | NC_007942 RepeatMasker dispersed_repeat 105430 105538 259 + . Target=LSU-rRNA_Hsa 3714 3822 75 | NC_007942 RepeatMasker dispersed_repeat 105767 106148 408 + . 
Target=LSU-rRNA_Hsa 4137 4519 76 | NC_007942 RepeatMasker dispersed_repeat 108220 108261 13 + . Target=A-rich 1 42 77 | NC_007942 RepeatMasker dispersed_repeat 108939 108973 12 + . Target=(TTAT)n 1 35 78 | NC_007942 RepeatMasker dispersed_repeat 109741 109774 15 + . Target=(TAA)n 1 34 79 | NC_007942 RepeatMasker dispersed_repeat 109920 109966 16 + . Target=A-rich 1 47 80 | NC_007942 RepeatMasker dispersed_repeat 112616 112675 15 + . Target=(AATGAA)n 1 66 81 | NC_007942 RepeatMasker dispersed_repeat 113267 113310 12 + . Target=(ACAAGA)n 1 48 82 | NC_007942 RepeatMasker dispersed_repeat 113674 113751 19 + . Target=(ATAT)n 1 79 83 | NC_007942 RepeatMasker dispersed_repeat 116626 116676 13 + . Target=(ATAA)n 1 50 84 | NC_007942 RepeatMasker dispersed_repeat 122126 122154 15 + . Target=A-rich 1 29 85 | NC_007942 RepeatMasker dispersed_repeat 123642 123793 43 + . Target=(ATAT)n 1 156 86 | NC_007942 RepeatMasker dispersed_repeat 126573 126632 17 + . Target=(TATATTT)n 1 63 87 | NC_007942 RepeatMasker dispersed_repeat 126633 126641 13 + . Target=(TATAT)n 1 71 88 | NC_007942 RepeatMasker dispersed_repeat 129246 129627 408 - . Target=LSU-rRNA_Hsa 4137 4519 89 | NC_007942 RepeatMasker dispersed_repeat 129856 129964 259 - . Target=LSU-rRNA_Hsa 3714 3822 90 | NC_007942 RepeatMasker dispersed_repeat 131034 131188 279 - . Target=LSU-rRNA_Hsa 1503 1658 91 | NC_007942 RepeatMasker dispersed_repeat 131338 131434 277 - . Target=LSU-rRNA_Hsa 319 415 92 | NC_007942 RepeatMasker dispersed_repeat 131615 131707 261 - . Target=LSU-rRNA_Hsa 21 113 93 | NC_007942 RepeatMasker dispersed_repeat 134729 135082 366 - . Target=SSU-rRNA_Hsa 1031 1392 94 | NC_007942 RepeatMasker dispersed_repeat 136957 137007 13 + . Target=(AATAGA)n 1 51 95 | NC_007942 RepeatMasker dispersed_repeat 139267 139299 13 + . Target=A-rich 1 33 96 | NC_007942 RepeatMasker dispersed_repeat 142105 142143 13 + . Target=(ATAGAT)n 1 39 97 | NC_007942 RepeatMasker dispersed_repeat 144574 144613 27 + . 
Target=(TATCGTCAC)n 1 40 98 | NC_007942 RepeatMasker dispersed_repeat 151491 151499 29 + . Target=(TATATC)n 1 86 99 | NC_007942 RepeatMasker dispersed_repeat 151500 151578 23 + . Target=(TATC)n 1 82 100 | NC_007942 RepeatMasker dispersed_repeat 152032 152056 12 + . Target=(GAA)n 1 24 101 | -------------------------------------------------------------------------------- /tests/spanr/S288c.chr.sizes: -------------------------------------------------------------------------------- 1 | I 230218 2 | II 813184 3 | III 316620 4 | IV 1531933 5 | IX 439888 6 | V 576874 7 | VI 270161 8 | VII 1090940 9 | VIII 562643 10 | X 745751 11 | XI 666816 12 | XII 1078177 13 | XIII 924431 14 | XIV 784333 15 | XV 1091291 16 | XVI 948066 17 | -------------------------------------------------------------------------------- /tests/spanr/S288c.rg: -------------------------------------------------------------------------------- 1 | I:1-100 2 | I(+):90-150 3 | S288c.I(-):190-200 4 | II:21294-22075 5 | II:23537-24097 6 | S288c.I(-):190-200|Species=Yeast 7 | -------------------------------------------------------------------------------- /tests/spanr/brca2.json: -------------------------------------------------------------------------------- 1 | { 2 | "13": "32316461-32316527,32319077-32319325,32325076-32325184,32326101-32326150,32326242-32326282,32326499-32326613,32329443-32329492,32330919-32331030,32332272-32333387,32336265-32341196,32344558-32344653,32346827-32346896,32354861-32355288,32356428-32356609,32357742-32357929,32362523-32362693,32363179-32363533,32370402-32370557,32370956-32371100,32376670-32376791,32379317-32379515,32379750-32379913,32380007-32380145,32394689-32394933,32396898-32397044,32398162-32398770" 3 | } 4 | -------------------------------------------------------------------------------- /tests/spanr/dazzname.rg: -------------------------------------------------------------------------------- 1 | infile_0/1/0_514:19-25 2 | infile_0/1/0_514:26-37 3 | infile_0/1/0_514:38-50 4 | 
infile_0/1/0_514:51-55 5 | infile_0/1/0_514:56-61 6 | infile_0/1/0_514:62-62 7 | infile_0/1/0_514:63-82 8 | infile_0/1/0_514:83-92 9 | infile_0/1/0_514:93-94 10 | infile_0/1/0_514:95-98 11 | infile_0/1/0_514:99-105 12 | infile_0/1/0_514:106-111 13 | infile_0/1/0_514:112-112 14 | infile_0/1/0_514:113-113 15 | infile_0/1/0_514:114-114 16 | infile_0/1/0_514:115-119 17 | infile_0/1/0_514:120-125 18 | infile_0/1/0_514:126-128 19 | infile_0/1/0_514:129-132 20 | infile_0/1/0_514:133-142 21 | infile_0/1/0_514:143-151 22 | infile_0/1/0_514:152-152 23 | infile_0/1/0_514:153-153 24 | infile_0/1/0_514:154-168 25 | infile_0/1/0_514:169-169 26 | infile_0/1/0_514:170-171 27 | infile_0/1/0_514:172-176 28 | infile_0/1/0_514:177-182 29 | infile_0/1/0_514:183-186 30 | infile_0/1/0_514:187-188 31 | infile_0/1/0_514:189-189 32 | infile_0/1/0_514:190-192 33 | infile_0/1/0_514:193-193 34 | infile_0/1/0_514:194-196 35 | infile_0/1/0_514:197-198 36 | infile_0/1/0_514:199-201 37 | infile_0/1/0_514:202-205 38 | infile_0/1/0_514:206-206 39 | infile_0/1/0_514:207-208 40 | infile_0/1/0_514:209-212 41 | infile_0/1/0_514:213-213 42 | infile_0/1/0_514:214-224 43 | infile_0/1/0_514:225-233 44 | infile_0/1/0_514:234-242 45 | infile_0/1/0_514:243-243 46 | infile_0/1/0_514:244-244 47 | infile_0/1/0_514:245-249 48 | infile_0/1/0_514:250-254 49 | infile_0/1/0_514:255-256 50 | infile_0/1/0_514:257-257 51 | infile_0/1/0_514:258-259 52 | infile_0/1/0_514:260-262 53 | infile_0/1/0_514:263-263 54 | infile_0/1/0_514:264-264 55 | infile_0/1/0_514:265-269 56 | infile_0/1/0_514:270-270 57 | infile_0/1/0_514:271-276 58 | infile_0/1/0_514:277-278 59 | infile_0/1/0_514:279-279 60 | infile_0/1/0_514:280-282 61 | infile_0/1/0_514:283-283 62 | infile_0/1/0_514:284-293 63 | infile_0/1/0_514:294-300 64 | infile_0/1/0_514:301-303 65 | infile_0/1/0_514:304-304 66 | infile_0/1/0_514:305-307 67 | infile_0/1/0_514:308-312 68 | infile_0/1/0_514:313-319 69 | infile_0/1/0_514:320-322 70 | infile_0/1/0_514:323-324 71 | 
infile_0/1/0_514:325-328 72 | infile_0/1/0_514:329-333 73 | infile_0/1/0_514:334-337 74 | infile_0/1/0_514:338-340 75 | infile_0/1/0_514:341-343 76 | infile_0/1/0_514:344-344 77 | infile_0/1/0_514:345-345 78 | infile_0/1/0_514:346-347 79 | infile_0/1/0_514:348-348 80 | infile_0/1/0_514:349-349 81 | infile_0/1/0_514:350-356 82 | infile_0/1/0_514:357-359 83 | infile_0/1/0_514:360-363 84 | infile_0/1/0_514:364-375 85 | infile_0/1/0_514:376-393 86 | infile_0/1/0_514:394-394 87 | infile_0/1/0_514:395-395 88 | infile_0/1/0_514:396-396 89 | infile_0/1/0_514:397-405 90 | infile_0/1/0_514:406-408 91 | infile_0/1/0_514:409-410 92 | infile_0/1/0_514:411-416 93 | infile_0/1/0_514:417-420 94 | infile_0/1/0_514:421-429 95 | infile_0/1/0_514:430-433 96 | infile_0/1/0_514:434-451 97 | infile_0/1/0_514:452-458 98 | infile_0/1/0_514:459-463 99 | infile_0/1/0_514:464-475 100 | infile_0/1/0_514:476-479 101 | infile_0/1/0_514:480-496 102 | infile_0/1/0_514:497-499 103 | -------------------------------------------------------------------------------- /tests/spanr/repeat.json: -------------------------------------------------------------------------------- 1 | { 2 | "I": "-", 3 | "II": "327069-327703", 4 | "III": "-", 5 | "IV": "512988-513590,757572-759779,802895-805654,981142-987119,1017673-1018183,1175134-1175738,1307621-1308556,1504223-1504728", 6 | "IX": "-", 7 | "V": "354135-354917", 8 | "VI": "-", 9 | "VII": "778784-779515,878539-879235", 10 | "VIII": "116405-117059,133581-134226", 11 | "X": "366757-367499,712641-713226", 12 | "XI": "162831-163399", 13 | "XII": "64067-65208,91960-92481,451418-455181,455933-457732,460517-464318,465070-466869,489753-490545,817840-818474", 14 | "XIII": "609100-609861", 15 | "XIV": "-", 16 | "XV": "437522-438484", 17 | "XVI": "560481-561065" 18 | } 19 | --------------------------------------------------------------------------------