├── .github ├── demo.gif ├── demo.tape └── workflows │ ├── lint.yml │ ├── release.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── dist-workspace.toml ├── src ├── app.rs ├── columns_filter.rs ├── common.rs ├── csv.rs ├── delimiter.rs ├── errors.rs ├── find.rs ├── help.rs ├── history.rs ├── input.rs ├── io.rs ├── lib.rs ├── main.rs ├── runner.rs ├── sort.rs ├── theme.rs ├── ui.rs ├── util │ ├── events.rs │ └── mod.rs ├── view.rs └── wrap.rs └── tests └── data ├── bad_73.csv ├── bad_double_quote.csv ├── cities.csv ├── empty.csv ├── filter.csv ├── gnu_lgpl.txt ├── good_double_quote.csv ├── irregular.csv ├── irregular_last_row.csv ├── irregular_more_fields.csv ├── multi_lines.csv ├── multi_lines_carriage_return.csv ├── multiple_newlines.csv ├── no_headers.csv ├── one_wide_column.txt ├── simple.csv ├── small.bsv ├── small.csv └── starts_with_newline.csv /.github/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YS-L/csvlens/f41e5e3d2ee8d9d5985759048161a6a9412599a6/.github/demo.gif -------------------------------------------------------------------------------- /.github/demo.tape: -------------------------------------------------------------------------------- 1 | Output "demo.gif" 2 | Set Theme "Tomorrow Night" 3 | Set Width 1500 4 | Set Height 800 5 | 6 | Type "csvlens Pokemon.csv" 7 | Sleep 0.5s 8 | Enter @2s 9 | 10 | Down @0.5s 5 11 | Sleep 2s 12 | 13 | Type @1s "/" 14 | Type @0.2s "Flying" 15 | Enter @2s 16 | Type @0.5s "nnn" 17 | Sleep 2s 18 | 19 | Type @1s "&" 20 | Type @0.2s "Bug" 21 | Enter @4s 22 | 23 | Type @1s "*" 24 | Type @0.2s "Name|Att|Leg" 25 | Enter @2s 26 | 27 | Tab @1s 1 28 | Right @1s 29 | Type @6s "J" 30 | 31 | Type "q" 32 | Sleep 1s -------------------------------------------------------------------------------- /.github/workflows/lint.yml: 
-------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Format 19 | run: cargo fmt --check 20 | - name: Clippy 21 | run: cargo clippy -- -Dwarnings -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by dist: https://opensource.axo.dev/cargo-dist/ 2 | # 3 | # Copyright 2022-2024, axodotdev 4 | # SPDX-License-Identifier: MIT or Apache-2.0 5 | # 6 | # CI that: 7 | # 8 | # * checks for a Git Tag that looks like a release 9 | # * builds artifacts with dist (archives, installers, hashes) 10 | # * uploads those artifacts to temporary workflow zip 11 | # * on success, uploads the artifacts to a GitHub Release 12 | # 13 | # Note that the GitHub Release will be created with a generated 14 | # title/body based on your changelogs. 15 | 16 | name: Release 17 | permissions: 18 | "contents": "write" 19 | 20 | # This task will run whenever you push a git tag that looks like a version 21 | # like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. 22 | # Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where 23 | # PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION 24 | # must be a Cargo-style SemVer Version (must have at least major.minor.patch). 25 | # 26 | # If PACKAGE_NAME is specified, then the announcement will be for that 27 | # package (erroring out if it doesn't have the given version or isn't dist-able). 
28 | # 29 | # If PACKAGE_NAME isn't specified, then the announcement will be for all 30 | # (dist-able) packages in the workspace with that version (this mode is 31 | # intended for workspaces with only one dist-able package, or with all dist-able 32 | # packages versioned/released in lockstep). 33 | # 34 | # If you push multiple tags at once, separate instances of this workflow will 35 | # spin up, creating an independent announcement for each one. However, GitHub 36 | # will hard limit this to 3 tags per commit, as it will assume more tags is a 37 | # mistake. 38 | # 39 | # If there's a prerelease-style suffix to the version, then the release(s) 40 | # will be marked as a prerelease. 41 | on: 42 | pull_request: 43 | push: 44 | tags: 45 | - '**[0-9]+.[0-9]+.[0-9]+*' 46 | 47 | jobs: 48 | # Run 'dist plan' (or host) to determine what tasks we need to do 49 | plan: 50 | runs-on: "ubuntu-22.04" 51 | outputs: 52 | val: ${{ steps.plan.outputs.manifest }} 53 | tag: ${{ !github.event.pull_request && github.ref_name || '' }} 54 | tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }} 55 | publishing: ${{ !github.event.pull_request }} 56 | env: 57 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 58 | steps: 59 | - uses: actions/checkout@v4 60 | with: 61 | submodules: recursive 62 | - name: Install dist 63 | # we specify bash to get pipefail; it guards against the `curl` command 64 | # failing. otherwise `sh` won't catch that `curl` returned non-0 65 | shell: bash 66 | run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.28.0/cargo-dist-installer.sh | sh" 67 | - name: Cache dist 68 | uses: actions/upload-artifact@v4 69 | with: 70 | name: cargo-dist-cache 71 | path: ~/.cargo/bin/dist 72 | # sure would be cool if github gave us proper conditionals... 
73 | # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible 74 | # functionality based on whether this is a pull_request, and whether it's from a fork. 75 | # (PRs run on the *source* but secrets are usually on the *target* -- that's *good* 76 | # but also really annoying to build CI around when it needs secrets to work right.) 77 | - id: plan 78 | run: | 79 | dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json 80 | echo "dist ran successfully" 81 | cat plan-dist-manifest.json 82 | echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" 83 | - name: "Upload dist-manifest.json" 84 | uses: actions/upload-artifact@v4 85 | with: 86 | name: artifacts-plan-dist-manifest 87 | path: plan-dist-manifest.json 88 | 89 | # Build and packages all the platform-specific things 90 | build-local-artifacts: 91 | name: build-local-artifacts (${{ join(matrix.targets, ', ') }}) 92 | # Let the initial task tell us to not run (currently very blunt) 93 | needs: 94 | - plan 95 | if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} 96 | strategy: 97 | fail-fast: false 98 | # Target platforms/runners are computed by dist in create-release. 
99 | # Each member of the matrix has the following arguments: 100 | # 101 | # - runner: the github runner 102 | # - dist-args: cli flags to pass to dist 103 | # - install-dist: expression to run to install dist on the runner 104 | # 105 | # Typically there will be: 106 | # - 1 "global" task that builds universal installers 107 | # - N "local" tasks that build each platform's binaries and platform-specific installers 108 | matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} 109 | runs-on: ${{ matrix.runner }} 110 | container: ${{ matrix.container && matrix.container.image || null }} 111 | env: 112 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 113 | BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json 114 | steps: 115 | - name: enable windows longpaths 116 | run: | 117 | git config --global core.longpaths true 118 | - uses: actions/checkout@v4 119 | with: 120 | submodules: recursive 121 | - name: Install Rust non-interactively if not already installed 122 | if: ${{ matrix.container }} 123 | run: | 124 | if ! 
command -v cargo > /dev/null 2>&1; then 125 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 126 | echo "$HOME/.cargo/bin" >> $GITHUB_PATH 127 | fi 128 | - name: Install dist 129 | run: ${{ matrix.install_dist.run }} 130 | # Get the dist-manifest 131 | - name: Fetch local artifacts 132 | uses: actions/download-artifact@v4 133 | with: 134 | pattern: artifacts-* 135 | path: target/distrib/ 136 | merge-multiple: true 137 | - name: Install dependencies 138 | run: | 139 | ${{ matrix.packages_install }} 140 | - name: Build artifacts 141 | run: | 142 | # Actually do builds and make zips and whatnot 143 | dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json 144 | echo "dist ran successfully" 145 | - id: cargo-dist 146 | name: Post-build 147 | # We force bash here just because github makes it really hard to get values up 148 | # to "real" actions without writing to env-vars, and writing to env-vars has 149 | # inconsistent syntax between shell and powershell. 
150 | shell: bash 151 | run: | 152 | # Parse out what we just built and upload it to scratch storage 153 | echo "paths<<EOF" >> "$GITHUB_OUTPUT" 154 | dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT" 155 | echo "EOF" >> "$GITHUB_OUTPUT" 156 | 157 | cp dist-manifest.json "$BUILD_MANIFEST_NAME" 158 | - name: "Upload artifacts" 159 | uses: actions/upload-artifact@v4 160 | with: 161 | name: artifacts-build-local-${{ join(matrix.targets, '_') }} 162 | path: | 163 | ${{ steps.cargo-dist.outputs.paths }} 164 | ${{ env.BUILD_MANIFEST_NAME }} 165 | 166 | # Build and package all the platform-agnostic(ish) things 167 | build-global-artifacts: 168 | needs: 169 | - plan 170 | - build-local-artifacts 171 | runs-on: "ubuntu-22.04" 172 | env: 173 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 174 | BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json 175 | steps: 176 | - uses: actions/checkout@v4 177 | with: 178 | submodules: recursive 179 | - name: Install cached dist 180 | uses: actions/download-artifact@v4 181 | with: 182 | name: cargo-dist-cache 183 | path: ~/.cargo/bin/ 184 | - run: chmod +x ~/.cargo/bin/dist 185 | # Get all the local artifacts for the global tasks to use (for e.g.
checksums) 186 | - name: Fetch local artifacts 187 | uses: actions/download-artifact@v4 188 | with: 189 | pattern: artifacts-* 190 | path: target/distrib/ 191 | merge-multiple: true 192 | - id: cargo-dist 193 | shell: bash 194 | run: | 195 | dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json 196 | echo "dist ran successfully" 197 | 198 | # Parse out what we just built and upload it to scratch storage 199 | echo "paths<<EOF" >> "$GITHUB_OUTPUT" 200 | jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" 201 | echo "EOF" >> "$GITHUB_OUTPUT" 202 | 203 | cp dist-manifest.json "$BUILD_MANIFEST_NAME" 204 | - name: "Upload artifacts" 205 | uses: actions/upload-artifact@v4 206 | with: 207 | name: artifacts-build-global 208 | path: | 209 | ${{ steps.cargo-dist.outputs.paths }} 210 | ${{ env.BUILD_MANIFEST_NAME }} 211 | # Determines if we should publish/announce 212 | host: 213 | needs: 214 | - plan 215 | - build-local-artifacts 216 | - build-global-artifacts 217 | # Only run if we're "publishing", and only if local and global didn't fail (skipped is fine) 218 | if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} 219 | env: 220 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 221 | runs-on: "ubuntu-22.04" 222 | outputs: 223 | val: ${{ steps.host.outputs.manifest }} 224 | steps: 225 | - uses: actions/checkout@v4 226 | with: 227 | submodules: recursive 228 | - name: Install cached dist 229 | uses: actions/download-artifact@v4 230 | with: 231 | name: cargo-dist-cache 232 | path: ~/.cargo/bin/ 233 | - run: chmod +x ~/.cargo/bin/dist 234 | # Fetch artifacts from scratch-storage 235 | - name: Fetch artifacts 236 | uses: actions/download-artifact@v4 237 | with: 238 | pattern: artifacts-* 239 | path:
target/distrib/ 240 | merge-multiple: true 241 | - id: host 242 | shell: bash 243 | run: | 244 | dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json 245 | echo "artifacts uploaded and released successfully" 246 | cat dist-manifest.json 247 | echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" 248 | - name: "Upload dist-manifest.json" 249 | uses: actions/upload-artifact@v4 250 | with: 251 | # Overwrite the previous copy 252 | name: artifacts-dist-manifest 253 | path: dist-manifest.json 254 | # Create a GitHub Release while uploading all files to it 255 | - name: "Download GitHub Artifacts" 256 | uses: actions/download-artifact@v4 257 | with: 258 | pattern: artifacts-* 259 | path: artifacts 260 | merge-multiple: true 261 | - name: Cleanup 262 | run: | 263 | # Remove the granular manifests 264 | rm -f artifacts/*-dist-manifest.json 265 | - name: Create GitHub Release 266 | env: 267 | PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}" 268 | ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}" 269 | ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}" 270 | RELEASE_COMMIT: "${{ github.sha }}" 271 | run: | 272 | # Write and read notes from a file to avoid quoting breaking things 273 | echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt 274 | 275 | gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/* 276 | 277 | announce: 278 | needs: 279 | - plan 280 | - host 281 | # use "always() && ..." to allow us to wait for all publish jobs while 282 | # still allowing individual publish jobs to skip themselves (for prereleases). 283 | # "host" however must run to completion, no skipping allowed! 
284 | if: ${{ always() && needs.host.result == 'success' }} 285 | runs-on: "ubuntu-22.04" 286 | env: 287 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 288 | steps: 289 | - uses: actions/checkout@v4 290 | with: 291 | submodules: recursive 292 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Build 19 | run: cargo build --verbose 20 | - name: Run tests 21 | run: cargo test --verbose 22 | - name: Run tests (no default features) 23 | run: cargo test --verbose --no-default-features 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | 3 | .vscode 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # v0.13.0 2 | 3 | * Add `--color-columns` to display each column in a different color (#39) 4 | * Add `--prompt` to show a custom prompt message in the status bar (#135) 5 | * Expose freeze columns option in library usage (#124 by @jqnatividad) 6 | * Improve visibility of line numbers and borders 7 | * Add `aarch64` release targets (#55) 8 | 9 | # v0.12.0 10 | 11 | * Freeze first `n` columns when scrolling via `f` (#62, #117) 12 | * Support searching in header row (#102) 13 | * Support moving find mode cursor horizontally via `n` / `N` 14 | * Support library usage without clap dependency (#118 by @jqnatividad) 15 | * Fix search highlighting when columns are filtered 16 | * Fix column scrolling for CSV with irregular last row (#99) 
17 | 18 | # v0.11.0 19 | 20 | * Support copying a row to the clipboard (#97 by @mendelmaleh) 21 | * Improve rendering performance by using line-buffered `stderr` (#107) 22 | * Fix panic when filtering columns with irregular CSV (#112 by @antmelon) 23 | * Disable `arboard` default features to reduce image related dependencies (#114 by @jqnatividad) 24 | * Improve test stability across different environments (#101) 25 | 26 | # v0.10.1 27 | 28 | * Fix `--echo-column` option error 29 | 30 | # v0.10.0 31 | 32 | * Improve horizontal space utilization when rendering wide columns 33 | * Support toggling sort direction 34 | * Accept tab delimiter specified as `-d \t` without quotes 35 | * Add library target 36 | 37 | # v0.9.1 38 | 39 | ## Bug fixes 40 | 41 | * Fixed issue with finding and filtering using an empty cell leading to incorrect matches 42 | * Fixed certain Shift key-related key bindings not functioning correctly on Windows (Issue #82) 43 | 44 | # v0.9.0 45 | 46 | * Improve scrolling responsiveness for large csv 47 | * Find and filter using selected cell (`#` and `@`) to search for exact matches 48 | * Fix rendering of cursor in input prompt 49 | * Fix app freeze on line wrap in some cases 50 | * Fix potential overflow when subtracting durations in Windows (#77) 51 | * Fix rendering of right border with irregular columns (#73) 52 | * Fix misspelling ([#72](https://github.com/YS-L/csvlens/pull/72) by @theKnightsOfRohan) 53 | 54 | # v0.8.1 55 | 56 | * Fix rendering of consecutive newlines 57 | * Fix clipboard support on Wayland (@ram02z) 58 | * Allow opting out of clipboard feature 59 | 60 | # v0.8.0 61 | 62 | This release adds support for the following: 63 | 64 | * Find and filter within the selected column 65 | * Find and filter using the selected cell (`#` and `@`) 66 | * Wrap lines by words (toggled via `-W`) 67 | * Copy selected cell to clipboard (`y`) 68 | 69 | # v0.7.0 70 | 71 | * Support sorting rows by a column (`Shift + ↓` or `Shift + j`) 72 | * Support 
CSV without headers via `--no-headers` 73 | * Add `--columns`, `--filter`, and `--find` options for filtering rows and columns 74 | 75 | # v0.6.0 76 | 77 | * Accept `"\t"` for tab as delimiter argument ([#49](https://github.com/YS-L/csvlens/pull/49) by @peterjc) 78 | * Add `-t` flag for tsv files ([#47](https://github.com/YS-L/csvlens/pull/47) by @JojiiOfficial) 79 | * Print lower level causes on error 80 | 81 | # v0.5.1 82 | 83 | * Fix panic caused by unicode and newline 84 | 85 | # v0.5.0 86 | 87 | * Migrate to Ratatui ([#42](https://github.com/YS-L/csvlens/pull/42) by @joshka) 88 | * Better readline support using tui-input (move cursor forward / backward, jump to the start, etc) 89 | * Improve buffer history to retrieve more than just the last input 90 | 91 | # v0.4.0 92 | 93 | This release adds support for the following: 94 | 95 | * Show help page with key bindings (`H`) 96 | * Scroll to left most and right most columns (`Ctrl + ←` or `Ctrl + →`) 97 | * Scroll forward and backward half a window (`Ctrl + d` or `Ctrl + u`) 98 | * Resize columns (`<` or `>`) 99 | * Reset to default view (`r`) 100 | 101 | # v0.3.2 102 | 103 | * Fix incorrectly truncated content due to highlighting 104 | * Fix potential overflow panic 105 | 106 | # v0.3.1 107 | 108 | * Fix panic due to unicode handling 109 | * Fix row height calculation to account for column widths properly 110 | * Reduce maximum column width fraction to make more columns visible 111 | 112 | # v0.3.0 113 | 114 | * Support line wrapping for displaying long or multiline content 115 | 116 | # v0.2.0 117 | 118 | * Add `-d auto` option to auto-detect delimiter 119 | * Add `Home` and `End` key bindings 120 | * Support row, column and cell selection modes (toggle via `TAB`) 121 | 122 | # v0.1.14 123 | 124 | * Implement --ignore-case option 125 | * Fix crossterm double input issue on Windows 126 | 127 | # v0.1.13 128 | 129 | * Switch to Rust 2021 edition and update dependencies (#25) 130 | * Fix crossterm panic by upgrading 
to version 0.26.1 131 | * New styling for selected row 132 | 133 | # v0.1.12 134 | 135 | * Add --version option 136 | * Add --echo-column option to print column's value at selected row to stdout 137 | * Use stderr as tui buffer to support piping from csvlens 138 | 139 | # v0.1.11 140 | 141 | * Attempt to restore terminal state on panic 142 | * Fix piped input not working on macOS 143 | 144 | # v0.1.10 145 | 146 | * Handle irregular CSV when calculating column widths 147 | * Improved event loop handling 148 | * Improved memory usage when creating temporary file from stdin 149 | 150 | # v0.1.9 151 | 152 | * Support filtering on columns 153 | * Support basic command history 154 | 155 | # v0.1.8 156 | 157 | * Support horizontal scrolling 158 | 159 | # v0.1.7 160 | 161 | * Ensure terminal state is restored on error 162 | 163 | # v0.1.6 164 | 165 | * Fix bug where program sometimes crashes due to unicode characters 166 | * Switch to `crossterm` 167 | 168 | # v0.1.5 169 | 170 | * Support irregular CSV to some extent (parse CSV in non-strict mode) 171 | * Support regex patterns in search and filter 172 | * Support scrolling to top with `g` -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "csvlens" 3 | version = "0.13.0" 4 | authors = ["Yung Siang Liau "] 5 | license = "MIT" 6 | description = "Command line csv viewer" 7 | readme = "README.md" 8 | homepage = "https://github.com/YS-L/csvlens" 9 | repository = "https://github.com/YS-L/csvlens" 10 | exclude = [".github/*", "tests/*"] 11 | keywords = ["cli", "csv", "viewer", "pager", "tui"] 12 | edition = "2024" 13 | 14 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 15 | 16 | [dependencies] 17 | csv = "1.3" 18 | ratatui = "0.29" 19 | crossterm = { version = "0.28", features = ["use-dev-tty"] } 20 | anyhow = "1.0" 21 | clap = { 
version = "4.5", features = ["derive", "wrap_help"], optional = true } 22 | tempfile = "3" 23 | regex = "1" 24 | csv-sniffer = "0.3.1" 25 | tui-input = { version = "0.11", features = ["crossterm"] } 26 | arrow = {version = "54", default-features = false, features = ["csv"]} 27 | sorted-vec = "0.8" 28 | arboard = { version = "3.5", default-features = false, features = ["wayland-data-control"], optional = true } 29 | thiserror = "2" 30 | terminal-colorsaurus = "0.4.8" 31 | ansi-to-tui = "7.0.0" 32 | 33 | [target.'cfg(windows)'.dependencies] 34 | crossterm = "0.28" 35 | 36 | [features] 37 | default = ["clipboard", "cli"] 38 | clipboard = ["dep:arboard"] 39 | cli = ["dep:clap"] 40 | 41 | # The profile that 'cargo dist' will build with 42 | [profile.dist] 43 | inherits = "release" 44 | lto = "thin" 45 | 46 | [profile.release] 47 | strip = "debuginfo" 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Yung Siang Liau 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # csvlens 2 | 3 | `csvlens` is a command line CSV file viewer. It is like `less` but made 4 | for CSV. 5 | 6 | ![Demo](.github/demo.gif) 7 | 8 | ## Usage 9 | 10 | Run `csvlens` by providing the CSV filename: 11 | 12 | ``` 13 | csvlens <filename> 14 | ``` 15 | 16 | Pipe CSV data directly to `csvlens`: 17 | 18 | ``` 19 | <your commands producing some csv data> | csvlens 20 | ``` 21 | ### Key bindings 22 | 23 | Key | Action 24 | --- | --- 25 | `hjkl` (or `← ↓ ↑→ `) | Scroll one row or column in the given direction 26 | `Ctrl + f` (or `Page Down`) | Scroll one window down 27 | `Ctrl + b` (or `Page Up`) | Scroll one window up 28 | `Ctrl + d` (or `d`) | Scroll half a window down 29 | `Ctrl + u` (or `u`) | Scroll half a window up 30 | `Ctrl + h` | Scroll one window left 31 | `Ctrl + l` | Scroll one window right 32 | `Ctrl + ←` | Scroll left to first column 33 | `Ctrl + →` | Scroll right to last column 34 | `G` (or `End`) | Go to bottom 35 | `g` (or `Home`) | Go to top 36 | `<n>G` | Go to line `n` 37 | `/<regex>` | Find content matching regex and highlight matches 38 | `n` (in Find mode) | Jump to next result 39 | `N` (in Find mode) | Jump to previous result 40 | `&<regex>` | Filter rows using regex (show only matches) 41 | `*<regex>` | Filter columns using regex (show only matches) 42 | `TAB` | Toggle between row, column or cell selection modes 43 | `>` | Increase selected column's width 44 | `<` | Decrease selected column's width 45 | `Shift + ↓` (or `Shift + j`) | Sort rows or toggle sort direction by the selected column 46 | `#` (in Cell mode) | Find and highlight rows like the selected cell 47
| `@` (in Cell mode) | Filter rows like the selected cell 48 | `y` | Copy the selected row or cell to clipboard 49 | `Enter` (in Cell mode) | Print the selected cell to stdout and exit 50 | `-S` | Toggle line wrapping 51 | `-W` | Toggle line wrapping by words 52 | `f<n>` | Freeze this number of columns from the left 53 | `r` | Reset to default view (clear all filters and custom column widths) 54 | `H` (or `?`) | Display help 55 | `q` | Exit 56 | 57 | ### Optional parameters 58 | 59 | * `-d <char>`: Use this delimiter when parsing the CSV 60 | (e.g. `csvlens file.csv -d '\t'`). 61 | 62 | Specify `-d auto` to auto-detect the delimiter. 63 | 64 | * `-t`, `--tab-separated`: Use tab as the delimiter (when specified, `-d` is ignored). 65 | 66 | * `-i`, `--ignore-case`: Ignore case when searching. This flag is ignored if any 67 | uppercase letters are present in the search string. 68 | 69 | * `--no-headers`: Do not interpret the first row as headers. 70 | 71 | * `--columns <regex>`: Use this regex to select columns to display by default. 72 | 73 | Example: `"column1|column2"` matches `"column1"`, `"column2"`, and also column names like 74 | `"column11"`, `"column22"`. 75 | 76 | * `--filter <regex>`: Use this regex to filter rows to display by default. 77 | 78 | The regex is matched against each cell in every column. 79 | 80 | Example: `"value1|value2"` filters rows with any cells containing `"value1"`, `"value2"`, or text 81 | like `"my_value1"` or `"value234"`. 82 | 83 | * `--find <regex>`: Use this regex to find and highlight matches by default. 84 | 85 | The regex is matched against each cell in every column. 86 | 87 | Example: `"value1|value2"` highlights text in any cells containing `"value1"`, `"value2"`, or 88 | longer text like `"value1_ok"`. 89 | 90 | * `--echo-column <column_name>`: Print the value of this column at the selected 91 | row to stdout on `Enter` key and then exit. 92 | 93 | * `--prompt <prompt>`: Show a custom prompt message in the status bar.
Supports ANSI escape codes 94 | for colored or styled text. 95 | 96 | Example: 97 | ```bash 98 | csvlens Pokemon.csv --prompt $'\e[1m\e[32mSelect a Pokémon!\e[0m' 99 | ``` 100 | 101 | * `--color-columns` (or `--colorful`): Display each column in a different color. 102 | 103 | ## Installation 104 | 105 | ### Direct download 106 | 107 | You can download the `tar.xz` or `zip` file matching your operating system from the 108 | [releases page](https://github.com/YS-L/csvlens/releases), extract it and execute the `csvlens` 109 | binary. 110 | 111 | ### Homebrew 112 | 113 | For macOS, `csvlens` is available on [Homebrew](https://formulae.brew.sh/formula/csvlens). You can 114 | install it using: 115 | ``` 116 | brew install csvlens 117 | ``` 118 | 119 | ### Arch Linux 120 | `csvlens` is available in the [official repositories](https://archlinux.org/packages/extra/x86_64/csvlens). You can install it using: 121 | ``` 122 | pacman -S csvlens 123 | ``` 124 | 125 | ### Windows 126 | 127 | For Windows, `csvlens` is available on [winget](https://learn.microsoft.com/en-gb/windows/package-manager/). You can install it using: 128 | ```powershell 129 | winget install --id YS-L.csvlens 130 | ``` 131 | 132 | ### FreeBSD 133 | `csvlens` is available as a [FreeBSD pkg](https://www.freshports.org/textproc/csvlens/). You can install it using: 134 | ``` 135 | pkg install csvlens 136 | ``` 137 | 138 | ### NetBSD 139 | `csvlens` is available on [pkgsrc](https://ftp.netbsd.org/pub/pkgsrc/current/pkgsrc/textproc/csvlens/index.html). If you're using NetBSD you can install it using: 140 | ``` 141 | pkgin install csvlens 142 | ``` 143 | 144 | ### OpenBSD 145 | `csvlens` is available as an [OpenBSD port](https://cvsweb.openbsd.org/ports/textproc/csvlens/). 
If you're using OpenBSD 7.6-current or later, you can install it using: 146 | ``` 147 | doas pkg_add csvlens 148 | ``` 149 | 150 | ### Cargo 151 | 152 | If you have [Rust](https://www.rust-lang.org/tools/install) installed, `csvlens` is available on 153 | [crates.io](https://crates.io/crates/csvlens) and you can install it using: 154 | ``` 155 | cargo install csvlens 156 | ``` 157 | 158 | Or, build and install from source after cloning this repo: 159 | ``` 160 | cargo install --path $(pwd) 161 | ``` 162 | 163 | ## Library Usage 164 | 165 | This crate allows you to use csvlens as a library. 166 | 167 | In your `Cargo.toml`, add the following: 168 | 169 | ```toml 170 | [dependencies] 171 | csvlens = { version = "0.12.0", default-features = false, features = ["clipboard"] } 172 | ``` 173 | 174 | ### Example 175 | 176 | Here's a simple example of how to use `csvlens` as a library ([Documentation](https://docs.rs/csvlens/0.12.0/csvlens/index.html)): 177 | 178 | ```rust 179 | use csvlens::run_csvlens; 180 | 181 | let out = run_csvlens(&["/path/to/your.csv"]).unwrap(); 182 | if let Some(selected_cell) = out { 183 | println!("Selected: {}", selected_cell); 184 | } 185 | ``` 186 | 187 | For more advanced usage, you can use `CsvlensOptions` to customize the behavior: 188 | 189 | ```rust 190 | use csvlens::{run_csvlens_with_options, CsvlensOptions}; 191 | 192 | let options = CsvlensOptions { 193 | filename: "/path/to/your.csv".to_string(), 194 | delimiter: Some("|".to_string()), 195 | ignore_case: true, 196 | debug: true, 197 | ..Default::default() 198 | }; 199 | let out = run_csvlens_with_options(options).unwrap(); 200 | if let Some(selected_cell) = out { 201 | println!("Selected: {}", selected_cell); 202 | } 203 | ``` 204 | 205 | See how [qsv](https://github.com/dathere/qsv/tree/master?tab=readme-ov-file#qsv-blazing-fast-data-wrangling-toolkit) uses `csvlens` as a library [here](https://github.com/dathere/qsv/blob/master/src/cmd/lens.rs#L2). 
206 |
--------------------------------------------------------------------------------
/dist-workspace.toml:
--------------------------------------------------------------------------------
 1 | [workspace]
 2 | members = ["cargo:."]
 3 |
 4 | # Config for 'dist'
 5 | [dist]
 6 | # The preferred dist version to use in CI (Cargo.toml SemVer syntax)
 7 | cargo-dist-version = "0.28.0"
 8 | # CI backends to support
 9 | ci = "github"
10 | # The installers to generate for each app
11 | installers = []
12 | # Target platforms to build apps for (Rust target-triple syntax)
13 | targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "aarch64-pc-windows-msvc", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-unknown-linux-musl", "x86_64-pc-windows-msvc"]
14 | # Which actions to run on pull requests
15 | pr-run-mode = "plan"
16 |
17 | [dist.github-custom-runners]
18 | global = "ubuntu-22.04"
19 | aarch64-unknown-linux-gnu = "ubuntu-22.04"
20 | aarch64-pc-windows-msvc = "ubuntu-22.04"
21 | x86_64-unknown-linux-gnu = "ubuntu-22.04"
22 | x86_64-unknown-linux-musl = "ubuntu-22.04"
--------------------------------------------------------------------------------
/src/columns_filter.rs:
--------------------------------------------------------------------------------
 1 | use regex::Regex;
 2 |
 3 | /// Selects which CSV columns are shown by matching a regex against the header names.
 4 | ///
 5 | /// If the pattern matches no header, the filter falls back to keeping every column and
 6 | /// records that in `disabled_because_no_match`.
 7 | #[derive(Debug)]
 8 | pub struct ColumnsFilter {
 9 |     /// Pattern the headers were matched against.
10 |     pattern: Regex,
11 |     /// Positions (in the original header list) of the columns that are kept.
12 |     indices: Vec<usize>,
13 |     /// Header names of the kept columns, in the same order as `indices`.
14 |     filtered_headers: Vec<String>,
15 |     /// One entry per original column: `true` when that header matched `pattern`.
16 |     filtered_flags: Vec<bool>,
17 |     /// Number of columns before filtering.
18 |     num_columns_before_filter: usize,
19 |     /// `true` when nothing matched and all columns were kept instead.
20 |     disabled_because_no_match: bool,
21 | }
22 |
23 | impl ColumnsFilter {
24 |     /// Builds a filter by testing `pattern` against each entry of `headers`.
25 |     ///
26 |     /// When no header matches, every column is kept and the filter is marked as disabled;
27 |     /// the per-column match flags all remain `false` in that case.
28 |     pub fn new(pattern: Regex, headers: &[String]) -> Self {
29 |         let mut indices: Vec<usize> = vec![];
30 |         let mut filtered_headers: Vec<String> = vec![];
31 |         let mut filtered_flags: Vec<bool> = Vec::with_capacity(headers.len());
32 |         for (i, header) in headers.iter().enumerate() {
33 |             let matched = pattern.is_match(header);
34 |             if matched {
35 |                 indices.push(i);
36 |                 filtered_headers.push(header.clone());
37 |             }
38 |             filtered_flags.push(matched);
39 |         }
40 |         // An empty match set disables the filter rather than hiding every column.
41 |         let disabled_because_no_match = indices.is_empty();
42 |         if disabled_because_no_match {
43 |             indices = (0..headers.len()).collect();
44 |             filtered_headers = headers.to_vec();
45 |         }
46 |         Self {
47 |             pattern,
48 |             indices,
49 |             filtered_headers,
50 |             filtered_flags,
51 |             num_columns_before_filter: headers.len(),
52 |             disabled_because_no_match,
53 |         }
54 |     }
55 |
56 |     /// Header names of the columns kept by the filter.
57 |     pub fn filtered_headers(&self) -> &Vec<String> {
58 |         &self.filtered_headers
59 |     }
60 |
61 |     /// Original indices of the columns kept by the filter.
62 |     pub fn indices(&self) -> &Vec<usize> {
63 |         &self.indices
64 |     }
65 |
66 |     /// Returns a clone of the pattern used to build this filter.
67 |     pub fn pattern(&self) -> Regex {
68 |         self.pattern.clone()
69 |     }
70 |
71 |     /// Number of columns kept by the filter.
72 |     pub fn num_filtered(&self) -> usize {
73 |         self.indices.len()
74 |     }
75 |
76 |     /// Number of columns before the filter was applied.
77 |     pub fn num_original(&self) -> usize {
78 |         self.num_columns_before_filter
79 |     }
80 |
81 |     /// Whether the filter was disabled because the pattern matched no header.
82 |     pub fn disabled_because_no_match(&self) -> bool {
83 |         self.disabled_because_no_match
84 |     }
85 |
86 |     /// Whether the header at original position `index` matched the pattern.
87 |     ///
88 |     /// Returns `false` for an out-of-range `index`.
89 |     pub fn is_column_filtered(&self, index: usize) -> bool {
90 |         self.filtered_flags.get(index).copied().unwrap_or(false)
91 |     }
92 | }
93 |
--------------------------------------------------------------------------------
/src/common.rs:
--------------------------------------------------------------------------------
 1 | use std::fmt;
 2 |
 3 | /// Input modes; presumably one per interactive prompt/state of the UI (verify against callers).
 4 | #[derive(Clone, PartialEq, Eq, Hash, Copy, Debug)]
 5 | pub enum InputMode {
 6 |     Default,
 7 |     GotoLine,
 8 |     Find,
 9 |     Filter,
10 |     FilterColumns,
11 |     FreezeColumns,
12 |     Option,
13 |     Help,
14 | }
15 |
16 | impl fmt::Display for InputMode {
17 |     /// Formats the mode using its `Debug` representation, i.e. the variant name.
18 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
19 |         write!(f, "{:?}", self)
20 |     }
21 | }
22 |
--------------------------------------------------------------------------------
/src/csv.rs:
--------------------------------------------------------------------------------
 1 | extern crate csv;
 2 |
 3 | use csv::{Position, Reader, ReaderBuilder};
 4 | use std::cmp::max;
 5 | use std::fs::File;
 6 | use std::sync::{Arc, Mutex};
 7 | use std::thread::{self, JoinHandle};
 8 | use
std::time; 9 | 10 | use crate::errors::CsvlensResult; 11 | 12 | fn string_record_to_vec(record: &csv::StringRecord) -> Vec { 13 | let mut string_vec = Vec::with_capacity(record.len()); 14 | for field in record.iter() { 15 | string_vec.push(String::from(field)); 16 | } 17 | string_vec 18 | } 19 | 20 | pub struct CsvConfig { 21 | path: String, 22 | delimiter: u8, 23 | no_headers: bool, 24 | } 25 | 26 | impl CsvConfig { 27 | pub fn new(path: &str, delimiter: u8, no_headers: bool) -> CsvConfig { 28 | CsvConfig { 29 | path: path.to_string(), 30 | delimiter, 31 | no_headers, 32 | } 33 | } 34 | 35 | pub fn new_reader(&self) -> CsvlensResult> { 36 | let reader = ReaderBuilder::new() 37 | .flexible(true) 38 | .delimiter(self.delimiter) 39 | .has_headers(!self.no_headers) 40 | .from_path(self.path.as_str())?; 41 | Ok(reader) 42 | } 43 | 44 | pub fn filename(&self) -> &str { 45 | self.path.as_str() 46 | } 47 | 48 | pub fn delimiter(&self) -> u8 { 49 | self.delimiter 50 | } 51 | 52 | pub fn no_headers(&self) -> bool { 53 | self.no_headers 54 | } 55 | 56 | pub fn has_headers(&self) -> bool { 57 | !self.no_headers 58 | } 59 | 60 | /// Convert position to a 0-based record index 61 | pub fn position_to_record_index(&self, position: u64) -> u64 { 62 | if self.no_headers { 63 | position 64 | } else { 65 | position - 1 66 | } 67 | } 68 | 69 | /// Convert position to a 1-based record number 70 | pub fn position_to_record_num(&self, position: u64) -> u64 { 71 | if self.no_headers { 72 | position + 1 73 | } else { 74 | position 75 | } 76 | } 77 | } 78 | 79 | pub struct CsvLensReader { 80 | config: Arc, 81 | reader: Reader, 82 | pub headers: Vec, 83 | internal: Arc>, 84 | } 85 | 86 | #[derive(Debug, PartialEq, Eq, Clone)] 87 | pub struct Row { 88 | pub record_num: usize, 89 | pub fields: Vec, 90 | } 91 | 92 | impl Row { 93 | pub fn subset(&self, indices: &[usize]) -> Row { 94 | let mut subfields = vec![]; 95 | for i in indices { 96 | if let Some(field) = self.fields.get(*i) { 97 | 
subfields.push(field.clone()); 98 | } 99 | } 100 | Row { 101 | record_num: self.record_num, 102 | fields: subfields, 103 | } 104 | } 105 | 106 | fn empty() -> Row { 107 | Row { 108 | record_num: 0, 109 | fields: vec![], 110 | } 111 | } 112 | } 113 | 114 | #[derive(Debug)] 115 | struct GetRowIndex { 116 | // 0-based index of the record in the csv file 117 | record_index: u64, 118 | 119 | // Position where the record should be in the resulting list of rows 120 | order_index: usize, 121 | } 122 | 123 | impl CsvLensReader { 124 | pub fn new(config: Arc) -> CsvlensResult { 125 | let mut reader = config.new_reader()?; 126 | 127 | let headers_record = if config.no_headers() { 128 | let mut dummy_headers = csv::StringRecord::new(); 129 | for (i, _) in reader.headers()?.into_iter().enumerate() { 130 | dummy_headers.push_field((i + 1).to_string().as_str()); 131 | } 132 | dummy_headers 133 | } else { 134 | reader.headers()?.clone() 135 | }; 136 | let headers = string_record_to_vec(&headers_record); 137 | 138 | let (m_internal, _handle) = ReaderInternalState::init_internal(config.clone()); 139 | 140 | let reader = Self { 141 | config: config.clone(), 142 | reader, 143 | headers, 144 | internal: m_internal, 145 | }; 146 | Ok(reader) 147 | } 148 | 149 | pub fn get_rows( 150 | &mut self, 151 | rows_from: u64, 152 | num_rows: u64, 153 | ) -> CsvlensResult<(Vec, GetRowsStats)> { 154 | let indices: Vec = (rows_from..rows_from + num_rows).collect(); 155 | self.get_rows_impl(&indices) 156 | } 157 | 158 | pub fn get_rows_for_indices( 159 | &mut self, 160 | indices: &[u64], 161 | ) -> CsvlensResult<(Vec, GetRowsStats)> { 162 | self.get_rows_impl(indices) 163 | } 164 | 165 | fn get_rows_impl(&mut self, indices: &[u64]) -> CsvlensResult<(Vec, GetRowsStats)> { 166 | let mut get_row_indices = indices 167 | .iter() 168 | .enumerate() 169 | .map(|x| GetRowIndex { 170 | record_index: *x.1, 171 | order_index: x.0, 172 | }) 173 | .collect::>(); 174 | get_row_indices.sort_by(|a, b| 
a.record_index.cmp(&b.record_index)); 175 | self._get_rows_impl_sorted(&get_row_indices) 176 | } 177 | 178 | fn _get_rows_impl_sorted( 179 | &mut self, 180 | indices: &[GetRowIndex], 181 | ) -> CsvlensResult<(Vec, GetRowsStats)> { 182 | // stats for debugging and testing 183 | let mut stats = GetRowsStats::new(); 184 | 185 | let pos = Position::new(); 186 | self.reader.seek(pos)?; 187 | 188 | let tic = time::Instant::now(); 189 | let pos_table = self.get_pos_table(); 190 | stats.pos_table_elapsed = Some(tic.elapsed()); 191 | stats.pos_table_entry = pos_table.len(); 192 | 193 | let mut pos_iter = pos_table.iter(); 194 | let mut indices_iter = indices.iter(); 195 | 196 | let mut res = vec![Row::empty(); indices.len()]; 197 | let mut res_max_index: Option = None; 198 | 199 | let mut next_pos = pos_iter.next(); 200 | let mut next_wanted = indices_iter.next(); 201 | 202 | let num_fields = self.headers.len(); 203 | 204 | loop { 205 | if next_wanted.is_none() { 206 | break; 207 | } 208 | // seek as close to the next wanted record index as possible 209 | let index = next_wanted.unwrap(); 210 | let mut seek_pos: Option = None; 211 | while let Some(pos) = next_pos { 212 | if self.config.position_to_record_index(pos.record()) <= index.record_index { 213 | seek_pos.replace(pos.clone()); 214 | } else { 215 | break; 216 | } 217 | next_pos = pos_iter.next(); 218 | } 219 | if let Some(pos) = seek_pos { 220 | self.reader.seek(pos)?; 221 | stats.log_seek(); 222 | } 223 | 224 | // note that records() excludes header by default, but here the first entry is header 225 | // because of the seek() above. 226 | let mut records = self.reader.records(); 227 | 228 | // parse records and collect those that are wanted 229 | loop { 230 | // exit early if all found. 
This should be common in case of consecutive indices 231 | if next_wanted.is_none() { 232 | break; 233 | } 234 | let wanted = next_wanted.unwrap(); 235 | let record_position = records.reader().position().record(); 236 | if let Some(r) = records.next() { 237 | stats.log_parsed_record(); 238 | // no effective pre-seeking happened, this is still the header 239 | if self.config.has_headers() && record_position == 0 { 240 | continue; 241 | } 242 | if self.config.position_to_record_index(record_position) == wanted.record_index 243 | { 244 | let string_record = r?; 245 | let mut fields = Vec::with_capacity(num_fields); 246 | for field in string_record.iter() { 247 | fields.push(String::from(field)); 248 | } 249 | let row = Row { 250 | record_num: self.config.position_to_record_num(record_position) 251 | as usize, 252 | fields, 253 | }; 254 | res[wanted.order_index] = row; 255 | res_max_index.replace( 256 | res_max_index 257 | .map_or(wanted.order_index, |x| max(x, wanted.order_index)), 258 | ); 259 | next_wanted = indices_iter.next(); 260 | } 261 | // stop parsing if done scanning whole block between marked positions 262 | if let Some(pos) = next_pos { 263 | if record_position >= pos.record() { 264 | break; 265 | } 266 | } 267 | } else { 268 | // no more records 269 | break; 270 | } 271 | } 272 | 273 | if next_pos.is_none() { 274 | // If here, the last block had been scanned, and we should be 275 | // done. If next_wanted is not None, that means an out of bound 276 | // index was provided - that could happen for small input - and 277 | // we should ignore it and stop here regardless 278 | break; 279 | } 280 | } 281 | 282 | // In case requested indices are beyond the last record, truncate those indices. 
283 | res.truncate(res_max_index.map_or(0, |x| x + 1)); 284 | 285 | Ok((res, stats)) 286 | } 287 | 288 | pub fn get_total_line_numbers(&self) -> Option { 289 | self.internal.lock().unwrap().total_line_number 290 | } 291 | 292 | pub fn get_last_indexed_line_number(&self) -> Option { 293 | self.internal 294 | .lock() 295 | .unwrap() 296 | .pos_table 297 | .last() 298 | .map(|x| x.record() as usize) 299 | } 300 | 301 | pub fn get_pos_table(&self) -> Vec { 302 | self.internal.lock().unwrap().pos_table.clone() 303 | } 304 | 305 | #[cfg(test)] 306 | pub fn wait_internal(&self) { 307 | loop { 308 | if self.internal.lock().unwrap().done { 309 | break; 310 | } 311 | thread::sleep(time::Duration::from_millis(100)); 312 | } 313 | } 314 | } 315 | 316 | #[derive(Debug, Clone, PartialEq)] 317 | pub struct GetRowsStats { 318 | pub num_seek: u64, 319 | pub num_parsed_record: u64, 320 | pub pos_table_elapsed: Option, 321 | pub pos_table_entry: usize, 322 | } 323 | 324 | impl GetRowsStats { 325 | fn new() -> GetRowsStats { 326 | GetRowsStats { 327 | num_seek: 0, 328 | num_parsed_record: 0, 329 | pos_table_elapsed: None, 330 | pos_table_entry: 0, 331 | } 332 | } 333 | 334 | fn log_seek(&mut self) { 335 | self.num_seek += 1; 336 | } 337 | 338 | fn log_parsed_record(&mut self) { 339 | self.num_parsed_record += 1 340 | } 341 | } 342 | 343 | struct ReaderInternalState { 344 | total_line_number: Option, 345 | pos_table: Vec, 346 | done: bool, 347 | } 348 | 349 | impl ReaderInternalState { 350 | fn init_internal(config: Arc) -> (Arc>, JoinHandle<()>) { 351 | let internal = ReaderInternalState { 352 | total_line_number: None, 353 | pos_table: vec![], 354 | done: false, 355 | }; 356 | 357 | let m_state = Arc::new(Mutex::new(internal)); 358 | 359 | let _m = m_state.clone(); 360 | let handle = thread::spawn(move || { 361 | let filesize = File::open(config.filename()) 362 | .unwrap() 363 | .metadata() 364 | .unwrap() 365 | .len(); 366 | let pos_table_num_entries = 10000; 367 | let 
minimum_interval = 500; // handle small csv (don't keep pos every byte) 368 | let pos_table_update_every = max(minimum_interval, filesize / pos_table_num_entries); 369 | 370 | // full csv parsing 371 | let bg_reader = config.new_reader().unwrap(); 372 | let mut n_lines = 0; 373 | let mut n_bytes: u64 = 0; 374 | let mut last_updated_at = 0; 375 | let mut iter = bg_reader.into_records(); 376 | loop { 377 | let next_pos = iter.reader().position().clone(); 378 | if iter.next().is_none() { 379 | break; 380 | } 381 | // must not include headers position here (n > 0) 382 | let cur = n_bytes / pos_table_update_every; 383 | if n_bytes > 0 && cur > last_updated_at { 384 | let mut m = _m.lock().unwrap(); 385 | m.pos_table.push(next_pos.clone()); 386 | last_updated_at = cur; 387 | } 388 | n_lines += 1; 389 | n_bytes = next_pos.byte(); 390 | } 391 | let mut m = _m.lock().unwrap(); 392 | m.total_line_number = Some(n_lines); 393 | m.done = true; 394 | }); 395 | 396 | (m_state, handle) 397 | } 398 | } 399 | 400 | #[cfg(test)] 401 | mod tests { 402 | use super::*; 403 | 404 | impl Row { 405 | pub fn new(record_num: usize, fields: Vec<&str>) -> Row { 406 | Row { 407 | record_num, 408 | fields: fields.iter().map(|x| x.to_string()).collect(), 409 | } 410 | } 411 | } 412 | 413 | #[test] 414 | fn test_cities_get_rows() { 415 | let config = Arc::new(CsvConfig::new("tests/data/cities.csv", b',', false)); 416 | let mut r = CsvLensReader::new(config).unwrap(); 417 | r.wait_internal(); 418 | let rows = r.get_rows(2, 3).unwrap().0; 419 | let expected = vec![ 420 | Row::new( 421 | 3, 422 | vec![ 423 | "46", "35", "59", "N", "120", "30", "36", "W", "Yakima", "WA", 424 | ], 425 | ), 426 | Row::new( 427 | 4, 428 | vec![ 429 | "42", 430 | "16", 431 | "12", 432 | "N", 433 | "71", 434 | "48", 435 | "0", 436 | "W", 437 | "Worcester", 438 | "MA", 439 | ], 440 | ), 441 | Row::new( 442 | 5, 443 | vec![ 444 | "43", 445 | "37", 446 | "48", 447 | "N", 448 | "89", 449 | "46", 450 | "11", 451 | "W", 452 | 
"Wisconsin Dells", 453 | "WI", 454 | ], 455 | ), 456 | ]; 457 | assert_eq!(rows, expected); 458 | } 459 | 460 | #[test] 461 | fn test_simple_get_rows() { 462 | let config = Arc::new(CsvConfig::new("tests/data/simple.csv", b',', false)); 463 | let mut r = CsvLensReader::new(config).unwrap(); 464 | r.wait_internal(); 465 | let rows = r.get_rows(1234, 2).unwrap().0; 466 | let expected = vec![ 467 | Row::new(1235, vec!["A1235", "B1235"]), 468 | Row::new(1236, vec!["A1236", "B1236"]), 469 | ]; 470 | assert_eq!(rows, expected); 471 | } 472 | 473 | #[test] 474 | fn test_simple_get_rows_out_of_bound() { 475 | let config = Arc::new(CsvConfig::new("tests/data/simple.csv", b',', false)); 476 | let mut r = CsvLensReader::new(config).unwrap(); 477 | r.wait_internal(); 478 | let indices = vec![5000]; 479 | let (rows, _stats) = r.get_rows_impl(&indices).unwrap(); 480 | assert_eq!(rows, vec![]); 481 | } 482 | 483 | #[test] 484 | fn test_simple_get_rows_impl_1() { 485 | let config = Arc::new(CsvConfig::new("tests/data/simple.csv", b',', false)); 486 | let mut r = CsvLensReader::new(config).unwrap(); 487 | r.wait_internal(); 488 | let indices = vec![1, 3, 5, 1234, 2345, 3456, 4999]; 489 | let (rows, mut stats) = r.get_rows_impl(&indices).unwrap(); 490 | let expected = vec![ 491 | Row::new(2, vec!["A2", "B2"]), 492 | Row::new(4, vec!["A4", "B4"]), 493 | Row::new(6, vec!["A6", "B6"]), 494 | Row::new(1235, vec!["A1235", "B1235"]), 495 | Row::new(2346, vec!["A2346", "B2346"]), 496 | Row::new(3457, vec!["A3457", "B3457"]), 497 | Row::new(5000, vec!["A5000", "B5000"]), 498 | ]; 499 | assert_eq!(rows, expected); 500 | stats.pos_table_elapsed.take(); 501 | let expected = GetRowsStats { 502 | num_seek: 4, 503 | num_parsed_record: 218, 504 | pos_table_elapsed: None, 505 | pos_table_entry: 115, 506 | }; 507 | assert_eq!(stats, expected); 508 | } 509 | 510 | #[test] 511 | fn test_simple_get_rows_impl_2() { 512 | let config = Arc::new(CsvConfig::new("tests/data/simple.csv", b',', false)); 513 | 
let mut r = CsvLensReader::new(config).unwrap(); 514 | r.wait_internal(); 515 | let indices = vec![1234]; 516 | let (rows, mut stats) = r.get_rows_impl(&indices).unwrap(); 517 | let expected = vec![Row::new(1235, vec!["A1235", "B1235"])]; 518 | assert_eq!(rows, expected); 519 | stats.pos_table_elapsed.take(); 520 | let expected = GetRowsStats { 521 | num_seek: 1, 522 | num_parsed_record: 8, 523 | pos_table_elapsed: None, 524 | pos_table_entry: 115, 525 | }; 526 | assert_eq!(stats, expected); 527 | } 528 | 529 | #[test] 530 | fn test_simple_get_rows_impl_3() { 531 | let config = Arc::new(CsvConfig::new("tests/data/simple.csv", b',', false)); 532 | let mut r = CsvLensReader::new(config).unwrap(); 533 | r.wait_internal(); 534 | let indices = vec![2]; 535 | let (rows, mut stats) = r.get_rows_impl(&indices).unwrap(); 536 | let expected = vec![Row::new(3, vec!["A3", "B3"])]; 537 | assert_eq!(rows, expected); 538 | stats.pos_table_elapsed.take(); 539 | let expected = GetRowsStats { 540 | num_seek: 0, 541 | num_parsed_record: 4, // 3 + 1 (including header) 542 | pos_table_elapsed: None, 543 | pos_table_entry: 115, 544 | }; 545 | assert_eq!(stats, expected); 546 | } 547 | 548 | #[test] 549 | fn test_small() { 550 | let config = Arc::new(CsvConfig::new("tests/data/small.csv", b',', false)); 551 | let mut r = CsvLensReader::new(config).unwrap(); 552 | let rows = r.get_rows(0, 50).unwrap().0; 553 | let expected = vec![ 554 | Row::new(1, vec!["c1", " v1"]), 555 | Row::new(2, vec!["c2", " v2"]), 556 | ]; 557 | assert_eq!(rows, expected); 558 | } 559 | 560 | #[test] 561 | fn test_small_delimiter() { 562 | let config = Arc::new(CsvConfig::new("tests/data/small.bsv", b'|', false)); 563 | let mut r = CsvLensReader::new(config).unwrap(); 564 | let rows = r.get_rows(0, 50).unwrap().0; 565 | let expected = vec![Row::new(1, vec!["c1", "v1"]), Row::new(2, vec!["c2", "v2"])]; 566 | assert_eq!(rows, expected); 567 | } 568 | 569 | #[test] 570 | fn test_irregular() { 571 | let config = 
Arc::new(CsvConfig::new("tests/data/irregular.csv", b',', false)); 572 | let mut r = CsvLensReader::new(config).unwrap(); 573 | let rows = r.get_rows(0, 50).unwrap().0; 574 | let expected = vec![Row::new(1, vec!["c1"]), Row::new(2, vec!["c2", " v2"])]; 575 | assert_eq!(rows, expected); 576 | } 577 | 578 | #[test] 579 | fn test_double_quoting_as_escape_chars() { 580 | let config = Arc::new(CsvConfig::new( 581 | "tests/data/good_double_quote.csv", 582 | b',', 583 | false, 584 | )); 585 | let mut r = CsvLensReader::new(config).unwrap(); 586 | let rows = r.get_rows(0, 50).unwrap().0; 587 | let expected = vec![ 588 | Row::new(1, vec!["1", "quote"]), 589 | Row::new(2, vec!["5", "Comma, comma"]), 590 | ]; 591 | assert_eq!(rows, expected); 592 | } 593 | 594 | #[test] 595 | fn get_rows_unsorted_indices() { 596 | let config = Arc::new(CsvConfig::new("tests/data/simple.csv", b',', false)); 597 | let mut r = CsvLensReader::new(config).unwrap(); 598 | r.wait_internal(); 599 | let rows = r.get_rows_for_indices(&vec![1235, 1234]).unwrap().0; 600 | let expected = vec![ 601 | Row::new(1236, vec!["A1236", "B1236"]), 602 | Row::new(1235, vec!["A1235", "B1235"]), 603 | ]; 604 | assert_eq!(rows, expected); 605 | } 606 | } 607 | -------------------------------------------------------------------------------- /src/delimiter.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::{CsvlensError, CsvlensResult}; 2 | 3 | /// Delimiter behaviour as specified in the command line 4 | pub enum Delimiter { 5 | /// Use the default delimiter (comma) 6 | Default, 7 | 8 | /// Use tab as the delimiter 9 | Tab, 10 | 11 | /// Use the specified delimiter 12 | Character(u8), 13 | 14 | /// Auto-detect the delimiter 15 | Auto, 16 | } 17 | 18 | impl Delimiter { 19 | /// Create a Delimiter by parsing the command line argument for the delimiter 20 | pub fn from_arg(delimiter_arg: &Option, tab_separation: bool) -> CsvlensResult { 21 | if tab_separation { 22 | 
return Ok(Delimiter::Tab); 23 | } 24 | 25 | if let Some(s) = delimiter_arg { 26 | if s == "auto" { 27 | return Ok(Delimiter::Auto); 28 | } 29 | if s == r"\t" { 30 | return Ok(Delimiter::Tab); 31 | } 32 | let mut chars = s.chars(); 33 | let c = chars.next().ok_or_else(|| CsvlensError::DelimiterEmpty)?; 34 | if !c.is_ascii() { 35 | return Err(CsvlensError::DelimiterNotAscii(c)); 36 | } 37 | if chars.next().is_some() { 38 | return Err(CsvlensError::DelimiterMultipleCharacters(s.clone())); 39 | } 40 | if c == 't' { 41 | // commonly occurs when argument is specified like "-d \t" without quotes 42 | return Ok(Delimiter::Tab); 43 | } 44 | Ok(Delimiter::Character(c.try_into()?)) 45 | } else { 46 | Ok(Delimiter::Default) 47 | } 48 | } 49 | } 50 | 51 | /// Sniff the delimiter from the file 52 | pub fn sniff_delimiter(filename: &str) -> Option { 53 | let mut sniffer = csv_sniffer::Sniffer::new(); 54 | sniffer.sample_size(csv_sniffer::SampleSize::Records(200)); 55 | if let Ok(metadata) = sniffer.sniff_path(filename) { 56 | return Some(metadata.dialect.delimiter); 57 | } 58 | None 59 | } 60 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | pub type CsvlensResult = std::result::Result; 4 | 5 | /// Errors csvlens can have 6 | #[derive(Debug, Error)] 7 | pub enum CsvlensError { 8 | #[error("File not found: {0}")] 9 | FileNotFound(String), 10 | 11 | #[error("Column name not found: {0}")] 12 | ColumnNameNotFound(String), 13 | 14 | #[error("Delimiter should not be empty")] 15 | DelimiterEmpty, 16 | 17 | #[error("Delimiter should be within the ASCII range: {0} is too fancy")] 18 | DelimiterNotAscii(char), 19 | 20 | #[error("Delimiter should be exactly one character (or \\t), got '{0}'")] 21 | DelimiterMultipleCharacters(String), 22 | 23 | #[error(transparent)] 24 | DelimiterParsing(#[from] std::char::TryFromCharError), 25
| 26 | #[error(transparent)] 27 | Csv(#[from] csv::Error), 28 | 29 | #[error(transparent)] 30 | Arrow(#[from] arrow::error::ArrowError), 31 | 32 | #[error(transparent)] 33 | Io(#[from] std::io::Error), 34 | } 35 | -------------------------------------------------------------------------------- /src/find.rs: -------------------------------------------------------------------------------- 1 | use crate::columns_filter; 2 | use crate::csv; 3 | use crate::errors::CsvlensResult; 4 | use crate::sort; 5 | use crate::sort::SortOrder; 6 | use regex::Regex; 7 | use sorted_vec::SortedVec; 8 | use std::cmp::min; 9 | use std::sync::{Arc, Mutex, MutexGuard}; 10 | use std::thread::{self}; 11 | use std::time::{Duration, Instant}; 12 | 13 | #[derive(Debug, Clone)] 14 | pub enum RowPos { 15 | Header, 16 | Row(usize), 17 | } 18 | 19 | #[derive(Debug, Clone)] 20 | pub struct FinderCursor { 21 | pub row: RowPos, 22 | pub column: usize, 23 | } 24 | 25 | impl FinderCursor { 26 | fn next_row(&self, total_count: usize) -> FinderCursor { 27 | match self.row { 28 | RowPos::Header => FinderCursor { 29 | row: if total_count > 0 { 30 | RowPos::Row(0) 31 | } else { 32 | RowPos::Header 33 | }, 34 | column: 0, 35 | }, 36 | RowPos::Row(n) => FinderCursor { 37 | row: if n + 1 < total_count { 38 | RowPos::Row(n + 1) 39 | } else { 40 | RowPos::Row(n) 41 | }, 42 | column: 0, 43 | }, 44 | } 45 | } 46 | 47 | fn prev_row(&self, has_header_found: bool) -> FinderCursor { 48 | match self.row { 49 | RowPos::Header => FinderCursor { 50 | row: RowPos::Header, 51 | column: 0, 52 | }, 53 | RowPos::Row(0) => FinderCursor { 54 | row: if has_header_found { 55 | RowPos::Header 56 | } else { 57 | RowPos::Row(0) 58 | }, 59 | column: 0, 60 | }, 61 | RowPos::Row(n) => FinderCursor { 62 | row: RowPos::Row(n.saturating_sub(1)), 63 | column: 0, 64 | }, 65 | } 66 | } 67 | 68 | fn next_column(&self) -> FinderCursor { 69 | match self.row { 70 | RowPos::Header => FinderCursor { 71 | row: RowPos::Header, 72 | column: 
self.column.saturating_add(1), 73 | }, 74 | RowPos::Row(n) => FinderCursor { 75 | row: RowPos::Row(n), 76 | column: self.column.saturating_add(1), 77 | }, 78 | } 79 | } 80 | 81 | fn prev_column(&self) -> FinderCursor { 82 | match self.row { 83 | RowPos::Header => FinderCursor { 84 | row: RowPos::Header, 85 | column: self.column.saturating_sub(1), 86 | }, 87 | RowPos::Row(n) => FinderCursor { 88 | row: RowPos::Row(n), 89 | column: self.column.saturating_sub(1), 90 | }, 91 | } 92 | } 93 | } 94 | 95 | pub struct Finder { 96 | internal: Arc>, 97 | pub cursor: Option, 98 | row_hint: RowPos, 99 | target: Regex, 100 | column_index: Option, 101 | sorter: Option>, 102 | pub sort_order: SortOrder, 103 | } 104 | 105 | pub enum FoundEntry { 106 | Header(HeaderEntry), 107 | Row(RowEntry), 108 | } 109 | 110 | #[derive(Clone, Debug)] 111 | pub struct RowEntry { 112 | row_index: usize, 113 | row_order: usize, 114 | column_index: usize, 115 | } 116 | 117 | impl RowEntry { 118 | pub fn row_index(&self) -> usize { 119 | self.row_index 120 | } 121 | 122 | pub fn row_order(&self) -> usize { 123 | self.row_order 124 | } 125 | 126 | pub fn column_index(&self) -> usize { 127 | self.column_index 128 | } 129 | } 130 | 131 | #[derive(Clone, Debug)] 132 | pub struct HeaderEntry { 133 | column_index: usize, 134 | } 135 | 136 | impl HeaderEntry { 137 | pub fn column_index(&self) -> usize { 138 | self.column_index 139 | } 140 | } 141 | 142 | #[derive(Clone, Debug)] 143 | pub struct FoundHeader { 144 | column_indices: Vec, 145 | } 146 | 147 | impl FoundHeader { 148 | pub fn column_indices(&self) -> &Vec { 149 | &self.column_indices 150 | } 151 | 152 | pub fn get_entry(&self, entry_index: usize) -> Option { 153 | self.column_indices 154 | .get(entry_index) 155 | .map(|column_index| HeaderEntry { 156 | column_index: *column_index, 157 | }) 158 | } 159 | } 160 | 161 | #[derive(Clone, Debug)] 162 | pub struct FoundRow { 163 | row_index: usize, 164 | row_order: usize, 165 | column_indices: Vec, 166 | 
} 167 | 168 | impl FoundRow { 169 | pub fn row_index(&self) -> usize { 170 | self.row_index 171 | } 172 | 173 | pub fn row_order(&self) -> usize { 174 | self.row_order 175 | } 176 | 177 | pub fn column_indices(&self) -> &Vec { 178 | &self.column_indices 179 | } 180 | 181 | pub fn get_entry(&self, entry_index: usize) -> Option { 182 | self.column_indices 183 | .get(entry_index) 184 | .map(|column_index| RowEntry { 185 | row_index: self.row_index, 186 | row_order: self.row_order, 187 | column_index: *column_index, 188 | }) 189 | } 190 | } 191 | 192 | impl Ord for FoundRow { 193 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 194 | self.row_order.cmp(&other.row_order) 195 | } 196 | } 197 | 198 | impl PartialOrd for FoundRow { 199 | fn partial_cmp(&self, other: &Self) -> Option { 200 | Some(self.row_order.cmp(&other.row_order)) 201 | } 202 | } 203 | 204 | impl PartialEq for FoundRow { 205 | fn eq(&self, other: &Self) -> bool { 206 | self.row_order == other.row_order 207 | } 208 | } 209 | 210 | impl Eq for FoundRow {} 211 | 212 | impl Finder { 213 | pub fn new( 214 | config: Arc, 215 | target: Regex, 216 | column_index: Option, 217 | sorter: Option>, 218 | sort_order: SortOrder, 219 | columns_filter: Option>, 220 | ) -> CsvlensResult { 221 | let internal = FinderInternalState::init( 222 | config, 223 | target.clone(), 224 | column_index, 225 | sorter.clone(), 226 | sort_order, 227 | columns_filter, 228 | ); 229 | let finder = Finder { 230 | internal, 231 | cursor: None, 232 | row_hint: RowPos::Header, 233 | target, 234 | column_index, 235 | sorter: sorter.clone(), 236 | sort_order, 237 | }; 238 | Ok(finder) 239 | } 240 | 241 | pub fn count(&self) -> usize { 242 | (self.internal.lock().unwrap()).count 243 | } 244 | 245 | pub fn count_and_max_row_index(&self) -> (usize, Option) { 246 | let g = self.internal.lock().unwrap(); 247 | (g.count, g.founds.last().map(|x| x.row_index() as u64)) 248 | } 249 | 250 | pub fn found_any(&self) -> bool { 251 | let g = 
self.internal.lock().unwrap(); 252 | g.count > 0 || g.found_header.is_some() 253 | } 254 | 255 | pub fn header_has_match(&self) -> bool { 256 | (self.internal.lock().unwrap()).found_header.is_some() 257 | } 258 | 259 | pub fn done(&self) -> bool { 260 | (self.internal.lock().unwrap()).done 261 | } 262 | 263 | pub fn cursor(&self) -> Option { 264 | self.cursor.as_ref().cloned() 265 | } 266 | 267 | pub fn cursor_row_order(&self) -> Option { 268 | let m_guard = self.internal.lock().unwrap(); 269 | if let Some(FoundEntry::Row(entry)) = self.get_found_record_at_cursor(&m_guard) { 270 | Some(entry.row_order()) 271 | } else { 272 | None 273 | } 274 | } 275 | 276 | pub fn target(&self) -> Regex { 277 | self.target.clone() 278 | } 279 | 280 | pub fn column_index(&self) -> Option { 281 | self.column_index 282 | } 283 | 284 | pub fn sorter(&self) -> &Option> { 285 | &self.sorter 286 | } 287 | 288 | pub fn reset_cursor(&mut self) { 289 | self.cursor = None; 290 | } 291 | 292 | pub fn set_row_hint(&mut self, row_hint: RowPos) { 293 | self.row_hint = row_hint; 294 | } 295 | 296 | pub fn next(&mut self) -> Option { 297 | let m_guard = self.internal.lock().unwrap(); 298 | let count = m_guard.count; 299 | let founds = &m_guard.founds; 300 | if let Some(cursor) = &self.cursor { 301 | let column_indices = match cursor.row { 302 | RowPos::Header => m_guard.found_header.as_ref().map(|x| x.column_indices()), 303 | RowPos::Row(n) => founds.get(n).map(|x| x.column_indices()), 304 | }; 305 | if let Some(column_indices) = column_indices { 306 | if cursor.column + 1 < column_indices.len() { 307 | // Try next column first if available 308 | self.cursor = Some(cursor.next_column()); 309 | } else { 310 | // Next row if available 311 | self.cursor = Some(cursor.next_row(count)); 312 | } 313 | } 314 | } else if matches!(self.row_hint, RowPos::Header) && m_guard.found_header.is_some() { 315 | self.cursor = Some(FinderCursor { 316 | row: RowPos::Header, 317 | column: 0, 318 | }); 319 | } else if 
count > 0 { 320 | let n = match self.row_hint { 321 | // If here, we know there are no matches in the header even though row_hint is still 322 | // Header. Start from first found row. 323 | RowPos::Header => 0, 324 | RowPos::Row(n) => n, 325 | }; 326 | self.cursor = Some(FinderCursor { 327 | row: RowPos::Row(m_guard.next_from(n)), 328 | column: 0, 329 | }); 330 | } 331 | self.get_found_record_at_cursor(&m_guard) 332 | } 333 | 334 | pub fn prev(&mut self) -> Option { 335 | let m_guard = self.internal.lock().unwrap(); 336 | if let Some(cursor) = &self.cursor { 337 | if cursor.column > 0 { 338 | // Try previous column first if available 339 | self.cursor = Some(cursor.prev_column()); 340 | } else { 341 | // Previous row if available 342 | self.cursor = Some(cursor.prev_row(m_guard.found_header.is_some())); 343 | } 344 | } else if matches!(self.row_hint, RowPos::Header) && m_guard.found_header.is_some() { 345 | self.cursor = Some(FinderCursor { 346 | row: RowPos::Header, 347 | column: 0, 348 | }); 349 | } else if m_guard.count > 0 { 350 | if let RowPos::Row(n) = self.row_hint { 351 | self.cursor = Some(FinderCursor { 352 | row: RowPos::Row(m_guard.prev_from(n)), 353 | column: 0, 354 | }); 355 | } 356 | } 357 | self.get_found_record_at_cursor(&m_guard) 358 | } 359 | 360 | pub fn current(&self) -> Option { 361 | let m_guard = self.internal.lock().unwrap(); 362 | self.get_found_record_at_cursor(&m_guard) 363 | } 364 | 365 | fn get_found_record_at_cursor( 366 | &self, 367 | m_guard: &MutexGuard, 368 | ) -> Option { 369 | if let Some(cursor) = &self.cursor { 370 | match cursor.row { 371 | RowPos::Header => m_guard 372 | .found_header 373 | .as_ref() 374 | .and_then(|x| x.get_entry(cursor.column)) 375 | .map(FoundEntry::Header), 376 | RowPos::Row(n) => m_guard 377 | .founds 378 | .get(n) 379 | .and_then(|x| x.get_entry(cursor.column)) 380 | .map(FoundEntry::Row), 381 | } 382 | } else { 383 | None 384 | } 385 | } 386 | 387 | fn terminate(&self) { 388 | let mut m_guard =
self.internal.lock().unwrap(); 389 | m_guard.terminate(); 390 | } 391 | 392 | pub fn elapsed(&self) -> Option { 393 | let m_guard = self.internal.lock().unwrap(); 394 | m_guard.elapsed() 395 | } 396 | 397 | pub fn get_subset_found(&self, offset: usize, num_rows: usize) -> Vec { 398 | let m_guard = self.internal.lock().unwrap(); 399 | let founds = &m_guard.founds; 400 | let start = min(offset, founds.len().saturating_sub(1)); 401 | let end = start.saturating_add(num_rows); 402 | let end = min(end, founds.len()); 403 | let indices: Vec = founds[start..end] 404 | .iter() 405 | .map(|x| x.row_index() as u64) 406 | .collect(); 407 | indices 408 | } 409 | 410 | #[cfg(test)] 411 | pub fn wait_internal(&self) { 412 | loop { 413 | if self.internal.lock().unwrap().done { 414 | break; 415 | } 416 | thread::sleep(core::time::Duration::from_millis(100)); 417 | } 418 | } 419 | } 420 | 421 | impl Drop for Finder { 422 | fn drop(&mut self) { 423 | self.terminate(); 424 | } 425 | } 426 | 427 | struct FinderInternalState { 428 | count: usize, 429 | found_header: Option, 430 | founds: SortedVec, 431 | done: bool, 432 | should_terminate: bool, 433 | elapsed: Option, 434 | } 435 | 436 | impl FinderInternalState { 437 | pub fn init( 438 | config: Arc, 439 | target: Regex, 440 | target_local_column_index: Option, 441 | sorter: Option>, 442 | sort_order: SortOrder, 443 | columns_filter: Option>, 444 | ) -> Arc> { 445 | let internal = FinderInternalState { 446 | count: 0, 447 | found_header: None, 448 | founds: SortedVec::new(), 449 | done: false, 450 | should_terminate: false, 451 | elapsed: None, 452 | }; 453 | 454 | let m_state = Arc::new(Mutex::new(internal)); 455 | 456 | let _m = m_state.clone(); 457 | let _filename = config.filename().to_owned(); 458 | 459 | let _handle = thread::spawn(move || { 460 | let mut bg_reader = config.new_reader().unwrap(); 461 | 462 | // search header 463 | let mut column_indices = vec![]; 464 | if let Ok(header) = bg_reader.headers() { 465 | let mut 
local_column_index = 0; 466 | for (column_index, field) in header.iter().enumerate() { 467 | if let Some(columns_filter) = &columns_filter { 468 | if !columns_filter.is_column_filtered(column_index) { 469 | continue; 470 | } 471 | } 472 | if target.is_match(field) { 473 | column_indices.push(local_column_index); 474 | } 475 | local_column_index += 1; 476 | } 477 | } 478 | if !column_indices.is_empty() { 479 | let found = FoundHeader { column_indices }; 480 | let mut m = _m.lock().unwrap(); 481 | m.found_header = Some(found); 482 | } 483 | 484 | // note that records() excludes header 485 | let records = bg_reader.records(); 486 | 487 | let start = Instant::now(); 488 | for (row_index, r) in records.enumerate() { 489 | let mut column_indices = vec![]; 490 | if let Ok(valid_record) = r { 491 | let mut local_column_index = 0; 492 | for (column_index, field) in valid_record.iter().enumerate() { 493 | if let Some(columns_filter) = &columns_filter { 494 | if !columns_filter.is_column_filtered(column_index) { 495 | continue; 496 | } 497 | } 498 | let should_check_regex = 499 | if let Some(target_local_column_index) = target_local_column_index { 500 | local_column_index == target_local_column_index 501 | } else { 502 | true 503 | }; 504 | if should_check_regex && target.is_match(field) { 505 | column_indices.push(local_column_index); 506 | } 507 | local_column_index += 1; 508 | } 509 | } 510 | if !column_indices.is_empty() { 511 | let row_order = match &sorter { 512 | Some(s) => { 513 | s.get_record_order(row_index as u64, sort_order).unwrap() as usize 514 | } 515 | _ => row_index, 516 | }; 517 | let found = FoundRow { 518 | row_index, 519 | row_order, 520 | column_indices, 521 | }; 522 | let mut m = _m.lock().unwrap(); 523 | (*m).found_one(found); 524 | } 525 | let m = _m.lock().unwrap(); 526 | if m.should_terminate { 527 | break; 528 | } 529 | } 530 | 531 | let mut m = _m.lock().unwrap(); 532 | m.done = true; 533 | m.elapsed = Some(start.elapsed()); 534 | }); 535 | 536 | 
m_state 537 | } 538 | 539 | fn found_one(&mut self, found: FoundRow) { 540 | self.founds.push(found); 541 | self.count += 1; 542 | } 543 | 544 | fn next_from(&self, row_hint: usize) -> usize { 545 | let mut index = self.founds.partition_point(|r| r.row_order() < row_hint); 546 | if index >= self.founds.len() { 547 | index -= 1; 548 | } 549 | index 550 | } 551 | 552 | fn prev_from(&self, row_hint: usize) -> usize { 553 | let next = self.next_from(row_hint); 554 | if next > 0 { next - 1 } else { next } 555 | } 556 | 557 | fn terminate(&mut self) { 558 | self.should_terminate = true; 559 | } 560 | 561 | fn elapsed(&self) -> Option { 562 | self.elapsed 563 | } 564 | } 565 | -------------------------------------------------------------------------------- /src/help.rs: -------------------------------------------------------------------------------- 1 | use ratatui::{ 2 | buffer::Buffer, 3 | layout::Rect, 4 | style::{Color, Modifier, Style}, 5 | text::{Line, Span}, 6 | widgets::{Block, Borders, Paragraph, StatefulWidget, Widget, Wrap}, 7 | }; 8 | 9 | const HELP_CONTENT: &str = " 10 | csvlens is an interactive CSV file viewer in the command line. 11 | 12 | These are the key bindings. Press q to exit. 
13 | 14 | # Moving 15 | 16 | hjkl (or ← ↓ ↑→ ) : Scroll one row or column in the given direction 17 | Ctrl + f (or Page Down) : Scroll one window down 18 | Ctrl + b (or Page Up) : Scroll one window up 19 | Ctrl + d (or d) : Scroll half a window down 20 | Ctrl + u (or u) : Scroll half a window up 21 | Ctrl + h : Scroll one window left 22 | Ctrl + l : Scroll one window right 23 | Ctrl + ← : Scroll left to first column 24 | Ctrl + → : Scroll right to last column 25 | G (or End) : Go to bottom 26 | g (or Home) : Go to top 27 | <n>G : Go to line n 28 | 29 | # Search 30 | 31 | /<regex> : Find content matching regex and highlight matches 32 | n (in Find mode) : Jump to next result 33 | N (in Find mode) : Jump to previous result 34 | &<regex> : Filter rows using regex (show only matches) 35 | *<regex> : Filter columns using regex (show only matches) 36 | 37 | # Selection modes 38 | 39 | TAB : Toggle between row, column or cell selection modes 40 | > : Increase selected column's width 41 | < : Decrease selected column's width 42 | Shift + ↓ (or J) : Sort rows by the selected column 43 | # (in Cell mode) : Find and highlight rows like the selected cell 44 | @ (in Cell mode) : Filter rows like the selected cell 45 | y : Copy the selected row or cell to clipboard 46 | Enter (in Cell mode) : Print the selected cell to stdout and exit 47 | 48 | # Other options 49 | 50 | -S : Toggle line wrapping 51 | -W : Toggle line wrapping by words 52 | f<n> : Freeze this number of columns from the left 53 | r : Reset to default view (clear all filters and custom column widths) 54 | H (or ?)
/// Stateless widget that draws the help text; all state lives in `HelpPageState`.
pub struct HelpPage {}

/// Visibility and scroll position of the help page.
pub struct HelpPageState {
    // Whether the help page is currently shown.
    active: bool,
    // Scroll offset, in lines.
    offset: u16,
    // Gates `scroll_down`: no further scrolling while this is true.
    render_complete: bool,
}

impl HelpPage {
    pub fn new() -> Self {
        Self {}
    }
}

impl HelpPageState {
    /// Hidden, scrolled to the top, and treated as fully rendered.
    pub fn new() -> Self {
        Self {
            active: false,
            offset: 0,
            render_complete: true,
        }
    }

    /// Show the help page, starting from the top.
    pub fn activate(&mut self) -> &Self {
        self.offset = 0;
        self.active = true;
        self
    }

    /// Hide the help page and forget the scroll position.
    pub fn deactivate(&mut self) -> &Self {
        self.offset = 0;
        self.active = false;
        self
    }

    pub fn is_active(&self) -> bool {
        self.active
    }

    /// Scroll up by one line, stopping at the top.
    pub fn scroll_up(&mut self) -> &Self {
        self.offset = self.offset.saturating_sub(1);
        self
    }

    /// Scroll down by one line unless `render_complete` is set.
    pub fn scroll_down(&mut self) -> &Self {
        if self.render_complete {
            return self;
        }
        self.offset += 1;
        self
    }
}
/// Previously submitted input buffers, oldest first, with a navigation cursor.
pub struct BufferHistory {
    buffers: Vec<String>,
    // Index of the entry last returned by `prev`/`next`; equals
    // `buffers.len()` when not navigating.
    cursor: usize,
}

impl BufferHistory {
    /// Create a history seeded with `buf`.
    ///
    /// An empty `buf` yields an empty history, consistent with `push`
    /// (previously the empty string was stored as a real entry).
    fn new_with(buf: &str) -> Self {
        let mut history = BufferHistory {
            buffers: Vec::new(),
            cursor: 0,
        };
        history.push(buf);
        history
    }

    /// Append `buf` as the most recent entry and reset the cursor.
    ///
    /// Empty strings are ignored; an existing equal entry is moved to the end
    /// instead of being duplicated.
    fn push(&mut self, buf: &str) {
        if buf.is_empty() {
            // Don't keep empty entries
            return;
        }
        if let Some(index) = self.buffers.iter().position(|x| x == buf) {
            // Don't keep duplicate entries
            self.buffers.remove(index);
        }
        self.buffers.push(buf.to_string());
        self.reset_cursor();
    }

    /// Move towards older entries; `None` once the oldest has been returned.
    fn prev(&mut self) -> Option<String> {
        if self.cursor == 0 {
            return None;
        }
        self.cursor -= 1;
        self.buffers.get(self.cursor).cloned()
    }

    /// Move towards newer entries; `None` when already at (or past) the newest.
    fn next(&mut self) -> Option<String> {
        // Written as `cursor + 1 >= len` so an empty history cannot underflow.
        if self.cursor.saturating_add(1) >= self.buffers.len() {
            return None;
        }
        self.cursor += 1;
        self.buffers.get(self.cursor).cloned()
    }

    /// Park the cursor just past the newest entry.
    fn reset_cursor(&mut self) {
        self.cursor = self.buffers.len();
    }
}
HashMap::new(), 60 | } 61 | } 62 | 63 | pub fn set(&mut self, input_mode: InputMode, content: &str) { 64 | match self.inner.entry(input_mode) { 65 | Occupied(mut e) => { 66 | e.get_mut().push(content); 67 | } 68 | Vacant(e) => { 69 | e.insert(BufferHistory::new_with(content)); 70 | } 71 | } 72 | } 73 | 74 | pub fn prev(&mut self, input_mode: InputMode) -> Option { 75 | self.inner 76 | .get_mut(&input_mode) 77 | .and_then(|history| history.prev()) 78 | } 79 | 80 | pub fn next(&mut self, input_mode: InputMode) -> Option { 81 | self.inner 82 | .get_mut(&input_mode) 83 | .and_then(|history| history.next()) 84 | } 85 | 86 | pub fn reset_cursors(&mut self) { 87 | for (_, history) in self.inner.iter_mut() { 88 | history.reset_cursor(); 89 | } 90 | } 91 | } 92 | 93 | #[cfg(test)] 94 | mod tests { 95 | 96 | use super::*; 97 | 98 | #[test] 99 | fn test_prev_next() { 100 | let mut history = BufferHistory::new_with("foo"); 101 | history.push("bar"); 102 | history.push("baz"); 103 | history.push("foo"); 104 | assert_eq!(history.prev(), Some("foo".to_string())); 105 | assert_eq!(history.prev(), Some("baz".to_string())); 106 | assert_eq!(history.prev(), Some("bar".to_string())); 107 | assert_eq!(history.prev(), None); 108 | assert_eq!(history.prev(), None); 109 | assert_eq!(history.next(), Some("baz".to_string())); 110 | assert_eq!(history.next(), Some("foo".to_string())); 111 | assert_eq!(history.next(), None); 112 | assert_eq!(history.next(), None); 113 | } 114 | 115 | #[test] 116 | fn test_push_duplicate() { 117 | let mut history = BufferHistory::new_with("foo"); 118 | history.push("bar"); 119 | history.push("baz"); 120 | history.push("foo"); 121 | history.push("bar"); 122 | assert_eq!(history.prev(), Some("bar".to_string())); 123 | assert_eq!(history.prev(), Some("foo".to_string())); 124 | assert_eq!(history.prev(), Some("baz".to_string())); 125 | assert_eq!(history.prev(), None); 126 | } 127 | 128 | #[test] 129 | fn test_container() { 130 | let mut history_container = 
/// Action produced by `InputHandler` for a single input event.
pub enum Control {
    // Scrolling
    ScrollUp,
    ScrollDown,
    ScrollLeft,
    ScrollRight,
    ScrollTop,
    ScrollBottom,
    ScrollPageUp,
    ScrollPageDown,
    ScrollHalfPageUp,
    ScrollHalfPageDown,
    ScrollPageLeft,
    ScrollPageRight,
    ScrollLeftMost,
    ScrollRightMost,
    /// Line number parsed from the buffer in `GotoLine` mode.
    ScrollTo(usize),
    ScrollToNextFound,
    ScrollToPrevFound,
    // Column width
    IncreaseWidth,
    DecreaseWidth,
    // Find / filter
    /// Buffer content submitted in `Find` mode.
    Find(String),
    FindLikeCell,
    /// Buffer content submitted in `Filter` mode.
    Filter(String),
    /// Buffer content submitted in `FilterColumns` mode.
    FilterColumns(String),
    FilterLikeCell,
    /// Number parsed from the buffer in `FreezeColumns` mode.
    FreezeColumns(usize),
    Quit,
    /// Current content of the input buffer being edited.
    BufferContent(Input),
    /// The input buffer was dismissed or submitted without a usable value.
    BufferReset,
    // Selection
    Select,
    CopySelection,
    ToggleSelectionType,
    /// Emitted in `Option` mode: `false` for `S`, `true` for `W`/`w`.
    ToggleLineWrap(bool),
    ToggleSort,
    Reset,
    Help,
    /// Unrecognized character typed in `Option` mode.
    UnknownOption(String),
    /// Message describing invalid user input (e.g. a non-numeric value
    /// in `FreezeColumns` mode).
    UserError(String),
    /// No action: a tick event or a key with no binding.
    Nothing,
}

impl Control {
    /// `BufferContent` carrying an empty input, returned when a buffer is opened.
    fn empty_buffer() -> Control {
        Control::BufferContent("".into())
    }
}
58 | } 59 | 60 | pub struct InputHandler { 61 | events: CsvlensEvents, 62 | mode: InputMode, 63 | buffer_state: BufferState, 64 | buffer_history_container: BufferHistoryContainer, 65 | } 66 | 67 | impl InputHandler { 68 | pub fn new() -> InputHandler { 69 | InputHandler { 70 | events: CsvlensEvents::new(), 71 | mode: InputMode::Default, 72 | buffer_state: BufferState::Inactive, 73 | buffer_history_container: BufferHistoryContainer::new(), 74 | } 75 | } 76 | 77 | pub fn next(&mut self) -> Control { 78 | if let CsvlensEvent::Input(mut key) = self.events.next().unwrap() { 79 | /* 80 | The shift key modifier is not consistent across platforms. 81 | 82 | For upper case alphabets, e.g. 'A' 83 | 84 | Unix: Char("A") + SHIFT 85 | Windows: Char("A") + SHIFT 86 | 87 | For non-alphabets, e.g. '>' 88 | 89 | Unix: Char(">") + NULL 90 | Windows: Char(">") + SHIFT 91 | 92 | But the key event handling below assumes that the shift key modifier is only added for 93 | alphabets. To satisfy the assumption, the following ensures that the presence or absence 94 | of shift modifier is consistent across platforms. 
95 | 96 | Idea borrowed from: https://github.com/sxyazi/yazi/pull/174 97 | */ 98 | let platform_consistent_shift = match (key.code, key.modifiers) { 99 | (KeyCode::Char(c), _) => c.is_ascii_uppercase(), 100 | (_, m) => m.contains(KeyModifiers::SHIFT), 101 | }; 102 | if platform_consistent_shift { 103 | key.modifiers.insert(KeyModifiers::SHIFT); 104 | } else { 105 | key.modifiers.remove(KeyModifiers::SHIFT); 106 | } 107 | if self.is_help_mode() { 108 | return self.handler_help(key); 109 | } else if self.is_input_buffering() { 110 | return self.handler_buffering(key); 111 | } else { 112 | return self.handler_default(key); 113 | } 114 | } 115 | // tick event, no need to distinguish it for now 116 | Control::Nothing 117 | } 118 | 119 | fn handler_default(&mut self, key_event: KeyEvent) -> Control { 120 | match key_event.modifiers { 121 | KeyModifiers::NONE => match key_event.code { 122 | KeyCode::Char('q') => Control::Quit, 123 | KeyCode::Char('j') | KeyCode::Down => Control::ScrollDown, 124 | KeyCode::Char('k') | KeyCode::Up => Control::ScrollUp, 125 | KeyCode::Char('l') | KeyCode::Right => Control::ScrollRight, 126 | KeyCode::Char('h') | KeyCode::Left => Control::ScrollLeft, 127 | KeyCode::Char('g') | KeyCode::Home => Control::ScrollTop, 128 | KeyCode::End => Control::ScrollBottom, 129 | KeyCode::Char('n') => Control::ScrollToNextFound, 130 | KeyCode::PageDown => Control::ScrollPageDown, 131 | KeyCode::PageUp => Control::ScrollPageUp, 132 | KeyCode::Char('d') => Control::ScrollHalfPageDown, 133 | KeyCode::Char('u') => Control::ScrollHalfPageUp, 134 | KeyCode::Char(x) if "0123456789".contains(x.to_string().as_str()) => { 135 | self.buffer_state = BufferState::Active(Input::new(x.to_string())); 136 | self.mode = InputMode::GotoLine; 137 | Control::BufferContent(Input::new(x.to_string())) 138 | } 139 | KeyCode::Char('/') => { 140 | self.init_buffer(InputMode::Find); 141 | Control::empty_buffer() 142 | } 143 | KeyCode::Char('&') => { 144 | 
self.init_buffer(InputMode::Filter); 145 | Control::empty_buffer() 146 | } 147 | KeyCode::Char('*') => { 148 | self.init_buffer(InputMode::FilterColumns); 149 | Control::empty_buffer() 150 | } 151 | KeyCode::Char('-') => { 152 | self.init_buffer(InputMode::Option); 153 | Control::empty_buffer() 154 | } 155 | KeyCode::Char('f') => { 156 | self.init_buffer(InputMode::FreezeColumns); 157 | Control::empty_buffer() 158 | } 159 | KeyCode::Enter => Control::Select, 160 | KeyCode::Tab => Control::ToggleSelectionType, 161 | KeyCode::Char('>') => Control::IncreaseWidth, 162 | KeyCode::Char('<') => Control::DecreaseWidth, 163 | KeyCode::Char('r') => Control::Reset, 164 | KeyCode::Char('?') => Control::Help, 165 | KeyCode::Char('#') => Control::FindLikeCell, 166 | KeyCode::Char('@') => Control::FilterLikeCell, 167 | KeyCode::Char('y') => Control::CopySelection, 168 | _ => Control::Nothing, 169 | }, 170 | KeyModifiers::SHIFT => match key_event.code { 171 | KeyCode::Char('G') | KeyCode::End => Control::ScrollBottom, 172 | KeyCode::Char('N') => Control::ScrollToPrevFound, 173 | KeyCode::Char('H') => Control::Help, 174 | KeyCode::Char('J') | KeyCode::Down => Control::ToggleSort, 175 | _ => Control::Nothing, 176 | }, 177 | KeyModifiers::CONTROL => match key_event.code { 178 | KeyCode::Char('f') => Control::ScrollPageDown, 179 | KeyCode::Char('b') => Control::ScrollPageUp, 180 | KeyCode::Char('d') => Control::ScrollHalfPageDown, 181 | KeyCode::Char('u') => Control::ScrollHalfPageUp, 182 | KeyCode::Char('h') => Control::ScrollPageLeft, 183 | KeyCode::Char('l') => Control::ScrollPageRight, 184 | KeyCode::Left => Control::ScrollLeftMost, 185 | KeyCode::Right => Control::ScrollRightMost, 186 | _ => Control::Nothing, 187 | }, 188 | _ => Control::Nothing, 189 | } 190 | } 191 | 192 | fn handler_buffering(&mut self, key_event: KeyEvent) -> Control { 193 | let input = match &mut self.buffer_state { 194 | BufferState::Active(input) => input, 195 | BufferState::Inactive => return 
Control::Nothing, 196 | }; 197 | if self.mode == InputMode::Option { 198 | return self.handler_buffering_option_mode(key_event); 199 | } 200 | match key_event.code { 201 | KeyCode::Esc => { 202 | self.reset_buffer(); 203 | Control::BufferReset 204 | } 205 | KeyCode::Char('g' | 'G') | KeyCode::Enter if self.mode == InputMode::GotoLine => { 206 | self.buffer_history_container.set(self.mode, input.value()); 207 | let goto_line = match &self.buffer_state { 208 | BufferState::Active(input) => input.value().parse::().ok(), 209 | BufferState::Inactive => None, 210 | }; 211 | let res = if let Some(n) = goto_line { 212 | Control::ScrollTo(n) 213 | } else { 214 | Control::BufferReset 215 | }; 216 | self.reset_buffer(); 217 | res 218 | } 219 | KeyCode::Up => { 220 | let mode = match self.mode { 221 | InputMode::Filter => InputMode::Find, 222 | _ => self.mode, 223 | }; 224 | if let Some(buf) = self.buffer_history_container.prev(mode) { 225 | self.buffer_state = BufferState::Active(Input::new(buf.clone())); 226 | Control::BufferContent(Input::new(buf)) 227 | } else { 228 | Control::Nothing 229 | } 230 | } 231 | KeyCode::Down => { 232 | let mode = match self.mode { 233 | InputMode::Filter => InputMode::Find, 234 | _ => self.mode, 235 | }; 236 | if let Some(buf) = self.buffer_history_container.next(mode) { 237 | self.buffer_state = BufferState::Active(Input::new(buf.clone())); 238 | Control::BufferContent(Input::new(buf)) 239 | } else { 240 | self.buffer_state = BufferState::Active(Input::default()); 241 | Control::BufferContent(Input::default()) 242 | } 243 | } 244 | KeyCode::Enter => { 245 | let control; 246 | if input.value().is_empty() { 247 | control = Control::BufferReset; 248 | } else if self.mode == InputMode::Find { 249 | control = Control::Find(input.value().to_string()); 250 | } else if self.mode == InputMode::Filter { 251 | control = Control::Filter(input.value().to_string()); 252 | } else if self.mode == InputMode::FilterColumns { 253 | control = 
Control::FilterColumns(input.value().to_string()); 254 | } else { 255 | control = Control::BufferReset; 256 | } 257 | if self.mode == InputMode::Filter { 258 | // Share buffer history between Find and Filter, see also KeyCode::Up 259 | self.buffer_history_container 260 | .set(InputMode::Find, input.value()); 261 | } else { 262 | self.buffer_history_container.set(self.mode, input.value()); 263 | } 264 | self.reset_buffer(); 265 | control 266 | } 267 | _ => { 268 | if input.handle_event(&Event::Key(key_event)).is_some() { 269 | // Parse immediately for FreezeColumns since it should just be a number 270 | let control = if self.mode == InputMode::FreezeColumns { 271 | let control = if let Ok(n) = input.value().parse::() { 272 | Control::FreezeColumns(n) 273 | } else { 274 | Control::UserError(format!("Invalid number: {}", input.value())) 275 | }; 276 | self.reset_buffer(); 277 | control 278 | } else { 279 | Control::BufferContent(input.clone()) 280 | }; 281 | return control; 282 | } 283 | Control::Nothing 284 | } 285 | } 286 | } 287 | 288 | fn handler_buffering_option_mode(&mut self, key_event: KeyEvent) -> Control { 289 | match key_event.code { 290 | KeyCode::Esc | KeyCode::Backspace | KeyCode::Enter => { 291 | self.reset_buffer(); 292 | Control::BufferReset 293 | } 294 | KeyCode::Char('S') => { 295 | self.reset_buffer(); 296 | Control::ToggleLineWrap(false) 297 | } 298 | KeyCode::Char('W') | KeyCode::Char('w') => { 299 | self.reset_buffer(); 300 | Control::ToggleLineWrap(true) 301 | } 302 | KeyCode::Char(x) => { 303 | self.reset_buffer(); 304 | Control::UnknownOption(x.to_string()) 305 | } 306 | _ => Control::Nothing, 307 | } 308 | } 309 | 310 | fn handler_help(&mut self, key_event: KeyEvent) -> Control { 311 | match key_event.code { 312 | KeyCode::Char('q') | KeyCode::Esc => Control::Quit, 313 | KeyCode::Char('j') | KeyCode::Down => Control::ScrollDown, 314 | KeyCode::Char('k') | KeyCode::Up => Control::ScrollUp, 315 | _ => Control::Nothing, 316 | } 317 | } 318 | 
319 | fn is_input_buffering(&self) -> bool { 320 | matches!(self.buffer_state, BufferState::Active(_)) 321 | } 322 | 323 | fn init_buffer(&mut self, mode: InputMode) { 324 | self.buffer_state = BufferState::Active(Input::default()); 325 | self.mode = mode; 326 | } 327 | 328 | fn reset_buffer(&mut self) { 329 | self.buffer_state = BufferState::Inactive; 330 | self.buffer_history_container.reset_cursors(); 331 | self.mode = InputMode::Default; 332 | } 333 | 334 | pub fn mode(&self) -> InputMode { 335 | self.mode 336 | } 337 | 338 | pub fn enter_help_mode(&mut self) { 339 | self.mode = InputMode::Help; 340 | } 341 | 342 | pub fn exit_help_mode(&mut self) { 343 | self.mode = InputMode::Default; 344 | } 345 | 346 | fn is_help_mode(&mut self) -> bool { 347 | self.mode == InputMode::Help 348 | } 349 | } 350 | -------------------------------------------------------------------------------- /src/io.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::{Read, Seek, SeekFrom, Write}; 3 | use tempfile::NamedTempFile; 4 | 5 | use crate::errors::{CsvlensError, CsvlensResult}; 6 | 7 | pub struct SeekableFile { 8 | filename: Option, 9 | inner_file: Option, 10 | } 11 | 12 | impl SeekableFile { 13 | pub fn new(maybe_filename: &Option) -> CsvlensResult { 14 | let mut inner_file = NamedTempFile::new()?; 15 | let inner_file_res; 16 | 17 | if let Some(filename) = maybe_filename { 18 | let mut f = File::open(filename).map_err(|e| match e.kind() { 19 | std::io::ErrorKind::NotFound => CsvlensError::FileNotFound(filename.clone()), 20 | _ => e.into(), 21 | })?; 22 | // If not seekable, it most likely is due to process substitution using 23 | // pipe - write out to a temp file to make it seekable 24 | if f.seek(SeekFrom::Start(0)).is_err() { 25 | Self::chunked_copy(&mut f, &mut inner_file)?; 26 | inner_file_res = Some(inner_file); 27 | } else { 28 | inner_file_res = None; 29 | } 30 | } else { 31 | // Handle input from 
stdin 32 | let mut stdin = std::io::stdin(); 33 | Self::chunked_copy(&mut stdin, &mut inner_file)?; 34 | inner_file_res = Some(inner_file); 35 | } 36 | 37 | Ok(SeekableFile { 38 | filename: maybe_filename.clone(), 39 | inner_file: inner_file_res, 40 | }) 41 | } 42 | 43 | pub fn filename(&self) -> &str { 44 | if let Some(f) = &self.inner_file { 45 | f.path().to_str().unwrap() 46 | } else { 47 | // If data is from stdin, then inner_file must be there 48 | self.filename.as_ref().unwrap() 49 | } 50 | } 51 | 52 | fn chunked_copy(source: &mut R, dest: &mut W) -> CsvlensResult { 53 | let mut total_copied = 0; 54 | let mut buffer = vec![0; 1_000_000]; 55 | loop { 56 | let n = source.read(&mut buffer)?; 57 | if n == 0 { 58 | break; 59 | } 60 | let n_written = dest.write(&buffer[..n])?; 61 | total_copied += n_written; 62 | } 63 | Ok(total_copied) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # csvlens 2 | //! 3 | //! This crate allows you to use csvlens as a library. 4 | //! 5 | //! In your `Cargo.toml`, add the following: 6 | //! 7 | //! ```toml 8 | //! [dependencies] 9 | //! csvlens = { version = "0.11.0", default-features = false, features = ["clipboard"] } 10 | //! ``` 11 | //! 12 | //! ## Example 13 | //! 14 | //! ```rust,no_run 15 | //! use csvlens::run_csvlens; 16 | //! 17 | //! let out = run_csvlens(&["/path/to/your.csv"]).unwrap(); 18 | //! if let Some(selected_cell) = out { 19 | //! println!("Selected: {}", selected_cell); 20 | //! } 21 | //! ``` 22 | //! 23 | //! ## Library Usage with options 24 | //! 25 | //! ```ignore 26 | //! use csvlens::{run_csvlens_with_options, CsvlensOptions}; 27 | //! 28 | //! let options = CsvlensOptions { 29 | //! filename: "/path/to/your.csv".to_string(), 30 | //! delimiter: Some("|".to_string()), 31 | //! ignore_case: true, 32 | //! debug: true, 33 | //! 
..Default::default() 34 | //! }; 35 | //! let out = run_csvlens_with_options(options).unwrap(); 36 | //! if let Some(selected_cell) = out { 37 | //! println!("Selected: {}", selected_cell); 38 | //! } 39 | //! ``` 40 | mod app; 41 | mod columns_filter; 42 | mod common; 43 | mod csv; 44 | mod delimiter; 45 | pub mod errors; 46 | mod find; 47 | mod help; 48 | mod history; 49 | mod input; 50 | mod io; 51 | mod runner; 52 | mod sort; 53 | mod theme; 54 | mod ui; 55 | mod util; 56 | mod view; 57 | mod wrap; 58 | 59 | pub use runner::CsvlensOptions; 60 | pub use runner::run_csvlens; 61 | pub use runner::run_csvlens_with_options; 62 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use csvlens::run_csvlens; 2 | 3 | fn main() { 4 | let args_itr = std::env::args_os().skip(1); 5 | match run_csvlens(args_itr) { 6 | Err(e) => { 7 | println!("{e:#}"); 8 | std::process::exit(1); 9 | } 10 | Ok(Some(selection)) => { 11 | println!("{selection}"); 12 | } 13 | _ => {} 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/runner.rs: -------------------------------------------------------------------------------- 1 | use crate::app::App; 2 | use crate::delimiter::Delimiter; 3 | use crate::errors::CsvlensResult; 4 | use crate::io::SeekableFile; 5 | 6 | #[cfg(feature = "cli")] 7 | use clap::{Parser, command}; 8 | use crossterm::execute; 9 | use crossterm::terminal::{ 10 | EnterAlternateScreen, LeaveAlternateScreen, disable_raw_mode, enable_raw_mode, 11 | }; 12 | use ratatui::Terminal; 13 | use ratatui::backend::CrosstermBackend; 14 | use std::ffi::OsString; 15 | use std::io::LineWriter; 16 | use std::panic; 17 | use std::thread::panicking; 18 | 19 | #[cfg(feature = "cli")] 20 | #[derive(Parser, Debug)] 21 | #[command(version)] 22 | struct Args { 23 | /// CSV filename 24 | filename: Option, 25 | 26 | /// 
Delimiter character (comma by default) or "auto" to auto-detect the delimiter 27 | #[clap(short, long, value_name = "char")] 28 | delimiter: Option, 29 | 30 | /// Use tab separation. Shortcut for -d '\t'. 31 | #[clap(short = 't', long)] 32 | tab_separated: bool, 33 | 34 | /// Do not interpret the first row as headers. 35 | #[clap(long)] 36 | no_headers: bool, 37 | 38 | /// Use this regex to select columns to display by default 39 | /// 40 | /// Example: "column1|column2" matches "column1", "column2", and also column names like 41 | /// "column11", "column22". 42 | #[arg(long, value_name = "regex")] 43 | columns: Option, 44 | 45 | /// Use this regex to filter rows to display by default 46 | /// 47 | /// The regex is matched against each cell in every column. 48 | /// 49 | /// Example: "value1|value2" filters rows with any cells containing "value1", "value2", or text 50 | /// like "my_value1" or "value234". 51 | #[arg(long, value_name = "regex")] 52 | filter: Option, 53 | 54 | /// Use this regex to find and highlight matches by default 55 | /// 56 | /// The regex is matched against each cell in every column. 57 | /// 58 | /// Example: "value1|value2" highlights text in any cells containing "value1", "value2", or 59 | /// longer text like "value1_ok". 60 | #[arg(long, value_name = "regex")] 61 | find: Option, 62 | 63 | /// Searches ignore case. Ignored if any uppercase letters are present in the search string 64 | #[clap(short, long)] 65 | ignore_case: bool, 66 | 67 | /// Print the value of this column to stdout for the selected row 68 | #[arg(long, value_name = "column_name")] 69 | echo_column: Option, 70 | 71 | /// Whether to display each column in a different color 72 | #[arg(long, alias = "colorful", visible_alias = "colorful")] 73 | color_columns: bool, 74 | 75 | /// Show a custom prompt message in the status bar. Supports ANSI escape codes for colored or 76 | /// styled text. 
77 | #[arg(long, value_name = "prompt")] 78 | prompt: Option, 79 | 80 | /// Show stats for debugging 81 | #[clap(long)] 82 | debug: bool, 83 | } 84 | 85 | #[cfg(feature = "cli")] 86 | impl From for CsvlensOptions { 87 | fn from(args: Args) -> Self { 88 | Self { 89 | filename: args.filename, 90 | delimiter: args.delimiter, 91 | tab_separated: args.tab_separated, 92 | no_headers: args.no_headers, 93 | columns: args.columns, 94 | filter: args.filter, 95 | find: args.find, 96 | ignore_case: args.ignore_case, 97 | echo_column: args.echo_column, 98 | debug: args.debug, 99 | freeze_cols_offset: None, 100 | color_columns: args.color_columns, 101 | prompt: args.prompt, 102 | } 103 | } 104 | } 105 | 106 | // Struct for library usage without clap directives 107 | #[derive(Debug, Default)] 108 | pub struct CsvlensOptions { 109 | pub filename: Option, 110 | pub delimiter: Option, 111 | pub tab_separated: bool, 112 | pub no_headers: bool, 113 | pub columns: Option, 114 | pub filter: Option, 115 | pub find: Option, 116 | pub ignore_case: bool, 117 | pub echo_column: Option, 118 | pub debug: bool, 119 | pub freeze_cols_offset: Option, 120 | pub color_columns: bool, 121 | pub prompt: Option, 122 | } 123 | 124 | struct AppRunner { 125 | app: App, 126 | } 127 | 128 | impl AppRunner { 129 | fn new(app: App) -> AppRunner { 130 | let original_panic_hook = panic::take_hook(); 131 | 132 | panic::set_hook(Box::new(move |info| { 133 | // Restore terminal states first so that the backtrace on panic can 134 | // be printed with proper line breaks 135 | disable_raw_mode().unwrap(); 136 | execute!(std::io::stderr(), LeaveAlternateScreen).unwrap(); 137 | original_panic_hook(info); 138 | })); 139 | 140 | AppRunner { app } 141 | } 142 | 143 | fn run(&mut self) -> CsvlensResult> { 144 | enable_raw_mode()?; 145 | let mut output = std::io::stderr(); 146 | execute!(output, EnterAlternateScreen)?; 147 | 148 | let backend = CrosstermBackend::new(LineWriter::new(output)); 149 | let mut terminal = 
Terminal::new(backend)?; 150 | 151 | self.app.main_loop(&mut terminal) 152 | } 153 | } 154 | 155 | impl Drop for AppRunner { 156 | fn drop(&mut self) { 157 | // If panicked, restoring of terminal states would have been done in the 158 | // panic hook. Avoid doing that twice since that would clear the printed 159 | // backtrace. 160 | if !panicking() { 161 | disable_raw_mode().unwrap(); 162 | execute!(std::io::stderr(), LeaveAlternateScreen).unwrap(); 163 | } 164 | } 165 | } 166 | 167 | /// Run csvlens with options provided in a `CsvlensOptions` struct. 168 | /// 169 | /// On success, the result contains an optional string that is the value of the selected cell if 170 | /// any. If csvlens exits without selecting a cell, the result is None. 171 | /// 172 | /// Example: 173 | /// 174 | /// ``` 175 | /// use csvlens::{run_csvlens_with_options, CsvlensOptions}; 176 | /// 177 | /// let options = CsvlensOptions { 178 | /// filename: Some("/path/to/your.csv".to_string()), 179 | /// ..Default::default() 180 | /// }; 181 | /// match run_csvlens_with_options(options) { 182 | /// Ok(Some(selected_cell)) => println!("Selected: {}", selected_cell), 183 | /// Ok(None) => {}, 184 | /// Err(e) => eprintln!("Error: {:?}", e), 185 | /// } 186 | /// ``` 187 | pub fn run_csvlens_with_options(options: CsvlensOptions) -> CsvlensResult> { 188 | let show_stats = options.debug; 189 | let delimiter = Delimiter::from_arg(&options.delimiter, options.tab_separated)?; 190 | 191 | let file = SeekableFile::new(&options.filename)?; 192 | let filename = file.filename(); 193 | 194 | let app = App::new( 195 | filename, 196 | delimiter, 197 | options.filename, 198 | show_stats, 199 | options.echo_column, 200 | options.ignore_case, 201 | options.no_headers, 202 | options.columns, 203 | options.filter, 204 | options.find, 205 | options.freeze_cols_offset, 206 | options.color_columns, 207 | options.prompt, 208 | )?; 209 | 210 | let mut app_runner = AppRunner::new(app); 211 | app_runner.run() 212 | } 213 
/// Launch csvlens from a list of command line style arguments. The arguments
/// are parsed exactly like those given to the csvlens binary.
///
/// On success, the returned option holds the value of the selected cell, or
/// `None` when csvlens exits without a cell being selected.
///
/// Example:
///
/// ```
/// use csvlens::run_csvlens;
///
/// match run_csvlens(&["/path/to/your.csv", "--delimiter", "\t"]) {
///     Ok(Some(selected_cell)) => println!("Selected: {}", selected_cell),
///     Ok(None) => {},
///     Err(e) => eprintln!("Error: {:?}", e),
/// }
/// ```
#[cfg(feature = "cli")]
pub fn run_csvlens<I, T>(args: I) -> CsvlensResult<Option<String>>
where
    I: IntoIterator<Item = T>,
    T: Into<OsString> + Clone,
{
    // The parser expects the program name in front of the user arguments.
    let argv: Vec<OsString> = std::iter::once(OsString::from("csvlens"))
        .chain(args.into_iter().map(Into::into))
        .collect();
    let parsed = Args::parse_from(argv);
    run_csvlens_with_options(parsed.into())
}
Compile with the 'cli' feature to use this binary."); 252 | std::process::exit(1); 253 | } 254 | -------------------------------------------------------------------------------- /src/sort.rs: -------------------------------------------------------------------------------- 1 | use crate::csv; 2 | use crate::errors::CsvlensResult; 3 | 4 | use std::fs::File; 5 | use std::sync::Arc; 6 | use std::sync::Mutex; 7 | use std::thread::{self}; 8 | 9 | use arrow::array::{Array, ArrayIter}; 10 | use arrow::compute::concat; 11 | use arrow::compute::kernels; 12 | use arrow::datatypes::Fields; 13 | use arrow::datatypes::Schema; 14 | use arrow::datatypes::SchemaBuilder; 15 | 16 | #[derive(Clone, Debug, PartialEq)] 17 | pub enum SorterStatus { 18 | Running, 19 | Finished, 20 | Error(String), 21 | } 22 | 23 | #[derive(Clone, Copy, Debug, PartialEq)] 24 | pub enum SortOrder { 25 | Ascending, 26 | Descending, 27 | } 28 | 29 | #[derive(Debug)] 30 | pub struct Sorter { 31 | pub column_index: usize, 32 | column_name: String, 33 | internal: Arc>, 34 | } 35 | 36 | impl Sorter { 37 | pub fn new(csv_config: Arc, column_index: usize, column_name: String) -> Self { 38 | let internal = SorterInternalState::init(csv_config, column_index); 39 | Sorter { 40 | column_index, 41 | column_name, 42 | internal, 43 | } 44 | } 45 | 46 | pub fn get_sorted_indices( 47 | &self, 48 | rows_from: u64, 49 | num_rows: u64, 50 | order: SortOrder, 51 | ) -> Option> { 52 | let m_guard = self.internal.lock().unwrap(); 53 | if let Some(sort_result) = &m_guard.sort_result { 54 | let mut out = vec![]; 55 | let index_range: Box> = if order == SortOrder::Ascending { 56 | let start = rows_from; 57 | let end = start.saturating_add(num_rows); 58 | Box::new(start..end) 59 | } else { 60 | let end = sort_result.num_rows() as u64 - rows_from; 61 | let start = end.saturating_sub(num_rows); 62 | Box::new((start..end).rev()) 63 | }; 64 | for i in index_range { 65 | if let Some(record_index) = sort_result.record_indices.get(i as 
usize) { 66 | out.push(*record_index as u64) 67 | } 68 | } 69 | return Some(out); 70 | } 71 | None 72 | } 73 | 74 | pub fn get_record_order(&self, row_index: u64, order: SortOrder) -> Option { 75 | let m_guard = self.internal.lock().unwrap(); 76 | if let Some(sort_result) = &m_guard.sort_result { 77 | if let Some(mut record_order) = 78 | sort_result.record_orders.get(row_index as usize).cloned() 79 | { 80 | if order == SortOrder::Descending { 81 | record_order = sort_result.num_rows() - record_order - 1; 82 | } 83 | return Some(record_order as u64); 84 | } 85 | } 86 | None 87 | } 88 | 89 | pub fn status(&self) -> SorterStatus { 90 | (self.internal.lock().unwrap()).status.clone() 91 | } 92 | 93 | pub fn column_name(&self) -> &str { 94 | self.column_name.as_str() 95 | } 96 | 97 | pub fn terminate(&self) { 98 | let mut m = self.internal.lock().unwrap(); 99 | m.terminate(); 100 | } 101 | 102 | #[cfg(test)] 103 | pub fn wait_internal(&self) { 104 | loop { 105 | if self.internal.lock().unwrap().done { 106 | break; 107 | } 108 | thread::sleep(core::time::Duration::from_millis(100)); 109 | } 110 | } 111 | } 112 | 113 | impl Drop for Sorter { 114 | fn drop(&mut self) { 115 | self.terminate(); 116 | } 117 | } 118 | 119 | #[derive(Debug)] 120 | struct SortResult { 121 | record_indices: Vec, 122 | record_orders: Vec, 123 | } 124 | 125 | impl SortResult { 126 | fn num_rows(&self) -> usize { 127 | self.record_indices.len() 128 | } 129 | } 130 | 131 | #[derive(Debug)] 132 | struct SorterInternalState { 133 | sort_result: Option, 134 | status: SorterStatus, 135 | should_terminate: bool, 136 | done: bool, 137 | } 138 | 139 | impl SorterInternalState { 140 | pub fn init( 141 | config: Arc, 142 | column_index: usize, 143 | ) -> Arc> { 144 | let internal = SorterInternalState { 145 | sort_result: None, 146 | status: SorterStatus::Running, 147 | should_terminate: false, 148 | done: false, 149 | }; 150 | 151 | let m_state = Arc::new(Mutex::new(internal)); 152 | 153 | let _m = 
m_state.clone(); 154 | 155 | let _handle = thread::spawn(move || { 156 | fn run( 157 | m: Arc>, 158 | config: Arc, 159 | column_index: usize, 160 | ) -> CsvlensResult { 161 | // Get schema 162 | let schema = 163 | SorterInternalState::infer_schema(config.filename(), config.delimiter())?; 164 | let file = File::open(config.filename())?; 165 | let arrow_csv_reader = arrow::csv::ReaderBuilder::new(Arc::new(schema)) 166 | .with_delimiter(config.delimiter()) 167 | .with_header(!config.no_headers()) 168 | .with_projection(vec![column_index]) 169 | .build(file)?; 170 | 171 | // Parse csv in batches to construct the column 172 | let mut arrs: Vec> = Vec::new(); 173 | for record_batch_result in arrow_csv_reader { 174 | let record_batch = record_batch_result?; 175 | let arr = record_batch.column(0); 176 | arrs.push(arr.clone()); 177 | if m.lock().unwrap().should_terminate { 178 | return Ok(SortResult { 179 | record_indices: vec![], 180 | record_orders: vec![], 181 | }); 182 | } 183 | } 184 | let ref_arrs = arrs 185 | .iter() 186 | .map(|arr| arr.as_ref()) 187 | .collect::>(); 188 | let combined_arr = concat(&ref_arrs)?; 189 | 190 | // Sort 191 | let sorted_indices = 192 | kernels::sort::sort_to_indices(combined_arr.as_ref(), None, None)?; 193 | 194 | // Construct the result. Maybe this can be kept as arrow Arrays? 
195 | let mut sorted_record_indices: Vec = vec![]; 196 | let mut record_orders: Vec = vec![0; sorted_indices.len()]; 197 | for (record_order, sorted_record_index) in 198 | ArrayIter::new(&sorted_indices).flatten().enumerate() 199 | { 200 | sorted_record_indices.push(sorted_record_index as usize); 201 | record_orders[sorted_record_index as usize] = record_order; 202 | } 203 | let sort_result = SortResult { 204 | record_indices: sorted_record_indices, 205 | record_orders, 206 | }; 207 | Ok(sort_result) 208 | } 209 | 210 | let sort_result = run(_m.clone(), config, column_index); 211 | 212 | let mut m = _m.lock().unwrap(); 213 | if let Ok(sort_result) = sort_result { 214 | m.sort_result = Some(sort_result); 215 | m.status = SorterStatus::Finished; 216 | } else { 217 | m.status = SorterStatus::Error(sort_result.err().unwrap().to_string()); 218 | } 219 | m.done = true; 220 | }); 221 | 222 | m_state 223 | } 224 | 225 | fn infer_schema(filename: &str, delimiter: u8) -> CsvlensResult { 226 | let schema = arrow::csv::infer_schema_from_files( 227 | &[filename.to_string()], 228 | delimiter, 229 | Some(1000), 230 | true, 231 | )?; 232 | 233 | // Convert integer fields to float64 to be more permissive 234 | let mut updated_fields = vec![]; 235 | for field in schema.fields() { 236 | if field.data_type().is_integer() { 237 | let new_field = field 238 | .as_ref() 239 | .clone() 240 | .with_data_type(arrow::datatypes::DataType::Float64); 241 | updated_fields.push(new_field); 242 | } else { 243 | updated_fields.push(field.as_ref().clone()); 244 | } 245 | } 246 | let updated_fields = Fields::from(updated_fields); 247 | 248 | Ok(SchemaBuilder::from(updated_fields).finish()) 249 | } 250 | 251 | fn terminate(&mut self) { 252 | self.should_terminate = true; 253 | } 254 | } 255 | 256 | #[cfg(test)] 257 | mod tests { 258 | 259 | use super::*; 260 | 261 | #[test] 262 | fn test_simple() { 263 | let config = Arc::new(csv::CsvConfig::new("tests/data/simple.csv", b',', false)); 264 | let s = 
Sorter::new(config, 0, "A1".to_string()); 265 | s.wait_internal(); 266 | let rows = s.get_sorted_indices(0, 5, SortOrder::Ascending).unwrap(); 267 | let expected = vec![0, 9, 99, 999, 1000]; 268 | assert_eq!(rows, expected); 269 | } 270 | 271 | #[test] 272 | fn test_descending() { 273 | let config = Arc::new(csv::CsvConfig::new("tests/data/simple.csv", b',', false)); 274 | let s = Sorter::new(config, 0, "A1".to_string()); 275 | s.wait_internal(); 276 | let rows = s.get_sorted_indices(0, 5, SortOrder::Descending).unwrap(); 277 | let expected = vec![998, 997, 996, 995, 994]; 278 | assert_eq!(rows, expected); 279 | } 280 | 281 | #[test] 282 | fn test_empty() { 283 | let config = Arc::new(csv::CsvConfig::new("tests/data/empty.csv", b',', false)); 284 | let s = Sorter::new(config, 1, "b".to_string()); 285 | s.wait_internal(); 286 | assert_eq!( 287 | s.status(), 288 | SorterStatus::Error("Compute error: Sort not supported for data type Null".to_string()) 289 | ); 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /src/theme.rs: -------------------------------------------------------------------------------- 1 | use ratatui::style::Color; 2 | use terminal_colorsaurus::{ColorScheme, QueryOptions, color_scheme}; 3 | 4 | pub struct Theme { 5 | pub row_number: Color, 6 | pub border: Color, 7 | pub selected_foreground: Color, 8 | pub selected_background: Color, 9 | pub found: Color, 10 | pub found_selected_background: Color, 11 | pub status: Color, 12 | pub column_colors: [Color; 5], 13 | } 14 | 15 | impl Theme { 16 | pub fn default() -> Self { 17 | match color_scheme(QueryOptions::default()) { 18 | Ok(ColorScheme::Dark) => Theme::dark(), 19 | Ok(ColorScheme::Light) => Theme::light(), 20 | _ => Theme::dark(), 21 | } 22 | } 23 | 24 | pub fn dark() -> Self { 25 | let gutter = Color::Rgb(131, 148, 150); 26 | Theme { 27 | row_number: gutter, 28 | border: gutter, 29 | selected_foreground: Color::Rgb(192, 192, 192), 30 | 
selected_background: Color::Rgb(62, 61, 50), 31 | found: Color::Rgb(200, 0, 0), 32 | found_selected_background: Color::LightYellow, 33 | status: gutter, 34 | column_colors: [ 35 | Color::Rgb(253, 151, 31), 36 | Color::Rgb(102, 217, 239), 37 | Color::Rgb(190, 132, 255), 38 | Color::Rgb(249, 38, 114), 39 | Color::Rgb(230, 219, 116), 40 | ], 41 | } 42 | } 43 | 44 | pub fn light() -> Self { 45 | let gutter = Color::Rgb(131, 148, 150); 46 | Theme { 47 | row_number: gutter, 48 | border: gutter, 49 | selected_foreground: Color::Rgb(73, 72, 62), 50 | selected_background: Color::Rgb(230, 227, 196), 51 | found: Color::Rgb(200, 0, 0), 52 | found_selected_background: Color::LightYellow, 53 | status: gutter, 54 | column_colors: [ 55 | Color::Rgb(207, 112, 0), 56 | Color::Rgb(0, 137, 179), 57 | Color::Rgb(104, 77, 153), 58 | Color::Rgb(249, 0, 90), 59 | Color::Rgb(153, 143, 47), 60 | ], 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/util/events.rs: -------------------------------------------------------------------------------- 1 | use std::time::{Duration, Instant}; 2 | 3 | use crossterm::event::{Event, KeyEvent, KeyEventKind, poll, read}; 4 | 5 | pub enum CsvlensEvent { 6 | Input(I), 7 | Tick, 8 | } 9 | 10 | /// A small event handler that wrap termion input and tick events. 
Each event 11 | /// type is handled in its own thread and returned to a common `Receiver` 12 | pub struct CsvlensEvents { 13 | tick_rate: Duration, 14 | } 15 | 16 | #[derive(Debug, Clone, Copy)] 17 | pub struct Config { 18 | pub tick_rate: Duration, 19 | } 20 | 21 | impl Default for Config { 22 | fn default() -> Config { 23 | Config { 24 | tick_rate: Duration::from_millis(250), 25 | } 26 | } 27 | } 28 | 29 | impl CsvlensEvents { 30 | pub fn new() -> CsvlensEvents { 31 | CsvlensEvents::with_config(Config::default()) 32 | } 33 | 34 | pub fn with_config(config: Config) -> CsvlensEvents { 35 | CsvlensEvents { 36 | tick_rate: config.tick_rate, 37 | } 38 | } 39 | 40 | pub fn next(&self) -> std::io::Result> { 41 | let now = Instant::now(); 42 | match poll(self.tick_rate) { 43 | Ok(true) => match read()? { 44 | Event::Key(event) if event.kind == KeyEventKind::Press => { 45 | Ok(CsvlensEvent::Input(event)) 46 | } 47 | _ => { 48 | let time_spent = now.elapsed(); 49 | let rest = self.tick_rate.saturating_sub(time_spent); 50 | 51 | Self { tick_rate: rest }.next() 52 | } 53 | }, 54 | Ok(false) => Ok(CsvlensEvent::Tick), 55 | Err(_) => todo!(), 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/util/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod events; 2 | -------------------------------------------------------------------------------- /src/view.rs: -------------------------------------------------------------------------------- 1 | use crate::columns_filter::ColumnsFilter; 2 | use crate::csv::{CsvLensReader, Row}; 3 | use crate::errors::CsvlensResult; 4 | use crate::find; 5 | use crate::input::Control; 6 | use crate::sort::{SortOrder, Sorter}; 7 | 8 | use std::cmp::min; 9 | use std::sync::Arc; 10 | use std::time::{Duration, Instant}; 11 | 12 | struct RowsFilter { 13 | indices: Vec, 14 | total: usize, 15 | max_index: Option, 16 | } 17 | 18 | impl RowsFilter { 19 | 
/// Selection state along one dimension (rows or columns).
#[derive(Clone)]
pub struct SelectionDimension {
    index: Option<u64>,
    pub bound: u64,
    last_selected: Option<u64>,
}

impl SelectionDimension {
    /// Build a dimension with the given initial index and bound. No index is
    /// recorded as last selected yet.
    pub fn new(index: Option<u64>, bound: u64) -> Self {
        Self {
            index,
            bound,
            last_selected: None,
        }
    }

    /// The currently selected index, if any.
    ///
    /// The index is relative to the bound only: `set_index` keeps it at most
    /// `bound - 1`, and it is unrelated to the record number in the data.
    pub fn index(&self) -> Option<u64> {
        self.index
    }

    /// Select `index`, clamped so that it stays within the bound. The
    /// requested (unclamped) value is remembered as the last selected index.
    pub fn set_index(&mut self, index: u64) {
        let upper = self.bound.saturating_sub(1);
        self.index = Some(index.min(upper));
        self.last_selected = Some(index);
    }

    /// Clear the selection. The last selected index is kept.
    pub fn unset_index(&mut self) {
        self.index = None;
    }

    /// Change the bound and re-clamp the current selection, if there is one.
    pub fn set_bound(&mut self, bound: u64) {
        self.bound = bound;
        if let Some(current) = self.index {
            self.set_index(current);
        }
    }

    /// Move the selection one step forward. No-op when nothing is selected.
    pub fn select_next(&mut self) {
        if let Some(current) = self.index {
            self.set_index(current.saturating_add(1));
        }
    }

    /// Move the selection one step backward. No-op when nothing is selected.
    pub fn select_previous(&mut self) {
        if let Some(current) = self.index {
            self.set_index(current.saturating_sub(1));
        }
    }

    /// Jump to the first index. No-op when nothing is selected.
    pub fn select_first(&mut self) {
        if self.index.is_none() {
            return;
        }
        self.set_index(0);
    }

    /// Jump to the last index allowed by the bound. No-op when nothing is
    /// selected.
    pub fn select_last(&mut self) {
        if self.index.is_none() {
            return;
        }
        let last = self.bound.saturating_sub(1);
        self.set_index(last);
    }

    /// Whether `i` is the currently selected index.
    pub fn is_selected(&self, i: usize) -> bool {
        self.index == Some(i as u64)
    }

    /// The index most recently passed to `set_index`, even if the selection
    /// has since been cleared.
    pub fn last_selected(&self) -> Option<u64> {
        self.last_selected
    }
}
/// Horizontal offset of the rendered columns.
#[derive(Debug, Clone, Copy, Default)]
pub struct ColumnsOffset {
    /// Number of columns that are frozen on the left side (always visible)
    pub num_freeze: u64,

    /// Number of columns that are skipped after the frozen columns (not visible)
    pub num_skip: u64,
}

impl ColumnsOffset {
    /// Whether the column at this columns-filtered index is one of the frozen
    /// columns.
    pub fn is_frozen(&self, filtered_columns_index: u64) -> bool {
        self.num_freeze > filtered_columns_index
    }

    /// Map a view port column index to the index in the columns-filtered data.
    /// Frozen positions map to themselves; later positions are shifted by the
    /// number of skipped columns.
    pub fn get_filtered_column_index(&self, view_port_column_index: u64) -> u64 {
        if self.is_frozen(view_port_column_index) {
            view_port_column_index
        } else {
            view_port_column_index.saturating_add(self.num_skip)
        }
    }

    /// Whether the column should be rendered: it is either frozen or lies at
    /// or after the end of the skipped range.
    pub fn should_filtered_column_index_be_rendered(&self, filtered_column_index: u64) -> bool {
        let first_after_skipped = self.num_freeze.saturating_add(self.num_skip);
        self.is_frozen(filtered_column_index) || filtered_column_index >= first_after_skipped
    }

    /// Whether the column is visible given that `num_cols_rendered` columns
    /// (frozen ones included) are rendered.
    pub fn is_filtered_column_index_visible(
        &self,
        filtered_column_index: u64,
        num_cols_rendered: u64,
    ) -> bool {
        if self.is_frozen(filtered_column_index) {
            return true;
        }
        let window_start = self.num_freeze.saturating_add(self.num_skip);
        let window_len = num_cols_rendered.saturating_sub(self.num_freeze);
        let window_end = window_start.saturating_add(window_len);
        (window_start..window_end).contains(&filtered_column_index)
    }

    /// Number of columns to skip so that the given column becomes the first
    /// non-frozen column shown; zero for frozen columns.
    pub fn get_num_skip_to_make_visible(&self, filtered_column_index: u64) -> u64 {
        // Saturating subtraction already yields 0 for frozen columns.
        filtered_column_index.saturating_sub(self.num_freeze)
    }
}
, 253 | num_rows: u64, 254 | num_rows_rendered: u64, 255 | rows_from: u64, 256 | cols_offset: ColumnsOffset, 257 | filter: Option, 258 | columns_filter: Option>, 259 | sorter: Option>, 260 | sort_order: SortOrder, 261 | pub selection: Selection, 262 | perf_stats: Option, 263 | } 264 | 265 | impl RowsView { 266 | pub fn new(mut reader: CsvLensReader, num_rows: u64) -> CsvlensResult { 267 | let rows_from = 0; 268 | let rows = reader.get_rows(rows_from, num_rows)?.0; 269 | let headers = Self::get_default_headers_from_reader(&reader); 270 | let view = Self { 271 | reader, 272 | rows, 273 | headers, 274 | num_rows, 275 | num_rows_rendered: num_rows, 276 | rows_from, 277 | cols_offset: ColumnsOffset::default(), 278 | filter: None, 279 | columns_filter: None, 280 | sorter: None, 281 | sort_order: SortOrder::Ascending, 282 | selection: Selection::default(num_rows), 283 | perf_stats: None, 284 | }; 285 | Ok(view) 286 | } 287 | 288 | pub fn headers(&self) -> &Vec
{ 289 | &self.headers 290 | } 291 | 292 | pub fn raw_headers(&self) -> &Vec { 293 | &self.reader.headers 294 | } 295 | 296 | pub fn rows(&self) -> &Vec { 297 | &self.rows 298 | } 299 | 300 | pub fn get_column_name_from_global_index(&self, column_index: usize) -> String { 301 | self.raw_headers() 302 | .get(column_index) 303 | .cloned() 304 | .unwrap_or_default() 305 | } 306 | 307 | pub fn get_column_name_from_local_index(&self, column_index: usize) -> String { 308 | self.headers() 309 | .get(column_index) 310 | .map(|header| header.name.clone()) 311 | .unwrap_or_default() 312 | } 313 | 314 | pub fn get_cell_value(&self, column_name: &str) -> Option { 315 | if let (Some(column_index), Some(row_index)) = ( 316 | self.headers() 317 | .iter() 318 | .position(|header| header.name == column_name), 319 | self.selection.row.index(), 320 | ) { 321 | return self 322 | .rows() 323 | .get(row_index as usize) 324 | .and_then(|row| row.fields.get(column_index)) 325 | .cloned(); 326 | } 327 | None 328 | } 329 | 330 | /// Get the value of the cell at the current selection. Only returns a value 331 | /// if the selection type is Cell. 332 | pub fn get_cell_value_from_selection(&self) -> Option { 333 | if let (Some(column_index), Some(row_index)) = 334 | (self.selection.column.index(), self.selection.row.index()) 335 | { 336 | // Note: row_index and column_index are "local" index. 
337 | return self 338 | .rows() 339 | .get(row_index as usize) 340 | .and_then(|row| { 341 | row.fields 342 | .get(self.cols_offset.get_filtered_column_index(column_index) as usize) 343 | }) 344 | .cloned(); 345 | } 346 | None 347 | } 348 | 349 | pub fn get_row_value(&self) -> Option<(usize, String)> { 350 | if let Some(row_index) = self.selection.row.index() { 351 | if let Some(row) = self.rows().get(row_index as usize) { 352 | return Some((row.record_num, row.fields.join("\t"))); 353 | } 354 | } 355 | None 356 | } 357 | 358 | pub fn num_rows(&self) -> u64 { 359 | self.num_rows 360 | } 361 | 362 | pub fn set_num_rows(&mut self, num_rows: u64) -> CsvlensResult<()> { 363 | if num_rows == self.num_rows { 364 | return Ok(()); 365 | } 366 | self.num_rows = num_rows; 367 | self.do_get_rows()?; 368 | Ok(()) 369 | } 370 | 371 | pub fn set_num_rows_rendered(&mut self, num_rows_rendered: u64) { 372 | self.num_rows_rendered = num_rows_rendered; 373 | // current selected might be out of range, reset it 374 | self.selection.row.set_bound(num_rows_rendered); 375 | } 376 | 377 | pub fn set_filter(&mut self, finder: &find::Finder) -> CsvlensResult<()> { 378 | let filter = RowsFilter::new(finder, self.rows_from, self.num_rows); 379 | // only need to reload rows if the currently shown indices changed 380 | let mut needs_reload = true; 381 | if let Some(cur_filter) = &self.filter { 382 | if cur_filter.indices == filter.indices { 383 | needs_reload = false; 384 | } 385 | } 386 | // but always need to update filter because it holds other states such 387 | // as total count 388 | self.filter = Some(filter); 389 | if needs_reload { 390 | self.do_get_rows() 391 | } else { 392 | Ok(()) 393 | } 394 | } 395 | 396 | pub fn is_filter(&self) -> bool { 397 | self.filter.is_some() 398 | } 399 | 400 | pub fn reset_filter(&mut self) -> CsvlensResult<()> { 401 | if !self.is_filter() { 402 | return Ok(()); 403 | } 404 | self.filter = None; 405 | self.do_get_rows() 406 | } 407 | 408 | pub fn 
columns_filter(&self) -> Option<&Arc> { 409 | self.columns_filter.as_ref() 410 | } 411 | 412 | pub fn set_columns_filter(&mut self, columns_filter: &Arc) -> CsvlensResult<()> { 413 | let columns_filter = columns_filter.clone(); 414 | self.headers = columns_filter 415 | .indices() 416 | .iter() 417 | .zip(columns_filter.filtered_headers()) 418 | .map(|(i, h)| Header { 419 | name: h.clone(), 420 | origin_index: *i, 421 | }) 422 | .collect(); 423 | self.columns_filter = Some(columns_filter); 424 | self.do_get_rows() 425 | } 426 | 427 | pub fn reset_columns_filter(&mut self) -> CsvlensResult<()> { 428 | self.columns_filter = None; 429 | self.headers = Self::get_default_headers_from_reader(&self.reader); 430 | self.do_get_rows() 431 | } 432 | 433 | pub fn get_column_origin_index(&self, column_index: usize) -> usize { 434 | self.headers[column_index].origin_index 435 | } 436 | 437 | fn get_default_headers_from_reader(reader: &CsvLensReader) -> Vec
{ 438 | reader 439 | .headers 440 | .iter() 441 | .enumerate() 442 | .map(|(i, h)| Header { 443 | name: h.clone(), 444 | origin_index: i, 445 | }) 446 | .collect::>() 447 | } 448 | 449 | pub fn sorter(&self) -> &Option> { 450 | &self.sorter 451 | } 452 | 453 | pub fn set_sorter(&mut self, sorter: &Arc) -> CsvlensResult<()> { 454 | self.sorter = Some(sorter.clone()); 455 | self.do_get_rows() 456 | } 457 | 458 | pub fn reset_sorter(&mut self) -> CsvlensResult<()> { 459 | self.sorter = None; 460 | self.do_get_rows() 461 | } 462 | 463 | pub fn set_sort_order(&mut self, sort_order: SortOrder) -> CsvlensResult<()> { 464 | if self.sort_order != sort_order { 465 | self.sort_order = sort_order; 466 | return self.do_get_rows(); 467 | } 468 | Ok(()) 469 | } 470 | 471 | pub fn rows_from(&self) -> u64 { 472 | self.rows_from 473 | } 474 | 475 | pub fn set_rows_from(&mut self, rows_from_: u64) -> CsvlensResult<()> { 476 | let rows_from = if let Some(n) = self.bottom_rows_from() { 477 | min(rows_from_, n) 478 | } else { 479 | rows_from_ 480 | }; 481 | if rows_from == self.rows_from { 482 | return Ok(()); 483 | } 484 | self.rows_from = rows_from; 485 | self.do_get_rows()?; 486 | Ok(()) 487 | } 488 | 489 | /// Offset of the first column to show. All columns are still read into Row 490 | /// (per ColumnsFilter if any). 
491 | pub fn cols_offset(&self) -> ColumnsOffset { 492 | self.cols_offset 493 | } 494 | 495 | pub fn set_cols_offset_num_skip(&mut self, cols_offset: u64) { 496 | self.cols_offset.num_skip = min(cols_offset, self.max_cols_offset_num_skip()); 497 | } 498 | 499 | pub fn set_cols_offset_num_freeze(&mut self, num_freeze: u64) { 500 | self.cols_offset.num_freeze = 501 | min(num_freeze, self.headers().len().saturating_sub(1) as u64); 502 | } 503 | 504 | pub fn max_cols_offset_num_skip(&self) -> u64 { 505 | (self.headers().len() as u64) 506 | .saturating_sub(self.cols_offset.num_freeze) 507 | .saturating_sub(1) 508 | } 509 | 510 | pub fn selected_offset(&self) -> Option { 511 | self.selection 512 | .row 513 | .index() 514 | .map(|x| x.saturating_add(self.rows_from)) 515 | } 516 | 517 | pub fn perf_stats(&self) -> Option { 518 | self.perf_stats.as_ref().cloned() 519 | } 520 | 521 | pub fn get_total_line_numbers(&self) -> Option { 522 | self.reader.get_total_line_numbers() 523 | } 524 | 525 | pub fn get_total_line_numbers_approx(&self) -> Option { 526 | self.reader.get_last_indexed_line_number() 527 | } 528 | 529 | pub fn in_view(&self, row_index: u64) -> bool { 530 | let last_row = self.rows_from().saturating_add(self.num_rows()); 531 | if row_index >= self.rows_from() && row_index < last_row { 532 | return true; 533 | } 534 | false 535 | } 536 | 537 | pub fn handle_control(&mut self, control: &Control) -> CsvlensResult<()> { 538 | match control { 539 | Control::ScrollDown => { 540 | if let Some(i) = self.selection.row.index() { 541 | if i >= self.num_rows_rendered.saturating_sub(1) { 542 | self.increase_rows_from(1)?; 543 | } else { 544 | self.selection.row.select_next(); 545 | } 546 | } else { 547 | self.increase_rows_from(1)?; 548 | } 549 | } 550 | Control::ScrollHalfPageDown => { 551 | self.increase_rows_from(self.num_rows_rendered / 2)?; 552 | self.selection.row.select_first() 553 | } 554 | Control::ScrollPageDown => { 555 | 
self.increase_rows_from(self.num_rows_rendered)?; 556 | self.selection.row.select_first() 557 | } 558 | Control::ScrollUp => { 559 | if let Some(i) = self.selection.row.index() { 560 | if i == 0 { 561 | self.decrease_rows_from(1)?; 562 | } else { 563 | self.selection.row.select_previous(); 564 | } 565 | } else { 566 | self.decrease_rows_from(1)?; 567 | } 568 | } 569 | Control::ScrollHalfPageUp => { 570 | self.decrease_rows_from(self.num_rows_rendered / 2)?; 571 | self.selection.row.select_first() 572 | } 573 | Control::ScrollPageUp => { 574 | self.decrease_rows_from(self.num_rows_rendered)?; 575 | self.selection.row.select_first() 576 | } 577 | Control::ScrollTop => { 578 | self.set_rows_from(0)?; 579 | self.selection.row.select_first() 580 | } 581 | Control::ScrollBottom => { 582 | if let Some(total) = self.get_total_line_numbers_indexed() { 583 | // Note: Using num_rows_rendered is not exactly correct, but it's simple and 584 | // a bit better than num_rows. To be exact, this should use row heights to 585 | // determine exactly how many rows to show from the bottom. 
586 | let rows_from = total.saturating_sub(self.num_rows_rendered as usize) as u64; 587 | self.set_rows_from(rows_from)?; 588 | } 589 | self.selection.row.select_last() 590 | } 591 | Control::ScrollTo(n) => { 592 | let mut rows_from = n.saturating_sub(1) as u64; 593 | if let Some(n) = self.bottom_rows_from() { 594 | rows_from = min(rows_from, n); 595 | } 596 | self.set_rows_from(rows_from)?; 597 | self.selection.row.select_first() 598 | } 599 | _ => {} 600 | } 601 | Ok(()) 602 | }

    /// Returns the record count that "jump to the bottom" navigation can rely
    /// on, or `None` when no such count is available.
    ///
    /// The base count comes from the reader: its total line count if it
    /// reports one, otherwise the last line number it reports as indexed.
    ///
    /// * Without a filter, that base count is returned unchanged.
    /// * With a filter whose `max_index` is known and strictly below the base
    ///   count, the filter's `total` is returned instead.
    /// * In every other case (no base count, filter without `max_index`, or
    ///   `max_index` not below the base count) the result is `None`.
    fn get_total_line_numbers_indexed(&self) -> Option<usize> {
        let max_line_number = self
            .reader
            .get_total_line_numbers()
            .or_else(|| self.reader.get_last_indexed_line_number())?;

        match &self.filter {
            None => Some(max_line_number),
            Some(filter) => {
                let max_index = filter.max_index?;
                if max_index < max_line_number as u64 {
                    // Only allow jumping to the bottom of found records if it can be
                    // efficiently retrieved
                    Some(filter.total)
                } else {
                    None
                }
            }
        }
    }

    /// Moves the first displayed row forward by `delta` (saturating on
    /// overflow) and applies the new position through `set_rows_from`.
    fn increase_rows_from(&mut self, delta: u64) -> CsvlensResult<()> {
        let new_rows_from = self.rows_from.saturating_add(delta);
        self.set_rows_from(new_rows_from)?;
        Ok(())
    }

    /// Moves the first displayed row backward by `delta` (saturating at zero)
    /// and applies the new position through `set_rows_from`.
    fn decrease_rows_from(&mut self, delta: u64) -> CsvlensResult<()> {
        let new_rows_from = self.rows_from.saturating_sub(delta);
        self.set_rows_from(new_rows_from)?;
        Ok(())
    }

    /// Returns the `rows_from` value that places the last known record at the
    /// bottom of the rendered area: the indexed total minus the number of
    /// rendered rows, saturating at zero. `None` when the total is unknown.
    fn bottom_rows_from(&self) -> Option<u64> {
        // TODO (carried over from the original): fix type conversion craziness
        self.get_total_line_numbers_indexed()
            .map(|n| n.saturating_sub(self.num_rows_rendered as usize) as u64)
    }

    /// Builds a new list of rows in which every row is reduced to the columns
    /// selected by `indices` (delegating to the row's own `subset`).
    // NOTE(review): the element type name was lost in this excerpt and is
    // restored here as `Row` - confirm against the file's imports.
    fn subset_columns(rows: &Vec<Row>, indices: &[usize]) -> Vec<Row> {
        rows.iter().map(|row| row.subset(indices)).collect()
    }

    /// Reloads `self.rows` from the reader for the current position and
    /// records how long the read took in `self.perf_stats`.
    ///
    /// Source of the rows, in order of precedence:
    /// 1. an active filter: the rows at the filter's `indices`;
    /// 2. an active sorter that can provide sorted indices for the current
    ///    window: the rows at those indices;
    /// 3. otherwise: `num_rows` rows starting at `rows_from`.
    ///
    /// If a columns filter is set, the fetched rows are then reduced to its
    /// column indices. The recorded elapsed time covers the reader call only,
    /// not the column subsetting.
    fn do_get_rows(&mut self) -> CsvlensResult<()> {
        let start = Instant::now();
        let (mut rows, reader_stats) = if let Some(filter) = &self.filter {
            let indices = &filter.indices;
            self.reader.get_rows_for_indices(indices)?
        } else if let Some(sorter) = &self.sorter {
            if let Some(sorted_indices) =
                sorter.get_sorted_indices(self.rows_from, self.num_rows, self.sort_order)
            {
                self.reader.get_rows_for_indices(&sorted_indices)?
            } else {
                // Sorted indices are not available; fall back to file order.
                self.reader.get_rows(self.rows_from, self.num_rows)?
            }
        } else {
            self.reader.get_rows(self.rows_from, self.num_rows)?
        };
        let elapsed = start.elapsed();
        if let Some(columns_filter) = &self.columns_filter {
            rows = Self::subset_columns(&rows, columns_filter.indices());
        }
        self.rows = rows;
        self.perf_stats = Some(PerfStats {
            elapsed,
            reader_stats,
        });
        // current selected might be out of range, reset it
        // self.selection.row.set_bound(self.rows.len() as u64);
        Ok(())
    }

    /// Test-only helper that forwards to the reader's `wait_internal`.
    #[cfg(test)]
    pub fn wait_internal(&self) {
        self.reader.wait_internal()
    }
}

--------------------------------------------------------------------------------
/src/wrap.rs:
--------------------------------------------------------------------------------
use ratatui::text::{Line, Span};

/// Splits a sequence of styled spans into successive lines of at most
/// `max_width` characters, one line per call to [`LineWrapper::next`].
///
/// Widths are counted in `char`s (`str::chars().count()`), not in terminal
/// display columns.
pub struct LineWrapper<'a> {
    /// Input spans, consumed front to back.
    spans: &'a [Span<'a>],
    /// Maximum number of characters per produced line.
    max_width: usize,
    /// When true, prefer breaking after the last space that fits.
    word_wrap: bool,
    /// Index of the next span in `spans` that has not been taken yet.
    index: usize,
    /// Remainder of a span that was split by the previous line break.
    pending: Option<Span<'a>>,
}

impl<'a> LineWrapper<'a> {
    /// Creates a wrapper over `spans` that has not produced any line yet.
    pub fn new(spans: &'a [Span<'a>], max_width: usize, word_wrap: bool) -> Self {
        LineWrapper {
            spans,
            max_width,
            word_wrap,
            index: 0,
            pending: None,
        }
    }

    /// Produces the next wrapped line, or `None` once every span and any
    /// pending remainder has been consumed.
    ///
    /// Spans are appended to the line while they fit in the remaining width.
    /// A line ends when:
    /// * a `'\n'` is found within the remaining width - the text before it
    ///   goes on this line, the newline is dropped and the rest of the span
    ///   is kept for the next call;
    /// * a span does not fit - it is cut at the remaining width (or, with
    ///   `word_wrap`, after the last space that fits, if any non-whitespace
    ///   text precedes that space) and the rest is kept for the next call;
    /// * the width is used up exactly, or the input runs out.
    ///
    /// Every piece keeps the style of the span it was cut from.
    ///
    /// With `max_width == 0` ordinary characters are never consumed, so each
    /// call returns an empty piece and `finished()` stays `false`; callers
    /// must bound the number of calls themselves (see `test_zero_max_width`).
    pub fn next(&mut self) -> Option<Line<'a>> {
        if self.finished() {
            return None;
        }
        let mut out_spans = vec![];
        let mut remaining_width = self.max_width;
        loop {
            // A leftover from the previous break takes priority over fresh input.
            let span = if let Some(leftover) = self.pending.take() {
                leftover
            } else if self.index < self.spans.len() {
                let fresh = self.spans[self.index].clone();
                self.index += 1;
                fresh
            } else {
                break;
            };

            let chars_count = span.content.chars().count();
            let newline_pos = span.content.chars().position(|c| c == '\n');
            match newline_pos {
                Some(pos) if pos <= remaining_width => {
                    out_spans.push(Span::styled(
                        span.content.chars().take(pos).collect::<String>(),
                        span.style,
                    ));
                    // `pos + 1` skips the newline character itself.
                    self.pending = Some(Span::styled(
                        span.content.chars().skip(pos + 1).collect::<String>(),
                        span.style,
                    ));
                    // Technically this might not be zero, but this is to force the loop to
                    // break - we must wrap now.
                    remaining_width = 0;
                }
                _ if chars_count <= remaining_width => {
                    remaining_width = remaining_width.saturating_sub(chars_count);
                    out_spans.push(span);
                }
                _ => {
                    // The span is longer than the space left: cut it.
                    let hard_cut: String = span.content.chars().take(remaining_width).collect();
                    let current = if self.word_wrap {
                        // Fall back to the hard cut when no usable space exists.
                        Self::wrap_by_whitespace(&hard_cut).unwrap_or(hard_cut)
                    } else {
                        hard_cut
                    };
                    // Whatever was not emitted on this line is carried over.
                    let pending: String = span
                        .content
                        .chars()
                        .skip(current.chars().count())
                        .collect();
                    out_spans.push(Span::styled(current, span.style));
                    self.pending = Some(Span::styled(pending, span.style));
                    remaining_width = 0;
                }
            }

            if remaining_width == 0 {
                break;
            }
        }
        Some(Line::from(out_spans))
    }

    /// Returns true when there is no pending remainder and every input span
    /// has been taken.
    pub fn finished(&self) -> bool {
        self.pending.is_none() && self.index >= self.spans.len()
    }

    /// Returns the part of `s` up to and including its last space, provided
    /// the text before that space contains at least one non-whitespace
    /// character; otherwise returns `None` (no space at all, or only
    /// whitespace before the last space).
    fn wrap_by_whitespace(s: &str) -> Option<String> {
        let (front, _) = s.rsplit_once(' ')?;
        if front.chars().any(|c| !c.is_whitespace()) {
            Some([front, " "].concat())
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {

    use super::*;
    use ratatui::style::{Color, Style};

    #[test]
    fn test_no_wrapping() {
        let s = Span::raw("hello");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 10, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![s.clone()])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_with_wrapping() {
        let s = Span::raw("hello");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 2, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("he")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ll")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("o")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_new_lines_before_max_width() {
        let s = Span::raw("hello\nworld");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 10, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("hello")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("world")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_new_lines_after_max_width() {
        let s = Span::raw("hello\nworld");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 3, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("hel")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("lo")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("wor")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ld")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_multiple_spans() {
        let style = Style::default().fg(Color::Red);
        let spans = vec![
            Span::raw("hello\n"),
            Span::styled("my", style),
            Span::raw("world"),
        ];
        let mut wrapper = LineWrapper::new(&spans, 5, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("hello")])));
        assert_eq!(
            wrapper.next(),
            Some(Line::from(vec![
                Span::raw(""),
                Span::styled("my", style),
                Span::raw("wor")
            ]))
        );
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ld")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_wrap_at_styled_span() {
        let style = Style::default().fg(Color::Red);
        let spans = vec![
            Span::raw("hello"),
            Span::styled("m\ny", style),
            Span::raw("world"),
        ];
        let mut wrapper = LineWrapper::new(&spans, 5, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("hello")])));
        assert_eq!(
            wrapper.next(),
            Some(Line::from(vec![Span::styled("m", style)]))
        );
        assert_eq!(
            wrapper.next(),
            Some(Line::from(vec![
                Span::styled("y", style),
                Span::raw("worl")
            ]))
        );
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("d")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_unicode() {
        let s = Span::raw("héllo");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 2, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("hé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ll")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("o")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_unicode_with_newline_w1() {
        let s = Span::raw("éé\néééééé");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 1, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_unicode_with_newline_w2() {
        let s = Span::raw("éé\néééééé");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 2, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éé")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_unicode_with_newline_w3() {
        let s = Span::raw("éé\néééééé");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 3, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ééé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ééé")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_unicode_with_newline_w4() {
        let s = Span::raw("éé\néééééé");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 4, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éééé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éé")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_wrap_by_whitespace_1() {
        let s = Span::raw("é é");
        let out = LineWrapper::wrap_by_whitespace(&s.content);
        assert_eq!(out, Some("é ".to_string()));
    }

    #[test]
    fn test_wrap_by_whitespace_2() {
        let s = Span::raw(" éé");
        let out = LineWrapper::wrap_by_whitespace(&s.content);
        assert_eq!(out, None);
    }

    #[test]
    fn test_word_wrap_1() {
        let s = Span::raw("éé\né éé ééé");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 3, true);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é ")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("éé ")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ééé")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_word_wrap_2() {
        let s = Span::raw("ééé é ééé ééé");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 3, true);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ééé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw(" é ")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ééé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw(" éé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("é")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_multiple_newlines() {
        let s = Span::raw("ééé\n\nééé");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 4, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ééé")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("ééé")])));
        assert_eq!(wrapper.next(), None);
    }

    #[test]
    fn test_zero_max_width() {
        let s = Span::raw("ééé\n\nééé");
        let spans = vec![s.clone()];
        let mut wrapper = LineWrapper::new(&spans, 0, false);
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("")])));
        assert_eq!(wrapper.next(), Some(Line::from(vec![Span::raw("")])));
        assert_eq!(wrapper.finished(), false);
    }
}

-------------------------------------------------------------------------------- /tests/data/bad_73.csv: -------------------------------------------------------------------------------- 1 | COL1, COL2 2 | c1 3 | c2, v2 4 | c2, 4, 5 | c3 6 | c4 7 | c5 8 | c7 9 | c9 10 | c10 11 | c11 12 | c12 13 | c13 14 | c14,2 -------------------------------------------------------------------------------- /tests/data/bad_double_quote.csv: -------------------------------------------------------------------------------- 1 | Column1, "column2" 2 | 1, "quote" 3 | 5, "Comma, comma" 4 | -------------------------------------------------------------------------------- /tests/data/cities.csv: -------------------------------------------------------------------------------- 1 | LatD,LatM,LatS,NS,LonD,LonM,LonS,EW,City,State 2 | 41,5,59,N,80,39,0,W,Youngstown,OH 3 | 42,52,48,N,97,23,23,,Yankton,SD 4 | 46,35,59,N,120,30,36,W,Yakima,WA 5 | 42,16,12,N,71,48,0,W,Worcester,MA 6 | 43,37,48,N,89,46,11,W,Wisconsin Dells,WI 7 | 36,5,59,N,80,15,0,W,Winston-Salem,NC 8 | 49,52,48,N,97,9,0,W,Winnipeg,MB 9 | 39,11,23,N,78,9,36,W,Winchester,VA 10 | 34,14,24,N,77,55,11,W,Wilmington,NC 11 |
39,45,0,N,75,33,0,W,Wilmington,DE 12 | 48,9,0,N,103,37,12,W,Williston,ND 13 | 41,15,0,N,77,0,0,W,Williamsport,PA 14 | 37,40,48,N,82,16,47,W,Williamson,WV 15 | 33,54,0,N,98,29,23,W,Wichita Falls,TX 16 | 37,41,23,N,97,20,23,W,Wichita,KS 17 | 40,4,11,N,80,43,12,W,Wheeling,WV 18 | 26,43,11,N,80,3,0,W,West Palm Beach,FL 19 | 47,25,11,N,120,19,11,W,Wenatchee,WA 20 | 41,25,11,N,122,23,23,W,Weed,CA 21 | 31,13,11,N,82,20,59,W,Waycross,GA 22 | 44,57,35,N,89,38,23,W,Wausau,WI 23 | 42,21,36,N,87,49,48,W,Waukegan,IL 24 | 44,54,0,N,97,6,36,W,Watertown,SD 25 | 43,58,47,N,75,55,11,W,Watertown,NY 26 | 42,30,0,N,92,20,23,W,Waterloo,IA 27 | 41,32,59,N,73,3,0,W,Waterbury,CT 28 | 38,53,23,N,77,1,47,W,Washington,DC 29 | 41,50,59,N,79,8,23,W,Warren,PA 30 | 46,4,11,N,118,19,48,W,Walla Walla,WA 31 | 31,32,59,N,97,8,23,W,Waco,TX 32 | 38,40,48,N,87,31,47,W,Vincennes,IN 33 | 28,48,35,N,97,0,36,W,Victoria,TX 34 | 32,20,59,N,90,52,47,W,Vicksburg,MS 35 | 49,16,12,N,123,7,12,W,Vancouver,BC 36 | 46,55,11,N,98,0,36,W,Valley City,ND 37 | 30,49,47,N,83,16,47,W,Valdosta,GA 38 | 43,6,36,N,75,13,48,W,Utica,NY 39 | 39,54,0,N,79,43,48,W,Uniontown,PA 40 | 32,20,59,N,95,18,0,W,Tyler,TX 41 | 42,33,36,N,114,28,12,W,Twin Falls,ID 42 | 33,12,35,N,87,34,11,W,Tuscaloosa,AL 43 | 34,15,35,N,88,42,35,W,Tupelo,MS 44 | 36,9,35,N,95,54,36,W,Tulsa,OK 45 | 32,13,12,N,110,58,12,W,Tucson,AZ 46 | 37,10,11,N,104,30,36,W,Trinidad,CO 47 | 40,13,47,N,74,46,11,W,Trenton,NJ 48 | 44,45,35,N,85,37,47,W,Traverse City,MI 49 | 43,39,0,N,79,22,47,W,Toronto,ON 50 | 39,2,59,N,95,40,11,W,Topeka,KS 51 | 41,39,0,N,83,32,24,W,Toledo,OH 52 | 33,25,48,N,94,3,0,W,Texarkana,TX 53 | 39,28,12,N,87,24,36,W,Terre Haute,IN 54 | 27,57,0,N,82,26,59,W,Tampa,FL 55 | 30,27,0,N,84,16,47,W,Tallahassee,FL 56 | 47,14,24,N,122,25,48,W,Tacoma,WA 57 | 43,2,59,N,76,9,0,W,Syracuse,NY 58 | 32,35,59,N,82,20,23,W,Swainsboro,GA 59 | 33,55,11,N,80,20,59,W,Sumter,SC 60 | 40,59,24,N,75,11,24,W,Stroudsburg,PA 61 | 37,57,35,N,121,17,24,W,Stockton,CA 62 | 
44,31,12,N,89,34,11,W,Stevens Point,WI 63 | 40,21,36,N,80,37,12,W,Steubenville,OH 64 | 40,37,11,N,103,13,12,W,Sterling,CO 65 | 38,9,0,N,79,4,11,W,Staunton,VA 66 | 39,55,11,N,83,48,35,W,Springfield,OH 67 | 37,13,12,N,93,17,24,W,Springfield,MO 68 | 42,5,59,N,72,35,23,W,Springfield,MA 69 | 39,47,59,N,89,39,0,W,Springfield,IL 70 | 47,40,11,N,117,24,36,W,Spokane,WA 71 | 41,40,48,N,86,15,0,W,South Bend,IN 72 | 43,32,24,N,96,43,48,W,Sioux Falls,SD 73 | 42,29,24,N,96,23,23,W,Sioux City,IA 74 | 32,30,35,N,93,45,0,W,Shreveport,LA 75 | 33,38,23,N,96,36,36,W,Sherman,TX 76 | 44,47,59,N,106,57,35,W,Sheridan,WY 77 | 35,13,47,N,96,40,48,W,Seminole,OK 78 | 32,25,11,N,87,1,11,W,Selma,AL 79 | 38,42,35,N,93,13,48,W,Sedalia,MO 80 | 47,35,59,N,122,19,48,W,Seattle,WA 81 | 41,24,35,N,75,40,11,W,Scranton,PA 82 | 41,52,11,N,103,39,36,W,Scottsbluff,NB 83 | 42,49,11,N,73,56,59,W,Schenectady,NY 84 | 32,4,48,N,81,5,23,W,Savannah,GA 85 | 46,29,24,N,84,20,59,W,Sault Sainte Marie,MI 86 | 27,20,24,N,82,31,47,W,Sarasota,FL 87 | 38,26,23,N,122,43,12,W,Santa Rosa,CA 88 | 35,40,48,N,105,56,59,W,Santa Fe,NM 89 | 34,25,11,N,119,41,59,W,Santa Barbara,CA 90 | 33,45,35,N,117,52,12,W,Santa Ana,CA 91 | 37,20,24,N,121,52,47,W,San Jose,CA 92 | 37,46,47,N,122,25,11,W,San Francisco,CA 93 | 41,27,0,N,82,42,35,W,Sandusky,OH 94 | 32,42,35,N,117,9,0,W,San Diego,CA 95 | 34,6,36,N,117,18,35,W,San Bernardino,CA 96 | 29,25,12,N,98,30,0,W,San Antonio,TX 97 | 31,27,35,N,100,26,24,W,San Angelo,TX 98 | 40,45,35,N,111,52,47,W,Salt Lake City,UT 99 | 38,22,11,N,75,35,59,W,Salisbury,MD 100 | 36,40,11,N,121,39,0,W,Salinas,CA 101 | 38,50,24,N,97,36,36,W,Salina,KS 102 | 38,31,47,N,106,0,0,W,Salida,CO 103 | 44,56,23,N,123,1,47,W,Salem,OR 104 | 44,57,0,N,93,5,59,W,Saint Paul,MN 105 | 38,37,11,N,90,11,24,W,Saint Louis,MO 106 | 39,46,12,N,94,50,23,W,Saint Joseph,MO 107 | 42,5,59,N,86,28,48,W,Saint Joseph,MI 108 | 44,25,11,N,72,1,11,W,Saint Johnsbury,VT 109 | 45,34,11,N,94,10,11,W,Saint Cloud,MN 110 | 29,53,23,N,81,19,11,W,Saint 
Augustine,FL 111 | 43,25,48,N,83,56,24,W,Saginaw,MI 112 | 38,35,24,N,121,29,23,W,Sacramento,CA 113 | 43,36,36,N,72,58,12,W,Rutland,VT 114 | 33,24,0,N,104,31,47,W,Roswell,NM 115 | 35,56,23,N,77,48,0,W,Rocky Mount,NC 116 | 41,35,24,N,109,13,48,W,Rock Springs,WY 117 | 42,16,12,N,89,5,59,W,Rockford,IL 118 | 43,9,35,N,77,36,36,W,Rochester,NY 119 | 44,1,12,N,92,27,35,W,Rochester,MN 120 | 37,16,12,N,79,56,24,W,Roanoke,VA 121 | 37,32,24,N,77,26,59,W,Richmond,VA 122 | 39,49,48,N,84,53,23,W,Richmond,IN 123 | 38,46,12,N,112,5,23,W,Richfield,UT 124 | 45,38,23,N,89,25,11,W,Rhinelander,WI 125 | 39,31,12,N,119,48,35,W,Reno,NV 126 | 50,25,11,N,104,39,0,W,Regina,SA 127 | 40,10,48,N,122,14,23,W,Red Bluff,CA 128 | 40,19,48,N,75,55,48,W,Reading,PA 129 | 41,9,35,N,81,14,23,W,Ravenna,OH 130 | -------------------------------------------------------------------------------- /tests/data/empty.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | 1, 3 | 2, -------------------------------------------------------------------------------- /tests/data/filter.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | $(#1#2#.3),1 3 | 123,2 4 | 456,3 -------------------------------------------------------------------------------- /tests/data/gnu_lgpl.txt: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 
14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /tests/data/good_double_quote.csv: -------------------------------------------------------------------------------- 1 | Column1,"column2" 2 | 1,"quote" 3 | 5,"Comma, comma" -------------------------------------------------------------------------------- /tests/data/irregular.csv: -------------------------------------------------------------------------------- 1 | COL1, COL2 2 | c1 3 | c2, v2 -------------------------------------------------------------------------------- /tests/data/irregular_last_row.csv: -------------------------------------------------------------------------------- 1 | AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB,AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB,AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB,AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB,AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB 2 | AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB,AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB,AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB,AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB,AAAAAAAAAAAAAAAAAAAA,BBBBBBBBBBBBBBBBBBBB 3 | A, 4 | -------------------------------------------------------------------------------- /tests/data/irregular_more_fields.csv: -------------------------------------------------------------------------------- 1 | COL1,COL2 2 | x1,x2,x3 3 | y1,y2,y3,y4 -------------------------------------------------------------------------------- /tests/data/multi_lines.csv: -------------------------------------------------------------------------------- 1 | a,"b","c" 2 | 1,"this is a very long text that surely will not fit in your small screen",12345 3 | 2,"this 4 | is 5 | an 6 | 
even 7 | longer 8 | text 9 | that 10 | surely 11 | will 12 | not 13 | fit 14 | in 15 | your 16 | small 17 | screen",678910 18 | 3,normal text now,"123,456,789" -------------------------------------------------------------------------------- /tests/data/multi_lines_carriage_return.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,this is a very long text that surely will not fit in your small screen,12345 3 | 2,"this 4 | 5 | is 6 | 7 | an 8 | 9 | even 10 | 11 | longer 12 | 13 | text 14 | 15 | that 16 | 17 | surely 18 | 19 | will 20 | 21 | not 22 | 23 | fit 24 | 25 | in 26 | 27 | your 28 | 29 | small 30 | 31 | screen",678910 32 | 3,normal text now,"123,456,789" 33 | -------------------------------------------------------------------------------- /tests/data/multiple_newlines.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,this is a very long text that surely will not fit in your small screen,12345 3 | 2,"this 4 | 5 | is 6 | 7 | an 8 | 9 | even 10 | 11 | longer 12 | 13 | text 14 | 15 | that 16 | 17 | surely 18 | 19 | will 20 | 21 | not 22 | 23 | fit 24 | 25 | in 26 | 27 | your 28 | 29 | small 30 | 31 | screen",678910 32 | 3,normal text now,"123,456,789" 33 | -------------------------------------------------------------------------------- /tests/data/no_headers.csv: -------------------------------------------------------------------------------- 1 | A1,B1 2 | A2,B2 3 | A3,B3 4 | A4,B4 5 | A5,B5 6 | A6,B6 7 | A7,B7 8 | A8,B8 9 | A9,B9 10 | A10,B10 11 | A11,B11 12 | A12,B12 13 | A13,B13 14 | A14,B14 15 | A15,B15 16 | A16,B16 17 | A17,B17 18 | A18,B18 19 | A19,B19 20 | A20,B20 -------------------------------------------------------------------------------- /tests/data/one_wide_column.txt: -------------------------------------------------------------------------------- 1 | id,text,label 2 | 1,this is a very very very very very very very very very very very very very 
very very very very very very very very very very long thing,hotdog 3 | 2,this is a very very very very very very very very very very very very very very very very very very very very very very very short thing,not_hotdog -------------------------------------------------------------------------------- /tests/data/small.bsv: -------------------------------------------------------------------------------- 1 | COL1|COL2 2 | c1|v1 3 | c2|v2 -------------------------------------------------------------------------------- /tests/data/small.csv: -------------------------------------------------------------------------------- 1 | COL1, COL2 2 | c1, v1 3 | c2, v2 -------------------------------------------------------------------------------- /tests/data/starts_with_newline.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,this is a very long text that surely will not fit in your small screen,12345 3 | 2," 4 | starts with new line",678910 5 | 3,normal text now,"123,456,789" 6 | --------------------------------------------------------------------------------