├── tests ├── fixtures │ ├── empty │ │ ├── input │ │ ├── opts │ │ └── stdout │ ├── hello │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── shift_jis │ │ ├── errcode │ │ ├── stderr │ │ └── input │ ├── empty_line_mode │ │ ├── input │ │ ├── opts │ │ └── stdout │ ├── flags_bp │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── flags_cl │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── flags_w │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── i_can_eat_glass │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── all_newlines │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── i_can_eat_glass_multi │ │ ├── opts │ │ ├── input_turkish │ │ ├── input_greek │ │ ├── input_vietnamese_nfc │ │ ├── input_vietnamese_nfd │ │ └── stdout │ ├── partial_shift_jis │ │ ├── errcode │ │ ├── opts │ │ ├── stderr │ │ ├── input │ │ ├── stdout │ │ └── README.md │ ├── ladle_rat_rotten_hut │ │ ├── opts │ │ ├── input_hebrew │ │ ├── input_english │ │ ├── input_bengali │ │ └── stdout │ ├── line_mode │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── line_mode_all_newlines │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── ladle_rat_rotten_hut_line_mode │ │ ├── opts │ │ ├── input_hebrew │ │ ├── input_english │ │ ├── input_bengali │ │ └── stdout │ ├── line_mode_no_trailing_newline │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── ladle_rat_rotten_hut_line_mode_chunk_size │ │ ├── opts │ │ ├── README.md │ │ ├── input_hebrew │ │ ├── input_english │ │ ├── input_bengali │ │ └── stdout │ ├── line_mode_all_newlines_count_newlines │ │ ├── opts │ │ ├── input │ │ └── stdout │ ├── ladle_rat_rotten_hut_line_mode_count_newlines │ │ ├── opts │ │ ├── input_hebrew │ │ ├── input_english │ │ ├── input_bengali │ │ └── stdout │ └── line_mode_no_trailing_newline_count_newlines │ │ ├── opts │ │ ├── input │ │ └── stdout └── cli.rs ├── release.toml ├── src ├── error.rs ├── input.rs ├── constants.rs ├── opt.rs ├── counter.rs ├── ubufreader.rs └── main.rs ├── .github ├── dependabot.yml └── workflows │ └── test.yml ├── Cargo.toml ├── CHANGELOG.md ├── 
README.md ├── LICENSE └── Cargo.lock /tests/fixtures/empty/input: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/hello/opts: -------------------------------------------------------------------------------- 1 | -a -------------------------------------------------------------------------------- /tests/fixtures/empty/opts: -------------------------------------------------------------------------------- 1 | -a 2 | -------------------------------------------------------------------------------- /tests/fixtures/hello/input: -------------------------------------------------------------------------------- 1 | hello -------------------------------------------------------------------------------- /tests/fixtures/shift_jis/errcode: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/empty_line_mode/input: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/flags_bp/opts: -------------------------------------------------------------------------------- 1 | -bp 2 | -------------------------------------------------------------------------------- /tests/fixtures/flags_cl/opts: -------------------------------------------------------------------------------- 1 | -cl 2 | -------------------------------------------------------------------------------- /tests/fixtures/flags_w/opts: -------------------------------------------------------------------------------- 1 | -w 2 | -------------------------------------------------------------------------------- /tests/fixtures/i_can_eat_glass/opts: -------------------------------------------------------------------------------- 1 | -a 
-------------------------------------------------------------------------------- /tests/fixtures/all_newlines/opts: -------------------------------------------------------------------------------- 1 | -a 2 | -------------------------------------------------------------------------------- /tests/fixtures/i_can_eat_glass_multi/opts: -------------------------------------------------------------------------------- 1 | -a -------------------------------------------------------------------------------- /tests/fixtures/partial_shift_jis/errcode: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut/opts: -------------------------------------------------------------------------------- 1 | -a 2 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode/opts: -------------------------------------------------------------------------------- 1 | -a --mode line 2 | -------------------------------------------------------------------------------- /tests/fixtures/partial_shift_jis/opts: -------------------------------------------------------------------------------- 1 | -a 2 | -------------------------------------------------------------------------------- /tests/fixtures/shift_jis/stderr: -------------------------------------------------------------------------------- 1 | invalid utf-8 2 | -------------------------------------------------------------------------------- /tests/fixtures/empty_line_mode/opts: -------------------------------------------------------------------------------- 1 | -a --mode line 2 | -------------------------------------------------------------------------------- /tests/fixtures/partial_shift_jis/stderr: -------------------------------------------------------------------------------- 1 | invalid utf-8 2 | 
-------------------------------------------------------------------------------- /release.toml: -------------------------------------------------------------------------------- 1 | publish = false 2 | push-remote = "origin" 3 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_all_newlines/opts: -------------------------------------------------------------------------------- 1 | -a --mode line 2 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode/opts: -------------------------------------------------------------------------------- 1 | -a -m line 2 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_no_trailing_newline/opts: -------------------------------------------------------------------------------- 1 | -a --mode line 2 | -------------------------------------------------------------------------------- /tests/fixtures/all_newlines/input: -------------------------------------------------------------------------------- 1 | foo 2 | bar 3 | 4 | baz…quux 
xi
 5 | -------------------------------------------------------------------------------- /tests/fixtures/flags_w/input: -------------------------------------------------------------------------------- 1 | Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα. 2 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_all_newlines/input: -------------------------------------------------------------------------------- 1 | foo 2 | bar 3 | 4 | baz…quux 
xi
 5 | -------------------------------------------------------------------------------- /tests/fixtures/flags_bp/input: -------------------------------------------------------------------------------- 1 | Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα. 2 | -------------------------------------------------------------------------------- /tests/fixtures/flags_cl/input: -------------------------------------------------------------------------------- 1 | Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα. 2 | -------------------------------------------------------------------------------- /tests/fixtures/i_can_eat_glass_multi/input_turkish: -------------------------------------------------------------------------------- 1 | جام ييه بلورم بڭا ضررى طوقونمز 2 | -------------------------------------------------------------------------------- /tests/fixtures/flags_w/stdout: -------------------------------------------------------------------------------- 1 | words filename 2 | 9 tests/fixtures/flags_w/input 3 | -------------------------------------------------------------------------------- /tests/fixtures/i_can_eat_glass/input: -------------------------------------------------------------------------------- 1 | Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα. 
2 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/opts: -------------------------------------------------------------------------------- 1 | -a -m line --chunk-size 3 2 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode/input: -------------------------------------------------------------------------------- 1 | a 2 | aa 3 | aaa 4 | aaaá 5 | b 6 | bbbb 7 | bb 8 | bbb 9 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_all_newlines_count_newlines/opts: -------------------------------------------------------------------------------- 1 | -a --mode line --count-newlines 2 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/opts: -------------------------------------------------------------------------------- 1 | -a -m line --count-newlines 2 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_all_newlines_count_newlines/input: -------------------------------------------------------------------------------- 1 | foo 2 | bar 3 | 4 | baz…quux 
xi
 5 | -------------------------------------------------------------------------------- /tests/fixtures/i_can_eat_glass_multi/input_greek: -------------------------------------------------------------------------------- 1 | Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα. 2 | -------------------------------------------------------------------------------- /tests/fixtures/i_can_eat_glass_multi/input_vietnamese_nfc: -------------------------------------------------------------------------------- 1 | Tôi có thể ăn thủy tinh mà không hại gì. 2 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_no_trailing_newline_count_newlines/opts: -------------------------------------------------------------------------------- 1 | -a --mode line --count-newlines 2 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_no_trailing_newline/input: -------------------------------------------------------------------------------- 1 | a 2 | aa 3 | aaa 4 | aaaá 5 | b 6 | bbbb 7 | bb 8 | bbb -------------------------------------------------------------------------------- /tests/fixtures/shift_jis/input: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dead10ck/uwc/HEAD/tests/fixtures/shift_jis/input -------------------------------------------------------------------------------- /tests/fixtures/flags_cl/stdout: -------------------------------------------------------------------------------- 1 | lines graphemes filename 2 | 1 51 tests/fixtures/flags_cl/input 3 | -------------------------------------------------------------------------------- /tests/fixtures/i_can_eat_glass_multi/input_vietnamese_nfd: -------------------------------------------------------------------------------- 1 | Tôi có thể ăn thủy tinh mà không hại gì. 
2 | -------------------------------------------------------------------------------- /tests/fixtures/flags_bp/stdout: -------------------------------------------------------------------------------- 1 | bytes codepoints filename 2 | 97 51 tests/fixtures/flags_bp/input 3 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_no_trailing_newline_count_newlines/input: -------------------------------------------------------------------------------- 1 | a 2 | aa 3 | aaa 4 | aaaá 5 | b 6 | bbbb 7 | bb 8 | bbb -------------------------------------------------------------------------------- /tests/fixtures/partial_shift_jis/input: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dead10ck/uwc/HEAD/tests/fixtures/partial_shift_jis/input -------------------------------------------------------------------------------- /tests/fixtures/empty/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 0 0 0 0 0 tests/fixtures/empty/input 3 | -------------------------------------------------------------------------------- /tests/fixtures/hello/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 0 1 5 5 5 tests/fixtures/hello/input 3 | -------------------------------------------------------------------------------- /tests/fixtures/all_newlines/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 8 5 29 23 24 tests/fixtures/all_newlines/input 3 | -------------------------------------------------------------------------------- /tests/fixtures/i_can_eat_glass/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 
1 9 97 51 51 tests/fixtures/i_can_eat_glass/input 3 | -------------------------------------------------------------------------------- /tests/fixtures/partial_shift_jis/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 1 1 6 6 6 tests/fixtures/partial_shift_jis/input 3 | -------------------------------------------------------------------------------- /tests/fixtures/empty_line_mode/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 0 0 0 0 0 tests/fixtures/empty_line_mode/input:total 3 | -------------------------------------------------------------------------------- /tests/fixtures/partial_shift_jis/README.md: -------------------------------------------------------------------------------- 1 | In the event of an error while reading a file, it should still print out 2 | anything it was able to count successfully up until the error. 3 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/README.md: -------------------------------------------------------------------------------- 1 | This is the same test as `ladle_rat_rotten_hut_line_mode`, but specifies a small 2 | chunk size to ensure the line mode's line number suffix is tracked correctly across 3 | chunks. 4 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut/input_hebrew: -------------------------------------------------------------------------------- 1 | "הו דקדוק, שקית מים עכברוש הבטן!" 2 | 3 | דאגה דאגה על - זין-אגוז מצקת שחפים פן יבלות. שמן להציע sodden, caking להציע carvers 4 | הימור מתיז מתיז, הדיסק מטמון אגרוף wipped שפתיים עצמו נקבוביות הדלג חולדה Rotten 5 | צריף פרוע משובש. 
6 | 7 | מורל: עוד לא סורגום סרחון סגור מצקת שחפים פקק מומנט רטוב מסננים 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode/input_hebrew: -------------------------------------------------------------------------------- 1 | "הו דקדוק, שקית מים עכברוש הבטן!" 2 | 3 | דאגה דאגה על - זין-אגוז מצקת שחפים פן יבלות. שמן להציע sodden, caking להציע carvers 4 | הימור מתיז מתיז, הדיסק מטמון אגרוף wipped שפתיים עצמו נקבוביות הדלג חולדה Rotten 5 | צריף פרוע משובש. 6 | 7 | מורל: עוד לא סורגום סרחון סגור מצקת שחפים פקק מומנט רטוב מסננים 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_hebrew: -------------------------------------------------------------------------------- 1 | "הו דקדוק, שקית מים עכברוש הבטן!" 2 | 3 | דאגה דאגה על - זין-אגוז מצקת שחפים פן יבלות. שמן להציע sodden, caking להציע carvers 4 | הימור מתיז מתיז, הדיסק מטמון אגרוף wipped שפתיים עצמו נקבוביות הדלג חולדה Rotten 5 | צריף פרוע משובש. 6 | 7 | מורל: עוד לא סורגום סרחון סגור מצקת שחפים פקק מומנט רטוב מסננים 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_hebrew: -------------------------------------------------------------------------------- 1 | "הו דקדוק, שקית מים עכברוש הבטן!" 2 | 3 | דאגה דאגה על - זין-אגוז מצקת שחפים פן יבלות. שמן להציע sodden, caking להציע carvers 4 | הימור מתיז מתיז, הדיסק מטמון אגרוף wipped שפתיים עצמו נקבוביות הדלג חולדה Rotten 5 | צריף פרוע משובש. 6 | 7 | מורל: עוד לא סורגום סרחון סגור מצקת שחפים פקק מומנט רטוב מסננים 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut/input_english: -------------------------------------------------------------------------------- 1 | "O Grammar, water bag mouser gut! A nervous sore suture bag mouse!" 
2 | 3 | Daze worry on-forger-nut ladle gull's lest warts. Oil offer sodden, caking offer 4 | carvers an sprinkling otter bet, disk hoard-hoarded woof lipped own pore Ladle 5 | Rat Rotten Hut an garbled erupt. 6 | 7 | MURAL: Yonder nor sorghum stenches shut ladle gulls stopper torque wet strainers 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode/input_english: -------------------------------------------------------------------------------- 1 | "O Grammar, water bag mouser gut! A nervous sore suture bag mouse!" 2 | 3 | Daze worry on-forger-nut ladle gull's lest warts. Oil offer sodden, caking offer 4 | carvers an sprinkling otter bet, disk hoard-hoarded woof lipped own pore Ladle 5 | Rat Rotten Hut an garbled erupt. 6 | 7 | MURAL: Yonder nor sorghum stenches shut ladle gulls stopper torque wet strainers 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut/input_bengali: -------------------------------------------------------------------------------- 1 | "হে ব্যাকরণ, জলের ব্যাগ মুরগির গট! একটি স্নায়বিক গর্জনকারী ব্যাগ মাউস!" 
2 | 3 | ডেজের উপর বিরক্ত থাকুন, ক্ষীণ-কাঁটা বালি গল এর টানা warts। তেল অফারটি নিখুঁতভাবে, 4 | একটি ছিড় ছিঁড়ে ছিঁড়ে বেঁধে বেঁধে কাকিং কেকিং করে, ডিস্কের জমাট বাঁধাকৃতির কাঁধের লোমের রাড 5 | রটেন হট একটি বিকৃত আগুন 6 | 7 | মিরির: স্কারর বা সোরগামের স্টেনচ্যাচ বন্ধনী গ্লস স্টপ টর্কে ভিজা স্ট্রেনারগুলি বন্ধ করে দেয় 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 7 59 954 234 368 tests/fixtures/ladle_rat_rotten_hut/input_bengali 3 | 7 57 346 346 346 tests/fixtures/ladle_rat_rotten_hut/input_english 4 | 7 49 471 282 282 tests/fixtures/ladle_rat_rotten_hut/input_hebrew 5 | 21 165 1771 862 996 total 6 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_english: -------------------------------------------------------------------------------- 1 | "O Grammar, water bag mouser gut! A nervous sore suture bag mouse!" 2 | 3 | Daze worry on-forger-nut ladle gull's lest warts. Oil offer sodden, caking offer 4 | carvers an sprinkling otter bet, disk hoard-hoarded woof lipped own pore Ladle 5 | Rat Rotten Hut an garbled erupt. 6 | 7 | MURAL: Yonder nor sorghum stenches shut ladle gulls stopper torque wet strainers 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_english: -------------------------------------------------------------------------------- 1 | "O Grammar, water bag mouser gut! A nervous sore suture bag mouse!" 2 | 3 | Daze worry on-forger-nut ladle gull's lest warts. Oil offer sodden, caking offer 4 | carvers an sprinkling otter bet, disk hoard-hoarded woof lipped own pore Ladle 5 | Rat Rotten Hut an garbled erupt. 
6 | 7 | MURAL: Yonder nor sorghum stenches shut ladle gulls stopper torque wet strainers 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode/input_bengali: -------------------------------------------------------------------------------- 1 | "হে ব্যাকরণ, জলের ব্যাগ মুরগির গট! একটি স্নায়বিক গর্জনকারী ব্যাগ মাউস!" 2 | 3 | ডেজের উপর বিরক্ত থাকুন, ক্ষীণ-কাঁটা বালি গল এর টানা warts। তেল অফারটি নিখুঁতভাবে, 4 | একটি ছিড় ছিঁড়ে ছিঁড়ে বেঁধে বেঁধে কাকিং কেকিং করে, ডিস্কের জমাট বাঁধাকৃতির কাঁধের লোমের রাড 5 | রটেন হট একটি বিকৃত আগুন 6 | 7 | মিরির: স্কারর বা সোরগামের স্টেনচ্যাচ বন্ধনী গ্লস স্টপ টর্কে ভিজা স্ট্রেনারগুলি বন্ধ করে দেয় 8 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use crate::io; 2 | use std; 3 | 4 | use failure::Fail; 5 | 6 | /// An error that can occur during a run of `uwc`. 7 | #[derive(Debug, Fail)] 8 | pub enum UwcError { 9 | #[fail(display = "io error occurred: {}", _0)] 10 | IoError(io::Error), 11 | 12 | #[fail(display = "read non-utf8 bytes: {}", _0)] 13 | Utf8Error(std::string::FromUtf8Error), 14 | } 15 | 16 | pub type Result<T> = std::result::Result<T, UwcError>; 17 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_bengali: -------------------------------------------------------------------------------- 1 | "হে ব্যাকরণ, জলের ব্যাগ মুরগির গট! একটি স্নায়বিক গর্জনকারী ব্যাগ মাউস!" 
2 | 3 | ডেজের উপর বিরক্ত থাকুন, ক্ষীণ-কাঁটা বালি গল এর টানা warts। তেল অফারটি নিখুঁতভাবে, 4 | একটি ছিড় ছিঁড়ে ছিঁড়ে বেঁধে বেঁধে কাকিং কেকিং করে, ডিস্কের জমাট বাঁধাকৃতির কাঁধের লোমের রাড 5 | রটেন হট একটি বিকৃত আগুন 6 | 7 | মিরির: স্কারর বা সোরগামের স্টেনচ্যাচ বন্ধনী গ্লস স্টপ টর্কে ভিজা স্ট্রেনারগুলি বন্ধ করে দেয় 8 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_bengali: -------------------------------------------------------------------------------- 1 | "হে ব্যাকরণ, জলের ব্যাগ মুরগির গট! একটি স্নায়বিক গর্জনকারী ব্যাগ মাউস!" 2 | 3 | ডেজের উপর বিরক্ত থাকুন, ক্ষীণ-কাঁটা বালি গল এর টানা warts। তেল অফারটি নিখুঁতভাবে, 4 | একটি ছিড় ছিঁড়ে ছিঁড়ে বেঁধে বেঁধে কাকিং কেকিং করে, ডিস্কের জমাট বাঁধাকৃতির কাঁধের লোমের রাড 5 | রটেন হট একটি বিকৃত আগুন 6 | 7 | মিরির: স্কারর বা সোরগামের স্টেনচ্যাচ বন্ধনী গ্লস স্টপ টর্কে ভিজা স্ট্রেনারগুলি বন্ধ করে দেয় 8 | -------------------------------------------------------------------------------- /tests/fixtures/i_can_eat_glass_multi/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 1 9 97 51 51 tests/fixtures/i_can_eat_glass_multi/input_greek 3 | 1 6 56 31 31 tests/fixtures/i_can_eat_glass_multi/input_turkish 4 | 1 10 53 41 41 tests/fixtures/i_can_eat_glass_multi/input_vietnamese_nfc 5 | 1 10 61 41 51 tests/fixtures/i_can_eat_glass_multi/input_vietnamese_nfd 6 | 4 35 267 164 174 total 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "cargo" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [push] 3 | env: 4 | CARGO_TERM_COLOR: always 5 | jobs: 6 | test: 7 | name: test 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | 12 | - name: Install latest stable 13 | uses: actions-rs/toolchain@v1 14 | with: 15 | toolchain: stable 16 | override: true 17 | profile: minimal 18 | 19 | - name: Run cargo test 20 | uses: actions-rs/cargo@v1 21 | with: 22 | command: test 23 | args: --all 24 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Skyler Hawthorne "] 3 | categories = ["command-line-utilities", "text-processing"] 4 | description = "Counts things in unicode text files" 5 | edition = "2018" 6 | keywords = ["wc", "unicode", "word", "count"] 7 | license = "MPL-2.0" 8 | name = "uwc" 9 | readme = "README.md" 10 | repository = "https://github.com/dead10ck/uwc" 11 | version = "1.0.9-alpha.1" 12 | 13 | [badges.github] 14 | branch = "master" 15 | repository = "dead10ck/uwc" 16 | 17 | [dependencies] 18 | env_logger = "0.11.5" 19 | failure = "0.1.8" 20 | itertools = "0.13.0" 21 | lazy_static = "1.5.0" 22 | log = "0.4.22" 23 | rayon = "1.10.0" 24 | regex = "1.10.6" 25 | structopt = "0.3.26" 26 | structopt-derive = "0.4.18" 27 | tabwriter = "1.4.0" 28 | unicode-segmentation = "1.12.0" 29 | 
-------------------------------------------------------------------------------- /tests/fixtures/line_mode/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 0 1 1 1 1 tests/fixtures/line_mode/input:1 3 | 0 1 2 2 2 tests/fixtures/line_mode/input:2 4 | 0 1 3 3 3 tests/fixtures/line_mode/input:3 5 | 0 1 5 4 4 tests/fixtures/line_mode/input:4 6 | 0 1 1 1 1 tests/fixtures/line_mode/input:5 7 | 0 1 4 4 4 tests/fixtures/line_mode/input:6 8 | 0 1 2 2 2 tests/fixtures/line_mode/input:7 9 | 0 1 3 3 3 tests/fixtures/line_mode/input:8 10 | 0 8 21 20 20 tests/fixtures/line_mode/input:total 11 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_all_newlines/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 0 1 3 3 3 tests/fixtures/line_mode_all_newlines/input:1 3 | 0 1 3 3 3 tests/fixtures/line_mode_all_newlines/input:2 4 | 0 0 0 0 0 tests/fixtures/line_mode_all_newlines/input:3 5 | 0 1 3 3 3 tests/fixtures/line_mode_all_newlines/input:4 6 | 0 1 4 4 4 tests/fixtures/line_mode_all_newlines/input:5 7 | 0 0 0 0 0 tests/fixtures/line_mode_all_newlines/input:6 8 | 0 1 2 2 2 tests/fixtures/line_mode_all_newlines/input:7 9 | 0 0 0 0 0 tests/fixtures/line_mode_all_newlines/input:8 10 | 0 5 15 15 15 tests/fixtures/line_mode_all_newlines/input:total 11 | -------------------------------------------------------------------------------- /src/input.rs: -------------------------------------------------------------------------------- 1 | use std::fs::{self, File}; 2 | use std::io::{self, Read}; 3 | use std::path::Path; 4 | 5 | /// The string used to identify stdin. 6 | pub const STDIN_IDENTIFIER: &str = "-"; 7 | 8 | /// Choose between a regular file and stdin. 
9 | pub enum Input { 10 | File(fs::File), 11 | Stdin(io::Stdin), 12 | } 13 | 14 | impl Input { 15 | pub fn new<P: AsRef<Path>>(path: P) -> io::Result<Self> { 16 | let path = path.as_ref(); 17 | 18 | if path.as_os_str() == STDIN_IDENTIFIER { 19 | return Ok(Input::Stdin(io::stdin())); 20 | } 21 | 22 | let file = File::open(path)?; 23 | Ok(Input::File(file)) 24 | } 25 | } 26 | 27 | impl Read for Input { 28 | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { 29 | match *self { 30 | Input::File(ref mut file) => file.read(buf), 31 | Input::Stdin(ref mut stdin) => stdin.read(buf), 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_no_trailing_newline/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 0 1 1 1 1 tests/fixtures/line_mode_no_trailing_newline/input:1 3 | 0 1 2 2 2 tests/fixtures/line_mode_no_trailing_newline/input:2 4 | 0 1 3 3 3 tests/fixtures/line_mode_no_trailing_newline/input:3 5 | 0 1 5 4 4 tests/fixtures/line_mode_no_trailing_newline/input:4 6 | 0 1 1 1 1 tests/fixtures/line_mode_no_trailing_newline/input:5 7 | 0 1 4 4 4 tests/fixtures/line_mode_no_trailing_newline/input:6 8 | 0 1 2 2 2 tests/fixtures/line_mode_no_trailing_newline/input:7 9 | 0 1 3 3 3 tests/fixtures/line_mode_no_trailing_newline/input:8 10 | 0 8 21 20 20 tests/fixtures/line_mode_no_trailing_newline/input:total 11 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_all_newlines_count_newlines/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 1 1 5 4 5 tests/fixtures/line_mode_all_newlines_count_newlines/input:1 3 | 1 1 4 4 4 tests/fixtures/line_mode_all_newlines_count_newlines/input:2 4 | 1 0 1 1 1 tests/fixtures/line_mode_all_newlines_count_newlines/input:3 5 | 1 1 5 4 4 
tests/fixtures/line_mode_all_newlines_count_newlines/input:4 6 | 1 1 5 5 5 tests/fixtures/line_mode_all_newlines_count_newlines/input:5 7 | 1 0 3 1 1 tests/fixtures/line_mode_all_newlines_count_newlines/input:6 8 | 1 1 5 3 3 tests/fixtures/line_mode_all_newlines_count_newlines/input:7 9 | 1 0 1 1 1 tests/fixtures/line_mode_all_newlines_count_newlines/input:8 10 | 8 5 29 23 24 tests/fixtures/line_mode_all_newlines_count_newlines/input:total 11 | -------------------------------------------------------------------------------- /tests/fixtures/line_mode_no_trailing_newline_count_newlines/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 1 1 2 2 2 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input:1 3 | 1 1 3 3 3 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input:2 4 | 1 1 4 4 4 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input:3 5 | 1 1 6 5 5 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input:4 6 | 1 1 2 2 2 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input:5 7 | 1 1 5 5 5 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input:6 8 | 1 1 3 3 3 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input:7 9 | 0 1 3 3 3 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input:8 10 | 7 8 28 27 27 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input:total 11 | -------------------------------------------------------------------------------- /src/constants.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | 3 | use lazy_static::*; 4 | use regex::bytes::Regex; 5 | 6 | #[rustfmt::skip] pub(crate) const LF: &'static str = "\n"; // 0x00000a 7 | #[rustfmt::skip] pub(crate) const CR: &'static str = "\r"; // 0x00000d 8 | #[rustfmt::skip] pub(crate) const CRLF: &'static str = "\r\n"; // 0x000d0a 9 | 
#[rustfmt::skip] pub(crate) const NEL: &'static str = "\u{0085}"; // 0x00c285 10 | #[rustfmt::skip] pub(crate) const FF: &'static str = "\u{000C}"; // 0x00000c 11 | #[rustfmt::skip] pub(crate) const LS: &'static str = "\u{2028}"; // 0xe280a8 12 | #[rustfmt::skip] pub(crate) const PS: &'static str = "\u{2029}"; // 0xe280a9 13 | 14 | lazy_static! { 15 | /// New line sequences according to: 16 | /// http://www.unicode.org/standard/reports/tr13/tr13-5.html 17 | pub(crate) static ref NEWLINES: HashSet<&'static str> = { 18 | let mut s = HashSet::new(); 19 | s.insert(CR); 20 | s.insert(LF); 21 | s.insert(CRLF); 22 | s.insert(NEL); 23 | s.insert(FF); 24 | s.insert(LS); 25 | s.insert(PS); 26 | s 27 | }; 28 | 29 | pub(crate) static ref NEWLINE_PATTERN : Regex = { // alternation of the same seven sequences that NEWLINES holds 30 | // need to specify this order so CRLF is preferred over 31 | // CR and LF on their own 32 | let pattern = &[ CRLF, LF, CR, NEL, FF, LS, PS ].join("|"); 33 | Regex::new(&pattern).unwrap() // the pattern is assembled only from the constants in this file, so a failure here is a programming error 34 | }; 35 | } 36 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | 9 | ## [1.0.7] - 2024-09-28 10 | 11 | * Update dependencies 12 | 13 | ## [1.0.3] - 2020-08-03 14 | 15 | * Update dependencies 16 | 17 | ## [1.0.2] - 2019-10-03 18 | 19 | * If a file given on input failed to open, it would stop the whole program. 20 | Instead, just print the error on opening the file and continue trying the 21 | rest of the input files. 22 | 23 | ## [1.0.1] - 2019-10-02 24 | 25 | * Update dependencies 26 | 27 | ## [1.0.0] - 2019-10-01 28 | 29 | * Fix a bug where line mode was showing multiple lines in a single line. 
30 | * Change to 1.0, as I think this package is feature complete. 31 | 32 | ## [0.3.1] - 2019-08-22 33 | 34 | * Update dependencies 35 | * Fix typo in `repository` field of the `Cargo.toml`, so the repository link 36 | now shows up in crates.io. 37 | 38 | ## [0.3.0] - 2019-08-02 39 | 40 | In this release, the minor version was bumped by mistake. 41 | 42 | * Update dependencies 43 | 44 | ## [0.2.0] - 2019-01-28 45 | 46 | ### Added 47 | 48 | * This adds parallelization with rayon. It does this by chunking up the lines 49 | it reads and doing those in parallel. Local testing found 10,000 to be the 50 | optimal number, so that is the default. A consequence of this behavior is 51 | that if the input is slow, it will seem like it is doing nothing because it 52 | is waiting for a complete chunk before doing any counting. The `--chunk-size` 53 | option is given for this situation. 54 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 0 11 186 48 72 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_bengali:1 3 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_bengali:2 4 | 0 14 203 56 81 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_bengali:3 5 | 0 15 249 53 93 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_bengali:4 6 | 0 5 61 18 23 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_bengali:5 7 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_bengali:6 8 | 0 14 248 52 92 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_bengali:7 9 | 0 59 947 227 361 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_bengali:total 10 | 0 12 67 67 67 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_english:1 11 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_english:2 12 | 0 14 80 80 80 
tests/fixtures/ladle_rat_rotten_hut_line_mode/input_english:3 13 | 0 13 80 80 80 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_english:4 14 | 0 6 32 32 32 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_english:5 15 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_english:6 16 | 0 12 80 80 80 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_english:7 17 | 0 57 339 339 339 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_english:total 18 | 0 6 57 33 33 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_hebrew:1 19 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_hebrew:2 20 | 0 15 129 83 83 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_hebrew:3 21 | 0 13 135 80 80 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_hebrew:4 22 | 0 3 29 16 16 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_hebrew:5 23 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_hebrew:6 24 | 0 12 114 63 63 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_hebrew:7 25 | 0 49 464 275 275 tests/fixtures/ladle_rat_rotten_hut_line_mode/input_hebrew:total 26 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 0 11 186 48 72 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_bengali:1 3 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_bengali:2 4 | 0 14 203 56 81 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_bengali:3 5 | 0 15 249 53 93 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_bengali:4 6 | 0 5 61 18 23 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_bengali:5 7 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_bengali:6 8 | 0 14 248 52 92 
tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_bengali:7 9 | 0 59 947 227 361 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_bengali:total 10 | 0 12 67 67 67 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_english:1 11 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_english:2 12 | 0 14 80 80 80 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_english:3 13 | 0 13 80 80 80 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_english:4 14 | 0 6 32 32 32 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_english:5 15 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_english:6 16 | 0 12 80 80 80 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_english:7 17 | 0 57 339 339 339 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_english:total 18 | 0 6 57 33 33 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_hebrew:1 19 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_hebrew:2 20 | 0 15 129 83 83 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_hebrew:3 21 | 0 13 135 80 80 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_hebrew:4 22 | 0 3 29 16 16 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_hebrew:5 23 | 0 0 0 0 0 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_hebrew:6 24 | 0 12 114 63 63 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_hebrew:7 25 | 0 49 464 275 275 tests/fixtures/ladle_rat_rotten_hut_line_mode_chunk_size/input_hebrew:total 26 | -------------------------------------------------------------------------------- /tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/stdout: -------------------------------------------------------------------------------- 1 | lines words bytes graphemes codepoints filename 2 | 1 11 187 49 73 
tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_bengali:1 3 | 1 0 1 1 1 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_bengali:2 4 | 1 14 204 57 82 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_bengali:3 5 | 1 15 250 54 94 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_bengali:4 6 | 1 5 62 19 24 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_bengali:5 7 | 1 0 1 1 1 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_bengali:6 8 | 1 14 249 53 93 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_bengali:7 9 | 7 59 954 234 368 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_bengali:total 10 | 1 12 68 68 68 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_english:1 11 | 1 0 1 1 1 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_english:2 12 | 1 14 81 81 81 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_english:3 13 | 1 13 81 81 81 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_english:4 14 | 1 6 33 33 33 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_english:5 15 | 1 0 1 1 1 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_english:6 16 | 1 12 81 81 81 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_english:7 17 | 7 57 346 346 346 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_english:total 18 | 1 6 58 34 34 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_hebrew:1 19 | 1 0 1 1 1 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_hebrew:2 20 | 1 15 130 84 84 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_hebrew:3 21 | 1 13 136 81 81 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_hebrew:4 22 | 1 3 30 17 17 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_hebrew:5 23 | 1 0 1 1 1 
tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_hebrew:6 24 | 1 12 115 64 64 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_hebrew:7 25 | 7 49 471 282 282 tests/fixtures/ladle_rat_rotten_hut_line_mode_count_newlines/input_hebrew:total 26 | -------------------------------------------------------------------------------- /src/opt.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeSet; 2 | use std::str::FromStr; 3 | 4 | use structopt::clap::AppSettings; 5 | 6 | use crate::counter::{self, Counter}; 7 | 8 | #[derive(StructOpt, Debug)] 9 | #[structopt( 10 | name = "uwc", 11 | about = "Counts things in strings.", 12 | author, 13 | setting = AppSettings::ColoredHelp 14 | )] 15 | pub struct Opt { 16 | /// Counts the grapheme clusters 17 | #[structopt(short = "c", long = "grapheme-clusters")] 18 | pub grapheme_clusters: bool, 19 | 20 | /// Counts the number of bytes 21 | #[structopt(short = "b", long = "bytes")] 22 | pub bytes: bool, 23 | 24 | /// Counts the number of lines 25 | #[structopt(short = "l", long = "lines")] 26 | pub lines: bool, 27 | 28 | /// Counts the number of words 29 | #[structopt(short = "w", long = "words")] 30 | pub words: bool, 31 | 32 | /// Counts the number of Unicode code points 33 | #[structopt(short = "p", long = "code-points")] 34 | pub codepoints: bool, 35 | 36 | /// Counts everything. (The default counters are: lines, words, bytes) 37 | #[structopt(short = "a", long = "all")] 38 | pub all: bool, 39 | 40 | /// Don't print the field names on the first line. 41 | #[structopt(short = "n", long = "no-header")] 42 | pub no_header: bool, 43 | 44 | /// Don't print the output with elastic tabstops. Instead, fields will just be 45 | /// separated with hard tab characters. Use this if you want streaming output, 46 | /// or if you want the output to be more scriptable. 
47 | #[structopt(short = "e", long = "no-elastic")] 48 | pub no_elastic: bool, 49 | 50 | /// The counting mode. 51 | #[structopt( 52 | short = "m", 53 | long = "mode", 54 | default_value = "file", 55 | help = "The counting mode to use. Line mode will count things \ 56 | within lines, and by default, it will not count newline \ 57 | characters. See --count-newlines." 58 | )] 59 | #[structopt(possible_values = &["file", "line"])] 60 | pub mode: CountMode, 61 | 62 | /// When in line mode, count newline characters. 63 | #[structopt(long = "count-newlines")] 64 | pub count_newlines: bool, 65 | 66 | /// How many "chunks" of the file to operate on in parallel. (As of this 67 | /// version, "chunks" means lines.) You probably don't need to mess with this. 68 | /// uwc will wait until it reads this many chunks (or the end of the file) 69 | /// to start counting. For normal files, you won't notice this, but if 70 | /// you're piping a slow command into uwc, you may wonder why it doesn't 71 | /// seem to be counting anything. You can set this value lower for this case. 72 | #[structopt(long = "chunk-size", default_value = "10000")] 73 | pub chunk_size: usize, 74 | 75 | /// Sets the input file(s) to use. "-" gets treated as stdin. 76 | #[structopt(default_value = "-")] 77 | pub files: Vec, 78 | } 79 | 80 | #[derive(Debug, Copy, Clone, PartialEq, StructOpt)] 81 | pub enum CountMode { 82 | /// Performs counts for every file. 83 | File, 84 | 85 | /// Performs counts for every line. 86 | Line, 87 | } 88 | 89 | impl FromStr for CountMode { 90 | type Err = String; 91 | 92 | fn from_str(s: &str) -> Result { 93 | match s { 94 | "file" | "f" => Ok(CountMode::File), // NOTE(review): the short aliases "f"/"l" are accepted here, but `possible_values` on the `mode` field lists only "file" and "line" - confirm whether the aliases are reachable from the CLI 95 | "line" | "l" => Ok(CountMode::Line), 96 | _ => Err(format!("Unknown count mode: {}", s)), 97 | } 98 | } 99 | } 100 | 101 | impl Opt { 102 | /// Gets the [`Counter`]s from the CLI options. 
103 | pub fn get_counters(&self) -> BTreeSet { 104 | let mut counters = BTreeSet::new(); 105 | 106 | if self.all { // --all short-circuits: the individual counter flags are not consulted 107 | counters.extend(&counter::ALL_COUNTERS[..]); 108 | return counters; 109 | } 110 | 111 | if self.grapheme_clusters { 112 | counters.insert(Counter::GraphemeCluster); 113 | } 114 | 115 | if self.bytes { 116 | counters.insert(Counter::NumByte); 117 | } 118 | 119 | if self.lines { 120 | counters.insert(Counter::Line); 121 | } 122 | 123 | if self.words { 124 | counters.insert(Counter::Words); 125 | } 126 | 127 | if self.codepoints { 128 | counters.insert(Counter::CodePoints); 129 | } 130 | 131 | // pick some defaults if the user doesn't specify any counters 132 | if counters.is_empty() { 133 | counters.extend(&counter::DEFAULT_COUNTERS[..]); 134 | } 135 | 136 | counters 137 | } 138 | 139 | /// Determines if the input buffer should count newlines. 140 | pub fn should_keep_newlines(&self) -> bool { 141 | match self.mode { 142 | CountMode::File => true, // file mode always keeps newline sequences 143 | CountMode::Line => self.count_newlines, // line mode keeps them only when --count-newlines was given 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # uwc 2 | 3 | [![crates.io page](https://img.shields.io/crates/v/uwc.svg)](https://crates.io/crates/uwc) 4 | 5 | Like `wc`, but unicode-aware, and with line mode. 6 | 7 | `uwc` can count: 8 | 9 | * Lines 10 | * Words 11 | * Bytes 12 | * Grapheme clusters 13 | * Unicode code points 14 | 15 | Additionally, it can operate in *line mode*, which will count things *within* lines. 16 | 17 | ## Usage example 18 | 19 | By default, `uwc` will count lines, words, and bytes. You can specify the counters 20 | you'd like, or ask for all counters with the `-a` flag. 
21 | 22 | ```sh 23 | $ uwc tests/fixtures/**/input 24 | lines words bytes filename 25 | 8 5 29 tests/fixtures/all_newlines/input 26 | 0 0 0 tests/fixtures/empty/input 27 | 0 0 0 tests/fixtures/empty_line_mode/input 28 | 1 9 97 tests/fixtures/flags_bp/input 29 | 1 9 97 tests/fixtures/flags_cl/input 30 | 1 9 97 tests/fixtures/flags_w/input 31 | 0 1 5 tests/fixtures/hello/input 32 | 1 9 97 tests/fixtures/i_can_eat_glass/input 33 | 8 8 29 tests/fixtures/line_mode/input 34 | 7 8 28 tests/fixtures/line_mode_no_trailing_newline/input 35 | 7 8 28 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input 36 | 34 66 507 total 37 | 38 | $ uwc -a tests/fixtures/**/input 39 | lines words bytes graphemes codepoints filename 40 | 8 5 29 23 24 tests/fixtures/all_newlines/input 41 | 0 0 0 0 0 tests/fixtures/empty/input 42 | 0 0 0 0 0 tests/fixtures/empty_line_mode/input 43 | 1 9 97 51 51 tests/fixtures/flags_bp/input 44 | 1 9 97 51 51 tests/fixtures/flags_cl/input 45 | 1 9 97 51 51 tests/fixtures/flags_w/input 46 | 0 1 5 5 5 tests/fixtures/hello/input 47 | 1 9 97 51 51 tests/fixtures/i_can_eat_glass/input 48 | 8 8 29 28 28 tests/fixtures/line_mode/input 49 | 7 8 28 27 27 tests/fixtures/line_mode_no_trailing_newline/input 50 | 7 8 28 27 27 tests/fixtures/line_mode_no_trailing_newline_count_newlines/input 51 | 34 66 507 314 315 total 52 | ``` 53 | 54 | You can also switch into line mode with the `--mode` flag: 55 | 56 | ```sh 57 | $ uwc -a --mode line tests/fixtures/line_mode/input 58 | lines words bytes graphemes codepoints filename 59 | 0 1 1 1 1 tests/fixtures/line_mode/input:1 60 | 0 1 2 2 2 tests/fixtures/line_mode/input:2 61 | 0 1 3 3 3 tests/fixtures/line_mode/input:3 62 | 0 1 5 4 4 tests/fixtures/line_mode/input:4 63 | 0 1 1 1 1 tests/fixtures/line_mode/input:5 64 | 0 1 4 4 4 tests/fixtures/line_mode/input:6 65 | 0 1 2 2 2 tests/fixtures/line_mode/input:7 66 | 0 1 3 3 3 tests/fixtures/line_mode/input:8 67 | 0 8 21 20 20 tests/fixtures/line_mode/input:total 68 | ``` 69 
| 70 | ## Why? 71 | 72 | The goal of this project is to consider unicode rules correctly when counting 73 | things. Specifically, it should: 74 | 75 | * Count all newline characters correctly. This includes lesser-known line breaks, 76 | like NEL (U+0085), FF (U+000C), LS (U+2028), and PS (U+2029). 77 | * Count all words using the Unicode standard's word boundary rules. 78 | * Count all complete grapheme clusters correctly, so that even edge cases like 79 | Z҉͈͓͈͎a̘͈̠̭l̨̯g̶̬͇̭o̝̹̗͎̙ ͟t͖̙̟̹͇̥̝͡e̥͘x͚̺̭̻͘t͉͔̩̲̘, for example, are counted correctly. 80 | 81 | It does *not* aim to implement these unicode algorithms, however, so it makes use of 82 | the [`unicode-segmentation`](https://crates.io/crates/unicode-segmentation) library 83 | for most of the heavy lifting. And since Unicode support in the Rust ecosystem is 84 | not quite mature yet, that has some consequences for this project. See the 85 | caveats below. 86 | 87 | ## Installation 88 | 89 | It is published on crates.io, so simply: 90 | 91 | ```sh 92 | $ cargo install uwc 93 | ``` 94 | 95 | ## Caveats 96 | 97 | ### UTF-8 98 | 99 | It only supports UTF-8 files. UTF-16 can go on my to-do list if there is demand. 100 | For now, you can use `iconv` to convert non-UTF-8 files first. 101 | 102 | ### Memory usage 103 | 104 | The current implementation will always read complete lines before proceeding to 105 | do its counts; without hand-rolling my own streaming implementation of the 106 | Unicode line splitting algorithm, this is necessary for correctness with line 107 | mode. The consequence of this is that if you give it files with very large 108 | lines, it will use memory proportional to the size of the lines. If you give it 109 | a file with no newline sequences, it will soak up the whole file into memory. 110 | Beware. 111 | 112 | ### Speed 113 | 114 | It is slower than `wc`. 
My analysis hasn't been extensive, but as far as I can 115 | tell, the reasons are: 116 | 117 | * It is using unicode algorithms, which are just going to be slower than 118 | ASCII no matter what. 119 | * I am not that experienced with Rust, so it's quite possible I'm not doing 120 | something as efficiently as possible. 121 | * My free time is limited, and I am prioritizing correctness over speed 122 | (though speed is good). 123 | 124 | With that said, it is parallelized, which helps. With testing on my local 125 | laptop with larger data sets, the speed is within an order of magnitude of 126 | `wc`. I measured `uwc` being 1.5x slower than `wc` on a collection of 18 MiB of 127 | text files. 128 | 129 | ### Localization 130 | 131 | Rust, as yet, has no localization libraries, so this has some consequences. Some 132 | counts will just be wrong, such as hyphenated words, which is locale-specific 133 | and requires language dictionary lookups to be correct. Also, there are some 134 | languages that have no syntactic word separators, such as Japanese, so e.g. 135 | 136 | **私**は**ガラス**を**食べられます**。 137 | 138 | should be 5 words, but without localization, we cannot determine that. 139 | -------------------------------------------------------------------------------- /src/counter.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | use std::fmt; 3 | use std::str; 4 | 5 | use log::*; 6 | use unicode_segmentation::UnicodeSegmentation; 7 | 8 | use crate::constants::NEWLINES; 9 | 10 | pub type Counted = BTreeMap; // the result of a counting pass: one total per counter 11 | 12 | /// Take all the counts in `other_counts` and sum them into `accum`. 13 | pub fn sum_counts(accum: &mut Counted, other_counts: &Counted) { 14 | for (counter, count) in other_counts { 15 | let entry = accum.entry(*counter).or_insert(0); // a counter not yet present in `accum` starts from zero 16 | *entry += count; 17 | } 18 | } 19 | 20 | /// Sums all the `Counted` instances into a new one. 
21 | pub fn sum_all_counts<'a, I>(counts: I) -> Counted 22 | where 23 | I: IntoIterator, 24 | { 25 | let mut totals = BTreeMap::new(); 26 | 27 | for counts in counts { 28 | sum_counts(&mut totals, counts); 29 | } 30 | 31 | totals 32 | } 33 | 34 | /// Something that counts things in `&str`s. 35 | pub trait Count { 36 | /// Counts something in the given `&str`. 37 | fn count(&self, s: &str) -> usize; 38 | } 39 | 40 | impl Count for Counter { 41 | fn count(&self, s: &str) -> usize { 42 | match *self { 43 | Counter::GraphemeCluster => s.graphemes(true).count(), // `true` requests extended grapheme clusters 44 | Counter::NumByte => s.len(), // length of the UTF-8 text in bytes 45 | Counter::Line => s 46 | .graphemes(true) 47 | .filter(|grapheme| NEWLINES.contains(grapheme)) // a line break is a whole grapheme equal to one of the NEWLINES sequences 48 | .count(), 49 | Counter::Words => s.unicode_words().count(), 50 | Counter::CodePoints => s.chars().count(), 51 | } 52 | } 53 | } 54 | 55 | /// Different types of counters. 56 | #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Copy, Clone)] 57 | pub enum Counter { // the derived `Ord` follows declaration order, which is the order ordered collections of `Counter` iterate in - reordering variants reorders them 58 | /// Counts lines. 59 | Line, 60 | 61 | /// Counts words. 62 | Words, 63 | 64 | /// Counts the total number of bytes. 65 | NumByte, 66 | 67 | /// Counts grapheme clusters. The input is required to be valid UTF-8. 68 | GraphemeCluster, 69 | 70 | /// Counts unicode code points 71 | CodePoints, 72 | } 73 | 74 | /// A convenience array of all counter types. 75 | pub const ALL_COUNTERS: [Counter; 5] = [ 76 | Counter::GraphemeCluster, 77 | Counter::NumByte, 78 | Counter::Line, 79 | Counter::Words, 80 | Counter::CodePoints, 81 | ]; 82 | 83 | /// A convenience array of the default counter types. 
84 | pub const DEFAULT_COUNTERS: [Counter; 3] = [Counter::Line, Counter::Words, Counter::NumByte]; 85 | 86 | impl fmt::Display for Counter { // short lowercase name for each counter 87 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 88 | let s = match *self { 89 | Counter::GraphemeCluster => "graphemes", 90 | Counter::NumByte => "bytes", 91 | Counter::Line => "lines", 92 | Counter::Words => "words", 93 | Counter::CodePoints => "codepoints", 94 | }; 95 | 96 | write!(f, "{}", s) 97 | } 98 | } 99 | 100 | /// Counts the given `Counter`s in the given `&str`. 101 | pub fn count<'a, I>(counters: I, s: &str) -> Counted 102 | where 103 | I: IntoIterator, 104 | { 105 | let counts: Counted = counters.into_iter().map(|c| (*c, c.count(s))).collect(); // each counter makes its own pass over `s` 106 | debug!("s: {}, counted: {:#?}", s, counts); // logs the whole input string at debug level 107 | counts 108 | } 109 | 110 | #[cfg(test)] 111 | mod test { 112 | use super::*; 113 | use crate::counter; 114 | use crate::constants::*; 115 | use env_logger; 116 | 117 | #[test] 118 | fn test_count_hello() { 119 | let s = "hello"; 120 | let counts = count(&counter::ALL_COUNTERS[..], s); 121 | 122 | let mut correct_counts = BTreeMap::new(); 123 | correct_counts.insert(Counter::GraphemeCluster, 5); 124 | correct_counts.insert(Counter::Line, 0); 125 | correct_counts.insert(Counter::NumByte, 5); 126 | correct_counts.insert(Counter::Words, 1); 127 | correct_counts.insert(Counter::CodePoints, 5); 128 | 129 | assert_eq!(correct_counts, counts); 130 | } 131 | 132 | #[test] 133 | fn test_count_counts_lines() { 134 | let _ = env_logger::try_init(); 135 | 136 | // * \r\n is a single grapheme cluster 137 | // * trailing newlines are counted 138 | // * NEL is 2 bytes 139 | // * FF is 1 byte 140 | // * LS is 3 bytes 141 | // * PS is 3 bytes 142 | let mut s = String::from("foo\r\nbar\n\nbaz"); 143 | s += NEL; 144 | s += "quux"; 145 | s += FF; 146 | s += LS; 147 | s += "xi"; 148 | s += PS; 149 | s += "\n"; 150 | 151 | debug!("NEL: {:?}", NEL.as_bytes()); 152 | debug!("FF: {:?}", FF.as_bytes()); 153 | debug!("LS: {:?}", 
LS.as_bytes()); 154 | debug!("PS: {:?}", PS.as_bytes()); 155 | 156 | debug!("s: {}", s); 157 | 158 | for grapheme in s.graphemes(true) { 159 | debug!("grapheme: {}", grapheme); 160 | } 161 | 162 | let counts = count(&counter::ALL_COUNTERS[..], &s); 163 | 164 | let mut correct_counts = BTreeMap::new(); 165 | correct_counts.insert(Counter::GraphemeCluster, 23); 166 | correct_counts.insert(Counter::Line, 8); 167 | correct_counts.insert(Counter::NumByte, 29); 168 | correct_counts.insert(Counter::Words, 5); 169 | 170 | // one more than grapheme clusters because of \r\n 171 | correct_counts.insert(Counter::CodePoints, 24); 172 | 173 | assert_eq!(correct_counts, counts); 174 | } 175 | 176 | #[test] 177 | fn test_count_counts_words() { 178 | let _ = env_logger::try_init(); 179 | 180 | let i_can_eat_glass = 181 | "Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα."; 182 | let s = String::from(i_can_eat_glass); 183 | 184 | //debug!("words: {:?}", i_can_eat_glass.unicode_words().collect::>()); 185 | 186 | let counts = count(&counter::ALL_COUNTERS[..], &s); 187 | 188 | let mut correct_counts = BTreeMap::new(); 189 | correct_counts.insert(Counter::GraphemeCluster, 50); 190 | correct_counts.insert(Counter::Line, 0); 191 | correct_counts.insert(Counter::NumByte, i_can_eat_glass.len()); 192 | correct_counts.insert(Counter::Words, 9); 193 | correct_counts.insert(Counter::CodePoints, 50); 194 | 195 | assert_eq!(correct_counts, counts); 196 | } 197 | 198 | #[test] 199 | fn test_count_counts_codepoints() { 200 | let _ = env_logger::try_init(); 201 | 202 | // these are NOT the same! 
One is e + ́́ , and one is é, a single codepoint 203 | let one = "é"; 204 | let two = "é"; 205 | 206 | let counters = [Counter::CodePoints]; 207 | 208 | let counts = count(&counters[..], &one); 209 | 210 | let mut correct_counts = BTreeMap::new(); 211 | correct_counts.insert(Counter::CodePoints, 1); 212 | 213 | assert_eq!(correct_counts, counts); 214 | 215 | let counts = count(&counters[..], &two); 216 | 217 | let mut correct_counts = BTreeMap::new(); 218 | correct_counts.insert(Counter::CodePoints, 2); 219 | 220 | assert_eq!(correct_counts, counts); 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /src/ubufreader.rs: -------------------------------------------------------------------------------- 1 | use std::io::BufRead; 2 | use std::mem; 3 | 4 | use crate::error::{Result, UwcError}; 5 | 6 | /// An iterator over `&str`s read from a `BufRead`. For now, it reads lines, 7 | /// similar to `BufRead::lines`, but it includes the newline character for 8 | /// accurate counts. 9 | // 10 | // In the future, this should attempt to be more memory-stable by chunking by a 11 | // fixed size, or close to a fixed size, that splits on grapheme cluster 12 | // boundaries. 13 | pub struct UStrChunksIter<'a, R: BufRead + 'a> { 14 | /// The `BufRead` to read from. 15 | pub reader: &'a mut R, 16 | 17 | /// Marks whether this iterator should keep reading from the reader or not. It 18 | /// will become false if the underlying reader has been closed, or some 19 | /// error has occurred. 20 | keep_reading: bool, 21 | 22 | /// For line mode. Indicates whether the newline should be kept or not. 
23 | keep_newline: bool, 24 | 25 | /// Internal buffer for reading until a break point is found 26 | buf: Vec, 27 | } 28 | 29 | impl<'a, R: BufRead> UStrChunksIter<'a, R> { 30 | pub fn new(reader: &'a mut R, keep_newline: bool) -> UStrChunksIter<'a, R> { 31 | UStrChunksIter { 32 | reader, 33 | keep_reading: true, 34 | keep_newline: keep_newline, 35 | buf: Vec::new(), 36 | } 37 | } 38 | } 39 | 40 | impl<'a, R: BufRead> Iterator for UStrChunksIter<'a, R> { 41 | type Item = Result; 42 | 43 | fn next(&mut self) -> Option { // Yields the bytes up to the next newline sequence, or whatever is pending once the reader is exhausted. 44 | if !self.keep_reading { 45 | return None; 46 | } 47 | 48 | loop { 49 | let buffer = match self.reader.fill_buf() { 50 | Ok(buf) => buf, 51 | Err(err) => { 52 | self.keep_reading = false; 53 | return Some(Err(UwcError::IoError(err))); 54 | } 55 | }; 56 | 57 | if buffer.len() == 0 { // fill_buf() returned no bytes: the reader is exhausted 58 | self.keep_reading = false; 59 | break; 60 | } 61 | 62 | let mat = crate::constants::NEWLINE_PATTERN.find(buffer); // NOTE(review): only the current fill_buf() slice is searched, never self.buf, so a sequence split across two reads (CR ending one slice and LF starting the next, or a multi-byte NEL/LS/PS cut in two) is not matched as one unit - confirm and consider carrying the tail over 63 | 64 | // if we didn't find a newline sequence, stuff the bytes into our 65 | // buffer and keep reading 66 | if mat.is_none() { 67 | self.buf.extend_from_slice(buffer); 68 | let length = buffer.len(); 69 | self.reader.consume(length); 70 | continue; 71 | } 72 | 73 | let mat = mat.unwrap(); 74 | 75 | let end = match self.keep_newline { 76 | true => mat.end(), 77 | false => mat.start(), 78 | }; 79 | 80 | // copy up to the delimiter we found 81 | self.buf.extend_from_slice(&buffer[..end]); 82 | 83 | // consume the bytes including the delimiter regardless of whether we 84 | // want to keep the newlines for counting 85 | let consume_length = mat.end(); 86 | self.reader.consume(consume_length); 87 | 88 | break; 89 | } 90 | 91 | if !self.keep_reading && self.buf.len() == 0 { // end of input with nothing pending; an empty chunk found before end of input is still yielded below 92 | return None; 93 | } 94 | 95 | // consume the buffer we've built so far and replace it with a new one 96 | let new_str_bytes = mem::replace(&mut self.buf, Vec::new()); 97 | 98 | let new_str = match String::from_utf8(new_str_bytes) { // UTF-8 is validated per chunk, after the split 99 | Ok(s) => s, 100 | Err(err) => { 101 | 
self.keep_reading = false; 102 | return Some(Err(UwcError::Utf8Error(err))); 103 | } 104 | }; 105 | 106 | Some(Ok(new_str)) 107 | } 108 | } 109 | 110 | #[cfg(test)] 111 | mod test { 112 | use super::*; 113 | use env_logger; 114 | use log::*; 115 | use std::io; 116 | use std::io::BufReader; 117 | 118 | #[test] 119 | fn test_basic() { 120 | let _ = env_logger::try_init(); 121 | let mut cursor = io::Cursor::new(b"hello"); 122 | let mut chunks = UStrChunksIter::new(&mut cursor, true); 123 | let mut s = chunks.next(); 124 | assert_eq!("hello", s.unwrap().unwrap()); 125 | 126 | s = chunks.next(); 127 | debug!("{:?}", s); 128 | assert!(s.is_none()); 129 | assert!(chunks.next().is_none()); // poll again: an exhausted iterator must keep yielding None 130 | } 131 | 132 | #[test] 133 | fn test_chunks_by_newline() { 134 | let _ = env_logger::try_init(); 135 | let mut cursor = io::Cursor::new( 136 | "hello\ngoodbye\r\nwindows?\u{0085}\u{000C}unicode\u{2028}newline\u{2029}sequences" 137 | .as_bytes()); 138 | 139 | let mut chunks = UStrChunksIter::new(&mut cursor, true); 140 | assert_eq!("hello\n", chunks.next().unwrap().unwrap()); 141 | assert_eq!("goodbye\r\n", chunks.next().unwrap().unwrap()); 142 | assert_eq!("windows?\u{0085}", chunks.next().unwrap().unwrap()); 143 | assert_eq!("\u{000C}", chunks.next().unwrap().unwrap()); 144 | assert_eq!("unicode\u{2028}", chunks.next().unwrap().unwrap()); 145 | assert_eq!("newline\u{2029}", chunks.next().unwrap().unwrap()); 146 | assert_eq!("sequences", chunks.next().unwrap().unwrap()); 147 | 148 | assert!(chunks.next().is_none()); 149 | assert!(chunks.next().is_none()); 150 | } 151 | 152 | #[test] 153 | fn test_chunks_by_newline_no_newlines() { 154 | let _ = env_logger::try_init(); 155 | let mut cursor = io::Cursor::new( 156 | "hello\ngoodbye\r\nwindows?\u{0085}\u{000C}unicode\u{2028}newline\u{2029}sequences" 157 | .as_bytes()); 158 | 159 | let mut chunks = UStrChunksIter::new(&mut cursor, false); 160 | assert_eq!("hello", chunks.next().unwrap().unwrap()); 161 | assert_eq!("goodbye", 
chunks.next().unwrap().unwrap()); 162 | assert_eq!("windows?", chunks.next().unwrap().unwrap()); 163 | assert_eq!("", chunks.next().unwrap().unwrap()); 164 | assert_eq!("unicode", chunks.next().unwrap().unwrap()); 165 | assert_eq!("newline", chunks.next().unwrap().unwrap()); 166 | assert_eq!("sequences", chunks.next().unwrap().unwrap()); 167 | 168 | assert!(chunks.next().is_none()); 169 | assert!(chunks.next().is_none()); 170 | } 171 | 172 | #[test] 173 | fn test_basic_buffered() { 174 | let cursor = io::Cursor::new(b"hello"); 175 | let mut reader = BufReader::with_capacity(3, cursor); 176 | let mut chunks = UStrChunksIter::new(&mut reader, true); 177 | assert_eq!("hello", chunks.next().unwrap().unwrap()); 178 | assert!(chunks.next().is_none()); 179 | assert!(chunks.next().is_none()); 180 | } 181 | 182 | #[test] 183 | fn test_buffered_stops_in_middle() { 184 | // 😬 is 4 bytes 185 | let cursor = io::Cursor::new("hello 😬 whoops".as_bytes()); 186 | 187 | // this should stop reading 2 bytes into the emoji 188 | let mut reader = BufReader::with_capacity(8, cursor); 189 | let mut chunks = UStrChunksIter::new(&mut reader, true); 190 | 191 | assert_eq!("hello 😬 whoops", chunks.next().unwrap().unwrap()); 192 | assert!(chunks.next().is_none()); 193 | assert!(chunks.next().is_none()); 194 | } 195 | 196 | #[test] 197 | fn test_buffered_stops_in_middle_japanese() { 198 | let _ = env_logger::try_init(); 199 | 200 | let cursor = 201 | io::Cursor::new("私はガラスを食べられます。\nそれは私を傷つけません。".as_bytes()); 202 | 203 | // with a capacity of 10, it should stop in the middle of some graphemes 204 | let mut reader = BufReader::with_capacity(10, cursor); 205 | let mut chunks = UStrChunksIter::new(&mut reader, true); 206 | 207 | assert_eq!( 208 | "私はガラスを食べられます。\n", 209 | chunks.next().unwrap().unwrap() 210 | ); 211 | assert_eq!("それは私を傷つけません。", chunks.next().unwrap().unwrap()); 212 | 213 | assert!(chunks.next().is_none()); 214 | assert!(chunks.next().is_none()); 215 | } 216 | } 217 | 
-------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate structopt_derive; 3 | 4 | mod constants; 5 | mod counter; 6 | mod error; 7 | mod input; 8 | mod opt; 9 | mod ubufreader; 10 | 11 | use std::collections::BTreeMap; 12 | use std::fmt::Display; 13 | use std::io::{self, BufReader, Write}; 14 | use std::iter::IntoIterator; 15 | use std::sync::{Arc, Mutex}; 16 | 17 | use failure::Error; 18 | use itertools::Itertools; 19 | use log::*; 20 | use rayon::prelude::*; 21 | use structopt::StructOpt; 22 | use tabwriter::TabWriter; 23 | 24 | use crate::counter::{Counted, Counter}; 25 | use crate::input::Input; 26 | use crate::opt::{CountMode, Opt}; 27 | use crate::ubufreader::UStrChunksIter; 28 | 29 | const TOTAL: &'static str = "total"; 30 | 31 | fn main() { 32 | env_logger::init(); 33 | 34 | let run_result = run(); 35 | 36 | match run_result { 37 | Err(error) => { 38 | eprintln!("Error: {}", error); 39 | std::process::exit(1); 40 | } 41 | Ok(success) if success == false => std::process::exit(2), 42 | _ => {} 43 | } 44 | } 45 | 46 | fn write_counts( 47 | mut writer: W, 48 | counts: &BTreeMap, 49 | title: Option<&str>, 50 | ) -> Result<(), Error> { 51 | let mut out_str = String::new(); 52 | 53 | for count in counts.values() { 54 | out_str.push_str(&count.to_string()); 55 | out_str.push_str("\t"); 56 | } 57 | 58 | // remove the trailing tab 59 | out_str.pop(); 60 | 61 | if let Some(name) = title { 62 | out_str.push_str("\t"); 63 | out_str.push_str(name); 64 | } 65 | 66 | out_str.push_str("\n"); 67 | 68 | Ok(writer.write_all(&out_str.as_bytes())?) 69 | } 70 | 71 | /// Construct the "file name" to display for line mode. 72 | fn file_name_with_line(fname: &str, thing: D) -> String { 73 | format!("{}:{}", fname, thing) 74 | } 75 | 76 | /// Write the header that displays counter names in columns. 
77 | fn write_header<'a, W, I>(mut writer: W, counters: I) -> Result<(), Error> 78 | where 79 | W: Write, 80 | I: IntoIterator, 81 | { 82 | let mut out_str = String::new(); 83 | 84 | for counter in counters.into_iter() { 85 | out_str.push_str(&counter.to_string()); 86 | out_str.push_str("\t"); 87 | } 88 | 89 | out_str.push_str("filename\n"); 90 | 91 | Ok(writer.write_all(&out_str.as_bytes())?) 92 | } 93 | 94 | fn count_chunks( 95 | file_name: &str, 96 | chunk: Vec>, 97 | opts: &Opt, 98 | line_offset: usize, 99 | output_writer: &mut Arc>, 100 | ) -> Result<(bool, Counted), Error> { 101 | let counters = opts.get_counters(); 102 | 103 | chunk 104 | .into_par_iter() 105 | .enumerate() 106 | .map(|(line_no, line)| { 107 | let line_no = line_no + line_offset; 108 | let line = match line { 109 | Ok(l) => l, 110 | Err(e) => { 111 | eprintln!("{}:{}: {}", file_name, line_no, e); 112 | return Ok((false, BTreeMap::new())); 113 | } 114 | }; 115 | 116 | debug!("line: {:?}", line); 117 | 118 | let cur_counts = counter::count(&counters, &line); 119 | 120 | if opts.mode == CountMode::Line { 121 | let name = file_name_with_line(&file_name, line_no); 122 | write_counts( 123 | &mut *output_writer.lock().unwrap(), 124 | &cur_counts, 125 | Some(&name), 126 | )?; 127 | } 128 | 129 | Ok((true, cur_counts)) 130 | }) 131 | // sum up the counts for each line into the total counts for 132 | // the file 133 | .reduce( 134 | || Ok((true, Counted::new())), 135 | |mut acc: Result<_, Error>, r: Result<_, Error>| { 136 | if r.is_err() { 137 | return r; 138 | } 139 | match acc { 140 | Err(e) => return Err(e), 141 | Ok(ref mut acc_counts_success) => { 142 | // already guaranteed to be ok by the check above 143 | let (r_success, r_current) = r.unwrap(); 144 | let &mut (ref mut acc_success, ref mut acc_counts) = acc_counts_success; 145 | 146 | for (ctr, total) in r_current { 147 | let entry = acc_counts.entry(ctr).or_insert(0); 148 | *entry += total; 149 | } 150 | 151 | *acc_success &= r_success; 152 
| } 153 | } 154 | 155 | acc 156 | }, 157 | ) 158 | } 159 | 160 | fn count_file( 161 | file_name: &str, 162 | mut file_counts: &mut Counted, 163 | opts: &Opt, 164 | mut output_writer: Arc>, 165 | ) -> Result { 166 | let keep_newlines = opts.should_keep_newlines(); 167 | 168 | info!("Counting file: {}", file_name); 169 | 170 | let mut success = true; 171 | 172 | let input = match Input::new(&file_name) { 173 | Ok(i) => i, 174 | Err(e) => { 175 | eprintln!("{}: {}", &file_name, e); 176 | return Ok(false); 177 | } 178 | }; 179 | 180 | let mut reader = BufReader::new(input); 181 | let chunks = UStrChunksIter::new(&mut reader, keep_newlines); 182 | 183 | let mut line_no = 1; 184 | for chunk in &chunks.chunks(opts.chunk_size) { 185 | let chunk: Vec<_> = chunk.collect(); 186 | let num_lines = chunk.len(); 187 | 188 | let (chunk_success, line_counts) = 189 | count_chunks(file_name, chunk, opts, line_no, &mut output_writer)?; 190 | 191 | // NOTE: Fix this if the chunks are ever a different unit than lines. 192 | line_no += num_lines; 193 | 194 | counter::sum_counts(&mut file_counts, &line_counts); 195 | success &= chunk_success; 196 | } 197 | 198 | match opts.mode { 199 | CountMode::File => write_counts( 200 | &mut *output_writer.lock().unwrap(), 201 | &file_counts, 202 | Some(&file_name), 203 | )?, 204 | CountMode::Line => { 205 | let name = file_name_with_line(&file_name, TOTAL); 206 | write_counts( 207 | &mut *output_writer.lock().unwrap(), 208 | &file_counts, 209 | Some(&name), 210 | )? 211 | } 212 | } 213 | 214 | Ok(success) 215 | } 216 | 217 | /// The return type indicates error conditions. In some error cases, it will just 218 | /// print the error and continue counting (e.g., if the user passes a directory 219 | /// as input). A return value of Ok(true) indicates that the run was successful 220 | /// with no errors; Ok(false) indicates that there were errors, but not fatal 221 | /// to the `run` function. 
A return value of `Err` indicates a fatal error that 222 | /// needed to exit immediately, e.g., writing to stdout failed. 223 | fn run() -> Result { 224 | let opts = Opt::from_args(); 225 | 226 | debug!("opts: {:?}", opts); 227 | 228 | let counters = opts.get_counters(); 229 | let mode = opts.mode; 230 | 231 | let mut counts: BTreeMap = opts 232 | .files 233 | .clone() 234 | .into_iter() 235 | .map(|fname| { 236 | ( 237 | fname, 238 | counters.iter().map(|c| (*c, 0usize)).collect::(), 239 | ) 240 | }) 241 | .collect(); 242 | 243 | let stdout = io::stdout(); 244 | 245 | let writer: Arc> = if opts.no_elastic { 246 | Arc::new(Mutex::new(stdout)) 247 | } else { 248 | Arc::new(Mutex::new(TabWriter::new(stdout))) 249 | }; 250 | 251 | if !opts.no_header { 252 | write_header(&mut *writer.lock().unwrap(), &counters)?; 253 | } 254 | 255 | let success = counts 256 | .par_iter_mut() 257 | .map(|(file_name, file_counts)| count_file(file_name, file_counts, &opts, writer.clone())) 258 | .reduce( 259 | || Ok(true), 260 | |acc_result, success_result| { 261 | let acc = acc_result?; 262 | let success = success_result?; 263 | Ok(acc && success) 264 | }, 265 | )?; 266 | 267 | info!("final_counts: {:?}", counts); 268 | 269 | if mode == CountMode::File && counts.len() > 1 { 270 | let totals = counter::sum_all_counts(counts.values()); 271 | write_counts(&mut *writer.lock().unwrap(), &totals, Some(TOTAL))?; 272 | } 273 | 274 | writer.lock().unwrap().flush()?; 275 | 276 | Ok(success) 277 | } 278 | -------------------------------------------------------------------------------- /tests/cli.rs: -------------------------------------------------------------------------------- 1 | /// Note that because std::process::Output::std{out,err} is just a Vec and 2 | /// OsString::from_vec is unstable, these tests assume that stdout is valid UTF-8. 
3 | extern crate env_logger; 4 | #[macro_use] 5 | extern crate log; 6 | 7 | use std::collections::{HashSet, VecDeque}; 8 | use std::ffi::{OsStr, OsString}; 9 | use std::fs::{self, File}; 10 | use std::io::Read; 11 | use std::path::{Path, PathBuf}; 12 | use std::process::Command; 13 | 14 | fn main_binary() -> Command { 15 | let mut cmd = Command::new("cargo"); 16 | cmd.arg("run"); 17 | cmd.arg("-q"); 18 | cmd 19 | } 20 | 21 | fn main_binary_with_args(args: I) -> Command 22 | where 23 | I: IntoIterator, 24 | S: AsRef, 25 | { 26 | let mut cmd = main_binary(); 27 | cmd.arg("--"); 28 | cmd.args(args); 29 | cmd 30 | } 31 | 32 | /// Takes a String that should be the output of a run, discards the header, and 33 | /// parses the rest of the output into their fields. 34 | fn parse_lines<'a>(output: &'a str, has_header: bool) -> HashSet<(Vec, &'a str)> { 35 | let mut lines: VecDeque<&str> = output.lines().collect(); 36 | 37 | // If there's a header, there should be at least 2 lines. If there is no 38 | // header, there should be at least one. 39 | let min_lines = if has_header { 2 } else { 1 }; 40 | 41 | assert!(lines.len() >= min_lines, "bad output: {}", output); 42 | 43 | // discard the header if it has one 44 | if has_header { 45 | lines.pop_front(); 46 | } 47 | 48 | let mut parsed = HashSet::new(); 49 | 50 | for line in lines { 51 | let mut fields: Vec<&str> = line.split_whitespace().collect(); 52 | let fname = fields.pop().unwrap(); 53 | parsed.insert(( 54 | fields 55 | .into_iter() 56 | .map(str::parse) 57 | .map(Result::unwrap) 58 | .collect(), 59 | fname, 60 | )); 61 | } 62 | 63 | parsed 64 | } 65 | 66 | /// Tests that the CLI run with no arguments prints the header with the default 67 | /// counters and all 0s. 
68 | #[test] 69 | fn test_no_args() { 70 | let out = main_binary().output().unwrap(); 71 | 72 | let stdout = String::from_utf8(out.stdout).unwrap(); 73 | let fields = parse_lines(&stdout, true); 74 | 75 | // lines words bytes filename 76 | // 0 0 0 - 77 | let correct_fields: HashSet<_> = vec![(vec![0usize, 0, 0], "-")].into_iter().collect(); 78 | assert_eq!(correct_fields, fields); 79 | 80 | // should be no stderr 81 | let stderr = out.stderr; 82 | assert_eq!(0, stderr.len()); 83 | } 84 | 85 | /// Tests that the CLI run with no arguments prints the header with the default 86 | /// counters and all 0s. 87 | #[test] 88 | fn test_no_args_no_elastic_tabs() { 89 | let out = main_binary_with_args(&["--no-elastic"]).output().unwrap(); 90 | 91 | let stdout = String::from_utf8(out.stdout).unwrap(); 92 | let correct_output = String::from("lines\twords\tbytes\tfilename\n0\t0\t0\t-\n"); 93 | 94 | assert_eq!(correct_output, stdout); 95 | 96 | // should be no stderr 97 | let stderr = out.stderr; 98 | assert_eq!(0, stderr.len()); 99 | } 100 | 101 | #[test] 102 | fn test_file_not_found() { 103 | let non_existent_file = "nofile"; 104 | let test_dir = Path::new(env!("CARGO_MANIFEST_DIR")); 105 | let self_test_file = Path::join(test_dir, "tests/cli.rs"); 106 | let self_test_file_str = self_test_file.to_str().unwrap(); 107 | 108 | let mut cmd = main_binary_with_args(&[non_existent_file, self_test_file_str]); 109 | assert!(!cmd.status().unwrap().success(), "should fail"); 110 | 111 | let output = cmd.output().unwrap(); 112 | let stderr = String::from_utf8(output.stderr).unwrap(); 113 | let stdout = String::from_utf8(output.stdout).unwrap(); 114 | 115 | let expected_err_msg = format!("{}: No such file", non_existent_file); 116 | 117 | assert!( 118 | stderr.contains(&expected_err_msg), 119 | "expected '{}' in stderr, got: '{}'", 120 | expected_err_msg, 121 | stderr 122 | ); 123 | 124 | assert!( 125 | stdout.contains(self_test_file_str), 126 | "expected '{}' in stdout, got: '{}'", 127 | 
self_test_file_str, 128 | stdout 129 | ); 130 | } 131 | 132 | // ---------------------------- 133 | // FIXTURE TESTS 134 | // ---------------------------- 135 | 136 | const FIXTURES_DIR: &str = "tests/fixtures"; 137 | const INPUT_FILE_NAME: &str = "input"; 138 | const STDOUT_FILE_NAME: &str = "stdout"; 139 | const STDERR_FILE_NAME: &str = "stderr"; 140 | const ERRCODE_FILE_NAME: &str = "errcode"; 141 | const OPTS_FILE_NAME: &str = "opts"; 142 | 143 | /// Get the input files from the given directory. 144 | fn get_input_files(base: &Path) -> Vec { 145 | fs::read_dir(base) 146 | .unwrap() 147 | .map(Result::unwrap) 148 | .map(|entry| entry.path()) 149 | .filter(|path| { 150 | path.file_name() 151 | .unwrap() 152 | .to_str() 153 | .unwrap() 154 | .starts_with(INPUT_FILE_NAME) 155 | }) 156 | .collect() 157 | } 158 | 159 | /// Soak up the given file into a String, unwrapping along the way. 160 | fn soak_string(path: &Path) -> Option { 161 | if !path.exists() { 162 | return None; 163 | } 164 | 165 | let mut file = File::open(path).expect(&format!("error on test entry: {:?}", path)); 166 | let mut string = String::new(); 167 | file.read_to_string(&mut string).unwrap(); 168 | Some(string) 169 | } 170 | 171 | /// In the 'fixtures' directory, there is a set of fixed files that provide 172 | /// a sample input file and an accompanying file that contains what the output 173 | /// is expected to be. This test walks the directory and verfies each one. 174 | /// 175 | /// The files are laid out like: 176 | /// 177 | /// ``` 178 | /// tests/fixtures 179 | /// └── hello 180 | /// ├── input.* 🠜 These files contains the sample text to give to the binary as 181 | /// │ input. 182 | /// ├── opts 🠜 This file contains the options to pass to the binary, passed 183 | /// │ after the binary name itself, but before the input file's 184 | /// │ positional argument. 185 | /// ├── stdout 🠜 This file contains the expected stdout. 
The fields will 186 | /// │ be parsed, so whitespace formatting doesn't matter, only 187 | /// │ order. 188 | /// ├── stderr 🠜 This file contains the expected stderr. It will take each 189 | /// │ line and verify that it is a substring of some line in the 190 | /// │ test run's stderr. 191 | /// └── errcode 🠜 If this file is present, it indicates the run should fail— 192 | /// i.e., terminate with a non-zero exit code. 193 | /// ``` 194 | #[test] 195 | fn test_fixtures() { 196 | let _ = env_logger::try_init(); 197 | 198 | let fixtures_path = Path::new(FIXTURES_DIR); 199 | 200 | for entry in fs::read_dir(fixtures_path).unwrap() { 201 | let test_path = entry.unwrap().path(); 202 | 203 | if !test_path.is_dir() { 204 | continue; 205 | } 206 | 207 | let opts = soak_string(&test_path.join(OPTS_FILE_NAME)); 208 | let input_paths = get_input_files(&test_path); 209 | 210 | let mut args: Vec = match opts { 211 | Some(opts_str) => opts_str.split_whitespace().map(OsString::from).collect(), 212 | None => Vec::new(), 213 | }; 214 | 215 | args.extend(input_paths.into_iter().map(PathBuf::into_os_string)); 216 | 217 | let mut cmd = main_binary_with_args(&args); 218 | debug!("Running command: {:?}", cmd); 219 | 220 | let out = cmd.output().unwrap(); 221 | 222 | // parse the fields from stdout and compare for exact equality 223 | let expected_stdout = soak_string(&test_path.join(STDOUT_FILE_NAME)); 224 | 225 | if expected_stdout.is_some() { 226 | let stdout = String::from_utf8(out.stdout).unwrap(); 227 | let fields = parse_lines(&stdout, true); 228 | let expected_stdout = expected_stdout.unwrap(); 229 | let correct_fields = parse_lines(&expected_stdout, true); 230 | assert_eq!( 231 | correct_fields, fields, 232 | "expected: {:#?}\ngot: {:#?}", 233 | correct_fields, fields 234 | ); 235 | } 236 | 237 | // check that the string inside the fixture file is a substring of 238 | // the actual stderr 239 | let expected_stderr = soak_string(&test_path.join(STDERR_FILE_NAME)); 240 | 241 | if 
expected_stderr.is_some() { 242 | let expected_stderr = expected_stderr.unwrap(); 243 | let expected_stderr_trimmed = expected_stderr.trim(); 244 | let stderr = String::from_utf8(out.stderr).unwrap(); 245 | 246 | assert!( 247 | stderr.contains(&expected_stderr_trimmed), 248 | "Wrong stderr. Expected `{}`, got `{}`", 249 | expected_stderr_trimmed, 250 | stderr.trim() 251 | ); 252 | } 253 | 254 | // if the `errcode` file is present, make sure the exit code is non-zero 255 | if test_path.join(ERRCODE_FILE_NAME).exists() { 256 | assert!(!out.status.success(), "Expected a non-zero exit code"); 257 | } else { 258 | assert!(out.status.success(), "Expected a zero exit code"); 259 | } 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. 
"Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. 
"You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. 
You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. 
Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. 
Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. 
Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. 
This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. 
Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 
374 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.13.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "1b6a2d3371669ab3ca9797670853d61402b03d0b4b9ebf33d677dfa720203072" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler" 16 | version = "0.2.3" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" 19 | 20 | [[package]] 21 | name = "aho-corasick" 22 | version = "1.1.3" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 | dependencies = [ 26 | "memchr", 27 | ] 28 | 29 | [[package]] 30 | name = "ansi_term" 31 | version = "0.11.0" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 34 | dependencies = [ 35 | "winapi", 36 | ] 37 | 38 | [[package]] 39 | name = "anstream" 40 | version = "0.6.15" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" 43 | dependencies = [ 44 | "anstyle", 45 | "anstyle-parse", 46 | "anstyle-query", 47 | "anstyle-wincon", 48 | "colorchoice", 49 | "is_terminal_polyfill", 50 | "utf8parse", 51 | ] 52 | 53 | [[package]] 54 | name = "anstyle" 55 | version = "1.0.8" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" 58 | 59 | [[package]] 60 | name = "anstyle-parse" 61 | version = 
"0.2.5" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" 64 | dependencies = [ 65 | "utf8parse", 66 | ] 67 | 68 | [[package]] 69 | name = "anstyle-query" 70 | version = "1.1.1" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" 73 | dependencies = [ 74 | "windows-sys", 75 | ] 76 | 77 | [[package]] 78 | name = "anstyle-wincon" 79 | version = "3.0.4" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" 82 | dependencies = [ 83 | "anstyle", 84 | "windows-sys", 85 | ] 86 | 87 | [[package]] 88 | name = "atty" 89 | version = "0.2.14" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 92 | dependencies = [ 93 | "hermit-abi", 94 | "libc", 95 | "winapi", 96 | ] 97 | 98 | [[package]] 99 | name = "autocfg" 100 | version = "1.3.0" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 103 | 104 | [[package]] 105 | name = "backtrace" 106 | version = "0.3.53" 107 | source = "registry+https://github.com/rust-lang/crates.io-index" 108 | checksum = "707b586e0e2f247cbde68cdd2c3ce69ea7b7be43e1c5b426e37c9319c4b9838e" 109 | dependencies = [ 110 | "addr2line", 111 | "cfg-if", 112 | "libc", 113 | "miniz_oxide", 114 | "object", 115 | "rustc-demangle", 116 | ] 117 | 118 | [[package]] 119 | name = "bitflags" 120 | version = "1.2.1" 121 | source = "registry+https://github.com/rust-lang/crates.io-index" 122 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 123 | 124 | [[package]] 125 | name = "cfg-if" 126 | version = "1.0.0" 127 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 129 | 130 | [[package]] 131 | name = "clap" 132 | version = "2.33.3" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" 135 | dependencies = [ 136 | "ansi_term", 137 | "atty", 138 | "bitflags", 139 | "strsim", 140 | "textwrap", 141 | "unicode-width", 142 | "vec_map", 143 | ] 144 | 145 | [[package]] 146 | name = "colorchoice" 147 | version = "1.0.2" 148 | source = "registry+https://github.com/rust-lang/crates.io-index" 149 | checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" 150 | 151 | [[package]] 152 | name = "crossbeam-deque" 153 | version = "0.8.5" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 156 | dependencies = [ 157 | "crossbeam-epoch", 158 | "crossbeam-utils", 159 | ] 160 | 161 | [[package]] 162 | name = "crossbeam-epoch" 163 | version = "0.9.18" 164 | source = "registry+https://github.com/rust-lang/crates.io-index" 165 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 166 | dependencies = [ 167 | "crossbeam-utils", 168 | ] 169 | 170 | [[package]] 171 | name = "crossbeam-utils" 172 | version = "0.8.20" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 175 | 176 | [[package]] 177 | name = "either" 178 | version = "1.13.0" 179 | source = "registry+https://github.com/rust-lang/crates.io-index" 180 | checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" 181 | 182 | [[package]] 183 | name = "env_filter" 184 | version = "0.1.2" 185 | source = "registry+https://github.com/rust-lang/crates.io-index" 186 | checksum = 
"4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" 187 | dependencies = [ 188 | "log", 189 | "regex", 190 | ] 191 | 192 | [[package]] 193 | name = "env_logger" 194 | version = "0.11.5" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" 197 | dependencies = [ 198 | "anstream", 199 | "anstyle", 200 | "env_filter", 201 | "humantime", 202 | "log", 203 | ] 204 | 205 | [[package]] 206 | name = "failure" 207 | version = "0.1.8" 208 | source = "registry+https://github.com/rust-lang/crates.io-index" 209 | checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" 210 | dependencies = [ 211 | "backtrace", 212 | "failure_derive", 213 | ] 214 | 215 | [[package]] 216 | name = "failure_derive" 217 | version = "0.1.8" 218 | source = "registry+https://github.com/rust-lang/crates.io-index" 219 | checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" 220 | dependencies = [ 221 | "proc-macro2", 222 | "quote", 223 | "syn", 224 | "synstructure", 225 | ] 226 | 227 | [[package]] 228 | name = "gimli" 229 | version = "0.22.0" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "aaf91faf136cb47367fa430cd46e37a788775e7fa104f8b4bcb3861dc389b724" 232 | 233 | [[package]] 234 | name = "heck" 235 | version = "0.3.1" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" 238 | dependencies = [ 239 | "unicode-segmentation", 240 | ] 241 | 242 | [[package]] 243 | name = "hermit-abi" 244 | version = "0.1.17" 245 | source = "registry+https://github.com/rust-lang/crates.io-index" 246 | checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8" 247 | dependencies = [ 248 | "libc", 249 | ] 250 | 251 | [[package]] 252 | name = "humantime" 253 | version = "2.1.0" 254 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 255 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 256 | 257 | [[package]] 258 | name = "is_terminal_polyfill" 259 | version = "1.70.1" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 262 | 263 | [[package]] 264 | name = "itertools" 265 | version = "0.13.0" 266 | source = "registry+https://github.com/rust-lang/crates.io-index" 267 | checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 268 | dependencies = [ 269 | "either", 270 | ] 271 | 272 | [[package]] 273 | name = "lazy_static" 274 | version = "1.5.0" 275 | source = "registry+https://github.com/rust-lang/crates.io-index" 276 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 277 | 278 | [[package]] 279 | name = "libc" 280 | version = "0.2.79" 281 | source = "registry+https://github.com/rust-lang/crates.io-index" 282 | checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" 283 | 284 | [[package]] 285 | name = "log" 286 | version = "0.4.22" 287 | source = "registry+https://github.com/rust-lang/crates.io-index" 288 | checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" 289 | 290 | [[package]] 291 | name = "memchr" 292 | version = "2.7.4" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 295 | 296 | [[package]] 297 | name = "miniz_oxide" 298 | version = "0.4.3" 299 | source = "registry+https://github.com/rust-lang/crates.io-index" 300 | checksum = "0f2d26ec3309788e423cfbf68ad1800f061638098d76a83681af979dc4eda19d" 301 | dependencies = [ 302 | "adler", 303 | "autocfg", 304 | ] 305 | 306 | [[package]] 307 | name = "object" 308 | version = "0.21.1" 309 | source = "registry+https://github.com/rust-lang/crates.io-index" 310 | 
checksum = "37fd5004feb2ce328a52b0b3d01dbf4ffff72583493900ed15f22d4111c51693" 311 | 312 | [[package]] 313 | name = "proc-macro-error" 314 | version = "1.0.4" 315 | source = "registry+https://github.com/rust-lang/crates.io-index" 316 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 317 | dependencies = [ 318 | "proc-macro-error-attr", 319 | "proc-macro2", 320 | "quote", 321 | "syn", 322 | "version_check", 323 | ] 324 | 325 | [[package]] 326 | name = "proc-macro-error-attr" 327 | version = "1.0.4" 328 | source = "registry+https://github.com/rust-lang/crates.io-index" 329 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 330 | dependencies = [ 331 | "proc-macro2", 332 | "quote", 333 | "version_check", 334 | ] 335 | 336 | [[package]] 337 | name = "proc-macro2" 338 | version = "1.0.24" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" 341 | dependencies = [ 342 | "unicode-xid", 343 | ] 344 | 345 | [[package]] 346 | name = "quote" 347 | version = "1.0.7" 348 | source = "registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" 350 | dependencies = [ 351 | "proc-macro2", 352 | ] 353 | 354 | [[package]] 355 | name = "rayon" 356 | version = "1.10.0" 357 | source = "registry+https://github.com/rust-lang/crates.io-index" 358 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 359 | dependencies = [ 360 | "either", 361 | "rayon-core", 362 | ] 363 | 364 | [[package]] 365 | name = "rayon-core" 366 | version = "1.12.1" 367 | source = "registry+https://github.com/rust-lang/crates.io-index" 368 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 369 | dependencies = [ 370 | "crossbeam-deque", 371 | "crossbeam-utils", 372 | ] 373 | 374 | [[package]] 375 | name = "regex" 376 | version = 
"1.10.6" 377 | source = "registry+https://github.com/rust-lang/crates.io-index" 378 | checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" 379 | dependencies = [ 380 | "aho-corasick", 381 | "memchr", 382 | "regex-automata", 383 | "regex-syntax", 384 | ] 385 | 386 | [[package]] 387 | name = "regex-automata" 388 | version = "0.4.7" 389 | source = "registry+https://github.com/rust-lang/crates.io-index" 390 | checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" 391 | dependencies = [ 392 | "aho-corasick", 393 | "memchr", 394 | "regex-syntax", 395 | ] 396 | 397 | [[package]] 398 | name = "regex-syntax" 399 | version = "0.8.4" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" 402 | 403 | [[package]] 404 | name = "rustc-demangle" 405 | version = "0.1.18" 406 | source = "registry+https://github.com/rust-lang/crates.io-index" 407 | checksum = "6e3bad0ee36814ca07d7968269dd4b7ec89ec2da10c4bb613928d3077083c232" 408 | 409 | [[package]] 410 | name = "strsim" 411 | version = "0.8.0" 412 | source = "registry+https://github.com/rust-lang/crates.io-index" 413 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 414 | 415 | [[package]] 416 | name = "structopt" 417 | version = "0.3.26" 418 | source = "registry+https://github.com/rust-lang/crates.io-index" 419 | checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" 420 | dependencies = [ 421 | "clap", 422 | "lazy_static", 423 | "structopt-derive", 424 | ] 425 | 426 | [[package]] 427 | name = "structopt-derive" 428 | version = "0.4.18" 429 | source = "registry+https://github.com/rust-lang/crates.io-index" 430 | checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" 431 | dependencies = [ 432 | "heck", 433 | "proc-macro-error", 434 | "proc-macro2", 435 | "quote", 436 | "syn", 437 | ] 438 | 439 | [[package]] 440 | 
name = "syn" 441 | version = "1.0.46" 442 | source = "registry+https://github.com/rust-lang/crates.io-index" 443 | checksum = "5ad5de3220ea04da322618ded2c42233d02baca219d6f160a3e9c87cda16c942" 444 | dependencies = [ 445 | "proc-macro2", 446 | "quote", 447 | "unicode-xid", 448 | ] 449 | 450 | [[package]] 451 | name = "synstructure" 452 | version = "0.12.4" 453 | source = "registry+https://github.com/rust-lang/crates.io-index" 454 | checksum = "b834f2d66f734cb897113e34aaff2f1ab4719ca946f9a7358dba8f8064148701" 455 | dependencies = [ 456 | "proc-macro2", 457 | "quote", 458 | "syn", 459 | "unicode-xid", 460 | ] 461 | 462 | [[package]] 463 | name = "tabwriter" 464 | version = "1.4.0" 465 | source = "registry+https://github.com/rust-lang/crates.io-index" 466 | checksum = "a327282c4f64f6dc37e3bba4c2b6842cc3a992f204fa58d917696a89f691e5f6" 467 | dependencies = [ 468 | "unicode-width", 469 | ] 470 | 471 | [[package]] 472 | name = "textwrap" 473 | version = "0.11.0" 474 | source = "registry+https://github.com/rust-lang/crates.io-index" 475 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 476 | dependencies = [ 477 | "unicode-width", 478 | ] 479 | 480 | [[package]] 481 | name = "unicode-segmentation" 482 | version = "1.12.0" 483 | source = "registry+https://github.com/rust-lang/crates.io-index" 484 | checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" 485 | 486 | [[package]] 487 | name = "unicode-width" 488 | version = "0.1.14" 489 | source = "registry+https://github.com/rust-lang/crates.io-index" 490 | checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" 491 | 492 | [[package]] 493 | name = "unicode-xid" 494 | version = "0.2.1" 495 | source = "registry+https://github.com/rust-lang/crates.io-index" 496 | checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" 497 | 498 | [[package]] 499 | name = "utf8parse" 500 | version = "0.2.2" 501 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 502 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 503 | 504 | [[package]] 505 | name = "uwc" 506 | version = "1.0.9-alpha.1" 507 | dependencies = [ 508 | "env_logger", 509 | "failure", 510 | "itertools", 511 | "lazy_static", 512 | "log", 513 | "rayon", 514 | "regex", 515 | "structopt", 516 | "structopt-derive", 517 | "tabwriter", 518 | "unicode-segmentation", 519 | ] 520 | 521 | [[package]] 522 | name = "vec_map" 523 | version = "0.8.2" 524 | source = "registry+https://github.com/rust-lang/crates.io-index" 525 | checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" 526 | 527 | [[package]] 528 | name = "version_check" 529 | version = "0.9.2" 530 | source = "registry+https://github.com/rust-lang/crates.io-index" 531 | checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" 532 | 533 | [[package]] 534 | name = "winapi" 535 | version = "0.3.9" 536 | source = "registry+https://github.com/rust-lang/crates.io-index" 537 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 538 | dependencies = [ 539 | "winapi-i686-pc-windows-gnu", 540 | "winapi-x86_64-pc-windows-gnu", 541 | ] 542 | 543 | [[package]] 544 | name = "winapi-i686-pc-windows-gnu" 545 | version = "0.4.0" 546 | source = "registry+https://github.com/rust-lang/crates.io-index" 547 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 548 | 549 | [[package]] 550 | name = "winapi-x86_64-pc-windows-gnu" 551 | version = "0.4.0" 552 | source = "registry+https://github.com/rust-lang/crates.io-index" 553 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 554 | 555 | [[package]] 556 | name = "windows-sys" 557 | version = "0.52.0" 558 | source = "registry+https://github.com/rust-lang/crates.io-index" 559 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 560 | dependencies = [ 561 | 
"windows-targets", 562 | ] 563 | 564 | [[package]] 565 | name = "windows-targets" 566 | version = "0.52.6" 567 | source = "registry+https://github.com/rust-lang/crates.io-index" 568 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 569 | dependencies = [ 570 | "windows_aarch64_gnullvm", 571 | "windows_aarch64_msvc", 572 | "windows_i686_gnu", 573 | "windows_i686_gnullvm", 574 | "windows_i686_msvc", 575 | "windows_x86_64_gnu", 576 | "windows_x86_64_gnullvm", 577 | "windows_x86_64_msvc", 578 | ] 579 | 580 | [[package]] 581 | name = "windows_aarch64_gnullvm" 582 | version = "0.52.6" 583 | source = "registry+https://github.com/rust-lang/crates.io-index" 584 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 585 | 586 | [[package]] 587 | name = "windows_aarch64_msvc" 588 | version = "0.52.6" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 591 | 592 | [[package]] 593 | name = "windows_i686_gnu" 594 | version = "0.52.6" 595 | source = "registry+https://github.com/rust-lang/crates.io-index" 596 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 597 | 598 | [[package]] 599 | name = "windows_i686_gnullvm" 600 | version = "0.52.6" 601 | source = "registry+https://github.com/rust-lang/crates.io-index" 602 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 603 | 604 | [[package]] 605 | name = "windows_i686_msvc" 606 | version = "0.52.6" 607 | source = "registry+https://github.com/rust-lang/crates.io-index" 608 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 609 | 610 | [[package]] 611 | name = "windows_x86_64_gnu" 612 | version = "0.52.6" 613 | source = "registry+https://github.com/rust-lang/crates.io-index" 614 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 615 | 616 | [[package]] 617 | name = 
"windows_x86_64_gnullvm" 618 | version = "0.52.6" 619 | source = "registry+https://github.com/rust-lang/crates.io-index" 620 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 621 | 622 | [[package]] 623 | name = "windows_x86_64_msvc" 624 | version = "0.52.6" 625 | source = "registry+https://github.com/rust-lang/crates.io-index" 626 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 627 | --------------------------------------------------------------------------------