├── .gitignore ├── TODO ├── Cargo.toml ├── compare.py ├── README.md ├── LICENSE ├── test.sh └── src ├── main.rs ├── ignore.rs ├── options.rs ├── search.rs ├── display.rs └── pcre.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | oprofile_data 3 | callgrind.* 4 | Cargo.lock 5 | tst 6 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | TODOs 2 | ===== 3 | 4 | * more test/bench infrastructure and cases 5 | * help texts for command-line options 6 | * comprehensive test suite and tests against grep, ag, ack 7 | * more command-line options for ag compatibility 8 | * error handling: no unwraps and "if let Ok", add messages to stderr 9 | * ignore handling for more than gitignore files 10 | * better binary file detection 11 | * switching to other regex impl for complex things like backrefs 12 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ruthenium" 3 | version = "0.1.0" 4 | authors = ["Georg Brandl "] 5 | 6 | [[bin]] 7 | name = "ru" 8 | 9 | [features] 10 | default = [] 11 | pcre = [] 12 | 13 | [profile.release] 14 | lto = true 15 | 16 | [dependencies] 17 | libc = "*" 18 | atty = "*" 19 | walkdir = "*" 20 | memmap = "*" 21 | scoped-pool = "*" 22 | num_cpus = "*" 23 | glob = "*" 24 | regex = "*" 25 | 26 | [dependencies.clap] 27 | version = "2" 28 | default_features = false 29 | features = [] 30 | -------------------------------------------------------------------------------- /compare.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import difflib 3 | import subprocess 4 | 5 | args = sys.argv[1:] 6 | 7 | print 'Running ack...' 8 | output_ack = subprocess.Popen(['ack', '--smart-case'] + args, stdout=subprocess.PIPE).communicate()[0] 9 | print 'Running ag...' 10 | output_ag = subprocess.Popen(['ag'] + args, stdout=subprocess.PIPE).communicate()[0] 11 | print 'Running ru...' 12 | output_ru = subprocess.Popen(['ru'] + args, stdout=subprocess.PIPE).communicate()[0] 13 | print 'Sorting...' 14 | output_ack = sorted(output_ack.splitlines()) 15 | output_ag = sorted(output_ag.splitlines()) 16 | output_ru = sorted(output_ru.splitlines()) 17 | 18 | for line in difflib.unified_diff(output_ag, output_ru): 19 | print line 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ruthenium, an Ack-like searcher 2 | 3 | Ruthenium is an attempt to implement the well-known Perl tool `ack` in Rust. 4 | 5 | When finished, it is supposed to show the strengths of Rust, for example simple 6 | and efficient concurrency without locks, and speed comparable with C programs, 7 | in this case the implementation called `ag` or `the_silver_searcher`. 8 | 9 | ## How to build 10 | 11 | Use `cargo build --release`. `target/release/ru` is the binary. 12 | 13 | ## How to use 14 | 15 | The resulting binary is linked statically against Rust dependencies, so it can 16 | be copied into a `bin` directory and used. 17 | 18 | ### Command line 19 | 20 | Command-line options are designed to be mostly compatible with Ag. There are 21 | probably small differences, especially in the handling of ignore files. 22 | 23 | ### Regex engines 24 | 25 | Currently, the regex engine can be selected to be either Andrew Gallant's Rust 26 | implementation `regex` (the default) or PCRE (requires libpcre and its headers 27 | to be installed). Select the latter with the Cargo feature flag `pcre`. 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Georg Brandl 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | 3 | export RUST_BACKTRACE=1 4 | NEEDLE="$1" 5 | if [ -z "$NEEDLE" ]; then NEEDLE=p.th; fi 6 | 7 | run-timed() { 8 | /usr/bin/time --format="%Us user %Ss system %P%% cpu %e total, max RSS %Mk" "$@" 9 | } 10 | 11 | run-grep() { 12 | run-timed grep --color=always -n -E -ri "$@" $NEEDLE tst > /dev/null 13 | } 14 | 15 | run-ag() { 16 | run-timed ag --color "$@" $NEEDLE tst > /dev/null 17 | } 18 | 19 | run-ru-lr() { 20 | run-timed target/release/ru-line-regex --color "$@" $NEEDLE tst > /dev/null 21 | } 22 | 23 | run-ru-lp() { 24 | run-timed target/release/ru-line-pcre --color "$@" $NEEDLE tst > /dev/null 25 | } 26 | 27 | run-ru() { 28 | run-timed target/release/ru --color "$@" $NEEDLE tst > /dev/null 29 | } 30 | 31 | run-all() { 32 | echo -n "Grep: " 33 | run-grep "$@" 34 | echo -n "Ag: " 35 | run-ag "$@" 36 | echo -n "RuLR: " 37 | run-ru-lr "$@" 38 | echo -n "RuLP: " 39 | run-ru-lp "$@" 40 | echo -n "Ru: " 41 | run-ru "$@" 42 | } 43 | 44 | #cargo build --release || exit 1 45 | 46 | echo "List matches" 47 | run-all 48 | echo 49 | echo "List matches with context" 50 | run-all -C 10 51 | echo 52 | echo "List inverted matches" 53 | run-all -v 54 | echo 55 | echo "List files" 56 | run-all -l 57 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | // --------------------------------------------------------------------------------------- 2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl. 3 | // Licensed under the MIT license. 4 | // --------------------------------------------------------------------------------------- 5 | 6 | #[macro_use] 7 | extern crate clap; 8 | extern crate libc; 9 | extern crate atty; 10 | extern crate walkdir; 11 | extern crate memmap; 12 | extern crate scoped_pool; 13 | extern crate num_cpus; 14 | extern crate glob; 15 | extern crate regex; 16 | 17 | mod search; 18 | mod ignore; 19 | mod display; 20 | mod options; 21 | #[cfg(feature = "pcre")] 22 | mod pcre; 23 | 24 | use std::cmp::max; 25 | use std::sync::mpsc::{sync_channel, SyncSender}; 26 | use std::thread; 27 | use std::io::{stdout, BufWriter}; 28 | use memmap::{Mmap, Protection}; 29 | use scoped_pool::Pool; 30 | use walkdir::WalkDirIterator; 31 | 32 | use display::DisplayMode; 33 | use search::FileResult; 34 | use options::Opts; 35 | 36 | 37 | /// Walk a directory (given in Opts) and check all found files. 38 | /// 39 | /// The channel is used to send result structs to the main thread, which gives 40 | /// them to the DisplayMode for output. 41 | /// 42 | /// The thread of this function only does the directory walking, it spawns a 43 | /// number of worker threads in a pool to grep individual files. 44 | fn walk(chan: SyncSender, opts: &Opts) { 45 | // thread pool for individual file grep worker threads 46 | let pool = Pool::new(max(opts.workers - 1, 1)); 47 | // create the regex object 48 | let regex = search::create_rx(&opts); 49 | 50 | let walker = walkdir::WalkDir::new(&opts.path) 51 | .follow_links(opts.follow_links) 52 | .max_depth(opts.depth); 53 | pool.scoped(|scope| { 54 | let rx = ®ex; // borrow for closures 55 | // stack of directories being walked, maintained in the filter closure 56 | let mut parent_stack: Vec<::std::path::PathBuf> = Vec::new(); 57 | // stack of Ignore structs per directory in parent_stack, they accumulate 58 | // XXX: add global ignores from cmdline and a config file here 59 | let mut ignore_stack = Vec::new(); 60 | let walker = walker.into_iter().filter_entry(|entry| { 61 | // remove parents from stack that are not applicable anymore 62 | let new_parent = entry.path().parent().unwrap(); 63 | while !parent_stack.is_empty() && 64 | parent_stack.last().unwrap().as_path() != new_parent 65 | { 66 | ignore_stack.pop(); 67 | parent_stack.pop(); 68 | } 69 | // weed out hidden files (this is separate from ignored) 70 | let path = entry.path(); 71 | if let Some(fname) = path.file_name() { 72 | if !opts.do_hidden && fname.to_string_lossy().starts_with(".") { 73 | return false; 74 | } 75 | } 76 | // weed out ignored files and directories (if we return false here for 77 | // directories, the contents are pruned from the iterator) 78 | if opts.check_ignores && ignore::match_patterns(path, &ignore_stack) { 79 | return false; 80 | } 81 | // we got a new dir? put it onto the stack 82 | if entry.file_type().is_dir() { 83 | let new_path = entry.path().to_path_buf(); 84 | // read ignore patterns specific to this directory 85 | ignore_stack.push(ignore::read_patterns(&new_path)); 86 | parent_stack.push(new_path); 87 | } 88 | true 89 | }); 90 | for entry in walker { 91 | if let Ok(entry) = entry { 92 | // only touch normal files 93 | if !entry.file_type().is_file() { 94 | continue; 95 | } 96 | // open and search file in one of the worker threads 97 | let ch = chan.clone(); 98 | scope.execute(move || { 99 | let path = entry.path(); 100 | if let Ok(map) = Mmap::open_path(path, Protection::Read) { 101 | let buf = unsafe { map.as_slice() }; 102 | let res = search::search(rx, &opts, path, buf); 103 | ch.send(res).unwrap(); 104 | } 105 | }); 106 | } 107 | } 108 | }); 109 | } 110 | 111 | /// Run the main action. This is separated from `main` so that it can get a generic 112 | /// DisplayMode argument. 113 | /// 114 | /// Spawns the walker thread and prints the results. 115 | fn run(display: &mut D, opts: Opts) { 116 | // The sync_channel has a bound on pending items. We don't want to 117 | // generate results much faster than we can print them. 118 | let (w_chan, r_chan) = sync_channel(4 * opts.workers as usize); 119 | thread::spawn(move || { 120 | walk(w_chan, &opts); 121 | }); 122 | while let Ok(r) = r_chan.recv() { 123 | display.print_result(r); 124 | } 125 | } 126 | 127 | /// Main entry point. 128 | fn main() { 129 | let mut opts = Opts::from_cmdline(); 130 | let colors = opts.colors.take().unwrap(); // guaranteed to be Some() 131 | 132 | let stdout = stdout(); 133 | let writer = BufWriter::new(stdout.lock()); 134 | 135 | // determine which display mode we are using 136 | if opts.only_count { 137 | run(&mut display::CountMode::new(writer, colors), opts); 138 | } else if opts.only_files == Some(true) { 139 | run(&mut display::FilesOnlyMode::new(writer, colors, true), opts); 140 | } else if opts.only_files == Some(false) { 141 | run(&mut display::FilesOnlyMode::new(writer, colors, false), opts); 142 | } else if opts.ackmate_format { 143 | run(&mut display::AckMateMode::new(writer), opts); 144 | } else if opts.vimgrep_format { 145 | run(&mut display::VimGrepMode::new(writer), opts); 146 | } else { 147 | run(&mut display::DefaultMode::new(writer, colors, opts.show_break, 148 | opts.show_heading), opts); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/ignore.rs: -------------------------------------------------------------------------------- 1 | // --------------------------------------------------------------------------------------- 2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl. 3 | // Licensed under the MIT license. 4 | // --------------------------------------------------------------------------------------- 5 | 6 | use std::borrow::Cow; 7 | use std::collections::BTreeSet; 8 | use std::str::FromStr; 9 | use std::fs::{File, metadata}; 10 | use std::io::{BufReader, BufRead}; 11 | use std::path::{Path, PathBuf}; 12 | use glob::{Pattern, MatchOptions}; 13 | 14 | 15 | /// Represents the ignore patterns for one directory, the `root`. 16 | #[derive(Debug)] 17 | pub struct Ignores { 18 | /// Path patterns are relative to this directory 19 | root: PathBuf, 20 | /// Literal filenames to exclude 21 | filenames: BTreeSet, 22 | /// Literal file extensions to exclude 23 | extensions: BTreeSet, 24 | /// Patterns to exclude (can have paths) 25 | patterns: Vec, 26 | /// "Negated patterns": matched after a file would be excluded, 27 | /// if it matches, the exclusion is canceled 28 | negated_patterns: Vec, 29 | } 30 | 31 | fn is_literal_filename(s: &str) -> bool { 32 | s.chars().all(|v| !(v == '*' || v == '?' || v == '[' || v == ']' || v == '/')) 33 | } 34 | 35 | fn is_literal_extension(s: &str) -> bool { 36 | s.chars().all(|v| !(v == '*' || v == '?' || v == '[' || v == ']' || v == '/' || v == '.')) 37 | } 38 | 39 | /// Read gitignore-style patterns from a filename and add all recognized 40 | /// patterns to the Ignores object. 41 | fn read_git_patterns_from(path: &Path, ignores: &mut Ignores) { 42 | // add a complex pattern 43 | fn add_pat(line: &str, vec: &mut Vec) { 44 | let pat = Pattern::from_str( 45 | // if a pattern doesn't start with "/", it is not anchored to the root, 46 | // so to make glob match any such file we need to start it with "**/" 47 | if !line.starts_with("/") { 48 | Cow::Owned(String::from("**/") + line) 49 | } else { 50 | Cow::Borrowed(line) 51 | }.as_ref()); 52 | if let Ok(pat) = pat { 53 | vec.push(pat); 54 | } 55 | } 56 | if let Ok(file) = File::open(path) { 57 | let reader = BufReader::new(file); 58 | for line in reader.lines() { 59 | if let Ok(line) = line { 60 | let line = line.trim(); 61 | // empty line or comment, ignore 62 | if line.is_empty() || line.starts_with("#") { 63 | continue; 64 | } 65 | // negated pattern (no special casing for filenames/exts here) 66 | if line.starts_with("!") { 67 | add_pat(&line[1..], &mut ignores.negated_patterns); 68 | // simple filename 69 | } else if is_literal_filename(line) { 70 | ignores.filenames.insert(line.into()); 71 | // simple *.ext 72 | } else if line.starts_with("*.") && is_literal_extension(&line[2..]) { 73 | ignores.extensions.insert(line[2..].into()); 74 | // complex non-negated pattern 75 | } else { 76 | add_pat(line, &mut ignores.patterns); 77 | } 78 | } 79 | } 80 | } 81 | } 82 | 83 | /// Read patterns from all recognized and existing ignore files in `dir`. 84 | pub fn read_patterns(dir: &Path) -> Ignores { 85 | let mut result = Ignores { 86 | root: dir.to_path_buf(), 87 | filenames: BTreeSet::new(), 88 | extensions: BTreeSet::new(), 89 | patterns: Vec::new(), 90 | negated_patterns: Vec::new(), 91 | }; 92 | for gitexcludes in &[".gitignore", ".git/info/excludes"] { 93 | if metadata(dir.join(gitexcludes)).map(|f| f.is_file()).unwrap_or(false) { 94 | read_git_patterns_from(&dir.join(gitexcludes), &mut result); 95 | } 96 | } 97 | result 98 | } 99 | 100 | /// Return relative path from `base` to `path`. 101 | /// 102 | /// Copied from std::path::Path, where it is still unstable. 103 | pub fn relative_path_from<'a, P: AsRef>(path: &'a Path, base: &'a P) -> Option<&'a Path> 104 | { 105 | fn iter_after(mut iter: I, mut prefix: J) -> Option where 106 | I: Iterator + Clone, J: Iterator, A: PartialEq 107 | { 108 | loop { 109 | let mut iter_next = iter.clone(); 110 | match (iter_next.next(), prefix.next()) { 111 | (Some(x), Some(y)) => { 112 | if x != y { return None } 113 | } 114 | (Some(_), None) => return Some(iter), 115 | (None, None) => return Some(iter), 116 | (None, Some(_)) => return None, 117 | } 118 | iter = iter_next; 119 | } 120 | } 121 | 122 | iter_after(path.components(), base.as_ref().components()).map(|c| c.as_path()) 123 | } 124 | 125 | /// Match `path` against the ignore stack `ignores`, return true if match found. 126 | pub fn match_patterns(path: &Path, ignores: &[Ignores]) -> bool { 127 | const OPTS: MatchOptions = MatchOptions { 128 | case_sensitive: true, 129 | require_literal_separator: true, 130 | require_literal_leading_dot: false, 131 | }; 132 | 133 | let name = path.file_name().and_then(|s| s.to_str()); 134 | let ext = path.extension().and_then(|s| s.to_str()); 135 | 136 | let mut is_ignored = false; 137 | for ignore in ignores { 138 | if name.is_some() && ignore.filenames.contains(name.unwrap()) { 139 | is_ignored = true; 140 | } else if ext.is_some() && ignore.extensions.contains(ext.unwrap()) { 141 | is_ignored = true; 142 | } else if !ignore.patterns.is_empty() { 143 | let relpath = relative_path_from(path, &ignore.root).unwrap(); 144 | for pattern in &ignore.patterns { 145 | if pattern.matches_path_with(relpath, &OPTS) { 146 | is_ignored = true; 147 | break; 148 | } 149 | } 150 | } 151 | // apply negated patterns if necessary 152 | if is_ignored && !ignore.negated_patterns.is_empty() { 153 | let relpath = relative_path_from(path, &ignore.root).unwrap(); 154 | for pattern in &ignore.negated_patterns { 155 | if pattern.matches_path_with(relpath, &OPTS) { 156 | is_ignored = false; 157 | } 158 | } 159 | } 160 | if is_ignored { 161 | break; 162 | } 163 | } 164 | is_ignored 165 | } 166 | -------------------------------------------------------------------------------- /src/options.rs: -------------------------------------------------------------------------------- 1 | // --------------------------------------------------------------------------------------- 2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl. 3 | // Licensed under the MIT license. 4 | // --------------------------------------------------------------------------------------- 5 | 6 | use std::cmp::min; 7 | use std::usize; 8 | 9 | use atty; 10 | use clap::{App, AppSettings, Arg}; 11 | use num_cpus; 12 | 13 | /// Contains the ANSI codes needed to set the terminal to a certain color. 14 | #[derive(Clone)] 15 | pub struct Colors { 16 | pub reset: Vec, 17 | pub path: Vec, 18 | pub lineno: Vec, 19 | pub span: Vec, 20 | pub punct: Vec, 21 | pub empty: bool, 22 | } 23 | 24 | impl Colors { 25 | /// Create a struct where no colors are emitted. 26 | fn empty() -> Colors { 27 | Colors { 28 | reset: vec![], 29 | path: vec![], 30 | lineno: vec![], 31 | span: vec![], 32 | punct: vec![], 33 | empty: true, 34 | } 35 | } 36 | 37 | /// Create a struct from given color specs. Color specs are the payload 38 | /// of the color ANSI sequences, e.g. "01;31". 39 | fn from(path: &str, lineno: &str, span: &str, punct: &str) -> Colors { 40 | Colors { 41 | reset: b"\x1b[0m".to_vec(), 42 | path: format!("\x1b[{}m", path).into_bytes(), 43 | lineno: format!("\x1b[{}m", lineno).into_bytes(), 44 | span: format!("\x1b[{}m", span).into_bytes(), 45 | punct: format!("\x1b[{}m", punct).into_bytes(), 46 | empty: false, 47 | } 48 | } 49 | } 50 | 51 | /// Case-sensitivity matching options. 52 | /// 53 | /// Smart casing means insensitive as long as the pattern contains no uppercase 54 | /// letters. 55 | #[derive(Clone)] 56 | pub enum Casing { 57 | Default, 58 | Smart, 59 | Insensitive, 60 | } 61 | 62 | /// Holds all options for the search. 63 | #[derive(Clone)] 64 | pub struct Opts { 65 | // file related options 66 | pub path: String, 67 | pub depth: usize, 68 | pub follow_links: bool, 69 | pub do_binaries: bool, 70 | pub do_hidden: bool, 71 | // ignore file related options 72 | pub check_ignores: bool, 73 | // pattern related options 74 | pub pattern: String, 75 | pub casing: Casing, 76 | pub literal: bool, 77 | pub invert: bool, 78 | // display related options 79 | pub colors: Option, 80 | pub only_files: Option, 81 | pub only_count: bool, 82 | pub show_break: bool, 83 | pub show_heading: bool, 84 | pub ackmate_format: bool, 85 | pub vimgrep_format: bool, 86 | pub max_count: usize, 87 | pub before: usize, 88 | pub after: usize, 89 | // others 90 | pub workers: usize, 91 | } 92 | 93 | /// Somewhat simpler creation of flag Args. 94 | macro_rules! flag { 95 | ($n:ident -$f:ident) => { 96 | Arg::with_name(stringify!($n)).short(stringify!($f)) 97 | }; 98 | ($n:ident -$f:ident --$l:expr) => { 99 | Arg::with_name(stringify!($n)).short(stringify!($f)).long($l) 100 | }; 101 | ($n:ident / --$l:expr) => { 102 | Arg::with_name(stringify!($n)).long($l) 103 | }; 104 | } 105 | 106 | impl Opts { 107 | pub fn from_cmdline() -> Opts { 108 | let version = format!("v{}", crate_version!()); 109 | // XXX: sort and group the arguments once they are all done 110 | let app = App::new("Ruthenium") 111 | .version(&*version) 112 | .usage("ru [options] PATTERN [PATH]") 113 | .about("Recursively search for a pattern, like ack") 114 | .setting(AppSettings::UnifiedHelpMessage) 115 | .setting(AppSettings::ArgRequiredElseHelp) // seems to be not working 116 | .arg(Arg::with_name("pattern").required(true).index(1)) 117 | .arg(Arg::with_name("path").index(2)) 118 | .arg(flag!(all -a --"all-types")) 119 | .arg(flag!(depth / --"depth").takes_value(true)) 120 | .arg(flag!(literal -Q --"literal")) 121 | .arg(flag!(fixedstrings -F --"fixed-strings")) 122 | .arg(flag!(alltext -t --"all-text").conflicts_with("all")) 123 | .arg(flag!(unrestricted -u --"unrestricted").conflicts_with("all")) 124 | .arg(flag!(searchbinary / --"search-binary")) 125 | .arg(flag!(searchhidden / --"hidden")) 126 | .arg(flag!(fileswith -l --"files-with-matches")) 127 | .arg(flag!(fileswithout -L --"files-without-matches").conflicts_with("fileswith")) 128 | .arg(flag!(count -c --"count").conflicts_with("fileswith")) 129 | .arg(flag!(follow -f --"follow")) 130 | .arg(flag!(nofollow / --"nofollow").conflicts_with("follow")) 131 | .arg(flag!(nocolor / --"nocolor")) 132 | .arg(flag!(color / --"color")) 133 | .arg(flag!(colorlineno / --"color-line-number").takes_value(true)) 134 | .arg(flag!(colorspan / --"color-match").takes_value(true)) 135 | .arg(flag!(colorpath / --"color-path").takes_value(true)) 136 | .arg(flag!(colorpunct / --"color-punct").takes_value(true)) 137 | .arg(flag!(casesens -s --"case-sensitive").conflicts_with("caseinsens")) 138 | .arg(flag!(casesmart -S --"smart-case").conflicts_with("casesens")) 139 | .arg(flag!(caseinsens -i --"ignore-case").conflicts_with("casesmart")) 140 | .arg(flag!(group / --"group")) 141 | .arg(flag!(nogroup / --"nogroup").conflicts_with("gorup")) 142 | .arg(flag!(heading -H --"heading")) 143 | .arg(flag!(noheading / --"noheading").conflicts_with("heading")) 144 | .arg(flag!(break / --"break")) 145 | .arg(flag!(nobreak / --"nobreak").conflicts_with("break")) 146 | .arg(flag!(ackmate / --"ackmate")) 147 | .arg(flag!(vimgrep / --"vimgrep")) 148 | .arg(flag!(maxcount -m --"max-count").takes_value(true)) 149 | .arg(flag!(before -B --"before").takes_value(true)) 150 | .arg(flag!(after -A --"after").takes_value(true)) 151 | .arg(flag!(context -C --"context").takes_value(true)) 152 | .arg(flag!(workers / --"workers").takes_value(true)) 153 | .arg(flag!(invert -v --"invert-match")) 154 | ; 155 | let m = app.get_matches(); 156 | 157 | // process option values 158 | let depth = m.value_of("depth").and_then(|v| v.parse::().ok()) 159 | .map(|v| v + 1) // 0 == immediate children 160 | .unwrap_or(usize::MAX); 161 | 162 | let mut binaries = m.is_present("searchbinary"); 163 | let mut hidden = m.is_present("searchhidden"); 164 | let mut ignores = true; 165 | if m.is_present("all") { 166 | binaries = true; 167 | ignores = false; 168 | } else if m.is_present("alltext") { 169 | ignores = false; 170 | } else if m.is_present("unrestricted") { 171 | binaries = true; 172 | hidden = true; 173 | ignores = false; 174 | } 175 | 176 | let mut casing = Casing::Smart; 177 | if m.is_present("caseinsens") { 178 | casing = Casing::Insensitive; 179 | } else if m.is_present("casesens") { 180 | casing = Casing::Default; 181 | } 182 | let mut literal = m.is_present("literal"); 183 | if m.is_present("fixedstrings") { 184 | literal = true; 185 | } 186 | 187 | let out_to_tty = atty::is(); 188 | let colors = if !m.is_present("color") && 189 | (!out_to_tty || m.is_present("nocolor")) 190 | { 191 | Colors::empty() 192 | } else { 193 | Colors::from( 194 | m.value_of("colorpath").unwrap_or("35"), 195 | m.value_of("colorlineno").unwrap_or("32"), 196 | m.value_of("colorspan").unwrap_or("4"), 197 | m.value_of("colorpunct").unwrap_or("36"), 198 | ) 199 | }; 200 | let mut heading = out_to_tty; 201 | let mut showbreak = out_to_tty; 202 | if m.is_present("heading") { 203 | heading = true; 204 | } else if m.is_present("noheading") { 205 | heading = false; 206 | } 207 | if m.is_present("break") { 208 | showbreak = true; 209 | } else if m.is_present("nobreak") { 210 | showbreak = false; 211 | } 212 | if m.is_present("group") { 213 | heading = true; 214 | showbreak = true; 215 | } else if m.is_present("nogroup") { 216 | heading = false; 217 | showbreak = false; 218 | } 219 | let maxcount = m.value_of("maxcount").and_then(|v| v.parse().ok()) 220 | .unwrap_or(usize::MAX); 221 | let mut before = m.value_of("before").and_then(|v| v.parse().ok()) 222 | .unwrap_or(0); 223 | let mut after = m.value_of("after").and_then(|v| v.parse().ok()) 224 | .unwrap_or(0); 225 | if m.is_present("context") { 226 | before = m.value_of("context").unwrap().parse().ok().unwrap_or(0); 227 | after = before; 228 | } 229 | 230 | let workers = m.value_of("workers").and_then(|v| v.parse().ok()) 231 | .unwrap_or(min(4, num_cpus::get())); 232 | 233 | Opts { 234 | // file related 235 | path: m.value_of("path").unwrap_or(".").into(), 236 | depth: depth, 237 | follow_links: m.is_present("follow"), 238 | do_binaries: binaries, 239 | do_hidden: hidden, 240 | // ignore file related 241 | check_ignores: ignores, 242 | // pattern related 243 | pattern: m.value_of("pattern").unwrap().into(), 244 | casing: casing, 245 | literal: literal, 246 | invert: m.is_present("invert"), 247 | // display related 248 | colors: Some(colors), 249 | only_files: if m.is_present("fileswith") { 250 | Some(true) 251 | } else if m.is_present("fileswithout") { 252 | Some(false) 253 | } else { None }, 254 | only_count: m.is_present("count"), 255 | show_break: showbreak, 256 | show_heading: heading, 257 | ackmate_format: m.is_present("ackmate"), 258 | vimgrep_format: m.is_present("vimgrep"), 259 | max_count: maxcount, 260 | before: before, 261 | after: after, 262 | // other 263 | workers: workers, 264 | } 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /src/search.rs: -------------------------------------------------------------------------------- 1 | // --------------------------------------------------------------------------------------- 2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl. 3 | // Licensed under the MIT license. 4 | // --------------------------------------------------------------------------------------- 5 | 6 | use std::cmp::min; 7 | use std::path::Path; 8 | 9 | #[cfg(feature = "pcre")] 10 | use pcre::Regex; 11 | #[cfg(not(feature = "pcre"))] 12 | use regex::bytes::Regex; 13 | 14 | use options::{Casing, Opts}; 15 | 16 | /// Represents a line that matched the pattern (maybe multiple times). 17 | #[derive(Debug)] 18 | pub struct Match { 19 | /// Line number in the file 20 | pub lineno: usize, 21 | /// Line text 22 | pub line: Vec, 23 | /// Spans (start, end) of matching parts in the line 24 | pub spans: Vec<(usize, usize)>, 25 | /// Context lines before the matched line 26 | pub before: Vec>, 27 | /// Context lines after the matched line 28 | pub after: Vec>, 29 | } 30 | 31 | impl Match { 32 | fn new(lineno: usize, line: Vec, spans: Vec<(usize, usize)>) -> Match { 33 | Match { 34 | lineno: lineno, 35 | line: line, 36 | spans: spans, 37 | before: Vec::new(), 38 | after: Vec::new(), 39 | } 40 | } 41 | } 42 | 43 | /// Represents all matches from a single file. 44 | #[derive(Debug)] 45 | pub struct FileResult { 46 | /// File name, relative to initial argument 47 | pub fname: String, 48 | /// Is the file binary? If yes, matches contains 0 or 1 element 49 | pub is_binary: bool, 50 | /// Do we provide (and print) context lines? 51 | pub has_context: bool, 52 | /// Matches relevant for printing 53 | pub matches: Vec, 54 | } 55 | 56 | impl FileResult { 57 | fn new(fname: String) -> FileResult { 58 | FileResult { 59 | fname: fname, 60 | is_binary: false, 61 | has_context: false, 62 | matches: Vec::new(), 63 | } 64 | } 65 | } 66 | 67 | /// Create a regular expression to search for matches from the given options. 68 | /// 69 | /// The final regex is determined by several options, such as casing options 70 | /// and options to take the search string literally. 71 | pub fn create_rx(opts: &Opts) -> Regex { 72 | let mut pattern = opts.pattern.to_owned(); 73 | if opts.literal { 74 | // escape regex meta-chars and create a normal pattern 75 | const ESCAPE: &'static str = ".?*+|^$(){}[]\\"; 76 | pattern = pattern.chars().map(|c| { 77 | if ESCAPE.find(c).is_some() { 78 | format!("\\{}", c) 79 | } else { 80 | format!("{}", c) 81 | } 82 | }).collect(); 83 | } 84 | if let Casing::Insensitive = opts.casing { 85 | pattern = format!("(?i){}", pattern); 86 | } else if let Casing::Smart = opts.casing { 87 | // smart casing: only case-insensitive when pattern contains no uppercase 88 | if !pattern.chars().any(|c| c.is_uppercase()) { 89 | pattern = format!("(?i){}", pattern); 90 | } 91 | } 92 | Regex::new(&pattern).unwrap() 93 | } 94 | 95 | /// Return normalized path: get rid of leading ./ and make leading // into /. 96 | fn normalized_path(path: &Path) -> String { 97 | let s = path.to_string_lossy(); 98 | if s.starts_with("./") { 99 | String::from(&s[2..]) 100 | } else if s.starts_with("//") { 101 | String::from(&s[1..]) 102 | } else { 103 | s.into_owned() 104 | } 105 | } 106 | 107 | /// Check file for binary-ness. 108 | /// 109 | /// Currently only null-bytes are recognized to constitute binary file content. 110 | /// However, this clashes with UTF-16 and UTF-32, so a more clever heuristic is 111 | /// required at some point. 112 | fn is_binary(buf: &[u8], len: usize) -> bool { 113 | if len == 0 { 114 | return false; 115 | } 116 | if len >= 3 && &buf[0..3] == b"\xEF\xBB\xBF" { 117 | // UTF-8 BOM 118 | return false; 119 | } 120 | let n = min(512, len); 121 | for b in buf[..n].iter() { 122 | if *b == b'\x00' { 123 | return true; // null byte always means binary 124 | } 125 | } 126 | false 127 | } 128 | 129 | /// Cache for collecting line offsets and slices within a u8 buffer. 130 | struct Lines<'a> { 131 | buf: &'a [u8], 132 | offset: usize, 133 | lines: Vec<(usize, &'a [u8])>, 134 | } 135 | 136 | impl<'a> Lines<'a> { 137 | pub fn new(buf: &[u8]) -> Lines { 138 | Lines { buf: buf, offset: 0, lines: Vec::with_capacity(100) } 139 | } 140 | 141 | /// Advance the line detection until we have at least lineno lines. 142 | /// Return false if EOF was reached before given number of lines. 143 | fn advance_to_line(&mut self, lineno: usize) -> bool { 144 | while self.lines.len() < lineno + 1 { 145 | if self.buf.len() == self.offset { 146 | return false; 147 | } 148 | let line = match self.buf[self.offset..].iter().position(|&x| x == b'\n') { 149 | Some(idx) => &self.buf[self.offset..self.offset+idx+1], 150 | None => &self.buf[self.offset..self.buf.len()], 151 | }; 152 | self.lines.push((self.offset, line)); 153 | self.offset += line.len(); 154 | } 155 | true 156 | } 157 | 158 | /// Advance to a given byte offset in the buffer. 159 | fn advance_to_offset(&mut self, offset: usize) { 160 | while self.offset < offset { 161 | let next_line = self.lines.len(); 162 | self.advance_to_line(next_line); 163 | } 164 | } 165 | 166 | /// Get line number of offset. 167 | pub fn get_lineno(&mut self, offset: usize) -> usize { 168 | self.advance_to_offset(offset); 169 | for (n, &(o, _)) in self.lines.iter().enumerate().rev() { 170 | if o <= offset { 171 | return n; 172 | } 173 | } 174 | return 0; 175 | } 176 | 177 | /// Get offset of line number. 178 | pub fn get_offset(&mut self, lineno: usize) -> usize { 179 | if self.advance_to_line(lineno) { 180 | self.lines[lineno].0 181 | } else { 182 | self.buf.len() 183 | } 184 | } 185 | 186 | /// Get an arbitrary line (maybe beyond end of file) as a string. 187 | pub fn get_line(&mut self, lineno: usize) -> Option> { 188 | if self.advance_to_line(lineno) { 189 | Some(self.lines[lineno].1.to_vec()) 190 | } else { 191 | None 192 | } 193 | } 194 | } 195 | 196 | /// Create a line-match for a given line with context lines determined by options. 197 | fn create_match(lines: &mut Lines, opts: &Opts, lineno: usize) -> Match { 198 | let line = lines.get_line(lineno).expect("matched line missing"); 199 | let mut new_match = Match::new(lineno + 1, line, vec![]); 200 | if opts.before > 0 { 201 | for lno in lineno.saturating_sub(opts.before)..lineno { 202 | new_match.before.push(lines.get_line(lno).unwrap()); 203 | } 204 | } 205 | if opts.after > 0 { 206 | for lno in lineno+1..lineno+opts.after+1 { 207 | if let Some(line) = lines.get_line(lno) { 208 | new_match.after.push(line); 209 | } 210 | } 211 | } 212 | new_match 213 | } 214 | 215 | /// Add a new match and maybe finish 216 | macro_rules! new_match { 217 | ($result:expr, $lines:expr, $opts:expr, $lineno:expr) => {{ 218 | if $result.matches.len() >= $opts.max_count { 219 | return $result; 220 | } 221 | let m = create_match(&mut $lines, $opts, $lineno); 222 | $result.matches.push(m); 223 | if $opts.only_files.is_some() { 224 | return $result; 225 | } 226 | }}; 227 | } 228 | 229 | /// Search a single file (represented as a u8 buffer) for matching lines. 230 | pub fn search(regex: &Regex, opts: &Opts, path: &Path, buf: &[u8]) -> FileResult { 231 | let len = buf.len(); 232 | let mut result = FileResult::new(normalized_path(path)); 233 | result.has_context = opts.before > 0 || opts.after > 0; 234 | // binary file? 235 | if is_binary(buf, len) { 236 | result.is_binary = true; 237 | // if we care for binaries at all 238 | if opts.do_binaries { 239 | if regex.is_match(buf) { 240 | // found a match: create a dummy match object, and 241 | // leave it there (we never need more info than 242 | // "matched" or "didn't match") 243 | result.matches.push(Match::new(0, "".into(), Vec::new())); 244 | } 245 | } 246 | } else { 247 | let mut lines = Lines::new(buf); 248 | let mut match_offset = 0; 249 | let mut matched_lineno = !0_usize; // let's say this is an invalid line number 250 | 251 | while let Some((mut start, mut end)) = regex.find(&buf[match_offset..]) { 252 | // back to offsets into buf 253 | start += match_offset; 254 | end += match_offset; 255 | 256 | // find the line numbers of the match 257 | let lineno = lines.get_lineno(start); 258 | let lineno_end = lines.get_lineno(end); 259 | if lineno != lineno_end { 260 | // match spans multiple lines: ignore it and start at the 261 | // beginning of the next line 262 | match_offset = lines.get_offset(lineno + 1); 263 | continue; 264 | } else if start == end { 265 | // are we at the end of the text? 266 | if start == buf.len() { 267 | break; 268 | } 269 | // zero-size match: match this line and go to next 270 | match_offset = lines.get_offset(lineno + 1); 271 | } else { 272 | // start next match where this one ended 273 | match_offset = end; 274 | } 275 | 276 | if opts.invert { 277 | if lineno != matched_lineno { 278 | // create matches for all inbetween lines: 279 | // - matched_lineno is the last one with a match 280 | // - lineno is the one with this match 281 | for inb_lineno in matched_lineno.wrapping_add(1)..lineno { 282 | new_match!(result, lines, opts, inb_lineno); 283 | } 284 | matched_lineno = lineno; 285 | } 286 | } else { 287 | // we have a new matching line? 288 | if lineno != matched_lineno { 289 | new_match!(result, lines, opts, lineno); 290 | matched_lineno = lineno; 291 | } 292 | // add this span to the match for this line 293 | if let Some(ref mut m) = result.matches.last_mut() { 294 | let line_offset = lines.get_offset(lineno); 295 | m.spans.push((start - line_offset, end - line_offset)); 296 | } 297 | } 298 | } 299 | if opts.invert { 300 | // create matches for final lines 301 | for inb_lineno in matched_lineno.wrapping_add(1)..lines.get_lineno(buf.len())+1 { 302 | new_match!(result, lines, opts, inb_lineno); 303 | } 304 | } 305 | } 306 | result 307 | } 308 | -------------------------------------------------------------------------------- /src/display.rs: -------------------------------------------------------------------------------- 1 | // --------------------------------------------------------------------------------------- 2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl. 3 | // Licensed under the MIT license. 4 | // --------------------------------------------------------------------------------------- 5 | 6 | use std::io::Write; 7 | use std::usize; 8 | 9 | use search::{FileResult, Match}; 10 | use options::Colors; 11 | 12 | macro_rules! w { 13 | ($out:expr, $first:expr, $($rest:expr),*) => { 14 | let _ = $out.write($first); 15 | w!($out, $($rest),*); 16 | }; 17 | ($out:expr, $first:expr) => { 18 | let _ = $out.write($first); 19 | } 20 | } 21 | 22 | fn w_maybe_nl(out: &mut T, line: &[u8]) { 23 | w!(out, line); 24 | if !line.ends_with(b"\n") { 25 | w!(out, b"\n"); 26 | } 27 | } 28 | 29 | /// A trait for printing search results to stdout. 30 | pub trait DisplayMode { 31 | /// Print results from a single file. 32 | fn print_result(&mut self, res: FileResult); 33 | } 34 | 35 | /// The default mode, used when printing to tty stdout. 36 | /// 37 | /// Uses grouping by file names by default and can use colors. Can print context. 38 | pub struct DefaultMode { 39 | colors: Colors, 40 | grouping: bool, 41 | heading: bool, 42 | is_first: bool, 43 | out: T, 44 | } 45 | 46 | impl DefaultMode { 47 | pub fn new(out: T, colors: Colors, grouping: bool, heading: bool) -> DefaultMode { 48 | DefaultMode { 49 | colors: colors, 50 | grouping: grouping, 51 | heading: heading, 52 | is_first: true, 53 | out: out, 54 | } 55 | } 56 | 57 | fn print_separator(&mut self) { 58 | w!(self.out, &self.colors.punct, b"--", &self.colors.reset, b"\n"); 59 | } 60 | 61 | /// Helper: print a line with matched spans highlighted. 62 | fn print_line_with_spans(&mut self, m: &Match) { 63 | if self.colors.empty { 64 | w_maybe_nl(&mut self.out, &m.line); 65 | } else { 66 | let mut pos = 0; 67 | for &(start, end) in &m.spans { 68 | if start > pos { 69 | w!(self.out, &m.line[pos..start]); 70 | } 71 | w!(self.out, &self.colors.span, &m.line[start..end], &self.colors.reset); 72 | pos = end; 73 | } 74 | w_maybe_nl(&mut self.out, &m.line[pos..]); 75 | } 76 | } 77 | 78 | /// Helper: print a match with custom callbacks for file header and match line. 79 | fn match_printer(&mut self, res: &FileResult, file_func: FF, line_func: LF) 80 | where FF: Fn(&mut Self, &FileResult), LF: Fn(&mut Self, &FileResult, usize, &'static [u8]) 81 | { 82 | // (maybe) print a heading for the whole file 83 | file_func(self, &res); 84 | // easy case without context lines 85 | if !res.has_context { 86 | for m in &res.matches { 87 | line_func(self, res, m.lineno, b":"); 88 | self.print_line_with_spans(&m); 89 | } 90 | return; 91 | } 92 | // remember the last printed line: to be able to print "--" separators 93 | // between non-consecutive lines in context mode 94 | let mut last_printed_line = 0; 95 | for (im, m) in res.matches.iter().enumerate() { 96 | // print before-context 97 | for (i, line) in m.before.iter().enumerate() { 98 | let lno = m.lineno - m.before.len() + i; 99 | if last_printed_line > 0 && lno > last_printed_line + 1 { 100 | self.print_separator(); 101 | } 102 | // only print this line if we didn't print it before, e.g. 103 | // as a match line or after-context line 104 | if lno > last_printed_line { 105 | line_func(self, res, lno, b"-"); 106 | w_maybe_nl(&mut self.out, &line); 107 | last_printed_line = lno; 108 | } 109 | } 110 | if last_printed_line > 0 && m.lineno > last_printed_line + 1 { 111 | self.print_separator(); 112 | } 113 | line_func(self, res, m.lineno, b":"); 114 | self.print_line_with_spans(&m); 115 | // print after-context 116 | last_printed_line = m.lineno; 117 | // determine line number of next match, since we have to stop 118 | // printing context *before* that line 119 | let next_match_line = if im < res.matches.len() - 1 { 120 | res.matches[im + 1].lineno 121 | } else { 122 | usize::MAX 123 | }; 124 | for (i, line) in m.after.iter().enumerate() { 125 | let lno = m.lineno + i + 1; 126 | // stop when we hit the next match 127 | if lno >= next_match_line { 128 | break; 129 | } 130 | line_func(self, res, lno, b"-"); 131 | w_maybe_nl(&mut self.out, &line); 132 | last_printed_line = lno; 133 | } 134 | } 135 | } 136 | } 137 | 138 | impl DisplayMode for DefaultMode { 139 | 140 | fn print_result(&mut self, res: FileResult) { 141 | // files with no matches never print anything 142 | if res.matches.is_empty() { 143 | return; 144 | } 145 | // grouping separator, but not on the first file 146 | if !self.is_first && self.grouping { 147 | w!(self.out, b"\n"); 148 | if res.has_context && !self.heading { 149 | // in context mode, we have to print a "--" separator between files 150 | self.print_separator(); 151 | } 152 | } 153 | if res.is_binary { 154 | // special message for binary files 155 | w!(self.out, b"Binary file ", res.fname.as_bytes(), b" matches.\n"); 156 | } else if self.heading { 157 | // headings mode: print file name first, then omit it from match lines 158 | self.match_printer(&res, |slf, res| { 159 | w!(slf.out, 160 | &slf.colors.path, res.fname.as_bytes(), &slf.colors.reset, b"\n"); 161 | }, |slf, _, lineno, sep| { 162 | w!(slf.out, 163 | &slf.colors.lineno, format!("{}", lineno).as_bytes(), &slf.colors.reset, 164 | &slf.colors.punct, sep, &slf.colors.reset); 165 | }); 166 | } else { 167 | // no headings mode: print file name on every match line 168 | self.match_printer(&res, |_, _| { }, |slf, res, lineno, sep| { 169 | w!(slf.out, 170 | &slf.colors.path, res.fname.as_bytes(), &slf.colors.reset, 171 | &slf.colors.punct, sep, &slf.colors.reset, 172 | &slf.colors.lineno, format!("{}", lineno).as_bytes(), &slf.colors.reset, 173 | &slf.colors.punct, sep, &slf.colors.reset); 174 | }); 175 | } 176 | self.is_first = false; 177 | } 178 | } 179 | 180 | /// The mode used for --ackmate mode. 181 | /// 182 | /// No colors, one matched line per line, all spans indicated numerically. 183 | pub struct AckMateMode { 184 | is_first: bool, 185 | out: T, 186 | } 187 | 188 | impl AckMateMode { 189 | pub fn new(out: T) -> AckMateMode { 190 | AckMateMode { 191 | is_first: true, 192 | out: out, 193 | } 194 | } 195 | } 196 | 197 | impl DisplayMode for AckMateMode { 198 | fn print_result(&mut self, res: FileResult) { 199 | if res.matches.is_empty() { 200 | return; 201 | } 202 | if !self.is_first { 203 | w!(self.out, b"\n"); 204 | } 205 | if res.is_binary { 206 | w!(self.out, b"Binary file ", res.fname.as_bytes(), b" matches.\n"); 207 | } else { 208 | w!(self.out, b":", res.fname.as_bytes()); 209 | for m in res.matches { 210 | let spans = m.spans.iter() 211 | .map(|&(s, e)| format!("{} {}", s, e - s)) 212 | .collect::>().join(","); 213 | w!(self.out, &format!("{};{}:", m.lineno, spans).as_bytes()); 214 | w_maybe_nl(&mut self.out, &m.line); 215 | } 216 | } 217 | self.is_first = false; 218 | } 219 | } 220 | 221 | /// The mode used for --vimgrep mode. 222 | /// 223 | /// No colors, one match per line (so lines with multiple matches are printed 224 | /// multiple times). 225 | pub struct VimGrepMode { 226 | out: T, 227 | } 228 | 229 | impl VimGrepMode { 230 | pub fn new(out: T) -> VimGrepMode { 231 | VimGrepMode { 232 | out: out, 233 | } 234 | } 235 | } 236 | 237 | impl DisplayMode for VimGrepMode { 238 | fn print_result(&mut self, res: FileResult) { 239 | if res.matches.is_empty() { 240 | return; 241 | } 242 | if res.is_binary { 243 | println!("Binary file {} matches.", res.fname); 244 | } else { 245 | for m in res.matches { 246 | for s in &m.spans { 247 | w!(self.out, &format!("{}:{}:{}:", res.fname, m.lineno, s.0 + 1).as_bytes()); 248 | w_maybe_nl(&mut self.out, &m.line); 249 | } 250 | } 251 | } 252 | } 253 | } 254 | 255 | /// The mode used for --files-with-matches and --files-without-matches. 256 | /// 257 | /// One file per line, no contents printed. 258 | pub struct FilesOnlyMode { 259 | colors: Colors, 260 | need_match: bool, 261 | out: T, 262 | } 263 | 264 | impl FilesOnlyMode { 265 | pub fn new(out: T, colors: Colors, need_match: bool) -> FilesOnlyMode { 266 | FilesOnlyMode { 267 | colors: colors, 268 | need_match: need_match, 269 | out: out, 270 | } 271 | } 272 | } 273 | 274 | impl DisplayMode for FilesOnlyMode { 275 | fn print_result(&mut self, res: FileResult) { 276 | if res.matches.is_empty() != self.need_match { 277 | w!(self.out, &self.colors.path, &res.fname.as_bytes(), &self.colors.reset, b"\n"); 278 | } 279 | } 280 | } 281 | 282 | /// The mode used for --count mode. 283 | /// 284 | /// One file per line, followed by match count (not matched line count). 285 | pub struct CountMode { 286 | colors: Colors, 287 | out: T, 288 | } 289 | 290 | impl CountMode { 291 | pub fn new(out: T, colors: Colors) -> CountMode { 292 | CountMode { 293 | colors: colors, 294 | out: out, 295 | } 296 | } 297 | } 298 | 299 | impl DisplayMode for CountMode { 300 | fn print_result(&mut self, res: FileResult) { 301 | if res.matches.is_empty() { 302 | return; 303 | } 304 | let count: usize = res.matches.iter().map(|m| m.spans.iter().count()) 305 | .fold(0, |a, v| a + v); 306 | w!(self.out, 307 | &self.colors.path, &res.fname.as_bytes(), &self.colors.reset, 308 | &self.colors.punct, b":", &self.colors.reset, 309 | &self.colors.lineno, &format!("{}", count).as_bytes(), &self.colors.reset, 310 | b"\n"); 311 | } 312 | } 313 | -------------------------------------------------------------------------------- /src/pcre.rs: -------------------------------------------------------------------------------- 1 | // --------------------------------------------------------------------------------------- 2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl. 3 | // Licensed under the MIT license. 4 | // --------------------------------------------------------------------------------------- 5 | 6 | // This file derived from rust-pcre: 7 | // Copyright 2015 The rust-pcre authors. 8 | // 9 | // Licensed under the Apache License, Version 2.0 or the MIT license 11 | // , at your 12 | // option. This file may not be copied, modified, or distributed 13 | // except according to those terms. 14 | 15 | use std::ffi::{CStr, CString}; 16 | use std::{fmt, ptr}; 17 | use libc::{c_char, c_int, c_uchar, c_void}; 18 | 19 | mod ffi { 20 | use libc::{c_char, c_int, c_uchar, c_ulong, c_void}; 21 | 22 | #[allow(non_camel_case_types)] 23 | pub type compile_options = c_int; 24 | #[allow(non_camel_case_types)] 25 | pub type exec_options = c_int; 26 | #[allow(non_camel_case_types)] 27 | pub type fullinfo_field = c_int; 28 | #[allow(non_camel_case_types)] 29 | pub type study_options = c_int; 30 | 31 | #[link(name = "pcre")] 32 | extern { 33 | pub static pcre_free: extern "C" fn(ptr: *mut c_void); 34 | 35 | pub fn pcre_compile(pattern: *const c_char, options: compile_options, 36 | errptr: *mut *const c_char, erroffset: *mut c_int, 37 | tableptr: *const c_uchar) -> *mut pcre; 38 | pub fn pcre_exec(code: *const pcre, extra: *const pcre_extra, subject: *const c_char, 39 | length: c_int, startoffset: c_int, options: exec_options, 40 | ovector: *mut c_int, ovecsize: c_int) -> c_int; 41 | pub fn pcre_free_study(extra: *mut pcre_extra); 42 | pub fn pcre_fullinfo(code: *const pcre, extra: *const pcre_extra, what: fullinfo_field, 43 | where_: *mut c_void) -> c_int; 44 | pub fn pcre_study(code: *const pcre, options: study_options, 45 | errptr: *mut *const c_char) -> *mut pcre_extra; 46 | } 47 | 48 | // pub const PCRE_UTF8: compile_options = 0x00000800; 49 | 50 | // PCRE_NO_UTF8_CHECK is both a compile and exec option 51 | pub const PCRE_NO_UTF8_CHECK: c_int = 0x00002000; 52 | 53 | pub const PCRE_ERROR_NOMATCH: c_int = -1; 54 | pub const PCRE_ERROR_NULL: c_int = -2; 55 | 56 | pub const PCRE_INFO_CAPTURECOUNT: fullinfo_field = 2; 57 | // pub const PCRE_INFO_NAMEENTRYSIZE: fullinfo_field = 7; 58 | // pub const PCRE_INFO_NAMECOUNT: fullinfo_field = 8; 59 | // pub const PCRE_INFO_NAMETABLE: fullinfo_field = 9; 60 | 61 | pub const PCRE_STUDY_JIT_COMPILE: c_int = 0x0001; 62 | // pub const PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE: c_int = 0x0002; 63 | // pub const PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE: c_int = 0x0004; 64 | // pub const PCRE_STUDY_EXTRA_NEEDED: c_int = 0x0008; 65 | 66 | #[allow(non_camel_case_types)] 67 | pub enum pcre {} 68 | 69 | #[allow(non_camel_case_types)] 70 | #[repr(C)] 71 | pub struct pcre_extra { 72 | flags: c_ulong, 73 | study_data: *mut c_void, 74 | match_limit: c_ulong, 75 | callout_data: *mut c_void, 76 | tables: *const c_uchar, 77 | match_limit_recursion_: c_ulong, 78 | mark: *mut *mut c_uchar, 79 | executable_jit: *mut c_void 80 | } 81 | } 82 | 83 | pub unsafe fn pcre_compile(pattern: *const c_char, options: ffi::compile_options, 84 | tableptr: *const c_uchar) -> Result<*mut ffi::pcre, (String, c_int)> { 85 | assert!(!pattern.is_null()); 86 | // the pattern is always UTF-8 87 | let options = options | ffi::PCRE_NO_UTF8_CHECK; 88 | let mut err: *const c_char = ptr::null(); 89 | let mut erroffset: c_int = 0; 90 | let code = ffi::pcre_compile(pattern, options, &mut err, &mut erroffset, tableptr); 91 | 92 | if code.is_null() { 93 | // "Otherwise, if compilation of a pattern fails, pcre_compile() returns 94 | // NULL, and sets the variable pointed to by errptr to point to a textual 95 | // error message. This is a static string that is part of the library. You 96 | // must not try to free it." 97 | Err((CStr::from_ptr(err).to_string_lossy().into_owned(), erroffset)) 98 | } else { 99 | assert!(!code.is_null()); 100 | assert_eq!(erroffset, 0); 101 | Ok(code) 102 | } 103 | } 104 | 105 | pub unsafe fn pcre_exec(code: *const ffi::pcre, extra: *const ffi::pcre_extra, 106 | subject: *const c_char, length: c_int, startoffset: c_int, 107 | options: ffi::compile_options, 108 | ovector: *mut c_int, ovecsize: c_int) -> Result { 109 | assert!(!code.is_null()); 110 | assert!(ovecsize >= 0 && ovecsize % 3 == 0); 111 | let options = options | ffi::PCRE_NO_UTF8_CHECK; 112 | let rc = ffi::pcre_exec(code, extra, subject, length, startoffset, options, ovector, ovecsize); 113 | if rc == ffi::PCRE_ERROR_NOMATCH { 114 | Ok(-1) 115 | } else if rc < 0 { 116 | Err(()) 117 | } else { 118 | Ok(rc) 119 | } 120 | } 121 | 122 | pub unsafe fn pcre_free(ptr: *mut c_void) { 123 | ffi::pcre_free(ptr); 124 | } 125 | 126 | pub unsafe fn pcre_free_study(extra: *mut ffi::pcre_extra) { 127 | ffi::pcre_free_study(extra); 128 | } 129 | 130 | pub unsafe fn pcre_fullinfo(code: *const ffi::pcre, extra: *const ffi::pcre_extra, 131 | what: ffi::fullinfo_field, where_: *mut c_void) { 132 | assert!(!code.is_null()); 133 | let rc = ffi::pcre_fullinfo(code, extra, what, where_); 134 | if rc < 0 && rc != ffi::PCRE_ERROR_NULL { 135 | panic!("pcre_fullinfo"); 136 | } 137 | } 138 | 139 | pub unsafe fn pcre_study(code: *const ffi::pcre, options: ffi::study_options) 140 | -> Result<*mut ffi::pcre_extra, String> { 141 | assert!(!code.is_null()); 142 | let converted_options = options; 143 | let mut err: *const c_char = ptr::null(); 144 | let extra = ffi::pcre_study(code, converted_options, &mut err); 145 | // "The third argument for pcre_study() is a pointer for an error message. If 146 | // studying succeeds (even if no data is returned), the variable it points to is 147 | // set to NULL. Otherwise it is set to point to a textual error message. This is 148 | // a static string that is part of the library. You must not try to free it." 149 | // http://pcre.org/pcre.txt 150 | if !err.is_null() { 151 | Err(CStr::from_ptr(err).to_string_lossy().into_owned()) 152 | } else { 153 | assert!(err.is_null()); 154 | Ok(extra) 155 | } 156 | } 157 | 158 | pub type Pcre = ffi::pcre; 159 | pub type PcreExtra = ffi::pcre_extra; 160 | pub type CompileOptions = ffi::compile_options; 161 | pub type ExecOptions = ffi::exec_options; 162 | pub type StudyOptions = ffi::study_options; 163 | 164 | /// Wrapper for libpcre's `pcre` object (representing a compiled regular expression). 165 | #[derive(Debug)] 166 | pub struct Regex { 167 | code: *const Pcre, 168 | extra: *mut PcreExtra, 169 | capture_count: c_int, 170 | } 171 | 172 | /// Represents a match of a subject string against a regular expression. 173 | #[allow(unused)] 174 | pub struct Match<'s> { 175 | subject: &'s [u8], 176 | partial_ovector: Vec, 177 | string_count: c_int 178 | } 179 | 180 | /// Iterator type for iterating matches within a subject string. 181 | pub struct MatchIterator<'r, 's> { 182 | regex: &'r Regex, 183 | subject: &'s [u8], 184 | offset: c_int, 185 | options: ExecOptions, 186 | ovector: Vec 187 | } 188 | 189 | #[derive(Debug)] 190 | pub struct CompilationError(String, c_int); 191 | 192 | impl fmt::Display for CompilationError { 193 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 194 | write!(f, "compilation failed at offset {}: {}", self.1, self.0) 195 | } 196 | } 197 | 198 | impl Regex { 199 | 200 | pub fn new(pattern: &str) -> Result { 201 | Regex::compile_with_options(pattern, 0).map(|mut rx| { 202 | rx.study_with_options(ffi::PCRE_STUDY_JIT_COMPILE); 203 | rx 204 | }) 205 | } 206 | 207 | pub fn compile_with_options(pattern: &str, options: CompileOptions) 208 | -> Result { 209 | let pattern_cstring = CString::new(pattern).unwrap(); 210 | // Use the default character tables. 211 | let tableptr: *const c_uchar = ptr::null(); 212 | match unsafe { pcre_compile(pattern_cstring.as_ptr(), options, tableptr) } { 213 | Err((errstr, offset)) => Err(CompilationError(errstr, offset)), 214 | Ok(mut_code) => { 215 | let code = mut_code as *const Pcre; 216 | assert!(!code.is_null()); 217 | 218 | // Default extra is null. 219 | let extra: *mut PcreExtra = ptr::null_mut(); 220 | let mut capture_count: c_int = 0; 221 | unsafe { 222 | pcre_fullinfo(code, extra as *const PcreExtra, ffi::PCRE_INFO_CAPTURECOUNT, 223 | &mut capture_count as *mut c_int as *mut c_void); 224 | } 225 | 226 | Ok(Regex { 227 | code: code, 228 | extra: extra, 229 | capture_count: capture_count, 230 | }) 231 | } 232 | } 233 | } 234 | 235 | #[inline] 236 | pub fn exec<'a>(&self, subject: &'a [u8]) -> Option> { 237 | self.exec_from(subject, 0) 238 | } 239 | 240 | #[inline] 241 | pub fn exec_from<'a>(&self, subject: &'a [u8], startoffset: usize) -> Option> { 242 | self.exec_from_with_options(subject, startoffset, 0) 243 | } 244 | 245 | #[inline] 246 | pub fn exec_from_with_options<'a>(&self, subject: &'a [u8], startoffset: usize, 247 | options: ExecOptions) -> Option> { 248 | let ovecsize = (self.capture_count + 1) * 3; 249 | let mut ovector = vec![0 as c_int; ovecsize as usize]; 250 | 251 | let rc = unsafe { 252 | pcre_exec(self.code, 253 | self.extra as *const PcreExtra, 254 | subject.as_ptr() as *const c_char, 255 | subject.len() as c_int, 256 | startoffset as c_int, 257 | options, 258 | ovector.as_mut_ptr(), 259 | ovecsize as c_int) 260 | }; 261 | match rc { 262 | Ok(rc) if rc >= 0 => { 263 | Some(Match { 264 | subject: subject, 265 | partial_ovector: ovector[..(((self.capture_count + 1) * 2) as usize)].to_vec(), 266 | string_count: rc 267 | }) 268 | } 269 | _ => { None } 270 | } 271 | } 272 | 273 | #[inline] 274 | pub fn find(&self, subject: &[u8]) -> Option<(usize, usize)> { 275 | self.exec(subject).map(|m| m.group_span(0)) 276 | } 277 | 278 | #[inline] 279 | pub fn is_match(&self, subject: &[u8]) -> bool { 280 | self.exec(subject).is_some() 281 | } 282 | 283 | pub fn study_with_options(&mut self, options: StudyOptions) -> bool { 284 | let extra = unsafe { 285 | // Free any current study data. 286 | pcre_free_study(self.extra as *mut PcreExtra); 287 | self.extra = ptr::null_mut(); 288 | pcre_study(self.code, options) 289 | }; 290 | match extra { 291 | Ok(extra) => { 292 | self.extra = extra; 293 | !extra.is_null() 294 | } 295 | Err(_) => false 296 | } 297 | } 298 | } 299 | 300 | impl Drop for Regex { 301 | fn drop(&mut self) { 302 | unsafe { 303 | pcre_free_study(self.extra as *mut PcreExtra); 304 | pcre_free(self.code as *mut Pcre as *mut c_void); 305 | } 306 | self.extra = ptr::null_mut(); 307 | self.code = ptr::null(); 308 | } 309 | } 310 | 311 | impl<'a> Match<'a> { 312 | pub fn group_start(&self, n: usize) -> usize { 313 | self.partial_ovector[(n * 2) as usize] as usize 314 | } 315 | 316 | pub fn group_end(&self, n: usize) -> usize { 317 | self.partial_ovector[(n * 2 + 1) as usize] as usize 318 | } 319 | 320 | pub fn group_span(&self, n: usize) -> (usize, usize) { 321 | (self.group_start(n), self.group_end(n)) 322 | } 323 | } 324 | 325 | impl<'r, 's> Clone for MatchIterator<'r, 's> { 326 | #[inline] 327 | fn clone(&self) -> MatchIterator<'r, 's> { 328 | MatchIterator { 329 | regex: self.regex, 330 | subject: self.subject, 331 | offset: self.offset, 332 | options: self.options.clone(), 333 | ovector: self.ovector.clone() 334 | } 335 | } 336 | } 337 | 338 | impl<'r, 's> Iterator for MatchIterator<'r, 's> { 339 | type Item = Match<'s>; 340 | 341 | /// Gets the next match. 342 | #[inline] 343 | fn next(&mut self) -> Option> { 344 | let rc = unsafe { 345 | pcre_exec(self.regex.code, 346 | self.regex.extra, 347 | self.subject.as_ptr() as *const c_char, 348 | self.subject.len() as c_int, 349 | self.offset, 350 | self.options, 351 | self.ovector.as_mut_ptr(), 352 | self.ovector.len() as c_int) 353 | }; 354 | match rc { 355 | Ok(rc) if rc >= 0 => { 356 | // Update the iterator state. 357 | self.offset = self.ovector[1]; 358 | 359 | let cc = self.regex.capture_count; 360 | Some(Match { 361 | subject: self.subject, 362 | partial_ovector: self.ovector[..(((cc + 1) * 2) as usize)].to_vec(), 363 | string_count: rc 364 | }) 365 | } 366 | _ => None 367 | } 368 | } 369 | } 370 | 371 | /// Read-only access is guaranteed to be thread-safe. 372 | unsafe impl Sync for Regex {} 373 | --------------------------------------------------------------------------------