├── .gitignore
├── TODO
├── Cargo.toml
├── compare.py
├── README.md
├── LICENSE
├── test.sh
└── src
    ├── main.rs
    ├── ignore.rs
    ├── options.rs
    ├── search.rs
    ├── display.rs
    └── pcre.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | oprofile_data
3 | callgrind.*
4 | Cargo.lock
5 | tst
6 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
 1 | TODOs
 2 | =====
 3 | 
 4 | * more test/bench infrastructure and cases
 5 | * help texts for command-line options
 6 | * comprehensive test suite and tests against grep, ag, ack
 7 | * more command-line options for ag compatibility
 8 | * error handling: no unwraps and "if let Ok", add messages to stderr
 9 | * ignore handling for more than gitignore files
10 | * better binary file detection
11 | * switching to other regex impl for complex things like backrefs
12 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "ruthenium"
 3 | version = "0.1.0"
 4 | authors = ["Georg Brandl <georg@python.org>"]
 5 | 
 6 | [[bin]]
 7 | name = "ru"
 8 | 
 9 | [features]
10 | default = []
11 | pcre = []
12 | 
13 | [profile.release]
14 | lto = true
15 | 
16 | [dependencies]
17 | libc = "*"
18 | atty = "*"
19 | walkdir = "*"
20 | memmap = "*"
21 | scoped-pool = "*"
22 | num_cpus = "*"
23 | glob = "*"
24 | regex = "*"
25 | 
26 | [dependencies.clap]
27 | version = "2"
28 | default_features = false
29 | features = []
30 | 


--------------------------------------------------------------------------------
/compare.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import difflib
 3 | import subprocess
 4 | 
 5 | args = sys.argv[1:]
 6 | 
 7 | print 'Running ack...'
 8 | output_ack = subprocess.Popen(['ack', '--smart-case'] + args, stdout=subprocess.PIPE).communicate()[0]
 9 | print 'Running ag...'
10 | output_ag = subprocess.Popen(['ag'] + args, stdout=subprocess.PIPE).communicate()[0]
11 | print 'Running ru...'
12 | output_ru = subprocess.Popen(['ru'] + args, stdout=subprocess.PIPE).communicate()[0]
13 | print 'Sorting...'
14 | output_ack = sorted(output_ack.splitlines())
15 | output_ag = sorted(output_ag.splitlines())
16 | output_ru = sorted(output_ru.splitlines())
17 | 
18 | for line in difflib.unified_diff(output_ag, output_ru):
19 |     print line
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Ruthenium, an Ack-like searcher
 2 | 
 3 | Ruthenium is an attempt to implement the well-known Perl tool `ack` in Rust.
 4 | 
 5 | When finished, it is supposed to show the strengths of Rust: for example, simple
 6 | and efficient concurrency without locks, and speed comparable to C programs --
 7 | in this case the C implementation known as `ag` or `the_silver_searcher`.
 8 | 
 9 | ## How to build
10 | 
11 | Run `cargo build --release`; the resulting binary is `target/release/ru`.
12 | 
13 | ## How to use
14 | 
15 | The resulting binary is linked statically against Rust dependencies, so it can
16 | be copied into a `bin` directory and used.
17 | 
18 | ### Command line
19 | 
20 | Command-line options are designed to be mostly compatible with Ag.  There are
21 | probably small differences, especially in the handling of ignore files.
22 | 
23 | ### Regex engines
24 | 
25 | Currently, the regex engine can be either Andrew Gallant's Rust implementation
26 | `regex` (the default) or PCRE (requires libpcre and its headers to be installed).
27 | Select the latter with the Cargo feature flag `pcre`, i.e. `--features pcre`.
28 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Georg Brandl
 2 | 
 3 | Permission is hereby granted, free of charge, to any
 4 | person obtaining a copy of this software and associated
 5 | documentation files (the "Software"), to deal in the
 6 | Software without restriction, including without
 7 | limitation the rights to use, copy, modify, merge,
 8 | publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software
10 | is furnished to do so, subject to the following
11 | conditions:
12 | 
13 | The above copyright notice and this permission notice
14 | shall be included in all copies or substantial portions
15 | of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 | DEALINGS IN THE SOFTWARE.
26 | 


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/zsh
 2 | # Times grep, ag and the ru binaries searching the `tst` directory for a pattern.
 3 | export RUST_BACKTRACE=1
 4 | NEEDLE="$1"  # pattern to search for: first script argument
 5 | if [ -z "$NEEDLE" ]; then NEEDLE=p.th; fi  # default pattern when none is given
 6 | 
 7 | run-timed() {  # run the given command under /usr/bin/time with a one-line report
 8 |   /usr/bin/time --format="%Us user %Ss system %P%% cpu %e total, max RSS %Mk" "$@"
 9 | }
10 | 
11 | run-grep() {  # recursive, case-insensitive extended-regex grep; output discarded
12 |   run-timed grep --color=always -n -E -ri "$@" $NEEDLE tst > /dev/null
13 | }
14 | 
15 | run-ag() {  # ag with the same extra options; output discarded
16 |   run-timed ag --color "$@" $NEEDLE tst > /dev/null
17 | }
18 | 
19 | run-ru-lr() {  # NOTE(review): Cargo.toml only declares `ru`; where is this built?
20 |   run-timed target/release/ru-line-regex --color "$@" $NEEDLE tst > /dev/null
21 | }
22 | 
23 | run-ru-lp() {  # NOTE(review): same question as for ru-line-regex
24 |   run-timed target/release/ru-line-pcre --color "$@" $NEEDLE tst > /dev/null
25 | }
26 | 
27 | run-ru() {  # the release build of this project; output discarded
28 |   run-timed target/release/ru --color "$@" $NEEDLE tst > /dev/null
29 | }
30 | 
31 | run-all() {  # time every tool in turn, passing the extra options to each
32 |   echo -n "Grep: "
33 |   run-grep "$@"
34 |   echo -n "Ag:   "
35 |   run-ag "$@"
36 |   echo -n "RuLR: "
37 |   run-ru-lr "$@"
38 |   echo -n "RuLP: "
39 |   run-ru-lp "$@"
40 |   echo -n "Ru:   "
41 |   run-ru "$@"
42 | }
43 | 
44 | #cargo build --release || exit 1
45 | 
46 | echo "List matches"
47 | run-all  # no extra options
48 | echo
49 | echo "List matches with context"
50 | run-all -C 10  # 10 lines of context
51 | echo
52 | echo "List inverted matches"
53 | run-all -v  # lines that do not match
54 | echo
55 | echo "List files"
56 | run-all -l  # only names of files with matches
57 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
  1 | // ---------------------------------------------------------------------------------------
  2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl.
  3 | // Licensed under the MIT license.
  4 | // ---------------------------------------------------------------------------------------
  5 | 
  6 | #[macro_use]
  7 | extern crate clap;
  8 | extern crate libc;
  9 | extern crate atty;
 10 | extern crate walkdir;
 11 | extern crate memmap;
 12 | extern crate scoped_pool;
 13 | extern crate num_cpus;
 14 | extern crate glob;
 15 | extern crate regex;
 16 | 
 17 | mod search;
 18 | mod ignore;
 19 | mod display;
 20 | mod options;
 21 | #[cfg(feature = "pcre")]
 22 | mod pcre;
 23 | 
 24 | use std::cmp::max;
 25 | use std::sync::mpsc::{sync_channel, SyncSender};
 26 | use std::thread;
 27 | use std::io::{stdout, BufWriter};
 28 | use memmap::{Mmap, Protection};
 29 | use scoped_pool::Pool;
 30 | use walkdir::WalkDirIterator;
 31 | 
 32 | use display::DisplayMode;
 33 | use search::FileResult;
 34 | use options::Opts;
 35 | 
 36 | 
 37 | /// Walk a directory (given in Opts) and check all found files.
 38 | ///
 39 | /// The channel is used to send result structs to the main thread, which gives
 40 | /// them to the DisplayMode for output.
 41 | ///
 42 | /// The thread of this function only does the directory walking, it spawns a
 43 | /// number of worker threads in a pool to grep individual files.
 44 | fn walk(chan: SyncSender<FileResult>, opts: &Opts) {
 45 |     // thread pool for individual file grep worker threads; the saturating
 46 |     // subtraction keeps `--workers 0` from underflowing the unsigned count
 47 |     let pool = Pool::new(max(opts.workers.saturating_sub(1), 1));
 48 |     // create the regex object
 49 |     let regex = search::create_rx(&opts);
 50 | 
 51 |     let walker = walkdir::WalkDir::new(&opts.path)
 52 |         .follow_links(opts.follow_links)
 53 |         .max_depth(opts.depth);
 54 |     pool.scoped(|scope| {
 55 |         let rx = &regex;  // borrow for closures
 56 |         // stack of directories being walked, maintained in the filter closure
 57 |         let mut parent_stack: Vec<::std::path::PathBuf> = Vec::new();
 58 |         // stack of Ignore structs per directory in parent_stack, they accumulate
 59 |         // XXX: add global ignores from cmdline and a config file here
 60 |         let mut ignore_stack = Vec::new();
 61 |         let walker = walker.into_iter().filter_entry(|entry| {
 62 |             let path = entry.path();
 63 |             // remove parents from stack that are not applicable anymore; a path
 64 |             // without parent (the root "/") must not panic, it just matches none
 65 |             let new_parent = path.parent();
 66 |             while parent_stack.last().map_or(false, |p| Some(p.as_path()) != new_parent) {
 67 |                 ignore_stack.pop();
 68 |                 parent_stack.pop();
 69 |             }
 70 |             // weed out hidden files (this is separate from ignored)
 71 |             if let Some(fname) = path.file_name() {
 72 |                 if !opts.do_hidden && fname.to_string_lossy().starts_with(".") {
 73 |                     return false;
 74 |                 }
 75 |             }
 76 |             // weed out ignored files and directories (if we return false here for
 77 |             // directories, the contents are pruned from the iterator)
 78 |             if opts.check_ignores && ignore::match_patterns(path, &ignore_stack) {
 79 |                 return false;
 80 |             }
 81 |             // we got a new dir? put it onto the stack
 82 |             if entry.file_type().is_dir() {
 83 |                 // read ignore patterns specific to this directory
 84 |                 ignore_stack.push(ignore::read_patterns(path));
 85 |                 parent_stack.push(path.to_path_buf());
 86 |             }
 87 |             true
 88 |         });
 89 |         for entry in walker.filter_map(|e| e.ok()) {
 90 |             // only touch normal files
 91 |             if !entry.file_type().is_file() {
 92 |                 continue;
 93 |             }
 94 |             // open and search file in one of the worker threads
 95 |             let ch = chan.clone();
 96 |             scope.execute(move || {
 97 |                 let path = entry.path();
 98 |                 if let Ok(map) = Mmap::open_path(path, Protection::Read) {
 99 |                     // SAFETY: the mapping is only read and `buf` does not outlive
100 |                     // `map`; NOTE(review): assumes nobody truncates the file meanwhile
101 |                     let buf = unsafe { map.as_slice() };
102 |                     let res = search::search(rx, &opts, path, buf);
103 |                     // the receiving loop in `run` outlives all senders
104 |                     ch.send(res).unwrap();
105 |                 }
106 |             });
107 |         }
108 |     });
109 | }
110 | 
111 | /// Drive one search: start the walker thread and print whatever it finds.
112 | ///
113 | /// Kept apart from `main` so that the concrete `DisplayMode` can be a generic
114 | /// parameter chosen by the caller.
115 | fn run<D: DisplayMode>(display: &mut D, opts: Opts) {
116 |     // Bounded channel: senders block once `4 * workers` results are pending,
117 |     // so results are not generated much faster than they can be printed.
118 |     let (sender, receiver) = sync_channel(4 * opts.workers as usize);
119 |     // the walker thread takes over the options and the sending end
120 |     thread::spawn(move || walk(sender, &opts));
121 |     // the iteration ends as soon as every sender has been dropped
122 |     for result in receiver.iter() {
123 |         display.print_result(result);
124 |     }
125 | }
126 | 
127 | /// Main entry point: parse the command line, then run the search with the
128 | /// display mode that the options select.
129 | fn main() {
130 |     let mut opts = Opts::from_cmdline();
131 |     // `from_cmdline` always stores `Some(..)` here, so this cannot fail
132 |     let colors = opts.colors.take().unwrap();
133 | 
134 |     let out = stdout();
135 |     let sink = BufWriter::new(out.lock());
136 | 
137 |     // the first applicable mode wins: count, file list, ackmate, vimgrep, default
138 |     if opts.only_count {
139 |         run(&mut display::CountMode::new(sink, colors), opts);
140 |     } else if let Some(with_matches) = opts.only_files {
141 |         run(&mut display::FilesOnlyMode::new(sink, colors, with_matches), opts);
142 |     } else if opts.ackmate_format {
143 |         run(&mut display::AckMateMode::new(sink), opts);
144 |     } else if opts.vimgrep_format {
145 |         run(&mut display::VimGrepMode::new(sink), opts);
146 |     } else {
147 |         let (show_break, show_heading) = (opts.show_break, opts.show_heading);
148 |         run(&mut display::DefaultMode::new(sink, colors, show_break, show_heading), opts);
149 |     }
150 | }
151 | 


--------------------------------------------------------------------------------
/src/ignore.rs:
--------------------------------------------------------------------------------
  1 | // ---------------------------------------------------------------------------------------
  2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl.
  3 | // Licensed under the MIT license.
  4 | // ---------------------------------------------------------------------------------------
  5 | 
  6 | use std::borrow::Cow;
  7 | use std::collections::BTreeSet;
  8 | use std::str::FromStr;
  9 | use std::fs::{File, metadata};
 10 | use std::io::{BufReader, BufRead};
 11 | use std::path::{Path, PathBuf};
 12 | use glob::{Pattern, MatchOptions};
 13 | 
 14 | 
 15 | /// The ignore patterns read from the ignore files of one directory, the `root`.
 16 | #[derive(Debug)]
 17 | pub struct Ignores {
 18 |     /// Directory the patterns belong to; glob patterns are matched relative to it
 19 |     root: PathBuf,
 20 |     /// Literal filenames to exclude (lines without glob characters or `/`)
 21 |     filenames: BTreeSet<String>,
 22 |     /// Literal file extensions to exclude, without the dot (from `*.ext` lines)
 23 |     extensions: BTreeSet<String>,
 24 |     /// Glob patterns to exclude (can have paths)
 25 |     patterns: Vec<Pattern>,
 26 |     /// "Negated patterns" (lines starting with `!`): matched after a file would
 27 |     /// be excluded; if one of them matches, the exclusion is canceled
 28 |     negated_patterns: Vec<Pattern>,
 29 | }
 30 | 
 31 | /// True if `s` is a plain name: it contains no glob metacharacter
 32 | /// (`*`, `?`, `[`, `]`) and no path separator `/`.
 33 | fn is_literal_filename(s: &str) -> bool { !s.chars().any(|c| "*?[]/".contains(c)) }
 34 | 
 35 | /// True if `s` is a plain extension: it contains no glob metacharacter
 36 | /// (`*`, `?`, `[`, `]`), no path separator `/` and no further `.`.
 37 | fn is_literal_extension(s: &str) -> bool { !s.chars().any(|c| "*?[]/.".contains(c)) }
 38 | 
 39 | /// Read gitignore-style patterns from a filename and add all recognized
 40 | /// patterns to the Ignores object.  Unreadable files or lines and patterns
 41 | /// that glob cannot compile are skipped silently.
 42 | fn read_git_patterns_from(path: &Path, ignores: &mut Ignores) {
 43 |     // add a complex pattern
 44 |     fn add_pat(line: &str, vec: &mut Vec<Pattern>) {
 45 |         let pat = Pattern::from_str(
 46 |             if line.starts_with("/") {
 47 |                 // anchored: paths are matched relative to the root, without "/"
 48 |                 Cow::Borrowed(&line[1..])
 49 |             } else {
 50 |                 // not anchored: start with "**/" so that glob matches any depth
 51 |                 Cow::Owned(String::from("**/") + line)
 52 |             }.as_ref());
 53 |         if let Ok(pat) = pat {
 54 |             vec.push(pat);
 55 |         }
 56 |     }
 57 |     if let Ok(file) = File::open(path) {
 58 |         for line in BufReader::new(file).lines().filter_map(|l| l.ok()) {
 59 |             // a trailing "/" merely says "directory"; the paths matched later
 60 |             // never end in one, so it has to go or the pattern cannot match
 61 |             let line = line.trim().trim_right_matches('/');
 62 |             // empty line or comment, ignore
 63 |             if line.is_empty() || line.starts_with("#") {
 64 |                 continue;
 65 |             }
 66 |             // negated pattern (no special casing for filenames/exts here)
 67 |             if line.starts_with("!") {
 68 |                 add_pat(&line[1..], &mut ignores.negated_patterns);
 69 |             // simple filename
 70 |             } else if is_literal_filename(line) {
 71 |                 ignores.filenames.insert(line.into());
 72 |             // simple *.ext
 73 |             } else if line.starts_with("*.") && is_literal_extension(&line[2..]) {
 74 |                 ignores.extensions.insert(line[2..].into());
 75 |             // complex non-negated pattern
 76 |             } else {
 77 |                 add_pat(line, &mut ignores.patterns);
 78 |             }
 79 |         }
 80 |     }
 81 | }
 82 | 
 83 | /// Read patterns from the ignore files present in `dir` (`.gitignore`, `.git/info/exclude`).
 84 | pub fn read_patterns(dir: &Path) -> Ignores {
 85 |     let mut result = Ignores {
 86 |         root: dir.to_path_buf(),
 87 |         filenames: BTreeSet::new(),
 88 |         extensions: BTreeSet::new(),
 89 |         patterns: Vec::new(),
 90 |         negated_patterns: Vec::new(),
 91 |     };
 92 |     for file in [".gitignore", ".git/info/exclude"].iter().map(|name| dir.join(name)) {
 93 |         if metadata(&file).map(|f| f.is_file()).unwrap_or(false) {
 94 |             read_git_patterns_from(&file, &mut result);
 95 |         }
 96 |     }
 97 |     result
 98 | }
 99 | 
100 | /// Return relative path from `base` to `path`.
101 | ///
102 | /// Returns `None` if `path` does not start with `base`.  The comparison works
103 | /// on whole path components, so `a/bc` is not considered to be inside `a/b`:
104 | ///
105 | /// ```text
106 | /// path = "a/b/c", base = "a"    =>  Some("b/c")
107 | /// path = "a/b",   base = "a/b"  =>  Some("")
108 | /// path = "a/bc",  base = "a/b"  =>  None
109 | /// ```
110 | ///
111 | /// This used to be a copy of a `std::path::Path` method that was unstable at
112 | /// the time.  The standard library now offers the operation as the stable
113 | /// `Path::strip_prefix`, so delegate to it instead of keeping a private copy.
114 | pub fn relative_path_from<'a, P: AsRef<Path>>(path: &'a Path, base: &'a P) -> Option<&'a Path>
115 | {
116 |     path.strip_prefix(base).ok()
117 | }
124 | 
125 | /// Match `path` against the ignore stack `ignores`, return true if match found.
126 | ///
127 | /// Each level of the stack is checked on its own: literal file names first,
128 | /// then literal extensions, then glob patterns.  The first level whose hit is
129 | /// not canceled by one of its own negated patterns decides.
130 | pub fn match_patterns(path: &Path, ignores: &[Ignores]) -> bool {
131 |     const OPTS: MatchOptions = MatchOptions {
132 |         case_sensitive: true,
133 |         require_literal_separator: true,
134 |         require_literal_leading_dot: false,
135 |     };
136 | 
137 |     let name = path.file_name().and_then(|s| s.to_str());
138 |     let ext = path.extension().and_then(|s| s.to_str());
139 | 
140 |     for ignore in ignores {
141 |         // glob patterns see the path relative to the directory they came from;
142 |         // a path outside of that directory matches none of them (no panic)
143 |         let matches_any = |patterns: &[Pattern]| {
144 |             relative_path_from(path, &ignore.root).map_or(false, |relpath| {
145 |                 patterns.iter().any(|pattern| pattern.matches_path_with(relpath, &OPTS))
146 |             })
147 |         };
148 |         let excluded = name.map_or(false, |n| ignore.filenames.contains(n)) ||
149 |             ext.map_or(false, |e| ignore.extensions.contains(e)) ||
150 |             matches_any(&ignore.patterns[..]);
151 |         // a negated pattern of the same level cancels the exclusion again
152 |         if excluded && !matches_any(&ignore.negated_patterns[..]) {
153 |             return true;
154 |         }
155 |     }
156 |     false
157 | }
166 | 


--------------------------------------------------------------------------------
/src/options.rs:
--------------------------------------------------------------------------------
  1 | // ---------------------------------------------------------------------------------------
  2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl.
  3 | // Licensed under the MIT license.
  4 | // ---------------------------------------------------------------------------------------
  5 | 
  6 | use std::cmp::min;
  7 | use std::usize;
  8 | 
  9 | use atty;
 10 | use clap::{App, AppSettings, Arg};
 11 | use num_cpus;
 12 | 
 13 | /// Contains the ANSI codes needed to set the terminal to a certain color.
 14 | #[derive(Clone)]
 15 | pub struct Colors {
 16 |     pub reset: Vec<u8>,   // sequence that resets all attributes ("ESC[0m")
 17 |     pub path: Vec<u8>,    // sequence built from the --color-path spec
 18 |     pub lineno: Vec<u8>,  // sequence built from the --color-line-number spec
 19 |     pub span: Vec<u8>,    // sequence built from the --color-match spec
 20 |     pub punct: Vec<u8>,   // sequence built from the --color-punct spec
 21 |     pub empty: bool,      // true if all sequences are empty (coloring is off)
 22 | }
 23 | 
 24 | impl Colors {
 25 |     /// Create a struct where every sequence is empty, i.e. no colors are emitted.
 26 |     fn empty() -> Colors {
 27 |         Colors {
 28 |             empty: true,
 29 |             reset: Vec::new(),
 30 |             path: Vec::new(),
 31 |             lineno: Vec::new(),
 32 |             span: Vec::new(),
 33 |             punct: Vec::new(),
 34 |         }
 35 |     }
 36 | 
 37 |     /// Create a struct from given color specs.  A spec is the payload of an
 38 |     /// ANSI color sequence, e.g. "01;31", and gets wrapped in `ESC [` .. `m`.
 39 |     fn from(path: &str, lineno: &str, span: &str, punct: &str) -> Colors {
 40 |         // turns one spec into the bytes of its complete escape sequence
 41 |         let esc = |spec: &str| format!("\x1b[{}m", spec).into_bytes();
 42 |         Colors {
 43 |             reset: b"\x1b[0m".to_vec(),
 44 |             path: esc(path), lineno: esc(lineno),
 45 |             span: esc(span), punct: esc(punct),
 46 |             empty: false,
 47 |         }
 48 |     }
 49 | }
 50 | 
 51 | /// Case-sensitivity matching options.
 52 | ///
 53 | /// Smart casing means insensitive as long as the pattern contains no uppercase
 54 | /// letters.
 55 | #[derive(Clone)]
 56 | pub enum Casing {
 57 |     Default,      // chosen for -s/--case-sensitive
 58 |     Smart,        // chosen when neither -i nor -s is given
 59 |     Insensitive,  // chosen for -i/--ignore-case
 60 | }
 61 | 
 62 | /// Holds all options for the search.
 63 | #[derive(Clone)]
 64 | pub struct Opts {
 65 |     // file related options
 66 |     pub path: String,           // where to start searching, "." if not given
 67 |     pub depth: usize,           // maximum depth handed to the directory walker
 68 |     pub follow_links: bool,     // set by -f/--follow
 69 |     pub do_binaries: bool,      // set by --search-binary, -a and -u
 70 |     pub do_hidden: bool,        // if false, names starting with "." are skipped
 71 |     // ignore file related options
 72 |     pub check_ignores: bool,    // honor ignore files (off for -a, -t and -u)
 73 |     // pattern related options
 74 |     pub pattern: String,        // the pattern as given on the command line
 75 |     pub casing: Casing,
 76 |     pub literal: bool,          // set by -Q and -F
 77 |     pub invert: bool,           // set by -v
 78 |     // display related options
 79 |     pub colors: Option<Colors>, // always Some after from_cmdline; main() takes it
 80 |     pub only_files: Option<bool>, // Some(true) for -l, Some(false) for -L
 81 |     pub only_count: bool,       // set by -c
 82 |     pub show_break: bool,
 83 |     pub show_heading: bool,
 84 |     pub ackmate_format: bool,   // set by --ackmate
 85 |     pub vimgrep_format: bool,   // set by --vimgrep
 86 |     pub max_count: usize,       // value of -m, usize::MAX if not given
 87 |     pub before: usize,          // value of -B (or -C), 0 if not given
 88 |     pub after: usize,           // value of -A (or -C), 0 if not given
 89 |     // others
 90 |     pub workers: usize,         // value of --workers, default min(4, CPUs)
 91 | }
 92 | 
 93 | /// Somewhat simpler creation of flag Args, e.g. `flag!(count -c --"count")`.
 94 | macro_rules! flag {
 95 |     ($n:ident -$f:ident) => {  // name and short option only
 96 |         Arg::with_name(stringify!($n)).short(stringify!($f))
 97 |     };
 98 |     ($n:ident -$f:ident --$l:expr) => {  // name, short and long option
 99 |         Arg::with_name(stringify!($n)).short(stringify!($f)).long($l)
100 |     };
101 |     ($n:ident / --$l:expr) => {  // name and long option only
102 |         Arg::with_name(stringify!($n)).long($l)
103 |     };
104 | }
105 | 
106 | impl Opts {
107 |     /// Parse the command line of the process into an `Opts`.
108 |     ///
109 |     /// Numeric option values that do not parse silently fall back to the default.
110 |     pub fn from_cmdline() -> Opts {
111 |         let version = format!("v{}", crate_version!());
112 |         // XXX: sort and group the arguments once they are all done
113 |         let app = App::new("Ruthenium")
114 |             .version(&*version)
115 |             .usage("ru [options] PATTERN [PATH]")
116 |             .about("Recursively search for a pattern, like ack")
117 |             .setting(AppSettings::UnifiedHelpMessage)
118 |             .setting(AppSettings::ArgRequiredElseHelp)  // seems to be not working
119 |             .arg(Arg::with_name("pattern").required(true).index(1))
120 |             .arg(Arg::with_name("path").index(2))
121 |             .arg(flag!(all -a --"all-types"))
122 |             .arg(flag!(depth / --"depth").takes_value(true))
123 |             .arg(flag!(literal -Q --"literal"))
124 |             .arg(flag!(fixedstrings -F --"fixed-strings"))
125 |             .arg(flag!(alltext -t --"all-text").conflicts_with("all"))
126 |             .arg(flag!(unrestricted -u --"unrestricted").conflicts_with("all"))
127 |             .arg(flag!(searchbinary / --"search-binary"))
128 |             .arg(flag!(searchhidden / --"hidden"))
129 |             .arg(flag!(fileswith -l --"files-with-matches"))
130 |             .arg(flag!(fileswithout -L --"files-without-matches").conflicts_with("fileswith"))
131 |             .arg(flag!(count -c --"count").conflicts_with("fileswith"))
132 |             .arg(flag!(follow -f --"follow"))
133 |             .arg(flag!(nofollow / --"nofollow").conflicts_with("follow"))
134 |             .arg(flag!(nocolor / --"nocolor"))
135 |             .arg(flag!(color / --"color"))
136 |             .arg(flag!(colorlineno / --"color-line-number").takes_value(true))
137 |             .arg(flag!(colorspan / --"color-match").takes_value(true))
138 |             .arg(flag!(colorpath / --"color-path").takes_value(true))
139 |             .arg(flag!(colorpunct / --"color-punct").takes_value(true))
140 |             .arg(flag!(casesens -s --"case-sensitive").conflicts_with("caseinsens"))
141 |             .arg(flag!(casesmart -S --"smart-case").conflicts_with("casesens"))
142 |             .arg(flag!(caseinsens -i --"ignore-case").conflicts_with("casesmart"))
143 |             .arg(flag!(group / --"group"))
144 |             .arg(flag!(nogroup / --"nogroup").conflicts_with("group"))
145 |             .arg(flag!(heading -H --"heading"))
146 |             .arg(flag!(noheading / --"noheading").conflicts_with("heading"))
147 |             .arg(flag!(break / --"break"))
148 |             .arg(flag!(nobreak / --"nobreak").conflicts_with("break"))
149 |             .arg(flag!(ackmate / --"ackmate"))
150 |             .arg(flag!(vimgrep / --"vimgrep"))
151 |             .arg(flag!(maxcount -m --"max-count").takes_value(true))
152 |             .arg(flag!(before -B --"before").takes_value(true))
153 |             .arg(flag!(after -A --"after").takes_value(true))
154 |             .arg(flag!(context -C --"context").takes_value(true))
155 |             .arg(flag!(workers / --"workers").takes_value(true))
156 |             .arg(flag!(invert -v --"invert-match"))
157 |             ;
158 |         let m = app.get_matches();
159 | 
160 |         // process option values
161 |         let depth = m.value_of("depth").and_then(|v| v.parse::<usize>().ok())
162 |                                        .map(|v| v + 1) // 0 == immediate children
163 |                                        .unwrap_or(usize::MAX);
164 | 
165 |         let mut binaries = m.is_present("searchbinary");
166 |         let mut hidden = m.is_present("searchhidden");
167 |         let mut ignores = true;
168 |         if m.is_present("all") {
169 |             binaries = true;
170 |             ignores = false;
171 |         } else if m.is_present("alltext") {
172 |             ignores = false;
173 |         } else if m.is_present("unrestricted") {
174 |             binaries = true;
175 |             hidden = true;
176 |             ignores = false;
177 |         }
178 | 
179 |         let mut casing = Casing::Smart;
180 |         if m.is_present("caseinsens") {
181 |             casing = Casing::Insensitive;
182 |         } else if m.is_present("casesens") {
183 |             casing = Casing::Default;
184 |         }
185 |         let literal = m.is_present("literal") || m.is_present("fixedstrings");
186 | 
187 |         let out_to_tty = atty::is();
188 |         let colors = if !m.is_present("color") &&
189 |             (!out_to_tty || m.is_present("nocolor"))
190 |         {
191 |             Colors::empty()
192 |         } else {
193 |             Colors::from(
194 |                 m.value_of("colorpath").unwrap_or("35"),
195 |                 m.value_of("colorlineno").unwrap_or("32"),
196 |                 m.value_of("colorspan").unwrap_or("4"),
197 |                 m.value_of("colorpunct").unwrap_or("36"),
198 |             )
199 |         };
200 |         let mut heading = out_to_tty;
201 |         let mut showbreak = out_to_tty;
202 |         if m.is_present("heading") {
203 |             heading = true;
204 |         } else if m.is_present("noheading") {
205 |             heading = false;
206 |         }
207 |         if m.is_present("break") {
208 |             showbreak = true;
209 |         } else if m.is_present("nobreak") {
210 |             showbreak = false;
211 |         }
212 |         if m.is_present("group") {
213 |             heading = true;
214 |             showbreak = true;
215 |         } else if m.is_present("nogroup") {
216 |             heading = false;
217 |             showbreak = false;
218 |         }
219 |         let maxcount = m.value_of("maxcount").and_then(|v| v.parse().ok())
220 |                                              .unwrap_or(usize::MAX);
221 |         let mut before = m.value_of("before").and_then(|v| v.parse().ok())
222 |                                              .unwrap_or(0);
223 |         let mut after = m.value_of("after").and_then(|v| v.parse().ok())
224 |                                            .unwrap_or(0);
225 |         if m.is_present("context") {
226 |             before = m.value_of("context").unwrap().parse().ok().unwrap_or(0);
227 |             after = before;
228 |         }
229 | 
230 |         let workers = m.value_of("workers").and_then(|v| v.parse().ok())
231 |                                            .unwrap_or(min(4, num_cpus::get()));
232 | 
233 |         Opts {
234 |             // file related
235 |             path: m.value_of("path").unwrap_or(".").into(),
236 |             depth: depth,
237 |             follow_links: m.is_present("follow"),
238 |             do_binaries: binaries,
239 |             do_hidden: hidden,
240 |             // ignore file related
241 |             check_ignores: ignores,
242 |             // pattern related
243 |             pattern: m.value_of("pattern").unwrap().into(),
244 |             casing: casing,
245 |             literal: literal,
246 |             invert: m.is_present("invert"),
247 |             // display related
248 |             colors: Some(colors),
249 |             only_files: if m.is_present("fileswith") {
250 |                 Some(true)
251 |             } else if m.is_present("fileswithout") {
252 |                 Some(false)
253 |             } else { None },
254 |             only_count: m.is_present("count"),
255 |             show_break: showbreak,
256 |             show_heading: heading,
257 |             ackmate_format: m.is_present("ackmate"),
258 |             vimgrep_format: m.is_present("vimgrep"),
259 |             max_count: maxcount,
260 |             before: before,
261 |             after: after,
262 |             // other
263 |             workers: workers,
264 |         }
265 |     }
266 | }
267 | 


--------------------------------------------------------------------------------
/src/search.rs:
--------------------------------------------------------------------------------
  1 | // ---------------------------------------------------------------------------------------
  2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl.
  3 | // Licensed under the MIT license.
  4 | // ---------------------------------------------------------------------------------------
  5 | 
  6 | use std::cmp::min;
  7 | use std::path::Path;
  8 | 
  9 | #[cfg(feature = "pcre")]
 10 | use pcre::Regex;
 11 | #[cfg(not(feature = "pcre"))]
 12 | use regex::bytes::Regex;
 13 | 
 14 | use options::{Casing, Opts};
 15 | 
 16 | /// Represents a line that matched the pattern (maybe multiple times).
 17 | #[derive(Debug)]
 18 | pub struct Match {
 19 |     /// Line number in the file
 20 |     pub lineno: usize,
 21 |     /// Line text
 22 |     pub line: Vec<u8>,
 23 |     /// Spans (start, end) of matching parts in the line
 24 |     pub spans: Vec<(usize, usize)>,
 25 |     /// Context lines before the matched line
 26 |     pub before: Vec<Vec<u8>>,
 27 |     /// Context lines after the matched line
 28 |     pub after: Vec<Vec<u8>>,
 29 | }
 30 | 
 31 | impl Match {
 32 |     fn new(lineno: usize, line: Vec<u8>, spans: Vec<(usize, usize)>) -> Match {
 33 |         Match {
 34 |             lineno: lineno,
 35 |             line: line,
 36 |             spans: spans,
 37 |             before: Vec::new(),
 38 |             after: Vec::new(),
 39 |         }
 40 |     }
 41 | }
 42 | 
 43 | /// Represents all matches from a single file.
 44 | #[derive(Debug)]
 45 | pub struct FileResult {
 46 |     /// File name, relative to initial argument
 47 |     pub fname: String,
 48 |     /// Is the file binary?  If yes, matches contains 0 or 1 element
 49 |     pub is_binary: bool,
 50 |     /// Do we provide (and print) context lines?
 51 |     pub has_context: bool,
 52 |     /// Matches relevant for printing
 53 |     pub matches: Vec<Match>,
 54 | }
 55 | 
 56 | impl FileResult {
 57 |     fn new(fname: String) -> FileResult {
 58 |         FileResult {
 59 |             fname: fname,
 60 |             is_binary: false,
 61 |             has_context: false,
 62 |             matches: Vec::new(),
 63 |         }
 64 |     }
 65 | }
 66 | 
 67 | /// Create a regular expression to search for matches from the given options.
 68 | ///
 69 | /// The final regex is determined by several options, such as casing options
 70 | /// and options to take the search string literally.
 71 | pub fn create_rx(opts: &Opts) -> Regex {
 72 |     let mut pattern = opts.pattern.to_owned();
 73 |     if opts.literal {
 74 |         // escape regex meta-chars and create a normal pattern
 75 |         const ESCAPE: &'static str = ".?*+|^$(){}[]\\";
 76 |         pattern = pattern.chars().map(|c| {
 77 |             if ESCAPE.find(c).is_some() {
 78 |                 format!("\\{}", c)
 79 |             } else {
 80 |                 format!("{}", c)
 81 |             }
 82 |         }).collect();
 83 |     }
 84 |     if let Casing::Insensitive = opts.casing {
 85 |         pattern = format!("(?i){}", pattern);
 86 |     } else if let Casing::Smart = opts.casing {
 87 |         // smart casing: only case-insensitive when pattern contains no uppercase
 88 |         if !pattern.chars().any(|c| c.is_uppercase()) {
 89 |             pattern = format!("(?i){}", pattern);
 90 |         }
 91 |     }
 92 |     Regex::new(&pattern).unwrap()
 93 | }
 94 | 
 95 | /// Return normalized path: get rid of leading ./ and make leading // into /.
 96 | fn normalized_path(path: &Path) -> String {
 97 |     let s = path.to_string_lossy();
 98 |     if s.starts_with("./") {
 99 |         String::from(&s[2..])
100 |     } else if s.starts_with("//") {
101 |         String::from(&s[1..])
102 |     } else {
103 |         s.into_owned()
104 |     }
105 | }
106 | 
/// Decide whether a buffer holds binary content.
///
/// Only the first `len` bytes of `buf` are considered.  An empty buffer and
/// a buffer starting with the UTF-8 byte order mark count as text; otherwise
/// a null byte within the first 512 bytes marks the buffer as binary.  Since
/// UTF-16 and UTF-32 text contains null bytes too, a more clever heuristic
/// is required at some point.
fn is_binary(buf: &[u8], len: usize) -> bool {
    let has_utf8_bom = len >= 3 && &buf[..3] == b"\xEF\xBB\xBF";
    if len == 0 || has_utf8_bom {
        return false;
    }
    // a null byte always means binary
    buf[..min(512, len)].iter().any(|&byte| byte == b'\x00')
}
128 | 
/// Cache for collecting line offsets and slices within a u8 buffer.
///
/// Lines are split off lazily: the buffer is only scanned as far as the
/// requested line number or byte offset requires.
struct Lines<'a> {
    /// The buffer that is split into lines.
    buf: &'a [u8],
    /// Offset of the first byte that has not been assigned to a line yet.
    offset: usize,
    /// Start offset and content (including the newline, if any) of every
    /// line found so far.
    lines: Vec<(usize, &'a [u8])>,
}

impl<'a> Lines<'a> {
    /// Create an empty cache for `buf`; nothing is scanned yet.
    pub fn new(buf: &[u8]) -> Lines {
        Lines { buf: buf, offset: 0, lines: Vec::with_capacity(100) }
    }

    /// Advance the line detection until we have at least lineno + 1 lines,
    /// i.e. until the (0-based) line `lineno` is cached.
    /// Return false if EOF was reached before that line.
    fn advance_to_line(&mut self, lineno: usize) -> bool {
        let buf = self.buf;
        while self.lines.len() <= lineno {
            if self.offset == buf.len() {
                return false;
            }
            let rest = &buf[self.offset..];
            // a line includes its newline; the last line may lack one
            let line = match rest.iter().position(|&x| x == b'\n') {
                Some(idx) => &rest[..idx + 1],
                None      => rest,
            };
            self.lines.push((self.offset, line));
            self.offset += line.len();
        }
        true
    }

    /// Advance until all bytes before `offset` belong to a cached line, or
    /// until EOF if `offset` lies beyond the end of the buffer.
    fn advance_to_offset(&mut self, offset: usize) {
        while self.offset < offset {
            let next_line = self.lines.len();
            if !self.advance_to_line(next_line) {
                // EOF: no further progress is possible, so stop instead of
                // spinning forever on an out-of-range offset
                break;
            }
        }
    }

    /// Get the (0-based) line number of a byte offset.
    ///
    /// The last cached line starting at or before `offset` is reported.  If
    /// `offset` is exactly the start of a line that has not been scanned
    /// yet, this is the preceding line.  Returns 0 if no line is cached.
    pub fn get_lineno(&mut self, offset: usize) -> usize {
        self.advance_to_offset(offset);
        for (n, &(start, _)) in self.lines.iter().enumerate().rev() {
            if start <= offset {
                return n;
            }
        }
        0
    }

    /// Get the start offset of a line, or the buffer length if the buffer
    /// has no such line.
    pub fn get_offset(&mut self, lineno: usize) -> usize {
        if self.advance_to_line(lineno) {
            self.lines[lineno].0
        } else {
            self.buf.len()
        }
    }

    /// Get a copy of an arbitrary line, or None if it is beyond end of file.
    pub fn get_line(&mut self, lineno: usize) -> Option<Vec<u8>> {
        if self.advance_to_line(lineno) {
            Some(self.lines[lineno].1.to_vec())
        } else {
            None
        }
    }
}
195 | 
196 | /// Create a line-match for a given line with context lines determined by options.
197 | fn create_match(lines: &mut Lines, opts: &Opts, lineno: usize) -> Match {
198 |     let line = lines.get_line(lineno).expect("matched line missing");
199 |     let mut new_match = Match::new(lineno + 1, line, vec![]);
200 |     if opts.before > 0 {
201 |         for lno in lineno.saturating_sub(opts.before)..lineno {
202 |             new_match.before.push(lines.get_line(lno).unwrap());
203 |         }
204 |     }
205 |     if opts.after > 0 {
206 |         for lno in lineno+1..lineno+opts.after+1 {
207 |             if let Some(line) = lines.get_line(lno) {
208 |                 new_match.after.push(line);
209 |             }
210 |         }
211 |     }
212 |     new_match
213 | }
214 | 
/// Record a match for line `$lineno` in `$result`, possibly finishing the
/// search.
///
/// Expands to an early `return $result` from the enclosing function if
/// `$opts.max_count` matches were already collected (nothing is added
/// then), or, after adding the match, if only file names are requested.
macro_rules! new_match {
    ($result:expr, $lines:expr, $opts:expr, $lineno:expr) => {{
        if $opts.max_count <= $result.matches.len() {
            return $result;
        }
        $result.matches.push(create_match(&mut $lines, $opts, $lineno));
        // one match is enough to decide whether to list the file
        if $opts.only_files.is_some() {
            return $result;
        }
    }};
}
228 | 
229 | /// Search a single file (represented as a u8 buffer) for matching lines.
230 | pub fn search(regex: &Regex, opts: &Opts, path: &Path, buf: &[u8]) -> FileResult {
231 |     let len = buf.len();
232 |     let mut result = FileResult::new(normalized_path(path));
233 |     result.has_context = opts.before > 0 || opts.after > 0;
234 |     // binary file?
235 |     if is_binary(buf, len) {
236 |         result.is_binary = true;
237 |         // if we care for binaries at all
238 |         if opts.do_binaries {
239 |             if regex.is_match(buf) {
240 |                 // found a match: create a dummy match object, and
241 |                 // leave it there (we never need more info than
242 |                 // "matched" or "didn't match")
243 |                 result.matches.push(Match::new(0, "".into(), Vec::new()));
244 |             }
245 |         }
246 |     } else {
247 |         let mut lines = Lines::new(buf);
248 |         let mut match_offset = 0;
249 |         let mut matched_lineno = !0_usize;  // let's say this is an invalid line number
250 | 
251 |         while let Some((mut start, mut end)) = regex.find(&buf[match_offset..]) {
252 |             // back to offsets into buf
253 |             start += match_offset;
254 |             end += match_offset;
255 | 
256 |             // find the line numbers of the match
257 |             let lineno = lines.get_lineno(start);
258 |             let lineno_end = lines.get_lineno(end);
259 |             if lineno != lineno_end {
260 |                 // match spans multiple lines: ignore it and start at the
261 |                 // beginning of the next line
262 |                 match_offset = lines.get_offset(lineno + 1);
263 |                 continue;
264 |             } else if start == end {
265 |                 // are we at the end of the text?
266 |                 if start == buf.len() {
267 |                     break;
268 |                 }
269 |                 // zero-size match: match this line and go to next
270 |                 match_offset = lines.get_offset(lineno + 1);
271 |             } else {
272 |                 // start next match where this one ended
273 |                 match_offset = end;
274 |             }
275 | 
276 |             if opts.invert {
277 |                 if lineno != matched_lineno {
278 |                     // create matches for all inbetween lines:
279 |                     // - matched_lineno is the last one with a match
280 |                     // - lineno is the one with this match
281 |                     for inb_lineno in matched_lineno.wrapping_add(1)..lineno {
282 |                         new_match!(result, lines, opts, inb_lineno);
283 |                     }
284 |                     matched_lineno = lineno;
285 |                 }
286 |             } else {
287 |                 // we have a new matching line?
288 |                 if lineno != matched_lineno {
289 |                     new_match!(result, lines, opts, lineno);
290 |                     matched_lineno = lineno;
291 |                 }
292 |                 // add this span to the match for this line
293 |                 if let Some(ref mut m) = result.matches.last_mut() {
294 |                     let line_offset = lines.get_offset(lineno);
295 |                     m.spans.push((start - line_offset, end - line_offset));
296 |                 }
297 |             }
298 |         }
299 |         if opts.invert {
300 |             // create matches for final lines
301 |             for inb_lineno in matched_lineno.wrapping_add(1)..lines.get_lineno(buf.len())+1 {
302 |                 new_match!(result, lines, opts, inb_lineno);
303 |             }
304 |         }
305 |     }
306 |     result
307 | }
308 | 


--------------------------------------------------------------------------------
/src/display.rs:
--------------------------------------------------------------------------------
  1 | // ---------------------------------------------------------------------------------------
  2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl.
  3 | // Licensed under the MIT license.
  4 | // ---------------------------------------------------------------------------------------
  5 | 
  6 | use std::io::Write;
  7 | use std::usize;
  8 | 
  9 | use search::{FileResult, Match};
 10 | use options::Colors;
 11 | 
 12 | macro_rules! w {
 13 |     ($out:expr, $first:expr, $($rest:expr),*) => {
 14 |         let _ = $out.write($first);
 15 |         w!($out, $($rest),*);
 16 |     };
 17 |     ($out:expr, $first:expr) => {
 18 |         let _ = $out.write($first);
 19 |     }
 20 | }
 21 | 
 22 | fn w_maybe_nl<T: Write>(out: &mut T, line: &[u8]) {
 23 |     w!(out, line);
 24 |     if !line.ends_with(b"\n") {
 25 |         w!(out, b"\n");
 26 |     }
 27 | }
 28 | 
/// A trait for printing search results.
///
/// Each output format is one implementation of this trait.
pub trait DisplayMode {
    /// Print results from a single file.
    ///
    /// The result is taken by value, so the implementation may consume it.
    fn print_result(&mut self, res: FileResult);
}
 34 | 
 35 | /// The default mode, used when printing to tty stdout.
 36 | ///
 37 | /// Uses grouping by file names by default and can use colors.  Can print context.
 38 | pub struct DefaultMode<T: Write> {
 39 |     colors: Colors,
 40 |     grouping: bool,
 41 |     heading: bool,
 42 |     is_first: bool,
 43 |     out: T,
 44 | }
 45 | 
 46 | impl<T: Write> DefaultMode<T> {
 47 |     pub fn new(out: T, colors: Colors, grouping: bool, heading: bool) -> DefaultMode<T> {
 48 |         DefaultMode {
 49 |             colors: colors,
 50 |             grouping: grouping,
 51 |             heading: heading,
 52 |             is_first: true,
 53 |             out: out,
 54 |         }
 55 |     }
 56 | 
 57 |     fn print_separator(&mut self) {
 58 |         w!(self.out, &self.colors.punct, b"--", &self.colors.reset, b"\n");
 59 |     }
 60 | 
 61 |     /// Helper: print a line with matched spans highlighted.
 62 |     fn print_line_with_spans(&mut self, m: &Match) {
 63 |         if self.colors.empty {
 64 |             w_maybe_nl(&mut self.out, &m.line);
 65 |         } else {
 66 |             let mut pos = 0;
 67 |             for &(start, end) in &m.spans {
 68 |                 if start > pos {
 69 |                     w!(self.out, &m.line[pos..start]);
 70 |                 }
 71 |                 w!(self.out, &self.colors.span, &m.line[start..end], &self.colors.reset);
 72 |                 pos = end;
 73 |             }
 74 |             w_maybe_nl(&mut self.out, &m.line[pos..]);
 75 |         }
 76 |     }
 77 | 
 78 |     /// Helper: print a match with custom callbacks for file header and match line.
 79 |     fn match_printer<FF, LF>(&mut self, res: &FileResult, file_func: FF, line_func: LF)
 80 |         where FF: Fn(&mut Self, &FileResult), LF: Fn(&mut Self, &FileResult, usize, &'static [u8])
 81 |     {
 82 |         // (maybe) print a heading for the whole file
 83 |         file_func(self, &res);
 84 |         // easy case without context lines
 85 |         if !res.has_context {
 86 |             for m in &res.matches {
 87 |                 line_func(self, res, m.lineno, b":");
 88 |                 self.print_line_with_spans(&m);
 89 |             }
 90 |             return;
 91 |         }
 92 |         // remember the last printed line: to be able to print "--" separators
 93 |         // between non-consecutive lines in context mode
 94 |         let mut last_printed_line = 0;
 95 |         for (im, m) in res.matches.iter().enumerate() {
 96 |             // print before-context
 97 |             for (i, line) in m.before.iter().enumerate() {
 98 |                 let lno = m.lineno - m.before.len() + i;
 99 |                 if last_printed_line > 0 && lno > last_printed_line + 1 {
100 |                     self.print_separator();
101 |                 }
102 |                 // only print this line if we didn't print it before, e.g.
103 |                 // as a match line or after-context line
104 |                 if lno > last_printed_line {
105 |                     line_func(self, res, lno, b"-");
106 |                     w_maybe_nl(&mut self.out, &line);
107 |                     last_printed_line = lno;
108 |                 }
109 |             }
110 |             if last_printed_line > 0 && m.lineno > last_printed_line + 1 {
111 |                 self.print_separator();
112 |             }
113 |             line_func(self, res, m.lineno, b":");
114 |             self.print_line_with_spans(&m);
115 |             // print after-context
116 |             last_printed_line = m.lineno;
117 |             // determine line number of next match, since we have to stop
118 |             // printing context *before* that line
119 |             let next_match_line = if im < res.matches.len() - 1 {
120 |                 res.matches[im + 1].lineno
121 |             } else {
122 |                 usize::MAX
123 |             };
124 |             for (i, line) in m.after.iter().enumerate() {
125 |                 let lno = m.lineno + i + 1;
126 |                 // stop when we hit the next match
127 |                 if lno >= next_match_line {
128 |                     break;
129 |                 }
130 |                 line_func(self, res, lno, b"-");
131 |                 w_maybe_nl(&mut self.out, &line);
132 |                 last_printed_line = lno;
133 |             }
134 |         }
135 |     }
136 | }
137 | 
138 | impl<T: Write> DisplayMode for DefaultMode<T> {
139 | 
140 |     fn print_result(&mut self, res: FileResult) {
141 |         // files with no matches never print anything
142 |         if res.matches.is_empty() {
143 |             return;
144 |         }
145 |         // grouping separator, but not on the first file
146 |         if !self.is_first && self.grouping {
147 |             w!(self.out, b"\n");
148 |             if res.has_context && !self.heading {
149 |                 // in context mode, we have to print a "--" separator between files
150 |                 self.print_separator();
151 |             }
152 |         }
153 |         if res.is_binary {
154 |             // special message for binary files
155 |             w!(self.out, b"Binary file ", res.fname.as_bytes(), b" matches.\n");
156 |         } else if self.heading {
157 |             // headings mode: print file name first, then omit it from match lines
158 |             self.match_printer(&res, |slf, res| {
159 |                 w!(slf.out,
160 |                    &slf.colors.path, res.fname.as_bytes(), &slf.colors.reset, b"\n");
161 |             }, |slf, _, lineno, sep| {
162 |                 w!(slf.out,
163 |                    &slf.colors.lineno, format!("{}", lineno).as_bytes(), &slf.colors.reset,
164 |                    &slf.colors.punct, sep, &slf.colors.reset);
165 |             });
166 |         } else {
167 |             // no headings mode: print file name on every match line
168 |             self.match_printer(&res, |_, _| { }, |slf, res, lineno, sep| {
169 |                 w!(slf.out,
170 |                    &slf.colors.path, res.fname.as_bytes(), &slf.colors.reset,
171 |                    &slf.colors.punct, sep, &slf.colors.reset,
172 |                    &slf.colors.lineno, format!("{}", lineno).as_bytes(), &slf.colors.reset,
173 |                    &slf.colors.punct, sep, &slf.colors.reset);
174 |             });
175 |         }
176 |         self.is_first = false;
177 |     }
178 | }
179 | 
/// The output mode selected by --ackmate.
///
/// Prints without colors, one matched line per output line, with all spans
/// given numerically.
pub struct AckMateMode<T: Write> {
    is_first: bool,
    out: T,
}

impl<T: Write> AckMateMode<T> {
    /// Create the mode writing to `out`; no file has been printed yet.
    pub fn new(out: T) -> AckMateMode<T> {
        AckMateMode { out: out, is_first: true }
    }
}
196 | 
197 | impl<T: Write> DisplayMode for AckMateMode<T> {
198 |     fn print_result(&mut self, res: FileResult) {
199 |         if res.matches.is_empty() {
200 |             return;
201 |         }
202 |         if !self.is_first {
203 |             w!(self.out, b"\n");
204 |         }
205 |         if res.is_binary {
206 |             w!(self.out, b"Binary file ", res.fname.as_bytes(), b" matches.\n");
207 |         } else {
208 |             w!(self.out, b":", res.fname.as_bytes());
209 |             for m in res.matches {
210 |                 let spans = m.spans.iter()
211 |                                    .map(|&(s, e)| format!("{} {}", s, e - s))
212 |                                    .collect::<Vec<_>>().join(",");
213 |                 w!(self.out, &format!("{};{}:", m.lineno, spans).as_bytes());
214 |                 w_maybe_nl(&mut self.out, &m.line);
215 |             }
216 |         }
217 |         self.is_first = false;
218 |     }
219 | }
220 | 
/// The output mode selected by --vimgrep.
///
/// Prints without colors and with one output line per match, so a line
/// containing several matches is printed several times.
pub struct VimGrepMode<T: Write> {
    out: T,
}

impl<T: Write> VimGrepMode<T> {
    /// Create the mode writing to `out`.
    pub fn new(out: T) -> VimGrepMode<T> {
        let mode = VimGrepMode { out: out };
        mode
    }
}
236 | 
237 | impl<T: Write> DisplayMode for VimGrepMode<T> {
238 |     fn print_result(&mut self, res: FileResult) {
239 |         if res.matches.is_empty() {
240 |             return;
241 |         }
242 |         if res.is_binary {
243 |             println!("Binary file {} matches.", res.fname);
244 |         } else {
245 |             for m in res.matches {
246 |                 for s in &m.spans {
247 |                     w!(self.out, &format!("{}:{}:{}:", res.fname, m.lineno, s.0 + 1).as_bytes());
248 |                     w_maybe_nl(&mut self.out, &m.line);
249 |                 }
250 |             }
251 |         }
252 |     }
253 | }
254 | 
255 | /// The mode used for --files-with-matches and --files-without-matches.
256 | ///
257 | /// One file per line, no contents printed.
258 | pub struct FilesOnlyMode<T: Write> {
259 |     colors: Colors,
260 |     need_match: bool,
261 |     out: T,
262 | }
263 | 
264 | impl<T: Write> FilesOnlyMode<T> {
265 |     pub fn new(out: T, colors: Colors, need_match: bool) -> FilesOnlyMode<T> {
266 |         FilesOnlyMode {
267 |             colors: colors,
268 |             need_match: need_match,
269 |             out: out,
270 |         }
271 |     }
272 | }
273 | 
274 | impl<T: Write> DisplayMode for FilesOnlyMode<T> {
275 |     fn print_result(&mut self, res: FileResult) {
276 |         if res.matches.is_empty() != self.need_match {
277 |             w!(self.out, &self.colors.path, &res.fname.as_bytes(), &self.colors.reset, b"\n");
278 |         }
279 |     }
280 | }
281 | 
282 | /// The mode used for --count mode.
283 | ///
284 | /// One file per line, followed by match count (not matched line count).
285 | pub struct CountMode<T: Write> {
286 |     colors: Colors,
287 |     out: T,
288 | }
289 | 
290 | impl<T: Write> CountMode<T> {
291 |     pub fn new(out: T, colors: Colors) -> CountMode<T> {
292 |         CountMode {
293 |             colors: colors,
294 |             out: out,
295 |         }
296 |     }
297 | }
298 | 
299 | impl<T: Write> DisplayMode for CountMode<T> {
300 |     fn print_result(&mut self, res: FileResult) {
301 |         if res.matches.is_empty() {
302 |             return;
303 |         }
304 |         let count: usize = res.matches.iter().map(|m| m.spans.iter().count())
305 |                                              .fold(0, |a, v| a + v);
306 |         w!(self.out,
307 |            &self.colors.path, &res.fname.as_bytes(), &self.colors.reset,
308 |            &self.colors.punct, b":", &self.colors.reset,
309 |            &self.colors.lineno, &format!("{}", count).as_bytes(), &self.colors.reset,
310 |            b"\n");
311 |     }
312 | }
313 | 


--------------------------------------------------------------------------------
/src/pcre.rs:
--------------------------------------------------------------------------------
  1 | // ---------------------------------------------------------------------------------------
  2 | // Ruthenium, an ack-like searcher, (c) 2015 Georg Brandl.
  3 | // Licensed under the MIT license.
  4 | // ---------------------------------------------------------------------------------------
  5 | 
  6 | // This file derived from rust-pcre:
  7 | // Copyright 2015 The rust-pcre authors.
  8 | //
  9 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 10 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 11 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 12 | // option. This file may not be copied, modified, or distributed
 13 | // except according to those terms.
 14 | 
 15 | use std::ffi::{CStr, CString};
 16 | use std::{fmt, ptr};
 17 | use libc::{c_char, c_int, c_uchar, c_void};
 18 | 
// Raw declarations for the part of the libpcre C API that is wrapped by the
// functions of this file.
mod ffi {
    use libc::{c_char, c_int, c_uchar, c_ulong, c_void};

    // Option and selector arguments are plain C ints; the aliases only name
    // their purpose.
    #[allow(non_camel_case_types)]
    pub type compile_options = c_int;
    #[allow(non_camel_case_types)]
    pub type exec_options = c_int;
    #[allow(non_camel_case_types)]
    pub type fullinfo_field = c_int;
    #[allow(non_camel_case_types)]
    pub type study_options = c_int;

    // Resolved by linking against the "pcre" library.
    #[link(name = "pcre")]
    extern {
        // Declared as a static holding a function pointer, not as a
        // function.
        pub static pcre_free: extern "C" fn(ptr: *mut c_void);

        pub fn pcre_compile(pattern: *const c_char, options: compile_options,
                            errptr: *mut *const c_char, erroffset: *mut c_int,
                            tableptr: *const c_uchar) -> *mut pcre;
        pub fn pcre_exec(code: *const pcre, extra: *const pcre_extra, subject: *const c_char,
                         length: c_int, startoffset: c_int, options: exec_options,
                         ovector: *mut c_int, ovecsize: c_int) -> c_int;
        pub fn pcre_free_study(extra: *mut pcre_extra);
        pub fn pcre_fullinfo(code: *const pcre, extra: *const pcre_extra, what: fullinfo_field,
                             where_: *mut c_void) -> c_int;
        pub fn pcre_study(code: *const pcre, options: study_options,
                          errptr: *mut *const c_char) -> *mut pcre_extra;
    }

    // pub const PCRE_UTF8: compile_options = 0x00000800;

    // PCRE_NO_UTF8_CHECK is both a compile and exec option
    pub const PCRE_NO_UTF8_CHECK: c_int = 0x00002000;

    // Return codes: the wrappers in this file treat PCRE_ERROR_NOMATCH as
    // "no match" rather than an error, and tolerate PCRE_ERROR_NULL from
    // pcre_fullinfo.
    pub const PCRE_ERROR_NOMATCH: c_int = -1;
    pub const PCRE_ERROR_NULL: c_int = -2;

    pub const PCRE_INFO_CAPTURECOUNT: fullinfo_field = 2;
    // pub const PCRE_INFO_NAMEENTRYSIZE: fullinfo_field = 7;
    // pub const PCRE_INFO_NAMECOUNT: fullinfo_field = 8;
    // pub const PCRE_INFO_NAMETABLE: fullinfo_field = 9;

    pub const PCRE_STUDY_JIT_COMPILE: c_int = 0x0001;
    // pub const PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE: c_int = 0x0002;
    // pub const PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE: c_int = 0x0004;
    // pub const PCRE_STUDY_EXTRA_NEEDED: c_int = 0x0008;

    // Opaque type: values only exist behind pointers handed out by libpcre.
    #[allow(non_camel_case_types)]
    pub enum pcre {}

    // NOTE(review): presumably mirrors the layout of `pcre_extra` in pcre.h
    // (hence #[repr(C)]) -- confirm against the libpcre version linked.
    #[allow(non_camel_case_types)]
    #[repr(C)]
    pub struct pcre_extra {
        flags: c_ulong,
        study_data: *mut c_void,
        match_limit: c_ulong,
        callout_data: *mut c_void,
        tables: *const c_uchar,
        match_limit_recursion_: c_ulong,
        mark: *mut *mut c_uchar,
        executable_jit: *mut c_void
    }
}
 82 | 
 83 | pub unsafe fn pcre_compile(pattern: *const c_char, options: ffi::compile_options,
 84 |                            tableptr: *const c_uchar) -> Result<*mut ffi::pcre, (String, c_int)> {
 85 |     assert!(!pattern.is_null());
 86 |     // the pattern is always UTF-8
 87 |     let options = options | ffi::PCRE_NO_UTF8_CHECK;
 88 |     let mut err: *const c_char = ptr::null();
 89 |     let mut erroffset: c_int = 0;
 90 |     let code = ffi::pcre_compile(pattern, options, &mut err, &mut erroffset, tableptr);
 91 | 
 92 |     if code.is_null() {
 93 |         // "Otherwise, if  compilation  of  a  pattern fails, pcre_compile() returns
 94 |         // NULL, and sets the variable pointed to by errptr to point to a textual
 95 |         // error message. This is a static string that is part of the library. You
 96 |         // must not try to free it."
 97 |         Err((CStr::from_ptr(err).to_string_lossy().into_owned(), erroffset))
 98 |     } else {
 99 |         assert!(!code.is_null());
100 |         assert_eq!(erroffset, 0);
101 |         Ok(code)
102 |     }
103 | }
104 | 
105 | pub unsafe fn pcre_exec(code: *const ffi::pcre, extra: *const ffi::pcre_extra,
106 |                         subject: *const c_char, length: c_int, startoffset: c_int,
107 |                         options: ffi::compile_options,
108 |                         ovector: *mut c_int, ovecsize: c_int) -> Result<c_int, ()> {
109 |     assert!(!code.is_null());
110 |     assert!(ovecsize >= 0 && ovecsize % 3 == 0);
111 |     let options = options | ffi::PCRE_NO_UTF8_CHECK;
112 |     let rc = ffi::pcre_exec(code, extra, subject, length, startoffset, options, ovector, ovecsize);
113 |     if rc == ffi::PCRE_ERROR_NOMATCH {
114 |         Ok(-1)
115 |     } else if rc < 0 {
116 |         Err(())
117 |     } else {
118 |         Ok(rc)
119 |     }
120 | }
121 | 
/// Call the function that libpcre's `pcre_free` pointer refers to on `ptr`.
///
/// # Safety
///
/// NOTE(review): presumably `ptr` has to be memory handed out by libpcre
/// (e.g. by `pcre_compile`) that is not used afterwards -- confirm.
pub unsafe fn pcre_free(ptr: *mut c_void) {
    ffi::pcre_free(ptr);
}
125 | 
/// Pass `extra` on to `ffi::pcre_free_study`.
///
/// # Safety
///
/// NOTE(review): presumably `extra` has to be a result of `pcre_study` that
/// is not used afterwards -- confirm.
pub unsafe fn pcre_free_study(extra: *mut ffi::pcre_extra) {
    ffi::pcre_free_study(extra);
}
129 | 
130 | pub unsafe fn pcre_fullinfo(code: *const ffi::pcre, extra: *const ffi::pcre_extra,
131 |                             what: ffi::fullinfo_field, where_: *mut c_void) {
132 |     assert!(!code.is_null());
133 |     let rc = ffi::pcre_fullinfo(code, extra, what, where_);
134 |     if rc < 0 && rc != ffi::PCRE_ERROR_NULL {
135 |         panic!("pcre_fullinfo");
136 |     }
137 | }
138 | 
139 | pub unsafe fn pcre_study(code: *const ffi::pcre, options: ffi::study_options)
140 |                          -> Result<*mut ffi::pcre_extra, String> {
141 |     assert!(!code.is_null());
142 |     let converted_options = options;
143 |     let mut err: *const c_char = ptr::null();
144 |     let extra = ffi::pcre_study(code, converted_options, &mut err);
145 |     // "The third argument for pcre_study() is a pointer for an error message. If
146 |     // studying succeeds (even if no data is returned), the variable it points to is
147 |     // set to NULL. Otherwise it is set to point to a textual error message. This is
148 |     // a static string that is part of the library. You must not try to free it."
149 |     // http://pcre.org/pcre.txt
150 |     if !err.is_null() {
151 |         Err(CStr::from_ptr(err).to_string_lossy().into_owned())
152 |     } else {
153 |         assert!(err.is_null());
154 |         Ok(extra)
155 |     }
156 | }
157 | 
// Public, conventionally cased names for the raw types of the `ffi` module.
pub type Pcre = ffi::pcre;
pub type PcreExtra = ffi::pcre_extra;
pub type CompileOptions = ffi::compile_options;
pub type ExecOptions = ffi::exec_options;
pub type StudyOptions = ffi::study_options;
163 | 
/// Wrapper for libpcre's `pcre` object (representing a compiled regular expression).
///
/// The wrapper owns both pointers below and releases them in its `Drop` impl.
#[derive(Debug)]
pub struct Regex {
    /// Compiled pattern as returned by `pcre_compile`; asserted non-null at construction.
    code: *const Pcre,
    /// Study data from `pcre_study`; null until `study_with_options` produces some.
    extra: *mut PcreExtra,
    /// Value reported by `pcre_fullinfo` for `PCRE_INFO_CAPTURECOUNT`; used to size ovectors.
    capture_count: c_int,
}
171 | 
/// Represents a match of a subject string against a regular expression.
#[allow(unused)]
pub struct Match<'s> {
    /// The bytes that were searched.
    subject: &'s [u8],
    /// Leading `(capture_count + 1) * 2` entries of the ovector filled by `pcre_exec`:
    /// entry `2n` is read as the start and entry `2n + 1` as the end of group `n`.
    partial_ovector: Vec<c_int>,
    /// The non-negative return code of the `pcre_exec` call that produced this match.
    string_count: c_int
}
179 | 
/// Iterator type for iterating matches within a subject string.
pub struct MatchIterator<'r, 's> {
    /// Pattern being matched.
    regex: &'r Regex,
    /// The bytes being searched.
    subject: &'s [u8],
    /// Start offset handed to the next `pcre_exec` call; updated after each match.
    offset: c_int,
    /// Options forwarded unchanged to every `pcre_exec` call.
    options: ExecOptions,
    /// Scratch output vector reused across `pcre_exec` calls.
    ovector: Vec<c_int>
}
188 | 
/// Error produced when a pattern cannot be compiled.
///
/// Field 0 is the error message and field 1 the offset that accompanies it.
#[derive(Debug)]
pub struct CompilationError(String, c_int);

impl fmt::Display for CompilationError {
    /// Renders the error as `compilation failed at offset <offset>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let CompilationError(ref message, offset) = *self;
        write!(f, "compilation failed at offset {}: {}", offset, message)
    }
}
197 | 
198 | impl Regex {
199 | 
200 |     pub fn new(pattern: &str) -> Result<Regex, CompilationError> {
201 |         Regex::compile_with_options(pattern, 0).map(|mut rx| {
202 |             rx.study_with_options(ffi::PCRE_STUDY_JIT_COMPILE);
203 |             rx
204 |         })
205 |     }
206 | 
207 |     pub fn compile_with_options(pattern: &str, options: CompileOptions)
208 |                                 -> Result<Regex, CompilationError> {
209 |         let pattern_cstring = CString::new(pattern).unwrap();
210 |         // Use the default character tables.
211 |         let tableptr: *const c_uchar = ptr::null();
212 |         match unsafe { pcre_compile(pattern_cstring.as_ptr(), options, tableptr) } {
213 |             Err((errstr, offset)) => Err(CompilationError(errstr, offset)),
214 |             Ok(mut_code) => {
215 |                 let code = mut_code as *const Pcre;
216 |                 assert!(!code.is_null());
217 | 
218 |                 // Default extra is null.
219 |                 let extra: *mut PcreExtra = ptr::null_mut();
220 |                 let mut capture_count: c_int = 0;
221 |                 unsafe {
222 |                     pcre_fullinfo(code, extra as *const PcreExtra, ffi::PCRE_INFO_CAPTURECOUNT,
223 |                                   &mut capture_count as *mut c_int as *mut c_void);
224 |                 }
225 | 
226 |                 Ok(Regex {
227 |                     code: code,
228 |                     extra: extra,
229 |                     capture_count: capture_count,
230 |                 })
231 |             }
232 |         }
233 |     }
234 | 
235 |     #[inline]
236 |     pub fn exec<'a>(&self, subject: &'a [u8]) -> Option<Match<'a>> {
237 |         self.exec_from(subject, 0)
238 |     }
239 | 
240 |     #[inline]
241 |     pub fn exec_from<'a>(&self, subject: &'a [u8], startoffset: usize) -> Option<Match<'a>> {
242 |         self.exec_from_with_options(subject, startoffset, 0)
243 |     }
244 | 
245 |     #[inline]
246 |     pub fn exec_from_with_options<'a>(&self, subject: &'a [u8], startoffset: usize,
247 |                                       options: ExecOptions) -> Option<Match<'a>> {
248 |         let ovecsize = (self.capture_count + 1) * 3;
249 |         let mut ovector = vec![0 as c_int; ovecsize as usize];
250 | 
251 |         let rc = unsafe {
252 |             pcre_exec(self.code,
253 |                       self.extra as *const PcreExtra,
254 |                       subject.as_ptr() as *const c_char,
255 |                       subject.len() as c_int,
256 |                       startoffset as c_int,
257 |                       options,
258 |                       ovector.as_mut_ptr(),
259 |                       ovecsize as c_int)
260 |         };
261 |         match rc {
262 |             Ok(rc) if rc >= 0 => {
263 |                 Some(Match {
264 |                     subject: subject,
265 |                     partial_ovector: ovector[..(((self.capture_count + 1) * 2) as usize)].to_vec(),
266 |                     string_count: rc
267 |                 })
268 |             }
269 |             _ => { None }
270 |         }
271 |     }
272 | 
273 |     #[inline]
274 |     pub fn find(&self, subject: &[u8]) -> Option<(usize, usize)> {
275 |         self.exec(subject).map(|m| m.group_span(0))
276 |     }
277 | 
278 |     #[inline]
279 |     pub fn is_match(&self, subject: &[u8]) -> bool {
280 |         self.exec(subject).is_some()
281 |     }
282 | 
283 |     pub fn study_with_options(&mut self, options: StudyOptions) -> bool {
284 |         let extra = unsafe {
285 |             // Free any current study data.
286 |             pcre_free_study(self.extra as *mut PcreExtra);
287 |             self.extra = ptr::null_mut();
288 |             pcre_study(self.code, options)
289 |         };
290 |         match extra {
291 |             Ok(extra) => {
292 |                 self.extra = extra;
293 |                 !extra.is_null()
294 |             }
295 |             Err(_) => false
296 |         }
297 |     }
298 | }
299 | 
300 | impl Drop for Regex {
301 |     fn drop(&mut self) {
302 |         unsafe {
303 |             pcre_free_study(self.extra as *mut PcreExtra);
304 |             pcre_free(self.code as *mut Pcre as *mut c_void);
305 |         }
306 |         self.extra = ptr::null_mut();
307 |         self.code = ptr::null();
308 |     }
309 | }
310 | 
311 | impl<'a> Match<'a> {
312 |     pub fn group_start(&self, n: usize) -> usize {
313 |         self.partial_ovector[(n * 2) as usize] as usize
314 |     }
315 | 
316 |     pub fn group_end(&self, n: usize) -> usize {
317 |         self.partial_ovector[(n * 2 + 1) as usize] as usize
318 |     }
319 | 
320 |     pub fn group_span(&self, n: usize) -> (usize, usize) {
321 |         (self.group_start(n), self.group_end(n))
322 |     }
323 | }
324 | 
325 | impl<'r, 's> Clone for MatchIterator<'r, 's> {
326 |     #[inline]
327 |     fn clone(&self) -> MatchIterator<'r, 's> {
328 |         MatchIterator {
329 |             regex: self.regex,
330 |             subject: self.subject,
331 |             offset: self.offset,
332 |             options: self.options.clone(),
333 |             ovector: self.ovector.clone()
334 |         }
335 |     }
336 | }
337 | 
338 | impl<'r, 's> Iterator for MatchIterator<'r, 's> {
339 |     type Item = Match<'s>;
340 | 
341 |     /// Gets the next match.
342 |     #[inline]
343 |     fn next(&mut self) -> Option<Match<'s>> {
344 |         let rc = unsafe {
345 |             pcre_exec(self.regex.code,
346 |                       self.regex.extra,
347 |                       self.subject.as_ptr() as *const c_char,
348 |                       self.subject.len() as c_int,
349 |                       self.offset,
350 |                       self.options,
351 |                       self.ovector.as_mut_ptr(),
352 |                       self.ovector.len() as c_int)
353 |         };
354 |         match rc {
355 |             Ok(rc) if rc >= 0 => {
356 |                 // Update the iterator state.
357 |                 self.offset = self.ovector[1];
358 | 
359 |                 let cc = self.regex.capture_count;
360 |                 Some(Match {
361 |                     subject: self.subject,
362 |                     partial_ovector: self.ovector[..(((cc + 1) * 2) as usize)].to_vec(),
363 |                     string_count: rc
364 |                 })
365 |             }
366 |             _ => None
367 |         }
368 |     }
369 | }
370 | 
/// Read-only access is guaranteed to be thread-safe.
///
/// The `&self` methods of `Regex` only pass the stored pointers to `pcre_exec`;
/// the one method that replaces them, `study_with_options`, takes `&mut self`.
/// NOTE(review): this relies on libpcre not modifying a compiled pattern or its
/// study data during matching -- confirm against the PCRE multithreading notes,
/// in particular for JIT-studied patterns.
unsafe impl Sync for Regex {}
373 | 


--------------------------------------------------------------------------------