├── src ├── win32 │ └── config.h ├── print.h ├── scandir.h ├── decompress.h ├── lang.h ├── log.h ├── ignore.h ├── search.h ├── log.c ├── scandir.c ├── options.h ├── util.h ├── lang.c ├── main.c ├── print.c ├── decompress.c ├── ignore.c ├── util.c ├── search.c └── options.c ├── NOTICE ├── format.sh ├── tests ├── setup.sh ├── bad_path.t ├── stupid_fnmatch.t.disabled ├── exitcodes.t ├── ignore_subdir.t ├── ignore_abs_path.t ├── invert_match.t ├── passthrough.t ├── case_sensitivity.t ├── big │ ├── big_file.t │ └── create_big_file.py ├── hidden_option.t ├── ignore_backups.t └── list_file_types.t ├── .clang-format ├── .travis.yml ├── .gitignore ├── Makefile.am ├── Makefile.w32 ├── doc ├── generate_man.sh ├── ag.1.md └── ag.1 ├── configure.ac ├── the_silver_searcher.spec.in ├── ag.bashcomp.sh ├── README.md ├── LICENSE └── m4 └── ax_pthread.m4 /src/win32/config.h: -------------------------------------------------------------------------------- 1 | #define HAVE_LZMA_H 2 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | The Silver Searcher 2 | Copyright 2011-2014 Geoff Greer 3 | -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CLANG_FORMAT=clang-format-3.5 4 | 5 | $CLANG_FORMAT -i src/*.c 6 | -------------------------------------------------------------------------------- /tests/setup.sh: -------------------------------------------------------------------------------- 1 | # All cram tests should use this. Make sure that "ag" runs the version 2 | # of ag we just built, and make the output really simple. 
3 | 4 | alias ag="$TESTDIR/../ag --nocolor --workers=1 --parallel" 5 | -------------------------------------------------------------------------------- /tests/bad_path.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . $TESTDIR/setup.sh 4 | 5 | Complain about nonexistent path: 6 | 7 | $ ag foo doesnt_exist 8 | ERR: Error stat()ing: doesnt_exist 9 | ERR: Error opening directory doesnt_exist: No such file or directory 10 | [1] 11 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | #BasedOnStyle: LLVM 2 | AllowShortIfStatementsOnASingleLine: false 3 | BreakBeforeBraces: Attach 4 | ColumnLimit: 0 5 | IndentWidth: 4 6 | IndentCaseLabels: true 7 | Language: Cpp 8 | MaxEmptyLinesToKeep: 2 9 | SpaceBeforeParens: ControlStatements 10 | UseTab: Never 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | branches: 4 | only: 5 | - master 6 | 7 | notifications: 8 | email: 9 | - geoff@greer.fm 10 | 11 | install: 12 | - sudo apt-get install -y automake pkg-config libpcre3-dev zlib1g-dev liblzma-dev 13 | 14 | script: 15 | - ./build.sh && make test 16 | 17 | before_script: 18 | - sudo pip install cram 19 | -------------------------------------------------------------------------------- /tests/stupid_fnmatch.t.disabled: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . $TESTDIR/setup.sh 4 | $ mkdir -p ./a/bomb 5 | $ echo 'whatever' > ./a/bomb/foo.yml 6 | $ echo '*b/foo.yml' > ./.gitignore 7 | 8 | Ignore foo.yml but not blah.yml: 9 | 10 | $ ag whatever . 11 | 12 | Dont ignore anything (unrestricted search): 13 | 14 | $ ag -u whatever . 
15 | a/bomb/foo.yml:1:whatever 16 | -------------------------------------------------------------------------------- /tests/exitcodes.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . $TESTDIR/setup.sh 4 | $ echo foo > ./exitcodes_test.txt 5 | $ echo bar >> ./exitcodes_test.txt 6 | 7 | Normal matching: 8 | 9 | $ ag foo exitcodes_test.txt 10 | 1:foo 11 | $ ag zoo exitcodes_test.txt 12 | [1] 13 | 14 | Inverted matching: 15 | 16 | $ ag -v foo exitcodes_test.txt 17 | 2:bar 18 | $ ag -v zoo exitcodes_test.txt 19 | 1:foo 20 | 2:bar 21 | $ ag -v "foo|bar" exitcodes_test.txt 22 | [1] 23 | -------------------------------------------------------------------------------- /tests/ignore_subdir.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . $TESTDIR/setup.sh 4 | $ mkdir -p ./a/b/c 5 | $ echo 'whatever1' > ./a/b/c/blah.yml 6 | $ echo 'whatever2' > ./a/b/foo.yml 7 | $ echo 'a/b/*.yml' > ./.gitignore 8 | 9 | Ignore foo.yml but not blah.yml: 10 | 11 | $ ag whatever . 12 | a/b/c/blah.yml:1:whatever1 13 | 14 | Dont ignore anything (unrestricted search): 15 | 16 | $ ag -u whatever . | sort 17 | a/b/c/blah.yml:1:whatever1 18 | a/b/foo.yml:1:whatever2 19 | -------------------------------------------------------------------------------- /tests/ignore_abs_path.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . $TESTDIR/setup.sh 4 | $ mkdir -p ./a/b/c 5 | $ echo 'whatever1' > ./a/b/c/blah.yml 6 | $ echo 'whatever2' > ./a/b/foo.yml 7 | $ echo '/a/b/foo.yml' > ./.gitignore 8 | 9 | Ignore foo.yml but not blah.yml: 10 | 11 | $ ag whatever . 12 | a/b/c/blah.yml:1:whatever1 13 | 14 | Dont ignore anything (unrestricted search): 15 | 16 | $ ag -u whatever . 
| sort 17 | a/b/c/blah.yml:1:whatever1 18 | a/b/foo.yml:1:whatever2 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.dSYM 2 | *.o 3 | *.plist 4 | .deps 5 | .dirstamp 6 | .DS_Store 7 | aclocal.m4 8 | ag 9 | autom4te.cache 10 | cachegrind.out.* 11 | callgrind.out.* 12 | clang_output_* 13 | compile 14 | config.guess 15 | config.log 16 | config.status 17 | config.sub 18 | configure 19 | depcomp 20 | gmon.out 21 | install-sh 22 | Makefile 23 | Makefile.in 24 | missing 25 | src/config.h* 26 | stamp-h1 27 | tests/*.err 28 | tests/big/*.err 29 | tests/big/big_file.txt 30 | the_silver_searcher.spec -------------------------------------------------------------------------------- /src/print.h: -------------------------------------------------------------------------------- 1 | #ifndef PRINT_H 2 | #define PRINT_H 3 | 4 | #include "util.h" 5 | 6 | void print_path(const char *path, const char sep); 7 | void print_binary_file_matches(const char *path); 8 | void print_file_matches(const char *path, const char *buf, const size_t buf_len, const match_t matches[], const size_t matches_len); 9 | void print_line_number(size_t line, const char sep); 10 | void print_file_separator(void); 11 | const char *normalize_path(const char *path); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/scandir.h: -------------------------------------------------------------------------------- 1 | #ifndef SCANDIR_H 2 | #define SCANDIR_H 3 | 4 | #include "ignore.h" 5 | 6 | typedef struct { 7 | const ignores *ig; 8 | const char *base_path; 9 | size_t base_path_len; 10 | } scandir_baton_t; 11 | 12 | typedef int (*filter_fp)(const char *path, const struct dirent *, void *); 13 | 14 | int ag_scandir(const char *dirname, 15 | struct dirent ***namelist, 16 | filter_fp filter, 17 | void *baton); 18 | 19 | #endif 20 | 
-------------------------------------------------------------------------------- /tests/invert_match.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . $TESTDIR/setup.sh 4 | $ echo 'valid: 1' > ./blah.txt 5 | $ echo 'some_string' >> ./blah.txt 6 | $ echo 'valid: 654' >> ./blah.txt 7 | $ echo 'some_other_string' >> ./blah.txt 8 | $ echo 'valid: 0' >> ./blah.txt 9 | $ echo 'valid: 23' >> ./blah.txt 10 | $ echo 'valid: 0' >> ./blah.txt 11 | 12 | Search for lines not matching "valid: " in blah.txt: 13 | 14 | $ ag -v 'valid: ' 15 | blah.txt:2:some_string 16 | blah.txt:4:some_other_string 17 | -------------------------------------------------------------------------------- /src/decompress.h: -------------------------------------------------------------------------------- 1 | #ifndef DECOMPRESS_H 2 | #define DECOMPRESS_H 3 | 4 | #include "config.h" 5 | #include "log.h" 6 | #include "options.h" 7 | 8 | typedef enum { 9 | AG_NO_COMPRESSION, 10 | AG_GZIP, 11 | AG_COMPRESS, 12 | AG_ZIP, 13 | AG_XZ, 14 | } ag_compression_type; 15 | 16 | ag_compression_type is_zipped(const void *buf, const int buf_len); 17 | 18 | void *decompress(const ag_compression_type zip_type, const void *buf, const int buf_len, const char *dir_full_path, int *new_buf_len); 19 | #endif 20 | -------------------------------------------------------------------------------- /src/lang.h: -------------------------------------------------------------------------------- 1 | #ifndef LANG_H 2 | #define LANG_H 3 | 4 | #define MAX_EXTENSIONS 12 5 | #define LANG_COUNT 66 6 | 7 | typedef struct { 8 | const char *name; 9 | const char *extensions[MAX_EXTENSIONS]; 10 | } lang_spec_t; 11 | 12 | extern lang_spec_t langs[]; 13 | 14 | /** 15 | Convert a NULL-terminated array of language extensions 16 | into a regular expression of the form \.(extension1|extension2...)$ 17 | 18 | Caller is responsible for freeing the returned string.
19 | */ 20 | char *make_lang_regex(const char **extensions); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/log.h: -------------------------------------------------------------------------------- 1 | #ifndef LOG_H 2 | #define LOG_H 3 | 4 | #include 5 | 6 | enum log_level { 7 | LOG_LEVEL_DEBUG = 10, 8 | LOG_LEVEL_MSG = 20, 9 | LOG_LEVEL_WARN = 30, 10 | LOG_LEVEL_ERR = 40, 11 | LOG_LEVEL_NONE = 100 12 | }; 13 | 14 | void set_log_level(enum log_level threshold); 15 | 16 | void log_debug(const char *fmt, ...); 17 | void log_msg(const char *fmt, ...); 18 | void log_warn(const char *fmt, ...); 19 | void log_err(const char *fmt, ...); 20 | 21 | void vplog(const unsigned int level, const char *fmt, va_list args); 22 | void plog(const unsigned int level, const char *fmt, ...); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /tests/passthrough.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . $TESTDIR/setup.sh 4 | $ unalias ag 5 | $ alias ag="$TESTDIR/../ag --nocolor --workers=1" 6 | $ echo "foo bar" > passthrough_test.txt 7 | $ echo "zoo zar" >> passthrough_test.txt 8 | $ echo "foo test" >> passthrough_test.txt 9 | 10 | No impact on non-stream: 11 | 12 | $ ag --passthrough zoo passthrough_test.txt 13 | zoo zar 14 | 15 | Match stream with --passthrough: 16 | 17 | $ cat passthrough_test.txt | ag --passthrough foo 18 | foo bar 19 | zoo zar 20 | foo test 21 | 22 | Match stream without --passthrough: 23 | 24 | $ cat passthrough_test.txt | ag foo 25 | foo bar 26 | foo test 27 | -------------------------------------------------------------------------------- /tests/case_sensitivity.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . 
$TESTDIR/setup.sh 4 | $ echo Foo >> ./sample 5 | $ echo bar >> ./sample 6 | 7 | Smart case by default: 8 | 9 | $ ag foo sample 10 | 1:Foo 11 | $ ag FOO sample 12 | [1] 13 | $ ag 'f.o' sample 14 | 1:Foo 15 | $ ag Foo sample 16 | 1:Foo 17 | $ ag 'F.o' sample 18 | 1:Foo 19 | 20 | Case sensitive mode: 21 | 22 | $ ag -s foo sample 23 | [1] 24 | $ ag -s FOO sample 25 | [1] 26 | $ ag -s 'f.o' sample 27 | [1] 28 | $ ag -s Foo sample 29 | 1:Foo 30 | $ ag -s 'F.o' sample 31 | 1:Foo 32 | Case insensitive mode: 33 | 34 | $ ag foo -i sample 35 | 1:Foo 36 | $ ag foo --ignore-case sample 37 | 1:Foo 38 | $ ag 'f.o' -i sample 39 | 1:Foo 40 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} 2 | 3 | bin_PROGRAMS = ag 4 | ag_SOURCES = src/ignore.c src/ignore.h src/log.c src/log.h src/options.c src/options.h src/print.c src/print.h src/scandir.c src/scandir.h src/search.c src/search.h src/lang.c src/lang.h src/util.c src/util.h src/decompress.c src/decompress.h src/uthash.h src/main.c 5 | ag_LDADD = ${PCRE_LIBS} ${LZMA_LIBS} ${ZLIB_LIBS} $(PTHREAD_LIBS) 6 | 7 | dist_man_MANS = doc/ag.1 8 | 9 | bashcompdir = $(pkgdatadir)/completions 10 | dist_bashcomp_DATA = ag.bashcomp.sh 11 | 12 | EXTRA_DIST = Makefile.w32 LICENSE NOTICE the_silver_searcher.spec README.md 13 | 14 | test: 15 | cram -v tests/*.t 16 | 17 | test_big: 18 | cram -v tests/big/*.t 19 | 20 | .PHONY : all test clean 21 | -------------------------------------------------------------------------------- /tests/big/big_file.t: -------------------------------------------------------------------------------- 1 | Setup and create really big file: 2 | 3 | $ . 
$TESTDIR/../setup.sh 4 | $ python3 $TESTDIR/create_big_file.py $TESTDIR/big_file.txt 5 | 6 | Search a big file: 7 | 8 | $ $TESTDIR/../../ag --nocolor --workers=1 --parallel hello $TESTDIR/big_file.txt 9 | 33554432:hello1073741824 10 | 67108864:hello2147483648 11 | 100663296:hello3221225472 12 | 134217728:hello4294967296 13 | 167772160:hello5368709120 14 | 201326592:hello6442450944 15 | 234881024:hello7516192768 16 | 268435456:hello 17 | 18 | Fail to regex search a big file: 19 | $ $TESTDIR/../../ag --nocolor --workers=1 --parallel 'hello.*' $TESTDIR/big_file.txt 20 | ERR: Skipping */big_file.txt: pcre_exec() can't handle files larger than 2147483647 bytes. (glob) 21 | [1] 22 | -------------------------------------------------------------------------------- /tests/big/create_big_file.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Create an 8GB file of mostly "abcdefghijklmnopqrstuvwxyz01234", 4 | # with a few instances of "hello" 5 | 6 | import sys 7 | 8 | if len(sys.argv) != 2: 9 | print("Usage: %s big_file.txt" % sys.argv[0]) 10 | sys.exit(1) 11 | 12 | big_file = sys.argv[1] 13 | 14 | 15 | def create_big_file(): 16 | with open(big_file, "w") as fd: 17 | for i in range(1, 2**28): 18 | byte = i * 32 19 | if byte % 2**30 == 0: 20 | fd.write("hello%s\n" % byte) 21 | else: 22 | fd.write("abcdefghijklmnopqrstuvwxyz01234\n") 23 | fd.write("hello\n") 24 | 25 | 26 | try: 27 | fd = open(big_file, "r") 28 | except Exception as e: 29 | create_big_file() 30 | -------------------------------------------------------------------------------- /Makefile.w32: -------------------------------------------------------------------------------- 1 | VERSION=$(shell grep -Po "(?<=\[)([0-9.]+.[0-9]+.[0-9]+)(?=\])" configure.ac) 2 | 3 | CC=gcc 4 | 5 | SRCS = \ 6 | src/decompress.c \ 7 | src/ignore.c \ 8 | src/lang.c \ 9 | src/log.c \ 10 | src/main.c \ 11 | src/options.c \ 12 | src/print.c \ 13 | src/scandir.c \ 14 | 
src/search.c \ 15 | src/util.c 16 | OBJS = $(subst .c,.o,$(SRCS)) 17 | 18 | CFLAGS = -O2 -Isrc/win32 -DPACKAGE_VERSION=\"$(VERSION)\" -DHAVE_PTHREAD_H 19 | LIBS = -lz -lpthread -lpcre -llzma -lshlwapi 20 | CFLAGS := -Ic:/appl/mingw/local/include $(CFLAGS) 21 | LIBS := -Lc:/appl/mingw/local/lib $(LIBS) 22 | TARGET = ag.exe 23 | 24 | all : $(TARGET) 25 | 26 | $(TARGET) : $(OBJS) 27 | $(CC) -o $@ $(OBJS) $(LIBS) 28 | 29 | .c.o : 30 | $(CC) -c $(CFLAGS) -Isrc $< -o $@ 31 | 32 | clean : 33 | rm -f src/*.o $(TARGET) 34 | -------------------------------------------------------------------------------- /tests/hidden_option.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . $TESTDIR/setup.sh 4 | $ mkdir hidden_bug 5 | $ cd hidden_bug 6 | $ echo "test" > a.txt 7 | $ git init --quiet 8 | $ if [ ! -d .git/info ] ; then mkdir .git/info ; fi 9 | $ echo "a.txt" > .git/info/exclude 10 | 11 | $ ag --ignore-dir .git test 12 | [1] 13 | 14 | $ ag --hidden --ignore-dir .git test 15 | [1] 16 | 17 | $ ag -U --ignore-dir .git test 18 | a.txt:1:test 19 | 20 | $ ag --hidden -U --ignore-dir .git test 21 | a.txt:1:test 22 | 23 | $ mkdir -p ./.hidden 24 | $ echo 'whatever' > ./.hidden/a.txt 25 | 26 | $ ag whatever 27 | [1] 28 | 29 | $ ag --hidden whatever 30 | [1] 31 | 32 | $ echo "" > .git/info/exclude 33 | 34 | $ ag whatever 35 | [1] 36 | 37 | $ ag --hidden whatever 38 | .hidden/a.txt:1:whatever 39 | -------------------------------------------------------------------------------- /src/ignore.h: -------------------------------------------------------------------------------- 1 | #ifndef IGNORE_H 2 | #define IGNORE_H 3 | 4 | #include 5 | #include 6 | 7 | #define SVN_DIR_PROP_BASE "dir-prop-base" 8 | #define SVN_DIR ".svn" 9 | #define SVN_PROP_IGNORE "svn:ignore" 10 | 11 | struct ignores { 12 | char **names; /* Non-regex ignore lines. Sorted so we can binary search them. 
*/ 13 | size_t names_len; 14 | char **regexes; /* For patterns that need fnmatch */ 15 | size_t regexes_len; 16 | struct ignores *parent; 17 | }; 18 | typedef struct ignores ignores; 19 | 20 | ignores *root_ignores; 21 | 22 | extern const char *evil_hardcoded_ignore_files[]; 23 | extern const char *ignore_pattern_files[]; 24 | 25 | ignores *init_ignore(ignores *parent); 26 | void cleanup_ignore(ignores *ig); 27 | 28 | void add_ignore_pattern(ignores *ig, const char *pattern); 29 | 30 | void load_ignore_patterns(ignores *ig, const char *path); 31 | void load_svn_ignore_patterns(ignores *ig, const char *path); 32 | 33 | int filename_filter(const char *path, const struct dirent *dir, void *baton); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /doc/generate_man.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # ronn is used to turn the markdown into a manpage. 4 | # Get ronn at https://github.com/rtomayko/ronn 5 | 6 | awk ' 7 | BEGIN{ 8 | in_options_block = 0; 9 | first_item_in_list_of_options = 1; 10 | } 11 | 12 | { 13 | if ($0 == "## OPTIONS") { 14 | in_options_block = 1; 15 | } 16 | 17 | if (in_options_block == 1) { # in options block 18 | first_4_chars = substr($0, 0, 4); 19 | 20 | if (first_4_chars == " * ") { # this line contains the option name 21 | 22 | # print only 1 new line for cases like the following 23 | # * `--[no]group` 24 | # * `-g PATTERN`: 25 | if (first_item_in_list_of_options == 1) { 26 | print ""; 27 | first_item_in_list_of_options = 0; 28 | } 29 | 30 | # end the line with 2 spaces, so a literal
is inserted! 31 | # more info at http://daringfireball.net/projects/markdown/syntax.php#p 32 | printf("%s \n", substr($0, 5)); 33 | 34 | } else if (first_4_chars == " ") { # we are in a description line 35 | printf("     %s\n", substr($0, 5)); 36 | first_item_in_list_of_options = 1; 37 | } else if (first_4_chars == "## F") { # reached the end of #OPTIONS part 38 | in_options_block = 0; 39 | print $0; 40 | } else { 41 | print $0; 42 | } 43 | } else { # outside options block 44 | print $0; 45 | } 46 | }' ag.1.md.tmp 47 | 48 | ronn -r ag.1.md.tmp 49 | 50 | rm -f ag.1.md.tmp 51 | -------------------------------------------------------------------------------- /tests/ignore_backups.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ . $TESTDIR/setup.sh 4 | $ mkdir -p ./a/b/c 5 | $ echo 'whatever1' > ./a/b/c/foo.yml 6 | $ echo 'whatever2' > ./a/b/c/foo.yml~ 7 | $ echo 'whatever3' > ./a/b/c/.foo.yml.swp 8 | $ echo 'whatever4' > ./a/b/c/.foo.yml.swo 9 | $ echo 'whatever5' > ./a/b/foo.yml 10 | $ echo 'whatever6' > ./a/b/foo.yml~ 11 | $ echo 'whatever7' > ./a/b/.foo.yml.swp 12 | $ echo 'whatever8' > ./a/b/.foo.yml.swo 13 | $ echo 'whatever9' > ./a/foo.yml 14 | $ echo 'whatever10' > ./a/foo.yml~ 15 | $ echo 'whatever11' > ./a/.foo.yml.swp 16 | $ echo 'whatever12' > ./a/.foo.yml.swo 17 | $ echo 'whatever13' > ./foo.yml 18 | $ echo 'whatever14' > ./foo.yml~ 19 | $ echo 'whatever15' > ./.foo.yml.swp 20 | $ echo 'whatever16' > ./.foo.yml.swo 21 | $ echo '*~\n*.sw[po]' > ./.gitignore 22 | 23 | Ignore all files except foo.yml 24 | 25 | $ ag whatever . | sort 26 | a/b/c/foo.yml:1:whatever1 27 | a/b/foo.yml:1:whatever5 28 | a/foo.yml:1:whatever9 29 | foo.yml:1:whatever13 30 | 31 | Dont ignore anything (unrestricted search): 32 | 33 | $ ag -u whatever . 
| sort 34 | .foo.yml.swo:1:whatever16 35 | .foo.yml.swp:1:whatever15 36 | a/.foo.yml.swo:1:whatever12 37 | a/.foo.yml.swp:1:whatever11 38 | a/b/.foo.yml.swo:1:whatever8 39 | a/b/.foo.yml.swp:1:whatever7 40 | a/b/c/.foo.yml.swo:1:whatever4 41 | a/b/c/.foo.yml.swp:1:whatever3 42 | a/b/c/foo.yml:1:whatever1 43 | a/b/c/foo.yml~:1:whatever2 44 | a/b/foo.yml:1:whatever5 45 | a/b/foo.yml~:1:whatever6 46 | a/foo.yml:1:whatever9 47 | a/foo.yml~:1:whatever10 48 | foo.yml:1:whatever13 49 | foo.yml~:1:whatever14 50 | -------------------------------------------------------------------------------- /src/search.h: -------------------------------------------------------------------------------- 1 | #ifndef SEARCH_H 2 | #define SEARCH_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #ifdef _WIN32 13 | #include 14 | #else 15 | #include 16 | #endif 17 | #include 18 | #include 19 | 20 | #include "config.h" 21 | 22 | #ifdef HAVE_PTHREAD_H 23 | #include 24 | #endif 25 | 26 | #include "ignore.h" 27 | #include "log.h" 28 | #include "options.h" 29 | #include "print.h" 30 | #include "util.h" 31 | #include "uthash.h" 32 | 33 | size_t alpha_skip_lookup[256]; 34 | size_t *find_skip_lookup; 35 | 36 | struct work_queue_t { 37 | char *path; 38 | struct work_queue_t *next; 39 | }; 40 | typedef struct work_queue_t work_queue_t; 41 | 42 | work_queue_t *work_queue; 43 | work_queue_t *work_queue_tail; 44 | int done_adding_files; 45 | pthread_cond_t files_ready; 46 | pthread_mutex_t print_mtx; 47 | pthread_mutex_t stats_mtx; 48 | pthread_mutex_t work_queue_mtx; 49 | 50 | 51 | /* For symlink loop detection */ 52 | #define SYMLOOP_ERROR (-1) 53 | #define SYMLOOP_OK (0) 54 | #define SYMLOOP_LOOP (1) 55 | 56 | typedef struct { 57 | dev_t dev; 58 | ino_t ino; 59 | } dirkey_t; 60 | 61 | typedef struct { 62 | dirkey_t key; 63 | UT_hash_handle hh; 64 | } symdir_t; 65 | 66 | symdir_t *symhash; 67 | 68 | void search_buf(const char *buf, const 
size_t buf_len, 69 | const char *dir_full_path); 70 | void search_stream(FILE *stream, const char *path); 71 | void search_file(const char *file_full_path); 72 | 73 | void *search_file_worker(void *i); 74 | 75 | void search_dir(ignores *ig, const char *base_path, const char *path, const int depth); 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /src/log.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "log.h" 5 | #include "util.h" 6 | 7 | static enum log_level log_threshold = LOG_LEVEL_ERR; 8 | 9 | void set_log_level(enum log_level threshold) { 10 | log_threshold = threshold; 11 | } 12 | 13 | void log_debug(const char *fmt, ...) { 14 | va_list args; 15 | va_start(args, fmt); 16 | vplog(LOG_LEVEL_DEBUG, fmt, args); 17 | va_end(args); 18 | } 19 | 20 | void log_msg(const char *fmt, ...) { 21 | va_list args; 22 | va_start(args, fmt); 23 | vplog(LOG_LEVEL_MSG, fmt, args); 24 | va_end(args); 25 | } 26 | 27 | void log_warn(const char *fmt, ...) { 28 | va_list args; 29 | va_start(args, fmt); 30 | vplog(LOG_LEVEL_WARN, fmt, args); 31 | va_end(args); 32 | } 33 | 34 | void log_err(const char *fmt, ...) { 35 | va_list args; 36 | va_start(args, fmt); 37 | vplog(LOG_LEVEL_ERR, fmt, args); 38 | va_end(args); 39 | } 40 | 41 | void vplog(const unsigned int level, const char *fmt, va_list args) { 42 | if (level < log_threshold) { 43 | return; 44 | } 45 | 46 | FILE *stream = out_fd; 47 | 48 | switch (level) { 49 | case LOG_LEVEL_DEBUG: 50 | fprintf(stream, "DEBUG: "); 51 | break; 52 | case LOG_LEVEL_MSG: 53 | fprintf(stream, "MSG: "); 54 | break; 55 | case LOG_LEVEL_WARN: 56 | fprintf(stream, "WARN: "); 57 | break; 58 | case LOG_LEVEL_ERR: 59 | stream = stderr; 60 | fprintf(stream, "ERR: "); 61 | break; 62 | } 63 | 64 | vfprintf(stream, fmt, args); 65 | fprintf(stream, "\n"); 66 | } 67 | 68 | void plog(const unsigned int level, const char *fmt, ...) 
{ 69 | va_list args; 70 | va_start(args, fmt); 71 | vplog(level, fmt, args); 72 | va_end(args); 73 | } 74 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT( 2 | [the_silver_searcher], 3 | [0.24.1], 4 | [https://github.com/ggreer/the_silver_searcher/issues], 5 | [the_silver_searcher], 6 | [https://github.com/ggreer/the_silver_searcher]) 7 | 8 | AM_INIT_AUTOMAKE([no-define foreign subdir-objects]) 9 | 10 | AC_PROG_CC 11 | AM_PROG_CC_C_O 12 | AC_PREREQ([2.59]) 13 | 14 | m4_ifdef( 15 | [AM_SILENT_RULES], 16 | [AM_SILENT_RULES([yes])]) 17 | 18 | PKG_CHECK_MODULES([PCRE], [libpcre]) 19 | 20 | m4_include([m4/ax_pthread.m4]) 21 | AX_PTHREAD([ 22 | AC_CHECK_HEADERS([pthread.h]) 23 | ]) 24 | 25 | # Run CFLAGS="-pg" ./configure if you want debug symbols 26 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS $PCRE_CFLAGS -Wall -Wextra -Wformat=2 -Wno-format-nonliteral -Wshadow -Wpointer-arith -Wcast-qual -Wmissing-prototypes -Wno-missing-braces -std=gnu89 -D_GNU_SOURCE -O2" 27 | LDFLAGS="$LDFLAGS" 28 | 29 | LIBS="$PTHREAD_LIBS $LIBS" 30 | 31 | AC_ARG_ENABLE([zlib], 32 | AS_HELP_STRING([--disable-zlib], [Disable zlib compressed search support])) 33 | 34 | AS_IF([test "x$enable_zlib" != "xno"], [ 35 | AC_CHECK_HEADERS([zlib.h]) 36 | AC_SEARCH_LIBS([inflate], [zlib, z]) 37 | ]) 38 | 39 | AC_ARG_ENABLE([lzma], 40 | AS_HELP_STRING([--disable-lzma], [Disable lzma compressed search support])) 41 | 42 | AS_IF([test "x$enable_lzma" != "xno"], [ 43 | AC_CHECK_HEADERS([lzma.h]) 44 | PKG_CHECK_MODULES([LZMA], [liblzma]) 45 | ]) 46 | 47 | AC_CHECK_DECL([PCRE_CONFIG_JIT], [AC_DEFINE([USE_PCRE_JIT], [], [Use PCRE JIT])], [], [#include ]) 48 | 49 | AC_CHECK_MEMBER([struct dirent.d_type], [AC_DEFINE([HAVE_DIRENT_DTYPE], [], [Have dirent struct member d_type])], [], [[#include ]]) 50 | 51 | AC_CHECK_FUNCS(fgetln getline realpath strlcpy strndup vasprintf madvise 
posix_fadvise) 52 | 53 | AC_CONFIG_FILES([Makefile the_silver_searcher.spec]) 54 | AC_CONFIG_HEADERS([src/config.h]) 55 | 56 | AC_OUTPUT 57 | -------------------------------------------------------------------------------- /src/scandir.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "scandir.h" 5 | #include "util.h" 6 | 7 | int ag_scandir(const char *dirname, 8 | struct dirent ***namelist, 9 | filter_fp filter, 10 | void *baton) { 11 | DIR *dirp = NULL; 12 | struct dirent **names = NULL; 13 | struct dirent *entry, *d; 14 | int names_len = 32; 15 | int results_len = 0; 16 | 17 | dirp = opendir(dirname); 18 | if (dirp == NULL) { 19 | goto fail; 20 | } 21 | 22 | names = malloc(sizeof(struct dirent *) * names_len); 23 | if (names == NULL) { 24 | goto fail; 25 | } 26 | 27 | while ((entry = readdir(dirp)) != NULL) { 28 | if ((*filter)(dirname, entry, baton) == FALSE) { 29 | continue; 30 | } 31 | if (results_len >= names_len) { 32 | struct dirent **tmp_names = names; 33 | names_len *= 2; 34 | names = realloc(names, sizeof(struct dirent *) * names_len); 35 | if (names == NULL) { 36 | free(tmp_names); 37 | goto fail; 38 | } 39 | } 40 | 41 | #if defined(__MINGW32__) || defined(__CYGWIN__) 42 | d = malloc(sizeof(struct dirent)); 43 | #else 44 | d = malloc(entry->d_reclen); 45 | #endif 46 | 47 | if (d == NULL) { 48 | goto fail; 49 | } 50 | #if defined(__MINGW32__) || defined(__CYGWIN__) 51 | memcpy(d, entry, sizeof(struct dirent)); 52 | #else 53 | memcpy(d, entry, entry->d_reclen); 54 | #endif 55 | 56 | names[results_len] = d; 57 | results_len++; 58 | } 59 | 60 | closedir(dirp); 61 | *namelist = names; 62 | return results_len; 63 | 64 | fail: 65 | if (dirp) { 66 | closedir(dirp); 67 | } 68 | 69 | if (names != NULL) { 70 | int i; 71 | for (i = 0; i < results_len; i++) { 72 | free(names[i]); 73 | } 74 | free(names); 75 | } 76 | return -1; 77 | } 78 | 
-------------------------------------------------------------------------------- /the_silver_searcher.spec.in: -------------------------------------------------------------------------------- 1 | %define _bashcompdir %_sysconfdir/bash_completion.d 2 | 3 | 4 | Name: the_silver_searcher 5 | Version: @VERSION@ 6 | Release: 1%{?dist} 7 | Summary: A code-searching tool similar to ack, but faster 8 | 9 | Group: Applications/Utilities 10 | License: Apache v2.0 11 | URL: https://github.com/ggreer/%{name} 12 | Source0: https://github.com/downloads/ggreer/%{name}/%{name}-%{version}.tar.gz 13 | BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) 14 | 15 | BuildRequires: pcre-devel, xz-devel, zlib-devel 16 | Requires: pcre, xz, zlib 17 | 18 | %description 19 | The Silver Searcher 20 | An attempt to make something better than ack (which itself is better than grep). 21 | 22 | Why use Ag? 23 | * It searches code about 3–5× faster than ack. 24 | * It ignores file patterns from your .gitignore and .hgignore. 25 | * If there are files in your source repo you don't want to search, just add their patterns to a .agignore file. *cough* extern *cough* 26 | * The command name is 33% shorter than ack! 27 | 28 | How is it so fast? 29 | * Searching for literals (no regex) uses Boyer-Moore-Horspool strstr. 30 | * Files are mmap()ed instead of read into a buffer. 31 | * If you're building with PCRE 8.21 or greater, regex searches use the JIT compiler. 32 | * Ag calls pcre_study() before executing the regex on a jillion files. 33 | * Instead of calling fnmatch() on every pattern in your ignore files, non-regex patterns are loaded into an array and binary searched. 34 | * Ag uses Pthreads to take advantage of multiple CPU cores and search files in parallel. 
35 | 36 | %prep 37 | %setup -q 38 | 39 | 40 | %build 41 | aclocal 42 | autoconf 43 | autoheader 44 | automake --add-missing 45 | %configure 46 | make %{?_smp_mflags} 47 | 48 | 49 | %install 50 | rm -rf ${RPM_BUILD_ROOT} 51 | make install DESTDIR=${RPM_BUILD_ROOT} 52 | mkdir -p ${RPM_BUILD_ROOT}%{_bashcompdir} 53 | install -m 644 ag.bashcomp.sh ${RPM_BUILD_ROOT}%{_bashcompdir} 54 | 55 | %clean 56 | rm -rf ${RPM_BUILD_ROOT} 57 | 58 | 59 | %files 60 | %defattr(-,root,root,-) 61 | %{_bindir}/* 62 | %{_mandir}/* 63 | %config %{_bashcompdir}/ag.bashcomp.sh 64 | %config %{_datadir}/%{name}/completions/ag.bashcomp.sh 65 | 66 | 67 | %changelog 68 | * Thu Dec 5 2013 Emily Strickland - 0.18.1-1 69 | - More accurate build and install requirements 70 | 71 | * Fri Aug 16 2013 Andrew Seidl - 0.15.0-1 72 | - Install bash completion file 73 | 74 | * Wed Dec 05 2012 Daniel Nelson - 0.13.1-1 75 | - Initial Build 76 | -------------------------------------------------------------------------------- /src/options.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTIONS_H 2 | #define OPTIONS_H 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #define DEFAULT_AFTER_LEN 2 10 | #define DEFAULT_BEFORE_LEN 2 11 | #define DEFAULT_CONTEXT_LEN 2 12 | 13 | enum case_behavior { 14 | CASE_SENSITIVE, 15 | CASE_INSENSITIVE, 16 | CASE_SMART, 17 | CASE_SENSITIVE_RETRY_INSENSITIVE /* for future use */ 18 | }; 19 | 20 | enum path_print_behavior { 21 | PATH_PRINT_DEFAULT, /* PRINT_TOP if > 1 file being searched, else PRINT_NOTHING */ 22 | PATH_PRINT_DEFAULT_EACH_LINE, /* PRINT_EACH_LINE if > 1 file being searched, else PRINT_NOTHING */ 23 | PATH_PRINT_TOP, 24 | PATH_PRINT_EACH_LINE, 25 | PATH_PRINT_NOTHING 26 | }; 27 | /* Settings filled in by parse_options(), plus a few pieces of runtime state that later code updates. main() resolves CASE_SMART in `casing` to CASE_INSENSITIVE or CASE_SENSITIVE depending on is_lowercase(query). */ 28 | typedef struct { 29 | int ackmate; 30 | pcre *ackmate_dir_filter; 31 | pcre_extra *ackmate_dir_filter_extra; 32 | size_t after; 33 | size_t before; 34 | enum case_behavior casing; 35 | const char *file_search_string; 36 | int
match_files; 37 | pcre *file_search_regex; 38 | pcre_extra *file_search_regex_extra; 39 | int color; 40 | char *color_line_number; 41 | char *color_match; 42 | char *color_path; 43 | int column; 44 | int context; 45 | int follow_symlinks; 46 | int invert_match; 47 | int literal; 48 | int literal_starts_wordchar; /* set by main() when literal and word_regexp are both on: is_wordchar() of the first query byte */ 49 | int literal_ends_wordchar; /* same, for the last query byte */ 50 | int max_matches_per_file; 51 | int max_search_depth; 52 | int null_follows_filename; 53 | char *path_to_agignore; 54 | int print_break; 55 | int print_filename_only; 56 | int print_path; 57 | int print_line_numbers; 58 | int print_long_lines; /* TODO: support this in print.c */ 59 | int passthrough; 60 | pcre *re; /* compiled query; main() fills it via compile_study() for non-literal searches */ 61 | pcre_extra *re_extra; 62 | int recurse_dirs; 63 | int search_all_files; 64 | int skip_vcs_ignores; 65 | int search_binary_files; 66 | int search_zip_files; 67 | int search_hidden_files; 68 | int search_stream; /* true if tail -F blah | ag */ 69 | int stats; 70 | size_t stream_line_num; /* This should totally not be in here */ 71 | int match_found; /* This should totally not be in here */ /* main() exits with !match_found */ 72 | ino_t stdout_inode; 73 | char *query; 74 | int query_len; 75 | char *pager; 76 | int paths_len; 77 | int parallel; 78 | int word_regexp; 79 | int workers; /* non-zero overrides the worker count main() derives from the CPU count */ 80 | } cli_options; 81 | 82 | /* global options.
parse_options gives it sane values, everything else reads from it */ 83 | cli_options opts; 84 | 85 | typedef struct option option_t; 86 | 87 | void usage(void); 88 | void print_version(void); 89 | 90 | void init_options(void); 91 | void parse_options(int argc, char **argv, char **base_paths[], char **paths[]); 92 | void cleanup_options(void); 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /ag.bashcomp.sh: --------------------------------------------------------------------------------
# Bash completion for ag.
#
# Relies on helpers supplied by the bash-completion package: have,
# _get_cword, _expand, _split_longopt and _filedir.
#
# Behaviour:
#   * after an option that takes a free-form value, offer nothing;
#   * after --ignore-dir / --path-to-agignore / --pager, complete
#     directories / files / commands respectively;
#   * for a word starting with '-', offer every known option;
#   * otherwise complete file names.
have ag &&
_ag() {
    local lngopt shtopt split=false
    local cur prev

    COMPREPLY=()
    cur=$(_get_cword "=")
    prev="${COMP_WORDS[COMP_CWORD-1]}"

    _expand || return 0

    lngopt='
        --ackmate
        --ackmate-dir-filter
        --all-text
        --all-types
        --after
        --before
        --break
        --nobreak
        --case-sensitive
        --color-line-number
        --color-match
        --color-path
        --color
        --nocolor
        --column
        --context
        --debug
        --depth
        --file-search-regex
        --files-with-matches
        --files-without-matches
        --follow
        --group
        --nogroup
        --heading
        --noheading
        --help
        --hidden
        --ignore
        --ignore-case
        --ignore-dir
        --invert-match
        --line-numbers
        --list-file-types
        --literal
        --max-count
        --no-numbers
        --null
        --pager
        --nopager
        --parallel
        --passthrough
        --path-to-agignore
        --print-long-lines
        --recurse
        --no-recurse
        --search-binary
        --search-files
        --search-zip
        --silent
        --skip-vcs-ignores
        --smart-case
        --stats
        --unrestricted
        --version
        --word-regexp
        --workers
    '
    shtopt='
        -a -A -B -C -D
        -f -g -G -h -H
        -i -l -L -m -n
        -p -Q -r -R -s
        -S -t -u -U -v
        -V -w -z
    '

    # these options require an argument
    if [[ "${prev}" == -@(A|B|C|G|g|m) ]] ; then
        return 0
    fi

    _split_longopt && split=true

    case "${prev}" in
        --ignore-dir) # directory completion
            _filedir -d
            return 0;;
        --path-to-agignore) # file completion
            _filedir
            return 0;;
        --pager) # command completion
            COMPREPLY=( $(compgen -c -- "${cur}") )
            return 0;;
        --ackmate-dir-filter|--after|--before|--color-*|--context|--depth\
        |--file-search-regex|--ignore|--max-count|--workers)
            return 0;;
    esac

    $split && return 0

    case "${cur}" in
        -*)
            # The same option list applies at every word position.
            COMPREPLY=( $(compgen -W \
                "${lngopt} ${shtopt}" -- "${cur}") )
            return 0;;
        *)
            _filedir
            return 0;;
    esac
} &&
complete -F _ag ${nospace} ag
117 | -------------------------------------------------------------------------------- /tests/list_file_types.t: -------------------------------------------------------------------------------- 1 | Setup: 2 | 3 | $ .
$TESTDIR/setup.sh 4 | 5 | Language types are output: 6 | 7 | $ ag --list-file-types 8 | The following file types are supported: 9 | --actionscript 10 | .as .mxml 11 | 12 | --ada 13 | .ada .adb .ads 14 | 15 | --asm 16 | .asm .s 17 | 18 | --batch 19 | .bat .cmd 20 | 21 | --cc 22 | .c .h .xs 23 | 24 | --cfmx 25 | .cfc .cfm .cfml 26 | 27 | --clojure 28 | .clj 29 | 30 | --coffee 31 | .coffee 32 | 33 | --cpp 34 | .cpp .cc .C .cxx .m .hpp .hh .h .H .hxx 35 | 36 | --csharp 37 | .cs 38 | 39 | --css 40 | .css 41 | 42 | --delphi 43 | .pas .int .dfm .nfm .dof .dpk .dproj .groupproj .bdsgroup .bdsproj 44 | 45 | --elisp 46 | .el 47 | 48 | --erlang 49 | .erl .hrl 50 | 51 | --fortran 52 | .f .f77 .f90 .f95 .f03 .for .ftn .fpp 53 | 54 | --gettext 55 | .po .pot .mo 56 | 57 | --go 58 | .go 59 | 60 | --groovy 61 | .groovy .gtmpl .gpp .grunit 62 | 63 | --haml 64 | .haml 65 | 66 | --haskell 67 | .hs .lhs 68 | 69 | --hh 70 | .h 71 | 72 | --html 73 | .htm .html .shtml .xhtml 74 | 75 | --ini 76 | .ini 77 | 78 | --jade 79 | .jade 80 | 81 | --java 82 | .java .properties 83 | 84 | --js 85 | .js 86 | 87 | --json 88 | .json 89 | 90 | --jsp 91 | .jsp .jspx .jhtm .jhtml 92 | 93 | --less 94 | .less 95 | 96 | --lisp 97 | .lisp .lsp 98 | 99 | --lua 100 | .lua 101 | 102 | --m4 103 | .m4 104 | 105 | --make 106 | .Makefiles .mk .mak 107 | 108 | --mason 109 | .mas .mhtml .mpl .mtxt 110 | 111 | --matlab 112 | .m 113 | 114 | --objc 115 | .m .h 116 | 117 | --objcpp 118 | .mm .h 119 | 120 | --ocaml 121 | .ml .mli 122 | 123 | --octave 124 | .m 125 | 126 | --parrot 127 | .pir .pasm .pmc .ops .pod .pg .tg 128 | 129 | --perl 130 | .pl .pm .pm6 .pod .t 131 | 132 | --php 133 | .php .phpt .php3 .php4 .php5 .phtml 134 | 135 | --plone 136 | .pt .cpt .metadata .cpy .py 137 | 138 | --python 139 | .py 140 | 141 | --rake 142 | .Rakefiles 143 | 144 | --rs 145 | .rs 146 | 147 | --ruby 148 | .rb .rhtml .rjs .rxml .erb .rake .spec 149 | 150 | --rust 151 | .rs 152 | 153 | --salt 154 | .sls 155 | 156 | --sass 157 | .sass 
.scss 158 | 159 | --scala 160 | .scala 161 | 162 | --scheme 163 | .scm .ss 164 | 165 | --shell 166 | .sh .bash .csh .tcsh .ksh .zsh 167 | 168 | --smalltalk 169 | .st 170 | 171 | --sql 172 | .sql .ctl 173 | 174 | --stylus 175 | .styl 176 | 177 | --swift 178 | .swift 179 | 180 | --tcl 181 | .tcl .itcl .itk 182 | 183 | --tex 184 | .tex .cls .sty 185 | 186 | --tt 187 | .tt .tt2 .ttml 188 | 189 | --vb 190 | .bas .cls .frm .ctl .vb .resx 191 | 192 | --verilog 193 | .v .vh .sv 194 | 195 | --vhdl 196 | .vhd .vhdl 197 | 198 | --vim 199 | .vim 200 | 201 | --xml 202 | .xml .dtd .xsl .xslt .ent 203 | 204 | --yaml 205 | .yaml .yml 206 | 207 | -------------------------------------------------------------------------------- /src/util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "config.h" 12 | #include "log.h" 13 | #include "options.h" 14 | /* Output stream: main() points it at stdout and pclose()s it when a pager is configured. NOTE(review): this is a definition, not an extern declaration, in a header included by several .c files - confirm it links under -fno-common. */ 15 | FILE *out_fd; 16 | 17 | #ifndef TRUE 18 | #define TRUE 1 19 | #endif 20 | 21 | #ifndef FALSE 22 | #define FALSE 0 23 | #endif 24 | 25 | void *ag_malloc(size_t size); 26 | void *ag_realloc(void *ptr, size_t size); 27 | void *ag_calloc(size_t nelem, size_t elsize); 28 | char *ag_strdup(const char *s); 29 | char *ag_strndup(const char *s, size_t size); 30 | /* Byte range of a single match. */ 31 | typedef struct { 32 | size_t start; /* Byte at which the match starts */ 33 | size_t end; /* and where it ends */ 34 | } match_t; 35 | /* Counters and timestamps behind --stats: main() zeroes the struct, records time_start/time_end and prints the three totals plus the elapsed seconds. */ 36 | typedef struct { 37 | long total_bytes; 38 | long total_files; 39 | long total_matches; 40 | struct timeval time_start; 41 | struct timeval time_end; 42 | } ag_stats; 43 | /* Compression kinds: the return type of is_zipped() and the first argument of decompress(). */ 44 | typedef enum { 45 | AG_NO_COMPRESSION, 46 | AG_GZIP, 47 | AG_COMPRESS, 48 | AG_ZIP 49 | } ag_compression_type; 50 | /* Process-wide statistics instance. NOTE(review): also defined (not declared extern) in this header - same -fno-common question as out_fd. */ 51 | ag_stats stats; 52 | /* Function-pointer type with the same parameter list as boyer_moore_strnstr() and boyer_moore_strncasestr(); get_strstr() returns a value of this type. */ 53 | typedef const char *(*strncmp_fp)(const char *, const char *, const size_t, const size_t, const size_t[], const size_t *); 54 | 55 | void
generate_alpha_skip(const char *find, size_t f_len, size_t skip_lookup[], const int case_sensitive); 56 | int is_prefix(const char *s, const size_t s_len, const size_t pos, const int case_sensitive); 57 | size_t suffix_len(const char *s, const size_t s_len, const size_t pos, const int case_sensitive); 58 | void generate_find_skip(const char *find, const size_t f_len, size_t **skip_lookup, const int case_sensitive); 59 | 60 | /* max is already defined on spec-violating compilers such as MinGW */ 61 | size_t ag_max(size_t a, size_t b); 62 | 63 | const char *boyer_moore_strnstr(const char *s, const char *find, const size_t s_len, const size_t f_len, 64 | const size_t alpha_skip_lookup[], const size_t *find_skip_lookup); 65 | const char *boyer_moore_strncasestr(const char *s, const char *find, const size_t s_len, const size_t f_len, 66 | const size_t alpha_skip_lookup[], const size_t *find_skip_lookup); 67 | 68 | strncmp_fp get_strstr(enum case_behavior opts); 69 | 70 | size_t invert_matches(const char *buf, const size_t buf_len, match_t matches[], size_t matches_len); 71 | void compile_study(pcre **re, pcre_extra **re_extra, char *q, const int pcre_opts, const int study_opts); 72 | 73 | void *decompress(const ag_compression_type zip_type, const void *buf, const int buf_len, const char *dir_full_path, int *new_buf_len); 74 | ag_compression_type is_zipped(const void *buf, const int buf_len); 75 | 76 | int is_binary(const void *buf, const size_t buf_len); 77 | int is_regex(const char *query); 78 | int is_fnmatch(const char *filename); 79 | int binary_search(const char *needle, char **haystack, int start, int end); 80 | 81 | void init_wordchar_table(void); 82 | int is_wordchar(char ch); 83 | 84 | int is_lowercase(const char *s); 85 | 86 | int is_directory(const char *path, const struct dirent *d); 87 | int is_symlink(const char *path, const struct dirent *d); 88 | int is_named_pipe(const char *path, const struct dirent *d); 89 | 90 | void die(const char *fmt, ...); 91 | 
92 | void ag_asprintf(char **ret, const char *fmt, ...); 93 | 94 | #ifndef HAVE_FGETLN 95 | char *fgetln(FILE *fp, size_t *lenp); 96 | #endif 97 | #ifndef HAVE_GETLINE 98 | ssize_t getline(char **lineptr, size_t *n, FILE *stream); 99 | #endif 100 | #ifndef HAVE_REALPATH 101 | char *realpath(const char *path, char *resolved_path); 102 | #endif 103 | #ifndef HAVE_STRLCPY 104 | size_t strlcpy(char *dest, const char *src, size_t size); 105 | #endif 106 | #ifndef HAVE_VASPRINTF 107 | int vasprintf(char **ret, const char *fmt, va_list args); 108 | #endif 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /src/lang.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "lang.h" 5 | #include "util.h" 6 | 7 | lang_spec_t langs[] = { 8 | { "actionscript", { "as", "mxml" } }, 9 | { "ada", { "ada", "adb", "ads" } }, 10 | { "asm", { "asm", "s" } }, 11 | { "batch", { "bat", "cmd" } }, 12 | { "cc", { "c", "h", "xs" } }, 13 | { "cfmx", { "cfc", "cfm", "cfml" } }, 14 | { "clojure", { "clj" } }, 15 | { "coffee", { "coffee" } }, 16 | { "cpp", { "cpp", "cc", "C", "cxx", "m", "hpp", "hh", "h", "H", "hxx" } }, 17 | { "csharp", { "cs" } }, 18 | { "css", { "css" } }, 19 | { "delphi", { "pas", "int", "dfm", "nfm", "dof", "dpk", "dproj", "groupproj", "bdsgroup", "bdsproj" } }, 20 | { "elisp", { "el" } }, 21 | { "erlang", { "erl", "hrl" } }, 22 | { "fortran", { "f", "f77", "f90", "f95", "f03", "for", "ftn", "fpp" } }, 23 | { "gettext", { "po", "pot", "mo" } }, 24 | { "go", { "go" } }, 25 | { "groovy", { "groovy", "gtmpl", "gpp", "grunit" } }, 26 | { "haml", { "haml" } }, 27 | { "haskell", { "hs", "lhs" } }, 28 | { "hh", { "h" } }, 29 | { "html", { "htm", "html", "shtml", "xhtml" } }, 30 | { "ini", { "ini" } }, 31 | { "jade", { "jade" } }, 32 | { "java", { "java", "properties" } }, 33 | { "js", { "js" } }, 34 | { "json", { "json" } }, 35 | { "jsp", { "jsp", "jspx", 
"jhtm", "jhtml" } }, 36 | { "less", { "less" } }, 37 | { "lisp", { "lisp", "lsp" } }, 38 | { "lua", { "lua" } }, 39 | { "m4", { "m4" } }, 40 | { "make", { "Makefiles", "mk", "mak" } }, 41 | { "mason", { "mas", "mhtml", "mpl", "mtxt" } }, 42 | { "matlab", { "m" } }, 43 | { "objc", { "m", "h" } }, 44 | { "objcpp", { "mm", "h" } }, 45 | { "ocaml", { "ml", "mli" } }, 46 | { "octave", { "m" } }, 47 | { "parrot", { "pir", "pasm", "pmc", "ops", "pod", "pg", "tg" } }, 48 | { "perl", { "pl", "pm", "pm6", "pod", "t" } }, 49 | { "php", { "php", "phpt", "php3", "php4", "php5", "phtml" } }, 50 | { "plone", { "pt", "cpt", "metadata", "cpy", "py" } }, 51 | { "python", { "py" } }, 52 | { "rake", { "Rakefiles" } }, 53 | { "rs", { "rs" } }, 54 | { "ruby", { "rb", "rhtml", "rjs", "rxml", "erb", "rake", "spec" } }, 55 | { "rust", { "rs" } }, 56 | { "salt", { "sls" } }, 57 | { "sass", { "sass", "scss" } }, 58 | { "scala", { "scala" } }, 59 | { "scheme", { "scm", "ss" } }, 60 | { "shell", { "sh", "bash", "csh", "tcsh", "ksh", "zsh" } }, 61 | { "smalltalk", { "st" } }, 62 | { "sql", { "sql", "ctl" } }, 63 | { "stylus", { "styl" } }, 64 | { "swift", { "swift" } }, 65 | { "tcl", { "tcl", "itcl", "itk" } }, 66 | { "tex", { "tex", "cls", "sty" } }, 67 | { "tt", { "tt", "tt2", "ttml" } }, 68 | { "vb", { "bas", "cls", "frm", "ctl", "vb", "resx" } }, 69 | { "verilog", { "v", "vh", "sv" } }, 70 | { "vhdl", { "vhd", "vhdl" } }, 71 | { "vim", { "vim" } }, 72 | { "xml", { "xml", "dtd", "xsl", "xslt", "ent" } }, 73 | { "yaml", { "yaml", "yml" } }, 74 | { NULL, { NULL } } 75 | }; 76 |
/*
 * Build a regex that matches a file name ending in any of the given
 * extensions: { "c", "h", NULL } produces "\.(c|h)$".
 *
 * extensions: NULL-terminated array of extension strings (no leading dot).
 * Returns a NUL-terminated buffer obtained from ag_malloc()/ag_realloc();
 * presumably the caller is expected to free it.
 */
char *make_lang_regex(const char **extensions) {
    int capacity = 100;
    int length = 3; /* length of the "\.(" prefix written below */
    int need_separator = 0;
    char *regex = ag_malloc(capacity);
    const char **ext = extensions;

    strcpy(regex, "\\.(");

    while (*ext != NULL) {
        int ext_len = strlen(*ext);
        /* Space for an optional '|', the extension and the closing ")$\0". */
        int required = length + ext_len + 3 + need_separator;

        while (required > capacity) {
            capacity *= 2;
            regex = ag_realloc(regex, capacity);
        }

        if (need_separator) {
            regex[length] = '|';
            length++;
        }
        need_separator = 1;

        strcpy(regex + length, *ext);
        length += ext_len;
        ext++;
    }

    regex[length] = ')';
    regex[length + 1] = '$';
    regex[length + 2] = 0;
    return regex;
}
106 | -------------------------------------------------------------------------------- /doc/ag.1.md: -------------------------------------------------------------------------------- 1 | ag(1) -- The Silver Searcher. Like ack, but faster. 2 | ============================================= 3 | 4 | ## SYNOPSIS 5 | 6 | `ag` [] [] PATTERN [PATH] 7 | 8 | ## DESCRIPTION 9 | 10 | Recursively search for PATTERN in PATH. Like grep or ack, but faster. 11 | 12 | ## OPTIONS 13 | 14 | * `--ackmate`: 15 | Output results in a format parseable by [AckMate](https://github.com/protocool/AckMate). 16 | * `-a --all-types`: 17 | Search all files. This doesn't include hidden files, and also doesn't respect any ignore files 18 | * `-A --after [LINES]`: 19 | Print lines after match. Defaults to 2. 20 | * `-B --before [LINES]`: 21 | Print lines before match. Defaults to 2. 22 | * `--[no]break`: 23 | Print a newline between matches in different files. Enabled by default. 24 | * `--[no]color`: 25 | Print color codes in results. Enabled by default. 26 | * `--color-line-number`: 27 | Color codes for line numbers. Defaults to 1;33. 28 | * `--color-match`: 29 | Color codes for result match numbers. Defaults to 30;43. 30 | * `--color-path`: 31 | Color codes for path names. Defaults to 1;32. 32 | * `--column`: 33 | Print column numbers in results. 34 | * `-C --context [LINES]`: 35 | Print lines before and after matches. Defaults to 2. 36 | * `-D --debug`: 37 | Output ridiculous amounts of debugging info. Probably not useful.
38 | * `--depth NUM`: 39 | Search up to NUM directories deep. Default is 25. 40 | * `-f --follow`: 41 | Follow symlinks. 42 | * `--[no]group`: Same as `--[no]break --[no]heading`. 43 | * `-g PATTERN`: 44 | Print filenames matching PATTERN. 45 | * `-G`, `--file-search-regex PATTERN`: 46 | Only search filenames matching PATTERN. 47 | * `-H`, `--[no]heading`: 48 | Print file names above matching contents. 49 | * `--hidden`: 50 | Search hidden files. This option obeys ignore files. 51 | * `--ignore PATTERN`: 52 | Ignore files/directories matching this pattern. Literal file and directory names are also allowed. 53 | * `--ignore-dir NAME`: 54 | Alias for --ignore for compatibility with ack. 55 | * `-i --ignore-case`: 56 | Match case insensitively. 57 | * `-l --files-with-matches`: 58 | Only print filenames containing matches, not matching lines. An empty query will print all files that would be searched. 59 | * `-L --files-without-matches`: 60 | Only print filenames that don't contain matches. 61 | * `--list-file-types`: 62 | See `FILE TYPES` below. 63 | * `-m --max-count NUM`: 64 | Skip the rest of a file after NUM matches. Default is 10,000. 65 | * `--no-numbers`: 66 | Don't show line numbers. 67 | * `--null`: 68 | Separate the filenames printed by -l or -L with \0 rather than \n. This allows 'xargs -0 <command>' to correctly process filenames with spaces. 69 | * `-p --path-to-agignore STRING`: 70 | Provide a path to a specific .agignore file. 71 | * `--pager COMMAND`: 72 | Use a pager such as less. Use `--nopager` to override. This option is also ignored if output is piped to another program. 73 | * `--print-long-lines`: 74 | Print matches on very long lines (> 2k characters by default). 75 | * `--passthrough`: 76 | When searching a stream, print all lines even if they don't match. 77 | * `-Q --literal`: 78 | Do not parse PATTERN as a regular expression. Try to match it literally. 79 | * `-s --case-sensitive`: 80 | Match case sensitively.
81 | * `-S --smart-case`: 82 | Match case sensitively if there are any uppercase letters in PATTERN, or case insensitively otherwise. Enabled by default. 83 | * `--search-binary`: 84 | Search binary files for matches. 85 | * `--silent`: 86 | Suppress all log messages, including errors. 87 | * `--stats`: 88 | Print stats (files scanned, time taken, etc.). 89 | * `-t --all-text`: 90 | Search all text files. This doesn't include hidden files. 91 | * `-u --unrestricted`: 92 | Search *all* files. This ignores .agignore, .gitignore, etc. It searches binary and hidden files as well. 93 | * `-U --skip-vcs-ignores`: 94 | Ignore VCS ignore files (.gitignore, .hgignore, svn:ignore), but still use .agignore. 95 | * `-v --invert-match`: Match every line *not* containing PATTERN. 96 | * `-w --word-regexp`: 97 | Only match whole words. 98 | * `-z --search-zip`: 99 | Search contents of compressed files. 100 | 101 | ## FILE TYPES 102 | 103 | It is possible to restrict the types of files searched. For example, passing `--html` as the `file-types` parameter will search only files with the extensions `htm`, `html`, `shtml` or `xhtml`. For a list of supported `file-types` run `ag --list-file-types`. 104 | 105 | ## IGNORING FILES 106 | 107 | By default, ag will ignore files matched by patterns in .gitignore, .hgignore, 108 | or .agignore. These files can be anywhere in the directories being searched. Ag 109 | also ignores files matched by the svn:ignore property if `svn --version` is 1.6 110 | or older. Finally, ag looks in $HOME/.agignore for 111 | ignore patterns. Binary files are ignored by default as well. 112 | 113 | If you want to ignore .gitignore, .hgignore, and svn:ignore but still take .agignore into account, use `-U`. 114 | 115 | Use the `-t` option to search all text files, `-a` to search all files, and `-u` to search all files, including binary and hidden ones. 116 | 117 | ## EXAMPLES 118 | 119 | `ag printf`: 120 | Find matches for "printf" in the current directory.
121 | 122 | `ag foo /bar/`: 123 | Find matches for "foo" in path /bar/. 124 | 125 | ## SEE ALSO 126 | 127 | grep(1) 128 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #ifdef _WIN32 9 | #include 10 | #endif 11 | 12 | #include "config.h" 13 | 14 | #ifdef HAVE_PTHREAD_H 15 | #include 16 | #endif 17 | 18 | #include "log.h" 19 | #include "options.h" 20 | #include "search.h" 21 | #include "util.h" 22 | 23 | int main(int argc, char **argv) { 24 | char **base_paths = NULL; 25 | char **paths = NULL; 26 | int i; 27 | int pcre_opts = PCRE_MULTILINE; 28 | int study_opts = 0; 29 | double time_diff; 30 | pthread_t *workers = NULL; 31 | int workers_len; 32 | 33 | set_log_level(LOG_LEVEL_WARN); 34 | 35 | work_queue = NULL; 36 | work_queue_tail = NULL; 37 | memset(&stats, 0, sizeof(stats)); 38 | root_ignores = init_ignore(NULL); 39 | out_fd = stdout; 40 | #ifdef USE_PCRE_JIT 41 | int has_jit = 0; 42 | pcre_config(PCRE_CONFIG_JIT, &has_jit); 43 | if (has_jit) { 44 | study_opts |= PCRE_STUDY_JIT_COMPILE; 45 | } 46 | #endif 47 | 48 | gettimeofday(&(stats.time_start), NULL); 49 | 50 | parse_options(argc, argv, &base_paths, &paths); 51 | log_debug("PCRE Version: %s", pcre_version()); 52 | 53 | #ifdef _WIN32 54 | { 55 | SYSTEM_INFO si; 56 | GetSystemInfo(&si); 57 | workers_len = si.dwNumberOfProcessors; 58 | } 59 | #else 60 | workers_len = (int)sysconf(_SC_NPROCESSORS_ONLN); 61 | #endif 62 | if (opts.literal) { 63 | workers_len--; 64 | } 65 | if (opts.workers) { 66 | workers_len = opts.workers; 67 | } 68 | if (workers_len < 1) { 69 | workers_len = 1; 70 | } 71 | 72 | log_debug("Using %i workers", workers_len); 73 | done_adding_files = FALSE; 74 | workers = ag_calloc(workers_len, sizeof(pthread_t)); 75 | if (pthread_cond_init(&files_ready, NULL)) { 76 | 
die("pthread_cond_init failed!"); 77 | } 78 | if (pthread_mutex_init(&print_mtx, NULL)) { 79 | die("pthread_mutex_init failed!"); 80 | } 81 | if (pthread_mutex_init(&stats_mtx, NULL)) { 82 | die("pthread_mutex_init failed!"); 83 | } 84 | if (pthread_mutex_init(&work_queue_mtx, NULL)) { 85 | die("pthread_mutex_init failed!"); 86 | } 87 | 88 | if (opts.casing == CASE_SMART) { 89 | opts.casing = is_lowercase(opts.query) ? CASE_INSENSITIVE : CASE_SENSITIVE; 90 | } 91 | 92 | if (opts.literal) { 93 | if (opts.casing == CASE_INSENSITIVE) { 94 | /* Search routine needs the query to be lowercase */ 95 | char *c = opts.query; 96 | for (; *c != '\0'; ++c) { 97 | *c = (char)tolower(*c); 98 | } 99 | } 100 | generate_alpha_skip(opts.query, opts.query_len, alpha_skip_lookup, opts.casing == CASE_SENSITIVE); 101 | find_skip_lookup = NULL; 102 | generate_find_skip(opts.query, opts.query_len, &find_skip_lookup, opts.casing == CASE_SENSITIVE); 103 | if (opts.word_regexp) { 104 | init_wordchar_table(); 105 | opts.literal_starts_wordchar = is_wordchar(opts.query[0]); 106 | opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]); 107 | } 108 | } else { 109 | if (opts.casing == CASE_INSENSITIVE) { 110 | pcre_opts |= PCRE_CASELESS; 111 | } 112 | if (opts.word_regexp) { 113 | char *word_regexp_query; 114 | ag_asprintf(&word_regexp_query, "\\b%s\\b", opts.query); 115 | free(opts.query); 116 | opts.query = word_regexp_query; 117 | opts.query_len = strlen(opts.query); 118 | } 119 | compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts); 120 | } 121 | 122 | if (opts.search_stream) { 123 | search_stream(stdin, ""); 124 | } else { 125 | for (i = 0; i < workers_len; i++) { 126 | int rv = pthread_create(&(workers[i]), NULL, &search_file_worker, &i); 127 | if (rv != 0) { 128 | die("error in pthread_create(): %s", strerror(rv)); 129 | } 130 | } 131 | for (i = 0; paths[i] != NULL; i++) { 132 | log_debug("searching path %s for %s", paths[i], opts.query); 133 | 
symhash = NULL; 134 | search_dir(root_ignores, base_paths[i], paths[i], 0); 135 | } 136 | pthread_mutex_lock(&work_queue_mtx); 137 | done_adding_files = TRUE; 138 | pthread_cond_broadcast(&files_ready); 139 | pthread_mutex_unlock(&work_queue_mtx); 140 | for (i = 0; i < workers_len; i++) { 141 | if (pthread_join(workers[i], NULL)) { 142 | die("pthread_join failed!"); 143 | } 144 | } 145 | } 146 | 147 | if (opts.stats) { 148 | gettimeofday(&(stats.time_end), NULL); 149 | time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) - 150 | ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec); 151 | time_diff /= 1000000; 152 | 153 | printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff); 154 | } 155 | 156 | if (opts.pager) { 157 | pclose(out_fd); 158 | } 159 | cleanup_options(); 160 | pthread_cond_destroy(&files_ready); 161 | pthread_mutex_destroy(&work_queue_mtx); 162 | pthread_mutex_destroy(&stats_mtx); 163 | pthread_mutex_destroy(&print_mtx); 164 | cleanup_ignore(root_ignores); 165 | free(workers); 166 | for (i = 0; paths[i] != NULL; i++) { 167 | free(paths[i]); 168 | free(base_paths[i]); 169 | } 170 | free(base_paths); 171 | free(paths); 172 | if (find_skip_lookup) { 173 | free(find_skip_lookup); 174 | } 175 | return !opts.match_found; 176 | } 177 | -------------------------------------------------------------------------------- /doc/ag.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "AG" "1" "August 2014" "" "" 5 | . 6 | .SH "NAME" 7 | \fBag\fR \- The Silver Searcher\. Like ack, but faster\. 8 | . 9 | .SH "SYNOPSIS" 10 | \fBag\fR [\fIfile\-type\fR] [\fIoptions\fR] PATTERN [PATH] 11 | . 12 | .SH "DESCRIPTION" 13 | Recursively search for PATTERN in PATH\. Like grep or ack, but faster\. 14 | . 
15 | .SH "OPTIONS" 16 | \fB\-\-ackmate\fR: 17 | . 18 | .br 19 | \~\~\~\~ Output results in a format parseable by AckMate \fIhttps://github\.com/protocool/AckMate\fR\. 20 | . 21 | .P 22 | \fB\-a \-\-all\-types\fR: 23 | . 24 | .br 25 | \~\~\~\~ Search all files\. This doesn\'t include hidden files, and also doesn\'t respect any ignore files 26 | . 27 | .P 28 | \fB\-A \-\-after [LINES]\fR: 29 | . 30 | .br 31 | \~\~\~\~ Print lines after match\. Defaults to 2\. 32 | . 33 | .P 34 | \fB\-B \-\-before [LINES]\fR: 35 | . 36 | .br 37 | \~\~\~\~ Print lines before match\. Defaults to 2\. 38 | . 39 | .P 40 | \fB\-\-[no]break\fR: 41 | . 42 | .br 43 | \~\~\~\~ Print a newline between matches in different files\. Enabled by default\. 44 | . 45 | .P 46 | \fB\-\-[no]color\fR: 47 | . 48 | .br 49 | \~\~\~\~ Print color codes in results\. Enabled by default\. 50 | . 51 | .P 52 | \fB\-\-color\-line\-number\fR: 53 | . 54 | .br 55 | \~\~\~\~ Color codes for line numbers\. Defaults to 1;33\. 56 | . 57 | .P 58 | \fB\-\-color\-match\fR: 59 | . 60 | .br 61 | \~\~\~\~ Color codes for result match numbers\. Defaults to 30;43\. 62 | . 63 | .P 64 | \fB\-\-color\-path\fR: 65 | . 66 | .br 67 | \~\~\~\~ Color codes for path names\. Defaults to 1;32\. 68 | . 69 | .P 70 | \fB\-\-column\fR: 71 | . 72 | .br 73 | \~\~\~\~ Print column numbers in results\. 74 | . 75 | .P 76 | \fB\-C \-\-context [LINES]\fR: 77 | . 78 | .br 79 | \~\~\~\~ Print lines before and after matches\. Defaults to 2\. 80 | . 81 | .P 82 | \fB\-D \-\-debug\fR: 83 | . 84 | .br 85 | \~\~\~\~ Output ridiculous amounts of debugging info\. Probably not useful\. 86 | . 87 | .P 88 | \fB\-\-depth NUM\fR: 89 | . 90 | .br 91 | \~\~\~\~ Search up to NUM directories deep\. Default is 25\. 92 | . 93 | .P 94 | \fB\-f \-\-follow\fR: 95 | . 96 | .br 97 | \~\~\~\~ Follow symlinks\. 98 | . 99 | .P 100 | \fB\-\-[no]group\fR 101 | . 102 | .br 103 | \fB\-g PATTERN\fR: 104 | . 105 | .br 106 | \~\~\~\~ Print filenames matching PATTERN\. 107 | . 
108 | .P 109 | \fB\-G\fR, \fB\-\-file\-search\-regex PATTERN\fR: 110 | . 111 | .br 112 | \~\~\~\~ Only search filenames matching PATTERN\. 113 | . 114 | .P 115 | \fB\-H\fR, \fB\-\-[no]heading\fR: 116 | . 117 | .br 118 | \~\~\~\~ Print file names above matching contents\. 119 | . 120 | .P 121 | \fB\-\-hidden\fR: 122 | . 123 | .br 124 | \~\~\~\~ Search hidden files\. This option obeys ignore files\. 125 | . 126 | .P 127 | \fB\-\-ignore PATTERN\fR: 128 | . 129 | .br 130 | \~\~\~\~ Ignore files/directories matching this pattern\. Literal file and directory names are also allowed\. 131 | . 132 | .P 133 | \fB\-\-ignore\-dir NAME\fR: 134 | . 135 | .br 136 | \~\~\~\~ Alias for \-\-ignore for compatibility with ack\. 137 | . 138 | .P 139 | \fB\-i \-\-ignore\-case\fR: 140 | . 141 | .br 142 | \~\~\~\~ Match case insensitively\. 143 | . 144 | .P 145 | \fB\-l \-\-files\-with\-matches\fR: 146 | . 147 | .br 148 | \~\~\~\~ Only print filenames containing matches, not matching lines\. An empty query will print all files that would be searched\. 149 | . 150 | .P 151 | \fB\-L \-\-files\-without\-matches\fR: 152 | . 153 | .br 154 | \~\~\~\~ Only print filenames that don\'t contain matches\. 155 | . 156 | .P 157 | \fB\-\-list\-file\-types\fR: 158 | . 159 | .br 160 | \~\~\~\~ See \fBFILE TYPES\fR below\. 161 | . 162 | .P 163 | \fB\-m \-\-max\-count NUM\fR: 164 | . 165 | .br 166 | \~\~\~\~ Skip the rest of a file after NUM matches\. Default is 10,000\. 167 | . 168 | .P 169 | \fB\-\-no\-numbers\fR: 170 | . 171 | .br 172 | \~\~\~\~ Don\'t show line numbers 173 | . 174 | .P 175 | \fB\-p \-\-path\-to\-agignore STRING\fR: 176 | . 177 | .br 178 | \~\~\~\~ Provide a path to a specific \.agignore file\. 179 | . 180 | .P 181 | \fB\-\-pager COMMAND\fR: 182 | . 183 | .br 184 | \~\~\~\~ Use a pager such as less\. Use \fB\-\-nopager\fR to override\. This option is also ignored if output is piped to another program\. 185 | . 186 | .P 187 | \fB\-\-print\-long\-lines\fR: 188 | . 
189 | .br 190 | \~\~\~\~ Print matches on very long lines (> 2k characters by default) 191 | . 192 | .P 193 | \fB\-\-passthrough\fR: 194 | . 195 | .br 196 | \~\~\~\~ When searching a stream, print all lines even if they don\'t match\. 197 | . 198 | .P 199 | \fB\-Q \-\-literal\fR: 200 | . 201 | .br 202 | \~\~\~\~ Do not parse PATTERN as a regular expression\. Try to match it literally\. 203 | . 204 | .P 205 | \fB\-s \-\-case\-sensitive\fR: 206 | . 207 | .br 208 | \~\~\~\~ Match case sensitively\. 209 | . 210 | .P 211 | \fB\-S \-\-smart\-case\fR: 212 | . 213 | .br 214 | \~\~\~\~ Match case sensitively if there are any uppercase letters in PATTERN, or case insensitively otherwise\. Enabled by default\. 215 | . 216 | .P 217 | \fB\-\-search\-binary\fR: 218 | . 219 | .br 220 | \~\~\~\~ Search binary files for matches\. 221 | . 222 | .P 223 | \fB\-\-silent\fR: 224 | . 225 | .br 226 | \~\~\~\~ Suppress all log messages, including errors\. 227 | . 228 | .P 229 | \fB\-\-stats\fR: 230 | . 231 | .br 232 | \~\~\~\~ Print stats (files scanned, time taken, etc) 233 | . 234 | .P 235 | \fB\-t \-\-all\-text\fR: 236 | . 237 | .br 238 | \~\~\~\~ Search all text files\. This doesn\'t include hidden files\. 239 | . 240 | .P 241 | \fB\-u \-\-unrestricted\fR: 242 | . 243 | .br 244 | \~\~\~\~ Search \fIall\fR files\. This ignores \.agignore, \.gitignore, etc\. It searches binary and hidden files as well\. 245 | . 246 | .P 247 | \fB\-U \-\-skip\-vcs\-ignores\fR: 248 | . 249 | .br 250 | \~\~\~\~ Ignore VCS ignore files (\.gitignore, \.hgignore, svn:ignore), but still use \.agignore\. 251 | . 252 | .P 253 | \fB\-v \-\-invert\-match\fR 254 | . 255 | .br 256 | \fB\-w \-\-word\-regexp\fR: 257 | . 258 | .br 259 | \~\~\~\~ Only match whole words\. 260 | . 261 | .P 262 | \fB\-z \-\-search\-zip\fR: 263 | . 264 | .br 265 | \~\~\~\~ Search contents of compressed files\. 266 | . 267 | .SH "FILE TYPES" 268 | It is possible to restrict the types of files searched\. 
For example, passing \fB\-\-html\fR as the \fBfile\-types\fR parameter will search only files with the extensions \fBhtm\fR, \fBhtml\fR, \fBshtml\fR or \fBxhtml\fR\. For a list of supported \fBfile\-types\fR run \fBag \-\-list\-file\-types\fR\. 269 | . 270 | .SH "IGNORING FILES" 271 | By default, ag will ignore files matched by patterns in \.gitignore, \.hgignore, or \.agignore\. These files can be anywhere in the directories being searched\. Ag also ignores files matched by the svn:ignore property if \fBsvn \-\-version\fR is 1\.6 or older\. Finally, ag looks in $HOME/\.agignore for ignore patterns\. Binary files are ignored by default as well\. 272 | . 273 | .P 274 | If you want to ignore \.gitignore, \.hgignore, and svn:ignore but still take \.agignore into account, use \fB\-U\fR\. 275 | . 276 | .P 277 | Use the \fB\-t\fR option to search all text files, \fB\-a\fR to search all files, and \fB\-u\fR to search all including hidden files\. 278 | . 279 | .SH "EXAMPLES" 280 | \fBag printf\fR: Find matches for "printf" in the current directory\. 281 | . 282 | .P 283 | \fBag foo /bar/\fR: Find matches for "foo" in path /bar/\. 284 | . 285 | .SH "SEE ALSO" 286 | grep(1) 287 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Silver Searcher # 2 | 3 | A code searching tool similar to `ack`, with a focus on speed. 4 | 5 | [![Build Status](https://travis-ci.org/ggreer/the_silver_searcher.svg?branch=master)](https://travis-ci.org/ggreer/the_silver_searcher) 6 | 7 | 8 | Floobits status 9 | 10 | 11 | 12 | ## What's so great about Ag? ## 13 | 14 | * It searches code about 3–5× faster than `ack`. 15 | * It ignores file patterns from your `.gitignore` and `.hgignore`. 16 | * If there are files in your source repo you don't want to search, just add their patterns to a `.agignore` file. 
\*cough\* extern \*cough\* 17 | * The command name is 33% shorter than `ack`, and all keys are on the home row! 18 | 19 | 20 | ## How is it so fast? ## 21 | 22 | * Searching for literals (no regex) uses [Boyer-Moore-Horspool strstr](http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm). 23 | * Files are `mmap()`ed instead of read into a buffer. 24 | * If you're building with PCRE 8.21 or greater, regex searches use [the JIT compiler](http://sljit.sourceforge.net/pcre.html). 25 | * Ag calls `pcre_study()` before executing the regex on a jillion files. 26 | * Instead of calling `fnmatch()` on every pattern in your ignore files, non-regex patterns are loaded into an array and binary searched. 27 | * Ag uses [Pthreads](http://en.wikipedia.org/wiki/POSIX_Threads) to take advantage of multiple CPU cores and search files in parallel. 28 | 29 | I've written several blog posts showing how I've improved performance. These include how I [added pthreads](http://geoff.greer.fm/2012/09/07/the-silver-searcher-adding-pthreads/), [wrote my own `scandir()`](http://geoff.greer.fm/2012/09/03/profiling-ag-writing-my-own-scandir/), [benchmarked every revision to find performance regressions](http://geoff.greer.fm/2012/08/25/the-silver-searcher-benchmarking-revisions/), and profiled with [gprof](http://geoff.greer.fm/2012/02/08/profiling-with-gprof/) and [Valgrind](http://geoff.greer.fm/2012/01/23/making-programs-faster-profiling/). 
30 | 31 | 32 | ## Installation ## 33 | 34 | ### Gentoo 35 | 36 | emerge the_silver_searcher 37 | 38 | ### OS X 39 | 40 | brew install the_silver_searcher 41 | 42 | or 43 | 44 | port install the_silver_searcher 45 | 46 | ### Arch Linux 47 | 48 | pacman -S the_silver_searcher 49 | 50 | ### Debian unstable 51 | 52 | apt-get install silversearcher-ag 53 | 54 | ### Ubuntu 13.10 or later 55 | 56 | apt-get install silversearcher-ag 57 | 58 | ### FreeBSD 59 | 60 | pkg install the_silver_searcher 61 | 62 | or 63 | 64 | pkg_add -r the_silver_searcher 65 | 66 | To build from source on FreeBSD: 67 | 68 | make -C /usr/ports/textproc/the_silver_searcher install clean 69 | 70 | ### OpenBSD 71 | 72 | pkg_add the_silver_searcher 73 | 74 | To build from source on OpenBSD: 75 | 76 | cd /usr/ports/textproc/the_silver_searcher && make install 77 | 78 | 79 | If you want a CentOS rpm or Ubuntu deb, take a look at [Vikram Dighe's packages](http://swiftsignal.com/packages/). 80 | 81 | 82 | ## Building from source ## 83 | 84 | 1. Install dependencies (Automake, pkg-config, PCRE, LZMA): 85 | * Ubuntu: 86 | 87 | apt-get install -y automake pkg-config libpcre3-dev zlib1g-dev liblzma-dev 88 | * Fedora: 89 | 90 | yum -y install pkgconfig automake gcc zlib-devel pcre-devel xz-devel 91 | * CentOS: 92 | 93 | yum -y groupinstall "Development Tools" 94 | yum -y install pcre-devel xz-devel 95 | * OS X: 96 | 97 | brew install automake pkg-config pcre 98 | or 99 | 100 | port install automake pkgconfig pcre 101 | * Windows: It's complicated. See [this wiki page](https://github.com/ggreer/the_silver_searcher/wiki/Windows). 102 | 2. Run the build script (which just runs aclocal, automake, etc): 103 | 104 | ./build.sh 105 | 106 | On Windows: 107 | 108 | mingw32-make -f Makefile.w32 109 | 3. Make install: 110 | 111 | sudo make install 112 | 113 | 114 | 115 | ## Current development status ## 116 | 117 | It's quite stable now. Most changes are new features, minor bug fixes, or performance improvements. 
It's much faster than Ack in my benchmarks. 118 | 119 | ack blahblahblah ~/code 6.59s user 1.94s system 99% cpu 8.547 total 120 | 121 | ag blahblahblah ~/code 1.39s user 1.81s system 229% cpu 1.396 total 122 | 123 | 124 | ## Editor Integration ## 125 | 126 | ### TextMate ### 127 | 128 | TextMate users can use Ag with [my fork](https://github.com/ggreer/AckMate) of the popular AckMate plugin, which lets you use both Ack and Ag for searching. If you already have AckMate and just want to replace Ack with Ag, move or delete `"~/Library/Application Support/TextMate/PlugIns/AckMate.tmplugin/Contents/Resources/ackmate_ack"` and run `ln -s /usr/local/bin/ag "~/Library/Application Support/TextMate/PlugIns/AckMate.tmplugin/Contents/Resources/ackmate_ack"` 129 | 130 | ### Vim ### 131 | 132 | You can use Ag with [ack.vim][] by adding the following line to your `.vimrc`: 133 | 134 | let g:ackprg = 'ag --nogroup --nocolor --column' 135 | 136 | There's also a fork of ack.vim tailored for use with Ag: [ag.vim][] 137 | [ack.vim]: https://github.com/mileszs/ack.vim 138 | [ag.vim]: https://github.com/rking/ag.vim 139 | 140 | ### Emacs ### 141 | 142 | You can use [ag.el][] as an Emacs frontend to Ag. 143 | 144 | [ag.el]: https://github.com/Wilfred/ag.el 145 | 146 | 147 | ## Contributing ## 148 | 149 | I like when people send pull requests. It validates my existence. If you want to help out, check the [issue list](https://github.com/ggreer/the_silver_searcher/issues?sort=updated&state=open) or search the codebase for `TODO`. Don't worry if you lack experience writing C. If I think a pull request isn't ready to be merged, I'll give feedback in comments. Once everything looks good, I'll comment on your pull request with a cool animated gif and hit the merge button. 150 | 151 | 152 | ## TODO ## 153 | 154 | A special thanks goes out to Alex Davies. He has given me some excellent recommendations to improve Ag.
Many of these things are still on my list: 155 | 156 | * Optimizations 157 | * Write a benchmarking script that tweaks various settings to find what's fastest. 158 | * Features 159 | * Behave better when matching in files with really long lines. 160 | * Report "match found at position X of line N" if line is > 10k chars. 161 | * Windows support 162 | * `readdir()` and `stat()` are much slower on Windows. Use `FindNextFile()` instead. 163 | * Support Visual Studio instead of autotools? 164 | * Need to use pthreads-win32 or something similar. 165 | 166 | 167 | ## Other stuff you might like ## 168 | 169 | * [Ack](https://github.com/petdance/ack) - Better than grep. Without Ack, Ag would not exist. 170 | * [AckMate](https://github.com/protocool/AckMate) - An ack-powered replacement for TextMate's slow built-in search. 171 | * [ack.vim](https://github.com/mileszs/ack.vim) 172 | * [ag.vim]( https://github.com/rking/ag.vim) 173 | * [Exuberant Ctags](http://ctags.sourceforge.net/) - Faster than Ag, but it builds an index beforehand. Good for *really* big codebases. 174 | * [Git-grep](http://git-scm.com/docs/git-grep) - As fast as Ag but only works on git repos. 175 | * [Sack](https://github.com/sampson-chen/sack) - A utility that wraps Ack and Ag. It removes a lot of repetition from searching and opening matching files. 
176 | -------------------------------------------------------------------------------- /src/print.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "ignore.h" 7 | #include "log.h" 8 | #include "options.h" 9 | #include "print.h" 10 | #include "util.h" 11 | 12 | int first_file_match = 1; 13 | 14 | const char *color_reset = "\033[0m\033[K"; 15 | 16 | void print_path(const char *path, const char sep) { 17 | path = normalize_path(path); 18 | 19 | if (opts.ackmate) { 20 | fprintf(out_fd, ":%s%c", path, sep); 21 | } else { 22 | if (opts.color) { 23 | fprintf(out_fd, "%s%s%s%c", opts.color_path, path, color_reset, sep); 24 | } else { 25 | fprintf(out_fd, "%s%c", path, sep); 26 | } 27 | } 28 | } 29 | 30 | void print_binary_file_matches(const char *path) { 31 | path = normalize_path(path); 32 | print_file_separator(); 33 | fprintf(out_fd, "Binary file %s matches.\n", path); 34 | } 35 | 36 | void print_file_matches(const char *path, const char *buf, const size_t buf_len, const match_t matches[], const size_t matches_len) { 37 | size_t line = 1; 38 | char **context_prev_lines = NULL; 39 | size_t prev_line = 0; 40 | size_t last_prev_line = 0; 41 | size_t prev_line_offset = 0; 42 | size_t cur_match = 0; 43 | /* TODO the line below contains a terrible hack */ 44 | size_t lines_since_last_match = 1000000; /* if I initialize this to INT_MAX it'll overflow */ 45 | ssize_t lines_to_print = 0; 46 | size_t last_printed_match = 0; 47 | char sep = '-'; 48 | size_t i, j; 49 | int in_a_match = FALSE; 50 | int printing_a_match = FALSE; 51 | 52 | if (opts.ackmate) { 53 | sep = ':'; 54 | } 55 | 56 | print_file_separator(); 57 | 58 | if (opts.print_path == PATH_PRINT_DEFAULT) { 59 | opts.print_path = PATH_PRINT_TOP; 60 | } else if (opts.print_path == PATH_PRINT_DEFAULT_EACH_LINE) { 61 | opts.print_path = PATH_PRINT_EACH_LINE; 62 | } 63 | 64 | if (opts.print_path == PATH_PRINT_TOP) { 65 | 
print_path(path, '\n'); 66 | } 67 | 68 | context_prev_lines = ag_calloc(sizeof(char *), (opts.before + 1)); 69 | 70 | for (i = 0; i <= buf_len && (cur_match < matches_len || lines_since_last_match <= opts.after); i++) { 71 | if (cur_match < matches_len && i == matches[cur_match].end) { 72 | /* We found the end of a match. */ 73 | cur_match++; 74 | in_a_match = FALSE; 75 | } 76 | 77 | if (cur_match < matches_len && i == matches[cur_match].start) { 78 | in_a_match = TRUE; 79 | /* We found the start of a match */ 80 | if (cur_match > 0 && opts.context && lines_since_last_match > (opts.before + opts.after + 1)) { 81 | fprintf(out_fd, "--\n"); 82 | } 83 | 84 | if (lines_since_last_match > 0 && opts.before > 0) { 85 | /* TODO: better, but still needs work */ 86 | /* print the previous line(s) */ 87 | lines_to_print = lines_since_last_match - (opts.after + 1); 88 | if (lines_to_print < 0) { 89 | lines_to_print = 0; 90 | } else if ((size_t)lines_to_print > opts.before) { 91 | lines_to_print = opts.before; 92 | } 93 | 94 | for (j = (opts.before - lines_to_print); j < opts.before; j++) { 95 | prev_line = (last_prev_line + j) % opts.before; 96 | if (context_prev_lines[prev_line] != NULL) { 97 | if (opts.print_path == PATH_PRINT_EACH_LINE) { 98 | print_path(path, ':'); 99 | } 100 | print_line_number(line - (opts.before - j), sep); 101 | fprintf(out_fd, "%s\n", context_prev_lines[prev_line]); 102 | } 103 | } 104 | } 105 | lines_since_last_match = 0; 106 | } 107 | 108 | /* We found the end of a line. 
*/ 109 | if (buf[i] == '\n' && opts.before > 0) { 110 | if (context_prev_lines[last_prev_line] != NULL) { 111 | free(context_prev_lines[last_prev_line]); 112 | } 113 | /* We don't want to strcpy the \n */ 114 | context_prev_lines[last_prev_line] = 115 | ag_strndup(&buf[prev_line_offset], i - prev_line_offset); 116 | last_prev_line = (last_prev_line + 1) % opts.before; 117 | } 118 | 119 | if (buf[i] == '\n' || i == buf_len) { 120 | if (lines_since_last_match == 0) { 121 | if (opts.print_path == PATH_PRINT_EACH_LINE && !opts.search_stream) { 122 | print_path(path, ':'); 123 | } 124 | 125 | if (opts.ackmate) { 126 | /* print headers for ackmate to parse */ 127 | print_line_number(line, ';'); 128 | for (; last_printed_match < cur_match; last_printed_match++) { 129 | fprintf(out_fd, "%lu %lu", 130 | (matches[last_printed_match].start - prev_line_offset), 131 | (matches[last_printed_match].end - matches[last_printed_match].start)); 132 | last_printed_match == cur_match - 1 ? fputc(':', out_fd) : fputc(',', out_fd); 133 | } 134 | j = prev_line_offset; 135 | /* print up to current char */ 136 | for (; j <= i; j++) { 137 | fputc(buf[j], out_fd); 138 | } 139 | } else { 140 | print_line_number(line, ':'); 141 | if (opts.column) { 142 | fprintf(out_fd, "%lu:", (matches[last_printed_match].start - prev_line_offset) + 1); 143 | } 144 | 145 | if (printing_a_match && opts.color) { 146 | fprintf(out_fd, "%s", opts.color_match); 147 | } 148 | for (j = prev_line_offset; j <= i; j++) { 149 | if (last_printed_match < matches_len && j == matches[last_printed_match].end) { 150 | if (opts.color) { 151 | fprintf(out_fd, "%s", color_reset); 152 | } 153 | printing_a_match = FALSE; 154 | last_printed_match++; 155 | } 156 | if (last_printed_match < matches_len && j == matches[last_printed_match].start) { 157 | if (opts.color) { 158 | fprintf(out_fd, "%s", opts.color_match); 159 | } 160 | printing_a_match = TRUE; 161 | } 162 | /* Don't print the null terminator */ 163 | if (j < buf_len) { 164 | 
fputc(buf[j], out_fd); 165 | } 166 | } 167 | if (printing_a_match && opts.color) { 168 | fprintf(out_fd, "%s", color_reset); 169 | } 170 | } 171 | } else if (lines_since_last_match <= opts.after) { 172 | /* print context after matching line */ 173 | if (opts.print_path == PATH_PRINT_EACH_LINE) { 174 | print_path(path, ':'); 175 | } 176 | print_line_number(line, sep); 177 | 178 | for (j = prev_line_offset; j < i; j++) { 179 | fputc(buf[j], out_fd); 180 | } 181 | fputc('\n', out_fd); 182 | } 183 | 184 | prev_line_offset = i + 1; /* skip the newline */ 185 | line++; 186 | if (!in_a_match) { 187 | lines_since_last_match++; 188 | } 189 | /* File doesn't end with a newline. Print one so the output is pretty. */ 190 | if (i == buf_len && buf[i] != '\n' && !opts.search_stream) { 191 | fputc('\n', out_fd); 192 | } 193 | } 194 | } 195 | 196 | for (i = 0; i < opts.before; i++) { 197 | if (context_prev_lines[i] != NULL) { 198 | free(context_prev_lines[i]); 199 | } 200 | } 201 | free(context_prev_lines); 202 | } 203 | 204 | void print_line_number(size_t line, const char sep) { 205 | if (!opts.print_line_numbers) { 206 | return; 207 | } 208 | if (opts.search_stream && opts.stream_line_num) { 209 | line = opts.stream_line_num; 210 | } 211 | if (opts.color) { 212 | fprintf(out_fd, "%s%lu%s%c", opts.color_line_number, line, color_reset, sep); 213 | } else { 214 | fprintf(out_fd, "%lu%c", line, sep); 215 | } 216 | } 217 | 218 | void print_file_separator(void) { 219 | if (first_file_match == 0 && opts.print_break) { 220 | fprintf(out_fd, "\n"); 221 | } 222 | first_file_match = 0; 223 | } 224 | 225 | const char *normalize_path(const char *path) { 226 | if (strlen(path) < 3) { 227 | return path; 228 | } 229 | if (path[0] == '.' 
&& path[1] == '/') { 230 | return path + 2; 231 | } 232 | if (path[0] == '/' && path[1] == '/') { 233 | return path + 1; 234 | } 235 | return path; 236 | } 237 | -------------------------------------------------------------------------------- /src/decompress.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "decompress.h" 5 | 6 | #ifdef HAVE_LZMA_H 7 | #include 8 | 9 | /* http://tukaani.org/xz/xz-file-format.txt */ 10 | const uint8_t XZ_HEADER_MAGIC[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 }; 11 | const uint8_t LZMA_HEADER_SOMETIMES[3] = { 0x5D, 0x00, 0x00 }; 12 | #endif 13 | 14 | 15 | #ifdef HAVE_ZLIB_H 16 | #define ZLIB_CONST 1 17 | #include 18 | 19 | /* Code in decompress_zlib from 20 | * 21 | * https://raw.github.com/madler/zlib/master/examples/zpipe.c 22 | * 23 | * zpipe.c: example of proper use of zlib's inflate() and deflate() 24 | * Not copyrighted -- provided to the public domain 25 | * Version 1.4 11 December 2005 Mark Adler 26 | */ 27 | static void *decompress_zlib(const void *buf, const int buf_len, 28 | const char *dir_full_path, int *new_buf_len) { 29 | int ret = 0; 30 | unsigned char *result = NULL; 31 | size_t result_size = 0; 32 | size_t pagesize = 0; 33 | z_stream stream; 34 | 35 | log_debug("Decompressing zlib file %s", dir_full_path); 36 | 37 | /* allocate inflate state */ 38 | stream.zalloc = Z_NULL; 39 | stream.zfree = Z_NULL; 40 | stream.opaque = Z_NULL; 41 | stream.avail_in = 0; 42 | stream.next_in = Z_NULL; 43 | 44 | /* Add 32 to allow zlib and gzip format detection */ 45 | if (inflateInit2(&stream, 32 + 15) != Z_OK) { 46 | log_err("Unable to initialize zlib: %s", stream.msg); 47 | goto error_out; 48 | } 49 | 50 | stream.avail_in = buf_len; 51 | stream.next_in = buf; 52 | 53 | pagesize = getpagesize(); 54 | result_size = ((buf_len + pagesize - 1) & ~(pagesize - 1)); 55 | do { 56 | do { 57 | unsigned char *tmp_result = result; 58 | /* Double the buffer size and realloc 
*/ 59 | result_size *= 2; 60 | result = (unsigned char *)realloc(result, result_size * sizeof(unsigned char)); 61 | if (result == NULL) { 62 | free(tmp_result); 63 | log_err("Unable to allocate %d bytes to decompress file %s", result_size * sizeof(unsigned char), dir_full_path); 64 | inflateEnd(&stream); 65 | goto error_out; 66 | } 67 | 68 | stream.avail_out = result_size / 2; 69 | stream.next_out = &result[stream.total_out]; 70 | ret = inflate(&stream, Z_SYNC_FLUSH); 71 | log_debug("inflate ret = %d", ret); 72 | switch (ret) { 73 | case Z_STREAM_ERROR: { 74 | log_err("Found stream error while decompressing zlib stream: %s", stream.msg); 75 | inflateEnd(&stream); 76 | goto error_out; 77 | } 78 | case Z_NEED_DICT: 79 | case Z_DATA_ERROR: 80 | case Z_MEM_ERROR: { 81 | log_err("Found mem/data error while decompressing zlib stream: %s", stream.msg); 82 | inflateEnd(&stream); 83 | goto error_out; 84 | } 85 | } 86 | } while (stream.avail_out == 0); 87 | } while (ret == Z_OK); 88 | 89 | *new_buf_len = stream.total_out; 90 | inflateEnd(&stream); 91 | 92 | if (ret == Z_STREAM_END) { 93 | return result; 94 | } 95 | 96 | error_out: 97 | *new_buf_len = 0; 98 | return NULL; 99 | } 100 | #endif 101 | 102 | 103 | static void *decompress_lzw(const void *buf, const int buf_len, 104 | const char *dir_full_path, int *new_buf_len) { 105 | (void)buf; 106 | (void)buf_len; 107 | log_err("LZW (UNIX compress) files not yet supported: %s", dir_full_path); 108 | *new_buf_len = 0; 109 | return NULL; 110 | } 111 | 112 | 113 | static void *decompress_zip(const void *buf, const int buf_len, 114 | const char *dir_full_path, int *new_buf_len) { 115 | (void)buf; 116 | (void)buf_len; 117 | log_err("Zip files not yet supported: %s", dir_full_path); 118 | *new_buf_len = 0; 119 | return NULL; 120 | } 121 | 122 | 123 | #ifdef HAVE_LZMA_H 124 | static void *decompress_lzma(const void *buf, const int buf_len, 125 | const char *dir_full_path, int *new_buf_len) { 126 | lzma_stream stream = LZMA_STREAM_INIT; 
127 | lzma_ret lzrt; 128 | unsigned char *result = NULL; 129 | size_t result_size = 0; 130 | size_t pagesize = 0; 131 | 132 | stream.avail_in = buf_len; 133 | stream.next_in = buf; 134 | 135 | lzrt = lzma_auto_decoder(&stream, -1, 0); 136 | 137 | if (lzrt != LZMA_OK) { 138 | log_err("Unable to initialize lzma_auto_decoder: %d", lzrt); 139 | goto error_out; 140 | } 141 | 142 | pagesize = getpagesize(); 143 | result_size = ((buf_len + pagesize - 1) & ~(pagesize - 1)); 144 | do { 145 | do { 146 | unsigned char *tmp_result = result; 147 | /* Double the buffer size and realloc */ 148 | result_size *= 2; 149 | result = (unsigned char *)realloc(result, result_size * sizeof(unsigned char)); 150 | if (result == NULL) { 151 | free(tmp_result); 152 | log_err("Unable to allocate %d bytes to decompress file %s", result_size * sizeof(unsigned char), dir_full_path); 153 | goto error_out; 154 | } 155 | 156 | stream.avail_out = result_size / 2; 157 | stream.next_out = &result[stream.total_out]; 158 | lzrt = lzma_code(&stream, LZMA_RUN); 159 | log_debug("lzma_code ret = %d", lzrt); 160 | switch (lzrt) { 161 | case LZMA_OK: 162 | case LZMA_STREAM_END: 163 | break; 164 | default: 165 | log_err("Found mem/data error while decompressing xz/lzma stream: %d", lzrt); 166 | goto error_out; 167 | } 168 | } while (stream.avail_out == 0); 169 | } while (lzrt == LZMA_OK); 170 | 171 | *new_buf_len = stream.total_out; 172 | 173 | if (lzrt == LZMA_STREAM_END) { 174 | lzma_end(&stream); 175 | return result; 176 | } 177 | 178 | 179 | error_out: 180 | lzma_end(&stream); 181 | *new_buf_len = 0; 182 | if (result) { 183 | free(result); 184 | } 185 | return NULL; 186 | } 187 | #endif 188 | 189 | 190 | /* This function is very hot. It's called on every file when zip is enabled. 
*/ 191 | void *decompress(const ag_compression_type zip_type, const void *buf, const int buf_len, 192 | const char *dir_full_path, int *new_buf_len) { 193 | 194 | switch (zip_type) { 195 | #ifdef HAVE_ZLIB_H 196 | case AG_GZIP: 197 | return decompress_zlib(buf, buf_len, dir_full_path, new_buf_len); 198 | #endif 199 | case AG_COMPRESS: 200 | return decompress_lzw(buf, buf_len, dir_full_path, new_buf_len); 201 | case AG_ZIP: 202 | return decompress_zip(buf, buf_len, dir_full_path, new_buf_len); 203 | #ifdef HAVE_LZMA_H 204 | case AG_XZ: 205 | return decompress_lzma(buf, buf_len, dir_full_path, new_buf_len); 206 | #endif 207 | case AG_NO_COMPRESSION: 208 | log_err("File %s is not compressed", dir_full_path); 209 | break; 210 | default: 211 | log_err("Unsupported compression type: %d", zip_type); 212 | } 213 | 214 | *new_buf_len = 0; 215 | return NULL; 216 | } 217 | 218 | 219 | /* This function is very hot. It's called on every file. */ 220 | ag_compression_type is_zipped(const void *buf, const int buf_len) { 221 | /* Zip magic numbers 222 | * compressed file: { 0x1F, 0x9B } 223 | * http://en.wikipedia.org/wiki/Compress 224 | * 225 | * gzip file: { 0x1F, 0x8B } 226 | * http://www.gzip.org/zlib/rfc-gzip.html#file-format 227 | * 228 | * zip file: { 0x50, 0x4B, 0x03, 0x04 } 229 | * http://www.pkware.com/documents/casestudies/APPNOTE.TXT (Section 4.3) 230 | */ 231 | 232 | const unsigned char *buf_c = buf; 233 | 234 | if (buf_len == 0) 235 | return AG_NO_COMPRESSION; 236 | 237 | /* Check for gzip & compress */ 238 | if (buf_len >= 2) { 239 | if (buf_c[0] == 0x1F) { 240 | if (buf_c[1] == 0x8B) { 241 | #ifdef HAVE_ZLIB_H 242 | log_debug("Found gzip-based stream"); 243 | return AG_GZIP; 244 | #endif 245 | } else if (buf_c[1] == 0x9B) { 246 | log_debug("Found compress-based stream"); 247 | return AG_COMPRESS; 248 | } 249 | } 250 | } 251 | 252 | /* Check for zip */ 253 | if (buf_len >= 4) { 254 | if (buf_c[0] == 0x50 && buf_c[1] == 0x4B && buf_c[2] == 0x03 && buf_c[3] == 0x04) { 
255 | log_debug("Found zip-based stream"); 256 | return AG_ZIP; 257 | } 258 | } 259 | 260 | #ifdef HAVE_LZMA_H 261 | if (buf_len >= 6) { 262 | if (memcmp(XZ_HEADER_MAGIC, buf_c, 6) == 0) { 263 | log_debug("Found xz based stream"); 264 | return AG_XZ; 265 | } 266 | } 267 | 268 | /* LZMA doesn't really have a header: http://www.mail-archive.com/xz-devel@tukaani.org/msg00003.html */ 269 | if (buf_len >= 3) { 270 | if (memcmp(LZMA_HEADER_SOMETIMES, buf_c, 3) == 0) { 271 | log_debug("Found lzma-based stream"); 272 | return AG_XZ; 273 | } 274 | } 275 | #endif 276 | 277 | return AG_NO_COMPRESSION; 278 | } 279 | -------------------------------------------------------------------------------- /src/ignore.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "ignore.h" 9 | #include "log.h" 10 | #include "options.h" 11 | #include "scandir.h" 12 | #include "util.h" 13 | 14 | #ifdef _WIN32 15 | #include 16 | #define fnmatch(x, y, z) (!PathMatchSpec(y, x)) 17 | #else 18 | #include 19 | const int fnmatch_flags = FNM_PATHNAME; 20 | #endif 21 | 22 | /* TODO: build a huge-ass list of files we want to ignore by default (build cache stuff, pyc files, etc) */ 23 | 24 | const char *evil_hardcoded_ignore_files[] = { 25 | ".", 26 | "..", 27 | NULL 28 | }; 29 | 30 | /* Warning: changing the first string will break skip_vcs_ignores. 
*/ 31 | const char *ignore_pattern_files[] = { 32 | ".agignore", 33 | ".gitignore", 34 | ".git/info/exclude", 35 | ".hgignore", 36 | ".svn", 37 | NULL 38 | }; 39 | 40 | ignores *init_ignore(ignores *parent) { 41 | ignores *ig = ag_malloc(sizeof(ignores)); 42 | ig->names = NULL; 43 | ig->names_len = 0; 44 | ig->regexes = NULL; 45 | ig->regexes_len = 0; 46 | ig->parent = parent; 47 | return ig; 48 | } 49 | 50 | void cleanup_ignore(ignores *ig) { 51 | size_t i; 52 | 53 | if (ig) { 54 | if (ig->regexes) { 55 | for (i = 0; i < ig->regexes_len; i++) { 56 | free(ig->regexes[i]); 57 | } 58 | free(ig->regexes); 59 | } 60 | if (ig->names) { 61 | for (i = 0; i < ig->names_len; i++) { 62 | free(ig->names[i]); 63 | } 64 | free(ig->names); 65 | } 66 | free(ig); 67 | } 68 | } 69 | 70 | void add_ignore_pattern(ignores *ig, const char *pattern) { 71 | int i; 72 | int pattern_len; 73 | 74 | /* Strip off the leading dot so that matches are more likely. */ 75 | if (strncmp(pattern, "./", 2) == 0) { 76 | pattern++; 77 | } 78 | 79 | /* Kill trailing whitespace */ 80 | for (pattern_len = strlen(pattern); pattern_len > 0; pattern_len--) { 81 | if (!isspace(pattern[pattern_len - 1])) { 82 | break; 83 | } 84 | } 85 | 86 | if (pattern_len == 0) { 87 | log_debug("Pattern is empty. 
Not adding any ignores."); 88 | return; 89 | } 90 | 91 | /* TODO: de-dupe these patterns */ 92 | if (is_fnmatch(pattern)) { 93 | ig->regexes_len++; 94 | ig->regexes = ag_realloc(ig->regexes, ig->regexes_len * sizeof(char *)); 95 | /* Prepend '/' if the pattern contains '/' but doesn't start with '/' */ 96 | if ((pattern[0] != '/') && (strchr(pattern, '/') != NULL)) { 97 | ag_asprintf(&(ig->regexes[ig->regexes_len - 1]), "/%s", pattern); 98 | log_debug("added regex ignore pattern /%s", pattern); 99 | } else { 100 | ig->regexes[ig->regexes_len - 1] = ag_strndup(pattern, pattern_len); 101 | log_debug("added regex ignore pattern %s", pattern); 102 | } 103 | } else { 104 | /* a balanced binary tree is best for performance, but I'm lazy */ 105 | ig->names_len++; 106 | ig->names = ag_realloc(ig->names, ig->names_len * sizeof(char *)); 107 | for (i = ig->names_len - 1; i > 0; i--) { 108 | if (strcmp(pattern, ig->names[i - 1]) > 0) { 109 | break; 110 | } 111 | ig->names[i] = ig->names[i - 1]; 112 | } 113 | ig->names[i] = ag_strndup(pattern, pattern_len); 114 | log_debug("added literal ignore pattern %s", pattern); 115 | } 116 | } 117 | 118 | /* For loading git/hg ignore patterns */ 119 | void load_ignore_patterns(ignores *ig, const char *path) { 120 | FILE *fp = NULL; 121 | fp = fopen(path, "r"); 122 | if (fp == NULL) { 123 | log_debug("Skipping ignore file %s", path); 124 | return; 125 | } 126 | 127 | char *line = NULL; 128 | ssize_t line_len = 0; 129 | size_t line_cap = 0; 130 | 131 | while ((line_len = getline(&line, &line_cap, fp)) > 0) { 132 | if (line_len == 0 || line[0] == '\n' || line[0] == '#') { 133 | continue; 134 | } 135 | if (line[line_len - 1] == '\n') { 136 | line[line_len - 1] = '\0'; /* kill the \n */ 137 | } 138 | add_ignore_pattern(ig, line); 139 | } 140 | 141 | free(line); 142 | fclose(fp); 143 | } 144 | 145 | void load_svn_ignore_patterns(ignores *ig, const char *path) { 146 | FILE *fp = NULL; 147 | char *dir_prop_base; 148 | ag_asprintf(&dir_prop_base, 
"%s/%s", path, SVN_DIR_PROP_BASE); 149 | 150 | fp = fopen(dir_prop_base, "r"); 151 | if (fp == NULL) { 152 | log_debug("Skipping svn ignore file %s", dir_prop_base); 153 | free(dir_prop_base); 154 | return; 155 | } 156 | 157 | char *entry = NULL; 158 | size_t entry_len = 0; 159 | char *key = ag_malloc(32); /* Sane start for max key length. */ 160 | size_t key_len = 0; 161 | size_t bytes_read = 0; 162 | char *entry_line; 163 | size_t line_len; 164 | int matches; 165 | 166 | while (fscanf(fp, "K %zu\n", &key_len) == 1) { 167 | key = ag_realloc(key, key_len + 1); 168 | bytes_read = fread(key, 1, key_len, fp); 169 | key[key_len] = '\0'; 170 | matches = fscanf(fp, "\nV %zu\n", &entry_len); 171 | if (matches != 1) { 172 | log_debug("Unable to parse svnignore file %s: fscanf() got %i matches, expected 1.", dir_prop_base, matches); 173 | goto cleanup; 174 | } 175 | 176 | if (strncmp(SVN_PROP_IGNORE, key, bytes_read) != 0) { 177 | log_debug("key is %s, not %s. skipping %u bytes", key, SVN_PROP_IGNORE, entry_len); 178 | /* Not the key we care about. fseek and repeat */ 179 | fseek(fp, entry_len + 1, SEEK_CUR); /* +1 to account for newline. yes I know this is hacky */ 180 | continue; 181 | } 182 | /* Aww yeah. 
Time to ignore stuff */ 183 | entry = ag_malloc(entry_len + 1); 184 | bytes_read = fread(entry, 1, entry_len, fp); 185 | entry[bytes_read] = '\0'; 186 | log_debug("entry: %s", entry); 187 | break; 188 | } 189 | if (entry == NULL) { 190 | goto cleanup; 191 | } 192 | char *patterns = entry; 193 | size_t patterns_len = strlen(patterns); 194 | while (*patterns != '\0' && patterns < (entry + bytes_read)) { 195 | for (line_len = 0; line_len < patterns_len; line_len++) { 196 | if (patterns[line_len] == '\n') { 197 | break; 198 | } 199 | } 200 | if (line_len > 0) { 201 | entry_line = ag_strndup(patterns, line_len); 202 | add_ignore_pattern(ig, entry_line); 203 | free(entry_line); 204 | } 205 | patterns += line_len + 1; 206 | patterns_len -= line_len + 1; 207 | } 208 | free(entry); 209 | cleanup: 210 | free(dir_prop_base); 211 | free(key); 212 | fclose(fp); 213 | } 214 | 215 | static int ackmate_dir_match(const char *dir_name) { 216 | if (opts.ackmate_dir_filter == NULL) { 217 | return 0; 218 | } 219 | /* we just care about the match, not where the matches are */ 220 | return pcre_exec(opts.ackmate_dir_filter, NULL, dir_name, strlen(dir_name), 0, 0, NULL, 0); 221 | } 222 | 223 | static int filename_ignore_search(const ignores *ig, const char *filename) { 224 | size_t i; 225 | int match_pos; 226 | 227 | if (strncmp(filename, "./", 2) == 0) { 228 | filename++; 229 | } 230 | 231 | match_pos = binary_search(filename, ig->names, 0, ig->names_len); 232 | if (match_pos >= 0) { 233 | log_debug("file %s ignored because name matches static pattern %s", filename, ig->names[match_pos]); 234 | return 1; 235 | } 236 | 237 | for (i = 0; i < ig->regexes_len; i++) { 238 | if (fnmatch(ig->regexes[i], filename, fnmatch_flags) == 0) { 239 | log_debug("file %s ignored because name matches regex pattern %s", filename, ig->regexes[i]); 240 | return 1; 241 | } 242 | log_debug("pattern %s doesn't match file %s", ig->regexes[i], filename); 243 | } 244 | 245 | log_debug("file %s not ignored", 
filename); 246 | return 0; 247 | } 248 | 249 | static int path_ignore_search(const ignores *ig, const char *path, const char *filename) { 250 | char *temp; 251 | 252 | if (filename_ignore_search(ig, filename)) { 253 | return 1; 254 | } 255 | 256 | ag_asprintf(&temp, "%s/%s", path[0] == '.' ? path + 1 : path, filename); 257 | 258 | if (filename_ignore_search(ig, temp)) { 259 | free(temp); 260 | return 1; 261 | } 262 | 263 | int rv = ackmate_dir_match(temp); 264 | free(temp); 265 | return rv; 266 | } 267 | 268 | /* This function is REALLY HOT. It gets called for every file */ 269 | int filename_filter(const char *path, const struct dirent *dir, void *baton) { 270 | const char *filename = dir->d_name; 271 | /* TODO: don't call strlen on filename every time we call filename_filter() */ 272 | size_t filename_len = strlen(filename); 273 | size_t i; 274 | scandir_baton_t *scandir_baton = (scandir_baton_t *)baton; 275 | const ignores *ig = scandir_baton->ig; 276 | const char *base_path = scandir_baton->base_path; 277 | const size_t base_path_len = scandir_baton->base_path_len; 278 | const char *path_start = path; 279 | char *temp; 280 | 281 | if (!opts.follow_symlinks && is_symlink(path, dir)) { 282 | log_debug("File %s ignored becaused it's a symlink", dir->d_name); 283 | return 0; 284 | } 285 | 286 | if (is_named_pipe(path, dir)) { 287 | log_debug("%s ignored because it's a named pipe", path); 288 | return 0; 289 | } 290 | 291 | for (i = 0; evil_hardcoded_ignore_files[i] != NULL; i++) { 292 | if (strcmp(filename, evil_hardcoded_ignore_files[i]) == 0) { 293 | return 0; 294 | } 295 | } 296 | 297 | if (!opts.search_hidden_files && filename[0] == '.') { 298 | return 0; 299 | } 300 | if (opts.search_all_files && !opts.path_to_agignore) { 301 | return 1; 302 | } 303 | 304 | for (i = 0; base_path[i] == path[i] && i < base_path_len; i++) { 305 | /* base_path always ends with "/\0" while path doesn't, so this is safe */ 306 | path_start = path + i + 2; 307 | } 308 | 
log_debug("path_start %s filename %s", path_start, filename); 309 | 310 | while (ig != NULL) { 311 | if (path_ignore_search(ig, path_start, filename)) { 312 | return 0; 313 | } 314 | 315 | if (is_directory(path, dir) && filename[filename_len - 1] != '/') { 316 | ag_asprintf(&temp, "%s/", filename); 317 | int rv = path_ignore_search(ig, path_start, temp); 318 | free(temp); 319 | if (rv) { 320 | return 0; 321 | } 322 | } 323 | ig = ig->parent; 324 | } 325 | 326 | return 1; 327 | } 328 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /m4/ax_pthread.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.gnu.org/software/autoconf-archive/ax_pthread.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # This macro figures out how to build C programs using POSIX threads. It 12 | # sets the PTHREAD_LIBS output variable to the threads library and linker 13 | # flags, and the PTHREAD_CFLAGS output variable to any special C compiler 14 | # flags that are needed. (The user can also force certain compiler 15 | # flags/libs to be tested by setting these environment variables.) 16 | # 17 | # Also sets PTHREAD_CC to any special C compiler that is needed for 18 | # multi-threaded programs (defaults to the value of CC otherwise). (This 19 | # is necessary on AIX to use the special cc_r compiler alias.) 
20 | # 21 | # NOTE: You are assumed to not only compile your program with these flags, 22 | # but also link it with them as well. e.g. you should link with 23 | # $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS 24 | # 25 | # If you are only building threads programs, you may wish to use these 26 | # variables in your default LIBS, CFLAGS, and CC: 27 | # 28 | # LIBS="$PTHREAD_LIBS $LIBS" 29 | # CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 30 | # CC="$PTHREAD_CC" 31 | # 32 | # In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant 33 | # has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name 34 | # (e.g. PTHREAD_CREATE_UNDETACHED on AIX). 35 | # 36 | # Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the 37 | # PTHREAD_PRIO_INHERIT symbol is defined when compiling with 38 | # PTHREAD_CFLAGS. 39 | # 40 | # ACTION-IF-FOUND is a list of shell commands to run if a threads library 41 | # is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it 42 | # is not found. If ACTION-IF-FOUND is not specified, the default action 43 | # will define HAVE_PTHREAD. 44 | # 45 | # Please let the authors know if this macro fails on any platform, or if 46 | # you have any other suggestions or comments. This macro was based on work 47 | # by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help 48 | # from M. Frigo), as well as ac_pthread and hb_pthread macros posted by 49 | # Alejandro Forero Cuervo to the autoconf macro repository. We are also 50 | # grateful for the helpful feedback of numerous users. 51 | # 52 | # Updated for Autoconf 2.68 by Daniel Richard G. 53 | # 54 | # LICENSE 55 | # 56 | # Copyright (c) 2008 Steven G. Johnson 57 | # Copyright (c) 2011 Daniel Richard G. 
58 | # 59 | # This program is free software: you can redistribute it and/or modify it 60 | # under the terms of the GNU General Public License as published by the 61 | # Free Software Foundation, either version 3 of the License, or (at your 62 | # option) any later version. 63 | # 64 | # This program is distributed in the hope that it will be useful, but 65 | # WITHOUT ANY WARRANTY; without even the implied warranty of 66 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 67 | # Public License for more details. 68 | # 69 | # You should have received a copy of the GNU General Public License along 70 | # with this program. If not, see . 71 | # 72 | # As a special exception, the respective Autoconf Macro's copyright owner 73 | # gives unlimited permission to copy, distribute and modify the configure 74 | # scripts that are the output of Autoconf when processing the Macro. You 75 | # need not follow the terms of the GNU General Public License when using 76 | # or distributing such scripts, even though portions of the text of the 77 | # Macro appear in them. The GNU General Public License (GPL) does govern 78 | # all other use of the material that constitutes the Autoconf Macro. 79 | # 80 | # This special exception to the GPL applies to versions of the Autoconf 81 | # Macro released by the Autoconf Archive. When you make and distribute a 82 | # modified version of the Autoconf Macro, you may extend this special 83 | # exception to the GPL to apply to your modified version as well. 84 | 85 | #serial 21 86 | 87 | AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) 88 | AC_DEFUN([AX_PTHREAD], [ 89 | AC_REQUIRE([AC_CANONICAL_HOST]) 90 | AC_LANG_PUSH([C]) 91 | ax_pthread_ok=no 92 | 93 | # We used to check for pthread.h first, but this fails if pthread.h 94 | # requires special compiler flags (e.g. on True64 or Sequent). 95 | # It gets checked for in the link test anyway. 
96 | 97 | # First of all, check if the user has set any of the PTHREAD_LIBS, 98 | # etcetera environment variables, and if threads linking works using 99 | # them: 100 | if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then 101 | save_CFLAGS="$CFLAGS" 102 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 103 | save_LIBS="$LIBS" 104 | LIBS="$PTHREAD_LIBS $LIBS" 105 | AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) 106 | AC_TRY_LINK_FUNC([pthread_join], [ax_pthread_ok=yes]) 107 | AC_MSG_RESULT([$ax_pthread_ok]) 108 | if test x"$ax_pthread_ok" = xno; then 109 | PTHREAD_LIBS="" 110 | PTHREAD_CFLAGS="" 111 | fi 112 | LIBS="$save_LIBS" 113 | CFLAGS="$save_CFLAGS" 114 | fi 115 | 116 | # We must check for the threads library under a number of different 117 | # names; the ordering is very important because some systems 118 | # (e.g. DEC) have both -lpthread and -lpthreads, where one of the 119 | # libraries is broken (non-POSIX). 120 | 121 | # Create a list of thread flags to try. Items starting with a "-" are 122 | # C compiler flags, and other items are library names, except for "none" 123 | # which indicates that we try without any flags at all, and "pthread-config" 124 | # which is a program returning the flags for the Pth emulation library. 125 | 126 | ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" 127 | 128 | # The ordering *is* (sometimes) important. 
Some notes on the 129 | # individual items follow: 130 | 131 | # pthreads: AIX (must check this before -lpthread) 132 | # none: in case threads are in libc; should be tried before -Kthread and 133 | # other compiler flags to prevent continual compiler warnings 134 | # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) 135 | # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) 136 | # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) 137 | # -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) 138 | # -pthreads: Solaris/gcc 139 | # -mthreads: Mingw32/gcc, Lynx/gcc 140 | # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it 141 | # doesn't hurt to check since this sometimes defines pthreads too; 142 | # also defines -D_REENTRANT) 143 | # ... -mt is also the pthreads flag for HP/aCC 144 | # pthread: Linux, etcetera 145 | # --thread-safe: KAI C++ 146 | # pthread-config: use pthread-config program (for GNU Pth library) 147 | 148 | case ${host_os} in 149 | solaris*) 150 | 151 | # On Solaris (at least, for some versions), libc contains stubbed 152 | # (non-functional) versions of the pthreads routines, so link-based 153 | # tests will erroneously succeed. (We need to link with -pthreads/-mt/ 154 | # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather 155 | # a function called by this macro, so we could check for that, but 156 | # who knows whether they'll stub that too in a future libc.) So, 157 | # we'll just look for -pthreads and -lpthread first: 158 | 159 | ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" 160 | ;; 161 | 162 | darwin*) 163 | ax_pthread_flags="-pthread $ax_pthread_flags" 164 | ;; 165 | esac 166 | 167 | # Clang doesn't consider unrecognized options an error unless we specify 168 | # -Werror. We throw in some extra Clang-specific options to ensure that 169 | # this doesn't happen for GCC, which also accepts -Werror. 
170 | 171 | AC_MSG_CHECKING([if compiler needs -Werror to reject unknown flags]) 172 | save_CFLAGS="$CFLAGS" 173 | ax_pthread_extra_flags="-Werror" 174 | CFLAGS="$CFLAGS $ax_pthread_extra_flags -Wunknown-warning-option -Wsizeof-array-argument" 175 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([int foo(void);],[foo()])], 176 | [AC_MSG_RESULT([yes])], 177 | [ax_pthread_extra_flags= 178 | AC_MSG_RESULT([no])]) 179 | CFLAGS="$save_CFLAGS" 180 | 181 | if test x"$ax_pthread_ok" = xno; then 182 | for flag in $ax_pthread_flags; do 183 | 184 | case $flag in 185 | none) 186 | AC_MSG_CHECKING([whether pthreads work without any flags]) 187 | ;; 188 | 189 | -*) 190 | AC_MSG_CHECKING([whether pthreads work with $flag]) 191 | PTHREAD_CFLAGS="$flag" 192 | ;; 193 | 194 | pthread-config) 195 | AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no]) 196 | if test x"$ax_pthread_config" = xno; then continue; fi 197 | PTHREAD_CFLAGS="`pthread-config --cflags`" 198 | PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" 199 | ;; 200 | 201 | *) 202 | AC_MSG_CHECKING([for the pthreads library -l$flag]) 203 | PTHREAD_LIBS="-l$flag" 204 | ;; 205 | esac 206 | 207 | save_LIBS="$LIBS" 208 | save_CFLAGS="$CFLAGS" 209 | LIBS="$PTHREAD_LIBS $LIBS" 210 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS $ax_pthread_extra_flags" 211 | 212 | # Check for various functions. We must include pthread.h, 213 | # since some functions may be macros. (On the Sequent, we 214 | # need a special flag -Kthread to make this header compile.) 215 | # We check for pthread_join because it is in -lpthread on IRIX 216 | # while pthread_create is in libc. We check for pthread_attr_init 217 | # due to DEC craziness with -lpthreads. We check for 218 | # pthread_cleanup_push because it is one of the few pthread 219 | # functions on Solaris that doesn't have a non-functional libc stub. 220 | # We try pthread_create on general principles. 
221 | AC_LINK_IFELSE([AC_LANG_PROGRAM([#include 222 | static void routine(void *a) { a = 0; } 223 | static void *start_routine(void *a) { return a; }], 224 | [pthread_t th; pthread_attr_t attr; 225 | pthread_create(&th, 0, start_routine, 0); 226 | pthread_join(th, 0); 227 | pthread_attr_init(&attr); 228 | pthread_cleanup_push(routine, 0); 229 | pthread_cleanup_pop(0) /* ; */])], 230 | [ax_pthread_ok=yes], 231 | []) 232 | 233 | LIBS="$save_LIBS" 234 | CFLAGS="$save_CFLAGS" 235 | 236 | AC_MSG_RESULT([$ax_pthread_ok]) 237 | if test "x$ax_pthread_ok" = xyes; then 238 | break; 239 | fi 240 | 241 | PTHREAD_LIBS="" 242 | PTHREAD_CFLAGS="" 243 | done 244 | fi 245 | 246 | # Various other checks: 247 | if test "x$ax_pthread_ok" = xyes; then 248 | save_LIBS="$LIBS" 249 | LIBS="$PTHREAD_LIBS $LIBS" 250 | save_CFLAGS="$CFLAGS" 251 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 252 | 253 | # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. 254 | AC_MSG_CHECKING([for joinable pthread attribute]) 255 | attr_name=unknown 256 | for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do 257 | AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], 258 | [int attr = $attr; return attr /* ; */])], 259 | [attr_name=$attr; break], 260 | []) 261 | done 262 | AC_MSG_RESULT([$attr_name]) 263 | if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then 264 | AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], [$attr_name], 265 | [Define to necessary symbol if this constant 266 | uses a non-standard name on your system.]) 267 | fi 268 | 269 | AC_MSG_CHECKING([if more special flags are required for pthreads]) 270 | flag=no 271 | case ${host_os} in 272 | aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";; 273 | osf* | hpux*) flag="-D_REENTRANT";; 274 | solaris*) 275 | if test "$GCC" = "yes"; then 276 | flag="-D_REENTRANT" 277 | else 278 | # TODO: What about Clang on Solaris? 
279 | flag="-mt -D_REENTRANT" 280 | fi 281 | ;; 282 | esac 283 | AC_MSG_RESULT([$flag]) 284 | if test "x$flag" != xno; then 285 | PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" 286 | fi 287 | 288 | AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], 289 | [ax_cv_PTHREAD_PRIO_INHERIT], [ 290 | AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], 291 | [[int i = PTHREAD_PRIO_INHERIT;]])], 292 | [ax_cv_PTHREAD_PRIO_INHERIT=yes], 293 | [ax_cv_PTHREAD_PRIO_INHERIT=no]) 294 | ]) 295 | AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"], 296 | [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.])]) 297 | 298 | LIBS="$save_LIBS" 299 | CFLAGS="$save_CFLAGS" 300 | 301 | # More AIX lossage: compile with *_r variant 302 | if test "x$GCC" != xyes; then 303 | case $host_os in 304 | aix*) 305 | AS_CASE(["x/$CC"], 306 | [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], 307 | [#handle absolute path differently from PATH based program lookup 308 | AS_CASE(["x$CC"], 309 | [x/*], 310 | [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])], 311 | [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])]) 312 | ;; 313 | esac 314 | fi 315 | fi 316 | 317 | test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" 318 | 319 | AC_SUBST([PTHREAD_LIBS]) 320 | AC_SUBST([PTHREAD_CFLAGS]) 321 | AC_SUBST([PTHREAD_CC]) 322 | 323 | # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: 324 | if test x"$ax_pthread_ok" = xyes; then 325 | ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1]) 326 | : 327 | else 328 | ax_pthread_ok=no 329 | $2 330 | fi 331 | AC_LANG_POP 332 | ])dnl AX_PTHREAD 333 | -------------------------------------------------------------------------------- /src/util.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "util.h" 9 | #include "config.h" 10 | 11 | #ifdef _WIN32 
/*
 * Fill the 256-entry skip table that the Boyer-Moore searches index by the
 * haystack byte under the cursor.
 *
 * find           - the pattern
 * f_len          - pattern length in bytes
 * skip_lookup    - output table with room for 256 entries
 * case_sensitive - when zero, both the lower- and upper-case form of every
 *                  pattern byte receive the same skip
 *
 * Every byte defaults to a skip of f_len. Each pattern byte except the last
 * gets its distance to the end of the pattern. An empty pattern leaves the
 * table filled with zeros.
 */
void generate_alpha_skip(const char *find, size_t f_len, size_t skip_lookup[], const int case_sensitive) {
    size_t i;

    for (i = 0; i < 256; i++) {
        skip_lookup[i] = f_len;
    }

    /* Without this guard f_len-- wraps around and the loop below runs off the pattern. */
    if (f_len == 0) {
        return;
    }

    f_len--;

    for (i = 0; i < f_len; i++) {
        /* The <ctype.h> functions are only defined for unsigned char values and EOF. */
        const unsigned char c = (unsigned char)find[i];

        if (case_sensitive) {
            skip_lookup[c] = f_len - i;
        } else {
            skip_lookup[(unsigned char)tolower(c)] = f_len - i;
            skip_lookup[(unsigned char)toupper(c)] = f_len - i;
        }
    }
}
/*
 * Boyer-Moore strstr.
 *
 * Looks for the f_len bytes of find inside the first s_len bytes of s, using
 * the two skip tables supplied by the caller. Returns a pointer to the first
 * occurrence, or NULL when there is none.
 */
const char *boyer_moore_strnstr(const char *s, const char *find, const size_t s_len, const size_t f_len,
                                const size_t alpha_skip_lookup[], const size_t *find_skip_lookup) {
    size_t pos;

    /* pos is the haystack index lined up with the last pattern byte. */
    for (pos = f_len - 1; pos < s_len;) {
        ssize_t i = f_len - 1;
        size_t alpha_skip;
        size_t find_skip;

        /* Compare right to left for as long as the bytes agree. */
        while (i >= 0 && s[pos] == find[i]) {
            pos--;
            i--;
        }

        if (i < 0) {
            /* Every pattern byte matched; pos sits one before the match. */
            return s + pos + 1;
        }

        /* Advance by the larger of the two table entries. */
        alpha_skip = alpha_skip_lookup[(unsigned char)s[pos]];
        find_skip = find_skip_lookup[i];
        pos += (find_skip > alpha_skip) ? find_skip : alpha_skip;
    }

    return NULL;
}
/*
 * Case-insensitive Boyer-Moore strstr.
 *
 * Each haystack byte is lower-cased before it is compared, while the pattern
 * bytes are compared as given, so find is expected to be lower case already.
 * Returns a pointer to the first occurrence of the f_len bytes of find inside
 * the first s_len bytes of s, or NULL when there is none.
 *
 * Both sides of the comparison go through unsigned char: tolower() is only
 * defined for unsigned char values and EOF, and comparing its int result with
 * a signed char pattern byte could never match bytes >= 0x80.
 */
const char *boyer_moore_strncasestr(const char *s, const char *find, const size_t s_len, const size_t f_len,
                                    const size_t alpha_skip_lookup[], const size_t *find_skip_lookup) {
    ssize_t i;
    size_t pos = f_len - 1;

    while (pos < s_len) {
        size_t alpha_skip;
        size_t find_skip;

        for (i = f_len - 1; i >= 0 && tolower((unsigned char)s[pos]) == (unsigned char)find[i]; pos--, i--) {
        }
        if (i < 0) {
            return s + pos + 1;
        }

        /* Advance by the larger of the two table entries. */
        alpha_skip = alpha_skip_lookup[(unsigned char)s[pos]];
        find_skip = find_skip_lookup[i];
        pos += (find_skip > alpha_skip) ? find_skip : alpha_skip;
    }

    return NULL;
}
*/ 203 | if (matches_len == 0) { 204 | matches[0].start = 0; 205 | matches[0].end = buf_len - 1; 206 | return 1; 207 | } 208 | 209 | for (i = 0; i < buf_len; i++) { 210 | if (i == next_match.start) { 211 | i = next_match.end - 1; 212 | 213 | match_read_index++; 214 | 215 | if (match_read_index < matches_len) { 216 | next_match = matches[match_read_index]; 217 | } 218 | 219 | if (in_inverted_match && last_line_end > inverted_match_start) { 220 | matches[inverted_match_count].start = inverted_match_start; 221 | matches[inverted_match_count].end = last_line_end - 1; 222 | 223 | inverted_match_count++; 224 | } 225 | 226 | in_inverted_match = FALSE; 227 | } else if (i == buf_len - 1 && in_inverted_match) { 228 | matches[inverted_match_count].start = inverted_match_start; 229 | matches[inverted_match_count].end = i; 230 | 231 | inverted_match_count++; 232 | } else if (buf[i] == '\n') { 233 | last_line_end = i + 1; 234 | 235 | if (!in_inverted_match) { 236 | inverted_match_start = last_line_end; 237 | } 238 | 239 | in_inverted_match = TRUE; 240 | } 241 | } 242 | 243 | for (i = 0; i < matches_len; i++) { 244 | log_debug("Inverted match %i start %i end %i.", i, matches[i].start, matches[i].end); 245 | } 246 | 247 | return inverted_match_count; 248 | } 249 | 250 | void compile_study(pcre **re, pcre_extra **re_extra, char *q, const int pcre_opts, const int study_opts) { 251 | const char *pcre_err = NULL; 252 | int pcre_err_offset = 0; 253 | 254 | *re = pcre_compile(q, pcre_opts, &pcre_err, &pcre_err_offset, NULL); 255 | if (*re == NULL) { 256 | die("pcre_compile failed at position %i. Error: %s", pcre_err_offset, pcre_err); 257 | } 258 | *re_extra = pcre_study(*re, study_opts, &pcre_err); 259 | if (*re_extra == NULL) { 260 | log_debug("pcre_study returned nothing useful. Error: %s", pcre_err); 261 | } 262 | } 263 | 264 | /* This function is very hot. It's called on every file. 
/*
 * Guess whether a buffer holds binary data.
 *
 * Only the first 512 bytes are examined. An empty buffer and a buffer that
 * starts with a UTF-8 BOM are treated as text. Any NUL byte means binary.
 * Otherwise bytes outside 7..14 and 32..127 that are not part of a
 * well-formed 2- or 3-byte UTF-8 sequence count as suspicious, and the buffer
 * is binary when more than 10% of the examined bytes are suspicious.
 *
 * Returns 1 for binary and 0 for text.
 *
 * This function is very hot. It's called on every file.
 */
int is_binary(const void *buf, const size_t buf_len) {
    size_t suspicious_bytes = 0;
    size_t total_bytes = buf_len > 512 ? 512 : buf_len;
    const unsigned char *buf_c = buf;
    size_t i;

    if (buf_len == 0) {
        return 0;
    }

    if (buf_len >= 3 && buf_c[0] == 0xEF && buf_c[1] == 0xBB && buf_c[2] == 0xBF) {
        /* UTF-8 BOM. This isn't binary. */
        return 0;
    }

    for (i = 0; i < total_bytes; i++) {
        const unsigned char c = buf_c[i];

        if (c == '\0') {
            /* NULL char. It's binary */
            return 1;
        }
        if ((c >= 7 && c <= 14) || (c >= 32 && c <= 127)) {
            /* Ordinary text byte. */
            continue;
        }

        /* UTF-8 detection. Trailing bytes are only peeked at, so a byte that */
        /* does not continue the sequence is still examined on its own in the */
        /* next iteration (it may be a NUL). */
        if (c > 193 && c < 224 && i + 1 < total_bytes) {
            if (buf_c[i + 1] > 127 && buf_c[i + 1] < 192) {
                i++;
                continue;
            }
        } else if (c > 223 && c < 240 && i + 2 < total_bytes) {
            if (buf_c[i + 1] > 127 && buf_c[i + 1] < 192 && buf_c[i + 2] > 127 && buf_c[i + 2] < 192) {
                i += 2;
                continue;
            }
        }

        suspicious_bytes++;
        /* Disk IO is so slow that it's worthwhile to do this calculation after every suspicious byte. */
        /* This is true even on a 1.6Ghz Atom with an Intel 320 SSD. */
        /* Read at least 32 bytes before making a decision */
        if (i >= 32 && (suspicious_bytes * 100) / total_bytes > 10) {
            return 1;
        }
    }

    if ((suspicious_bytes * 100) / total_bytes > 10) {
        return 1;
    }

    return 0;
}
struct dirent *d) { 396 | #ifdef HAVE_DIRENT_DTYPE 397 | /* Some filesystems, e.g. ReiserFS, always return a type DT_UNKNOWN from readdir or scandir. */ 398 | /* Call stat if we don't find DT_DIR to get the information we need. */ 399 | /* Also works for symbolic links to directories. */ 400 | if (d->d_type != DT_UNKNOWN && d->d_type != DT_LNK) { 401 | return d->d_type == DT_DIR; 402 | } 403 | #endif 404 | char *full_path; 405 | struct stat s; 406 | ag_asprintf(&full_path, "%s/%s", path, d->d_name); 407 | if (stat(full_path, &s) != 0) { 408 | free(full_path); 409 | return FALSE; 410 | } 411 | free(full_path); 412 | return S_ISDIR(s.st_mode); 413 | } 414 | 415 | int is_symlink(const char *path, const struct dirent *d) { 416 | #ifdef _WIN32 417 | return 0; 418 | #else 419 | #ifdef HAVE_DIRENT_DTYPE 420 | /* Some filesystems, e.g. ReiserFS, always return a type DT_UNKNOWN from readdir or scandir. */ 421 | /* Call lstat if we find DT_UNKNOWN to get the information we need. */ 422 | if (d->d_type != DT_UNKNOWN) { 423 | return (d->d_type == DT_LNK); 424 | } 425 | #endif 426 | char *full_path; 427 | struct stat s; 428 | ag_asprintf(&full_path, "%s/%s", path, d->d_name); 429 | if (lstat(full_path, &s) != 0) { 430 | free(full_path); 431 | return FALSE; 432 | } 433 | free(full_path); 434 | return S_ISLNK(s.st_mode); 435 | #endif 436 | } 437 | 438 | int is_named_pipe(const char *path, const struct dirent *d) { 439 | #ifdef HAVE_DIRENT_DTYPE 440 | if (d->d_type != DT_UNKNOWN) { 441 | return d->d_type == DT_FIFO; 442 | } 443 | #endif 444 | char *full_path; 445 | struct stat s; 446 | ag_asprintf(&full_path, "%s/%s", path, d->d_name); 447 | if (stat(full_path, &s) != 0) { 448 | free(full_path); 449 | return FALSE; 450 | } 451 | free(full_path); 452 | return S_ISFIFO(s.st_mode); 453 | } 454 | 455 | void ag_asprintf(char **ret, const char *fmt, ...) 
#ifndef HAVE_GETLINE
/*
 * Do it yourself getline() implementation, built on top of fgetln().
 *
 * Reads one line from stream into *lineptr, growing the buffer with realloc()
 * as needed and recording its capacity in *n. The stored line is
 * NUL-terminated. Returns the number of bytes read, or -1 if fgetln() returns
 * NULL or the buffer cannot be grown. The caller owns *lineptr and must
 * free() it.
 */
ssize_t getline(char **lineptr, size_t *n, FILE *stream) {
    size_t len = 0;
    char *srcln = NULL;
    char *newlnptr = NULL;

    /* get line, bail on error */
    if (!(srcln = fgetln(stream, &len))) {
        return -1;
    }

    /* A NULL *lineptr means there is no buffer yet, so *n must not be trusted
     * in that case. Otherwise grow whenever the line plus its terminating NUL
     * would not fit. */
    if (*lineptr == NULL || len >= *n) {
        /* double the buffer, but always leave room for the NUL */
        size_t new_cap = len * 2;
        if (new_cap <= len) {
            new_cap = len + 1;
        }
        if (!(newlnptr = realloc(*lineptr, new_cap))) {
#ifndef HAVE_FGETLN
            /* Our own fgetln() hands back a malloc()d buffer; don't leak it */
            free(srcln);
#endif
            return -1;
        }
        *lineptr = newlnptr;
        *n = new_cap;
    }

    memcpy(*lineptr, srcln, len);

#ifndef HAVE_FGETLN
    /* Our own implementation of fgetln() returns a malloc()d buffer that we
     * must free
     */
    free(srcln);
#endif

    (*lineptr)[len] = '\0';
    return len;
}
#endif
#ifndef HAVE_VASPRINTF
/*
 * Fallback vasprintf() for platforms that lack one.
 *
 * Formats fmt/args into a freshly malloc()ed, NUL-terminated string stored in
 * *ret. Returns the number of characters written (not counting the NUL), or a
 * negative value on failure, in which case *ret is NULL. The caller owns the
 * returned buffer and keeps ownership of args: this function never calls
 * va_end() on it.
 */
int vasprintf(char **ret, const char *fmt, va_list args) {
    int rv;
    size_t size;
    va_list args2;

    *ret = NULL;

/* vsnprintf can destroy args, so we need to copy it for the second call */
#if defined(__va_copy)
    /* non-standard macro, but usually exists */
    __va_copy(args2, args);
#elif defined(va_copy)
    /* C99 macro. It is function-like, so it must be tested with defined():
     * a bare "#elif va_copy" always evaluates to 0. */
    va_copy(args2, args);
#else
    /* Ancient compiler. This usually works but there are no guarantees. */
    memcpy(args2, args, sizeof(va_list));
#endif

    /* First pass: measure only. */
    rv = vsnprintf(NULL, 0, fmt, args);
    if (rv < 0) {
        va_end(args2);
        return rv;
    }

    size = (size_t)rv + 1; /* vsnprintf doesn't count \0 */
    *ret = malloc(size);
    if (*ret == NULL) {
        va_end(args2);
        return -1;
    }

    /* Second pass: format into the buffer using the untouched copy. */
    rv = vsnprintf(*ret, size, fmt, args2);
    va_end(args2);
    if (rv < 0) {
        free(*ret);
        *ret = NULL; /* don't hand a dangling pointer back to the caller */
    }
    return rv;
}
#endif
29 | */ 30 | matches_size = 100; 31 | matches = ag_malloc(matches_size * sizeof(match_t)); 32 | matches_spare = 1; 33 | } else { 34 | matches_size = 0; 35 | matches = NULL; 36 | matches_spare = 0; 37 | } 38 | 39 | if (opts.query_len == 1 && opts.query[0] == '.') { 40 | matches_size = 1; 41 | matches = ag_malloc(matches_size * sizeof(match_t)); 42 | matches[0].start = 0; 43 | matches[0].end = buf_len; 44 | matches_len = 1; 45 | } else if (opts.literal) { 46 | const char *match_ptr = buf; 47 | strncmp_fp ag_strnstr_fp = get_strstr(opts.casing); 48 | 49 | while (buf_offset < buf_len) { 50 | match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, alpha_skip_lookup, find_skip_lookup); 51 | if (match_ptr == NULL) { 52 | break; 53 | } 54 | 55 | if (opts.word_regexp) { 56 | const char *start = match_ptr; 57 | const char *end = match_ptr + opts.query_len; 58 | 59 | /* Check whether both start and end of the match lie on a word 60 | * boundary 61 | */ 62 | if ((start == buf || 63 | is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) && 64 | (end == buf + buf_len || 65 | is_wordchar(*end) != opts.literal_ends_wordchar)) { 66 | /* It's a match */ 67 | } else { 68 | /* It's not a match */ 69 | match_ptr += opts.query_len; 70 | buf_offset = end - buf; 71 | continue; 72 | } 73 | } 74 | 75 | if ((size_t)matches_len + matches_spare >= matches_size) { 76 | /* TODO: benchmark initial size of matches. 100 may be too small/big */ 77 | matches_size = matches ? matches_size * 2 : 100; 78 | log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); 79 | matches = ag_realloc(matches, matches_size * sizeof(match_t)); 80 | } 81 | 82 | matches[matches_len].start = match_ptr - buf; 83 | matches[matches_len].end = matches[matches_len].start + opts.query_len; 84 | buf_offset = matches[matches_len].end; 85 | log_debug("Match found. 
File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start); 86 | matches_len++; 87 | match_ptr += opts.query_len; 88 | 89 | if (matches_len >= opts.max_matches_per_file) { 90 | log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); 91 | break; 92 | } 93 | } 94 | } else { 95 | int offset_vector[3]; 96 | while (buf_offset < buf_len && 97 | (pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) { 98 | log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); 99 | buf_offset = offset_vector[1]; 100 | 101 | /* TODO: copy-pasted from above. FIXME */ 102 | if ((size_t)matches_len + matches_spare >= matches_size) { 103 | matches_size = matches ? matches_size * 2 : 100; 104 | log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); 105 | matches = ag_realloc(matches, matches_size * sizeof(match_t)); 106 | } 107 | 108 | matches[matches_len].start = offset_vector[0]; 109 | matches[matches_len].end = offset_vector[1]; 110 | matches_len++; 111 | 112 | if (matches_len >= opts.max_matches_per_file) { 113 | log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); 114 | break; 115 | } 116 | } 117 | } 118 | 119 | if (opts.invert_match) { 120 | matches_len = invert_matches(buf, buf_len, matches, matches_len); 121 | } 122 | 123 | if (opts.stats) { 124 | pthread_mutex_lock(&stats_mtx); 125 | stats.total_bytes += buf_len; 126 | stats.total_files++; 127 | stats.total_matches += matches_len; 128 | pthread_mutex_unlock(&stats_mtx); 129 | } 130 | 131 | if (matches_len > 0) { 132 | if (binary == -1 && !opts.print_filename_only) { 133 | binary = is_binary((const void *)buf, buf_len); 134 | } 135 | pthread_mutex_lock(&print_mtx); 136 | if (opts.print_filename_only) { 137 | /* If the --files-without-matches or -L option in passed we should 138 | * not print a matching line. 
This option currently sets 139 | * opts.print_filename_only and opts.invert_match. Unfortunately 140 | * setting the latter has the side effect of making matches.len = 1 141 | * on a file-without-matches which is not desired behaviour. See 142 | * GitHub issue 206 for the consequences if this behaviour is not 143 | * checked. */ 144 | if (!opts.invert_match || matches_len < 2) { 145 | print_path(dir_full_path, opts.null_follows_filename ? 0 : '\n'); 146 | } 147 | } else if (binary) { 148 | print_binary_file_matches(dir_full_path); 149 | } else { 150 | print_file_matches(dir_full_path, buf, buf_len, matches, matches_len); 151 | } 152 | pthread_mutex_unlock(&print_mtx); 153 | opts.match_found = 1; 154 | } else if (opts.search_stream && opts.passthrough) { 155 | fprintf(out_fd, "%s", buf); 156 | } else { 157 | log_debug("No match in %s", dir_full_path); 158 | } 159 | 160 | if (matches_size > 0) { 161 | free(matches); 162 | } 163 | } 164 | 165 | /* TODO: this will only match single lines. multi-line regexes silently don't match */ 166 | void search_stream(FILE *stream, const char *path) { 167 | char *line = NULL; 168 | ssize_t line_len = 0; 169 | size_t line_cap = 0; 170 | size_t i; 171 | 172 | for (i = 1; (line_len = getline(&line, &line_cap, stream)) > 0; i++) { 173 | opts.stream_line_num = i; 174 | search_buf(line, line_len, path); 175 | } 176 | 177 | free(line); 178 | } 179 | 180 | void search_file(const char *file_full_path) { 181 | int fd; 182 | off_t f_len = 0; 183 | char *buf = NULL; 184 | struct stat statbuf; 185 | int rv = 0; 186 | FILE *pipe = NULL; 187 | 188 | fd = open(file_full_path, O_RDONLY); 189 | if (fd < 0) { 190 | /* XXXX: strerror is not thread-safe */ 191 | log_err("Skipping %s: Error opening file: %s", file_full_path, strerror(errno)); 192 | goto cleanup; 193 | } 194 | 195 | rv = fstat(fd, &statbuf); 196 | if (rv != 0) { 197 | log_err("Skipping %s: Error fstat()ing file.", file_full_path); 198 | goto cleanup; 199 | } 200 | 201 | if 
(opts.stdout_inode != 0 && opts.stdout_inode == statbuf.st_ino) { 202 | log_debug("Skipping %s: stdout is redirected to it", file_full_path); 203 | goto cleanup; 204 | } 205 | 206 | if ((statbuf.st_mode & S_IFMT) == 0) { 207 | log_err("Skipping %s: Mode %u is not a file.", file_full_path, statbuf.st_mode); 208 | goto cleanup; 209 | } 210 | 211 | if (statbuf.st_mode & S_IFIFO) { 212 | log_debug("%s is a named pipe. stream searching", file_full_path); 213 | pipe = fdopen(fd, "r"); 214 | search_stream(pipe, file_full_path); 215 | fclose(pipe); 216 | goto cleanup; 217 | } 218 | 219 | f_len = statbuf.st_size; 220 | 221 | if (f_len == 0) { 222 | log_debug("Skipping %s: file is empty.", file_full_path); 223 | goto cleanup; 224 | } 225 | 226 | if (!opts.literal && f_len > INT_MAX) { 227 | log_err("Skipping %s: pcre_exec() can't handle files larger than %i bytes.", file_full_path, INT_MAX); 228 | goto cleanup; 229 | } 230 | 231 | #ifdef _WIN32 232 | { 233 | HANDLE hmmap = CreateFileMapping( 234 | (HANDLE)_get_osfhandle(fd), 0, PAGE_READONLY, 0, f_len, NULL); 235 | buf = (char *)MapViewOfFile(hmmap, FILE_SHARE_READ, 0, 0, f_len); 236 | if (hmmap != NULL) 237 | CloseHandle(hmmap); 238 | } 239 | if (buf == NULL) { 240 | FormatMessageA( 241 | FORMAT_MESSAGE_ALLOCATE_BUFFER | 242 | FORMAT_MESSAGE_FROM_SYSTEM | 243 | FORMAT_MESSAGE_IGNORE_INSERTS, 244 | NULL, GetLastError(), 0, (void *)&buf, 0, NULL); 245 | log_err("File %s failed to load: %s.", file_full_path, buf); 246 | LocalFree((void *)buf); 247 | goto cleanup; 248 | } 249 | #else 250 | buf = mmap(0, f_len, PROT_READ, MAP_SHARED, fd, 0); 251 | if (buf == MAP_FAILED) { 252 | log_err("File %s failed to load: %s.", file_full_path, strerror(errno)); 253 | goto cleanup; 254 | } 255 | #if HAVE_MADVISE 256 | madvise(buf, f_len, MADV_SEQUENTIAL); 257 | #elif HAVE_POSIX_FADVISE 258 | posix_fadvise(fd, 0, f_len, POSIX_MADV_SEQUENTIAL); 259 | #endif 260 | #endif 261 | 262 | if (opts.search_zip_files) { 263 | ag_compression_type 
zip_type = is_zipped(buf, f_len); 264 | if (zip_type != AG_NO_COMPRESSION) { 265 | int _buf_len = (int)f_len; 266 | char *_buf = decompress(zip_type, buf, f_len, file_full_path, &_buf_len); 267 | if (_buf == NULL || _buf_len == 0) { 268 | log_err("Cannot decompress zipped file %s", file_full_path); 269 | goto cleanup; 270 | } 271 | search_buf(_buf, _buf_len, file_full_path); 272 | free(_buf); 273 | goto cleanup; 274 | } 275 | } 276 | 277 | search_buf(buf, f_len, file_full_path); 278 | 279 | cleanup: 280 | 281 | if (buf != NULL) { 282 | #ifdef _WIN32 283 | UnmapViewOfFile(buf); 284 | #else 285 | munmap(buf, f_len); 286 | #endif 287 | } 288 | if (fd != -1) { 289 | close(fd); 290 | } 291 | } 292 | 293 | void *search_file_worker(void *i) { 294 | work_queue_t *queue_item; 295 | int worker_id = *(int *)i; 296 | 297 | log_debug("Worker %i started", worker_id); 298 | while (TRUE) { 299 | pthread_mutex_lock(&work_queue_mtx); 300 | while (work_queue == NULL) { 301 | if (done_adding_files) { 302 | pthread_mutex_unlock(&work_queue_mtx); 303 | log_debug("Worker %i finished.", worker_id); 304 | pthread_exit(NULL); 305 | } 306 | pthread_cond_wait(&files_ready, &work_queue_mtx); 307 | } 308 | queue_item = work_queue; 309 | work_queue = work_queue->next; 310 | if (work_queue == NULL) { 311 | work_queue_tail = NULL; 312 | } 313 | pthread_mutex_unlock(&work_queue_mtx); 314 | 315 | search_file(queue_item->path); 316 | free(queue_item->path); 317 | free(queue_item); 318 | } 319 | } 320 | 321 | static int check_symloop_enter(const char *path, dirkey_t *outkey) { 322 | #ifdef _WIN32 323 | return SYMLOOP_OK; 324 | #else 325 | struct stat buf; 326 | symdir_t *item_found = NULL; 327 | symdir_t *new_item = NULL; 328 | 329 | memset(outkey, 0, sizeof(dirkey_t)); 330 | outkey->dev = 0; 331 | outkey->ino = 0; 332 | 333 | int res = stat(path, &buf); 334 | if (res != 0) { 335 | log_err("Error stat()ing: %s", path); 336 | return SYMLOOP_ERROR; 337 | } 338 | 339 | outkey->dev = buf.st_dev; 340 | 
outkey->ino = buf.st_ino; 341 | 342 | HASH_FIND(hh, symhash, outkey, sizeof(dirkey_t), item_found); 343 | if (item_found) { 344 | return SYMLOOP_LOOP; 345 | } 346 | 347 | new_item = (symdir_t *)ag_malloc(sizeof(symdir_t)); 348 | memcpy(&new_item->key, outkey, sizeof(dirkey_t)); 349 | HASH_ADD(hh, symhash, key, sizeof(dirkey_t), new_item); 350 | return SYMLOOP_OK; 351 | #endif 352 | } 353 | 354 | static int check_symloop_leave(dirkey_t *dirkey) { 355 | #ifdef _WIN32 356 | return SYMLOOP_OK; 357 | #else 358 | symdir_t *item_found = NULL; 359 | 360 | if (dirkey->dev == 0 && dirkey->ino == 0) { 361 | return SYMLOOP_ERROR; 362 | } 363 | 364 | HASH_FIND(hh, symhash, dirkey, sizeof(dirkey_t), item_found); 365 | if (!item_found) { 366 | log_err("item not found! weird stuff...\n"); 367 | return SYMLOOP_ERROR; 368 | } 369 | 370 | HASH_DELETE(hh, symhash, item_found); 371 | free(item_found); 372 | return SYMLOOP_OK; 373 | #endif 374 | } 375 | 376 | /* TODO: Append matches to some data structure instead of just printing them out. 377 | * Then ag can have sweet summaries of matches/files scanned/time/etc. 378 | */ 379 | void search_dir(ignores *ig, const char *base_path, const char *path, const int depth) { 380 | struct dirent **dir_list = NULL; 381 | struct dirent *dir = NULL; 382 | scandir_baton_t scandir_baton; 383 | int results = 0; 384 | 385 | char *dir_full_path = NULL; 386 | const char *ignore_file = NULL; 387 | int i; 388 | 389 | int symres; 390 | dirkey_t current_dirkey; 391 | 392 | symres = check_symloop_enter(path, ¤t_dirkey); 393 | if (symres == SYMLOOP_LOOP) { 394 | log_err("Recursive directory loop: %s", path); 395 | return; 396 | } 397 | 398 | /* find agignore/gitignore/hgignore/etc files to load ignore patterns from */ 399 | for (i = 0; opts.skip_vcs_ignores ? 
(i == 0) : (ignore_pattern_files[i] != NULL); i++) { 400 | ignore_file = ignore_pattern_files[i]; 401 | ag_asprintf(&dir_full_path, "%s/%s", path, ignore_file); 402 | if (strcmp(SVN_DIR, ignore_file) == 0) { 403 | load_svn_ignore_patterns(ig, dir_full_path); 404 | } else { 405 | load_ignore_patterns(ig, dir_full_path); 406 | } 407 | free(dir_full_path); 408 | dir_full_path = NULL; 409 | } 410 | 411 | if (opts.path_to_agignore) { 412 | load_ignore_patterns(ig, opts.path_to_agignore); 413 | } 414 | 415 | scandir_baton.ig = ig; 416 | scandir_baton.base_path = base_path; 417 | scandir_baton.base_path_len = base_path ? strlen(base_path) : 0; 418 | results = ag_scandir(path, &dir_list, &filename_filter, &scandir_baton); 419 | if (results == 0) { 420 | log_debug("No results found in directory %s", path); 421 | goto search_dir_cleanup; 422 | } else if (results == -1) { 423 | if (errno == ENOTDIR) { 424 | /* Not a directory. Probably a file. */ 425 | if (depth == 0 && opts.paths_len == 1) { 426 | /* If we're only searching one file, don't print the filename header at the top. */ 427 | if (opts.print_path == PATH_PRINT_DEFAULT || opts.print_path == PATH_PRINT_DEFAULT_EACH_LINE) { 428 | opts.print_path = PATH_PRINT_NOTHING; 429 | } 430 | } 431 | search_file(path); 432 | } else { 433 | log_err("Error opening directory %s: %s", path, strerror(errno)); 434 | } 435 | goto search_dir_cleanup; 436 | } 437 | 438 | int offset_vector[3]; 439 | int rc = 0; 440 | work_queue_t *queue_item; 441 | 442 | for (i = 0; i < results; i++) { 443 | queue_item = NULL; 444 | dir = dir_list[i]; 445 | ag_asprintf(&dir_full_path, "%s/%s", path, dir->d_name); 446 | 447 | /* If a link points to a directory then we need to treat it as a directory. 
*/ 448 | if (!opts.follow_symlinks && is_symlink(path, dir)) { 449 | log_debug("File %s ignored becaused it's a symlink", dir->d_name); 450 | goto cleanup; 451 | } 452 | 453 | if (!is_directory(path, dir)) { 454 | if (opts.file_search_regex) { 455 | rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path), 456 | 0, 0, offset_vector, 3); 457 | if (rc < 0) { /* no match */ 458 | log_debug("Skipping %s due to file_search_regex.", dir_full_path); 459 | goto cleanup; 460 | } else if (opts.match_files) { 461 | log_debug("match_files: file_search_regex matched for %s.", dir_full_path); 462 | pthread_mutex_lock(&print_mtx); 463 | print_path(dir_full_path, '\n'); 464 | pthread_mutex_unlock(&print_mtx); 465 | goto cleanup; 466 | } 467 | } 468 | 469 | queue_item = ag_malloc(sizeof(work_queue_t)); 470 | queue_item->path = dir_full_path; 471 | queue_item->next = NULL; 472 | pthread_mutex_lock(&work_queue_mtx); 473 | if (work_queue_tail == NULL) { 474 | work_queue = queue_item; 475 | } else { 476 | work_queue_tail->next = queue_item; 477 | } 478 | work_queue_tail = queue_item; 479 | pthread_cond_signal(&files_ready); 480 | pthread_mutex_unlock(&work_queue_mtx); 481 | log_debug("%s added to work queue", dir_full_path); 482 | } else if (opts.recurse_dirs) { 483 | if (depth < opts.max_search_depth) { 484 | log_debug("Searching dir %s", dir_full_path); 485 | ignores *child_ig = init_ignore(ig); 486 | search_dir(child_ig, base_path, dir_full_path, depth + 1); 487 | cleanup_ignore(child_ig); 488 | } else { 489 | log_err("Skipping %s. 
Use the --depth option to search deeper.", dir_full_path); 490 | } 491 | } 492 | 493 | cleanup: 494 | free(dir); 495 | dir = NULL; 496 | if (queue_item == NULL) { 497 | free(dir_full_path); 498 | dir_full_path = NULL; 499 | } 500 | } 501 | 502 | search_dir_cleanup: 503 | check_symloop_leave(¤t_dirkey); 504 | free(dir_list); 505 | dir_list = NULL; 506 | } 507 | -------------------------------------------------------------------------------- /src/options.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "config.h" 12 | #include "ignore.h" 13 | #include "options.h" 14 | #include "lang.h" 15 | #include "log.h" 16 | #include "util.h" 17 | 18 | const char *color_line_number = "\033[1;33m"; /* yellow with black background */ 19 | const char *color_match = "\033[30;43m"; /* black with yellow background */ 20 | const char *color_path = "\033[1;32m"; /* bold green */ 21 | 22 | /* TODO: try to obey out_fd? 
*/ 23 | void usage(void) { 24 | printf("\n"); 25 | printf("Usage: ag [FILE-TYPE] [OPTIONS] PATTERN [PATH]\n\n"); 26 | 27 | printf(" Recursively search for PATTERN in PATH.\n"); 28 | printf(" Like grep or ack, but faster.\n\n"); 29 | 30 | printf("Example:\n ag -i foo /bar/\n\n"); 31 | 32 | printf("\ 33 | Output Options:\n\ 34 | --ackmate Print results in AckMate-parseable format\n\ 35 | -A --after [LINES] Print lines after match (Default: 2)\n\ 36 | -B --before [LINES] Print lines before match (Default: 2)\n\ 37 | --[no]break Print newlines between matches in different files\n\ 38 | (Enabled by default)\n\ 39 | --[no]color Print color codes in results (Enabled by default)\n\ 40 | --color-line-number Color codes for line numbers (Default: 1;33)\n\ 41 | --color-match Color codes for result match numbers (Default: 30;43)\n\ 42 | --color-path Color codes for path names (Default: 1;32)\n\ 43 | --column Print column numbers in results\n\ 44 | -H --[no]heading Print file names (Enabled unless searching a single file)\n\ 45 | --line-numbers Print line numbers even for streams\n\ 46 | -C --context [LINES] Print lines before and after matches (Default: 2)\n\ 47 | --[no]group Same as --[no]break --[no]heading\n\ 48 | -g PATTERN Print filenames matching PATTERN\n\ 49 | -l --files-with-matches Only print filenames that contain matches\n\ 50 | (don't print the matching lines)\n\ 51 | -L --files-without-matches\n\ 52 | Only print filenames that don't contain matches\n\ 53 | --null Follow filename (-l|-L) with null for 'xargs -0'\n\ 54 | --no-numbers Don't print line numbers\n\ 55 | --print-long-lines Print matches on very long lines (Default: >2k characters)\n\ 56 | --passthrough When searching a stream, print all lines even if they\n\ 57 | don't match\n\ 58 | --silent Suppress all log messages, including errors\n\ 59 | --stats Print stats (files scanned, time taken, etc.)\n\ 60 | \n\ 61 | Search Options:\n\ 62 | -a --all-types Search all files (doesn't include hidden files\n\ 63 
| or patterns from ignore files)\n\ 64 | -D --debug Ridiculous debugging (probably not useful)\n\ 65 | --depth NUM Search up to NUM directories deep (Default: 25)\n\ 66 | -f --follow Follow symlinks\n\ 67 | -G --file-search-regex PATTERN Limit search to filenames matching PATTERN\n\ 68 | --hidden Search hidden files (obeys .*ignore files)\n\ 69 | -i --ignore-case Match case insensitively\n\ 70 | --ignore PATTERN Ignore files/directories matching PATTERN\n\ 71 | (literal file/directory names also allowed)\n\ 72 | --ignore-dir NAME Alias for --ignore for compatibility with ack.\n\ 73 | -m --max-count NUM Skip the rest of a file after NUM matches (Default: 10,000)\n\ 74 | -p --path-to-agignore STRING\n\ 75 | Use .agignore file at STRING\n\ 76 | -Q --literal Don't parse PATTERN as a regular expression\n\ 77 | -s --case-sensitive Match case sensitively\n\ 78 | -S --smart-case Match case insensitively unless PATTERN contains\n\ 79 | uppercase characters (Enabled by default)\n\ 80 | --search-binary Search binary files for matches\n\ 81 | -t --all-text Search all text files (doesn't include hidden files)\n\ 82 | -u --unrestricted Search all files (ignore .agignore, .gitignore, etc.;\n\ 83 | searches binary and hidden files as well)\n\ 84 | -U --skip-vcs-ignores Ignore VCS ignore files\n\ 85 | (.gitignore, .hgignore, .svnignore; still obey .agignore)\n\ 86 | -v --invert-match\n\ 87 | -w --word-regexp Only match whole words\n\ 88 | -z --search-zip Search contents of compressed (e.g., gzip) files\n\ 89 | \n"); 90 | printf("File Types:\n\ 91 | The search can be restricted to certain types of files. 
Example:\n\ 92 | ag --html needle\n\ 93 | - Searches for 'needle' in files with suffix .htm, .html, .shtml or .xhtml.\n\ 94 | \n\ 95 | For a list of supported file types run:\n\ 96 | ag --list-file-types\n\n"); 97 | } 98 | 99 | void print_version(void) { 100 | printf("ag version %s\n", PACKAGE_VERSION); 101 | } 102 | 103 | void init_options(void) { 104 | memset(&opts, 0, sizeof(opts)); 105 | opts.casing = CASE_SMART; 106 | #ifdef _WIN32 107 | opts.color = getenv("ANSICON") ? TRUE : FALSE; 108 | #else 109 | opts.color = TRUE; 110 | #endif 111 | opts.max_matches_per_file = 10000; 112 | opts.max_search_depth = 25; 113 | opts.print_break = TRUE; 114 | opts.print_path = PATH_PRINT_DEFAULT; 115 | opts.print_line_numbers = TRUE; 116 | opts.recurse_dirs = TRUE; 117 | opts.color_path = ag_strdup(color_path); 118 | opts.color_match = ag_strdup(color_match); 119 | opts.color_line_number = ag_strdup(color_line_number); 120 | } 121 | 122 | void cleanup_options(void) { 123 | free(opts.color_path); 124 | free(opts.color_match); 125 | free(opts.color_line_number); 126 | 127 | if (opts.query) { 128 | free(opts.query); 129 | } 130 | 131 | pcre_free(opts.re); 132 | if (opts.re_extra) { 133 | /* Using pcre_free_study on pcre_extra* can segfault on some versions of PCRE */ 134 | pcre_free(opts.re_extra); 135 | } 136 | 137 | if (opts.ackmate_dir_filter) { 138 | pcre_free(opts.ackmate_dir_filter); 139 | } 140 | if (opts.ackmate_dir_filter_extra) { 141 | pcre_free(opts.ackmate_dir_filter_extra); 142 | } 143 | 144 | if (opts.file_search_regex) { 145 | pcre_free(opts.file_search_regex); 146 | } 147 | if (opts.file_search_regex_extra) { 148 | pcre_free(opts.file_search_regex_extra); 149 | } 150 | } 151 | 152 | void parse_options(int argc, char **argv, char **base_paths[], char **paths[]) { 153 | int ch; 154 | int i; 155 | int path_len = 0; 156 | int useless = 0; 157 | int group = 1; 158 | int help = 0; 159 | int version = 0; 160 | int list_file_types = 0; 161 | int opt_index = 0; 162 | char 
*num_end; 163 | const char *home_dir = getenv("HOME"); 164 | char *ignore_file_path = NULL; 165 | int needs_query = 1; 166 | struct stat statbuf; 167 | int rv; 168 | 169 | size_t longopts_len, full_len; 170 | option_t *longopts; 171 | char *lang_regex = NULL; 172 | 173 | init_options(); 174 | 175 | option_t base_longopts[] = { 176 | { "ackmate", no_argument, &opts.ackmate, 1 }, 177 | { "ackmate-dir-filter", required_argument, NULL, 0 }, 178 | { "after", optional_argument, NULL, 'A' }, 179 | { "all-text", no_argument, NULL, 't' }, 180 | { "all-types", no_argument, NULL, 'a' }, 181 | { "before", optional_argument, NULL, 'B' }, 182 | { "break", no_argument, &opts.print_break, 1 }, 183 | { "case-sensitive", no_argument, NULL, 's' }, 184 | { "color", no_argument, &opts.color, 1 }, 185 | { "color-line-number", required_argument, NULL, 0 }, 186 | { "color-match", required_argument, NULL, 0 }, 187 | { "color-path", required_argument, NULL, 0 }, 188 | { "column", no_argument, &opts.column, 1 }, 189 | { "context", optional_argument, NULL, 'C' }, 190 | { "debug", no_argument, NULL, 'D' }, 191 | { "depth", required_argument, NULL, 0 }, 192 | { "file-search-regex", required_argument, NULL, 'G' }, 193 | { "files-with-matches", no_argument, NULL, 'l' }, 194 | { "files-without-matches", no_argument, NULL, 'L' }, 195 | { "follow", no_argument, &opts.follow_symlinks, 1 }, 196 | { "group", no_argument, &group, 1 }, 197 | { "heading", no_argument, &opts.print_path, PATH_PRINT_TOP }, 198 | { "help", no_argument, NULL, 'h' }, 199 | { "hidden", no_argument, &opts.search_hidden_files, 1 }, 200 | { "ignore", required_argument, NULL, 0 }, 201 | { "ignore-case", no_argument, NULL, 'i' }, 202 | { "ignore-dir", required_argument, NULL, 0 }, 203 | { "invert-match", no_argument, &opts.invert_match, 1 }, 204 | { "line-numbers", no_argument, &opts.print_line_numbers, 2 }, 205 | { "list-file-types", no_argument, &list_file_types, 1 }, 206 | { "literal", no_argument, NULL, 'Q' }, 207 | { "match", 
no_argument, &useless, 0 }, 208 | { "max-count", required_argument, NULL, 'm' }, 209 | { "no-numbers", no_argument, NULL, 0 }, 210 | { "no-recurse", no_argument, NULL, 'n' }, 211 | { "nobreak", no_argument, &opts.print_break, 0 }, 212 | { "nocolor", no_argument, &opts.color, 0 }, 213 | { "nofollow", no_argument, &opts.follow_symlinks, 0 }, 214 | { "nogroup", no_argument, &group, 0 }, 215 | { "noheading", no_argument, &opts.print_path, PATH_PRINT_EACH_LINE }, 216 | { "nopager", no_argument, NULL, 0 }, 217 | { "null", no_argument, &opts.null_follows_filename, 1 }, 218 | { "pager", required_argument, NULL, 0 }, 219 | { "parallel", no_argument, &opts.parallel, 1 }, 220 | { "passthrough", no_argument, &opts.passthrough, 1 }, 221 | { "passthru", no_argument, &opts.passthrough, 1 }, 222 | { "path-to-agignore", required_argument, NULL, 'p' }, 223 | { "print-long-lines", no_argument, &opts.print_long_lines, 1 }, 224 | { "recurse", no_argument, NULL, 'r' }, 225 | { "search-binary", no_argument, &opts.search_binary_files, 1 }, 226 | { "search-files", no_argument, &opts.search_stream, 0 }, 227 | { "search-zip", no_argument, &opts.search_zip_files, 1 }, 228 | { "silent", no_argument, NULL, 0 }, 229 | { "skip-vcs-ignores", no_argument, NULL, 'U' }, 230 | { "smart-case", no_argument, NULL, 'S' }, 231 | { "stats", no_argument, &opts.stats, 1 }, 232 | { "unrestricted", no_argument, NULL, 'u' }, 233 | { "version", no_argument, &version, 1 }, 234 | { "word-regexp", no_argument, NULL, 'w' }, 235 | { "workers", required_argument, NULL, 0 }, 236 | }; 237 | 238 | longopts_len = (sizeof(base_longopts) / sizeof(option_t)); 239 | full_len = (longopts_len + LANG_COUNT + 1); 240 | longopts = ag_malloc(full_len * sizeof(option_t)); 241 | memcpy(longopts, base_longopts, sizeof(base_longopts)); 242 | 243 | for (i = 0; i < LANG_COUNT; i++) { 244 | option_t opt = { langs[i].name, no_argument, NULL, 0 }; 245 | longopts[i + longopts_len] = opt; 246 | } 247 | longopts[full_len - 1] = (option_t) { 
NULL, 0, NULL, 0 }; 248 | 249 | if (argc < 2) { 250 | usage(); 251 | cleanup_ignore(root_ignores); 252 | cleanup_options(); 253 | exit(1); 254 | } 255 | 256 | rv = fstat(fileno(stdin), &statbuf); 257 | if (rv == 0) { 258 | if (S_ISFIFO(statbuf.st_mode)) { 259 | opts.search_stream = 1; 260 | } 261 | } 262 | 263 | /* If we're not outputting to a terminal. change output to: 264 | * turn off colors 265 | * print filenames on every line 266 | */ 267 | if (!isatty(fileno(stdout))) { 268 | opts.color = 0; 269 | group = 0; 270 | 271 | /* Don't search the file that stdout is redirected to */ 272 | rv = fstat(fileno(stdout), &statbuf); 273 | if (rv != 0) { 274 | die("Error fstat()ing stdout"); 275 | } 276 | opts.stdout_inode = statbuf.st_ino; 277 | } 278 | 279 | while ((ch = getopt_long(argc, argv, "A:aB:C:DG:g:fHhiLlm:np:QRrSsvVtuUwz", longopts, &opt_index)) != -1) { 280 | switch (ch) { 281 | case 'A': 282 | if (optarg) { 283 | opts.after = strtol(optarg, &num_end, 10); 284 | if (num_end == optarg || *num_end != '\0' || errno == ERANGE) { 285 | /* This arg must be the search string instead of the after length */ 286 | optind--; 287 | opts.after = DEFAULT_AFTER_LEN; 288 | } 289 | } else { 290 | opts.after = DEFAULT_AFTER_LEN; 291 | } 292 | break; 293 | case 'a': 294 | opts.search_all_files = 1; 295 | opts.search_binary_files = 1; 296 | break; 297 | case 'B': 298 | if (optarg) { 299 | opts.before = strtol(optarg, &num_end, 10); 300 | if (num_end == optarg || *num_end != '\0' || errno == ERANGE) { 301 | /* This arg must be the search string instead of the before length */ 302 | optind--; 303 | opts.before = DEFAULT_BEFORE_LEN; 304 | } 305 | } else { 306 | opts.before = DEFAULT_BEFORE_LEN; 307 | } 308 | break; 309 | case 'C': 310 | if (optarg) { 311 | opts.context = strtol(optarg, &num_end, 10); 312 | if (num_end == optarg || *num_end != '\0' || errno == ERANGE) { 313 | /* This arg must be the search string instead of the context length */ 314 | optind--; 315 | opts.context = 
DEFAULT_CONTEXT_LEN; 316 | } 317 | } else { 318 | opts.context = DEFAULT_CONTEXT_LEN; 319 | } 320 | break; 321 | case 'D': 322 | set_log_level(LOG_LEVEL_DEBUG); 323 | break; 324 | case 'f': 325 | opts.follow_symlinks = 1; 326 | break; 327 | case 'g': 328 | needs_query = 0; 329 | opts.match_files = 1; 330 | /* Fall through and build regex */ 331 | case 'G': 332 | compile_study(&opts.file_search_regex, &opts.file_search_regex_extra, optarg, opts.casing & PCRE_CASELESS, 0); 333 | opts.casing = CASE_SENSITIVE; 334 | break; 335 | case 'H': 336 | opts.print_path = PATH_PRINT_TOP; 337 | break; 338 | case 'h': 339 | help = 1; 340 | break; 341 | case 'i': 342 | opts.casing = CASE_INSENSITIVE; 343 | break; 344 | case 'L': 345 | opts.invert_match = 1; 346 | /* fall through */ 347 | case 'l': 348 | opts.print_filename_only = 1; 349 | break; 350 | case 'm': 351 | opts.max_matches_per_file = atoi(optarg); 352 | break; 353 | case 'n': 354 | opts.recurse_dirs = 0; 355 | break; 356 | case 'p': 357 | opts.path_to_agignore = optarg; 358 | break; 359 | case 'Q': 360 | opts.literal = 1; 361 | break; 362 | case 'R': 363 | case 'r': 364 | opts.recurse_dirs = 1; 365 | break; 366 | case 'S': 367 | opts.casing = CASE_SMART; 368 | break; 369 | case 's': 370 | opts.casing = CASE_SENSITIVE; 371 | break; 372 | case 't': 373 | opts.search_all_files = 1; 374 | break; 375 | case 'u': 376 | opts.search_binary_files = 1; 377 | opts.search_all_files = 1; 378 | opts.search_hidden_files = 1; 379 | break; 380 | case 'U': 381 | opts.skip_vcs_ignores = 1; 382 | break; 383 | case 'v': 384 | opts.invert_match = 1; 385 | break; 386 | case 'V': 387 | version = 1; 388 | break; 389 | case 'w': 390 | opts.word_regexp = 1; 391 | break; 392 | case 'z': 393 | opts.search_zip_files = 1; 394 | break; 395 | case 0: /* Long option */ 396 | if (strcmp(longopts[opt_index].name, "ackmate-dir-filter") == 0) { 397 | compile_study(&opts.ackmate_dir_filter, &opts.ackmate_dir_filter_extra, optarg, 0, 0); 398 | break; 399 | } 
else if (strcmp(longopts[opt_index].name, "depth") == 0) { 400 | opts.max_search_depth = atoi(optarg); 401 | break; 402 | } else if (strcmp(longopts[opt_index].name, "no-numbers") == 0) { 403 | opts.print_line_numbers = FALSE; 404 | break; 405 | } else if (strcmp(longopts[opt_index].name, "ignore-dir") == 0) { 406 | add_ignore_pattern(root_ignores, optarg); 407 | break; 408 | } else if (strcmp(longopts[opt_index].name, "ignore") == 0) { 409 | add_ignore_pattern(root_ignores, optarg); 410 | break; 411 | } else if (strcmp(longopts[opt_index].name, "nopager") == 0) { 412 | out_fd = stdout; 413 | opts.pager = NULL; 414 | break; 415 | } else if (strcmp(longopts[opt_index].name, "pager") == 0) { 416 | opts.pager = optarg; 417 | break; 418 | } else if (strcmp(longopts[opt_index].name, "workers") == 0) { 419 | opts.workers = atoi(optarg); 420 | break; 421 | } else if (strcmp(longopts[opt_index].name, "color-line-number") == 0) { 422 | free(opts.color_line_number); 423 | ag_asprintf(&opts.color_line_number, "\033[%sm", optarg); 424 | break; 425 | } else if (strcmp(longopts[opt_index].name, "color-match") == 0) { 426 | free(opts.color_match); 427 | ag_asprintf(&opts.color_match, "\033[%sm", optarg); 428 | break; 429 | } else if (strcmp(longopts[opt_index].name, "color-path") == 0) { 430 | free(opts.color_path); 431 | ag_asprintf(&opts.color_path, "\033[%sm", optarg); 432 | break; 433 | } else if (strcmp(longopts[opt_index].name, "silent") == 0) { 434 | set_log_level(LOG_LEVEL_NONE); 435 | break; 436 | } 437 | 438 | /* Continue to usage if we don't recognize the option */ 439 | if (longopts[opt_index].flag != 0) { 440 | break; 441 | } 442 | 443 | for (i = 0; i < LANG_COUNT; i++) { 444 | if (strcmp(longopts[opt_index].name, langs[i].name) == 0) { 445 | lang_regex = make_lang_regex(langs[i].extensions); 446 | compile_study(&opts.file_search_regex, &opts.file_search_regex_extra, lang_regex, 0, 0); 447 | break; 448 | } 449 | } 450 | if (lang_regex) { 451 | free(lang_regex); 452 | 
lang_regex = NULL; 453 | break; 454 | } 455 | 456 | log_err("option %s does not take a value", longopts[opt_index].name); 457 | default: 458 | usage(); 459 | exit(1); 460 | } 461 | } 462 | 463 | free(longopts); 464 | 465 | argc -= optind; 466 | argv += optind; 467 | 468 | if (opts.pager) { 469 | out_fd = popen(opts.pager, "w"); 470 | if (!out_fd) { 471 | perror("Failed to run pager"); 472 | exit(1); 473 | } 474 | } 475 | 476 | if (help) { 477 | usage(); 478 | exit(0); 479 | } 480 | 481 | if (version) { 482 | print_version(); 483 | exit(0); 484 | } 485 | 486 | if (list_file_types) { 487 | int lang_index; 488 | printf("The following file types are supported:\n"); 489 | for (lang_index = 0; lang_index < LANG_COUNT; lang_index++) { 490 | printf(" --%s\n ", langs[lang_index].name); 491 | int j; 492 | for (j = 0; j < MAX_EXTENSIONS && langs[lang_index].extensions[j]; j++) { 493 | printf(" .%s", langs[lang_index].extensions[j]); 494 | } 495 | printf("\n\n"); 496 | } 497 | exit(0); 498 | } 499 | 500 | if (needs_query && argc == 0) { 501 | log_err("What do you want to search for?"); 502 | exit(1); 503 | } 504 | 505 | if (home_dir && !opts.search_all_files) { 506 | log_debug("Found user's home dir: %s", home_dir); 507 | ag_asprintf(&ignore_file_path, "%s/%s", home_dir, ignore_pattern_files[0]); 508 | load_ignore_patterns(root_ignores, ignore_file_path); 509 | free(ignore_file_path); 510 | } 511 | 512 | if (!opts.skip_vcs_ignores) { 513 | FILE *gitconfig_file = NULL; 514 | size_t buf_len = 0; 515 | char *gitconfig_res = NULL; 516 | 517 | gitconfig_file = popen("git config -z --get core.excludesfile 2>/dev/null", "r"); 518 | if (gitconfig_file != NULL) { 519 | do { 520 | gitconfig_res = ag_realloc(gitconfig_res, buf_len + 65); 521 | buf_len += fread(gitconfig_res + buf_len, 1, 64, gitconfig_file); 522 | } while (!feof(gitconfig_file) && buf_len > 0 && buf_len % 64 == 0); 523 | gitconfig_res[buf_len] = '\0'; 524 | load_ignore_patterns(root_ignores, gitconfig_res); 525 | 
free(gitconfig_res); 526 | pclose(gitconfig_file); 527 | } 528 | } 529 | 530 | if (opts.context > 0) { 531 | opts.before = opts.context; 532 | opts.after = opts.context; 533 | } 534 | 535 | if (opts.ackmate) { 536 | opts.color = 0; 537 | opts.print_break = 1; 538 | group = 1; 539 | opts.search_stream = 0; 540 | } 541 | 542 | if (opts.parallel) { 543 | opts.search_stream = 0; 544 | } 545 | 546 | if (opts.print_path != PATH_PRINT_DEFAULT || opts.print_break == 0) { 547 | goto skip_group; 548 | } 549 | 550 | if (group) { 551 | opts.print_break = 1; 552 | } else { 553 | opts.print_path = PATH_PRINT_DEFAULT_EACH_LINE; 554 | opts.print_break = 0; 555 | } 556 | 557 | skip_group: 558 | if (opts.search_stream) { 559 | opts.print_break = 0; 560 | opts.print_path = PATH_PRINT_NOTHING; 561 | if (opts.print_line_numbers != 2) { 562 | opts.print_line_numbers = 0; 563 | } 564 | } 565 | 566 | if (needs_query) { 567 | opts.query = ag_strdup(argv[0]); 568 | argc--; 569 | argv++; 570 | } else { 571 | opts.query = ag_strdup("."); 572 | } 573 | opts.query_len = strlen(opts.query); 574 | 575 | log_debug("Query is %s", opts.query); 576 | 577 | if (opts.query_len == 0) { 578 | log_err("Error: No query. What do you want to search for?"); 579 | exit(1); 580 | } 581 | 582 | if (!is_regex(opts.query)) { 583 | opts.literal = 1; 584 | } 585 | 586 | char *path = NULL; 587 | char *tmp = NULL; 588 | opts.paths_len = argc; 589 | if (argc > 0) { 590 | *paths = ag_calloc(sizeof(char *), argc + 1); 591 | *base_paths = ag_calloc(sizeof(char *), argc + 1); 592 | for (i = 0; i < argc; i++) { 593 | path = ag_strdup(argv[i]); 594 | path_len = strlen(path); 595 | /* kill trailing slash */ 596 | if (path_len > 1 && path[path_len - 1] == '/') { 597 | path[path_len - 1] = '\0'; 598 | } 599 | (*paths)[i] = path; 600 | tmp = ag_malloc(PATH_MAX); 601 | (*base_paths)[i] = realpath(path, tmp); 602 | } 603 | /* Make sure we search these paths instead of stdin. 
*/ 604 | opts.search_stream = 0; 605 | } else { 606 | path = ag_strdup("."); 607 | *paths = ag_malloc(sizeof(char *) * 2); 608 | *base_paths = ag_malloc(sizeof(char *) * 2); 609 | (*paths)[0] = path; 610 | tmp = ag_malloc(PATH_MAX); 611 | (*base_paths)[0] = realpath(path, tmp); 612 | i = 1; 613 | } 614 | (*paths)[i] = NULL; 615 | (*base_paths)[i] = NULL; 616 | } 617 | --------------------------------------------------------------------------------